diff --git a/sys/conf/defines b/sys/conf/defines
new file mode 100644
index 00000000000..ea7046324e3
--- /dev/null
+++ b/sys/conf/defines
@@ -0,0 +1,15 @@
+/:#if.*[ \t]*KPROF/d
+/:#if.*[ \t]*PGINPROF/d
+/:#if.*[ \t]*UNFAST/d
+/:#if.*[ \t]*INSECURE/d
+/:#if.*[ \t]*TRACE/d
+/:#if.*[ \t]*DISKMON/d
+/:#if.*[ \t]*INTRLVE/d
+/:#if.*[ \t]*lint/d
+/:#if.*[ \t]*notdef/d
+/:#if.*[ \t]*unneeded/d
+/:#if.*[ \t]*vax/d
+/:#if.*[ \t]*TCPTRUEOOB/d
+/:#if.*[ \t]*irele/d
+/:#if.*[ \t]*ilock/d
+/:#if.*[ \t]*notyet/d
diff --git a/sys/conf/files b/sys/conf/files
new file mode 100644
index 00000000000..c083f2e1dee
--- /dev/null
+++ b/sys/conf/files
@@ -0,0 +1,274 @@
+isofs/cd9660/cd9660_bmap.c	optional cd9660
+isofs/cd9660/cd9660_lookup.c	optional cd9660
+isofs/cd9660/cd9660_node.c	optional cd9660
+isofs/cd9660/cd9660_rrip.c	optional cd9660
+isofs/cd9660/cd9660_util.c	optional cd9660
+isofs/cd9660/cd9660_vfsops.c	optional cd9660
+isofs/cd9660/cd9660_vnops.c	optional cd9660
+kdb/kdb_access.c	optional kadb
+kdb/kdb_command.c	optional kadb
+kdb/kdb_ctype.c		optional kadb
+kdb/kdb_expr.c		optional kadb
+kdb/kdb_format.c	optional kadb
+kdb/kdb_input.c		optional kadb
+kdb/kdb_message.c	optional kadb
+kdb/kdb_output.c	optional kadb
+kdb/kdb_pcs.c		optional kadb
+kdb/kdb_print.c		optional kadb
+kdb/kdb_runpcs.c	optional kadb
+kdb/kdb_sym.c		optional kadb
+kdb/kdb_trap.c		optional kadb
+kern/init_main.c	standard
+kern/init_sysent.c	standard
+kern/kern_acct.c	standard
+kern/kern_clock.c	standard
+kern/kern_descrip.c	standard
+kern/kern_exec.c	standard
+kern/kern_exit.c	standard
+kern/kern_fork.c	standard
+kern/kern_ktrace.c	standard
+kern/kern_malloc.c	standard
+kern/kern_physio.c	standard
+kern/kern_proc.c	standard
+kern/kern_prot.c	standard
+kern/kern_resource.c	standard
+kern/kern_sig.c		standard
+kern/kern_subr.c	standard
+kern/kern_synch.c	standard
+kern/kern_sysctl.c	standard
+kern/kern_time.c	standard
+kern/kern_xxx.c		standard
+kern/subr_log.c		standard
+kern/subr_prf.c		standard
+kern/subr_prof.c	standard
+kern/subr_rmap.c	standard
+kern/subr_xxx.c		standard
+kern/sys_generic.c	standard
+kern/sys_process.c	standard
+kern/sys_socket.c	standard
+kern/sysv_shm.c		optional sysvshm
+kern/tty.c		standard
+kern/tty_compat.c	standard
+kern/tty_conf.c		standard
+kern/tty_pty.c		optional pty
+kern/tty_subr.c		standard
+kern/tty_tb.c		optional tb
+kern/tty_tty.c		standard
+kern/uipc_domain.c	standard
+kern/uipc_mbuf.c	standard
+kern/uipc_proto.c	standard
+kern/uipc_socket.c	standard
+kern/uipc_socket2.c	standard
+kern/uipc_syscalls.c	standard
+kern/uipc_usrreq.c	standard
+kern/vfs_bio.c		standard
+kern/vfs_cache.c	standard
+kern/vfs_cluster.c	standard
+kern/vfs_conf.c		standard
+kern/vfs_init.c		standard
+kern/vfs_lookup.c	standard
+kern/vfs_subr.c		standard
+kern/vfs_syscalls.c	standard
+kern/vfs_vnops.c	standard
+miscfs/deadfs/dead_vnops.c	standard
+miscfs/fdesc/fdesc_vfsops.c	optional fdesc
+miscfs/fdesc/fdesc_vnops.c	optional fdesc
+miscfs/fifofs/fifo_vnops.c	optional fifo
+miscfs/kernfs/kernfs_vfsops.c	optional kernfs
+miscfs/kernfs/kernfs_vnops.c	optional kernfs
+miscfs/nullfs/null_subr.c	optional nullfs
+miscfs/nullfs/null_vfsops.c	optional nullfs
+miscfs/nullfs/null_vnops.c	optional nullfs
+miscfs/portal/portal_vfsops.c	optional portal
+miscfs/portal/portal_vnops.c	optional portal
+miscfs/procfs/procfs_subr.c	optional procfs
+miscfs/procfs/procfs_vnops.c	optional procfs
+miscfs/procfs/procfs_vfsops.c	optional procfs
+miscfs/procfs/procfs_note.c	optional procfs
+miscfs/procfs/procfs_mem.c	optional procfs
+miscfs/procfs/procfs_ctl.c	optional procfs
+miscfs/procfs/procfs_status.c	optional procfs
+miscfs/procfs/procfs_regs.c	optional procfs
+miscfs/procfs/procfs_fpregs.c	optional procfs
+miscfs/specfs/spec_vnops.c	standard
+miscfs/umapfs/umap_subr.c	optional umapfs
+miscfs/umapfs/umap_vfsops.c	optional umapfs
+miscfs/umapfs/umap_vnops.c	optional umapfs
+miscfs/union/union_subr.c	optional union
+miscfs/union/union_vfsops.c	optional union
+miscfs/union/union_vnops.c	optional union
+net/bpf.c		optional bpfilter
+net/bpf_filter.c	optional bpfilter
+net/if.c		standard
+net/if_ethersubr.c	optional ether
+net/if_loop.c		optional loop
+net/if_sl.c		optional sl
+net/radix.c		standard
+net/raw_cb.c		standard
+net/raw_usrreq.c	standard
+net/route.c		standard
+net/rtsock.c		standard
+net/slcompress.c	optional sl
+netccitt/ccitt_proto.c	optional ccitt
+netccitt/llc_input.c	optional llc
+netccitt/llc_output.c	optional llc
+netccitt/llc_subr.c	optional llc
+netccitt/llc_timer.c	optional llc
+netccitt/pk_llcsubr.c	optional llc
+netccitt/pk_llcsubr.c	optional hdlc
+netccitt/hd_debug.c	optional hdlc
+netccitt/hd_input.c	optional hdlc
+netccitt/hd_output.c	optional hdlc
+netccitt/hd_subr.c	optional hdlc
+netccitt/hd_timer.c	optional hdlc
+netccitt/if_x25subr.c	optional ccitt
+netccitt/pk_acct.c	optional ccitt
+netccitt/pk_debug.c	optional ccitt
+netccitt/pk_input.c	optional ccitt
+netccitt/pk_output.c	optional ccitt
+netccitt/pk_subr.c	optional ccitt
+netccitt/pk_timer.c	optional ccitt
+netccitt/pk_usrreq.c	optional ccitt
+netimp/if_imp.c		optional imp
+netimp/if_imphost.c	optional imp
+netimp/raw_imp.c	optional imp
+netinet/if_ether.c	optional ether
+netinet/igmp.c		optional inet
+netinet/in.c		optional inet
+netinet/in_pcb.c	optional inet
+netinet/in_proto.c	optional inet
+netinet/ip_icmp.c	optional inet
+netinet/ip_input.c	optional inet
+netinet/ip_mroute.c	optional inet mrouting
+netinet/ip_output.c	optional inet
+netinet/raw_ip.c	optional inet
+netinet/tcp_debug.c	optional inet
+netinet/tcp_input.c	optional inet
+netinet/tcp_output.c	optional inet
+netinet/tcp_subr.c	optional inet
+netinet/tcp_timer.c	optional inet
+netinet/tcp_usrreq.c	optional inet
+netinet/udp_usrreq.c	optional inet
+netiso/clnp_debug.c	optional iso
+netiso/clnp_er.c	optional iso
+netiso/clnp_frag.c	optional iso
+netiso/clnp_input.c	optional iso
+netiso/clnp_options.c	optional iso
+netiso/clnp_output.c	optional iso
+netiso/clnp_raw.c	optional iso
+netiso/clnp_subr.c	optional iso
+netiso/clnp_timer.c	optional iso
+netiso/cltp_usrreq.c	optional iso
+netiso/esis.c		optional iso
+netiso/idrp_usrreq.c	optional iso
+netiso/if_eon.c		optional eon
+netiso/iso.c		optional iso
+netiso/iso_chksum.c	optional iso
+netiso/iso_pcb.c	optional iso
+netiso/iso_proto.c	optional iso
+netiso/iso_snpac.c	optional iso
+netiso/tp_astring.c	optional iso
+netiso/tp_astring.c	optional tpip
+netiso/tp_cons.c	optional iso
+netiso/tp_driver.c	optional iso
+netiso/tp_driver.c	optional tpip
+netiso/tp_emit.c	optional iso
+netiso/tp_emit.c	optional tpip
+netiso/tp_inet.c	optional iso
+netiso/tp_inet.c	optional tpip
+netiso/tp_input.c	optional iso
+netiso/tp_input.c	optional tpip
+netiso/tp_iso.c		optional iso
+netiso/tp_meas.c	optional iso
+netiso/tp_meas.c	optional tpip
+netiso/tp_output.c	optional iso
+netiso/tp_output.c	optional tpip
+netiso/tp_pcb.c		optional iso
+netiso/tp_pcb.c		optional tpip
+netiso/tp_subr.c	optional iso
+netiso/tp_subr.c	optional tpip
+netiso/tp_subr2.c	optional iso
+netiso/tp_subr2.c	optional tpip
+netiso/tp_timer.c	optional iso
+netiso/tp_timer.c	optional tpip
+netiso/tp_trace.c	optional iso
+netiso/tp_trace.c	optional tpip
+netiso/tp_usrreq.c	optional iso
+netiso/tp_usrreq.c	optional tpip
+netiso/tuba_subr.c	optional iso tuba
+netiso/tuba_table.c	optional iso tuba
+netiso/tuba_usrreq.c	optional iso tuba
+netns/idp_usrreq.c	optional ns
+netns/ns.c		optional ns
+netns/ns_error.c	optional ns
+netns/ns_input.c	optional ns
+netns/ns_ip.c		optional ns
+netns/ns_output.c	optional ns
+netns/ns_pcb.c		optional ns
+netns/ns_proto.c	optional ns
+netns/spp_debug.c	optional ns
+netns/spp_usrreq.c	optional ns
+nfs/nfs_bio.c		optional nfs
+nfs/nfs_node.c		optional nfs
+nfs/nfs_nqlease.c	optional nfs
+nfs/nfs_serv.c		optional nfs
+nfs/nfs_socket.c	optional nfs
+nfs/nfs_srvcache.c	optional nfs
+nfs/nfs_subs.c		optional nfs
+nfs/nfs_syscalls.c	optional nfs
+nfs/nfs_vfsops.c	optional nfs
+nfs/nfs_vnops.c		optional nfs
+ufs/ffs/ffs_alloc.c	optional ffs
+ufs/ffs/ffs_alloc.c	optional mfs
+ufs/ffs/ffs_balloc.c	optional ffs
+ufs/ffs/ffs_balloc.c	optional mfs
+ufs/ffs/ffs_inode.c	optional ffs
+ufs/ffs/ffs_inode.c	optional mfs
+ufs/ffs/ffs_subr.c	optional ffs
+ufs/ffs/ffs_subr.c	optional mfs
+ufs/ffs/ffs_tables.c	optional ffs
+ufs/ffs/ffs_tables.c	optional mfs
+ufs/ffs/ffs_vfsops.c	optional ffs
+ufs/ffs/ffs_vfsops.c	optional mfs
+ufs/ffs/ffs_vnops.c	optional ffs
+ufs/ffs/ffs_vnops.c	optional mfs
+ufs/lfs/lfs_alloc.c	optional lfs
+ufs/lfs/lfs_bio.c	optional lfs
+ufs/lfs/lfs_balloc.c	optional lfs
+ufs/lfs/lfs_cksum.c	optional lfs
+ufs/lfs/lfs_debug.c	optional lfs
+ufs/lfs/lfs_inode.c	optional lfs
+ufs/lfs/lfs_segment.c	optional lfs
+ufs/lfs/lfs_subr.c	optional lfs
+ufs/lfs/lfs_syscalls.c	optional lfs
+ufs/lfs/lfs_vfsops.c	optional lfs
+ufs/lfs/lfs_vnops.c	optional lfs
+ufs/mfs/mfs_vfsops.c	optional mfs
+ufs/mfs/mfs_vnops.c	optional mfs
+ufs/ufs/ufs_bmap.c	standard
+ufs/ufs/ufs_disksubr.c	standard
+ufs/ufs/ufs_ihash.c	standard
+ufs/ufs/ufs_inode.c	standard
+ufs/ufs/ufs_lockf.c	standard
+ufs/ufs/ufs_lookup.c	standard
+ufs/ufs/ufs_quota.c	standard
+ufs/ufs/ufs_vfsops.c	standard
+ufs/ufs/ufs_vnops.c	standard
+vm/device_pager.c	optional devpager
+vm/kern_lock.c		standard
+vm/swap_pager.c		optional swappager
+vm/vm_fault.c		standard
+vm/vm_glue.c		standard
+vm/vm_init.c		standard
+vm/vm_kern.c		standard
+vm/vm_map.c		standard
+vm/vm_meter.c		standard
+vm/vm_mmap.c		standard
+vm/vm_object.c		standard
+vm/vm_page.c		standard
+vm/vm_pageout.c		standard
+vm/vm_pager.c		standard
+vm/vm_swap.c		standard
+vm/vm_unix.c		standard
+vm/vm_user.c		standard
+vm/vnode_pager.c	optional vnodepager
diff --git a/sys/conf/files.newconf b/sys/conf/files.newconf
new file mode 100644
index 00000000000..7b0907ab894
--- /dev/null
+++ b/sys/conf/files.newconf
@@ -0,0 +1,274 @@
+#	@(#)files.newconf	8.9 (Berkeley) 3/31/94
+
+# generic attributes
+define	disk
+define	tape
+define	ifnet
+define	tty
+
+# net device attributes - we have generic code for ether.
+# we should have imp but right now it is a pseudo-device.
+define	ether
+# define	imp
+pseudo-device imp
+
+# scsi driver and associated stuff
+define	scsi { target = -1 }
+device	tg at scsi { drive = -1 }
+file	dev/scsi/scsi_subr.c	scsi
+
+device	sd at tg: disk
+file	dev/scsi/sd.c		sd needs-flag
+
+# device st at tg: tape	-- not yet
+
+# legitimate pseudo-devices
+pseudo-device bpfilter
+pseudo-device cd: disk
+pseudo-device loop
+pseudo-device pty: tty
+pseudo-device sl
+pseudo-device vn: disk
+
+# kernel sources (cd9660 file names agree with the entries in conf/files)
+file	isofs/cd9660/cd9660_bmap.c	isofs
+file	isofs/cd9660/cd9660_lookup.c	isofs
+file	isofs/cd9660/cd9660_node.c	isofs
+file	isofs/cd9660/cd9660_rrip.c	isofs
+file	isofs/cd9660/cd9660_util.c	isofs
+file	isofs/cd9660/cd9660_vfsops.c	isofs
+file	isofs/cd9660/cd9660_vnops.c	isofs
+file	kern/init_main.c
+file	kern/init_sysent.c
+file	kern/kern_acct.c
+file	kern/kern_clock.c
+file	kern/kern_descrip.c
+file	kern/kern_exec.c
+file	kern/kern_exit.c
+file	kern/kern_fork.c
+file	kern/kern_ktrace.c	ktrace
+file	kern/kern_malloc.c
+file	kern/kern_physio.c
+file	kern/kern_proc.c
+file	kern/kern_prot.c
+file	kern/kern_resource.c
+file	kern/kern_sig.c
+file	kern/kern_subr.c
+file	kern/kern_synch.c
+file	kern/kern_sysctl.c
+file	kern/kern_time.c
+file	kern/kern_xxx.c
+file	kern/subr_autoconf.c
+file	kern/subr_log.c
+file	kern/subr_prf.c
+file	kern/subr_prof.c
+file	kern/subr_rmap.c
+file	kern/subr_xxx.c
+file	kern/sys_generic.c
+file	kern/sys_process.c
+file	kern/sys_socket.c
+file	kern/sysv_shm.c		sysvshm
+file	kern/tty.c
+file	kern/tty_compat.c
+file	kern/tty_conf.c
+file	kern/tty_pty.c		pty needs-count
+file	kern/tty_subr.c
+file	kern/tty_tb.c		tb needs-flag
+file	kern/tty_tty.c
+file	kern/uipc_domain.c
+file	kern/uipc_mbuf.c
+file	kern/uipc_proto.c
+file	kern/uipc_socket.c
+file	kern/uipc_socket2.c
+file	kern/uipc_syscalls.c
+file	kern/uipc_usrreq.c
+file	kern/vfs_bio.c
+file	kern/vfs_cache.c
+file	kern/vfs_cluster.c
+file	kern/vfs_conf.c
+file	kern/vfs_init.c
+file	kern/vfs_lookup.c
+file	kern/vfs_subr.c
+file	kern/vfs_syscalls.c
+file	kern/vfs_vnops.c
+file	miscfs/deadfs/dead_vnops.c
+file	miscfs/fdesc/fdesc_vfsops.c	fdesc
+file	miscfs/fdesc/fdesc_vnops.c	fdesc
+file	miscfs/fifofs/fifo_vnops.c	fifo
+file	miscfs/kernfs/kernfs_vfsops.c	kernfs
+file	miscfs/kernfs/kernfs_vnops.c	kernfs
+file	miscfs/nullfs/null_subr.c	nullfs
+file	miscfs/nullfs/null_vfsops.c	nullfs
+file	miscfs/nullfs/null_vnops.c	nullfs
+file	miscfs/portal/portal_vfsops.c	portal
+file	miscfs/portal/portal_vnops.c	portal
+file	miscfs/procfs/procfs_subr.c	procfs
+file	miscfs/procfs/procfs_vnops.c	procfs
+file	miscfs/procfs/procfs_vfsops.c	procfs
+file	miscfs/procfs/procfs_note.c	procfs
+file	miscfs/procfs/procfs_mem.c	procfs
+file	miscfs/procfs/procfs_ctl.c	procfs
+file	miscfs/procfs/procfs_status.c	procfs
+file	miscfs/procfs/procfs_regs.c	procfs
+file	miscfs/procfs/procfs_fpregs.c	procfs
+file	miscfs/specfs/spec_vnops.c
+file	miscfs/umapfs/umap_subr.c	umapfs
+file	miscfs/umapfs/umap_vfsops.c	umapfs
+file	miscfs/umapfs/umap_vnops.c	umapfs
+file	miscfs/union/union_subr.c	union
+file	miscfs/union/union_vfsops.c	union
+file	miscfs/union/union_vnops.c	union
+file	net/bpf.c		bpfilter needs-count
+file	net/bpf_filter.c	bpfilter needs-count
+file	net/if.c
+file	net/if_ethersubr.c	ether needs-flag
+file	net/if_loop.c		loop needs-count
+file	net/if_sl.c		sl needs-count
+file	net/radix.c
+file	net/raw_cb.c
+file	net/raw_usrreq.c
+file	net/route.c
+file	net/rtsock.c
+file	net/slcompress.c	sl
+file	netccitt/ccitt_proto.c	ccitt
+file	netccitt/llc_input.c	llc
+file	netccitt/llc_output.c	llc
+file	netccitt/llc_subr.c	llc
+file	netccitt/llc_timer.c	llc
+file	netccitt/hd_debug.c	hdlc
+file	netccitt/hd_input.c	hdlc
+file	netccitt/hd_output.c	hdlc
+file	netccitt/hd_subr.c	hdlc
+file	netccitt/hd_timer.c	hdlc
+file	netccitt/if_x25subr.c	ccitt
+file	netccitt/pk_acct.c	ccitt
+file	netccitt/pk_debug.c	ccitt
+file	netccitt/pk_input.c	ccitt
+file	netccitt/pk_llcsubr.c	llc hdlc
+file	netccitt/pk_output.c	ccitt
+file	netccitt/pk_subr.c	ccitt
+file	netccitt/pk_timer.c	ccitt
+file	netccitt/pk_usrreq.c	ccitt
+file	netimp/if_imp.c		imp needs-count
+file	netimp/if_imphost.c	imp needs-count
+file	netimp/raw_imp.c	imp
+file	netinet/if_ether.c	ether
+file	netinet/igmp.c		inet
+file	netinet/in.c		inet
+file	netinet/in_pcb.c	inet
+file	netinet/in_proto.c	inet
+file	netinet/ip_icmp.c	inet
+file	netinet/ip_input.c	inet
+file	netinet/ip_mroute.c	inet
+file	netinet/ip_output.c	inet
+file	netinet/raw_ip.c	inet
+file	netinet/tcp_debug.c	inet
+file	netinet/tcp_input.c	inet
+file	netinet/tcp_output.c	inet
+file	netinet/tcp_subr.c	inet
+file	netinet/tcp_timer.c	inet
+file	netinet/tcp_usrreq.c	inet
+file	netinet/udp_usrreq.c	inet
+file	netiso/clnp_debug.c	iso
+file	netiso/clnp_er.c	iso
+file	netiso/clnp_frag.c	iso
+file	netiso/clnp_input.c	iso
+file	netiso/clnp_options.c	iso
+file	netiso/clnp_output.c	iso
+file	netiso/clnp_raw.c	iso
+file	netiso/clnp_subr.c	iso
+file	netiso/clnp_timer.c	iso
+file	netiso/cltp_usrreq.c	iso
+file	netiso/esis.c		iso
+file	netiso/if_eon.c		eon
+file	netiso/idrp_usrreq.c	iso
+file	netiso/iso.c		iso
+file	netiso/iso_chksum.c	iso
+file	netiso/iso_pcb.c	iso
+file	netiso/iso_proto.c	iso
+file	netiso/iso_snpac.c	iso
+file	netiso/tp_astring.c	iso tpip
+file	netiso/tp_cons.c	iso
+file	netiso/tp_driver.c	iso tpip
+file	netiso/tp_emit.c	iso tpip
+file	netiso/tp_inet.c	iso tpip
+file	netiso/tp_input.c	iso tpip
+file	netiso/tp_iso.c		iso
+file	netiso/tp_meas.c	iso tpip
+file	netiso/tp_output.c	iso tpip
+file	netiso/tp_pcb.c		iso tpip
+file	netiso/tp_subr.c	iso tpip
+file	netiso/tp_subr2.c	iso tpip
+file	netiso/tp_timer.c	iso tpip
+file	netiso/tp_trace.c	iso tpip
+file	netiso/tp_usrreq.c	iso tpip
+file	netiso/tuba_subr.c	iso tuba
+file	netiso/tuba_table.c	iso tuba
+file	netiso/tuba_usrreq.c	iso tuba
+file	netns/idp_usrreq.c	ns
+file	netns/ns.c		ns
+file	netns/ns_error.c	ns
+file	netns/ns_input.c	ns
+file	netns/ns_ip.c		ns
+file	netns/ns_output.c	ns
+file	netns/ns_pcb.c		ns
+file	netns/ns_proto.c	ns
+file	netns/spp_debug.c	ns
+file	netns/spp_usrreq.c	ns
+file	nfs/nfs_bio.c		nfs
+file	nfs/nfs_node.c		nfs
+file	nfs/nfs_nqlease.c	nfs
+file	nfs/nfs_serv.c		nfs
+file	nfs/nfs_socket.c	nfs
+file	nfs/nfs_srvcache.c	nfs
+file	nfs/nfs_subs.c		nfs
+file	nfs/nfs_syscalls.c	nfs
+file	nfs/nfs_vfsops.c	nfs
+file	nfs/nfs_vnops.c		nfs
+file	ufs/ffs/ffs_alloc.c	ffs mfs
+file	ufs/ffs/ffs_balloc.c	ffs mfs
+file	ufs/ffs/ffs_inode.c	ffs mfs
+file	ufs/ffs/ffs_subr.c	ffs mfs
+file	ufs/ffs/ffs_tables.c	ffs mfs
+file	ufs/ffs/ffs_vfsops.c	ffs mfs
+file	ufs/ffs/ffs_vnops.c	ffs mfs
+file	ufs/lfs/lfs_alloc.c	lfs
+file	ufs/lfs/lfs_bio.c	lfs
+file	ufs/lfs/lfs_balloc.c	lfs
+file	ufs/lfs/lfs_cksum.c	lfs
+file	ufs/lfs/lfs_debug.c	lfs
+file	ufs/lfs/lfs_inode.c	lfs
+file	ufs/lfs/lfs_segment.c	lfs
+file	ufs/lfs/lfs_subr.c	lfs
+file	ufs/lfs/lfs_syscalls.c	lfs
+file	ufs/lfs/lfs_vfsops.c	lfs
+file	ufs/lfs/lfs_vnops.c	lfs
+file	ufs/mfs/mfs_vfsops.c	mfs
+file	ufs/mfs/mfs_vnops.c	mfs
+file	ufs/ufs/ufs_bmap.c	ffs lfs mfs
+file	ufs/ufs/ufs_disksubr.c	ffs lfs mfs
+file	ufs/ufs/ufs_ihash.c	ffs lfs mfs
+file	ufs/ufs/ufs_inode.c	ffs lfs mfs
+file	ufs/ufs/ufs_lockf.c	ffs lfs mfs
+file	ufs/ufs/ufs_lookup.c	ffs lfs mfs
+file	ufs/ufs/ufs_quota.c	ffs lfs mfs
+file	ufs/ufs/ufs_vfsops.c	ffs lfs mfs
+file	ufs/ufs/ufs_vnops.c	ffs lfs mfs
+file	vm/device_pager.c	devpager
+file	vm/kern_lock.c
+file	vm/swap_pager.c		swappager
+file	vm/vm_fault.c
+file	vm/vm_glue.c
+file	vm/vm_init.c
+file	vm/vm_kern.c
+file	vm/vm_map.c
+file	vm/vm_meter.c
+file	vm/vm_mmap.c
+file	vm/vm_object.c
+file	vm/vm_page.c
+file	vm/vm_pageout.c
+file	vm/vm_pager.c
+file	vm/vm_swap.c
+file	vm/vm_unix.c
+file	vm/vm_user.c
+file	vm/vnode_pager.c	vnodepager
diff --git a/sys/conf/newvers.sh b/sys/conf/newvers.sh
new file mode 100644
index 00000000000..83a2f04ad7a
--- /dev/null
+++ b/sys/conf/newvers.sh
@@ -0,0 +1,48 @@
+#!/bin/sh -
+#
+# Copyright (c) 1984, 1986, 1990, 1993
+#	The Regents of the University of California.  All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# 1. Redistributions of source code must retain the above copyright
+#    notice, this list of conditions and the following disclaimer.
+# 2. Redistributions in binary form must reproduce the above copyright
+#    notice, this list of conditions and the following disclaimer in the
+#    documentation and/or other materials provided with the distribution.
+# 3. All advertising materials mentioning features or use of this software
+#    must display the following acknowledgement:
+#	This product includes software developed by the University of
+#	California, Berkeley and its contributors.
+# 4. Neither the name of the University nor the names of its contributors
+#    may be used to endorse or promote products derived from this software
+#    without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+# SUCH DAMAGE.
+#
+#	@(#)newvers.sh	8.1 (Berkeley) 4/20/94
+
+if [ ! -r version ]	# no readable build-counter file in this directory yet
+then
+	echo 0 > version	# start the build counter at 0
+fi
+
+touch version
+v=`cat version` u=${USER-root} d=`pwd` h=`hostname` t=`date`	# build number, builder (root when USER is unset), build directory, host, timestamp
+echo "char ostype[] = \"4.4BSD\";" > vers.c	# ">" truncates: vers.c is regenerated from scratch on every run
+echo "char osrelease[] = \"4.4BSD-Lite\";" >> vers.c
+echo "char sccs[4] = { '@', '(', '#', ')' };" >>vers.c	# exactly the four characters @(#), with no terminating NUL
+echo "char version[] = \"4.4BSD-Lite #${v}: ${t}\\n    ${u}@${h}:${d}\\n\";" >>vers.c
+
+echo `expr ${v} + 1` > version	# store the incremented counter for the next run
diff --git a/sys/conf/nfsswapvmunix.c b/sys/conf/nfsswapvmunix.c
new file mode 100644
index 00000000000..f9812eb3314
--- /dev/null
+++ b/sys/conf/nfsswapvmunix.c
@@ -0,0 +1,152 @@
+/*
+ * Copyright (c) 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Rick Macklem at The University of Guelph.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)nfsswapvmunix.c	8.1 (Berkeley) 6/10/93
+ */
+
+/*
+ * Sample NFS swapvmunix configuration file.
+ * This should be filled in by the bootstrap program.
+ * See /sys/nfs/nfsdiskless.h for details of the fields.
+ */
+
+#include <sys/param.h>
+#include <sys/conf.h>
+#include <sys/socket.h>
+#include <sys/mount.h>
+
+#include <net/if.h>
+#include <nfs/nfsv2.h>
+#include <nfs/nfsdiskless.h>
+
+extern int nfs_mountroot();
+int (*mountroot)() = nfs_mountroot;	/* select nfs_mountroot as the mountroot routine */
+
+dev_t	rootdev = NODEV;	/* root, arg and dump devices are all set to NODEV in this sample */
+dev_t	argdev  = NODEV;
+dev_t	dumpdev = NODEV;
+
+struct	swdevt swdevt[] = {
+	{ NODEV,	0,	5000 },	/* happy:/u/swap.dopey  */
+	{ 0, 0, 0 }	/* all-zero final entry; presumably the end-of-table marker -- TODO confirm against the consumer */
+};
+struct nfs_diskless nfs_diskless = {	/* positional initializer; member order is set by nfs/nfsdiskless.h (not visible here) */
+	{ { 'q', 'e', '0', '\0' },	/* the string "qe0" -- presumably an interface name; TODO confirm */
+	  { 0x10, 0x2, { 0x0, 0x0, 0x83, 0x68, 0x30, 0x2, } },	/* if a BSD sockaddr (len, family, data): AF_INET 131.104.48.2 -- TODO confirm */
+	  { 0x10, 0x2, { 0x0, 0x0, 0x83, 0x68, 0x30, 0xff, } },	/* same reading: 131.104.48.255 */
+	  { 0x10, 0x0, { 0x0, 0x0, 0xff, 0xff, 0xff, 0x0, } },	/* same reading: family 0, 255.255.255.0 */
+ 	},
+	{ 0x10, 0x2, { 0x0, 0x0, 0x83, 0x68, 0x30, 0x12, } },	/* same reading: 131.104.48.18 */
+	{	/* first of two identical ten-value groups; presumably NFS mount arguments -- TODO confirm */
+	  (struct sockaddr *)0, SOCK_DGRAM, 0, (nfsv2fh_t *)0,
+	  0, 8192, 8192, 10, 100, (char *)0,
+	},
+	{	/* 32 bytes; presumably an NFS file handle -- TODO confirm */
+		0xf,
+		0x9,
+		0x0,
+		0x0,
+		0x1,
+		0x0,
+		0x0,
+		0x0,
+		0xc,
+		0x0,
+		0x0,
+		0x0,
+		0x6,
+		0x0,
+		0x0,
+		0x0,
+		0x27,
+		0x18,
+		0x79,
+		0x27,
+		0x0,
+		0x0,
+		0x0,
+		0x0,
+		0x0,
+		0x0,
+		0x0,
+		0x0,
+		0x0,
+		0x0,
+		0x0,
+		0x0,
+	},
+	{ 0x10, 0x2, { 0x8, 0x1, 0x83, 0x68, 0x30, 0x5, } },	/* same reading: leading data bytes 0x8, 0x1 (2049 as a big-endian port), 131.104.48.5 */
+	"happy",	/* same host name as in the swdevt comment (happy:/u/swap.dopey) */
+	{	/* second ten-value group, identical to the first */
+	  (struct sockaddr *)0, SOCK_DGRAM, 0, (nfsv2fh_t *)0,
+	  0, 8192, 8192, 10, 100, (char *)0,
+	},
+	{	/* second 32-byte array; presumably another NFS file handle -- TODO confirm */
+		0x0,
+		0x9,
+		0x0,
+		0x0,
+		0x1,
+		0x0,
+		0x0,
+		0x0,
+		0xc,
+		0x0,
+		0x0,
+		0x0,
+		0x2,
+		0x0,
+		0x0,
+		0x0,
+		0xd0,
+		0x48,
+		0x42,
+		0x25,
+		0x0,
+		0x0,
+		0x0,
+		0x0,
+		0x0,
+		0x0,
+		0x0,
+		0x0,
+		0x0,
+		0x0,
+		0x0,
+		0x0,
+	},
+	{ 0x10, 0x2, { 0x8, 0x1, 0x83, 0x68, 0x30, 0x5, } },	/* byte-identical to the address entry before the first "happy" */
+	"happy",
+};
diff --git a/sys/conf/param.c b/sys/conf/param.c
new file mode 100644
index 00000000000..9f4e2cae857
--- /dev/null
+++ b/sys/conf/param.c
@@ -0,0 +1,145 @@
+/*
+ * Copyright (c) 1980, 1986, 1989, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)param.c	8.2 (Berkeley) 1/21/94
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/socket.h>
+#include <sys/proc.h>
+#include <sys/vnode.h>
+#include <sys/file.h>
+#include <sys/callout.h>
+#include <sys/clist.h>
+#include <sys/mbuf.h>
+#include <sys/kernel.h>
+
+#include <ufs/ufs/quota.h>
+
+#ifdef SYSVSHM
+#include <machine/vmparam.h>
+#include <sys/shm.h>
+#endif
+
+/*
+ * System parameter formulae.
+ *
+ * This file is copied into each directory where we compile
+ * the kernel; it should be modified there to suit local taste
+ * if necessary.
+ *
+ * Compiled with -DHZ=xx -DTIMEZONE=x -DDST=x -DMAXUSERS=xx
+ */
+
+#ifndef HZ
+#define	HZ 100				/* default when -DHZ=xx is not supplied */
+#endif
+int	hz = HZ;
+int	tick = 1000000 / HZ;			/* microseconds per clock tick */
+int	tickadj = (30000 / (60 * HZ)) > 0 ? 30000 / (60 * HZ) : 1;	/* can adjust 30ms in 60s; clamped to 1 because the division truncates to 0 once HZ > 500 */
+struct	timezone tz = { TIMEZONE, DST };
+#define	NPROC (20 + 16 * MAXUSERS)
+int	maxproc = NPROC;
+#define	NTEXT (80 + NPROC / 8)			/* actually the object cache */
+#define	NVNODE (NPROC + NTEXT + 100)
+int	desiredvnodes = NVNODE;
+int	maxfiles = 3 * (NPROC + MAXUSERS) + 80;
+int	ncallout = 16 + NPROC;
+int	nclist = 60 + 12 * MAXUSERS;
+int	nmbclusters = NMBCLUSTERS;
+int	fscale = FSCALE;	/* kernel uses `FSCALE', user uses `fscale' */
+
+/*
+ * Values in support of System V compatible shared memory.	XXX
+ */
+#ifdef SYSVSHM
+#define	SHMMAX	(SHMMAXPGS*NBPG)	/* SHMMAXPGS scaled up by NBPG */
+#define	SHMMIN	1
+#define	SHMMNI	32			/* <= SHMMMNI in shm.h */
+#define	SHMSEG	8
+#define	SHMALL	(SHMMAXPGS/CLSIZE)	/* SHMMAXPGS scaled down by CLSIZE */
+
+struct	shminfo shminfo = {	/* positional initializer: the order must match the members of struct shminfo */
+	SHMMAX,
+	SHMMIN,
+	SHMMNI,
+	SHMSEG,
+	SHMALL
+};
+#endif	/* SYSVSHM */
+
+/*
+ * These are initialized at bootstrap time
+ * to values dependent on memory size
+ */
+int	nbuf, nswbuf;
+
+/*
+ * These have to be allocated somewhere; allocating
+ * them here forces loader errors if this file is omitted
+ * (if they've been externed everywhere else; hah!).
+ */
+struct 	callout *callout;
+struct	cblock *cfree;
+struct	buf *buf, *swbuf;
+char	*buffers;
+
+/*
+ * Proc/pgrp hashing.
+ * Here so that hash table sizes can depend on MAXUSERS/NPROC.
+ * Hash size must be a power of two.
+ * NOW omission of this file will cause loader errors!
+ */
+
+#if NPROC > 1024
+#define	PIDHSZ		512
+#else
+#if NPROC > 512
+#define	PIDHSZ		256
+#else
+#if NPROC > 256
+#define	PIDHSZ		128
+#else
+#define	PIDHSZ		64	/* NPROC <= 256 */
+#endif	/* NPROC > 256 */
+#endif	/* NPROC > 512 */
+#endif	/* NPROC > 1024 */
+
+struct	proc *pidhash[PIDHSZ];
+struct	pgrp *pgrphash[PIDHSZ];	/* sized by PIDHSZ as well */
+int	pidhashmask = PIDHSZ - 1;	/* all low bits set, since every PIDHSZ choice above is a power of two */
diff --git a/sys/conf/systags.sh b/sys/conf/systags.sh
new file mode 100644
index 00000000000..90714d76974
--- /dev/null
+++ b/sys/conf/systags.sh
@@ -0,0 +1,72 @@
+#! /bin/sh
+#
+# Copyright (c) 1992, 1993
+#	The Regents of the University of California.  All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# 1. Redistributions of source code must retain the above copyright
+#    notice, this list of conditions and the following disclaimer.
+# 2. Redistributions in binary form must reproduce the above copyright
+#    notice, this list of conditions and the following disclaimer in the
+#    documentation and/or other materials provided with the distribution.
+# 3. All advertising materials mentioning features or use of this software
+#    must display the following acknowledgement:
+#	This product includes software developed by the University of
+#	California, Berkeley and its contributors.
+# 4. Neither the name of the University nor the names of its contributors
+#    may be used to endorse or promote products derived from this software
+#    without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+# SUCH DAMAGE.
+#
+#	@(#)systags.sh	8.1 (Berkeley) 6/10/93
+#
+# systags.sh - construct a system tags file using dependence relations
+#	in a .depend file
+#
+# First written May 16, 1992 by Van Jacobson, Lawrence Berkeley Laboratory.
+#
+# from: $Header: systags.sh,v 1.7 92/07/12 08:18:21 torek Exp $
+
+rm -f tags tags.tmp tags.cfiles tags.sfiles tags.hfiles	# remove leftovers of any earlier run
+MACHINE=`machine`	# substituted for ./machine/ by the first sed expression below
+sed -e "s,\./machine/,../../$MACHINE/include/,g" \
+    -e 's,[a-z][^/ 	]*/\.\./,,g' .depend | awk   '{	# sed: expand ./machine/, then delete each "dir/../" pair
+		for (i = 1; i <= NF; ++i) {	# examine every whitespace-separated word of the line
+			t = substr($i, length($i) - 1)	# last two characters of the word
+			if (t == ".c")
+				cfiles[$i] = 1;	# array keys act as a set, so repeated names collapse
+			else if (t == ".h")
+				hfiles[$i] = 1;
+			else if (t == ".s")
+				sfiles[$i] = 1;
+		}
+	};
+	END {	# write each collected set to its own list file, one name per line
+		for (i in cfiles)
+			print i > "tags.cfiles";
+		for (i in sfiles)
+			print i > "tags.sfiles";
+		for (i in hfiles)
+			print i > "tags.hfiles";
+	}'
+
+ctags -t -d -w `cat tags.cfiles tags.hfiles tags.sfiles`	# tag every collected .c, .h and .s file
+egrep -o "^ENTRY\(.*\)|^ALTENTRY\(.*\)" /dev/null `cat tags.sfiles` | \
+    sed "s;\([^:]*\):\([^(]*\)(\([^, )]*\)\(.*\);\3	\1	/^\2(\3\4$/;" >> tags	# file:ENTRY(name...) becomes a tags line; /dev/null keeps the file: prefix with a single .s file and keeps egrep off stdin with none
+
+mv tags tags.tmp
+sort -u tags.tmp > tags	# order the combined entries and drop duplicates
+rm -f tags.tmp tags.cfiles tags.sfiles tags.hfiles	# -f: a list file is never created when no source of that kind was seen
diff --git a/sys/fs/cd9660/TODO b/sys/fs/cd9660/TODO
new file mode 100644
index 00000000000..555d26ad7d1
--- /dev/null
+++ b/sys/fs/cd9660/TODO
@@ -0,0 +1,77 @@
+#	$Id: TODO,v 1.4 1993/09/07 15:40:51 ws Exp $
+
+ 1) should understand "older", original High Sierra ("CDROM001") type
+
+   Not yet. ( I don't have this technical information, yet. )
+
+ 2) should understand Rock Ridge
+
+   Yes, we support the following features:
+
+       o Symbolic Link
+       o Real Name(long name)
+       o File Attribute 
+       o Time stamp
+       o uid, gid
+       o Devices
+       o Relocated directories
+
+   Except for the following:
+
+       o POSIX device number mapping
+
+         There is some preliminary stuff in there that (ab-)uses the mknod
+         system call, but this needs a writable filesystem
+         
+ 3) should be called cdfs, as other ISO file systems may soon be possible
+
+   Not yet. We should probably make a separate file system once the ECMA
+   draft is valid. To support Rock Ridge I can use almost the same
+   code, so I just use the same file system interface...
+
+ 4) should have file handles implemented for use with NFS, etc
+
+   Yes. we have already this one, and I based it for this release. 
+
+ 5) should have name translation enabled by mount flag
+
+   Yes. We can disable the Rock Ridge Extension with the following option:
+
+      "mount -t isofs -o -norrip /dev/cd0d /cdrom"
+
+ 6) should run as a user process, and not take up kernel space (cdroms
+    are slow)
+
+   Not yet.
+
+ 7) ECMA support.
+
+   Not yet. We need not only a technical spec but also an ECMA format
+   CD-ROM itself!
+
+ 8) Character set change by SVD ( multi SVD support )
+
+   Not yet. We would also have to make the other parts of the system 8-bit
+   clean. As far as I know, if you export the cdrom over NFS, a client
+   that is 8-bit clean (e.g. Solaris Japanese with EUC code) can access it.
+
+ 9) Access checks in isofs_access
+
+   Not yet.
+
+ 10) Support for generation numbers
+
+   Yes. Default is to list only the last file (the one with the highest
+   generation number). If you mount with -gen, all files are shown with
+   their generation numbers. In both cases you can specify the generation
+   number on opening files (if you happen to know it) or leave it off,
+   when it will again find the last file.
+
+ 11) Support for extended attributes
+
+   Yes. Since this requires an extra block buffer for the attributes
+   this must be enabled on mounting with the option -extattr.
+
+----------
+Last update July 19, '93 by Atsushi Murai. (amurai@spec.co.jp)
+Last update August 19, '93 by Wolfgang Solfrank. (ws@tools.de)
diff --git a/sys/fs/cd9660/TODO.hibler b/sys/fs/cd9660/TODO.hibler
new file mode 100644
index 00000000000..3501aa296cd
--- /dev/null
+++ b/sys/fs/cd9660/TODO.hibler
@@ -0,0 +1,22 @@
+1. Investigate making ISOFS another UFS shared filesystem (ala FFS/MFS/LFS).
+   Since it was modelled after the inode code, we might be able to merge
+   them back.  It looks like a separate (but very similar) lookup routine
+   will be needed due to the associated file stuff.
+
+2. Make filesystem exportable.  This comes for free if stacked with UFS.
+   Otherwise, the ufs_export routines need to be elevated to vfs_* routines.
+	[ DONE - hibler ]
+
+3. If it can't be merged with UFS, at least get them in sync.  For example,
+   it could use the same style hashing routines as in ufs/ufs_ihash.c
+
+4. It would be nice to be able to use the vfs_cluster code.
+   Unfortunately, if the logical block size is smaller than the page size,
+   it won't work.  Also, if throughput is relatively constant for any
+   block size (as it is for the HP drive--150kbs) then clustering may not
+   buy much (or may even hurt when vfs_cluster comes up with a large sync
+   cluster).
+
+5. Seems like there should be a "notrans" or some such mount option to show
+   filenames as they really are without lower-casing, stripping of version
+   numbers, etc.  Does this make sense?
diff --git a/sys/fs/cd9660/cd9660_bmap.c b/sys/fs/cd9660/cd9660_bmap.c
new file mode 100644
index 00000000000..911eedfd06a
--- /dev/null
+++ b/sys/fs/cd9660/cd9660_bmap.c
@@ -0,0 +1,102 @@
+/*-
+ * Copyright (c) 1994
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley
+ * by Pace Willisson (pace@blitz.com).  The Rock Ridge Extension
+ * Support code is derived from software contributed to Berkeley
+ * by Atsushi Murai (amurai@spec.co.jp).
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)cd9660_bmap.c	8.3 (Berkeley) 1/23/94
+ */
+
+#include <sys/param.h>
+#include <sys/namei.h>
+#include <sys/buf.h>
+#include <sys/file.h>
+#include <sys/vnode.h>
+#include <sys/mount.h>
+
+#include <isofs/cd9660/iso.h>
+#include <isofs/cd9660/cd9660_node.h>
+
+/*
+ * Bmap converts the logical block number of a file (a_bn) to its physical
+ * block number on the disk (*a_bnp) by using it to index into the file's
+ * data extent.  On request it also hands back the device vnode (*a_vpp)
+ * and a readahead run length (*a_runp).  Always returns 0.
+ */
+int
+cd9660_bmap(ap)
+	struct vop_bmap_args /* {
+		struct vnode *a_vp;
+		daddr_t  a_bn;
+		struct vnode **a_vpp;
+		daddr_t *a_bnp;
+		int *a_runp;
+	} */ *ap;
+{
+	struct iso_node *ip = VTOI(ap->a_vp);
+	daddr_t lblkno = ap->a_bn;
+	long bsize;
+
+	/*
+	 * Check for underlying vnode requests and ensure that logical
+	 * to physical mapping is requested.
+	 */
+	if (ap->a_vpp != NULL)
+		*ap->a_vpp = ip->i_devvp;
+	if (ap->a_bnp == NULL)
+		return (0);
+
+	/*
+	 * Physical block = (data start + lblkno) scaled by btodb(block size)
+	 */
+	bsize = ip->i_mnt->logical_block_size;
+	*ap->a_bnp = (ip->iso_start + lblkno) * btodb(bsize);
+
+	/*
+	 * Determine the number of whole blocks left in the file after the
+	 * requested one, clamped to the range 0 .. MAXBSIZE/bsize - 1.
+	 */
+	if (ap->a_runp) {
+		int nblk;
+
+		nblk = (ip->i_size - (lblkno + 1) * bsize) / bsize;
+		if (nblk <= 0)
+			*ap->a_runp = 0;
+		else if (nblk >= MAXBSIZE/bsize)
+			*ap->a_runp = MAXBSIZE/bsize - 1;
+		else
+			*ap->a_runp = nblk;
+	}
+
+	return 0;
+}
diff --git a/sys/fs/cd9660/cd9660_lookup.c b/sys/fs/cd9660/cd9660_lookup.c
new file mode 100644
index 00000000000..62d1d3fc791
--- /dev/null
+++ b/sys/fs/cd9660/cd9660_lookup.c
@@ -0,0 +1,465 @@
+/*-
+ * Copyright (c) 1989, 1993, 1994
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley
+ * by Pace Willisson (pace@blitz.com).  The Rock Ridge Extension
+ * Support code is derived from software contributed to Berkeley
+ * by Atsushi Murai (amurai@spec.co.jp).
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	from: @(#)ufs_lookup.c	7.33 (Berkeley) 5/19/91
+ *
+ *	@(#)cd9660_lookup.c	8.2 (Berkeley) 1/23/94
+ */
+
+#include <sys/param.h>
+#include <sys/namei.h>
+#include <sys/buf.h>
+#include <sys/file.h>
+#include <sys/vnode.h>
+#include <sys/mount.h>
+
+#include <isofs/cd9660/iso.h>
+#include <isofs/cd9660/cd9660_node.h>
+#include <isofs/cd9660/iso_rrip.h>
+#include <isofs/cd9660/cd9660_rrip.h>
+
+struct	nchstats iso_nchstats;
+
+/*
+ * Convert a component of a pathname into a pointer to a locked inode.
+ * This is a very central and rather complicated routine.
+ * If the file system is not maintained in a strict tree hierarchy,
+ * this can result in a deadlock situation (see comments in code below).
+ *
+ * The flag argument is LOOKUP, CREATE, RENAME, or DELETE depending on
+ * whether the name is to be looked up, created, renamed, or deleted.
+ * When CREATE, RENAME, or DELETE is specified, information usable in
+ * creating, renaming, or deleting a directory entry may be calculated.
+ * If flag has LOCKPARENT or'ed into it and the target of the pathname
+ * exists, lookup returns both the target and its parent directory locked.
+ * When creating or renaming and LOCKPARENT is specified, the target may
+ * not be ".".  When deleting and LOCKPARENT is specified, the target may
+ * be ".", but the caller must check to ensure it does a vrele and iput
+ * instead of two iputs.
+ *
+ * Overall outline (inherited from ufs_lookup):
+ *
+ *	check accessibility of directory
+ *	look for name in cache, if found, then if at end of path
+ *	  and deleting or creating, drop it, else return name
+ *	search for name in directory, to found or notfound
+ * notfound:
+ *	if creating, return locked directory, leaving info on available slots
+ *	else return error
+ * found:
+ *	if at end of path and deleting, return information to allow delete
+ *	if at end of path and rewriting (RENAME and LOCKPARENT), lock target
+ *	  inode and return info to allow rewrite
+ *	if not at end, add name to cache; if at end and neither creating
+ *	  nor deleting, add name to cache
+ *
+ * NOTE: (LOOKUP | LOCKPARENT) currently returns the parent inode unlocked.
+ */
+cd9660_lookup(ap)
+	struct vop_lookup_args /* {
+		struct vnode *a_dvp;
+		struct vnode **a_vpp;
+		struct componentname *a_cnp;
+	} */ *ap;
+{
+	register struct vnode *vdp;	/* vnode for directory being searched */
+	register struct iso_node *dp;	/* inode for directory being searched */
+	register struct iso_mnt *imp;	/* file system that directory is in */
+	struct buf *bp;			/* a buffer of directory entries */
+	struct iso_directory_record *ep;/* the current directory entry */
+	int entryoffsetinblock;		/* offset of ep in bp's buffer */
+	int saveoffset;			/* offset of last directory entry in dir */
+	int numdirpasses;		/* strategy for directory search */
+	doff_t endsearch;		/* offset to end directory search */
+	struct iso_node *pdp;		/* saved dp during symlink work */
+	struct iso_node *tdp;		/* returned by iget */
+	int lockparent;			/* 1 => lockparent flag is set */
+	int wantparent;			/* 1 => wantparent or lockparent flag */
+	int error;
+	ino_t ino = 0;
+	int reclen;
+	u_short namelen;
+	char altname[NAME_MAX];		/* name filled in by cd9660_rrip_getname */
+	int res;			/* result of isofncmp() */
+	int assoc, len;			/* assoc: looking for an associated file */
+	char *name;
+	struct vnode **vpp = ap->a_vpp;
+	struct componentname *cnp = ap->a_cnp;
+	struct ucred *cred = cnp->cn_cred;
+	int flags = cnp->cn_flags;
+	int nameiop = cnp->cn_nameiop;
+	
+	bp = NULL;
+	*vpp = NULL;
+	vdp = ap->a_dvp;
+	dp = VTOI(vdp);
+	imp = dp->i_mnt;
+	lockparent = flags & LOCKPARENT;
+	wantparent = flags & (LOCKPARENT|WANTPARENT);
+	
+	/*
+	 * Check accessibility of directory.
+	 */
+	if (vdp->v_type != VDIR)
+	    return (ENOTDIR);
+	if (error = VOP_ACCESS(vdp, VEXEC, cred, cnp->cn_proc))
+		return (error);
+	
+	/*
+	 * We now have a segment name to search for, and a directory to search.
+	 *
+	 * Before tediously performing a linear scan of the directory,
+	 * check the name cache to see if the directory/name pair
+	 * we are looking for is known already.
+	 */
+	if (error = cache_lookup(vdp, vpp, cnp)) {
+		int vpid;	/* capability number of vnode */
+
+		if (error == ENOENT)
+			return (error);
+#ifdef PARANOID
+		if ((vdp->v_flag & VROOT) && (flags & ISDOTDOT))
+			panic("ufs_lookup: .. through root");
+#endif
+		/*
+		 * Get the next vnode in the path.
+		 * See comment below starting `Step through' for
+		 * an explanation of the locking protocol.
+		 */
+		pdp = dp;
+		dp = VTOI(*vpp);
+		vdp = *vpp;
+		vpid = vdp->v_id;
+		if (pdp == dp) {
+			VREF(vdp);
+			error = 0;
+		} else if (flags & ISDOTDOT) {
+			ISO_IUNLOCK(pdp);
+			error = vget(vdp, 1);
+			if (!error && lockparent && (flags & ISLASTCN))
+				ISO_ILOCK(pdp);
+		} else {
+			error = vget(vdp, 1);
+			if (!lockparent || error || !(flags & ISLASTCN))
+				ISO_IUNLOCK(pdp);
+		}
+		/*
+		 * Check that the capability number did not change
+		 * while we were waiting for the lock.
+		 */
+		if (!error) {
+			if (vpid == vdp->v_id)
+				return (0);
+			iso_iput(dp);
+			if (lockparent && pdp != dp && (flags & ISLASTCN))
+				ISO_IUNLOCK(pdp);
+		}
+		ISO_ILOCK(pdp);
+		dp = pdp;
+		vdp = ITOV(dp);
+		*vpp = NULL;
+	}
+	
+	len = cnp->cn_namelen;
+	name = cnp->cn_nameptr;
+	/*
+	 * A leading `=' means, we are looking for an associated file
+	 */
+	if (assoc = (imp->iso_ftype != ISO_FTYPE_RRIP && *name == ASSOCCHAR)) {
+		len--;
+		name++;
+	}
+	
+	/*
+	 * If there is cached information on a previous search of
+	 * this directory, pick up where we last left off.
+	 * We cache only lookups as these are the most common
+	 * and have the greatest payoff. Caching CREATE has little
+	 * benefit as it usually must search the entire directory
+	 * to determine that the entry does not exist. Caching the
+	 * location of the last DELETE or RENAME has not reduced
+	 * profiling time and hence has been removed in the interest
+	 * of simplicity.
+	 */
+	if (nameiop != LOOKUP || dp->i_diroff == 0 ||
+	    dp->i_diroff > dp->i_size) {
+		entryoffsetinblock = 0;
+		dp->i_offset = 0;
+		numdirpasses = 1;
+	} else {
+		dp->i_offset = dp->i_diroff;
+		entryoffsetinblock = iso_blkoff(imp, dp->i_offset);
+		if (entryoffsetinblock != 0) {
+			if (error = iso_blkatoff(dp, dp->i_offset, &bp))
+				return (error);
+		}
+		numdirpasses = 2;
+		iso_nchstats.ncs_2passes++;
+	}
+	endsearch = roundup(dp->i_size, imp->logical_block_size);
+	
+searchloop:
+	while (dp->i_offset < endsearch) {
+		/*
+		 * If offset is on a block boundary,
+		 * read the next directory block.
+		 * Release previous if it exists.
+		 */
+		if (iso_blkoff(imp, dp->i_offset) == 0) {
+			if (bp != NULL)
+				brelse(bp);
+			if (error = iso_blkatoff(dp, dp->i_offset, &bp))
+				return (error);
+			entryoffsetinblock = 0;
+		}
+		/*
+		 * Get pointer to next entry.
+		 */
+		ep = (struct iso_directory_record *)
+			(bp->b_un.b_addr + entryoffsetinblock);
+		
+		reclen = isonum_711 (ep->length);
+		if (reclen == 0) {
+			/* skip to next block, if any */
+			dp->i_offset =
+				roundup(dp->i_offset, imp->logical_block_size);
+			continue;
+		}
+		
+		if (reclen < ISO_DIRECTORY_RECORD_SIZE)
+			/* illegal entry, stop */
+			break;
+		
+		if (entryoffsetinblock + reclen > imp->logical_block_size)
+			/* entries are not allowed to cross boundaries */
+			break;
+		
+		/*
+		 * Check for a name match.
+		 */
+		namelen = isonum_711(ep->name_len);
+		
+		if (reclen < ISO_DIRECTORY_RECORD_SIZE + namelen)
+			/* illegal entry, stop */
+			break;
+		
+		switch (imp->iso_ftype) {
+		default:
+			if ((!(isonum_711(ep->flags)&4)) == !assoc) {
+				if ((len == 1
+				     && *name == '.')
+				    || (flags & ISDOTDOT)) {
+					if (namelen == 1
+					    && ep->name[0] == ((flags & ISDOTDOT) ? 1 : 0)) {
+						/*
+						 * Save directory entry's inode number in
+						 * dp->i_ino; the directory buffer is
+						 * released at found.
+						 */
+						isodirino(&dp->i_ino,ep,imp);
+						goto found;
+					}
+					if (namelen != 1
+					    || ep->name[0] != 0)
+						goto notfound;
+				} else if (!(res = isofncmp(name,len,
+							    ep->name,namelen))) {
+					if (isonum_711(ep->flags)&2)
+						isodirino(&ino,ep,imp);
+					else
+						ino = dbtob(bp->b_blkno)
+							+ entryoffsetinblock;
+					saveoffset = dp->i_offset;
+				} else if (ino)
+					goto foundino;
+#ifdef	NOSORTBUG	/* On some CDs directory entries are not sorted correctly */
+				else if (res < 0)
+					goto notfound;
+				else if (res > 0 && numdirpasses == 2)
+					numdirpasses++;
+#endif
+			}
+			break;
+		case ISO_FTYPE_RRIP:
+			if (isonum_711(ep->flags)&2)
+				isodirino(&ino,ep,imp);
+			else
+				ino = dbtob(bp->b_blkno) + entryoffsetinblock;
+			dp->i_ino = ino;
+			cd9660_rrip_getname(ep,altname,&namelen,&dp->i_ino,imp);
+			if (namelen == cnp->cn_namelen
+			    && !bcmp(name,altname,namelen))
+				goto found;
+			ino = 0;
+			break;
+		}
+		dp->i_offset += reclen;
+		entryoffsetinblock += reclen;
+	}
+	if (ino) {
+foundino:
+		dp->i_ino = ino;
+		if (saveoffset != dp->i_offset) {
+			if (iso_lblkno(imp,dp->i_offset)
+			    != iso_lblkno(imp,saveoffset)) {
+				if (bp != NULL)
+					brelse(bp);
+				if (error = iso_blkatoff(dp, saveoffset, &bp))
+					return (error);
+			}
+			ep = (struct iso_directory_record *)(bp->b_un.b_addr
+							     + iso_blkoff(imp,saveoffset));
+			dp->i_offset = saveoffset;
+		}
+		goto found;
+	}
+notfound:
+	/*
+	 * If we started in the middle of the directory and failed
+	 * to find our target, we must check the beginning as well.
+	 */
+	if (numdirpasses == 2) {
+		numdirpasses--;
+		dp->i_offset = 0;
+		endsearch = dp->i_diroff;
+		goto searchloop;
+	}
+	if (bp != NULL)
+		brelse(bp);
+	/*
+	 * Insert name into cache (as non-existent) if appropriate.
+	 */
+	if (cnp->cn_flags & MAKEENTRY)
+		cache_enter(vdp, *vpp, cnp);
+	if (nameiop == CREATE || nameiop == RENAME)
+		return (EJUSTRETURN);
+	return (ENOENT);
+	
+found:
+	if (numdirpasses == 2)
+		iso_nchstats.ncs_pass2++;
+	if (bp != NULL)
+		brelse(bp);
+	
+	/*
+	 * Found component in pathname.
+	 * If the final component of path name, save information
+	 * in the cache as to where the entry was found.
+	 */
+	if ((flags & ISLASTCN) && nameiop == LOOKUP)
+		dp->i_diroff = dp->i_offset;
+	
+	/*
+	 * Step through the translation in the name.  We do not `iput' the
+	 * directory because we may need it again if a symbolic link
+	 * is relative to the current directory.  Instead we save it
+	 * unlocked as "pdp".  We must get the target inode before unlocking
+	 * the directory to ensure that the inode will not be removed
+	 * before we get it.  We prevent deadlock by always fetching
+	 * inodes from the root, moving down the directory tree. Thus
+	 * when following backward pointers ".." we must unlock the
+	 * parent directory before getting the requested directory.
+	 * There is a potential race condition here if both the current
+	 * and parent directories are removed before the `iget' for the
+	 * inode associated with ".." returns.  We hope that this occurs
+	 * infrequently since we cannot avoid this race condition without
+	 * implementing a sophisticated deadlock detection algorithm.
+	 * Note also that this simple deadlock detection scheme will not
+	 * work if the file system has any hard links other than ".."
+	 * that point backwards in the directory structure.
+	 */
+	pdp = dp;
+	/*
+	 * If ino is different from dp->i_ino,
+	 * it's a relocated directory.
+	 */
+	if (flags & ISDOTDOT) {
+		ISO_IUNLOCK(pdp);	/* race to get the inode */
+		if (error = iso_iget(dp,dp->i_ino,
+				     dp->i_ino != ino,
+				     &tdp,ep)) {
+			ISO_ILOCK(pdp);
+			return (error);
+		}
+		if (lockparent && (flags & ISLASTCN))
+			ISO_ILOCK(pdp);
+		*vpp = ITOV(tdp);
+	} else if (dp->i_number == dp->i_ino) {
+		VREF(vdp);	/* we want ourself, ie "." */
+		*vpp = vdp;
+	} else {
+		if (error = iso_iget(dp,dp->i_ino,dp->i_ino!=ino,&tdp,ep))
+			return (error);
+		if (!lockparent || !(flags & ISLASTCN))
+			ISO_IUNLOCK(pdp);
+		*vpp = ITOV(tdp);
+	}
+	
+	/*
+	 * Insert name into cache if appropriate.
+	 */
+	if (cnp->cn_flags & MAKEENTRY)
+		cache_enter(vdp, *vpp, cnp);
+	return (0);
+}
+
+/*
+ * Return buffer with contents of block "offset"
+ * from the beginning of directory "ip" in "*bpp".
+ * On a read error the buffer is released, "*bpp"
+ * is cleared and the error is returned; else 0.
+ */
+iso_blkatoff(ip, offset, bpp)
+	struct iso_node *ip;
+	doff_t offset;
+	struct buf **bpp;
+{
+	register struct iso_mnt *imp = ip->i_mnt;
+	daddr_t lbn = iso_lblkno(imp,offset);
+	int bsize = iso_blksize(imp,ip,lbn);
+	struct buf *bp;
+	int error;
+	
+	if (error = bread(ITOV(ip),lbn,bsize,NOCRED,&bp)) {
+		brelse(bp);
+		*bpp = 0;
+		return (error);
+	}
+	*bpp = bp;
+	
+	return (0);
+}
diff --git a/sys/fs/cd9660/cd9660_node.c b/sys/fs/cd9660/cd9660_node.c
new file mode 100644
index 00000000000..d83a7a6f126
--- /dev/null
+++ b/sys/fs/cd9660/cd9660_node.c
@@ -0,0 +1,648 @@
+/*-
+ * Copyright (c) 1982, 1986, 1989, 1994
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley
+ * by Pace Willisson (pace@blitz.com).  The Rock Ridge Extension
+ * Support code is derived from software contributed to Berkeley
+ * by Atsushi Murai (amurai@spec.co.jp).
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)cd9660_node.c	8.2 (Berkeley) 1/23/94
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/mount.h>
+#include <sys/proc.h>
+#include <sys/file.h>
+#include <sys/buf.h>
+#include <sys/vnode.h>
+#include <sys/kernel.h>
+#include <sys/malloc.h>
+#include <sys/stat.h>
+
+#include <isofs/cd9660/iso.h>
+#include <isofs/cd9660/cd9660_node.h>
+#include <isofs/cd9660/iso_rrip.h>
+
+#define	INOHSZ	512
+#if	((INOHSZ&(INOHSZ-1)) == 0)
+#define	INOHASH(dev,ino)	(((dev)+((ino)>>12))&(INOHSZ-1))
+#else
+#define	INOHASH(dev,ino)	(((unsigned)((dev)+((ino)>>12)))%INOHSZ)
+#endif
+
+union iso_ihead {
+	union  iso_ihead *ih_head[2];
+	struct iso_node *ih_chain[2];
+} iso_ihead[INOHSZ];
+
+#ifdef	ISODEVMAP
+#define	DNOHSZ	64
+#if	((DNOHSZ&(DNOHSZ-1)) == 0)
+#define	DNOHASH(dev,ino)	(((dev)+((ino)>>12))&(DNOHSZ-1))
+#else
+#define	DNOHASH(dev,ino)	(((unsigned)((dev)+((ino)>>12)))%DNOHSZ)
+#endif
+
+union iso_dhead {
+	union  iso_dhead  *dh_head[2];
+	struct iso_dnode *dh_chain[2];
+} iso_dhead[DNOHSZ];
+#endif
+
+int prtactive;	/* 1 => print out reclaim of active vnodes */
+
+/*
+ * Initialize hash links for inodes and dnodes; each head points at itself.
+ */
+cd9660_init()
+{
+	register int i;
+	register union iso_ihead *ih = iso_ihead;
+#ifdef	ISODEVMAP
+	register union iso_dhead *dh = iso_dhead;
+#endif
+
+	for (i = INOHSZ; --i >= 0; ih++) {	/* all INOHSZ inode buckets */
+		ih->ih_head[0] = ih;
+		ih->ih_head[1] = ih;
+	}
+#ifdef	ISODEVMAP
+	for (i = DNOHSZ; --i >= 0; dh++) {	/* all DNOHSZ device-map buckets */
+		dh->dh_head[0] = dh;
+		dh->dh_head[1] = dh;
+	}
+#endif
+}
+
+#ifdef	ISODEVMAP
+/*
+ * Look up (dev,ino) in the device hash list, entering a new node if asked
+ */
+struct iso_dnode *
+iso_dmap(dev,ino,create)
+	dev_t	dev;
+	ino_t	ino;
+	int	create;
+{
+	struct iso_dnode *dp;
+	union iso_dhead *dh;
+	
+	dh = &iso_dhead[DNOHASH(dev, ino)];
+	for (dp = dh->dh_chain[0];
+	     dp != (struct iso_dnode *)dh;
+	     dp = dp->d_forw)
+		if (ino == dp->i_number && dev == dp->i_dev)
+			return dp;	/* already present */
+
+	if (!create)
+		return (struct iso_dnode *)0;	/* lookup only: report the miss */
+
+	MALLOC(dp,struct iso_dnode *,sizeof(struct iso_dnode),M_CACHE,M_WAITOK);
+	dp->i_dev = dev;
+	dp->i_number = ino;
+	insque(dp,dh);
+	
+	return dp;	/* note: d_dev is not initialized here */
+}
+
+void
+iso_dunmap(dev)		/* unlink and free every device-map node whose i_dev is dev */
+	dev_t	dev;
+{
+	struct iso_dnode *dp, *dq;
+	union iso_dhead *dh;
+	
+	for (dh = iso_dhead; dh < iso_dhead + DNOHSZ; dh++) {
+		for (dp = dh->dh_chain[0];
+		     dp != (struct iso_dnode *)dh;
+		     dp = dq) {
+			dq = dp->d_forw;	/* fetch successor before dp may be freed */
+			if (dev == dp->i_dev) {
+				remque(dp);
+				FREE(dp,M_CACHE);
+			}
+		}
+	}
+}
+#endif
+
+/*
+ * Look up an ISOFS inode number to find its incore node; on a hash
+ * miss allocate one and fill it in from the directory record "isodir"
+ * (or, if "relocated", from the `.' entry read off the disk).  The
+ * node is returned locked in *ipp; on failure *ipp is 0 and an errno is
+ * returned.  Mount points must be handled by the calling routine.
+ */
+iso_iget(xp, ino, relocated, ipp, isodir)
+	struct iso_node *xp;
+	ino_t ino;
+	struct iso_node **ipp;
+	struct iso_directory_record *isodir;
+{
+	dev_t dev = xp->i_dev;
+	struct mount *mntp = ITOV(xp)->v_mount;
+	register struct iso_node *ip, *iq;
+	register struct vnode *vp;
+	register struct iso_dnode *dp;
+	struct vnode *nvp;
+	struct buf *bp = NULL, *bp2 = NULL;
+	union iso_ihead *ih;
+	union iso_dhead *dh;
+	int i, error, result;
+	struct iso_mnt *imp;
+	ino_t defino;
+	
+	ih = &iso_ihead[INOHASH(dev, ino)];
+loop:
+	for (ip = ih->ih_chain[0];
+	     ip != (struct iso_node *)ih;
+	     ip = ip->i_forw) {
+		if (ino != ip->i_number || dev != ip->i_dev)
+			continue;
+		if ((ip->i_flag&ILOCKED) != 0) {
+			ip->i_flag |= IWANT;
+			sleep((caddr_t)ip, PINOD);
+			goto loop;
+		}
+		if (vget(ITOV(ip), 1))
+			goto loop;
+		*ipp = ip;
+		return 0;
+	}
+	/*
+	 * Allocate a new vnode/iso_node.
+	 */
+	if (error = getnewvnode(VT_ISOFS, mntp, cd9660_vnodeop_p, &nvp)) {
+		*ipp = 0;
+		return error;
+	}
+	MALLOC(ip, struct iso_node *, sizeof(struct iso_node),
+	       M_ISOFSNODE, M_WAITOK);
+	bzero((caddr_t)ip, sizeof(struct iso_node));
+	nvp->v_data = ip;
+	ip->i_vnode = nvp;
+	ip->i_flag = 0;
+	ip->i_devvp = 0;
+	ip->i_diroff = 0;
+	ip->i_lockf = 0;
+	
+	/*
+	 * Put it onto its hash chain and lock it so that other requests for
+	 * this inode will block if they arrive while we are sleeping waiting
+	 * for old data structures to be purged or for the contents of the
+	 * disk portion of this inode to be read.
+	 */
+	ip->i_dev = dev;
+	ip->i_number = ino;
+	insque(ip, ih);
+	ISO_ILOCK(ip);
+
+	imp = VFSTOISOFS (mntp);
+	ip->i_mnt = imp;
+	ip->i_devvp = imp->im_devvp;
+	VREF(ip->i_devvp);
+	
+	if (relocated) {
+		/*
+		 * On relocated directories we must
+		 * read the `.' entry out of a dir.
+		 */
+		ip->iso_start = ino >> imp->im_bshift;
+		if (error = iso_blkatoff(ip,0,&bp)) {
+			/* i_devvp is released once, by cd9660_reclaim() */
+			remque(ip);
+			ip->i_forw = ip;
+			ip->i_back = ip;
+			iso_iput(ip);
+			*ipp = 0;
+			return error;
+		}
+		isodir = (struct iso_directory_record *)bp->b_un.b_addr;
+	}
+	
+	ip->iso_extent = isonum_733(isodir->extent);
+	ip->i_size = isonum_733(isodir->size);
+	ip->iso_start = isonum_711(isodir->ext_attr_length) + ip->iso_extent;
+	
+	vp = ITOV(ip);
+	
+	/*
+	 * Setup time stamp, attribute
+	 */
+	vp->v_type = VNON;
+	switch (imp->iso_ftype) {
+	default:	/* ISO_FTYPE_9660 */
+		if ((imp->im_flags&ISOFSMNT_EXTATT)
+		    && isonum_711(isodir->ext_attr_length))
+			iso_blkatoff(ip,-isonum_711(isodir->ext_attr_length),
+				     &bp2);
+		cd9660_defattr(isodir,ip,bp2 );
+		cd9660_deftstamp(isodir,ip,bp2 );
+		break;
+	case ISO_FTYPE_RRIP:
+		result = cd9660_rrip_analyze(isodir,ip,imp);
+		break;
+	}
+	if (bp2)
+		brelse(bp2);
+	if (bp)
+		brelse(bp);
+	
+	/*
+	 * Initialize the associated vnode
+	 */
+	vp->v_type = IFTOVT(ip->inode.iso_mode);
+	
+	if ( vp->v_type == VFIFO ) {
+#ifdef	FIFO
+		extern int (**cd9660_fifoop_p)();
+		vp->v_op = cd9660_fifoop_p;
+#else
+		iso_iput(ip);
+		*ipp = 0;
+		return EOPNOTSUPP;
+#endif	/* FIFO */
+	} else if ( vp->v_type == VCHR || vp->v_type == VBLK ) {
+		extern int (**cd9660_specop_p)();
+
+		/*
+		 * if device, look at device number table for translation
+		 */
+#ifdef	ISODEVMAP
+		if (dp = iso_dmap(dev,ino,0))
+			ip->inode.iso_rdev = dp->d_dev;
+#endif
+		vp->v_op = cd9660_specop_p;
+		if (nvp = checkalias(vp, ip->inode.iso_rdev, mntp)) {
+			/*
+			 * Reinitialize aliased inode.
+			 */
+			vp = nvp;
+			iq = VTOI(vp);
+			iq->i_vnode = vp;
+			iq->i_flag = 0;
+			ISO_ILOCK(iq);
+			iq->i_dev = dev;
+			iq->i_number = ino;
+			iq->i_mnt = ip->i_mnt;
+			bcopy(&ip->iso_extent,&iq->iso_extent,
+			      (char *)(ip + 1) - (char *)&ip->iso_extent);
+			insque(iq, ih);
+			/*
+			 * Discard unneeded vnode
+			 * (This introduces the need of INACTIVE modification)
+			 */
+			ip->inode.iso_mode = 0;
+			iso_iput(ip);
+			ip = iq;
+		}
+	}
+	
+	if (ip->iso_extent == imp->root_extent)
+		vp->v_flag |= VROOT;
+	
+	*ipp = ip;
+	return 0;
+}
+
+/*
+ * Unlock a locked inode and drop a vnode reference; panics if not locked.
+ */
+iso_iput(ip)
+	register struct iso_node *ip;
+{
+	
+	if ((ip->i_flag & ILOCKED) == 0)
+		panic("iso_iput");
+	ISO_IUNLOCK(ip);
+	vrele(ITOV(ip));
+}
+
+/*
+ * Inactive operation for a cd9660 vnode.  Nothing is written back: the
+ * inode flags are cleared and, if the inode has been marked as discarded
+ * (a mode of zero) and nobody uses the vnode, the vnode is reclaimed at
+ * once so that it can be reused immediately.
+ * Always returns 0.
+ */
+int
+cd9660_inactive(ap)
+	struct vop_inactive_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+	struct vnode *vp;
+	register struct iso_node *ip;
+
+	vp = ap->a_vp;
+	ip = VTOI(vp);
+
+	if (prtactive && vp->v_usecount != 0)
+		vprint("cd9660_inactive: pushing active", vp);
+
+	ip->i_flag = 0;
+
+	/* A zero mode means the inode was thrown away on purpose. */
+	if (ip->inode.iso_mode == 0 && vp->v_usecount == 0)
+		vgone(vp);
+
+	return (0);
+}
+
+/*
+ * Reclaim operation: detach the iso_node from the vnode so that the
+ * vnode can be used for other purposes.  The inode is taken off its
+ * hash chain, the name cache entries for the vnode are purged, the
+ * reference on the device vnode (if any) is dropped and the inode
+ * memory is freed.
+ * Always returns 0.
+ */
+int
+cd9660_reclaim(ap)
+	struct vop_reclaim_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+	register struct vnode *vp = ap->a_vp;
+	register struct iso_node *ip = VTOI(vp);
+
+	if (prtactive && vp->v_usecount != 0)
+		vprint("cd9660_reclaim: pushing active", vp);
+
+	/* Unhash the inode and leave it pointing at itself. */
+	remque(ip);
+	ip->i_forw = ip;
+	ip->i_back = ip;
+
+	/* Get rid of everything still attached to the inode. */
+	cache_purge(vp);
+	if (ip->i_devvp != NULL) {
+		vrele(ip->i_devvp);
+		ip->i_devvp = 0;
+	}
+
+	FREE(vp->v_data, M_ISOFSNODE);
+	vp->v_data = NULL;
+
+	return (0);
+}
+
+/*
+ * Lock an inode.  While somebody else holds the lock, set the WANT bit
+ * and sleep on the inode.  i_spare0 records the pid of the lock holder
+ * and i_spare1 the pid of a waiter; a process that tries to take a lock
+ * it already holds panics.
+ */
+iso_ilock(ip)
+	register struct iso_node *ip;
+{
+
+	for (;;) {
+		if (!(ip->i_flag & ILOCKED))
+			break;
+		ip->i_flag |= IWANT;
+		if (curproc->p_pid == ip->i_spare0)
+			panic("locking against myself");
+		ip->i_spare1 = curproc->p_pid;
+		(void) sleep((caddr_t)ip, PINOD);
+	}
+
+	/* The lock is free now: take it and record the owner. */
+	ip->i_spare1 = 0;
+	ip->i_spare0 = curproc->p_pid;
+	ip->i_flag |= ILOCKED;
+}
+
+/*
+ * Unlock an inode and wake up anybody sleeping on it.  Unlocking an
+ * inode that is not locked is reported with vprint() but otherwise
+ * tolerated.
+ */
+iso_iunlock(ip)
+	register struct iso_node *ip;
+{
+
+	if (!(ip->i_flag & ILOCKED))
+		vprint("iso_iunlock: unlocked inode", ITOV(ip));
+
+	/* Forget the owner recorded by iso_ilock(). */
+	ip->i_spare0 = 0;
+
+	if (ip->i_flag & IWANT) {
+		ip->i_flag &= ~(ILOCKED|IWANT);
+		wakeup((caddr_t)ip);
+	} else
+		ip->i_flag &= ~ILOCKED;
+}
+
+/*
+ * Fill in mode, link count, owner and group of an inode from a plain
+ * ISO 9660 directory record.
+ *
+ * isodir	directory record describing the file
+ * inop		inode to fill in
+ * bp		buffer holding the extended attribute record, or NULL.
+ *		With NULL the record is read here (and released again)
+ *		when the mount has ISOFSMNT_EXTATT set and the directory
+ *		record announces an extended attribute record.
+ *
+ * Without a usable (version 1) extended attribute record the file is
+ * readable and executable by everybody and owned by uid/gid 0.
+ */
+void
+cd9660_defattr(isodir,inop,bp)
+	struct iso_directory_record *isodir;
+	struct iso_node *inop;
+	struct buf *bp;
+{
+	struct buf *xbp = NULL;		/* buffer we read ourselves */
+	struct iso_mnt *imp;
+	struct iso_extended_attributes *ap = NULL;
+	ISO_RRIP_INODE *ino = &inop->inode;
+	int off;
+
+	/*
+	 * Directory or regular file.  The link count is 1 in both cases:
+	 * if we return 2 for directories, fts() will assume there are no
+	 * subdirectories (just links for the path and .).
+	 */
+	if (isonum_711(isodir->flags) & 2)
+		ino->iso_mode = S_IFDIR;
+	else
+		ino->iso_mode = S_IFREG;
+	ino->iso_links = 1;
+
+	/* Fetch the extended attribute record unless the caller did. */
+	if (bp == NULL) {
+		imp = inop->i_mnt;
+		if (imp->im_flags & ISOFSMNT_EXTATT) {
+			off = isonum_711(isodir->ext_attr_length);
+			if (off != 0) {
+				iso_blkatoff(inop,
+					     -off * imp->logical_block_size,
+					     &xbp);
+				bp = xbp;
+			}
+		}
+	}
+
+	/* Only version 1 records are understood. */
+	if (bp != NULL) {
+		ap = (struct iso_extended_attributes *)bp->b_un.b_addr;
+		if (isonum_711(ap->version) != 1)
+			ap = NULL;
+	}
+
+	if (ap == NULL) {
+		ino->iso_mode |= VREAD|VEXEC|(VREAD|VEXEC)>>3|(VREAD|VEXEC)>>6;
+		ino->iso_uid = (uid_t)0;
+		ino->iso_gid = (gid_t)0;
+	} else {
+		/* A cleared bit in the record grants the permission. */
+		if ((ap->perm[0] & 0x40) == 0)
+			ino->iso_mode |= VEXEC >> 6;
+		if ((ap->perm[0] & 0x10) == 0)
+			ino->iso_mode |= VREAD >> 6;
+		if ((ap->perm[0] & 4) == 0)
+			ino->iso_mode |= VEXEC >> 3;
+		if ((ap->perm[0] & 1) == 0)
+			ino->iso_mode |= VREAD >> 3;
+		if ((ap->perm[1] & 0x40) == 0)
+			ino->iso_mode |= VEXEC;
+		if ((ap->perm[1] & 0x10) == 0)
+			ino->iso_mode |= VREAD;
+		ino->iso_uid = isonum_723(ap->owner); /* what about 0? */
+		ino->iso_gid = isonum_723(ap->group); /* what about 0? */
+	}
+
+	if (xbp != NULL)
+		brelse(xbp);
+}
+
+/*
+ * Fill in the access, modification and change times of an inode from a
+ * plain ISO 9660 directory record.
+ *
+ * isodir	directory record describing the file
+ * inop		inode to fill in
+ * bp		buffer holding the extended attribute record, or NULL.
+ *		With NULL the record is read here (and released again)
+ *		when the mount has ISOFSMNT_EXTATT set and the directory
+ *		record announces an extended attribute record.
+ *
+ * Without a usable (version 1) extended attribute record all three
+ * times are taken from the recording date of the directory record.
+ */
+void
+cd9660_deftstamp(isodir,inop,bp)
+	struct iso_directory_record *isodir;
+	struct iso_node *inop;
+	struct buf *bp;
+{
+	struct buf *xbp = NULL;		/* buffer we read ourselves */
+	struct iso_mnt *imp;
+	struct iso_extended_attributes *ap = NULL;
+	ISO_RRIP_INODE *ino = &inop->inode;
+	int off;
+
+	/* Fetch the extended attribute record unless the caller did. */
+	if (bp == NULL) {
+		imp = inop->i_mnt;
+		if (imp->im_flags & ISOFSMNT_EXTATT) {
+			off = isonum_711(isodir->ext_attr_length);
+			if (off != 0) {
+				iso_blkatoff(inop,
+					     -off * imp->logical_block_size,
+					     &xbp);
+				bp = xbp;
+			}
+		}
+	}
+
+	/* Only version 1 records are understood. */
+	if (bp != NULL) {
+		ap = (struct iso_extended_attributes *)bp->b_un.b_addr;
+		if (isonum_711(ap->version) != 1)
+			ap = NULL;
+	}
+
+	if (ap != NULL) {
+		/* Each time falls back to a neighbour if it is unusable. */
+		if (!cd9660_tstamp_conv17(ap->ftime,&ino->iso_atime))
+			cd9660_tstamp_conv17(ap->ctime,&ino->iso_atime);
+		if (!cd9660_tstamp_conv17(ap->ctime,&ino->iso_ctime))
+			ino->iso_ctime = ino->iso_atime;
+		if (!cd9660_tstamp_conv17(ap->mtime,&ino->iso_mtime))
+			ino->iso_mtime = ino->iso_ctime;
+	} else {
+		cd9660_tstamp_conv7(isodir->date,&ino->iso_ctime);
+		ino->iso_atime = ino->iso_ctime;
+		ino->iso_mtime = ino->iso_ctime;
+	}
+
+	if (xbp != NULL)
+		brelse(xbp);
+}
+
+/*
+ * Convert a 7 byte ISO 9660 time stamp to seconds since Jan. 1st, 1970.
+ *
+ * pi	seven bytes: years since 1900, month, day, hour, minute, second
+ *	and the offset from GMT in units of 15 minutes
+ * pu	result; tv_usec is always set to 0
+ *
+ * Returns 1 on success.  Returns 0, with *pu set to zero, if the year
+ * lies before 1970.
+ */
+int
+cd9660_tstamp_conv7(pi,pu)
+char *pi;
+struct timeval *pu;
+{
+	long crtime;
+	int days;
+	int y, m, d, hour, minute, second, tz;
+	
+	/*
+	 * The year is an unsigned byte (1900 .. 2155).  Reading it through
+	 * a plain char, which may be signed, would turn the years from 2028
+	 * on into years before 1900 and make them fail the check below.
+	 */
+	y = (unsigned char)pi[0] + 1900;
+	m = pi[1];
+	d = pi[2];
+	hour = pi[3];
+	minute = pi[4];
+	second = pi[5];
+	/* The offset from GMT is the only signed field. */
+	tz = (signed char)pi[6];
+	
+	if (y < 1970) {
+		pu->tv_sec  = 0;
+		pu->tv_usec = 0;
+		return 0;
+	}
+#ifdef	ORIGINAL
+	/* computes day number relative to Sept. 19th,1989 */
+	/* don't even *THINK* about changing formula. It works! */
+	days = 367*(y-1980)-7*(y+(m+9)/12)/4-3*((y+(m-9)/7)/100+1)/4+275*m/9+d-100;
+#else
+	/*
+	 * Changed :-) to make it relative to Jan. 1st, 1970
+	 * and to disambiguate negative division
+	 */
+	days = 367*(y-1960)-7*(y+(m+9)/12)/4-3*((y+(m+9)/12-1)/100+1)/4+275*m/9+d-239;
+#endif
+	crtime = ((((long)days * 24) + hour) * 60 + minute) * 60 + second;
+	
+	/*
+	 * The recorded time is local time and the offset tells how far
+	 * local time is ahead of GMT (positive means east), so the offset
+	 * has to be subtracted to get GMT.
+	 * The offset is unreliable on some disks, ignore implausible ones.
+	 */
+	if (-48 <= tz && tz <= 52)
+		crtime -= tz * 15 * 60;
+	
+	pu->tv_sec  = crtime;
+	pu->tv_usec = 0;
+	return 1;
+}
+
+/*
+ * Convert len decimal digit characters starting at begin to a number.
+ * The characters are not checked to be digits.
+ */
+static unsigned
+cd9660_chars2ui(begin,len)
+	unsigned char *begin;
+	int len;
+{
+	unsigned rc;
+	
+	for (rc = 0; --len >= 0;) {
+		rc *= 10;
+		rc += *begin++ - '0';
+	}
+	return rc;
+}
+
+/*
+ * Convert a 17 byte ISO 9660 time stamp (16 digit characters followed
+ * by the offset from GMT) to seconds since Jan. 1st, 1970.
+ * The hundredths of a second (digits 15 and 16) are ignored.
+ *
+ * Returns what cd9660_tstamp_conv7() returns for the equivalent 7 byte
+ * time stamp.  Returns 0, with *pu set to zero, if the year cannot be
+ * expressed in the 7 byte form.
+ */
+int
+cd9660_tstamp_conv17(pi,pu)
+	unsigned char *pi;
+	struct timeval *pu;
+{
+	unsigned char buf[7];
+	unsigned year;
+	
+	/*
+	 * year:"0001"-"9999" -> -1900
+	 * The 7 byte form has only one byte for the year.  Refuse years
+	 * that do not fit (a year of "0000" among them) instead of
+	 * letting the value wrap around to some unrelated year.
+	 */
+	year = cd9660_chars2ui(pi,4);
+	if (year < 1900 || year > 1900 + 255) {
+		pu->tv_sec  = 0;
+		pu->tv_usec = 0;
+		return 0;
+	}
+	buf[0] = year - 1900;
+	
+	/* month: "01"-"12"      -> 1 - 12 */
+	buf[1] = cd9660_chars2ui(pi + 4,2);
+	
+	/* day:   "01"-"31"      -> 1 - 31 */
+	buf[2] = cd9660_chars2ui(pi + 6,2);
+	
+	/* hour:  "00"-"23"      -> 0 - 23 */
+	buf[3] = cd9660_chars2ui(pi + 8,2);
+	
+	/* minute:"00"-"59"      -> 0 - 59 */
+	buf[4] = cd9660_chars2ui(pi + 10,2);
+	
+	/* second:"00"-"59"      -> 0 - 59 */
+	buf[5] = cd9660_chars2ui(pi + 12,2);
+	
+	/* difference of GMT */
+	buf[6] = pi[16];
+	
+	return cd9660_tstamp_conv7((char *)buf,pu);
+}
+
+/*
+ * Compute the inode number of the file described by a directory record:
+ * the byte address of the first data block, i.e. the start of the extent
+ * plus the length of the extended attribute record, both counted in
+ * logical blocks, times the logical block size.
+ */
+void
+isodirino(inump,isodir,imp)
+	ino_t *inump;
+	struct iso_directory_record *isodir;
+	struct iso_mnt *imp;
+{
+	*inump = imp->logical_block_size
+		 * (isonum_711(isodir->ext_attr_length)
+		    + isonum_733(isodir->extent));
+}
diff --git a/sys/fs/cd9660/cd9660_node.h b/sys/fs/cd9660/cd9660_node.h
new file mode 100644
index 00000000000..45de67f1a6b
--- /dev/null
+++ b/sys/fs/cd9660/cd9660_node.h
@@ -0,0 +1,143 @@
+/*-
+ * Copyright (c) 1994
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley
+ * by Pace Willisson (pace@blitz.com).  The Rock Ridge Extension
+ * Support code is derived from software contributed to Berkeley
+ * by Atsushi Murai (amurai@spec.co.jp).
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)cd9660_node.h	8.2 (Berkeley) 1/23/94
+ */
+
+/*
+ * Theoretically, directories can be more than 2Gb in length,
+ * however, in practice this seems unlikely. So, we define
+ * the type doff_t as a long to keep down the cost of doing
+ * lookup on a 32-bit machine. If you are porting to a 64-bit
+ * architecture, you should make doff_t the same as off_t.
+ *
+ * Note that doff_t is a preprocessor macro, not a typedef.
+ */
+#define doff_t	long
+
+/*
+ * Unix style attributes of a file.  They are filled in either from the
+ * Rock Ridge fields of the directory record or with the defaults
+ * computed by cd9660_defattr() and cd9660_deftstamp().
+ *
+ * NOTE(review): the times are declared as struct timespec, but the
+ * conversion routines that fill them in take a struct timeval pointer --
+ * confirm that both have the same layout on the target.
+ */
+typedef	struct	{
+	struct timespec	iso_atime;	/* time of last access */
+	struct timespec	iso_mtime;	/* time of last modification */
+	struct timespec	iso_ctime;	/* time file changed */
+	u_short		iso_mode;	/* files access mode and type */
+	uid_t		iso_uid;	/* owner user id */
+	gid_t		iso_gid;	/* owner group id */
+	short		iso_links;	/* links of file */
+	dev_t		iso_rdev;	/* Major/Minor number for special */
+} ISO_RRIP_INODE;
+
+#ifdef	ISODEVMAP
+/*
+ * Entry of the device number (major,minor) translation table.
+ * An entry is looked up by (i_dev, i_number); d_dev is the device
+ * number that replaces the one recorded for that inode.
+ */
+struct iso_dnode {
+	struct iso_dnode *d_chain[2];	/* hash chain, MUST be first */
+	dev_t		i_dev;		/* device where dnode resides */
+	ino_t		i_number;	/* the identity of the inode */
+	dev_t		d_dev;		/* device # for translation */
+};
+#define	d_forw		d_chain[0]
+#define	d_back		d_chain[1]
+#endif
+
+/*
+ * In-core inode of a cd9660 file.
+ *
+ * The members from iso_extent to the end of the structure are copied as
+ * one block with bcopy() when an aliased device inode is re-initialized,
+ * so everything that describes the file itself has to stay together at
+ * the end of the structure.
+ */
+struct iso_node {
+	struct	iso_node *i_chain[2]; /* hash chain, MUST be first */
+	struct	vnode *i_vnode;	/* vnode associated with this inode */
+	struct	vnode *i_devvp;	/* vnode for block I/O */
+	u_long	i_flag;		/* see below */
+	dev_t	i_dev;		/* device where inode resides */
+	ino_t	i_number;	/* the identity of the inode */
+				/* we use the actual starting block of the file */
+	struct	iso_mnt *i_mnt;	/* filesystem associated with this inode */
+	struct	lockf *i_lockf;	/* head of byte-level lock list */
+	doff_t	i_endoff;	/* end of useful stuff in directory */
+	doff_t	i_diroff;	/* offset in dir, where we found last entry */
+	doff_t	i_offset;	/* offset of free space in directory */
+	ino_t	i_ino;		/* inode number of found directory */
+	long	i_spare0;	/* pid of the lock holder, see iso_ilock() */
+	long	i_spare1;	/* pid of a process waiting for the lock */
+
+	long iso_extent;	/* extent of file */
+	long i_size;
+	long iso_start;		/* actual start of data of file (may be different */
+				/* from iso_extent, if file has extended attributes) */
+	ISO_RRIP_INODE  inode;
+};
+
+/* forward and backward link of the hash chain */
+#define	i_forw		i_chain[0]
+#define	i_back		i_chain[1]
+
+/* flags */
+#define	ILOCKED		0x0001		/* inode is locked */
+#define	IWANT		0x0002		/* some process waiting on lock */
+#define	IACC		0x0020		/* inode access time to be updated */
+
+/* get from a vnode to its inode and back */
+#define VTOI(vp) ((struct iso_node *)(vp)->v_data)
+#define ITOV(ip) ((ip)->i_vnode)
+
+/* lock and unlock an inode */
+#define ISO_ILOCK(ip)	iso_ilock(ip)
+#define ISO_IUNLOCK(ip)	iso_iunlock(ip)
+
+/*
+ * Prototypes for ISOFS vnode operations
+ */
+int cd9660_lookup __P((struct vop_lookup_args *));
+int cd9660_open __P((struct vop_open_args *));
+int cd9660_close __P((struct vop_close_args *));
+int cd9660_access __P((struct vop_access_args *));
+int cd9660_getattr __P((struct vop_getattr_args *));
+int cd9660_read __P((struct vop_read_args *));
+int cd9660_ioctl __P((struct vop_ioctl_args *));
+int cd9660_select __P((struct vop_select_args *));
+int cd9660_mmap __P((struct vop_mmap_args *));
+int cd9660_seek __P((struct vop_seek_args *));
+int cd9660_readdir __P((struct vop_readdir_args *));
+int cd9660_abortop __P((struct vop_abortop_args *));
+int cd9660_inactive __P((struct vop_inactive_args *));
+int cd9660_reclaim __P((struct vop_reclaim_args *));
+int cd9660_bmap __P((struct vop_bmap_args *));
+int cd9660_lock __P((struct vop_lock_args *));
+int cd9660_unlock __P((struct vop_unlock_args *));
+int cd9660_strategy __P((struct vop_strategy_args *));
+int cd9660_print __P((struct vop_print_args *));
+int cd9660_islocked __P((struct vop_islocked_args *));
+/*
+ * Default attributes and time stamps from a plain directory record.
+ * The buffer is the one holding the extended attribute record; with
+ * NULL the routines read that record themselves if there is one.
+ */
+void cd9660_defattr __P((struct iso_directory_record *,
+			struct iso_node *, struct buf *));
+void cd9660_deftstamp __P((struct iso_directory_record *,
+			struct iso_node *, struct buf *));
+#ifdef	ISODEVMAP
+/* device number translation, see struct iso_dnode */
+struct iso_dnode *iso_dmap __P((dev_t, ino_t, int));
+void iso_dunmap __P((dev_t));
+#endif
diff --git a/sys/fs/cd9660/cd9660_rrip.c b/sys/fs/cd9660/cd9660_rrip.c
new file mode 100644
index 00000000000..0923fa01477
--- /dev/null
+++ b/sys/fs/cd9660/cd9660_rrip.c
@@ -0,0 +1,685 @@
+/*-
+ * Copyright (c) 1993, 1994
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley
+ * by Pace Willisson (pace@blitz.com).  The Rock Ridge Extension
+ * Support code is derived from software contributed to Berkeley
+ * by Atsushi Murai (amurai@spec.co.jp).
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)cd9660_rrip.c	8.2 (Berkeley) 1/23/94
+ */
+
+#include <sys/param.h>
+#include <sys/namei.h>
+#include <sys/buf.h>
+#include <sys/file.h>
+#include <sys/vnode.h>
+#include <sys/mount.h>
+#include <sys/kernel.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+
+#include <sys/time.h>
+
+#include <isofs/cd9660/iso.h>
+#include <isofs/cd9660/cd9660_node.h>
+#include <isofs/cd9660/cd9660_rrip.h>
+#include <isofs/cd9660/iso_rrip.h>
+
+/*
+ * POSIX file attribute ("PX" field): take mode, owner, group and link
+ * count of the inode from the field.
+ * Clears ISO_SUSP_ATTR in the wanted fields and returns it.
+ */
+static int
+cd9660_rrip_attr(p,ana)
+	ISO_RRIP_ATTR *p;
+	ISO_RRIP_ANALYZE *ana;
+{
+	register ISO_RRIP_INODE *ino = &ana->inop->inode;
+
+	ino->iso_mode = isonum_731(p->mode_l);
+	ino->iso_uid = (uid_t)isonum_731(p->uid_l);
+	ino->iso_gid = (gid_t)isonum_731(p->gid_l);
+	ino->iso_links = isonum_731(p->links_l);
+
+	ana->fields &= ~ISO_SUSP_ATTR;
+	return (ISO_SUSP_ATTR);
+}
+
+/*
+ * Default for a missing "PX" field: complain and fall back to the
+ * attributes of the plain ISO 9660 directory record.
+ */
+static void
+cd9660_rrip_defattr(isodir,ana)
+	struct iso_directory_record *isodir;
+	ISO_RRIP_ANALYZE *ana;
+{
+	struct iso_node *ip = ana->inop;
+
+	/* But this is a required field! */
+	printf("RRIP without PX field?\n");
+	cd9660_defattr(isodir,ip,NULL);
+}
+
+/*
+ * Symbolic Links ("SL" field)
+ *
+ * Appends the components recorded in one SL field to the path name that
+ * is being collected in ana->outbuf.  ana->outbuf, *ana->outlen and
+ * ana->cont carry the state from one SL field to the next.
+ *
+ * Returns ISO_SUSP_SLINK (and clears it in ana->fields) once a field
+ * whose flags are zero has been processed, 0 otherwise.  If the name
+ * does not fit into ana->maxlen bytes or a component has unknown flags,
+ * the collected name is thrown away, ana->fields is set to 0 and 0 is
+ * returned.
+ */
+static int
+cd9660_rrip_slink(p,ana)
+	ISO_RRIP_SLINK  *p;
+	ISO_RRIP_ANALYZE *ana;
+{
+	register ISO_RRIP_SLINK_COMPONENT *pcomp;
+	register ISO_RRIP_SLINK_COMPONENT *pcompe;
+	int len, wlen, cont;
+	char *outbuf, *inbuf;
+	
+	/* first component and end of the field */
+	pcomp = (ISO_RRIP_SLINK_COMPONENT *)p->component;
+	pcompe = (ISO_RRIP_SLINK_COMPONENT *)((char *)p + isonum_711(p->h.length));
+	/* work on copies, the state in ana is updated at the very end */
+	len = *ana->outlen;
+	outbuf = ana->outbuf;
+	cont = ana->cont;
+	
+	/*
+	 * Gathering a Symbolic name from each component with path
+	 */
+	for (;
+	     pcomp < pcompe;
+	     pcomp = (ISO_RRIP_SLINK_COMPONENT *)((char *)pcomp + ISO_RRIP_SLSIZ
+						  + isonum_711(pcomp->clen))) {
+		
+		/*
+		 * Separate the components with a slash, unless this one
+		 * continues the previous one.
+		 */
+		if (!cont) {
+			if (len < ana->maxlen) {
+				len++;
+				*outbuf++ = '/';
+			}
+		}
+		cont = 0;
+		
+		/* the default source serves both "." and ".." */
+		inbuf = "..";
+		wlen = 0;
+		
+		switch (*pcomp->cflag) {
+			
+		case ISO_SUSP_CFLAG_CURRENT:
+			/* Inserting Current */
+			wlen = 1;
+			break;
+			
+		case ISO_SUSP_CFLAG_PARENT:
+			/* Inserting Parent */
+			wlen = 2;
+			break;
+			
+		case ISO_SUSP_CFLAG_ROOT:
+			/* Inserting slash for ROOT */
+			/* start over from beginning(?) */
+			outbuf -= len;
+			len = 0;
+			break;
+			
+		case ISO_SUSP_CFLAG_VOLROOT:
+			/* Inserting a mount point i.e. "/cdrom" */
+			/* same as above */
+			outbuf -= len;
+			len = 0;
+			inbuf = ana->imp->im_mountp->mnt_stat.f_mntonname;
+			wlen = strlen(inbuf);
+			break;
+			
+		case ISO_SUSP_CFLAG_HOST:
+			/* Inserting hostname i.e. "kurt.tools.de" */
+			inbuf = hostname;
+			wlen = hostnamelen;
+			break;
+			
+		case ISO_SUSP_CFLAG_CONTINUE:
+			cont = 1;
+			/* fall thru */
+		case 0:
+			/* Inserting component */
+			wlen = isonum_711(pcomp->clen);
+			inbuf = pcomp->name;
+			break;
+		default:
+			printf("RRIP with incorrect flags?");
+			/* make the length check below fail */
+			wlen = ana->maxlen + 1;
+			break;
+		}
+		
+		if (len + wlen > ana->maxlen) {
+			/* indicate error to caller */
+			ana->cont = 1;
+			ana->fields = 0;
+			ana->outbuf -= *ana->outlen;
+			*ana->outlen = 0;
+			return 0;
+		}
+		
+		bcopy(inbuf,outbuf,wlen);
+		outbuf += wlen;
+		len += wlen;
+		
+	}
+	/* remember where we are for a following SL field */
+	ana->outbuf = outbuf;
+	*ana->outlen = len;
+	ana->cont = cont;
+	
+	/* the name is complete only if the field has no flags set */
+	if (!isonum_711(p->flags)) {
+		ana->fields &= ~ISO_SUSP_SLINK;
+		return ISO_SUSP_SLINK;
+	}
+	return 0;
+}
+
+/*
+ * Alternate name ("NM" field)
+ *
+ * Appends the name part recorded in one NM field to the name that is
+ * being collected in ana->outbuf and adds its length to *ana->outlen.
+ *
+ * Returns ISO_SUSP_ALTNAME (and clears it in ana->fields) when the name
+ * is complete, 0 if the field announces a continuation.  If the name
+ * does not fit into ana->maxlen bytes, the flags are unknown or the
+ * field is too short to be valid, the collected name is thrown away
+ * as if there were no NM field at all and 0 is returned.
+ */
+static int
+cd9660_rrip_altname(p,ana)
+	ISO_RRIP_ALTNAME *p;
+	ISO_RRIP_ANALYZE *ana;
+{
+	char *inbuf;
+	int wlen;
+	int cont;
+	
+	/* the default source serves both "." and ".." */
+	inbuf = "..";
+	wlen = 0;
+	cont = 0;
+	
+	switch (*p->flags) {
+	case ISO_SUSP_CFLAG_CURRENT:
+		/* Inserting Current */
+		wlen = 1;
+		break;
+		
+	case ISO_SUSP_CFLAG_PARENT:
+		/* Inserting Parent */
+		wlen = 2;
+		break;
+		
+	case ISO_SUSP_CFLAG_HOST:
+		/* Inserting hostname i.e. "kurt.tools.de" */
+		inbuf = hostname;
+		wlen = hostnamelen;
+		break;
+		
+	case ISO_SUSP_CFLAG_CONTINUE:
+		cont = 1;
+		/* fall thru */
+	case 0:
+		/*
+		 * Inserting component: the name follows the 5 bytes of
+		 * header and flags.  The length comes from the disk, so
+		 * wlen is negative for a field shorter than that.
+		 */
+		wlen = isonum_711(p->h.length) - 5;
+		inbuf = (char *)p + 5;
+		break;
+		
+	default:
+		printf("RRIP with incorrect NM flags?\n");
+		/* make the length check below fail */
+		wlen = ana->maxlen + 1;
+		break;
+	}
+	
+	/*
+	 * Check before touching the length: a negative wlen must neither
+	 * wrap the unsigned length around nor reach bcopy().
+	 */
+	if (wlen < 0 || *ana->outlen + wlen > ana->maxlen) {
+		/* treat as no name field */
+		ana->fields &= ~ISO_SUSP_ALTNAME;
+		ana->outbuf -= *ana->outlen;
+		*ana->outlen = 0;
+		return 0;
+	}
+	
+	bcopy(inbuf,ana->outbuf,wlen);
+	ana->outbuf += wlen;
+	*ana->outlen += wlen;
+	
+	if (!cont) {
+		ana->fields &= ~ISO_SUSP_ALTNAME;
+		return ISO_SUSP_ALTNAME;
+	}
+	return 0;
+}
+
+/*
+ * Default for a missing "NM" field: use the ISO 9660 name.  The special
+ * names 0 and 1 become "." and "..", everything else is translated with
+ * isofntrans().
+ */
+static void
+cd9660_rrip_defname(isodir,ana)
+	struct iso_directory_record *isodir;
+	ISO_RRIP_ANALYZE *ana;
+{
+	int first = *isodir->name;
+
+	/* ".." also provides "." if only one character is used */
+	strcpy(ana->outbuf,"..");
+
+	if (first == 0)
+		*ana->outlen = 1;
+	else if (first == 1)
+		*ana->outlen = 2;
+	else
+		isofntrans(isodir->name,isonum_711(isodir->name_len),
+			   ana->outbuf,ana->outlen,
+			   1,isonum_711(isodir->flags)&4);
+}
+
+/*
+ * Parent or Child Link ("PL" or "CL" field): store the inode number of
+ * the directory the link points to in *ana->inump.
+ * Clears both link bits in the wanted fields and returns the bit that
+ * belongs to the type of the field.
+ */
+static int
+cd9660_rrip_pclink(p,ana)
+	ISO_RRIP_CLINK  *p;
+	ISO_RRIP_ANALYZE *ana;
+{
+	*ana->inump = isonum_733(p->dir_loc) << ana->imp->im_bshift;
+	ana->fields &= ~(ISO_SUSP_CLINK|ISO_SUSP_PLINK);
+
+	if (*p->h.type == 'C')
+		return (ISO_SUSP_CLINK);
+	return (ISO_SUSP_PLINK);
+}
+
+/*
+ * Relocated directory ("RE" field): throw away the name collected so
+ * far and stop the analysis.  The contents of the field are not used.
+ */
+static int
+cd9660_rrip_reldir(p,ana)
+	ISO_RRIP_RELDIR  *p;
+	ISO_RRIP_ANALYZE *ana;
+{
+	/* special hack to make caller aware of RE field */
+	ana->fields = 0;
+	*ana->outlen = 0;
+	return (ISO_SUSP_RELDIR|ISO_SUSP_ALTNAME|ISO_SUSP_CLINK|ISO_SUSP_PLINK);
+}
+
+/*
+ * Time stamps ("TF" field)
+ *
+ * The flags of the field tell which time stamps are recorded and whether
+ * they have the 7 byte or the 17 byte form.  The time stamps follow each
+ * other in the order creation, modification, access, attribute change;
+ * the creation time is skipped.  A missing modification time is set to
+ * zero, missing access and change times are copied from the
+ * modification time.
+ * Clears ISO_SUSP_TSTAMP in the wanted fields and returns it.
+ *
+ * NOTE(review): the inode times are struct timespec while the conversion
+ * routines are declared with struct timeval pointers -- confirm that the
+ * two have the same layout on the target.
+ */
+static int
+cd9660_rrip_tstamp(p,ana)
+	ISO_RRIP_TSTAMP *p;
+	ISO_RRIP_ANALYZE *ana;
+{
+	unsigned char *ptime;
+	
+	ptime = p->time;
+	
+	/* Check a format of time stamp (7bytes/17bytes) */
+	if (!(*p->flags&ISO_SUSP_TSTAMP_FORM17)) {
+		if (*p->flags&ISO_SUSP_TSTAMP_CREAT)
+			ptime += 7;
+		
+		if (*p->flags&ISO_SUSP_TSTAMP_MODIFY) {
+			cd9660_tstamp_conv7(ptime,&ana->inop->inode.iso_mtime);
+			ptime += 7;
+		} else
+			/* clear the member itself, whatever its type is */
+			bzero(&ana->inop->inode.iso_mtime,
+			      sizeof(ana->inop->inode.iso_mtime));
+		
+		if (*p->flags&ISO_SUSP_TSTAMP_ACCESS) {
+			cd9660_tstamp_conv7(ptime,&ana->inop->inode.iso_atime);
+			ptime += 7;
+		} else
+			ana->inop->inode.iso_atime = ana->inop->inode.iso_mtime;
+		
+		if (*p->flags&ISO_SUSP_TSTAMP_ATTR)
+			cd9660_tstamp_conv7(ptime,&ana->inop->inode.iso_ctime);
+		else
+			ana->inop->inode.iso_ctime = ana->inop->inode.iso_mtime;
+		
+	} else {
+		if (*p->flags&ISO_SUSP_TSTAMP_CREAT)
+			ptime += 17;
+		
+		if (*p->flags&ISO_SUSP_TSTAMP_MODIFY) {
+			cd9660_tstamp_conv17(ptime,&ana->inop->inode.iso_mtime);
+			ptime += 17;
+		} else
+			/* clear the member itself, whatever its type is */
+			bzero(&ana->inop->inode.iso_mtime,
+			      sizeof(ana->inop->inode.iso_mtime));
+		
+		if (*p->flags&ISO_SUSP_TSTAMP_ACCESS) {
+			cd9660_tstamp_conv17(ptime,&ana->inop->inode.iso_atime);
+			ptime += 17;
+		} else
+			ana->inop->inode.iso_atime = ana->inop->inode.iso_mtime;
+		
+		if (*p->flags&ISO_SUSP_TSTAMP_ATTR)
+			cd9660_tstamp_conv17(ptime,&ana->inop->inode.iso_ctime);
+		else
+			ana->inop->inode.iso_ctime = ana->inop->inode.iso_mtime;
+		
+	}
+	ana->fields &= ~ISO_SUSP_TSTAMP;
+	return ISO_SUSP_TSTAMP;
+}
+
+/*
+ * Default for a missing "TF" field: fall back to the time stamps of the
+ * plain ISO 9660 directory record.
+ */
+static void
+cd9660_rrip_deftstamp(isodir,ana)
+	struct iso_directory_record  *isodir;
+	ISO_RRIP_ANALYZE *ana;
+{
+	struct iso_node *ip = ana->inop;
+
+	cd9660_deftstamp(isodir,ip,NULL);
+}
+
+/*
+ * POSIX device modes ("PN" field): set the device number of the inode.
+ * If the high part of the recorded number is zero, major and minor
+ * number are both taken from the low part; otherwise the high part is
+ * the major number and the minor number comes from the low part.
+ * Clears ISO_SUSP_DEVICE in the wanted fields and returns it.
+ */
+static int
+cd9660_rrip_device(p,ana)
+	ISO_RRIP_DEVICE *p;
+	ISO_RRIP_ANALYZE *ana;
+{
+	unsigned hi, lo;
+
+	hi = isonum_733(p->dev_t_high_l);
+	lo = isonum_733(p->dev_t_low_l);
+
+	if ( hi != 0 )
+		ana->inop->inode.iso_rdev = makedev( hi, minor(lo) );
+	else
+		ana->inop->inode.iso_rdev = makedev( major(lo), minor(lo) );
+
+	ana->fields &= ~ISO_SUSP_DEVICE;
+	return (ISO_SUSP_DEVICE);
+}
+
+/*
+ * Flag field ("RR"): it lists the fields that are recorded, so stop
+ * waiting for wanted fields that are not among them.  Only the low
+ * eight bits of the wanted fields are affected.  If a relocated
+ * directory is announced and wanted, it is handled right here.
+ */
+static int
+cd9660_rrip_idflag(p,ana)
+	ISO_RRIP_IDFLAG *p;
+	ISO_RRIP_ANALYZE *ana;
+{
+	/* don't touch high bits */
+	ana->fields &= isonum_711(p->flags)|~0xff;
+
+	if (!(ana->fields&ISO_SUSP_RELDIR))
+		return (ISO_SUSP_IDFLAG);
+
+	/* special handling of RE field */
+	return (cd9660_rrip_reldir(p,ana));
+}
+
+/*
+ * Continuation pointer ("CE" field): remember block, offset and length
+ * of the continuation area; the caller reads it once it is done with
+ * the current area.
+ */
+static int
+cd9660_rrip_cont(p,ana)
+	ISO_RRIP_CONT *p;
+	ISO_RRIP_ANALYZE *ana;
+{
+	ana->iso_ce_len = isonum_733(p->length);
+	ana->iso_ce_off = isonum_733(p->offset);
+	ana->iso_ce_blk = isonum_733(p->location);
+	return (ISO_SUSP_CONT);
+}
+
+/*
+ * System Use end ("ST" field): nothing of interest follows, so no
+ * field is wanted any more.  The contents of the field are not used.
+ */
+static int
+cd9660_rrip_stop(p,ana)
+	ISO_SUSP_HEADER *p;
+	ISO_RRIP_ANALYZE *ana;
+{
+	ana->fields = 0;	/* this ends the analysis */
+	return (ISO_SUSP_STOP);
+}
+
+/*
+ * Extension reference ("ER" field): accept it only if it names version 1
+ * of the extension with the 10 character identifier "RRIP_1991A".
+ * Returns ISO_SUSP_EXTREF (and clears it in the wanted fields) if it
+ * does, 0 otherwise.
+ */
+static int
+cd9660_rrip_extref(p,ana)
+	ISO_RRIP_EXTREF *p;
+	ISO_RRIP_ANALYZE *ana;
+{
+	if (isonum_711(p->len_id) == 10
+	    && bcmp((char *)p + 8,"RRIP_1991A",10) == 0
+	    && isonum_711(p->version) == 1) {
+		ana->fields &= ~ISO_SUSP_EXTREF;
+		return (ISO_SUSP_EXTREF);
+	}
+	return (0);
+}
+
+/*
+ * One entry of a dispatch table for cd9660_rrip_loop().  A table ends
+ * with an entry whose func is 0.
+ */
+typedef struct {
+	char type[2];		/* signature of the field, e.g. "PX" */
+	int (*func)();		/* handler, called with (field, analyze state) */
+	void (*func2)();	/* default, called with (directory record, */
+				/* analyze state) if no handler returned */
+				/* any of the bits in result */
+	int result;		/* bits the handler returns on success */
+} RRIP_TABLE;
+
+/*
+ * Walk through the system use fields of a directory record and hand each
+ * field to the handler that the table has for its signature.
+ *
+ * isodir	directory record to analyze
+ * ana		analyze state; ana->fields holds the fields that are still
+ *		wanted, the walk ends as soon as it becomes 0
+ * table	handlers, terminated by an entry without func
+ *
+ * Continuation areas announced by a CE field are followed.  When the
+ * walk is over, the default routine (func2) of every table entry whose
+ * result bits did not show up is called.  Note that the scan for the
+ * defaults stops at the first entry without func2.
+ *
+ * Returns the results of all handlers or'ed together.
+ */
+static int
+cd9660_rrip_loop(isodir,ana,table)
+	struct iso_directory_record *isodir;
+	ISO_RRIP_ANALYZE *ana;
+	RRIP_TABLE *table;
+{
+	register RRIP_TABLE *ptable;
+	register ISO_SUSP_HEADER *phead;
+	register ISO_SUSP_HEADER *pend;
+	struct buf *bp = NULL;
+	char *pwhead;
+	int result;
+	
+	/*
+	 * The system use area follows the name.
+	 * Note: If name length is even,
+	 *       there is 1 byte of padding after the name
+	 */
+	pwhead = isodir->name + isonum_711(isodir->name_len);
+	if (!(isonum_711(isodir->name_len)&1))
+		pwhead++;
+	
+	/* If it's not the '.' entry of the root dir obey SP field */
+	if (*isodir->name != 0
+	    || isonum_733(isodir->extent) != ana->imp->root_extent)
+		pwhead += ana->imp->rr_skip;
+	else
+		pwhead += ana->imp->rr_skip0;
+	
+	phead = (ISO_SUSP_HEADER *)pwhead;
+	pend = (ISO_SUSP_HEADER *)((char *)isodir + isonum_711(isodir->length));
+	
+	result = 0;
+	while (1) {
+		ana->iso_ce_len = 0;
+		/*
+		 * Note: "pend" should be more than one SUSP header
+		 */ 
+		while (pend >= phead + 1) {
+			/*
+			 * A field can not be shorter than its own header.
+			 * Give up on this area if one claims to be; with a
+			 * length of 0 we would never get past it.
+			 */
+			if (isonum_711(phead->length) < (int)sizeof(*phead))
+				break;
+			if (isonum_711(phead->version) == 1) {
+				for (ptable = table; ptable->func; ptable++) {
+					if (*phead->type == *ptable->type
+					    && phead->type[1] == ptable->type[1]) {
+						result |= ptable->func(phead,ana);
+						break;
+					}
+				}
+				if (!ana->fields)
+					break;
+			}
+			/*
+			 * move to next SUSP
+			 * Hopefully this works with newer versions, too
+			 */
+			phead = (ISO_SUSP_HEADER *)((char *)phead + isonum_711(phead->length));
+		}
+		
+		/* done unless something is missing and there is more to read */
+		if (!ana->fields || !ana->iso_ce_len)
+			break;
+		if (ana->iso_ce_blk >= ana->imp->volume_space_size
+		    || ana->iso_ce_off + ana->iso_ce_len > ana->imp->logical_block_size)
+			/* what to do now? */
+			break;
+		/*
+		 * We are done with the previous continuation area, release
+		 * its buffer before bread() overwrites the pointer.
+		 */
+		if (bp) {
+			brelse(bp);
+			bp = NULL;
+		}
+		if (bread(ana->imp->im_devvp,
+			  ana->iso_ce_blk * ana->imp->logical_block_size / DEV_BSIZE,
+			  ana->imp->logical_block_size,NOCRED,&bp))
+			/* what to do now? */
+			break;
+		phead = (ISO_SUSP_HEADER *)(bp->b_un.b_addr + ana->iso_ce_off);
+		pend = (ISO_SUSP_HEADER *) ((char *)phead + ana->iso_ce_len);
+	}
+	if (bp)
+		brelse(bp);
+	/*
+	 * If we don't find the Basic SUSP stuffs, just set default value
+	 *   ( attribute/time stamp )
+	 */
+	for (ptable = table; ptable->func2; ptable++)
+		if (!(ptable->result&result))
+			ptable->func2(isodir,ana);
+	
+	return result;
+}
+
+/*
+ * Fields used by cd9660_rrip_analyze() to set up an inode.
+ * The entries with a default routine have to come first, because
+ * cd9660_rrip_loop() stops looking for defaults at the first entry
+ * without one.
+ */
+static RRIP_TABLE rrip_table_analyze[] = {
+	{ "PX", cd9660_rrip_attr,	cd9660_rrip_defattr,	ISO_SUSP_ATTR },
+	{ "TF", cd9660_rrip_tstamp,	cd9660_rrip_deftstamp,	ISO_SUSP_TSTAMP },
+	{ "PN", cd9660_rrip_device,	0,			ISO_SUSP_DEVICE },
+	{ "RR", cd9660_rrip_idflag,	0,			ISO_SUSP_IDFLAG },
+	{ "CE", cd9660_rrip_cont,	0,			ISO_SUSP_CONT },
+	{ "ST", cd9660_rrip_stop,	0,			ISO_SUSP_STOP },
+	{ "",	0,			0,			0 }
+};
+
+/*
+ * Set attributes, time stamps and device number of an inode from the
+ * Rock Ridge fields of its directory record.  Attributes and time
+ * stamps get their ISO 9660 defaults if the fields are missing.
+ * Returns the result bits of the fields that were found.
+ */
+int
+cd9660_rrip_analyze(isodir,inop,imp)
+	struct iso_directory_record *isodir;
+	struct iso_node *inop;
+	struct iso_mnt *imp;
+{
+	ISO_RRIP_ANALYZE ana;
+
+	ana.fields = ISO_SUSP_ATTR|ISO_SUSP_TSTAMP|ISO_SUSP_DEVICE;
+	ana.imp = imp;
+	ana.inop = inop;
+
+	return (cd9660_rrip_loop(isodir,&ana,rrip_table_analyze));
+}
+
+/* 
+ * Get Alternate Name from 'NM' record 
+ * If there is either no NM record or it has 0 length, 
+ *    the translated ISO9660 name is returned instead.
+ *
+ * The NM entry has to stay the first one: cd9660_rrip_getname() skips
+ * it by starting at the second entry when the name is "." or "..".
+ */
+static RRIP_TABLE rrip_table_getname[] = {
+	{ "NM", cd9660_rrip_altname,	cd9660_rrip_defname,	ISO_SUSP_ALTNAME },
+	{ "CL", cd9660_rrip_pclink,	0,			ISO_SUSP_CLINK|ISO_SUSP_PLINK },
+	{ "PL", cd9660_rrip_pclink,	0,			ISO_SUSP_CLINK|ISO_SUSP_PLINK },
+	{ "RE", cd9660_rrip_reldir,	0,			ISO_SUSP_RELDIR },
+	{ "RR", cd9660_rrip_idflag,	0,			ISO_SUSP_IDFLAG },
+	{ "CE", cd9660_rrip_cont,	0,			ISO_SUSP_CONT },
+	{ "ST", cd9660_rrip_stop,	0,			ISO_SUSP_STOP },
+	{ "",	0,			0,			0 }
+};
+
+/*
+ * Get the name of a directory entry.
+ *
+ * isodir	directory record
+ * outbuf	where to put the name
+ * outlen	set to the length of the name
+ * inump	set to the inode number of the target if the record has
+ *		a child or parent link
+ * imp		mount the record belongs to
+ *
+ * The names "." and ".." are produced right away and no alternate name
+ * is looked for in that case.  At most NAME_MAX characters are
+ * collected.  Returns the result bits of the fields that were found.
+ */
+int
+cd9660_rrip_getname(isodir,outbuf,outlen,inump,imp)
+	struct iso_directory_record *isodir;
+	char *outbuf;
+	u_short *outlen;
+	ino_t *inump;
+	struct iso_mnt *imp;
+{
+	ISO_RRIP_ANALYZE ana;
+	RRIP_TABLE *tab = rrip_table_getname;
+
+	*outlen = 0;
+	ana.outbuf = outbuf;
+	ana.outlen = outlen;
+	ana.maxlen = NAME_MAX;
+	ana.inump = inump;
+	ana.imp = imp;
+	ana.fields = ISO_SUSP_ALTNAME|ISO_SUSP_RELDIR|ISO_SUSP_CLINK|ISO_SUSP_PLINK;
+
+	switch (*isodir->name) {
+	case 0:
+	case 1:
+		/* "." or "..": the name is known, leave out the NM entry */
+		cd9660_rrip_defname(isodir,&ana);
+		ana.fields &= ~ISO_SUSP_ALTNAME;
+		tab++;
+		break;
+	default:
+		break;
+	}
+
+	return (cd9660_rrip_loop(isodir,&ana,tab));
+}
+
+/* 
+ * Get Symbolic Name from 'SL' record 
+ *
+ * Note: isodir should contain an SL record!
+ *       There is no default routine for a missing one.
+ */
+static RRIP_TABLE rrip_table_getsymname[] = {
+	{ "SL", cd9660_rrip_slink,	0,			ISO_SUSP_SLINK },
+	{ "RR", cd9660_rrip_idflag,	0,			ISO_SUSP_IDFLAG },
+	{ "CE", cd9660_rrip_cont,	0,			ISO_SUSP_CONT },
+	{ "ST", cd9660_rrip_stop,	0,			ISO_SUSP_STOP },
+	{ "",	0,			0,			0 }
+};
+
+/*
+ * Get the target of a symbolic link.
+ *
+ * isodir	directory record of the link
+ * outbuf	where to put the target, at most MAXPATHLEN characters
+ * outlen	set to the length of the target
+ * imp		mount the record belongs to
+ *
+ * Returns ISO_SUSP_SLINK if a complete target was found, 0 otherwise.
+ */
+int
+cd9660_rrip_getsymname(isodir,outbuf,outlen,imp)
+	struct iso_directory_record *isodir;
+	char *outbuf;
+	u_short *outlen;
+	struct iso_mnt *imp;
+{
+	ISO_RRIP_ANALYZE ana;
+	int found;
+
+	*outlen = 0;
+	ana.outbuf = outbuf;
+	ana.outlen = outlen;
+	ana.maxlen = MAXPATHLEN;
+	ana.cont = 1;		/* don't start with a slash */
+	ana.imp = imp;
+	ana.fields = ISO_SUSP_SLINK;
+
+	found = cd9660_rrip_loop(isodir,&ana,rrip_table_getsymname);
+	return (found&ISO_SUSP_SLINK);
+}
+
+/*
+ * Fields used by cd9660_rrip_offset() to look for the extension
+ * reference of the Rock Ridge extension.
+ */
+static RRIP_TABLE rrip_table_extref[] = {
+	{ "ER", cd9660_rrip_extref,	0,			ISO_SUSP_EXTREF },
+	{ "CE", cd9660_rrip_cont,	0,			ISO_SUSP_CONT },
+	{ "ST", cd9660_rrip_stop,	0,			ISO_SUSP_STOP },
+	{ "",	0,			0,			0 }
+};
+
+/*
+ * Check for Rock Ridge Extension and return offset of its fields.
+ * Note: We require the ER field.
+ *
+ * The SP field is expected right behind the one character name of the
+ * directory record, or 15 bytes further on (maybe a CDROM XA disc).
+ * imp->rr_skip0 is set to the offset at which it was looked for last.
+ *
+ * Returns the skip value of the SP field, or -1 if there is no SP field
+ * or no acceptable ER field.
+ */
+int
+cd9660_rrip_offset(isodir,imp)
+	struct iso_directory_record *isodir;
+	struct iso_mnt *imp;
+{
+	ISO_RRIP_OFFSET *p;
+	ISO_RRIP_ANALYZE ana;
+	char *sig = "SP\7\1\276\357";
+
+	p = (ISO_RRIP_OFFSET *)(isodir->name + 1);
+	imp->rr_skip0 = 0;
+	if (bcmp(p,sig,6) != 0) {
+		/* Maybe, it's a CDROM XA disc? */
+		p = (ISO_RRIP_OFFSET *)((char *)p + 15);
+		imp->rr_skip0 = 15;
+		if (bcmp(p,sig,6) != 0)
+			return (-1);
+	}
+
+	ana.fields = ISO_SUSP_EXTREF;
+	ana.imp = imp;
+	if (cd9660_rrip_loop(isodir,&ana,rrip_table_extref)&ISO_SUSP_EXTREF)
+		return (isonum_711(p->skip));
+
+	return (-1);
+}
diff --git a/sys/fs/cd9660/cd9660_rrip.h b/sys/fs/cd9660/cd9660_rrip.h
new file mode 100644
index 00000000000..b4017281f06
--- /dev/null
+++ b/sys/fs/cd9660/cd9660_rrip.h
@@ -0,0 +1,146 @@
+/*-
+ * Copyright (c) 1993, 1994
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley
+ * by Pace Willisson (pace@blitz.com).  The Rock Ridge Extension
+ * Support code is derived from software contributed to Berkeley
+ * by Atsushi Murai (amurai@spec.co.jp).
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)cd9660_rrip.h	8.1 (Berkeley) 1/21/94
+ */
+/* Leading member `h' of the record layouts below: type bytes 0-1, length byte 2, version byte 3. */
+typedef struct {
+	char 	      type		[ISODCL (  0,    1)];
+	unsigned char length		[ISODCL (  2,    2)]; /* 711 */
+	unsigned char version		[ISODCL (  3,    3)];
+} ISO_SUSP_HEADER;
+/* Mode, link count, uid and gid, each present twice (_l annotated 731, _m annotated 732). */
+typedef struct {
+	ISO_SUSP_HEADER			h;
+	char mode_l			[ISODCL (  4,    7)]; /* 731 */
+	char mode_m			[ISODCL (  8,   11)]; /* 732 */
+	char links_l			[ISODCL ( 12,   15)]; /* 731 */
+	char links_m			[ISODCL ( 16,   19)]; /* 732 */
+	char uid_l			[ISODCL ( 20,   23)]; /* 731 */
+	char uid_m			[ISODCL ( 24,   27)]; /* 732 */
+	char gid_l			[ISODCL ( 28,   31)]; /* 731 */
+	char gid_m			[ISODCL ( 32,   35)]; /* 732 */
+} ISO_RRIP_ATTR;
+/* High and low halves of a device number, each present twice (731 and 732 forms). */
+typedef struct {
+	ISO_SUSP_HEADER			h;
+	char dev_t_high_l		[ISODCL (  4,    7)]; /* 731 */
+	char dev_t_high_m		[ISODCL (  8,   11)]; /* 732 */
+	char dev_t_low_l		[ISODCL ( 12,   15)]; /* 731 */
+	char dev_t_low_m		[ISODCL ( 16,   19)]; /* 732 */
+} ISO_RRIP_DEVICE;
+/* Single-bit flag values; presumably for ISO_RRIP_SLINK_COMPONENT.cflag — confirm against the users. */
+#define	ISO_SUSP_CFLAG_CONTINUE	0x01
+#define	ISO_SUSP_CFLAG_CURRENT	0x02
+#define	ISO_SUSP_CFLAG_PARENT	0x04
+#define	ISO_SUSP_CFLAG_ROOT	0x08
+#define	ISO_SUSP_CFLAG_VOLROOT	0x10
+#define	ISO_SUSP_CFLAG_HOST	0x20
+/* One symbolic link component: a flag byte, a length byte, then the name bytes (zero-length trailing array). */
+typedef struct {
+	u_char cflag			[ISODCL (  1,    1)];
+	u_char clen			[ISODCL (  2,    2)];
+	u_char name			[0];
+} ISO_RRIP_SLINK_COMPONENT;
+#define	ISO_RRIP_SLSIZ	2	/* presumably the size of the fixed cflag + clen part */
+/* Symbolic link record: a flags byte at offset 4 and the first component byte at offset 5. */
+typedef struct {
+	ISO_SUSP_HEADER			h;
+	u_char flags			[ISODCL (  4,    4)];
+	u_char component		[ISODCL (  5,    5)];
+} ISO_RRIP_SLINK;
+/* Alternate name record: only the flags byte at offset 4 is declared here. */
+typedef struct {
+	ISO_SUSP_HEADER			h;
+	char flags			[ISODCL (  4,    4)];
+} ISO_RRIP_ALTNAME;
+/* Record carrying an 8-byte directory location (annotated 733). */
+typedef struct {
+	ISO_SUSP_HEADER			h;
+	char dir_loc			[ISODCL (  4,    11)]; /* 733 */
+} ISO_RRIP_CLINK;
+/* Same layout as ISO_RRIP_CLINK: an 8-byte directory location (annotated 733). */
+typedef struct {
+	ISO_SUSP_HEADER			h;
+	char dir_loc			[ISODCL (  4,    11)]; /* 733 */
+} ISO_RRIP_PLINK;
+/* Record that consists of the common header only. */
+typedef struct {
+	ISO_SUSP_HEADER			h;
+} ISO_RRIP_RELDIR;
+/* Flag values; presumably for ISO_RRIP_TSTAMP.flags (0x80 selects FORM17, otherwise FORM7) — confirm. */
+#define	ISO_SUSP_TSTAMP_FORM17	0x80
+#define	ISO_SUSP_TSTAMP_FORM7	0x00
+#define	ISO_SUSP_TSTAMP_CREAT	0x01
+#define	ISO_SUSP_TSTAMP_MODIFY	0x02
+#define	ISO_SUSP_TSTAMP_ACCESS	0x04
+#define	ISO_SUSP_TSTAMP_ATTR	0x08
+#define	ISO_SUSP_TSTAMP_BACKUP	0x10
+#define	ISO_SUSP_TSTAMP_EXPIRE	0x20
+#define	ISO_SUSP_TSTAMP_EFFECT	0x40
+/* Time stamp record: a flags byte at offset 4, time data starting at offset 5. */
+typedef struct {
+	ISO_SUSP_HEADER			h;
+	unsigned char flags		[ISODCL (  4,    4)];
+	unsigned char time		[ISODCL (  5,    5)];
+} ISO_RRIP_TSTAMP;
+/* Record with a single flags byte at offset 4. */
+typedef struct {
+	ISO_SUSP_HEADER			h;
+	unsigned char flags		[ISODCL (  4,    4)];
+} ISO_RRIP_IDFLAG;
+/* Extension reference record: three one-byte lengths (id, des, src) and a version byte. */
+typedef struct {
+	ISO_SUSP_HEADER			h;
+	char len_id			[ISODCL (  4,    4)];
+	char len_des			[ISODCL (  5,	 5)];
+	char len_src			[ISODCL (  6,	 6)];
+	char version			[ISODCL (  7,	 7)];
+} ISO_RRIP_EXTREF;
+/* The record cd9660_rrip_offset() matches against "SP\7\1\276\357"; it returns isonum_711(skip). */
+typedef struct {
+	ISO_SUSP_HEADER			h;
+	char check			[ISODCL (  4,	 5)];
+	char skip			[ISODCL (  6,	 6)];
+} ISO_RRIP_OFFSET;
+/* Record with three 8-byte fields: location, offset and length. */
+typedef struct {
+	ISO_SUSP_HEADER			h;
+	char location			[ISODCL (  4,	11)];
+	char offset			[ISODCL ( 12,	19)];
+	char length			[ISODCL ( 20,	27)];
+} ISO_RRIP_CONT;
diff --git a/sys/fs/cd9660/cd9660_util.c b/sys/fs/cd9660/cd9660_util.c
new file mode 100644
index 00000000000..f74f0515ff7
--- /dev/null
+++ b/sys/fs/cd9660/cd9660_util.c
@@ -0,0 +1,236 @@
+/*-
+ * Copyright (c) 1994
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley
+ * by Pace Willisson (pace@blitz.com).  The Rock Ridge Extension
+ * Support code is derived from software contributed to Berkeley
+ * by Atsushi Murai (amurai@spec.co.jp).
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)cd9660_util.c	8.1 (Berkeley) 1/21/94
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/namei.h>
+#include <sys/resourcevar.h>
+#include <sys/kernel.h>
+#include <sys/file.h>
+#include <sys/stat.h>
+#include <sys/buf.h>
+#include <sys/proc.h>
+#include <sys/conf.h>
+#include <sys/mount.h>
+#include <sys/vnode.h>
+#include <miscfs/specfs/specdev.h> /* XXX */
+#include <miscfs/fifofs/fifo.h> /* XXX */
+#include <sys/malloc.h>
+#include <sys/dir.h>
+
+#include <isofs/cd9660/iso.h>
+/* The isonum_* definitions up to the matching #endif are compiled only when __notanymore__ is defined. */
+#ifdef	__notanymore__
+int
+isonum_711 (p)
+unsigned char *p;
+{
+	return (*p);	/* the single unsigned byte at p */
+}
+/* Return the single signed byte at p. */
+int
+isonum_712 (p)
+signed char *p;
+{
+	return (*p);
+}
+/* Read a short directly through p; prints a warning when BYTE_ORDER is not LITTLE_ENDIAN. */
+int
+isonum_721 (p)
+unsigned char *p;
+{
+	/* little endian short */
+#if BYTE_ORDER != LITTLE_ENDIAN
+	printf ("isonum_721 called on non little-endian machine!\n");
+#endif
+
+	return *(short *)p;
+}
+/* Read a short directly through p; prints a warning when BYTE_ORDER is not BIG_ENDIAN. */
+int
+isonum_722 (p)
+unsigned char *p;
+{
+        /* big endian short */
+#if BYTE_ORDER != BIG_ENDIAN
+        printf ("isonum_722 called on non big-endian machine!\n");
+#endif
+
+	return *(short *)p;
+}
+/* Pick isonum_722 at p + 2 (BIG_ENDIAN) or isonum_721 at p (LITTLE_ENDIAN); otherwise warn and return 0. */
+int
+isonum_723 (p)
+unsigned char *p;
+{
+#if BYTE_ORDER == BIG_ENDIAN
+        return isonum_722 (p + 2);
+#elif BYTE_ORDER == LITTLE_ENDIAN
+	return isonum_721 (p);
+#else
+	printf ("isonum_723 unsupported byte order!\n");
+	return 0;
+#endif
+}
+/* Read a long directly through p; prints a warning when BYTE_ORDER is not LITTLE_ENDIAN. */
+int
+isonum_731 (p)
+unsigned char *p;
+{
+        /* little endian long */
+#if BYTE_ORDER != LITTLE_ENDIAN
+        printf ("isonum_731 called on non little-endian machine!\n");
+#endif
+
+	return *(long *)p;
+}
+/* Read a long directly through p; prints a warning when BYTE_ORDER is not BIG_ENDIAN. */
+int
+isonum_732 (p)
+unsigned char *p;
+{
+        /* big endian long */
+#if BYTE_ORDER != BIG_ENDIAN
+        printf ("isonum_732 called on non big-endian machine!\n");
+#endif
+
+	return *(long *)p;
+}
+/* Pick isonum_732 at p + 4 (BIG_ENDIAN) or isonum_731 at p (LITTLE_ENDIAN); otherwise warn and return 0. */
+int
+isonum_733 (p)
+unsigned char *p;
+{
+#if BYTE_ORDER == BIG_ENDIAN
+        return isonum_732 (p + 4);
+#elif BYTE_ORDER == LITTLE_ENDIAN
+	return isonum_731 (p);
+#else
+	printf ("isonum_733 unsupported byte order!\n");
+	return 0;
+#endif
+}
+#endif	/* __notanymore__ */
+
+/*
+ * Compare name fn with on-disc name isofn: 'A'-'Z' in isofn also match
+ * 'a'-'z' in fn, and a trailing ";version" (or ".;version") of isofn may be
+ * omitted from fn.  Returns 0 on a match and non-zero otherwise.
+ */
+int
+isofncmp(unsigned char *fn,int fnlen,unsigned char *isofn,int isolen)
+{
+	int i, j;
+	unsigned char c;	/* unsigned, so bytes >= 0x80 compare equal to the same byte in fn */
+	
+	while (--fnlen >= 0) {
+		if (--isolen < 0)
+			return *fn;
+		if ((c = *isofn++) == ';') {
+			switch (*fn++) {
+			default:
+				return *--fn;
+			case 0:
+				return 0;
+			case ';':
+				break;
+			}
+			for (i = 0; --fnlen >= 0; i = i * 10 + *fn++ - '0')
+				if (*fn < '0' || *fn > '9')
+					return -1;
+			for (j = 0; --isolen >= 0; j = j * 10 + *isofn++ - '0');
+			return i - j;
+		}
+		if (c != *fn) {
+			if (c >= 'A' && c <= 'Z') {
+				if (c + ('a' - 'A') != *fn) {
+					if (*fn >= 'a' && *fn <= 'z')
+						return *fn - ('a' - 'A') - c;
+					else
+						return *fn - c;
+				}
+			} else
+				return *fn - c;
+		}
+		fn++;
+	}
+	if (isolen > 0) {
+		/* fn is used up: isofn may only go on with ";..." or ".;..." */
+		switch (*isofn) {
+		default:
+			return -1;
+		case '.':
+			if (isolen > 1 && isofn[1] != ';')	/* never read past the name */
+				return -1;
+		case ';':	/* FALLTHROUGH from '.' */
+			return 0;
+		}
+	}
+	return 0;
+}
+
+/*
+ * Translate on-disc name infn (infnlen bytes) into outfn, led by ASSOCCHAR
+ * when assoc is set; *outfnlen gets the byte count (no NUL is written).
+ * Unless original: fold 'A'-'Z' to lower case, stop at ";" or ".;".
+ */
+void
+isofntrans(unsigned char *infn,int infnlen,unsigned char *outfn,
+	   unsigned short *outfnlen,int original,int assoc)
+{
+	unsigned char *infnend = infn + infnlen;	/* input bound, independent of fnidx */
+	int fnidx = 0;				/* bytes stored in outfn so far */
+	
+	if (assoc) {
+		*outfn++ = ASSOCCHAR;
+		fnidx++;
+	}
+	for (; infn < infnend; fnidx++) {
+		unsigned char c = *infn++;
+		if (!original && c >= 'A' && c <= 'Z')
+			*outfn++ = c + ('a' - 'A');
+		else if (!original && (c == ';' ||
+		    (c == '.' && infn < infnend && *infn == ';')))
+			break;
+		else
+			*outfn++ = c;
+	}
+	*outfnlen = fnidx;
+}
diff --git a/sys/fs/cd9660/cd9660_vfsops.c b/sys/fs/cd9660/cd9660_vfsops.c
new file mode 100644
index 00000000000..02dd92af66f
--- /dev/null
+++ b/sys/fs/cd9660/cd9660_vfsops.c
@@ -0,0 +1,681 @@
+/*-
+ * Copyright (c) 1994
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley
+ * by Pace Willisson (pace@blitz.com).  The Rock Ridge Extension
+ * Support code is derived from software contributed to Berkeley
+ * by Atsushi Murai (amurai@spec.co.jp).
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)cd9660_vfsops.c	8.3 (Berkeley) 1/31/94
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/namei.h>
+#include <sys/proc.h>
+#include <sys/kernel.h>
+#include <sys/vnode.h>
+#include <miscfs/specfs/specdev.h>
+#include <sys/mount.h>
+#include <sys/buf.h>
+#include <sys/file.h>
+#include <sys/dkbad.h>
+#include <sys/disklabel.h>
+#include <sys/ioctl.h>
+#include <sys/errno.h>
+#include <sys/malloc.h>
+
+#include <isofs/cd9660/iso.h>
+#include <isofs/cd9660/cd9660_node.h>
+
+extern int enodev ();
+/* Operations vector for cd9660; the initializers are positional, so their order presumably follows struct vfsops (declared elsewhere). */
+struct vfsops cd9660_vfsops = {
+	cd9660_mount,
+	cd9660_start,
+	cd9660_unmount,
+	cd9660_root,
+	cd9660_quotactl,
+	cd9660_statfs,
+	cd9660_sync,
+	cd9660_vget,
+	cd9660_fhtovp,
+	cd9660_vptofh,
+	cd9660_init,
+};
+
+/*
+ * Called by vfs_mountroot when iso is going to be mounted as root.
+ *
+ * Name is updated by mount(8) after booting.
+ */
+#define ROOTNAME	"root_device"
+
+static iso_mountfs();
+/* Mount rootdev read-only as "/" through iso_mountfs(); returns 0, or the error from iso_mountfs()/vfs_lock(). */
+cd9660_mountroot()
+{
+	register struct mount *mp;
+	extern struct vnode *rootvp;
+	struct proc *p = curproc;	/* XXX */
+	struct iso_mnt *imp;
+	register struct fs *fs;
+	u_int size;
+	int error;
+	struct iso_args args;
+	/* NOTE(review): `fs' is never referenced in this function. */
+	/*
+	 * Get vnodes for swapdev and rootdev.
+	 */
+	if (bdevvp(swapdev, &swapdev_vp) || bdevvp(rootdev, &rootvp))
+		panic("cd9660_mountroot: can't setup bdevvp's");
+
+	mp = malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK);
+	bzero((char *)mp, (u_long)sizeof(struct mount));
+	mp->mnt_op = &cd9660_vfsops;
+	mp->mnt_flag = MNT_RDONLY;
+	args.flags = ISOFSMNT_ROOT;	/* the only member of args that is filled in here */
+	if (error = iso_mountfs(rootvp, mp, p, &args)) {
+		free(mp, M_MOUNT);
+		return (error);
+	}
+	if (error = vfs_lock(mp)) {
+		(void)cd9660_unmount(mp, 0, p);	/* undo iso_mountfs() before giving up */
+		free(mp, M_MOUNT);
+		return (error);
+	}
+	TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list);
+	mp->mnt_flag |= MNT_ROOTFS;
+	mp->mnt_vnodecovered = NULLVP;
+	imp = VFSTOISOFS(mp);
+	bzero(imp->im_fsmnt, sizeof(imp->im_fsmnt));
+	imp->im_fsmnt[0] = '/';	/* mounted-on name is "/" */
+	bcopy((caddr_t)imp->im_fsmnt, (caddr_t)mp->mnt_stat.f_mntonname,
+	    MNAMELEN);
+	(void) copystr(ROOTNAME, mp->mnt_stat.f_mntfromname, MNAMELEN - 1,
+	    &size);
+	bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size);	/* zero the rest of the name */
+	(void) cd9660_statfs(mp, &mp->mnt_stat, p);
+	vfs_unlock(mp);
+	return (0);
+}
+
+/*
+ * Flag to allow forcible unmounting; when it is 0, cd9660_unmount() fails MNT_FORCE with EINVAL.
+ */
+int iso_doforce = 1;
+
+/*
+ * VFS Operations.
+ * mount system call: copy in the iso_args, insist on MNT_RDONLY (else EROFS),
+ * then mount args.fspec, or for MNT_UPDATE without a name call vfs_export().
+ */
+cd9660_mount(mp, path, data, ndp, p)
+	register struct mount *mp;
+	char *path;
+	caddr_t data;
+	struct nameidata *ndp;
+	struct proc *p;
+{
+	struct vnode *devvp;
+	struct iso_args args;
+	u_int size;
+	int error;
+	struct iso_mnt *imp;
+	
+	if (error = copyin(data, (caddr_t)&args, sizeof (struct iso_args)))
+		return (error);
+	/* Only read-only mounts are accepted. */
+	if ((mp->mnt_flag & MNT_RDONLY) == 0)
+		return (EROFS);
+	
+	/*
+	 * If updating and there is no device name, only the export
+	 * information is updated and that's all we do.
+	 */
+	if (mp->mnt_flag & MNT_UPDATE) {
+		imp = VFSTOISOFS(mp);
+		if (args.fspec == 0)
+			return (vfs_export(mp, &imp->im_export, &args.export));
+	}
+	/*
+	 * Not an update, or updating the name: look up the name
+	 * and verify that it refers to a sensible block device.
+	 */
+	NDINIT(ndp, LOOKUP, FOLLOW, UIO_USERSPACE, args.fspec, p);
+	if (error = namei(ndp))
+		return (error);
+	devvp = ndp->ni_vp;
+	/* From here on every failure path must vrele(devvp). */
+	if (devvp->v_type != VBLK) {
+		vrele(devvp);
+		return ENOTBLK;
+	}
+	if (major(devvp->v_rdev) >= nblkdev) {
+		vrele(devvp);
+		return ENXIO;
+	}
+	if ((mp->mnt_flag & MNT_UPDATE) == 0)
+		error = iso_mountfs(devvp, mp, p, &args);
+	else {
+		if (devvp != imp->im_devvp)	/* imp was set in the MNT_UPDATE branch above */
+			error = EINVAL;	/* needs translation */
+		else
+			vrele(devvp);
+	}
+	if (error) {
+		vrele(devvp);
+		return error;
+	}
+	imp = VFSTOISOFS(mp);
+	(void) copyinstr(path, imp->im_fsmnt, sizeof(imp->im_fsmnt)-1, &size);
+	bzero(imp->im_fsmnt + size, sizeof(imp->im_fsmnt) - size);
+	bcopy((caddr_t)imp->im_fsmnt, (caddr_t)mp->mnt_stat.f_mntonname,
+	    MNAMELEN);
+	(void) copyinstr(args.fspec, mp->mnt_stat.f_mntfromname, MNAMELEN - 1,
+	    &size);
+	bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size);
+	(void) cd9660_statfs(mp, &mp->mnt_stat, p);
+	return 0;
+}
+
+/*
+ * Common code for mount and mountroot: open devvp, find the primary volume
+ * descriptor, hang a freshly filled iso_mnt off mp->mnt_data and probe for
+ * Rock Ridge (unless ISOFSMNT_NORRIP).  Returns 0 or an errno; on failure
+ * the device is closed and the iso_mnt freed, devvp stays with the caller.
+ */
+static iso_mountfs(devvp, mp, p, argp)
+	register struct vnode *devvp;
+	struct mount *mp;
+	struct proc *p;
+	struct iso_args *argp;
+{
+	register struct iso_mnt *isomp = (struct iso_mnt *)0;
+	struct buf *bp = NULL;
+	dev_t dev = devvp->v_rdev;
+	int error = EINVAL;
+	int needclose = 0;
+	int ronly = (mp->mnt_flag & MNT_RDONLY) != 0;
+	extern struct vnode *rootvp;
+	int iso_bsize;
+	int iso_blknum;
+	struct iso_volume_descriptor *vdp;
+	struct iso_primary_descriptor *pri;
+	struct iso_directory_record *rootp;
+	int logical_block_size;
+	
+	if (!ronly)
+		return EROFS;
+	
+	/*
+	 * Disallow multiple mounts of the same device.
+	 * Disallow mounting of a device that is currently in use
+	 * (except for root, which might share swap device for miniroot).
+	 * Flush out any old buffers remaining from a previous use.
+	 */
+	if (error = vfs_mountedon(devvp))
+		return error;
+	if (vcount(devvp) > 1 && devvp != rootvp)
+		return EBUSY;
+	if (error = vinvalbuf(devvp, V_SAVE, p->p_ucred, p, 0, 0))
+		return (error);
+
+	if (error = VOP_OPEN(devvp, ronly ? FREAD : FREAD|FWRITE, FSCRED, p))
+		return error;
+	needclose = 1;
+	
+	/* This is the "logical sector size".  The standard says this
+	 * should be 2048 or the physical sector size on the device,
+	 * whichever is greater.  For now, we'll just use a constant.
+	 */
+	iso_bsize = ISO_DEFAULT_BLOCK_SIZE;
+	
+	for (iso_blknum = 16; iso_blknum < 100; iso_blknum++) {
+		if (error = bread (devvp, btodb(iso_blknum * iso_bsize),
+				   iso_bsize, NOCRED, &bp))
+			goto out;
+		
+		vdp = (struct iso_volume_descriptor *)bp->b_un.b_addr;
+		if (bcmp (vdp->id, ISO_STANDARD_ID, sizeof vdp->id) != 0 ||
+		    isonum_711 (vdp->type) == ISO_VD_END) {
+			error = EINVAL;
+			goto out;
+		}
+		
+		if (isonum_711 (vdp->type) == ISO_VD_PRIMARY)
+			break;
+		brelse(bp);
+		bp = NULL;	/* vdp is stale now and "out" must not release bp again */
+	}
+	/* bp is still held here only if the scan stopped on a primary descriptor. */
+	if (bp == NULL) {
+		error = EINVAL;
+		goto out;
+	}
+	
+	pri = (struct iso_primary_descriptor *)vdp;
+	
+	logical_block_size = isonum_723 (pri->logical_block_size);
+	
+	if (logical_block_size < DEV_BSIZE || logical_block_size > MAXBSIZE
+	    || (logical_block_size & (logical_block_size - 1)) != 0) {
+		error = EINVAL;
+		goto out;
+	}
+	
+	rootp = (struct iso_directory_record *)pri->root_directory_record;
+	
+	isomp = malloc(sizeof *isomp, M_ISOFSMNT, M_WAITOK);
+	bzero((caddr_t)isomp, sizeof *isomp);
+	isomp->logical_block_size = logical_block_size;
+	isomp->volume_space_size = isonum_733 (pri->volume_space_size);
+	bcopy (rootp, isomp->root, sizeof isomp->root);
+	isomp->root_extent = isonum_733 (rootp->extent);
+	isomp->root_size = isonum_733 (rootp->size);
+	rootp = (struct iso_directory_record *)isomp->root; /* the copy outlives bp */
+	
+	isomp->im_bmask = logical_block_size - 1;
+	isomp->im_bshift = 0;
+	while ((1 << isomp->im_bshift) < isomp->logical_block_size)
+		isomp->im_bshift++;
+	
+	bp->b_flags |= B_AGE;
+	brelse(bp);
+	bp = NULL;
+	
+	mp->mnt_data = (qaddr_t)isomp;
+	mp->mnt_stat.f_fsid.val[0] = (long)dev;
+	mp->mnt_stat.f_fsid.val[1] = MOUNT_CD9660;
+	mp->mnt_maxsymlinklen = 0;
+	mp->mnt_flag |= MNT_LOCAL;
+	isomp->im_mountp = mp;
+	isomp->im_dev = dev;
+	isomp->im_devvp = devvp;
+	
+	devvp->v_specflags |= SI_MOUNTEDON;
+	
+	/* Check the Rock Ridge Extension support */
+	if (!(argp->flags & ISOFSMNT_NORRIP)) {
+		if (error = bread (isomp->im_devvp,
+				   (isomp->root_extent + isonum_711(rootp->ext_attr_length))
+				   * isomp->logical_block_size / DEV_BSIZE,
+				   isomp->logical_block_size,NOCRED,&bp))
+		    goto out;
+		
+		rootp = (struct iso_directory_record *)bp->b_un.b_addr;
+		
+		if ((isomp->rr_skip = cd9660_rrip_offset(rootp,isomp)) < 0) {
+		    argp->flags  |= ISOFSMNT_NORRIP;
+		} else {
+		    argp->flags  &= ~ISOFSMNT_GENS;
+		}
+		
+		/*
+		 * The contents are valid,
+		 * but they will get reread as part of another vnode, so...
+		 */
+		bp->b_flags |= B_AGE;
+		brelse(bp);
+		bp = NULL;
+	}
+	isomp->im_flags = argp->flags&(ISOFSMNT_NORRIP|ISOFSMNT_GENS|ISOFSMNT_EXTATT);
+	switch (isomp->im_flags&(ISOFSMNT_NORRIP|ISOFSMNT_GENS)) {
+	default:
+	    isomp->iso_ftype = ISO_FTYPE_DEFAULT;
+	    break;
+	case ISOFSMNT_GENS|ISOFSMNT_NORRIP:
+	    isomp->iso_ftype = ISO_FTYPE_9660;
+	    break;
+	case 0:
+	    isomp->iso_ftype = ISO_FTYPE_RRIP;
+	    break;
+	}
+	
+	return 0;
+out:
+	if (bp)
+		brelse(bp);
+	if (needclose)
+		(void)VOP_CLOSE(devvp, ronly ? FREAD : FREAD|FWRITE, NOCRED, p);
+	if (isomp) {
+		/* As in cd9660_unmount(): drop the mounted-on mark set above. */
+		devvp->v_specflags &= ~SI_MOUNTEDON;
+		free((caddr_t)isomp, M_ISOFSMNT);
+		mp->mnt_data = (qaddr_t)0;
+	}
+	return error;
+}
+
+/*
+ * Make a filesystem operational.
+ * cd9660 has nothing to prepare, so every call reports success.
+ */
+/* ARGSUSED */
+cd9660_start(mp, flags, p)
+	struct mount *mp;
+	int flags;
+	struct proc *p;
+{
+	return (0);
+}
+
+/*
+ * unmount system call: flush the vnodes, close the device, free the iso_mnt.
+ * MNT_FORCE is refused (EINVAL) for the root fs or when iso_doforce is 0.
+ */
+int
+cd9660_unmount(mp, mntflags, p)
+	struct mount *mp;
+	int mntflags;
+	struct proc *p;
+{
+	register struct iso_mnt *imp;
+	struct vnode *devvp;
+	int error, vflags;
+	
+	vflags = 0;
+	if ((mntflags & MNT_FORCE) != 0) {
+		if (iso_doforce == 0 || (mp->mnt_flag & MNT_ROOTFS) != 0)
+			return (EINVAL);
+		vflags = FORCECLOSE;
+	}
+	error = vflush(mp, NULLVP, vflags);
+	if (error != 0)
+		return (error);
+
+	imp = VFSTOISOFS(mp);
+
+#ifdef	ISODEVMAP
+	if (imp->iso_ftype == ISO_FTYPE_RRIP)
+		iso_dunmap(imp->im_dev);
+#endif
+	
+	devvp = imp->im_devvp;
+	devvp->v_specflags &= ~SI_MOUNTEDON;
+	error = VOP_CLOSE(devvp, FREAD, NOCRED, p);
+	vrele(devvp);
+	free((caddr_t)imp, M_ISOFSMNT);
+	mp->mnt_data = (qaddr_t)0;
+	mp->mnt_flag &= ~MNT_LOCAL;
+	return (error);
+}
+
+/*
+ * Return root of a filesystem: look up the vnode for the root directory
+ * record kept in the iso_mnt and store it in *vpp (0, or iso_iget's error).
+ */
+cd9660_root(mp, vpp)
+	struct mount *mp;
+	struct vnode **vpp;
+{
+	struct iso_mnt *imp = VFSTOISOFS (mp);
+	struct iso_directory_record *rootrec;
+	struct iso_node fakenode, *fake, *rootnode;
+	struct vnode fakevp;
+	int error, relocated;
+	
+	/* Build a stack-local vnode/inode pair to hand to iso_iget(). */
+	fakevp.v_mount = mp;
+	fakevp.v_data = &fakenode;
+	fake = VTOI(&fakevp);
+	fake->i_vnode = &fakevp;
+	fake->i_dev = imp->im_dev;
+	fake->i_diroff = 0;
+	rootrec = (struct iso_directory_record *)imp->root;
+	isodirino(&fake->i_number,rootrec,imp);
+	
+	/*
+	 * With RRIP we must use the `.' entry of the root directory.
+	 * Simply tell iget, that it's a relocated directory.
+	 */
+	relocated = (imp->iso_ftype == ISO_FTYPE_RRIP);
+	error = iso_iget(fake,fake->i_number,relocated,&rootnode,rootrec);
+	if (error)
+		return error;
+	*vpp = ITOV(rootnode);
+	return 0;
+}
+
+/*
+ * Do operations associated with quotas.
+ * Not supported: every request fails with EOPNOTSUPP.
+ */
+/* ARGSUSED */
+int
+cd9660_quotactl(mp, cmd, uid, arg, p)
+	struct mount *mp;
+	int cmd;
+	uid_t uid;
+	caddr_t arg;
+	struct proc *p;
+{
+	return EOPNOTSUPP;
+}
+
+/*
+ * Get file system statistics: fill *sbp from the mount's iso_mnt.
+ * All free-block and file counts are reported as zero.  Returns 0.
+ */
+cd9660_statfs(mp, sbp, p)
+	struct mount *mp;
+	register struct statfs *sbp;
+	struct proc *p;
+{
+	register struct iso_mnt *imp = VFSTOISOFS(mp);
+	
+	sbp->f_type = MOUNT_CD9660;
+	sbp->f_blocks = imp->volume_space_size;
+	sbp->f_bsize = imp->logical_block_size;
+	sbp->f_iosize = sbp->f_bsize;	/* XXX */
+	/* Free blocks, blocks for non superuser, total files, free file nodes. */
+	sbp->f_bfree = 0;
+	sbp->f_bavail = 0;
+	sbp->f_files = 0;
+	sbp->f_ffree = 0;
+	/* Use the first spare for flags: */
+	sbp->f_spare[0] = imp->im_flags;
+	/* A caller-supplied buffer also needs the two mount names. */
+	if (sbp != &mp->mnt_stat) {
+		bcopy((caddr_t)mp->mnt_stat.f_mntonname,
+			(caddr_t)&sbp->f_mntonname[0], MNAMELEN);
+		bcopy((caddr_t)mp->mnt_stat.f_mntfromname,
+			(caddr_t)&sbp->f_mntfromname[0], MNAMELEN);
+	}
+	return 0;
+}
+/* Sync entry point: does nothing and reports success (cd9660_mount only accepts MNT_RDONLY mounts). */
+/* ARGSUSED */
+int
+cd9660_sync(mp, waitfor, cred, p)
+	struct mount *mp;
+	int waitfor;
+	struct ucred *cred;
+	struct proc *p;
+{
+	return 0;
+}
+
+/*
+ * Flat namespace lookup.
+ * Currently unsupported: always fails with EOPNOTSUPP.
+ */
+/* ARGSUSED */
+int
+cd9660_vget(mp, ino, vpp)
+	struct mount *mp;
+	ino_t ino;
+	struct vnode **vpp;
+{
+	/* mp, ino and vpp are not looked at. */
+	return EOPNOTSUPP;
+}
+
+/*
+ * File handle to vnode
+ *
+ * Have to be really careful about stale file handles:
+ * - check that the inode number is in range
+ * - call iget() to get the locked inode
+ * - check for an unallocated inode (i_mode == 0)
+ * - check that the generation number matches
+ */
+/* File handle layout written by cd9660_vptofh() and read back by cd9660_fhtovp(). */
+struct ifid {
+	ushort	ifid_len;	/* set to sizeof(struct ifid) by cd9660_vptofh() */
+	ushort	ifid_pad;
+	int	ifid_ino;	/* copy of the iso_node's i_number */
+	long	ifid_start;	/* copy of the iso_node's iso_start */
+};
+/* Turn file handle fhp into a vnode in *vpp and report the export flags and anonymous credentials of the client. */
+/* ARGSUSED */
+int
+cd9660_fhtovp(mp, fhp, nam, vpp, exflagsp, credanonp)
+	register struct mount *mp;
+	struct fid *fhp;
+	struct mbuf *nam;
+	struct vnode **vpp;
+	int *exflagsp;
+	struct ucred **credanonp;
+{
+	struct vnode			tvp;
+	int				error;
+	int				lbn, off;
+	struct ifid			*ifhp;
+	struct iso_mnt			*imp;
+	struct buf			*bp;
+	struct iso_directory_record	*dirp;
+	struct iso_node 		tip, *ip, *nip;
+	struct netcred			*np;
+	/* The handle is interpreted as a struct ifid. */
+	imp = VFSTOISOFS (mp);
+	ifhp = (struct ifid *)fhp;
+	
+#ifdef	ISOFS_DBG
+	printf("fhtovp: ino %d, start %ld\n",
+	       ifhp->ifid_ino, ifhp->ifid_start);
+#endif
+	/* Clients without an export entry are refused. */
+	np = vfs_export_lookup(mp, &imp->im_export, nam);
+	if (np == NULL)
+		return (EACCES);
+	/* ifid_ino yields a block number (lbn) and an offset in that block (off). */
+	lbn = iso_lblkno(imp, ifhp->ifid_ino);
+	if (lbn >= imp->volume_space_size) {
+		printf("fhtovp: lbn exceed volume space %d\n", lbn);
+		return (ESTALE);
+	}
+	
+	off = iso_blkoff(imp, ifhp->ifid_ino);
+	if (off + ISO_DIRECTORY_RECORD_SIZE > imp->logical_block_size) {
+		printf("fhtovp: crosses block boundary %d\n",
+		       off + ISO_DIRECTORY_RECORD_SIZE);
+		return (ESTALE);
+	}
+	/* Read the block that should hold the directory record. */
+	error = bread(imp->im_devvp, btodb(lbn * imp->logical_block_size),
+		      imp->logical_block_size, NOCRED, &bp);
+	if (error) {
+		printf("fhtovp: bread error %d\n",error);
+		brelse(bp);
+		return (error);
+	}
+	/* The record, with its real length, must end inside the block. */
+	dirp = (struct iso_directory_record *)(bp->b_un.b_addr + off);
+	if (off + isonum_711(dirp->length) > imp->logical_block_size) {
+		brelse(bp);
+		printf("fhtovp: directory crosses block boundary %d[off=%d/len=%d]\n",
+		       off+isonum_711(dirp->length), off,
+		       isonum_711(dirp->length));
+		return (ESTALE);
+	}
+	/* The handle is stale unless the record still starts where the handle says. */
+	if (isonum_733(dirp->extent) + isonum_711(dirp->ext_attr_length) !=
+	    ifhp->ifid_start) {
+		brelse(bp);
+		printf("fhtovp: file start miss %d vs %d\n",
+		       isonum_733(dirp->extent)+isonum_711(dirp->ext_attr_length),
+		       ifhp->ifid_start);
+		return (ESTALE);
+	}
+	brelse(bp);
+	/* NOTE(review): dirp still points into bp, released just above, yet is passed to iso_iget() below — confirm this is safe. */
+	ip = &tip;
+	tvp.v_mount = mp;
+	tvp.v_data = ip;
+	ip->i_vnode = &tvp;
+	ip->i_dev = imp->im_dev;
+	if (error = iso_iget(ip, ifhp->ifid_ino, 0, &nip, dirp)) {
+		*vpp = NULLVP;
+		printf("fhtovp: failed to get inode\n");
+		return (error);
+	}
+	ip = nip;
+	/*
+	 * XXX need generation number?
+	 */
+	if (ip->inode.iso_mode == 0) {
+		iso_iput(ip);
+		*vpp = NULLVP;
+		printf("fhtovp: inode mode == 0\n");
+		return (ESTALE);
+	}
+	*vpp = ITOV(ip);
+	*exflagsp = np->netc_exflags;
+	*credanonp = &np->netc_anon;
+	return 0;
+}
+
+/*
+ * Vnode pointer to File handle.
+ * Views *fhp as a struct ifid and records its length plus the inode
+ * number and file start taken from vp's iso_node.  Always returns 0.
+ */
+/* ARGSUSED */
+cd9660_vptofh(vp, fhp)
+	struct vnode *vp;
+	struct fid *fhp;
+{
+	register struct iso_node *node = VTOI(vp);
+	register struct ifid *handle = (struct ifid *)fhp;
+	
+	handle->ifid_start = node->iso_start;
+	handle->ifid_ino = node->i_number;
+	handle->ifid_len = sizeof *handle;
+	
+#ifdef	ISOFS_DBG
+	printf("vptofh: ino %d, start %ld\n",
+	       handle->ifid_ino,handle->ifid_start);
+#endif
+	
+	return 0;
+}
diff --git a/sys/fs/cd9660/cd9660_vnops.c b/sys/fs/cd9660/cd9660_vnops.c
new file mode 100644
index 00000000000..59f5a73f5c8
--- /dev/null
+++ b/sys/fs/cd9660/cd9660_vnops.c
@@ -0,0 +1,1038 @@
+/*-
+ * Copyright (c) 1994
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley
+ * by Pace Willisson (pace@blitz.com).  The Rock Ridge Extension
+ * Support code is derived from software contributed to Berkeley
+ * by Atsushi Murai (amurai@spec.co.jp).
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)cd9660_vnops.c	8.3 (Berkeley) 1/23/94
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/namei.h>
+#include <sys/resourcevar.h>
+#include <sys/kernel.h>
+#include <sys/file.h>
+#include <sys/stat.h>
+#include <sys/buf.h>
+#include <sys/proc.h>
+#include <sys/conf.h>
+#include <sys/mount.h>
+#include <sys/vnode.h>
+#include <miscfs/specfs/specdev.h>
+#include <miscfs/fifofs/fifo.h>
+#include <sys/malloc.h>
+#include <sys/dir.h>
+
+#include <isofs/cd9660/iso.h>
+#include <isofs/cd9660/cd9660_node.h>
+#include <isofs/cd9660/iso_rrip.h>
+
+#if 0
+/*
+ * Mknod vnode call
+ *  Actually remap the device number
+ *
+ * This whole function is compiled out by the surrounding "#if 0";
+ * further down in this file cd9660_mknod is #defined to
+ * cd9660_enotsupp instead.
+ *
+ * Without ISODEVMAP: frees the pathname buffer, releases both vnodes
+ * and returns EINVAL.
+ * With ISODEVMAP: accepts only character/block device nodes on a Rock
+ * Ridge mount whose requested type matches the existing vnode, then
+ * deletes or updates the device-number mapping for that inode and
+ * discards the vnode.
+ */
+cd9660_mknod(ndp, vap, cred, p)
+	struct nameidata *ndp;
+	struct ucred *cred;
+	struct vattr *vap;
+	struct proc *p;
+{
+#ifndef	ISODEVMAP
+	free(ndp->ni_pnbuf, M_NAMEI);
+	vput(ndp->ni_dvp);
+	vput(ndp->ni_vp);
+	return EINVAL;
+#else
+	register struct vnode *vp;
+	struct iso_node *ip;
+	struct iso_dnode *dp;
+	int error;
+	
+	vp = ndp->ni_vp;
+	ip = VTOI(vp);
+	
+	/* reject anything but a same-typed device node on an RRIP mount */
+	if (ip->i_mnt->iso_ftype != ISO_FTYPE_RRIP
+	    || vap->va_type != vp->v_type
+	    || (vap->va_type != VCHR && vap->va_type != VBLK)) {
+		free(ndp->ni_pnbuf, M_NAMEI);
+		vput(ndp->ni_dvp);
+		vput(ndp->ni_vp);
+		return EINVAL;
+	}
+	
+	/* NOTE(review): third argument 1 presumably means "create if missing" -- confirm */
+	dp = iso_dmap(ip->i_dev,ip->i_number,1);
+	if (ip->inode.iso_rdev == vap->va_rdev || vap->va_rdev == VNOVAL) {
+		/* same as the unmapped one, delete the mapping */
+		remque(dp);
+		FREE(dp,M_CACHE);
+	} else
+		/* enter new mapping */
+		dp->d_dev = vap->va_rdev;
+	
+	/*
+	 * Remove inode so that it will be reloaded by iget and
+	 * checked to see if it is an alias of an existing entry
+	 * in the inode cache.
+	 *
+	 * NOTE(review): vp is still written and passed to vgone() after
+	 * vput(vp), and unlike the error paths this path releases neither
+	 * ndp->ni_dvp nor ndp->ni_pnbuf -- verify before re-enabling.
+	 */
+	vput(vp);
+	vp->v_type = VNON;
+	vgone(vp);
+	return (0);
+#endif
+}
+#endif
+
+/*
+ * Open vnode op.
+ *
+ * No work is performed for an open; the call always returns 0.
+ */
+/* ARGSUSED */
+int
+cd9660_open(ap)
+	struct vop_open_args /* {
+		struct vnode *a_vp;
+		int  a_mode;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+
+	return 0;
+}
+
+/*
+ * Close vnode op.
+ *
+ * No work is performed for a close; the call always returns 0.
+ */
+/* ARGSUSED */
+int
+cd9660_close(ap)
+	struct vop_close_args /* {
+		struct vnode *a_vp;
+		int  a_fflag;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+
+	return 0;
+}
+
+/*
+ * Access vnode op.
+ *
+ * No permission check is performed: the requested mode and the
+ * credentials in the argument block are ignored and the call always
+ * returns 0, i.e. every access is granted.
+ */
+/* ARGSUSED */
+int
+cd9660_access(ap)
+	struct vop_access_args /* {
+		struct vnode *a_vp;
+		int  a_mode;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+
+	return (0);
+}
+
+/*
+ * Getattr vnode op.
+ *
+ * Copies the attributes cached in the vnode's iso_node into the
+ * caller's struct vattr (ap->a_vap).  Always returns 0.
+ */
+int
+cd9660_getattr(ap)
+	struct vop_getattr_args /* {
+		struct vnode *a_vp;
+		struct vattr *a_vap;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct vnode *vp = ap->a_vp;
+	register struct vattr *vap = ap->a_vap;
+	register struct iso_node *ip = VTOI(vp);
+
+	/* identity of the file */
+	vap->va_fsid	= ip->i_dev;
+	vap->va_fileid	= ip->i_number;
+
+	/* ownership, mode, times and device as held in the iso_node */
+	vap->va_mode	= ip->inode.iso_mode;
+	vap->va_nlink	= ip->inode.iso_links;
+	vap->va_uid	= ip->inode.iso_uid;
+	vap->va_gid	= ip->inode.iso_gid;
+	vap->va_atime	= ip->inode.iso_atime;
+	vap->va_mtime	= ip->inode.iso_mtime;
+	vap->va_ctime	= ip->inode.iso_ctime;
+	vap->va_rdev	= ip->inode.iso_rdev;
+
+	/* va_bytes is reported as the file size; flags and gen are fixed */
+	vap->va_size	= (u_quad_t) ip->i_size;
+	vap->va_flags	= 0;
+	vap->va_gen = 1;
+	vap->va_blocksize = ip->i_mnt->logical_block_size;
+	vap->va_bytes	= (u_quad_t) ip->i_size;
+	vap->va_type	= vp->v_type;
+	return (0);
+}
+
+/*
+ * doclusterread selects the read path in cd9660_read(): non-zero uses
+ * cluster_read(), zero uses bread()/breadn().  It is only non-zero by
+ * default when ISO_DEFAULT_BLOCK_SIZE is at least NBPG; with DEBUG it
+ * is an external variable rather than a constant.
+ */
+#if ISO_DEFAULT_BLOCK_SIZE >= NBPG
+#ifdef DEBUG
+extern int doclusterread;
+#else
+#define doclusterread 1
+#endif
+#else
+/* XXX until cluster routines can handle block sizes less than one page */
+#define doclusterread 0
+#endif
+
+/*
+ * Vnode op for reading.
+ *
+ * Copies data from the file into the caller's uio one logical block at
+ * a time, stopping at end of file, when the request is satisfied, or on
+ * the first error.  Returns 0 for an empty request or a read starting at
+ * or past end of file, EINVAL for a negative offset, otherwise the error
+ * from the block read or from uiomove().
+ */
+int
+cd9660_read(ap)
+	struct vop_read_args /* {
+		struct vnode *a_vp;
+		struct uio *a_uio;
+		int a_ioflag;
+		struct ucred *a_cred;
+	} */ *ap;
+{
+	struct vnode *vp = ap->a_vp;
+	register struct uio *uio = ap->a_uio;
+	register struct iso_node *ip = VTOI(vp);
+	register struct iso_mnt *imp;
+	struct buf *bp;
+	daddr_t lbn, rablock;
+	off_t diff;
+	int rasize, error = 0;
+	long size, n, on;
+
+	if (uio->uio_resid == 0)
+		return (0);
+	if (uio->uio_offset < 0)
+		return (EINVAL);
+	ip->i_flag |= IACC;
+	imp = ip->i_mnt;
+	do {
+		/* logical block, offset inside it, and bytes wanted from it */
+		lbn = iso_lblkno(imp, uio->uio_offset);
+		on = iso_blkoff(imp, uio->uio_offset);
+		n = min((unsigned)(imp->logical_block_size - on),
+			uio->uio_resid);
+		/* never copy past end of file */
+		diff = (off_t)ip->i_size - uio->uio_offset;
+		if (diff <= 0)
+			return (0);
+		if (diff < n)
+			n = diff;
+		size = iso_blksize(imp, ip, lbn);
+		rablock = lbn + 1;
+		if (doclusterread) {
+			if (iso_lblktosize(imp, rablock) <= ip->i_size)
+				error = cluster_read(vp, (off_t)ip->i_size,
+						     lbn, size, NOCRED, &bp);
+			else
+				error = bread(vp, lbn, size, NOCRED, &bp);
+		} else {
+			/* read ahead one block when the access is sequential */
+			if (vp->v_lastr + 1 == lbn &&
+			    iso_lblktosize(imp, rablock) < ip->i_size) {
+				rasize = iso_blksize(imp, ip, rablock);
+				error = breadn(vp, lbn, size, &rablock,
+					       &rasize, 1, NOCRED, &bp);
+			} else
+				error = bread(vp, lbn, size, NOCRED, &bp);
+		}
+		vp->v_lastr = lbn;
+		/* check the read result before looking inside the buffer */
+		if (error) {
+			brelse(bp);
+			return (error);
+		}
+		/* a short read limits how much of the block is valid */
+		n = min(n, size - bp->b_resid);
+
+		error = uiomove(bp->b_un.b_addr + on, (int)n, uio);
+		/* block fully consumed, or EOF reached: mark buffer B_AGE */
+		if (n + on == imp->logical_block_size ||
+		    uio->uio_offset == (off_t)ip->i_size)
+			bp->b_flags |= B_AGE;
+		brelse(bp);
+	} while (error == 0 && uio->uio_resid > 0 && n != 0);
+	return (error);
+}
+
+/*
+ * Ioctl vnode op.
+ *
+ * No ioctl is implemented: a console message is printed and ENOTTY is
+ * returned for every request.
+ */
+/* ARGSUSED */
+int
+cd9660_ioctl(ap)
+	struct vop_ioctl_args /* {
+		struct vnode *a_vp;
+		int  a_command;
+		caddr_t  a_data;
+		int  a_fflag;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+
+	printf("You did ioctl for isofs !!\n");
+	return ENOTTY;
+}
+
+/*
+ * Select vnode op.
+ *
+ * Always returns 1 without examining the vnode.
+ * XXX this should really check whether I/O is possible.
+ */
+/* ARGSUSED */
+int
+cd9660_select(ap)
+	struct vop_select_args /* {
+		struct vnode *a_vp;
+		int  a_which;
+		int  a_fflags;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+
+	return 1;
+}
+
+/*
+ * Mmap vnode op.
+ *
+ * Not supported at present: every request fails with EINVAL.
+ */
+/* ARGSUSED */
+int
+cd9660_mmap(ap)
+	struct vop_mmap_args /* {
+		struct vnode *a_vp;
+		int  a_fflags;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+
+	return EINVAL;
+}
+
+/*
+ * Seek vnode op.
+ *
+ * Neither offset is inspected; the call always returns 0.
+ */
+/* ARGSUSED */
+int
+cd9660_seek(ap)
+	struct vop_seek_args /* {
+		struct vnode *a_vp;
+		off_t  a_oldoff;
+		off_t  a_newoff;
+		struct ucred *a_cred;
+	} */ *ap;
+{
+
+	return 0;
+}
+
+/*
+ * Structure for reading directories
+ *
+ * Working state shared by cd9660_readdir(), iso_shipdir() and
+ * iso_uiodir() for the duration of one readdir call.
+ */
+struct isoreaddir {
+	struct dirent saveent;	/* held-back regular entry (iso_shipdir) */
+	struct dirent assocent;	/* held-back entry whose name began with ASSOCCHAR */
+	struct dirent current;	/* entry being built from the current record */
+	off_t saveoff;		/* curroff recorded when saveent was stored */
+	off_t assocoff;		/* curroff recorded when assocent was stored */
+	off_t curroff;		/* directory offset the scan has reached */
+	struct uio *uio;	/* caller's uio that entries are copied to */
+	off_t uio_off;		/* offset given with the last entry copied out */
+	u_int *cookiep;		/* if non-null, next slot for an entry offset */
+	int ncookies;		/* cookie slots left (used only with cookiep) */
+	int eof;		/* cleared when output stops for lack of room */
+};
+
+/*
+ * Copy one directory entry out to the caller.
+ *
+ * NUL-terminates the name, computes d_reclen, and moves the entry to
+ * idp->uio.  Returns -1 (after clearing idp->eof) when the entry does
+ * not fit in the remaining space or no cookie slot is left, the
+ * uiomove() error if the copy fails, and 0 on success, in which case
+ * idp->uio_off is set to off.
+ */
+static int
+iso_uiodir(idp,dp,off)
+	struct isoreaddir *idp;
+	struct dirent *dp;
+	off_t off;
+{
+	int error;
+
+	dp->d_name[dp->d_namlen] = '\0';
+	dp->d_reclen = DIRSIZ(dp);
+
+	/* no room left in the caller's buffer for this entry */
+	if (idp->uio->uio_resid < dp->d_reclen) {
+		idp->eof = 0;
+		return (-1);
+	}
+
+	if (idp->cookiep != NULL) {
+		/* cookies requested but none left */
+		if (idp->ncookies <= 0) {
+			idp->eof = 0;
+			return (-1);
+		}
+
+		*idp->cookiep = off;
+		idp->cookiep++;
+		idp->ncookies--;
+	}
+
+	error = uiomove(dp, dp->d_reclen, idp->uio);
+	if (error == 0)
+		idp->uio_off = off;
+	return (error);
+}
+
+/*
+ * Queue idp->current for output, flushing held-back entries first.
+ *
+ * The current name (minus a leading ASSOCCHAR, if it has one and is
+ * longer than one character) is compared with the held-back name: the
+ * saved regular entry if there is one, otherwise the saved associated
+ * entry without its first character.  If a held-back name exists and
+ * differs, the held-back associated and regular entries are copied out
+ * through iso_uiodir() and cleared.  The current entry then replaces
+ * the associated or the regular held-back slot.  Returns 0, or the
+ * first non-zero value returned by iso_uiodir().
+ */
+static int
+iso_shipdir(idp)
+	struct isoreaddir *idp;
+{
+	struct dirent *slot;
+	char *curname, *heldname;
+	int curlen, heldlen, isassoc;
+	int error;
+
+	curlen = idp->current.d_namlen;
+	curname = idp->current.d_name;
+	isassoc = (curlen > 1 && *curname == ASSOCCHAR);
+	if (isassoc) {
+		/* compare without the association marker */
+		curname++;
+		curlen--;
+	}
+
+	if (idp->saveent.d_namlen != 0) {
+		heldname = idp->saveent.d_name;
+		heldlen = idp->saveent.d_namlen;
+	} else {
+		heldname = idp->assocent.d_name + 1;
+		heldlen = idp->assocent.d_namlen - 1;
+	}
+
+	if (heldlen > 0 &&
+	    (heldlen != curlen || bcmp(heldname,curname,heldlen))) {
+		if (idp->assocent.d_namlen) {
+			error = iso_uiodir(idp,&idp->assocent,idp->assocoff);
+			if (error)
+				return (error);
+			idp->assocent.d_namlen = 0;
+		}
+		if (idp->saveent.d_namlen) {
+			error = iso_uiodir(idp,&idp->saveent,idp->saveoff);
+			if (error)
+				return (error);
+			idp->saveent.d_namlen = 0;
+		}
+	}
+
+	idp->current.d_reclen = DIRSIZ(&idp->current);
+	if (isassoc) {
+		idp->assocoff = idp->curroff;
+		slot = &idp->assocent;
+	} else {
+		idp->saveoff = idp->curroff;
+		slot = &idp->saveent;
+	}
+	bcopy(&idp->current,slot,idp->current.d_reclen);
+	return (0);
+}
+
+/*
+ * Vnode op for readdir
+ * XXX make sure everything still works now that eofflagp and cookiep
+ * are no longer args.
+ *
+ * Walks the directory's records from uio->uio_offset to the end of the
+ * directory, converting each into a struct dirent and copying it to the
+ * caller through iso_uiodir() (directly, or via iso_shipdir() for the
+ * ISO_FTYPE_DEFAULT name handling).  On return uio->uio_offset is the
+ * offset recorded with the last entry actually copied out, or the
+ * starting offset if nothing was copied.  Returns 0, EINVAL for a
+ * malformed record, or an error from reading a block or copying out;
+ * running out of room in the caller's buffer is not an error.
+ */
+int
+cd9660_readdir(ap)
+	struct vop_readdir_args /* {
+		struct vnode *a_vp;
+		struct uio *a_uio;
+		struct ucred *a_cred;
+	} */ *ap;
+{
+	register struct uio *uio = ap->a_uio;
+	struct isoreaddir *idp;
+	int entryoffsetinblock;
+	int error = 0;
+	int endsearch;
+	struct iso_directory_record *ep;
+	u_short elen;
+	int reclen;
+	struct iso_mnt *imp;
+	struct iso_node *ip;
+	struct buf *bp = NULL;
+
+	ip = VTOI(ap->a_vp);
+	imp = ip->i_mnt;
+
+	MALLOC(idp,struct isoreaddir *,sizeof(*idp),M_TEMP,M_WAITOK);
+	idp->saveent.d_namlen = 0;
+	idp->assocent.d_namlen = 0;
+	idp->uio = uio;
+#if 0
+	idp->cookiep = cookies;
+	idp->ncookies = ncookies;
+	idp->eof = 1;
+#else
+	idp->cookiep = 0;
+#endif
+	idp->curroff = uio->uio_offset;
+	/*
+	 * idp is not cleared after allocation, and uio_off is otherwise
+	 * only set when an entry is copied out; start it at the caller's
+	 * offset so that a call which ships nothing leaves uio_offset
+	 * unchanged instead of storing an indeterminate value.
+	 */
+	idp->uio_off = uio->uio_offset;
+
+	/* starting in the middle of a block: that block must be read now */
+	entryoffsetinblock = iso_blkoff(imp, idp->curroff);
+	if (entryoffsetinblock != 0) {
+		if (error = iso_blkatoff(ip, idp->curroff, &bp)) {
+			FREE(idp,M_TEMP);
+			return (error);
+		}
+	}
+
+	endsearch = ip->i_size;
+
+	while (idp->curroff < endsearch) {
+		/*
+		 * If offset is on a block boundary,
+		 * read the next directory block.
+		 * Release previous if it exists.
+		 */
+
+		if (iso_blkoff(imp, idp->curroff) == 0) {
+			if (bp != NULL) {
+				brelse(bp);
+				/* do not release it again if the read fails */
+				bp = NULL;
+			}
+			if (error = iso_blkatoff(ip, idp->curroff, &bp))
+				break;
+			entryoffsetinblock = 0;
+		}
+		/*
+		 * Get pointer to next entry.
+		 */
+
+		ep = (struct iso_directory_record *)
+			(bp->b_un.b_addr + entryoffsetinblock);
+
+		reclen = isonum_711 (ep->length);
+		if (reclen == 0) {
+			/*
+			 * skip to next block, if any.  Advance explicitly:
+			 * rounding up an offset that is already on a block
+			 * boundary would not move and would loop forever.
+			 */
+			idp->curroff += imp->logical_block_size -
+				entryoffsetinblock;
+			continue;
+		}
+
+		if (reclen < ISO_DIRECTORY_RECORD_SIZE) {
+			error = EINVAL;
+			/* illegal entry, stop */
+			break;
+		}
+
+		if (entryoffsetinblock + reclen > imp->logical_block_size) {
+			error = EINVAL;
+			/* illegal directory, so stop looking */
+			break;
+		}
+
+		idp->current.d_namlen = isonum_711 (ep->name_len);
+		if (isonum_711(ep->flags)&2)
+			isodirino(&idp->current.d_fileno,ep,imp);
+		else
+			/*
+			 * File number is the device byte address of the
+			 * record: start of this buffer's block plus the
+			 * record's offset inside the block (not the offset
+			 * in the directory, which differs after block 0).
+			 */
+			idp->current.d_fileno = dbtob(bp->b_blkno) +
+				entryoffsetinblock;
+
+		if (reclen < ISO_DIRECTORY_RECORD_SIZE + idp->current.d_namlen) {
+			error = EINVAL;
+			/* illegal entry, stop */
+			break;
+		}
+
+		idp->curroff += reclen;
+		/*
+		 * Translate the name according to the mount's flavour
+		 * and hand the entry on.
+		 */
+		switch (imp->iso_ftype) {
+		case ISO_FTYPE_RRIP:
+			/*
+			 * The length comes back through a u_short; go via
+			 * a local instead of casting the address of the
+			 * narrower d_namlen field.
+			 */
+			elen = idp->current.d_namlen;
+			cd9660_rrip_getname(ep,idp->current.d_name,&elen,
+					   &idp->current.d_fileno,imp);
+			idp->current.d_namlen = (u_char)elen;
+			if (idp->current.d_namlen)
+				error = iso_uiodir(idp,&idp->current,idp->curroff);
+			break;
+		default:	/* ISO_FTYPE_DEFAULT || ISO_FTYPE_9660 */
+			/* names 0 and 1 become "." and ".." via d_namlen */
+			strcpy(idp->current.d_name,"..");
+			switch (ep->name[0]) {
+			case 0:
+				idp->current.d_namlen = 1;
+				error = iso_uiodir(idp,&idp->current,idp->curroff);
+				break;
+			case 1:
+				idp->current.d_namlen = 2;
+				error = iso_uiodir(idp,&idp->current,idp->curroff);
+				break;
+			default:
+				isofntrans(ep->name,idp->current.d_namlen,
+					   idp->current.d_name, &elen,
+					   imp->iso_ftype == ISO_FTYPE_9660,
+					   isonum_711(ep->flags)&4);
+				idp->current.d_namlen = (u_char)elen;
+				if (imp->iso_ftype == ISO_FTYPE_DEFAULT)
+					error = iso_shipdir(idp);
+				else
+					error = iso_uiodir(idp,&idp->current,idp->curroff);
+				break;
+			}
+		}
+		if (error)
+			break;
+
+		entryoffsetinblock += reclen;
+	}
+
+	/* flush whatever iso_shipdir() is still holding back */
+	if (!error && imp->iso_ftype == ISO_FTYPE_DEFAULT) {
+		idp->current.d_namlen = 0;
+		error = iso_shipdir(idp);
+	}
+	/* negative means "caller's buffer is full", which is not an error */
+	if (error < 0)
+		error = 0;
+
+	if (bp)
+		brelse (bp);
+
+	uio->uio_offset = idp->uio_off;
+#if 0
+	*eofflagp = idp->eof;
+#endif
+
+	FREE(idp,M_TEMP);
+
+	return (error);
+}
+
+/*
+ * Return target name of a symbolic link
+ * Shouldn't we get the parent vnode and read the data from there?
+ * This could eventually result in deadlocks in cd9660_lookup.
+ * But otherwise the block read here is in the block buffer two times.
+ *
+ * Only Rock Ridge mounts are handled.  The inode number is used as the
+ * device byte address of the file's directory record: the logical block
+ * holding it is read from the device vnode, the symbolic name is
+ * extracted by cd9660_rrip_getsymname() and copied to ap->a_uio.
+ * Returns EINVAL when the mount is not Rock Ridge, the block cannot be
+ * read, the record would cross the block boundary or no symbolic name
+ * is obtained; otherwise the result of uiomove().
+ */
+typedef struct iso_directory_record ISODIR;
+typedef struct iso_node             ISONODE;
+typedef struct iso_mnt              ISOMNT;
+int
+cd9660_readlink(ap)
+	struct vop_readlink_args /* {
+		struct vnode *a_vp;
+		struct uio *a_uio;
+		struct ucred *a_cred;
+	} */ *ap;
+{
+	ISONODE	*ip;
+	ISODIR	*dirp;
+	ISOMNT	*imp;
+	struct	buf *bp;
+	u_short	symlen;
+	int	error;
+	char	*symname;
+
+	ip  = VTOI(ap->a_vp);
+	imp = ip->i_mnt;
+
+	if (imp->iso_ftype != ISO_FTYPE_RRIP)
+		return EINVAL;
+
+	/*
+	 * Get parents directory record block that this inode included.
+	 * The read must start at the beginning of the logical block,
+	 * because the record is located below by its offset within that
+	 * block; so strip the in-block offset before converting the byte
+	 * address to a device block number.
+	 */
+	error = bread(imp->im_devvp,
+		      (daddr_t)((ip->i_number -
+				 (ip->i_number & imp->im_bmask)) / DEV_BSIZE),
+		      imp->logical_block_size,
+		      NOCRED,
+		      &bp);
+	if (error) {
+		brelse(bp);
+		return EINVAL;
+	}
+
+	/*
+	 * Setup the directory pointer for this inode
+	 */
+	dirp = (ISODIR *)(bp->b_un.b_addr + (ip->i_number & imp->im_bmask));
+#ifdef DEBUG
+	printf("lbn=%d,off=%d,bsize=%d,DEV_BSIZE=%d, dirp= %08x, b_addr=%08x, offset=%08x(%08x)\n",
+	       (daddr_t)(ip->i_number >> imp->im_bshift),
+	       ip->i_number & imp->im_bmask,
+	       imp->logical_block_size,
+	       DEV_BSIZE,
+	       dirp,
+	       bp->b_un.b_addr,
+	       ip->i_number,
+	       ip->i_number & imp->im_bmask );
+#endif
+
+	/*
+	 * Just make sure, we have a right one....
+	 *   1: Check not cross boundary on block
+	 */
+	if ((ip->i_number & imp->im_bmask) + isonum_711(dirp->length)
+	    > imp->logical_block_size) {
+		brelse(bp);
+		return EINVAL;
+	}
+
+	/*
+	 * Now get a buffer
+	 * Abuse a namei buffer for now.
+	 */
+	MALLOC(symname,char *,MAXPATHLEN,M_NAMEI,M_WAITOK);
+
+	/*
+	 * Gather the symbolic name from the SL record; a zero return
+	 * is treated as failure.
+	 */
+	if (cd9660_rrip_getsymname(dirp,symname,&symlen,imp) == 0) {
+		FREE(symname,M_NAMEI);
+		brelse(bp);
+		return EINVAL;
+	}
+	/*
+	 * The directory block is no longer needed once the name has
+	 * been copied into symname.
+	 */
+	brelse(bp);
+
+	/*
+	 * Hand the symbolic name back to the caller.
+	 */
+	error = uiomove(symname,symlen,ap->a_uio);
+
+	FREE(symname,M_NAMEI);
+
+	return error;
+}
+
+/*
+ * Abortop vnode op.
+ *
+ * Frees the pathname buffer of the component name when HASBUF is set
+ * and SAVESTART is not.  Always returns 0.
+ */
+int
+cd9660_abortop(ap)
+	struct vop_abortop_args /* {
+		struct vnode *a_dvp;
+		struct componentname *a_cnp;
+	} */ *ap;
+{
+	struct componentname *cnp = ap->a_cnp;
+
+	if ((cnp->cn_flags & (HASBUF | SAVESTART)) == HASBUF)
+		FREE(cnp->cn_pnbuf, M_NAMEI);
+	return (0);
+}
+
+/*
+ * Lock vnode op: applies ISO_ILOCK to the vnode's iso_node.
+ * Always returns 0.
+ */
+int
+cd9660_lock(ap)
+	struct vop_lock_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+	struct iso_node *node;
+
+	node = VTOI(ap->a_vp);
+	ISO_ILOCK(node);
+	return (0);
+}
+
+/*
+ * Unlock vnode op: applies ISO_IUNLOCK to the vnode's iso_node.
+ * Panics if the node's ILOCKED flag is not set.  Always returns 0.
+ */
+int
+cd9660_unlock(ap)
+	struct vop_unlock_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+	struct iso_node *node;
+
+	node = VTOI(ap->a_vp);
+	if ((node->i_flag & ILOCKED) == 0)
+		panic("cd9660_unlock NOT LOCKED");
+	ISO_IUNLOCK(node);
+	return (0);
+}
+
+/*
+ * Islocked vnode op: returns 1 when the iso_node's ILOCKED flag is
+ * set and 0 otherwise.
+ */
+int
+cd9660_islocked(ap)
+	struct vop_islocked_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+	struct iso_node *node = VTOI(ap->a_vp);
+
+	return ((node->i_flag & ILOCKED) ? 1 : 0);
+}
+
+/*
+ * Strategy vnode op.
+ *
+ * If the buffer's block number still equals its logical block number,
+ * map it with VOP_BMAP; a mapping failure marks the buffer B_ERROR,
+ * completes it and returns the error.  A block number of -1 completes
+ * the buffer without device I/O (clearing it first when the -1 came
+ * from the mapping just done).  Otherwise the request is passed to the
+ * strategy op of the node's device vnode.  Panics when called on a
+ * block or character device vnode.
+ */
+int
+cd9660_strategy(ap)
+	struct vop_strategy_args /* {
+		struct buf *a_bp;
+	} */ *ap;
+{
+	struct buf *bp = ap->a_bp;
+	struct vnode *vp = bp->b_vp;
+	struct vnode *devvp;
+	struct iso_node *ip = VTOI(vp);
+	int error;
+
+	switch (vp->v_type) {
+	case VBLK:
+	case VCHR:
+		panic("cd9660_strategy: spec");
+	default:
+		break;
+	}
+
+	if (bp->b_blkno == bp->b_lblkno) {
+		error = VOP_BMAP(vp, bp->b_lblkno, NULL, &bp->b_blkno, NULL);
+		if (error != 0) {
+			bp->b_error = error;
+			bp->b_flags |= B_ERROR;
+			biodone(bp);
+			return (error);
+		}
+		if ((long)bp->b_blkno == -1)
+			clrbuf(bp);
+	}
+	if ((long)bp->b_blkno == -1) {
+		biodone(bp);
+		return (0);
+	}
+
+	/* hand the request to the underlying device vnode */
+	devvp = ip->i_devvp;
+	bp->b_dev = devvp->v_rdev;
+	VOCALL (devvp->v_op, VOFFSET(vop_strategy), ap);
+	return (0);
+}
+
+/*
+ * Print vnode op: emits a fixed tag line; nothing from the vnode
+ * itself is printed.  Always returns 0.
+ */
+int
+cd9660_print(ap)
+	struct vop_print_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+
+	printf("tag VT_ISOFS, isofs vnode\n");
+	return (0);
+}
+
+/*
+ * Shared handler for operations this file system does not support:
+ * returns EOPNOTSUPP unconditionally.
+ */
+int
+cd9660_enotsupp()
+{
+	return EOPNOTSUPP;
+}
+
+/*
+ * Global vfs data structures for isofs
+ *
+ * Each macro below names a vnode operation that has no implementation
+ * of its own in this file.  All of them are cd9660_enotsupp cast to the
+ * function-pointer type of that operation, except cd9660_fsync, which
+ * is nullop.
+ */
+#define cd9660_create \
+	((int (*) __P((struct  vop_create_args *)))cd9660_enotsupp)
+#define cd9660_mknod ((int (*) __P((struct  vop_mknod_args *)))cd9660_enotsupp)
+#define cd9660_setattr \
+	((int (*) __P((struct  vop_setattr_args *)))cd9660_enotsupp)
+#define cd9660_write ((int (*) __P((struct  vop_write_args *)))cd9660_enotsupp)
+/* fsync maps to nullop rather than to cd9660_enotsupp */
+#define cd9660_fsync ((int (*) __P((struct  vop_fsync_args *)))nullop)
+#define cd9660_remove \
+	((int (*) __P((struct  vop_remove_args *)))cd9660_enotsupp)
+#define cd9660_link ((int (*) __P((struct  vop_link_args *)))cd9660_enotsupp)
+#define cd9660_rename \
+	((int (*) __P((struct  vop_rename_args *)))cd9660_enotsupp)
+#define cd9660_mkdir ((int (*) __P((struct  vop_mkdir_args *)))cd9660_enotsupp)
+#define cd9660_rmdir ((int (*) __P((struct  vop_rmdir_args *)))cd9660_enotsupp)
+#define cd9660_symlink \
+	((int (*) __P((struct vop_symlink_args *)))cd9660_enotsupp)
+#define cd9660_pathconf \
+	((int (*) __P((struct vop_pathconf_args *)))cd9660_enotsupp)
+#define cd9660_advlock \
+	((int (*) __P((struct vop_advlock_args *)))cd9660_enotsupp)
+#define cd9660_blkatoff \
+	((int (*) __P((struct  vop_blkatoff_args *)))cd9660_enotsupp)
+/*
+ * NOTE(review): unlike its neighbours this cast spells out four
+ * separate parameters instead of a single vop_valloc_args pointer --
+ * confirm which form the vnode-op table expects.
+ */
+#define cd9660_valloc ((int(*) __P(( \
+		struct vnode *pvp, \
+		int mode, \
+		struct ucred *cred, \
+		struct vnode **vpp))) cd9660_enotsupp)
+#define cd9660_vfree ((int (*) __P((struct  vop_vfree_args *)))cd9660_enotsupp)
+#define cd9660_truncate \
+	((int (*) __P((struct  vop_truncate_args *)))cd9660_enotsupp)
+#define cd9660_update \
+	((int (*) __P((struct  vop_update_args *)))cd9660_enotsupp)
+/* the operation tables in this file install vn_bwrite, not this macro */
+#define cd9660_bwrite \
+	((int (*) __P((struct  vop_bwrite_args *)))cd9660_enotsupp)
+
+/*
+ * Vnode operation table for cd9660 vnodes.
+ *
+ * Pairs each operation descriptor with its cd9660 handler; operations
+ * not listed fall to the vop_default_desc entry (vn_default_error).
+ * Several cd9660_* names used here are macros for cd9660_enotsupp or
+ * nullop rather than real functions.  The array ends with a NULL pair.
+ */
+int (**cd9660_vnodeop_p)();
+struct vnodeopv_entry_desc cd9660_vnodeop_entries[] = {
+	{ &vop_default_desc, vn_default_error },
+	{ &vop_lookup_desc, cd9660_lookup },	/* lookup */
+	{ &vop_create_desc, cd9660_create },	/* create */
+	{ &vop_mknod_desc, cd9660_mknod },	/* mknod */
+	{ &vop_open_desc, cd9660_open },	/* open */
+	{ &vop_close_desc, cd9660_close },	/* close */
+	{ &vop_access_desc, cd9660_access },	/* access */
+	{ &vop_getattr_desc, cd9660_getattr },	/* getattr */
+	{ &vop_setattr_desc, cd9660_setattr },	/* setattr */
+	{ &vop_read_desc, cd9660_read },	/* read */
+	{ &vop_write_desc, cd9660_write },	/* write */
+	{ &vop_ioctl_desc, cd9660_ioctl },	/* ioctl */
+	{ &vop_select_desc, cd9660_select },	/* select */
+	{ &vop_mmap_desc, cd9660_mmap },	/* mmap */
+	{ &vop_fsync_desc, cd9660_fsync },	/* fsync */
+	{ &vop_seek_desc, cd9660_seek },	/* seek */
+	{ &vop_remove_desc, cd9660_remove },	/* remove */
+	{ &vop_link_desc, cd9660_link },	/* link */
+	{ &vop_rename_desc, cd9660_rename },	/* rename */
+	{ &vop_mkdir_desc, cd9660_mkdir },	/* mkdir */
+	{ &vop_rmdir_desc, cd9660_rmdir },	/* rmdir */
+	{ &vop_symlink_desc, cd9660_symlink },	/* symlink */
+	{ &vop_readdir_desc, cd9660_readdir },	/* readdir */
+	{ &vop_readlink_desc, cd9660_readlink },/* readlink */
+	{ &vop_abortop_desc, cd9660_abortop },	/* abortop */
+	{ &vop_inactive_desc, cd9660_inactive },/* inactive */
+	{ &vop_reclaim_desc, cd9660_reclaim },	/* reclaim */
+	{ &vop_lock_desc, cd9660_lock },	/* lock */
+	{ &vop_unlock_desc, cd9660_unlock },	/* unlock */
+	{ &vop_bmap_desc, cd9660_bmap },	/* bmap */
+	{ &vop_strategy_desc, cd9660_strategy },/* strategy */
+	{ &vop_print_desc, cd9660_print },	/* print */
+	{ &vop_islocked_desc, cd9660_islocked },/* islocked */
+	{ &vop_pathconf_desc, cd9660_pathconf },/* pathconf */
+	{ &vop_advlock_desc, cd9660_advlock },	/* advlock */
+	{ &vop_blkatoff_desc, cd9660_blkatoff },/* blkatoff */
+	{ &vop_valloc_desc, cd9660_valloc },	/* valloc */
+	{ &vop_vfree_desc, cd9660_vfree },	/* vfree */
+	{ &vop_truncate_desc, cd9660_truncate },/* truncate */
+	{ &vop_update_desc, cd9660_update },	/* update */
+	{ &vop_bwrite_desc, vn_bwrite },	/* bwrite */
+	{ (struct vnodeop_desc*)NULL, (int(*)())NULL }
+};
+/* ties the table to the vector pointer cd9660_vnodeop_p */
+struct vnodeopv_desc cd9660_vnodeop_opv_desc =
+	{ &cd9660_vnodeop_p, cd9660_vnodeop_entries };
+
+/*
+ * Special device vnode ops
+ *
+ * Same layout as the table above, but most I/O-related operations are
+ * the spec_* handlers; the cd9660 handlers are kept for access,
+ * getattr, inactive, reclaim, the lock operations, strategy, print and
+ * the unsupported-operation macros.
+ */
+int (**cd9660_specop_p)();
+struct vnodeopv_entry_desc cd9660_specop_entries[] = {
+	{ &vop_default_desc, vn_default_error },
+	{ &vop_lookup_desc, spec_lookup },	/* lookup */
+	{ &vop_create_desc, cd9660_create },	/* create */
+	{ &vop_mknod_desc, cd9660_mknod },	/* mknod */
+	{ &vop_open_desc, spec_open },		/* open */
+	{ &vop_close_desc, spec_close },	/* close */
+	{ &vop_access_desc, cd9660_access },	/* access */
+	{ &vop_getattr_desc, cd9660_getattr },	/* getattr */
+	{ &vop_setattr_desc, cd9660_setattr },	/* setattr */
+	{ &vop_read_desc, spec_read },		/* read */
+	{ &vop_write_desc, spec_write },	/* write */
+	{ &vop_ioctl_desc, spec_ioctl },	/* ioctl */
+	{ &vop_select_desc, spec_select },	/* select */
+	{ &vop_mmap_desc, spec_mmap },		/* mmap */
+	{ &vop_fsync_desc, spec_fsync },	/* fsync */
+	{ &vop_seek_desc, spec_seek },		/* seek */
+	{ &vop_remove_desc, cd9660_remove },	/* remove */
+	{ &vop_link_desc, cd9660_link },	/* link */
+	{ &vop_rename_desc, cd9660_rename },	/* rename */
+	{ &vop_mkdir_desc, cd9660_mkdir },	/* mkdir */
+	{ &vop_rmdir_desc, cd9660_rmdir },	/* rmdir */
+	{ &vop_symlink_desc, cd9660_symlink },	/* symlink */
+	{ &vop_readdir_desc, spec_readdir },	/* readdir */
+	{ &vop_readlink_desc, spec_readlink },	/* readlink */
+	{ &vop_abortop_desc, spec_abortop },	/* abortop */
+	{ &vop_inactive_desc, cd9660_inactive },/* inactive */
+	{ &vop_reclaim_desc, cd9660_reclaim },	/* reclaim */
+	{ &vop_lock_desc, cd9660_lock },	/* lock */
+	{ &vop_unlock_desc, cd9660_unlock },	/* unlock */
+	{ &vop_bmap_desc, spec_bmap },		/* bmap */
+		/* XXX strategy: panics, should be notsupp instead? */
+	{ &vop_strategy_desc, cd9660_strategy },/* strategy */
+	{ &vop_print_desc, cd9660_print },	/* print */
+	{ &vop_islocked_desc, cd9660_islocked },/* islocked */
+	{ &vop_pathconf_desc, spec_pathconf },	/* pathconf */
+	{ &vop_advlock_desc, spec_advlock },	/* advlock */
+	{ &vop_blkatoff_desc, spec_blkatoff },	/* blkatoff */
+	{ &vop_valloc_desc, spec_valloc },	/* valloc */
+	{ &vop_vfree_desc, spec_vfree },	/* vfree */
+	{ &vop_truncate_desc, spec_truncate },	/* truncate */
+	{ &vop_update_desc, cd9660_update },	/* update */
+	{ &vop_bwrite_desc, vn_bwrite },	/* bwrite */
+	{ (struct vnodeop_desc*)NULL, (int(*)())NULL }
+};
+/* ties the table to the vector pointer cd9660_specop_p */
+struct vnodeopv_desc cd9660_specop_opv_desc =
+	{ &cd9660_specop_p, cd9660_specop_entries };
+
+#ifdef FIFO
+/*
+ * Fifo vnode ops (built only when FIFO is defined)
+ *
+ * Same layout as the tables above, but most I/O-related operations are
+ * the fifo_* handlers and strategy is fifo_badop; the cd9660 handlers
+ * are kept for access, getattr, inactive, reclaim, the lock
+ * operations, print and the unsupported-operation macros.
+ */
+int (**cd9660_fifoop_p)();
+struct vnodeopv_entry_desc cd9660_fifoop_entries[] = {
+	{ &vop_default_desc, vn_default_error },
+	{ &vop_lookup_desc, fifo_lookup },	/* lookup */
+	{ &vop_create_desc, cd9660_create },	/* create */
+	{ &vop_mknod_desc, cd9660_mknod },	/* mknod */
+	{ &vop_open_desc, fifo_open },		/* open */
+	{ &vop_close_desc, fifo_close },	/* close */
+	{ &vop_access_desc, cd9660_access },	/* access */
+	{ &vop_getattr_desc, cd9660_getattr },	/* getattr */
+	{ &vop_setattr_desc, cd9660_setattr },	/* setattr */
+	{ &vop_read_desc, fifo_read },		/* read */
+	{ &vop_write_desc, fifo_write },	/* write */
+	{ &vop_ioctl_desc, fifo_ioctl },	/* ioctl */
+	{ &vop_select_desc, fifo_select },	/* select */
+	{ &vop_mmap_desc, fifo_mmap },		/* mmap */
+	{ &vop_fsync_desc, fifo_fsync },	/* fsync */
+	{ &vop_seek_desc, fifo_seek },		/* seek */
+	{ &vop_remove_desc, cd9660_remove },	/* remove */
+	{ &vop_link_desc, cd9660_link },	/* link */
+	{ &vop_rename_desc, cd9660_rename },	/* rename */
+	{ &vop_mkdir_desc, cd9660_mkdir },	/* mkdir */
+	{ &vop_rmdir_desc, cd9660_rmdir },	/* rmdir */
+	{ &vop_symlink_desc, cd9660_symlink },	/* symlink */
+	{ &vop_readdir_desc, fifo_readdir },	/* readdir */
+	{ &vop_readlink_desc, fifo_readlink },	/* readlink */
+	{ &vop_abortop_desc, fifo_abortop },	/* abortop */
+	{ &vop_inactive_desc, cd9660_inactive },/* inactive */
+	{ &vop_reclaim_desc, cd9660_reclaim },	/* reclaim */
+	{ &vop_lock_desc, cd9660_lock },	/* lock */
+	{ &vop_unlock_desc, cd9660_unlock },	/* unlock */
+	{ &vop_bmap_desc, fifo_bmap },		/* bmap */
+	{ &vop_strategy_desc, fifo_badop },	/* strategy */
+	{ &vop_print_desc, cd9660_print },	/* print */
+	{ &vop_islocked_desc, cd9660_islocked },/* islocked */
+	{ &vop_pathconf_desc, fifo_pathconf },	/* pathconf */
+	{ &vop_advlock_desc, fifo_advlock },	/* advlock */
+	{ &vop_blkatoff_desc, fifo_blkatoff },	/* blkatoff */
+	{ &vop_valloc_desc, fifo_valloc },	/* valloc */
+	{ &vop_vfree_desc, fifo_vfree },	/* vfree */
+	{ &vop_truncate_desc, fifo_truncate },	/* truncate */
+	{ &vop_update_desc, cd9660_update },	/* update */
+	{ &vop_bwrite_desc, vn_bwrite },	/* bwrite */
+	{ (struct vnodeop_desc*)NULL, (int(*)())NULL }
+};
+/* ties the table to the vector pointer cd9660_fifoop_p */
+struct vnodeopv_desc cd9660_fifoop_opv_desc =
+	{ &cd9660_fifoop_p, cd9660_fifoop_entries };
+#endif /* FIFO */
diff --git a/sys/fs/cd9660/iso.h b/sys/fs/cd9660/iso.h
new file mode 100644
index 00000000000..e3567066e1c
--- /dev/null
+++ b/sys/fs/cd9660/iso.h
@@ -0,0 +1,256 @@
+/*-
+ * Copyright (c) 1994
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley
+ * by Pace Willisson (pace@blitz.com).  The Rock Ridge Extension
+ * Support code is derived from software contributed to Berkeley
+ * by Atsushi Murai (amurai@spec.co.jp).
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)iso.h	8.2 (Berkeley) 1/23/94
+ */
+
+#define ISODCL(from, to) ((to) - (from) + 1)	/* size in bytes of the 1-based, inclusive field [from, to] */
+
+struct iso_volume_descriptor {	/* generic descriptor; the field ranges add up to 2048 bytes */
+	char type[ISODCL(1,1)]; /* 711 */
+	char id[ISODCL(2,6)];	/* 5 bytes, the same width as ISO_STANDARD_ID / ISO_ECMA_ID */
+	char version[ISODCL(7,7)];
+	char data[ISODCL(8,2048)];	/* remaining 2041 bytes, not broken down here */
+};
+
+/* volume descriptor types (presumably values of the one-byte `type' field -- confirm) */
+#define ISO_VD_PRIMARY 1
+#define ISO_VD_END 255
+
+#define ISO_STANDARD_ID "CD001"	/* 5 characters, as wide as the descriptor `id' field */
+#define ISO_ECMA_ID     "CDW01"	/* 5 characters, as wide as the descriptor `id' field */
+
+struct iso_primary_descriptor {	/* byte ranges are 1-based and tile 1..2048 with no gaps */
+	char type			[ISODCL (  1,   1)]; /* 711 */
+	char id				[ISODCL (  2,   6)];
+	char version			[ISODCL (  7,   7)]; /* 711 */
+	char unused1			[ISODCL (  8,   8)];
+	char system_id			[ISODCL (  9,  40)]; /* achars */
+	char volume_id			[ISODCL ( 41,  72)]; /* dchars */
+	char unused2			[ISODCL ( 73,  80)];
+	char volume_space_size		[ISODCL ( 81,  88)]; /* 733 */
+	char unused3			[ISODCL ( 89, 120)];
+	char volume_set_size		[ISODCL (121, 124)]; /* 723 */
+	char volume_sequence_number	[ISODCL (125, 128)]; /* 723 */
+	char logical_block_size		[ISODCL (129, 132)]; /* 723 */
+	char path_table_size		[ISODCL (133, 140)]; /* 733 */
+	char type_l_path_table		[ISODCL (141, 144)]; /* 731 */
+	char opt_type_l_path_table	[ISODCL (145, 148)]; /* 731 */
+	char type_m_path_table		[ISODCL (149, 152)]; /* 732 */
+	char opt_type_m_path_table	[ISODCL (153, 156)]; /* 732 */
+	char root_directory_record	[ISODCL (157, 190)]; /* 9.1; 34 bytes, same size as iso_mnt.root */
+	char volume_set_id		[ISODCL (191, 318)]; /* dchars */
+	char publisher_id		[ISODCL (319, 446)]; /* achars */
+	char preparer_id		[ISODCL (447, 574)]; /* achars */
+	char application_id		[ISODCL (575, 702)]; /* achars */
+	char copyright_file_id		[ISODCL (703, 739)]; /* 7.5 dchars */
+	char abstract_file_id		[ISODCL (740, 776)]; /* 7.5 dchars */
+	char bibliographic_file_id	[ISODCL (777, 813)]; /* 7.5 dchars */
+	char creation_date		[ISODCL (814, 830)]; /* 8.4.26.1 */
+	char modification_date		[ISODCL (831, 847)]; /* 8.4.26.1 */
+	char expiration_date		[ISODCL (848, 864)]; /* 8.4.26.1 */
+	char effective_date		[ISODCL (865, 881)]; /* 8.4.26.1 */
+	char file_structure_version	[ISODCL (882, 882)]; /* 711 */
+	char unused4			[ISODCL (883, 883)];
+	char application_data		[ISODCL (884, 1395)];
+	char unused5			[ISODCL (1396, 2048)];
+};
+#define ISO_DEFAULT_BLOCK_SIZE		2048	/* numerically equal to the descriptor size above */
+
+struct iso_directory_record {	/* the fixed part covers bytes 1..33 */
+	char length			[ISODCL (1, 1)]; /* 711 */
+	char ext_attr_length		[ISODCL (2, 2)]; /* 711 */
+	unsigned char extent		[ISODCL (3, 10)]; /* 733 */
+	unsigned char size		[ISODCL (11, 18)]; /* 733 */
+	char date			[ISODCL (19, 25)]; /* 7 by 711 */
+	char flags			[ISODCL (26, 26)];
+	char file_unit_size		[ISODCL (27, 27)]; /* 711 */
+	char interleave			[ISODCL (28, 28)]; /* 711 */
+	char volume_sequence_number	[ISODCL (29, 32)]; /* 723 */
+	char name_len			[ISODCL (33, 33)]; /* 711 */
+	char name			[0];	/* zero-length array marking where the name starts (length presumably in name_len) */
+};
+/* can't take sizeof(iso_directory_record), because of possible alignment
+   of the last entry (34 instead of 33) */
+#define ISO_DIRECTORY_RECORD_SIZE	33
+
+struct iso_extended_attributes {	/* byte ranges are 1-based and tile 1..250 with no gaps */
+	unsigned char owner		[ISODCL (1, 4)]; /* 723 */
+	unsigned char group		[ISODCL (5, 8)]; /* 723 */
+	unsigned char perm		[ISODCL (9, 10)]; /* 9.5.3 */
+	char ctime			[ISODCL (11, 27)]; /* 8.4.26.1 */
+	char mtime			[ISODCL (28, 44)]; /* 8.4.26.1 */
+	char xtime			[ISODCL (45, 61)]; /* 8.4.26.1 */
+	char ftime			[ISODCL (62, 78)]; /* 8.4.26.1 */
+	char recfmt			[ISODCL (79, 79)]; /* 711 */
+	char recattr			[ISODCL (80, 80)]; /* 711 */
+	unsigned char reclen		[ISODCL (81, 84)]; /* 723 */
+	char system_id			[ISODCL (85, 116)]; /* achars */
+	char system_use			[ISODCL (117, 180)];
+	char version			[ISODCL (181, 181)]; /* 711 */
+	char len_esc			[ISODCL (182, 182)]; /* 711 */
+	char reserved			[ISODCL (183, 246)];
+	unsigned char len_au		[ISODCL (247, 250)]; /* 723 */
+};
+
+/* CD-ROM format type; kept per mount in iso_mnt.iso_ftype (values 0..3 in declaration order) */
+enum ISO_FTYPE  { ISO_FTYPE_DEFAULT, ISO_FTYPE_9660, ISO_FTYPE_RRIP, ISO_FTYPE_ECMA };
+
+#ifndef	ISOFSMNT_ROOT
+#define	ISOFSMNT_ROOT	0	/* fallback value, used only when nothing defined it earlier */
+#endif
+
+struct iso_mnt {	/* per-mount state; reached from a struct mount through VFSTOISOFS() */
+	int im_flags;
+
+	struct mount *im_mountp;
+	dev_t im_dev;
+	struct vnode *im_devvp;
+
+	int logical_block_size;	/* what iso_blksize() evaluates to */
+	int im_bshift;		/* shift count used by iso_lblkno() and iso_lblktosize() */
+	int im_bmask;		/* mask used by iso_blkoff() */
+	
+	int volume_space_size;
+	char im_fsmnt[50];
+	struct netexport im_export;
+	
+	char root[ISODCL (157, 190)];	/* same size as iso_primary_descriptor.root_directory_record */
+	int root_extent;
+	int root_size;
+	enum ISO_FTYPE  iso_ftype;
+	
+	int rr_skip;
+	int rr_skip0;
+};
+
+#define VFSTOISOFS(mp)	((struct iso_mnt *)((mp)->mnt_data))	/* the mount's mnt_data viewed as its iso_mnt */
+
+#define iso_blkoff(imp, loc) ((loc) & (imp)->im_bmask)	/* loc masked with im_bmask (presumably the offset within a block) */
+#define iso_lblkno(imp, loc) ((loc) >> (imp)->im_bshift)	/* loc shifted down by im_bshift */
+#define iso_blksize(imp, ip, lbn) ((imp)->logical_block_size)	/* ip and lbn are ignored */
+#define iso_lblktosize(imp, blk) ((blk) << (imp)->im_bshift)	/* the opposite shift of iso_lblkno() */
+/* cd9660 mount-level entry points (mount, unmount, root, statfs, ...); defined outside this header. */
+int cd9660_mount __P((struct mount *,
+	    char *, caddr_t, struct nameidata *, struct proc *));
+int cd9660_start __P((struct mount *, int, struct proc *));
+int cd9660_unmount __P((struct mount *, int, struct proc *));
+int cd9660_root __P((struct mount *, struct vnode **));
+int cd9660_quotactl __P((struct mount *, int, uid_t, caddr_t, struct proc *));
+int cd9660_statfs __P((struct mount *, struct statfs *, struct proc *));
+int cd9660_sync __P((struct mount *, int, struct ucred *, struct proc *));
+int cd9660_vget __P((struct mount *, ino_t, struct vnode **));
+int cd9660_fhtovp __P((struct mount *, struct fid *, struct mbuf *,
+	    struct vnode **, int *, struct ucred **));
+int cd9660_vptofh __P((struct vnode *, struct fid *));
+int cd9660_init __P((void));	/* (void), not (): an empty list would leave the arguments unchecked */
+/* Node-level helpers; struct iso_node is only forward-declared because just pointers to it appear here. */
+struct iso_node;
+int iso_blkatoff __P((struct iso_node *ip, long offset, struct buf **bpp)); 
+int iso_iget __P((struct iso_node *xp, ino_t ino, int relocated,
+		  struct iso_node **ipp, struct iso_directory_record *isodir));
+int iso_iput __P((struct iso_node *ip)); 
+int iso_ilock __P((struct iso_node *ip)); 
+int iso_iunlock __P((struct iso_node *ip)); 
+int cd9660_mountroot __P((void)); 
+
+extern int (**cd9660_vnodeop_p)();	/* vnode operation vector pointer, defined elsewhere */
+/* Decode a "711" field: one byte, read as unsigned (0..255). */
+extern inline int
+isonum_711(p)
+	unsigned char *p;
+{
+	return p[0];
+}
+/* Decode a "712" field: one byte, read as signed (-128..127) whatever the signedness of plain char. */
+extern inline int
+isonum_712(p)
+	char *p;
+{
+	return *(signed char *)p;
+}
+/* Decode a "721" field: 16 bits, least significant byte first, unsigned (0..65535). */
+extern inline int
+isonum_721(p)
+	unsigned char *p;
+{
+	return p[0] | (p[1] << 8);	/* no (char) cast: it sign-extended values >= 0x8000 */
+}
+/* Decode a "722" field: 16 bits, most significant byte first, unsigned (0..65535). */
+extern inline int
+isonum_722(p)
+	unsigned char *p;
+{
+	return (p[0] << 8) | p[1];	/* no (char) cast: it sign-extended values >= 0x8000 */
+}
+/* Decode a "723" field by handing its first two bytes to isonum_721(). */
+extern inline int
+isonum_723(p)
+	unsigned char *p;
+{
+	return (isonum_721(p));
+}
+/* Decode a "731" field: 32 bits, LSB first; the top byte is shifted as unsigned to keep bit 31 defined. */
+extern inline int
+isonum_731(p)
+	unsigned char *p;
+{
+	return (int)(p[0] | (p[1] << 8) | (p[2] << 16) | ((unsigned)p[3] << 24));
+}
+/* Decode a "732" field: 32 bits, MSB first; the top byte is shifted as unsigned to keep bit 31 defined. */
+extern inline int
+isonum_732(p)
+	unsigned char *p;
+{
+	return (int)(((unsigned)p[0] << 24) | (p[1] << 16) | (p[2] << 8) | p[3]);
+}
+/* Decode a "733" field by handing its first four bytes to isonum_731(). */
+extern inline int
+isonum_733(p)
+	unsigned char *p;
+{
+	return (isonum_731(p));
+}
+/* File-name helpers defined elsewhere (presumably compare / translate ISO names -- confirm against the definitions). */
+int isofncmp __P((unsigned char *, int, unsigned char *, int));
+void isofntrans __P((unsigned char *, int, unsigned char *, unsigned short *,
+		     int, int));
+
+/*
+ * Associated files have a leading '='; ASSOCCHAR is that prefix character.
+ */
+#define	ASSOCCHAR	'='
diff --git a/sys/fs/cd9660/iso_rrip.h b/sys/fs/cd9660/iso_rrip.h
new file mode 100644
index 00000000000..78e4a775201
--- /dev/null
+++ b/sys/fs/cd9660/iso_rrip.h
@@ -0,0 +1,83 @@
+/*-
+ * Copyright (c) 1993, 1994
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley
+ * by Pace Willisson (pace@blitz.com).  The Rock Ridge Extension
+ * Support code is derived from software contributed to Berkeley
+ * by Atsushi Murai (amurai@spec.co.jp).
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)iso_rrip.h	8.2 (Berkeley) 1/23/94
+ */
+
+
+/*
+ *	Analyze function flags (similar to RR field bits): one bit each; 0x2000 and 0x4000 are unassigned.
+ */
+#define	ISO_SUSP_ATTR		0x0001
+#define	ISO_SUSP_DEVICE		0x0002
+#define	ISO_SUSP_SLINK		0x0004
+#define	ISO_SUSP_ALTNAME	0x0008
+#define	ISO_SUSP_CLINK		0x0010
+#define	ISO_SUSP_PLINK		0x0020
+#define	ISO_SUSP_RELDIR		0x0040
+#define	ISO_SUSP_TSTAMP		0x0080
+#define	ISO_SUSP_IDFLAG		0x0100
+#define	ISO_SUSP_EXTREF		0x0200
+#define	ISO_SUSP_CONT		0x0400
+#define	ISO_SUSP_OFFSET		0x0800
+#define	ISO_SUSP_STOP		0x1000
+#define	ISO_SUSP_UNKNOWN	0x8000
+
+typedef struct {	/* presumably the working state of one analysis pass -- confirm against the users */
+	struct iso_node	*inop;		/* iso_node this analysis refers to */
+	int		fields;		/* interesting fields in this analysis */
+	daddr_t		iso_ce_blk;	/* block of continuation area */
+	off_t		iso_ce_off;	/* offset of continuation area */
+	int		iso_ce_len;	/* length of continuation area */
+	struct iso_mnt	*imp;		/* mount structure */
+	ino_t		*inump;		/* inode number pointer */
+	char		*outbuf;	/* name/symbolic link output area */
+	u_short		*outlen;	/* length of above */
+	u_short		maxlen;		/* maximum length of above */
+	int		cont;		/* continuation of above */
+} ISO_RRIP_ANALYZE;
+/* Rock Ridge (RRIP) entry points; each one takes the directory record and the mount's iso_mnt. */
+int cd9660_rrip_analyze __P((struct iso_directory_record *isodir,
+			    struct iso_node *inop, struct iso_mnt *imp));
+int cd9660_rrip_getname __P((struct iso_directory_record *isodir,
+			    char *outbuf, u_short *outlen,
+			    ino_t *inump, struct iso_mnt *imp));
+int cd9660_rrip_getsymname __P((struct iso_directory_record *isodir,
+			       char *outbuf, u_short *outlen,
+			       struct iso_mnt *imp));
+int cd9660_rrip_offset __P((struct iso_directory_record *isodir,
+			   struct iso_mnt *imp));
diff --git a/sys/fs/deadfs/dead_vnops.c b/sys/fs/deadfs/dead_vnops.c
new file mode 100644
index 00000000000..9d04652b7fc
--- /dev/null
+++ b/sys/fs/deadfs/dead_vnops.c
@@ -0,0 +1,354 @@
+/*
+ * Copyright (c) 1989, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)dead_vnops.c	8.1 (Berkeley) 6/10/93
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/time.h>
+#include <sys/vnode.h>
+#include <sys/errno.h>
+#include <sys/namei.h>
+#include <sys/buf.h>
+
+/*
+ * Prototypes for dead operations on vnodes; each #define aliases an op to a shared stub through a cast.
+ */
+int	dead_badop(),
+	dead_ebadf();
+int	dead_lookup __P((struct vop_lookup_args *));
+#define dead_create ((int (*) __P((struct  vop_create_args *)))dead_badop)
+#define dead_mknod ((int (*) __P((struct  vop_mknod_args *)))dead_badop)
+int	dead_open __P((struct vop_open_args *));
+#define dead_close ((int (*) __P((struct  vop_close_args *)))nullop)
+#define dead_access ((int (*) __P((struct  vop_access_args *)))dead_ebadf)
+#define dead_getattr ((int (*) __P((struct  vop_getattr_args *)))dead_ebadf)
+#define dead_setattr ((int (*) __P((struct  vop_setattr_args *)))dead_ebadf)
+int	dead_read __P((struct vop_read_args *));
+int	dead_write __P((struct vop_write_args *));
+int	dead_ioctl __P((struct vop_ioctl_args *));
+int	dead_select __P((struct vop_select_args *));
+#define dead_mmap ((int (*) __P((struct  vop_mmap_args *)))dead_badop)
+#define dead_fsync ((int (*) __P((struct  vop_fsync_args *)))nullop)
+#define dead_seek ((int (*) __P((struct  vop_seek_args *)))nullop)
+#define dead_remove ((int (*) __P((struct  vop_remove_args *)))dead_badop)
+#define dead_link ((int (*) __P((struct  vop_link_args *)))dead_badop)
+#define dead_rename ((int (*) __P((struct  vop_rename_args *)))dead_badop)
+#define dead_mkdir ((int (*) __P((struct  vop_mkdir_args *)))dead_badop)
+#define dead_rmdir ((int (*) __P((struct  vop_rmdir_args *)))dead_badop)
+#define dead_symlink ((int (*) __P((struct  vop_symlink_args *)))dead_badop)
+#define dead_readdir ((int (*) __P((struct  vop_readdir_args *)))dead_ebadf)
+#define dead_readlink ((int (*) __P((struct  vop_readlink_args *)))dead_ebadf)
+#define dead_abortop ((int (*) __P((struct  vop_abortop_args *)))dead_badop)
+#define dead_inactive ((int (*) __P((struct  vop_inactive_args *)))nullop)
+#define dead_reclaim ((int (*) __P((struct  vop_reclaim_args *)))nullop)
+int	dead_lock __P((struct vop_lock_args *));
+#define dead_unlock ((int (*) __P((struct  vop_unlock_args *)))nullop)
+int	dead_bmap __P((struct vop_bmap_args *));
+int	dead_strategy __P((struct vop_strategy_args *));
+int	dead_print __P((struct vop_print_args *));
+#define dead_islocked ((int (*) __P((struct  vop_islocked_args *)))nullop)
+#define dead_pathconf ((int (*) __P((struct  vop_pathconf_args *)))dead_ebadf)
+#define dead_advlock ((int (*) __P((struct  vop_advlock_args *)))dead_ebadf)
+#define dead_blkatoff ((int (*) __P((struct  vop_blkatoff_args *)))dead_badop)
+#define dead_valloc ((int (*) __P((struct  vop_valloc_args *)))dead_badop)
+#define dead_vfree ((int (*) __P((struct  vop_vfree_args *)))dead_badop)
+#define dead_truncate ((int (*) __P((struct  vop_truncate_args *)))nullop)
+#define dead_update ((int (*) __P((struct  vop_update_args *)))nullop)
+#define dead_bwrite ((int (*) __P((struct  vop_bwrite_args *)))nullop)
+
+int (**dead_vnodeop_p)();	/* op vector pointer named by dead_vnodeop_opv_desc below */
+struct vnodeopv_entry_desc dead_vnodeop_entries[] = {
+	{ &vop_default_desc, vn_default_error },	/* default */
+	{ &vop_lookup_desc, dead_lookup },	/* lookup */
+	{ &vop_create_desc, dead_create },	/* create */
+	{ &vop_mknod_desc, dead_mknod },	/* mknod */
+	{ &vop_open_desc, dead_open },	/* open */
+	{ &vop_close_desc, dead_close },	/* close */
+	{ &vop_access_desc, dead_access },	/* access */
+	{ &vop_getattr_desc, dead_getattr },	/* getattr */
+	{ &vop_setattr_desc, dead_setattr },	/* setattr */
+	{ &vop_read_desc, dead_read },	/* read */
+	{ &vop_write_desc, dead_write },	/* write */
+	{ &vop_ioctl_desc, dead_ioctl },	/* ioctl */
+	{ &vop_select_desc, dead_select },	/* select */
+	{ &vop_mmap_desc, dead_mmap },	/* mmap */
+	{ &vop_fsync_desc, dead_fsync },	/* fsync */
+	{ &vop_seek_desc, dead_seek },	/* seek */
+	{ &vop_remove_desc, dead_remove },	/* remove */
+	{ &vop_link_desc, dead_link },	/* link */
+	{ &vop_rename_desc, dead_rename },	/* rename */
+	{ &vop_mkdir_desc, dead_mkdir },	/* mkdir */
+	{ &vop_rmdir_desc, dead_rmdir },	/* rmdir */
+	{ &vop_symlink_desc, dead_symlink },	/* symlink */
+	{ &vop_readdir_desc, dead_readdir },	/* readdir */
+	{ &vop_readlink_desc, dead_readlink },	/* readlink */
+	{ &vop_abortop_desc, dead_abortop },	/* abortop */
+	{ &vop_inactive_desc, dead_inactive },	/* inactive */
+	{ &vop_reclaim_desc, dead_reclaim },	/* reclaim */
+	{ &vop_lock_desc, dead_lock },	/* lock */
+	{ &vop_unlock_desc, dead_unlock },	/* unlock */
+	{ &vop_bmap_desc, dead_bmap },	/* bmap */
+	{ &vop_strategy_desc, dead_strategy },	/* strategy */
+	{ &vop_print_desc, dead_print },	/* print */
+	{ &vop_islocked_desc, dead_islocked },	/* islocked */
+	{ &vop_pathconf_desc, dead_pathconf },	/* pathconf */
+	{ &vop_advlock_desc, dead_advlock },	/* advlock */
+	{ &vop_blkatoff_desc, dead_blkatoff },	/* blkatoff */
+	{ &vop_valloc_desc, dead_valloc },	/* valloc */
+	{ &vop_vfree_desc, dead_vfree },	/* vfree */
+	{ &vop_truncate_desc, dead_truncate },	/* truncate */
+	{ &vop_update_desc, dead_update },	/* update */
+	{ &vop_bwrite_desc, dead_bwrite },	/* bwrite */
+	{ (struct vnodeop_desc*)NULL, (int(*)())NULL }	/* NULL pair ends the table */
+};
+struct vnodeopv_desc dead_vnodeop_opv_desc =	/* pairs the op-vector pointer with the table above */
+	{ &dead_vnodeop_p, dead_vnodeop_entries };
+
+/*
+ * Lookup on a dead vnode: hand back no vnode and fail with ENOTDIR.
+ */
+/* ARGSUSED */
+int
+dead_lookup(ap)
+	struct vop_lookup_args /* {
+		struct vnode * a_dvp;
+		struct vnode ** a_vpp;
+		struct componentname * a_cnp;
+	} */ *ap;
+{
+
+	*(ap->a_vpp) = NULL;
+	return (ENOTDIR);
+}
+
+/*
+ * Opening a dead vnode fails with ENXIO, as if the device did not exist.
+ */
+/* ARGSUSED */
+int
+dead_open(ap)
+	struct vop_open_args /* {
+		struct vnode *a_vp;
+		int  a_mode;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	return (ENXIO);
+}
+
+/*
+ * Read on a dead vnode; panics if chkvnlock() reports that it had to wait.
+ */
+/* ARGSUSED */
+int
+dead_read(ap)
+	struct vop_read_args /* {
+		struct vnode *a_vp;
+		struct uio *a_uio;
+		int  a_ioflag;
+		struct ucred *a_cred;
+	} */ *ap;
+{
+
+	if (chkvnlock(ap->a_vp))
+		panic("dead_read: lock");
+	/*
+	 * Character devices read as EOF (return 0 without moving
+	 * any data); every other vnode type gets EIO.
+	 */
+	return (ap->a_vp->v_type == VCHR ? 0 : EIO);
+}
+
+/*
+ * Write on a dead vnode: always EIO; panics if chkvnlock() had to wait.
+ */
+/* ARGSUSED */
+int
+dead_write(ap)
+	struct vop_write_args /* {
+		struct vnode *a_vp;
+		struct uio *a_uio;
+		int  a_ioflag;
+		struct ucred *a_cred;
+	} */ *ap;
+{
+	if (chkvnlock(ap->a_vp) != 0)
+		panic("dead_write: lock");
+	return (EIO);
+}
+
+/*
+ * ioctl: EBADF, unless chkvnlock() had to wait; then re-issue through VCALL().
+ */
+/* ARGSUSED */
+int
+dead_ioctl(ap)
+	struct vop_ioctl_args /* {
+		struct vnode *a_vp;
+		int  a_command;
+		caddr_t  a_data;
+		int  a_fflag;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	if (chkvnlock(ap->a_vp))
+		return (VCALL(ap->a_vp, VOFFSET(vop_ioctl), ap));
+	return (EBADF);
+}
+
+/*
+ * Select always returns 1 so the user finds out the descriptor is gone.
+ */
+/* ARGSUSED */
+int
+dead_select(ap)
+	struct vop_select_args /* {
+		struct vnode *a_vp;
+		int  a_which;
+		int  a_fflags;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	return (1);
+}
+
+/*
+ * Fail the buffer with EIO (B_ERROR + biodone) unless its vnode was in
+ * transition; in that case, after waiting, pass it on via VOP_STRATEGY().
+ */
+int
+dead_strategy(ap)
+	struct vop_strategy_args /* {
+		struct buf *a_bp;
+	} */ *ap;
+{
+	if (ap->a_bp->b_vp != NULL && chkvnlock(ap->a_bp->b_vp))
+		return (VOP_STRATEGY(ap->a_bp));
+	ap->a_bp->b_flags |= B_ERROR;
+	biodone(ap->a_bp);
+	return (EIO);
+}
+
+/*
+ * Lock: wait out any state change; if we waited, re-issue through VCALL().
+ */
+int
+dead_lock(ap)
+	struct vop_lock_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+	if (chkvnlock(ap->a_vp))
+		return (VCALL(ap->a_vp, VOFFSET(vop_lock), ap));
+	return (0);
+}
+
+/*
+ * bmap: EIO, unless chkvnlock() had to wait; then re-issue via VOP_BMAP().
+ */
+int
+dead_bmap(ap)
+	struct vop_bmap_args /* {
+		struct vnode *a_vp;
+		daddr_t  a_bn;
+		struct vnode **a_vpp;
+		daddr_t *a_bnp;
+		int *a_runp;
+	} */ *ap;
+{
+	if (chkvnlock(ap->a_vp))
+		return (VOP_BMAP(ap->a_vp, ap->a_bn, ap->a_vpp, ap->a_bnp, ap->a_runp));
+	return (EIO);
+}
+
+/* Print out the contents of a dead vnode; always returns 0 (it used to return nothing). */
+/* ARGSUSED */
+int
+dead_print(ap)
+	struct vop_print_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+
+	printf("tag VT_NON, dead vnode\n");
+	return (0);
+}
+
+/*
+ * Shared stub for dead-vnode operations that simply fail with EBADF.
+ */
+int
+dead_ebadf()
+{
+	return (EBADF);
+}
+
+/*
+ * Shared stub for operations that must never reach a dead vnode: panics.
+ */
+int
+dead_badop()
+{
+	panic("dead_badop called");
+	/* NOTREACHED */
+}
+
+/*
+ * Do-nothing stub that reports success (0).
+ */
+int
+dead_nullop()
+{
+	return (0);
+}
+
+/*
+ * While VXLOCK is set on the vnode, set VXWANT and sleep on it.
+ * Returns 1 if we had to sleep at least once, 0 otherwise.
+ */
+int
+chkvnlock(vp)
+	register struct vnode *vp;
+{
+	int slept;
+
+	for (slept = 0; vp->v_flag & VXLOCK; slept = 1) {
+		vp->v_flag |= VXWANT;
+		sleep((caddr_t)vp, PINOD);
+	}
+	return (slept);
+}
diff --git a/sys/fs/fdescfs/fdesc.h b/sys/fs/fdescfs/fdesc.h
new file mode 100644
index 00000000000..4c682e7bd37
--- /dev/null
+++ b/sys/fs/fdescfs/fdesc.h
@@ -0,0 +1,82 @@
+/*
+ * Copyright (c) 1992, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software donated to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)fdesc.h	8.5 (Berkeley) 1/21/94
+ *
+ * $Id: fdesc.h,v 1.8 1993/04/06 15:28:33 jsp Exp $
+ */
+
+#ifdef KERNEL
+struct fdescmount {	/* per-mount data; reached from a struct mount through VFSTOFDESC() */
+	struct vnode	*f_root;	/* Root node */
+};
+
+#define FD_ROOT		2	/* passed by fdesc_mount() as fdesc_allocvp()'s int argument for the root */
+#define FD_DEVFD	3
+#define FD_STDIN	4
+#define FD_STDOUT	5
+#define FD_STDERR	6
+#define FD_CTTY		7
+#define FD_DESC		8
+#define FD_MAX		12	/* NOTE(review): presumably an upper bound on these values -- confirm */
+
+typedef enum {		/* kind of node; stored in fdescnode.fd_type */
+	Froot,		/* the kind fdesc_mount() requests for the mount's root vnode */
+	Fdevfd,
+	Fdesc,
+	Flink,
+	Fctty
+} fdntype;
+
+struct fdescnode {	/* per-vnode data; reached from a vnode through VTOFDESC() */
+	struct fdescnode *fd_forw;	/* Hash chain */
+	struct fdescnode *fd_back;
+	struct vnode	*fd_vnode;	/* Back ptr to vnode */
+	fdntype		fd_type;	/* Type of this node */
+	unsigned	fd_fd;		/* Fd to be dup'ed */
+	char		*fd_link;	/* Link to fd/n */
+	int		fd_ix;		/* filesystem index */
+};
+
+#define VFSTOFDESC(mp)	((struct fdescmount *)((mp)->mnt_data))	/* the mount's mnt_data viewed as its fdescmount */
+#define	VTOFDESC(vp) ((struct fdescnode *)(vp)->v_data)	/* the vnode's v_data viewed as its fdescnode */
+
+extern dev_t devctty;
+extern int fdesc_init __P((void));	/* referenced as the last member of fdesc_vfsops */
+extern int fdesc_root __P((struct mount *, struct vnode **));
+extern int fdesc_allocvp __P((fdntype, int, struct mount *, struct vnode **));	/* the last argument receives the vnode */
+extern int (**fdesc_vnodeop_p)();
+extern struct vfsops fdesc_vfsops;
+#endif /* KERNEL */
diff --git a/sys/fs/fdescfs/fdesc_vfsops.c b/sys/fs/fdescfs/fdesc_vfsops.c
new file mode 100644
index 00000000000..80c543da655
--- /dev/null
+++ b/sys/fs/fdescfs/fdesc_vfsops.c
@@ -0,0 +1,288 @@
+/*
+ * Copyright (c) 1992, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software donated to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)fdesc_vfsops.c	8.4 (Berkeley) 1/21/94
+ *
+ * $Id: fdesc_vfsops.c,v 1.9 1993/04/06 15:28:33 jsp Exp $
+ */
+
+/*
+ * /dev/fd Filesystem
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/proc.h>
+#include <sys/resourcevar.h>
+#include <sys/filedesc.h>
+#include <sys/vnode.h>
+#include <sys/mount.h>
+#include <sys/namei.h>
+#include <sys/malloc.h>
+#include <miscfs/fdesc/fdesc.h>
+
+/*
+ * Mount the per-process file descriptor file system (/dev/fd).
+ */
+int
+fdesc_mount(mp, path, data, ndp, p)
+	struct mount *mp;
+	char *path;
+	caddr_t data;
+	struct nameidata *ndp;
+	struct proc *p;
+{
+	struct fdescmount *fdmnt;
+	struct vnode *rootvp;
+	u_int len;
+	int error;
+
+	/*
+	 * Updating an existing mount is not supported.
+	 */
+	if (mp->mnt_flag & MNT_UPDATE)
+		return (EOPNOTSUPP);
+
+	if ((error = fdesc_allocvp(Froot, FD_ROOT, mp, &rootvp)) != 0)
+		return (error);
+
+	MALLOC(fdmnt, struct fdescmount *, sizeof(struct fdescmount),
+				M_UFSMNT, M_WAITOK);	/* XXX */
+	rootvp->v_type = VDIR;
+	rootvp->v_flag |= VROOT;
+	fdmnt->f_root = rootvp;
+	/* XXX -- don't mark as local to work around fts() problems */
+	/*mp->mnt_flag |= MNT_LOCAL;*/
+	mp->mnt_data = (qaddr_t) fdmnt;
+	getnewfsid(mp, MOUNT_FDESC);
+
+	/* NOTE(review): copyinstr()'s result is discarded; len is used regardless. */
+	(void) copyinstr(path, mp->mnt_stat.f_mntonname, MNAMELEN - 1, &len);
+	bzero(mp->mnt_stat.f_mntonname + len, MNAMELEN - len);
+	bzero(mp->mnt_stat.f_mntfromname, MNAMELEN);
+	bcopy("fdesc", mp->mnt_stat.f_mntfromname, sizeof("fdesc"));
+	return (0);
+}
+
+int
+fdesc_start(mp, flags, p)
+	struct mount *mp;
+	int flags;
+	struct proc *p;
+{
+	return (0);	/* nothing to do; none of the arguments is used */
+}
+
+int
+fdesc_unmount(mp, mntflags, p)
+	struct mount *mp;
+	int mntflags;
+	struct proc *p;
+{
+	extern int doforce;
+	struct vnode *rootvp = VFSTOFDESC(mp)->f_root;
+	int flushflags = 0;
+	int error;
+
+	if (mntflags & MNT_FORCE) {
+		/* fdesc can never be rootfs so don't check for it */
+		if (!doforce)
+			return (EINVAL);
+		flushflags |= FORCECLOSE;
+	}
+
+	/*
+	 * Busy if the root vnode has more than one reference; otherwise
+	 * flush the mount (nothing is believed to be cached here now).
+	 */
+	if (rootvp->v_usecount > 1)
+		return (EBUSY);
+	error = vflush(mp, rootvp, flushflags);
+	if (error != 0)
+		return (error);
+
+	/*
+	 * Drop the reference held on the root vnode
+	 */
+	vrele(rootvp);
+	/*
+	 * ... and get rid of the vnode so it can be re-used
+	 */
+	vgone(rootvp);
+	/*
+	 * Last, free the fdescmount structure and clear the pointer to it
+	 */
+	free(mp->mnt_data, M_UFSMNT);	/* XXX */
+	mp->mnt_data = 0;
+
+	return (0);
+}
+
+int
+fdesc_root(mp, vpp)
+	struct mount *mp;
+	struct vnode **vpp;
+{
+	struct vnode *rootvp = VFSTOFDESC(mp)->f_root;
+
+	/*
+	 * Take a new reference on the mount's root vnode,
+	 * lock it, and hand it back through vpp.
+	 */
+	VREF(rootvp);
+	VOP_LOCK(rootvp);
+	*vpp = rootvp;
+	return (0);
+}
+
+int
+fdesc_quotactl(mp, cmd, uid, arg, p)
+	struct mount *mp;
+	int cmd;
+	uid_t uid;
+	caddr_t arg;
+	struct proc *p;
+{
+	/* Always fails with EOPNOTSUPP; none of the arguments is looked at. */
+	return (EOPNOTSUPP);
+}
+
+int
+fdesc_statfs(mp, sbp, p)
+	struct mount *mp;
+	struct statfs *sbp;
+	struct proc *p;
+{
+	struct filedesc *fdp = p->p_fd;
+	int lim = p->p_rlimit[RLIMIT_NOFILE].rlim_cur;
+	int last = min(fdp->fd_nfiles, lim);
+	int nfree = 0;
+	int fd;
+
+	/*
+	 * Count the free descriptor slots: scan the open-file table
+	 * from fd_freefile up to the smaller of the table size and
+	 * the process's current RLIMIT_NOFILE value.
+	 * [ Strange results will ensue if the open file
+	 * limit is ever reduced below the current number
+	 * of open files... ]
+	 */
+	for (fd = fdp->fd_freefile; fd < last; fd++) {
+		if (fdp->fd_ofiles[fd] == NULL)
+			nfree++;
+	}
+
+	/*
+	 * Slots between the current table size and the limit have
+	 * not been allocated yet; count every one of them as free.
+	 */
+	if (fdp->fd_nfiles < lim)
+		nfree += (lim - fdp->fd_nfiles);
+
+	sbp->f_type = MOUNT_FDESC;
+	sbp->f_flags = 0;
+	sbp->f_bsize = DEV_BSIZE;
+	sbp->f_iosize = DEV_BSIZE;
+	sbp->f_blocks = 2;		/* 1K to keep df happy */
+	sbp->f_bfree = 0;
+	sbp->f_bavail = 0;
+	sbp->f_files = lim + 1;		/* Allow for "." */
+	sbp->f_ffree = nfree;		/* See comments above */
+	if (sbp != &mp->mnt_stat) {
+		bcopy(&mp->mnt_stat.f_fsid, &sbp->f_fsid, sizeof(sbp->f_fsid));
+		bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN);
+		bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN);
+	}
+
+	return (0);
+}
+
+int
+fdesc_sync(mp, waitfor)
+	struct mount *mp;
+	int waitfor;
+{
+	/* No-op.  NOTE(review): cd9660_sync is prototyped as (mp, waitfor, cred, p) -- confirm the arity. */
+	return (0);
+}
+
+/*
+ * Fdesc flat namespace lookup.
+ * Currently unsupported: always returns EOPNOTSUPP.
+ */
+int
+fdesc_vget(mp, ino, vpp)
+	struct mount *mp;
+	ino_t ino;
+	struct vnode **vpp;
+{
+
+	return (EOPNOTSUPP);
+}
+
+int
+fdesc_fhtovp(mp, fhp, setgen, vpp)
+	struct mount *mp;
+	struct fid *fhp;
+	int setgen;
+	struct vnode **vpp;
+{
+	return (EOPNOTSUPP);	/* always fails.  NOTE(review): cd9660_fhtovp takes six parameters -- confirm the arity */
+}
+
+int
+fdesc_vptofh(vp, fhp)
+	struct vnode *vp;
+	struct fid *fhp;
+{
+	/* Always fails with EOPNOTSUPP; neither argument is looked at. */
+	return (EOPNOTSUPP);
+}
+
+struct vfsops fdesc_vfsops = {	/* positional initializer: the order must match struct vfsops */
+	fdesc_mount,
+	fdesc_start,
+	fdesc_unmount,
+	fdesc_root,
+	fdesc_quotactl,
+	fdesc_statfs,
+	fdesc_sync,
+	fdesc_vget,
+	fdesc_fhtovp,
+	fdesc_vptofh,
+	fdesc_init,	/* declared in fdesc.h; not defined in this file */
+};
diff --git a/sys/fs/fdescfs/fdesc_vnops.c b/sys/fs/fdescfs/fdesc_vnops.c
new file mode 100644
index 00000000000..00d8675aea2
--- /dev/null
+++ b/sys/fs/fdescfs/fdesc_vnops.c
@@ -0,0 +1,974 @@
+/*
+ * Copyright (c) 1992, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software donated to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)fdesc_vnops.c	8.9 (Berkeley) 1/21/94
+ *
+ * $Id: fdesc_vnops.c,v 1.12 1993/04/06 16:17:17 jsp Exp $
+ */
+
+/*
+ * /dev/fd Filesystem
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/types.h>
+#include <sys/time.h>
+#include <sys/proc.h>
+#include <sys/kernel.h>	/* boottime */
+#include <sys/resourcevar.h>
+#include <sys/filedesc.h>
+#include <sys/vnode.h>
+#include <sys/malloc.h>
+#include <sys/file.h>
+#include <sys/stat.h>
+#include <sys/mount.h>
+#include <sys/namei.h>
+#include <sys/buf.h>
+#include <sys/dirent.h>
+#include <miscfs/fdesc/fdesc.h>
+
+#define cttyvp(p) ((p)->p_flag & P_CONTROLT ? (p)->p_session->s_ttyvp : NULL)
+
+#define FDL_WANT	0x01	/* someone slept waiting for fdcache_lock */
+#define FDL_LOCKED	0x02	/* fdcache_lock is held */
+static int fdcache_lock;	/* FDL_* bits; taken in fdesc_allocvp() */
+
+dev_t devctty;			/* set in fdesc_init(); passed to ctty*() */
+
+#if (FD_STDIN != FD_STDOUT-1) || (FD_STDOUT != FD_STDERR-1)
+FD_STDIN, FD_STDOUT, FD_STDERR must be a sequence n, n+1, n+2
+#endif
+
+#define	NFDCACHE 4		/* number of hash chains; a power of two */
+#define	FD_NHASH(ix) ((ix) & (NFDCACHE - 1))	/* always < NFDCACHE */
+
+/*
+ * Cache head: list head of one hash chain (see fdesc_init, fdesc_allocvp).
+ */
+struct fdcache {
+	struct fdescnode	*fc_forw;
+	struct fdescnode	*fc_back;
+};
+
+static struct fdcache fdcache[NFDCACHE];	/* indexed by FD_NHASH() */
+
+/* Initialise the cache headers and devctty; always returns 0. */
+int
+fdesc_init()
+{
+	struct fdcache *fc;
+
+	devctty = makedev(nchrdev, 0);
+	/* An empty chain is a head whose links point back at itself. */
+	for (fc = fdcache; fc < fdcache + NFDCACHE; fc++)
+		fc->fc_forw = fc->fc_back = (struct fdescnode *) fc;
+	return (0);
+}
+
+/*
+ * Map a node index to the head of its hash chain in fdcache[]
+ */
+static struct fdcache *
+fdesc_hash(ix)
+	int ix;
+{
+
+	return (&fdcache[FD_NHASH(ix)]);
+}
+
+int
+fdesc_allocvp(ftype, ix, mp, vpp)
+	fdntype ftype;
+	int ix;
+	struct mount *mp;
+	struct vnode **vpp;
+{
+	struct fdcache *fc;
+	struct fdescnode *fd;
+	int error = 0;
+	/* Return in *vpp the node with index ix on mp, creating it if needed. */
+loop:
+	fc = fdesc_hash(ix);
+	for (fd = fc->fc_forw; fd != (struct fdescnode *) fc; fd = fd->fd_forw) {
+		if (fd->fd_ix == ix && fd->fd_vnode->v_mount == mp) {
+			if (vget(fd->fd_vnode, 0))
+				goto loop;	/* vget failed: rescan */
+			*vpp = fd->fd_vnode;
+			return (error);	/* cached node; ftype is not applied */
+		}
+	}
+
+	/*
+	 * Not cached.  Take fdcache_lock while we call getnewvnode since
+	 * that can block; if the lock is busy, sleep and rescan from loop.
+	 */
+	if (fdcache_lock & FDL_LOCKED) {
+		fdcache_lock |= FDL_WANT;
+		sleep((caddr_t) &fdcache_lock, PINOD);
+		goto loop;
+	}
+	fdcache_lock |= FDL_LOCKED;
+
+	error = getnewvnode(VT_FDESC, mp, fdesc_vnodeop_p, vpp);
+	if (error)
+		goto out;
+	MALLOC(fd, void *, sizeof(struct fdescnode), M_TEMP, M_WAITOK);
+	(*vpp)->v_data = fd;
+	fd->fd_vnode = *vpp;
+	fd->fd_type = ftype;
+	fd->fd_fd = -1;		/* fdesc_lookup() fills this in for Fdesc */
+	fd->fd_link = 0;	/* fdesc_lookup() fills this in for Flink */
+	fd->fd_ix = ix;
+	fc = fdesc_hash(ix);
+	insque(fd, fc);
+
+out:;
+	fdcache_lock &= ~FDL_LOCKED;
+	/* Wake whoever went to sleep above while we held the lock. */
+	if (fdcache_lock & FDL_WANT) {
+		fdcache_lock &= ~FDL_WANT;
+		wakeup((caddr_t) &fdcache_lock);
+	}
+
+	return (error);
+}
+
+/*
+ * ap->a_dvp is the current namei directory and ap->a_cnp names the entry
+ * to locate in it; the vnode found is returned through *ap->a_vpp.
+ */
+int
+fdesc_lookup(ap)
+	struct vop_lookup_args /* {
+		struct vnode * a_dvp;
+		struct vnode ** a_vpp;
+		struct componentname * a_cnp;
+	} */ *ap;
+{
+	struct vnode **vpp = ap->a_vpp;
+	struct vnode *dvp = ap->a_dvp;
+	char *pname;
+	struct proc *p;
+	int nfiles;
+	unsigned fd;
+	int error;
+	struct vnode *fvp;
+	char *ln;
+	/* "." resolves to the directory itself. */
+	pname = ap->a_cnp->cn_nameptr;
+	if (ap->a_cnp->cn_namelen == 1 && *pname == '.') {
+		*vpp = dvp;
+		VREF(dvp);	
+		VOP_LOCK(dvp);
+		return (0);
+	}
+
+	p = ap->a_cnp->cn_proc;
+	nfiles = p->p_fd->fd_nfiles;
+	/* Which names exist depends on the kind of node dvp is. */
+	switch (VTOFDESC(dvp)->fd_type) {
+	default:
+	case Flink:
+	case Fdesc:
+	case Fctty:
+		error = ENOTDIR;
+		goto bad;
+
+	case Froot:	/* "fd", "tty", "stdin", "stdout" or "stderr" */
+		if (ap->a_cnp->cn_namelen == 2 && bcmp(pname, "fd", 2) == 0) {
+			error = fdesc_allocvp(Fdevfd, FD_DEVFD, dvp->v_mount, &fvp);
+			if (error)
+				goto bad;
+			*vpp = fvp;
+			fvp->v_type = VDIR;
+			VOP_LOCK(fvp);
+			return (0);
+		}
+
+		if (ap->a_cnp->cn_namelen == 3 && bcmp(pname, "tty", 3) == 0) {
+			struct vnode *ttyvp = cttyvp(p);
+			if (ttyvp == NULL) {
+				error = ENXIO;
+				goto bad;
+			}
+			error = fdesc_allocvp(Fctty, FD_CTTY, dvp->v_mount, &fvp);
+			if (error)
+				goto bad;
+			*vpp = fvp;
+			fvp->v_type = VFIFO;
+			VOP_LOCK(fvp);
+			return (0);
+		}
+		/* The std* names are symbolic links to "fd/0".."fd/2". */
+		ln = 0;
+		switch (ap->a_cnp->cn_namelen) {
+		case 5:
+			if (bcmp(pname, "stdin", 5) == 0) {
+				ln = "fd/0";
+				fd = FD_STDIN;
+			}
+			break;
+		case 6:
+			if (bcmp(pname, "stdout", 6) == 0) {
+				ln = "fd/1";
+				fd = FD_STDOUT;
+			} else
+			if (bcmp(pname, "stderr", 6) == 0) {
+				ln = "fd/2";
+				fd = FD_STDERR;
+			}
+			break;
+		}
+
+		if (ln) {
+			error = fdesc_allocvp(Flink, fd, dvp->v_mount, &fvp);
+			if (error)
+				goto bad;
+			VTOFDESC(fvp)->fd_link = ln;
+			*vpp = fvp;
+			fvp->v_type = VLNK;
+			VOP_LOCK(fvp);
+			return (0);
+		} else {
+			error = ENOENT;
+			goto bad;
+		}
+
+		/* NOTREACHED: both branches above leave the function */
+
+	case Fdevfd:	/* "..", or a decimal descriptor number */
+		if (ap->a_cnp->cn_namelen == 2 && bcmp(pname, "..", 2) == 0) {
+			error = fdesc_root(dvp->v_mount, vpp);
+			return (error);
+		}
+		/* Parse decimal digits, stopping early once out of range. */
+		fd = 0;
+		while (*pname >= '0' && *pname <= '9') {
+			fd = 10 * fd + *pname++ - '0';
+			if (fd >= nfiles)
+				break;
+		}
+		/* Tests for a NUL after the digits; cn_namelen is not used. */
+		if (*pname != '\0') {
+			error = ENOENT;
+			goto bad;
+		}
+
+		if (fd >= nfiles || p->p_fd->fd_ofiles[fd] == NULL) {
+			error = EBADF;
+			goto bad;
+		}
+
+		error = fdesc_allocvp(Fdesc, FD_DESC+fd, dvp->v_mount, &fvp);
+		if (error)
+			goto bad;
+		VTOFDESC(fvp)->fd_fd = fd;
+		*vpp = fvp;
+		return (0);
+	}
+
+bad:;
+	*vpp = NULL;
+	return (error);
+}
+
+int
+fdesc_open(ap)
+	struct vop_open_args /* {
+		struct vnode *a_vp;
+		int  a_mode;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct vnode *vp = ap->a_vp;
+	int error = 0;
+	/* Only Fdesc and Fctty nodes need work; other types return 0. */
+	switch (VTOFDESC(vp)->fd_type) {
+	case Fdesc:
+		/*
+		 * XXX Kludge: set p->p_dupfd to contain the value of the
+		 * file descriptor being sought for duplication. The error
+		 * return ensures that the vnode for this device will be
+		 * released by vn_open. Open will detect this special error and
+		 * take the actions in dupfdopen.  Other callers of vn_open or
+		 * VOP_OPEN will simply report the error.
+		 */
+		ap->a_p->p_dupfd = VTOFDESC(vp)->fd_fd;	/* XXX */
+		error = ENODEV;
+		break;
+
+	case Fctty:
+		error = cttyopen(devctty, ap->a_mode, 0, ap->a_p);
+		break;
+	}
+
+	return (error);
+}
+
+static int
+fdesc_attr(fd, vap, cred, p)
+	int fd;
+	struct vattr *vap;
+	struct ucred *cred;
+	struct proc *p;
+{
+	struct filedesc *fdp = p->p_fd;
+	struct file *fp;
+	struct stat stb;
+	int error;
+	/* Fill *vap from p's open descriptor fd; EBADF if it is not open. */
+	if (fd >= fdp->fd_nfiles || (fp = fdp->fd_ofiles[fd]) == NULL)
+		return (EBADF);
+
+	switch (fp->f_type) {
+	case DTYPE_VNODE:
+		error = VOP_GETATTR((struct vnode *) fp->f_data, vap, cred, p);
+		if (error == 0 && vap->va_type == VDIR) {
+			/*
+			 * don't allow directories to show up because
+			 * that causes loops in the namespace.
+			 */
+			vap->va_type = VFIFO;
+		}
+		break;
+
+	case DTYPE_SOCKET:	/* translate the socket's struct stat */
+		error = soo_stat((struct socket *)fp->f_data, &stb);
+		if (error == 0) {
+			vattr_null(vap);
+			vap->va_type = VSOCK;
+			vap->va_mode = stb.st_mode;
+			vap->va_nlink = stb.st_nlink;
+			vap->va_uid = stb.st_uid;
+			vap->va_gid = stb.st_gid;
+			vap->va_fsid = stb.st_dev;
+			vap->va_fileid = stb.st_ino;
+			vap->va_size = stb.st_size;
+			vap->va_blocksize = stb.st_blksize;
+			vap->va_atime = stb.st_atimespec;
+			vap->va_mtime = stb.st_mtimespec;
+			vap->va_ctime = stb.st_ctimespec;
+			vap->va_gen = stb.st_gen;
+			vap->va_flags = stb.st_flags;
+			vap->va_rdev = stb.st_rdev;
+			vap->va_bytes = stb.st_blocks * stb.st_blksize;
+		}
+		break;
+
+	default:	/* only vnode and socket files are handled */
+		panic("fdesc attr");
+		break;
+	}
+
+	return (error);
+}
+
+int
+fdesc_getattr(ap)
+	struct vop_getattr_args /* {
+		struct vnode *a_vp;
+		struct vattr *a_vap;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct vnode *vp = ap->a_vp;
+	struct vattr *vap = ap->a_vap;
+	unsigned fd;
+	int error = 0;
+	/* Our own nodes get synthetic attributes; Fdesc asks the open file. */
+	switch (VTOFDESC(vp)->fd_type) {
+	case Froot:
+	case Fdevfd:
+	case Flink:
+	case Fctty:
+		bzero((caddr_t) vap, sizeof(*vap));
+		vattr_null(vap);
+		vap->va_fileid = VTOFDESC(vp)->fd_ix;
+		/* Mode, type, link count and size depend on the node type. */
+		switch (VTOFDESC(vp)->fd_type) {
+		case Flink:
+			vap->va_mode = S_IRUSR|S_IXUSR|S_IRGRP|S_IXGRP|S_IROTH|S_IXOTH;
+			vap->va_type = VLNK;
+			vap->va_nlink = 1;
+			vap->va_size = strlen(VTOFDESC(vp)->fd_link);
+			break;
+
+		case Fctty:
+			vap->va_mode = S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH;
+			vap->va_type = VFIFO;
+			vap->va_nlink = 1;
+			vap->va_size = 0;
+			break;
+
+		default:	/* Froot and Fdevfd: directories */
+			vap->va_mode = S_IRUSR|S_IXUSR|S_IRGRP|S_IXGRP|S_IROTH|S_IXOTH;
+			vap->va_type = VDIR;
+			vap->va_nlink = 2;
+			vap->va_size = DEV_BSIZE;
+			break;
+		}
+		vap->va_uid = 0;
+		vap->va_gid = 0;
+		vap->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0];
+		vap->va_blocksize = DEV_BSIZE;
+		vap->va_atime.ts_sec = boottime.tv_sec;
+		vap->va_atime.ts_nsec = 0;
+		vap->va_mtime = vap->va_atime;
+		vap->va_ctime = vap->va_mtime;
+		vap->va_gen = 0;
+		vap->va_flags = 0;
+		vap->va_rdev = 0;
+		vap->va_bytes = 0;
+		break;
+
+	case Fdesc:
+		fd = VTOFDESC(vp)->fd_fd;
+		error = fdesc_attr(fd, vap, ap->a_cred, ap->a_p);
+		break;
+
+	default:
+		panic("fdesc_getattr");
+		break;	
+	}
+	/* Keep the vnode's own type in step with what was just reported. */
+	if (error == 0)
+		vp->v_type = vap->va_type;
+
+	return (error);
+}
+
+int
+fdesc_setattr(ap)
+	struct vop_setattr_args /* {
+		struct vnode *a_vp;
+		struct vattr *a_vap;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct filedesc *fdp = ap->a_p->p_fd;
+	struct file *fp;
+	unsigned fd;
+	int error;
+
+	/*
+	 * Only Fdesc nodes are forwarded; Fctty is a silent no-op, the rest EACCES.
+	 */
+	switch (VTOFDESC(ap->a_vp)->fd_type) {
+	case Fdesc:
+		break;
+
+	case Fctty:
+		return (0);
+
+	default:
+		return (EACCES);
+	}
+	/* Fdesc: find the open file the node stands for in the caller. */
+	fd = VTOFDESC(ap->a_vp)->fd_fd;
+	if (fd >= fdp->fd_nfiles || (fp = fdp->fd_ofiles[fd]) == NULL) {
+		return (EBADF);
+	}
+
+	/*
+	 * Can setattr the underlying vnode, but not sockets!
+	 */
+	switch (fp->f_type) {
+	case DTYPE_VNODE:
+		error = VOP_SETATTR((struct vnode *) fp->f_data, ap->a_vap, ap->a_cred, ap->a_p);
+		break;
+
+	case DTYPE_SOCKET:
+		error = 0;	/* request is dropped, success reported */
+		break;
+
+	default:
+		panic("fdesc setattr");
+		break;
+	}
+
+	return (error);
+}
+
+#define UIO_MX 16	/* size in bytes of each directory record emitted */
+
+static struct dirtmp {
+	u_long d_fileno;
+	u_short d_reclen;
+	u_short d_namlen;
+	char d_name[8];		/* longest name below is 6 chars plus NUL */
+} rootent[] = {		/* entries of the root directory, in readdir order */
+	{ FD_DEVFD, UIO_MX, 2, "fd" },
+	{ FD_STDIN, UIO_MX, 5, "stdin" },
+	{ FD_STDOUT, UIO_MX, 6, "stdout" },
+	{ FD_STDERR, UIO_MX, 6, "stderr" },
+	{ FD_CTTY, UIO_MX, 3, "tty" },
+	{ 0 }			/* d_fileno == 0 ends the table */
+};
+
+int
+fdesc_readdir(ap)
+	struct vop_readdir_args /* {
+		struct vnode *a_vp;
+		struct uio *a_uio;
+		struct ucred *a_cred;
+	} */ *ap;
+{
+	struct uio *uio = ap->a_uio;
+	struct filedesc *fdp;
+	int i;
+	int error;
+	/* List the root or fd directory as fixed UIO_MX-byte records. */
+	switch (VTOFDESC(ap->a_vp)->fd_type) {
+	case Fctty:
+		return (0);
+
+	case Fdesc:
+		return (ENOTDIR);
+
+	default:
+		break;
+	}
+
+	fdp = uio->uio_procp->p_fd;
+
+	if (VTOFDESC(ap->a_vp)->fd_type == Froot) {
+		struct dirent d;
+		struct dirent *dp = &d;
+		struct dirtmp *dt;
+
+		i = uio->uio_offset / UIO_MX;
+		error = 0;
+
+		while (uio->uio_resid > 0) {
+			/* A seek past the table must not index beyond it. */
+			if (i < 0 || i >= sizeof(rootent) / sizeof(rootent[0]))
+				break;
+			dt = &rootent[i];
+			if (dt->d_fileno == 0)	/* terminator: end of directory */
+				break;
+			i++;
+			switch (dt->d_fileno) {
+			case FD_CTTY:	/* hidden without a controlling tty */
+				if (cttyvp(uio->uio_procp) == NULL)
+					continue;
+				break;
+
+			case FD_STDIN:
+			case FD_STDOUT:
+			case FD_STDERR:	/* hidden unless the descriptor is open */
+				if ((dt->d_fileno-FD_STDIN) >= fdp->fd_nfiles)
+					continue;
+				if (fdp->fd_ofiles[dt->d_fileno-FD_STDIN] == NULL)
+					continue;
+				break;
+			}
+			bzero((caddr_t) dp, UIO_MX);
+			dp->d_fileno = dt->d_fileno;
+			dp->d_namlen = dt->d_namlen;
+			dp->d_type = DT_UNKNOWN;
+			dp->d_reclen = dt->d_reclen;
+			bcopy(dt->d_name, dp->d_name, dp->d_namlen+1);
+			error = uiomove((caddr_t) dp, UIO_MX, uio);
+			if (error)
+				break;
+		}
+		uio->uio_offset = i * UIO_MX;
+		return (error);
+	}
+	/* Fdevfd: one record per open descriptor, named by its number. */
+	i = uio->uio_offset / UIO_MX;
+	error = 0;
+	while (uio->uio_resid > 0) {
+		if (i < 0 || i >= fdp->fd_nfiles)	/* stay inside fd_ofiles */
+			break;
+
+		if (fdp->fd_ofiles[i] != NULL) {
+			struct dirent d;
+			struct dirent *dp = &d;
+
+			bzero((caddr_t) dp, UIO_MX);
+
+			dp->d_namlen = sprintf(dp->d_name, "%d", i);
+			dp->d_reclen = UIO_MX;
+			dp->d_type = DT_UNKNOWN;
+			dp->d_fileno = i + FD_STDIN;
+			/*
+			 * And ship to userland
+			 */
+			error = uiomove((caddr_t) dp, UIO_MX, uio);
+			if (error)
+				break;
+		}
+		i++;
+	}
+
+	uio->uio_offset = i * UIO_MX;
+	return (error);
+}
+
+int
+fdesc_readlink(ap)
+	struct vop_readlink_args /* {
+		struct vnode *a_vp;
+		struct uio *a_uio;
+		struct ucred *a_cred;
+	} */ *ap;
+{
+	struct vnode *vp = ap->a_vp;
+	int error;
+	/* Copy out the fd_link target of an Flink node, without its NUL. */
+	if (vp->v_type != VLNK)
+		return (EPERM);
+
+	if (VTOFDESC(vp)->fd_type == Flink) {
+		char *ln = VTOFDESC(vp)->fd_link;
+		error = uiomove(ln, strlen(ln), ap->a_uio);
+	} else {
+		error = EOPNOTSUPP;
+	}
+
+	return (error);
+}
+
+int
+fdesc_read(ap)
+	struct vop_read_args /* {
+		struct vnode *a_vp;
+		struct uio *a_uio;
+		int  a_ioflag;
+		struct ucred *a_cred;
+	} */ *ap;
+{
+	int error = EOPNOTSUPP;
+	/* Only the Fctty node is readable here, by way of cttyread(). */
+	switch (VTOFDESC(ap->a_vp)->fd_type) {
+	case Fctty:
+		error = cttyread(devctty, ap->a_uio, ap->a_ioflag);
+		break;
+
+	default:
+		error = EOPNOTSUPP;
+		break;
+	}
+	
+	return (error);
+}
+
+int
+fdesc_write(ap)
+	struct vop_write_args /* {
+		struct vnode *a_vp;
+		struct uio *a_uio;
+		int  a_ioflag;
+		struct ucred *a_cred;
+	} */ *ap;
+{
+	int error = EOPNOTSUPP;
+	/* Only the Fctty node is writable here, by way of cttywrite(). */
+	switch (VTOFDESC(ap->a_vp)->fd_type) {
+	case Fctty:
+		error = cttywrite(devctty, ap->a_uio, ap->a_ioflag);
+		break;
+
+	default:
+		error = EOPNOTSUPP;
+		break;
+	}
+	
+	return (error);
+}
+
+int
+fdesc_ioctl(ap)
+	struct vop_ioctl_args /* {
+		struct vnode *a_vp;
+		int  a_command;
+		caddr_t  a_data;
+		int  a_fflag;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	int error = EOPNOTSUPP;
+	/* Only the Fctty node accepts ioctls, passed on to cttyioctl(). */
+	switch (VTOFDESC(ap->a_vp)->fd_type) {
+	case Fctty:
+		error = cttyioctl(devctty, ap->a_command, ap->a_data,
+					ap->a_fflag, ap->a_p);
+		break;
+
+	default:
+		error = EOPNOTSUPP;
+		break;
+	}
+	
+	return (error);
+}
+
+int
+fdesc_select(ap)
+	struct vop_select_args /* {
+		struct vnode *a_vp;
+		int  a_which;
+		int  a_fflags;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	int error = EOPNOTSUPP;
+	/* NOTE(review): passes a_fflags, not a_which -- check cttyselect(). */
+	switch (VTOFDESC(ap->a_vp)->fd_type) {
+	case Fctty:
+		error = cttyselect(devctty, ap->a_fflags, ap->a_p);
+		break;
+
+	default:
+		error = EOPNOTSUPP;
+		break;
+	}
+	
+	return (error);
+}
+
+int
+fdesc_inactive(ap)
+	struct vop_inactive_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+	struct vnode *vp = ap->a_vp;
+
+	/*
+	 * Clear out the v_type field to avoid nasty things happening in
+	 * vgone(); fdesc_lookup() and fdesc_getattr() assign it again.
+	 */
+	vp->v_type = VNON;
+	return (0);
+}
+
+int
+fdesc_reclaim(ap)
+	struct vop_reclaim_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+	struct vnode *vp = ap->a_vp;
+	/* Take the node off its hash chain and release its memory. */
+	remque(VTOFDESC(vp));
+	FREE(vp->v_data, M_TEMP);
+	vp->v_data = 0;
+
+	return (0);
+}
+
+/*
+ * Store the special-device pathconf value for a_name in *a_retval, else EINVAL.
+ */
+int
+fdesc_pathconf(ap)
+	struct vop_pathconf_args /* {
+		struct vnode *a_vp;
+		int a_name;
+		int *a_retval;
+	} */ *ap;
+{
+	switch (ap->a_name) {
+	case _PC_LINK_MAX:
+		*ap->a_retval = LINK_MAX;
+		return (0);
+	case _PC_MAX_CANON:
+		*ap->a_retval = MAX_CANON;
+		return (0);
+	case _PC_MAX_INPUT:
+		*ap->a_retval = MAX_INPUT;
+		return (0);
+	case _PC_PIPE_BUF:
+		*ap->a_retval = PIPE_BUF;
+		return (0);
+	case _PC_CHOWN_RESTRICTED:
+		*ap->a_retval = 1;
+		return (0);
+	case _PC_VDISABLE:
+		*ap->a_retval = _POSIX_VDISABLE;
+		return (0);
+	default:
+		return (EINVAL);
+	}
+	/* NOTREACHED */
+}
+
+/*
+ * Print a fixed one-line description of a /dev/fd vnode.
+ */
+/* ARGSUSED */
+int
+fdesc_print(ap)
+	struct vop_print_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+	/* NOTE(review): says VT_NON, but fdesc_allocvp() uses tag VT_FDESC. */
+	printf("tag VT_NON, fdesc vnode\n");
+	return (0);
+}
+
+/*void*/
+int
+fdesc_vfree(ap)
+	struct vop_vfree_args /* {
+		struct vnode *a_pvp;
+		ino_t a_ino;
+		int a_mode;
+	} */ *ap;
+{
+	/* No-op: the arguments are ignored and success is reported. */
+	return (0);
+}
+
+/*
+ * /dev/fd vnode unsupported operation: always fails with EOPNOTSUPP
+ */
+int
+fdesc_enotsupp()
+{
+
+	return (EOPNOTSUPP);
+}
+
+/*
+ * /dev/fd "should never get here" operation: panics instead of returning
+ */
+int
+fdesc_badop()
+{
+
+	panic("fdesc: bad op");
+	/* NOTREACHED */
+}
+
+/*
+ * /dev/fd vnode null operation: does nothing and reports success
+ */
+int
+fdesc_nullop()
+{
+	/* The stub macros in this file cast nullop, not this function. */
+	return (0);
+}
+/* Operations fdesc does not implement itself: casts of shared stubs. */
+#define fdesc_create ((int (*) __P((struct  vop_create_args *)))fdesc_enotsupp)
+#define fdesc_mknod ((int (*) __P((struct  vop_mknod_args *)))fdesc_enotsupp)
+#define fdesc_close ((int (*) __P((struct  vop_close_args *)))nullop)
+#define fdesc_access ((int (*) __P((struct  vop_access_args *)))nullop)
+#define fdesc_mmap ((int (*) __P((struct  vop_mmap_args *)))fdesc_enotsupp)
+#define fdesc_fsync ((int (*) __P((struct  vop_fsync_args *)))nullop)
+#define fdesc_seek ((int (*) __P((struct  vop_seek_args *)))nullop)
+#define fdesc_remove ((int (*) __P((struct  vop_remove_args *)))fdesc_enotsupp)
+#define fdesc_link ((int (*) __P((struct  vop_link_args *)))fdesc_enotsupp)
+#define fdesc_rename ((int (*) __P((struct  vop_rename_args *)))fdesc_enotsupp)
+#define fdesc_mkdir ((int (*) __P((struct  vop_mkdir_args *)))fdesc_enotsupp)
+#define fdesc_rmdir ((int (*) __P((struct  vop_rmdir_args *)))fdesc_enotsupp)
+#define fdesc_symlink ((int (*) __P((struct vop_symlink_args *)))fdesc_enotsupp)
+#define fdesc_abortop ((int (*) __P((struct  vop_abortop_args *)))nullop)
+#define fdesc_lock ((int (*) __P((struct  vop_lock_args *)))nullop)
+#define fdesc_unlock ((int (*) __P((struct  vop_unlock_args *)))nullop)
+#define fdesc_bmap ((int (*) __P((struct  vop_bmap_args *)))fdesc_badop)
+#define fdesc_strategy ((int (*) __P((struct  vop_strategy_args *)))fdesc_badop)
+#define fdesc_islocked ((int (*) __P((struct  vop_islocked_args *)))nullop)
+#define fdesc_advlock ((int (*) __P((struct vop_advlock_args *)))fdesc_enotsupp)
+#define fdesc_blkatoff \
+	((int (*) __P((struct  vop_blkatoff_args *)))fdesc_enotsupp)
+#define fdesc_vget ((int (*) __P((struct  vop_vget_args *)))fdesc_enotsupp)
+#define fdesc_valloc ((int(*) __P(( \
+		struct vnode *pvp, \
+		int mode, \
+		struct ucred *cred, \
+		struct vnode **vpp))) fdesc_enotsupp)
+#define fdesc_truncate \
+	((int (*) __P((struct  vop_truncate_args *)))fdesc_enotsupp)
+#define fdesc_update ((int (*) __P((struct  vop_update_args *)))fdesc_enotsupp)
+#define fdesc_bwrite ((int (*) __P((struct  vop_bwrite_args *)))fdesc_enotsupp)
+
+int (**fdesc_vnodeop_p)();	/* vector named by fdesc_vnodeop_opv_desc */
+struct vnodeopv_entry_desc fdesc_vnodeop_entries[] = {
+	{ &vop_default_desc, vn_default_error },
+	{ &vop_lookup_desc, fdesc_lookup },	/* lookup */
+	{ &vop_create_desc, fdesc_create },	/* create */
+	{ &vop_mknod_desc, fdesc_mknod },	/* mknod */
+	{ &vop_open_desc, fdesc_open },		/* open */
+	{ &vop_close_desc, fdesc_close },	/* close */
+	{ &vop_access_desc, fdesc_access },	/* access */
+	{ &vop_getattr_desc, fdesc_getattr },	/* getattr */
+	{ &vop_setattr_desc, fdesc_setattr },	/* setattr */
+	{ &vop_read_desc, fdesc_read },		/* read */
+	{ &vop_write_desc, fdesc_write },	/* write */
+	{ &vop_ioctl_desc, fdesc_ioctl },	/* ioctl */
+	{ &vop_select_desc, fdesc_select },	/* select */
+	{ &vop_mmap_desc, fdesc_mmap },		/* mmap */
+	{ &vop_fsync_desc, fdesc_fsync },	/* fsync */
+	{ &vop_seek_desc, fdesc_seek },		/* seek */
+	{ &vop_remove_desc, fdesc_remove },	/* remove */
+	{ &vop_link_desc, fdesc_link },		/* link */
+	{ &vop_rename_desc, fdesc_rename },	/* rename */
+	{ &vop_mkdir_desc, fdesc_mkdir },	/* mkdir */
+	{ &vop_rmdir_desc, fdesc_rmdir },	/* rmdir */
+	{ &vop_symlink_desc, fdesc_symlink },	/* symlink */
+	{ &vop_readdir_desc, fdesc_readdir },	/* readdir */
+	{ &vop_readlink_desc, fdesc_readlink },	/* readlink */
+	{ &vop_abortop_desc, fdesc_abortop },	/* abortop */
+	{ &vop_inactive_desc, fdesc_inactive },	/* inactive */
+	{ &vop_reclaim_desc, fdesc_reclaim },	/* reclaim */
+	{ &vop_lock_desc, fdesc_lock },		/* lock */
+	{ &vop_unlock_desc, fdesc_unlock },	/* unlock */
+	{ &vop_bmap_desc, fdesc_bmap },		/* bmap */
+	{ &vop_strategy_desc, fdesc_strategy },	/* strategy */
+	{ &vop_print_desc, fdesc_print },	/* print */
+	{ &vop_islocked_desc, fdesc_islocked },	/* islocked */
+	{ &vop_pathconf_desc, fdesc_pathconf },	/* pathconf */
+	{ &vop_advlock_desc, fdesc_advlock },	/* advlock */
+	{ &vop_blkatoff_desc, fdesc_blkatoff },	/* blkatoff */
+	{ &vop_valloc_desc, fdesc_valloc },	/* valloc */
+	{ &vop_vfree_desc, fdesc_vfree },	/* vfree */
+	{ &vop_truncate_desc, fdesc_truncate },	/* truncate */
+	{ &vop_update_desc, fdesc_update },	/* update */
+	{ &vop_bwrite_desc, fdesc_bwrite },	/* bwrite */
+	{ (struct vnodeop_desc*)NULL, (int(*)())NULL }	/* NULL pair ends it */
+};
+struct vnodeopv_desc fdesc_vnodeop_opv_desc =	/* vector + its entries */
+	{ &fdesc_vnodeop_p, fdesc_vnodeop_entries };
diff --git a/sys/fs/fifofs/fifo.h b/sys/fs/fifofs/fifo.h
new file mode 100644
index 00000000000..e89186d8b89
--- /dev/null
+++ b/sys/fs/fifofs/fifo.h
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)fifo.h	8.2 (Berkeley) 2/2/94
+ */
+
+#ifdef FIFO
+/*
+ * Fifo vnode operations: real prototypes, or casts of a shared stub.
+ */
+int	fifo_badop(),		/* stubs that the macros below cast */
+	fifo_ebadf();
+
+int	fifo_lookup __P((struct vop_lookup_args *));
+#define fifo_create ((int (*) __P((struct  vop_create_args *)))fifo_badop)
+#define fifo_mknod ((int (*) __P((struct  vop_mknod_args *)))fifo_badop)
+int	fifo_open __P((struct vop_open_args *));
+int	fifo_close __P((struct vop_close_args *));
+#define fifo_access ((int (*) __P((struct  vop_access_args *)))fifo_ebadf)
+#define fifo_getattr ((int (*) __P((struct  vop_getattr_args *)))fifo_ebadf)
+#define fifo_setattr ((int (*) __P((struct  vop_setattr_args *)))fifo_ebadf)
+int	fifo_read __P((struct vop_read_args *));
+int	fifo_write __P((struct vop_write_args *));
+int	fifo_ioctl __P((struct vop_ioctl_args *));
+int	fifo_select __P((struct vop_select_args *));
+#define fifo_mmap ((int (*) __P((struct  vop_mmap_args *)))fifo_badop)
+#define fifo_fsync ((int (*) __P((struct  vop_fsync_args *)))nullop)
+#define fifo_seek ((int (*) __P((struct  vop_seek_args *)))fifo_badop)
+#define fifo_remove ((int (*) __P((struct  vop_remove_args *)))fifo_badop)
+#define fifo_link ((int (*) __P((struct  vop_link_args *)))fifo_badop)
+#define fifo_rename ((int (*) __P((struct  vop_rename_args *)))fifo_badop)
+#define fifo_mkdir ((int (*) __P((struct  vop_mkdir_args *)))fifo_badop)
+#define fifo_rmdir ((int (*) __P((struct  vop_rmdir_args *)))fifo_badop)
+#define fifo_symlink ((int (*) __P((struct  vop_symlink_args *)))fifo_badop)
+#define fifo_readdir ((int (*) __P((struct  vop_readdir_args *)))fifo_badop)
+#define fifo_readlink ((int (*) __P((struct  vop_readlink_args *)))fifo_badop)
+#define fifo_abortop ((int (*) __P((struct  vop_abortop_args *)))fifo_badop)
+#define fifo_inactive ((int (*) __P((struct  vop_inactive_args *)))nullop)
+#define fifo_reclaim ((int (*) __P((struct  vop_reclaim_args *)))nullop)
+int	fifo_lock __P((struct vop_lock_args *));
+int	fifo_unlock __P((struct vop_unlock_args *));
+int	fifo_bmap __P((struct vop_bmap_args *));
+#define fifo_strategy ((int (*) __P((struct  vop_strategy_args *)))fifo_badop)
+int	fifo_print __P((struct vop_print_args *));
+#define fifo_islocked ((int (*) __P((struct  vop_islocked_args *)))nullop)
+int	fifo_pathconf __P((struct vop_pathconf_args *));
+int	fifo_advlock __P((struct vop_advlock_args *));
+#define fifo_blkatoff ((int (*) __P((struct  vop_blkatoff_args *)))fifo_badop)
+#define fifo_valloc ((int (*) __P((struct  vop_valloc_args *)))fifo_badop)
+#define fifo_reallocblks \
+	((int (*) __P((struct  vop_reallocblks_args *)))fifo_badop)
+#define fifo_vfree ((int (*) __P((struct  vop_vfree_args *)))fifo_badop)
+#define fifo_truncate ((int (*) __P((struct  vop_truncate_args *)))nullop)
+#define fifo_update ((int (*) __P((struct  vop_update_args *)))nullop)
+#define fifo_bwrite ((int (*) __P((struct  vop_bwrite_args *)))nullop)
+#endif /* FIFO */
diff --git a/sys/fs/fifofs/fifo_vnops.c b/sys/fs/fifofs/fifo_vnops.c
new file mode 100644
index 00000000000..bad33a430b6
--- /dev/null
+++ b/sys/fs/fifofs/fifo_vnops.c
@@ -0,0 +1,494 @@
+/*
+ * Copyright (c) 1990, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)fifo_vnops.c	8.2 (Berkeley) 1/4/94
+ */
+
+#include <sys/param.h>
+#include <sys/proc.h>
+#include <sys/time.h>
+#include <sys/namei.h>
+#include <sys/vnode.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/stat.h>
+#include <sys/systm.h>
+#include <sys/ioctl.h>
+#include <sys/file.h>
+#include <sys/errno.h>
+#include <sys/malloc.h>
+#include <miscfs/fifofs/fifo.h>
+
+/*
+ * Per-fifo state, reached through the vnode's v_fifoinfo pointer.
+ * It is allocated in fifo_open() and freed in fifo_close().
+ */
+struct fifoinfo {
+	struct socket	*fi_readsock;	/* socket fifo_read() receives from */
+	struct socket	*fi_writesock;	/* socket fifo_write() sends on */
+	long		fi_readers;	/* read opens not yet closed */
+	long		fi_writers;	/* write opens not yet closed */
+};
+
+int (**fifo_vnodeop_p)();	/* its address is stored in fifo_vnodeop_opv_desc below */
+struct vnodeopv_entry_desc fifo_vnodeop_entries[] = {
+	{ &vop_default_desc, vn_default_error },	/* default */
+	{ &vop_lookup_desc, fifo_lookup },		/* lookup */
+	{ &vop_create_desc, fifo_create },		/* create */
+	{ &vop_mknod_desc, fifo_mknod },		/* mknod */
+	{ &vop_open_desc, fifo_open },			/* open */
+	{ &vop_close_desc, fifo_close },		/* close */
+	{ &vop_access_desc, fifo_access },		/* access */
+	{ &vop_getattr_desc, fifo_getattr },		/* getattr */
+	{ &vop_setattr_desc, fifo_setattr },		/* setattr */
+	{ &vop_read_desc, fifo_read },			/* read */
+	{ &vop_write_desc, fifo_write },		/* write */
+	{ &vop_ioctl_desc, fifo_ioctl },		/* ioctl */
+	{ &vop_select_desc, fifo_select },		/* select */
+	{ &vop_mmap_desc, fifo_mmap },			/* mmap */
+	{ &vop_fsync_desc, fifo_fsync },		/* fsync */
+	{ &vop_seek_desc, fifo_seek },			/* seek */
+	{ &vop_remove_desc, fifo_remove },		/* remove */
+	{ &vop_link_desc, fifo_link },			/* link */
+	{ &vop_rename_desc, fifo_rename },		/* rename */
+	{ &vop_mkdir_desc, fifo_mkdir },		/* mkdir */
+	{ &vop_rmdir_desc, fifo_rmdir },		/* rmdir */
+	{ &vop_symlink_desc, fifo_symlink },		/* symlink */
+	{ &vop_readdir_desc, fifo_readdir },		/* readdir */
+	{ &vop_readlink_desc, fifo_readlink },		/* readlink */
+	{ &vop_abortop_desc, fifo_abortop },		/* abortop */
+	{ &vop_inactive_desc, fifo_inactive },		/* inactive */
+	{ &vop_reclaim_desc, fifo_reclaim },		/* reclaim */
+	{ &vop_lock_desc, fifo_lock },			/* lock */
+	{ &vop_unlock_desc, fifo_unlock },		/* unlock */
+	{ &vop_bmap_desc, fifo_bmap },			/* bmap */
+	{ &vop_strategy_desc, fifo_strategy },		/* strategy */
+	{ &vop_print_desc, fifo_print },		/* print */
+	{ &vop_islocked_desc, fifo_islocked },		/* islocked */
+	{ &vop_pathconf_desc, fifo_pathconf },		/* pathconf */
+	{ &vop_advlock_desc, fifo_advlock },		/* advlock */
+	{ &vop_blkatoff_desc, fifo_blkatoff },		/* blkatoff */
+	{ &vop_valloc_desc, fifo_valloc },		/* valloc */
+	{ &vop_vfree_desc, fifo_vfree },		/* vfree */
+	{ &vop_truncate_desc, fifo_truncate },		/* truncate */
+	{ &vop_update_desc, fifo_update },		/* update */
+	{ &vop_bwrite_desc, fifo_bwrite },		/* bwrite */
+	{ (struct vnodeop_desc*)NULL, (int(*)())NULL }	/* all-NULL last entry */
+};
+struct vnodeopv_desc fifo_vnodeop_opv_desc =
+	{ &fifo_vnodeop_p, fifo_vnodeop_entries };
+
+/*
+ * Lookup through a fifo always fails: *a_vpp is cleared, ENOTDIR returned.
+ */
+/* ARGSUSED */
+fifo_lookup(ap)
+	struct vop_lookup_args /* {
+		struct vnode * a_dvp;
+		struct vnode ** a_vpp;
+		struct componentname * a_cnp;
+	} */ *ap;
+{
+
+	*(ap->a_vpp) = NULL;	/* no vnode is handed back */
+	return (ENOTDIR);
+}
+
+/*
+ * Open a fifo.  The first open allocates the fifoinfo and its pair of
+ * connected sockets; a blocking open then sleeps until the other end opens.
+ */
+/* ARGSUSED */
+fifo_open(ap)
+	struct vop_open_args /* {
+		struct vnode *a_vp;
+		int  a_mode;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	register struct vnode *vp = ap->a_vp;
+	register struct fifoinfo *fip;
+	struct socket *rso, *wso;
+	int error;
+	static char openstr[] = "fifo";	/* string handed to tsleep() */
+
+	if ((ap->a_mode & (FREAD|FWRITE)) == (FREAD|FWRITE))
+		return (EINVAL);	/* opening for both read and write is refused */
+	if ((fip = vp->v_fifoinfo) == NULL) {	/* no fifo state on this vnode yet */
+		MALLOC(fip, struct fifoinfo *, sizeof(*fip), M_VNODE, M_WAITOK);
+		vp->v_fifoinfo = fip;
+		if (error = socreate(AF_UNIX, &rso, SOCK_STREAM, 0)) {
+			free(fip, M_VNODE);
+			vp->v_fifoinfo = NULL;
+			return (error);
+		}
+		fip->fi_readsock = rso;
+		if (error = socreate(AF_UNIX, &wso, SOCK_STREAM, 0)) {
+			(void)soclose(rso);
+			free(fip, M_VNODE);
+			vp->v_fifoinfo = NULL;
+			return (error);
+		}
+		fip->fi_writesock = wso;
+		if (error = unp_connect2(wso, rso)) {	/* join the two sockets */
+			(void)soclose(wso);
+			(void)soclose(rso);
+			free(fip, M_VNODE);
+			vp->v_fifoinfo = NULL;
+			return (error);
+		}
+		fip->fi_readers = fip->fi_writers = 0;
+		wso->so_state |= SS_CANTRCVMORE;	/* write side never receives */
+		rso->so_state |= SS_CANTSENDMORE;	/* read side never sends */
+	}
+	error = 0;
+	if (ap->a_mode & FREAD) {
+		fip->fi_readers++;
+		if (fip->fi_readers == 1) {	/* first reader: let writers send */
+			fip->fi_writesock->so_state &= ~SS_CANTSENDMORE;
+			if (fip->fi_writers > 0)
+				wakeup((caddr_t)&fip->fi_writers);
+		}
+		if (ap->a_mode & O_NONBLOCK)
+			return (0);	/* non-blocking read open never sleeps */
+		while (fip->fi_writers == 0) {	/* wait for a writer to show up */
+			VOP_UNLOCK(vp);
+			error = tsleep((caddr_t)&fip->fi_readers,
+			    PCATCH | PSOCK, openstr, 0);
+			VOP_LOCK(vp);
+			if (error)
+				break;
+		}
+	} else {
+		fip->fi_writers++;
+		if (fip->fi_readers == 0 && (ap->a_mode & O_NONBLOCK)) {
+			error = ENXIO;	/* no reader and the caller will not wait */
+		} else {
+			if (fip->fi_writers == 1) {	/* first writer: let readers receive */
+				fip->fi_readsock->so_state &= ~SS_CANTRCVMORE;
+				if (fip->fi_readers > 0)
+					wakeup((caddr_t)&fip->fi_readers);
+			}
+			while (fip->fi_readers == 0) {	/* wait for a reader to show up */
+				VOP_UNLOCK(vp);
+				error = tsleep((caddr_t)&fip->fi_writers,
+				    PCATCH | PSOCK, openstr, 0);
+				VOP_LOCK(vp);
+				if (error)
+					break;
+			}
+		}
+	}
+	if (error)	/* failed after the count was bumped: close again */
+		VOP_CLOSE(vp, ap->a_mode, ap->a_cred, ap->a_p);
+	return (error);
+}
+
+/*
+ * Vnode op for read: receive from the fifo's read socket via soreceive().
+ */
+/* ARGSUSED */
+fifo_read(ap)
+	struct vop_read_args /* {
+		struct vnode *a_vp;
+		struct uio *a_uio;
+		int  a_ioflag;
+		struct ucred *a_cred;
+	} */ *ap;
+{
+	register struct uio *uio = ap->a_uio;
+	register struct socket *rso = ap->a_vp->v_fifoinfo->fi_readsock;
+	int error, startresid;
+
+#ifdef DIAGNOSTIC
+	if (uio->uio_rw != UIO_READ)
+		panic("fifo_read mode");
+#endif
+	if (uio->uio_resid == 0)
+		return (0);	/* nothing was asked for */
+	if (ap->a_ioflag & IO_NDELAY)
+		rso->so_state |= SS_NBIO;	/* cleared again before returning */
+	startresid = uio->uio_resid;
+	VOP_UNLOCK(ap->a_vp);	/* vnode is unlocked across soreceive() */
+	error = soreceive(rso, (struct mbuf **)0, uio, (int *)0,
+		(struct mbuf **)0, (struct mbuf **)0);
+	VOP_LOCK(ap->a_vp);
+	/*
+	 * Clear EOF indication after first such return.
+	 */
+	if (uio->uio_resid == startresid)	/* no bytes were transferred */
+		rso->so_state &= ~SS_CANTRCVMORE;
+	if (ap->a_ioflag & IO_NDELAY)
+		rso->so_state &= ~SS_NBIO;
+	return (error);
+}
+
+/*
+ * Vnode op for write: send on the fifo's write socket via sosend().
+ */
+/* ARGSUSED */
+fifo_write(ap)
+	struct vop_write_args /* {
+		struct vnode *a_vp;
+		struct uio *a_uio;
+		int  a_ioflag;
+		struct ucred *a_cred;
+	} */ *ap;
+{
+	struct socket *wso = ap->a_vp->v_fifoinfo->fi_writesock;
+	int error;
+
+#ifdef DIAGNOSTIC
+	if (ap->a_uio->uio_rw != UIO_WRITE)
+		panic("fifo_write mode");
+#endif
+	if (ap->a_ioflag & IO_NDELAY)
+		wso->so_state |= SS_NBIO;	/* cleared again before returning */
+	VOP_UNLOCK(ap->a_vp);	/* vnode is unlocked across sosend() */
+	error = sosend(wso, (struct mbuf *)0, ap->a_uio, 0, (struct mbuf *)0, 0);
+	VOP_LOCK(ap->a_vp);
+	if (ap->a_ioflag & IO_NDELAY)
+		wso->so_state &= ~SS_NBIO;
+	return (error);	/* whatever sosend() reported */
+}
+
+/*
+ * Ioctl on a fifo: FIONBIO succeeds at once, the rest goes to soo_ioctl().
+ */
+/* ARGSUSED */
+fifo_ioctl(ap)
+	struct vop_ioctl_args /* {
+		struct vnode *a_vp;
+		int  a_command;
+		caddr_t  a_data;
+		int  a_fflag;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct fifoinfo *fip;
+	struct file filetmp;	/* scratch file; only f_data is filled in */
+
+	if (ap->a_command == FIONBIO)
+		return (0);
+	fip = ap->a_vp->v_fifoinfo;
+	filetmp.f_data = (caddr_t)((ap->a_fflag & FREAD) ?
+	    fip->fi_readsock : fip->fi_writesock);
+	return (soo_ioctl(&filetmp, ap->a_command, ap->a_data, ap->a_p));
+}
+
+/* ARGSUSED */
+fifo_select(ap)
+	struct vop_select_args /* {
+		struct vnode *a_vp;
+		int  a_which;
+		int  a_fflags;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct fifoinfo *fip = ap->a_vp->v_fifoinfo;
+	struct file filetmp;	/* scratch file; only f_data is filled in */
+
+	/* Select on the read socket for FREAD descriptors, else the write one. */
+	filetmp.f_data = (caddr_t)((ap->a_fflags & FREAD) ?
+	    fip->fi_readsock : fip->fi_writesock);
+	return (soo_select(&filetmp, ap->a_which, ap->a_p));
+}
+
+/*
+ * Identity block map: the vnode and block number come back unchanged.
+ */
+fifo_bmap(ap)
+	struct vop_bmap_args /* {
+		struct vnode *a_vp;
+		daddr_t  a_bn;
+		struct vnode **a_vpp;
+		daddr_t *a_bnp;
+	} */ *ap;
+{
+
+	if (ap->a_vpp)			/* each result pointer is optional */
+		*(ap->a_vpp) = ap->a_vp;
+	if (ap->a_bnp)
+		*(ap->a_bnp) = ap->a_bn;
+	return (0);
+}
+
+/*
+ * Lock op: at the moment we do not do any locking, so just report success.
+ */
+/* ARGSUSED */
+fifo_lock(ap)
+	struct vop_lock_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+
+	return (0);	/* a_vp is not touched */
+}
+
+/* ARGSUSED */
+fifo_unlock(ap)
+	struct vop_unlock_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+
+	return (0);	/* fifo_lock() takes no lock, so there is nothing to release */
+}
+
+/*
+ * Close routine: drop this open's count; free the fifo state once unused.
+ */
+/* ARGSUSED */
+fifo_close(ap)
+	struct vop_close_args /* {
+		struct vnode *a_vp;
+		int  a_fflag;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	register struct vnode *vp = ap->a_vp;
+	register struct fifoinfo *fip = vp->v_fifoinfo;
+	int error1, error2;
+
+	if (ap->a_fflag & FWRITE) {
+		fip->fi_writers--;
+		if (fip->fi_writers == 0)	/* last writer gone */
+			socantrcvmore(fip->fi_readsock);
+	} else {
+		fip->fi_readers--;
+		if (fip->fi_readers == 0)	/* last reader gone */
+			socantsendmore(fip->fi_writesock);
+	}
+	if (vp->v_usecount > 1)
+		return (0);	/* vnode still has other users; keep the sockets */
+	error1 = soclose(fip->fi_readsock);
+	error2 = soclose(fip->fi_writesock);
+	FREE(fip, M_VNODE);
+	vp->v_fifoinfo = NULL;	/* next fifo_open() allocates afresh */
+	if (error1)
+		return (error1);	/* read-side error takes precedence */
+	return (error2);
+}
+
+/*
+ * Print out the contents of a fifo vnode.  Always returns 0.
+ */
+fifo_print(ap)
+	struct vop_print_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+	printf("tag VT_NON");
+	fifo_printinfo(ap->a_vp);	/* appends the reader/writer counts */
+	printf("\n");
+	return (0);	/* fifo.h declares this int: do not fall off the end */
+}
+
+/*
+ * Print the reader and writer counts of a fifo vnode (no newline).
+ */
+fifo_printinfo(vp)
+	struct vnode *vp;
+{
+	register struct fifoinfo *fip = vp->v_fifoinfo;
+
+	printf(", fifo with %ld readers and %ld writers",
+		fip->fi_readers, fip->fi_writers);	/* both are long */
+}
+
+/*
+ * Return POSIX pathconf information applicable to fifo's.
+ */
+fifo_pathconf(ap)
+	struct vop_pathconf_args /* {
+		struct vnode *a_vp;
+		int a_name;
+		int *a_retval;
+	} */ *ap;
+{
+	int name = ap->a_name;
+
+	/*
+	 * Only three variables are answered here; any other
+	 * name leaves *a_retval untouched and yields EINVAL.
+	 */
+	if (name == _PC_LINK_MAX)
+		*ap->a_retval = LINK_MAX;
+	else if (name == _PC_PIPE_BUF)
+		*ap->a_retval = PIPE_BUF;
+	else if (name == _PC_CHOWN_RESTRICTED)
+		*ap->a_retval = 1;
+	else
+		return (EINVAL);
+	return (0);
+}
+
+/*
+ * Fifo failed operation: takes no arguments and always returns EBADF.
+ */
+fifo_ebadf()
+{
+
+	return (EBADF);
+}
+
+/*
+ * Fifo advisory byte-level locks: not provided, always EOPNOTSUPP.
+ */
+/* ARGSUSED */
+fifo_advlock(ap)
+	struct vop_advlock_args /* {
+		struct vnode *a_vp;
+		caddr_t  a_id;
+		int  a_op;
+		struct flock *a_fl;
+		int  a_flags;
+	} */ *ap;
+{
+
+	return (EOPNOTSUPP);	/* none of the arguments is examined */
+}
+
+/*
+ * Fifo bad operation: panics.  fifo.h casts it into several vnode op slots.
+ */
+fifo_badop()
+{
+
+	panic("fifo_badop called");
+	/* NOTREACHED */
+}
diff --git a/sys/fs/nullfs/null.h b/sys/fs/nullfs/null.h
new file mode 100644
index 00000000000..14286ffeee0
--- /dev/null
+++ b/sys/fs/nullfs/null.h
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) 1992, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software donated to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)null.h	8.2 (Berkeley) 1/21/94
+ *
+ * $Id: lofs.h,v 1.8 1992/05/30 10:05:43 jsp Exp jsp $
+ */
+
+struct null_args {	/* copied in from the mount data by nullfs_mount() */
+	char		*target;	/* Target of loopback; a user-space path */
+};
+
+struct null_mount {	/* hung off mp->mnt_data, see MOUNTTONULLMOUNT() */
+	struct mount	*nullm_vfs;	/* mount of the lower root vnode */
+	struct vnode	*nullm_rootvp;	/* Reference to root null_node */
+};
+
+#ifdef KERNEL
+/*
+ * A cache of vnode references: one null_node per alias vnode (v_data).
+ */
+struct null_node {
+	struct null_node	*null_forw;	/* Hash chain */
+	struct null_node	*null_back;	/* Hash chain, other direction */
+	struct vnode	        *null_lowervp;	/* VREFed once */
+	struct vnode		*null_vnode;	/* Back pointer */
+};
+
+extern int null_node_create __P((struct mount *mp, struct vnode *target, struct vnode **vpp));
+
+#define	MOUNTTONULLMOUNT(mp) ((struct null_mount *)((mp)->mnt_data))	/* mount -> null_mount */
+#define	VTONULL(vp) ((struct null_node *)(vp)->v_data)	/* alias vnode -> null_node */
+#define	NULLTOV(xp) ((xp)->null_vnode)	/* null_node -> alias vnode */
+#ifdef NULLFS_DIAGNOSTIC
+extern struct vnode *null_checkvp __P((struct vnode *vp, char *fil, int lno));
+#define	NULLVPTOLOWERVP(vp) null_checkvp((vp), __FILE__, __LINE__)	/* checked variant */
+#else
+#define	NULLVPTOLOWERVP(vp) (VTONULL(vp)->null_lowervp)	/* alias vnode -> lower vnode */
+#endif
+
+extern int (**null_vnodeop_p)();
+extern struct vfsops null_vfsops;
+#endif /* KERNEL */
diff --git a/sys/fs/nullfs/null_subr.c b/sys/fs/nullfs/null_subr.c
new file mode 100644
index 00000000000..a31723fe4c2
--- /dev/null
+++ b/sys/fs/nullfs/null_subr.c
@@ -0,0 +1,293 @@
+/*
+ * Copyright (c) 1992, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software donated to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)null_subr.c	8.4 (Berkeley) 1/21/94
+ *
+ * $Id: lofs_subr.c,v 1.11 1992/05/30 10:05:43 jsp Exp jsp $
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/vnode.h>
+#include <sys/mount.h>
+#include <sys/namei.h>
+#include <sys/malloc.h>
+#include <miscfs/nullfs/null.h>
+
+#define LOG2_SIZEVNODE 7		/* log2(sizeof struct vnode) */
+#define	NNULLNODECACHE 16		/* hash chains; power of 2 for the mask */
+#define	NULL_NHASH(vp) ((((u_long)(vp))>>LOG2_SIZEVNODE) & (NNULLNODECACHE-1))
+
+/*
+ * Null layer cache:
+ * Each cache entry holds a reference to the lower vnode
+ * along with a pointer to the alias vnode.  When an
+ * entry is added the lower vnode is VREF'd.  When the
+ * alias is removed the lower vnode is vrele'd.
+ */
+
+/*
+ * Cache head; cast to (struct null_node *) when used as a chain's head.
+ */
+struct null_node_cache {
+	struct null_node	*ac_forw;	/* points at the head itself when empty */
+	struct null_node	*ac_back;	/* likewise */
+};
+
+static struct null_node_cache null_node_cache[NNULLNODECACHE];	/* indexed by NULL_NHASH() */
+
+/*
+ * Initialise cache headers: each chain starts out pointing at itself.
+ */
+nullfs_init()
+{
+	struct null_node_cache *ac;
+#ifdef NULLFS_DIAGNOSTIC
+	printf("nullfs_init\n");		/* printed during system boot */
+#endif
+	for (ac = null_node_cache; ac < null_node_cache + NNULLNODECACHE; ac++)
+		ac->ac_forw = ac->ac_back = (struct null_node *) ac;
+	return (0);	/* prototyped as int in null_vfsops.c */
+}
+
+/*
+ * Map a lower vnode's address to the head of its hash chain.
+ */
+static struct null_node_cache *
+null_node_hash(lowervp)
+	struct vnode *lowervp;
+{
+
+	return (null_node_cache + NULL_NHASH(lowervp));
+}
+
+/*
+ * Return a VREF'ed alias for lower vnode on mount mp if one exists, else 0.
+ */
+static struct vnode *
+null_node_find(mp, lowervp)
+	struct mount *mp;
+	struct vnode *lowervp;
+{
+	struct null_node_cache *hd;
+	struct null_node *a;
+	struct vnode *vp;
+
+	/*
+	 * Find hash base, and then search the (two-way) linked
+	 * list looking for a null_node structure which is referencing
+	 * the lower vnode.  If found, then increment the null_node
+	 * reference count (but NOT the lower vnode's VREF counter).
+	 */
+	hd = null_node_hash(lowervp);
+loop:
+	for (a = hd->ac_forw; a != (struct null_node *) hd; a = a->null_forw) {
+		if (a->null_lowervp == lowervp && NULLTOV(a)->v_mount == mp) {
+			vp = NULLTOV(a);
+			/*
+			 * We need vget for the VXLOCK
+			 * stuff, but we don't want to lock
+			 * the lower node.
+			 */
+			if (vget(vp, 0)) {
+				printf ("null_node_find: vget failed.\n");
+				goto loop;	/* rescan the chain from its head */
+			};
+			return (vp);
+		}
+	}
+
+	return NULL;	/* no alias for lowervp on this mount */
+}
+
+
+/*
+ * Make a new null_node for lowervp on mount mp and return its alias
+ * vnode in *vpp.  If another alias for lowervp appeared meanwhile, that
+ * one is returned instead.  Maintain a reference to (lowervp).
+ */
+static int
+null_node_alloc(mp, lowervp, vpp)
+	struct mount *mp;
+	struct vnode *lowervp;
+	struct vnode **vpp;
+{
+	struct null_node_cache *hd;
+	struct null_node *xp;
+	struct vnode *othervp, *vp;
+	int error;
+
+	if (error = getnewvnode(VT_NULL, mp, null_vnodeop_p, vpp))
+		return (error);	/* lowervp has not been touched */
+	vp = *vpp;
+
+	MALLOC(xp, struct null_node *, sizeof(struct null_node), M_TEMP, M_WAITOK);
+	vp->v_type = lowervp->v_type;
+	xp->null_vnode = vp;
+	vp->v_data = xp;
+	xp->null_lowervp = lowervp;
+	/*
+	 * Before we insert our new node onto the hash chains,
+	 * check to see if someone else has beaten us to it.
+	 * (We could have slept in MALLOC.)
+	 */
+	if (othervp = null_node_find(mp, lowervp)) {	/* takes (mp, lowervp) */
+		FREE(xp, M_TEMP);
+		vp->v_type = VBAD;	/* node is discarded */
+		vp->v_usecount = 0;	/* XXX */
+		*vpp = othervp;
+		return 0;
+	};
+	VREF(lowervp);   /* Extra VREF will be vrele'd in null_node_create */
+	hd = null_node_hash(lowervp);
+	insque(xp, hd);
+	return 0;
+}
+
+
+/*
+ * Try to find an existing null_node vnode referring to lowervp on
+ * mount mp; otherwise make a new one.  The alias comes back in *newvpp,
+ * and on success one reference to lowervp is released (vrele).
+ */
+int
+null_node_create(mp, lowervp, newvpp)
+	struct mount *mp;
+	struct vnode *lowervp;
+	struct vnode **newvpp;
+{
+	struct vnode *aliasvp;
+
+	if (aliasvp = null_node_find(mp, lowervp)) {
+		/*
+		 * null_node_find has taken another reference
+		 * to the alias vnode.
+		 */
+#ifdef NULLFS_DIAGNOSTIC
+		vprint("null_node_create: exists", aliasvp);
+#endif
+		/* VREF(aliasvp); --- done in null_node_find */
+	} else {
+		int error;
+
+		/*
+		 * Get new vnode.
+		 */
+#ifdef NULLFS_DIAGNOSTIC
+		printf("null_node_create: create new alias vnode\n");
+#endif
+
+		/*
+		 * Make new vnode reference the null_node.
+		 */
+		if (error = null_node_alloc(mp, lowervp, &aliasvp))
+			return error;	/* *newvpp is left unset */
+
+		/*
+		 * aliasvp is already VREF'd by getnewvnode()
+		 */
+	}
+
+	vrele(lowervp);
+
+#ifdef DIAGNOSTIC
+	if (lowervp->v_usecount < 1) {
+		/* Should never happen... */
+		vprint ("null_node_create: alias ", aliasvp);
+		vprint ("null_node_create: lower ", lowervp);
+		printf ("null_node_create: lower has 0 usecount.\n");
+		panic ("null_node_create: lower has 0 usecount.");
+	};
+#endif
+
+#ifdef NULLFS_DIAGNOSTIC
+	vprint("null_node_create: alias", aliasvp);
+	vprint("null_node_create: lower", lowervp);
+#endif
+
+	*newvpp = aliasvp;
+	return (0);
+}
+#ifdef NULLFS_DIAGNOSTIC	/* checker behind NULLVPTOLOWERVP() in null.h */
+struct vnode *
+null_checkvp(vp, fil, lno)
+	struct vnode *vp;
+	char *fil;	/* caller's __FILE__; used only in the notyet trace */
+	int lno;	/* caller's __LINE__; likewise */
+{
+	struct null_node *a = VTONULL(vp);
+#ifdef notyet
+	/*
+	 * Can't do this check because vop_reclaim runs
+	 * with a funny vop vector.
+	 */
+	if (vp->v_op != null_vnodeop_p) {
+		printf ("null_checkvp: on non-null-node\n");
+		while (null_checkvp_barrier) /*WAIT*/ ;
+		panic("null_checkvp");
+	};
+#endif
+	if (a->null_lowervp == NULL) {
+		/* Should never happen */
+		int i; u_long *p;
+		printf("vp = %x, ZERO ptr\n", vp);
+		for (p = (u_long *) a, i = 0; i < 8; i++)
+			printf(" %x", p[i]);
+		printf("\n");
+		/* wait for debugger; NOTE(review): no declaration of the barrier in this file */
+		while (null_checkvp_barrier) /*WAIT*/ ;
+		panic("null_checkvp");
+	}
+	if (a->null_lowervp->v_usecount < 1) {
+		int i; u_long *p;
+		printf("vp = %x, unref'ed lowervp\n", vp);
+		for (p = (u_long *) a, i = 0; i < 8; i++)
+			printf(" %x", p[i]);
+		printf("\n");
+		/* wait for debugger; NOTE(review): dump covers 8 words, null_node has 4 pointers */
+		while (null_checkvp_barrier) /*WAIT*/ ;
+		panic ("null with unref'ed lowervp");
+	};
+#ifdef notyet
+	printf("null %x/%d -> %x/%d [%s, %d]\n",
+	        NULLTOV(a), NULLTOV(a)->v_usecount,
+		a->null_lowervp, a->null_lowervp->v_usecount,
+		fil, lno);
+#endif
+	return a->null_lowervp;	/* same value the non-diagnostic macro yields */
+}
+#endif
diff --git a/sys/fs/nullfs/null_vfsops.c b/sys/fs/nullfs/null_vfsops.c
new file mode 100644
index 00000000000..b0d2df75cda
--- /dev/null
+++ b/sys/fs/nullfs/null_vfsops.c
@@ -0,0 +1,366 @@
+/*
+ * Copyright (c) 1992, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software donated to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)null_vfsops.c	8.2 (Berkeley) 1/21/94
+ *
+ * @(#)lofs_vfsops.c	1.2 (Berkeley) 6/18/92
+ * $Id: lofs_vfsops.c,v 1.9 1992/05/30 10:26:24 jsp Exp jsp $
+ */
+
+/*
+ * Null Layer
+ * (See null_vnops.c for a description of what this does.)
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/vnode.h>
+#include <sys/mount.h>
+#include <sys/namei.h>
+#include <sys/malloc.h>
+#include <miscfs/nullfs/null.h>
+
+/*
+ * Mount null layer over the path named by the user-supplied null_args.
+ */
+int
+nullfs_mount(mp, path, data, ndp, p)
+	struct mount *mp;
+	char *path;
+	caddr_t data;
+	struct nameidata *ndp;
+	struct proc *p;
+{
+	int error = 0;
+	struct null_args args;
+	struct vnode *lowerrootvp, *vp;
+	struct vnode *nullm_rootvp;
+	struct null_mount *xmp;
+	u_int size;
+
+#ifdef NULLFS_DIAGNOSTIC
+	printf("nullfs_mount(mp = %x)\n", mp);
+#endif
+
+	/*
+	 * Update is not supported
+	 */
+	if (mp->mnt_flag & MNT_UPDATE) {
+		return (EOPNOTSUPP);
+		/* return VFS_MOUNT(MOUNTTONULLMOUNT(mp)->nullm_vfs, path, data, ndp, p);*/
+	}
+
+	/*
+	 * Get argument
+	 */
+	if (error = copyin(data, (caddr_t)&args, sizeof(struct null_args)))
+		return (error);
+
+	/*
+	 * Find lower node
+	 */
+	NDINIT(ndp, LOOKUP, FOLLOW|WANTPARENT|LOCKLEAF,
+		UIO_USERSPACE, args.target, p);
+	if (error = namei(ndp))
+		return (error);
+
+	/*
+	 * Sanity check on lower vnode
+	 */
+	lowerrootvp = ndp->ni_vp;
+
+	vrele(ndp->ni_dvp);
+	ndp->ni_dvp = NULL;
+
+	xmp = (struct null_mount *) malloc(sizeof(struct null_mount),
+				M_UFSMNT, M_WAITOK);	/* XXX */
+
+	/*
+	 * Save reference to underlying FS
+	 */
+	xmp->nullm_vfs = lowerrootvp->v_mount;
+
+	/*
+	 * Save reference.  Each mount also holds
+	 * a reference on the root vnode.
+	 */
+	error = null_node_create(mp, lowerrootvp, &vp);
+	/*
+	 * Make sure the node alias worked; vp is not set on failure,
+	 * so the lower vnode is the one to unlock and release.
+	 */
+	if (error) {
+		VOP_UNLOCK(lowerrootvp);
+		vrele(lowerrootvp);
+		free(xmp, M_UFSMNT);	/* XXX */
+		return (error);
+	}
+	/* Unlock the node (either the lower or the alias) */
+	VOP_UNLOCK(vp);
+
+	/*
+	 * Keep a held reference to the root vnode.
+	 * It is vrele'd in nullfs_unmount.
+	 */
+	nullm_rootvp = vp;
+	nullm_rootvp->v_flag |= VROOT;
+	xmp->nullm_rootvp = nullm_rootvp;
+	if (NULLVPTOLOWERVP(nullm_rootvp)->v_mount->mnt_flag & MNT_LOCAL)
+		mp->mnt_flag |= MNT_LOCAL;
+	mp->mnt_data = (qaddr_t) xmp;
+	getnewfsid(mp, MOUNT_LOFS);	/* NOTE(review): lofs type id used here - confirm */
+
+	(void) copyinstr(path, mp->mnt_stat.f_mntonname, MNAMELEN - 1, &size);
+	bzero(mp->mnt_stat.f_mntonname + size, MNAMELEN - size);
+	(void) copyinstr(args.target, mp->mnt_stat.f_mntfromname, MNAMELEN - 1,
+	    &size);
+	bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size);
+#ifdef NULLFS_DIAGNOSTIC
+	printf("nullfs_mount: lower %s, alias at %s\n",
+		mp->mnt_stat.f_mntfromname, mp->mnt_stat.f_mntonname);
+#endif
+	return (0);
+}
+
+/*
+ * VFS start.  Nothing needed here - the start routine
+ * on the underlying filesystem will have been called
+ * when that filesystem was mounted.
+ */
+int
+nullfs_start(mp, flags, p)
+	struct mount *mp;
+	int flags;
+	struct proc *p;
+{
+	return (0);	/* no argument is examined */
+	/* return VFS_START(MOUNTTONULLMOUNT(mp)->nullm_vfs, flags, p); */
+}
+
+/*
+ * Free reference to null layer: flush its vnodes, drop the root, free data.
+ */
+int
+nullfs_unmount(mp, mntflags, p)
+	struct mount *mp;
+	int mntflags;
+	struct proc *p;
+{
+	struct vnode *nullm_rootvp = MOUNTTONULLMOUNT(mp)->nullm_rootvp;
+	int error;
+	int flags = 0;	/* flags handed to vflush() */
+	extern int doforce;
+
+#ifdef NULLFS_DIAGNOSTIC
+	printf("nullfs_unmount(mp = %x)\n", mp);
+#endif
+
+	if (mntflags & MNT_FORCE) {
+		/* lofs can never be rootfs so don't check for it */
+		if (!doforce)
+			return (EINVAL);
+		flags |= FORCECLOSE;
+	}
+
+	/*
+	 * Clear out buffer cache.  I don't think we
+	 * ever get anything cached at this level at the
+	 * moment, but who knows...
+	 */
+#if 0
+	mntflushbuf(mp, 0); 
+	if (mntinvalbuf(mp, 1))
+		return (EBUSY);
+#endif
+	if (nullm_rootvp->v_usecount > 1)
+		return (EBUSY);	/* root is referenced beyond the mount's own hold */
+	if (error = vflush(mp, nullm_rootvp, flags))
+		return (error);
+
+#ifdef NULLFS_DIAGNOSTIC
+	vprint("alias root of lower", nullm_rootvp);
+#endif	 
+	/*
+	 * Release reference on underlying root vnode
+	 */
+	vrele(nullm_rootvp);
+	/*
+	 * And blow it away for future re-use
+	 */
+	vgone(nullm_rootvp);
+	/*
+	 * Finally, throw away the null_mount structure
+	 */
+	free(mp->mnt_data, M_UFSMNT);	/* XXX */
+	mp->mnt_data = 0;
+	return 0;
+}
+
+int
+nullfs_root(mp, vpp)
+	struct mount *mp;
+	struct vnode **vpp;
+{
+	struct vnode *rootvp = MOUNTTONULLMOUNT(mp)->nullm_rootvp;
+
+#ifdef NULLFS_DIAGNOSTIC
+	printf("nullfs_root(mp = %x, vp = %x->%x)\n", mp,
+			MOUNTTONULLMOUNT(mp)->nullm_rootvp,
+			NULLVPTOLOWERVP(MOUNTTONULLMOUNT(mp)->nullm_rootvp)
+			);
+#endif
+
+	/*
+	 * Hand the mount's root alias vnode back through *vpp,
+	 * with an added reference and locked.
+	 */
+	VREF(rootvp);
+	VOP_LOCK(rootvp);
+	*vpp = rootvp;
+	return (0);
+}
+
+int
+nullfs_quotactl(mp, cmd, uid, arg, p)	/* passed on to the lower mount */
+	struct mount *mp;
+	int cmd;
+	uid_t uid;
+	caddr_t arg;
+	struct proc *p;
+{
+	return VFS_QUOTACTL(MOUNTTONULLMOUNT(mp)->nullm_vfs, cmd, uid, arg, p);
+}
+
+int
+nullfs_statfs(mp, sbp, p)
+	struct mount *mp;
+	struct statfs *sbp;
+	struct proc *p;
+{
+	struct statfs lower;
+	int error;
+
+#ifdef NULLFS_DIAGNOSTIC
+	printf("nullfs_statfs(mp = %x, vp = %x->%x)\n", mp,
+			MOUNTTONULLMOUNT(mp)->nullm_rootvp,
+			NULLVPTOLOWERVP(MOUNTTONULLMOUNT(mp)->nullm_rootvp)
+			);
+#endif
+
+	/* Ask the lower filesystem, starting from a zeroed buffer. */
+	bzero(&lower, sizeof(lower));
+	if ((error = VFS_STATFS(MOUNTTONULLMOUNT(mp)->nullm_vfs, &lower, p)) != 0)
+		return (error);
+
+	/* now copy across the "interesting" information and fake the rest */
+	sbp->f_type = lower.f_type;
+	sbp->f_flags = lower.f_flags;
+	sbp->f_bsize = lower.f_bsize;
+	sbp->f_iosize = lower.f_iosize;
+	sbp->f_blocks = lower.f_blocks;
+	sbp->f_bfree = lower.f_bfree;
+	sbp->f_bavail = lower.f_bavail;
+	sbp->f_files = lower.f_files;
+	sbp->f_ffree = lower.f_ffree;
+	if (sbp == &mp->mnt_stat)
+		return (0);
+	/* Separate buffer: give it this mount's fsid and names. */
+	bcopy(&mp->mnt_stat.f_fsid, &sbp->f_fsid, sizeof(sbp->f_fsid));
+	bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN);
+	bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN);
+	return (0);
+}
+
+int
+nullfs_sync(mp, waitfor, cred, p)
+	struct mount *mp;
+	int waitfor;
+	struct ucred *cred;
+	struct proc *p;
+{
+	/*
+	 * XXX - Assumes no data cached at null layer, so nothing is flushed.
+	 */
+	return (0);
+}
+
+int
+nullfs_vget(mp, ino, vpp)
+	struct mount *mp;
+	ino_t ino;
+	struct vnode **vpp;
+{
+	/* Passed unchanged to the lower mount; *vpp is whatever that sets. */
+	return VFS_VGET(MOUNTTONULLMOUNT(mp)->nullm_vfs, ino, vpp);
+}
+
+int
+nullfs_fhtovp(mp, fidp, nam, vpp, exflagsp, credanonp)
+	struct mount *mp;
+	struct fid *fidp;
+	struct mbuf *nam;
+	struct vnode **vpp;
+	int *exflagsp;
+	struct ucred**credanonp;
+{
+	/* Passed unchanged to the lower mount recorded in nullm_vfs. */
+	return VFS_FHTOVP(MOUNTTONULLMOUNT(mp)->nullm_vfs, fidp, nam, vpp, exflagsp,credanonp);
+}
+
+int
+nullfs_vptofh(vp, fhp)
+	struct vnode *vp;
+	struct fid *fhp;
+{
+	return VFS_VPTOFH(NULLVPTOLOWERVP(vp), fhp);	/* asked of the lower vnode */
+}
+
+int nullfs_init __P((void));	/* defined in null_subr.c */
+
+struct vfsops null_vfsops = {	/* declared extern in null.h */
+	nullfs_mount,
+	nullfs_start,
+	nullfs_unmount,
+	nullfs_root,
+	nullfs_quotactl,
+	nullfs_statfs,
+	nullfs_sync,
+	nullfs_vget,
+	nullfs_fhtovp,
+	nullfs_vptofh,
+	nullfs_init,
+};
diff --git a/sys/fs/nullfs/null_vnops.c b/sys/fs/nullfs/null_vnops.c
new file mode 100644
index 00000000000..115ff6f4643
--- /dev/null
+++ b/sys/fs/nullfs/null_vnops.c
@@ -0,0 +1,462 @@
+/*
+ * Copyright (c) 1992, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * John Heidemann of the UCLA Ficus project.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)null_vnops.c	8.1 (Berkeley) 6/10/93
+ *
+ * Ancestors:
+ *	@(#)lofs_vnops.c	1.2 (Berkeley) 6/18/92
+ *	$Id: lofs_vnops.c,v 1.11 1992/05/30 10:05:43 jsp Exp jsp $
+ *	...and...
+ *	@(#)null_vnodeops.c 1.20 92/07/07 UCLA Ficus project
+ */
+
+/*
+ * Null Layer
+ *
+ * (See mount_null(8) for more information.)
+ *
+ * The null layer duplicates a portion of the file system
+ * name space under a new name.  In this respect, it is
+ * similar to the loopback file system.  It differs from
+ * the loopback fs in two respects:  it is implemented using
+ * a stackable layers technique, and its "null-node"s stack above
+ * all lower-layer vnodes, not just over directory vnodes.
+ *
+ * The null layer has two purposes.  First, it serves as a demonstration
+ * of layering by providing a layer which does nothing.  (It actually
+ * does everything the loopback file system does, which is slightly
+ * more than nothing.)  Second, the null layer can serve as a prototype
+ * layer.  Since it provides all necessary layer framework,
+ * new file system layers can be created very easily by starting
+ * with a null layer.
+ *
+ * The remainder of this man page examines the null layer as a basis
+ * for constructing new layers.
+ *
+ *
+ * INSTANTIATING NEW NULL LAYERS
+ *
+ * New null layers are created with mount_null(8).
+ * Mount_null(8) takes two arguments, the pathname
+ * of the lower vfs (target-pn) and the pathname where the null
+ * layer will appear in the namespace (alias-pn).  After
+ * the null layer is put into place, the contents
+ * of target-pn subtree will be aliased under alias-pn.
+ *
+ *
+ * OPERATION OF A NULL LAYER
+ *
+ * The null layer is the minimum file system layer,
+ * simply bypassing all possible operations to the lower layer
+ * for processing there.  The majority of its activity centers
+ * on the bypass routine, through which nearly all vnode operations
+ * pass.
+ *
+ * The bypass routine accepts arbitrary vnode operations for
+ * handling by the lower layer.  It begins by examining vnode
+ * operation arguments and replacing any null-nodes by their
+ * lower-layer equivalents.  It then invokes the operation
+ * on the lower layer.  Finally, it replaces the null-nodes
+ * in the arguments and, if a vnode is returned by the operation,
+ * stacks a null-node on top of the returned vnode.
+ *
+ * Although bypass handles most operations, 
+ * vop_getattr, _inactive, _reclaim, and _print are not bypassed.
+ * Vop_getattr must change the fsid being returned.
+ * Vop_inactive and vop_reclaim are not bypassed so that
+ * they can handle freeing null-layer specific data.
+ * Vop_print is not bypassed to avoid excessive debugging
+ * information.
+ *
+ *
+ * INSTANTIATING VNODE STACKS
+ *
+ * Mounting associates the null layer with a lower layer,
+ * effectively stacking two VFSes.  Vnode stacks are instead
+ * created on demand as files are accessed.
+ *
+ * The initial mount creates a single vnode stack for the
+ * root of the new null layer.  All other vnode stacks
+ * are created as a result of vnode operations on
+ * this or other null vnode stacks.
+ *
+ * New vnode stacks come into existence as a result of
+ * an operation which returns a vnode.  
+ * The bypass routine stacks a null-node above the new
+ * vnode before returning it to the caller.
+ *
+ * For example, imagine mounting a null layer with
+ * "mount_null /usr/include /dev/layer/null".
+ * Changing directory to /dev/layer/null will assign
+ * the root null-node (which was created when the null layer was mounted).
+ * Now consider opening "sys".  A vop_lookup would be
+ * done on the root null-node.  This operation would bypass through
+ * to the lower layer which would return a vnode representing 
+ * the UFS "sys".  Null_bypass then builds a null-node
+ * aliasing the UFS "sys" and returns this to the caller.
+ * Later operations on the null-node "sys" will repeat this
+ * process when constructing other vnode stacks.
+ *
+ *
+ * CREATING OTHER FILE SYSTEM LAYERS
+ *
+ * One of the easiest ways to construct new file system layers is to make
+ * a copy of the null layer, rename all files and variables, and
+ * then begin modifying the copy.  Sed can be used to easily rename
+ * all variables.
+ *
+ * The umap layer is an example of a layer descended from the 
+ * null layer.
+ *
+ *
+ * INVOKING OPERATIONS ON LOWER LAYERS
+ *
+ * There are two techniques to invoke operations on a lower layer 
+ * when the operation cannot be completely bypassed.  Each method
+ * is appropriate in different situations.  In both cases,
+ * it is the responsibility of the aliasing layer to make
+ * the operation arguments "correct" for the lower layer
+ * by mapping any vnode arguments to the lower layer.
+ *
+ * The first approach is to call the aliasing layer's bypass routine.
+ * This method is most suitable when you wish to invoke the operation
+ * currently being handled on the lower layer.  It has the advantage
+ * that the bypass routine already must do argument mapping.
+ * An example of this is null_getattr in the null layer.
+ *
+ * A second approach is to directly invoke vnode operations on
+ * the lower layer with the VOP_OPERATIONNAME interface.
+ * The advantage of this method is that it is easy to invoke
+ * arbitrary operations on the lower layer.  The disadvantage
+ * is that vnode arguments must be manually mapped.
+ *
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/vnode.h>
+#include <sys/mount.h>
+#include <sys/namei.h>
+#include <sys/malloc.h>
+#include <sys/buf.h>
+#include <miscfs/nullfs/null.h>
+
+
+int null_bug_bypass = 0;   /* for debugging: enables bypass printf'ing */
+
+/*
+ * This is the 10-Apr-92 bypass routine.
+ *    This version has been optimized for speed, throwing away some
+ * safety checks.  It should still always work, but it's not as
+ * robust to programmer errors.
+ *    Define SAFETY to include some error checking code.
+ *
+ * In general, we map all vnodes going down and unmap them on the way back.
+ * As an exception to this, vnodes can be marked "unmapped" by setting
+ * the Nth bit in operation's vdesc_flags.
+ *
+ * Also, some BSD vnode operations have the side effect of vrele'ing
+ * their arguments.  With stacking, the reference counts are held
+ * by the upper node, not the lower one, so we must handle these
+ * side-effects here.  This is not of concern in Sun-derived systems
+ * since there are no such side-effects.
+ *
+ * This makes the following assumptions:
+ * - only one returned vpp
+ * - no INOUT vpp's (Sun's vop_open has one of these)
+ * - the vnode operation vector of the first vnode should be used
+ *   to determine what implementation of the op should be invoked
+ * - all mapped vnodes are of our vnode-type (NEEDSWORK:
+ *   problems on rmdir'ing mount points and renaming?)
+ */ 
+int
+null_bypass(ap)
+	struct vop_generic_args /* {
+		struct vnodeop_desc *a_desc;
+		<other random data follows, presumably>
+	} */ *ap;
+{
+	extern int (**null_vnodeop_p)();  /* not extern, really "forward" */
+	register struct vnode **this_vp_p;
+	int error;
+	struct vnode *old_vps[VDESC_MAX_VPS];	/* our vnodes; NULL = not mapped */
+	struct vnode **vps_p[VDESC_MAX_VPS];	/* where each vp lives inside ap */
+	struct vnode ***vppp;
+	struct vnodeop_desc *descp = ap->a_desc;
+	int reles, i;
+
+	if (null_bug_bypass)
+		printf ("null_bypass: %s\n", descp->vdesc_name);
+
+#ifdef SAFETY
+	/*
+	 * We require at least one vp.
+	 */
+	if (descp->vdesc_vp_offsets == NULL ||
+	    descp->vdesc_vp_offsets[0] == VDESC_NO_OFFSET)
+		panic ("null_bypass: no vp's in map.\n");
+#endif
+
+	/*
+	 * Map the vnodes going in.
+	 * Later, we'll invoke the operation based on
+	 * the first mapped vnode's operation vector.
+	 */
+	reles = descp->vdesc_flags;
+	for (i = 0; i < VDESC_MAX_VPS; reles >>= 1, i++) {
+		if (descp->vdesc_vp_offsets[i] == VDESC_NO_OFFSET)
+			break;   /* bail out at end of list */
+		vps_p[i] = this_vp_p =
+			VOPARG_OFFSETTO(struct vnode**,descp->vdesc_vp_offsets[i],ap);
+		/*
+		 * Only the first vnode is guaranteed present and of our
+		 * type: later ones may be NULL or foreign, so leave those
+		 * unmapped.  (We must always map first vp or vclean fails.)
+		 */
+		if (i && (*this_vp_p == NULL ||
+		    (*this_vp_p)->v_op != null_vnodeop_p)) {
+			old_vps[i] = NULL;
+		} else {
+			old_vps[i] = *this_vp_p;
+			*(vps_p[i]) = NULLVPTOLOWERVP(*this_vp_p);
+			/*
+			 * XXX - Several operations have the side effect
+			 * of vrele'ing their vp's.  We must account for
+			 * that.  (This should go away in the future.)
+			 */
+			if (reles & 1)
+				VREF(*this_vp_p);
+		}
+	}
+
+	/*
+	 * Call the operation on the lower layer
+	 * with the modified argument structure.
+	 */
+	error = VCALL(*(vps_p[0]), descp->vdesc_offset, ap);
+
+	/*
+	 * Maintain the illusion of call-by-value
+	 * by restoring vnodes in the argument structure
+	 * to their original value.
+	 */
+	reles = descp->vdesc_flags;
+	for (i = 0; i < VDESC_MAX_VPS; reles >>= 1, i++) {
+		if (descp->vdesc_vp_offsets[i] == VDESC_NO_OFFSET)
+			break;   /* bail out at end of list */
+		if (old_vps[i]) {
+			*(vps_p[i]) = old_vps[i];
+			if (reles & 1)
+				vrele(*(vps_p[i]));
+		}
+	}
+
+	/*
+	 * Map the possible out-going vpp
+	 * (Assumes that the lower layer always returns
+	 * a VREF'ed vpp unless it gets an error.)
+	 */
+	if (descp->vdesc_vpp_offset != VDESC_NO_OFFSET &&
+	    !(descp->vdesc_flags & VDESC_NOMAP_VPP) &&
+	    !error) {
+		/*
+		 * XXX - even though some ops have vpp returned vp's,
+		 * several ops actually vrele this before returning.
+		 * We must avoid these ops.
+		 * (This should go away when these ops are regularized.)
+		 */
+		if (descp->vdesc_flags & VDESC_VPP_WILLRELE)
+			goto out;
+		vppp = VOPARG_OFFSETTO(struct vnode***,
+				 descp->vdesc_vpp_offset,ap);
+		error = null_node_create(old_vps[0]->v_mount, **vppp, *vppp);
+	}
+
+ out:
+	return (error);
+}
+
+
+/*
+ * Getattr is bypassed like any other op; only va_fsid is rewritten after.
+ */
+int
+null_getattr(ap)
+	struct vop_getattr_args /* {
+		struct vnode *a_vp;
+		struct vattr *a_vap;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	int error = null_bypass(ap);
+
+	/* Relies on null_bypass having put our own vnode back in a_vp. */
+	if (error == 0)
+		ap->a_vap->va_fsid = ap->a_vp->v_mount->mnt_stat.f_fsid.val[0];
+	return (error);
+}
+
+
+int
+null_inactive(ap)
+	struct vop_inactive_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+	/*
+	 * Do nothing (and _don't_ bypass); ap is not examined and 0 is
+	 * always returned.  Wait to vrele lowervp until reclaim,
+	 * so that until then our null_node is in the
+	 * cache and reusable.
+	 *
+	 * NEEDSWORK: Someday, consider inactive'ing
+	 * the lowervp and then trying to reactivate it
+	 * with capabilities (v_id)
+	 * like they do in the name lookup cache code.
+	 * That's too much work for now.
+	 */
+	return (0);
+}
+
+int
+null_reclaim(ap)
+	struct vop_reclaim_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+	struct vnode *vp = ap->a_vp;
+	struct null_node *xp = VTONULL(vp);
+	struct vnode *lowervp = xp->null_lowervp; /* read before the FREE below */
+
+	/*
+	 * Note: in vop_reclaim, vp->v_op == dead_vnodeop_p,
+	 * so we can't call VOPs on ourself.
+	 */
+	/* After this assignment, this node will not be re-used. */
+	xp->null_lowervp = NULL;
+	remque(xp);			/* unlink xp from the queue it is on */
+	FREE(vp->v_data, M_TEMP);	/* presumably xp itself - confirm VTONULL */
+	vp->v_data = NULL;
+	vrele (lowervp);		/* the vrele that null_inactive put off */
+	return (0);
+}
+
+
+int
+null_print(ap)
+	struct vop_print_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+	register struct vnode *vp = ap->a_vp;
+	printf ("\ttag VT_NULLFS, vp=%x, lowervp=%x\n", vp, NULLVPTOLOWERVP(vp));
+	return (0);		/* the lower layer's print op is not invoked */
+}
+
+
+/*
+ * XXX - vop_strategy must be hand coded because it has no
+ * vnode in its arguments: the vnode is reached through the buf,
+ * so b_vp is pointed at the lower vnode just for the call.
+ * This goes away with a merged VM/buffer cache.
+ */
+int
+null_strategy(ap)
+	struct vop_strategy_args /* {
+		struct buf *a_bp;
+	} */ *ap;
+{
+	struct buf *bufp = ap->a_bp;
+	struct vnode *uppervp = bufp->b_vp;	/* our vnode, put back below */
+	int rv;
+
+	/* Aim the buf at the lower vnode and invoke the op there. */
+	bufp->b_vp = NULLVPTOLOWERVP(uppervp);
+	rv = VOP_STRATEGY(bufp);
+
+	/* Put our vnode back in the buf before returning. */
+	bufp->b_vp = uppervp;
+	return (rv);
+}
+
+
+/*
+ * XXX - like vop_strategy, vop_bwrite must be hand coded because it has no
+ * vnode in its arguments: b_vp is swapped to the lower vnode around the
+ * call and swapped back afterwards.
+ * This goes away with a merged VM/buffer cache.
+ */
+int
+null_bwrite(ap)
+	struct vop_bwrite_args /* {
+		struct buf *a_bp;
+	} */ *ap;
+{
+	struct buf *bufp = ap->a_bp;
+	struct vnode *uppervp = bufp->b_vp;	/* our vnode, put back below */
+	int rv;
+
+	/* Aim the buf at the lower vnode and invoke the op there. */
+	bufp->b_vp = NULLVPTOLOWERVP(uppervp);
+	rv = VOP_BWRITE(bufp);
+
+	/* Put our vnode back in the buf before returning. */
+	bufp->b_vp = uppervp;
+	return (rv);
+}
+
+/*
+ * Global vfs data structures
+ */
+int (**null_vnodeop_p)();
+struct vnodeopv_entry_desc null_vnodeop_entries[] = {
+	{ &vop_default_desc, null_bypass },	/* ops without an entry below */
+
+	{ &vop_getattr_desc, null_getattr },	/* bypass, then fix va_fsid */
+	{ &vop_inactive_desc, null_inactive },	/* deliberately does nothing */
+	{ &vop_reclaim_desc, null_reclaim },	/* frees v_data, vreles lowervp */
+	{ &vop_print_desc, null_print },
+
+	{ &vop_strategy_desc, null_strategy },	/* buf argument, no vnode */
+	{ &vop_bwrite_desc, null_bwrite },	/* buf argument, no vnode */
+
+	{ (struct vnodeop_desc*)NULL, (int(*)())NULL }
+};
+struct vnodeopv_desc null_vnodeop_opv_desc =
+	{ &null_vnodeop_p, null_vnodeop_entries };
diff --git a/sys/fs/portalfs/portal.h b/sys/fs/portalfs/portal.h
new file mode 100644
index 00000000000..38d7ee0cdd2
--- /dev/null
+++ b/sys/fs/portalfs/portal.h
@@ -0,0 +1,72 @@
+/*
+ * Copyright (c) 1992, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software donated to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)portal.h	8.4 (Berkeley) 1/21/94
+ *
+ * $Id: portal.h,v 1.3 1992/05/30 10:05:24 jsp Exp jsp $
+ */
+
+struct portal_args {
+	char		*pa_config;	/* Config file name (user address) */
+	int		pa_socket;	/* Descriptor of socket to server */
+};
+
+struct portal_cred {
+	int		pcr_flag;		/* File open mode (a_mode) */
+	uid_t		pcr_uid;		/* From ucred (cr_uid) */
+	short		pcr_ngroups;		/* From ucred (cr_ngroups) */
+	gid_t		pcr_groups[NGROUPS];	/* From ucred, whole array */
+};
+
+#ifdef KERNEL
+struct portalmount {
+	struct vnode	*pm_root;	/* Root node */
+	struct file	*pm_server;	/* Held reference to server socket */
+};
+
+struct portalnode {
+	int		pt_size;	/* Length of Arg, counting its NUL */
+	char		*pt_arg;	/* Arg to send to server */
+	int		pt_fileid;	/* cookie; root has PORTAL_ROOTFILEID */
+};
+
+#define VFSTOPORTAL(mp)	((struct portalmount *)((mp)->mnt_data))
+#define	VTOPORTAL(vp) ((struct portalnode *)(vp)->v_data)
+
+#define PORTAL_ROOTFILEID	2	/* other nodes count up from this + 1 */
+
+extern int (**portal_vnodeop_p)();
+extern struct vfsops portal_vfsops;
+#endif /* KERNEL */
diff --git a/sys/fs/portalfs/portal_vfsops.c b/sys/fs/portalfs/portal_vfsops.c
new file mode 100644
index 00000000000..39e8563009b
--- /dev/null
+++ b/sys/fs/portalfs/portal_vfsops.c
@@ -0,0 +1,313 @@
+/*
+ * Copyright (c) 1992, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software donated to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)portal_vfsops.c	8.6 (Berkeley) 1/21/94
+ *
+ * $Id: portal_vfsops.c,v 1.5 1992/05/30 10:25:27 jsp Exp jsp $
+ */
+
+/*
+ * Portal Filesystem
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/proc.h>
+#include <sys/filedesc.h>
+#include <sys/file.h>
+#include <sys/vnode.h>
+#include <sys/mount.h>
+#include <sys/namei.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/protosw.h>
+#include <sys/domain.h>
+#include <sys/un.h>
+#include <miscfs/portal/portal.h>
+
+int
+portal_init()
+{
+	/* No set-up is performed; always reports success. */
+	return (0);
+}
+
+/*
+ * Mount a portal: build the root vnode and hold the server's socket.
+ */
+int
+portal_mount(mp, path, data, ndp, p)
+	struct mount *mp;
+	char *path;
+	caddr_t data;
+	struct nameidata *ndp;
+	struct proc *p;
+{
+	struct file *fp;
+	struct portal_args args;
+	struct portalmount *fmp;
+	struct socket *so;
+	struct vnode *rvp;
+	u_int size;
+	int error;
+
+	/*
+	 * Updating an existing mount is not supported.
+	 */
+	if (mp->mnt_flag & MNT_UPDATE)
+		return (EOPNOTSUPP);
+
+	if (error = copyin(data, (caddr_t) &args, sizeof(struct portal_args)))
+		return (error);
+
+	if (error = getsock(p->p_fd, args.pa_socket, &fp))
+		return (error);
+	so = (struct socket *) fp->f_data;
+	if (so->so_proto->pr_domain->dom_family != AF_UNIX)
+		return (ESOCKTNOSUPPORT);
+
+	error = getnewvnode(VT_PORTAL, mp, portal_vnodeop_p, &rvp); /* XXX */
+	if (error)
+		return (error);
+	MALLOC(rvp->v_data, void *, sizeof(struct portalnode),
+		M_TEMP, M_WAITOK);
+
+	fmp = (struct portalmount *) malloc(sizeof(struct portalmount),
+				 M_UFSMNT, M_WAITOK);	/* XXX */
+	rvp->v_type = VDIR;
+	rvp->v_flag |= VROOT;
+	VTOPORTAL(rvp)->pt_arg = 0;
+	VTOPORTAL(rvp)->pt_size = 0;
+	VTOPORTAL(rvp)->pt_fileid = PORTAL_ROOTFILEID;
+	fmp->pm_root = rvp;
+	fmp->pm_server = fp; fp->f_count++;	/* dropped again in portal_unmount */
+
+	mp->mnt_flag |= MNT_LOCAL;
+	mp->mnt_data = (qaddr_t) fmp;
+	getnewfsid(mp, MOUNT_PORTAL);
+
+	(void)copyinstr(path, mp->mnt_stat.f_mntonname, MNAMELEN - 1, &size);
+	bzero(mp->mnt_stat.f_mntonname + size, MNAMELEN - size);
+	(void)copyinstr(args.pa_config,
+	    mp->mnt_stat.f_mntfromname, MNAMELEN - 1, &size);
+	bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size);
+
+#ifdef notdef
+	bzero(mp->mnt_stat.f_mntfromname, MNAMELEN);
+	bcopy("portal", mp->mnt_stat.f_mntfromname, sizeof("portal"));
+#endif
+
+	return (0);
+}
+
+int
+portal_start(mp, flags, p)
+	struct mount *mp;
+	int flags;
+	struct proc *p;
+{
+	/* Nothing is started; the arguments are ignored. */
+	return (0);
+}
+
+int
+portal_unmount(mp, mntflags, p)
+	struct mount *mp;
+	int mntflags;
+	struct proc *p;
+{
+	extern int doforce;
+	struct vnode *rootvp = VFSTOPORTAL(mp)->pm_root;
+	int error, flags = 0;
+
+	/* NOTE: a busy root vnode yields EBUSY below even under MNT_FORCE. */
+	if (mntflags & MNT_FORCE) {
+		/* portal can never be rootfs so don't check for it */
+		if (!doforce)
+			return (EINVAL);
+		flags |= FORCECLOSE;
+	}
+
+	/*
+	 * Clear out buffer cache.  I don't think we
+	 * ever get anything cached at this level at the
+	 * moment, but who knows...
+	 */
+#ifdef notyet
+	mntflushbuf(mp, 0); 
+	if (mntinvalbuf(mp, 1))
+		return (EBUSY);
+#endif
+	if (rootvp->v_usecount > 1)
+		return (EBUSY);
+	if (error = vflush(mp, rootvp, flags))
+		return (error);
+
+	/*
+	 * Release reference on underlying root vnode
+	 */
+	vrele(rootvp);
+	/*
+	 * And blow it away for future re-use
+	 */
+	vgone(rootvp);
+	/*
+	 * Shutdown the socket.  This will cause the select in the
+	 * daemon to wake up, and then the accept will get ECONNABORTED
+	 * which it interprets as a request to go and bury itself.
+	 */
+	soshutdown((struct socket *) VFSTOPORTAL(mp)->pm_server->f_data, 2);
+	/*
+	 * Discard reference to underlying file.  Must call closef because
+	 * this may be the last reference.
+	 */
+	closef(VFSTOPORTAL(mp)->pm_server, (struct proc *) 0);
+	/*
+	 * Finally, throw away the portalmount structure
+	 */
+	free(mp->mnt_data, M_UFSMNT);	/* XXX */
+	mp->mnt_data = 0;
+	return (0);
+}
+
+int
+portal_root(mp, vpp)
+	struct mount *mp;
+	struct vnode **vpp;
+{
+	struct vnode *rootvp = VFSTOPORTAL(mp)->pm_root;
+
+	/*
+	 * The caller receives the mount's root vnode with a fresh
+	 * reference (VREF) and with the vnode locked (VOP_LOCK).
+	 * Always succeeds.
+	 */
+	VREF(rootvp);
+	VOP_LOCK(rootvp);
+	*vpp = rootvp;
+	return (0);
+}
+
+int
+portal_quotactl(mp, cmd, uid, arg, p)
+	struct mount *mp;
+	int cmd;
+	uid_t uid;
+	caddr_t arg;
+	struct proc *p;
+{
+	/* Quota control is rejected unconditionally. */
+	return (EOPNOTSUPP);
+}
+
+int
+portal_statfs(mp, sbp, p)
+	struct mount *mp;
+	struct statfs *sbp;
+	struct proc *p;
+{
+	/* Every figure below is a fixed constant; nothing is measured. */
+	sbp->f_type = MOUNT_PORTAL;
+	sbp->f_flags = 0;
+	sbp->f_bsize = DEV_BSIZE;
+	sbp->f_iosize = DEV_BSIZE;
+	sbp->f_blocks = 2;		/* 1K to keep df happy */
+	sbp->f_bfree = 0;
+	sbp->f_bavail = 0;
+	sbp->f_files = 1;		/* Allow for "." */
+	sbp->f_ffree = 0;		/* No free file slots are reported */
+	if (sbp != &mp->mnt_stat) {	/* caller's own buffer: copy ids too */
+		bcopy(&mp->mnt_stat.f_fsid, &sbp->f_fsid, sizeof(sbp->f_fsid));
+		bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN);
+		bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN);
+	}
+	return (0);
+}
+
+int
+portal_sync(mp, waitfor)
+	struct mount *mp;
+	int waitfor;
+{
+	/* NOTE(review): nullfs_sync also declares (cred, p) - confirm. */
+	return (0);
+}
+
+int
+portal_vget(mp, ino, vpp)
+	struct mount *mp;
+	ino_t ino;
+	struct vnode **vpp;
+{
+	/* Lookup by inode number is rejected; *vpp is left untouched. */
+	return (EOPNOTSUPP);
+}
+
+int
+portal_fhtovp(mp, fhp, vpp)
+	struct mount *mp;
+	struct fid *fhp;
+	struct vnode **vpp;
+{
+	/* NOTE(review): nullfs_fhtovp declares six parameters - confirm. */
+	return (EOPNOTSUPP);
+}
+
+int
+portal_vptofh(vp, fhp)
+	struct vnode *vp;
+	struct fid *fhp;
+{
+	/* No file handle is produced; *fhp is left untouched. */
+	return (EOPNOTSUPP);
+}
+
+struct vfsops portal_vfsops = {	/* positional initializers */
+	portal_mount,
+	portal_start,
+	portal_unmount,
+	portal_root,
+	portal_quotactl,
+	portal_statfs,
+	portal_sync,
+	portal_vget,
+	portal_fhtovp,
+	portal_vptofh,
+	portal_init,
+};
diff --git a/sys/fs/portalfs/portal_vnops.c b/sys/fs/portalfs/portal_vnops.c
new file mode 100644
index 00000000000..5e170261e71
--- /dev/null
+++ b/sys/fs/portalfs/portal_vnops.c
@@ -0,0 +1,707 @@
+/*
+ * Copyright (c) 1992, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software donated to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)portal_vnops.c	8.8 (Berkeley) 1/21/94
+ *
+ * $Id: portal_vnops.c,v 1.4 1992/05/30 10:05:24 jsp Exp jsp $
+ */
+
+/*
+ * Portal Filesystem
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/types.h>
+#include <sys/time.h>
+#include <sys/proc.h>
+#include <sys/filedesc.h>
+#include <sys/vnode.h>
+#include <sys/file.h>
+#include <sys/stat.h>
+#include <sys/mount.h>
+#include <sys/malloc.h>
+#include <sys/namei.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/un.h>
+#include <sys/unpcb.h>
+#include <miscfs/portal/portal.h>
+
+static int portal_fileid = PORTAL_ROOTFILEID+1;	/* next pt_fileid to assign */
+
+static void
+portal_closefd(p, fd)
+	struct proc *p;
+	int fd;
+{
+	struct {
+		int fd;
+	} uap;
+	int err, retval;
+
+	uap.fd = fd;
+	err = close(p, &uap, &retval);
+	if (err == 0)
+		return;
+	/*
+	 * Failure is not expected and cannot be recovered from here,
+	 * so the only reaction is to report it.
+	 */
+	printf("portal_closefd: error = %d\n", err);
+}
+
+/*
+ * ap->a_dvp is the current namei directory,
+ * ap->a_cnp is the name to locate in that directory...
+ */
+int
+portal_lookup(ap)
+	struct vop_lookup_args /* {
+		struct vnode * a_dvp;
+		struct vnode ** a_vpp;
+		struct componentname * a_cnp;
+	} */ *ap;
+{
+	char *pname = ap->a_cnp->cn_nameptr;
+	struct portalnode *pt;
+	int error;
+	struct vnode *fvp = 0;
+	char *path;
+	int size;
+
+	if (ap->a_cnp->cn_namelen == 1 && *pname == '.') { /* "." is dvp itself */
+		*ap->a_vpp = ap->a_dvp;
+		VREF(ap->a_dvp);
+		/*VOP_LOCK(ap->a_dvp);*/
+		return (0);
+	}
+
+	/* Any other name gets a brand-new VREG vnode; nothing is searched. */
+	error = getnewvnode(VT_PORTAL, ap->a_dvp->v_mount, portal_vnodeop_p, &fvp);
+	if (error)
+		goto bad;
+	fvp->v_type = VREG;
+	MALLOC(fvp->v_data, void *, sizeof(struct portalnode),
+		M_TEMP, M_WAITOK);
+
+	pt = VTOPORTAL(fvp);
+	/*
+	 * Save all of the remaining pathname and
+	 * advance the namei next pointer to the end
+	 * of the string.
+	 */
+	for (size = 0, path = pname; *path; path++)
+		size++;
+	ap->a_cnp->cn_consume = size - ap->a_cnp->cn_namelen;
+
+	pt->pt_arg = malloc(size+1, M_TEMP, M_WAITOK);
+	pt->pt_size = size+1;			/* counts the trailing NUL */
+	bcopy(pname, pt->pt_arg, pt->pt_size);
+	pt->pt_fileid = portal_fileid++;
+
+	*ap->a_vpp = fvp;
+	/*VOP_LOCK(fvp);*/
+	return (0);
+
+bad:;
+	if (fvp) {
+		vrele(fvp);
+	}
+	*ap->a_vpp = NULL;
+	return (error);
+}
+
+static int
+portal_connect(so, so2)
+	struct socket *so;
+	struct socket *so2;
+{
+	/*
+	 * Modelled on unp_connect, bypassing the namei stuff.
+	 */
+	struct socket *newso;
+	struct unpcb *lpcb, *npcb;
+
+	/* Refuse unless there is a listening peer of the same socket type. */
+	if (so2 == 0)
+		return (ECONNREFUSED);
+	if (so->so_type != so2->so_type)
+		return (EPROTOTYPE);
+	if ((so2->so_options & SO_ACCEPTCONN) == 0)
+		return (ECONNREFUSED);
+
+	/* Ask the listener for a new socket to pair with so. */
+	newso = sonewconn(so2, 0);
+	if (newso == 0)
+		return (ECONNREFUSED);
+
+	/* Give the new socket a copy of the listener's unp_addr, if set. */
+	lpcb = sotounpcb(so2);
+	npcb = sotounpcb(newso);
+	if (lpcb->unp_addr)
+		npcb->unp_addr = m_copy(lpcb->unp_addr, 0, (int)M_COPYALL);
+
+	return (unp_connect2(so, newso));
+}
+
+int
+portal_open(ap)
+	struct vop_open_args /* {
+		struct vnode *a_vp;
+		int  a_mode;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct socket *so = 0;
+	struct portalnode *pt;
+	struct proc *p = ap->a_p;
+	struct vnode *vp = ap->a_vp;
+	int s;
+	struct uio auio;
+	struct iovec aiov[2];
+	int res;
+	struct mbuf *cm = 0;
+	struct cmsghdr *cmsg;
+	int newfds;
+	int *ip;
+	int fd;
+	int error;
+	int len;
+	struct portalmount *fmp;
+	struct file *fp;
+	struct portal_cred pcred;
+
+	/*
+	 * Nothing to do when opening the root node.
+	 */
+	if (vp->v_flag & VROOT)
+		return (0);
+
+	/*
+	 * Can't be opened unless the caller is set up
+	 * to deal with the side effects.  Check for this
+	 * by testing whether the p_dupfd has been set.
+	 */
+	if (p->p_dupfd >= 0)
+		return (ENODEV);
+
+	pt = VTOPORTAL(vp);
+	fmp = VFSTOPORTAL(vp->v_mount);
+
+	/*
+	 * Create a new socket.
+	 */
+	error = socreate(AF_UNIX, &so, SOCK_STREAM, 0);
+	if (error)
+		goto bad;
+
+	/*
+	 * Reserve some buffer space
+	 */
+	res = pt->pt_size + sizeof(pcred) + 512;	/* XXX */
+	error = soreserve(so, res, res);
+	if (error)
+		goto bad;
+
+	/*
+	 * Kick off connection
+	 */
+	error = portal_connect(so, (struct socket *)fmp->pm_server->f_data);
+	if (error)
+		goto bad;
+
+	/*
+	 * Wait for connection to complete
+	 */
+	/*
+	 * XXX: Since the mount point is holding a reference on the
+	 * underlying server socket, it is not easy to find out whether
+	 * the server process is still running.  To handle this problem
+	 * we loop waiting for the new socket to be connected (something
+	 * which will only happen if the server is still running) or for
+	 * the reference count on the server socket to drop to 1, which
+	 * will happen if the server dies.  Sleep for 5 second intervals
+	 * and keep polling the reference count.   XXX.
+	 */
+	s = splnet();
+	while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
+		if (fmp->pm_server->f_count == 1) {
+			error = ECONNREFUSED;
+			splx(s);
+			goto bad;
+		}
+		(void) tsleep((caddr_t) &so->so_timeo, PSOCK, "portalcon", 5 * hz);
+	}
+	splx(s);
+
+	if (so->so_error) {
+		error = so->so_error;
+		goto bad;
+	}
+		
+	/*
+	 * Set miscellaneous flags
+	 */
+	so->so_rcv.sb_timeo = 0;
+	so->so_snd.sb_timeo = 0;
+	so->so_rcv.sb_flags |= SB_NOINTR;
+	so->so_snd.sb_flags |= SB_NOINTR;
+
+
+	pcred.pcr_flag = ap->a_mode;
+	pcred.pcr_uid = ap->a_cred->cr_uid;
+	pcred.pcr_ngroups = ap->a_cred->cr_ngroups;
+	bcopy(ap->a_cred->cr_groups, pcred.pcr_groups, NGROUPS * sizeof(gid_t));
+	aiov[0].iov_base = (caddr_t) &pcred;
+	aiov[0].iov_len = sizeof(pcred);
+	aiov[1].iov_base = pt->pt_arg;
+	aiov[1].iov_len = pt->pt_size;
+	auio.uio_iov = aiov;
+	auio.uio_iovcnt = 2;
+	auio.uio_rw = UIO_WRITE;
+	auio.uio_segflg = UIO_SYSSPACE;
+	auio.uio_procp = p;
+	auio.uio_offset = 0;
+	auio.uio_resid = aiov[0].iov_len + aiov[1].iov_len;
+
+	error = sosend(so, (struct mbuf *) 0, &auio,
+			(struct mbuf *) 0, (struct mbuf *) 0, 0);
+	if (error)
+		goto bad;
+
+	len = auio.uio_resid = sizeof(int);
+	do {
+		struct mbuf *m = 0;
+		int flags = MSG_WAITALL;
+		error = soreceive(so, (struct mbuf **) 0, &auio,
+					&m, &cm, &flags);
+		if (error)
+			goto bad;
+
+		/*
+		 * Grab an error code from the mbuf.
+		 */
+		if (m) {
+			m = m_pullup(m, sizeof(int));	/* Needed? */
+			if (m) {
+				error = *(mtod(m, int *));
+				m_freem(m);
+			} else {
+				error = EINVAL;
+			}
+		} else {
+			if (cm == 0) {
+				error = ECONNRESET;	 /* XXX */
+#ifdef notdef
+				break;
+#endif
+			}
+		}
+	} while (cm == 0 && auio.uio_resid == len && !error);
+
+	if (cm == 0)
+		goto bad;
+
+	if (auio.uio_resid) {
+		error = 0;
+#ifdef notdef
+		error = EMSGSIZE;
+		goto bad;
+#endif
+	}
+
+	/*
+	 * XXX: Break apart the control message, and retrieve the
+	 * received file descriptor.  Note that more than one descriptor
+	 * may have been received, or that the rights chain may have more
+	 * than a single mbuf in it.  What to do?
+	 */
+	cmsg = mtod(cm, struct cmsghdr *);
+	newfds = (cmsg->cmsg_len - sizeof(*cmsg)) / sizeof (int);
+	if (newfds == 0) {
+		error = ECONNREFUSED;
+		goto bad;
+	}
+	/*
+	 * At this point the rights message consists of a control message
+	 * header, followed by a data region containing a vector of
+	 * integer file descriptors.  The fds were allocated by the action
+	 * of receiving the control message.
+	 */
+	ip = (int *) (cmsg + 1);
+	fd = *ip++;
+	if (newfds > 1) {
+		/*
+		 * Close extra fds.
+		 */
+		int i;
+		printf("portal_open: %d extra fds\n", newfds - 1);
+		for (i = 1; i < newfds; i++) {
+			portal_closefd(p, *ip);
+			ip++;
+		}
+	}
+
+	/*
+	 * Check that the mode the file is being opened for is a subset 
+	 * of the mode of the existing descriptor.
+	 */
+ 	fp = p->p_fd->fd_ofiles[fd];
+	if (((ap->a_mode & (FREAD|FWRITE)) | fp->f_flag) != fp->f_flag) {
+		portal_closefd(p, fd);
+		error = EACCES;
+		goto bad;
+	}
+
+	/*
+	 * Save the dup fd in the proc structure then return the
+	 * special error code (ENXIO) which causes magic things to
+	 * happen in vn_open.  The whole concept is, well, hmmm.
+	 */
+	p->p_dupfd = fd;
+	error = ENXIO;
+
+bad:;
+	/*
+	 * And discard the control message.
+	 */
+	if (cm) { 
+		m_freem(cm);
+	}
+
+	if (so) {
+		soshutdown(so, 2);
+		soclose(so);
+	}
+	return (error);
+}
+
+/* Fabricate attributes for a portal vnode; every timestamp is "now". */
+int
+portal_getattr(ap)
+	struct vop_getattr_args /* {
+		struct vnode *a_vp;
+		struct vattr *a_vap;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct vnode *vp = ap->a_vp;
+	struct vattr *vap = ap->a_vap;
+
+	bzero(vap, sizeof(*vap));
+	vattr_null(vap);
+	vap->va_uid = 0;
+	vap->va_gid = 0;
+	vap->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0];
+	vap->va_size = DEV_BSIZE;
+	vap->va_blocksize = DEV_BSIZE;
+	/* atime is taken from the clock; mtime and ctime are copies of it. */
+	microtime(&vap->va_atime);
+	vap->va_mtime = vap->va_atime;
+	vap->va_ctime = vap->va_atime;
+	vap->va_gen = 0;
+	vap->va_flags = 0;
+	vap->va_rdev = 0;
+	vap->va_bytes = 0;
+	if (vp->v_flag & VROOT) {
+		vap->va_type = VDIR;
+		vap->va_mode = S_IRUSR|S_IWUSR|S_IXUSR|
+				S_IRGRP|S_IWGRP|S_IXGRP|
+				S_IROTH|S_IWOTH|S_IXOTH;
+		vap->va_nlink = 2;
+		vap->va_fileid = 2;
+	} else {
+		vap->va_type = VREG;
+		vap->va_mode = S_IRUSR|S_IWUSR|
+				S_IRGRP|S_IWGRP|
+				S_IROTH|S_IWOTH;
+		vap->va_nlink = 1;
+		vap->va_fileid = VTOPORTAL(vp)->pt_fileid;
+	}
+	return (0);
+}
+
+int
+portal_setattr(ap)
+	struct vop_setattr_args /* {
+		struct vnode *a_vp;
+		struct vattr *a_vap;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+
+	/*
+	 * Can't mess with the root vnode
+	 */
+	if (ap->a_vp->v_flag & VROOT)
+		return (EACCES);
+
+	return (0);
+}
+
+/*
+ * Fake readdir, just return empty directory.
+ * It is hard to deal with '.' and '..' so don't bother.
+ */
+int
+portal_readdir(ap)
+	struct vop_readdir_args /* {
+		struct vnode *a_vp;
+		struct uio *a_uio;
+		struct ucred *a_cred;
+	} */ *ap;
+{
+
+	return (0);
+}
+
+int
+portal_inactive(ap)
+	struct vop_inactive_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+
+	return (0);
+}
+
+int
+portal_reclaim(ap)
+	struct vop_reclaim_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+	struct portalnode *pt = VTOPORTAL(ap->a_vp);
+
+	if (pt->pt_arg) {
+		free((caddr_t) pt->pt_arg, M_TEMP);
+		pt->pt_arg = 0;
+	}
+	FREE(ap->a_vp->v_data, M_TEMP);
+	ap->a_vp->v_data = 0;
+
+	return (0);
+}
+
+/*
+ * Store the pathconf limit named by a_name in *a_retval; EINVAL if unknown.
+ */
+int
+portal_pathconf(ap)
+	struct vop_pathconf_args /* {
+		struct vnode *a_vp;
+		int a_name;
+		int *a_retval;
+	} */ *ap;
+{
+	switch (ap->a_name) {
+	case _PC_LINK_MAX:
+		*ap->a_retval = LINK_MAX;
+		return (0);
+	case _PC_MAX_CANON:
+		*ap->a_retval = MAX_CANON;
+		return (0);
+	case _PC_MAX_INPUT:
+		*ap->a_retval = MAX_INPUT;
+		return (0);
+	case _PC_PIPE_BUF:
+		*ap->a_retval = PIPE_BUF;
+		return (0);
+	case _PC_CHOWN_RESTRICTED:
+		*ap->a_retval = 1;
+		return (0);
+	case _PC_VDISABLE:
+		*ap->a_retval = _POSIX_VDISABLE;
+		return (0);
+	default:
+		return (EINVAL);
+	}
+	/* NOTREACHED */
+}
+
+/*
+ * Print out the contents of a Portal vnode.
+ */
+/* ARGSUSED */
+int
+portal_print(ap)
+	struct vop_print_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+
+	printf("tag VT_PORTAL, portal vnode\n");
+	return (0);
+}
+
+/*void*/
+int
+portal_vfree(ap)
+	struct vop_vfree_args /* {
+		struct vnode *a_pvp;
+		ino_t a_ino;
+		int a_mode;
+	} */ *ap;
+{
+
+	return (0);
+}
+
+
+/*
+ * Portal vnode unsupported operation
+ */
+int
+portal_enotsupp()
+{
+
+	return (EOPNOTSUPP);
+}
+
+/*
+ * Portal "should never get here" operation
+ */
+int
+portal_badop()
+{
+
+	panic("portal: bad op");
+	/* NOTREACHED */
+}
+
+/*
+ * Portal vnode null operation
+ */
+int
+portal_nullop()
+{
+
+	return (0);
+}
+
+#define portal_create ((int (*) __P((struct vop_create_args *)))portal_enotsupp)
+#define portal_mknod ((int (*) __P((struct  vop_mknod_args *)))portal_enotsupp)
+#define portal_close ((int (*) __P((struct  vop_close_args *)))nullop)
+#define portal_access ((int (*) __P((struct  vop_access_args *)))nullop)
+#define portal_read ((int (*) __P((struct  vop_read_args *)))portal_enotsupp)
+#define portal_write ((int (*) __P((struct  vop_write_args *)))portal_enotsupp)
+#define portal_ioctl ((int (*) __P((struct  vop_ioctl_args *)))portal_enotsupp)
+#define portal_select ((int (*) __P((struct vop_select_args *)))portal_enotsupp)
+#define portal_mmap ((int (*) __P((struct  vop_mmap_args *)))portal_enotsupp)
+#define portal_fsync ((int (*) __P((struct  vop_fsync_args *)))nullop)
+#define portal_seek ((int (*) __P((struct  vop_seek_args *)))nullop)
+#define portal_remove ((int (*) __P((struct vop_remove_args *)))portal_enotsupp)
+#define portal_link ((int (*) __P((struct  vop_link_args *)))portal_enotsupp)
+#define portal_rename ((int (*) __P((struct vop_rename_args *)))portal_enotsupp)
+#define portal_mkdir ((int (*) __P((struct  vop_mkdir_args *)))portal_enotsupp)
+#define portal_rmdir ((int (*) __P((struct  vop_rmdir_args *)))portal_enotsupp)
+#define portal_symlink \
+	((int (*) __P((struct  vop_symlink_args *)))portal_enotsupp)
+#define portal_readlink \
+	((int (*) __P((struct  vop_readlink_args *)))portal_enotsupp)
+#define portal_abortop ((int (*) __P((struct  vop_abortop_args *)))nullop)
+#define portal_lock ((int (*) __P((struct  vop_lock_args *)))nullop)
+#define portal_unlock ((int (*) __P((struct  vop_unlock_args *)))nullop)
+#define portal_bmap ((int (*) __P((struct  vop_bmap_args *)))portal_badop)
+#define portal_strategy \
+	((int (*) __P((struct  vop_strategy_args *)))portal_badop)
+#define portal_islocked ((int (*) __P((struct  vop_islocked_args *)))nullop)
+#define portal_advlock \
+	((int (*) __P((struct  vop_advlock_args *)))portal_enotsupp)
+#define portal_blkatoff \
+	((int (*) __P((struct  vop_blkatoff_args *)))portal_enotsupp)
+#define portal_valloc ((int(*) __P(( \
+		struct vnode *pvp, \
+		int mode, \
+		struct ucred *cred, \
+		struct vnode **vpp))) portal_enotsupp)
+#define portal_truncate \
+	((int (*) __P((struct  vop_truncate_args *)))portal_enotsupp)
+#define portal_update ((int (*) __P((struct vop_update_args *)))portal_enotsupp)
+#define portal_bwrite ((int (*) __P((struct vop_bwrite_args *)))portal_enotsupp)
+
+int (**portal_vnodeop_p)();
+struct vnodeopv_entry_desc portal_vnodeop_entries[] = {
+	{ &vop_default_desc, vn_default_error },
+	{ &vop_lookup_desc, portal_lookup },		/* lookup */
+	{ &vop_create_desc, portal_create },		/* create */
+	{ &vop_mknod_desc, portal_mknod },		/* mknod */
+	{ &vop_open_desc, portal_open },		/* open */
+	{ &vop_close_desc, portal_close },		/* close */
+	{ &vop_access_desc, portal_access },		/* access */
+	{ &vop_getattr_desc, portal_getattr },		/* getattr */
+	{ &vop_setattr_desc, portal_setattr },		/* setattr */
+	{ &vop_read_desc, portal_read },		/* read */
+	{ &vop_write_desc, portal_write },		/* write */
+	{ &vop_ioctl_desc, portal_ioctl },		/* ioctl */
+	{ &vop_select_desc, portal_select },		/* select */
+	{ &vop_mmap_desc, portal_mmap },		/* mmap */
+	{ &vop_fsync_desc, portal_fsync },		/* fsync */
+	{ &vop_seek_desc, portal_seek },		/* seek */
+	{ &vop_remove_desc, portal_remove },		/* remove */
+	{ &vop_link_desc, portal_link },		/* link */
+	{ &vop_rename_desc, portal_rename },		/* rename */
+	{ &vop_mkdir_desc, portal_mkdir },		/* mkdir */
+	{ &vop_rmdir_desc, portal_rmdir },		/* rmdir */
+	{ &vop_symlink_desc, portal_symlink },		/* symlink */
+	{ &vop_readdir_desc, portal_readdir },		/* readdir */
+	{ &vop_readlink_desc, portal_readlink },	/* readlink */
+	{ &vop_abortop_desc, portal_abortop },		/* abortop */
+	{ &vop_inactive_desc, portal_inactive },	/* inactive */
+	{ &vop_reclaim_desc, portal_reclaim },		/* reclaim */
+	{ &vop_lock_desc, portal_lock },		/* lock */
+	{ &vop_unlock_desc, portal_unlock },		/* unlock */
+	{ &vop_bmap_desc, portal_bmap },		/* bmap */
+	{ &vop_strategy_desc, portal_strategy },	/* strategy */
+	{ &vop_print_desc, portal_print },		/* print */
+	{ &vop_islocked_desc, portal_islocked },	/* islocked */
+	{ &vop_pathconf_desc, portal_pathconf },	/* pathconf */
+	{ &vop_advlock_desc, portal_advlock },		/* advlock */
+	{ &vop_blkatoff_desc, portal_blkatoff },	/* blkatoff */
+	{ &vop_valloc_desc, portal_valloc },		/* valloc */
+	{ &vop_vfree_desc, portal_vfree },		/* vfree */
+	{ &vop_truncate_desc, portal_truncate },	/* truncate */
+	{ &vop_update_desc, portal_update },		/* update */
+	{ &vop_bwrite_desc, portal_bwrite },		/* bwrite */
+	{ (struct vnodeop_desc*)NULL, (int(*)())NULL }
+};
+struct vnodeopv_desc portal_vnodeop_opv_desc =
+	{ &portal_vnodeop_p, portal_vnodeop_entries };
diff --git a/sys/fs/procfs/README b/sys/fs/procfs/README
new file mode 100644
index 00000000000..38811b3f6e3
--- /dev/null
+++ b/sys/fs/procfs/README
@@ -0,0 +1,113 @@
+saute procfs lyonnais
+
+procfs supports two levels of directory.  the filesystem root
+directory contains a representation of the system process table.
+this consists of an entry for each active and zombie process, and
+an additional entry "curproc" which always represents the process
+making the lookup request.
+
+each of the sub-directories contains several files.  these files
+are used to control and interrogate processes.  the files implemented
+are:
+
+	file	- xxx.  the exec'ed file.
+
+	status  - r/o.  returns process status.
+
+	ctl	- w/o.  sends a control message to the process.
+			for example:
+				echo hup > /proc/curproc/ctl
+			will send a SIGHUP to the shell.
+			whereas
+				echo attach > /proc/1293/ctl
+			would set up process 1293 for debugging.
+			see below for more details.
+
+	mem	- r/w.  virtual memory image of the process.
+			parts of the address space are readable
+			only if they exist in the target process.
+			a more reasonable alternative might be
+			to return zero pages instead of an error.
+			comments?
+
+	note	- w/o.  writing a string here sends the
+			equivalent note to the process.
+			[ not implemented. ]
+
+	notepg	- w/o.  the same as note, but sends to all
+			members of the process group.
+			[ not implemented. ]
+
+	regs	- r/w.	process register set.  this can be read
+			or written any time even if the process
+			is not stopped.  since the bsd kernel
+			is single-processor, this implementation
+			will get the "right" register values.
+			a multi-proc kernel would need to do some
+			synchronisation.
+
+this then looks like:
+
+% ls -li /proc
+total 0
+   9 dr-xr-xr-x  2 root  wheel  0 Sep 21 15:06 0
+  17 dr-xr-xr-x  2 root  wheel  0 Sep 21 15:06 1
+  89 dr-xr-xr-x  2 root  wheel  0 Sep 21 15:06 10
+  25 dr-xr-xr-x  2 root  wheel  0 Sep 21 15:06 2
+2065 dr-xr-xr-x  2 root  wheel  0 Sep 21 15:06 257
+2481 dr-xr-xr-x  2 jsp   staff  0 Sep 21 15:06 309
+ 265 dr-xr-xr-x  2 root  wheel  0 Sep 21 15:06 32
+3129 dr-xr-xr-x  2 jsp   staff  0 Sep 21 15:06 390
+3209 dr-xr-xr-x  2 jsp   staff  0 Sep 21 15:06 400
+3217 dr-xr-xr-x  2 jsp   staff  0 Sep 21 15:06 401
+3273 dr-xr-xr-x  2 jsp   staff  0 Sep 21 15:06 408
+ 393 dr-xr-xr-x  2 root  wheel  0 Sep 21 15:06 48
+ 409 dr-xr-xr-x  2 root  wheel  0 Sep 21 15:06 50
+ 465 dr-xr-xr-x  2 root  wheel  0 Sep 21 15:06 57
+ 481 dr-xr-xr-x  2 root  wheel  0 Sep 21 15:06 59
+ 537 dr-xr-xr-x  2 root  kmem   0 Sep 21 15:06 66
+ 545 dr-xr-xr-x  2 root  wheel  0 Sep 21 15:06 67
+ 657 dr-xr-xr-x  2 jsp   staff  0 Sep 21 15:06 81
+ 665 dr-xr-xr-x  2 jsp   staff  0 Sep 21 15:06 82
+ 673 dr-xr-xr-x  2 jsp   staff  0 Sep 21 15:06 83
+ 681 dr-xr-xr-x  2 root  wheel  0 Sep 21 15:06 84
+3273 dr-xr-xr-x  2 jsp   staff  0 Sep 21 15:06 curproc
+% ls -li /proc/curproc
+total 408
+3341 --w-------  1 jsp  staff       0 Sep 21 15:06 ctl
+1554 -r-xr-xr-x  1 bin  bin     90112 Mar 29 04:52 file
+3339 -rw-------  1 jsp  staff  118784 Sep 21 15:06 mem
+3343 --w-------  1 jsp  staff       0 Sep 21 15:06 note
+3344 --w-------  1 jsp  staff       0 Sep 21 15:06 notepg
+3340 -rw-------  1 jsp  staff       0 Sep 21 15:06 regs
+3342 -r--r--r--  1 jsp  staff       0 Sep 21 15:06 status
+% df /proc/curproc /proc/curproc/file
+Filesystem  512-blocks    Used   Avail Capacity  Mounted on
+proc                 2       2       0   100%    /proc
+/dev/wd0a        16186   13548    1018    93%    /
+% cat /proc/curproc/status
+cat 446 439 400 81 12,0 ctty 748620684 270000 0 0 0 20000 nochan 11 20 20 20 0 21 117
+
+
+
+the basic sequence of commands written to "ctl" would be
+
+	attach		- this stops the target process and
+			  arranges for the sending process
+			  to become the debug control process
+	wait		- wait for the target process to come to
+			  a steady state ready for debugging.
+	step		- single step, with no signal delivery.
+	run		- continue running, with no signal delivery,
+			  until next trap or breakpoint.
+	<signame>	- deliver signal <signame> and continue running.
+	detach		- continue execution of the target process
+			  and remove it from control by the debug process
+
+in a normal debugging environment, where the target is fork/exec'd by
+the debugger, the debugger should fork and the child should stop itself
+(with a self-inflicted SIGSTOP).  the parent should do a "wait" then an
+"attach".  as before, the child will hit a breakpoint on the first
+instruction in any newly exec'd image.
+
+$Id: README,v 3.1 1993/12/15 09:40:17 jsp Exp $
diff --git a/sys/fs/procfs/procfs.h b/sys/fs/procfs/procfs.h
new file mode 100644
index 00000000000..f7b8fa3ef0e
--- /dev/null
+++ b/sys/fs/procfs/procfs.h
@@ -0,0 +1,186 @@
+/*
+ * Copyright (c) 1993 Jan-Simon Pendry
+ * Copyright (c) 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)procfs.h	8.6 (Berkeley) 2/3/94
+ *
+ * From:
+ *	$Id: procfs.h,v 3.2 1993/12/15 09:40:17 jsp Exp $
+ */
+
+/*
+ * The different types of node in a procfs filesystem
+ */
+typedef enum {
+	Proot,		/* the filesystem root */
+	Pproc,		/* a process-specific sub-directory */
+	Pfile,		/* the executable file */
+	Pmem,		/* the process's memory image */
+	Pregs,		/* the process's register set */
+	Pfpregs,	/* the process's FP register set */
+	Pctl,		/* process control */
+	Pstatus,	/* process status */
+	Pnote,		/* process notifier */
+	Pnotepg		/* process group notifier */
+} pfstype;
+
+/*
+ * control data for the proc file system.
+ */
+struct pfsnode {
+	struct pfsnode	*pfs_next;	/* next on list */
+	struct vnode	*pfs_vnode;	/* vnode associated with this pfsnode */
+	pfstype		pfs_type;	/* type of procfs node */
+	pid_t		pfs_pid;	/* associated process */
+	u_short		pfs_mode;	/* mode bits for stat() */
+	u_long		pfs_flags;	/* open flags */
+	u_long		pfs_fileno;	/* unique file id */
+};
+
+#define PROCFS_NOTELEN	64	/* max length of a note (/proc/$pid/note) */
+#define PROCFS_CTLLEN 	8	/* max length of a ctl msg (/proc/$pid/ctl) */
+
+/*
+ * Kernel stuff follows
+ */
+#ifdef KERNEL
+#define CNEQ(cnp, s, len) \
+	 ((cnp)->cn_namelen == (len) && \
+	  (bcmp((s), (cnp)->cn_nameptr, (len)) == 0))
+
+/*
+ * Format of a directory entry in /proc, ...
+ * This must map onto struct dirent (see <dirent.h>)
+ */
+#define PROCFS_NAMELEN 8
+struct pfsdent {
+	u_long	d_fileno;
+	u_short	d_reclen;
+	u_char	d_type;
+	u_char	d_namlen;
+	char	d_name[PROCFS_NAMELEN];
+};
+#define UIO_MX sizeof(struct pfsdent)
+#define PROCFS_FILENO(pid, type) \
+	(((type) == Proot) ? \
+			2 : \
+			((((pid)+1) << 3) + ((int) (type))))
+
+/*
+ * Convert between pfsnode and vnode
+ */
+#define VTOPFS(vp)	((struct pfsnode *)(vp)->v_data)
+#define PFSTOV(pfs)	((pfs)->pfs_vnode)
+
+typedef struct vfs_namemap vfs_namemap_t;
+struct vfs_namemap {
+	const char *nm_name;
+	int nm_val;
+};
+
+extern int vfs_getuserstr __P((struct uio *, char *, int *));
+extern vfs_namemap_t *vfs_findname __P((vfs_namemap_t *, char *, int));
+
+/* <machine/reg.h> */
+struct reg;
+struct fpreg;
+
+#define PFIND(pid) ((pid) ? pfind(pid) : &proc0)
+extern int procfs_freevp __P((struct vnode *));
+extern int procfs_allocvp __P((struct mount *, struct vnode **, long, pfstype));
+extern struct vnode *procfs_findtextvp __P((struct proc *));
+extern int procfs_sstep __P((struct proc *));
+extern void procfs_fix_sstep __P((struct proc *));
+extern int procfs_read_regs __P((struct proc *, struct reg *));
+extern int procfs_write_regs __P((struct proc *, struct reg *));
+extern int procfs_read_fpregs __P((struct proc *, struct fpreg *));
+extern int procfs_write_fpregs __P((struct proc *, struct fpreg *));
+extern int procfs_donote __P((struct proc *, struct proc *, struct pfsnode *pfsp, struct uio *uio));
+extern int procfs_doregs __P((struct proc *, struct proc *, struct pfsnode *pfsp, struct uio *uio));
+extern int procfs_dofpregs __P((struct proc *, struct proc *, struct pfsnode *pfsp, struct uio *uio));
+extern int procfs_domem __P((struct proc *, struct proc *, struct pfsnode *pfsp, struct uio *uio));
+extern int procfs_doctl __P((struct proc *, struct proc *, struct pfsnode *pfsp, struct uio *uio));
+extern int procfs_dostatus __P((struct proc *, struct proc *, struct pfsnode *pfsp, struct uio *uio));
+
+#define PROCFS_LOCKED	0x01
+#define PROCFS_WANT	0x02
+
+extern int (**procfs_vnodeop_p)();
+extern struct vfsops procfs_vfsops;
+
+/*
+ * Prototypes for procfs vnode ops
+ */
+int	procfs_badop();	/* varargs */
+int	procfs_rw __P((struct vop_read_args *));
+int	procfs_lookup __P((struct vop_lookup_args *));
+#define procfs_create ((int (*) __P((struct vop_create_args *))) procfs_badop)
+#define procfs_mknod ((int (*) __P((struct vop_mknod_args *))) procfs_badop)
+int	procfs_open __P((struct vop_open_args *));
+int	procfs_close __P((struct vop_close_args *));
+int	procfs_access __P((struct vop_access_args *));
+int	procfs_getattr __P((struct vop_getattr_args *));
+int	procfs_setattr __P((struct vop_setattr_args *));
+#define	procfs_read procfs_rw
+#define	procfs_write procfs_rw
+int	procfs_ioctl __P((struct vop_ioctl_args *));
+#define procfs_select ((int (*) __P((struct vop_select_args *))) procfs_badop)
+#define procfs_mmap ((int (*) __P((struct vop_mmap_args *))) procfs_badop)
+#define procfs_fsync ((int (*) __P((struct vop_fsync_args *))) procfs_badop)
+#define procfs_seek ((int (*) __P((struct vop_seek_args *))) procfs_badop)
+#define procfs_remove ((int (*) __P((struct vop_remove_args *))) procfs_badop)
+#define procfs_link ((int (*) __P((struct vop_link_args *))) procfs_badop)
+#define procfs_rename ((int (*) __P((struct vop_rename_args *))) procfs_badop)
+#define procfs_mkdir ((int (*) __P((struct vop_mkdir_args *))) procfs_badop)
+#define procfs_rmdir ((int (*) __P((struct vop_rmdir_args *))) procfs_badop)
+#define procfs_symlink ((int (*) __P((struct vop_symlink_args *))) procfs_badop)
+int	procfs_readdir __P((struct vop_readdir_args *));
+#define procfs_readlink ((int (*) __P((struct vop_readlink_args *))) procfs_badop)
+int	procfs_abortop __P((struct vop_abortop_args *));
+int	procfs_inactive __P((struct vop_inactive_args *));
+int	procfs_reclaim __P((struct vop_reclaim_args *));
+#define procfs_lock ((int (*) __P((struct vop_lock_args *))) nullop)
+#define procfs_unlock ((int (*) __P((struct vop_unlock_args *))) nullop)
+int	procfs_bmap __P((struct vop_bmap_args *));
+#define	procfs_strategy ((int (*) __P((struct vop_strategy_args *))) procfs_badop)
+int	procfs_print __P((struct vop_print_args *));
+#define procfs_islocked ((int (*) __P((struct vop_islocked_args *))) nullop)
+#define procfs_advlock ((int (*) __P((struct vop_advlock_args *))) procfs_badop)
+#define procfs_blkatoff ((int (*) __P((struct vop_blkatoff_args *))) procfs_badop)
+#define procfs_valloc ((int (*) __P((struct vop_valloc_args *))) procfs_badop)
+#define procfs_vfree ((int (*) __P((struct vop_vfree_args *))) nullop)
+#define procfs_truncate ((int (*) __P((struct vop_truncate_args *))) procfs_badop)
+#define procfs_update ((int (*) __P((struct vop_update_args *))) nullop)
+#endif /* KERNEL */
diff --git a/sys/fs/procfs/procfs_ctl.c b/sys/fs/procfs/procfs_ctl.c
new file mode 100644
index 00000000000..a42a03ce91c
--- /dev/null
+++ b/sys/fs/procfs/procfs_ctl.c
@@ -0,0 +1,302 @@
+/*
+ * Copyright (c) 1993 Jan-Simon Pendry
+ * Copyright (c) 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)procfs_ctl.c	8.3 (Berkeley) 1/21/94
+ *
+ * From:
+ *	$Id: procfs_ctl.c,v 3.2 1993/12/15 09:40:17 jsp Exp $
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/time.h>
+#include <sys/kernel.h>
+#include <sys/proc.h>
+#include <sys/vnode.h>
+#include <sys/ioctl.h>
+#include <sys/tty.h>
+#include <sys/resource.h>
+#include <sys/resourcevar.h>
+#include <miscfs/procfs/procfs.h>
+
+/*
+ * True iff process (p) is in trace wait state
+ * relative to process (curp)
+ */
+#define TRACE_WAIT_P(curp, p) \
+	((p)->p_stat == SSTOP && \
+	 (p)->p_pptr == (curp) && \
+	 ((p)->p_flag & P_TRACED))
+
+#ifdef notdef	/* disabled: single-step fixup via procfs_fix_sstep() */
+#define FIX_SSTEP(p) \
+	do { \
+		procfs_fix_sstep(p); \
+	} while (0)
+#else		/* !notdef: FIX_SSTEP(p); expands to an empty statement */
+#define FIX_SSTEP(p)
+#endif
+
+#define PROCFS_CTL_ATTACH	1
+#define PROCFS_CTL_DETACH	2
+#define PROCFS_CTL_STEP		3
+#define PROCFS_CTL_RUN		4
+#define PROCFS_CTL_WAIT		5
+
+static vfs_namemap_t ctlnames[] = {
+	/* special /proc commands */
+	{ "attach",	PROCFS_CTL_ATTACH },
+	{ "detach",	PROCFS_CTL_DETACH },
+	{ "step",	PROCFS_CTL_STEP },
+	{ "run",	PROCFS_CTL_RUN },
+	{ "wait",	PROCFS_CTL_WAIT },
+	{ 0 },
+};
+
+static vfs_namemap_t signames[] = {
+	/* regular signal names */
+	{ "hup",	SIGHUP },	{ "int",	SIGINT },
+	{ "quit",	SIGQUIT },	{ "ill",	SIGILL },
+	{ "trap",	SIGTRAP },	{ "abrt",	SIGABRT },
+	{ "iot",	SIGIOT },	{ "emt",	SIGEMT },
+	{ "fpe",	SIGFPE },	{ "kill",	SIGKILL },
+	{ "bus",	SIGBUS },	{ "segv",	SIGSEGV },
+	{ "sys",	SIGSYS },	{ "pipe",	SIGPIPE },
+	{ "alrm",	SIGALRM },	{ "term",	SIGTERM },
+	{ "urg",	SIGURG },	{ "stop",	SIGSTOP },
+	{ "tstp",	SIGTSTP },	{ "cont",	SIGCONT },
+	{ "chld",	SIGCHLD },	{ "ttin",	SIGTTIN },
+	{ "ttou",	SIGTTOU },	{ "io",		SIGIO },
+	{ "xcpu",	SIGXCPU },	{ "xfsz",	SIGXFSZ },
+	{ "vtalrm",	SIGVTALRM },	{ "prof",	SIGPROF },
+	{ "winch",	SIGWINCH },	{ "info",	SIGINFO },
+	{ "usr1",	SIGUSR1 },	{ "usr2",	SIGUSR2 },
+	{ 0 },
+};
+
+/*
+ * Carry out debug control request (op), issued by process (curp),
+ * on target process (p).  Returns 0 on success or an errno value.
+ */
+static int
+procfs_control(curp, p, op)
+	struct proc *curp;
+	struct proc *p;
+	int op;
+{
+	int error;
+
+	/*
+	 * Attach - attaches the target process for debugging
+	 * by the calling process.
+	 */
+	if (op == PROCFS_CTL_ATTACH) {
+		/* check whether already being traced */
+		if (p->p_flag & P_TRACED)
+			return (EBUSY);
+
+		/* can't trace yourself! */
+		if (p->p_pid == curp->p_pid)
+			return (EINVAL);
+
+		/*
+		 * Go ahead and set the trace flag.
+		 * Save the old parent (it's reset in
+		 *   _DETACH, and also in kern_exit.c:wait4())
+		 * Reparent the process so that the tracing
+		 *   proc gets to see all the action.
+		 * Stop the target.
+		 */
+		p->p_flag |= P_TRACED;
+		p->p_xstat = 0;		/* XXX ? */
+		if (p->p_pptr != curp) {
+			p->p_oppid = p->p_pptr->p_pid;
+			proc_reparent(p, curp);
+		}
+		psignal(p, SIGSTOP);
+		return (0);
+	}
+
+	/*
+	 * Target process must be stopped, owned by (curp) and
+	 * be set up for tracing (P_TRACED flag set).
+	 * Allow DETACH to take place at any time for sanity.
+	 * Allow WAIT any time, of course.
+	 */
+	switch (op) {
+	case PROCFS_CTL_DETACH:
+	case PROCFS_CTL_WAIT:
+		break;
+
+	default:
+		if (!TRACE_WAIT_P(curp, p))
+			return (EBUSY);
+	}
+
+	/* do single-step fixup if needed */
+	FIX_SSTEP(p);
+
+	/*
+	 * Don't deliver any signal by default.
+	 * To continue with a signal, just send
+	 * the signal name to the ctl file
+	 */
+	p->p_xstat = 0;
+
+	switch (op) {
+	/*
+	 * Detach.  Cleans up the target process, reparent it if possible
+	 * and set it running once more.
+	 */
+	case PROCFS_CTL_DETACH:
+		/* if not being traced, then this is a painless no-op */
+		if ((p->p_flag & P_TRACED) == 0)
+			return (0);
+
+		/* not being traced any more */
+		p->p_flag &= ~P_TRACED;
+
+		/* give process back to original parent */
+		if (p->p_oppid != p->p_pptr->p_pid) {
+			struct proc *pp;
+
+			pp = pfind(p->p_oppid);
+			if (pp)
+				proc_reparent(p, pp);
+		}
+
+		p->p_oppid = 0;
+		p->p_flag &= ~P_WAITED;	/* XXX ? */
+		wakeup((caddr_t) curp);	/* XXX for CTL_WAIT below ? */
+
+		break;
+
+	/* Step.  Let the target process execute a single instruction. */
+	case PROCFS_CTL_STEP:
+		if ((error = procfs_sstep(p)) != 0)
+			return (error);	/* leave the target stopped */
+		break;
+
+	/*
+	 * Run.  Let the target process continue running until a breakpoint
+	 * or some other trap.
+	 */
+	case PROCFS_CTL_RUN:
+		break;
+
+	/*
+	 * Wait for the target process to stop.  If the target is not
+	 * being traced then just wait for it to enter the stopped state.
+	 */
+	case PROCFS_CTL_WAIT:
+		error = 0;
+		if (p->p_flag & P_TRACED) {
+			while (error == 0 &&
+					(p->p_stat != SSTOP) &&
+					(p->p_flag & P_TRACED) &&
+					(p->p_pptr == curp)) {
+				error = tsleep((caddr_t) p,
+						PWAIT|PCATCH, "procfsx", 0);
+			}
+			if (error == 0 && !TRACE_WAIT_P(curp, p))
+				error = EBUSY;
+		} else {
+			while (error == 0 && p->p_stat != SSTOP) {
+				error = tsleep((caddr_t) p,
+						PWAIT|PCATCH, "procfs", 0);
+			}
+		}
+		return (error);
+
+	default:
+		panic("procfs_control");
+	}
+
+	if (p->p_stat == SSTOP)
+		setrunnable(p);
+	return (0);
+}
+
+/*
+ * Handle a write to the "ctl" file of process (p) by process (curp).
+ * The text written is a debug control command or a signal name.
+ */
+int
+procfs_doctl(curp, p, pfs, uio)
+	struct proc *curp;
+	struct proc *p;
+	struct pfsnode *pfs;
+	struct uio *uio;
+{
+	char cmd[PROCFS_CTLLEN+1];
+	vfs_namemap_t *ent;
+	int cmdlen = PROCFS_CTLLEN;
+	int error;
+
+	if (uio->uio_rw != UIO_WRITE)
+		return (EOPNOTSUPP);
+
+	error = vfs_getuserstr(uio, cmd, &cmdlen);
+	if (error)
+		return (error);
+
+	/* Debug control commands are looked up before signal names. */
+	ent = vfs_findname(ctlnames, cmd, cmdlen);
+	if (ent)
+		return (procfs_control(curp, p, ent->nm_val));
+
+	/*
+	 * Otherwise map a signal name into signal generation.  Unknown
+	 * commands and/or signals return EOPNOTSUPP.
+	 *
+	 * Sending a signal while the process is being debugged
+	 * also has the side effect of letting the target continue
+	 * to run.  There is no way to single-step a signal delivery.
+	 */
+	ent = vfs_findname(signames, cmd, cmdlen);
+	if (ent == 0)
+		return (EOPNOTSUPP);
+
+	if (TRACE_WAIT_P(curp, p)) {
+		p->p_xstat = ent->nm_val;
+		FIX_SSTEP(p);
+		setrunnable(p);
+	} else {
+		psignal(p, ent->nm_val);
+	}
+
+	return (0);
+}
diff --git a/sys/fs/procfs/procfs_fpregs.c b/sys/fs/procfs/procfs_fpregs.c
new file mode 100644
index 00000000000..6d850a6a881
--- /dev/null
+++ b/sys/fs/procfs/procfs_fpregs.c
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 1993 Jan-Simon Pendry
+ * Copyright (c) 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)procfs_fpregs.c	8.1 (Berkeley) 1/27/94
+ *
+ * From:
+ *	$Id: procfs_regs.c,v 3.2 1993/12/15 09:40:17 jsp Exp $
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/time.h>
+#include <sys/kernel.h>
+#include <sys/proc.h>
+#include <sys/vnode.h>
+#include <machine/reg.h>
+#include <miscfs/procfs/procfs.h>
+
+/*
+ * Read or write the floating point registers of process (p) through
+ * its fpregs file.
+ *
+ * The file is treated as an image of a struct fpreg.  The current
+ * registers are fetched with procfs_read_fpregs(), the requested byte
+ * range is moved with uiomove(), and for a write the (possibly
+ * partially) updated image is stored back with procfs_write_fpregs().
+ * uio_offset is always reset to zero on return.
+ *
+ * Returns 0 on success or the following errors
+ *
+ * EINVAL:  file offset is negative or beyond the end of the image
+ * EBUSY:   a write was attempted while the target is not stopped
+ *
+ * Errors from procfs_read_fpregs(), uiomove() and
+ * procfs_write_fpregs() are passed through unchanged.
+ */
+int
+procfs_dofpregs(curp, p, pfs, uio)
+	struct proc *curp;
+	struct proc *p;
+	struct pfsnode *pfs;
+	struct uio *uio;
+{
+	int error;
+	struct fpreg r;
+	char *kv;
+	int kl;
+
+	/*
+	 * Range-check the offset before it is used in any arithmetic.
+	 * A negative offset would otherwise move kv in front of r and
+	 * make kl larger than the image, and the later "kl < 0" test
+	 * would not notice, letting uiomove() run over the kernel stack.
+	 * (The "<" test comes first so the ">" comparison never sees a
+	 * negative value.)
+	 */
+	if (uio->uio_offset < 0 || uio->uio_offset > sizeof(r)) {
+		uio->uio_offset = 0;
+		return (EINVAL);
+	}
+
+	/* offset is now known to lie within [0, sizeof(r)] */
+	kv = (char *) &r + (int) uio->uio_offset;
+	kl = sizeof(r) - (int) uio->uio_offset;
+	if (kl > uio->uio_resid)
+		kl = uio->uio_resid;
+
+	/* start from the live registers so a partial write keeps the rest */
+	error = procfs_read_fpregs(p, &r);
+	if (error == 0)
+		error = uiomove(kv, kl, uio);
+	if (error == 0 && uio->uio_rw == UIO_WRITE) {
+		if (p->p_stat != SSTOP)
+			error = EBUSY;
+		else
+			error = procfs_write_fpregs(p, &r);
+	}
+
+	uio->uio_offset = 0;
+	return (error);
+}
diff --git a/sys/fs/procfs/procfs_mem.c b/sys/fs/procfs/procfs_mem.c
new file mode 100644
index 00000000000..039983da09c
--- /dev/null
+++ b/sys/fs/procfs/procfs_mem.c
@@ -0,0 +1,302 @@
+/*
+ * Copyright (c) 1993 Jan-Simon Pendry
+ * Copyright (c) 1993 Sean Eric Fagan
+ * Copyright (c) 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Jan-Simon Pendry and Sean Eric Fagan.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)procfs_mem.c	8.4 (Berkeley) 1/21/94
+ *
+ * From:
+ *	$Id: procfs_mem.c,v 3.2 1993/12/15 09:40:17 jsp Exp $
+ */
+
+/*
+ * This is a lightly hacked and merged version
+ * of sef's pread/pwrite functions
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/time.h>
+#include <sys/kernel.h>
+#include <sys/proc.h>
+#include <sys/vnode.h>
+#include <miscfs/procfs/procfs.h>
+#include <vm/vm.h>
+#include <vm/vm_kern.h>
+#include <vm/vm_page.h>
+
+/*
+ * Move data between (uio) and the address space of process (p).
+ *
+ * uio->uio_offset is taken as the user virtual address to start at and
+ * uio->uio_rw selects the direction.  Each pass of the loop deals with
+ * at most the remainder of one page: the page's VM object is looked up
+ * in the target's map, mapped into kernel_map, copied with uiomove()
+ * and unmapped again.  For a write to a page that is not writable the
+ * protection is raised to VM_PROT_ALL for the duration of the pass and
+ * set to VM_PROT_READ|VM_PROT_EXECUTE afterwards.
+ *
+ * Returns 0, or the first non-zero value handed back by one of the VM
+ * calls or by uiomove().  An offset above VM_MAXUSER_ADDRESS ends the
+ * transfer with 0.
+ */
+static int
+procfs_rwmem(p, uio)
+	struct proc *p;
+	struct uio *uio;
+{
+	int error;
+	int writing;
+
+	writing = uio->uio_rw == UIO_WRITE;
+
+	/*
+	 * Only map in one page at a time.  We don't have to, but it
+	 * makes things easier.  This way is trivial - right?
+	 */
+	do {
+		vm_map_t map, tmap;
+		vm_object_t object;
+		vm_offset_t kva;
+		vm_offset_t uva;
+		int page_offset;		/* offset into page */
+		vm_offset_t pageno;		/* page number */
+		vm_map_entry_t out_entry;
+		vm_prot_t out_prot;
+		vm_page_t m;
+		boolean_t wired, single_use;
+		vm_offset_t off;
+		u_int len;
+		int fix_prot;
+
+		/*
+		 * NOTE(review): an offset exactly equal to
+		 * VM_MAXUSER_ADDRESS is let through by this test -
+		 * confirm that is intended.
+		 */
+		uva = (vm_offset_t) uio->uio_offset;
+		if (uva > VM_MAXUSER_ADDRESS) {
+			error = 0;
+			break;
+		}
+
+		/*
+		 * Get the page number of this segment.
+		 * (pageno is really the page-aligned address, as
+		 * produced by trunc_page().)
+		 */
+		pageno = trunc_page(uva);
+		page_offset = uva - pageno;
+
+		/*
+		 * How many bytes to copy
+		 */
+		len = min(PAGE_SIZE - page_offset, uio->uio_resid);
+
+		/*
+		 * The map we want...
+		 */
+		map = &p->p_vmspace->vm_map;
+  
+		/*
+		 * Check the permissions for the area we're interested
+		 * in.
+		 */
+		fix_prot = 0;
+		if (writing)
+			fix_prot = !vm_map_check_protection(map, pageno,
+					pageno + PAGE_SIZE, VM_PROT_WRITE);
+
+		if (fix_prot) {
+			/*
+			 * If the page is not writable, we make it so.
+			 * XXX It is possible that a page may *not* be
+			 * read/executable, if a process changes that!
+			 * We will assume, for now, that a page is either
+			 * VM_PROT_ALL, or VM_PROT_READ|VM_PROT_EXECUTE.
+			 */
+			error = vm_map_protect(map, pageno,
+					pageno + PAGE_SIZE, VM_PROT_ALL, 0);
+			if (error)
+				break;
+		}
+
+		/*
+		 * Now we need to get the page.  out_entry, out_prot, wired,
+		 * and single_use aren't used.  One would think the vm code
+		 * would be a *bit* nicer...  We use tmap because
+		 * vm_map_lookup() can change the map argument.
+		 */
+		tmap = map;
+		error = vm_map_lookup(&tmap, pageno,
+				      writing ? VM_PROT_WRITE : VM_PROT_READ,
+				      &out_entry, &object, &off, &out_prot,
+				      &wired, &single_use);
+		/*
+		 * We're done with tmap now.
+		 */
+		if (!error)
+			vm_map_lookup_done(tmap, out_entry);
+  
+		/*
+		 * Fault the page in...
+		 * (only done for writes to an object that has a shadow,
+		 * when the page is absent or flagged PG_COPYONWRITE)
+		 */
+		if (!error && writing && object->shadow) {
+			m = vm_page_lookup(object, off);
+			if (m == 0 || (m->flags & PG_COPYONWRITE))
+				error = vm_fault(map, pageno,
+							VM_PROT_WRITE, FALSE);
+		}
+
+		/* Find space in kernel_map for the page we're interested in */
+		if (!error)
+			error = vm_map_find(kernel_map, object, off, &kva,
+					PAGE_SIZE, 1);
+
+		if (!error) {
+			/*
+			 * Neither vm_map_lookup() nor vm_map_find() appear
+			 * to add a reference count to the object, so we do
+			 * that here and now.
+			 */
+			vm_object_reference(object);
+
+			/*
+			 * Mark the page we just found as pageable.
+			 */
+			error = vm_map_pageable(kernel_map, kva,
+				kva + PAGE_SIZE, 0);
+
+			/*
+			 * Now do the i/o move.
+			 */
+			if (!error)
+				error = uiomove(kva + page_offset, len, uio);
+
+			/* the kernel mapping is dropped whether or not the copy worked */
+			vm_map_remove(kernel_map, kva, kva + PAGE_SIZE);
+		}
+		/*
+		 * Undo the temporary protection change made above.  Note
+		 * that this is skipped when vm_map_protect() itself failed,
+		 * since that path leaves the loop with a break.
+		 */
+		if (fix_prot)
+			vm_map_protect(map, pageno, pageno + PAGE_SIZE,
+					VM_PROT_READ|VM_PROT_EXECUTE, 0);
+	} while (error == 0 && uio->uio_resid > 0);
+
+	return (error);
+}
+
+/*
+ * Transfer data between (uio) and the address space of the target
+ * process (p).  The work is done by procfs_rwmem(), which maps the
+ * target's pages into the kernel and uiomove()s directly from the
+ * kernel address space.  A zero-length request succeeds immediately
+ * without touching the target.
+ */
+int
+procfs_domem(curp, p, pfs, uio)
+	struct proc *curp;
+	struct proc *p;
+	struct pfsnode *pfs;
+	struct uio *uio;
+{
+
+	return (uio->uio_resid == 0 ? 0 : procfs_rwmem(p, uio));
+}
+
+/*
+ * Return the vnode from which the text segment of process (p)
+ * is being executed.
+ *
+ * Ideally this would come from the VM system, but there is no
+ * sure-fire way of asking it.  Instead fork(), exec() and wait()
+ * all maintain the p_textvp field of the proc structure, which
+ * contains a held reference to the exec'ed vnode; its value is
+ * handed back as is.
+ */
+struct vnode *
+procfs_findtextvp(p)
+	struct proc *p;
+{
+	struct vnode *textvp;
+
+	textvp = p->p_textvp;
+	return (textvp);
+}
+
+
+#ifdef probably_never
+/*
+ * Given process (p), find the vnode from which
+ * its text segment is being mapped.
+ *
+ * This alternative is only compiled when probably_never is defined.
+ * It walks the user address range a page at a time and returns the
+ * handle of the first vnode pager it comes across, or 0 if none is
+ * found.  The printf() calls are debugging output.
+ *
+ * (This is here, rather than in procfs_subr in order
+ * to keep all the VM related code in one place.)
+ */
+struct vnode *
+procfs_findtextvp(p)
+	struct proc *p;
+{
+	int error;
+	vm_object_t object;
+	vm_offset_t pageno;		/* page number */
+
+	/* find a vnode pager for the user address space */
+
+	for (pageno = VM_MIN_ADDRESS;
+			pageno < VM_MAXUSER_ADDRESS;
+			pageno += PAGE_SIZE) {
+		vm_map_t map;
+		vm_map_entry_t out_entry;
+		vm_prot_t out_prot;
+		boolean_t wired, single_use;
+		vm_offset_t off;
+
+		/* map is reloaded each pass; it is passed by address to the lookup */
+		map = &p->p_vmspace->vm_map;
+		error = vm_map_lookup(&map, pageno,
+			      VM_PROT_READ,
+			      &out_entry, &object, &off, &out_prot,
+			      &wired, &single_use);
+
+		if (!error) {
+			vm_pager_t pager;
+
+			printf("procfs: found vm object\n");
+			vm_map_lookup_done(map, out_entry);
+			printf("procfs: vm object = %x\n", object);
+
+			/*
+			 * At this point, assuming no errors, object
+			 * is the VM object mapping UVA (pageno).
+			 * Ensure it has a vnode pager, then grab
+			 * the vnode from that pager's handle.
+			 */
+
+			pager = object->pager;
+			printf("procfs: pager = %x\n", pager);
+			if (pager)
+				printf("procfs: found pager, type = %d\n", pager->pg_type);
+			if (pager && pager->pg_type == PG_VNODE) {
+				struct vnode *vp;
+
+				vp = (struct vnode *) pager->pg_handle;
+				printf("procfs: vp = 0x%x\n", vp);
+				return (vp);
+			}
+		}
+	}
+
+	printf("procfs: text object not found\n");
+	return (0);
+}
+#endif /* probably_never */
diff --git a/sys/fs/procfs/procfs_note.c b/sys/fs/procfs/procfs_note.c
new file mode 100644
index 00000000000..bf2f160baa0
--- /dev/null
+++ b/sys/fs/procfs/procfs_note.c
@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) 1993 Jan-Simon Pendry
+ * Copyright (c) 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)procfs_note.c	8.2 (Berkeley) 1/21/94
+ *
+ * From:
+ *	$Id: procfs_note.c,v 3.2 1993/12/15 09:40:17 jsp Exp $
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/time.h>
+#include <sys/kernel.h>
+#include <sys/proc.h>
+#include <sys/vnode.h>
+#include <sys/signal.h>
+#include <miscfs/procfs/procfs.h>
+
+/*
+ * Handle a write to a process's note (or notepg) file.
+ *
+ * The note text is fetched from userland with vfs_getuserstr(), but
+ * nothing is done with it yet: a well-formed write always fails with
+ * EOPNOTSUPP.  Anything other than a write gives EINVAL, and errors
+ * from vfs_getuserstr() are passed through.
+ */
+int
+procfs_donote(curp, p, pfs, uio)
+	struct proc *curp;
+	struct proc *p;
+	struct pfsnode *pfs;
+	struct uio *uio;
+{
+	char text[PROCFS_NOTELEN+1];
+	int textlen = PROCFS_NOTELEN;
+	int rv;
+
+	if (uio->uio_rw != UIO_WRITE)
+		return (EINVAL);
+
+	rv = vfs_getuserstr(uio, text, &textlen);
+
+	/* delivery to the process's notify function is not implemented */
+	return (rv ? rv : EOPNOTSUPP);
+}
diff --git a/sys/fs/procfs/procfs_regs.c b/sys/fs/procfs/procfs_regs.c
new file mode 100644
index 00000000000..fa95fef8f10
--- /dev/null
+++ b/sys/fs/procfs/procfs_regs.c
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 1993 Jan-Simon Pendry
+ * Copyright (c) 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)procfs_regs.c	8.3 (Berkeley) 1/27/94
+ *
+ * From:
+ *	$Id: procfs_regs.c,v 3.2 1993/12/15 09:40:17 jsp Exp $
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/time.h>
+#include <sys/kernel.h>
+#include <sys/proc.h>
+#include <sys/vnode.h>
+#include <machine/reg.h>
+#include <miscfs/procfs/procfs.h>
+
+/*
+ * Read or write the general registers of process (p) through its
+ * regs file.
+ *
+ * The file is treated as an image of a struct reg.  The current
+ * registers are fetched with procfs_read_regs(), the requested byte
+ * range is moved with uiomove(), and for a write the (possibly
+ * partially) updated image is stored back with procfs_write_regs().
+ * uio_offset is always reset to zero on return.
+ *
+ * Returns 0 on success or the following errors
+ *
+ * EINVAL:  file offset is negative or beyond the end of the image
+ * EBUSY:   a write was attempted while the target is not stopped
+ *
+ * Errors from procfs_read_regs(), uiomove() and procfs_write_regs()
+ * are passed through unchanged.
+ */
+int
+procfs_doregs(curp, p, pfs, uio)
+	struct proc *curp;
+	struct proc *p;
+	struct pfsnode *pfs;
+	struct uio *uio;
+{
+	int error;
+	struct reg r;
+	char *kv;
+	int kl;
+
+	/*
+	 * Range-check the offset before it is used in any arithmetic.
+	 * A negative offset would otherwise move kv in front of r and
+	 * make kl larger than the image, and the later "kl < 0" test
+	 * would not notice, letting uiomove() run over the kernel stack.
+	 * (The "<" test comes first so the ">" comparison never sees a
+	 * negative value.)
+	 */
+	if (uio->uio_offset < 0 || uio->uio_offset > sizeof(r)) {
+		uio->uio_offset = 0;
+		return (EINVAL);
+	}
+
+	/* offset is now known to lie within [0, sizeof(r)] */
+	kv = (char *) &r + (int) uio->uio_offset;
+	kl = sizeof(r) - (int) uio->uio_offset;
+	if (kl > uio->uio_resid)
+		kl = uio->uio_resid;
+
+	/* start from the live registers so a partial write keeps the rest */
+	error = procfs_read_regs(p, &r);
+	if (error == 0)
+		error = uiomove(kv, kl, uio);
+	if (error == 0 && uio->uio_rw == UIO_WRITE) {
+		if (p->p_stat != SSTOP)
+			error = EBUSY;
+		else
+			error = procfs_write_regs(p, &r);
+	}
+
+	uio->uio_offset = 0;
+	return (error);
+}
diff --git a/sys/fs/procfs/procfs_status.c b/sys/fs/procfs/procfs_status.c
new file mode 100644
index 00000000000..d88aaabdfb0
--- /dev/null
+++ b/sys/fs/procfs/procfs_status.c
@@ -0,0 +1,145 @@
+/*
+ * Copyright (c) 1993 Jan-Simon Pendry
+ * Copyright (c) 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)procfs_status.c	8.3 (Berkeley) 2/17/94
+ *
+ * From:
+ *	$Id: procfs_status.c,v 3.1 1993/12/15 09:40:17 jsp Exp $
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/time.h>
+#include <sys/kernel.h>
+#include <sys/proc.h>
+#include <sys/vnode.h>
+#include <sys/ioctl.h>
+#include <sys/tty.h>
+#include <sys/resource.h>
+#include <sys/resourcevar.h>
+#include <miscfs/procfs/procfs.h>
+
+/*
+ * Produce the contents of a process's status file: one line of
+ * space separated fields describing process (p), terminated by a
+ * newline.
+ *
+ * The fields are, in order:
+ *
+ *	comm pid ppid pgid sid maj,min flags start ut st wmsg uid,groups
+ *
+ * where flags is "ctty", "sldr", both joined by a comma, or "noflags",
+ * and start/ut/st are written as sec,usec pairs.
+ *
+ * Only reads are supported (EOPNOTSUPP otherwise).  A negative file
+ * offset gives EINVAL; an offset at or beyond the end of the line
+ * transfers nothing and returns 0.
+ */
+int
+procfs_dostatus(curp, p, pfs, uio)
+	struct proc *curp;
+	struct proc *p;
+	struct pfsnode *pfs;
+	struct uio *uio;
+{
+	struct session *sess;
+	struct tty *tp;
+	struct ucred *cr;
+	char *ps;
+	char *sep;
+	int pid, ppid, pgid, sid;
+	int i;
+	int xlen;
+	int error;
+	char psbuf[256];		/* XXX - conservative */
+
+	if (uio->uio_rw != UIO_READ)
+		return (EOPNOTSUPP);
+
+	/*
+	 * A negative offset would make the copy below start in front
+	 * of psbuf and hand kernel stack contents to the reader.
+	 */
+	if (uio->uio_offset < 0)
+		return (EINVAL);
+
+	pid = p->p_pid;
+	ppid = p->p_pptr ? p->p_pptr->p_pid : 0;
+	pgid = p->p_pgrp->pg_id;
+	sess = p->p_pgrp->pg_session;
+	sid = sess->s_leader ? sess->s_leader->p_pid : 0;
+
+/* comm pid ppid pgid sid maj,min ctty,sldr start ut st wmsg uid groups ... */
+
+	ps = psbuf;
+	bcopy(p->p_comm, ps, MAXCOMLEN);
+	ps[MAXCOMLEN] = '\0';
+	ps += strlen(ps);
+	ps += sprintf(ps, " %d %d %d %d ", pid, ppid, pgid, sid);
+
+	if ((p->p_flag&P_CONTROLT) && (tp = sess->s_ttyp))
+		ps += sprintf(ps, "%d,%d ", major(tp->t_dev), minor(tp->t_dev));
+	else
+		ps += sprintf(ps, "%d,%d ", -1, -1);
+
+	sep = "";
+	if (sess->s_ttyvp) {
+		ps += sprintf(ps, "%sctty", sep);
+		sep = ",";
+	}
+	if (SESS_LEADER(p)) {
+		ps += sprintf(ps, "%ssldr", sep);
+		sep = ",";
+	}
+	if (*sep != ',')
+		ps += sprintf(ps, "noflags");
+
+	if (p->p_flag & P_INMEM)
+		ps += sprintf(ps, " %d,%d",
+			p->p_stats->p_start.tv_sec,
+			p->p_stats->p_start.tv_usec);
+	else
+		ps += sprintf(ps, " -1,-1");
+
+	{
+		struct timeval ut, st;
+
+		calcru(p, &ut, &st, (void *) 0);
+		ps += sprintf(ps, " %d,%d %d,%d",
+			ut.tv_sec,
+			ut.tv_usec,
+			st.tv_sec,
+			st.tv_usec);
+	}
+
+	ps += sprintf(ps, " %s",
+		(p->p_wchan && p->p_wmesg) ? p->p_wmesg : "nochan");
+
+	cr = p->p_ucred;
+
+	/* the uid, then the group list appended as ",gid" entries */
+	ps += sprintf(ps, " %d", cr->cr_uid);
+	for (i = 0; i < cr->cr_ngroups; i++) {
+		/*
+		 * Stop adding groups rather than run off the end of
+		 * psbuf.  An entry is assumed to need at most 12 bytes
+		 * (comma, sign and ten digits of a 32-bit int - TODO
+		 * confirm for other int sizes); two more are kept for
+		 * the newline and the terminating nul.  The fixed
+		 * fields above are assumed to fit.
+		 */
+		if (ps + 12 + 2 > psbuf + sizeof(psbuf))
+			break;
+		ps += sprintf(ps, ",%d", cr->cr_groups[i]);
+	}
+	ps += sprintf(ps, "\n");
+
+	/* hand back whatever part of the line lies at or after the offset */
+	xlen = ps - psbuf;
+	if (uio->uio_offset >= xlen)
+		return (0);
+	ps = psbuf + (int) uio->uio_offset;
+	xlen -= (int) uio->uio_offset;
+	xlen = min(xlen, uio->uio_resid);
+	if (xlen <= 0)
+		error = 0;
+	else
+		error = uiomove(ps, xlen, uio);
+
+	return (error);
+}
diff --git a/sys/fs/procfs/procfs_subr.c b/sys/fs/procfs/procfs_subr.c
new file mode 100644
index 00000000000..b371af19af0
--- /dev/null
+++ b/sys/fs/procfs/procfs_subr.c
@@ -0,0 +1,314 @@
+/*
+ * Copyright (c) 1993 Jan-Simon Pendry
+ * Copyright (c) 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)procfs_subr.c	8.4 (Berkeley) 1/27/94
+ *
+ * From:
+ *	$Id: procfs_subr.c,v 3.2 1993/12/15 09:40:17 jsp Exp $
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/time.h>
+#include <sys/kernel.h>
+#include <sys/proc.h>
+#include <sys/vnode.h>
+#include <sys/malloc.h>
+#include <miscfs/procfs/procfs.h>
+
+/* head of the singly-linked list of pfsnodes, chained through pfs_next */
+static struct pfsnode *pfshead;
+/* PROCFS_LOCKED / PROCFS_WANT bits; taken by procfs_allocvp() around getnewvnode() */
+static int pfsvplock;
+
+/*
+ * find or create the pfsnode/vnode pair for (pid, pfs_type) on
+ * mount point (mp) and return the vnode through (vpp).  the
+ * vnode comes back referenced, but not locked.
+ *
+ * a pfsnode is uniquely identified by its pid, its pfs_type
+ * and its mount point; the mount point matters because the
+ * filesystem might be mounted more than once.
+ *
+ * every pfsnode lives on one singly-linked list headed by
+ * pfshead.  a new node is only created when no matching one
+ * is on the list, and nodes leave the list when the vfs
+ * reclaim entry is called.
+ *
+ * when a matching node is found, vget() is used to take the
+ * reference, since the vnode may have no references at all
+ * and then has to come off the vnode free list.  if vget()
+ * fails the list is searched again from the start.
+ *
+ * one lock (pfsvplock) covers the whole list.  it exists
+ * because getnewvnode() can block waiting for a free vnode,
+ * so several processes could otherwise end up creating the
+ * same node.  the kernel itself being single-threaded, the
+ * lock is only taken on the path that calls getnewvnode().
+ *
+ * returns 0 on success or the error from getnewvnode().
+ */
+int
+procfs_allocvp(mp, vpp, pid, pfs_type)
+	struct mount *mp;
+	struct vnode **vpp;
+	long pid;
+	pfstype pfs_type;
+{
+	struct pfsnode *node;
+	struct pfsnode **tailp;
+	int rv;
+
+rescan:
+	/* reuse an existing node if one matches */
+	node = pfshead;
+	while (node != 0) {
+		if (node->pfs_pid == pid &&
+		    node->pfs_type == pfs_type &&
+		    PFSTOV(node)->v_mount == mp) {
+			if (vget(node->pfs_vnode, 0))
+				goto rescan;
+			*vpp = node->pfs_vnode;
+			return (0);
+		}
+		node = node->pfs_next;
+	}
+
+	/*
+	 * nothing suitable on the list.  take the list lock before
+	 * calling getnewvnode(), which may block; if somebody else
+	 * holds it, wait and then search the list again.
+	 */
+	if (pfsvplock & PROCFS_LOCKED) {
+		pfsvplock |= PROCFS_WANT;
+		sleep((caddr_t) &pfsvplock, PINOD);
+		goto rescan;
+	}
+	pfsvplock |= PROCFS_LOCKED;
+
+	rv = getnewvnode(VT_PROCFS, mp, procfs_vnodeop_p, vpp);
+	if (rv)
+		goto unlock;
+
+	MALLOC((*vpp)->v_data, void *, sizeof(struct pfsnode),
+		M_TEMP, M_WAITOK);
+
+	/* fill in the fresh node */
+	node = VTOPFS(*vpp);
+	node->pfs_next = 0;
+	node->pfs_pid = (pid_t) pid;
+	node->pfs_type = pfs_type;
+	node->pfs_vnode = *vpp;
+	node->pfs_flags = 0;
+	node->pfs_fileno = PROCFS_FILENO(pid, pfs_type);
+
+	/* access mode depends only on the kind of node */
+	switch (pfs_type) {
+	case Proot:	/* /proc = dr-xr-xr-x */
+	case Pproc:
+		node->pfs_mode = (VREAD|VEXEC) |
+				 (VREAD|VEXEC) >> 3 |
+				 (VREAD|VEXEC) >> 6;
+		break;
+
+	case Pfile:
+	case Pmem:
+	case Pregs:
+	case Pfpregs:
+		node->pfs_mode = (VREAD|VWRITE);
+		break;
+
+	case Pctl:
+	case Pnote:
+	case Pnotepg:
+		node->pfs_mode = (VWRITE);
+		break;
+
+	case Pstatus:
+		node->pfs_mode = (VREAD) |
+				 (VREAD >> 3) |
+				 (VREAD >> 6);
+		break;
+
+	default:
+		panic("procfs_allocvp");
+	}
+
+	/* append to the end of the procfs vnode list */
+	tailp = &pfshead;
+	while (*tailp)
+		tailp = &(*tailp)->pfs_next;
+	*tailp = node;
+
+unlock:
+	/* drop the list lock and wake anybody waiting for it */
+	pfsvplock &= ~PROCFS_LOCKED;
+
+	if (pfsvplock & PROCFS_WANT) {
+		pfsvplock &= ~PROCFS_WANT;
+		wakeup((caddr_t) &pfsvplock);
+	}
+
+	return (rv);
+}
+
+/*
+ * release the pfsnode attached to vnode (vp): unlink it from
+ * the pfshead list if it is on it, free the storage and clear
+ * v_data.  always returns 0.
+ */
+int
+procfs_freevp(vp)
+	struct vnode *vp;
+{
+	struct pfsnode *victim = VTOPFS(vp);
+	struct pfsnode **linkp = &pfshead;
+
+	/* walk to the link that points at the node, or to the end */
+	while (*linkp != 0 && *linkp != victim)
+		linkp = &(*linkp)->pfs_next;
+	if (*linkp != 0)
+		*linkp = victim->pfs_next;
+
+	FREE(vp->v_data, M_TEMP);
+	vp->v_data = 0;
+	return (0);
+}
+
+/*
+ * common read/write entry: look up the process the vnode refers
+ * to and pass the request to the handler for the node's type.
+ *
+ * returns EINVAL if the process cannot be found, EOPNOTSUPP for
+ * node types without a handler, and otherwise whatever the
+ * handler returns.
+ */
+int
+procfs_rw(ap)
+	struct vop_read_args *ap;
+{
+	struct vnode *vp = ap->a_vp;
+	struct uio *uio = ap->a_uio;
+	struct pfsnode *pfs = VTOPFS(vp);
+	struct proc *curp = uio->uio_procp;
+	struct proc *p;
+	int rv;
+
+	if ((p = PFIND(pfs->pfs_pid)) == 0)
+		return (EINVAL);
+
+	switch (pfs->pfs_type) {
+	case Pnote:
+	case Pnotepg:
+		rv = procfs_donote(curp, p, pfs, uio);
+		break;
+
+	case Pregs:
+		rv = procfs_doregs(curp, p, pfs, uio);
+		break;
+
+	case Pfpregs:
+		rv = procfs_dofpregs(curp, p, pfs, uio);
+		break;
+
+	case Pctl:
+		rv = procfs_doctl(curp, p, pfs, uio);
+		break;
+
+	case Pstatus:
+		rv = procfs_dostatus(curp, p, pfs, uio);
+		break;
+
+	case Pmem:
+		rv = procfs_domem(curp, p, pfs, uio);
+		break;
+
+	default:
+		rv = EOPNOTSUPP;
+		break;
+	}
+
+	return (rv);
+}
+
+/*
+ * Copy a string written by the user into (buf) and tidy it up.
+ *
+ * On entry *buflenp is the most characters that may be accepted;
+ * (buf) should have room for one more, since a nul char is always
+ * added at the end.  One trailing nl character is stripped (to
+ * allow easy access from the shell).  On success *buflenp is set
+ * to the length of the resulting string and the file offset is
+ * put back to zero.
+ *
+ * Returns 0 on success or the following errors
+ *
+ * EINVAL:    file offset is non-zero.
+ * EMSGSIZE:  message is longer than kernel buffer
+ * EFAULT:    user i/o buffer is not addressable
+ */
+int
+vfs_getuserstr(uio, buf, buflenp)
+	struct uio *uio;
+	char *buf;
+	int *buflenp;
+{
+	int len;
+	int rv;
+
+	if (uio->uio_offset != 0)
+		return (EINVAL);
+
+	/* the whole string has to arrive in a single transfer */
+	if (*buflenp < uio->uio_resid)
+		return (EMSGSIZE);
+	len = uio->uio_resid;
+
+	rv = uiomove(buf, len, uio);
+	if (rv)
+		return (rv);
+
+	/* rewind, so that further writes need no seek in between */
+	uio->uio_offset = 0;
+
+	/* terminate, measure, and drop one trailing newline */
+	buf[len] = '\0';
+	len = strlen(buf);
+	if (len > 0 && buf[len-1] == '\n') {
+		len--;
+		buf[len] = '\0';
+	}
+	*buflenp = len;
+
+	return (0);
+}
+
+/*
+ * Look up (buf) in the name table (nm), which is terminated by an
+ * entry whose nm_name is null.
+ *
+ * (buflen) is the length of the string in (buf), not counting its
+ * terminating nul, as produced by vfs_getuserstr().  Returns the
+ * first entry whose name is exactly that string, or 0 if there is
+ * none.
+ */
+vfs_namemap_t *
+vfs_findname(nm, buf, buflen)
+	vfs_namemap_t *nm;
+	char *buf;
+	int buflen;
+{
+	for (; nm->nm_name; nm++) {
+		/*
+		 * Check the length first: comparing buflen+1 bytes
+		 * against a shorter name could read past the end of
+		 * that name.
+		 */
+		if ((int) strlen((char *) nm->nm_name) != buflen)
+			continue;
+		/* the +1 also compares the terminating nul */
+		if (bcmp(buf, (char *) nm->nm_name, buflen+1) == 0)
+			return (nm);
+	}
+
+	return (0);
+}
diff --git a/sys/fs/procfs/procfs_vfsops.c b/sys/fs/procfs/procfs_vfsops.c
new file mode 100644
index 00000000000..3938ca12357
--- /dev/null
+++ b/sys/fs/procfs/procfs_vfsops.c
@@ -0,0 +1,243 @@
+/*
+ * Copyright (c) 1993 Jan-Simon Pendry
+ * Copyright (c) 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)procfs_vfsops.c	8.4 (Berkeley) 1/21/94
+ *
+ * From:
+ *	$Id: procfs_vfsops.c,v 3.1 1993/12/15 09:40:17 jsp Exp $
+ */
+
+/*
+ * procfs VFS interface
+ */
+
+#include <sys/param.h>
+#include <sys/time.h>
+#include <sys/kernel.h>
+#include <sys/proc.h>
+#include <sys/buf.h>
+#include <sys/syslog.h>
+#include <sys/mount.h>
+#include <sys/signalvar.h>
+#include <sys/vnode.h>
+#include <miscfs/procfs/procfs.h>
+#include <vm/vm.h>			/* for PAGE_SIZE */
+
+/*
+ * VFS Operations.
+ *
+ * mount system call
+ *
+ * procfs takes no mount arguments; (data), (ndp) and (p) are not
+ * used.  Updating an existing mount is refused with EOPNOTSUPP.
+ * Otherwise the mount is flagged as local, given a fresh
+ * filesystem id, and the "mounted on" and "mounted from" names
+ * in the mount's statfs structure are filled in.
+ */
+/* ARGSUSED */
+int
+procfs_mount(mp, path, data, ndp, p)
+	struct mount *mp;
+	char *path;
+	caddr_t data;
+	struct nameidata *ndp;
+	struct proc *p;
+{
+	u_int size;
+
+	/*
+	 * The directory code relies on UIO_MX being a power of
+	 * two (it masks offsets with UIO_MX-1).
+	 */
+	if (UIO_MX & (UIO_MX-1)) {
+		/* terminate the line so it cannot run into the next message */
+		log(LOG_ERR, "procfs: invalid directory entry size\n");
+		return (EINVAL);
+	}
+
+	if (mp->mnt_flag & MNT_UPDATE)
+		return (EOPNOTSUPP);
+
+	mp->mnt_flag |= MNT_LOCAL;
+	mp->mnt_data = 0;
+	getnewfsid(mp, MOUNT_PROCFS);
+
+	/*
+	 * Record where we are mounted, nul padding the rest of the field.
+	 * NOTE(review): the status of copyinstr is deliberately
+	 * discarded; this assumes (size) is stored even when the
+	 * copy fails -- confirm against the copyinstr in use.
+	 */
+	(void) copyinstr(path, (caddr_t)mp->mnt_stat.f_mntonname, MNAMELEN, &size);
+	bzero(mp->mnt_stat.f_mntonname + size, MNAMELEN - size);
+
+	/* there is no underlying device, so use a fixed name */
+	size = sizeof("procfs") - 1;
+	bcopy("procfs", mp->mnt_stat.f_mntfromname, size);
+	bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size);
+
+	return (0);
+}
+
+/*
+ * unmount system call
+ *
+ * Flush every vnode that belongs to the mount.  A forced
+ * unmount is refused with EINVAL unless the global (doforce)
+ * is set.  (p) is not used.
+ */
+int
+procfs_unmount(mp, mntflags, p)
+	struct mount *mp;
+	int mntflags;
+	struct proc *p;
+{
+	extern int doforce;
+	int flushflags;
+
+	flushflags = 0;
+	if (mntflags & MNT_FORCE) {
+		/* procfs can never be rootfs so don't check for it */
+		if (doforce == 0)
+			return (EINVAL);
+		flushflags = FORCECLOSE;
+	}
+
+	/* the status of the flush is the status of the unmount */
+	return (vflush(mp, 0, flushflags));
+}
+
+/*
+ * Return the root vnode of the procfs mount (mp) in (*vpp).
+ *
+ * The root is the Proot node allocated for pid 0.  It is marked
+ * as a directory and as the root of its filesystem.  Returns 0,
+ * or the error from procfs_allocvp, in which case (*vpp) is left
+ * untouched.
+ */
+int
+procfs_root(mp, vpp)
+	struct mount *mp;
+	struct vnode **vpp;
+{
+	struct vnode *vp;
+	int error;
+
+	error = procfs_allocvp(mp, &vp, (pid_t) 0, Proot);
+	if (error)
+		return (error);
+
+	vp->v_type = VDIR;
+	/*
+	 * NOTE(review): v_flag is assigned rather than or'ed, so any
+	 * flag already set on the vnode is dropped -- confirm that
+	 * this is intended.
+	 */
+	vp->v_flag = VROOT;
+
+	*vpp = vp;
+	return (0);
+}
+
+/*
+ * VFS start entry point.  procfs has no work to do
+ * here, so success is always reported.
+ */
+/* ARGSUSED */
+int
+procfs_start(mp, flags, p)
+	struct mount *mp;
+	int flags;
+	struct proc *p;
+{
+	return (0);
+}
+
+/*
+ * Get file system statistics.
+ *
+ * The block counts are nominal and the file counts are
+ * approximated from maxproc and nprocs.  When (sbp) is not the
+ * mount's own statfs structure the filesystem id and the mount
+ * names are copied across from it.  (p) is not used.
+ */
+int
+procfs_statfs(mp, sbp, p)
+	struct mount *mp;
+	struct statfs *sbp;
+	struct proc *p;
+{
+	struct statfs *msp = &mp->mnt_stat;
+
+	sbp->f_type = MOUNT_PROCFS;
+
+	/* sizes: one page per "block" */
+	sbp->f_bsize = PAGE_SIZE;
+	sbp->f_iosize = PAGE_SIZE;
+
+	/* blocks: a single one, and none of it free */
+	sbp->f_blocks = 1;	/* avoid divide by zero in some df's */
+	sbp->f_bfree = 0;
+	sbp->f_bavail = 0;
+
+	/* files: one per process slot */
+	sbp->f_files = maxproc;			/* approx */
+	sbp->f_ffree = maxproc - nprocs;	/* approx */
+
+	/* nothing more to do when handed the mount's own buffer */
+	if (sbp == msp)
+		return (0);
+
+	bcopy(&msp->f_fsid, &sbp->f_fsid, sizeof(sbp->f_fsid));
+	bcopy(msp->f_mntonname, sbp->f_mntonname, MNAMELEN);
+	bcopy(msp->f_mntfromname, sbp->f_mntfromname, MNAMELEN);
+
+	return (0);
+}
+
+
+/*
+ * Quota control.  Not supported by procfs;
+ * every request is refused.
+ */
+/* ARGSUSED */
+int
+procfs_quotactl(mp, cmds, uid, arg, p)
+	struct mount *mp;
+	int cmds;
+	uid_t uid;
+	caddr_t arg;
+	struct proc *p;
+{
+	return (EOPNOTSUPP);
+}
+
+/*
+ * Sync entry point.  Nothing is done here
+ * and success is always reported.
+ */
+/* ARGSUSED */
+int
+procfs_sync(mp, waitfor)
+	struct mount *mp;
+	int waitfor;
+{
+	return (0);
+}
+
+/*
+ * Look up a vnode by inode number.
+ * Not supported by procfs.
+ */
+/* ARGSUSED */
+int
+procfs_vget(mp, ino, vpp)
+	struct mount *mp;
+	ino_t ino;
+	struct vnode **vpp;
+{
+	return (EOPNOTSUPP);
+}
+
+/*
+ * File handle to vnode translation.  procfs
+ * rejects every file handle as invalid.
+ */
+/* ARGSUSED */
+int
+procfs_fhtovp(mp, fhp, vpp)
+	struct mount *mp;
+	struct fid *fhp;
+	struct vnode **vpp;
+{
+	return (EINVAL);
+}
+
+/*
+ * Vnode to file handle translation.  procfs
+ * never produces a file handle.
+ */
+/* ARGSUSED */
+int
+procfs_vptofh(vp, fhp)
+	struct vnode *vp;
+	struct fid *fhp;
+{
+	return (EINVAL);
+}
+
+/*
+ * Filesystem initialisation entry point.
+ * Nothing is set up here.
+ */
+int
+procfs_init()
+{
+	return (0);
+}
+
+/*
+ * procfs VFS operations vector.
+ *
+ * The initialiser is positional, so the order of the entries
+ * must follow the member order of struct vfsops (which is
+ * declared outside this file).
+ */
+struct vfsops procfs_vfsops = {
+	procfs_mount,
+	procfs_start,
+	procfs_unmount,
+	procfs_root,
+	procfs_quotactl,
+	procfs_statfs,
+	procfs_sync,
+	procfs_vget,
+	procfs_fhtovp,
+	procfs_vptofh,
+	procfs_init,
+};
diff --git a/sys/fs/procfs/procfs_vnops.c b/sys/fs/procfs/procfs_vnops.c
new file mode 100644
index 00000000000..4e1ee002bb9
--- /dev/null
+++ b/sys/fs/procfs/procfs_vnops.c
@@ -0,0 +1,814 @@
+/*
+ * Copyright (c) 1993 Jan-Simon Pendry
+ * Copyright (c) 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)procfs_vnops.c	8.6 (Berkeley) 2/7/94
+ *
+ * From:
+ *	$Id: procfs_vnops.c,v 3.2 1993/12/15 09:40:17 jsp Exp $
+ */
+
+/*
+ * procfs vnode interface
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/time.h>
+#include <sys/kernel.h>
+#include <sys/file.h>
+#include <sys/proc.h>
+#include <sys/vnode.h>
+#include <sys/namei.h>
+#include <sys/malloc.h>
+#include <sys/dirent.h>
+#include <sys/resourcevar.h>
+#include <miscfs/procfs/procfs.h>
+#include <vm/vm.h>	/* for PAGE_SIZE */
+
+/*
+ * Vnode Operations.
+ *
+ */
+
+/*
+ * This is a list of the valid names in the
+ * process-specific sub-directories.  It is
+ * used in procfs_lookup and procfs_readdir
+ *
+ * Each entry gives the length of the name (not
+ * counting the terminating nul), the name itself
+ * and the type of node that the name refers to.
+ */
+static struct pfsnames {
+	u_short	d_namlen;
+	char	d_name[PROCFS_NAMELEN];
+	pfstype	d_pfstype;
+} procent[] = {
+/*
+ * N(s) expands to two initialisers: the length of
+ * the string literal (s) and the literal itself.
+ */
+#define N(s) sizeof(s)-1, s
+	/* namlen, nam, type */
+	{  N("file"),	Pfile },
+	{  N("mem"),	Pmem },
+	{  N("regs"),	Pregs },
+	{  N("fpregs"),	Pfpregs },
+	{  N("ctl"),	Pctl },
+	{  N("status"),	Pstatus },
+	{  N("note"),	Pnote },
+	{  N("notepg"),	Pnotepg },
+#undef N
+};
+/* number of entries in procent[] */
+#define Nprocent (sizeof(procent)/sizeof(procent[0]))
+
+/* convert a decimal name of the given length to a pid, or NO_PID */
+static pid_t atopid __P((const char *, u_int));
+
+/*
+ * set things up for doing i/o on
+ * the pfsnode (vp).  (vp) is locked
+ * on entry, and should be left locked
+ * on exit.
+ *
+ * the only node type that needs any work
+ * is the process memory image, for which
+ * exclusive open is supported: an open for
+ * writing is recorded in the pfsnode, and
+ * a later open which conflicts with it
+ * fails with EBUSY.
+ */
+int
+procfs_open(ap)
+	struct vop_open_args *ap;
+{
+	struct pfsnode *pfs = VTOPFS(ap->a_vp);
+	int mode = ap->a_mode;
+
+	/* everything but the memory image opens trivially */
+	if (pfs->pfs_type != Pmem)
+		return (0);
+
+	if (PFIND(pfs->pfs_pid) == 0)
+		return (ENOENT);	/* was ESRCH, jsp */
+
+	/* a recorded writer rules out an exclusive open ... */
+	if ((pfs->pfs_flags & FWRITE) && (mode & O_EXCL))
+		return (EBUSY);
+
+	/* ... and a recorded exclusive open rules out another writer */
+	if ((pfs->pfs_flags & O_EXCL) && (mode & FWRITE))
+		return (EBUSY);
+
+	/* remember how the image was opened for writing */
+	if (mode & FWRITE)
+		pfs->pfs_flags = mode & (FWRITE|O_EXCL);
+
+	return (0);
+}
+
+/*
+ * close the pfsnode (vp) after doing i/o.
+ * (vp) is not locked on entry or exit.
+ *
+ * the only thing to undo is the exclusive
+ * open state recorded for a process memory
+ * image (see _open above); it is cleared when
+ * a descriptor that was open for writing is
+ * closed.
+ */
+int
+procfs_close(ap)
+	struct vop_close_args *ap;
+{
+	struct pfsnode *pfs = VTOPFS(ap->a_vp);
+
+	if (pfs->pfs_type != Pmem)
+		return (0);
+
+	if ((ap->a_fflag & FWRITE) && (pfs->pfs_flags & O_EXCL))
+		pfs->pfs_flags &= ~(FWRITE|O_EXCL);
+
+	return (0);
+}
+
+/*
+ * do an ioctl operation on pfsnode (vp).
+ * (vp) is not locked on entry or exit.
+ *
+ * no ioctls are implemented.
+ */
+/* ARGSUSED */
+int
+procfs_ioctl(ap)
+	struct vop_ioctl_args *ap;
+{
+	return (ENOTTY);
+}
+
+/*
+ * do block mapping for pfsnode (vp).
+ * procfs does not use the buffer cache, so
+ * this function should never be called.  for
+ * sanity it is the usual no-op bmap, which maps
+ * the vnode to itself and every block number to
+ * itself; returning (EIO) would be a reasonable
+ * alternative.
+ */
+int
+procfs_bmap(ap)
+	struct vop_bmap_args *ap;
+{
+	/* either result pointer may be omitted by the caller */
+	if (ap->a_vpp)
+		*ap->a_vpp = ap->a_vp;
+
+	if (ap->a_bnp)
+		*ap->a_bnp = ap->a_bn;
+
+	return (0);
+}
+
+/*
+ * _inactive is called when the pfsnode
+ * is vrele'd and the reference count goes
+ * to zero.  (vp) will be on the vnode free
+ * list, so to get it back vget() must be
+ * used.
+ *
+ * for procfs, the vnode is thrown away with
+ * vgone() if its process no longer exists.
+ * this may be overkill and a waste of time,
+ * since the chances are that the process will
+ * still be there and PFIND is not free.
+ *
+ * (vp) is not locked on entry or exit.
+ */
+int
+procfs_inactive(ap)
+	struct vop_inactive_args *ap;
+{
+	struct vnode *vp = ap->a_vp;
+
+	if (PFIND(VTOPFS(vp)->pfs_pid) == 0)
+		vgone(vp);
+
+	return (0);
+}
+
+/*
+ * _reclaim is called when getnewvnode()
+ * wants to make use of an entry on the vnode
+ * free list.  at this time the filesystem needs
+ * to free any private data and remove the node
+ * from any private lists.
+ *
+ * all of that is left to procfs_freevp, whose
+ * status is handed back unchanged.
+ */
+int
+procfs_reclaim(ap)
+	struct vop_reclaim_args *ap;
+{
+	return (procfs_freevp(ap->a_vp));
+}
+
+/*
+ * Return POSIX pathconf information.
+ *
+ * The value of the variable selected by a_name is stored
+ * through a_retval.  An unrecognised name yields EINVAL and
+ * leaves *a_retval alone.
+ */
+int
+procfs_pathconf(ap)
+	struct vop_pathconf_args /* {
+		struct vnode *a_vp;
+		int a_name;
+		int *a_retval;
+	} */ *ap;
+{
+	int value;
+
+	switch (ap->a_name) {
+	case _PC_LINK_MAX:
+		value = LINK_MAX;
+		break;
+	case _PC_MAX_CANON:
+		value = MAX_CANON;
+		break;
+	case _PC_MAX_INPUT:
+		value = MAX_INPUT;
+		break;
+	case _PC_PIPE_BUF:
+		value = PIPE_BUF;
+		break;
+	case _PC_CHOWN_RESTRICTED:
+		value = 1;
+		break;
+	case _PC_VDISABLE:
+		value = _POSIX_VDISABLE;
+		break;
+	default:
+		return (EINVAL);
+	}
+
+	*ap->a_retval = value;
+	return (0);
+}
+
+/*
+ * _print is used for debugging.
+ * just print a readable description
+ * of (vp): the pid, mode and flags
+ * held in its pfsnode.
+ *
+ * always returns 0.
+ */
+int
+procfs_print(ap)
+	struct vop_print_args *ap;
+{
+	struct pfsnode *pfs = VTOPFS(ap->a_vp);
+
+	printf("tag VT_PROCFS, pid %d, mode %x, flags %x\n",
+		pfs->pfs_pid,
+		pfs->pfs_mode, pfs->pfs_flags);
+
+	/*
+	 * the function returns int, so hand back a defined
+	 * value rather than falling off the end.
+	 */
+	return (0);
+}
+
+/*
+ * _abortop is called when operations such as
+ * rename and create fail.  this entry is responsible
+ * for undoing any side-effects caused by the lookup.
+ * for procfs the only side-effect is the pathname
+ * buffer, which is freed if the lookup allocated one
+ * (HASBUF) and was not asked to keep it (SAVESTART).
+ */
+int
+procfs_abortop(ap)
+	struct vop_abortop_args *ap;
+{
+	struct componentname *cnp = ap->a_cnp;
+
+	if ((cnp->cn_flags & (HASBUF | SAVESTART)) == HASBUF)
+		FREE(cnp->cn_pnbuf, M_NAMEI);
+
+	return (0);
+}
+
+/*
+ * generic entry point for unsupported operations.
+ * always fails with EIO.
+ */
+int
+procfs_badop()
+{
+	return (EIO);
+}
+
+/*
+ * Invent attributes for pfsnode (vp) and store
+ * them in (vap).
+ * Directories lengths are returned as zero since
+ * any real length would require the genuine size
+ * to be computed, and nothing cares anyway.
+ *
+ * this is relatively minimal for procfs.
+ *
+ * Returns ENOENT if the node refers to a process
+ * which no longer exists, and EOPNOTSUPP for a
+ * Pfile node.
+ */
+int
+procfs_getattr(ap)
+	struct vop_getattr_args *ap;
+{
+	struct pfsnode *pfs = VTOPFS(ap->a_vp);
+	struct vattr *vap = ap->a_vap;
+	struct proc *procp = 0;
+	int error = 0;
+
+	/*
+	 * every node other than the root belongs to a
+	 * process; first check the process still exists
+	 */
+	if (pfs->pfs_type != Proot) {
+		procp = PFIND(pfs->pfs_pid);
+		if (procp == 0)
+			return (ENOENT);
+	}
+
+	/* start by zeroing out the attributes */
+	VATTR_NULL(vap);
+
+	/* next do all the common fields */
+	vap->va_type = ap->a_vp->v_type;
+	vap->va_mode = pfs->pfs_mode;
+	vap->va_fileid = pfs->pfs_fileno;
+	vap->va_flags = 0;
+	vap->va_blocksize = PAGE_SIZE;
+	vap->va_bytes = vap->va_size = 0;
+
+	/*
+	 * If the process has exercised some setuid or setgid
+	 * privilege, then rip away read/write permission on its
+	 * registers and memory so that only root can gain access.
+	 */
+	if (pfs->pfs_type == Pregs || pfs->pfs_type == Pfpregs ||
+	    pfs->pfs_type == Pmem) {
+		if (procp->p_flag & P_SUGID)
+			vap->va_mode &= ~((VREAD|VWRITE)|
+					  ((VREAD|VWRITE)>>3)|
+					  ((VREAD|VWRITE)>>6));
+	}
+
+	/*
+	 * Make all times be current TOD.
+	 * It would be possible to get the process start
+	 * time from the p_stat structure, but there's
+	 * no "file creation" time stamp anyway, and the
+	 * p_stat structure is not addressable if u. gets
+	 * swapped out for that process.
+	 */
+	microtime(&vap->va_ctime);
+	vap->va_atime = vap->va_mtime = vap->va_ctime;
+
+	/*
+	 * now do the object specific fields
+	 *
+	 * The size could be set from struct reg, but it's hardly
+	 * worth the trouble, and it puts some (potentially) machine
+	 * dependent data into this machine-independent code.  If it
+	 * becomes important then this function should break out into
+	 * a per-file stat function in the corresponding .c file.
+	 */
+	switch (pfs->pfs_type) {
+	case Proot:
+		/* the root directory is owned by root */
+		vap->va_nlink = 2;
+		vap->va_uid = 0;
+		vap->va_gid = 0;
+		break;
+
+	case Pproc:
+		/* a process directory is owned by the process */
+		vap->va_nlink = 2;
+		vap->va_uid = procp->p_ucred->cr_uid;
+		vap->va_gid = procp->p_ucred->cr_gid;
+		break;
+
+	case Pfile:
+		error = EOPNOTSUPP;
+		break;
+
+	case Pmem:
+		/* size is the sum of the text, data and stack sizes */
+		vap->va_bytes = vap->va_size =
+			ctob(procp->p_vmspace->vm_tsize +
+				    procp->p_vmspace->vm_dsize +
+				    procp->p_vmspace->vm_ssize);
+		/* FALLTHROUGH */
+
+	case Pregs:
+	case Pfpregs:
+	case Pctl:
+	case Pstatus:
+	case Pnote:
+	case Pnotepg:
+		/* plain files, owned by the process */
+		vap->va_nlink = 1;
+		vap->va_uid = procp->p_ucred->cr_uid;
+		vap->va_gid = procp->p_ucred->cr_gid;
+		break;
+
+	default:
+		panic("procfs_getattr");
+	}
+
+	return (error);
+}
+
+/*
+ * pretend to set attributes on pfsnode (vp).
+ *
+ * nothing is changed, but success is reported.
+ * it's not good to generate an error return,
+ * otherwise things like creat() will fail when
+ * they try to set the file length to 0.  worse,
+ * this means that echo $note > /proc/$pid/note
+ * will fail.
+ */
+/* ARGSUSED */
+int
+procfs_setattr(ap)
+	struct vop_setattr_args *ap;
+{
+	return (0);
+}
+
+/*
+ * implement access checking.
+ *
+ * the requested access bits (a_mode) are checked against the
+ * mode reported by VOP_GETATTR.  returns 0 if access is granted,
+ * EACCES if it is not, or the error from VOP_GETATTR.
+ *
+ * something very similar to this code is duplicated
+ * throughout the 4bsd kernel and should be moved
+ * into kern/vfs_subr.c sometime.
+ *
+ * actually, the check for super-user is slightly
+ * broken since it will allow read access to write-only
+ * objects.  this doesn't cause any particular trouble
+ * but does mean that the i/o entry points need to check
+ * that the operation really does make sense.
+ */
+int
+procfs_access(ap)
+	struct vop_access_args *ap;
+{
+	struct vattr *vap;
+	struct vattr vattr;
+	int error;
+	int mode;
+
+	/*
+	 * If you're the super-user,
+	 * you always get access.
+	 */
+	if (ap->a_cred->cr_uid == (uid_t) 0)
+		return (0);
+	vap = &vattr;
+	if (error = VOP_GETATTR(ap->a_vp, vap, ap->a_cred, ap->a_p))
+		return (error);
+
+	/*
+	 * Work on a private copy of the requested bits, so that
+	 * the caller's argument structure is left unmodified.
+	 */
+	mode = ap->a_mode;
+
+	/*
+	 * Access check is based on only one of owner, group, public.
+	 * If not owner, then check group. If not a member of the
+	 * group, then check public access.
+	 */
+	if (ap->a_cred->cr_uid != vap->va_uid) {
+		gid_t *gp;
+		int i;
+
+		/* move the request down to the group bits */
+		mode >>= 3;
+		gp = ap->a_cred->cr_groups;
+		for (i = 0; i < ap->a_cred->cr_ngroups; i++, gp++)
+			if (vap->va_gid == *gp)
+				break;
+
+		/* in none of the groups: move down to the public bits */
+		if (i >= ap->a_cred->cr_ngroups)
+			mode >>= 3;
+	}
+
+	if ((vap->va_mode & mode) == mode)
+		return (0);
+
+	return (EACCES);
+}
+
+/*
+ * lookup.  this is incredibly complicated in the
+ * general case, however for most pseudo-filesystems
+ * very little needs to be done.
+ *
+ * unless you want to get a migraine, just make sure your
+ * filesystem doesn't do any locking of its own.  otherwise
+ * read and inwardly digest ufs_lookup().
+ *
+ * the name in (cnp) is looked up in the directory (dvp) and
+ * the resulting vnode is returned in (*vpp):
+ *
+ *	"." in any directory is the directory itself.
+ *	in the root, "curproc" names the calling process and a
+ *	  decimal number names the process with that pid.
+ *	in a process directory, ".." is the root and the other
+ *	  valid names are those listed in procent[].
+ *
+ * on failure (*vpp) is NULL and an error is returned.
+ */
+int
+procfs_lookup(ap)
+	struct vop_lookup_args *ap;
+{
+	struct componentname *cnp = ap->a_cnp;
+	struct vnode **vpp = ap->a_vpp;
+	struct vnode *dvp = ap->a_dvp;
+	char *pname = cnp->cn_nameptr;
+	struct pfsnames *dp;
+	struct pfsnode *pfs;
+	struct vnode *nvp;
+	struct proc *procp;
+	pid_t pid;
+	int error;
+	int i;
+
+	if (cnp->cn_namelen == 1 && *pname == '.') {
+		*vpp = dvp;
+		VREF(dvp);
+		/*VOP_LOCK(dvp);*/
+		return (0);
+	}
+
+	*vpp = NULL;
+
+	pfs = VTOPFS(dvp);
+	switch (pfs->pfs_type) {
+	case Proot:
+		/* there is nothing above the root of procfs */
+		if (cnp->cn_flags & ISDOTDOT)
+			return (EIO);
+
+		if (CNEQ(cnp, "curproc", 7))
+			pid = cnp->cn_proc->p_pid;
+		else
+			pid = atopid(pname, cnp->cn_namelen);
+		if (pid == NO_PID)
+			return (ENOENT);
+
+		/* only existing processes have a directory */
+		procp = PFIND(pid);
+		if (procp == 0)
+			return (ENOENT);
+
+		error = procfs_allocvp(dvp->v_mount, &nvp, pid, Pproc);
+		if (error)
+			return (error);
+
+		nvp->v_type = VDIR;
+
+		*vpp = nvp;
+		return (0);
+
+	case Pproc:
+		if (cnp->cn_flags & ISDOTDOT)
+			return (procfs_root(dvp->v_mount, vpp));
+
+		procp = PFIND(pfs->pfs_pid);
+		if (procp == 0)
+			return (ENOENT);
+
+		/* find the name in the table of valid entries */
+		for (i = 0; i < Nprocent; i++) {
+			dp = &procent[i];
+			if (cnp->cn_namelen == dp->d_namlen &&
+			    bcmp(pname, dp->d_name, dp->d_namlen) == 0)
+				break;
+		}
+		if (i >= Nprocent)
+			return (ENOENT);
+
+		if (dp->d_pfstype == Pfile) {
+			/*
+			 * "file" is not a procfs node at all: the vnode
+			 * returned by procfs_findtextvp is handed back,
+			 * referenced and locked.
+			 */
+			nvp = procfs_findtextvp(procp);
+			if (nvp == 0)
+				return (ENXIO);
+			VREF(nvp);
+			VOP_LOCK(nvp);
+		} else {
+			error = procfs_allocvp(dvp->v_mount, &nvp,
+					pfs->pfs_pid, dp->d_pfstype);
+			if (error)
+				return (error);
+
+			nvp->v_type = VREG;
+		}
+		*vpp = nvp;
+		return (0);
+
+	default:
+		return (ENOTDIR);
+	}
+}
+
+/*
+ * readdir returns directory entries from pfsnode (vp).
+ *
+ * the strategy here with procfs is to generate a single
+ * directory entry at a time (struct pfsdent) and then
+ * copy that out to userland using uiomove.  a more efficient
+ * though more complex implementation, would try to minimize
+ * the number of calls to uiomove().  for procfs, this is
+ * hardly worth the added code complexity.
+ *
+ * every entry occupies exactly UIO_MX bytes, so the offset
+ * must be a non-negative multiple of UIO_MX and there must
+ * be room for at least one entry; otherwise EINVAL is
+ * returned.  ENOTDIR is returned if (vp) is not a directory.
+ * on return the offset addresses the next entry to be read.
+ *
+ * this should just be done through read()
+ */
+int
+procfs_readdir(ap)
+	struct vop_readdir_args *ap;
+{
+	struct uio *uio = ap->a_uio;
+	struct pfsdent d;
+	struct pfsdent *dp = &d;
+	struct pfsnode *pfs;
+	int error;
+	int i;
+
+	pfs = VTOPFS(ap->a_vp);
+
+	if (uio->uio_resid < UIO_MX)
+		return (EINVAL);
+	if (uio->uio_offset & (UIO_MX-1))
+		return (EINVAL);
+	if (uio->uio_offset < 0)
+		return (EINVAL);
+
+	error = 0;
+	/* index of the first entry wanted */
+	i = uio->uio_offset / UIO_MX;
+
+	switch (pfs->pfs_type) {
+	/*
+	 * this is for the process-specific sub-directories.
+	 * all that is needed is to copy out all the entries
+	 * from the procent[] table (top of this file).
+	 */
+	case Pproc: {
+		while (uio->uio_resid >= UIO_MX) {
+			struct pfsnames *dt;
+
+			if (i >= Nprocent)
+				break;
+
+			dt = &procent[i];
+
+			/*
+			 * (d) lives on the kernel stack and the whole of
+			 * it is copied out, so clear it first.  otherwise
+			 * the bytes which are not filled in below would
+			 * leak stale stack contents to userland.
+			 */
+			bzero((char *) dp, UIO_MX);
+			dp->d_reclen = UIO_MX;
+			dp->d_fileno = PROCFS_FILENO(pfs->pfs_pid, dt->d_pfstype);
+			dp->d_type = DT_REG;
+			dp->d_namlen = dt->d_namlen;
+			bcopy(dt->d_name, dp->d_name, sizeof(dt->d_name)-1);
+			error = uiomove((caddr_t) dp, UIO_MX, uio);
+			if (error)
+				break;
+			i++;
+		}
+
+		break;
+
+	    }
+
+	/*
+	 * this is for the root of the procfs filesystem
+	 * what is needed is a special entry for "curproc"
+	 * followed by an entry for each process on allproc
+	 * (and, if PROCFS_ZOMBIE is defined, on zombproc).
+	 */
+
+	case Proot: {
+		int pcnt;
+#ifdef PROCFS_ZOMBIE
+		int doingzomb = 0;
+#endif
+		volatile struct proc *p;
+
+		p = allproc;
+
+#define PROCFS_XFILES	1	/* number of other entries, like "curproc" */
+		/*
+		 * (pcnt) is the directory index of the process that
+		 * (p) points at.  processes are stepped over until
+		 * (pcnt) catches up with the wanted index (i).
+		 */
+		pcnt = PROCFS_XFILES;
+
+		while (p && uio->uio_resid >= UIO_MX) {
+			bzero((char *) dp, UIO_MX);
+			dp->d_type = DT_DIR;
+			dp->d_reclen = UIO_MX;
+
+			switch (i) {
+			case 0:
+				/* ship out entry for "curproc" */
+				dp->d_fileno = PROCFS_FILENO(PID_MAX+1, Pproc);
+				dp->d_namlen = sprintf(dp->d_name, "curproc");
+				break;
+
+			default:
+				if (pcnt >= i) {
+					dp->d_fileno = PROCFS_FILENO(p->p_pid, Pproc);
+					dp->d_namlen = sprintf(dp->d_name, "%ld", (long) p->p_pid);
+				}
+
+				p = p->p_next;
+
+#ifdef PROCFS_ZOMBIE
+				if (p == 0 && doingzomb == 0) {
+					doingzomb = 1;
+					p = zombproc;
+				}
+#endif
+
+				/* not yet at the wanted index: skip it */
+				if (pcnt++ < i)
+					continue;
+
+				break;
+			}
+			error = uiomove((caddr_t) dp, UIO_MX, uio);
+			if (error)
+				break;
+			i++;
+		}
+
+		break;
+
+	    }
+
+	default:
+		error = ENOTDIR;
+		break;
+	}
+
+	uio->uio_offset = i * UIO_MX;
+
+	return (error);
+}
+
+/*
+ * convert decimal ascii to pid_t
+ *
+ * (b) need not be nul terminated; exactly (len) chars are
+ * examined.  NO_PID is returned if any of them is not a
+ * decimal digit or if the value exceeds PID_MAX.
+ */
+static pid_t
+atopid(b, len)
+	const char *b;
+	u_int len;
+{
+	const char *end = b + len;
+	pid_t pid = 0;
+	int digit;
+
+	for (; b != end; b++) {
+		digit = *b - '0';
+		if (digit < 0 || digit > 9)
+			return (NO_PID);
+
+		/* checked on every step, so the value stays small */
+		pid = 10 * pid + digit;
+		if (pid > PID_MAX)
+			return (NO_PID);
+	}
+
+	return (pid);
+}
+
+/*
+ * procfs vnode operations.
+ *
+ * procfs_vnodeop_entries pairs each vnode operation descriptor
+ * with the procfs routine that implements it; operations that
+ * are not listed get vn_default_error.  The list is terminated
+ * by a null entry.  Several of the procfs_* names used here are
+ * not defined in this file.
+ *
+ * procfs_vnodeop_opv_desc ties the list to procfs_vnodeop_p,
+ * the operations vector pointer for procfs.
+ */
+int (**procfs_vnodeop_p)();
+struct vnodeopv_entry_desc procfs_vnodeop_entries[] = {
+	{ &vop_default_desc, vn_default_error },
+	{ &vop_lookup_desc, procfs_lookup },		/* lookup */
+	{ &vop_create_desc, procfs_create },		/* create */
+	{ &vop_mknod_desc, procfs_mknod },		/* mknod */
+	{ &vop_open_desc, procfs_open },		/* open */
+	{ &vop_close_desc, procfs_close },		/* close */
+	{ &vop_access_desc, procfs_access },		/* access */
+	{ &vop_getattr_desc, procfs_getattr },		/* getattr */
+	{ &vop_setattr_desc, procfs_setattr },		/* setattr */
+	{ &vop_read_desc, procfs_read },		/* read */
+	{ &vop_write_desc, procfs_write },		/* write */
+	{ &vop_ioctl_desc, procfs_ioctl },		/* ioctl */
+	{ &vop_select_desc, procfs_select },		/* select */
+	{ &vop_mmap_desc, procfs_mmap },		/* mmap */
+	{ &vop_fsync_desc, procfs_fsync },		/* fsync */
+	{ &vop_seek_desc, procfs_seek },		/* seek */
+	{ &vop_remove_desc, procfs_remove },		/* remove */
+	{ &vop_link_desc, procfs_link },		/* link */
+	{ &vop_rename_desc, procfs_rename },		/* rename */
+	{ &vop_mkdir_desc, procfs_mkdir },		/* mkdir */
+	{ &vop_rmdir_desc, procfs_rmdir },		/* rmdir */
+	{ &vop_symlink_desc, procfs_symlink },		/* symlink */
+	{ &vop_readdir_desc, procfs_readdir },		/* readdir */
+	{ &vop_readlink_desc, procfs_readlink },	/* readlink */
+	{ &vop_abortop_desc, procfs_abortop },		/* abortop */
+	{ &vop_inactive_desc, procfs_inactive },	/* inactive */
+	{ &vop_reclaim_desc, procfs_reclaim },		/* reclaim */
+	{ &vop_lock_desc, procfs_lock },		/* lock */
+	{ &vop_unlock_desc, procfs_unlock },		/* unlock */
+	{ &vop_bmap_desc, procfs_bmap },		/* bmap */
+	{ &vop_strategy_desc, procfs_strategy },	/* strategy */
+	{ &vop_print_desc, procfs_print },		/* print */
+	{ &vop_islocked_desc, procfs_islocked },	/* islocked */
+	{ &vop_pathconf_desc, procfs_pathconf },	/* pathconf */
+	{ &vop_advlock_desc, procfs_advlock },		/* advlock */
+	{ &vop_blkatoff_desc, procfs_blkatoff },	/* blkatoff */
+	{ &vop_valloc_desc, procfs_valloc },		/* valloc */
+	{ &vop_vfree_desc, procfs_vfree },		/* vfree */
+	{ &vop_truncate_desc, procfs_truncate },	/* truncate */
+	{ &vop_update_desc, procfs_update },		/* update */
+	{ (struct vnodeop_desc*)NULL, (int(*)())NULL }
+};
+struct vnodeopv_desc procfs_vnodeop_opv_desc =
+	{ &procfs_vnodeop_p, procfs_vnodeop_entries };
diff --git a/sys/fs/specfs/spec_vnops.c b/sys/fs/specfs/spec_vnops.c
new file mode 100644
index 00000000000..111c517b162
--- /dev/null
+++ b/sys/fs/specfs/spec_vnops.c
@@ -0,0 +1,689 @@
+/*
+ * Copyright (c) 1989, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)spec_vnops.c	8.6 (Berkeley) 4/9/94
+ */
+
+#include <sys/param.h>
+#include <sys/proc.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/conf.h>
+#include <sys/buf.h>
+#include <sys/mount.h>
+#include <sys/namei.h>
+#include <sys/vnode.h>
+#include <sys/stat.h>
+#include <sys/errno.h>
+#include <sys/ioctl.h>
+#include <sys/file.h>
+#include <sys/disklabel.h>
+#include <miscfs/specfs/specdev.h>
+
+/*
+ * Symbolic sleep message strings for devices.
+ * None of these are referenced elsewhere in this file.
+ * NOTE(review): presumably device drivers use them as wait messages --
+ * confirm against the callers.
+ */
+char	devopn[] = "devopn";
+char	devio[] = "devio";
+char	devwait[] = "devwait";
+char	devin[] = "devin";
+char	devout[] = "devout";
+char	devioc[] = "devioc";
+char	devcls[] = "devcls";
+
+/*
+ * Operations vector for special-device vnodes.
+ *
+ * spec_vnodeop_entries pairs each vnode operation descriptor with the
+ * spec_* routine bound to it.  The first entry binds vop_default_desc to
+ * vn_default_error, and a NULL/NULL pair terminates the table.
+ * spec_vnodeop_opv_desc ties the table to the spec_vnodeop_p pointer.
+ */
+int (**spec_vnodeop_p)();
+struct vnodeopv_entry_desc spec_vnodeop_entries[] = {
+	{ &vop_default_desc, vn_default_error },
+	{ &vop_lookup_desc, spec_lookup },		/* lookup */
+	{ &vop_create_desc, spec_create },		/* create */
+	{ &vop_mknod_desc, spec_mknod },		/* mknod */
+	{ &vop_open_desc, spec_open },			/* open */
+	{ &vop_close_desc, spec_close },		/* close */
+	{ &vop_access_desc, spec_access },		/* access */
+	{ &vop_getattr_desc, spec_getattr },		/* getattr */
+	{ &vop_setattr_desc, spec_setattr },		/* setattr */
+	{ &vop_read_desc, spec_read },			/* read */
+	{ &vop_write_desc, spec_write },		/* write */
+	{ &vop_ioctl_desc, spec_ioctl },		/* ioctl */
+	{ &vop_select_desc, spec_select },		/* select */
+	{ &vop_mmap_desc, spec_mmap },			/* mmap */
+	{ &vop_fsync_desc, spec_fsync },		/* fsync */
+	{ &vop_seek_desc, spec_seek },			/* seek */
+	{ &vop_remove_desc, spec_remove },		/* remove */
+	{ &vop_link_desc, spec_link },			/* link */
+	{ &vop_rename_desc, spec_rename },		/* rename */
+	{ &vop_mkdir_desc, spec_mkdir },		/* mkdir */
+	{ &vop_rmdir_desc, spec_rmdir },		/* rmdir */
+	{ &vop_symlink_desc, spec_symlink },		/* symlink */
+	{ &vop_readdir_desc, spec_readdir },		/* readdir */
+	{ &vop_readlink_desc, spec_readlink },		/* readlink */
+	{ &vop_abortop_desc, spec_abortop },		/* abortop */
+	{ &vop_inactive_desc, spec_inactive },		/* inactive */
+	{ &vop_reclaim_desc, spec_reclaim },		/* reclaim */
+	{ &vop_lock_desc, spec_lock },			/* lock */
+	{ &vop_unlock_desc, spec_unlock },		/* unlock */
+	{ &vop_bmap_desc, spec_bmap },			/* bmap */
+	{ &vop_strategy_desc, spec_strategy },		/* strategy */
+	{ &vop_print_desc, spec_print },		/* print */
+	{ &vop_islocked_desc, spec_islocked },		/* islocked */
+	{ &vop_pathconf_desc, spec_pathconf },		/* pathconf */
+	{ &vop_advlock_desc, spec_advlock },		/* advlock */
+	{ &vop_blkatoff_desc, spec_blkatoff },		/* blkatoff */
+	{ &vop_valloc_desc, spec_valloc },		/* valloc */
+	{ &vop_vfree_desc, spec_vfree },		/* vfree */
+	{ &vop_truncate_desc, spec_truncate },		/* truncate */
+	{ &vop_update_desc, spec_update },		/* update */
+	{ &vop_bwrite_desc, spec_bwrite },		/* bwrite */
+	{ (struct vnodeop_desc*)NULL, (int(*)())NULL }
+};
+struct vnodeopv_desc spec_vnodeop_opv_desc =
+	{ &spec_vnodeop_p, spec_vnodeop_entries };
+
+/*
+ * Lookup on a special device: there is never anything to find.
+ * Clears the caller's result vnode pointer and reports ENOTDIR.
+ */
+int
+spec_lookup(ap)
+	struct vop_lookup_args /* {
+		struct vnode *a_dvp;
+		struct vnode **a_vpp;
+		struct componentname *a_cnp;
+	} */ *ap;
+{
+	struct vnode **resultp = ap->a_vpp;
+
+	*resultp = NULL;
+	return (ENOTDIR);
+}
+
+/*
+ * Open a special file.
+ *
+ * Returns ENXIO if the vnode's filesystem is mounted MNT_NODEV or the
+ * device major number is out of range for its switch table, EPERM (or
+ * the error from vfs_mountedon()) when a securelevel or mounted-device
+ * check refuses the open, and otherwise whatever the driver's d_open
+ * routine returns.  Vnode types other than VCHR and VBLK return 0
+ * without calling any driver.
+ */
+/* ARGSUSED */
+spec_open(ap)
+	struct vop_open_args /* {
+		struct vnode *a_vp;
+		int  a_mode;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct vnode *bvp, *vp = ap->a_vp;
+	dev_t bdev, dev = (dev_t)vp->v_rdev;
+	register int maj = major(dev);
+	int error;
+
+	/*
+	 * Don't allow open if fs is mounted -nodev.
+	 */
+	if (vp->v_mount && (vp->v_mount->mnt_flag & MNT_NODEV))
+		return (ENXIO);
+
+	switch (vp->v_type) {
+
+	case VCHR:
+		/* The unsigned cast also rejects a negative major number. */
+		if ((u_int)maj >= nchrdev)
+			return (ENXIO);
+		/* The write restrictions below are skipped for FSCRED. */
+		if (ap->a_cred != FSCRED && (ap->a_mode & FWRITE)) {
+			/*
+			 * When running in very secure mode, do not allow
+			 * opens for writing of any disk character devices.
+			 */
+			if (securelevel >= 2 && isdisk(dev, VCHR))
+				return (EPERM);
+			/*
+			 * When running in secure mode, do not allow opens
+			 * for writing of /dev/mem, /dev/kmem, or character
+			 * devices whose corresponding block devices are
+			 * currently mounted.
+			 */
+			if (securelevel >= 1) {
+				if ((bdev = chrtoblk(dev)) != NODEV &&
+				    vfinddev(bdev, VBLK, &bvp) &&
+				    bvp->v_usecount > 0 &&
+				    (error = vfs_mountedon(bvp)))
+					return (error);
+				if (iskmemdev(dev))
+					return (EPERM);
+			}
+		}
+		/* The vnode is unlocked across the driver open, then relocked. */
+		VOP_UNLOCK(vp);
+		error = (*cdevsw[maj].d_open)(dev, ap->a_mode, S_IFCHR, ap->a_p);
+		VOP_LOCK(vp);
+		return (error);
+
+	case VBLK:
+		/* The unsigned cast also rejects a negative major number. */
+		if ((u_int)maj >= nblkdev)
+			return (ENXIO);
+		/*
+		 * When running in very secure mode, do not allow
+		 * opens for writing of any disk block devices.
+		 */
+		if (securelevel >= 2 && ap->a_cred != FSCRED &&
+		    (ap->a_mode & FWRITE) && isdisk(dev, VBLK))
+			return (EPERM);
+		/*
+		 * Do not allow opens of block devices that are
+		 * currently mounted.
+		 */
+		if (error = vfs_mountedon(vp))
+			return (error);
+		return ((*bdevsw[maj].d_open)(dev, ap->a_mode, S_IFBLK, ap->a_p));
+	}
+	return (0);
+}
+
+/*
+ * Vnode op for read
+ *
+ * Character devices are passed to the driver's d_read routine with the
+ * vnode unlocked around the call.  Block devices are read through the
+ * buffer routines in bsize-byte chunks: bsize is BLKDEV_IOSIZE unless
+ * DIOCGPART reports an FS_BSDFFS partition with non-zero p_frag and
+ * p_fsize, in which case it is p_frag * p_fsize.  Returns 0 or an error
+ * number; any other vnode type panics.
+ */
+/* ARGSUSED */
+spec_read(ap)
+	struct vop_read_args /* {
+		struct vnode *a_vp;
+		struct uio *a_uio;
+		int  a_ioflag;
+		struct ucred *a_cred;
+	} */ *ap;
+{
+	register struct vnode *vp = ap->a_vp;
+	register struct uio *uio = ap->a_uio;
+ 	struct proc *p = uio->uio_procp;
+	struct buf *bp;
+	daddr_t bn, nextbn;
+	long bsize, bscale;
+	struct partinfo dpart;
+	int n, on, majordev, (*ioctl)();
+	int error = 0;
+	dev_t dev;
+
+#ifdef DIAGNOSTIC
+	if (uio->uio_rw != UIO_READ)
+		panic("spec_read mode");
+	if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != curproc)
+		panic("spec_read proc");
+#endif
+	/* Nothing requested: succeed without touching the device. */
+	if (uio->uio_resid == 0)
+		return (0);
+
+	switch (vp->v_type) {
+
+	case VCHR:
+		VOP_UNLOCK(vp);
+		error = (*cdevsw[major(vp->v_rdev)].d_read)
+			(vp->v_rdev, uio, ap->a_ioflag);
+		VOP_LOCK(vp);
+		return (error);
+
+	case VBLK:
+		if (uio->uio_offset < 0)
+			return (EINVAL);
+		bsize = BLKDEV_IOSIZE;
+		dev = vp->v_rdev;
+		/* Prefer the partition's fragment-derived size when known. */
+		if ((majordev = major(dev)) < nblkdev &&
+		    (ioctl = bdevsw[majordev].d_ioctl) != NULL &&
+		    (*ioctl)(dev, DIOCGPART, (caddr_t)&dpart, FREAD, p) == 0 &&
+		    dpart.part->p_fstype == FS_BSDFFS &&
+		    dpart.part->p_frag != 0 && dpart.part->p_fsize != 0)
+			bsize = dpart.part->p_frag * dpart.part->p_fsize;
+		/* Number of DEV_BSIZE blocks per chunk. */
+		bscale = bsize / DEV_BSIZE;
+		do {
+			/*
+			 * bn: first device block of the chunk holding the
+			 *     offset (masking assumes bscale is a power of 2).
+			 * on: byte offset of the request within that chunk.
+			 * n:  bytes to copy out of this chunk.
+			 */
+			bn = (uio->uio_offset / DEV_BSIZE) &~ (bscale - 1);
+			on = uio->uio_offset % bsize;
+			n = min((unsigned)(bsize - on), uio->uio_resid);
+			/*
+			 * If this chunk directly follows the last one read,
+			 * also pass the next chunk to breadn().
+			 */
+			if (vp->v_lastr + bscale == bn) {
+				nextbn = bn + bscale;
+				error = breadn(vp, bn, (int)bsize, &nextbn,
+					(int *)&bsize, 1, NOCRED, &bp);
+			} else
+				error = bread(vp, bn, (int)bsize, NOCRED, &bp);
+			vp->v_lastr = bn;
+			/* Clip to the bytes the buffer actually holds. */
+			n = min(n, bsize - bp->b_resid);
+			if (error) {
+				brelse(bp);
+				return (error);
+			}
+			error = uiomove((char *)bp->b_data + on, n, uio);
+			/* Chunk consumed to its end: flag the buffer B_AGE. */
+			if (n + on == bsize)
+				bp->b_flags |= B_AGE;
+			brelse(bp);
+		} while (error == 0 && uio->uio_resid > 0 && n != 0);
+		return (error);
+
+	default:
+		panic("spec_read type");
+	}
+	/* NOTREACHED */
+}
+
+/*
+ * Vnode op for write
+ *
+ * Character devices are passed to the driver's d_write routine with the
+ * vnode unlocked around the call.  Block devices are written through
+ * the buffer routines in bsize-byte chunks: bsize is BLKDEV_IOSIZE
+ * unless DIOCGPART reports an FS_BSDFFS partition with non-zero p_frag
+ * and p_fsize, in which case it is p_frag * p_fsize.  Returns 0 or an
+ * error number; any other vnode type panics.
+ */
+/* ARGSUSED */
+spec_write(ap)
+	struct vop_write_args /* {
+		struct vnode *a_vp;
+		struct uio *a_uio;
+		int  a_ioflag;
+		struct ucred *a_cred;
+	} */ *ap;
+{
+	register struct vnode *vp = ap->a_vp;
+	register struct uio *uio = ap->a_uio;
+	struct proc *p = uio->uio_procp;
+	struct buf *bp;
+	daddr_t bn;
+	int bsize, blkmask;
+	struct partinfo dpart;
+	register int n, on;
+	int error = 0;
+
+#ifdef DIAGNOSTIC
+	if (uio->uio_rw != UIO_WRITE)
+		panic("spec_write mode");
+	if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != curproc)
+		panic("spec_write proc");
+#endif
+
+	switch (vp->v_type) {
+
+	case VCHR:
+		VOP_UNLOCK(vp);
+		error = (*cdevsw[major(vp->v_rdev)].d_write)
+			(vp->v_rdev, uio, ap->a_ioflag);
+		VOP_LOCK(vp);
+		return (error);
+
+	case VBLK:
+		if (uio->uio_resid == 0)
+			return (0);
+		if (uio->uio_offset < 0)
+			return (EINVAL);
+		bsize = BLKDEV_IOSIZE;
+		/*
+		 * NOTE(review): unlike spec_read, the major number and the
+		 * d_ioctl pointer are used here without a range/NULL check.
+		 */
+		if ((*bdevsw[major(vp->v_rdev)].d_ioctl)(vp->v_rdev, DIOCGPART,
+		    (caddr_t)&dpart, FREAD, p) == 0) {
+			if (dpart.part->p_fstype == FS_BSDFFS &&
+			    dpart.part->p_frag != 0 && dpart.part->p_fsize != 0)
+				bsize = dpart.part->p_frag *
+				    dpart.part->p_fsize;
+		}
+		/* Mask that rounds a block number down to a chunk boundary. */
+		blkmask = (bsize / DEV_BSIZE) - 1;
+		do {
+			/*
+			 * bn: first device block of the chunk holding the offset.
+			 * on: byte offset of the request within that chunk.
+			 * n:  bytes to copy into this chunk.
+			 */
+			bn = (uio->uio_offset / DEV_BSIZE) &~ blkmask;
+			on = uio->uio_offset % bsize;
+			n = min((unsigned)(bsize - on), uio->uio_resid);
+			/*
+			 * A whole-chunk write needs no old contents, so just
+			 * get a buffer; a partial write reads the chunk first.
+			 */
+			if (n == bsize)
+				bp = getblk(vp, bn, bsize, 0, 0);
+			else
+				error = bread(vp, bn, bsize, NOCRED, &bp);
+			n = min(n, bsize - bp->b_resid);
+			if (error) {
+				brelse(bp);
+				return (error);
+			}
+			error = uiomove((char *)bp->b_data + on, n, uio);
+			/*
+			 * Chunk filled to its end: flag it B_AGE and hand it
+			 * to bawrite(); otherwise hand it to bdwrite().
+			 */
+			if (n + on == bsize) {
+				bp->b_flags |= B_AGE;
+				bawrite(bp);
+			} else
+				bdwrite(bp);
+		} while (error == 0 && uio->uio_resid > 0 && n != 0);
+		return (error);
+
+	default:
+		panic("spec_write type");
+	}
+	/* NOTREACHED */
+}
+
+/*
+ * Device ioctl operation.
+ *
+ * Forwards the request to the d_ioctl entry of the character or block
+ * device switch.  For block devices, the special request (command 0
+ * with data B_TAPE) is answered here: 0 if the driver's d_flags has
+ * B_TAPE set, 1 otherwise.  Any other vnode type panics.
+ */
+/* ARGSUSED */
+int
+spec_ioctl(ap)
+	struct vop_ioctl_args /* {
+		struct vnode *a_vp;
+		int  a_command;
+		caddr_t  a_data;
+		int  a_fflag;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct vnode *vp = ap->a_vp;
+	dev_t dev = vp->v_rdev;
+
+	if (vp->v_type == VCHR) {
+		return ((*cdevsw[major(dev)].d_ioctl)(dev, ap->a_command,
+		    ap->a_data, ap->a_fflag, ap->a_p));
+	}
+
+	if (vp->v_type != VBLK) {
+		panic("spec_ioctl");
+		/* NOTREACHED */
+	}
+
+	if (ap->a_command == 0 && (int)ap->a_data == B_TAPE) {
+		if (bdevsw[major(dev)].d_flags & B_TAPE)
+			return (0);
+		return (1);
+	}
+	return ((*bdevsw[major(dev)].d_ioctl)(dev, ap->a_command,
+	    ap->a_data, ap->a_fflag, ap->a_p));
+}
+
+/*
+ * Select on a special device.
+ * Character devices are passed to the driver's d_select routine;
+ * every other vnode type unconditionally reports 1.
+ */
+/* ARGSUSED */
+int
+spec_select(ap)
+	struct vop_select_args /* {
+		struct vnode *a_vp;
+		int  a_which;
+		int  a_fflags;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct vnode *vp = ap->a_vp;
+	dev_t dev;
+
+	if (vp->v_type != VCHR)
+		return (1);		/* XXX */
+
+	dev = vp->v_rdev;
+	return ((*cdevsw[major(dev)].d_select)(dev, ap->a_which, ap->a_p));
+}
+/*
+ * Synch buffers associated with a block device
+ *
+ * Character devices return 0 immediately.  For block devices every
+ * non-busy buffer on the vnode's dirty list is started with bawrite();
+ * with a_waitfor == MNT_WAIT the routine then sleeps until v_numoutput
+ * drains.  Always returns 0.
+ */
+/* ARGSUSED */
+int
+spec_fsync(ap)
+	struct vop_fsync_args /* {
+		struct vnode *a_vp;
+		struct ucred *a_cred;
+		int  a_waitfor;
+		struct proc *a_p;
+	} */ *ap;
+{
+	register struct vnode *vp = ap->a_vp;
+	register struct buf *bp;
+	struct buf *nbp;
+	int s;
+
+	if (vp->v_type == VCHR)
+		return (0);
+	/*
+	 * Flush all dirty buffers associated with a block device.
+	 */
+loop:
+	s = splbio();
+	for (bp = vp->v_dirtyblkhd.lh_first; bp; bp = nbp) {
+		nbp = bp->b_vnbufs.le_next;
+		if ((bp->b_flags & B_BUSY))
+			continue;
+		if ((bp->b_flags & B_DELWRI) == 0)
+			panic("spec_fsync: not dirty");
+		bremfree(bp);
+		bp->b_flags |= B_BUSY;
+		splx(s);
+		bawrite(bp);
+		/* Rescan from the head after each write is started. */
+		goto loop;
+	}
+	if (ap->a_waitfor == MNT_WAIT) {
+		while (vp->v_numoutput) {
+			vp->v_flag |= VBWAIT;
+			sleep((caddr_t)&vp->v_numoutput, PRIBIO + 1);
+		}
+#ifdef DIAGNOSTIC
+		if (vp->v_dirtyblkhd.lh_first) {
+			vprint("spec_fsync: dirty", vp);
+			/*
+			 * Restore the saved level before retrying.  Otherwise
+			 * the splbio() at "loop" overwrites s with the raised
+			 * level and the final splx(s) can no longer restore
+			 * the caller's priority.
+			 */
+			splx(s);
+			goto loop;
+		}
+#endif
+	}
+	splx(s);
+	return (0);
+}
+
+/*
+ * Pass the buffer straight to the strategy routine of the block
+ * device named by its b_dev field.  Always returns 0.
+ */
+int
+spec_strategy(ap)
+	struct vop_strategy_args /* {
+		struct buf *a_bp;
+	} */ *ap;
+{
+	struct buf *bp = ap->a_bp;
+
+	(*bdevsw[major(bp->b_dev)].d_strategy)(bp);
+	return (0);
+}
+
+/*
+ * Identity block mapping: the vnode and block number handed in are
+ * returned unchanged through whichever output pointers are non-NULL.
+ */
+int
+spec_bmap(ap)
+	struct vop_bmap_args /* {
+		struct vnode *a_vp;
+		daddr_t  a_bn;
+		struct vnode **a_vpp;
+		daddr_t *a_bnp;
+	} */ *ap;
+{
+	struct vnode **vpp = ap->a_vpp;
+	daddr_t *bnp = ap->a_bnp;
+
+	if (vpp != NULL)
+		*vpp = ap->a_vp;
+	if (bnp != NULL)
+		*bnp = ap->a_bn;
+	return (0);
+}
+
+/*
+ * Lock operation for special devices.
+ * No locking is done at present; the call simply succeeds.
+ */
+/* ARGSUSED */
+int
+spec_lock(ap)
+	struct vop_lock_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+	return (0);
+}
+
+/*
+ * Unlock operation for special devices.
+ * Counterpart of spec_lock: nothing to release, always succeeds.
+ */
+/* ARGSUSED */
+int
+spec_unlock(ap)
+	struct vop_unlock_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+	return (0);
+}
+
+/*
+ * Device close routine
+ *
+ * Calls the driver's d_close routine only when vcount() shows no other
+ * reference to the device or the vnode has VXLOCK set; otherwise it
+ * returns 0 without calling the driver.  Block devices have their
+ * buffers invalidated with vinvalbuf() first, and an error from that
+ * call is returned.  Vnode types other than VCHR and VBLK panic.
+ */
+/* ARGSUSED */
+spec_close(ap)
+	struct vop_close_args /* {
+		struct vnode *a_vp;
+		int  a_fflag;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	register struct vnode *vp = ap->a_vp;
+	dev_t dev = vp->v_rdev;
+	int (*devclose) __P((dev_t, int, int, struct proc *));
+	int mode, error;
+
+	switch (vp->v_type) {
+
+	case VCHR:
+		/*
+		 * Hack: a tty device that is a controlling terminal
+		 * has a reference from the session structure.
+		 * We cannot easily tell that a character device is
+		 * a controlling terminal, unless it is the closing
+		 * process' controlling terminal.  In that case,
+		 * if the reference count is 2 (this last descriptor
+		 * plus the session), release the reference from the session.
+		 */
+		if (vcount(vp) == 2 && ap->a_p &&
+		    vp == ap->a_p->p_session->s_ttyvp) {
+			vrele(vp);
+			ap->a_p->p_session->s_ttyvp = NULL;
+		}
+		/*
+		 * If the vnode is locked, then we are in the midst
+		 * of forcibly closing the device, otherwise we only
+		 * close on last reference.
+		 */
+		if (vcount(vp) > 1 && (vp->v_flag & VXLOCK) == 0)
+			return (0);
+		devclose = cdevsw[major(dev)].d_close;
+		mode = S_IFCHR;
+		break;
+
+	case VBLK:
+		/*
+		 * On last close of a block device (that isn't mounted)
+		 * we must invalidate any in core blocks, so that
+		 * we can, for instance, change floppy disks.
+		 */
+		if (error = vinvalbuf(vp, V_SAVE, ap->a_cred, ap->a_p, 0, 0))
+			return (error);
+		/*
+		 * We do not want to really close the device if it
+		 * is still in use unless we are trying to close it
+		 * forcibly. Since every use (buffer, vnode, swap, cmap)
+		 * holds a reference to the vnode, and because we mark
+		 * any other vnodes that alias this device, when the
+		 * sum of the reference counts on all the aliased
+		 * vnodes descends to one, we are on last close.
+		 */
+		if (vcount(vp) > 1 && (vp->v_flag & VXLOCK) == 0)
+			return (0);
+		devclose = bdevsw[major(dev)].d_close;
+		mode = S_IFBLK;
+		break;
+
+	default:
+		panic("spec_close: not special");
+	}
+
+	/* mode tells the driver which flavour (S_IFCHR/S_IFBLK) is closing. */
+	return ((*devclose)(dev, ap->a_fflag, mode, ap->a_p));
+}
+
+/*
+ * Print out the contents of a special device vnode: the major and
+ * minor numbers of its v_rdev.  Always returns 0; the function
+ * previously fell off the end without a return value.
+ */
+int
+spec_print(ap)
+	struct vop_print_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+
+	printf("tag VT_NON, dev %d, %d\n", major(ap->a_vp->v_rdev),
+		minor(ap->a_vp->v_rdev));
+	return (0);
+}
+
+/*
+ * Return POSIX pathconf information applicable to special devices.
+ * On a recognised a_name the limit is stored through a_retval and 0 is
+ * returned; any other name yields EINVAL and leaves *a_retval alone.
+ */
+int
+spec_pathconf(ap)
+	struct vop_pathconf_args /* {
+		struct vnode *a_vp;
+		int a_name;
+		int *a_retval;
+	} */ *ap;
+{
+	int limit;
+
+	switch (ap->a_name) {
+	case _PC_LINK_MAX:
+		limit = LINK_MAX;
+		break;
+	case _PC_MAX_CANON:
+		limit = MAX_CANON;
+		break;
+	case _PC_MAX_INPUT:
+		limit = MAX_INPUT;
+		break;
+	case _PC_PIPE_BUF:
+		limit = PIPE_BUF;
+		break;
+	case _PC_CHOWN_RESTRICTED:
+		limit = 1;
+		break;
+	case _PC_VDISABLE:
+		limit = _POSIX_VDISABLE;
+		break;
+	default:
+		return (EINVAL);
+	}
+	*ap->a_retval = limit;
+	return (0);
+}
+
+/*
+ * Advisory byte-range locking on special devices.
+ * Not supported: every request is refused with EOPNOTSUPP.
+ */
+/* ARGSUSED */
+int
+spec_advlock(ap)
+	struct vop_advlock_args /* {
+		struct vnode *a_vp;
+		caddr_t  a_id;
+		int  a_op;
+		struct flock *a_fl;
+		int  a_flags;
+	} */ *ap;
+{
+	return (EOPNOTSUPP);
+}
+
+/*
+ * Stub for operations that must fail on a special device:
+ * takes no arguments and always returns EBADF.
+ */
+int
+spec_ebadf()
+{
+	return (EBADF);
+}
+
+/*
+ * Stub for operations that must never be invoked on a special
+ * device: panics unconditionally.
+ */
+int
+spec_badop()
+{
+	panic("spec_badop called");
+	/* NOTREACHED */
+}
diff --git a/sys/fs/umapfs/umap.h b/sys/fs/umapfs/umap.h
new file mode 100644
index 00000000000..9f4d1e7ace5
--- /dev/null
+++ b/sys/fs/umapfs/umap.h
@@ -0,0 +1,92 @@
+/*
+ * Copyright (c) 1992, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software donated to Berkeley by
+ * the UCLA Ficus project.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)umap.h	8.3 (Berkeley) 1/21/94
+ *
+ * @(#)null_vnops.c       1.5 (Berkeley) 7/10/92
+ */
+
+/* Rows in umap_mount.info_mapdata (the uid map). */
+#define MAPFILEENTRIES 64
+/* Rows in umap_mount.info_gmapdata (the gid map). */
+#define GMAPFILEENTRIES 16
+/* uid that umap_mapids() substitutes when a uid has no map entry. */
+#define NOBODY 32767
+/* gid that umap_mapids() substitutes when a gid has no map entry. */
+#define NULLGROUP 65534
+
+/*
+ * Arguments describing a umap mount: the target to overlay and the
+ * caller-supplied uid/gid mapping tables.  Each table row is a pair
+ * of ids.
+ * NOTE(review): presumably this is the structure handed to the umapfs
+ * mount routine -- confirm in umap_vfsops.c.
+ */
+struct umap_args {
+	char		*target;	/* Target of loopback  */
+	int 		nentries;       /* # of entries in user map array */
+	int 		gnentries;	/* # of entries in group map array */
+	u_long 		(*mapdata)[2];	/* pointer to array of user mappings */
+	u_long 		(*gmapdata)[2];	/* pointer to array of group mappings */
+};
+
+/*
+ * Per-mount umap state, reached from a struct mount through
+ * MOUNTTOUMAPMOUNT().  The mapping tables are stored inline; each row
+ * is a pair of ids (umap_findid() matches column 0 and returns
+ * column 1).
+ */
+struct umap_mount {
+	struct mount	*umapm_vfs;
+	struct vnode	*umapm_rootvp;	/* Reference to root umap_node */
+	int             info_nentries;  /* number of uid mappings */
+	int		info_gnentries;	/* number of gid mappings */
+	/* mapping data for user mapping in ficus */
+	u_long		info_mapdata[MAPFILEENTRIES][2];
+	/* mapping data for group mapping in ficus */
+	u_long		info_gmapdata[GMAPFILEENTRIES][2];
+};
+
+#ifdef KERNEL
+/*
+ * A cache of vnode references
+ *
+ * One umap_node exists per alias vnode.  umap_forw/umap_back must stay
+ * the first two members: the hash bucket heads in umap_subr.c are cast
+ * to struct umap_node * and linked through them.
+ */
+struct umap_node {
+	struct umap_node	*umap_forw;	/* Hash chain */
+	struct umap_node	*umap_back;
+	struct vnode	*umap_lowervp;	/* Aliased vnode - VREFed once */
+	struct vnode	*umap_vnode;	/* Back pointer to vnode/umap_node */
+};
+
+extern int umap_node_create __P((struct mount *mp, struct vnode *target, struct vnode **vpp));
+extern u_long umap_reverse_findid __P((u_long id, u_long map[][2], int nentries));
+extern void umap_mapids __P((struct mount *v_mount, struct ucred *credp));
+
+/* Conversions between a mount/vnode and its umap private data. */
+#define	MOUNTTOUMAPMOUNT(mp) ((struct umap_mount *)((mp)->mnt_data))
+#define	VTOUMAP(vp) ((struct umap_node *)(vp)->v_data)
+#define UMAPTOV(xp) ((xp)->umap_vnode)
+/*
+ * UMAPVPTOLOWERVP yields the lower (aliased) vnode of a umap vnode.
+ * With UMAPFS_DIAGNOSTIC it goes through umap_checkvp(), which
+ * sanity-checks the node before returning the lower vnode.
+ */
+#ifdef UMAPFS_DIAGNOSTIC
+extern struct vnode *umap_checkvp __P((struct vnode *vp, char *fil, int lno));
+#define	UMAPVPTOLOWERVP(vp) umap_checkvp((vp), __FILE__, __LINE__)
+#else
+#define	UMAPVPTOLOWERVP(vp) (VTOUMAP(vp)->umap_lowervp)
+#endif
+
+extern int (**umap_vnodeop_p)();
+extern struct vfsops umap_vfsops;
+#endif /* KERNEL */
diff --git a/sys/fs/umapfs/umap_subr.c b/sys/fs/umapfs/umap_subr.c
new file mode 100644
index 00000000000..6f1f077a621
--- /dev/null
+++ b/sys/fs/umapfs/umap_subr.c
@@ -0,0 +1,397 @@
+/*
+ * Copyright (c) 1992, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software donated to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)umap_subr.c	8.6 (Berkeley) 1/26/94
+ *
+ * $Id: lofs_subr.c, v 1.11 1992/05/30 10:05:43 jsp Exp jsp $
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/vnode.h>
+#include <sys/mount.h>
+#include <sys/namei.h>
+#include <sys/malloc.h>
+#include <miscfs/umapfs/umap.h>
+
+#define LOG2_SIZEVNODE 7		/* log2(sizeof struct vnode) */
+/* Number of hash buckets; must be a power of 2 because it is used as a mask. */
+#define	NUMAPNODECACHE 16
+/*
+ * Hash a vnode pointer to a bucket index in [0, NUMAPNODECACHE).
+ * The argument is parenthesized so that an expression argument is cast
+ * as a whole rather than only its first operand.
+ */
+#define	UMAP_NHASH(vp) \
+	((((u_long)(vp)) >> LOG2_SIZEVNODE) & (NUMAPNODECACHE - 1))
+
+/*
+ * Umap layer cache:
+ * Each cache entry holds a reference to the target vnode
+ * along with a pointer to the alias vnode.  When an
+ * entry is added the target vnode is VREF'd.  When the
+ * alias is removed the target vnode is vrele'd.
+ */
+
+/*
+ * Cache head
+ *
+ * One per hash bucket.  The two members mirror the leading
+ * umap_forw/umap_back members of struct umap_node, so a bucket head
+ * can be cast to struct umap_node * and serve as the sentinel of its
+ * circular doubly-linked chain.
+ */
+struct umap_node_cache {
+	struct umap_node	*ac_forw;
+	struct umap_node	*ac_back;
+};
+
+static struct umap_node_cache umap_node_cache[NUMAPNODECACHE];
+
+/*
+ * Initialise cache headers
+ *
+ * Points every hash bucket's forward and backward links at the bucket
+ * itself, i.e. makes each chain empty.  Returns 0; the function used
+ * to fall off the end of an implicit-int function without a value.
+ */
+int
+umapfs_init()
+{
+	struct umap_node_cache *ac;
+
+#ifdef UMAPFS_DIAGNOSTIC
+	printf("umapfs_init\n");		/* printed during system boot */
+#endif
+
+	for (ac = umap_node_cache; ac < umap_node_cache + NUMAPNODECACHE; ac++)
+		ac->ac_forw = ac->ac_back = (struct umap_node *) ac;
+	return (0);
+}
+
+/*
+ * Map a target vnode pointer to the hash bucket head that
+ * would hold its umap_node.
+ */
+static struct umap_node_cache *
+umap_node_hash(targetvp)
+	struct vnode *targetvp;
+{
+	struct umap_node_cache *bucket;
+
+	bucket = umap_node_cache + UMAP_NHASH(targetvp);
+	return (bucket);
+}
+
+/*
+ * Forward lookup of a user or group id in a mapping table.
+ * Scans the first nentries rows of map for one whose column 0 equals
+ * id and returns that row's column 1; returns (u_long)-1 when no row
+ * matches.  Used by umap_mapids().
+ */
+static u_long
+umap_findid(id, map, nentries)
+	u_long id;
+	u_long map[][2];
+	int nentries;
+{
+	int row;
+
+	for (row = 0; row < nentries; row++) {
+		if (map[row][0] == id)
+			return (map[row][1]);
+	}
+	return (-1);
+}
+
+/*
+ * Reverse lookup of a user or group id in a mapping table.
+ * Scans the first nentries rows of map for one whose column 1 equals
+ * id and returns that row's column 0; returns (u_long)-1 when no row
+ * matches.  Declared in umap.h for use outside this file.
+ */
+u_long
+umap_reverse_findid(id, map, nentries)
+	u_long id;
+	u_long map[][2];
+	int nentries;
+{
+	int row;
+
+	for (row = 0; row < nentries; row++) {
+		if (map[row][1] == id)
+			return (map[row][0]);
+	}
+	return (-1);
+}
+
+/*
+ * Return alias for target vnode if already exists, else 0.
+ *
+ * mp is the umap mount the alias must belong to; targetvp is the lower
+ * vnode being aliased.  On success the alias vnode has been obtained
+ * with vget().
+ */
+static struct vnode *
+umap_node_find(mp, targetvp)
+	struct mount *mp;
+	struct vnode *targetvp;
+{
+	struct umap_node_cache *hd;
+	struct umap_node *a;
+	struct vnode *vp;
+
+#ifdef UMAPFS_DIAGNOSTIC
+	printf("umap_node_find(mp = %x, target = %x)\n", mp, targetvp);
+#endif
+
+	/*
+	 * Find hash base, and then search the (two-way) linked
+	 * list looking for a umap_node structure which is referencing
+	 * the target vnode.  If found, then vget() the alias vnode
+	 * (the target vnode's VREF counter is NOT touched here).
+	 */
+	hd = umap_node_hash(targetvp);
+
+ loop:
+	/* The bucket head itself terminates the circular chain. */
+	for (a = hd->ac_forw; a != (struct umap_node *) hd; a = a->umap_forw) {
+		if (a->umap_lowervp == targetvp &&
+		    a->umap_vnode->v_mount == mp) {
+			vp = UMAPTOV(a);
+			/*
+			 * We need vget for the VXLOCK
+			 * stuff, but we don't want to lock
+			 * the lower node.
+			 */
+			if (vget(vp, 0)) {
+#ifdef UMAPFS_DIAGNOSTIC
+				printf ("umap_node_find: vget failed.\n");
+#endif
+				/* Restart the scan from the bucket head. */
+				goto loop;
+			}
+			return (vp);
+		}
+	}
+
+#ifdef UMAPFS_DIAGNOSTIC
+	printf("umap_node_find(%x, %x): NOT found\n", mp, targetvp);
+#endif
+
+	return (0);
+}
+
+/*
+ * Make a new umap_node node.
+ * *vpp receives the alias vnode, lowervp is the target vnode.
+ * Maintain a reference to (lowervp).
+ *
+ * Returns the getnewvnode() error if a vnode cannot be obtained,
+ * otherwise 0.  If another alias for (mp, lowervp) appeared while this
+ * one was being set up, the new vnode is discarded and *vpp is set to
+ * the existing alias instead.
+ */
+static int
+umap_node_alloc(mp, lowervp, vpp)
+	struct mount *mp;
+	struct vnode *lowervp;
+	struct vnode **vpp;
+{
+	struct umap_node_cache *hd;
+	struct umap_node *xp;
+	struct vnode *othervp, *vp;
+	int error;
+
+	if (error = getnewvnode(VT_UMAP, mp, umap_vnodeop_p, vpp))
+		return (error);
+	vp = *vpp;
+
+	MALLOC(xp, struct umap_node *, sizeof(struct umap_node),
+	    M_TEMP, M_WAITOK);
+	vp->v_type = lowervp->v_type;
+	xp->umap_vnode = vp;
+	vp->v_data = xp;
+	xp->umap_lowervp = lowervp;
+	/*
+	 * Before we insert our new node onto the hash chains,
+	 * check to see if someone else has beaten us to it.
+	 * (We could have slept in MALLOC.)
+	 *
+	 * umap_node_find() takes (mp, targetvp); both must be passed,
+	 * otherwise the search runs against a garbage target.
+	 */
+	if (othervp = umap_node_find(mp, lowervp)) {
+		/*
+		 * NOTE(review): vp->v_data still points at xp after the
+		 * FREE below -- confirm the reclaim path never follows it.
+		 */
+		FREE(xp, M_TEMP);
+		vp->v_type = VBAD;	/* node is discarded */
+		vp->v_usecount = 0;	/* XXX */
+		*vpp = othervp;
+		return (0);
+	}
+	VREF(lowervp);   /* Extra VREF will be vrele'd in umap_node_create */
+	hd = umap_node_hash(lowervp);
+	insque(xp, hd);
+	return (0);
+}
+
+
+/*
+ * Try to find an existing umap_node vnode referring
+ * to it, otherwise make a new umap_node vnode which
+ * contains a reference to the target vnode.
+ *
+ * On success *newvpp is the alias vnode, one reference on targetvp has
+ * been dropped with vrele(), and 0 is returned.  If a new alias cannot
+ * be allocated, the umap_node_alloc() error is returned and targetvp's
+ * reference is left untouched.
+ */
+int
+umap_node_create(mp, targetvp, newvpp)
+	struct mount *mp;
+	struct vnode *targetvp;
+	struct vnode **newvpp;
+{
+	struct vnode *aliasvp;
+
+	if (aliasvp = umap_node_find(mp, targetvp)) {
+		/*
+		 * Take another reference to the alias vnode
+		 * (umap_node_find() already did so via vget()).
+		 */
+#ifdef UMAPFS_DIAGNOSTIC
+		/* Was "ap->umap_vnode", but no "ap" exists in this function. */
+		vprint("umap_node_create: exists", aliasvp);
+#endif
+		/* VREF(aliasvp); */
+	} else {
+		int error;
+
+		/*
+		 * Get new vnode.
+		 */
+#ifdef UMAPFS_DIAGNOSTIC
+		printf("umap_node_create: create new alias vnode\n");
+#endif
+		/*
+		 * Make new vnode reference the umap_node.
+		 */
+		if (error = umap_node_alloc(mp, targetvp, &aliasvp))
+			return (error);
+
+		/*
+		 * aliasvp is already VREF'd by getnewvnode()
+		 */
+	}
+
+	vrele(targetvp);
+
+#ifdef UMAPFS_DIAGNOSTIC
+	vprint("umap_node_create: alias", aliasvp);
+	vprint("umap_node_create: target", targetvp);
+#endif
+
+	*newvpp = aliasvp;
+	return (0);
+}
+
+#ifdef UMAPFS_DIAGNOSTIC
+/* While non-zero, umap_checkvp() spins before panicking (debugger hook). */
+int umap_checkvp_barrier = 1;
+
+/*
+ * Print the words of a umap_node in hex, followed by a newline.
+ * Only sizeof(*a) bytes are dumped; the previous fixed count of 8
+ * words read past the end of the four-pointer structure.
+ */
+static void
+umap_checkvp_dump(a)
+	struct umap_node *a;
+{
+	u_long *p = (u_long *) a;
+	int i, nwords = (int)(sizeof(*a) / sizeof(u_long));
+
+	for (i = 0; i < nwords; i++)
+		printf(" %x", p[i]);
+	printf("\n");
+}
+
+/*
+ * Diagnostic replacement for UMAPVPTOLOWERVP: sanity-check the
+ * umap_node behind vp and return its lower vnode.  Panics (after
+ * spinning on umap_checkvp_barrier) if the lower vnode pointer is NULL
+ * or its v_usecount is below 1.  fil and lno identify the caller; they
+ * are used only by the disabled trace printf.
+ */
+struct vnode *
+umap_checkvp(vp, fil, lno)
+	struct vnode *vp;
+	char *fil;
+	int lno;
+{
+	struct umap_node *a = VTOUMAP(vp);
+#if 0
+	/*
+	 * Can't do this check because vop_reclaim runs
+	 * with funny vop vector.
+	 */
+	if (vp->v_op != umap_vnodeop_p) {
+		printf ("umap_checkvp: on non-umap-node\n");
+		while (umap_checkvp_barrier) /*WAIT*/ ;
+		panic("umap_checkvp");
+	}
+#endif
+	if (a->umap_lowervp == NULL) {
+		/* Should never happen */
+		printf("vp = %x, ZERO ptr\n", vp);
+		umap_checkvp_dump(a);
+		/* wait for debugger */
+		while (umap_checkvp_barrier) /*WAIT*/ ;
+		panic("umap_checkvp");
+	}
+	if (a->umap_lowervp->v_usecount < 1) {
+		printf("vp = %x, unref'ed lowervp\n", vp);
+		umap_checkvp_dump(a);
+		/* wait for debugger */
+		while (umap_checkvp_barrier) /*WAIT*/ ;
+		panic ("umap with unref'ed lowervp");
+	}
+#if 0
+	printf("umap %x/%d -> %x/%d [%s, %d]\n",
+	        a->umap_vnode, a->umap_vnode->v_usecount,
+		a->umap_lowervp, a->umap_lowervp->v_usecount,
+		fil, lno);
+#endif
+	return (a->umap_lowervp);
+}
+#endif
+
+/*
+ * umap_mapids maps all of the ids in a credential, both user and group.
+ *
+ * v_mount supplies the mapping tables (via MOUNTTOUMAPMOUNT); credp is
+ * rewritten in place.  A uid with no map entry becomes NOBODY and a gid
+ * with no map entry becomes NULLGROUP.
+ */
+
+void
+umap_mapids(v_mount, credp)
+	struct mount *v_mount;
+	struct ucred *credp;
+{
+	struct umap_mount *ump = MOUNTTOUMAPMOUNT(v_mount);
+	int i, unentries, gnentries;
+	u_long (*groupmap)[2], (*usermap)[2];
+	uid_t uid;
+	gid_t gid;
+
+	/* The tables are kept as arrays of pairs, the type umap_findid expects. */
+	unentries = ump->info_nentries;
+	usermap = ump->info_mapdata;
+	gnentries = ump->info_gnentries;
+	groupmap = ump->info_gmapdata;
+
+	/* Find uid entry in map */
+
+	uid = (uid_t) umap_findid(credp->cr_uid, usermap, unentries);
+
+	if (uid != (uid_t) -1)
+		credp->cr_uid = uid;
+	else
+		credp->cr_uid = (uid_t) NOBODY;
+
+#ifdef notdef
+	/* cr_gid is the same as cr_groups[0] in 4BSD */
+
+	/* Find gid entry in map */
+
+	gid = (gid_t) umap_findid(credp->cr_gid, groupmap, gnentries);
+
+	if (gid != -1)
+		credp->cr_gid = gid;
+	else
+		credp->cr_gid = NULLGROUP;
+#endif
+
+	/*
+	 * Now we must map each of the set of groups in the cr_groups
+	 * structure.  The set is bounded by cr_ngroups, not terminated
+	 * by a zero entry: gid 0 is a valid group, and a full array has
+	 * no terminator at all.
+	 */
+
+	for (i = 0; i < credp->cr_ngroups; i++) {
+		gid = (gid_t) umap_findid(credp->cr_groups[i],
+					groupmap, gnentries);
+
+		if (gid != (gid_t) -1)
+			credp->cr_groups[i] = gid;
+		else
+			credp->cr_groups[i] = NULLGROUP;
+	}
+}
diff --git a/sys/fs/umapfs/umap_vfsops.c b/sys/fs/umapfs/umap_vfsops.c
new file mode 100644
index 00000000000..2480a85e440
--- /dev/null
+++ b/sys/fs/umapfs/umap_vfsops.c
@@ -0,0 +1,407 @@
+/*
+ * Copyright (c) 1992, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software donated to Berkeley by
+ * the UCLA Ficus project.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)umap_vfsops.c	8.3 (Berkeley) 1/21/94
+ *
+ * @(#)null_vfsops.c       1.5 (Berkeley) 7/10/92
+ */
+
+/*
+ * Umap Layer
+ * (See mount_umap(8) for a description of this layer.)
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/vnode.h>
+#include <sys/mount.h>
+#include <sys/namei.h>
+#include <sys/malloc.h>
+#include <miscfs/umapfs/umap.h>
+
+/*
+ * Mount umap layer: stack a uid/gid-mapping alias of the directory named
+ * by args.target on mp.  data is a user-space struct umap_args.
+ */
+int
+umapfs_mount(mp, path, data, ndp, p)
+	struct mount *mp;
+	char *path;
+	caddr_t data;
+	struct nameidata *ndp;
+	struct proc *p;
+{
+	struct umap_args args;
+	struct vnode *lowerrootvp, *vp;
+	struct vnode *umapm_rootvp;
+	struct umap_mount *amp;
+	u_int size;
+	int error;
+
+#ifdef UMAPFS_DIAGNOSTIC
+	printf("umapfs_mount(mp = %x)\n", mp);
+#endif
+
+	/* Update is not supported */
+	if (mp->mnt_flag & MNT_UPDATE)
+		return (EOPNOTSUPP);
+
+	/* Get argument */
+	if (error = copyin(data, (caddr_t)&args, sizeof(struct umap_args)))
+		return (error);
+
+	/*
+	 * Reject entry counts that would overflow the tables inside
+	 * struct umap_mount (the unsigned casts also catch negatives).
+	 */
+	if ((u_long)args.nentries >
+	    sizeof(amp->info_mapdata) / sizeof(amp->info_mapdata[0]) ||
+	    (u_long)args.gnentries >
+	    sizeof(amp->info_gmapdata) / sizeof(amp->info_gmapdata[0]))
+		return (EINVAL);
+
+	/* Find lower node */
+	NDINIT(ndp, LOOKUP, FOLLOW|WANTPARENT|LOCKLEAF,
+		UIO_USERSPACE, args.target, p);
+	if (error = namei(ndp))
+		return (error);
+
+	/* Sanity check on lower vnode */
+	lowerrootvp = ndp->ni_vp;
+#ifdef UMAPFS_DIAGNOSTIC
+	printf("vp = %x, check for VDIR...\n", lowerrootvp);
+#endif
+	vrele(ndp->ni_dvp);
+	ndp->ni_dvp = 0;
+
+	if (lowerrootvp->v_type != VDIR) {
+		vput(lowerrootvp);
+		return (EINVAL);
+	}
+
+#ifdef UMAPFS_DIAGNOSTIC
+	printf("mp = %x\n", mp);
+#endif
+
+	amp = (struct umap_mount *) malloc(sizeof(struct umap_mount),
+				M_UFSMNT, M_WAITOK);	/* XXX */
+
+	/* Save reference to underlying FS */
+	amp->umapm_vfs = lowerrootvp->v_mount;
+
+	/*
+	 * Copy in both tables; the group table is sized by gnentries.
+	 */
+	amp->info_nentries = args.nentries;
+	amp->info_gnentries = args.gnentries;
+	error = copyin(args.mapdata, (caddr_t)amp->info_mapdata,
+	    2*sizeof(u_long)*args.nentries);
+	if (error)
+		goto bad;
+
+#ifdef UMAP_DIAGNOSTIC
+	printf("umap_mount:nentries %d\n",args.nentries);
+	for (i = 0; i < args.nentries; i++)
+		printf("   %d maps to %d\n", amp->info_mapdata[i][0],
+	 	    amp->info_mapdata[i][1]);
+#endif
+
+	error = copyin(args.gmapdata, (caddr_t)amp->info_gmapdata,
+	    2*sizeof(u_long)*args.gnentries);
+	if (error)
+		goto bad;
+
+#ifdef UMAP_DIAGNOSTIC
+	printf("umap_mount:gnentries %d\n",args.gnentries);
+	for (i = 0; i < args.gnentries; i++)
+		printf("	group %d maps to %d\n", 
+		    amp->info_gmapdata[i][0],
+	 	    amp->info_gmapdata[i][1]);
+#endif
+
+	/*
+	 * Save reference.  Each mount also holds
+	 * a reference on the root vnode.
+	 */
+	error = umap_node_create(mp, lowerrootvp, &vp);
+	/* Unlock the node (either the lower or the alias) */
+	VOP_UNLOCK(vp);
+	/* Make sure the node alias worked */
+	if (error) {
+		vrele(lowerrootvp);
+		free(amp, M_UFSMNT);	/* XXX */
+		return (error);
+	}
+
+	/*
+	 * Keep a held reference to the root vnode.
+	 * It is vrele'd in umapfs_unmount.
+	 */
+	umapm_rootvp = vp;
+	umapm_rootvp->v_flag |= VROOT;
+	amp->umapm_rootvp = umapm_rootvp;
+	if (UMAPVPTOLOWERVP(umapm_rootvp)->v_mount->mnt_flag & MNT_LOCAL)
+		mp->mnt_flag |= MNT_LOCAL;
+	mp->mnt_data = (qaddr_t) amp;
+	getnewfsid(mp, MOUNT_LOFS);
+
+	(void) copyinstr(path, mp->mnt_stat.f_mntonname, MNAMELEN - 1, &size);
+	bzero(mp->mnt_stat.f_mntonname + size, MNAMELEN - size);
+	(void) copyinstr(args.target, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, 
+	    &size);
+	bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size);
+#ifdef UMAPFS_DIAGNOSTIC
+	printf("umapfs_mount: lower %s, alias at %s\n",
+		mp->mnt_stat.f_mntfromname, mp->mnt_stat.f_mntonname);
+#endif
+	return (0);
+
+bad:
+	/* A table copyin failed: free amp and vput the locked lower root. */
+	free(amp, M_UFSMNT);	/* XXX */
+	vput(lowerrootvp);
+	return (error);
+}
+
+/*
+ * VFS start.  Nothing needed here - the start routine on the
+ * underlying filesystem will have been called when that filesystem
+ * was mounted.  All arguments are ignored; always returns 0.
+ */
+int
+umapfs_start(mp, flags, p)
+	struct mount *mp;
+	int flags;
+	struct proc *p;
+{
+	return (0);
+	/* Disabled: return (VFS_START(MOUNTTOUMAPMOUNT(mp)->umapm_vfs, flags, p)); */
+}
+
+/*
+ * Unmount the umap layer: flush its vnodes, release the root alias
+ * vnode and free the umap_mount structure.  Returns 0 or an errno.
+ */
+int
+umapfs_unmount(mp, mntflags, p)
+	struct mount *mp;
+	int mntflags;
+	struct proc *p;
+{
+	extern int doforce;
+	struct vnode *rootvp;
+	int vflags, error;
+
+#ifdef UMAPFS_DIAGNOSTIC
+	printf("umapfs_unmount(mp = %x)\n", mp);
+#endif
+
+	rootvp = MOUNTTOUMAPMOUNT(mp)->umapm_rootvp;
+
+	/* A forced unmount is refused unless doforce is set. */
+	vflags = 0;
+	if (mntflags & MNT_FORCE) {
+		if (!doforce)
+			return (EINVAL);
+		vflags = FORCECLOSE;
+	}
+
+	/*
+	 * Clear out buffer cache.  I don't think we
+	 * ever get anything cached at this level at the
+	 * moment, but who knows...
+	 */
+#ifdef notyet
+	mntflushbuf(mp, 0); 
+	if (mntinvalbuf(mp, 1))
+		return (EBUSY);
+#endif
+
+	/* Busy if the root alias has more than one reference. */
+	if (rootvp->v_usecount > 1)
+		return (EBUSY);
+	error = vflush(mp, rootvp, vflags);
+	if (error)
+		return (error);
+
+#ifdef UMAPFS_DIAGNOSTIC
+	vprint("alias root of lower", rootvp);
+#endif
+	/* Release the reference taken on the root vnode at mount time ... */
+	vrele(rootvp);
+	/* ... and blow it away for future re-use. */
+	vgone(rootvp);
+	/* Finally, throw away the umap_mount structure. */
+	free(mp->mnt_data, M_UFSMNT);	/* XXX */
+	mp->mnt_data = 0;
+	return (0);
+}
+
+int
+umapfs_root(mp, vpp)
+	struct mount *mp;
+	struct vnode **vpp;
+{
+	struct vnode *rootvp = MOUNTTOUMAPMOUNT(mp)->umapm_rootvp;
+
+#ifdef UMAPFS_DIAGNOSTIC
+	printf("umapfs_root(mp = %x, vp = %x->%x)\n", mp,
+			rootvp, UMAPVPTOLOWERVP(rootvp));
+#endif
+
+	/*
+	 * Hand the root alias vnode back through *vpp with a new
+	 * reference taken and the vnode locked.
+	 * Always returns 0.
+	 */
+	VREF(rootvp);
+	VOP_LOCK(rootvp);
+	*vpp = rootvp;
+
+	return (0);
+}
+
+int
+umapfs_quotactl(mp, cmd, uid, arg, p)
+	struct mount *mp;
+	int cmd;
+	uid_t uid;
+	caddr_t arg;
+	struct proc *p;
+{	/* Forwarded to the lower mount; uid is passed through unmapped. */
+	return (VFS_QUOTACTL(MOUNTTOUMAPMOUNT(mp)->umapm_vfs, cmd, uid, arg, p));
+}
+
+int
+umapfs_statfs(mp, sbp, p)
+	struct mount *mp;
+	struct statfs *sbp;
+	struct proc *p;
+{
+	struct umap_mount *amp = MOUNTTOUMAPMOUNT(mp);
+	struct statfs lower;
+	int error;
+
+#ifdef UMAPFS_DIAGNOSTIC
+	printf("umapfs_statfs(mp = %x, vp = %x->%x)\n", mp,
+			amp->umapm_rootvp, UMAPVPTOLOWERVP(amp->umapm_rootvp));
+#endif
+
+	/* Ask the lower filesystem for its statistics. */
+	bzero(&lower, sizeof(lower));
+	if (error = VFS_STATFS(amp->umapm_vfs, &lower, p))
+		return (error);
+
+	/* now copy across the "interesting" information and fake the rest */
+	sbp->f_type = lower.f_type;
+	sbp->f_flags = lower.f_flags;
+	sbp->f_bsize = lower.f_bsize;
+	sbp->f_iosize = lower.f_iosize;
+	sbp->f_blocks = lower.f_blocks;
+	sbp->f_bfree = lower.f_bfree;
+	sbp->f_bavail = lower.f_bavail;
+	sbp->f_files = lower.f_files;
+	sbp->f_ffree = lower.f_ffree;
+
+	/* The fsid and names come from this mount, not the lower one. */
+	if (sbp == &mp->mnt_stat)
+		return (0);
+	bcopy(&mp->mnt_stat.f_fsid, &sbp->f_fsid, sizeof(sbp->f_fsid));
+	bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN);
+	bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN);
+	return (0);
+}
+
+int
+umapfs_sync(mp, waitfor, cred, p)
+	struct mount *mp;
+	int waitfor;
+	struct ucred *cred;
+	struct proc *p;
+{
+	/*
+	 * Nothing is done here.  XXX - Assumes no data cached at umap layer.
+	 */
+	return (0);
+}
+
+int
+umapfs_vget(mp, ino, vpp)
+	struct mount *mp;
+	ino_t ino;
+	struct vnode **vpp;
+{
+	/* Forwarded to the lower mount; *vpp is not wrapped in an alias here. */
+	return (VFS_VGET(MOUNTTOUMAPMOUNT(mp)->umapm_vfs, ino, vpp));
+}
+
+int
+umapfs_fhtovp(mp, fidp, nam, vpp, exflagsp, credanonp)
+	struct mount *mp;
+	struct fid *fidp;
+	struct mbuf *nam;
+	struct vnode **vpp;
+	int *exflagsp;
+	struct ucred**credanonp;
+{
+	/* Forwarded with all arguments unchanged to the lower mount. */
+	return (VFS_FHTOVP(MOUNTTOUMAPMOUNT(mp)->umapm_vfs, fidp, nam, vpp, exflagsp,credanonp));
+}
+
+int
+umapfs_vptofh(vp, fhp)
+	struct vnode *vp;
+	struct fid *fhp;
+{	/* File handle generation is delegated to the lower vnode. */
+	return (VFS_VPTOFH(UMAPVPTOLOWERVP(vp), fhp));
+}
+
+int umapfs_init __P((void));	/* not defined in this file */
+/* VFS operations vector for the umap layer. */
+struct vfsops umap_vfsops = {
+	umapfs_mount,
+	umapfs_start,
+	umapfs_unmount,
+	umapfs_root,
+	umapfs_quotactl,
+	umapfs_statfs,
+	umapfs_sync,
+	umapfs_vget,
+	umapfs_fhtovp,
+	umapfs_vptofh,
+	umapfs_init,
+};
diff --git a/sys/fs/umapfs/umap_vnops.c b/sys/fs/umapfs/umap_vnops.c
new file mode 100644
index 00000000000..287804e1561
--- /dev/null
+++ b/sys/fs/umapfs/umap_vnops.c
@@ -0,0 +1,488 @@
+/*
+ * Copyright (c) 1992, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software donated to Berkeley by
+ * the UCLA Ficus project.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)umap_vnops.c	8.3 (Berkeley) 1/5/94
+ */
+
+/*
+ * Umap Layer
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/vnode.h>
+#include <sys/mount.h>
+#include <sys/namei.h>
+#include <sys/malloc.h>
+#include <sys/buf.h>
+#include <miscfs/umapfs/umap.h>
+
+
+int umap_bug_bypass = 0;   /* for debugging: nonzero enables the bypass printf's in this file */
+
+/*
+ * Generic bypass (10-Apr-92 version; see null_vnops.c:null_bypass): swap in
+ * the lower vnodes and umap_mapids()-mapped creds, call down, then restore.
+ */
+int
+umap_bypass(ap)
+	struct vop_generic_args /* {
+		struct vnodeop_desc *a_desc;
+		<other random data follows, presumably>
+	} */ *ap;
+{
+	extern int (**umap_vnodeop_p)();  /* not extern, really "forward" */
+	struct ucred **credpp = 0, *credp = 0;
+	struct ucred *savecredp, *savecompcredp = 0;
+	struct ucred *compcredp = 0;
+	struct vnode **this_vp_p;
+	int error;
+	struct vnode *old_vps[VDESC_MAX_VPS];
+	struct vnode *vp1 = 0;
+	struct vnode **vps_p[VDESC_MAX_VPS];
+	struct vnode ***vppp;
+	struct vnodeop_desc *descp = ap->a_desc;
+	int reles, i;
+	struct componentname **compnamepp = 0;
+
+	if (umap_bug_bypass)
+		printf ("umap_bypass: %s\n", descp->vdesc_name);
+
+#ifdef SAFETY
+	/*
+	 * We require at least one vp.
+	 */
+	if (descp->vdesc_vp_offsets == NULL ||
+	    descp->vdesc_vp_offsets[0] == VDESC_NO_OFFSET)
+		panic ("umap_bypass: no vp's in map.\n");
+#endif
+
+	/*
+	 * Map the vnodes going in.
+	 * Later, we'll invoke the operation based on
+	 * the first mapped vnode's operation vector.
+	 */
+	reles = descp->vdesc_flags;
+	for (i = 0; i < VDESC_MAX_VPS; reles >>= 1, i++) {
+		if (descp->vdesc_vp_offsets[i] == VDESC_NO_OFFSET)
+			break;   /* bail out at end of list */
+		vps_p[i] = this_vp_p = 
+			VOPARG_OFFSETTO(struct vnode**, descp->vdesc_vp_offsets[i], ap);
+
+		if (i == 0) {
+			vp1 = *vps_p[0];
+		}
+
+		/*
+		 * We're not guaranteed that any but the first vnode
+		 * are of our type.  Check for and don't map any
+		 * that aren't.  (Must map first vp or vclean fails.)
+		 */
+
+		if (i && (*this_vp_p)->v_op != umap_vnodeop_p) {
+			old_vps[i] = NULL;
+		} else {
+			old_vps[i] = *this_vp_p;
+			*(vps_p[i]) = UMAPVPTOLOWERVP(*this_vp_p);
+			if (reles & 1)
+				VREF(*this_vp_p);
+		}
+			
+	}
+
+	/*
+	 * Fix the credentials.  (That's the purpose of this layer.)
+	 */
+
+	if (descp->vdesc_cred_offset != VDESC_NO_OFFSET) {
+
+		credpp = VOPARG_OFFSETTO(struct ucred**, 
+		    descp->vdesc_cred_offset, ap);
+
+		/* Save old values */
+
+		savecredp = (*credpp);
+		(*credpp) = crdup(savecredp);
+		credp = *credpp;
+
+		if (umap_bug_bypass && credp->cr_uid != 0)
+			printf("umap_bypass: user was %d, group %d\n", 
+			    credp->cr_uid, credp->cr_gid);
+
+		/* Map all ids in the credential structure. */
+
+		umap_mapids(vp1->v_mount, credp);
+
+		if (umap_bug_bypass && credp->cr_uid != 0)
+			printf("umap_bypass: user now %d, group %d\n", 
+			    credp->cr_uid, credp->cr_gid);
+	}
+
+	/* BSD often keeps a credential in the componentname structure
+	 * for speed.  If there is one, it better get mapped, too. 
+	 */
+
+	if (descp->vdesc_componentname_offset != VDESC_NO_OFFSET) {
+
+		compnamepp = VOPARG_OFFSETTO(struct componentname**, 
+		    descp->vdesc_componentname_offset, ap);
+
+		compcredp = (*compnamepp)->cn_cred;
+		savecompcredp = compcredp;
+		compcredp = (*compnamepp)->cn_cred = crdup(savecompcredp);
+
+		if (umap_bug_bypass && compcredp->cr_uid != 0)
+			printf("umap_bypass: component credit user was %d, group %d\n", 
+			    compcredp->cr_uid, compcredp->cr_gid);
+
+		/* Map all ids in the credential structure. */
+
+		umap_mapids(vp1->v_mount, compcredp);
+
+		if (umap_bug_bypass && compcredp->cr_uid != 0)
+			printf("umap_bypass: component credit user now %d, group %d\n", 
+			    compcredp->cr_uid, compcredp->cr_gid);
+	}
+
+	/*
+	 * Call the operation on the lower layer
+	 * with the modified argument structure.
+	 */
+	error = VCALL(*(vps_p[0]), descp->vdesc_offset, ap);
+
+	/*
+	 * Maintain the illusion of call-by-value
+	 * by restoring vnodes in the argument structure
+	 * to their original value.
+	 */
+	reles = descp->vdesc_flags;
+	for (i = 0; i < VDESC_MAX_VPS; reles >>= 1, i++) {
+		if (descp->vdesc_vp_offsets[i] == VDESC_NO_OFFSET)
+			break;   /* bail out at end of list */
+		if (old_vps[i]) {
+			*(vps_p[i]) = old_vps[i];
+			if (reles & 1)
+				vrele(*(vps_p[i]));
+		};
+	};
+
+	/*
+	 * Map the possible out-going vpp
+	 * (Assumes that the lower layer always returns
+	 * a VREF'ed vpp unless it gets an error.)
+	 */
+	if (descp->vdesc_vpp_offset != VDESC_NO_OFFSET &&
+	    !(descp->vdesc_flags & VDESC_NOMAP_VPP) &&
+	    !error) {
+		if (descp->vdesc_flags & VDESC_VPP_WILLRELE)
+			goto out;
+		vppp = VOPARG_OFFSETTO(struct vnode***,
+				 descp->vdesc_vpp_offset, ap);
+		error = umap_node_create(old_vps[0]->v_mount, **vppp, *vppp);
+	};
+
+ out:
+	/* 
+	 * Free duplicate cred structure and restore old one.
+	 */
+	if (descp->vdesc_cred_offset != VDESC_NO_OFFSET) {
+		if (umap_bug_bypass && credp && credp->cr_uid != 0)
+			printf("umap_bypass: returning-user was %d\n",
+					credp->cr_uid);
+
+		crfree(credp);
+		(*credpp) = savecredp;
+		if (umap_bug_bypass && credpp && (*credpp)->cr_uid != 0)
+		 	printf("umap_bypass: returning-user now %d\n\n", 
+			    (*credpp)->cr_uid);
+	}
+
+	if (descp->vdesc_componentname_offset != VDESC_NO_OFFSET) {
+		if (umap_bug_bypass && compcredp && compcredp->cr_uid != 0)
+			printf("umap_bypass: returning-component-user was %d\n", 
+				compcredp->cr_uid);
+		/* compcredp is freed next; afterwards report via savecompcredp. */
+		crfree(compcredp);
+		(*compnamepp)->cn_cred = savecompcredp;
+		if (umap_bug_bypass && savecompcredp->cr_uid != 0)
+		 	printf("umap_bypass: returning-component-user now %d\n", 
+					savecompcredp->cr_uid);
+	}
+
+	return (error);
+}
+
+
+/*
+ * We handle getattr to change the fsid and to reverse-map the owner
+ * uid and gid.  Returns the umap_bypass() error, else 0.
+ */
+int
+umap_getattr(ap)
+	struct vop_getattr_args /* {
+		struct vnode *a_vp;
+		struct vattr *a_vap;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	uid_t uid;
+	gid_t gid;
+	int error, tmpid, nentries, gnentries;
+	u_long (*mapdata)[2], (*gmapdata)[2];
+	struct vnode **vp1p;
+	struct vnodeop_desc *descp = ap->a_desc;
+
+	if (error = umap_bypass(ap))
+		return (error);
+	/* Requires that arguments be restored. */
+	ap->a_vap->va_fsid = ap->a_vp->v_mount->mnt_stat.f_fsid.val[0];
+
+	/*
+	 * Umap needs to map the uid and gid returned by a stat
+	 * into the proper values for this site.  This involves
+	 * finding the returned id in the mapping information,
+	 * translating it into the id on the other end,
+	 * and filling in the proper field in the vattr
+	 * structure pointed to by ap->a_vap.  An id with no
+	 * reverse mapping becomes NOBODY or NULLGROUP.  The ids
+	 * are kept at full width; a short would truncate them.
+	 */
+
+	/* Find entry in map */
+
+	uid = ap->a_vap->va_uid;
+	gid = ap->a_vap->va_gid;
+	if (umap_bug_bypass)
+		printf("umap_getattr: mapped uid = %d, mapped gid = %d\n", uid, 
+		    gid);
+
+	vp1p = VOPARG_OFFSETTO(struct vnode**, descp->vdesc_vp_offsets[0], ap);
+	nentries =  MOUNTTOUMAPMOUNT((*vp1p)->v_mount)->info_nentries;
+	mapdata =  (MOUNTTOUMAPMOUNT((*vp1p)->v_mount)->info_mapdata);
+	gnentries =  MOUNTTOUMAPMOUNT((*vp1p)->v_mount)->info_gnentries;
+	gmapdata =  (MOUNTTOUMAPMOUNT((*vp1p)->v_mount)->info_gmapdata);
+
+	/* Reverse map the uid for the vnode.  Since it's a reverse
+		map, we can't use umap_mapids() to do it. */
+
+	tmpid = umap_reverse_findid(uid, mapdata, nentries);
+
+	if (tmpid != -1) {
+		ap->a_vap->va_uid = (uid_t) tmpid;
+		if (umap_bug_bypass)
+			printf("umap_getattr: original uid = %d\n", uid);
+	} else 
+		ap->a_vap->va_uid = (uid_t) NOBODY;
+
+	/* Reverse map the gid for the vnode. */
+
+	tmpid = umap_reverse_findid(gid, gmapdata, gnentries);
+
+	if (tmpid != -1) {
+		ap->a_vap->va_gid = (gid_t) tmpid;
+		if (umap_bug_bypass)
+			printf("umap_getattr: original gid = %d\n", gid);
+	} else
+		ap->a_vap->va_gid = (gid_t) NULLGROUP;
+
+	return (0);
+}
+
+int
+umap_inactive(ap)
+	struct vop_inactive_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+	/*
+	 * Do nothing (and _don't_ bypass).
+	 * Wait to vrele lowervp until reclaim,
+	 * so that until then our umap_node is in the
+	 * cache and reusable.
+	 * ap is not examined; always returns 0.
+	 */
+	return (0);
+}
+
+int
+umap_reclaim(ap)
+	struct vop_reclaim_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+	struct vnode *vp = ap->a_vp;
+	struct umap_node *xp = VTOUMAP(vp);
+	struct vnode *lowervp = xp->umap_lowervp;
+	/* lowervp is read up front: the node is cleared and freed before the vrele. */
+	/* After this assignment, this node will not be re-used. */
+	xp->umap_lowervp = NULL;
+	remque(xp);			/* unlink the node from its queue */
+	FREE(vp->v_data, M_TEMP);	/* release the vnode's private data */
+	vp->v_data = NULL;
+	vrele(lowervp);			/* the vrele that umap_inactive deferred */
+	return (0);
+}
+
+int
+umap_strategy(ap)
+	struct vop_strategy_args /* {
+		struct buf *a_bp;
+	} */ *ap;
+{
+	struct buf *bufp = ap->a_bp;
+	struct vnode *umapvp = bufp->b_vp;
+	int rv;
+
+	/*
+	 * Point the buffer at the lower vnode for the duration of the
+	 * strategy call, then put our own vnode back.
+	 */
+	bufp->b_vp = UMAPVPTOLOWERVP(umapvp);
+	rv = VOP_STRATEGY(bufp);
+	bufp->b_vp = umapvp;
+	return (rv);
+}
+
+int
+umap_bwrite(ap)
+	struct vop_bwrite_args /* {
+		struct buf *a_bp;
+	} */ *ap;
+{
+	struct buf *bufp = ap->a_bp;
+	struct vnode *umapvp = bufp->b_vp;
+	int rv;
+
+	/*
+	 * Point the buffer at the lower vnode for the duration of the
+	 * bwrite call, then put our own vnode back.
+	 */
+	bufp->b_vp = UMAPVPTOLOWERVP(umapvp);
+	rv = VOP_BWRITE(bufp);
+	bufp->b_vp = umapvp;
+	return (rv);
+}
+
+/* Print one diagnostic line: this vnode and the lower vnode it aliases. */
+int
+umap_print(ap)
+	struct vop_print_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+	printf("\ttag VT_UMAPFS, vp=%x, lowervp=%x\n",
+	    ap->a_vp, UMAPVPTOLOWERVP(ap->a_vp));
+	return (0);
+}
+
+/*
+ * Rename: map the credential carried in ap->a_tcnp for the lower layer,
+ * pass the operation down through umap_bypass(), then restore the
+ * caller's credential.  Both umap_bug_bypass printfs are debug output.
+ * Returns whatever umap_bypass() returns.
+ */
+int
+umap_rename(ap)
+	struct vop_rename_args  /* {
+		struct vnode *a_fdvp;
+		struct vnode *a_fvp;
+		struct componentname *a_fcnp;
+		struct vnode *a_tdvp;
+		struct vnode *a_tvp;
+		struct componentname *a_tcnp;
+	} */ *ap;
+{
+	struct componentname *tcnp = ap->a_tcnp;
+	struct ucred *origcred = tcnp->cn_cred;
+	struct ucred *mapped;
+	int rv;
+
+	/*
+	 * Rename is irregular, having two componentname structures.
+	 * We need to map the cred in the second structure ourselves,
+	 * on a private crdup() copy so the caller's cred is untouched,
+	 * and then bypass takes care of the rest.
+	 */
+	mapped = crdup(origcred);
+	tcnp->cn_cred = mapped;
+
+	if (umap_bug_bypass && mapped->cr_uid != 0)
+		printf("umap_rename: rename component credit user was %d, group %d\n", 
+		    mapped->cr_uid, mapped->cr_gid);
+
+	/* Map all ids in the copied credential structure. */
+	umap_mapids(ap->a_fdvp->v_mount, mapped);
+
+	if (umap_bug_bypass && mapped->cr_uid != 0)
+		printf("umap_rename: rename component credit user now %d, group %d\n", 
+		    mapped->cr_uid, mapped->cr_gid);
+
+	rv = umap_bypass(ap);
+
+	/* Discard the mapped copy and restore the caller's credential. */
+	crfree(mapped);
+	tcnp->cn_cred = origcred;
+
+	return (rv);
+}
+
+/*
+ * Global vfs data structures
+ */
+/*
+ * XXX - strategy, bwrite are hand coded currently.  They should
+ * go away with a merged buffer/block cache.
+ * NOTE(review): unlisted ops presumably use the vop_default_desc entry.
+ */
+int (**umap_vnodeop_p)();
+struct vnodeopv_entry_desc umap_vnodeop_entries[] = {
+	{ &vop_default_desc, umap_bypass },
+
+	{ &vop_getattr_desc, umap_getattr },	/* bypass, then fix fsid/uid/gid */
+	{ &vop_inactive_desc, umap_inactive },	/* no-op, never bypassed */
+	{ &vop_reclaim_desc, umap_reclaim },
+	{ &vop_print_desc, umap_print },
+	{ &vop_rename_desc, umap_rename },	/* maps a_tcnp cred, then bypass */
+
+	{ &vop_strategy_desc, umap_strategy },
+	{ &vop_bwrite_desc, umap_bwrite },
+
+	{ (struct vnodeop_desc*) NULL, (int(*)()) NULL }	/* terminator */
+};
+struct vnodeopv_desc umap_vnodeop_opv_desc =
+	{ &umap_vnodeop_p, umap_vnodeop_entries };
diff --git a/sys/fs/unionfs/union.h b/sys/fs/unionfs/union.h
new file mode 100644
index 00000000000..463218ac3ed
--- /dev/null
+++ b/sys/fs/unionfs/union.h
@@ -0,0 +1,117 @@
+/*
+ * Copyright (c) 1994 The Regents of the University of California.
+ * Copyright (c) 1994 Jan-Simon Pendry.
+ * All rights reserved.
+ *
+ * This code is derived from software donated to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)union.h	8.2 (Berkeley) 2/17/94
+ */
+
+struct union_args {
+	char		*target;	/* Target of loopback  */
+	int		mntflags;	/* Options on the mount (presumably UNMNT_*) */
+};
+
+#define UNMNT_ABOVE	0x0001		/* Target appears above mount point */
+#define UNMNT_BELOW	0x0002		/* Target appears below mount point */
+#define UNMNT_REPLACE	0x0003		/* Target replaces mount point */
+#define UNMNT_OPMASK	0x0003		/* Mask covering the values above */
+
+struct union_mount {
+	struct vnode	*um_uppervp;	/* Upper layer vnode */
+	struct vnode	*um_lowervp;	/* Lower layer vnode */
+	struct ucred	*um_cred;	/* Credentials of user calling mount */
+	int		um_cmode;	/* cmask from mount process */
+	int		um_op;		/* Operation mode (cf. UNMNT_OPMASK) */
+};
+
+#ifdef KERNEL
+
+/*
+ * UN_DIRMODE is the mode bits used to create a shadow directory.
+ */
+#define VRWXMODE (VREAD|VWRITE|VEXEC)
+#define VRWMODE (VREAD|VWRITE)
+#define UN_DIRMODE ((VRWXMODE)|(VRWXMODE>>3)|(VRWXMODE>>6))
+#define UN_FILEMODE ((VRWMODE)|(VRWMODE>>3)|(VRWMODE>>6))	/* as UN_DIRMODE, minus VEXEC */
+
+/*
+ * A cache of vnode references
+ */
+struct union_node {
+	LIST_ENTRY(union_node)	un_cache;	/* Hash chain */
+	struct vnode		*un_vnode;	/* Back pointer */
+	struct vnode	        *un_uppervp;	/* overlaying object */
+	struct vnode	        *un_lowervp;	/* underlying object */
+	struct vnode		*un_dirvp;	/* Parent dir of uppervp */
+	char			*un_path;	/* saved component name */
+	int			un_hash;	/* saved un_path hash value */
+	int			un_openl;	/* # of opens on lowervp */
+	int			un_flags;	/* presumably the UN_* bits below */
+#ifdef DIAGNOSTIC
+	pid_t			un_pid;
+#endif
+};
+
+#define UN_WANT		0x01		/* presumably: a waiter exists -- confirm */
+#define UN_LOCKED	0x02		/* presumably: node is locked -- confirm */
+#define UN_ULOCK	0x04		/* Upper node is locked */
+#define UN_KLOCK	0x08		/* Keep upper node locked on vput */
+
+extern int union_allocvp __P((struct vnode **, struct mount *,
+				struct vnode *, struct vnode *,
+				struct componentname *, struct vnode *,
+				struct vnode *));
+extern int union_copyfile __P((struct proc *, struct ucred *,
+				struct vnode *, struct vnode *));
+extern int union_mkshadow __P((struct union_mount *, struct vnode *,
+				struct componentname *, struct vnode **));
+extern int union_vn_create __P((struct vnode **, struct union_node *,
+				struct proc *));
+extern int union_cn_close __P((struct vnode *, int, struct ucred *,
+				struct proc *));
+extern void union_removed_upper __P((struct union_node *un));
+extern struct vnode *union_lowervp __P((struct vnode *));
+extern void union_newlower __P((struct union_node *, struct vnode *));
+extern void union_newupper __P((struct union_node *, struct vnode *));
+
+#define	MOUNTTOUNIONMOUNT(mp) ((struct union_mount *)((mp)->mnt_data))
+#define	VTOUNION(vp) ((struct union_node *)(vp)->v_data)
+#define	UNIONTOV(un) ((un)->un_vnode)
+#define	LOWERVP(vp) (VTOUNION(vp)->un_lowervp)
+#define	UPPERVP(vp) (VTOUNION(vp)->un_uppervp)
+#define OTHERVP(vp) (UPPERVP(vp) ? UPPERVP(vp) : LOWERVP(vp))	/* upper vnode if set, else lower */
+
+extern int (**union_vnodeop_p)();
+extern struct vfsops union_vfsops;
+#endif /* KERNEL */
diff --git a/sys/fs/unionfs/union_subr.c b/sys/fs/unionfs/union_subr.c
new file mode 100644
index 00000000000..77947d1dfbe
--- /dev/null
+++ b/sys/fs/unionfs/union_subr.c
@@ -0,0 +1,744 @@
+/*
+ * Copyright (c) 1994 Jan-Simon Pendry
+ * Copyright (c) 1994
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)union_subr.c	8.4 (Berkeley) 2/17/94
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/time.h>
+#include <sys/kernel.h>
+#include <sys/vnode.h>
+#include <sys/namei.h>
+#include <sys/malloc.h>
+#include <sys/file.h>
+#include <sys/filedesc.h>
+#include <sys/queue.h>
+#include <miscfs/union/union.h>
+
+#ifdef DIAGNOSTIC
+#include <sys/proc.h>
+#endif
+
+/*
+ * Number of hash buckets.
+ * must be power of two, otherwise change UNION_HASH()
+ */
+#define NHASH 32
+
+/*
+ * Bucket index for an (upper, lower) vnode pointer pair: the two
+ * pointers are added as unsigned longs, the low 8 bits are dropped
+ * and the result is masked down to [0, NHASH).  Both arguments are
+ * parenthesized so that any expression can be passed safely.
+ */
+#define UNION_HASH(u, l) \
+	(((((unsigned long) (u)) + ((unsigned long) (l))) >> 8) & (NHASH-1))
+
+/* one list of union_nodes per bucket, and one lock word per bucket */
+static LIST_HEAD(unhead, union_node) unhead[NHASH];
+static int unvplock[NHASH];
+
+/*
+ * Initialise the union_node cache: empty every hash list and clear
+ * every per-bucket lock word.  Always returns 0; the function is
+ * declared to return int, so a value must actually be returned.
+ */
+int
+union_init()
+{
+	int i;
+
+	for (i = 0; i < NHASH; i++)
+		LIST_INIT(&unhead[i]);
+	bzero((caddr_t) unvplock, sizeof(unvplock));
+
+	return (0);
+}
+
+/*
+ * Make one attempt at taking the lock on hash bucket (ix).
+ * Returns 0 with the lock held.  If the lock is already taken,
+ * record that someone is waiting, sleep until woken and return 1
+ * WITHOUT the lock - callers loop until they get 0.
+ */
+static int
+union_list_lock(ix)
+	int ix;
+{
+	int *lockp = &unvplock[ix];
+
+	if ((*lockp & UN_LOCKED) == 0) {
+		/* free: take it */
+		*lockp |= UN_LOCKED;
+		return (0);
+	}
+
+	/* busy: ask for a wakeup and wait on the lock word */
+	*lockp |= UN_WANT;
+	sleep((caddr_t) lockp, PINOD);
+	return (1);
+}
+
+/*
+ * Release the lock on hash bucket (ix) and, if anyone asked to be
+ * told, wake the sleepers waiting on that bucket's lock word.
+ */
+static void
+union_list_unlock(ix)
+	int ix;
+{
+	int *lockp = &unvplock[ix];
+	int wanted = *lockp & UN_WANT;
+
+	*lockp &= ~(UN_LOCKED | UN_WANT);
+
+	if (wanted)
+		wakeup((caddr_t) lockp);
+}
+
+/*
+ * Replace the upper and/or lower vnode recorded in (un) by (uppervp)
+ * and (lowervp), and move (un) to the hash list that matches the new
+ * pair.  A vnode that is being replaced is vrele'd; when the lower
+ * vnode is replaced the saved pathname and parent directory that
+ * went with it are released too.  The references passed in are
+ * stored in (un), not duplicated.
+ *
+ * (ohash) always names the bucket (un) is currently on and (nhash)
+ * the bucket it must end up on.  Only the ORDER in which the two
+ * bucket locks are taken depends on which index is smaller; the
+ * indices themselves must not be swapped, otherwise (un) would be
+ * re-inserted on its old list and the wrong lock dropped early.
+ */
+void
+union_updatevp(un, uppervp, lowervp)
+	struct union_node *un;
+	struct vnode *uppervp;
+	struct vnode *lowervp;
+{
+	int ohash = UNION_HASH(un->un_uppervp, un->un_lowervp);
+	int nhash = UNION_HASH(uppervp, lowervp);
+
+	if (ohash != nhash) {
+		/*
+		 * Ensure locking is ordered from lower to higher
+		 * to avoid deadlocks.
+		 */
+		int lohash = (ohash < nhash) ? ohash : nhash;
+		int hihash = (ohash < nhash) ? nhash : ohash;
+
+		while (union_list_lock(lohash))
+			continue;
+
+		while (union_list_lock(hihash))
+			continue;
+
+		/* off the old list; the old bucket is finished with */
+		LIST_REMOVE(un, un_cache);
+		union_list_unlock(ohash);
+	} else {
+		while (union_list_lock(nhash))
+			continue;
+	}
+
+	/* from here on exactly the lock for bucket (nhash) is held */
+
+	if (un->un_lowervp != lowervp) {
+		if (un->un_lowervp) {
+			vrele(un->un_lowervp);
+			if (un->un_path) {
+				free(un->un_path, M_TEMP);
+				un->un_path = 0;
+			}
+			if (un->un_dirvp) {
+				vrele(un->un_dirvp);
+				un->un_dirvp = NULLVP;
+			}
+		}
+		un->un_lowervp = lowervp;
+	}
+
+	if (un->un_uppervp != uppervp) {
+		if (un->un_uppervp)
+			vrele(un->un_uppervp);
+
+		un->un_uppervp = uppervp;
+	}
+
+	/* the node was unlinked above only if the bucket changed */
+	if (ohash != nhash)
+		LIST_INSERT_HEAD(&unhead[nhash], un, un_cache);
+
+	union_list_unlock(nhash);
+}
+
+/*
+ * Give (un) a new lower layer vnode, leaving its upper layer
+ * vnode as it is.
+ */
+void
+union_newlower(un, lowervp)
+	struct union_node *un;
+	struct vnode *lowervp;
+{
+	struct vnode *same_upper = un->un_uppervp;
+
+	union_updatevp(un, same_upper, lowervp);
+}
+
+/*
+ * Give (un) a new upper layer vnode, leaving its lower layer
+ * vnode as it is.
+ */
+void
+union_newupper(un, uppervp)
+	struct union_node *un;
+	struct vnode *uppervp;
+{
+	struct vnode *same_lower = un->un_lowervp;
+
+	union_updatevp(un, uppervp, same_lower);
+}
+
+/*
+ * allocate a union_node/vnode pair.  the vnode is
+ * referenced and locked.  the new vnode is returned
+ * via (vpp).  (mp) is the mountpoint of the union filesystem,
+ * (dvp) is the parent directory where the upper layer object
+ * should exist (but doesn't) and (cnp) is the componentname
+ * information which is partially copied to allow the upper
+ * layer object to be created at a later time.  (uppervp)
+ * and (lowervp) reference the upper and lower layer objects
+ * being mapped.  either, but not both, can be nil.
+ * if supplied, (uppervp) is locked.
+ * the reference is either maintained in the new union_node
+ * object which is allocated, or they are vrele'd.
+ * (undvp) is accepted but not used by this function.
+ *
+ * all union_nodes are maintained on the unhead[] hash lists,
+ * indexed by UNION_HASH() of the upper and lower vnode
+ * pointers.  new nodes are only allocated when they cannot
+ * be found on these lists.  entries on the lists are
+ * removed by union_freevp().
+ *
+ * each hash list has its own lock word in unvplock[].  the
+ * lock is held while a list is searched, and again across the
+ * call to getnewvnode(), since that function can block
+ * waiting for a vnode to become free, in which case there
+ * may be more than one process trying to get the same
+ * vnode.
+ *
+ * if an entry is found on the list, then call vget() to
+ * take a reference.  this is done because there may be
+ * zero references to it and so it needs to be removed from
+ * the vnode free list.
+ *
+ * returns 0 on success, otherwise the error from getnewvnode().
+ */
+int
+union_allocvp(vpp, mp, undvp, dvp, cnp, uppervp, lowervp)
+	struct vnode **vpp;
+	struct mount *mp;
+	struct vnode *undvp;
+	struct vnode *dvp;		/* may be null */
+	struct componentname *cnp;	/* may be null */
+	struct vnode *uppervp;		/* may be null */
+	struct vnode *lowervp;		/* may be null */
+{
+	int error;
+	struct union_node *un;
+	struct union_node **pp;		/* not referenced below */
+	struct vnode *xlowervp = NULLVP;
+	int hash;
+	int try;
+
+	if (uppervp == NULLVP && lowervp == NULLVP)
+		panic("union: unidentifiable allocation");
+
+	/*
+	 * If both layers are present but are objects of different
+	 * types, ignore the lower one.  Its vnode is remembered in
+	 * xlowervp so that the reference can be dropped later.
+	 */
+	if (uppervp && lowervp && (uppervp->v_type != lowervp->v_type)) {
+		xlowervp = lowervp;
+		lowervp = NULLVP;
+	}
+
+loop:
+	/*
+	 * Look for an existing node.  Up to three buckets are probed:
+	 * the one for the (upper, lower) pair, the one for the upper
+	 * vnode alone and the one for the lower vnode alone.  A probe
+	 * whose key would involve a nil vnode is skipped.
+	 */
+	for (try = 0; try < 3; try++) {
+		switch (try) {
+		case 0:
+			if (lowervp == NULLVP)
+				continue;
+			hash = UNION_HASH(uppervp, lowervp);
+			break;
+
+		case 1:
+			if (uppervp == NULLVP)
+				continue;
+			hash = UNION_HASH(uppervp, NULLVP);
+			break;
+
+		case 2:
+			if (lowervp == NULLVP)
+				continue;
+			hash = UNION_HASH(NULLVP, lowervp);
+			break;
+		}
+
+		while (union_list_lock(hash))
+			continue;
+
+		/*
+		 * A node matches if it belongs to this mount and each
+		 * of its layers is either the one asked for or nil.
+		 */
+		for (un = unhead[hash].lh_first; un != 0;
+					un = un->un_cache.le_next) {
+			if ((un->un_lowervp == lowervp ||
+			     un->un_lowervp == NULLVP) &&
+			    (un->un_uppervp == uppervp ||
+			     un->un_uppervp == NULLVP) &&
+			    (UNIONTOV(un)->v_mount == mp)) {
+				/* vget failed: start the search again */
+				if (vget(UNIONTOV(un), 0)) {
+					union_list_unlock(hash);
+					goto loop;
+				}
+				break;
+			}
+		}
+
+		union_list_unlock(hash);
+
+		if (un)
+			break;
+	}
+
+	if (un) {
+		/*
+		 * Obtain a lock on the union_node.
+		 * uppervp is locked, though un->un_uppervp
+		 * may not be.  this doesn't break the locking
+		 * hierarchy since in the case that un->un_uppervp
+		 * is not yet locked it will be vrele'd and replaced
+		 * with uppervp.
+		 */
+
+		if ((dvp != NULLVP) && (uppervp == dvp)) {
+			/*
+			 * Access ``.'', so (un) will already
+			 * be locked.  Since this process has
+			 * the lock on (uppervp) no other
+			 * process can hold the lock on (un).
+			 */
+#ifdef DIAGNOSTIC
+			if ((un->un_flags & UN_LOCKED) == 0)
+				panic("union: . not locked");
+			else if (curproc && un->un_pid != curproc->p_pid &&
+				    un->un_pid > -1 && curproc->p_pid > -1)
+				panic("union: allocvp not lock owner");
+#endif
+		} else {
+			/*
+			 * Node is locked by someone else: give back the
+			 * reference taken by vget(), wait for the lock
+			 * to be released and search again from scratch.
+			 */
+			if (un->un_flags & UN_LOCKED) {
+				vrele(UNIONTOV(un));
+				un->un_flags |= UN_WANT;
+				sleep((caddr_t) &un->un_flags, PINOD);
+				goto loop;
+			}
+			un->un_flags |= UN_LOCKED;
+
+#ifdef DIAGNOSTIC
+			if (curproc)
+				un->un_pid = curproc->p_pid;
+			else
+				un->un_pid = -1;
+#endif
+		}
+
+		/*
+		 * At this point, the union_node is locked,
+		 * un->un_uppervp may not be locked, and uppervp
+		 * is locked or nil.
+		 */
+
+		/*
+		 * Save information about the upper layer.
+		 * If the node already holds this upper vnode the
+		 * extra reference passed in is dropped.
+		 */
+		if (uppervp != un->un_uppervp) {
+			union_newupper(un, uppervp);
+		} else if (uppervp) {
+			vrele(uppervp);
+		}
+
+		if (un->un_uppervp) {
+			un->un_flags |= UN_ULOCK;
+			un->un_flags &= ~UN_KLOCK;
+		}
+
+		/*
+		 * Save information about the lower layer.
+		 * This needs to keep track of pathname
+		 * and directory information which union_vn_create
+		 * might need.
+		 */
+		if (lowervp != un->un_lowervp) {
+			union_newlower(un, lowervp);
+			/*
+			 * NOTE(review): dvp is dereferenced by VREF()
+			 * whenever cnp is set; presumably callers always
+			 * pass dvp together with cnp - confirm.
+			 */
+			if (cnp && (lowervp != NULLVP) &&
+			    (lowervp->v_type == VREG)) {
+				un->un_hash = cnp->cn_hash;
+				un->un_path = malloc(cnp->cn_namelen+1,
+						M_TEMP, M_WAITOK);
+				bcopy(cnp->cn_nameptr, un->un_path,
+						cnp->cn_namelen);
+				un->un_path[cnp->cn_namelen] = '\0';
+				VREF(dvp);
+				un->un_dirvp = dvp;
+			}
+		} else if (lowervp) {
+			vrele(lowervp);
+		}
+		/*
+		 * NOTE(review): xlowervp is only vrele'd on the
+		 * new-node path at the end of this function; this
+		 * return and the getnewvnode() error path leave it
+		 * untouched - confirm whether that reference is
+		 * meant to be dropped here as well.
+		 */
+		*vpp = UNIONTOV(un);
+		return (0);
+	}
+
+	/*
+	 * otherwise lock the vp list while we call getnewvnode
+	 * since that can block.
+	 */ 
+	hash = UNION_HASH(uppervp, lowervp);
+
+	/* had to sleep for the lock: the lists may have changed, so retry */
+	if (union_list_lock(hash))
+		goto loop;
+
+	error = getnewvnode(VT_UNION, mp, union_vnodeop_p, vpp);
+	if (error) {
+		/* give back the references (and the upper lock) passed in */
+		if (uppervp) {
+			if (dvp == uppervp)
+				vrele(uppervp);
+			else
+				vput(uppervp);
+		}
+		if (lowervp)
+			vrele(lowervp);
+
+		goto out;
+	}
+
+	MALLOC((*vpp)->v_data, void *, sizeof(struct union_node),
+		M_TEMP, M_WAITOK);
+
+	/* the union vnode takes its type from the upper layer if there is one */
+	if (uppervp)
+		(*vpp)->v_type = uppervp->v_type;
+	else
+		(*vpp)->v_type = lowervp->v_type;
+	un = VTOUNION(*vpp);
+	un->un_vnode = *vpp;
+	un->un_uppervp = uppervp;
+	un->un_lowervp = lowervp;
+	un->un_openl = 0;
+	un->un_flags = UN_LOCKED;
+	if (un->un_uppervp)
+		un->un_flags |= UN_ULOCK;
+#ifdef DIAGNOSTIC
+	if (curproc)
+		un->un_pid = curproc->p_pid;
+	else
+		un->un_pid = -1;
+#endif
+	/*
+	 * For a regular file in the lower layer, save the component
+	 * name and a reference to the parent directory.
+	 */
+	if (cnp && (lowervp != NULLVP) && (lowervp->v_type == VREG)) {
+		un->un_hash = cnp->cn_hash;
+		un->un_path = malloc(cnp->cn_namelen+1, M_TEMP, M_WAITOK);
+		bcopy(cnp->cn_nameptr, un->un_path, cnp->cn_namelen);
+		un->un_path[cnp->cn_namelen] = '\0';
+		VREF(dvp);
+		un->un_dirvp = dvp;
+	} else {
+		un->un_hash = 0;
+		un->un_path = 0;
+		un->un_dirvp = 0;
+	}
+
+	LIST_INSERT_HEAD(&unhead[hash], un, un_cache);
+
+	/* the lower vnode that was ignored for its type is not kept */
+	if (xlowervp)
+		vrele(xlowervp);
+
+out:
+	union_list_unlock(hash);
+
+	return (error);
+}
+
+/*
+ * Tear down the union_node attached to (vp): take it off its hash
+ * list, drop the references it holds on the upper, lower and parent
+ * directory vnodes, free the saved pathname and finally free the
+ * node itself, clearing vp->v_data.  Always returns 0.
+ */
+int
+union_freevp(vp)
+	struct vnode *vp;
+{
+	struct union_node *un;
+
+	un = VTOUNION(vp);
+	LIST_REMOVE(un, un_cache);
+
+	if (un->un_uppervp != NULLVP) {
+		vrele(un->un_uppervp);
+	}
+	if (un->un_lowervp != NULLVP) {
+		vrele(un->un_lowervp);
+	}
+	if (un->un_dirvp != NULLVP) {
+		vrele(un->un_dirvp);
+	}
+	if (un->un_path != 0) {
+		free(un->un_path, M_TEMP);
+	}
+
+	FREE(vp->v_data, M_TEMP);
+	vp->v_data = 0;
+
+	return (0);
+}
+
+/*
+ * copyfile.  copy the contents of the vnode (fvp) to the
+ * vnode (tvp) with alternating reads and writes through one
+ * MAXBSIZE bounce buffer.  both (fvp) and (tvp) are locked
+ * on entry and exit.  (p) and (cred) are the process and
+ * credentials used for the i/o.
+ *
+ * returns 0 once a read yields no data, or the first error
+ * reported by VOP_READ or VOP_WRITE.
+ */
+int
+union_copyfile(p, cred, fvp, tvp)
+	struct proc *p;
+	struct ucred *cred;
+	struct vnode *fvp;
+	struct vnode *tvp;
+{
+	struct iovec iov;
+	struct uio uio;
+	char *buf;
+	int error;
+
+	/* kernel-space transfer, starting at the beginning of the file */
+	uio.uio_offset = 0;
+	uio.uio_segflg = UIO_SYSSPACE;
+	uio.uio_procp = p;
+
+	/* each vnode is unlocked around its lease check */
+	VOP_UNLOCK(fvp);				/* XXX */
+	LEASE_CHECK(fvp, p, cred, LEASE_READ);
+	VOP_LOCK(fvp);					/* XXX */
+	VOP_UNLOCK(tvp);				/* XXX */
+	LEASE_CHECK(tvp, p, cred, LEASE_WRITE);
+	VOP_LOCK(tvp);					/* XXX */
+
+	buf = malloc(MAXBSIZE, M_TEMP, M_WAITOK);
+
+	for (;;) {
+		/* where this chunk starts; the write goes to the same place */
+		off_t start = uio.uio_offset;
+
+		/* read up to one buffer's worth from the source */
+		iov.iov_base = buf;
+		iov.iov_len = MAXBSIZE;
+		uio.uio_iov = &iov;
+		uio.uio_iovcnt = 1;
+		uio.uio_resid = iov.iov_len;
+		uio.uio_rw = UIO_READ;
+		error = VOP_READ(fvp, &uio, 0, cred);
+		if (error != 0)
+			break;
+
+		/* set up a write of exactly the number of bytes read */
+		iov.iov_base = buf;
+		iov.iov_len = MAXBSIZE - uio.uio_resid;
+		uio.uio_iov = &iov;
+		uio.uio_iovcnt = 1;
+		uio.uio_offset = start;
+		uio.uio_resid = iov.iov_len;
+		uio.uio_rw = UIO_WRITE;
+
+		/* nothing was read: the copy is complete */
+		if (uio.uio_resid == 0)
+			break;
+
+		/* keep writing until the chunk is consumed or an error occurs */
+		do {
+			error = VOP_WRITE(tvp, &uio, 0, cred);
+		} while ((uio.uio_resid > 0) && (error == 0));
+
+		if (error != 0)
+			break;
+	}
+
+	free(buf, M_TEMP);
+	return (error);
+}
+
+/*
+ * Create a shadow directory in the upper layer.
+ *
+ * (um) points to the union mount structure, which supplies the
+ * creation mode and, unless the mount is UNMNT_ABOVE, the mounting
+ * process's credentials.
+ * (dvp) is the directory in which to create the shadow directory.
+ * it is unlocked on entry and exit.
+ * (cnp) is the componentname to be created.
+ * (vpp) is the returned newly created shadow directory, which
+ * is returned locked.
+ *
+ * Returns 0 on success, EEXIST if the name turned out to exist
+ * already, or the error from relookup() or VOP_MKDIR().
+ */
+int
+union_mkshadow(um, dvp, cnp, vpp)
+	struct union_mount *um;
+	struct vnode *dvp;
+	struct componentname *cnp;
+	struct vnode **vpp;
+{
+	struct componentname cn;
+	struct vattr va;
+	struct proc *p;
+	int error;
+
+	p = cnp->cn_proc;
+
+	/*
+	 * policy: when creating the shadow directory in the
+	 * upper layer, create it owned by the user who did
+	 * the mount, group from parent directory, and mode
+	 * 777 modified by umask (ie mostly identical to the
+	 * mkdir syscall).  (jsp, kb)
+	 */
+
+	/*
+	 * The caller's componentname cannot be reused, because there
+	 * is no way to know where it came from or what it is being
+	 * used for, so a private one is faked up here.  That means
+	 * repeating parts of what NDINIT, namei, lookup and a CREATE
+	 * VOP_LOOKUP would normally do.
+	 * Conclusion: Horrible.
+	 *
+	 * The pathname buffer will be FREEed by VOP_MKDIR.
+	 */
+	cn.cn_namelen = cnp->cn_namelen;
+	cn.cn_pnbuf = malloc(cnp->cn_namelen+1, M_NAMEI, M_WAITOK);
+	bcopy(cnp->cn_nameptr, cn.cn_pnbuf, cnp->cn_namelen);
+	cn.cn_pnbuf[cnp->cn_namelen] = '\0';
+	cn.cn_nameptr = cn.cn_pnbuf;
+	cn.cn_hash = cnp->cn_hash;
+	cn.cn_consume = cnp->cn_consume;
+
+	cn.cn_nameiop = CREATE;
+	cn.cn_flags = (LOCKPARENT|HASBUF|SAVENAME|SAVESTART|ISLASTCN);
+	cn.cn_proc = cnp->cn_proc;
+	/* caller's credentials for an "above" mount, the mounter's otherwise */
+	cn.cn_cred = (um->um_op == UNMNT_ABOVE) ? cnp->cn_cred : um->um_cred;
+
+	VREF(dvp);
+	error = relookup(dvp, vpp, &cn);
+	if (error)
+		return (error);
+	vrele(dvp);
+
+	/* somebody else created the name first */
+	if (*vpp != NULLVP) {
+		VOP_ABORTOP(dvp, &cn);
+		VOP_UNLOCK(dvp);
+		vrele(*vpp);
+		*vpp = NULLVP;
+		return (EEXIST);
+	}
+
+	VATTR_NULL(&va);
+	va.va_type = VDIR;
+	va.va_mode = um->um_cmode;
+
+	/* LEASE_CHECK: dvp is locked */
+	LEASE_CHECK(dvp, p, p->p_ucred, LEASE_WRITE);
+
+	error = VOP_MKDIR(dvp, vpp, &cn, &va);
+	return (error);
+}
+
+/*
+ * union_vn_create: creates and opens a new shadow file
+ * on the upper union layer.  this function is similar
+ * in spirit to calling vn_open but it avoids calling namei().
+ * the problem with calling namei is that a) it locks too many
+ * things, and b) it doesn't start at the "right" directory,
+ * whereas relookup is told where to start.
+ *
+ * (vpp) receives the new, opened vnode (NULLVP on failure).
+ * (un) supplies the saved name (un_path, un_hash) and the parent
+ * directory (un_dirvp) in which to create the file.
+ * (p) is the process on whose behalf the file is created; its
+ * credentials and umask are used.
+ *
+ * returns 0 on success, EEXIST if the name already exists, or the
+ * error from relookup(), VOP_CREATE() or VOP_OPEN().
+ */
+int
+union_vn_create(vpp, un, p)
+	struct vnode **vpp;
+	struct union_node *un;
+	struct proc *p;
+{
+	struct vnode *vp;
+	struct ucred *cred = p->p_ucred;
+	struct vattr vat;
+	struct vattr *vap = &vat;
+	int fmode = FFLAGS(O_WRONLY|O_CREAT|O_TRUNC|O_EXCL);
+	int error;
+	int cmode = UN_FILEMODE & ~p->p_fd->fd_cmask;
+	struct componentname cn;
+
+	*vpp = NULLVP;
+
+	/*
+	 * Build a new componentname structure (for the same
+	 * reasons outlined in union_mkshadow).
+	 * The difference here is that the file is owned by
+	 * the current user, rather than by the person who
+	 * did the mount, since the current user needs to be
+	 * able to write the file (that's why it is being
+	 * copied in the first place).
+	 *
+	 * The buffer must hold the name AND its terminating NUL,
+	 * since cn_namelen+1 bytes are copied into it.
+	 */
+	cn.cn_namelen = strlen(un->un_path);
+	cn.cn_pnbuf = (caddr_t) malloc(cn.cn_namelen+1, M_NAMEI, M_WAITOK);
+	bcopy(un->un_path, cn.cn_pnbuf, cn.cn_namelen+1);
+	cn.cn_nameiop = CREATE;
+	cn.cn_flags = (LOCKPARENT|HASBUF|SAVENAME|SAVESTART|ISLASTCN);
+	cn.cn_proc = p;
+	cn.cn_cred = p->p_ucred;
+	cn.cn_nameptr = cn.cn_pnbuf;
+	cn.cn_hash = un->un_hash;
+	cn.cn_consume = 0;
+
+	VREF(un->un_dirvp);
+	if (error = relookup(un->un_dirvp, &vp, &cn))
+		return (error);
+	vrele(un->un_dirvp);
+
+	/* the name exists after all: abandon the create */
+	if (vp) {
+		VOP_ABORTOP(un->un_dirvp, &cn);
+		if (un->un_dirvp == vp)
+			vrele(un->un_dirvp);
+		else
+			vput(un->un_dirvp);
+		vrele(vp);
+		return (EEXIST);
+	}
+
+	/*
+	 * Good - there was no race to create the file
+	 * so go ahead and create it.  The permissions
+	 * on the file will be 0666 modified by the
+	 * current user's umask.  Access to the file, while
+	 * it is unioned, will require access to the top *and*
+	 * bottom files.  Access when not unioned will simply
+	 * require access to the top-level file.
+	 * TODO: confirm choice of access permissions.
+	 */
+	VATTR_NULL(vap);
+	vap->va_type = VREG;
+	vap->va_mode = cmode;
+	LEASE_CHECK(un->un_dirvp, p, cred, LEASE_WRITE);
+	if (error = VOP_CREATE(un->un_dirvp, &vp, &cn, vap))
+		return (error);
+
+	if (error = VOP_OPEN(vp, fmode, cred, p)) {
+		vput(vp);
+		return (error);
+	}
+
+	/* the file was opened for writing; account for that on the vnode */
+	vp->v_writecount++;
+	*vpp = vp;
+	return (0);
+}
+
+/*
+ * Close a vnode that was opened by union_vn_create.
+ * If it was open for writing (FWRITE set in (fmode)) the vnode's
+ * writer count is dropped first.  (cred) and (p) are handed on to
+ * VOP_CLOSE, mirroring the VOP_OPEN(vp, fmode, cred, p) call made
+ * in union_vn_create.
+ *
+ * returns whatever VOP_CLOSE returns.
+ */
+int
+union_vn_close(vp, fmode, cred, p)
+	struct vnode *vp;
+	int fmode;
+	struct ucred *cred;
+	struct proc *p;
+{
+	if (fmode & FWRITE)
+		--vp->v_writecount;
+	return (VOP_CLOSE(vp, fmode, cred, p));
+}
+
+/*
+ * The upper layer object of (un) no longer exists.  If the node has
+ * the upper vnode marked as locked (UN_ULOCK), clear the mark and
+ * unlock it; then detach the upper vnode from the node.
+ */
+void
+union_removed_upper(un)
+	struct union_node *un;
+{
+	if ((un->un_flags & UN_ULOCK) != 0) {
+		un->un_flags &= ~UN_ULOCK;
+		VOP_UNLOCK(un->un_uppervp);
+	}
+
+	/* a nil upper vnode makes union_updatevp release the old one */
+	union_newupper(un, NULLVP);
+}
+
+/*
+ * Return the lower layer vnode of the union vnode (vp), which may
+ * be nil.  When there is a lower vnode of the same type as (vp), it
+ * is first acquired with vget(); if that fails NULLVP is returned
+ * instead.
+ */
+struct vnode *
+union_lowervp(vp)
+	struct vnode *vp;
+{
+	struct union_node *un = VTOUNION(vp);
+	struct vnode *lvp = un->un_lowervp;
+
+	if (lvp != NULLVP && lvp->v_type == vp->v_type &&
+	    vget(lvp, 0) != 0)
+		return (NULLVP);
+
+	return (un->un_lowervp);
+}
diff --git a/sys/fs/unionfs/union_vfsops.c b/sys/fs/unionfs/union_vfsops.c
new file mode 100644
index 00000000000..9fa27460e3d
--- /dev/null
+++ b/sys/fs/unionfs/union_vfsops.c
@@ -0,0 +1,550 @@
+/*
+ * Copyright (c) 1994 The Regents of the University of California.
+ * Copyright (c) 1994 Jan-Simon Pendry.
+ * All rights reserved.
+ *
+ * This code is derived from software donated to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)union_vfsops.c	8.7 (Berkeley) 3/5/94
+ */
+
+/*
+ * Union Layer
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/proc.h>
+#include <sys/vnode.h>
+#include <sys/mount.h>
+#include <sys/namei.h>
+#include <sys/malloc.h>
+#include <sys/filedesc.h>
+#include <sys/queue.h>
+#include <miscfs/union/union.h>
+
+/*
+ * Mount union filesystem
+ *
+ * (mp) is the mount being set up, (path) the user-space name of the
+ * mounted-on directory, (data) the user-space struct union_args,
+ * (ndp) a nameidata used to look up the target directory and (p)
+ * the mounting process.
+ *
+ * Returns 0 on success.  On failure every reference, credential and
+ * allocation made so far is released and an errno value is returned
+ * (EOPNOTSUPP for an update mount, EACCES if the real user neither
+ * owns the mounted-on directory nor is uid 0, EINVAL for a target
+ * that is not a directory or an unknown mount operation, or the
+ * error from VOP_GETATTR, VOP_ACCESS, copyin or namei).
+ */
+int
+union_mount(mp, path, data, ndp, p)
+	struct mount *mp;
+	char *path;
+	caddr_t data;
+	struct nameidata *ndp;
+	struct proc *p;
+{
+	int error = 0;
+	struct union_args args;
+	struct vnode *lowerrootvp = NULLVP;
+	struct vnode *upperrootvp = NULLVP;
+	struct union_mount *um;
+	struct ucred *cred = 0;
+	struct ucred *scred;
+	struct vattr va;
+	char *cp;
+	int len;
+	u_int size;
+
+#ifdef UNION_DIAGNOSTIC
+	printf("union_mount(mp = %x)\n", mp);
+#endif
+
+	/*
+	 * Update is not supported
+	 */
+	if (mp->mnt_flag & MNT_UPDATE) {
+		/*
+		 * Need to provide.
+		 * 1. a way to convert between rdonly and rdwr mounts.
+		 * 2. support for nfs exports.
+		 */
+		error = EOPNOTSUPP;
+		goto bad;
+	}
+
+	/*
+	 * Take a copy of the process's credentials.  This isn't
+	 * quite right since the euid will always be zero and we
+	 * want to get the "real" users credentials.  So fix up
+	 * the uid field after taking the copy.
+	 */
+	cred = crdup(p->p_ucred);
+	cred->cr_uid = p->p_cred->p_ruid;
+
+	/*
+	 * Ensure the *real* user has write permission on the
+	 * mounted-on directory.  This allows the mount_union
+	 * command to be made setuid root so allowing anyone
+	 * to do union mounts onto any directory on which they
+	 * have write permission and which they also own.
+	 */
+	error = VOP_GETATTR(mp->mnt_vnodecovered, &va, cred, p);
+	if (error)
+		goto bad;
+	if ((va.va_uid != cred->cr_uid) &&
+	    (cred->cr_uid != 0)) {
+		error = EACCES;
+		goto bad;
+	}
+	error = VOP_ACCESS(mp->mnt_vnodecovered, VWRITE, cred, p);
+	if (error)
+		goto bad;
+
+	/*
+	 * Get argument
+	 */
+	if (error = copyin(data, (caddr_t)&args, sizeof(struct union_args)))
+		goto bad;
+
+	lowerrootvp = mp->mnt_vnodecovered;
+	VREF(lowerrootvp);
+
+	/*
+	 * Find upper node.  Use the real process credentials,
+	 * not the effective ones since this will have come
+	 * through a setuid process (mount_union).  All this
+	 * messing around with permissions is entirely bogus
+	 * and should be removed by allowing any user straight
+	 * past the mount system call.
+	 *
+	 * NOTE(review): p->p_ucred is restored before namei() runs;
+	 * whether the lookup really uses (cred) depends on when
+	 * NDINIT/namei pick up the credential - confirm.
+	 */
+	scred = p->p_ucred;
+	p->p_ucred = cred;
+	NDINIT(ndp, LOOKUP, FOLLOW|WANTPARENT,
+	       UIO_USERSPACE, args.target, p);
+	p->p_ucred = scred;
+
+	if (error = namei(ndp))
+		goto bad;
+
+	/* keep the target itself; the parent directory is not needed */
+	upperrootvp = ndp->ni_vp;
+	vrele(ndp->ni_dvp);
+	ndp->ni_dvp = NULL;
+
+	if (upperrootvp->v_type != VDIR) {
+		error = EINVAL;
+		goto bad;
+	}
+
+	um = (struct union_mount *) malloc(sizeof(struct union_mount),
+				M_UFSMNT, M_WAITOK);	/* XXX */
+
+	/*
+	 * Keep a held reference to the target vnodes.
+	 * They are vrele'd in union_unmount.
+	 *
+	 * Depending on the _BELOW flag, the filesystems are
+	 * viewed in a different order.  In effect, this is the
+	 * same as providing a mount under option to the mount syscall.
+	 */
+
+	um->um_op = args.mntflags & UNMNT_OPMASK;
+	switch (um->um_op) {
+	case UNMNT_ABOVE:
+		um->um_lowervp = lowerrootvp;
+		um->um_uppervp = upperrootvp;
+		break;
+
+	case UNMNT_BELOW:
+		um->um_lowervp = upperrootvp;
+		um->um_uppervp = lowerrootvp;
+		break;
+
+	case UNMNT_REPLACE:
+		vrele(lowerrootvp);
+		lowerrootvp = NULLVP;
+		um->um_uppervp = upperrootvp;
+		um->um_lowervp = lowerrootvp;
+		break;
+
+	default:
+		/*
+		 * Unknown operation.  The union_mount structure has
+		 * not been attached to (mp) yet, so it has to be
+		 * freed here; the common error exit does not know
+		 * about it.
+		 */
+		free(um, M_UFSMNT);	/* XXX */
+		error = EINVAL;
+		goto bad;
+	}
+
+	um->um_cred = cred;
+	um->um_cmode = UN_DIRMODE &~ p->p_fd->fd_cmask;
+
+	/*
+	 * Depending on what you think the MNT_LOCAL flag might mean,
+	 * you may want the && to be || on the conditional below.
+	 * At the moment it has been defined that the filesystem is
+	 * only local if it is all local, ie the MNT_LOCAL flag implies
+	 * that the entire namespace is local.  If you think the MNT_LOCAL
+	 * flag implies that some of the files might be stored locally
+	 * then you will want to change the conditional.
+	 */
+	if (um->um_op == UNMNT_ABOVE) {
+		if (((um->um_lowervp == NULLVP) ||
+		     (um->um_lowervp->v_mount->mnt_flag & MNT_LOCAL)) &&
+		    (um->um_uppervp->v_mount->mnt_flag & MNT_LOCAL))
+			mp->mnt_flag |= MNT_LOCAL;
+	}
+
+	/*
+	 * Copy in the upper layer's RDONLY flag.  This is for the benefit
+	 * of lookup() which explicitly checks the flag, rather than asking
+	 * the filesystem for its own opinion.  This means that an update
+	 * mount of the underlying filesystem to go from rdonly to rdwr
+	 * will leave the unioned view as read-only.
+	 */
+	mp->mnt_flag |= (um->um_uppervp->v_mount->mnt_flag & MNT_RDONLY);
+
+	/*
+	 * This is a user mount.  Privilege check for unmount
+	 * will be done in union_unmount.
+	 */
+	mp->mnt_flag |= MNT_USER;
+
+	mp->mnt_data = (qaddr_t) um;
+	getnewfsid(mp, MOUNT_UNION);
+
+	/* record the mounted-on name, zero-filling the rest of the field */
+	(void) copyinstr(path, mp->mnt_stat.f_mntonname, MNAMELEN - 1, &size);
+	bzero(mp->mnt_stat.f_mntonname + size, MNAMELEN - size);
+
+	/* the mounted-from name is a tag for the operation plus the target */
+	switch (um->um_op) {
+	case UNMNT_ABOVE:
+		cp = "<above>";
+		break;
+	case UNMNT_BELOW:
+		cp = "<below>";
+		break;
+	case UNMNT_REPLACE:
+		cp = "";
+		break;
+	}
+	len = strlen(cp);
+	bcopy(cp, mp->mnt_stat.f_mntfromname, len);
+
+	cp = mp->mnt_stat.f_mntfromname + len;
+	len = MNAMELEN - len;
+
+	(void) copyinstr(args.target, cp, len - 1, &size);
+	bzero(cp + size, len - size);
+
+#ifdef UNION_DIAGNOSTIC
+	printf("union_mount: from %s, on %s\n",
+		mp->mnt_stat.f_mntfromname, mp->mnt_stat.f_mntonname);
+#endif
+	return (0);
+
+bad:
+	/* undo whatever had been acquired when the error was hit */
+	if (cred)
+		crfree(cred);
+	if (upperrootvp)
+		vrele(upperrootvp);
+	if (lowerrootvp)
+		vrele(lowerrootvp);
+	return (error);
+}
+
+/*
+ * VFS start.  There is nothing to do at this layer: the start
+ * routine of each underlying filesystem will have been called
+ * when that filesystem was mounted.  Always returns 0.
+ */
+int
+union_start(mp, flags, p)
+	struct mount *mp;
+	int flags;
+	struct proc *p;
+{
+	int error = 0;
+
+	return (error);
+}
+
+/*
+ * Unmount a union filesystem and free its reference to the
+ * union layer.
+ *
+ * Returns 0 on success; the error from suser() if the caller is
+ * neither the user who did the mount nor the superuser; EINVAL for
+ * a forced unmount when doforce is not set; EBUSY if the root vnode
+ * has references beyond the one taken here; or the error from
+ * union_root() or vflush().
+ */
+int
+union_unmount(mp, mntflags, p)
+	struct mount *mp;
+	int mntflags;
+	struct proc *p;
+{
+	extern int doforce;
+	struct union_mount *um = MOUNTTOUNIONMOUNT(mp);
+	struct vnode *um_rootvp;
+	int flags = 0;
+	int error;
+
+#ifdef UNION_DIAGNOSTIC
+	printf("union_unmount(mp = %x)\n", mp);
+#endif
+
+	/* only the mounter, or superuser can unmount */
+	if (p->p_cred->p_ruid != um->um_cred->cr_uid) {
+		error = suser(p->p_ucred, &p->p_acflag);
+		if (error)
+			return (error);
+	}
+
+	if (mntflags & MNT_FORCE) {
+		/* union can never be rootfs so don't check for it */
+		if (!doforce)
+			return (EINVAL);
+		flags |= FORCECLOSE;
+	}
+
+	/* get hold of the root vnode (returned referenced and locked) */
+	error = union_root(mp, &um_rootvp);
+	if (error)
+		return (error);
+
+	/* anything more than our own reference means it is in use */
+	if (um_rootvp->v_usecount > 1) {
+		vput(um_rootvp);
+		return (EBUSY);
+	}
+
+	/* flush every other vnode of this mount */
+	error = vflush(mp, um_rootvp, flags);
+	if (error) {
+		vput(um_rootvp);
+		return (error);
+	}
+
+#ifdef UNION_DIAGNOSTIC
+	vprint("alias root of lower", um_rootvp);
+#endif
+	/*
+	 * Discard references to upper and lower target vnodes,
+	 * and the credentials saved at mount time.
+	 */
+	if (um->um_lowervp)
+		vrele(um->um_lowervp);
+	vrele(um->um_uppervp);
+	crfree(um->um_cred);
+
+	/*
+	 * Release the reference on the root vnode, then
+	 * blow it away for future re-use
+	 */
+	vput(um_rootvp);
+	vgone(um_rootvp);
+
+	/*
+	 * Finally, throw away the union_mount structure
+	 */
+	free(mp->mnt_data, M_UFSMNT);	/* XXX */
+	mp->mnt_data = 0;
+
+	return (0);
+}
+
+/*
+ * Return, via (vpp), a locked reference to the root vnode of the
+ * union mount (mp).  The root is the union of the mount's upper and
+ * lower target vnodes and is obtained through union_allocvp().
+ *
+ * Returns 0 on success, otherwise the error from union_allocvp().
+ */
+int
+union_root(mp, vpp)
+	struct mount *mp;
+	struct vnode **vpp;
+{
+	struct union_mount *um = MOUNTTOUNIONMOUNT(mp);
+	int loselock = 0;
+	int error;
+
+#ifdef UNION_DIAGNOSTIC
+	printf("union_root(mp = %x, lvp = %x, uvp = %x)\n", mp,
+			um->um_lowervp,
+			um->um_uppervp);
+#endif
+
+	/*
+	 * union_allocvp expects a referenced, locked upper vnode.
+	 * For a "below" mount whose upper vnode is already locked
+	 * the lock is left alone, and loselock remembers that it
+	 * was not taken here.
+	 */
+	VREF(um->um_uppervp);
+	if ((um->um_op == UNMNT_BELOW) && VOP_ISLOCKED(um->um_uppervp))
+		loselock = 1;
+	else
+		VOP_LOCK(um->um_uppervp);
+
+	if (um->um_lowervp)
+		VREF(um->um_lowervp);
+
+	error = union_allocvp(vpp, mp, NULLVP, NULLVP,
+			      (struct componentname *) 0,
+			      um->um_uppervp, um->um_lowervp);
+
+	if (error == 0) {
+		(*vpp)->v_flag |= VROOT;
+		/* the upper lock is not ours, so don't claim it */
+		if (loselock)
+			VTOUNION(*vpp)->un_flags &= ~UN_ULOCK;
+		return (0);
+	}
+
+	/*
+	 * Failure: undo the lock and references taken above.
+	 * NOTE(review): union_allocvp's own error path also releases
+	 * the vnodes it was handed - confirm these are not released
+	 * twice.
+	 */
+	if (!loselock)
+		VOP_UNLOCK(um->um_uppervp);
+	vrele(um->um_uppervp);
+	if (um->um_lowervp)
+		vrele(um->um_lowervp);
+
+	return (error);
+}
+
+/*
+ * Quota control is not supported on a union mount.
+ * Always returns EOPNOTSUPP.
+ */
+int
+union_quotactl(mp, cmd, uid, arg, p)
+	struct mount *mp;
+	int cmd;
+	uid_t uid;
+	caddr_t arg;
+	struct proc *p;
+{
+	int error = EOPNOTSUPP;
+
+	return (error);
+}
+
+/*
+ * Report filesystem statistics for a union mount in (sbp).
+ *
+ * The block and file counts are the sums of the figures reported by
+ * the lower layer (when there is one) and the upper layer; the
+ * flags, block size and i/o size are those of the upper layer, and
+ * the type is MOUNT_UNION.  Unless (sbp) is the mount's own
+ * mnt_stat, the fsid and mount names are copied from it.
+ *
+ * Returns 0, or the error from VFS_STATFS on either layer.
+ */
+int
+union_statfs(mp, sbp, p)
+	struct mount *mp;
+	struct statfs *sbp;
+	struct proc *p;
+{
+	struct union_mount *um = MOUNTTOUNIONMOUNT(mp);
+	struct statfs mstat;
+	int lbsize;
+	int error;
+
+#ifdef UNION_DIAGNOSTIC
+	printf("union_statfs(mp = %x, lvp = %x, uvp = %x)\n", mp,
+			um->um_lowervp,
+			um->um_uppervp);
+#endif
+
+	/* with no lower layer its contribution stays all zeroes */
+	bzero(&mstat, sizeof(mstat));
+
+	if (um->um_lowervp != NULLVP) {
+		error = VFS_STATFS(um->um_lowervp->v_mount, &mstat, p);
+		if (error)
+			return (error);
+	}
+
+	/* now copy across the "interesting" information and fake the rest */
+#if 0
+	sbp->f_type = mstat.f_type;
+	sbp->f_flags = mstat.f_flags;
+	sbp->f_bsize = mstat.f_bsize;
+	sbp->f_iosize = mstat.f_iosize;
+#endif
+	/* start from the lower layer's counts, remembering its block size */
+	lbsize = mstat.f_bsize;
+	sbp->f_blocks = mstat.f_blocks;
+	sbp->f_bfree = mstat.f_bfree;
+	sbp->f_bavail = mstat.f_bavail;
+	sbp->f_files = mstat.f_files;
+	sbp->f_ffree = mstat.f_ffree;
+
+	/* mstat is reused for the upper layer from here on */
+	error = VFS_STATFS(um->um_uppervp->v_mount, &mstat, p);
+	if (error)
+		return (error);
+
+	sbp->f_type = MOUNT_UNION;
+	sbp->f_flags = mstat.f_flags;
+	sbp->f_bsize = mstat.f_bsize;
+	sbp->f_iosize = mstat.f_iosize;
+
+	/*
+	 * if the lower and upper blocksizes differ, then frig the
+	 * block counts so that the sizes reported by df make some
+	 * kind of sense.  none of this makes sense though.
+	 */
+	if (lbsize != mstat.f_bsize) {
+		sbp->f_blocks = sbp->f_blocks * lbsize / mstat.f_bsize;
+		sbp->f_bfree = sbp->f_bfree * lbsize / mstat.f_bsize;
+		sbp->f_bavail = sbp->f_bavail * lbsize / mstat.f_bsize;
+	}
+
+	/* add the upper layer's counts on top */
+	sbp->f_blocks += mstat.f_blocks;
+	sbp->f_bfree += mstat.f_bfree;
+	sbp->f_bavail += mstat.f_bavail;
+	sbp->f_files += mstat.f_files;
+	sbp->f_ffree += mstat.f_ffree;
+
+	if (sbp == &mp->mnt_stat)
+		return (0);
+
+	bcopy(&mp->mnt_stat.f_fsid, &sbp->f_fsid, sizeof(sbp->f_fsid));
+	bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN);
+	bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN);
+	return (0);
+}
+
+/*
+ * Sync a union mount.  Nothing is written from here.
+ * XXX - Assumes no data cached at union layer.
+ * Always returns 0.
+ */
+int
+union_sync(mp, waitfor, cred, p)
+	struct mount *mp;
+	int waitfor;
+	struct ucred *cred;
+	struct proc *p;
+{
+	int error = 0;
+
+	return (error);
+}
+
+/*
+ * Looking up a vnode by inode number is not supported on a union
+ * mount.  Always returns EOPNOTSUPP; (vpp) is not touched.
+ */
+int
+union_vget(mp, ino, vpp)
+	struct mount *mp;
+	ino_t ino;
+	struct vnode **vpp;
+{
+	int error = EOPNOTSUPP;
+
+	return (error);
+}
+
+/*
+ * Translating a file handle to a vnode is not supported on a union
+ * mount.  Always returns EOPNOTSUPP; none of the output arguments
+ * is touched.
+ */
+int
+union_fhtovp(mp, fidp, nam, vpp, exflagsp, credanonp)
+	struct mount *mp;
+	struct fid *fidp;
+	struct mbuf *nam;
+	struct vnode **vpp;
+	int *exflagsp;
+	struct ucred **credanonp;
+{
+	int error = EOPNOTSUPP;
+
+	return (error);
+}
+
+/*
+ * Translating a vnode to a file handle is not supported on a union
+ * mount.  Always returns EOPNOTSUPP; (fhp) is not touched.
+ */
+int
+union_vptofh(vp, fhp)
+	struct vnode *vp;
+	struct fid *fhp;
+{
+	int error = EOPNOTSUPP;
+
+	return (error);
+}
+
+/* defined in union_subr.c */
+int union_init __P((void));
+
+/*
+ * VFS operations table for the union filesystem.  The initializer is
+ * positional, so the order of the entries has to follow the member
+ * order of struct vfsops (declared elsewhere).
+ */
+struct vfsops union_vfsops = {
+	union_mount,
+	union_start,
+	union_unmount,
+	union_root,
+	union_quotactl,
+	union_statfs,
+	union_sync,
+	union_vget,
+	union_fhtovp,
+	union_vptofh,
+	union_init,
+};
diff --git a/sys/fs/unionfs/union_vnops.c b/sys/fs/unionfs/union_vnops.c
new file mode 100644
index 00000000000..96327b0922d
--- /dev/null
+++ b/sys/fs/unionfs/union_vnops.c
@@ -0,0 +1,1495 @@
+/*
+ * Copyright (c) 1992, 1993, 1994 The Regents of the University of California.
+ * Copyright (c) 1992, 1993, 1994 Jan-Simon Pendry.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)union_vnops.c	8.6 (Berkeley) 2/17/94
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/file.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/vnode.h>
+#include <sys/mount.h>
+#include <sys/namei.h>
+#include <sys/malloc.h>
+#include <sys/buf.h>
+#include <sys/queue.h>
+#include <miscfs/union/union.h>
+
+#define FIXUP(un) { \
+	if (((un)->un_flags & UN_ULOCK) == 0) { \
+		union_fixup(un); \
+	} \
+}
+
+/*
+ * Lock the upper vnode of a union node and record the fact by setting
+ * UN_ULOCK.  The FIXUP() macro calls this only when UN_ULOCK is clear.
+ */
+static void
+union_fixup(un)
+	struct union_node *un;
+{
+	struct vnode *uppervp = un->un_uppervp;
+
+	VOP_LOCK(uppervp);
+	un->un_flags |= UN_ULOCK;
+}
+
+/*
+ * Look up the component described by cnp in directory dvp of a single
+ * layer, crossing mount points in both directions.  udvp is the vnode
+ * the walk up the mount hierarchy must not step across.
+ *
+ * Returns 0 and stores the resulting vnode in *vpp, or returns the
+ * error from VOP_LOOKUP or VFS_ROOT, in which case *vpp is not written.
+ */
+static int
+union_lookup1(udvp, dvp, vpp, cnp)
+	struct vnode *udvp;
+	struct vnode *dvp;
+	struct vnode **vpp;
+	struct componentname *cnp;
+{
+	struct mount *mp;
+	struct vnode *tdvp;
+	int error;
+
+	/*
+	 * A ".." lookup that starts at the root of a mounted filesystem
+	 * continues from the vnode that filesystem covers, which is what
+	 * lookup would do.  The NOCROSSMOUNT check is deliberately not
+	 * made at this level: by definition, union fs deals with
+	 * namespaces, not filesystems.
+	 */
+	if (cnp->cn_flags & ISDOTDOT) {
+		while (dvp->v_flag & VROOT) {
+			tdvp = dvp;
+			dvp = dvp->v_mount->mnt_vnodecovered;
+			vput(tdvp);
+			VREF(dvp);
+			VOP_LOCK(dvp);
+		}
+	}
+
+	error = VOP_LOOKUP(dvp, &tdvp, cnp);
+	if (error != 0)
+		return (error);
+
+	/*
+	 * The parent directory will have been unlocked, unless lookup
+	 * found the last component.  Re-lock it in that case so that
+	 * union_lookup can unlock it again.
+	 */
+	if (dvp != tdvp && !(cnp->cn_flags & ISLASTCN))
+		VOP_LOCK(dvp);
+
+	dvp = tdvp;
+
+	/*
+	 * If the vnode found is itself a mount point, move to the root of
+	 * whatever is mounted there, repeatedly, but never step across
+	 * udvp.
+	 */
+	while (dvp != udvp && dvp->v_type == VDIR &&
+	    (mp = dvp->v_mountedhere) != NULL) {
+		if (mp->mnt_flag & MNT_MLOCK) {
+			/* Mount is locked: sleep on it, then look again. */
+			mp->mnt_flag |= MNT_MWAIT;
+			sleep((caddr_t) mp, PVFS);
+			continue;
+		}
+
+		/* dvp is released whether or not VFS_ROOT succeeded. */
+		error = VFS_ROOT(mp, &tdvp);
+		vput(dvp);
+		if (error != 0)
+			return (error);
+		dvp = tdvp;
+	}
+
+	*vpp = dvp;
+	return (0);
+}
+
+/*
+ * Look up a name in a union directory.
+ *
+ * The component is looked up in the upper directory (if any) and then
+ * in the lower directory (if any) via union_lookup1().  If either layer
+ * consumed extra pathname (cn_consume != 0) that layer's vnode is handed
+ * back directly.  Otherwise the two results are combined into a union
+ * vnode by union_allocvp(); a directory that exists only in the lower
+ * layer first gets a shadow directory in the upper layer through
+ * union_mkshadow().
+ *
+ * LOCKPARENT is forced on for the per-layer lookups and restored to the
+ * caller's setting before returning.
+ */
+int
+union_lookup(ap)
+	struct vop_lookup_args /* {
+		struct vnodeop_desc *a_desc;
+		struct vnode *a_dvp;
+		struct vnode **a_vpp;
+		struct componentname *a_cnp;
+	} */ *ap;
+{
+	int error;
+	int uerror, lerror;
+	struct vnode *uppervp, *lowervp;
+	struct vnode *upperdvp, *lowerdvp;
+	struct vnode *dvp = ap->a_dvp;
+	struct union_node *dun = VTOUNION(dvp);
+	struct componentname *cnp = ap->a_cnp;
+	int lockparent = cnp->cn_flags & LOCKPARENT;
+	struct union_mount *um = MOUNTTOUNIONMOUNT(dvp->v_mount);
+	struct ucred *saved_cred;
+
+	cnp->cn_flags |= LOCKPARENT;
+
+	upperdvp = dun->un_uppervp;
+	lowerdvp = dun->un_lowervp;
+	uppervp = NULLVP;
+	lowervp = NULLVP;
+
+	/*
+	 * do the lookup in the upper level.
+	 * if that level consumes additional pathnames,
+	 * then assume that something special is going
+	 * on and just return that vnode.
+	 */
+	if (upperdvp) {
+		FIXUP(dun);
+		uerror = union_lookup1(um->um_uppervp, upperdvp,
+					&uppervp, cnp);
+		/*if (uppervp == upperdvp)
+			dun->un_flags |= UN_KLOCK;*/
+
+		if (cnp->cn_consume != 0) {
+			*ap->a_vpp = uppervp;
+			if (!lockparent)
+				cnp->cn_flags &= ~LOCKPARENT;
+			return (uerror);
+		}
+	} else {
+		uerror = ENOENT;
+	}
+
+	/*
+	 * in a similar way to the upper layer, do the lookup
+	 * in the lower layer.   this time, if there is some
+	 * component magic going on, then vput whatever we got
+	 * back from the upper layer and return the lower vnode
+	 * instead.
+	 */
+	if (lowerdvp) {
+		int nameiop;
+
+		VOP_LOCK(lowerdvp);
+
+		/*
+		 * Only do a LOOKUP on the bottom node, since
+		 * we won't be making changes to it anyway.
+		 */
+		nameiop = cnp->cn_nameiop;
+		cnp->cn_nameiop = LOOKUP;
+		/*
+		 * For a UNMNT_BELOW mount the lower lookup runs with the
+		 * credentials stored in the union mount; the caller's
+		 * credentials are put back right afterwards.
+		 */
+		if (um->um_op == UNMNT_BELOW) {
+			saved_cred = cnp->cn_cred;
+			cnp->cn_cred = um->um_cred;
+		}
+		lerror = union_lookup1(um->um_lowervp, lowerdvp,
+				&lowervp, cnp);
+		if (um->um_op == UNMNT_BELOW)
+			cnp->cn_cred = saved_cred;
+		cnp->cn_nameiop = nameiop;
+
+		if (lowervp != lowerdvp)
+			VOP_UNLOCK(lowerdvp);
+
+		if (cnp->cn_consume != 0) {
+			if (uppervp) {
+				if (uppervp == upperdvp)
+					vrele(uppervp);
+				else
+					vput(uppervp);
+				uppervp = NULLVP;
+			}
+			*ap->a_vpp = lowervp;
+			if (!lockparent)
+				cnp->cn_flags &= ~LOCKPARENT;
+			return (lerror);
+		}
+	} else {
+		lerror = ENOENT;
+	}
+
+	if (!lockparent)
+		cnp->cn_flags &= ~LOCKPARENT;
+
+	/*
+	 * at this point, we have uerror and lerror indicating
+	 * possible errors with the lookups in the upper and lower
+	 * layers.  additionally, uppervp and lowervp are (locked)
+	 * references to existing vnodes in the upper and lower layers.
+	 *
+	 * there are now three cases to consider.
+	 * 1. if both layers returned an error, then return whatever
+	 *    error the upper layer generated.
+	 *
+	 * 2. if the top layer failed and the bottom layer succeeded
+	 *    then two subcases occur.
+	 *    a.  the bottom vnode is not a directory, in which
+	 *	  case just return a new union vnode referencing
+	 *	  an empty top layer and the existing bottom layer.
+	 *    b.  the bottom vnode is a directory, in which case
+	 *	  create a new directory in the top-level and
+	 *	  continue as in case 3.
+	 *
+	 * 3. if the top layer succeeded then return a new union
+	 *    vnode referencing whatever the new top layer and
+	 *    whatever the bottom layer returned.
+	 */
+
+	*ap->a_vpp = NULLVP;
+
+	/* case 1. */
+	if ((uerror != 0) && (lerror != 0)) {
+		return (uerror);
+	}
+
+	/* case 2. */
+	if (uerror != 0 /* && (lerror == 0) */ ) {
+		if (lowervp->v_type == VDIR) { /* case 2b. */
+			/*
+			 * The upper directory is unlocked around the
+			 * union_mkshadow() call and re-locked afterwards.
+			 */
+			dun->un_flags &= ~UN_ULOCK;
+			VOP_UNLOCK(upperdvp);
+			uerror = union_mkshadow(um, upperdvp, cnp, &uppervp);
+			VOP_LOCK(upperdvp);
+			dun->un_flags |= UN_ULOCK;
+
+			if (uerror) {
+				if (lowervp) {
+					vput(lowervp);
+					lowervp = NULLVP;
+				}
+				return (uerror);
+			}
+		}
+	}
+
+	if (lowervp)
+		VOP_UNLOCK(lowervp);
+
+	error = union_allocvp(ap->a_vpp, dvp->v_mount, dvp, upperdvp, cnp,
+			      uppervp, lowervp);
+
+	if (error) {
+		/* lowervp was unlocked above, so it is only vrele'd here. */
+		if (uppervp)
+			vput(uppervp);
+		if (lowervp)
+			vrele(lowervp);
+	} else {
+		if (*ap->a_vpp != dvp)
+			if (!lockparent || !(cnp->cn_flags & ISLASTCN))
+				VOP_UNLOCK(dvp);
+	}
+
+	return (error);
+}
+
+/*
+ * Create a file in a union directory.
+ *
+ * The create is passed to the upper directory; the vnode it returns is
+ * wrapped in a union vnode by union_allocvp() and stored in *a_vpp.
+ * Without an upper directory the union directory is released and EROFS
+ * is returned.
+ */
+int
+union_create(ap)
+	struct vop_create_args /* {
+		struct vnode *a_dvp;
+		struct vnode **a_vpp;
+		struct componentname *a_cnp;
+		struct vattr *a_vap;
+	} */ *ap;
+{
+	struct union_node *un = VTOUNION(ap->a_dvp);
+	struct vnode *dvp = un->un_uppervp;
+
+	if (dvp) {
+		int error;
+		struct vnode *vp;
+		struct mount *mp;
+
+		FIXUP(un);
+
+		VREF(dvp);
+		un->un_flags |= UN_KLOCK;
+		/*
+		 * Pick up the mount point before vput() drops our
+		 * reference to the union directory; a_dvp must not be
+		 * dereferenced after that.
+		 */
+		mp = ap->a_dvp->v_mount;
+		vput(ap->a_dvp);
+		error = VOP_CREATE(dvp, &vp, ap->a_cnp, ap->a_vap);
+		if (error)
+			return (error);
+
+		error = union_allocvp(
+				ap->a_vpp,
+				mp,
+				ap->a_dvp,
+				NULLVP,
+				ap->a_cnp,
+				vp,
+				NULLVP);
+		if (error)
+			vput(vp);
+		return (error);
+	}
+
+	vput(ap->a_dvp);
+	return (EROFS);
+}
+
+/*
+ * Create a node in a union directory.
+ *
+ * The mknod is passed to the upper directory.  If it hands back a
+ * vnode, that vnode is wrapped in a union vnode by union_allocvp() and
+ * stored in *a_vpp; if it hands back none, *a_vpp is left untouched.
+ * Without an upper directory the union directory is released and EROFS
+ * is returned.
+ */
+int
+union_mknod(ap)
+	struct vop_mknod_args /* {
+		struct vnode *a_dvp;
+		struct vnode **a_vpp;
+		struct componentname *a_cnp;
+		struct vattr *a_vap;
+	} */ *ap;
+{
+	struct union_node *un = VTOUNION(ap->a_dvp);
+	struct vnode *dvp = un->un_uppervp;
+
+	if (dvp) {
+		int error;
+		struct vnode *vp;
+		struct mount *mp;
+
+		FIXUP(un);
+
+		VREF(dvp);
+		un->un_flags |= UN_KLOCK;
+		/*
+		 * Pick up the mount point before vput() drops our
+		 * reference to the union directory; a_dvp must not be
+		 * dereferenced after that.
+		 */
+		mp = ap->a_dvp->v_mount;
+		vput(ap->a_dvp);
+		error = VOP_MKNOD(dvp, &vp, ap->a_cnp, ap->a_vap);
+		if (error)
+			return (error);
+
+		if (vp) {
+			error = union_allocvp(
+					ap->a_vpp,
+					mp,
+					ap->a_dvp,
+					NULLVP,
+					ap->a_cnp,
+					vp,
+					NULLVP);
+			if (error)
+				vput(vp);
+		}
+		return (error);
+	}
+
+	vput(ap->a_dvp);
+	return (EROFS);
+}
+
+/*
+ * Open a union vnode.
+ *
+ * With an upper vnode present, that vnode is opened.  Otherwise the
+ * lower vnode is opened and counted in un_openl -- unless a regular
+ * file is being opened for writing.  In that case a new upper file is
+ * created, the lower file's contents are copied into it (skipped when
+ * O_TRUNC is given), the opens counted in un_openl are moved from the
+ * lower to the upper vnode, and the upper vnode is opened in the
+ * requested mode.
+ *
+ * Returns 0 or the first error met while creating, copying or opening.
+ */
+int
+union_open(ap)
+	struct vop_open_args /* {
+		struct vnodeop_desc *a_desc;
+		struct vnode *a_vp;
+		int a_mode;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct union_node *un = VTOUNION(ap->a_vp);
+	struct vnode *tvp;
+	int mode = ap->a_mode;
+	struct ucred *cred = ap->a_cred;
+	struct proc *p = ap->a_p;
+	int error;
+
+	/*
+	 * If there is an existing upper vp then simply open that.
+	 */
+	tvp = un->un_uppervp;
+	if (tvp == NULLVP) {
+		/*
+		 * If the lower vnode is being opened for writing, then
+		 * copy the file contents to the upper vnode and open that,
+		 * otherwise can simply open the lower vnode.
+		 */
+		tvp = un->un_lowervp;
+		if ((ap->a_mode & FWRITE) && (tvp->v_type == VREG)) {
+			struct vnode *vp;
+			int i;
+
+			/*
+			 * Open the named file in the upper layer.  Note that
+			 * the file may have come into existence *since* the
+			 * lookup was done, since the upper layer may really
+			 * be a loopback mount of some other filesystem...
+			 * so open the file with exclusive create and barf if
+			 * it already exists.
+			 * XXX - perhaps should re-lookup the node (once more
+			 * with feeling) and simply open that.  Who knows.
+			 */
+			error = union_vn_create(&vp, un, p);
+			if (error)
+				return (error);
+
+			/* at this point, uppervp is locked */
+			union_newupper(un, vp);
+			un->un_flags |= UN_ULOCK;
+
+			/*
+			 * Now, if the file is being opened with truncation,
+			 * then the (new) upper vnode is ready to fly,
+			 * otherwise the data from the lower vnode must be
+			 * copied to the upper layer first.  This only works
+			 * for regular files (check is made above).
+			 */
+			if ((mode & O_TRUNC) == 0) {
+				/*
+				 * XXX - should not ignore errors
+				 * from VOP_CLOSE
+				 */
+				VOP_LOCK(tvp);
+				error = VOP_OPEN(tvp, FREAD, cred, p);
+				if (error == 0) {
+					error = union_copyfile(p, cred,
+						       tvp, un->un_uppervp);
+					VOP_UNLOCK(tvp);
+					/*
+					 * VOP_CLOSE takes (vp, fflag, cred,
+					 * p); pass the same cred and p that
+					 * the matching VOP_OPEN received.
+					 */
+					(void) VOP_CLOSE(tvp, FREAD, cred, p);
+				} else {
+					VOP_UNLOCK(tvp);
+				}
+
+#ifdef UNION_DIAGNOSTIC
+				if (!error)
+					uprintf("union: copied up %s\n",
+								un->un_path);
+#endif
+			}
+
+			/*
+			 * Close the descriptor union_vn_create opened; the
+			 * upper vnode is unlocked around the close and
+			 * locked again afterwards.
+			 */
+			un->un_flags &= ~UN_ULOCK;
+			VOP_UNLOCK(un->un_uppervp);
+			union_vn_close(un->un_uppervp, FWRITE, cred, p);
+			VOP_LOCK(un->un_uppervp);
+			un->un_flags |= UN_ULOCK;
+
+			/*
+			 * Subsequent IOs will go to the top layer, so
+			 * call close on the lower vnode and open on the
+			 * upper vnode to ensure that the filesystem keeps
+			 * its references counts right.  This doesn't do
+			 * the right thing with (cred) and (FREAD) though.
+			 * Ignoring error returns is not right, either.
+			 */
+			for (i = 0; i < un->un_openl; i++) {
+				(void) VOP_CLOSE(tvp, FREAD, cred, p);
+				(void) VOP_OPEN(un->un_uppervp, FREAD, cred, p);
+			}
+			un->un_openl = 0;
+
+			if (error == 0)
+				error = VOP_OPEN(un->un_uppervp, mode, cred, p);
+			return (error);
+		}
+
+		/*
+		 * Just open the lower vnode
+		 */
+		un->un_openl++;
+		VOP_LOCK(tvp);
+		error = VOP_OPEN(tvp, mode, cred, p);
+		VOP_UNLOCK(tvp);
+
+		return (error);
+	}
+
+	FIXUP(un);
+
+	error = VOP_OPEN(tvp, mode, cred, p);
+
+	return (error);
+}
+
+/*
+ * Close a union vnode: the close goes to the upper vnode when there is
+ * one, otherwise to the lower vnode after the count of opens held on
+ * the lower vnode (un_openl) has been decremented.
+ */
+int
+union_close(ap)
+	struct vop_close_args /* {
+		struct vnode *a_vp;
+		int  a_fflag;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct union_node *un = VTOUNION(ap->a_vp);
+	struct vnode *targetvp = un->un_uppervp;
+
+	if (targetvp == NULLVP) {
+#ifdef UNION_DIAGNOSTIC
+		if (un->un_openl <= 0)
+			panic("union: un_openl cnt");
+#endif
+		--un->un_openl;
+		targetvp = un->un_lowervp;
+	}
+
+	return (VOP_CLOSE(targetvp, ap->a_fflag, ap->a_cred, ap->a_p));
+}
+
+/*
+ * Check access permission on the union vnode.
+ * The access check being enforced is to check
+ * against both the underlying vnode, and any
+ * copied vnode.  This ensures that no additional
+ * file permissions are given away simply because
+ * the user caused an implicit file copy.
+ *
+ * With an upper vnode the answer is that of the upper vnode alone.
+ * With only a lower vnode the caller's credentials are checked against
+ * it and, on a UNMNT_BELOW mount, the credentials stored in the union
+ * mount are checked as well.  With neither vnode EACCES is returned.
+ */
+int
+union_access(ap)
+	struct vop_access_args /* {
+		struct vnodeop_desc *a_desc;
+		struct vnode *a_vp;
+		int a_mode;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct union_node *un = VTOUNION(ap->a_vp);
+	struct union_mount *um;
+	struct vnode *vp;
+	int error;
+
+	if ((vp = un->un_uppervp) != NULLVP) {
+		FIXUP(un);
+		return (VOP_ACCESS(vp, ap->a_mode, ap->a_cred, ap->a_p));
+	}
+
+	if ((vp = un->un_lowervp) == NULLVP)
+		return (EACCES);
+
+	VOP_LOCK(vp);
+	error = VOP_ACCESS(vp, ap->a_mode, ap->a_cred, ap->a_p);
+	if (error == 0) {
+		/*
+		 * The union mount hangs off the union vnode.  vp is the
+		 * lower vnode here and its v_mount belongs to the lower
+		 * filesystem, whose private mount data is not a
+		 * struct union_mount.
+		 */
+		um = MOUNTTOUNIONMOUNT(ap->a_vp->v_mount);
+
+		if (um->um_op == UNMNT_BELOW)
+			error = VOP_ACCESS(vp, ap->a_mode,
+					um->um_cred, ap->a_p);
+	}
+	VOP_UNLOCK(vp);
+
+	return (error);
+}
+
+/*
+ *  We handle getattr only to change the fsid.
+ *
+ * Attributes come from the upper vnode when there is one, otherwise
+ * from the lower vnode.  For a directory present in both layers the
+ * lower directory's link count is added to the upper one (see below).
+ * In every case the fsid reported to the caller is that of the union
+ * mount.
+ */
+int
+union_getattr(ap)
+	struct vop_getattr_args /* {
+		struct vnode *a_vp;
+		struct vattr *a_vap;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	int error;
+	struct union_node *un = VTOUNION(ap->a_vp);
+	struct vnode *vp;
+	struct vattr *vap;
+	struct vattr va;
+
+	/*
+	 * Some programs walk the filesystem hierarchy by counting
+	 * links to directories to avoid stat'ing all the time.
+	 * This means the link count on directories needs to be "correct".
+	 * The only way to do that is to call getattr on both layers
+	 * and fix up the link count.  The link count will not necessarily
+	 * be accurate but will be large enough to defeat the tree walkers.
+	 */
+
+	vap = ap->a_vap;
+
+	vp = un->un_uppervp;
+	if (vp != NULLVP) {
+		FIXUP(un);
+		error = VOP_GETATTR(vp, vap, ap->a_cred, ap->a_p);
+		if (error)
+			return (error);
+	}
+
+	/*
+	 * Decide whether the lower vnode must be queried too: always
+	 * when there is no upper vnode (straight into the caller's
+	 * vattr), and for upper directories (into the scratch vattr, for
+	 * the link count only).
+	 */
+	if (vp == NULLVP) {
+		vp = un->un_lowervp;
+	} else if (vp->v_type == VDIR) {
+		vp = un->un_lowervp;
+		vap = &va;
+	} else {
+		vp = NULLVP;
+	}
+
+	if (vp != NULLVP) {
+		VOP_LOCK(vp);
+		error = VOP_GETATTR(vp, vap, ap->a_cred, ap->a_p);
+		VOP_UNLOCK(vp);
+		if (error)
+			return (error);
+
+		/*
+		 * Only look at the scratch vattr once it has actually
+		 * been filled in; a directory without a lower vnode
+		 * leaves it uninitialized.
+		 */
+		if ((vap != ap->a_vap) && (vap->va_type == VDIR))
+			ap->a_vap->va_nlink += vap->va_nlink;
+	}
+
+	/* The fsid belongs in the caller's vattr, never in the scratch one. */
+	ap->a_vap->va_fsid = ap->a_vp->v_mount->mnt_stat.f_fsid.val[0];
+	return (0);
+}
+
+/*
+ * Set attributes on a union vnode.
+ *
+ * Attributes can only be set in the upper layer.  As a special case, a
+ * request to truncate a lower-only regular file to zero length first
+ * creates an empty upper file.  If there is still no upper vnode after
+ * that, EROFS is returned.
+ */
+int
+union_setattr(ap)
+	struct vop_setattr_args /* {
+		struct vnode *a_vp;
+		struct vattr *a_vap;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct union_node *un = VTOUNION(ap->a_vp);
+	struct vattr *vap = ap->a_vap;
+	struct vnode *newvp;
+	int error;
+
+	/*
+	 * Truncating a lower object to zero size is handled by creating
+	 * a zero length upper object; this covers open with O_TRUNC and
+	 * O_CREAT.  (un_lowervp is expected to be non-null whenever
+	 * un_uppervp is null.)
+	 */
+	if (un->un_uppervp == NULLVP &&
+	    un->un_lowervp->v_type == VREG &&
+	    vap->va_size == 0) {
+		error = union_vn_create(&newvp, un, ap->a_p);
+		if (error)
+			return (error);
+
+		/* at this point, the new upper vnode is locked */
+		union_newupper(un, newvp);
+
+		VOP_UNLOCK(newvp);
+		union_vn_close(un->un_uppervp, FWRITE, ap->a_cred, ap->a_p);
+		VOP_LOCK(newvp);
+		un->un_flags |= UN_ULOCK;
+	}
+
+	/* No upper layer object: nothing can be changed. */
+	if (un->un_uppervp == NULLVP)
+		return (EROFS);
+
+	FIXUP(un);
+	error = VOP_SETATTR(un->un_uppervp, vap, ap->a_cred, ap->a_p);
+	return (error);
+}
+
+/*
+ * Read from a union vnode by reading from the vnode OTHERVP() selects.
+ * If that is the lower vnode it is locked for the duration of the
+ * read; otherwise FIXUP() makes sure the upper vnode is locked.
+ */
+int
+union_read(ap)
+	struct vop_read_args /* {
+		struct vnode *a_vp;
+		struct uio *a_uio;
+		int  a_ioflag;
+		struct ucred *a_cred;
+	} */ *ap;
+{
+	struct vnode *targetvp = OTHERVP(ap->a_vp);
+	int error;
+
+	if (targetvp == LOWERVP(ap->a_vp)) {
+		VOP_LOCK(targetvp);
+		error = VOP_READ(targetvp, ap->a_uio, ap->a_ioflag,
+		    ap->a_cred);
+		VOP_UNLOCK(targetvp);
+	} else {
+		FIXUP(VTOUNION(ap->a_vp));
+		error = VOP_READ(targetvp, ap->a_uio, ap->a_ioflag,
+		    ap->a_cred);
+	}
+
+	return (error);
+}
+
+/*
+ * Write to a union vnode by writing to the vnode OTHERVP() selects.
+ * If that is the lower vnode it is locked for the duration of the
+ * write; otherwise FIXUP() makes sure the upper vnode is locked.
+ *
+ * The argument is a struct vop_write_args (it was declared as the read
+ * variant, which lists the same members in its comment).
+ */
+int
+union_write(ap)
+	struct vop_write_args /* {
+		struct vnode *a_vp;
+		struct uio *a_uio;
+		int  a_ioflag;
+		struct ucred *a_cred;
+	} */ *ap;
+{
+	int error;
+	struct vnode *vp = OTHERVP(ap->a_vp);
+	int dolock = (vp == LOWERVP(ap->a_vp));
+
+	if (dolock) {
+		VOP_LOCK(vp);
+	} else {
+		FIXUP(VTOUNION(ap->a_vp));
+	}
+	error = VOP_WRITE(vp, ap->a_uio, ap->a_ioflag, ap->a_cred);
+	if (dolock)
+		VOP_UNLOCK(vp);
+
+	return (error);
+}
+
+/*
+ * Pass an ioctl straight through to the vnode OTHERVP() selects.
+ */
+int
+union_ioctl(ap)
+	struct vop_ioctl_args /* {
+		struct vnode *a_vp;
+		int  a_command;
+		caddr_t  a_data;
+		int  a_fflag;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct vnode *targetvp = OTHERVP(ap->a_vp);
+
+	return (VOP_IOCTL(targetvp, ap->a_command, ap->a_data,
+	    ap->a_fflag, ap->a_cred, ap->a_p));
+}
+
+/*
+ * Pass a select straight through to the vnode OTHERVP() selects.
+ */
+int
+union_select(ap)
+	struct vop_select_args /* {
+		struct vnode *a_vp;
+		int  a_which;
+		int  a_fflags;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct vnode *targetvp = OTHERVP(ap->a_vp);
+
+	return (VOP_SELECT(targetvp, ap->a_which, ap->a_fflags,
+	    ap->a_cred, ap->a_p));
+}
+
+/*
+ * Pass an mmap request straight through to the vnode OTHERVP() selects.
+ */
+int
+union_mmap(ap)
+	struct vop_mmap_args /* {
+		struct vnode *a_vp;
+		int  a_fflags;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct vnode *targetvp = OTHERVP(ap->a_vp);
+
+	return (VOP_MMAP(targetvp, ap->a_fflags, ap->a_cred, ap->a_p));
+}
+
+/*
+ * Fsync the vnode OTHERVP() selects; with no such vnode there is
+ * nothing to do and 0 is returned.  A lower vnode is locked around the
+ * call, an upper vnode is locked through FIXUP().
+ */
+int
+union_fsync(ap)
+	struct vop_fsync_args /* {
+		struct vnode *a_vp;
+		struct ucred *a_cred;
+		int  a_waitfor;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct vnode *targetvp = OTHERVP(ap->a_vp);
+	int error;
+
+	if (targetvp == NULLVP)
+		return (0);
+
+	if (targetvp == LOWERVP(ap->a_vp)) {
+		VOP_LOCK(targetvp);
+		error = VOP_FSYNC(targetvp, ap->a_cred,
+		    ap->a_waitfor, ap->a_p);
+		VOP_UNLOCK(targetvp);
+	} else {
+		FIXUP(VTOUNION(ap->a_vp));
+		error = VOP_FSYNC(targetvp, ap->a_cred,
+		    ap->a_waitfor, ap->a_p);
+	}
+
+	return (error);
+}
+
+/*
+ * Pass a seek straight through to the vnode OTHERVP() selects.
+ */
+int
+union_seek(ap)
+	struct vop_seek_args /* {
+		struct vnode *a_vp;
+		off_t  a_oldoff;
+		off_t  a_newoff;
+		struct ucred *a_cred;
+	} */ *ap;
+{
+	struct vnode *targetvp = OTHERVP(ap->a_vp);
+
+	return (VOP_SEEK(targetvp, ap->a_oldoff, ap->a_newoff,
+	    ap->a_cred));
+}
+
+/*
+ * Remove a file from a union directory.
+ *
+ * The removal is only carried out when both the directory and the file
+ * have an upper vnode; it is then passed to the upper layer and, on
+ * success, union_removed_upper() is told about it.  Otherwise both
+ * union vnodes are released and EROFS is returned.
+ */
+int
+union_remove(ap)
+	struct vop_remove_args /* {
+		struct vnode *a_dvp;
+		struct vnode *a_vp;
+		struct componentname *a_cnp;
+	} */ *ap;
+{
+	struct union_node *dun = VTOUNION(ap->a_dvp);
+	struct union_node *un = VTOUNION(ap->a_vp);
+	struct vnode *upperdvp, *uppervp;
+	int error;
+
+	if (dun->un_uppervp == NULLVP || un->un_uppervp == NULLVP) {
+		/*
+		 * XXX: should create a whiteout here
+		 */
+		vput(ap->a_dvp);
+		vput(ap->a_vp);
+		return (EROFS);
+	}
+
+	upperdvp = dun->un_uppervp;
+	uppervp = un->un_uppervp;
+
+	/*
+	 * Swap each union vnode for a reference to its upper vnode.
+	 * UN_KLOCK is set before the vput so that union_unlock leaves
+	 * the upper vnode locked.
+	 */
+	FIXUP(dun);
+	VREF(upperdvp);
+	dun->un_flags |= UN_KLOCK;
+	vput(ap->a_dvp);
+
+	FIXUP(un);
+	VREF(uppervp);
+	un->un_flags |= UN_KLOCK;
+	vput(ap->a_vp);
+
+	error = VOP_REMOVE(upperdvp, uppervp, ap->a_cnp);
+	if (error == 0)
+		union_removed_upper(un);
+
+	/*
+	 * XXX: should create a whiteout here
+	 */
+	return (error);
+}
+
+/*
+ * Make a link in the upper layer.
+ *
+ * The link is only attempted when both a_vp and a_tdvp have an upper
+ * vnode; otherwise both are released and EROFS is returned.  Note that
+ * a_vp is released with vput and a_tdvp with vrele, and that the upper
+ * vnode of a_vp is passed to VOP_LINK first.
+ */
+int
+union_link(ap)
+	struct vop_link_args /* {
+		struct vnode *a_vp;
+		struct vnode *a_tdvp;
+		struct componentname *a_cnp;
+	} */ *ap;
+{
+	struct union_node *dun = VTOUNION(ap->a_vp);
+	struct union_node *un = VTOUNION(ap->a_tdvp);
+	struct vnode *firstvp, *secondvp;
+
+	if (dun->un_uppervp == NULLVP || un->un_uppervp == NULLVP) {
+		/*
+		 * XXX: need to copy to upper layer
+		 * and do the link there.
+		 */
+		vput(ap->a_vp);
+		vrele(ap->a_tdvp);
+		return (EROFS);
+	}
+
+	firstvp = dun->un_uppervp;
+	secondvp = un->un_uppervp;
+
+	/* UN_KLOCK keeps the upper vnode locked across the vput. */
+	FIXUP(dun);
+	VREF(firstvp);
+	dun->un_flags |= UN_KLOCK;
+	vput(ap->a_vp);
+
+	FIXUP(un);
+	VREF(secondvp);
+	vrele(ap->a_tdvp);
+
+	return (VOP_LINK(firstvp, secondvp, ap->a_cnp));
+}
+
+/*
+ * Rename within the upper layer.
+ *
+ * Each of the four vnodes that is a union vnode is replaced by a
+ * reference to its upper vnode, and the union vnode itself is released
+ * (vrele for the "from" pair, vput for the "to" pair).  The rename is
+ * then handed to VOP_RENAME on the substituted vnodes.  If any union
+ * vnode has no upper vnode, whatever is currently held is released and
+ * EROFS is returned.
+ */
+int
+union_rename(ap)
+	struct vop_rename_args  /* {
+		struct vnode *a_fdvp;
+		struct vnode *a_fvp;
+		struct componentname *a_fcnp;
+		struct vnode *a_tdvp;
+		struct vnode *a_tvp;
+		struct componentname *a_tcnp;
+	} */ *ap;
+{
+	struct vnode *fdvp = ap->a_fdvp;
+	struct vnode *fvp = ap->a_fvp;
+	struct vnode *tdvp = ap->a_tdvp;
+	struct vnode *tvp = ap->a_tvp;
+	struct union_node *fdun, *fun, *tdun, *tun;
+	int error;
+
+	/* Source directory. */
+	if (fdvp->v_op == union_vnodeop_p) {	/* always true */
+		fdun = VTOUNION(fdvp);
+		if (fdun->un_uppervp == NULLVP) {
+			error = EROFS;
+			goto bad;
+		}
+
+		FIXUP(fdun);
+		fdvp = fdun->un_uppervp;
+		VREF(fdvp);
+		vrele(ap->a_fdvp);
+	}
+
+	/* Source object. */
+	if (fvp->v_op == union_vnodeop_p) {	/* always true */
+		fun = VTOUNION(fvp);
+		if (fun->un_uppervp == NULLVP) {
+			error = EROFS;
+			goto bad;
+		}
+
+		FIXUP(fun);
+		fvp = fun->un_uppervp;
+		VREF(fvp);
+		vrele(ap->a_fvp);
+	}
+
+	/*
+	 * Target directory.  UN_KLOCK keeps the upper vnode locked
+	 * across the vput of the union vnode.
+	 */
+	if (tdvp->v_op == union_vnodeop_p) {
+		tdun = VTOUNION(tdvp);
+		if (tdun->un_uppervp == NULLVP) {
+			error = EROFS;
+			goto bad;
+		}
+
+		tdvp = tdun->un_uppervp;
+		VREF(tdvp);
+		tdun->un_flags |= UN_KLOCK;
+		vput(ap->a_tdvp);
+	}
+
+	/* Target object; a_tvp may be null. */
+	if (tvp && tvp->v_op == union_vnodeop_p) {
+		tun = VTOUNION(tvp);
+		if (tun->un_uppervp == NULLVP) {
+			error = EROFS;
+			goto bad;
+		}
+
+		tvp = tun->un_uppervp;
+		VREF(tvp);
+		tun->un_flags |= UN_KLOCK;
+		vput(ap->a_tvp);
+	}
+
+	return (VOP_RENAME(fdvp, fvp, ap->a_fcnp, tdvp, tvp, ap->a_tcnp));
+
+bad:
+	/*
+	 * Release whatever each variable currently holds: the upper
+	 * vnode where the substitution was made, the original vnode
+	 * where it was not.
+	 */
+	vrele(fdvp);
+	vrele(fvp);
+	vput(tdvp);
+	if (tvp)
+		vput(tvp);
+
+	return (error);
+}
+
+/*
+ * Create a directory in a union directory.
+ *
+ * The mkdir is passed to the upper directory; the vnode it returns is
+ * wrapped in a union vnode by union_allocvp() and stored in *a_vpp.
+ * Without an upper directory the union directory is released and EROFS
+ * is returned.
+ */
+int
+union_mkdir(ap)
+	struct vop_mkdir_args /* {
+		struct vnode *a_dvp;
+		struct vnode **a_vpp;
+		struct componentname *a_cnp;
+		struct vattr *a_vap;
+	} */ *ap;
+{
+	struct union_node *un = VTOUNION(ap->a_dvp);
+	struct vnode *dvp = un->un_uppervp;
+
+	if (dvp) {
+		int error;
+		struct vnode *vp;
+		struct mount *mp;
+
+		FIXUP(un);
+		VREF(dvp);
+		un->un_flags |= UN_KLOCK;
+		/*
+		 * Pick up the mount point before vput() drops our
+		 * reference to the union directory; a_dvp must not be
+		 * dereferenced after that.
+		 */
+		mp = ap->a_dvp->v_mount;
+		vput(ap->a_dvp);
+		error = VOP_MKDIR(dvp, &vp, ap->a_cnp, ap->a_vap);
+		if (error)
+			return (error);
+
+		error = union_allocvp(
+				ap->a_vpp,
+				mp,
+				ap->a_dvp,
+				NULLVP,
+				ap->a_cnp,
+				vp,
+				NULLVP);
+		if (error)
+			vput(vp);
+		return (error);
+	}
+
+	vput(ap->a_dvp);
+	return (EROFS);
+}
+
+/*
+ * Remove a directory from a union directory.
+ *
+ * The removal is only carried out when both the parent and the
+ * directory have an upper vnode; it is then passed to the upper layer
+ * and, on success, union_removed_upper() is told about it.  Otherwise
+ * both union vnodes are released and EROFS is returned.
+ */
+int
+union_rmdir(ap)
+	struct vop_rmdir_args /* {
+		struct vnode *a_dvp;
+		struct vnode *a_vp;
+		struct componentname *a_cnp;
+	} */ *ap;
+{
+	struct union_node *dun = VTOUNION(ap->a_dvp);
+	struct union_node *un = VTOUNION(ap->a_vp);
+	struct vnode *upperdvp, *uppervp;
+	int error;
+
+	if (dun->un_uppervp == NULLVP || un->un_uppervp == NULLVP) {
+		/*
+		 * XXX: should create a whiteout here
+		 */
+		vput(ap->a_dvp);
+		vput(ap->a_vp);
+		return (EROFS);
+	}
+
+	upperdvp = dun->un_uppervp;
+	uppervp = un->un_uppervp;
+
+	/*
+	 * Swap each union vnode for a reference to its upper vnode.
+	 * UN_KLOCK is set before the vput so that union_unlock leaves
+	 * the upper vnode locked.
+	 */
+	FIXUP(dun);
+	VREF(upperdvp);
+	dun->un_flags |= UN_KLOCK;
+	vput(ap->a_dvp);
+
+	FIXUP(un);
+	VREF(uppervp);
+	un->un_flags |= UN_KLOCK;
+	vput(ap->a_vp);
+
+	error = VOP_RMDIR(upperdvp, uppervp, ap->a_cnp);
+	if (error == 0)
+		union_removed_upper(un);
+
+	/*
+	 * XXX: should create a whiteout here
+	 */
+	return (error);
+}
+
+/*
+ * Create a symbolic link in a union directory.
+ *
+ * The symlink is passed to the upper directory.  No union vnode is
+ * built for the result: *a_vpp is always set to NULLVP and the vnode
+ * VOP_SYMLINK stores is not used.  Without an upper directory the union
+ * directory is released and EROFS is returned.
+ */
+int
+union_symlink(ap)
+	struct vop_symlink_args /* {
+		struct vnode *a_dvp;
+		struct vnode **a_vpp;
+		struct componentname *a_cnp;
+		struct vattr *a_vap;
+		char *a_target;
+	} */ *ap;
+{
+	struct union_node *un = VTOUNION(ap->a_dvp);
+	struct vnode *dvp = un->un_uppervp;
+
+	if (dvp) {
+		int error;
+		struct vnode *vp;
+
+		FIXUP(un);
+		VREF(dvp);
+		/* UN_KLOCK keeps the upper vnode locked across the vput. */
+		un->un_flags |= UN_KLOCK;
+		vput(ap->a_dvp);
+		error = VOP_SYMLINK(dvp, &vp, ap->a_cnp,
+					ap->a_vap, ap->a_target);
+		*ap->a_vpp = NULLVP;
+		return (error);
+	}
+
+	vput(ap->a_dvp);
+	return (EROFS);
+}
+
+/*
+ * union_readdir cooperates with getdirentries and readdir(3) to
+ * present the entries of the unioned directories: getdirentries
+ * walks down the union stack and readdir(3) removes duplicate
+ * names from the returned data stream.  All this routine does is
+ * read the upper directory; with no upper directory it returns 0
+ * without touching the uio.
+ */
+int
+union_readdir(ap)
+	struct vop_readdir_args /* {
+		struct vnodeop_desc *a_desc;
+		struct vnode *a_vp;
+		struct uio *a_uio;
+		struct ucred *a_cred;
+	} */ *ap;
+{
+	struct union_node *un = VTOUNION(ap->a_vp);
+
+	if (un->un_uppervp == NULLVP)
+		return (0);
+
+	FIXUP(un);
+	return (VOP_READDIR(un->un_uppervp, ap->a_uio, ap->a_cred));
+}
+
+/*
+ * Read a symbolic link through the vnode OTHERVP() selects.  A lower
+ * vnode is locked around the call, an upper vnode is locked through
+ * FIXUP().
+ */
+int
+union_readlink(ap)
+	struct vop_readlink_args /* {
+		struct vnode *a_vp;
+		struct uio *a_uio;
+		struct ucred *a_cred;
+	} */ *ap;
+{
+	struct vnode *targetvp = OTHERVP(ap->a_vp);
+	int error;
+
+	if (targetvp == LOWERVP(ap->a_vp)) {
+		VOP_LOCK(targetvp);
+		error = VOP_READLINK(targetvp, ap->a_uio, ap->a_cred);
+		VOP_UNLOCK(targetvp);
+	} else {
+		FIXUP(VTOUNION(ap->a_vp));
+		error = VOP_READLINK(targetvp, ap->a_uio, ap->a_cred);
+	}
+
+	return (error);
+}
+
+/*
+ * Abort a pending operation on the vnode OTHERVP() selects.
+ *
+ * The underlying vnode is only locked when the union node itself is
+ * locked (UN_LOCKED): a lower vnode is then locked around the call and
+ * an upper vnode is locked through FIXUP().
+ */
+int
+union_abortop(ap)
+	struct vop_abortop_args /* {
+		struct vnode *a_dvp;
+		struct componentname *a_cnp;
+	} */ *ap;
+{
+	struct vnode *targetvp = OTHERVP(ap->a_dvp);
+	struct union_node *un = VTOUNION(ap->a_dvp);
+	int lowerlocked = 0;	/* set when we locked the lower vnode */
+	int error;
+
+	if (un->un_flags & UN_LOCKED) {
+		if (targetvp == LOWERVP(ap->a_dvp)) {
+			VOP_LOCK(targetvp);
+			lowerlocked = 1;
+		} else {
+			FIXUP(un);
+		}
+	}
+
+	error = VOP_ABORTOP(targetvp, ap->a_cnp);
+
+	if (lowerlocked)
+		VOP_UNLOCK(targetvp);
+
+	return (error);
+}
+
+/*
+ * Inactivate a union vnode.
+ *
+ * Deliberately does nothing (and does _not_ bypass to the layers
+ * below).  The lower vnode is not vrele'd until reclaim, so that the
+ * union_node stays in the cache and can be reused until then.
+ *
+ * NEEDSWORK: Someday, consider inactive'ing the lowervp and then
+ * trying to reactivate it with capabilities (v_id) like they do in
+ * the name lookup cache code.  That's too much work for now.
+ */
+int
+union_inactive(ap)
+	struct vop_inactive_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+#ifdef UNION_DIAGNOSTIC
+	struct union_node *un = VTOUNION(ap->a_vp);
+
+	/* A node that is still locked must not be inactivated. */
+	if (un->un_flags & UN_LOCKED)
+		panic("union: inactivating locked node");
+#endif
+
+	return (0);
+}
+
+/*
+ * Reclaim a union vnode: hand it to union_freevp() and report success.
+ */
+int
+union_reclaim(ap)
+	struct vop_reclaim_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+	struct vnode *vp = ap->a_vp;
+
+	union_freevp(vp);
+	return (0);
+}
+
+/*
+ * Lock a union vnode.
+ *
+ * Each attempt first sleeps while VXLOCK is set on the vnode, then
+ * makes sure the upper vnode (if any) is locked, and finally checks
+ * UN_LOCKED.  If the node is already locked the caller sets UN_WANT,
+ * sleeps on un_flags and starts over; otherwise UN_LOCKED is set and
+ * 0 is returned.  With DIAGNOSTIC the owner's pid is kept in un_pid
+ * to catch a process locking against itself.
+ */
+int
+union_lock(ap)
+	struct vop_lock_args *ap;
+{
+	struct vnode *vp = ap->a_vp;
+	struct union_node *un;
+
+	for (;;) {
+		while (vp->v_flag & VXLOCK) {
+			vp->v_flag |= VXWANT;
+			sleep((caddr_t)vp, PINOD);
+		}
+
+		un = VTOUNION(vp);
+
+		if (un->un_uppervp) {
+			if ((un->un_flags & UN_ULOCK) == 0) {
+				un->un_flags |= UN_ULOCK;
+				VOP_LOCK(un->un_uppervp);
+			}
+#ifdef DIAGNOSTIC
+			if (un->un_flags & UN_KLOCK)
+				panic("union: dangling upper lock");
+#endif
+		}
+
+		/* Free to take: leave the retry loop. */
+		if ((un->un_flags & UN_LOCKED) == 0)
+			break;
+
+#ifdef DIAGNOSTIC
+		if (curproc && un->un_pid == curproc->p_pid &&
+			    un->un_pid > -1 && curproc->p_pid > -1)
+			panic("union: locking against myself");
+#endif
+		un->un_flags |= UN_WANT;
+		sleep((caddr_t) &un->un_flags, PINOD);
+	}
+
+#ifdef DIAGNOSTIC
+	if (curproc)
+		un->un_pid = curproc->p_pid;
+	else
+		un->un_pid = -1;
+#endif
+
+	un->un_flags |= UN_LOCKED;
+	return (0);
+}
+
+/*
+ * Unlock a union vnode.
+ *
+ * Clears UN_LOCKED.  The upper vnode is unlocked as well when UN_ULOCK
+ * is set and UN_KLOCK is not; UN_KLOCK is how a caller asks for the
+ * upper vnode to stay locked.  Both flags are cleared afterwards, and
+ * anyone who set UN_WANT while sleeping on un_flags is woken up.
+ *
+ * The argument is a struct vop_unlock_args (it was declared as the
+ * lock variant); only a_vp is used.
+ */
+int
+union_unlock(ap)
+	struct vop_unlock_args *ap;
+{
+	struct union_node *un = VTOUNION(ap->a_vp);
+
+#ifdef DIAGNOSTIC
+	if ((un->un_flags & UN_LOCKED) == 0)
+		panic("union: unlock unlocked node");
+	if (curproc && un->un_pid != curproc->p_pid &&
+			curproc->p_pid > -1 && un->un_pid > -1)
+		panic("union: unlocking other process's union node");
+#endif
+
+	un->un_flags &= ~UN_LOCKED;
+
+	/* Unlock the upper vnode unless the caller wants to keep it. */
+	if ((un->un_flags & (UN_ULOCK|UN_KLOCK)) == UN_ULOCK)
+		VOP_UNLOCK(un->un_uppervp);
+
+	un->un_flags &= ~(UN_ULOCK|UN_KLOCK);
+
+	if (un->un_flags & UN_WANT) {
+		un->un_flags &= ~UN_WANT;
+		wakeup((caddr_t) &un->un_flags);
+	}
+
+#ifdef DIAGNOSTIC
+	un->un_pid = 0;
+#endif
+
+	return (0);
+}
+
+/*
+ * Map a logical block through the vnode OTHERVP() selects.  A lower
+ * vnode is locked around the call, an upper vnode is locked through
+ * FIXUP().
+ */
+int
+union_bmap(ap)
+	struct vop_bmap_args /* {
+		struct vnode *a_vp;
+		daddr_t  a_bn;
+		struct vnode **a_vpp;
+		daddr_t *a_bnp;
+		int *a_runp;
+	} */ *ap;
+{
+	struct vnode *targetvp = OTHERVP(ap->a_vp);
+	int error;
+
+	if (targetvp == LOWERVP(ap->a_vp)) {
+		VOP_LOCK(targetvp);
+		error = VOP_BMAP(targetvp, ap->a_bn, ap->a_vpp,
+		    ap->a_bnp, ap->a_runp);
+		VOP_UNLOCK(targetvp);
+	} else {
+		FIXUP(VTOUNION(ap->a_vp));
+		error = VOP_BMAP(targetvp, ap->a_bn, ap->a_vpp,
+		    ap->a_bnp, ap->a_runp);
+	}
+
+	return (error);
+}
+
+/*
+ * Print the union vnode together with its upper and lower vnodes.
+ */
+int
+union_print(ap)
+	struct vop_print_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+	struct vnode *unionvp = ap->a_vp;
+
+	printf("\ttag VT_UNION, vp=%x, uppervp=%x, lowervp=%x\n",
+	    unionvp, UPPERVP(unionvp), LOWERVP(unionvp));
+	return (0);
+}
+
+/*
+ * Report whether the union node is locked: 1 if UN_LOCKED is set in
+ * un_flags, 0 otherwise.
+ */
+int
+union_islocked(ap)
+	struct vop_islocked_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+	struct union_node *un = VTOUNION(ap->a_vp);
+
+	return ((un->un_flags & UN_LOCKED) != 0);
+}
+
+/*
+ * Answer a pathconf query through the vnode OTHERVP() selects.  A
+ * lower vnode is locked around the call, an upper vnode is locked
+ * through FIXUP().
+ */
+int
+union_pathconf(ap)
+	struct vop_pathconf_args /* {
+		struct vnode *a_vp;
+		int a_name;
+		int *a_retval;
+	} */ *ap;
+{
+	struct vnode *targetvp = OTHERVP(ap->a_vp);
+	int error;
+
+	if (targetvp == LOWERVP(ap->a_vp)) {
+		VOP_LOCK(targetvp);
+		error = VOP_PATHCONF(targetvp, ap->a_name, ap->a_retval);
+		VOP_UNLOCK(targetvp);
+	} else {
+		FIXUP(VTOUNION(ap->a_vp));
+		error = VOP_PATHCONF(targetvp, ap->a_name, ap->a_retval);
+	}
+
+	return (error);
+}
+
+/*
+ * Pass an advisory lock request straight through to the vnode
+ * OTHERVP() selects.
+ */
+int
+union_advlock(ap)
+	struct vop_advlock_args /* {
+		struct vnode *a_vp;
+		caddr_t  a_id;
+		int  a_op;
+		struct flock *a_fl;
+		int  a_flags;
+	} */ *ap;
+{
+	struct vnode *targetvp = OTHERVP(ap->a_vp);
+
+	return (VOP_ADVLOCK(targetvp, ap->a_id, ap->a_op,
+	    ap->a_fl, ap->a_flags));
+}
+
+
+/*
+ * XXX - vop_strategy must be hand coded because it has no
+ * vnode in its arguments.
+ * This goes away with a merged VM/buffer cache.
+ *
+ * The buffer's vnode is temporarily switched to the vnode OTHERVP()
+ * selects, the request is handed to VOP_STRATEGY, and the original
+ * vnode is put back before returning.
+ */
+int
+union_strategy(ap)
+	struct vop_strategy_args /* {
+		struct buf *a_bp;
+	} */ *ap;
+{
+	struct buf *bp = ap->a_bp;
+	struct vnode *unionvp = bp->b_vp;
+	int error;
+
+	bp->b_vp = OTHERVP(unionvp);
+
+#ifdef DIAGNOSTIC
+	if (bp->b_vp == NULLVP)
+		panic("union_strategy: nil vp");
+	if (((bp->b_flags & B_READ) == 0) &&
+	    (bp->b_vp == LOWERVP(unionvp)))
+		panic("union_strategy: writing to lowervp");
+#endif
+
+	error = VOP_STRATEGY(bp);
+	bp->b_vp = unionvp;
+
+	return (error);
+}
+
+/*
+ * Global vfs data structures
+ *
+ * union_vnodeop_p is the operations vector pointer that union vnodes
+ * carry in v_op (union_rename compares v_op against it).
+ * union_vnodeop_entries pairs each vnode operation descriptor with the
+ * union handler defined in this file; the vop_default_desc entry names
+ * vn_default_error, presumably the handler for operations not listed
+ * here -- TODO confirm.  The table ends with a NULL/NULL sentinel.
+ * union_vnodeop_opv_desc ties the vector pointer to the table.
+ * The entries under "notdef" are compiled out.
+ */
+int (**union_vnodeop_p)();
+struct vnodeopv_entry_desc union_vnodeop_entries[] = {
+	{ &vop_default_desc, vn_default_error },
+	{ &vop_lookup_desc, union_lookup },		/* lookup */
+	{ &vop_create_desc, union_create },		/* create */
+	{ &vop_mknod_desc, union_mknod },		/* mknod */
+	{ &vop_open_desc, union_open },			/* open */
+	{ &vop_close_desc, union_close },		/* close */
+	{ &vop_access_desc, union_access },		/* access */
+	{ &vop_getattr_desc, union_getattr },		/* getattr */
+	{ &vop_setattr_desc, union_setattr },		/* setattr */
+	{ &vop_read_desc, union_read },			/* read */
+	{ &vop_write_desc, union_write },		/* write */
+	{ &vop_ioctl_desc, union_ioctl },		/* ioctl */
+	{ &vop_select_desc, union_select },		/* select */
+	{ &vop_mmap_desc, union_mmap },			/* mmap */
+	{ &vop_fsync_desc, union_fsync },		/* fsync */
+	{ &vop_seek_desc, union_seek },			/* seek */
+	{ &vop_remove_desc, union_remove },		/* remove */
+	{ &vop_link_desc, union_link },			/* link */
+	{ &vop_rename_desc, union_rename },		/* rename */
+	{ &vop_mkdir_desc, union_mkdir },		/* mkdir */
+	{ &vop_rmdir_desc, union_rmdir },		/* rmdir */
+	{ &vop_symlink_desc, union_symlink },		/* symlink */
+	{ &vop_readdir_desc, union_readdir },		/* readdir */
+	{ &vop_readlink_desc, union_readlink },		/* readlink */
+	{ &vop_abortop_desc, union_abortop },		/* abortop */
+	{ &vop_inactive_desc, union_inactive },		/* inactive */
+	{ &vop_reclaim_desc, union_reclaim },		/* reclaim */
+	{ &vop_lock_desc, union_lock },			/* lock */
+	{ &vop_unlock_desc, union_unlock },		/* unlock */
+	{ &vop_bmap_desc, union_bmap },			/* bmap */
+	{ &vop_strategy_desc, union_strategy },		/* strategy */
+	{ &vop_print_desc, union_print },		/* print */
+	{ &vop_islocked_desc, union_islocked },		/* islocked */
+	{ &vop_pathconf_desc, union_pathconf },		/* pathconf */
+	{ &vop_advlock_desc, union_advlock },		/* advlock */
+#ifdef notdef
+	{ &vop_blkatoff_desc, union_blkatoff },		/* blkatoff */
+	{ &vop_valloc_desc, union_valloc },		/* valloc */
+	{ &vop_vfree_desc, union_vfree },		/* vfree */
+	{ &vop_truncate_desc, union_truncate },		/* truncate */
+	{ &vop_update_desc, union_update },		/* update */
+	{ &vop_bwrite_desc, union_bwrite },		/* bwrite */
+#endif
+	{ (struct vnodeop_desc*)NULL, (int(*)())NULL }
+};
+struct vnodeopv_desc union_vnodeop_opv_desc =
+	{ &union_vnodeop_p, union_vnodeop_entries };
diff --git a/sys/gnu/ext2fs/ext2_bmap.c b/sys/gnu/ext2fs/ext2_bmap.c
new file mode 100644
index 00000000000..bcd838d036a
--- /dev/null
+++ b/sys/gnu/ext2fs/ext2_bmap.c
@@ -0,0 +1,294 @@
+/*
+ * Copyright (c) 1989, 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)ufs_bmap.c	8.6 (Berkeley) 1/21/94
+ */
+
+#include <sys/param.h>
+#include <sys/buf.h>
+#include <sys/proc.h>
+#include <sys/vnode.h>
+#include <sys/mount.h>
+#include <sys/resourcevar.h>
+#include <sys/trace.h>
+
+#include <miscfs/specfs/specdev.h>
+
+#include <ufs/ufs/quota.h>
+#include <ufs/ufs/inode.h>
+#include <ufs/ufs/ufsmount.h>
+#include <ufs/ufs/ufs_extern.h>
+
+/*
+ * Translate a logical block number within a file into the matching physical
+ * block number on disk, using the block pointers recorded in the file's
+ * dinode.  The argument bundle is a struct vop_bmap_args.
+ */
+int
+ufs_bmap(ap)
+	struct vop_bmap_args /* {
+		struct vnode *a_vp;
+		daddr_t  a_bn;
+		struct vnode **a_vpp;
+		daddr_t *a_bnp;
+		int *a_runp;
+	} */ *ap;
+{
+	/*
+	 * Report the device vnode if the caller asked for it.  The block
+	 * translation itself is only done when a result slot was supplied.
+	 */
+	if (ap->a_vpp != NULL)
+		*ap->a_vpp = VTOI(ap->a_vp)->i_devvp;
+	if (ap->a_bnp != NULL)
+		return (ufs_bmaparray(ap->a_vp, ap->a_bn, ap->a_bnp, NULL,
+		    NULL, ap->a_runp));
+
+	return (0);
+}
+
+/*
+ * Indirect blocks live on the file's own vnode under negative logical block
+ * numbers: a single indirect block is addressed by the negated number of the
+ * first data block it maps, a double indirect block by one less than the
+ * address of its first indirect block, and a triple indirect block by one
+ * less than the address of its first double indirect block.
+ *
+ * ufs_bmaparray maps logical block bn of vp to a disk address stored in *bnp
+ * (-1 when no block is assigned).  If ap/nump are supplied (both or neither),
+ * they receive the path of indirect blocks from ufs_getlbns and its length.
+ * If runp is supplied, *runp is set to the number of blocks following bn that
+ * are laid out sequentially on disk, capped at maxrun.  Returns 0, or the
+ * error from ufs_getlbns or from reading an indirect block.
+ */
+int
+ufs_bmaparray(vp, bn, bnp, ap, nump, runp)
+	struct vnode *vp;
+	register daddr_t bn;
+	daddr_t *bnp;
+	struct indir *ap;
+	int *nump;
+	int *runp;
+{
+	register struct inode *ip;
+	struct buf *bp;
+	struct ufsmount *ump;
+	struct mount *mp;
+	struct vnode *devvp;
+	struct indir a[NIADDR + 1], *xap;	/* i_ib entry + NIADDR levels */
+	daddr_t daddr;
+	long metalbn;
+	int error, maxrun, num;
+
+	ip = VTOI(vp);
+	mp = vp->v_mount;
+	ump = VFSTOUFS(mp);
+#ifdef DIAGNOSTIC
+	if (ap != NULL && nump == NULL || ap == NULL && nump != NULL)
+		panic("ufs_bmaparray: invalid arguments");
+#endif
+
+	if (runp) {
+		/*
+		 * XXX
+		 * If MAXBSIZE is the largest transfer the disks can handle,
+		 * we probably want maxrun to be 1 block less so that we
+		 * don't create a block larger than the device can handle.
+		 */
+		*runp = 0;
+		maxrun = MAXBSIZE / mp->mnt_stat.f_iosize - 1;
+	}
+
+	xap = ap == NULL ? a : ap;	/* no caller array: use the local one */
+	if (!nump)
+		nump = &num;
+	if (error = ufs_getlbns(vp, bn, xap, nump))
+		return (error);
+
+	num = *nump;
+	if (num == 0) {			/* direct block: look it up in i_db */
+		*bnp = blkptrtodb(ump, ip->i_db[bn]);
+		if (*bnp == 0)
+			*bnp = -1;
+		else if (runp)
+			for (++bn; bn < NDADDR && *runp < maxrun &&
+			    is_sequential(ump, ip->i_db[bn - 1], ip->i_db[bn]);
+			    ++bn, ++*runp);
+		return (0);
+	}
+
+	/* Indirect case: walk the path that ufs_getlbns built in xap. */
+	/* Get disk address out of indirect block array */
+	daddr = ip->i_ib[xap->in_off];
+
+	devvp = VFSTOUFS(vp->v_mount)->um_devvp;	/* XXX not used below */
+	for (bp = NULL, ++xap; --num; ++xap) {
+		/*
+		 * Exit the loop if there is no disk address assigned yet and
+		 * the indirect block isn't in the cache, or if we were
+		 * looking for an indirect block and we've found it.
+		 */
+
+		metalbn = xap->in_lbn;
+		if (daddr == 0 && !incore(vp, metalbn) || metalbn == bn)
+			break;
+		/*
+		 * If we get here, we've either got the block in the cache
+		 * or we have a disk address for it, go fetch it.
+		 */
+		if (bp)
+			brelse(bp);
+
+		xap->in_exists = 1;
+		bp = getblk(vp, metalbn, mp->mnt_stat.f_iosize, 0, 0);
+		if (bp->b_flags & (B_DONE | B_DELWRI)) {
+			trace(TR_BREADHIT, pack(vp, size), metalbn);
+		}
+#ifdef DIAGNOSTIC
+		else if (!daddr)
+			panic("ufs_bmaparry: indirect block not in cache");
+#endif
+		else {
+			trace(TR_BREADMISS, pack(vp, size), metalbn);
+			bp->b_blkno = blkptrtodb(ump, daddr);
+			bp->b_flags |= B_READ;
+			VOP_STRATEGY(bp);
+			curproc->p_stats->p_ru.ru_inblock++;	/* XXX */
+			if (error = biowait(bp)) {
+				brelse(bp);
+				return (error);
+			}
+		}
+
+		daddr = ((daddr_t *)bp->b_data)[xap->in_off];
+		if (num == 1 && daddr && runp)	/* last level: count the run */
+			for (bn = xap->in_off + 1;
+			    bn < MNINDIR(ump) && *runp < maxrun &&
+			    is_sequential(ump, ((daddr_t *)bp->b_data)[bn - 1],
+			    ((daddr_t *)bp->b_data)[bn]);
+			    ++bn, ++*runp);
+	}
+	if (bp)
+		brelse(bp);
+
+	daddr = blkptrtodb(ump, daddr);
+	*bnp = daddr == 0 ? -1 : daddr;
+	return (0);
+}
+
+/*
+ * Create an array of logical block number/offset pairs which represent the
+ * path of indirect blocks required to access block bn of vp.  The first pair
+ * holds the logical block number of the top-level single, double or triple
+ * indirect block and its index in the inode's i_ib array; each later pair
+ * holds an indirect block's number and the offset to follow inside it, so the
+ * top-level block number appears twice.  If nump is non-NULL, *nump receives
+ * the pair count (0 for a direct block).  Returns 0, or EFBIG if out of range.
+ */
+int
+ufs_getlbns(vp, bn, ap, nump)
+	struct vnode *vp;
+	register daddr_t bn;
+	struct indir *ap;
+	int *nump;
+{
+	long metalbn, realbn;
+	struct ufsmount *ump;
+	int blockcnt, i, numlevels, off;
+
+	ump = VFSTOUFS(vp->v_mount);
+	if (nump)
+		*nump = 0;
+	numlevels = 0;
+	realbn = bn;			/* keep the caller's signed value */
+	if ((long)bn < 0)		/* negative: an indirect block's lbn */
+		bn = -(long)bn;
+
+	/* The first NDADDR blocks are direct blocks. */
+	if (bn < NDADDR)
+		return (0);
+
+	/*
+	 * Determine the number of levels of indirection.  After this loop
+	 * is done, blockcnt indicates the number of data blocks possible
+	 * at the given level of indirection, and NIADDR - i is the index of
+	 * the top-level indirect block in i_ib (levels of indirection - 1).
+	 */
+	for (blockcnt = 1, i = NIADDR, bn -= NDADDR;; i--, bn -= blockcnt) {
+		if (i == 0)		/* beyond triple indirection */
+			return (EFBIG);
+		blockcnt *= MNINDIR(ump);
+		if (bn < blockcnt)
+			break;
+	}
+
+	/* Calculate the address of the first meta-block. */
+	if (realbn >= 0)
+		metalbn = -(realbn - bn + NIADDR - i);
+	else
+		metalbn = -(-realbn - bn + NIADDR - i);
+
+	/*
+	 * The first pair describes the i_ib slot.  Each loop pass then records
+	 * one indirect block and the offset of the next pointer inside it, so
+	 * up to NIADDR + 1 pairs in total are written through ap; the caller's
+	 * array must be at least that large.
+	 */
+	ap->in_lbn = metalbn;
+	ap->in_off = off = NIADDR - i;
+	ap->in_exists = 0;
+	ap++;
+	for (++numlevels; i <= NIADDR; i++) {
+		/* If searching for a meta-data block, quit when found. */
+		if (metalbn == realbn)
+			break;
+
+		blockcnt /= MNINDIR(ump);
+		off = (bn / blockcnt) % MNINDIR(ump);
+
+		++numlevels;
+		ap->in_lbn = metalbn;
+		ap->in_off = off;
+		ap->in_exists = 0;
+		++ap;
+
+		metalbn -= -1 + off * blockcnt;	/* lbn of the next-level block */
+	}
+	if (nump)
+		*nump = numlevels;
+	return (0);
+}
diff --git a/sys/gnu/ext2fs/ext2_ihash.c b/sys/gnu/ext2fs/ext2_ihash.c
new file mode 100644
index 00000000000..4a37c907ef6
--- /dev/null
+++ b/sys/gnu/ext2fs/ext2_ihash.c
@@ -0,0 +1,154 @@
+/*
+ * Copyright (c) 1982, 1986, 1989, 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)ufs_ihash.c	8.4 (Berkeley) 12/30/93
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/vnode.h>
+#include <sys/malloc.h>
+#include <sys/proc.h>
+
+#include <ufs/ufs/quota.h>
+#include <ufs/ufs/inode.h>
+#include <ufs/ufs/ufs_extern.h>
+
+/*
+ * Structures associated with inode caching.
+ */
+struct inode **ihashtbl;	/* bucket array; each slot heads an i_next chain */
+u_long	ihash;		/* size of hash table - 1 */
+#define	INOHASH(device, inum)	(((device) + (inum)) & ihash)
+
+/*
+ * Set up the inode hash table: hashinit() returns it and fills in ihash.
+ */
+void
+ufs_ihashinit()
+{
+
+	ihashtbl = hashinit(desiredvnodes, M_UFSMNT, &ihash);
+}
+
+/*
+ * Find the in-core inode for a device/inum pair and return its vnode, or
+ * NULL if it is not hashed.  A locked inode is returned without waiting.
+ */
+struct vnode *
+ufs_ihashlookup(device, inum)
+	dev_t device;
+	ino_t inum;
+{
+	register struct inode *ip;
+
+	ip = ihashtbl[INOHASH(device, inum)];
+	while (ip != NULL) {
+		if (ip->i_number == inum && ip->i_dev == device)
+			return (ITOV(ip));
+		ip = ip->i_next;
+	}
+	return (NULL);
+}
+
+/*
+ * Use the device/inum pair to find the incore inode, and return its vnode
+ * once vget() returns 0; NULL if not hashed.  If it is locked, wait for it.
+ */
+struct vnode *
+ufs_ihashget(device, inum)
+	dev_t device;
+	ino_t inum;
+{
+	register struct inode *ip;
+	struct vnode *vp;
+
+	for (;;)		/* each pass rescans the chain from its head */
+		for (ip = ihashtbl[INOHASH(device, inum)];; ip = ip->i_next) {
+			if (ip == NULL)
+				return (NULL);
+			if (inum == ip->i_number && device == ip->i_dev) {
+				if (ip->i_flag & IN_LOCKED) {
+					ip->i_flag |= IN_WANTED;
+					sleep(ip, PINOD);
+					break;	/* slept: start over */
+				}
+				vp = ITOV(ip);
+				if (!vget(vp, 1))
+					return (vp);
+				break;		/* vget failed: start over */
+			}
+		}
+	/* NOTREACHED */
+}
+
+/*
+ * Link the inode in at the head of its hash chain and mark it locked,
+ * recording the current process (or -1 if there is none) as lock holder.
+ */
+void
+ufs_ihashins(ip)
+	struct inode *ip;
+{
+	struct inode **head, *first;
+
+	head = &ihashtbl[INOHASH(ip->i_dev, ip->i_number)];
+	first = *head;
+	if (first != NULL)
+		first->i_prev = &ip->i_next;
+	ip->i_next = first;
+	ip->i_prev = head;
+	*head = ip;
+	/* The inode must not already be locked when it is hashed. */
+	if (ip->i_flag & IN_LOCKED)
+		panic("ufs_ihashins: already locked");
+	ip->i_lockholder = curproc ? curproc->p_pid : -1;
+	ip->i_flag |= IN_LOCKED;
+}
+
+/*
+ * Unlink the inode from its hash chain.
+ */
+void
+ufs_ihashrem(ip)
+	register struct inode *ip;
+{
+	register struct inode *next;
+
+	if ((next = ip->i_next) != NULL)
+		next->i_prev = ip->i_prev;
+	*ip->i_prev = next;
+#ifdef DIAGNOSTIC
+	ip->i_next = NULL;
+	ip->i_prev = NULL;
+#endif
+}
diff --git a/sys/gnu/ext2fs/ext2_mount.h b/sys/gnu/ext2fs/ext2_mount.h
new file mode 100644
index 00000000000..237871fdaac
--- /dev/null
+++ b/sys/gnu/ext2fs/ext2_mount.h
@@ -0,0 +1,83 @@
+/*
+ * Copyright (c) 1982, 1986, 1989, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)ufsmount.h	8.2 (Berkeley) 1/12/94
+ */
+
+struct buf;
+struct inode;
+struct nameidata;
+struct timeval;
+struct ucred;
+struct uio;
+struct vnode;
+struct netexport;
+
+/* UFS-specific per-mount data; VFSTOUFS() yields it from a struct mount. */
+struct ufsmount {
+	struct	mount *um_mountp;		/* filesystem vfs structure */
+	dev_t	um_dev;				/* device mounted */
+	struct	vnode *um_devvp;		/* block device mounted vnode */
+	union {					/* pointer to superblock */
+		struct	lfs *lfs;		/* LFS */
+		struct	fs *fs;			/* FFS */
+	} ufsmount_u;
+#define	um_fs	ufsmount_u.fs			/* shorthand: FFS superblock */
+#define	um_lfs	ufsmount_u.lfs			/* shorthand: LFS superblock */
+	struct	vnode *um_quotas[MAXQUOTAS];	/* pointer to quota files */
+	struct	ucred *um_cred[MAXQUOTAS];	/* quota file access cred */
+	u_long	um_nindir;			/* indirect ptrs per block */
+	u_long	um_bptrtodb;			/* shift: indir ptr to disk block */
+	u_long	um_seqinc;			/* inc between seq blocks */
+	time_t	um_btime[MAXQUOTAS];		/* block quota time limit */
+	time_t	um_itime[MAXQUOTAS];		/* inode quota time limit */
+	char	um_qflags[MAXQUOTAS];		/* quota specific flags */
+	struct	netexport um_export;		/* export information */
+};
+/*
+ * Flags describing the state of quotas; presumably kept in um_qflags (confirm).
+ */
+#define	QTF_OPENING	0x01			/* Q_QUOTAON in progress */
+#define	QTF_CLOSING	0x02			/* Q_QUOTAOFF in progress */
+
+/* Convert mount ptr to ufsmount ptr (a cast of the mount's mnt_data field). */
+#define VFSTOUFS(mp)	((struct ufsmount *)((mp)->mnt_data))
+
+/*
+ * Macros to access file system parameters in the ufsmount structure; used by
+ * ufs_bmap.  Every argument is parenthesized so any expression may be passed.
+ */
+#define	blkptrtodb(ump, b)	((b) << (ump)->um_bptrtodb)
+#define	is_sequential(ump, a, b) ((b) == (a) + (ump)->um_seqinc)
+#define MNINDIR(ump)	((ump)->um_nindir)
+
+
diff --git a/sys/gnu/ext2fs/inode.h b/sys/gnu/ext2fs/inode.h
new file mode 100644
index 00000000000..df155967a7d
--- /dev/null
+++ b/sys/gnu/ext2fs/inode.h
@@ -0,0 +1,162 @@
+/*
+ * Copyright (c) 1982, 1989, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)inode.h	8.4 (Berkeley) 1/21/94
+ */
+
+#include <ufs/ufs/dinode.h>
+
+/*
+ * Theoretically, directories can be more than 2GB in length; however, in
+ * practice this seems unlikely. So, we define the type doff_t as a long
+ * to keep down the cost of doing lookup on a 32-bit machine. If you are
+ * porting to a 64-bit architecture, you should make doff_t the same as off_t.
+ */
+#define	doff_t	long
+
+/*
+ * The inode is used to describe each active (or recently active)
+ * file in the UFS filesystem. It is composed of two types of
+ * information. The first part is the information that is needed
+ * only while the file is active (such as the identity of the file
+ * and linkage to speed its lookup). The second part is the
+ * permanent meta-data associated with the file which is read
+ * in from the permanent dinode from long term storage when the
+ * file becomes active, and is put back when the file is no longer
+ * being used.
+ */
+struct inode {
+	struct	inode *i_next;	/* Hash chain forward. */
+	struct	inode **i_prev;	/* Hash chain back (addr of prior fwd ptr). */
+	struct	vnode *i_vnode;	/* Vnode associated with this inode. */
+	struct	vnode *i_devvp;	/* Vnode for block I/O. */
+	u_long	i_flag;		/* IN_* flags, defined below. */
+	dev_t	i_dev;		/* Device associated with the inode. */
+	ino_t	i_number;	/* The identity of the inode. */
+	union {			/* Associated filesystem. */
+		struct	fs *fs;		/* FFS */
+		struct	lfs *lfs;	/* LFS */
+	} inode_u;
+#define	i_fs	inode_u.fs
+#define	i_lfs	inode_u.lfs
+	struct	dquot *i_dquot[MAXQUOTAS];	/* Dquot structures. */
+	u_quad_t i_modrev;	/* Revision level for lease. */
+	struct	lockf *i_lockf;	/* Head of byte-level lock list. */
+	pid_t	i_lockholder;	/* DEBUG: holder of inode lock. */
+	pid_t	i_lockwaiter;	/* DEBUG: latest blocked for inode lock. */
+	/*
+	 * Side effects; used during directory lookup.
+	 */
+	long	i_count;	/* Size of free slot in directory. */
+	doff_t	i_endoff;	/* End of useful stuff in directory. */
+	doff_t	i_diroff;	/* Offset in dir, where we found last entry. */
+	doff_t	i_offset;	/* Offset of free space in directory. */
+	ino_t	i_ino;		/* Inode number of found directory. */
+	u_long	i_reclen;	/* Size of found directory entry. */
+	long	i_spare[11];	/* Spares to round up to 128 bytes. */
+	/*
+	 * The on-disk dinode itself.
+	 */
+	struct	dinode i_din;	/* 128 bytes of the on-disk dinode. */
+};
+
+#define	i_atime		i_din.di_atime	/* Shorthands for i_din's di_* fields. */
+#define	i_blocks	i_din.di_blocks
+#define	i_ctime		i_din.di_ctime
+#define	i_db		i_din.di_db
+#define	i_flags		i_din.di_flags
+#define	i_gen		i_din.di_gen
+#define	i_gid		i_din.di_gid
+#define	i_ib		i_din.di_ib
+#define	i_mode		i_din.di_mode
+#define	i_mtime		i_din.di_mtime
+#define	i_nlink		i_din.di_nlink
+#define	i_rdev		i_din.di_rdev
+#define	i_shortlink	i_din.di_shortlink
+#define	i_size		i_din.di_size
+#define	i_uid		i_din.di_uid
+
+/* These flags are kept in i_flag (not in the on-disk i_flags field). */
+#define	IN_ACCESS	0x0001		/* Access time update request. */
+#define	IN_CHANGE	0x0002		/* Inode change time update request. */
+#define	IN_EXLOCK	0x0004		/* File has exclusive lock. */
+#define	IN_LOCKED	0x0008		/* Inode lock. */
+#define	IN_LWAIT	0x0010		/* Process waiting on file lock. */
+#define	IN_MODIFIED	0x0020		/* Inode has been modified. */
+#define	IN_RENAME	0x0040		/* Inode is being renamed. */
+#define	IN_SHLOCK	0x0080		/* File has shared lock. */
+#define	IN_UPDATE	0x0100		/* Modification time update request. */
+#define	IN_WANTED	0x0200		/* A process waits for IN_LOCKED. */
+
+#ifdef KERNEL
+/*
+ * Structure used to pass around logical block paths generated by
+ * ufs_getlbns and used by truncate and bmap code.
+ */
+struct indir {
+	daddr_t	in_lbn;			/* Logical block number. */
+	int	in_off;			/* Offset in buffer (or in i_ib). */
+	int	in_exists;		/* Flag if the block exists. */
+};
+
+/* Convert between inode pointers and vnode pointers (v_data / i_vnode). */
+#define VTOI(vp)	((struct inode *)(vp)->v_data)
+#define ITOV(ip)	((ip)->i_vnode)
+
+#define	ITIMES(ip, t1, t2) {	/* Apply pending time update requests. */ \
+	if ((ip)->i_flag & (IN_ACCESS | IN_CHANGE | IN_UPDATE)) {	\
+		(ip)->i_flag |= IN_MODIFIED;				\
+		if ((ip)->i_flag & IN_ACCESS)	/* atime from t1 */	\
+			(ip)->i_atime.ts_sec = (t1)->tv_sec;		\
+		if ((ip)->i_flag & IN_UPDATE) {	/* mtime from t2 */	\
+			(ip)->i_mtime.ts_sec = (t2)->tv_sec;		\
+			(ip)->i_modrev++;				\
+		}							\
+		if ((ip)->i_flag & IN_CHANGE)	/* ctime from `time' */	\
+			(ip)->i_ctime.ts_sec = time.tv_sec;		\
+		(ip)->i_flag &= ~(IN_ACCESS | IN_CHANGE | IN_UPDATE);	\
+	}								\
+}
+
+/* This overlays the fid structure (see mount.h); keep the layouts in step. */
+struct ufid {
+	u_short	ufid_len;	/* Length of structure. */
+	u_short	ufid_pad;	/* Force long alignment. */
+	ino_t	ufid_ino;	/* File number (ino). */
+	long	ufid_gen;	/* Generation number. */
+};
+#endif /* KERNEL */
diff --git a/sys/gnu/fs/ext2fs/ext2_bmap.c b/sys/gnu/fs/ext2fs/ext2_bmap.c
new file mode 100644
index 00000000000..bcd838d036a
--- /dev/null
+++ b/sys/gnu/fs/ext2fs/ext2_bmap.c
@@ -0,0 +1,294 @@
+/*
+ * Copyright (c) 1989, 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)ufs_bmap.c	8.6 (Berkeley) 1/21/94
+ */
+
+#include <sys/param.h>
+#include <sys/buf.h>
+#include <sys/proc.h>
+#include <sys/vnode.h>
+#include <sys/mount.h>
+#include <sys/resourcevar.h>
+#include <sys/trace.h>
+
+#include <miscfs/specfs/specdev.h>
+
+#include <ufs/ufs/quota.h>
+#include <ufs/ufs/inode.h>
+#include <ufs/ufs/ufsmount.h>
+#include <ufs/ufs/ufs_extern.h>
+
+/*
+ * Translate a logical block number within a file into the matching physical
+ * block number on disk, using the block pointers recorded in the file's
+ * dinode.  The argument bundle is a struct vop_bmap_args.
+ */
+int
+ufs_bmap(ap)
+	struct vop_bmap_args /* {
+		struct vnode *a_vp;
+		daddr_t  a_bn;
+		struct vnode **a_vpp;
+		daddr_t *a_bnp;
+		int *a_runp;
+	} */ *ap;
+{
+	/*
+	 * Report the device vnode if the caller asked for it.  The block
+	 * translation itself is only done when a result slot was supplied.
+	 */
+	if (ap->a_vpp != NULL)
+		*ap->a_vpp = VTOI(ap->a_vp)->i_devvp;
+	if (ap->a_bnp != NULL)
+		return (ufs_bmaparray(ap->a_vp, ap->a_bn, ap->a_bnp, NULL,
+		    NULL, ap->a_runp));
+
+	return (0);
+}
+
+/*
+ * Indirect blocks live on the file's own vnode under negative logical block
+ * numbers: a single indirect block is addressed by the negated number of the
+ * first data block it maps, a double indirect block by one less than the
+ * address of its first indirect block, and a triple indirect block by one
+ * less than the address of its first double indirect block.
+ *
+ * ufs_bmaparray maps logical block bn of vp to a disk address stored in *bnp
+ * (-1 when no block is assigned).  If ap/nump are supplied (both or neither),
+ * they receive the path of indirect blocks from ufs_getlbns and its length.
+ * If runp is supplied, *runp is set to the number of blocks following bn that
+ * are laid out sequentially on disk, capped at maxrun.  Returns 0, or the
+ * error from ufs_getlbns or from reading an indirect block.
+ */
+int
+ufs_bmaparray(vp, bn, bnp, ap, nump, runp)
+	struct vnode *vp;
+	register daddr_t bn;
+	daddr_t *bnp;
+	struct indir *ap;
+	int *nump;
+	int *runp;
+{
+	register struct inode *ip;
+	struct buf *bp;
+	struct ufsmount *ump;
+	struct mount *mp;
+	struct vnode *devvp;
+	struct indir a[NIADDR + 1], *xap;	/* i_ib entry + NIADDR levels */
+	daddr_t daddr;
+	long metalbn;
+	int error, maxrun, num;
+
+	ip = VTOI(vp);
+	mp = vp->v_mount;
+	ump = VFSTOUFS(mp);
+#ifdef DIAGNOSTIC
+	if (ap != NULL && nump == NULL || ap == NULL && nump != NULL)
+		panic("ufs_bmaparray: invalid arguments");
+#endif
+
+	if (runp) {
+		/*
+		 * XXX
+		 * If MAXBSIZE is the largest transfer the disks can handle,
+		 * we probably want maxrun to be 1 block less so that we
+		 * don't create a block larger than the device can handle.
+		 */
+		*runp = 0;
+		maxrun = MAXBSIZE / mp->mnt_stat.f_iosize - 1;
+	}
+
+	xap = ap == NULL ? a : ap;	/* no caller array: use the local one */
+	if (!nump)
+		nump = &num;
+	if (error = ufs_getlbns(vp, bn, xap, nump))
+		return (error);
+
+	num = *nump;
+	if (num == 0) {			/* direct block: look it up in i_db */
+		*bnp = blkptrtodb(ump, ip->i_db[bn]);
+		if (*bnp == 0)
+			*bnp = -1;
+		else if (runp)
+			for (++bn; bn < NDADDR && *runp < maxrun &&
+			    is_sequential(ump, ip->i_db[bn - 1], ip->i_db[bn]);
+			    ++bn, ++*runp);
+		return (0);
+	}
+
+	/* Indirect case: walk the path that ufs_getlbns built in xap. */
+	/* Get disk address out of indirect block array */
+	daddr = ip->i_ib[xap->in_off];
+
+	devvp = VFSTOUFS(vp->v_mount)->um_devvp;	/* XXX not used below */
+	for (bp = NULL, ++xap; --num; ++xap) {
+		/*
+		 * Exit the loop if there is no disk address assigned yet and
+		 * the indirect block isn't in the cache, or if we were
+		 * looking for an indirect block and we've found it.
+		 */
+
+		metalbn = xap->in_lbn;
+		if (daddr == 0 && !incore(vp, metalbn) || metalbn == bn)
+			break;
+		/*
+		 * If we get here, we've either got the block in the cache
+		 * or we have a disk address for it, go fetch it.
+		 */
+		if (bp)
+			brelse(bp);
+
+		xap->in_exists = 1;
+		bp = getblk(vp, metalbn, mp->mnt_stat.f_iosize, 0, 0);
+		if (bp->b_flags & (B_DONE | B_DELWRI)) {
+			trace(TR_BREADHIT, pack(vp, size), metalbn);
+		}
+#ifdef DIAGNOSTIC
+		else if (!daddr)
+			panic("ufs_bmaparry: indirect block not in cache");
+#endif
+		else {
+			trace(TR_BREADMISS, pack(vp, size), metalbn);
+			bp->b_blkno = blkptrtodb(ump, daddr);
+			bp->b_flags |= B_READ;
+			VOP_STRATEGY(bp);
+			curproc->p_stats->p_ru.ru_inblock++;	/* XXX */
+			if (error = biowait(bp)) {
+				brelse(bp);
+				return (error);
+			}
+		}
+
+		daddr = ((daddr_t *)bp->b_data)[xap->in_off];
+		if (num == 1 && daddr && runp)	/* last level: count the run */
+			for (bn = xap->in_off + 1;
+			    bn < MNINDIR(ump) && *runp < maxrun &&
+			    is_sequential(ump, ((daddr_t *)bp->b_data)[bn - 1],
+			    ((daddr_t *)bp->b_data)[bn]);
+			    ++bn, ++*runp);
+	}
+	if (bp)
+		brelse(bp);
+
+	daddr = blkptrtodb(ump, daddr);
+	*bnp = daddr == 0 ? -1 : daddr;
+	return (0);
+}
+
+/*
+ * Create an array of logical block number/offset pairs which represent the
+ * path of indirect blocks required to access block bn of vp.  The first pair
+ * holds the logical block number of the top-level single, double or triple
+ * indirect block and its index in the inode's i_ib array; each later pair
+ * holds an indirect block's number and the offset to follow inside it, so the
+ * top-level block number appears twice.  If nump is non-NULL, *nump receives
+ * the pair count (0 for a direct block).  Returns 0, or EFBIG if out of range.
+ */
+int
+ufs_getlbns(vp, bn, ap, nump)
+	struct vnode *vp;
+	register daddr_t bn;
+	struct indir *ap;
+	int *nump;
+{
+	long metalbn, realbn;
+	struct ufsmount *ump;
+	int blockcnt, i, numlevels, off;
+
+	ump = VFSTOUFS(vp->v_mount);
+	if (nump)
+		*nump = 0;
+	numlevels = 0;
+	realbn = bn;			/* keep the caller's signed value */
+	if ((long)bn < 0)		/* negative: an indirect block's lbn */
+		bn = -(long)bn;
+
+	/* The first NDADDR blocks are direct blocks. */
+	if (bn < NDADDR)
+		return (0);
+
+	/*
+	 * Determine the number of levels of indirection.  After this loop
+	 * is done, blockcnt indicates the number of data blocks possible
+	 * at the given level of indirection, and NIADDR - i is the index of
+	 * the top-level indirect block in i_ib (levels of indirection - 1).
+	 */
+	for (blockcnt = 1, i = NIADDR, bn -= NDADDR;; i--, bn -= blockcnt) {
+		if (i == 0)		/* beyond triple indirection */
+			return (EFBIG);
+		blockcnt *= MNINDIR(ump);
+		if (bn < blockcnt)
+			break;
+	}
+
+	/* Calculate the address of the first meta-block. */
+	if (realbn >= 0)
+		metalbn = -(realbn - bn + NIADDR - i);
+	else
+		metalbn = -(-realbn - bn + NIADDR - i);
+
+	/*
+	 * The first pair describes the i_ib slot.  Each loop pass then records
+	 * one indirect block and the offset of the next pointer inside it, so
+	 * up to NIADDR + 1 pairs in total are written through ap; the caller's
+	 * array must be at least that large.
+	 */
+	ap->in_lbn = metalbn;
+	ap->in_off = off = NIADDR - i;
+	ap->in_exists = 0;
+	ap++;
+	for (++numlevels; i <= NIADDR; i++) {
+		/* If searching for a meta-data block, quit when found. */
+		if (metalbn == realbn)
+			break;
+
+		blockcnt /= MNINDIR(ump);
+		off = (bn / blockcnt) % MNINDIR(ump);
+
+		++numlevels;
+		ap->in_lbn = metalbn;
+		ap->in_off = off;
+		ap->in_exists = 0;
+		++ap;
+
+		metalbn -= -1 + off * blockcnt;	/* lbn of the next-level block */
+	}
+	if (nump)
+		*nump = numlevels;
+	return (0);
+}
diff --git a/sys/gnu/fs/ext2fs/ext2_mount.h b/sys/gnu/fs/ext2fs/ext2_mount.h
new file mode 100644
index 00000000000..237871fdaac
--- /dev/null
+++ b/sys/gnu/fs/ext2fs/ext2_mount.h
@@ -0,0 +1,83 @@
+/*
+ * Copyright (c) 1982, 1986, 1989, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)ufsmount.h	8.2 (Berkeley) 1/12/94
+ */
+
+struct buf;
+struct inode;
+struct nameidata;
+struct timeval;
+struct ucred;
+struct uio;
+struct vnode;
+struct netexport;
+
+/* Per-mount UFS data: superblock pointer, quota state and bmap parameters. */
+struct ufsmount {
+	struct	mount *um_mountp;		/* filesystem vfs structure */
+	dev_t	um_dev;				/* device mounted */
+	struct	vnode *um_devvp;		/* block device mounted vnode */
+	union {					/* pointer to superblock */
+		struct	lfs *lfs;		/* LFS */
+		struct	fs *fs;			/* FFS */
+	} ufsmount_u;
+#define	um_fs	ufsmount_u.fs
+#define	um_lfs	ufsmount_u.lfs
+	struct	vnode *um_quotas[MAXQUOTAS];	/* pointer to quota files */
+	struct	ucred *um_cred[MAXQUOTAS];	/* quota file access cred */
+	u_long	um_nindir;			/* indirect ptrs per block; MNINDIR() */
+	u_long	um_bptrtodb;			/* shift count used by blkptrtodb() */
+	u_long	um_seqinc;			/* inc between seq blocks; is_sequential() */
+	time_t	um_btime[MAXQUOTAS];		/* block quota time limit */
+	time_t	um_itime[MAXQUOTAS];		/* inode quota time limit */
+	char	um_qflags[MAXQUOTAS];		/* quota specific flags */
+	struct	netexport um_export;		/* export information */
+};
+/*
+ * Flags describing the state of quotas (presumably kept in um_qflags[]).
+ */
+#define	QTF_OPENING	0x01			/* Q_QUOTAON in progress */
+#define	QTF_CLOSING	0x02			/* Q_QUOTAOFF in progress */
+
+/* Convert mount ptr to ufsmount ptr. */
+#define VFSTOUFS(mp)	((struct ufsmount *)((mp)->mnt_data))
+
+/*
+ * Accessors for the ufsmount fields used by ufs_bmap: shift a block
+ * pointer to a disk block, test b == a + um_seqinc, and read um_nindir.
+ */
+#define	blkptrtodb(ump, b)	((b) << (ump)->um_bptrtodb)
+#define	is_sequential(ump, a, b) ((b) == (a) + (ump)->um_seqinc)
+#define MNINDIR(ump)	((ump)->um_nindir)
+
+
diff --git a/sys/gnu/fs/ext2fs/inode.h b/sys/gnu/fs/ext2fs/inode.h
new file mode 100644
index 00000000000..df155967a7d
--- /dev/null
+++ b/sys/gnu/fs/ext2fs/inode.h
@@ -0,0 +1,162 @@
+/*
+ * Copyright (c) 1982, 1989, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)inode.h	8.4 (Berkeley) 1/21/94
+ */
+
+#include <ufs/ufs/dinode.h>
+
+/*
+ * Theoretically, directories can be more than 2GB in length; in practice
+ * this seems unlikely.  So, we define the type doff_t as a long to keep
+ * down the cost of doing lookup on a 32-bit machine.  If you are porting
+ * to a 64-bit architecture, you should make doff_t the same as off_t.
+ */
+#define	doff_t	long
+
+/*
+ * The inode is used to describe each active (or recently active)
+ * file in the UFS filesystem. It is composed of two types of
+ * information. The first part is the information that is needed
+ * only while the file is active (such as the identity of the file
+ * and linkage to speed its lookup). The second part is the
+ * permanent meta-data associated with the file which is read
+ * in from the permanent dinode from long term storage when the
+ * file becomes active, and is put back when the file is no longer
+ * being used.
+ */
+struct inode {
+	struct	inode *i_next;	/* Hash chain forward. */
+	struct	inode **i_prev;	/* Hash chain back. */
+	struct	vnode *i_vnode;	/* Vnode associated with this inode. */
+	struct	vnode *i_devvp;	/* Vnode for block I/O. */
+	u_long	i_flag;		/* I* flags (the IN_* bits defined below). */
+	dev_t	i_dev;		/* Device associated with the inode. */
+	ino_t	i_number;	/* The identity of the inode. */
+	union {			/* Associated filesystem. */
+		struct	fs *fs;		/* FFS */
+		struct	lfs *lfs;	/* LFS */
+	} inode_u;
+#define	i_fs	inode_u.fs
+#define	i_lfs	inode_u.lfs
+	struct	dquot *i_dquot[MAXQUOTAS];	/* Dquot structures. */
+	u_quad_t i_modrev;	/* Revision level for lease; bumped by ITIMES. */
+	struct	lockf *i_lockf;	/* Head of byte-level lock list. */
+	pid_t	i_lockholder;	/* DEBUG: holder of inode lock. */
+	pid_t	i_lockwaiter;	/* DEBUG: latest blocked for inode lock. */
+	/*
+	 * Side effects; used during directory lookup.
+	 */
+	long	i_count;	/* Size of free slot in directory. */
+	doff_t	i_endoff;	/* End of useful stuff in directory. */
+	doff_t	i_diroff;	/* Offset in dir, where we found last entry. */
+	doff_t	i_offset;	/* Offset of free space in directory. */
+	ino_t	i_ino;		/* Inode number of found directory. */
+	u_long	i_reclen;	/* Size of found directory entry. */
+	long	i_spare[11];	/* Spares to round up to 128 bytes. */
+	/*
+	 * The on-disk dinode itself.
+	 */
+	struct	dinode i_din;	/* 128 bytes of the on-disk dinode. */
+};
+
+#define	i_atime		i_din.di_atime	/* i_xxx: shorthand for i_din.di_xxx */
+#define	i_blocks	i_din.di_blocks
+#define	i_ctime		i_din.di_ctime
+#define	i_db		i_din.di_db
+#define	i_flags		i_din.di_flags
+#define	i_gen		i_din.di_gen
+#define	i_gid		i_din.di_gid
+#define	i_ib		i_din.di_ib
+#define	i_mode		i_din.di_mode
+#define	i_mtime		i_din.di_mtime
+#define	i_nlink		i_din.di_nlink
+#define	i_rdev		i_din.di_rdev
+#define	i_shortlink	i_din.di_shortlink
+#define	i_size		i_din.di_size
+#define	i_uid		i_din.di_uid
+
+/* Flags kept in i_flag; ITIMES() clears IN_ACCESS, IN_CHANGE and IN_UPDATE. */
+#define	IN_ACCESS	0x0001		/* Access time update request. */
+#define	IN_CHANGE	0x0002		/* Inode change time update request. */
+#define	IN_EXLOCK	0x0004		/* File has exclusive lock. */
+#define	IN_LOCKED	0x0008		/* Inode lock. */
+#define	IN_LWAIT	0x0010		/* Process waiting on file lock. */
+#define	IN_MODIFIED	0x0020		/* Inode has been modified; set by ITIMES. */
+#define	IN_RENAME	0x0040		/* Inode is being renamed. */
+#define	IN_SHLOCK	0x0080		/* File has shared lock. */
+#define	IN_UPDATE	0x0100		/* Modification time update request. */
+#define	IN_WANTED	0x0200		/* Inode is wanted by a process. */
+
+#ifdef KERNEL
+/*
+ * One step of a logical block path: ufs_getlbns fills in an array of
+ * these, which is then used by the truncate and bmap code.
+ */
+struct indir {
+	daddr_t	in_lbn;			/* Logical block number. */
+	int	in_off;			/* Offset in buffer. */
+	int	in_exists;		/* Flag if the block exists (ufs_getlbns stores 0). */
+};
+
+/* Convert between inode pointers and vnode pointers. */
+#define VTOI(vp)	((struct inode *)(vp)->v_data)
+#define ITOV(ip)	((ip)->i_vnode)
+/* Apply pending time updates; a bare {} block, so unsafe before an else. */
+#define	ITIMES(ip, t1, t2) {						\
+	if ((ip)->i_flag & (IN_ACCESS | IN_CHANGE | IN_UPDATE)) {	\
+		(ip)->i_flag |= IN_MODIFIED;				\
+		if ((ip)->i_flag & IN_ACCESS)				\
+			(ip)->i_atime.ts_sec = (t1)->tv_sec;		\
+		if ((ip)->i_flag & IN_UPDATE) {				\
+			(ip)->i_mtime.ts_sec = (t2)->tv_sec;		\
+			(ip)->i_modrev++;				\
+		}							\
+		if ((ip)->i_flag & IN_CHANGE)				\
+			(ip)->i_ctime.ts_sec = time.tv_sec;		\
+		(ip)->i_flag &= ~(IN_ACCESS | IN_CHANGE | IN_UPDATE);	\
+	}								\
+}
+
+/* UFS file identifier; this overlays the fid structure (see mount.h). */
+struct ufid {
+	u_short	ufid_len;	/* Length of structure. */
+	u_short	ufid_pad;	/* Force long alignment. */
+	ino_t	ufid_ino;	/* File number (ino). */
+	long	ufid_gen;	/* Generation number. */
+};
+#endif /* KERNEL */
diff --git a/sys/isofs/cd9660/TODO b/sys/isofs/cd9660/TODO
new file mode 100644
index 00000000000..555d26ad7d1
--- /dev/null
+++ b/sys/isofs/cd9660/TODO
@@ -0,0 +1,77 @@
+#	$Id: TODO,v 1.4 1993/09/07 15:40:51 ws Exp $
+
+ 1) should understand "older", original High Sierra ("CDROM001") type
+
+   Not yet. ( I don't have this technical information, yet. )
+
+ 2) should understand Rock Ridge
+
+   Yes, the following features are supported:
+
+       o Symbolic Link
+       o Real Name(long name)
+       o File Attribute 
+       o Time stamp
+       o uid, gid
+       o Devices
+       o Relocated directories
+
+   Except for the following:
+
+       o POSIX device number mapping
+
+         There is some preliminary stuff in there that (ab-)uses the mknod
+         system call, but this needs a writable filesystem
+         
+ 3) should be called cdfs, as other ISO file systems may soon be possible
+
+   Not yet. We should probably create a separate file system once the ECMA
+   draft is finalized. Rock Ridge support can reuse almost all of the same
+   code, so for now it simply shares this file system interface...
+
+ 4) should have file handles implemented for use with NFS, etc
+
+   Yes. We already have this one, and I used it as the basis for this release.
+
+ 5) should have name translation enabled by mount flag
+
+   Yes. The Rock Ridge Extension can be disabled with the following option:
+
+      "mount -t isofs -o -norrip /dev/cd0d /cdrom"
+
+ 6) should run as a user process, and not take up kernel space (cdroms
+    are slow)
+
+   Not yet.
+
+ 7) ECMA support.
+
+   Not yet. We need not only a technical spec but also an ECMA format
+   CD-ROM itself!
+
+ 8) Character set change by SVD ( multi SVD support )
+
+   Not yet. Other parts of the system would also have to be made 8-bit
+   clean. As far as I know, if you export the cdrom by NFS, the client
+   can access it 8-bit clean (e.g. Solaris Japanese with EUC code).
+
+ 9) Access checks in isofs_access
+
+   Not yet.
+
+ 10) Support for generation numbers
+
+   Yes. Default is to list only the last file (the one with the highest
+   generation number). If you mount with -gen, all files are shown with
+   their generation numbers. In both cases you can specify the generation
+   number on opening files (if you happen to know it) or leave it off,
+   when it will again find the last file.
+
+ 11) Support for extended attributes
+
+   Yes. Since this requires an extra block buffer for the attributes
+   this must be enabled on mounting with the option -extattr.
+
+----------
+Last update July 19, '93 by Atsushi Murai. (amurai@spec.co.jp)
+Last update August 19, '93 by Wolfgang Solfrank. (ws@tools.de)
diff --git a/sys/isofs/cd9660/TODO.hibler b/sys/isofs/cd9660/TODO.hibler
new file mode 100644
index 00000000000..3501aa296cd
--- /dev/null
+++ b/sys/isofs/cd9660/TODO.hibler
@@ -0,0 +1,22 @@
+1. Investigate making ISOFS another UFS shared filesystem (ala FFS/MFS/LFS).
+   Since it was modelled after the inode code, we might be able to merge
+   them back.  It looks like a separate (but very similar) lookup routine
+   will be needed due to the associated file stuff.
+
+2. Make filesystem exportable.  This comes for free if stacked with UFS.
+   Otherwise, the ufs_export routines need to be elevated to vfs_* routines.
+	[ DONE - hibler ]
+
+3. If it can't be merged with UFS, at least get them in sync.  For example,
+   it could use the same style hashing routines as in ufs/ufs_ihash.c
+
+4. It would be nice to be able to use the vfs_cluster code.
+   Unfortunately, if the logical block size is smaller than the page size,
+   it won't work.  Also, if throughput is relatively constant for any
+   block size (as it is for the HP drive--150kbs) then clustering may not
+   buy much (or may even hurt when vfs_cluster comes up with a large sync
+   cluster).
+
+5. Seems like there should be a "notrans" or some such mount option to show
+   filenames as they really are without lower-casing, stripping of version
+   numbers, etc.  Does this make sense?
diff --git a/sys/isofs/cd9660/cd9660_bmap.c b/sys/isofs/cd9660/cd9660_bmap.c
new file mode 100644
index 00000000000..911eedfd06a
--- /dev/null
+++ b/sys/isofs/cd9660/cd9660_bmap.c
@@ -0,0 +1,102 @@
+/*-
+ * Copyright (c) 1994
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley
+ * by Pace Willisson (pace@blitz.com).  The Rock Ridge Extension
+ * Support code is derived from software contributed to Berkeley
+ * by Atsushi Murai (amurai@spec.co.jp).
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)cd9660_bmap.c	8.3 (Berkeley) 1/23/94
+ */
+
+#include <sys/param.h>
+#include <sys/namei.h>
+#include <sys/buf.h>
+#include <sys/file.h>
+#include <sys/vnode.h>
+#include <sys/mount.h>
+
+#include <isofs/cd9660/iso.h>
+#include <isofs/cd9660/cd9660_node.h>
+
+/*
+ * Map a logical block number of a file to the matching block number on
+ * the underlying device.  The code treats the file as one contiguous
+ * extent starting at iso_start, so no indirect blocks are consulted.
+ */
+int
+cd9660_bmap(ap)
+	struct vop_bmap_args /* {
+		struct vnode *a_vp;
+		daddr_t  a_bn;
+		struct vnode **a_vpp;
+		daddr_t *a_bnp;
+		int *a_runp;
+	} */ *ap;
+{
+	struct iso_node *node;
+	daddr_t lbn;
+	long blksz, maxrun;
+	int ahead;
+
+	node = VTOI(ap->a_vp);
+	lbn = ap->a_bn;
+
+	/* Hand back the device vnode if the caller asked for it. */
+	if (ap->a_vpp != NULL)
+		*ap->a_vpp = node->i_devvp;
+
+	/* Without a place to store the mapping there is nothing more to do. */
+	if (ap->a_bnp == NULL)
+		return (0);
+
+	/* Scale the file-relative block by the device blocks per logical block. */
+	blksz = node->i_mnt->logical_block_size;
+	*ap->a_bnp = (node->iso_start + lbn) * btodb(blksz);
+
+	if (ap->a_runp == NULL)
+		return (0);
+
+	/*
+	 * Report how many whole blocks of the file follow the requested
+	 * one, clamped to the range 0 .. MAXBSIZE/blksz - 1.
+	 */
+	maxrun = MAXBSIZE / blksz;
+	ahead = (node->i_size - (lbn + 1) * blksz) / blksz;
+	if (ahead <= 0)
+		ahead = 0;
+	else if (ahead >= maxrun)
+		ahead = maxrun - 1;
+	*ap->a_runp = ahead;
+
+	return (0);
+}
diff --git a/sys/isofs/cd9660/cd9660_lookup.c b/sys/isofs/cd9660/cd9660_lookup.c
new file mode 100644
index 00000000000..62d1d3fc791
--- /dev/null
+++ b/sys/isofs/cd9660/cd9660_lookup.c
@@ -0,0 +1,465 @@
+/*-
+ * Copyright (c) 1989, 1993, 1994
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley
+ * by Pace Willisson (pace@blitz.com).  The Rock Ridge Extension
+ * Support code is derived from software contributed to Berkeley
+ * by Atsushi Murai (amurai@spec.co.jp).
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	from: @(#)ufs_lookup.c	7.33 (Berkeley) 5/19/91
+ *
+ *	@(#)cd9660_lookup.c	8.2 (Berkeley) 1/23/94
+ */
+
+#include <sys/param.h>
+#include <sys/namei.h>
+#include <sys/buf.h>
+#include <sys/file.h>
+#include <sys/vnode.h>
+#include <sys/mount.h>
+
+#include <isofs/cd9660/iso.h>
+#include <isofs/cd9660/cd9660_node.h>
+#include <isofs/cd9660/iso_rrip.h>
+#include <isofs/cd9660/cd9660_rrip.h>
+
+struct	nchstats iso_nchstats;	/* cd9660_lookup two-pass search counters */
+
+/*
+ * Look up the pathname component described by ap->a_cnp in the directory
+ * ap->a_dvp and return the vnode that was found through ap->a_vpp.
+ * The credentials, flags and operation are also taken from ap->a_cnp.
+ * Derived from ufs_lookup(), but no free-slot information is computed.
+ *
+ * Overall outline:
+ *
+ *	check accessibility of directory
+ *	look for name in cache; return ENOENT or the cached vnode if the
+ *	  cache answers, otherwise fall through to the directory scan
+ *	search for name in directory, to found or notfound
+ * notfound:
+ *	enter the name in the cache (as non-existent) if MAKEENTRY is set
+ *	return EJUSTRETURN for CREATE and RENAME, else ENOENT
+ * found:
+ *	if at end of path and doing a LOOKUP, remember the directory offset
+ *	get the target inode, or VREF the directory itself for "."
+ *	release the directory buffer only after the directory record has
+ *	  been used, then enter the name in the cache if MAKEENTRY is set
+ *
+ * The code assumes the directory is locked on entry.  When a different
+ * vnode is returned the directory is unlocked unless LOCKPARENT is set
+ * and this is the last component (ISLASTCN); for ".." it is unlocked
+ * before the parent is fetched (see the comment at "found" below).
+ *
+ * Returns 0 with *ap->a_vpp set, or an errno value: ENOTDIR if a_dvp is
+ * not a directory, ENOENT or EJUSTRETURN if the name was not found, or
+ * the error reported by VOP_ACCESS(), iso_blkatoff() or iso_iget().
+ */
+int
+cd9660_lookup(ap)
+	struct vop_lookup_args /* {
+		struct vnode *a_dvp;
+		struct vnode **a_vpp;
+		struct componentname *a_cnp;
+	} */ *ap;
+{
+	register struct vnode *vdp;	/* vnode for directory being searched */
+	register struct iso_node *dp;	/* inode for directory being searched */
+	register struct iso_mnt *imp;	/* file system that directory is in */
+	struct buf *bp;			/* a buffer of directory entries */
+	struct iso_directory_record *ep;/* the current directory entry */
+	int entryoffsetinblock;		/* offset of ep in bp's buffer */
+	int saveoffset;			/* offset of last directory entry in dir */
+	int numdirpasses;		/* strategy for directory search */
+	doff_t endsearch;		/* offset to end directory search */
+	struct iso_node *pdp;		/* saved dp during symlink work */
+	struct iso_node *tdp;		/* returned by iget */
+	int lockparent;			/* 1 => lockparent flag is set */
+	int wantparent;			/* 1 => wantparent or lockparent flag */
+	int error;
+	ino_t ino = 0;			/* inode number of a match, 0 = none yet */
+	int reclen;			/* length of the current directory record */
+	u_short namelen;		/* length of the current entry's name */
+	char altname[NAME_MAX];		/* name returned by cd9660_rrip_getname */
+	int res;
+	int assoc, len;			/* assoc: looking for an associated file */
+	char *name;
+	struct vnode **vpp = ap->a_vpp;
+	struct componentname *cnp = ap->a_cnp;
+	struct ucred *cred = cnp->cn_cred;
+	int flags = cnp->cn_flags;
+	int nameiop = cnp->cn_nameiop;
+	
+	bp = NULL;
+	*vpp = NULL;
+	vdp = ap->a_dvp;
+	dp = VTOI(vdp);
+	imp = dp->i_mnt;
+	lockparent = flags & LOCKPARENT;
+	wantparent = flags & (LOCKPARENT|WANTPARENT);
+	
+	/*
+	 * Check accessibility of directory.
+	 */
+	if (vdp->v_type != VDIR)
+	    return (ENOTDIR);
+	if (error = VOP_ACCESS(vdp, VEXEC, cred, cnp->cn_proc))
+		return (error);
+	
+	/*
+	 * We now have a segment name to search for, and a directory to search.
+	 *
+	 * Before tediously performing a linear scan of the directory,
+	 * check the name cache to see if the directory/name pair
+	 * we are looking for is known already.
+	 */
+	if (error = cache_lookup(vdp, vpp, cnp)) {
+		int vpid;	/* capability number of vnode */
+
+		if (error == ENOENT)
+			return (error);
+#ifdef PARANOID
+		if ((vdp->v_flag & VROOT) && (flags & ISDOTDOT))
+			panic("ufs_lookup: .. through root");
+#endif
+		/*
+		 * Get the next vnode in the path.
+		 * See comment below starting `Step through' for
+		 * an explanation of the locking protocol.
+		 */
+		pdp = dp;
+		dp = VTOI(*vpp);
+		vdp = *vpp;
+		vpid = vdp->v_id;
+		if (pdp == dp) {
+			VREF(vdp);
+			error = 0;
+		} else if (flags & ISDOTDOT) {
+			ISO_IUNLOCK(pdp);
+			error = vget(vdp, 1);
+			if (!error && lockparent && (flags & ISLASTCN))
+				ISO_ILOCK(pdp);
+		} else {
+			error = vget(vdp, 1);
+			if (!lockparent || error || !(flags & ISLASTCN))
+				ISO_IUNLOCK(pdp);
+		}
+		/*
+		 * Check that the capability number did not change
+		 * while we were waiting for the lock.
+		 */
+		if (!error) {
+			if (vpid == vdp->v_id)
+				return (0);
+			iso_iput(dp);
+			if (lockparent && pdp != dp && (flags & ISLASTCN))
+				ISO_IUNLOCK(pdp);
+		}
+		ISO_ILOCK(pdp);
+		dp = pdp;
+		vdp = ITOV(dp);
+		*vpp = NULL;
+	}
+	
+	len = cnp->cn_namelen;
+	name = cnp->cn_nameptr;
+	/*
+	 * A leading `=' means, we are looking for an associated file
+	 */
+	if (assoc = (imp->iso_ftype != ISO_FTYPE_RRIP && *name == ASSOCCHAR)) {
+		len--;
+		name++;
+	}
+	
+	/*
+	 * If there is cached information on a previous search of
+	 * this directory, pick up where we last left off.
+	 * We cache only lookups as these are the most common
+	 * and have the greatest payoff. Caching CREATE has little
+	 * benefit as it usually must search the entire directory
+	 * to determine that the entry does not exist. Caching the
+	 * location of the last DELETE or RENAME has not reduced
+	 * profiling time and hence has been removed in the interest
+	 * of simplicity.
+	 */
+	if (nameiop != LOOKUP || dp->i_diroff == 0 ||
+	    dp->i_diroff > dp->i_size) {
+		entryoffsetinblock = 0;
+		dp->i_offset = 0;
+		numdirpasses = 1;
+	} else {
+		dp->i_offset = dp->i_diroff;
+		entryoffsetinblock = iso_blkoff(imp, dp->i_offset);
+		if (entryoffsetinblock != 0) {
+			if (error = iso_blkatoff(dp, dp->i_offset, &bp))
+				return (error);
+		}
+		numdirpasses = 2;
+		iso_nchstats.ncs_2passes++;
+	}
+	endsearch = roundup(dp->i_size, imp->logical_block_size);
+	
+searchloop:
+	while (dp->i_offset < endsearch) {
+		/*
+		 * If offset is on a block boundary,
+		 * read the next directory block.
+		 * Release previous if it exists.
+		 */
+		if (iso_blkoff(imp, dp->i_offset) == 0) {
+			if (bp != NULL)
+				brelse(bp);
+			if (error = iso_blkatoff(dp, dp->i_offset, &bp))
+				return (error);
+			entryoffsetinblock = 0;
+		}
+		/*
+		 * Get pointer to next entry.
+		 */
+		ep = (struct iso_directory_record *)
+			(bp->b_un.b_addr + entryoffsetinblock);
+		
+		reclen = isonum_711 (ep->length);
+		if (reclen == 0) {
+			/* skip to next block; +1 advances an aligned offset */
+			dp->i_offset = roundup(dp->i_offset + 1,
+					       imp->logical_block_size);
+			continue;
+		}
+		
+		if (reclen < ISO_DIRECTORY_RECORD_SIZE)
+			/* illegal entry, stop */
+			break;
+		
+		if (entryoffsetinblock + reclen > imp->logical_block_size)
+			/* entries are not allowed to cross boundaries */
+			break;
+		
+		/*
+		 * Check for a name match.
+		 */
+		namelen = isonum_711(ep->name_len);
+		
+		if (reclen < ISO_DIRECTORY_RECORD_SIZE + namelen)
+			/* illegal entry, stop */
+			break;
+		
+		switch (imp->iso_ftype) {
+		default:
+			if ((!(isonum_711(ep->flags)&4)) == !assoc) {
+				if ((len == 1
+				     && *name == '.')
+				    || (flags & ISDOTDOT)) {
+					if (namelen == 1
+					    && ep->name[0] == ((flags & ISDOTDOT) ? 1 : 0)) {
+						/*
+						 * Save directory entry's inode number and
+						 * reclen in ndp->ni_ufs area, and release
+						 * directory buffer.
+						 */
+						isodirino(&dp->i_ino,ep,imp);
+						goto found;
+					}
+					if (namelen != 1
+					    || ep->name[0] != 0)
+						goto notfound;
+				} else if (!(res = isofncmp(name,len,
+							    ep->name,namelen))) {
+					if (isonum_711(ep->flags)&2)
+						isodirino(&ino,ep,imp);
+					else
+						ino = dbtob(bp->b_blkno)
+							+ entryoffsetinblock;
+					saveoffset = dp->i_offset;
+				} else if (ino)
+					goto foundino;
+#ifdef	NOSORTBUG	/* On some CDs directory entries are not sorted correctly */
+				else if (res < 0)
+					goto notfound;
+				else if (res > 0 && numdirpasses == 2)
+					numdirpasses++;
+#endif
+			}
+			break;
+		case ISO_FTYPE_RRIP:
+			if (isonum_711(ep->flags)&2)
+				isodirino(&ino,ep,imp);
+			else
+				ino = dbtob(bp->b_blkno) + entryoffsetinblock;
+			dp->i_ino = ino;
+			cd9660_rrip_getname(ep,altname,&namelen,&dp->i_ino,imp);
+			if (namelen == cnp->cn_namelen
+			    && !bcmp(name,altname,namelen))
+				goto found;
+			ino = 0;
+			break;
+		}
+		dp->i_offset += reclen;
+		entryoffsetinblock += reclen;
+	}
+	if (ino) {
+foundino:
+		dp->i_ino = ino;
+		if (saveoffset != dp->i_offset) {
+			if (iso_lblkno(imp,dp->i_offset)
+			    != iso_lblkno(imp,saveoffset)) {
+				if (bp != NULL)
+					brelse(bp);
+				if (error = iso_blkatoff(dp, saveoffset, &bp))
+					return (error);
+			}
+			ep = (struct iso_directory_record *)(bp->b_un.b_addr
+							     + iso_blkoff(imp,saveoffset));
+			dp->i_offset = saveoffset;
+		}
+		goto found;
+	}
+notfound:
+	/*
+	 * If we started in the middle of the directory and failed
+	 * to find our target, we must check the beginning as well.
+	 */
+	if (numdirpasses == 2) {
+		numdirpasses--;
+		dp->i_offset = 0;
+		endsearch = dp->i_diroff;
+		goto searchloop;
+	}
+	if (bp != NULL)
+		brelse(bp);
+	/*
+	 * Insert name into cache (as non-existent) if appropriate.
+	 */
+	if (cnp->cn_flags & MAKEENTRY)
+		cache_enter(vdp, *vpp, cnp);
+	if (nameiop == CREATE || nameiop == RENAME)
+		return (EJUSTRETURN);
+	return (ENOENT);
+	
+found:
+	if (numdirpasses == 2)
+		iso_nchstats.ncs_pass2++;
+	
+	/*
+	 * Found component in pathname.
+	 * If the final component of path name, save information
+	 * in the cache as to where the entry was found.
+	 */
+	if ((flags & ISLASTCN) && nameiop == LOOKUP)
+		dp->i_diroff = dp->i_offset;
+	
+	/*
+	 * Step through the translation in the name.  We do not `iput' the
+	 * directory because we may need it again if a symbolic link
+	 * is relative to the current directory.  Instead we save it
+	 * unlocked as "pdp".  We must get the target inode before unlocking
+	 * the directory to insure that the inode will not be removed
+	 * before we get it.  We prevent deadlock by always fetching
+	 * inodes from the root, moving down the directory tree. Thus
+	 * when following backward pointers ".." we must unlock the
+	 * parent directory before getting the requested directory.
+	 * There is a potential race condition here if both the current
+	 * and parent directories are removed before the `iget' for the
+	 * inode associated with ".." returns.  We hope that this occurs
+	 * infrequently since we cannot avoid this race condition without
+	 * implementing a sophisticated deadlock detection algorithm.
+	 * Note also that this simple deadlock detection scheme will not
+	 * work if the file system has any hard links other than ".."
+	 * that point backwards in the directory structure.
+	 */
+	pdp = dp;
+	/*
+	 * If ino is different from dp->i_ino, it's a relocated directory.
+	 * ep points into bp, so bp is released only after iso_iget() used it.
+	 */
+	if (flags & ISDOTDOT) {
+		ISO_IUNLOCK(pdp);	/* race to get the inode */
+		error = iso_iget(dp, dp->i_ino, dp->i_ino != ino, &tdp, ep);
+		if (bp != NULL)
+			brelse(bp);
+		if (error) {
+			ISO_ILOCK(pdp);
+			return (error);
+		}
+		if (lockparent && (flags & ISLASTCN))
+			ISO_ILOCK(pdp);
+		*vpp = ITOV(tdp);
+	} else if (dp->i_number == dp->i_ino) {
+		if (bp != NULL)
+			brelse(bp);
+		VREF(vdp);	/* we want ourself, ie "." */
+		*vpp = vdp;
+	} else {
+		error = iso_iget(dp, dp->i_ino, dp->i_ino != ino, &tdp, ep);
+		if (bp != NULL)
+			brelse(bp);
+		if (error)
+			return (error);
+		if (!lockparent || !(flags & ISLASTCN))
+			ISO_IUNLOCK(pdp);
+		*vpp = ITOV(tdp);
+	}
+	
+	/*
+	 * Insert name into cache if appropriate.
+	 */
+	if (cnp->cn_flags & MAKEENTRY)
+		cache_enter(vdp, *vpp, cnp);
+	return (0);
+}
+
+/*
+ * Read the directory block containing byte "offset" of directory "ip"
+ * and pass the buffer back through "bpp".  On a read error the buffer
+ * is released, *bpp is cleared and the bread() error is returned.
+ */
+iso_blkatoff(ip, offset, bpp)
+	struct iso_node *ip;
+	doff_t offset;
+	struct buf **bpp;
+{
+	struct iso_mnt *mnt;
+	struct buf *blk;
+	daddr_t blkno;
+	int nbytes, rc;
+
+	mnt = ip->i_mnt;
+	blkno = iso_lblkno(mnt, offset);
+	nbytes = iso_blksize(mnt, ip, blkno);
+	if ((rc = bread(ITOV(ip), blkno, nbytes, NOCRED, &blk)) == 0) {
+		*bpp = blk;
+		return (0);
+	}
+	brelse(blk);
+	*bpp = 0;
+	return (rc);
+}
diff --git a/sys/isofs/cd9660/cd9660_node.c b/sys/isofs/cd9660/cd9660_node.c
new file mode 100644
index 00000000000..d83a7a6f126
--- /dev/null
+++ b/sys/isofs/cd9660/cd9660_node.c
@@ -0,0 +1,648 @@
+/*-
+ * Copyright (c) 1982, 1986, 1989, 1994
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley
+ * by Pace Willisson (pace@blitz.com).  The Rock Ridge Extension
+ * Support code is derived from software contributed to Berkeley
+ * by Atsushi Murai (amurai@spec.co.jp).
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)cd9660_node.c	8.2 (Berkeley) 1/23/94
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/mount.h>
+#include <sys/proc.h>
+#include <sys/file.h>
+#include <sys/buf.h>
+#include <sys/vnode.h>
+#include <sys/kernel.h>
+#include <sys/malloc.h>
+#include <sys/stat.h>
+
+#include <isofs/cd9660/iso.h>
+#include <isofs/cd9660/cd9660_node.h>
+#include <isofs/cd9660/iso_rrip.h>
+
+#define	INOHSZ	512	/* number of inode hash chains */
+#if	((INOHSZ&(INOHSZ-1)) == 0)	/* power of two: mask instead of modulo */
+#define	INOHASH(dev,ino)	(((dev)+((ino)>>12))&(INOHSZ-1))
+#else
+#define	INOHASH(dev,ino)	(((unsigned)((dev)+((ino)>>12)))%INOHSZ)
+#endif
+
+union iso_ihead {	/* head of one circular inode hash chain */
+	union  iso_ihead *ih_head[2];
+	struct iso_node *ih_chain[2];
+} iso_ihead[INOHSZ];
+
+#ifdef	ISODEVMAP
+#define	DNOHSZ	64	/* number of device-map hash chains */
+#if	((DNOHSZ&(DNOHSZ-1)) == 0)	/* power of two: mask instead of modulo */
+#define	DNOHASH(dev,ino)	(((dev)+((ino)>>12))&(DNOHSZ-1))
+#else
+#define	DNOHASH(dev,ino)	(((unsigned)((dev)+((ino)>>12)))%DNOHSZ)
+#endif
+
+union iso_dhead {	/* head of one circular device-map hash chain */
+	union  iso_dhead  *dh_head[2];
+	struct iso_dnode *dh_chain[2];
+} iso_dhead[DNOHSZ];
+#endif
+
+int prtactive;	/* 1 => print out reclaim of active vnodes */
+
+/*
+ * Make every inode (and, with ISODEVMAP, dnode) hash chain empty.
+ */
+cd9660_init()
+{
+	register union iso_ihead *ihp;
+#ifdef	ISODEVMAP
+	register union iso_dhead *dhp;
+#endif
+	/* An empty circular chain is a head whose two links point at itself. */
+
+	for (ihp = iso_ihead; ihp < iso_ihead + INOHSZ; ihp++) {
+		ihp->ih_head[1] = ihp;
+		ihp->ih_head[0] = ihp;
+	}
+#ifdef	ISODEVMAP
+	for (dhp = iso_dhead; dhp < iso_dhead + DNOHSZ; dhp++) {
+		dhp->dh_head[1] = dhp;
+		dhp->dh_head[0] = dhp;
+	}
+#endif
+}
+
+#ifdef	ISODEVMAP
+/*
+ * Find the device-map node for (dev,ino).  If there is none, return 0
+ * unless "create" is set, in which case a new node is hashed and returned.
+ */
+struct iso_dnode *
+iso_dmap(dev,ino,create)
+	dev_t	dev;
+	ino_t	ino;
+	int	create;
+{
+	struct iso_dnode *dp;
+	union iso_dhead *dh;
+	
+	dh = &iso_dhead[DNOHASH(dev, ino)];
+	for (dp = dh->dh_chain[0];
+	     dp != (struct iso_dnode *)dh;	/* chain is circular: back at the head means not found */
+	     dp = dp->d_forw)
+		if (ino == dp->i_number && dev == dp->i_dev)
+			return dp;
+
+	if (!create)
+		return (struct iso_dnode *)0;
+
+	MALLOC(dp,struct iso_dnode *,sizeof(struct iso_dnode),M_CACHE,M_WAITOK);
+	dp->i_dev = dev;
+	dp->i_number = ino;	/* note: d_dev is not set here; the caller has to fill it in */
+	insque(dp,dh);
+	
+	return dp;
+}
+
+void	/* Drop every device-map node that belongs to "dev". */
+iso_dunmap(dev)
+	dev_t	dev;
+{
+	struct iso_dnode *cur, *next;
+	union iso_dhead *bucket;
+	
+	for (bucket = &iso_dhead[0]; bucket != &iso_dhead[DNOHSZ]; bucket++) {
+		cur = bucket->dh_chain[0];
+		while (cur != (struct iso_dnode *)bucket) {
+			next = cur->d_forw;	/* fetch the link before cur may be freed */
+			if (cur->i_dev == dev) {
+				remque(cur);
+				FREE(cur,M_CACHE);
+			}
+			cur = next;
+		}
+	}
+}
+#endif
+
+/*
+ * Find or create the in-core iso_node for (xp->i_dev, ino) and return it
+ * locked in *ipp; returns 0, or an errno with *ipp set to 0.  A new node
+ * is filled in from "isodir" or, when "relocated" is non-zero, from the
+ * `.' entry read out of the directory itself.  A node already in core is
+ * waited for until unlocked.  Callers must handle mount points themselves.
+ */
+iso_iget(xp, ino, relocated, ipp, isodir)
+	struct iso_node *xp;
+	ino_t ino;
+	int relocated;
+	struct iso_node **ipp;
+	struct iso_directory_record *isodir;
+{
+	dev_t dev = xp->i_dev;
+	struct mount *mntp = ITOV(xp)->v_mount;
+	register struct iso_node *ip, *iq;
+	register struct vnode *vp;
+	register struct iso_dnode *dp;
+	struct vnode *nvp;
+	struct buf *bp = NULL, *bp2 = NULL;
+	union iso_ihead *ih;
+	int i, error, result;
+	struct iso_mnt *imp;
+	ino_t defino;
+	
+	ih = &iso_ihead[INOHASH(dev, ino)];
+loop:
+	for (ip = ih->ih_chain[0];
+	     ip != (struct iso_node *)ih;
+	     ip = ip->i_forw) {
+		if (ino != ip->i_number || dev != ip->i_dev)
+			continue;
+		if ((ip->i_flag&ILOCKED) != 0) {
+			ip->i_flag |= IWANT;
+			sleep((caddr_t)ip, PINOD);
+			goto loop;	/* the chain may have changed while we slept */
+		}
+		if (vget(ITOV(ip), 1))
+			goto loop;
+		*ipp = ip;
+		return 0;
+	}
+	/*
+	 * Allocate a new vnode/iso_node.
+	 */
+	if (error = getnewvnode(VT_ISOFS, mntp, cd9660_vnodeop_p, &nvp)) {
+		*ipp = 0;
+		return error;
+	}
+	MALLOC(ip, struct iso_node *, sizeof(struct iso_node),
+	       M_ISOFSNODE, M_WAITOK);
+	bzero((caddr_t)ip, sizeof(struct iso_node));
+	nvp->v_data = ip;
+	ip->i_vnode = nvp;
+	ip->i_flag = 0;
+	ip->i_devvp = 0;
+	ip->i_diroff = 0;
+	ip->i_lockf = 0;
+	
+	/*
+	 * Put it onto its hash chain and lock it so that other requests for
+	 * this inode will block if they arrive while we are sleeping waiting
+	 * for old data structures to be purged or for the contents of the
+	 * disk portion of this inode to be read.
+	 */
+	ip->i_dev = dev;
+	ip->i_number = ino;
+	insque(ip, ih);
+	ISO_ILOCK(ip);
+
+	imp = VFSTOISOFS (mntp);
+	ip->i_mnt = imp;
+	ip->i_devvp = imp->im_devvp;
+	VREF(ip->i_devvp);
+	
+	if (relocated) {
+		/*
+		 * On relocated directories we must
+		 * read the `.' entry out of a dir.
+		 */
+		ip->iso_start = ino >> imp->im_bshift;
+		if (error = iso_blkatoff(ip,0,&bp)) {
+			vrele(ip->i_devvp);
+			ip->i_devvp = 0;	/* keep cd9660_reclaim from releasing it again */
+			remque(ip);
+			ip->i_forw = ip->i_back = ip;
+			iso_iput(ip);
+			*ipp = 0;
+			return error;
+		}
+		isodir = (struct iso_directory_record *)bp->b_un.b_addr;
+	}
+	
+	ip->iso_extent = isonum_733(isodir->extent);
+	ip->i_size = isonum_733(isodir->size);
+	ip->iso_start = isonum_711(isodir->ext_attr_length) + ip->iso_extent;
+	
+	vp = ITOV(ip);
+	
+	/*
+	 * Setup time stamp, attribute
+	 */
+	vp->v_type = VNON;
+	switch (imp->iso_ftype) {
+	default:	/* ISO_FTYPE_9660 */
+		if ((imp->im_flags&ISOFSMNT_EXTATT)
+		    && isonum_711(isodir->ext_attr_length))
+			iso_blkatoff(ip,-isonum_711(isodir->ext_attr_length)
+				     * imp->logical_block_size,&bp2);	/* byte offset, as in cd9660_defattr */
+		cd9660_defattr(isodir,ip,bp2 );
+		cd9660_deftstamp(isodir,ip,bp2 );
+		break;
+	case ISO_FTYPE_RRIP:
+		result = cd9660_rrip_analyze(isodir,ip,imp);
+		break;
+	}
+	if (bp2)
+		brelse(bp2);
+	if (bp)
+		brelse(bp);
+	
+	/*
+	 * Initialize the associated vnode
+	 */
+	vp->v_type = IFTOVT(ip->inode.iso_mode);
+	
+	if ( vp->v_type == VFIFO ) {
+#ifdef	FIFO
+		extern int (**cd9660_fifoop_p)();
+		vp->v_op = cd9660_fifoop_p;
+#else
+		iso_iput(ip);
+		*ipp = 0;
+		return EOPNOTSUPP;
+#endif	/* FIFO */
+	} else if ( vp->v_type == VCHR || vp->v_type == VBLK ) {
+		extern int (**cd9660_specop_p)();
+
+		/*
+		 * if device, look at device number table for translation
+		 */
+#ifdef	ISODEVMAP
+		if (dp = iso_dmap(dev,ino,0))
+			ip->inode.iso_rdev = dp->d_dev;
+#endif
+		vp->v_op = cd9660_specop_p;
+		if (nvp = checkalias(vp, ip->inode.iso_rdev, mntp)) {
+			/*
+			 * Reinitialize aliased inode.
+			 */
+			vp = nvp;
+			iq = VTOI(vp);
+			iq->i_vnode = vp;
+			iq->i_flag = 0;
+			ISO_ILOCK(iq);
+			iq->i_dev = dev;
+			iq->i_number = ino;
+			iq->i_mnt = ip->i_mnt;
+			bcopy(&ip->iso_extent,&iq->iso_extent,
+			      (char *)(ip + 1) - (char *)&ip->iso_extent);
+			insque(iq, ih);
+			/*
+			 * Discard unneeded vnode
+			 * (This introduces the need of INACTIVE modification)
+			 */
+			ip->inode.iso_mode = 0;	/* mode 0 makes cd9660_inactive vgone() it */
+			iso_iput(ip);
+			ip = iq;
+		}
+	}
+	
+	if (ip->iso_extent == imp->root_extent)
+		vp->v_flag |= VROOT;
+	
+	*ipp = ip;
+	return 0;
+}
+
+/*
+ * Unlock an inode and drop one reference to its vnode; panics if the inode is not locked.
+ */
+iso_iput(ip)
+	register struct iso_node *ip;
+{
+	
+	if ((ip->i_flag & ILOCKED) == 0)
+		panic("iso_iput");
+	ISO_IUNLOCK(ip);
+	vrele(ITOV(ip));
+}
+
+/*
+ * Last reference to an inode has gone: clear its flags and, if the node
+ * was marked dead (iso_mode == 0, see iso_iget), reclaim the vnode now.
+ */
+int
+cd9660_inactive(ap)
+	struct vop_inactive_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+	struct vnode *vp = ap->a_vp;
+	register struct iso_node *ip = VTOI(vp);
+	int mode, error = 0;	/* mode is unused; error is never changed, so 0 is always returned */
+	
+	if (prtactive && vp->v_usecount != 0)
+		vprint("cd9660_inactive: pushing active", vp);
+	
+	ip->i_flag = 0;
+	/*
+	 * If we are done with the inode, reclaim it
+	 * so that it can be reused immediately.
+	 */
+	if (vp->v_usecount == 0 && ip->inode.iso_mode == 0)
+		vgone(vp);
+	return error;
+}
+
+/*
+ * Reclaim an inode so that it can be used for other purposes: unhash it, release i_devvp and free the iso_node.
+ */
+int
+cd9660_reclaim(ap)
+	struct vop_reclaim_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+	register struct vnode *vp = ap->a_vp;
+	register struct iso_node *ip = VTOI(vp);
+	int i;
+	
+	if (prtactive && vp->v_usecount != 0)
+		vprint("cd9660_reclaim: pushing active", vp);
+	/*
+	 * Remove the inode from its hash chain.
+	 */
+	remque(ip);
+	ip->i_forw = ip;	/* self-linked, so a later remque() is harmless */
+	ip->i_back = ip;
+	/*
+	 * Purge old data structures associated with the inode.
+	 */
+	cache_purge(vp);
+	if (ip->i_devvp) {
+		vrele(ip->i_devvp);	/* drops the reference taken by VREF() in iso_iget */
+		ip->i_devvp = 0;
+	}
+	FREE(vp->v_data, M_ISOFSNODE);
+	vp->v_data = NULL;
+	return 0;
+}
+
+/*
+ * Lock an inode. If its already locked, set the WANT bit and sleep; panics if the caller already holds the lock.
+ */
+iso_ilock(ip)
+	register struct iso_node *ip;
+{
+	
+	while (ip->i_flag & ILOCKED) {
+		ip->i_flag |= IWANT;
+		if (ip->i_spare0 == curproc->p_pid)	/* i_spare0: pid of the current holder */
+			panic("locking against myself");
+		ip->i_spare1 = curproc->p_pid;	/* i_spare1: pid of the latest waiter */
+		(void) sleep((caddr_t)ip, PINOD);
+	}
+	ip->i_spare1 = 0;
+	ip->i_spare0 = curproc->p_pid;
+	ip->i_flag |= ILOCKED;
+}
+
+/*
+ * Unlock an inode.  If WANT bit is on, wakeup.  Unlocking an unlocked inode is only reported, not fatal.
+ */
+iso_iunlock(ip)
+	register struct iso_node *ip;
+{
+
+	if ((ip->i_flag & ILOCKED) == 0)
+		vprint("iso_iunlock: unlocked inode", ITOV(ip));
+	ip->i_spare0 = 0;	/* no holder any more (see iso_ilock) */
+	ip->i_flag &= ~ILOCKED;
+	if (ip->i_flag&IWANT) {
+		ip->i_flag &= ~IWANT;
+		wakeup((caddr_t)ip);
+	}
+}
+
+/*
+ * File attributes: set mode, link count, uid and gid of "inop" from "isodir" and, if present, the extended attributes ("bp", or read here when bp is 0).
+ */
+void
+cd9660_defattr(isodir,inop,bp)
+	struct iso_directory_record *isodir;
+	struct iso_node *inop;
+	struct buf *bp;
+{
+	struct buf *bp2 = NULL;
+	struct iso_mnt *imp;
+	struct iso_extended_attributes *ap = NULL;
+	int off;
+	
+	if (isonum_711(isodir->flags)&2) {
+		inop->inode.iso_mode = S_IFDIR;
+		/*
+		 * If we return 2, fts() will assume there are no subdirectories
+		 * (just links for the path and .), so instead we return 1.
+		 */
+		inop->inode.iso_links = 1;
+	} else {
+		inop->inode.iso_mode = S_IFREG;
+		inop->inode.iso_links = 1;
+	}
+	if (!bp
+	    && ((imp = inop->i_mnt)->im_flags&ISOFSMNT_EXTATT)
+	    && (off = isonum_711(isodir->ext_attr_length))) {
+		iso_blkatoff(inop,-off * imp->logical_block_size,&bp2);	/* negative offset: read before the file data */
+		bp = bp2;
+	}
+	if (bp) {
+		ap = (struct iso_extended_attributes *)bp->b_un.b_addr;
+		
+		if (isonum_711(ap->version) == 1) {
+			if (!(ap->perm[0]&0x40))	/* a clear perm[] bit grants the permission */
+				inop->inode.iso_mode |= VEXEC >> 6;
+			if (!(ap->perm[0]&0x10))
+				inop->inode.iso_mode |= VREAD >> 6;
+			if (!(ap->perm[0]&4))
+				inop->inode.iso_mode |= VEXEC >> 3;
+			if (!(ap->perm[0]&1))
+				inop->inode.iso_mode |= VREAD >> 3;
+			if (!(ap->perm[1]&0x40))
+				inop->inode.iso_mode |= VEXEC;
+			if (!(ap->perm[1]&0x10))
+				inop->inode.iso_mode |= VREAD;
+			inop->inode.iso_uid = isonum_723(ap->owner); /* what about 0? */
+			inop->inode.iso_gid = isonum_723(ap->group); /* what about 0? */
+		} else
+			ap = NULL;	/* unknown version: fall back to the defaults below */
+	}
+	if (!ap) {
+		inop->inode.iso_mode |= VREAD|VEXEC|(VREAD|VEXEC)>>3|(VREAD|VEXEC)>>6;
+		inop->inode.iso_uid = (uid_t)0;
+		inop->inode.iso_gid = (gid_t)0;
+	}
+	if (bp2)	/* only release a buffer that was read here, never the caller's */
+		brelse(bp2);
+}
+
+/*
+ * Time stamps: set atime/mtime/ctime of "inop" from the extended attributes ("bp", or read here when bp is 0), else from the directory record date.
+ */
+void
+cd9660_deftstamp(isodir,inop,bp)
+	struct iso_directory_record *isodir;
+	struct iso_node *inop;
+	struct buf *bp;
+{
+	struct buf *bp2 = NULL;
+	struct iso_mnt *imp;
+	struct iso_extended_attributes *ap = NULL;
+	int off;
+	
+	if (!bp
+	    && ((imp = inop->i_mnt)->im_flags&ISOFSMNT_EXTATT)
+	    && (off = isonum_711(isodir->ext_attr_length))) {
+		iso_blkatoff(inop,-off * imp->logical_block_size,&bp2);
+		bp = bp2;
+	}
+	if (bp) {
+		ap = (struct iso_extended_attributes *)bp->b_un.b_addr;
+		
+		if (isonum_711(ap->version) == 1) {
+			if (!cd9660_tstamp_conv17(ap->ftime,&inop->inode.iso_atime))	/* each stamp falls back to another when conversion returns 0 */
+				cd9660_tstamp_conv17(ap->ctime,&inop->inode.iso_atime);
+			if (!cd9660_tstamp_conv17(ap->ctime,&inop->inode.iso_ctime))
+				inop->inode.iso_ctime = inop->inode.iso_atime;
+			if (!cd9660_tstamp_conv17(ap->mtime,&inop->inode.iso_mtime))
+				inop->inode.iso_mtime = inop->inode.iso_ctime;
+		} else
+			ap = NULL;
+	}
+	if (!ap) {
+		cd9660_tstamp_conv7(isodir->date,&inop->inode.iso_ctime);
+		inop->inode.iso_atime = inop->inode.iso_ctime;
+		inop->inode.iso_mtime = inop->inode.iso_ctime;
+	}
+	if (bp2)	/* only release a buffer that was read here, never the caller's */
+		brelse(bp2);
+}
+
+/* Convert the 7-byte binary ISO date "pi" to GMT seconds in *pu; returns 0 (with *pu zeroed) if before 1970, else 1. */
+int
+cd9660_tstamp_conv7(pi,pu)
+char *pi;
+struct timeval *pu;
+{
+	long crtime;
+	int days, y, m, d, hour, minute, second, tz;
+	
+	y = (unsigned char)pi[0] + 1900;	/* years since 1900, 0-255: must not go negative */
+	m = pi[1];
+	d = pi[2];
+	hour = pi[3];
+	minute = pi[4];
+	second = pi[5];
+	tz = (signed char)pi[6];	/* signed count of 15 minute intervals from GMT */
+	
+	if (y < 1970) {
+		pu->tv_sec  = 0;
+		pu->tv_usec = 0;
+		return 0;
+	} else {
+#ifdef	ORIGINAL
+		/* computes day number relative to Sept. 19th,1989 */
+		/* don't even *THINK* about changing formula. It works! */
+		days = 367*(y-1980)-7*(y+(m+9)/12)/4-3*((y+(m-9)/7)/100+1)/4+275*m/9+d-100;
+#else
+		/*
+		 * Changed :-) to make it relative to Jan. 1st, 1970
+		 * and to disambiguate negative division
+		 */
+		days = 367*(y-1960)-7*(y+(m+9)/12)/4-3*((y+(m+9)/12-1)/100+1)/4+275*m/9+d-239;
+#endif
+		crtime = (((((long)days * 24) + hour) * 60 + minute) * 60) + second;
+		/* the recorded time is local and the offset is east-positive: subtract to get GMT */
+		/* timezone offset is unreliable on some disks */
+		if (-48 <= tz && tz <= 52)
+			crtime -= tz * 15 * 60;
+	}
+	pu->tv_sec  = crtime;
+	pu->tv_usec = 0;
+	return 1;
+}
+
+/* Parse "len" ASCII decimal digits starting at "begin"; the characters are not validated. */
+static unsigned
+cd9660_chars2ui(begin,len)
+	unsigned char *begin;
+	int len;
+{
+	unsigned rc;
+	
+	for (rc = 0; --len >= 0;)
+		rc = rc * 10 + (*begin++ - '0');
+	return rc;
+}
+
+/* Convert the 17-byte ASCII ISO date "pi" via cd9660_tstamp_conv7(); same return convention. */
+int
+cd9660_tstamp_conv17(pi,pu)
+	unsigned char *pi;
+	struct timeval *pu;
+{
+	unsigned char buf[7];
+	unsigned year = cd9660_chars2ui(pi,4);
+	
+	/* year:"0001"-"9999" -> -1900; unset ("0000") or unrepresentable years yield 0 */
+	if (year < 1970 || year > 1900 + 255) {
+		pu->tv_sec  = 0;
+		pu->tv_usec = 0;
+		return 0;
+	}
+	buf[0] = year - 1900;
+	
+	/* month: " 1"-"12"      -> 1 - 12 */
+	buf[1] = cd9660_chars2ui(pi + 4,2);
+	/* day:   " 1"-"31"      -> 1 - 31 */
+	buf[2] = cd9660_chars2ui(pi + 6,2);
+	/* hour:  " 0"-"23"      -> 0 - 23 */
+	buf[3] = cd9660_chars2ui(pi + 8,2);
+	/* minute:" 0"-"59"      -> 0 - 59 */
+	buf[4] = cd9660_chars2ui(pi + 10,2);
+	/* second:" 0"-"59"      -> 0 - 59 */
+	buf[5] = cd9660_chars2ui(pi + 12,2);
+	/* difference of GMT */
+	buf[6] = pi[16];
+	return cd9660_tstamp_conv7((char *)buf,pu);
+}
+
+void
+isodirino(inump,isodir,imp)
+	ino_t *inump;
+	struct iso_directory_record *isodir;
+	struct iso_mnt *imp;
+{
+	/* inode number: (extent + extended attribute length) scaled by the logical block size */
+	*inump = imp->logical_block_size * (isonum_711(isodir->ext_attr_length) + isonum_733(isodir->extent));
+}
diff --git a/sys/isofs/cd9660/cd9660_node.h b/sys/isofs/cd9660/cd9660_node.h
new file mode 100644
index 00000000000..45de67f1a6b
--- /dev/null
+++ b/sys/isofs/cd9660/cd9660_node.h
@@ -0,0 +1,143 @@
+/*-
+ * Copyright (c) 1994
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley
+ * by Pace Willisson (pace@blitz.com).  The Rock Ridge Extension
+ * Support code is derived from software contributed to Berkeley
+ * by Atsushi Murai (amurai@spec.co.jp).
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)cd9660_node.h	8.2 (Berkeley) 1/23/94
+ */
+
+/*
+ * Theoretically, directories can be more than 2Gb in length,
+ * however, in practice this seems unlikely. So, we define
+ * the type doff_t as a long to keep down the cost of doing
+ * lookup on a 32-bit machine. If you are porting to a 64-bit
+ * architecture, you should make doff_t the same as off_t.
+ */
+#define doff_t	long
+
+typedef	struct	{
+	struct timespec	iso_atime;	/* time of last access */
+	struct timespec	iso_mtime;	/* time of last modification */
+	struct timespec	iso_ctime;	/* time file changed */
+	u_short		iso_mode;	/* files access mode and type */
+	uid_t		iso_uid;	/* owner user id */
+	gid_t		iso_gid;	/* owner group id */
+	short		iso_links;	/* links of file */
+	dev_t		iso_rdev;	/* Major/Minor number for special */
+} ISO_RRIP_INODE;
+
+#ifdef	ISODEVMAP
+/*
+ * For device# (major,minor) translation table
+ */
+struct iso_dnode {
+	struct iso_dnode *d_chain[2];	/* hash chain, MUST be first */
+	dev_t		i_dev;		/* device where dnode resides */
+	ino_t		i_number;	/* the identity of the inode */
+	dev_t		d_dev;		/* device # for translation */
+};
+#define	d_forw		d_chain[0]
+#define	d_back		d_chain[1]
+#endif
+
+struct iso_node {
+	struct	iso_node *i_chain[2]; /* hash chain, MUST be first */
+	struct	vnode *i_vnode;	/* vnode associated with this inode */
+	struct	vnode *i_devvp;	/* vnode for block I/O */
+	u_long	i_flag;		/* see below */
+	dev_t	i_dev;		/* device where inode resides */
+	ino_t	i_number;	/* the identity of the inode */
+				/* we use the actual starting block of the file */
+	struct	iso_mnt *i_mnt;	/* filesystem associated with this inode */
+	struct	lockf *i_lockf;	/* head of byte-level lock list */
+	doff_t	i_endoff;	/* end of useful stuff in directory */
+	doff_t	i_diroff;	/* offset in dir, where we found last entry */
+	doff_t	i_offset;	/* offset of free space in directory */
+	ino_t	i_ino;		/* inode number of found directory */
+	long	i_spare0;	/* pid of the lock holder, 0 if unlocked (see iso_ilock) */
+	long	i_spare1;	/* pid of the latest process sleeping on the lock */
+
+	long iso_extent;	/* extent of file */
+	long i_size;		/* size taken from the directory record */
+	long iso_start;		/* actual start of data of file (may be different */
+				/* from iso_extent, if file has extended attributes) */
+	ISO_RRIP_INODE  inode;
+};
+
+#define	i_forw		i_chain[0]
+#define	i_back		i_chain[1]
+
+/* flags */
+#define	ILOCKED		0x0001		/* inode is locked */
+#define	IWANT		0x0002		/* some process waiting on lock */
+#define	IACC		0x0020		/* inode access time to be updated */
+
+#define VTOI(vp) ((struct iso_node *)(vp)->v_data)
+#define ITOV(ip) ((ip)->i_vnode)
+
+#define ISO_ILOCK(ip)	iso_ilock(ip)
+#define ISO_IUNLOCK(ip)	iso_iunlock(ip)
+
+/*
+ * Prototypes for ISOFS vnode operations
+ */
+int cd9660_lookup __P((struct vop_lookup_args *));
+int cd9660_open __P((struct vop_open_args *));
+int cd9660_close __P((struct vop_close_args *));
+int cd9660_access __P((struct vop_access_args *));
+int cd9660_getattr __P((struct vop_getattr_args *));
+int cd9660_read __P((struct vop_read_args *));
+int cd9660_ioctl __P((struct vop_ioctl_args *));
+int cd9660_select __P((struct vop_select_args *));
+int cd9660_mmap __P((struct vop_mmap_args *));
+int cd9660_seek __P((struct vop_seek_args *));
+int cd9660_readdir __P((struct vop_readdir_args *));
+int cd9660_abortop __P((struct vop_abortop_args *));
+int cd9660_inactive __P((struct vop_inactive_args *));
+int cd9660_reclaim __P((struct vop_reclaim_args *));
+int cd9660_bmap __P((struct vop_bmap_args *));
+int cd9660_lock __P((struct vop_lock_args *));
+int cd9660_unlock __P((struct vop_unlock_args *));
+int cd9660_strategy __P((struct vop_strategy_args *));
+int cd9660_print __P((struct vop_print_args *));
+int cd9660_islocked __P((struct vop_islocked_args *));
+void cd9660_defattr __P((struct iso_directory_record *,
+			struct iso_node *, struct buf *));
+void cd9660_deftstamp __P((struct iso_directory_record *,
+			struct iso_node *, struct buf *));
+#ifdef	ISODEVMAP
+struct iso_dnode *iso_dmap __P((dev_t, ino_t, int));
+void iso_dunmap __P((dev_t));
+#endif
diff --git a/sys/isofs/cd9660/cd9660_rrip.c b/sys/isofs/cd9660/cd9660_rrip.c
new file mode 100644
index 00000000000..0923fa01477
--- /dev/null
+++ b/sys/isofs/cd9660/cd9660_rrip.c
@@ -0,0 +1,685 @@
+/*-
+ * Copyright (c) 1993, 1994
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley
+ * by Pace Willisson (pace@blitz.com).  The Rock Ridge Extension
+ * Support code is derived from software contributed to Berkeley
+ * by Atsushi Murai (amurai@spec.co.jp).
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)cd9660_rrip.c	8.2 (Berkeley) 1/23/94
+ */
+
+#include <sys/param.h>
+#include <sys/namei.h>
+#include <sys/buf.h>
+#include <sys/file.h>
+#include <sys/vnode.h>
+#include <sys/mount.h>
+#include <sys/kernel.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+
+#include <sys/time.h>
+
+#include <isofs/cd9660/iso.h>
+#include <isofs/cd9660/cd9660_node.h>
+#include <isofs/cd9660/cd9660_rrip.h>
+#include <isofs/cd9660/iso_rrip.h>
+
+/*
+ * POSIX file attribute: copy mode, uid, gid and link count from the entry into the inode.
+ */
+static int
+cd9660_rrip_attr(p,ana)
+	ISO_RRIP_ATTR *p;
+	ISO_RRIP_ANALYZE *ana;
+{
+	ana->inop->inode.iso_mode = isonum_731(p->mode_l);
+	ana->inop->inode.iso_uid = (uid_t)isonum_731(p->uid_l);
+	ana->inop->inode.iso_gid = (gid_t)isonum_731(p->gid_l);
+	ana->inop->inode.iso_links = isonum_731(p->links_l);
+	ana->fields &= ~ISO_SUSP_ATTR;	/* this field is no longer wanted */
+	return ISO_SUSP_ATTR;
+}
+
+static void
+cd9660_rrip_defattr(isodir,ana)
+	struct iso_directory_record *isodir;
+	ISO_RRIP_ANALYZE *ana;
+{
+	/* But this is a required field!  Complain, then use the plain ISO attributes. */
+	printf("RRIP without PX field?\n");
+	cd9660_defattr(isodir,ana->inop,NULL);
+}
+
+/*
+ * Symbolic Links: append the components of one entry to ana->outbuf; returns ISO_SUSP_SLINK once the link is complete, else 0.
+ */
+static int
+cd9660_rrip_slink(p,ana)
+	ISO_RRIP_SLINK  *p;
+	ISO_RRIP_ANALYZE *ana;
+{
+	register ISO_RRIP_SLINK_COMPONENT *pcomp;
+	register ISO_RRIP_SLINK_COMPONENT *pcompe;
+	int len, wlen, cont;
+	char *outbuf, *inbuf;
+	
+	pcomp = (ISO_RRIP_SLINK_COMPONENT *)p->component;
+	pcompe = (ISO_RRIP_SLINK_COMPONENT *)((char *)p + isonum_711(p->h.length));
+	len = *ana->outlen;
+	outbuf = ana->outbuf;
+	cont = ana->cont;
+	
+	/*
+	 * Gathering a Symbolic name from each component with path
+	 */
+	for (;
+	     pcomp < pcompe;
+	     pcomp = (ISO_RRIP_SLINK_COMPONENT *)((char *)pcomp + ISO_RRIP_SLSIZ
+						  + isonum_711(pcomp->clen))) {
+		
+		if (!cont) {	/* previous component was complete: separate with a slash */
+			if (len < ana->maxlen) {
+				len++;
+				*outbuf++ = '/';
+			}
+		}
+		cont = 0;
+		
+		inbuf = "..";	/* source for both "." (wlen 1) and ".." (wlen 2) */
+		wlen = 0;
+		
+		switch (*pcomp->cflag) {
+			
+		case ISO_SUSP_CFLAG_CURRENT:
+			/* Inserting Current */
+			wlen = 1;
+			break;
+			
+		case ISO_SUSP_CFLAG_PARENT:
+			/* Inserting Parent */
+			wlen = 2;
+			break;
+			
+		case ISO_SUSP_CFLAG_ROOT:
+			/* Inserting slash for ROOT */
+			/* start over from beginning(?) */
+			outbuf -= len;
+			len = 0;
+			break;
+			
+		case ISO_SUSP_CFLAG_VOLROOT:
+			/* Inserting a mount point i.e. "/cdrom" */
+			/* same as above */
+			outbuf -= len;
+			len = 0;
+			inbuf = ana->imp->im_mountp->mnt_stat.f_mntonname;
+			wlen = strlen(inbuf);
+			break;
+			
+		case ISO_SUSP_CFLAG_HOST:
+			/* Inserting hostname i.e. "kurt.tools.de" */
+			inbuf = hostname;
+			wlen = hostnamelen;
+			break;
+			
+		case ISO_SUSP_CFLAG_CONTINUE:
+			cont = 1;
+			/* fall thru */
+		case 0:
+			/* Inserting component */
+			wlen = isonum_711(pcomp->clen);
+			inbuf = pcomp->name;
+			break;
+		default:
+			printf("RRIP with incorrect flags?");
+			wlen = ana->maxlen + 1;	/* forces the overflow error below */
+			break;
+		}
+		
+		if (len + wlen > ana->maxlen) {
+			/* indicate error to caller */
+			ana->cont = 1;
+			ana->fields = 0;
+			ana->outbuf -= *ana->outlen;
+			*ana->outlen = 0;
+			return 0;
+		}
+		
+		bcopy(inbuf,outbuf,wlen);
+		outbuf += wlen;
+		len += wlen;
+		
+	}
+	ana->outbuf = outbuf;
+	*ana->outlen = len;
+	ana->cont = cont;
+	
+	if (!isonum_711(p->flags)) {	/* flags == 0: no further entry continues this link */
+		ana->fields &= ~ISO_SUSP_SLINK;
+		return ISO_SUSP_SLINK;
+	}
+	return 0;
+}
+
+/*
+ * Alternate name: append one entry's name part to ana->outbuf; returns ISO_SUSP_ALTNAME when the name is complete, else 0.
+ */
+static int
+cd9660_rrip_altname(p,ana)
+	ISO_RRIP_ALTNAME *p;
+	ISO_RRIP_ANALYZE *ana;
+{
+	char *inbuf;
+	int wlen;
+	int cont;
+	
+	inbuf = "..";	/* source for both "." (wlen 1) and ".." (wlen 2) */
+	wlen = 0;
+	cont = 0;
+	
+	switch (*p->flags) {
+	case ISO_SUSP_CFLAG_CURRENT:
+		/* Inserting Current */
+		wlen = 1;
+		break;
+		
+	case ISO_SUSP_CFLAG_PARENT:
+		/* Inserting Parent */
+		wlen = 2;
+		break;
+		
+	case ISO_SUSP_CFLAG_HOST:
+		/* Inserting hostname i.e. "kurt.tools.de" */
+		inbuf = hostname;
+		wlen = hostnamelen;
+		break;
+		
+	case ISO_SUSP_CFLAG_CONTINUE:
+		cont = 1;
+		/* fall thru */
+	case 0:
+		/* Inserting component; an entry shorter than its header is rejected below */
+		if ((wlen = isonum_711(p->h.length) - 5) < 0)
+			wlen = ana->maxlen + 1;
+		inbuf = (char *)p + 5;
+		break;
+	default:
+		printf("RRIP with incorrect NM flags?\n");
+		wlen = ana->maxlen + 1;	/* forces the overflow path below */
+		break;
+	}
+	
+	if ((*ana->outlen += wlen) > ana->maxlen) {
+		/* treat as no name field */
+		ana->fields &= ~ISO_SUSP_ALTNAME;
+		ana->outbuf -= *ana->outlen - wlen;
+		*ana->outlen = 0;
+		return 0;
+	}
+	
+	bcopy(inbuf,ana->outbuf,wlen);
+	ana->outbuf += wlen;
+	
+	if (!cont) {
+		ana->fields &= ~ISO_SUSP_ALTNAME;
+		return ISO_SUSP_ALTNAME;
+	}
+	return 0;
+}
+
+static void
+cd9660_rrip_defname(isodir,ana)
+	struct iso_directory_record *isodir;
+	ISO_RRIP_ANALYZE *ana;
+{
+	strcpy(ana->outbuf,"..");	/* preset; cases 0 and 1 below only pick the length */
+	switch (*isodir->name) {
+	default:
+		isofntrans(isodir->name,isonum_711(isodir->name_len),
+			   ana->outbuf,ana->outlen,
+			   1,isonum_711(isodir->flags)&4);
+		break;
+	case 0:
+		*ana->outlen = 1;	/* "." */
+		break;
+	case 1:
+		*ana->outlen = 2;	/* ".." */
+		break;
+	}
+}
+
+/*
+ * Parent or Child Link: store the linked directory's inode number (dir_loc << im_bshift) in *ana->inump.
+ */
+static int
+cd9660_rrip_pclink(p,ana)
+	ISO_RRIP_CLINK  *p;
+	ISO_RRIP_ANALYZE *ana;
+{
+	*ana->inump = isonum_733(p->dir_loc) << ana->imp->im_bshift;
+	ana->fields &= ~(ISO_SUSP_CLINK|ISO_SUSP_PLINK);
+	return *p->h.type == 'C' ? ISO_SUSP_CLINK : ISO_SUSP_PLINK;	/* first type byte tells child from parent */
+}
+
+/*
+ * Relocated directory: zero the output length, stop further analysis and report RELDIR plus name and link bits.
+ */
+static int
+cd9660_rrip_reldir(p,ana)
+	ISO_RRIP_RELDIR  *p;
+	ISO_RRIP_ANALYZE *ana;
+{
+	/* special hack to make caller aware of RE field */
+	*ana->outlen = 0;
+	ana->fields = 0;
+	return ISO_SUSP_RELDIR|ISO_SUSP_ALTNAME|ISO_SUSP_CLINK|ISO_SUSP_PLINK;
+}
+
+static int
+cd9660_rrip_tstamp(p,ana)
+	ISO_RRIP_TSTAMP *p;
+	ISO_RRIP_ANALYZE *ana;
+{
+	unsigned char *ptime;
+	/* Stamps are read in this order, each only if its flag is set: creation (skipped), modify, access, attributes. */
+	ptime = p->time;
+	
+	/* Check a format of time stamp (7bytes/17bytes) */
+	if (!(*p->flags&ISO_SUSP_TSTAMP_FORM17)) {
+		if (*p->flags&ISO_SUSP_TSTAMP_CREAT)
+			ptime += 7;
+		
+		if (*p->flags&ISO_SUSP_TSTAMP_MODIFY) {
+			cd9660_tstamp_conv7(ptime,&ana->inop->inode.iso_mtime);
+			ptime += 7;
+		} else
+			bzero(&ana->inop->inode.iso_mtime,sizeof(struct timeval));
+		
+		if (*p->flags&ISO_SUSP_TSTAMP_ACCESS) {
+			cd9660_tstamp_conv7(ptime,&ana->inop->inode.iso_atime);
+			ptime += 7;
+		} else
+			ana->inop->inode.iso_atime = ana->inop->inode.iso_mtime;
+		
+		if (*p->flags&ISO_SUSP_TSTAMP_ATTR)
+			cd9660_tstamp_conv7(ptime,&ana->inop->inode.iso_ctime);
+		else
+			ana->inop->inode.iso_ctime = ana->inop->inode.iso_mtime;
+		
+	} else {
+		if (*p->flags&ISO_SUSP_TSTAMP_CREAT)
+			ptime += 17;
+		
+		if (*p->flags&ISO_SUSP_TSTAMP_MODIFY) {
+			cd9660_tstamp_conv17(ptime,&ana->inop->inode.iso_mtime);
+			ptime += 17;
+		} else
+			bzero(&ana->inop->inode.iso_mtime,sizeof(struct timeval));
+		
+		if (*p->flags&ISO_SUSP_TSTAMP_ACCESS) {
+			cd9660_tstamp_conv17(ptime,&ana->inop->inode.iso_atime);
+			ptime += 17;
+		} else
+			ana->inop->inode.iso_atime = ana->inop->inode.iso_mtime;
+		
+		if (*p->flags&ISO_SUSP_TSTAMP_ATTR)
+			cd9660_tstamp_conv17(ptime,&ana->inop->inode.iso_ctime);
+		else
+			ana->inop->inode.iso_ctime = ana->inop->inode.iso_mtime;
+		
+	}
+	ana->fields &= ~ISO_SUSP_TSTAMP;
+	return ISO_SUSP_TSTAMP;
+}
+
+static void
+cd9660_rrip_deftstamp(isodir,ana)
+	struct iso_directory_record  *isodir;
+	ISO_RRIP_ANALYZE *ana;
+{
+	cd9660_deftstamp(isodir,ana->inop,NULL);	/* NULL: no extended attribute buffer is supplied */
+}
+
+/*
+ * "PN" entry: POSIX device number
+ *
+ * When the high word is zero the low word carries both major and
+ * minor; otherwise the high word is the major number and the minor
+ * comes from the low word.
+ */
+static int
+cd9660_rrip_device(p,ana)
+	ISO_RRIP_DEVICE *p;
+	ISO_RRIP_ANALYZE *ana;
+{
+	unsigned high, low;
+
+	high = isonum_733(p->dev_t_high_l);
+	low  = isonum_733(p->dev_t_low_l);
+
+	if (high != 0)
+		ana->inop->inode.iso_rdev = makedev(high, minor(low));
+	else
+		ana->inop->inode.iso_rdev = makedev(major(low), minor(low));
+
+	ana->fields &= ~ISO_SUSP_DEVICE;
+	return (ISO_SUSP_DEVICE);
+}
+
+/*
+ * "RR" entry: flag byte
+ *
+ * Only those low-byte bits of ana->fields that are also set in the
+ * flag byte stay set; bits above the low byte are left alone.
+ */
+static int
+cd9660_rrip_idflag(p,ana)
+	ISO_RRIP_IDFLAG *p;
+	ISO_RRIP_ANALYZE *ana;
+{
+	/* don't touch high bits */
+	ana->fields &= isonum_711(p->flags)|~0xff;
+
+	/* special handling of RE field */
+	return (ana->fields&ISO_SUSP_RELDIR)
+	    ? cd9660_rrip_reldir(p,ana)
+	    : ISO_SUSP_IDFLAG;
+}
+
+/*
+ * "CE" entry: continuation pointer
+ *
+ * Records block, offset and length of the continuation area in the
+ * analyze state; cd9660_rrip_loop() picks them up afterwards.
+ */
+static int
+cd9660_rrip_cont(p,ana)
+	ISO_RRIP_CONT *p;
+	ISO_RRIP_ANALYZE *ana;
+{
+	ana->iso_ce_len = isonum_733(p->length);
+	ana->iso_ce_off = isonum_733(p->offset);
+	ana->iso_ce_blk = isonum_733(p->location);
+	return (ISO_SUSP_CONT);
+}
+
+/*
+ * "ST" entry: end of the System Use entries
+ */
+static int
+cd9660_rrip_stop(p,ana)
+	ISO_SUSP_HEADER *p;
+	ISO_RRIP_ANALYZE *ana;
+{
+	/* nothing left to look for, which makes the scan loop quit */
+	ana->fields = 0;
+	return (ISO_SUSP_STOP);
+}
+
+/*
+ * "ER" entry: extension reference
+ *
+ * Accepted only when the identifier is the 10-byte string
+ * "RRIP_1991A" (found 8 bytes into the entry) and the extension
+ * version is 1; anything else yields 0.
+ */
+static int
+cd9660_rrip_extref(p,ana)
+	ISO_RRIP_EXTREF *p;
+	ISO_RRIP_ANALYZE *ana;
+{
+	if (isonum_711(p->len_id) == 10
+	    && isonum_711(p->version) == 1
+	    && bcmp((char *)p + 8,"RRIP_1991A",10) == 0) {
+		ana->fields &= ~ISO_SUSP_EXTREF;
+		return (ISO_SUSP_EXTREF);
+	}
+	return (0);
+}
+
+/*
+ * One row of a dispatch table handed to cd9660_rrip_loop().
+ * A row whose func is 0 terminates the table.
+ */
+typedef struct {
+	char type[2];		/* two-character entry signature, no NUL */
+	int (*func)();		/* called as func(entry, ana) on a match */
+	void (*func2)();	/* default, called as func2(isodir, ana) */
+	int result;		/* func2 runs if none of these bits were returned */
+} RRIP_TABLE;
+
+/*
+ * Walk the System Use entries of a directory record and dispatch each
+ * one through "table".
+ *
+ * isodir	directory record whose System Use area is scanned
+ * ana		analyze state; scanning stops once ana->fields is 0
+ * table	rows terminated by a row with func == 0
+ *
+ * Entries start after the name (plus its pad byte and the mount's
+ * skip count).  When a handler has recorded a continuation area
+ * (ana->iso_ce_len != 0) and fields are still wanted, that area is
+ * read from the device and scanned as well.  Afterwards the func2
+ * defaults of the leading table rows are run for results that never
+ * showed up.
+ *
+ * Returns the OR of all handler results.
+ */
+static int
+cd9660_rrip_loop(isodir,ana,table)
+	struct iso_directory_record *isodir;
+	ISO_RRIP_ANALYZE *ana;
+	RRIP_TABLE *table;
+{
+	register RRIP_TABLE *ptable;
+	register ISO_SUSP_HEADER *phead;
+	register ISO_SUSP_HEADER *pend;
+	struct buf *bp = NULL;
+	char *pwhead;
+	int result;
+	
+	/*
+	 * Note: If name length is even,
+	 *       there is 1 byte of padding after the name
+	 */
+	pwhead = isodir->name + isonum_711(isodir->name_len);
+	if (!(isonum_711(isodir->name_len)&1))
+		pwhead++;
+	
+	/* If it's not the '.' entry of the root dir obey SP field */
+	if (*isodir->name != 0
+	    || isonum_733(isodir->extent) != ana->imp->root_extent)
+		pwhead += ana->imp->rr_skip;
+	else
+		pwhead += ana->imp->rr_skip0;
+	
+	phead = (ISO_SUSP_HEADER *)pwhead;
+	pend = (ISO_SUSP_HEADER *)((char *)isodir + isonum_711(isodir->length));
+	
+	result = 0;
+	while (1) {
+		ana->iso_ce_len = 0;
+		/*
+		 * Note: "pend" should be more than one SUSP header
+		 */
+		while (pend >= phead + 1) {
+			if (isonum_711(phead->version) == 1) {
+				for (ptable = table; ptable->func; ptable++) {
+					if (*phead->type == *ptable->type
+					    && phead->type[1] == ptable->type[1]) {
+						result |= ptable->func(phead,ana);
+						break;
+					}
+				}
+				if (!ana->fields)
+					break;
+			}
+			/*
+			 * Plausibility check: an entry shorter than its own
+			 * header (zero padding, damaged disc) would leave
+			 * phead where it is and spin forever.
+			 */
+			if (isonum_711(phead->length) < (int)sizeof(*phead))
+				break;
+			/*
+			 * move to next SUSP
+			 * Hopefully this works with newer versions, too
+			 */
+			phead = (ISO_SUSP_HEADER *)((char *)phead + isonum_711(phead->length));
+		}
+		
+		/* done unless fields are missing and a continuation exists */
+		if (!ana->fields || !ana->iso_ce_len)
+			break;
+		if (ana->iso_ce_blk >= ana->imp->volume_space_size
+		    || ana->iso_ce_off + ana->iso_ce_len > ana->imp->logical_block_size)
+			/* what to do now? */
+			break;
+		/*
+		 * The previous continuation block has been scanned
+		 * completely; release it before bread() overwrites bp.
+		 */
+		if (bp) {
+			brelse(bp);
+			bp = NULL;
+		}
+		if (bread(ana->imp->im_devvp,
+			  ana->iso_ce_blk * ana->imp->logical_block_size / DEV_BSIZE,
+			  ana->imp->logical_block_size,NOCRED,&bp))
+			/* what to do now? */
+			break;
+		phead = (ISO_SUSP_HEADER *)(bp->b_un.b_addr + ana->iso_ce_off);
+		pend = (ISO_SUSP_HEADER *) ((char *)phead + ana->iso_ce_len);
+	}
+	if (bp)
+		brelse(bp);
+	/*
+	 * If we don't find the Basic SUSP stuffs, just set default value
+	 *   ( attribute/time stamp )
+	 * Note that this stops at the first row without a func2.
+	 */
+	for (ptable = table; ptable->func2; ptable++)
+		if (!(ptable->result&result))
+			ptable->func2(isodir,ana);
+	
+	return result;
+}
+
+/*
+ * Dispatch table used by cd9660_rrip_analyze().
+ * Rows with a default handler (third column) must stay in front:
+ * cd9660_rrip_loop() stops running defaults at the first row without one.
+ */
+static RRIP_TABLE rrip_table_analyze[] = {
+	{ "PX", cd9660_rrip_attr,	cd9660_rrip_defattr,	ISO_SUSP_ATTR },
+	{ "TF", cd9660_rrip_tstamp,	cd9660_rrip_deftstamp,	ISO_SUSP_TSTAMP },
+	{ "PN", cd9660_rrip_device,	0,			ISO_SUSP_DEVICE },
+	{ "RR", cd9660_rrip_idflag,	0,			ISO_SUSP_IDFLAG },
+	{ "CE", cd9660_rrip_cont,	0,			ISO_SUSP_CONT },
+	{ "ST", cd9660_rrip_stop,	0,			ISO_SUSP_STOP },
+	{ "",	0,			0,			0 }
+};
+
+/*
+ * Collect attributes, time stamps and device number of a directory
+ * record into "inop", using rrip_table_analyze.
+ * Returns the result bits of cd9660_rrip_loop().
+ */
+int
+cd9660_rrip_analyze(isodir,inop,imp)
+	struct iso_directory_record *isodir;
+	struct iso_node *inop;
+	struct iso_mnt *imp;
+{
+	ISO_RRIP_ANALYZE analyze;
+
+	analyze.fields = ISO_SUSP_ATTR|ISO_SUSP_TSTAMP|ISO_SUSP_DEVICE;
+	analyze.imp = imp;
+	analyze.inop = inop;
+
+	return (cd9660_rrip_loop(isodir,&analyze,rrip_table_analyze));
+}
+
+/* 
+ * Get Alternate Name from 'NM' record 
+ * If there is either no NM record or it has 0 length, 
+ *    the translated ISO9660 name is returned instead.
+ *
+ * The "NM" row must stay first: cd9660_rrip_getname() skips it with
+ * tab++ for the '.' and '..' entries.
+ */
+static RRIP_TABLE rrip_table_getname[] = {
+	{ "NM", cd9660_rrip_altname,	cd9660_rrip_defname,	ISO_SUSP_ALTNAME },
+	{ "CL", cd9660_rrip_pclink,	0,			ISO_SUSP_CLINK|ISO_SUSP_PLINK },
+	{ "PL", cd9660_rrip_pclink,	0,			ISO_SUSP_CLINK|ISO_SUSP_PLINK },
+	{ "RE", cd9660_rrip_reldir,	0,			ISO_SUSP_RELDIR },
+	{ "RR", cd9660_rrip_idflag,	0,			ISO_SUSP_IDFLAG },
+	{ "CE", cd9660_rrip_cont,	0,			ISO_SUSP_CONT },
+	{ "ST", cd9660_rrip_stop,	0,			ISO_SUSP_STOP },
+	{ "",	0,			0,			0 }
+};
+
+/*
+ * Fetch the name of a directory entry into outbuf/outlen, following
+ * rrip_table_getname.  For the two special entries whose first name
+ * byte is 0 or 1 the default name is produced right away and the
+ * "NM" row of the table is skipped.
+ * Returns the result bits of cd9660_rrip_loop().
+ */
+int
+cd9660_rrip_getname(isodir,outbuf,outlen,inump,imp)
+	struct iso_directory_record *isodir;
+	char *outbuf;
+	u_short *outlen;
+	ino_t *inump;
+	struct iso_mnt *imp;
+{
+	ISO_RRIP_ANALYZE analyze;
+	RRIP_TABLE *tab = rrip_table_getname;
+
+	*outlen = 0;
+	analyze.outbuf = outbuf;
+	analyze.outlen = outlen;
+	analyze.maxlen = NAME_MAX;
+	analyze.inump = inump;
+	analyze.imp = imp;
+	analyze.fields = ISO_SUSP_ALTNAME|ISO_SUSP_RELDIR|ISO_SUSP_CLINK|ISO_SUSP_PLINK;
+
+	switch (*isodir->name) {
+	case 0:
+	case 1:
+		cd9660_rrip_defname(isodir,&analyze);
+		analyze.fields &= ~ISO_SUSP_ALTNAME;
+		tab = &rrip_table_getname[1];
+		break;
+	default:
+		break;
+	}
+
+	return (cd9660_rrip_loop(isodir,&analyze,tab));
+}
+
+/* 
+ * Get Symbolic Name from 'SL' record 
+ *
+ * Note: isodir should contain an SL record!
+ * No row carries a default handler, so nothing is filled in when the
+ * record is missing.
+ */
+static RRIP_TABLE rrip_table_getsymname[] = {
+	{ "SL", cd9660_rrip_slink,	0,			ISO_SUSP_SLINK },
+	{ "RR", cd9660_rrip_idflag,	0,			ISO_SUSP_IDFLAG },
+	{ "CE", cd9660_rrip_cont,	0,			ISO_SUSP_CONT },
+	{ "ST", cd9660_rrip_stop,	0,			ISO_SUSP_STOP },
+	{ "",	0,			0,			0 }
+};
+
+/*
+ * Build the target of a symbolic link into outbuf/outlen (at most
+ * MAXPATHLEN) from the "SL" entries of the directory record.
+ * Returns ISO_SUSP_SLINK if such an entry was handled, otherwise 0.
+ */
+int
+cd9660_rrip_getsymname(isodir,outbuf,outlen,imp)
+	struct iso_directory_record *isodir;
+	char *outbuf;
+	u_short *outlen;
+	struct iso_mnt *imp;
+{
+	ISO_RRIP_ANALYZE analyze;
+	int seen;
+
+	*outlen = 0;
+	analyze.outbuf = outbuf;
+	analyze.outlen = outlen;
+	analyze.maxlen = MAXPATHLEN;
+	analyze.cont = 1;		/* don't start with a slash */
+	analyze.imp = imp;
+	analyze.fields = ISO_SUSP_SLINK;
+
+	seen = cd9660_rrip_loop(isodir,&analyze,rrip_table_getsymname);
+	return (seen & ISO_SUSP_SLINK);
+}
+
+/*
+ * Dispatch table used by cd9660_rrip_offset() to look for the "ER"
+ * entry, following continuation areas and honouring "ST".
+ */
+static RRIP_TABLE rrip_table_extref[] = {
+	{ "ER", cd9660_rrip_extref,	0,			ISO_SUSP_EXTREF },
+	{ "CE", cd9660_rrip_cont,	0,			ISO_SUSP_CONT },
+	{ "ST", cd9660_rrip_stop,	0,			ISO_SUSP_STOP },
+	{ "",	0,			0,			0 }
+};
+
+/*
+ * Check for Rock Ridge Extension and return offset of its fields.
+ * Note: We require the ER field.
+ *
+ * The "SP" entry is expected directly behind the one-byte name of the
+ * record, or 15 bytes further on (imp->rr_skip0 is set to 0 or 15
+ * accordingly).  Returns the skip count from the SP entry, or -1 if
+ * there is no SP entry or no acceptable ER entry.
+ */
+int
+cd9660_rrip_offset(isodir,imp)
+	struct iso_directory_record *isodir;
+	struct iso_mnt *imp;
+{
+	ISO_RRIP_OFFSET *p;
+	ISO_RRIP_ANALYZE analyze;
+	int found;
+
+	p = (ISO_RRIP_OFFSET *)(isodir->name + 1);
+	imp->rr_skip0 = 0;
+	if (bcmp(p,"SP\7\1\276\357",6) != 0) {
+		/* Maybe, it's a CDROM XA disc? */
+		p = (ISO_RRIP_OFFSET *)((char *)p + 15);
+		imp->rr_skip0 = 15;
+		if (bcmp(p,"SP\7\1\276\357",6) != 0)
+			return (-1);
+	}
+
+	analyze.fields = ISO_SUSP_EXTREF;
+	analyze.imp = imp;
+	found = cd9660_rrip_loop(isodir,&analyze,rrip_table_extref);
+	if ((found & ISO_SUSP_EXTREF) == 0)
+		return (-1);
+
+	return (isonum_711(p->skip));
+}
diff --git a/sys/isofs/cd9660/cd9660_rrip.h b/sys/isofs/cd9660/cd9660_rrip.h
new file mode 100644
index 00000000000..b4017281f06
--- /dev/null
+++ b/sys/isofs/cd9660/cd9660_rrip.h
@@ -0,0 +1,146 @@
+/*-
+ * Copyright (c) 1993, 1994
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley
+ * by Pace Willisson (pace@blitz.com).  The Rock Ridge Extension
+ * Support code is derived from software contributed to Berkeley
+ * by Atsushi Murai (amurai@spec.co.jp).
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)cd9660_rrip.h	8.1 (Berkeley) 1/21/94
+ */
+
+/*
+ * Header that starts every System Use entry handled by the
+ * cd9660_rrip_* routines.  The scan loop steps from one entry to the
+ * next by adding "length" to the address of this header.
+ */
+typedef struct {
+	char 	      type		[ISODCL (  0,    1)];	/* two-character signature */
+	unsigned char length		[ISODCL (  2,    2)]; /* 711 */
+	unsigned char version		[ISODCL (  3,    3)];	/* only version 1 entries are dispatched */
+} ISO_SUSP_HEADER;
+
+/*
+ * File attributes: mode, link count, uid and gid, each stored as a
+ * little-endian (_l, 731) word followed by a big-endian (_m, 732) one.
+ * NOTE(review): presumably the layout of the "PX" entry -- confirm
+ * against cd9660_rrip_attr().
+ */
+typedef struct {
+	ISO_SUSP_HEADER			h;
+	char mode_l			[ISODCL (  4,    7)]; /* 731 */
+	char mode_m			[ISODCL (  8,   11)]; /* 732 */
+	char links_l			[ISODCL ( 12,   15)]; /* 731 */
+	char links_m			[ISODCL ( 16,   19)]; /* 732 */
+	char uid_l			[ISODCL ( 20,   23)]; /* 731 */
+	char uid_m			[ISODCL ( 24,   27)]; /* 732 */
+	char gid_l			[ISODCL ( 28,   31)]; /* 731 */
+	char gid_m			[ISODCL ( 32,   35)]; /* 732 */
+} ISO_RRIP_ATTR;
+
+/*
+ * Device number entry ("PN"), see cd9660_rrip_device().  High and low
+ * word are each stored in both byte orders; the reader applies
+ * isonum_733() to the address of the _l half.
+ */
+typedef struct {
+	ISO_SUSP_HEADER			h;
+	char dev_t_high_l		[ISODCL (  4,    7)]; /* 731 */
+	char dev_t_high_m		[ISODCL (  8,   11)]; /* 732 */
+	char dev_t_low_l		[ISODCL ( 12,   15)]; /* 731 */
+	char dev_t_low_m		[ISODCL ( 16,   19)]; /* 732 */
+} ISO_RRIP_DEVICE;
+
+/*
+ * Bits of the cflag byte of a symbolic link component.
+ * NOTE(review): meanings are inferred from the names only -- confirm
+ * against cd9660_rrip_slink().
+ */
+#define	ISO_SUSP_CFLAG_CONTINUE	0x01
+#define	ISO_SUSP_CFLAG_CURRENT	0x02
+#define	ISO_SUSP_CFLAG_PARENT	0x04
+#define	ISO_SUSP_CFLAG_ROOT	0x08
+#define	ISO_SUSP_CFLAG_VOLROOT	0x10
+#define	ISO_SUSP_CFLAG_HOST	0x20
+
+/*
+ * One component record of a symbolic link: a flag byte, a length byte
+ * and then the name bytes.  Unlike the other layouts in this file the
+ * ISODCL positions here start at 1 rather than 0.
+ */
+typedef struct {
+	u_char cflag			[ISODCL (  1,    1)];
+	u_char clen			[ISODCL (  2,    2)];
+	u_char name			[0];	/* zero-length array marks where the name starts */
+} ISO_RRIP_SLINK_COMPONENT;
+/* NOTE(review): presumably the size of the two fixed bytes above -- confirm */
+#define	ISO_RRIP_SLSIZ	2
+
+/*
+ * Symbolic link entry: header, one flag byte, then the component area.
+ * NOTE(review): "component" presumably marks only the first byte of a
+ * run of ISO_RRIP_SLINK_COMPONENT records -- confirm against
+ * cd9660_rrip_slink().
+ */
+typedef struct {
+	ISO_SUSP_HEADER			h;
+	u_char flags			[ISODCL (  4,    4)];
+	u_char component		[ISODCL (  5,    5)];
+} ISO_RRIP_SLINK;
+
+/*
+ * Alternate name entry: header plus one flag byte.
+ * NOTE(review): the name bytes presumably follow the flag byte --
+ * confirm against cd9660_rrip_altname().
+ */
+typedef struct {
+	ISO_SUSP_HEADER			h;
+	char flags			[ISODCL (  4,    4)];
+} ISO_RRIP_ALTNAME;
+
+/*
+ * Child link entry: header plus a directory location stored in both
+ * byte orders.
+ */
+typedef struct {
+	ISO_SUSP_HEADER			h;
+	char dir_loc			[ISODCL (  4,    11)]; /* 733 */
+} ISO_RRIP_CLINK;
+
+/*
+ * Parent link entry: same layout as ISO_RRIP_CLINK.
+ */
+typedef struct {
+	ISO_SUSP_HEADER			h;
+	char dir_loc			[ISODCL (  4,    11)]; /* 733 */
+} ISO_RRIP_PLINK;
+
+/*
+ * Relocated directory entry ("RE"): consists of the header only.
+ */
+typedef struct {
+	ISO_SUSP_HEADER			h;
+} ISO_RRIP_RELDIR;
+
+/*
+ * Bits of the flag byte of a time stamp entry.  FORM17 selects
+ * 17-byte stamps instead of 7-byte ones; the other bits say which
+ * stamps are present.  cd9660_rrip_tstamp() looks at CREAT, MODIFY,
+ * ACCESS and ATTR only.
+ */
+#define	ISO_SUSP_TSTAMP_FORM17	0x80
+#define	ISO_SUSP_TSTAMP_FORM7	0x00
+#define	ISO_SUSP_TSTAMP_CREAT	0x01
+#define	ISO_SUSP_TSTAMP_MODIFY	0x02
+#define	ISO_SUSP_TSTAMP_ACCESS	0x04
+#define	ISO_SUSP_TSTAMP_ATTR	0x08
+#define	ISO_SUSP_TSTAMP_BACKUP	0x10
+#define	ISO_SUSP_TSTAMP_EXPIRE	0x20
+#define	ISO_SUSP_TSTAMP_EFFECT	0x40
+
+/*
+ * Time stamp entry ("TF").  "time" marks the start of the packed
+ * stamps; the reader walks on from there in steps of 7 or 17 bytes.
+ */
+typedef struct {
+	ISO_SUSP_HEADER			h;
+	unsigned char flags		[ISODCL (  4,    4)];
+	unsigned char time		[ISODCL (  5,    5)];
+} ISO_RRIP_TSTAMP;
+
+/*
+ * Flag entry ("RR"): one byte that cd9660_rrip_idflag() uses as a mask
+ * on the low byte of the wanted-fields set.
+ */
+typedef struct {
+	ISO_SUSP_HEADER			h;
+	unsigned char flags		[ISODCL (  4,    4)];
+} ISO_RRIP_IDFLAG;
+
+/*
+ * Extension reference entry ("ER").  cd9660_rrip_extref() reads
+ * len_id and version with isonum_711() and expects the identifier
+ * string 8 bytes after the start of the entry.
+ */
+typedef struct {
+	ISO_SUSP_HEADER			h;
+	char len_id			[ISODCL (  4,    4)];
+	char len_des			[ISODCL (  5,	 5)];
+	char len_src			[ISODCL (  6,	 6)];
+	char version			[ISODCL (  7,	 7)];
+} ISO_RRIP_EXTREF;
+
+/*
+ * "SP" entry.  cd9660_rrip_offset() recognises it by its first six
+ * bytes ('S' 'P' 7 1 0276 0357, i.e. header plus check bytes) and
+ * returns "skip" read with isonum_711().
+ */
+typedef struct {
+	ISO_SUSP_HEADER			h;
+	char check			[ISODCL (  4,	 5)];
+	char skip			[ISODCL (  6,	 6)];
+} ISO_RRIP_OFFSET;
+
+/*
+ * Continuation entry ("CE"): block, offset within the block and
+ * length of a further System Use area.  All three are read with
+ * isonum_733() by cd9660_rrip_cont().
+ */
+typedef struct {
+	ISO_SUSP_HEADER			h;
+	char location			[ISODCL (  4,	11)];
+	char offset			[ISODCL ( 12,	19)];
+	char length			[ISODCL ( 20,	27)];
+} ISO_RRIP_CONT;
diff --git a/sys/isofs/cd9660/cd9660_util.c b/sys/isofs/cd9660/cd9660_util.c
new file mode 100644
index 00000000000..f74f0515ff7
--- /dev/null
+++ b/sys/isofs/cd9660/cd9660_util.c
@@ -0,0 +1,236 @@
+/*-
+ * Copyright (c) 1994
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley
+ * by Pace Willisson (pace@blitz.com).  The Rock Ridge Extension
+ * Support code is derived from software contributed to Berkeley
+ * by Atsushi Murai (amurai@spec.co.jp).
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)cd9660_util.c	8.1 (Berkeley) 1/21/94
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/namei.h>
+#include <sys/resourcevar.h>
+#include <sys/kernel.h>
+#include <sys/file.h>
+#include <sys/stat.h>
+#include <sys/buf.h>
+#include <sys/proc.h>
+#include <sys/conf.h>
+#include <sys/mount.h>
+#include <sys/vnode.h>
+#include <miscfs/specfs/specdev.h> /* XXX */
+#include <miscfs/fifofs/fifo.h> /* XXX */
+#include <sys/malloc.h>
+#include <sys/dir.h>
+
+#include <isofs/cd9660/iso.h>
+
+/*
+ * Out-of-line versions of the isonum_7xx number readers.  They are
+ * compiled only when __notanymore__ is defined.
+ * The 721/722/731/732 variants dereference the byte pointer as a
+ * short or long directly, so they depend on host byte order (a
+ * warning is printed when it does not match) and on the pointer being
+ * suitably aligned.
+ */
+#ifdef	__notanymore__
+/* one unsigned byte */
+int
+isonum_711 (p)
+unsigned char *p;
+{
+	return (*p);
+}
+
+/* one signed byte */
+int
+isonum_712 (p)
+signed char *p;
+{
+	return (*p);
+}
+
+int
+isonum_721 (p)
+unsigned char *p;
+{
+	/* little endian short */
+#if BYTE_ORDER != LITTLE_ENDIAN
+	printf ("isonum_721 called on non little-endian machine!\n");
+#endif
+
+	return *(short *)p;
+}
+
+int
+isonum_722 (p)
+unsigned char *p;
+{
+        /* big endian short */
+#if BYTE_ORDER != BIG_ENDIAN
+        printf ("isonum_722 called on non big-endian machine!\n");
+#endif
+
+	return *(short *)p;
+}
+
+/* short stored in both byte orders: little-endian copy at p, big-endian copy at p + 2 */
+int
+isonum_723 (p)
+unsigned char *p;
+{
+#if BYTE_ORDER == BIG_ENDIAN
+        return isonum_722 (p + 2);
+#elif BYTE_ORDER == LITTLE_ENDIAN
+	return isonum_721 (p);
+#else
+	printf ("isonum_723 unsupported byte order!\n");
+	return 0;
+#endif
+}
+
+int
+isonum_731 (p)
+unsigned char *p;
+{
+        /* little endian long */
+#if BYTE_ORDER != LITTLE_ENDIAN
+        printf ("isonum_731 called on non little-endian machine!\n");
+#endif
+
+	return *(long *)p;
+}
+
+int
+isonum_732 (p)
+unsigned char *p;
+{
+        /* big endian long */
+#if BYTE_ORDER != BIG_ENDIAN
+        printf ("isonum_732 called on non big-endian machine!\n");
+#endif
+
+	return *(long *)p;
+}
+
+/* long stored in both byte orders: little-endian copy at p, big-endian copy at p + 4 */
+int
+isonum_733 (p)
+unsigned char *p;
+{
+#if BYTE_ORDER == BIG_ENDIAN
+        return isonum_732 (p + 4);
+#elif BYTE_ORDER == LITTLE_ENDIAN
+	return isonum_731 (p);
+#else
+	printf ("isonum_733 unsupported byte order!\n");
+	return 0;
+#endif
+}
+#endif	/* __notanymore__ */
+
+/*
+ * translate and compare a filename
+ * Note: Version number plus ';' may be omitted.
+ *
+ * fn/fnlen	name being looked up
+ * isofn/isolen	name as recorded in the directory
+ *
+ * Upper case letters in isofn also match their lower case form in fn.
+ * If fn carries a ";version" suffix the two version numbers are
+ * compared; if it does not, a trailing ";..." or ".;..." in isofn is
+ * ignored.
+ *
+ * Returns 0 if the names match and a non-zero value otherwise.
+ */
+int
+isofncmp(unsigned char *fn,int fnlen,unsigned char *isofn,int isolen)
+{
+	int i, j;
+	/*
+	 * Must be unsigned like the data it is compared with; with a
+	 * signed char identical bytes >= 0x80 would never compare equal.
+	 */
+	unsigned char c;
+	
+	while (--fnlen >= 0) {
+		if (--isolen < 0)
+			return *fn;
+		if ((c = *isofn++) == ';') {
+			switch (*fn++) {
+			default:
+				return *--fn;
+			case 0:
+				return 0;
+			case ';':
+				break;
+			}
+			/* both names have a version: compare the numbers */
+			for (i = 0; --fnlen >= 0; i = i * 10 + *fn++ - '0') {
+				if (*fn < '0' || *fn > '9') {
+					return -1;
+				}
+			}
+			for (j = 0; --isolen >= 0; j = j * 10 + *isofn++ - '0');
+			return i - j;
+		}
+		if (c != *fn) {
+			if (c >= 'A' && c <= 'Z') {
+				if (c + ('a' - 'A') != *fn) {
+					if (*fn >= 'a' && *fn <= 'z')
+						return *fn - ('a' - 'A') - c;
+					else
+						return *fn - c;
+				}
+			} else
+				return *fn - c;
+		}
+		fn++;
+	}
+	if (isolen > 0) {
+		switch (*isofn) {
+		default:
+			return -1;
+		case '.':
+			/* only peek at the next byte if there is one */
+			if (isolen < 2 || isofn[1] != ';')
+				return -1;
+			/* FALLTHROUGH */
+		case ';':
+			return 0;
+		}
+	}
+	return 0;
+}
+
+/*
+ * translate a filename
+ *
+ * infn/infnlen	name as recorded in the directory
+ * outfn	receives the translated name (not NUL terminated)
+ * outfnlen	receives the number of bytes stored in outfn
+ * original	if set, copy the name unchanged; otherwise map A-Z to
+ *		lower case and cut the name at ";" or at ".;"
+ * assoc	if set, ASSOCCHAR is put in front of the name
+ *
+ * Input and output positions are counted separately so that the
+ * ASSOCCHAR prefix does not cost the last input character, and the
+ * byte after the input is never inspected.
+ */
+void
+isofntrans(unsigned char *infn,int infnlen,
+	   unsigned char *outfn,unsigned short *outfnlen,
+	   int original,int assoc)
+{
+	int inidx;
+	int outidx = 0;
+	
+	if (assoc) {
+		*outfn++ = ASSOCCHAR;
+		outidx++;
+	}
+	for (inidx = 0; inidx < infnlen; inidx++) {
+		unsigned char c = infn[inidx];
+		
+		if (!original) {
+			if (c == ';')
+				break;
+			if (c == '.' && inidx + 1 < infnlen
+			    && infn[inidx + 1] == ';')
+				break;
+			if (c >= 'A' && c <= 'Z')
+				c += 'a' - 'A';
+		}
+		*outfn++ = c;
+		outidx++;
+	}
+	*outfnlen = outidx;
+}
diff --git a/sys/isofs/cd9660/cd9660_vfsops.c b/sys/isofs/cd9660/cd9660_vfsops.c
new file mode 100644
index 00000000000..02dd92af66f
--- /dev/null
+++ b/sys/isofs/cd9660/cd9660_vfsops.c
@@ -0,0 +1,681 @@
+/*-
+ * Copyright (c) 1994
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley
+ * by Pace Willisson (pace@blitz.com).  The Rock Ridge Extension
+ * Support code is derived from software contributed to Berkeley
+ * by Atsushi Murai (amurai@spec.co.jp).
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)cd9660_vfsops.c	8.3 (Berkeley) 1/31/94
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/namei.h>
+#include <sys/proc.h>
+#include <sys/kernel.h>
+#include <sys/vnode.h>
+#include <miscfs/specfs/specdev.h>
+#include <sys/mount.h>
+#include <sys/buf.h>
+#include <sys/file.h>
+#include <sys/dkbad.h>
+#include <sys/disklabel.h>
+#include <sys/ioctl.h>
+#include <sys/errno.h>
+#include <sys/malloc.h>
+
+#include <isofs/cd9660/iso.h>
+#include <isofs/cd9660/cd9660_node.h>
+
+extern int enodev ();
+
+/*
+ * Operations vector of the cd9660 file system.  The initializer is
+ * positional, so the order must follow the members of struct vfsops.
+ */
+struct vfsops cd9660_vfsops = {
+	cd9660_mount,
+	cd9660_start,
+	cd9660_unmount,
+	cd9660_root,
+	cd9660_quotactl,
+	cd9660_statfs,
+	cd9660_sync,
+	cd9660_vget,
+	cd9660_fhtovp,
+	cd9660_vptofh,
+	cd9660_init,
+};
+
+/*
+ * Called by vfs_mountroot when iso is going to be mounted as root.
+ *
+ * ROOTNAME is the placeholder stored as the "mounted from" name by
+ * cd9660_mountroot(); the name is updated by mount(8) after booting.
+ */
+#define ROOTNAME	"root_device"
+
+/* forward declaration; defined below, shared by mount and mountroot */
+static iso_mountfs();
+
+/*
+ * Mount the root device as a read-only cd9660 file system.
+ *
+ * Sets up the vnodes for swapdev and rootdev, allocates a struct
+ * mount, mounts it through iso_mountfs(), appends it to mountlist and
+ * fills in the mount point names ("/" and ROOTNAME).
+ *
+ * Returns 0 on success or the error from iso_mountfs()/vfs_lock();
+ * the struct mount is freed again on failure.
+ */
+int
+cd9660_mountroot()
+{
+	register struct mount *mp;
+	extern struct vnode *rootvp;
+	struct proc *p = curproc;	/* XXX */
+	struct iso_mnt *imp;
+	u_int size;
+	int error;
+	struct iso_args args;
+	
+	/*
+	 * Get vnodes for swapdev and rootdev.
+	 */
+	if (bdevvp(swapdev, &swapdev_vp) || bdevvp(rootdev, &rootvp))
+		panic("cd9660_mountroot: can't setup bdevvp's");
+
+	mp = malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK);
+	bzero((char *)mp, (u_long)sizeof(struct mount));
+	mp->mnt_op = &cd9660_vfsops;
+	mp->mnt_flag = MNT_RDONLY;
+	/* iso_mountfs() looks at the flags member only */
+	args.flags = ISOFSMNT_ROOT;
+	if ((error = iso_mountfs(rootvp, mp, p, &args)) != 0) {
+		free(mp, M_MOUNT);
+		return (error);
+	}
+	if ((error = vfs_lock(mp)) != 0) {
+		(void)cd9660_unmount(mp, 0, p);
+		free(mp, M_MOUNT);
+		return (error);
+	}
+	TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list);
+	mp->mnt_flag |= MNT_ROOTFS;
+	mp->mnt_vnodecovered = NULLVP;
+	imp = VFSTOISOFS(mp);
+	bzero(imp->im_fsmnt, sizeof(imp->im_fsmnt));
+	imp->im_fsmnt[0] = '/';
+	bcopy((caddr_t)imp->im_fsmnt, (caddr_t)mp->mnt_stat.f_mntonname,
+	    MNAMELEN);
+	(void) copystr(ROOTNAME, mp->mnt_stat.f_mntfromname, MNAMELEN - 1,
+	    &size);
+	bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size);
+	(void) cd9660_statfs(mp, &mp->mnt_stat, p);
+	vfs_unlock(mp);
+	return (0);
+}
+
+/*
+ * Flag to allow forcible unmounting.
+ * When it is 0, cd9660_unmount() rejects MNT_FORCE with EINVAL.
+ */
+int iso_doforce = 1;
+
+/*
+ * VFS Operations.
+ *
+ * mount system call
+ *
+ * Copies in the iso_args, insists on a read-only mount, looks up the
+ * block device named by args.fspec and either mounts it through
+ * iso_mountfs() or, for MNT_UPDATE, checks that it is the device
+ * already mounted.  An update without a device name only changes the
+ * export information.  On success the mount point names are recorded
+ * and the statfs data refreshed.
+ */
+cd9660_mount(mp, path, data, ndp, p)
+	register struct mount *mp;
+	char *path;
+	caddr_t data;
+	struct nameidata *ndp;
+	struct proc *p;
+{
+	struct vnode *devvp;
+	struct iso_args args;
+	u_int size;
+	int error;
+	struct iso_mnt *imp;
+
+	error = copyin(data, (caddr_t)&args, sizeof (struct iso_args));
+	if (error)
+		return (error);
+
+	if (!(mp->mnt_flag & MNT_RDONLY))
+		return (EROFS);
+
+	/*
+	 * If updating and there is no device name, only the export
+	 * information is changed.
+	 */
+	if (mp->mnt_flag & MNT_UPDATE) {
+		imp = VFSTOISOFS(mp);
+		if (args.fspec == 0)
+			return (vfs_export(mp, &imp->im_export, &args.export));
+	}
+
+	/*
+	 * Not an update, or updating the name: look up the name
+	 * and verify that it refers to a sensible block device.
+	 */
+	NDINIT(ndp, LOOKUP, FOLLOW, UIO_USERSPACE, args.fspec, p);
+	error = namei(ndp);
+	if (error)
+		return (error);
+	devvp = ndp->ni_vp;
+
+	if (devvp->v_type != VBLK) {
+		vrele(devvp);
+		return (ENOTBLK);
+	}
+	if (major(devvp->v_rdev) >= nblkdev) {
+		vrele(devvp);
+		return (ENXIO);
+	}
+
+	if (mp->mnt_flag & MNT_UPDATE) {
+		if (devvp == imp->im_devvp)
+			vrele(devvp);
+		else
+			error = EINVAL;	/* needs translation */
+	} else
+		error = iso_mountfs(devvp, mp, p, &args);
+
+	if (error) {
+		vrele(devvp);
+		return (error);
+	}
+
+	imp = VFSTOISOFS(mp);
+	(void) copyinstr(path, imp->im_fsmnt, sizeof(imp->im_fsmnt)-1, &size);
+	bzero(imp->im_fsmnt + size, sizeof(imp->im_fsmnt) - size);
+	bcopy((caddr_t)imp->im_fsmnt, (caddr_t)mp->mnt_stat.f_mntonname,
+	    MNAMELEN);
+	(void) copyinstr(args.fspec, mp->mnt_stat.f_mntfromname, MNAMELEN - 1,
+	    &size);
+	bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size);
+	(void) cd9660_statfs(mp, &mp->mnt_stat, p);
+	return (0);
+}
+
+/*
+ * Common code for mount and mountroot
+ *
+ * devvp	vnode of the block device to mount
+ * mp		mount structure to fill in; must be marked MNT_RDONLY
+ * p		calling process
+ * argp		mount arguments; only the flags member is used, and
+ *		it is updated to reflect the Rock Ridge probe
+ *
+ * Opens the device, searches sectors 16..99 for the primary volume
+ * descriptor, allocates and fills the struct iso_mnt and, unless
+ * ISOFSMNT_NORRIP is given, probes the root directory for the Rock
+ * Ridge extension.
+ *
+ * Returns 0 on success, otherwise an errno value; in that case the
+ * device is closed again and nothing stays attached to mp.
+ */
+static int
+iso_mountfs(devvp, mp, p, argp)
+	register struct vnode *devvp;
+	struct mount *mp;
+	struct proc *p;
+	struct iso_args *argp;
+{
+	register struct iso_mnt *isomp = (struct iso_mnt *)0;
+	struct buf *bp = NULL;
+	dev_t dev = devvp->v_rdev;
+	int error = EINVAL;
+	int needclose = 0;
+	int ronly = (mp->mnt_flag & MNT_RDONLY) != 0;
+	extern struct vnode *rootvp;
+	int iso_bsize;
+	int iso_blknum;
+	int root_ext_attr_len;
+	struct iso_volume_descriptor *vdp;
+	struct iso_primary_descriptor *pri;
+	struct iso_directory_record *rootp;
+	int logical_block_size;
+	
+	if (!ronly)
+		return EROFS;
+	
+	/*
+	 * Disallow multiple mounts of the same device.
+	 * Disallow mounting of a device that is currently in use
+	 * (except for root, which might share swap device for miniroot).
+	 * Flush out any old buffers remaining from a previous use.
+	 */
+	if ((error = vfs_mountedon(devvp)) != 0)
+		return error;
+	if (vcount(devvp) > 1 && devvp != rootvp)
+		return EBUSY;
+	if ((error = vinvalbuf(devvp, V_SAVE, p->p_ucred, p, 0, 0)) != 0)
+		return (error);
+
+	if ((error = VOP_OPEN(devvp, ronly ? FREAD : FREAD|FWRITE, FSCRED, p)) != 0)
+		return error;
+	needclose = 1;
+	
+	/* This is the "logical sector size".  The standard says this
+	 * should be 2048 or the physical sector size on the device,
+	 * whichever is greater.  For now, we'll just use a constant.
+	 */
+	iso_bsize = ISO_DEFAULT_BLOCK_SIZE;
+	
+	for (iso_blknum = 16; iso_blknum < 100; iso_blknum++) {
+		if ((error = bread (devvp, btodb(iso_blknum * iso_bsize),
+				    iso_bsize, NOCRED, &bp)) != 0)
+			goto out;
+		
+		vdp = (struct iso_volume_descriptor *)bp->b_un.b_addr;
+		if (bcmp (vdp->id, ISO_STANDARD_ID, sizeof vdp->id) != 0) {
+			error = EINVAL;
+			goto out;
+		}
+		
+		if (isonum_711 (vdp->type) == ISO_VD_END) {
+			error = EINVAL;
+			goto out;
+		}
+		
+		if (isonum_711 (vdp->type) == ISO_VD_PRIMARY)
+			break;
+		brelse(bp);
+		/* forget the buffer so that "out" cannot release it twice */
+		bp = NULL;
+	}
+	
+	/*
+	 * bp is still held only if the loop was left through the
+	 * "primary descriptor found" break.
+	 */
+	if (bp == NULL) {
+		error = EINVAL;
+		goto out;
+	}
+	
+	pri = (struct iso_primary_descriptor *)vdp;
+	
+	logical_block_size = isonum_723 (pri->logical_block_size);
+	
+	if (logical_block_size < DEV_BSIZE || logical_block_size > MAXBSIZE
+	    || (logical_block_size & (logical_block_size - 1)) != 0) {
+		error = EINVAL;
+		goto out;
+	}
+	
+	rootp = (struct iso_directory_record *)pri->root_directory_record;
+	
+	isomp = malloc(sizeof *isomp, M_ISOFSMNT, M_WAITOK);
+	bzero((caddr_t)isomp, sizeof *isomp);
+	isomp->logical_block_size = logical_block_size;
+	isomp->volume_space_size = isonum_733 (pri->volume_space_size);
+	bcopy (rootp, isomp->root, sizeof isomp->root);
+	isomp->root_extent = isonum_733 (rootp->extent);
+	isomp->root_size = isonum_733 (rootp->size);
+	/* fetch this now, rootp points into the buffer released below */
+	root_ext_attr_len = isonum_711(rootp->ext_attr_length);
+	
+	isomp->im_bmask = logical_block_size - 1;
+	isomp->im_bshift = 0;
+	while ((1 << isomp->im_bshift) < isomp->logical_block_size)
+		isomp->im_bshift++;
+	
+	bp->b_flags |= B_AGE;
+	brelse(bp);
+	bp = NULL;
+	
+	mp->mnt_data = (qaddr_t)isomp;
+	mp->mnt_stat.f_fsid.val[0] = (long)dev;
+	mp->mnt_stat.f_fsid.val[1] = MOUNT_CD9660;
+	mp->mnt_maxsymlinklen = 0;
+	mp->mnt_flag |= MNT_LOCAL;
+	isomp->im_mountp = mp;
+	isomp->im_dev = dev;
+	isomp->im_devvp = devvp;
+	
+	devvp->v_specflags |= SI_MOUNTEDON;
+	
+	/* Check the Rock Ridge Extention support */
+	if (!(argp->flags & ISOFSMNT_NORRIP)) {
+		if ((error = bread (isomp->im_devvp,
+				    (isomp->root_extent + root_ext_attr_len)
+				    * isomp->logical_block_size / DEV_BSIZE,
+				    isomp->logical_block_size,NOCRED,&bp)) != 0)
+		    goto out;
+		
+		rootp = (struct iso_directory_record *)bp->b_un.b_addr;
+		
+		if ((isomp->rr_skip = cd9660_rrip_offset(rootp,isomp)) < 0) {
+		    argp->flags  |= ISOFSMNT_NORRIP;
+		} else {
+		    argp->flags  &= ~ISOFSMNT_GENS;
+		}
+		
+		/*
+		 * The contents are valid,
+		 * but they will get reread as part of another vnode, so...
+		 */
+		bp->b_flags |= B_AGE;
+		brelse(bp);
+		bp = NULL;
+	}
+	isomp->im_flags = argp->flags&(ISOFSMNT_NORRIP|ISOFSMNT_GENS|ISOFSMNT_EXTATT);
+	switch (isomp->im_flags&(ISOFSMNT_NORRIP|ISOFSMNT_GENS)) {
+	default:
+	    isomp->iso_ftype = ISO_FTYPE_DEFAULT;
+	    break;
+	case ISOFSMNT_GENS|ISOFSMNT_NORRIP:
+	    isomp->iso_ftype = ISO_FTYPE_9660;
+	    break;
+	case 0:
+	    isomp->iso_ftype = ISO_FTYPE_RRIP;
+	    break;
+	}
+	
+	return 0;
+out:
+	if (bp)
+		brelse(bp);
+	/*
+	 * The only failure after isomp was allocated comes after the
+	 * device was marked as mounted on; take that mark back.
+	 */
+	if (isomp)
+		devvp->v_specflags &= ~SI_MOUNTEDON;
+	if (needclose)
+		(void)VOP_CLOSE(devvp, ronly ? FREAD : FREAD|FWRITE, NOCRED, p);
+	if (isomp) {
+		free((caddr_t)isomp, M_ISOFSMNT);
+		mp->mnt_data = (qaddr_t)0;
+	}
+	return error;
+}
+
+/*
+ * Make a filesystem operational.
+ * There is nothing to do, so this always succeeds.
+ */
+/* ARGSUSED */
+cd9660_start(mp, flags, p)
+	struct mount *mp;
+	int flags;
+	struct proc *p;
+{
+
+	return (0);
+}
+
+/*
+ * unmount system call
+ *
+ * mp		file system to unmount
+ * mntflags	MNT_FORCE asks for a forced unmount; that is refused
+ *		with EINVAL for the root file system or when
+ *		iso_doforce is 0
+ * p		calling process
+ *
+ * Flushes the vnodes of the mount, closes and releases the device
+ * vnode and frees the struct iso_mnt.  Returns the error from
+ * vflush(), otherwise the result of closing the device.
+ */
+int
+cd9660_unmount(mp, mntflags, p)
+	struct mount *mp;
+	int mntflags;
+	struct proc *p;
+{
+	register struct iso_mnt *isomp;
+	int error, flags = 0;
+	
+	if (mntflags & MNT_FORCE) {
+		if (!iso_doforce || (mp->mnt_flag & MNT_ROOTFS))
+			return (EINVAL);
+		flags |= FORCECLOSE;
+	}
+#if 0
+	mntflushbuf(mp, 0);
+	if (mntinvalbuf(mp))
+		return EBUSY;
+#endif
+	if ((error = vflush(mp, NULLVP, flags)) != 0)
+		return (error);
+
+	isomp = VFSTOISOFS(mp);
+
+#ifdef	ISODEVMAP
+	if (isomp->iso_ftype == ISO_FTYPE_RRIP)
+		iso_dunmap(isomp->im_dev);
+#endif
+	
+	isomp->im_devvp->v_specflags &= ~SI_MOUNTEDON;
+	error = VOP_CLOSE(isomp->im_devvp, FREAD, NOCRED, p);
+	vrele(isomp->im_devvp);
+	free((caddr_t)isomp, M_ISOFSMNT);
+	mp->mnt_data = (qaddr_t)0;
+	mp->mnt_flag &= ~MNT_LOCAL;
+	return (error);
+}
+
+/*
+ * Return root of a filesystem
+ *
+ * A vnode/inode pair living on the stack describes the root directory
+ * record well enough for iso_iget() to produce the real inode; its
+ * vnode is handed back through vpp.
+ */
+cd9660_root(mp, vpp)
+	struct mount *mp;
+	struct vnode **vpp;
+{
+	struct iso_mnt *imp = VFSTOISOFS (mp);
+	struct iso_directory_record *dp;
+	struct iso_node tip, *nip;
+	register struct iso_node *ip;
+	struct vnode tvp;
+	int error;
+
+	dp = (struct iso_directory_record *)imp->root;
+
+	/* tie the temporary vnode and inode together */
+	tvp.v_data = &tip;
+	tvp.v_mount = mp;
+	ip = VTOI(&tvp);
+	ip->i_vnode = &tvp;
+	ip->i_diroff = 0;
+	ip->i_dev = imp->im_dev;
+	isodirino(&ip->i_number,dp,imp);
+
+	/*
+	 * With RRIP we must use the `.' entry of the root directory.
+	 * Simply tell iget, that it's a relocated directory.
+	 */
+	error = iso_iget(ip,ip->i_number,
+			 imp->iso_ftype == ISO_FTYPE_RRIP,
+			 &nip,dp);
+	if (!error)
+		*vpp = ITOV(nip);
+	return (error);
+}
+
+/*
+ * Quota control: cd9660 has no quota support, so always EOPNOTSUPP.
+ */
+/* ARGSUSED */
+int
+cd9660_quotactl(mp, cmd, uid, arg, p)
+	struct mount *mp;
+	int cmd;
+	uid_t uid;
+	caddr_t arg;
+	struct proc *p;
+{
+	/* Every command is refused. */
+	return EOPNOTSUPP;
+}
+
+/*
+ * Fill in *sbp with the statistics of a mounted cd9660 volume.
+ */
+int
+cd9660_statfs(mp, sbp, p)
+	struct mount *mp;
+	register struct statfs *sbp;
+	struct proc *p;
+{
+	register struct iso_mnt *imp;
+
+	imp = VFSTOISOFS(mp);
+
+	sbp->f_type = MOUNT_CD9660;
+	sbp->f_bsize = imp->logical_block_size;
+	sbp->f_iosize = sbp->f_bsize;	/* XXX */
+	sbp->f_blocks = imp->volume_space_size;
+	sbp->f_bfree = 0;		/* total free blocks */
+	sbp->f_bavail = 0;		/* blocks free for non superuser */
+	sbp->f_files = 0;		/* total files */
+	sbp->f_ffree = 0;		/* free file nodes */
+	if (sbp != &mp->mnt_stat) {
+		bcopy((caddr_t)mp->mnt_stat.f_mntonname,
+		    (caddr_t)sbp->f_mntonname, MNAMELEN);
+		bcopy((caddr_t)mp->mnt_stat.f_mntfromname,
+		    (caddr_t)sbp->f_mntfromname, MNAMELEN);
+	}
+	/* The first spare word carries the mount flags. */
+	sbp->f_spare[0] = imp->im_flags;
+	return (0);
+}
+
+/* ARGSUSED */
+int
+cd9660_sync(mp, waitfor, cred, p)
+	struct mount *mp;
+	int waitfor;
+	struct ucred *cred;
+	struct proc *p;
+{
+	return 0;	/* sync is a no-op for cd9660 */
+}
+
+/*
+ * Look up a vnode by inode number (flat namespace).
+ * Not implemented for cd9660: always fails with EOPNOTSUPP.
+ */
+/* ARGSUSED */
+int
+cd9660_vget(mp, ino, vpp)
+	struct mount *mp;
+	ino_t ino;
+	struct vnode **vpp;
+{
+	/* *vpp is left untouched. */
+	return EOPNOTSUPP;
+}
+
+/*
+ * File handle to vnode
+ *
+ * Have to be really careful about stale file handles:
+ * - check that the inode number lies inside the volume
+ * - check that the record's extent still matches the handle's start
+ * - call iso_iget() and check for an unused inode (iso_mode == 0)
+ * - XXX no generation number is checked yet
+ */
+
+struct ifid {
+	ushort	ifid_len;	/* set to sizeof(struct ifid) by cd9660_vptofh */
+	ushort	ifid_pad;	/* not written by cd9660_vptofh */
+	int	ifid_ino;	/* inode number (i_number) of the file */
+	long	ifid_start;	/* iso_start of the file; checked in cd9660_fhtovp */
+};
+
+/*
+ * Translate a file handle (struct ifid) back into a vnode in *vpp.
+ * Returns EACCES if vfs_export_lookup() finds no export entry for nam,
+ * ESTALE if the handle fails validation, else the bread/iso_iget error.
+ */
+/* ARGSUSED */
+int
+cd9660_fhtovp(mp, fhp, nam, vpp, exflagsp, credanonp)
+	register struct mount *mp;
+	struct fid *fhp;
+	struct mbuf *nam;
+	struct vnode **vpp;
+	int *exflagsp;
+	struct ucred **credanonp;
+{
+	struct vnode			tvp;
+	int				error, lbn, off;
+	struct ifid			*ifhp;
+	struct iso_mnt			*imp;
+	struct buf			*bp;
+	struct iso_directory_record	*dirp;
+	struct iso_node 		tip, *ip, *nip;
+	struct netcred			*np;
+
+	imp = VFSTOISOFS (mp);
+	ifhp = (struct ifid *)fhp;
+#ifdef	ISOFS_DBG
+	printf("fhtovp: ino %d, start %ld\n",
+	       ifhp->ifid_ino, ifhp->ifid_start);
+#endif
+	np = vfs_export_lookup(mp, &imp->im_export, nam);
+	if (np == NULL)
+		return (EACCES);
+	/* The handle is caller-supplied: a negative block is stale too. */
+	lbn = iso_lblkno(imp, ifhp->ifid_ino);
+	if (lbn < 0 || lbn >= imp->volume_space_size) {
+		printf("fhtovp: lbn exceed volume space %d\n", lbn);
+		return (ESTALE);
+	}
+
+	off = iso_blkoff(imp, ifhp->ifid_ino);
+	if (off + ISO_DIRECTORY_RECORD_SIZE > imp->logical_block_size) {
+		printf("fhtovp: crosses block boundary %d\n",
+		       off + ISO_DIRECTORY_RECORD_SIZE);
+		return (ESTALE);
+	}
+
+	error = bread(imp->im_devvp, btodb(lbn * imp->logical_block_size),
+		      imp->logical_block_size, NOCRED, &bp);
+	if (error) {
+		printf("fhtovp: bread error %d\n",error);
+		brelse(bp);
+		return (error);
+	}
+
+	dirp = (struct iso_directory_record *)(bp->b_un.b_addr + off);
+	if (off + isonum_711(dirp->length) > imp->logical_block_size) {
+		brelse(bp);
+		printf("fhtovp: directory crosses block boundary %d[off=%d/len=%d]\n",
+		       off+isonum_711(dirp->length), off,
+		       isonum_711(dirp->length));
+		return (ESTALE);
+	}
+
+	if (isonum_733(dirp->extent) + isonum_711(dirp->ext_attr_length) !=
+	    ifhp->ifid_start) {
+		brelse(bp);
+		printf("fhtovp: file start miss %d vs %ld\n",
+		       isonum_733(dirp->extent)+isonum_711(dirp->ext_attr_length),
+		       ifhp->ifid_start);
+		return (ESTALE);
+	}
+	brelse(bp);
+	/* NOTE(review): dirp still points into bp, which was just released. */
+	ip = &tip;
+	tvp.v_mount = mp;
+	tvp.v_data = ip;
+	ip->i_vnode = &tvp;
+	ip->i_dev = imp->im_dev;
+	if (error = iso_iget(ip, ifhp->ifid_ino, 0, &nip, dirp)) {
+		*vpp = NULLVP;
+		printf("fhtovp: failed to get inode\n");
+		return (error);
+	}
+	ip = nip;
+	/* XXX need generation number? */
+	if (ip->inode.iso_mode == 0) {
+		iso_iput(ip);
+		*vpp = NULLVP;
+		printf("fhtovp: inode mode == 0\n");
+		return (ESTALE);
+	}
+	*vpp = ITOV(ip);
+	*exflagsp = np->netc_exflags;
+	*credanonp = &np->netc_anon;
+	return 0;
+}
+
+/*
+ * Vnode pointer to file handle: fill *fhp from the vnode's iso_node.
+ */
+/* ARGSUSED */
+int
+cd9660_vptofh(vp, fhp)
+	struct vnode *vp;
+	struct fid *fhp;
+{
+	register struct iso_node *ip = VTOI(vp);
+	register struct ifid *ifhp = (struct ifid *)fhp;
+
+	/* Record the handle size, the inode number and iso_start. */
+	ifhp->ifid_len = sizeof(struct ifid);
+	ifhp->ifid_ino = ip->i_number;
+	ifhp->ifid_start = ip->iso_start;
+	/* ifid_pad is not written here. */
+
+#ifdef	ISOFS_DBG
+	printf("vptofh: ino %d, start %ld\n",
+	       ifhp->ifid_ino,ifhp->ifid_start);
+#endif
+	return (0);
+}
diff --git a/sys/isofs/cd9660/cd9660_vnops.c b/sys/isofs/cd9660/cd9660_vnops.c
new file mode 100644
index 00000000000..59f5a73f5c8
--- /dev/null
+++ b/sys/isofs/cd9660/cd9660_vnops.c
@@ -0,0 +1,1038 @@
+/*-
+ * Copyright (c) 1994
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley
+ * by Pace Willisson (pace@blitz.com).  The Rock Ridge Extension
+ * Support code is derived from software contributed to Berkeley
+ * by Atsushi Murai (amurai@spec.co.jp).
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)cd9660_vnops.c	8.3 (Berkeley) 1/23/94
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/namei.h>
+#include <sys/resourcevar.h>
+#include <sys/kernel.h>
+#include <sys/file.h>
+#include <sys/stat.h>
+#include <sys/buf.h>
+#include <sys/proc.h>
+#include <sys/conf.h>
+#include <sys/mount.h>
+#include <sys/vnode.h>
+#include <miscfs/specfs/specdev.h>
+#include <miscfs/fifofs/fifo.h>
+#include <sys/malloc.h>
+#include <sys/dir.h>
+
+#include <isofs/cd9660/iso.h>
+#include <isofs/cd9660/cd9660_node.h>
+#include <isofs/cd9660/iso_rrip.h>
+
+#if 0
+/*
+ * Mknod vnode call -- compiled out by the enclosing #if 0.
+ *  Actually remap the device number (ISODEVMAP), else fail with EINVAL.
+ */
+cd9660_mknod(ndp, vap, cred, p)
+	struct nameidata *ndp;
+	struct ucred *cred;
+	struct vattr *vap;
+	struct proc *p;
+{
+#ifndef	ISODEVMAP
+	free(ndp->ni_pnbuf, M_NAMEI);
+	vput(ndp->ni_dvp);
+	vput(ndp->ni_vp);
+	return EINVAL;
+#else
+	register struct vnode *vp;
+	struct iso_node *ip;
+	struct iso_dnode *dp;
+	int error;
+	
+	vp = ndp->ni_vp;
+	ip = VTOI(vp);
+	
+	if (ip->i_mnt->iso_ftype != ISO_FTYPE_RRIP
+	    || vap->va_type != vp->v_type
+	    || (vap->va_type != VCHR && vap->va_type != VBLK)) {
+		free(ndp->ni_pnbuf, M_NAMEI);
+		vput(ndp->ni_dvp);
+		vput(ndp->ni_vp);
+		return EINVAL;
+	}
+	
+	dp = iso_dmap(ip->i_dev,ip->i_number,1);
+	if (ip->inode.iso_rdev == vap->va_rdev || vap->va_rdev == VNOVAL) {
+		/* same as the unmapped one, delete the mapping */
+		remque(dp);
+		FREE(dp,M_CACHE);
+	} else
+		/* enter new mapping */
+		dp->d_dev = vap->va_rdev;
+	
+	/*
+	 * Remove inode so that it will be reloaded by iget and
+	 * checked to see if it is an alias of an existing entry
+	 * in the inode cache.
+	 */
+	vput(vp);
+	vp->v_type = VNON;
+	vgone(vp);
+	return (0);
+#endif
+}
+#endif
+
+/*
+ * Open vnode op.
+ *
+ * There is nothing to do on open, so this always succeeds.
+ */
+/* ARGSUSED */
+int
+cd9660_open(ap)
+	struct vop_open_args /* {
+		struct vnode *a_vp;
+		int  a_mode;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	return 0;
+}
+
+/*
+ * Close vnode op.
+ *
+ * Nothing is updated on close; this always succeeds.
+ */
+/* ARGSUSED */
+int
+cd9660_close(ap)
+	struct vop_close_args /* {
+		struct vnode *a_vp;
+		int  a_fflag;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	return 0;
+}
+
+/*
+ * Access check vnode op.  XXX No permission checking is implemented:
+ * every request is granted regardless of a_mode or a_cred.
+ */
+/* ARGSUSED */
+int
+cd9660_access(ap)
+	struct vop_access_args /* {
+		struct vnode *a_vp;
+		int  a_mode;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	return 0;
+}
+
+/* Getattr vnode op: copy the attributes held in the iso_node to *a_vap. */
+int
+cd9660_getattr(ap)
+	struct vop_getattr_args /* {
+		struct vnode *a_vp;
+		struct vattr *a_vap;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct vnode *vp = ap->a_vp;
+	register struct iso_node *ip = VTOI(vp);
+	register struct vattr *vap = ap->a_vap;
+
+	vap->va_fsid = ip->i_dev;
+	vap->va_fileid = ip->i_number;
+
+	vap->va_mode = ip->inode.iso_mode;
+	vap->va_nlink = ip->inode.iso_links;
+	vap->va_uid = ip->inode.iso_uid;
+	vap->va_gid = ip->inode.iso_gid;
+	vap->va_atime = ip->inode.iso_atime;
+	vap->va_mtime = ip->inode.iso_mtime;
+	vap->va_ctime = ip->inode.iso_ctime;
+	vap->va_rdev = ip->inode.iso_rdev;
+
+	vap->va_size = (u_quad_t) ip->i_size;
+	vap->va_bytes = (u_quad_t) ip->i_size;
+	vap->va_blocksize = ip->i_mnt->logical_block_size;
+	vap->va_type = vp->v_type;
+	vap->va_flags = 0;
+	vap->va_gen = 1;
+	return 0;
+}
+
+#if ISO_DEFAULT_BLOCK_SIZE >= NBPG
+#ifdef DEBUG
+extern int doclusterread;	/* run-time switch, defined outside this file */
+#else
+#define doclusterread 1		/* cd9660_read may use cluster_read */
+#endif
+#else
+/* XXX until cluster routines can handle block sizes less than one page */
+#define doclusterread 0		/* cd9660_read uses bread/breadn only */
+#endif
+
+/*
+ * Vnode op for reading: copy file data to a_uio one logical block at a time.
+ */
+cd9660_read(ap)
+	struct vop_read_args /* {
+		struct vnode *a_vp;
+		struct uio *a_uio;
+		int a_ioflag;
+		struct ucred *a_cred;
+	} */ *ap;
+{
+	struct vnode *vp = ap->a_vp;
+	register struct uio *uio = ap->a_uio;
+	register struct iso_node *ip = VTOI(vp);
+	register struct iso_mnt *imp;
+	struct buf *bp;
+	daddr_t lbn, bn, rablock;
+	off_t diff;
+	int rasize, error = 0;
+	long size, n, on;
+	/* Returns 0 on success or at EOF, EINVAL for a negative offset. */
+	if (uio->uio_resid == 0)
+		return (0);
+	if (uio->uio_offset < 0)
+		return (EINVAL);
+	ip->i_flag |= IACC;
+	imp = ip->i_mnt;
+	do {
+		lbn = iso_lblkno(imp, uio->uio_offset);	/* block of the offset */
+		on = iso_blkoff(imp, uio->uio_offset);	/* offset inside it */
+		n = min((unsigned)(imp->logical_block_size - on),
+			uio->uio_resid);
+		diff = (off_t)ip->i_size - uio->uio_offset; /* bytes before EOF */
+		if (diff <= 0)
+			return (0);
+		if (diff < n)
+			n = diff;
+		size = iso_blksize(imp, ip, lbn);
+		rablock = lbn + 1;	/* read-ahead candidate */
+		if (doclusterread) {
+			if (iso_lblktosize(imp, rablock) <= ip->i_size)
+				error = cluster_read(vp, (off_t)ip->i_size,
+						     lbn, size, NOCRED, &bp);
+			else 
+				error = bread(vp, lbn, size, NOCRED, &bp);
+		} else {
+			/* Read ahead only when this read follows the last one. */
+			if (vp->v_lastr + 1 == lbn &&
+			    iso_lblktosize(imp, rablock) < ip->i_size) {
+				rasize = iso_blksize(imp, ip, rablock);
+				error = breadn(vp, lbn, size, &rablock,
+					       &rasize, 1, NOCRED, &bp);
+			} else
+				error = bread(vp, lbn, size, NOCRED, &bp);
+		}
+		vp->v_lastr = lbn;
+		n = min(n, size - bp->b_resid);	/* presumably bytes really read */
+		if (error) {
+			brelse(bp);
+			return (error);
+		}
+		/* Copy out; set B_AGE once the block or the file is used up. */
+		error = uiomove(bp->b_un.b_addr + on, (int)n, uio);
+		if (n + on == imp->logical_block_size ||
+		    uio->uio_offset == (off_t)ip->i_size)
+			bp->b_flags |= B_AGE;
+		brelse(bp);
+	} while (error == 0 && uio->uio_resid > 0 && n != 0);
+	return (error);
+}
+
+/* ARGSUSED */
+int
+cd9660_ioctl(ap)
+	struct vop_ioctl_args /* {
+		struct vnode *a_vp;
+		int  a_command;
+		caddr_t  a_data;
+		int  a_fflag;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	printf("You did ioctl for isofs !!\n");	/* no ioctl is handled */
+	return ENOTTY;
+}
+
+/* ARGSUSED */
+int
+cd9660_select(ap)
+	struct vop_select_args /* {
+		struct vnode *a_vp;
+		int  a_which;
+		int  a_fflags;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	/*
+	 * XXX Always reports ready (1); whether I/O is really
+	 * possible is never checked.
+	 */
+	return 1;
+}
+
+/*
+ * Mmap vnode op.
+ *
+ * Not supported at present: always fails with EINVAL.
+ */
+/* ARGSUSED */
+int
+cd9660_mmap(ap)
+	struct vop_mmap_args /* {
+		struct vnode *a_vp;
+		int  a_fflags;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	/* No mapping is attempted. */
+	return EINVAL;
+}
+
+/*
+ * Seek vnode op.
+ *
+ * Neither offset is examined; every seek is accepted.
+ */
+/* ARGSUSED */
+int
+cd9660_seek(ap)
+	struct vop_seek_args /* {
+		struct vnode *a_vp;
+		off_t  a_oldoff;
+		off_t  a_newoff;
+		struct ucred *a_cred;
+	} */ *ap;
+{
+	/* Nothing to validate or update. */
+	return 0;
+}
+
+/*
+ * Structure for reading directories (working state of cd9660_readdir)
+ */
+struct isoreaddir {
+	struct dirent saveent;	/* pending plain entry, see iso_shipdir */
+	struct dirent assocent;	/* pending entry whose name began with ASSOCCHAR */
+	struct dirent current;	/* entry built from the record being examined */
+	off_t saveoff;		/* curroff recorded when saveent was stored */
+	off_t assocoff;		/* curroff recorded when assocent was stored */
+	off_t curroff;		/* current offset within the directory */
+	struct uio *uio;	/* destination of the dirent records */
+	off_t uio_off;		/* offset passed with the last entry copied out */
+	u_int *cookiep;		/* next cookie slot, or 0 when cookies are unused */
+	int ncookies;		/* cookie slots still available */
+	int eof;		/* cleared by iso_uiodir when it runs out of room */
+};
+
+/* Copy one finished directory entry out through idp->uio. */
+static int
+iso_uiodir(idp,dp,off)
+	struct isoreaddir *idp;
+	struct dirent *dp;
+	off_t off;
+{
+	int error;
+
+	dp->d_name[dp->d_namlen] = 0;
+	dp->d_reclen = DIRSIZ(dp);
+	/* -1 means "no room left"; eof is cleared in that case. */
+	if (idp->uio->uio_resid < dp->d_reclen) {
+		idp->eof = 0;
+		return (-1);
+	}
+	if (idp->cookiep) {
+		if (idp->ncookies <= 0) {
+			idp->eof = 0;
+			return (-1);
+		}
+		/* Record the entry's offset as its cookie. */
+		--idp->ncookies;
+		*idp->cookiep++ = off;
+	}
+
+	error = uiomove(dp,dp->d_reclen,idp->uio);
+	if (error == 0)
+		idp->uio_off = off;
+	return (error);
+}
+
+static int
+iso_shipdir(idp)
+	struct isoreaddir *idp;
+{
+	struct dirent *dp;
+	int cl, sl, assoc;
+	int error;
+	char *cname, *sname;
+	/* Queue idp->current; flush the saved entries first if its name differs. */
+	cl = idp->current.d_namlen;
+	cname = idp->current.d_name;
+	if (assoc = cl > 1 && *cname == ASSOCCHAR) {
+		cl--;		/* compare without the ASSOCCHAR prefix */
+		cname++;
+	}
+	/* Compare with saveent, or with assocent (minus prefix) if that is empty. */
+	dp = &idp->saveent;
+	sname = dp->d_name;
+	if (!(sl = dp->d_namlen)) {
+		dp = &idp->assocent;
+		sname = dp->d_name + 1;
+		sl = dp->d_namlen - 1;
+	}
+	if (sl > 0) {		/* something is pending */
+		if (sl != cl
+		    || bcmp(sname,cname,sl)) {
+			if (idp->assocent.d_namlen) {
+				if (error = iso_uiodir(idp,&idp->assocent,idp->assocoff))
+					return error;
+				idp->assocent.d_namlen = 0;
+			}
+			if (idp->saveent.d_namlen) {
+				if (error = iso_uiodir(idp,&idp->saveent,idp->saveoff))
+					return error;
+				idp->saveent.d_namlen = 0;
+			}
+		}
+	}
+	idp->current.d_reclen = DIRSIZ(&idp->current);	/* bytes copied below */
+	if (assoc) {
+		idp->assocoff = idp->curroff;
+		bcopy(&idp->current,&idp->assocent,idp->current.d_reclen);
+	} else {
+		idp->saveoff = idp->curroff;
+		bcopy(&idp->current,&idp->saveent,idp->current.d_reclen);
+	}
+	return 0;
+}
+
+/*
+ * Vnode op for readdir: walk the directory from uio_offset and emit
+ * struct dirent records until the directory or the uio space runs out.
+ * XXX eofflagp and cookiep are no longer args; the cookie code is off.
+ */
+int
+cd9660_readdir(ap)
+	struct vop_readdir_args /* {
+		struct vnode *a_vp;
+		struct uio *a_uio;
+		struct ucred *a_cred;
+	} */ *ap;
+{
+	register struct uio *uio = ap->a_uio;
+	struct isoreaddir *idp;
+	int entryoffsetinblock;
+	int error = 0;
+	int endsearch;
+	struct iso_directory_record *ep;
+	u_short elen;
+	int reclen;
+	struct iso_mnt *imp;
+	struct iso_node *ip;
+	struct buf *bp = NULL;
+
+	ip = VTOI(ap->a_vp);
+	imp = ip->i_mnt;
+
+	MALLOC(idp,struct isoreaddir *,sizeof(*idp),M_TEMP,M_WAITOK);
+	idp->saveent.d_namlen = 0;
+	idp->assocent.d_namlen = 0;
+	idp->uio = uio;
+#if 0
+	idp->cookiep = cookies;
+	idp->ncookies = ncookies;
+	idp->eof = 1;
+#else
+	idp->cookiep = 0;
+#endif
+	idp->curroff = uio->uio_offset;
+	idp->uio_off = uio->uio_offset;	/* stays put if nothing is emitted */
+	entryoffsetinblock = iso_blkoff(imp, idp->curroff);
+	if (entryoffsetinblock != 0) {
+		if (error = iso_blkatoff(ip, idp->curroff, &bp)) {
+			FREE(idp,M_TEMP);
+			return (error);
+		}
+	}
+
+	endsearch = ip->i_size;
+
+	while (idp->curroff < endsearch) {
+		/*
+		 * If offset is on a block boundary,
+		 * read the next directory block.
+		 * Release previous if it exists.
+		 */
+
+		if (iso_blkoff(imp, idp->curroff) == 0) {
+			if (bp != NULL)
+				brelse(bp);
+			if (error = iso_blkatoff(ip, idp->curroff, &bp))
+				break;
+			entryoffsetinblock = 0;
+		}
+		/*
+		 * Get pointer to next entry.
+		 */
+
+		ep = (struct iso_directory_record *)
+			(bp->b_un.b_addr + entryoffsetinblock);
+
+		reclen = isonum_711 (ep->length);
+		if (reclen == 0) {
+			/* next block; +1 so a boundary offset still advances */
+			idp->curroff = roundup (idp->curroff + 1,
+						imp->logical_block_size);
+			continue;
+		}
+
+		if (reclen < ISO_DIRECTORY_RECORD_SIZE) {
+			error = EINVAL;
+			/* illegal entry, stop */
+			break;
+		}
+
+		if (entryoffsetinblock + reclen > imp->logical_block_size) {
+			error = EINVAL;
+			/* illegal directory, so stop looking */
+			break;
+		}
+
+		idp->current.d_namlen = isonum_711 (ep->name_len);
+		if (isonum_711(ep->flags)&2)
+			isodirino(&idp->current.d_fileno,ep,imp);
+		else	/* block's byte address plus the offset inside the block */
+			idp->current.d_fileno = dbtob(bp->b_blkno) +
+				entryoffsetinblock;
+
+		if (reclen < ISO_DIRECTORY_RECORD_SIZE + idp->current.d_namlen) {
+			error = EINVAL;
+			/* illegal entry, stop */
+			break;
+		}
+
+		idp->curroff += reclen;
+		/* Translate the name for this volume type and ship it. */
+		switch (imp->iso_ftype) {
+		case ISO_FTYPE_RRIP:
+			/* d_namlen is not a u_short (see the u_char cast below) */
+			elen = idp->current.d_namlen;
+			cd9660_rrip_getname(ep,idp->current.d_name,&elen,
+					   &idp->current.d_fileno,imp);
+			idp->current.d_namlen = (u_char)elen;
+			if (idp->current.d_namlen)
+				error = iso_uiodir(idp,&idp->current,idp->curroff);
+			break;
+		default:	/* ISO_FTYPE_DEFAULT || ISO_FTYPE_9660 */
+			strcpy(idp->current.d_name,"..");
+			switch (ep->name[0]) {
+			case 0:
+				idp->current.d_namlen = 1;
+				error = iso_uiodir(idp,&idp->current,idp->curroff);
+				break;
+			case 1:
+				idp->current.d_namlen = 2;
+				error = iso_uiodir(idp,&idp->current,idp->curroff);
+				break;
+			default:
+				isofntrans(ep->name,idp->current.d_namlen,
+					   idp->current.d_name, &elen,
+					   imp->iso_ftype == ISO_FTYPE_9660,
+					   isonum_711(ep->flags)&4);
+				idp->current.d_namlen = (u_char)elen;
+				if (imp->iso_ftype == ISO_FTYPE_DEFAULT)
+					error = iso_shipdir(idp);
+				else
+					error = iso_uiodir(idp,&idp->current,idp->curroff);
+				break;
+			}
+		}
+		if (error)
+			break;
+
+		entryoffsetinblock += reclen;
+	}
+
+	if (!error && imp->iso_ftype == ISO_FTYPE_DEFAULT) {
+		idp->current.d_namlen = 0;
+		error = iso_shipdir(idp);
+	}
+	if (error < 0)	/* -1 from iso_uiodir: out of room, not a failure */
+		error = 0;
+
+	if (bp)
+		brelse (bp);
+
+	uio->uio_offset = idp->uio_off;
+#if 0
+	*eofflagp = idp->eof;
+#endif
+
+	FREE(idp,M_TEMP);
+
+	return (error);
+}
+
+/*
+ * Return target name of a symbolic link (Rock Ridge volumes only).
+ * Shouldn't we get the parent vnode and read the data from there?
+ * This could eventually result in deadlocks in cd9660_lookup.
+ * But otherwise the block read here is in the block buffer two times.
+ */
+typedef struct iso_directory_record ISODIR;
+typedef struct iso_node             ISONODE;
+typedef struct iso_mnt              ISOMNT;
+int
+cd9660_readlink(ap)
+	struct vop_readlink_args /* {
+		struct vnode *a_vp;
+		struct uio *a_uio;
+		struct ucred *a_cred;
+	} */ *ap;
+{
+	ISONODE	*ip;
+	ISODIR	*dirp;
+	ISOMNT	*imp;
+	struct	buf *bp;
+	u_short	symlen;
+	int	error;
+	char	*symname;
+
+	ip  = VTOI(ap->a_vp);
+	imp = ip->i_mnt;
+
+	if (imp->iso_ftype != ISO_FTYPE_RRIP)
+		return EINVAL;
+
+	/*
+	 * Read the whole logical block holding this inode's directory
+	 * record, so that the in-block offset used below indexes it.
+	 */
+	error = bread(imp->im_devvp,
+		      btodb(iso_lblkno(imp, ip->i_number) *
+			    imp->logical_block_size),
+		      imp->logical_block_size,
+		      NOCRED, &bp);
+	if (error) {
+		brelse(bp);
+		return EINVAL;
+	}
+
+	/*
+	 * Setup the directory pointer for this inode
+	 */
+	dirp = (ISODIR *)(bp->b_un.b_addr + (ip->i_number & imp->im_bmask));
+#ifdef DEBUG
+	printf("lbn=%d,off=%d,bsize=%d,DEV_BSIZE=%d, dirp= %08x, b_addr=%08x, offset=%08x(%08x)\n",
+	       (daddr_t)(ip->i_number >> imp->im_bshift),
+	       ip->i_number & imp->im_bmask,
+	       imp->logical_block_size,
+	       DEV_BSIZE,
+	       dirp,
+	       bp->b_un.b_addr,
+	       ip->i_number,
+	       ip->i_number & imp->im_bmask );
+#endif
+
+	/*
+	 * Just make sure we have the right one:
+	 *   1: the record must not cross the block boundary
+	 */
+	if ((ip->i_number & imp->im_bmask) + isonum_711(dirp->length)
+	    > imp->logical_block_size) {
+		brelse(bp);
+		return EINVAL;
+	}
+
+	/*
+	 * Now get a buffer
+	 * Abuse a namei buffer for now.
+	 */
+	MALLOC(symname,char *,MAXPATHLEN,M_NAMEI,M_WAITOK);
+
+	/*
+	 * Gather the symbolic name from the SL record.
+	 */
+	if (cd9660_rrip_getsymname(dirp,symname,&symlen,imp) == 0) {
+		FREE(symname,M_NAMEI);
+		brelse(bp);
+		return EINVAL;
+	}
+	/*
+	 * The directory block is no longer needed.
+	 */
+	brelse(bp);
+
+	/*
+	 * Hand the symbolic name back to the caller.
+	 */
+	error = uiomove(symname,symlen,ap->a_uio);
+
+	FREE(symname,M_NAMEI);
+
+	return error;
+}
+
+/*
+ * Abort op, called after namei() when a CREATE/DELETE isn't actually
+ * done.  Frees the pathname buffer when HASBUF is set without SAVESTART.
+ */
+int
+cd9660_abortop(ap)
+	struct vop_abortop_args /* {
+		struct vnode *a_dvp;
+		struct componentname *a_cnp;
+	} */ *ap;
+{
+	if ((ap->a_cnp->cn_flags & (SAVESTART | HASBUF)) == HASBUF)
+		FREE(ap->a_cnp->cn_pnbuf, M_NAMEI);
+	return (0);
+}
+
+/*
+ * Lock vnode op: take the lock of the vnode's iso_node via ISO_ILOCK.
+ */
+int
+cd9660_lock(ap)
+	struct vop_lock_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+	register struct iso_node *ip = VTOI(ap->a_vp);
+
+	ISO_ILOCK(ip);
+	return (0);
+}
+
+/*
+ * Unlock vnode op: panics if the iso_node is not marked ILOCKED.
+ */
+int
+cd9660_unlock(ap)
+	struct vop_unlock_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+	register struct iso_node *ip = VTOI(ap->a_vp);
+
+	if ((ip->i_flag & ILOCKED) == 0)
+		panic("cd9660_unlock NOT LOCKED");
+	ISO_IUNLOCK(ip);
+	return (0);
+}
+
+/*
+ * Islocked vnode op: 1 if the iso_node has ILOCKED set, else 0.
+ */
+int
+cd9660_islocked(ap)
+	struct vop_islocked_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+	register struct iso_node *ip = VTOI(ap->a_vp);
+
+	/* Normalise the flag bit to exactly 0 or 1. */
+	return ((ip->i_flag & ILOCKED) ? 1 : 0);
+}
+
+/*
+ * Strategy vnode op: map the buffer's logical block to a device block
+ * if that has not happened yet, then hand it to the device's strategy.
+ */
+int
+cd9660_strategy(ap)
+	struct vop_strategy_args /* {
+		struct buf *a_bp;
+	} */ *ap;
+{
+	register struct buf *bp = ap->a_bp;
+	register struct vnode *vp = bp->b_vp;
+	register struct iso_node *ip = VTOI(vp);
+	int error;
+
+	if (vp->v_type == VBLK || vp->v_type == VCHR)
+		panic("cd9660_strategy: spec");
+	/* b_blkno equal to b_lblkno is taken to mean "not mapped yet". */
+	if (bp->b_blkno == bp->b_lblkno) {
+		error = VOP_BMAP(vp, bp->b_lblkno, NULL, &bp->b_blkno, NULL);
+		if (error) {
+			bp->b_flags |= B_ERROR;
+			bp->b_error = error;
+			biodone(bp);
+			return (error);
+		}
+		if ((long)bp->b_blkno == -1)
+			clrbuf(bp);
+	}
+	if ((long)bp->b_blkno == -1) {
+		biodone(bp);
+		return 0;
+	}
+	vp = ip->i_devvp;
+	bp->b_dev = vp->v_rdev;
+	VOCALL (vp->v_op, VOFFSET(vop_strategy), ap);
+	return 0;
+}
+
+/*
+ * Print vnode op: emits a fixed tag line; no inode fields are shown.
+ */
+int
+cd9660_print(ap)
+	struct vop_print_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+	printf("tag VT_ISOFS, isofs vnode\n");
+	return (0);
+}
+
+/*
+ * Shared stub for every unsupported operation: fails with EOPNOTSUPP.
+ */
+int
+cd9660_enotsupp()
+{
+	/* Arguments, if any, are ignored. */
+	return EOPNOTSUPP;
+}
+
+/* Vnode ops cd9660 does not implement.  Each name is the cd9660_enotsupp
+ * stub cast to the op's type, except cd9660_fsync, which casts nullop.
+ */
+#define cd9660_create \
+	((int (*) __P((struct  vop_create_args *)))cd9660_enotsupp)
+#define cd9660_mknod ((int (*) __P((struct  vop_mknod_args *)))cd9660_enotsupp)
+#define cd9660_setattr \
+	((int (*) __P((struct  vop_setattr_args *)))cd9660_enotsupp)
+#define cd9660_write ((int (*) __P((struct  vop_write_args *)))cd9660_enotsupp)
+#define cd9660_fsync ((int (*) __P((struct  vop_fsync_args *)))nullop)
+#define cd9660_remove \
+	((int (*) __P((struct  vop_remove_args *)))cd9660_enotsupp)
+#define cd9660_link ((int (*) __P((struct  vop_link_args *)))cd9660_enotsupp)
+#define cd9660_rename \
+	((int (*) __P((struct  vop_rename_args *)))cd9660_enotsupp)
+#define cd9660_mkdir ((int (*) __P((struct  vop_mkdir_args *)))cd9660_enotsupp)
+#define cd9660_rmdir ((int (*) __P((struct  vop_rmdir_args *)))cd9660_enotsupp)
+#define cd9660_symlink \
+	((int (*) __P((struct vop_symlink_args *)))cd9660_enotsupp)
+#define cd9660_pathconf \
+	((int (*) __P((struct vop_pathconf_args *)))cd9660_enotsupp)
+#define cd9660_advlock \
+	((int (*) __P((struct vop_advlock_args *)))cd9660_enotsupp)
+#define cd9660_blkatoff \
+	((int (*) __P((struct  vop_blkatoff_args *)))cd9660_enotsupp)
+#define cd9660_valloc ((int(*) __P(( \
+		struct vnode *pvp, \
+		int mode, \
+		struct ucred *cred, \
+		struct vnode **vpp))) cd9660_enotsupp)
+#define cd9660_vfree ((int (*) __P((struct  vop_vfree_args *)))cd9660_enotsupp)
+#define cd9660_truncate \
+	((int (*) __P((struct  vop_truncate_args *)))cd9660_enotsupp)
+#define cd9660_update \
+	((int (*) __P((struct  vop_update_args *)))cd9660_enotsupp)
+#define cd9660_bwrite \
+	((int (*) __P((struct  vop_bwrite_args *)))cd9660_enotsupp)
+
+/*
+ * Global vfs data structures for isofs: the cd9660 vnode operations table
+ */
+int (**cd9660_vnodeop_p)();
+struct vnodeopv_entry_desc cd9660_vnodeop_entries[] = {
+	{ &vop_default_desc, vn_default_error },
+	{ &vop_lookup_desc, cd9660_lookup },	/* lookup */
+	{ &vop_create_desc, cd9660_create },	/* create */
+	{ &vop_mknod_desc, cd9660_mknod },	/* mknod */
+	{ &vop_open_desc, cd9660_open },	/* open */
+	{ &vop_close_desc, cd9660_close },	/* close */
+	{ &vop_access_desc, cd9660_access },	/* access */
+	{ &vop_getattr_desc, cd9660_getattr },	/* getattr */
+	{ &vop_setattr_desc, cd9660_setattr },	/* setattr */
+	{ &vop_read_desc, cd9660_read },	/* read */
+	{ &vop_write_desc, cd9660_write },	/* write */
+	{ &vop_ioctl_desc, cd9660_ioctl },	/* ioctl */
+	{ &vop_select_desc, cd9660_select },	/* select */
+	{ &vop_mmap_desc, cd9660_mmap },	/* mmap */
+	{ &vop_fsync_desc, cd9660_fsync },	/* fsync */
+	{ &vop_seek_desc, cd9660_seek },	/* seek */
+	{ &vop_remove_desc, cd9660_remove },	/* remove */
+	{ &vop_link_desc, cd9660_link },	/* link */
+	{ &vop_rename_desc, cd9660_rename },	/* rename */
+	{ &vop_mkdir_desc, cd9660_mkdir },	/* mkdir */
+	{ &vop_rmdir_desc, cd9660_rmdir },	/* rmdir */
+	{ &vop_symlink_desc, cd9660_symlink },	/* symlink */
+	{ &vop_readdir_desc, cd9660_readdir },	/* readdir */
+	{ &vop_readlink_desc, cd9660_readlink },/* readlink */
+	{ &vop_abortop_desc, cd9660_abortop },	/* abortop */
+	{ &vop_inactive_desc, cd9660_inactive },/* inactive */
+	{ &vop_reclaim_desc, cd9660_reclaim },	/* reclaim */
+	{ &vop_lock_desc, cd9660_lock },	/* lock */
+	{ &vop_unlock_desc, cd9660_unlock },	/* unlock */
+	{ &vop_bmap_desc, cd9660_bmap },	/* bmap */
+	{ &vop_strategy_desc, cd9660_strategy },/* strategy */
+	{ &vop_print_desc, cd9660_print },	/* print */
+	{ &vop_islocked_desc, cd9660_islocked },/* islocked */
+	{ &vop_pathconf_desc, cd9660_pathconf },/* pathconf */
+	{ &vop_advlock_desc, cd9660_advlock },	/* advlock */
+	{ &vop_blkatoff_desc, cd9660_blkatoff },/* blkatoff */
+	{ &vop_valloc_desc, cd9660_valloc },	/* valloc */
+	{ &vop_vfree_desc, cd9660_vfree },	/* vfree */
+	{ &vop_truncate_desc, cd9660_truncate },/* truncate */
+	{ &vop_update_desc, cd9660_update },	/* update */
+	{ &vop_bwrite_desc, vn_bwrite },	/* bwrite: not the cd9660_bwrite stub */
+	{ (struct vnodeop_desc*)NULL, (int(*)())NULL }	/* end of table */
+};
+struct vnodeopv_desc cd9660_vnodeop_opv_desc =
+	{ &cd9660_vnodeop_p, cd9660_vnodeop_entries };
+
+/*
+ * Special device vnode ops: spec_* routines mixed with cd9660_* ones
+ */
+int (**cd9660_specop_p)();
+struct vnodeopv_entry_desc cd9660_specop_entries[] = {
+	{ &vop_default_desc, vn_default_error },
+	{ &vop_lookup_desc, spec_lookup },	/* lookup */
+	{ &vop_create_desc, cd9660_create },	/* create */
+	{ &vop_mknod_desc, cd9660_mknod },	/* mknod */
+	{ &vop_open_desc, spec_open },		/* open */
+	{ &vop_close_desc, spec_close },	/* close */
+	{ &vop_access_desc, cd9660_access },	/* access */
+	{ &vop_getattr_desc, cd9660_getattr },	/* getattr */
+	{ &vop_setattr_desc, cd9660_setattr },	/* setattr */
+	{ &vop_read_desc, spec_read },		/* read */
+	{ &vop_write_desc, spec_write },	/* write */
+	{ &vop_ioctl_desc, spec_ioctl },	/* ioctl */
+	{ &vop_select_desc, spec_select },	/* select */
+	{ &vop_mmap_desc, spec_mmap },		/* mmap */
+	{ &vop_fsync_desc, spec_fsync },	/* fsync */
+	{ &vop_seek_desc, spec_seek },		/* seek */
+	{ &vop_remove_desc, cd9660_remove },	/* remove */
+	{ &vop_link_desc, cd9660_link },	/* link */
+	{ &vop_rename_desc, cd9660_rename },	/* rename */
+	{ &vop_mkdir_desc, cd9660_mkdir },	/* mkdir */
+	{ &vop_rmdir_desc, cd9660_rmdir },	/* rmdir */
+	{ &vop_symlink_desc, cd9660_symlink },	/* symlink */
+	{ &vop_readdir_desc, spec_readdir },	/* readdir */
+	{ &vop_readlink_desc, spec_readlink },	/* readlink */
+	{ &vop_abortop_desc, spec_abortop },	/* abortop */
+	{ &vop_inactive_desc, cd9660_inactive },/* inactive */
+	{ &vop_reclaim_desc, cd9660_reclaim },	/* reclaim */
+	{ &vop_lock_desc, cd9660_lock },	/* lock */
+	{ &vop_unlock_desc, cd9660_unlock },	/* unlock */
+	{ &vop_bmap_desc, spec_bmap },		/* bmap */
+		/* XXX strategy: panics, should be notsupp instead? */
+	{ &vop_strategy_desc, cd9660_strategy },/* strategy */
+	{ &vop_print_desc, cd9660_print },	/* print */
+	{ &vop_islocked_desc, cd9660_islocked },/* islocked */
+	{ &vop_pathconf_desc, spec_pathconf },	/* pathconf */
+	{ &vop_advlock_desc, spec_advlock },	/* advlock */
+	{ &vop_blkatoff_desc, spec_blkatoff },	/* blkatoff */
+	{ &vop_valloc_desc, spec_valloc },	/* valloc */
+	{ &vop_vfree_desc, spec_vfree },	/* vfree */
+	{ &vop_truncate_desc, spec_truncate },	/* truncate */
+	{ &vop_update_desc, cd9660_update },	/* update */
+	{ &vop_bwrite_desc, vn_bwrite },	/* bwrite */
+	{ (struct vnodeop_desc*)NULL, (int(*)())NULL }	/* end of table */
+};
+struct vnodeopv_desc cd9660_specop_opv_desc =
+	{ &cd9660_specop_p, cd9660_specop_entries };
+
+#ifdef FIFO	/* FIFO vnode ops: fifo_* routines mixed with cd9660_* ones */
+int (**cd9660_fifoop_p)();
+struct vnodeopv_entry_desc cd9660_fifoop_entries[] = {
+	{ &vop_default_desc, vn_default_error },
+	{ &vop_lookup_desc, fifo_lookup },	/* lookup */
+	{ &vop_create_desc, cd9660_create },	/* create */
+	{ &vop_mknod_desc, cd9660_mknod },	/* mknod */
+	{ &vop_open_desc, fifo_open },		/* open */
+	{ &vop_close_desc, fifo_close },	/* close */
+	{ &vop_access_desc, cd9660_access },	/* access */
+	{ &vop_getattr_desc, cd9660_getattr },	/* getattr */
+	{ &vop_setattr_desc, cd9660_setattr },	/* setattr */
+	{ &vop_read_desc, fifo_read },		/* read */
+	{ &vop_write_desc, fifo_write },	/* write */
+	{ &vop_ioctl_desc, fifo_ioctl },	/* ioctl */
+	{ &vop_select_desc, fifo_select },	/* select */
+	{ &vop_mmap_desc, fifo_mmap },		/* mmap */
+	{ &vop_fsync_desc, fifo_fsync },	/* fsync */
+	{ &vop_seek_desc, fifo_seek },		/* seek */
+	{ &vop_remove_desc, cd9660_remove },	/* remove */
+	{ &vop_link_desc, cd9660_link },	/* link */
+	{ &vop_rename_desc, cd9660_rename },	/* rename */
+	{ &vop_mkdir_desc, cd9660_mkdir },	/* mkdir */
+	{ &vop_rmdir_desc, cd9660_rmdir },	/* rmdir */
+	{ &vop_symlink_desc, cd9660_symlink },	/* symlink */
+	{ &vop_readdir_desc, fifo_readdir },	/* readdir */
+	{ &vop_readlink_desc, fifo_readlink },	/* readlink */
+	{ &vop_abortop_desc, fifo_abortop },	/* abortop */
+	{ &vop_inactive_desc, cd9660_inactive },/* inactive */
+	{ &vop_reclaim_desc, cd9660_reclaim },	/* reclaim */
+	{ &vop_lock_desc, cd9660_lock },	/* lock */
+	{ &vop_unlock_desc, cd9660_unlock },	/* unlock */
+	{ &vop_bmap_desc, fifo_bmap },		/* bmap */
+	{ &vop_strategy_desc, fifo_badop },	/* strategy */
+	{ &vop_print_desc, cd9660_print },	/* print */
+	{ &vop_islocked_desc, cd9660_islocked },/* islocked */
+	{ &vop_pathconf_desc, fifo_pathconf },	/* pathconf */
+	{ &vop_advlock_desc, fifo_advlock },	/* advlock */
+	{ &vop_blkatoff_desc, fifo_blkatoff },	/* blkatoff */
+	{ &vop_valloc_desc, fifo_valloc },	/* valloc */
+	{ &vop_vfree_desc, fifo_vfree },	/* vfree */
+	{ &vop_truncate_desc, fifo_truncate },	/* truncate */
+	{ &vop_update_desc, cd9660_update },	/* update */
+	{ &vop_bwrite_desc, vn_bwrite },	/* bwrite */
+	{ (struct vnodeop_desc*)NULL, (int(*)())NULL }	/* end of table */
+};
+struct vnodeopv_desc cd9660_fifoop_opv_desc =
+	{ &cd9660_fifoop_p, cd9660_fifoop_entries };
+#endif /* FIFO */
diff --git a/sys/isofs/cd9660/iso.h b/sys/isofs/cd9660/iso.h
new file mode 100644
index 00000000000..e3567066e1c
--- /dev/null
+++ b/sys/isofs/cd9660/iso.h
@@ -0,0 +1,256 @@
+/*-
+ * Copyright (c) 1994
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley
+ * by Pace Willisson (pace@blitz.com).  The Rock Ridge Extension
+ * Support code is derived from software contributed to Berkeley
+ * by Atsushi Murai (amurai@spec.co.jp).
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)iso.h	8.2 (Berkeley) 1/23/94
+ */
+
+#define ISODCL(from, to) ((to) - (from) + 1)	/* bytes in 1-based field [from, to] */
+
+struct iso_volume_descriptor {	/* fields tile bytes 1..2048 */
+	char type[ISODCL(1,1)]; /* 711 */
+	char id[ISODCL(2,6)];	/* 5 bytes; presumably ISO_STANDARD_ID or ISO_ECMA_ID */
+	char version[ISODCL(7,7)];
+	char data[ISODCL(8,2048)];	/* remaining 2041 bytes */
+};
+
+/* volume descriptor types (presumably values of the `type' byte -- confirm) */
+#define ISO_VD_PRIMARY 1
+#define ISO_VD_END 255
+
+#define ISO_STANDARD_ID "CD001"	/* 5 characters, same width as the id field */
+#define ISO_ECMA_ID     "CDW01"
+
+struct iso_primary_descriptor {	/* fields tile bytes 1..2048 without gaps */
+	char type			[ISODCL (  1,   1)]; /* 711 */
+	char id				[ISODCL (  2,   6)];
+	char version			[ISODCL (  7,   7)]; /* 711 */
+	char unused1			[ISODCL (  8,   8)];
+	char system_id			[ISODCL (  9,  40)]; /* achars */
+	char volume_id			[ISODCL ( 41,  72)]; /* dchars */
+	char unused2			[ISODCL ( 73,  80)];
+	char volume_space_size		[ISODCL ( 81,  88)]; /* 733 */
+	char unused3			[ISODCL ( 89, 120)];
+	char volume_set_size		[ISODCL (121, 124)]; /* 723 */
+	char volume_sequence_number	[ISODCL (125, 128)]; /* 723 */
+	char logical_block_size		[ISODCL (129, 132)]; /* 723 */
+	char path_table_size		[ISODCL (133, 140)]; /* 733 */
+	char type_l_path_table		[ISODCL (141, 144)]; /* 731 */
+	char opt_type_l_path_table	[ISODCL (145, 148)]; /* 731 */
+	char type_m_path_table		[ISODCL (149, 152)]; /* 732 */
+	char opt_type_m_path_table	[ISODCL (153, 156)]; /* 732 */
+	char root_directory_record	[ISODCL (157, 190)]; /* 9.1 */
+	char volume_set_id		[ISODCL (191, 318)]; /* dchars */
+	char publisher_id		[ISODCL (319, 446)]; /* achars */
+	char preparer_id		[ISODCL (447, 574)]; /* achars */
+	char application_id		[ISODCL (575, 702)]; /* achars */
+	char copyright_file_id		[ISODCL (703, 739)]; /* 7.5 dchars */
+	char abstract_file_id		[ISODCL (740, 776)]; /* 7.5 dchars */
+	char bibliographic_file_id	[ISODCL (777, 813)]; /* 7.5 dchars */
+	char creation_date		[ISODCL (814, 830)]; /* 8.4.26.1 */
+	char modification_date		[ISODCL (831, 847)]; /* 8.4.26.1 */
+	char expiration_date		[ISODCL (848, 864)]; /* 8.4.26.1 */
+	char effective_date		[ISODCL (865, 881)]; /* 8.4.26.1 */
+	char file_structure_version	[ISODCL (882, 882)]; /* 711 */
+	char unused4			[ISODCL (883, 883)];
+	char application_data		[ISODCL (884, 1395)];
+	char unused5			[ISODCL (1396, 2048)];
+};
+#define ISO_DEFAULT_BLOCK_SIZE		2048
+
+struct iso_directory_record {	/* fixed part covers bytes 1..33 */
+	char length			[ISODCL (1, 1)]; /* 711 */
+	char ext_attr_length		[ISODCL (2, 2)]; /* 711 */
+	unsigned char extent		[ISODCL (3, 10)]; /* 733 */
+	unsigned char size		[ISODCL (11, 18)]; /* 733 */
+	char date			[ISODCL (19, 25)]; /* 7 by 711 */
+	char flags			[ISODCL (26, 26)];
+	char file_unit_size		[ISODCL (27, 27)]; /* 711 */
+	char interleave			[ISODCL (28, 28)]; /* 711 */
+	char volume_sequence_number	[ISODCL (29, 32)]; /* 723 */
+	char name_len			[ISODCL (33, 33)]; /* 711 */
+	char name			[0]; /* zero-length; presumably name_len bytes follow */
+};
+/* The fixed part is 33 bytes (fields 1..33).  Don't use
+   sizeof(iso_directory_record): alignment of the last entry may give 34. */
+#define ISO_DIRECTORY_RECORD_SIZE	33
+
+struct iso_extended_attributes {	/* fields tile bytes 1..250 without gaps */
+	unsigned char owner		[ISODCL (1, 4)]; /* 723 */
+	unsigned char group		[ISODCL (5, 8)]; /* 723 */
+	unsigned char perm		[ISODCL (9, 10)]; /* 9.5.3 */
+	char ctime			[ISODCL (11, 27)]; /* 8.4.26.1 */
+	char mtime			[ISODCL (28, 44)]; /* 8.4.26.1 */
+	char xtime			[ISODCL (45, 61)]; /* 8.4.26.1 */
+	char ftime			[ISODCL (62, 78)]; /* 8.4.26.1 */
+	char recfmt			[ISODCL (79, 79)]; /* 711 */
+	char recattr			[ISODCL (80, 80)]; /* 711 */
+	unsigned char reclen		[ISODCL (81, 84)]; /* 723 */
+	char system_id			[ISODCL (85, 116)]; /* achars */
+	char system_use			[ISODCL (117, 180)];
+	char version			[ISODCL (181, 181)]; /* 711 */
+	char len_esc			[ISODCL (182, 182)]; /* 711 */
+	char reserved			[ISODCL (183, 246)];
+	unsigned char len_au		[ISODCL (247, 250)]; /* 723 */
+};
+
+/* CD-ROM Format type (the type of iso_mnt.iso_ftype) */
+enum ISO_FTYPE  { ISO_FTYPE_DEFAULT, ISO_FTYPE_9660, ISO_FTYPE_RRIP, ISO_FTYPE_ECMA };
+
+#ifndef	ISOFSMNT_ROOT
+#define	ISOFSMNT_ROOT	0
+#endif
+
+struct iso_mnt {	/* what VFSTOISOFS(mp) yields from mp->mnt_data */
+	int im_flags;
+
+	struct mount *im_mountp;
+	dev_t im_dev;
+	struct vnode *im_devvp;
+
+	int logical_block_size;	/* value of iso_blksize() */
+	int im_bshift;		/* shift used by iso_lblkno()/iso_lblktosize() */
+	int im_bmask;		/* mask used by iso_blkoff() */
+	
+	int volume_space_size;
+	char im_fsmnt[50];
+	struct netexport im_export;
+	
+	char root[ISODCL (157, 190)];	/* same span as root_directory_record */
+	int root_extent;
+	int root_size;
+	enum ISO_FTYPE  iso_ftype;
+	
+	int rr_skip;
+	int rr_skip0;
+};
+
+#define VFSTOISOFS(mp)	((struct iso_mnt *)((mp)->mnt_data))
+/* Block arithmetic on `loc'/`blk' using the mount's im_bmask and im_bshift. */
+#define iso_blkoff(imp, loc) ((loc) & (imp)->im_bmask)
+#define iso_lblkno(imp, loc) ((loc) >> (imp)->im_bshift)
+#define iso_blksize(imp, ip, lbn) ((imp)->logical_block_size) /* ip, lbn unused */
+#define iso_lblktosize(imp, blk) ((blk) << (imp)->im_bshift)
+
+int cd9660_mount __P((struct mount *,
+	    char *, caddr_t, struct nameidata *, struct proc *));
+int cd9660_start __P((struct mount *, int, struct proc *));
+int cd9660_unmount __P((struct mount *, int, struct proc *));
+int cd9660_root __P((struct mount *, struct vnode **));
+int cd9660_quotactl __P((struct mount *, int, uid_t, caddr_t, struct proc *));
+int cd9660_statfs __P((struct mount *, struct statfs *, struct proc *));
+int cd9660_sync __P((struct mount *, int, struct ucred *, struct proc *));
+int cd9660_vget __P((struct mount *, ino_t, struct vnode **));
+int cd9660_fhtovp __P((struct mount *, struct fid *, struct mbuf *,
+	    struct vnode **, int *, struct ucred **));
+int cd9660_vptofh __P((struct vnode *, struct fid *));
+int cd9660_init __P(());
+
+struct iso_node;
+int iso_blkatoff __P((struct iso_node *ip, long offset, struct buf **bpp)); 
+int iso_iget __P((struct iso_node *xp, ino_t ino, int relocated,
+		  struct iso_node **ipp, struct iso_directory_record *isodir));
+int iso_iput __P((struct iso_node *ip)); 
+int iso_ilock __P((struct iso_node *ip)); 
+int iso_iunlock __P((struct iso_node *ip)); 
+int cd9660_mountroot __P((void)); 
+
+extern int (**cd9660_vnodeop_p)();
+
+extern inline int
+isonum_711(p)
+	unsigned char *p;	/* a single byte is read */
+{
+	return p[0];
+}
+
+extern inline int
+isonum_712(p)
+	char *p;		/* a single byte, read through plain char */
+{
+	return p[0];
+}
+
+extern inline int
+isonum_721(p)
+	unsigned char *p;	/* two bytes, least significant first */
+{
+	return p[0]|(p[1] << 8);	/* bytes stay unsigned: result is 0..65535 */
+}
+
+extern inline int
+isonum_722(p)
+	unsigned char *p;	/* two bytes, most significant first */
+{
+	return (p[0] << 8)|p[1];	/* bytes stay unsigned: result is 0..65535 */
+}
+
+extern inline int
+isonum_723(p)
+	unsigned char *p;	/* only p[0] and p[1] are read, via isonum_721() */
+{
+	return isonum_721(p);
+}
+
+extern inline int
+isonum_731(p)
+	unsigned char *p;	/* four bytes, least significant first */
+{
+	return (p[3] << 24)|(p[2] << 16)|(p[1] << 8)|p[0];
+}
+
+extern inline int
+isonum_732(p)
+	unsigned char *p;	/* four bytes, most significant first */
+{
+	return p[3]|(p[2] << 8)|(p[1] << 16)|(p[0] << 24);
+}
+
+extern inline int
+isonum_733(p)
+	unsigned char *p;	/* only p[0]..p[3] are read, via isonum_731() */
+{
+	return isonum_731(p);
+}
+
+int isofncmp __P((unsigned char *, int, unsigned char *, int));
+void isofntrans __P((unsigned char *, int, unsigned char *, unsigned short *,
+		     int, int));
+
+/*
+ * Associated files have a leading '='.
+ */
+#define	ASSOCCHAR	'='
diff --git a/sys/isofs/cd9660/iso_rrip.h b/sys/isofs/cd9660/iso_rrip.h
new file mode 100644
index 00000000000..78e4a775201
--- /dev/null
+++ b/sys/isofs/cd9660/iso_rrip.h
@@ -0,0 +1,83 @@
+/*-
+ * Copyright (c) 1993, 1994
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley
+ * by Pace Willisson (pace@blitz.com).  The Rock Ridge Extension
+ * Support code is derived from software contributed to Berkeley
+ * by Atsushi Murai (amurai@spec.co.jp).
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)iso_rrip.h	8.2 (Berkeley) 1/23/94
+ */
+
+
+/*
+ *	Analyze function flags (similar to RR field bits)
+ */
+#define	ISO_SUSP_ATTR		0x0001
+#define	ISO_SUSP_DEVICE		0x0002
+#define	ISO_SUSP_SLINK		0x0004
+#define	ISO_SUSP_ALTNAME	0x0008
+#define	ISO_SUSP_CLINK		0x0010
+#define	ISO_SUSP_PLINK		0x0020
+#define	ISO_SUSP_RELDIR		0x0040
+#define	ISO_SUSP_TSTAMP		0x0080
+#define	ISO_SUSP_IDFLAG		0x0100
+#define	ISO_SUSP_EXTREF		0x0200
+#define	ISO_SUSP_CONT		0x0400
+#define	ISO_SUSP_OFFSET		0x0800
+#define	ISO_SUSP_STOP		0x1000
+#define	ISO_SUSP_UNKNOWN	0x8000
+
+typedef struct {
+	struct iso_node	*inop;		/* presumably the node being analyzed -- confirm */
+	int		fields;		/* interesting fields in this analysis */
+	daddr_t		iso_ce_blk;	/* block of continuation area */
+	off_t		iso_ce_off;	/* offset of continuation area */
+	int		iso_ce_len;	/* length of continuation area */
+	struct iso_mnt	*imp;		/* mount structure */
+	ino_t		*inump;		/* inode number pointer */
+	char		*outbuf;	/* name/symbolic link output area */
+	u_short		*outlen;	/* length of above */
+	u_short		maxlen;		/* maximum length of above */
+	int		cont;		/* continuation of above */
+} ISO_RRIP_ANALYZE;
+
+int cd9660_rrip_analyze __P((struct iso_directory_record *isodir,
+			    struct iso_node *inop, struct iso_mnt *imp));
+int cd9660_rrip_getname __P((struct iso_directory_record *isodir,
+			    char *outbuf, u_short *outlen,
+			    ino_t *inump, struct iso_mnt *imp));
+int cd9660_rrip_getsymname __P((struct iso_directory_record *isodir,
+			       char *outbuf, u_short *outlen,
+			       struct iso_mnt *imp));
+int cd9660_rrip_offset __P((struct iso_directory_record *isodir,
+			   struct iso_mnt *imp));
diff --git a/sys/kern/Make.tags.inc b/sys/kern/Make.tags.inc
new file mode 100644
index 00000000000..1563c4165f1
--- /dev/null
+++ b/sys/kern/Make.tags.inc
@@ -0,0 +1,18 @@
+#	@(#)Make.tags.inc	8.1 (Berkeley) 6/11/93
+
+# Common files for "make tags".
+# Included by the Makefile for each architecture.
+
+# Put the ../sys stuff near the end so that subroutine definitions win when
+# there is a struct tag with the same name (e.g., vmmeter).  The real
+# solution would probably be for ctags to generate "struct vmmeter" tags.
+
+COMM=	/sys/conf/*.[ch] \
+	/sys/dev/*.[ch] /sys/dev/scsi/*.[ch] \
+	/sys/kern/*.[ch] /sys/libkern/*.[ch] \
+	/sys/miscfs/*/*.[ch] \
+	/sys/net/*.[ch] /sys/netccitt/*.[ch] /sys/netinet/*.[ch] \
+	/sys/netiso/*.[ch] /sys/netns/*.[ch] \
+	/sys/nfs/*.[ch] /sys/sys/*.[ch] \
+	/sys/ufs/*/*.[ch] \
+	/sys/vm/*.[ch]
diff --git a/sys/kern/Makefile b/sys/kern/Makefile
new file mode 100644
index 00000000000..cfe962a9a66
--- /dev/null
+++ b/sys/kern/Makefile
@@ -0,0 +1,50 @@
+#	@(#)Makefile	8.2 (Berkeley) 3/21/94
+
+# Makefile for kernel tags files, init_sysent, etc.
+
+ARCH=	hp300 i386 luna68k news3400 pmax sparc tahoe vax
+
+all:
+	@echo "make tags, make links or init_sysent.c only"
+
+init_sysent.c syscalls.c ../sys/syscall.h: makesyscalls.sh syscalls.master
+	-mv -f init_sysent.c init_sysent.c.bak
+	-mv -f syscalls.c syscalls.c.bak
+	-mv -f ../sys/syscall.h ../sys/syscall.h.bak
+	sh makesyscalls.sh syscalls.master
+
+# Kernel tags:
+# Tags files are built in the top-level directory for each architecture,
+# with a makefile listing the architecture-dependent files, etc.  The list
+# of common files is in ./Make.tags.inc.  Links to the correct tags file
+# are placed in each source directory.  We need to have links to tags files
+# from the generic directories that are relative to the machine type, even
+# via remote mounts; therefore we use symlinks to $SYSTAGS, which points at
+# ${SYSDIR}/${MACHINE}/tags.
+
+SYSTAGS=/var/db/sys_tags
+SYSDIR=/sys
+
+# Directories in which to place tags links (other than machine-dependent)
+DGEN=	conf \
+	dev dev/scsi \
+	hp hp/dev hp/hpux \
+	kern libkern \
+	miscfs miscfs/deadfs miscfs/fdesc miscfs/fifofs miscfs/kernfs \
+	miscfs/lofs miscfs/nullfs miscfs/portal miscfs/procfs \
+	miscfs/specfs miscfs/umapfs miscfs/union \
+	net netccitt netinet netiso netns nfs scripts sys \
+	ufs ufs/ffs ufs/lfs ufs/mfs ufs/ufs \
+	vm
+
+tags::	# run the tags target in every ${ARCH} directory, with the invoking make
+	-for i in ${ARCH}; do \
+	    (cd ../$$i && ${MAKE} ${MFLAGS} tags); done
+
+links::	# repoint ${SYSTAGS}, then link tags in ${DGEN} and recurse into ${ARCH}
+	rm -f ${SYSTAGS}
+	ln -s ${SYSDIR}/${MACHINE}/tags ${SYSTAGS}
+	-for i in ${DGEN}; do \
+	    (cd ../$$i && { rm -f tags; ln -s ${SYSTAGS} tags; }) done
+	-for i in ${ARCH}; do \
+	    (cd ../$$i && ${MAKE} ${MFLAGS} SYSTAGS=${SYSTAGS} links); done
diff --git a/sys/kern/init_main.c b/sys/kern/init_main.c
new file mode 100644
index 00000000000..c6497153a69
--- /dev/null
+++ b/sys/kern/init_main.c
@@ -0,0 +1,394 @@
+/*
+ * Copyright (c) 1982, 1986, 1989, 1991, 1992, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)init_main.c	8.9 (Berkeley) 1/21/94
+ */
+
+#include <sys/param.h>
+#include <sys/filedesc.h>
+#include <sys/errno.h>
+#include <sys/exec.h>
+#include <sys/kernel.h>
+#include <sys/mount.h>
+#include <sys/map.h>
+#include <sys/proc.h>
+#include <sys/resourcevar.h>
+#include <sys/signalvar.h>
+#include <sys/systm.h>
+#include <sys/vnode.h>
+#include <sys/conf.h>
+#include <sys/buf.h>
+#include <sys/clist.h>
+#include <sys/device.h>
+#include <sys/protosw.h>
+#include <sys/reboot.h>
+#include <sys/user.h>
+
+#include <ufs/ufs/quota.h>
+
+#include <machine/cpu.h>
+
+#include <vm/vm.h>
+
+#ifdef HPFPLIB
+char	copyright[] =
+"Copyright (c) 1982, 1986, 1989, 1991, 1993\n\tThe Regents of the University of California.\nCopyright (c) 1992 Hewlett-Packard Company\nCopyright (c) 1992 Motorola Inc.\nAll rights reserved.\n\n";
+#else
+char	copyright[] =
+"Copyright (c) 1982, 1986, 1989, 1991, 1993\n\tThe Regents of the University of California.  All rights reserved.\n\n";
+#endif
+
+/* Components of the first process -- never freed. */
+struct	session session0;
+struct	pgrp pgrp0;
+struct	proc proc0;
+struct	pcred cred0;
+struct	filedesc0 filedesc0;
+struct	plimit limit0;
+struct	vmspace vmspace0;
+struct	proc *curproc = &proc0;
+struct	proc *initproc, *pageproc;
+
+int	cmask = CMASK;
+extern	struct user *proc0paddr;
+
+struct	vnode *rootvp, *swapdev_vp;
+int	boothowto;
+struct	timeval boottime;
+struct	timeval runtime;
+
+static void start_init __P((struct proc *p, void *framep));
+
+/*
+ * System startup; initialize the world, create process 0, mount root
+ * filesystem, and fork to create init and pagedaemon.  Most of the
+ * hard work is done in the lower-level initialization routines including
+ * startup(), which does memory initialization and autoconfiguration.
+ */
+main(framep)
+	void *framep;		/* handed unchanged to start_init() */
+{
+	register struct proc *p;
+	register struct filedesc0 *fdp;
+	register struct pdevinit *pdev;
+	register int i;
+	int s, rval[2];
+	extern int (*mountroot) __P((void));
+	extern struct pdevinit pdevinit[];
+	extern void roundrobin __P((void *));
+	extern void schedcpu __P((void *));
+
+	/*
+	 * Initialize the current process pointer (curproc) before
+	 * any possible traps/probes to simplify trap processing.
+	 */
+	p = &proc0;
+	curproc = p;
+	/*
+	 * Attempt to find console and initialize
+	 * in case of early panic or other messages.
+	 */
+	consinit();
+	printf("%s", copyright);	/* copyright is data, never a format */
+
+	vm_mem_init();
+	kmeminit();
+	cpu_startup();
+
+	/*
+	 * Create process 0 (the swapper).
+	 */
+	allproc = (volatile struct proc *)p;
+	p->p_prev = (struct proc **)&allproc;
+	p->p_pgrp = &pgrp0;
+	pgrphash[0] = &pgrp0;
+	pgrp0.pg_mem = p;
+	pgrp0.pg_session = &session0;
+	session0.s_count = 1;
+	session0.s_leader = p;
+
+	p->p_flag = P_INMEM | P_SYSTEM;
+	p->p_stat = SRUN;
+	p->p_nice = NZERO;
+	bcopy("swapper", p->p_comm, sizeof ("swapper"));
+
+	/* Create credentials. */
+	cred0.p_refcnt = 1;
+	p->p_cred = &cred0;
+	p->p_ucred = crget();
+	p->p_ucred->cr_ngroups = 1;	/* group 0 */
+
+	/* Create the file descriptor table. */
+	fdp = &filedesc0;
+	p->p_fd = &fdp->fd_fd;
+	fdp->fd_fd.fd_refcnt = 1;
+	fdp->fd_fd.fd_cmask = cmask;
+	fdp->fd_fd.fd_ofiles = fdp->fd_dfiles;
+	fdp->fd_fd.fd_ofileflags = fdp->fd_dfileflags;
+	fdp->fd_fd.fd_nfiles = NDFILE;
+
+	/* Create the limits structures. */
+	p->p_limit = &limit0;
+	for (i = 0; i < sizeof(p->p_rlimit)/sizeof(p->p_rlimit[0]); i++)
+		limit0.pl_rlimit[i].rlim_cur =
+		    limit0.pl_rlimit[i].rlim_max = RLIM_INFINITY;
+	limit0.pl_rlimit[RLIMIT_NOFILE].rlim_cur = NOFILE;
+	limit0.pl_rlimit[RLIMIT_NPROC].rlim_cur = MAXUPRC;
+	i = ptoa(cnt.v_free_count);
+	limit0.pl_rlimit[RLIMIT_RSS].rlim_max = i;
+	limit0.pl_rlimit[RLIMIT_MEMLOCK].rlim_max = i;
+	limit0.pl_rlimit[RLIMIT_MEMLOCK].rlim_cur = i / 3;
+	limit0.p_refcnt = 1;
+
+	/* Allocate a prototype map so we have something to fork. */
+	p->p_vmspace = &vmspace0;
+	vmspace0.vm_refcnt = 1;
+	pmap_pinit(&vmspace0.vm_pmap);
+	vm_map_init(&p->p_vmspace->vm_map, round_page(VM_MIN_ADDRESS),
+	    trunc_page(VM_MAX_ADDRESS), TRUE);
+	vmspace0.vm_map.pmap = &vmspace0.vm_pmap;
+	p->p_addr = proc0paddr;				/* XXX */
+
+	/*
+	 * We continue to place resource usage info and signal
+	 * actions in the user struct so they're pageable.
+	 */
+	p->p_stats = &p->p_addr->u_stats;
+	p->p_sigacts = &p->p_addr->u_sigacts;
+
+	/*
+	 * Initialize per uid information structure and charge
+	 * root for one process.
+	 */
+	usrinfoinit();
+	(void)chgproccnt(0, 1);
+
+	rqinit();
+
+	/* Configure virtual memory system, set vm rlimits. */
+	vm_init_limits(p);
+
+	/* Initialize the file systems. */
+	vfsinit();
+
+	/* Start real time and statistics clocks. */
+	initclocks();
+
+	/* Initialize mbuf's. */
+	mbinit();
+
+	/* Initialize clists. */
+	clist_init();
+
+#ifdef SYSVSHM
+	/* Initialize System V style shared memory. */
+	shminit();
+#endif
+
+	/* Attach pseudo-devices. */
+	for (pdev = pdevinit; pdev->pdev_attach != NULL; pdev++)
+		(*pdev->pdev_attach)(pdev->pdev_count);
+
+	/*
+	 * Initialize protocols.  Block reception of incoming packets
+	 * until everything is ready.
+	 */
+	s = splimp();
+	ifinit();
+	domaininit();
+	splx(s);
+
+#ifdef GPROF
+	/* Initialize kernel profiling. */
+	kmstartup();
+#endif
+
+	/* Kick off timeout driven events by calling first time. */
+	roundrobin(NULL);
+	schedcpu(NULL);
+
+	/* Mount the root file system. */
+	if ((*mountroot)())
+		panic("cannot mount root");
+
+	/* Get the vnode for '/'.  Set fdp->fd_fd.fd_cdir to reference it. */
+	if (VFS_ROOT(mountlist.tqh_first, &rootvnode))
+		panic("cannot find root vnode");
+	fdp->fd_fd.fd_cdir = rootvnode;
+	VREF(fdp->fd_fd.fd_cdir);
+	VOP_UNLOCK(rootvnode);
+	fdp->fd_fd.fd_rdir = NULL;
+	swapinit();
+
+	/*
+	 * Now can look at time, having had a chance to verify the time
+	 * from the file system.  Reset p->p_rtime as it may have been
+	 * munched in mi_switch() after the time got set.
+	 */
+	p->p_stats->p_start = runtime = mono_time = boottime = time;
+	p->p_rtime.tv_sec = p->p_rtime.tv_usec = 0;
+
+	/* Initialize signal state for process 0. */
+	siginit(p);
+
+	/* Create process 1 (init(8)). */
+	if (fork(p, NULL, rval))
+		panic("fork init");
+	if (rval[1]) {
+		start_init(curproc, framep);
+		return;
+	}
+
+	/* Create process 2 (the pageout daemon). */
+	if (fork(p, NULL, rval))
+		panic("fork pager");
+	if (rval[1]) {
+		/*
+		 * Now in process 2.
+		 */
+		p = curproc;
+		pageproc = p;
+		p->p_flag |= P_INMEM | P_SYSTEM;	/* XXX */
+		bcopy("pagedaemon", curproc->p_comm, sizeof ("pagedaemon"));
+		vm_pageout();
+		/* NOTREACHED */
+	}
+
+	/* The scheduler is an infinite loop. */
+	scheduler();
+	/* NOTREACHED */
+}
+
+/*
+ * List of paths to try, in order, when searching for "init"; NULL ends it.
+ */
+static char *initpaths[] = {
+	"/sbin/init",
+	"/sbin/oinit",
+	"/sbin/init.bak",
+	NULL,
+};
+
+/*
+ * Start the initial user process; try exec'ing each pathname in "initpaths".
+ * The program is invoked with one argument containing the boot flags.
+ */
+static void
+start_init(p, framep)
+	struct proc *p;		/* recorded as initproc */
+	void *framep;		/* passed on to cpu_set_init_frame() */
+{
+	vm_offset_t addr;
+	struct execve_args args;
+	int options, i, retval[2], error;
+	char **pathp, *path, *ucp, **uap, *arg0, *arg1;
+
+	initproc = p;
+
+	/*
+	 * We need to set the system call frame as if we were entered through
+	 * a syscall() so that when we call execve() below, it will be able
+	 * to set the entry point (see setregs) when it tries to exec.  The
+	 * startup code in "locore.s" has allocated space for the frame and
+	 * passed a pointer to that space as main's argument.
+	 */
+	cpu_set_init_frame(p, framep);
+
+	/*
+	 * Need just enough stack to hold the faked-up "execve()" arguments.
+	 */
+	addr = trunc_page(VM_MAX_ADDRESS - PAGE_SIZE);
+	if (vm_allocate(&p->p_vmspace->vm_map, &addr, PAGE_SIZE, FALSE) != 0)
+		panic("init: couldn't allocate argument space");
+	p->p_vmspace->vm_maxsaddr = (caddr_t)addr;
+
+	for (pathp = &initpaths[0]; (path = *pathp) != NULL; pathp++) {
+		/*
+		 * Move out the boot flag argument.
+		 */
+		options = 0;
+		ucp = (char *)USRSTACK;
+		(void)subyte(--ucp, 0);		/* trailing zero */
+		if (boothowto & RB_SINGLE) {
+			(void)subyte(--ucp, 's');
+			options = 1;
+		}
+#ifdef notyet
+		if (boothowto & RB_FASTBOOT) {
+			(void)subyte(--ucp, 'f');
+			options = 1;
+		}
+#endif
+		if (options == 0)
+			(void)subyte(--ucp, '-');
+		(void)subyte(--ucp, '-');		/* leading hyphen */
+		arg1 = ucp;
+
+		/*
+		 * Move out the file name (also arg 0).
+		 */
+		for (i = strlen(path); i >= 0; i--)	/* starts at the NUL */
+			(void)subyte(--ucp, path[i]);
+		arg0 = ucp;
+
+		/*
+		 * Move out the arg pointers.
+		 */
+		uap = (char **)((int)ucp & ~(NBPW-1));
+		(void)suword((caddr_t)--uap, 0);	/* terminator */
+		(void)suword((caddr_t)--uap, (int)arg1);
+		(void)suword((caddr_t)--uap, (int)arg0);
+
+		/*
+		 * Point at the arguments.
+		 */
+		args.fname = arg0;
+		args.argp = uap;
+		args.envp = NULL;
+
+		/*
+		 * Now try to exec the program.  If can't for any reason
+		 * other than it doesn't exist, complain.
+		 */
+		if ((error = execve(p, &args, retval)) == 0)
+			return;
+		if (error != ENOENT)
+			printf("exec %s: error %d\n", path, error);
+	}
+	printf("init: not found\n");
+	panic("no init");
+}
diff --git a/sys/kern/init_sysent.c b/sys/kern/init_sysent.c
new file mode 100644
index 00000000000..4b25c0695cf
--- /dev/null
+++ b/sys/kern/init_sysent.c
@@ -0,0 +1,480 @@
+/*
+ * System call switch table.
+ *
+ * DO NOT EDIT-- this file is automatically generated.
+ * created from	@(#)syscalls.master	8.2 (Berkeley) 1/13/94
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+
+int	nosys();
+
+int	nosys();
+int	exit();
+int	fork();
+int	read();
+int	write();
+int	open();
+int	close();
+int	wait4();
+int	link();
+int	unlink();
+int	chdir();
+int	fchdir();
+int	mknod();
+int	chmod();
+int	chown();
+int	obreak();
+int	getfsstat();
+int	getpid();
+int	mount();
+int	unmount();
+int	setuid();
+int	getuid();
+int	geteuid();
+int	ptrace();
+int	recvmsg();
+int	sendmsg();
+int	recvfrom();
+int	accept();
+int	getpeername();
+int	getsockname();
+int	access();
+int	chflags();
+int	fchflags();
+int	sync();
+int	kill();
+int	getppid();
+int	dup();
+int	pipe();
+int	getegid();
+int	profil();
+#ifdef KTRACE
+int	ktrace();
+#else
+#endif
+int	sigaction();
+int	getgid();
+int	sigprocmask();
+int	getlogin();
+int	setlogin();
+int	acct();
+int	sigpending();
+int	sigaltstack();
+int	ioctl();
+int	reboot();
+int	revoke();
+int	symlink();
+int	readlink();
+int	execve();
+int	umask();
+int	chroot();
+int	msync();
+int	vfork();
+int	sbrk();
+int	sstk();
+int	ovadvise();
+int	munmap();
+int	mprotect();
+int	madvise();
+int	mincore();
+int	getgroups();
+int	setgroups();
+int	getpgrp();
+int	setpgid();
+int	setitimer();
+int	swapon();
+int	getitimer();
+int	getdtablesize();
+int	dup2();
+int	fcntl();
+int	select();
+int	fsync();
+int	setpriority();
+int	socket();
+int	connect();
+int	getpriority();
+int	sigreturn();
+int	bind();
+int	setsockopt();
+int	listen();
+int	sigsuspend();
+#ifdef TRACE
+int	vtrace();
+#else
+#endif
+int	gettimeofday();
+int	getrusage();
+int	getsockopt();
+#ifdef vax
+int	resuba();
+#else
+#endif
+int	readv();
+int	writev();
+int	settimeofday();
+int	fchown();
+int	fchmod();
+int	rename();
+int	flock();
+int	mkfifo();
+int	sendto();
+int	shutdown();
+int	socketpair();
+int	mkdir();
+int	rmdir();
+int	utimes();
+int	adjtime();
+int	setsid();
+int	quotactl();
+#ifdef NFS
+int	nfssvc();
+#else
+#endif
+int	statfs();
+int	fstatfs();
+#ifdef NFS
+int	getfh();
+#else
+#endif
+#ifdef SYSVSHM
+int	shmsys();
+#else
+#endif
+int	setgid();
+int	setegid();
+int	seteuid();
+#ifdef LFS
+int	lfs_bmapv();
+int	lfs_markv();
+int	lfs_segclean();
+int	lfs_segwait();
+#else
+#endif
+int	stat();
+int	fstat();
+int	lstat();
+int	pathconf();
+int	fpathconf();
+int	getrlimit();
+int	setrlimit();
+int	getdirentries();
+int	mmap();
+int	nosys();
+int	lseek();
+int	truncate();
+int	ftruncate();
+int	__sysctl();
+int	mlock();
+int	munlock();
+
+#ifdef COMPAT_43
+#define compat(n, name) n, __CONCAT(o,name)
+
+int	ocreat();
+int	olseek();
+int	ostat();
+int	olstat();
+#ifdef KTRACE
+#else
+#endif
+int	ofstat();
+int	ogetkerninfo();
+int	ogetpagesize();
+int	ommap();
+int	owait();
+int	ogethostname();
+int	osethostname();
+int	oaccept();
+int	osend();
+int	orecv();
+int	osigvec();
+int	osigblock();
+int	osigsetmask();
+int	osigstack();
+int	orecvmsg();
+int	osendmsg();
+#ifdef TRACE
+#else
+#endif
+#ifdef vax
+#else
+#endif
+int	orecvfrom();
+int	osetreuid();
+int	osetregid();
+int	otruncate();
+int	oftruncate();
+int	ogetpeername();
+int	ogethostid();
+int	osethostid();
+int	ogetrlimit();
+int	osetrlimit();
+int	okillpg();
+int	oquota();
+int	ogetsockname();
+#ifdef NFS
+#else
+#endif
+int	ogetdirentries();
+#ifdef NFS
+#else
+#endif
+#ifdef SYSVSHM
+#else
+#endif
+#ifdef LFS
+#else
+#endif
+
+#else /* COMPAT_43 */
+#define compat(n, name) 0, nosys
+#endif /* COMPAT_43 */
+
+/*
+ * System call switch table.  Entries are listed in system call number
+ * order; the number and name of each slot are given in its trailing
+ * comment.  Each initializer supplies a count followed by the handler
+ * function (the count is presumably the number of arguments the call
+ * takes -- confirm against the declaration of struct sysent, which is
+ * not in this file).  Obsolete, unassigned and configured-out slots
+ * point at nosys.  A compat() entry expands to the count and the
+ * o-prefixed handler when COMPAT_43 is defined, and to "0, nosys"
+ * otherwise.
+ */
+struct sysent sysent[] = {
+	{ 0, nosys },			/* 0 = syscall */
+	{ 1, exit },			/* 1 = exit */
+	{ 0, fork },			/* 2 = fork */
+	{ 3, read },			/* 3 = read */
+	{ 3, write },			/* 4 = write */
+	{ 3, open },			/* 5 = open */
+	{ 1, close },			/* 6 = close */
+	{ 4, wait4 },			/* 7 = wait4 */
+	{ compat(2,creat) },		/* 8 = old creat */
+	{ 2, link },			/* 9 = link */
+	{ 1, unlink },			/* 10 = unlink */
+	{ 0, nosys },			/* 11 = obsolete execv */
+	{ 1, chdir },			/* 12 = chdir */
+	{ 1, fchdir },			/* 13 = fchdir */
+	{ 3, mknod },			/* 14 = mknod */
+	{ 2, chmod },			/* 15 = chmod */
+	{ 3, chown },			/* 16 = chown */
+	{ 1, obreak },			/* 17 = break */
+	{ 3, getfsstat },			/* 18 = getfsstat */
+	{ compat(3,lseek) },		/* 19 = old lseek */
+	{ 0, getpid },			/* 20 = getpid */
+	{ 4, mount },			/* 21 = mount */
+	{ 2, unmount },			/* 22 = unmount */
+	{ 1, setuid },			/* 23 = setuid */
+	{ 0, getuid },			/* 24 = getuid */
+	{ 0, geteuid },			/* 25 = geteuid */
+	{ 4, ptrace },			/* 26 = ptrace */
+	{ 3, recvmsg },			/* 27 = recvmsg */
+	{ 3, sendmsg },			/* 28 = sendmsg */
+	{ 6, recvfrom },			/* 29 = recvfrom */
+	{ 3, accept },			/* 30 = accept */
+	{ 3, getpeername },			/* 31 = getpeername */
+	{ 3, getsockname },			/* 32 = getsockname */
+	{ 2, access },			/* 33 = access */
+	{ 2, chflags },			/* 34 = chflags */
+	{ 2, fchflags },			/* 35 = fchflags */
+	{ 0, sync },			/* 36 = sync */
+	{ 2, kill },			/* 37 = kill */
+	{ compat(2,stat) },		/* 38 = old stat */
+	{ 0, getppid },			/* 39 = getppid */
+	{ compat(2,lstat) },		/* 40 = old lstat */
+	{ 2, dup },			/* 41 = dup */
+	{ 0, pipe },			/* 42 = pipe */
+	{ 0, getegid },			/* 43 = getegid */
+	{ 4, profil },			/* 44 = profil */
+#ifdef KTRACE
+	{ 4, ktrace },			/* 45 = ktrace */
+#else
+	{ 0, nosys },			/* 45 = ktrace */
+#endif
+	{ 3, sigaction },			/* 46 = sigaction */
+	{ 0, getgid },			/* 47 = getgid */
+	{ 2, sigprocmask },			/* 48 = sigprocmask */
+	{ 2, getlogin },			/* 49 = getlogin */
+	{ 1, setlogin },			/* 50 = setlogin */
+	{ 1, acct },			/* 51 = acct */
+	{ 0, sigpending },			/* 52 = sigpending */
+	{ 2, sigaltstack },			/* 53 = sigaltstack */
+	{ 3, ioctl },			/* 54 = ioctl */
+	{ 1, reboot },			/* 55 = reboot */
+	{ 1, revoke },			/* 56 = revoke */
+	{ 2, symlink },			/* 57 = symlink */
+	{ 3, readlink },			/* 58 = readlink */
+	{ 3, execve },			/* 59 = execve */
+	{ 1, umask },			/* 60 = umask */
+	{ 1, chroot },			/* 61 = chroot */
+	{ compat(2,fstat) },		/* 62 = old fstat */
+	{ compat(4,getkerninfo) },		/* 63 = old getkerninfo */
+	{ compat(0,getpagesize) },		/* 64 = old getpagesize */
+	{ 2, msync },			/* 65 = msync */
+	{ 0, vfork },			/* 66 = vfork */
+	{ 0, nosys },			/* 67 = obsolete vread */
+	{ 0, nosys },			/* 68 = obsolete vwrite */
+	{ 1, sbrk },			/* 69 = sbrk */
+	{ 1, sstk },			/* 70 = sstk */
+	{ compat(7,mmap) },		/* 71 = old mmap */
+	{ 1, ovadvise },			/* 72 = vadvise */
+	{ 2, munmap },			/* 73 = munmap */
+	{ 3, mprotect },			/* 74 = mprotect */
+	{ 3, madvise },			/* 75 = madvise */
+	{ 0, nosys },			/* 76 = obsolete vhangup */
+	{ 0, nosys },			/* 77 = obsolete vlimit */
+	{ 3, mincore },			/* 78 = mincore */
+	{ 2, getgroups },			/* 79 = getgroups */
+	{ 2, setgroups },			/* 80 = setgroups */
+	{ 0, getpgrp },			/* 81 = getpgrp */
+	{ 2, setpgid },			/* 82 = setpgid */
+	{ 3, setitimer },			/* 83 = setitimer */
+	{ compat(0,wait) },		/* 84 = old wait */
+	{ 1, swapon },			/* 85 = swapon */
+	{ 2, getitimer },			/* 86 = getitimer */
+	{ compat(2,gethostname) },		/* 87 = old gethostname */
+	{ compat(2,sethostname) },		/* 88 = old sethostname */
+	{ 0, getdtablesize },			/* 89 = getdtablesize */
+	{ 2, dup2 },			/* 90 = dup2 */
+	{ 0, nosys },			/* 91 = getdopt */
+	{ 3, fcntl },			/* 92 = fcntl */
+	{ 5, select },			/* 93 = select */
+	{ 0, nosys },			/* 94 = setdopt */
+	{ 1, fsync },			/* 95 = fsync */
+	{ 3, setpriority },			/* 96 = setpriority */
+	{ 3, socket },			/* 97 = socket */
+	{ 3, connect },			/* 98 = connect */
+	{ compat(3,accept) },		/* 99 = old accept */
+	{ 2, getpriority },			/* 100 = getpriority */
+	{ compat(4,send) },		/* 101 = old send */
+	{ compat(4,recv) },		/* 102 = old recv */
+	{ 1, sigreturn },			/* 103 = sigreturn */
+	{ 3, bind },			/* 104 = bind */
+	{ 5, setsockopt },			/* 105 = setsockopt */
+	{ 2, listen },			/* 106 = listen */
+	{ 0, nosys },			/* 107 = obsolete vtimes */
+	{ compat(3,sigvec) },		/* 108 = old sigvec */
+	{ compat(1,sigblock) },		/* 109 = old sigblock */
+	{ compat(1,sigsetmask) },		/* 110 = old sigsetmask */
+	{ 1, sigsuspend },			/* 111 = sigsuspend */
+	{ compat(2,sigstack) },		/* 112 = old sigstack */
+	{ compat(3,recvmsg) },		/* 113 = old recvmsg */
+	{ compat(3,sendmsg) },		/* 114 = old sendmsg */
+#ifdef TRACE
+	{ 2, vtrace },			/* 115 = vtrace */
+#else
+	{ 0, nosys },			/* 115 = obsolete vtrace */
+#endif
+	{ 2, gettimeofday },			/* 116 = gettimeofday */
+	{ 2, getrusage },			/* 117 = getrusage */
+	{ 5, getsockopt },			/* 118 = getsockopt */
+#ifdef vax
+	{ 1, resuba },			/* 119 = resuba */
+#else
+	{ 0, nosys },			/* 119 = nosys */
+#endif
+	{ 3, readv },			/* 120 = readv */
+	{ 3, writev },			/* 121 = writev */
+	{ 2, settimeofday },			/* 122 = settimeofday */
+	{ 3, fchown },			/* 123 = fchown */
+	{ 2, fchmod },			/* 124 = fchmod */
+	{ compat(6,recvfrom) },		/* 125 = old recvfrom */
+	{ compat(2,setreuid) },		/* 126 = old setreuid */
+	{ compat(2,setregid) },		/* 127 = old setregid */
+	{ 2, rename },			/* 128 = rename */
+	{ compat(2,truncate) },		/* 129 = old truncate */
+	{ compat(2,ftruncate) },		/* 130 = old ftruncate */
+	{ 2, flock },			/* 131 = flock */
+	{ 2, mkfifo },			/* 132 = mkfifo */
+	{ 6, sendto },			/* 133 = sendto */
+	{ 2, shutdown },			/* 134 = shutdown */
+	{ 5, socketpair },			/* 135 = socketpair */
+	{ 2, mkdir },			/* 136 = mkdir */
+	{ 1, rmdir },			/* 137 = rmdir */
+	{ 2, utimes },			/* 138 = utimes */
+	{ 0, nosys },			/* 139 = obsolete 4.2 sigreturn */
+	{ 2, adjtime },			/* 140 = adjtime */
+	{ compat(3,getpeername) },		/* 141 = old getpeername */
+	{ compat(0,gethostid) },		/* 142 = old gethostid */
+	{ compat(1,sethostid) },		/* 143 = old sethostid */
+	{ compat(2,getrlimit) },		/* 144 = old getrlimit */
+	{ compat(2,setrlimit) },		/* 145 = old setrlimit */
+	{ compat(2,killpg) },		/* 146 = old killpg */
+	{ 0, setsid },			/* 147 = setsid */
+	{ 4, quotactl },			/* 148 = quotactl */
+	{ compat(4,quota) },		/* 149 = old quota */
+	{ compat(3,getsockname) },		/* 150 = old getsockname */
+	{ 0, nosys },			/* 151 = nosys */
+	{ 0, nosys },			/* 152 = nosys */
+	{ 0, nosys },			/* 153 = nosys */
+	{ 0, nosys },			/* 154 = nosys */
+#ifdef NFS
+	{ 2, nfssvc },			/* 155 = nfssvc */
+#else
+	{ 0, nosys },			/* 155 = nosys */
+#endif
+	{ compat(4,getdirentries) },		/* 156 = old getdirentries */
+	{ 2, statfs },			/* 157 = statfs */
+	{ 2, fstatfs },			/* 158 = fstatfs */
+	{ 0, nosys },			/* 159 = nosys */
+	{ 0, nosys },			/* 160 = nosys */
+#ifdef NFS
+	{ 2, getfh },			/* 161 = getfh */
+#else
+	{ 0, nosys },			/* 161 = nosys */
+#endif
+	{ 0, nosys },			/* 162 = nosys */
+	{ 0, nosys },			/* 163 = nosys */
+	{ 0, nosys },			/* 164 = nosys */
+	{ 0, nosys },			/* 165 = nosys */
+	{ 0, nosys },			/* 166 = nosys */
+	{ 0, nosys },			/* 167 = nosys */
+	{ 0, nosys },			/* 168 = nosys */
+	{ 0, nosys },			/* 169 = nosys */
+	{ 0, nosys },			/* 170 = nosys */
+#ifdef SYSVSHM
+	{ 4, shmsys },			/* 171 = shmsys */
+#else
+	{ 0, nosys },			/* 171 = nosys */
+#endif
+	{ 0, nosys },			/* 172 = nosys */
+	{ 0, nosys },			/* 173 = nosys */
+	{ 0, nosys },			/* 174 = nosys */
+	{ 0, nosys },			/* 175 = nosys */
+	{ 0, nosys },			/* 176 = nosys */
+	{ 0, nosys },			/* 177 = nosys */
+	{ 0, nosys },			/* 178 = nosys */
+	{ 0, nosys },			/* 179 = nosys */
+	{ 0, nosys },			/* 180 = nosys */
+	{ 1, setgid },			/* 181 = setgid */
+	{ 1, setegid },			/* 182 = setegid */
+	{ 1, seteuid },			/* 183 = seteuid */
+#ifdef LFS
+	{ 3, lfs_bmapv },			/* 184 = lfs_bmapv */
+	{ 3, lfs_markv },			/* 185 = lfs_markv */
+	{ 2, lfs_segclean },			/* 186 = lfs_segclean */
+	{ 2, lfs_segwait },			/* 187 = lfs_segwait */
+#else
+	{ 0, nosys },			/* 184 = nosys */
+	{ 0, nosys },			/* 185 = nosys */
+	{ 0, nosys },			/* 186 = nosys */
+	{ 0, nosys },			/* 187 = nosys */
+#endif
+	{ 2, stat },			/* 188 = stat */
+	{ 2, fstat },			/* 189 = fstat */
+	{ 2, lstat },			/* 190 = lstat */
+	{ 2, pathconf },			/* 191 = pathconf */
+	{ 2, fpathconf },			/* 192 = fpathconf */
+	{ 0, nosys },			/* 193 = nosys */
+	{ 2, getrlimit },			/* 194 = getrlimit */
+	{ 2, setrlimit },			/* 195 = setrlimit */
+	{ 4, getdirentries },			/* 196 = getdirentries */
+	{ 8, mmap },			/* 197 = mmap */
+	{ 0, nosys },			/* 198 = __syscall */
+	{ 5, lseek },			/* 199 = lseek */
+	{ 4, truncate },			/* 200 = truncate */
+	{ 4, ftruncate },			/* 201 = ftruncate */
+	{ 6, __sysctl },			/* 202 = __sysctl */
+	{ 2, mlock },			/* 203 = mlock */
+	{ 2, munlock },			/* 204 = munlock */
+	{ 0, nosys },			/* 205 = nosys */
+	{ 0, nosys },			/* 206 = nosys */
+	{ 0, nosys },			/* 207 = nosys */
+	{ 0, nosys },			/* 208 = nosys */
+	{ 0, nosys },			/* 209 = nosys */
+	{ 0, nosys },			/* 210 = nosys */
+};
+
+/* Number of entries in sysent[], computed from the initializer above. */
+int	nsysent = sizeof(sysent) / sizeof(sysent[0]);
diff --git a/sys/kern/kern_acct.c b/sys/kern/kern_acct.c
new file mode 100644
index 00000000000..b752279d120
--- /dev/null
+++ b/sys/kern/kern_acct.c
@@ -0,0 +1,116 @@
+/*-
+ * Copyright (c) 1982, 1986, 1989, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	from: @(#)kern_acct.c	8.1 (Berkeley) 6/14/93
+ */
+
+#include <sys/param.h>
+#include <sys/proc.h>
+#include <sys/mount.h>
+#include <sys/vnode.h>
+#include <sys/file.h>
+#include <sys/syslog.h>
+#include <sys/kernel.h>
+
+/*
+ * Argument structure for acct(); fname is presumably the path name of
+ * the accounting file -- confirm against the system call documentation.
+ */
+struct acct_args {
+	char	*fname;
+};
+
+/*
+ * acct system call (stub).
+ *
+ * The body has been deleted, so none of the three arguments is examined
+ * and the call always fails.
+ *
+ * Returns ENOSYS.
+ */
+int
+acct(a1, a2, a3)
+	struct proc *a1;
+	struct acct_args *a2;
+	int *a3;
+{
+	/*
+	 * Body deleted.
+	 */
+	return (ENOSYS);
+}
+
+/*
+ * Stub for acct_process(): the body has been deleted, so a1 is not
+ * examined and nothing is done.
+ *
+ * NOTE(review): the function is declared without a return type (so it
+ * is implicitly int) yet returns no value; callers must not use the
+ * result.
+ */
+acct_process(a1)
+	struct proc *a1;
+{
+
+	/*
+	 * Body deleted.
+	 */
+	return;
+}
+
+/*
+ * Periodically check the file system to see if accounting
+ * should be turned on or off.
+ */
+
+/*
+ * Values associated with enabling and disabling accounting.
+ * acctwatch() compares the available block count (f_bavail) against
+ * these percentages of the total block count (f_blocks).
+ */
+int	acctsuspend = 2;	/* suspend accounting when <= 2% of blocks available */
+int	acctresume = 4;		/* resume when available blocks risen to > 4% */
+int	acctchkfreq = 15;	/* frequency (in seconds) to check space */
+
+/*
+ * SHOULD REPLACE THIS WITH A DRIVER THAT CAN BE READ TO SIMPLIFY.
+ *
+ * acctp is the vnode whose file system acctwatch() checks while
+ * accounting is running.  When acctwatch() suspends accounting it moves
+ * that vnode to savacctp and sets acctp to NULL; on resumption it moves
+ * it back and sets savacctp to NULL.
+ */
+struct	vnode *acctp;
+struct	vnode *savacctp;
+
+/*
+ * Timeout handler that suspends or resumes accounting according to the
+ * space available on the accounting file system, then re-arms itself
+ * to run again in acctchkfreq seconds.  When accounting is neither
+ * running nor suspended the handler returns without re-arming.
+ * The argument is unused.
+ */
+/* ARGSUSED */
+void
+acctwatch(a)
+	void *a;
+{
+	struct statfs fsinfo;
+
+	/* Nothing running and nothing suspended: stop polling. */
+	if (savacctp == NULL && acctp == NULL)
+		return;
+
+	if (savacctp != NULL) {
+		/* Suspended: resume once enough space is available again. */
+		(void)VFS_STATFS(savacctp->v_mount, &fsinfo, (struct proc *)0);
+		if (fsinfo.f_bavail > acctresume * fsinfo.f_blocks / 100) {
+			acctp = savacctp;
+			savacctp = NULL;
+			log(LOG_NOTICE, "Accounting resumed\n");
+		}
+	} else {
+		/* Running: suspend when available space drops too low. */
+		(void)VFS_STATFS(acctp->v_mount, &fsinfo, (struct proc *)0);
+		if (fsinfo.f_bavail <= acctsuspend * fsinfo.f_blocks / 100) {
+			savacctp = acctp;
+			acctp = NULL;
+			log(LOG_NOTICE, "Accounting suspended\n");
+		}
+	}
+
+	/* Schedule the next check. */
+	timeout(acctwatch, NULL, acctchkfreq * hz);
+}
diff --git a/sys/kern/kern_clock.c b/sys/kern/kern_clock.c
new file mode 100644
index 00000000000..f42900cb75d
--- /dev/null
+++ b/sys/kern/kern_clock.c
@@ -0,0 +1,528 @@
+/*-
+ * Copyright (c) 1982, 1986, 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)kern_clock.c	8.5 (Berkeley) 1/21/94
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/dkstat.h>
+#include <sys/callout.h>
+#include <sys/kernel.h>
+#include <sys/proc.h>
+#include <sys/resourcevar.h>
+
+#include <machine/cpu.h>
+
+#ifdef GPROF
+#include <sys/gmon.h>
+#endif
+
+/*
+ * Clock handling routines.
+ *
+ * This code is written to operate with two timers that run independently of
+ * each other.  The main clock, running hz times per second, is used to keep
+ * track of real time.  The second timer handles kernel and user profiling,
+ * and does resource use estimation.  If the second timer is programmable,
+ * it is randomized to avoid aliasing between the two clocks.  For example,
+ * the randomization prevents an adversary from always giving up the cpu
+ * just before its quantum expires.  Otherwise, it would never accumulate
+ * cpu ticks.  The mean frequency of the second timer is stathz.
+ *
+ * If no second timer exists, stathz will be zero; in this case we drive
+ * profiling and statistics off the main clock.  This WILL NOT be accurate;
+ * do not do it unless absolutely necessary.
+ *
+ * The statistics clock may (or may not) be run at a higher rate while
+ * profiling.  This profile clock runs at profhz.  We require that profhz
+ * be an integral multiple of stathz.
+ *
+ * If the statistics clock is running fast, it must be divided by the ratio
+ * profhz/stathz for statistics.  (For profiling, every tick counts.)
+ */
+
+/*
+ * TODO:
+ *	allocate more timeout table slots when table overflows.
+ */
+
+/*
+ * Bump a timeval by a small number of usec's.
+ */
+#define BUMPTIME(t, usec) { \
+	register volatile struct timeval *tp = (t); \
+	register long us; \
+ \
+	tp->tv_usec = us = tp->tv_usec + (usec); \
+	if (us >= 1000000) { \
+		tp->tv_usec = us - 1000000; \
+		tp->tv_sec++; \
+	} \
+}
+
+int	stathz;			/* mean statistics clock rate; 0 if no second timer */
+int	profhz;			/* profile clock rate; set by initclocks() if still 0 */
+int	profprocs;		/* count of processes with P_PROFIL set */
+int	ticks;			/* incremented once per hardclock() call */
+static int psdiv, pscnt;	/* prof => stat divider */
+int	psratio;		/* ratio: prof / stat */
+
+/* Both are advanced by the same delta on every hardclock() call. */
+volatile struct	timeval time;
+volatile struct	timeval mono_time;
+
+/*
+ * Set up the clock frequencies and have the machine-dependent code
+ * start the clocks.
+ */
+void
+initclocks()
+{
+	register int statrate;
+
+	/*
+	 * Begin with a prof => stat divisor of 1 (the normal case), then
+	 * let the machine-specific code do its part.
+	 */
+	pscnt = 1;
+	psdiv = 1;
+	cpu_initclocks();
+
+	/*
+	 * Statistics are driven at stathz, or at hz when stathz is zero.
+	 * Default profhz to that rate if it is still zero, and record the
+	 * ratio profhz / rate.
+	 */
+	if (stathz != 0)
+		statrate = stathz;
+	else
+		statrate = hz;
+	if (profhz == 0)
+		profhz = statrate;
+	psratio = profhz / statrate;
+}
+
+/*
+ * The real-time timer, interrupting hz times per second.
+ *
+ * On each call this ages the callout queue, charges the current
+ * process's virtual and profiling interval timers, runs statclock()
+ * when there is no separate statistics clock, advances ticks, time and
+ * mono_time, and arranges for softclock() to run if any callout is due.
+ *
+ * frame is the clock interrupt frame; here it is only examined through
+ * CLKF_USERMODE() and CLKF_BASEPRI() and passed on to statclock().
+ */
+void
+hardclock(frame)
+	register struct clockframe *frame;
+{
+	register struct callout *p1;
+	register struct proc *p;
+	register int delta, needsoft;
+	extern int tickdelta;
+	extern long timedelta;
+
+	/*
+	 * Update real-time timeout queue.
+	 * At front of queue are some number of events which are ``due''.
+	 * The time to these is <= 0 and if negative represents the
+	 * number of ticks which have passed since it was supposed to happen.
+	 * The rest of the q elements (times > 0) are events yet to happen,
+	 * where the time for each is given as a delta from the previous.
+	 * Decrementing just the first of these serves to decrement the time
+	 * to all events.
+	 *
+	 * The loop therefore decrements each overdue entry and stops after
+	 * decrementing the first entry that is not overdue; needsoft is
+	 * set whenever an entry's time has reached zero or below.
+	 */
+	needsoft = 0;
+	for (p1 = calltodo.c_next; p1 != NULL; p1 = p1->c_next) {
+		if (--p1->c_time > 0)
+			break;
+		needsoft = 1;
+		if (p1->c_time == 0)
+			break;
+	}
+
+	p = curproc;
+	if (p) {
+		register struct pstats *pstats;
+
+		/*
+		 * Run current process's virtual and profile time, as needed.
+		 * The virtual timer is only charged for ticks taken in user
+		 * mode; the profile timer is charged on every tick.
+		 */
+		pstats = p->p_stats;
+		if (CLKF_USERMODE(frame) &&
+		    timerisset(&pstats->p_timer[ITIMER_VIRTUAL].it_value) &&
+		    itimerdecr(&pstats->p_timer[ITIMER_VIRTUAL], tick) == 0)
+			psignal(p, SIGVTALRM);
+		if (timerisset(&pstats->p_timer[ITIMER_PROF].it_value) &&
+		    itimerdecr(&pstats->p_timer[ITIMER_PROF], tick) == 0)
+			psignal(p, SIGPROF);
+	}
+
+	/*
+	 * If no separate statistics clock is available, run it from here.
+	 */
+	if (stathz == 0)
+		statclock(frame);
+
+	/*
+	 * Increment the time-of-day.  The increment is just ``tick'' unless
+	 * we are still adjusting the clock; see adjtime().
+	 * While an adjustment is pending, each call applies tickdelta and
+	 * removes that amount from the outstanding timedelta.
+	 */
+	ticks++;
+	if (timedelta == 0)
+		delta = tick;
+	else {
+		delta = tick + tickdelta;
+		timedelta -= tickdelta;
+	}
+	BUMPTIME(&time, delta);
+	BUMPTIME(&mono_time, delta);
+
+	/*
+	 * Process callouts at a very low cpu priority, so we don't keep the
+	 * relatively high clock interrupt priority any longer than necessary.
+	 */
+	if (needsoft) {
+		if (CLKF_BASEPRI(frame)) {
+			/*
+			 * Save the overhead of a software interrupt;
+			 * it will happen as soon as we return, so do it now.
+			 */
+			(void)splsoftclock();
+			softclock();
+		} else
+			setsoftclock();
+	}
+}
+
+/*
+ * Software (low priority) clock interrupt.
+ *
+ * Removes every due entry (c_time <= 0) from the head of the timeout
+ * queue, returns it to the free list, and calls its function with its
+ * argument.  The queue is manipulated at splhigh; the priority saved on
+ * entry is restored around each call and again on exit.
+ */
+/*ARGSUSED*/
+void
+softclock()
+{
+	register struct callout *due;
+	register void (*handler) __P((void *));
+	register void *harg;
+	register int ipl;
+
+	ipl = splhigh();
+	for (;;) {
+		due = calltodo.c_next;
+		if (due == NULL || due->c_time > 0)
+			break;
+
+		/* Save what we need before the entry is recycled. */
+		handler = due->c_func;
+		harg = due->c_arg;
+
+		/* Unlink from the queue and push onto the free list. */
+		calltodo.c_next = due->c_next;
+		due->c_next = callfree;
+		callfree = due;
+
+		/* Run the handler at the caller's priority. */
+		splx(ipl);
+		(*handler)(harg);
+		(void) splhigh();
+	}
+	splx(ipl);
+}
+
+/*
+ * timeout --
+ *	Arrange for a function to be called after a given number of ticks.
+ *
+ * untimeout --
+ *	Cancel a previously requested timeout.
+ *
+ *	See AT&T BCI Driver Reference Manual for specification.  Unlike
+ *	that specification, timeout returns no identifier; untimeout
+ *	locates the entry to cancel by the function and argument that
+ *	were originally passed to timeout.
+ */
+void
+timeout(ftn, arg, ticks)
+	void (*ftn) __P((void *));
+	void *arg;
+	register int ticks;
+{
+	register struct callout *entry, *prev, *next;
+	register int ipl;
+
+	/* The shortest delay accepted is one tick. */
+	if (ticks < 1)
+		ticks = 1;
+
+	/* Lock out the clock. */
+	ipl = splhigh();
+
+	/* Take a callout structure off the free list and fill it in. */
+	entry = callfree;
+	if (entry == NULL)
+		panic("timeout table full");
+	callfree = entry->c_next;
+	entry->c_func = ftn;
+	entry->c_arg = arg;
+
+	/*
+	 * Queue times are deltas from the preceding entry.  Walk forward
+	 * while the remaining delay exceeds the next entry's delta,
+	 * subtracting each positive delta passed; negative deltas belong
+	 * to overdue entries and are skipped without adjustment.  The
+	 * entry that will follow the new one has the new delay taken out
+	 * of its own delta.
+	 */
+	prev = &calltodo;
+	while ((next = prev->c_next) != NULL && ticks > next->c_time) {
+		if (next->c_time > 0)
+			ticks -= next->c_time;
+		prev = next;
+	}
+	entry->c_time = ticks;
+	if (next != NULL)
+		next->c_time -= ticks;
+
+	/* Link the new entry in between prev and next. */
+	entry->c_next = next;
+	prev->c_next = entry;
+	splx(ipl);
+}
+
+/*
+ * Remove the first queued timeout whose function and argument match
+ * ftn and arg, returning its callout structure to the free list.
+ * Does nothing if no entry matches.
+ */
+void
+untimeout(ftn, arg)
+	void (*ftn) __P((void *));
+	void *arg;
+{
+	register struct callout *prev, *cur;
+	register int ipl;
+
+	ipl = splhigh();
+	prev = &calltodo;
+	while ((cur = prev->c_next) != NULL) {
+		if (cur->c_func != ftn || cur->c_arg != arg) {
+			prev = cur;
+			continue;
+		}
+
+		/* Hand any remaining positive delta to the successor. */
+		if (cur->c_next != NULL && cur->c_time > 0)
+			cur->c_next->c_time += cur->c_time;
+
+		/* Unlink from the callout queue; push onto callfree. */
+		prev->c_next = cur->c_next;
+		cur->c_next = callfree;
+		callfree = cur;
+		break;
+	}
+	splx(ipl);
+}
+
+/*
+ * Compute number of hz until specified time.  Used to
+ * compute third argument to timeout() from an absolute time.
+ *
+ * tv is the absolute target time; the result is the distance from the
+ * current value of ``time'' to tv, expressed in clock ticks and
+ * capped at 0x7fffffff.
+ *
+ * NOTE(review): when tv is not later than the current time the first
+ * branch yields a zero or negative count; timeout() raises such values
+ * to 1, but other callers would need to check for themselves.
+ * NOTE(review): the first branch divides by (tick / 1000); assuming
+ * tick is an integral number of microseconds per clock tick, that is
+ * zero whenever tick < 1000 -- confirm hz can never exceed 1000.
+ */
+int
+hzto(tv)
+	struct timeval *tv;
+{
+	register long ticks, sec;
+	int s;
+
+	/*
+	 * If number of milliseconds will fit in 32 bit arithmetic,
+	 * then compute number of milliseconds to time and scale to
+	 * ticks.  Otherwise just compute number of hz in time, rounding
+	 * times greater than representable to maximum value.
+	 *
+	 * Delta times less than 25 days can be computed ``exactly''.
+	 * Maximum value for any timeout in 10ms ticks is 250 days.
+	 */
+	s = splhigh();		/* keep ``time'' stable while it is read */
+	sec = tv->tv_sec - time.tv_sec;
+	if (sec <= 0x7fffffff / 1000 - 1000)
+		ticks = ((tv->tv_sec - time.tv_sec) * 1000 +
+			(tv->tv_usec - time.tv_usec) / 1000) / (tick / 1000);
+	else if (sec <= 0x7fffffff / hz)
+		ticks = sec * hz;	/* whole seconds only */
+	else
+		ticks = 0x7fffffff;
+	splx(s);
+	return (ticks);
+}
+
+/*
+ * Start profiling on a process.
+ *
+ * Marks p with P_PROFIL if it is not already marked.  When p becomes
+ * the first profiled process and a separate statistics clock exists,
+ * the prof => stat divider is set to psratio and the statistics clock
+ * is switched to profhz.
+ *
+ * Kernel profiling passes proc0 which never exits and hence
+ * keeps the profile clock running constantly.
+ */
+void
+startprofclock(p)
+	register struct proc *p;
+{
+	int ipl;
+
+	/* Already being profiled: nothing to do. */
+	if (p->p_flag & P_PROFIL)
+		return;
+	p->p_flag |= P_PROFIL;
+
+	/* Only the first profiled process changes the clock rate. */
+	profprocs++;
+	if (profprocs != 1 || stathz == 0)
+		return;
+
+	ipl = splstatclock();
+	pscnt = psratio;
+	psdiv = psratio;
+	setstatclockrate(profhz);
+	splx(ipl);
+}
+
+/*
+ * Stop profiling on a process.
+ *
+ * Clears P_PROFIL on p if it is set.  When the last profiled process
+ * goes away and a separate statistics clock exists, the prof => stat
+ * divider is reset to 1 and the statistics clock is returned to stathz.
+ */
+void
+stopprofclock(p)
+	register struct proc *p;
+{
+	int ipl;
+
+	/* Not being profiled: nothing to do. */
+	if ((p->p_flag & P_PROFIL) == 0)
+		return;
+	p->p_flag &= ~P_PROFIL;
+
+	/* Only the last profiled process changes the clock rate. */
+	profprocs--;
+	if (profprocs != 0 || stathz == 0)
+		return;
+
+	ipl = splstatclock();
+	pscnt = 1;
+	psdiv = 1;
+	setstatclockrate(stathz);
+	splx(ipl);
+}
+
+/* Run-time variable initialized to the compile-time DK_NDRIVE value. */
+int	dk_ndrive = DK_NDRIVE;
+
+/*
+ * Statistics clock.  Grab profile sample, and if divider reaches 0,
+ * do process and kernel statistics.
+ *
+ * frame is the clock interrupt frame, examined through the CLKF_*
+ * macros.  Every call takes a profiling sample where applicable; the
+ * cpu-state, disk-busy and scheduling statistics are updated only on
+ * the calls where pscnt counts down to zero, after which pscnt is
+ * reloaded from psdiv.
+ */
+void
+statclock(frame)
+	register struct clockframe *frame;
+{
+#ifdef GPROF
+	register struct gmonparam *g;
+#endif
+	register struct proc *p;
+	register int i;
+
+	if (CLKF_USERMODE(frame)) {
+		/*
+		 * NOTE(review): curproc is dereferenced without a NULL
+		 * check here; presumably it is always set when the
+		 * interrupt came from user mode.
+		 */
+		p = curproc;
+		/* If this process is being profiled record the tick. */
+		if (p->p_flag & P_PROFIL)
+			addupc_intr(p, CLKF_PC(frame), 1);
+		if (--pscnt > 0)
+			return;
+		/*
+		 * Came from user mode; CPU was in user state.
+		 * Charge the tick to the process and to the nice or
+		 * user cpu state according to its nice value.
+		 */
+		p->p_uticks++;
+		if (p->p_nice > NZERO)
+			cp_time[CP_NICE]++;
+		else
+			cp_time[CP_USER]++;
+	} else {
+#ifdef GPROF
+		/*
+		 * Kernel statistics are just like addupc_intr, only easier.
+		 */
+		g = &_gmonparam;
+		if (g->state == GMON_PROF_ON) {
+			i = CLKF_PC(frame) - g->lowpc;
+			if (i < g->textsize) {
+				i /= HISTFRACTION * sizeof(*g->kcount);
+				g->kcount[i]++;
+			}
+		}
+#endif
+		if (--pscnt > 0)
+			return;
+		/*
+		 * Came from kernel mode, so we were:
+		 * - handling an interrupt,
+		 * - doing syscall or trap work on behalf of the current
+		 *   user process, or
+		 * - spinning in the idle loop.
+		 * Whichever it is, charge the time as appropriate.
+		 * Note that we charge interrupts to the current process,
+		 * regardless of whether they are ``for'' that process,
+		 * so that we know how much of its real time was spent
+		 * in ``non-process'' (i.e., interrupt) work.
+		 */
+		p = curproc;
+		if (CLKF_INTR(frame)) {
+			if (p != NULL)
+				p->p_iticks++;
+			cp_time[CP_INTR]++;
+		} else if (p != NULL) {
+			p->p_sticks++;
+			cp_time[CP_SYS]++;
+		} else
+			cp_time[CP_IDLE]++;
+	}
+	/* Reload the prof => stat divider for the next statistics tick. */
+	pscnt = psdiv;
+
+	/*
+	 * We maintain statistics shown by user-level statistics
+	 * programs:  the amount of time in each cpu state, and
+	 * the amount of time each of DK_NDRIVE ``drives'' is busy.
+	 *
+	 * XXX	should either run linked list of drives, or (better)
+	 *	grab timestamps in the start & done code.
+	 */
+	for (i = 0; i < DK_NDRIVE; i++)
+		if (dk_busy & (1 << i))
+			dk_time[i]++;
+
+	/*
+	 * We adjust the priority of the current process.  The priority of
+	 * a process gets worse as it accumulates CPU time.  The cpu usage
+	 * estimator (p_estcpu) is increased here.  The formula for computing
+	 * priorities (in kern_synch.c) will compute a different value each
+	 * time p_estcpu increases by 4.  The cpu usage estimator ramps up
+	 * quite quickly when the process is running (linearly), and decays
+	 * away exponentially, at a rate which is proportionally slower when
+	 * the system is busy.  The basic principle is that the system will
+	 * 90% forget that the process used a lot of CPU time in 5 * loadav
+	 * seconds.  This causes the system to favor processes which haven't
+	 * run much recently, and to round-robin among other processes.
+	 */
+	if (p != NULL) {
+		p->p_cpticks++;
+		/* If the increment wrapped to zero, step back again. */
+		if (++p->p_estcpu == 0)
+			p->p_estcpu--;
+		/* Recompute the priority on every fourth estimator value. */
+		if ((p->p_estcpu & 3) == 0) {
+			resetpriority(p);
+			if (p->p_priority >= PUSER)
+				p->p_priority = p->p_usrpri;
+		}
+	}
+}
+
+/*
+ * Return information about system clocks.
+ *
+ * Builds a struct clockinfo from hz, tick, profhz and the effective
+ * statistics rate (hz when stathz is zero) and passes it, together
+ * with where and sizep, to sysctl_rdstruct().
+ *
+ * Returns whatever sysctl_rdstruct() returns.  The return type is
+ * spelled out as int rather than left implicit, matching the other
+ * functions in this file.
+ */
+int
+sysctl_clockrate(where, sizep)
+	register char *where;
+	size_t *sizep;
+{
+	struct clockinfo clkinfo;
+
+	/*
+	 * Construct clockinfo structure.
+	 */
+	clkinfo.hz = hz;
+	clkinfo.tick = tick;
+	clkinfo.profhz = profhz;
+	clkinfo.stathz = stathz ? stathz : hz;
+	return (sysctl_rdstruct(where, sizep, NULL, &clkinfo, sizeof(clkinfo)));
+}
diff --git a/sys/kern/kern_descrip.c b/sys/kern/kern_descrip.c
new file mode 100644
index 00000000000..543946d3f8f
--- /dev/null
+++ b/sys/kern/kern_descrip.c
@@ -0,0 +1,914 @@
+/*
+ * Copyright (c) 1982, 1986, 1989, 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)kern_descrip.c	8.6 (Berkeley) 4/19/94
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/filedesc.h>
+#include <sys/kernel.h>
+#include <sys/vnode.h>
+#include <sys/proc.h>
+#include <sys/file.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/stat.h>
+#include <sys/ioctl.h>
+#include <sys/fcntl.h>
+#include <sys/malloc.h>
+#include <sys/syslog.h>
+#include <sys/unistd.h>
+#include <sys/resourcevar.h>
+
+/*
+ * Descriptor management: bookkeeping shared by all open file structures.
+ */
+struct file *filehead;	/* head of list of open files; see falloc()/ffree() */
+int nfiles;		/* actual number of open files; checked against maxfiles */
+
+/* System calls on descriptors. */
+struct getdtablesize_args {
+	int	dummy;
+};
+/* ARGSUSED */
+getdtablesize(p, uap, retval)
+	struct proc *p;
+	struct getdtablesize_args *uap;
+	int *retval;
+{
+	int softlim = (int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur;
+
+	/* The table size is the NOFILE soft limit, capped at maxfiles. */
+	*retval = min(softlim, maxfiles);
+	return (0);
+}
+
+/*
+ * Duplicate a file descriptor onto a free slot chosen by fdalloc().
+ */
+struct dup_args {
+	u_int	fd;
+};
+/* ARGSUSED */
+dup(p, uap, retval)
+	struct proc *p;
+	struct dup_args *uap;
+	int *retval;
+{
+	register struct filedesc *fdp = p->p_fd;
+	u_int old = uap->fd;
+	int new, error;
+
+	/* XXX Compatibility: bits above 077 turn the call into dup2(). */
+	if ((old & ~077) != 0) {
+		uap->fd &= 077;
+		return (dup2(p, uap, retval));
+	}
+
+	if (old >= fdp->fd_nfiles || fdp->fd_ofiles[old] == NULL)
+		return (EBADF);
+	error = fdalloc(p, 0, &new);
+	if (error != 0)
+		return (error);
+	return (finishdup(fdp, (int)old, new, retval));
+}
+
+/*
+ * Duplicate a file descriptor to a particular value, closing whatever
+ * the target descriptor previously referred to.
+ */
+struct dup2_args {
+	u_int	from;
+	u_int	to;
+};
+/* ARGSUSED */
+dup2(p, uap, retval)
+	struct proc *p;
+	struct dup2_args *uap;
+	int *retval;
+{
+	register struct filedesc *fdp = p->p_fd;
+	register u_int src = uap->from, dst = uap->to;
+	int slot, error;
+
+	if (src >= fdp->fd_nfiles || fdp->fd_ofiles[src] == NULL)
+		return (EBADF);
+	if (dst >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur || dst >= maxfiles)
+		return (EBADF);
+	if (src == dst) {
+		*retval = dst;
+		return (0);
+	}
+	if (dst >= fdp->fd_nfiles) {
+		/* Target lies beyond the table: have fdalloc() grow it. */
+		error = fdalloc(p, dst, &slot);
+		if (error != 0)
+			return (error);
+		if (dst != slot)
+			panic("dup2: fdalloc");
+	} else if (fdp->fd_ofiles[dst] != NULL) {
+		if (fdp->fd_ofileflags[dst] & UF_MAPPED)
+			(void) munmapfd(p, dst);
+		/* dup2() must succeed even if the close has an error. */
+		(void) closef(fdp->fd_ofiles[dst], p);
+	}
+	return (finishdup(fdp, (int)src, (int)dst, retval));
+}
+
+/*
+ * The file control system call: validate uap->fd, then act on uap->cmd.
+ */
+struct fcntl_args {
+	int	fd;		/* descriptor to operate on */
+	int	cmd;		/* F_* command selecting a case below */
+	int	arg;		/* command argument; user address for lock cmds */
+};
+/* ARGSUSED */
+fcntl(p, uap, retval)
+	struct proc *p;
+	register struct fcntl_args *uap;
+	int *retval;
+{
+	register struct filedesc *fdp = p->p_fd;
+	register struct file *fp;
+	register char *pop;		/* flag byte of descriptor uap->fd */
+	struct vnode *vp;
+	int i, tmp, error, flg = F_POSIX;
+	struct flock fl;
+	u_int newmin;
+
+	if ((unsigned)uap->fd >= fdp->fd_nfiles ||
+	    (fp = fdp->fd_ofiles[uap->fd]) == NULL)
+		return (EBADF);
+	pop = &fdp->fd_ofileflags[uap->fd];
+	switch (uap->cmd) {
+
+	case F_DUPFD:		/* dup onto a free descriptor >= arg */
+		newmin = uap->arg;
+		if (newmin >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur ||
+		    newmin >= maxfiles)
+			return (EINVAL);
+		if (error = fdalloc(p, newmin, &i))
+			return (error);
+		return (finishdup(fdp, uap->fd, i, retval));
+
+	case F_GETFD:		/* only bit 0 of the flag byte is reported */
+		*retval = *pop & 1;
+		return (0);
+
+	case F_SETFD:		/* replace bit 0, keep the other flag bits */
+		*pop = (*pop &~ 1) | (uap->arg & 1);
+		return (0);
+
+	case F_GETFL:		/* f_flag as converted by OFLAGS() */
+		*retval = OFLAGS(fp->f_flag);
+		return (0);
+
+	case F_SETFL:		/* rewrite the FCNTLFLAGS bits of f_flag */
+		fp->f_flag &= ~FCNTLFLAGS;
+		fp->f_flag |= FFLAGS(uap->arg) & FCNTLFLAGS;
+		tmp = fp->f_flag & FNONBLOCK;	/* pass new setting to object */
+		error = (*fp->f_ops->fo_ioctl)(fp, FIONBIO, (caddr_t)&tmp, p);
+		if (error)
+			return (error);
+		tmp = fp->f_flag & FASYNC;
+		error = (*fp->f_ops->fo_ioctl)(fp, FIOASYNC, (caddr_t)&tmp, p);
+		if (!error)
+			return (0);
+		fp->f_flag &= ~FNONBLOCK;	/* FIOASYNC failed: undo FIONBIO */
+		tmp = 0;
+		(void) (*fp->f_ops->fo_ioctl)(fp, FIONBIO, (caddr_t)&tmp, p);
+		return (error);
+
+	case F_GETOWN:		/* sockets use so_pgid, others TIOCGPGRP */
+		if (fp->f_type == DTYPE_SOCKET) {
+			*retval = ((struct socket *)fp->f_data)->so_pgid;
+			return (0);
+		}
+		error = (*fp->f_ops->fo_ioctl)
+			(fp, (int)TIOCGPGRP, (caddr_t)retval, p);
+		*retval = -*retval;	/* ioctl result is reported negated */
+		return (error);
+
+	case F_SETOWN:		/* sockets use so_pgid, others TIOCSPGRP */
+		if (fp->f_type == DTYPE_SOCKET) {
+			((struct socket *)fp->f_data)->so_pgid = uap->arg;
+			return (0);
+		}
+		if (uap->arg <= 0) {	/* non-positive: pass the negation */
+			uap->arg = -uap->arg;
+		} else {		/* positive: a pid; pass its pgrp id */
+			struct proc *p1 = pfind(uap->arg);
+			if (p1 == 0)
+				return (ESRCH);
+			uap->arg = p1->p_pgrp->pg_id;
+		}
+		return ((*fp->f_ops->fo_ioctl)
+			(fp, (int)TIOCSPGRP, (caddr_t)&uap->arg, p));
+
+	case F_SETLKW:		/* F_SETLK with F_WAIT added to the flags */
+		flg |= F_WAIT;
+		/* Fall into F_SETLK */
+
+	case F_SETLK:		/* set or clear a lock; vnodes only */
+		if (fp->f_type != DTYPE_VNODE)
+			return (EBADF);
+		vp = (struct vnode *)fp->f_data;
+		/* Copy in the lock structure from the user address in arg */
+		error = copyin((caddr_t)uap->arg, (caddr_t)&fl, sizeof (fl));
+		if (error)
+			return (error);
+		if (fl.l_whence == SEEK_CUR)	/* add the current f_offset */
+			fl.l_start += fp->f_offset;
+		switch (fl.l_type) {
+
+		case F_RDLCK:	/* requires the file to be open for FREAD */
+			if ((fp->f_flag & FREAD) == 0)
+				return (EBADF);
+			p->p_flag |= P_ADVLOCK;	/* tested by closef() */
+			return (VOP_ADVLOCK(vp, (caddr_t)p, F_SETLK, &fl, flg));
+
+		case F_WRLCK:	/* requires the file to be open for FWRITE */
+			if ((fp->f_flag & FWRITE) == 0)
+				return (EBADF);
+			p->p_flag |= P_ADVLOCK;	/* tested by closef() */
+			return (VOP_ADVLOCK(vp, (caddr_t)p, F_SETLK, &fl, flg));
+
+		case F_UNLCK:	/* unlock always passes plain F_POSIX */
+			return (VOP_ADVLOCK(vp, (caddr_t)p, F_UNLCK, &fl,
+				F_POSIX));
+
+		default:
+			return (EINVAL);
+		}
+
+	case F_GETLK:		/* query a lock; vnodes only */
+		if (fp->f_type != DTYPE_VNODE)
+			return (EBADF);
+		vp = (struct vnode *)fp->f_data;
+		/* Copy in the lock structure from the user address in arg */
+		error = copyin((caddr_t)uap->arg, (caddr_t)&fl, sizeof (fl));
+		if (error)
+			return (error);
+		if (fl.l_whence == SEEK_CUR)	/* add the current f_offset */
+			fl.l_start += fp->f_offset;
+		if (error = VOP_ADVLOCK(vp, (caddr_t)p, F_GETLK, &fl, F_POSIX))
+			return (error);
+		return (copyout((caddr_t)&fl, (caddr_t)uap->arg, sizeof (fl)));
+
+	default:
+		return (EINVAL);
+	}
+	/* NOTREACHED */
+}
+
+/*
+ * Common tail of dup, dup2, and fcntl(F_DUPFD): slot "new" shares the
+ * open file of slot "old", its flags copied with UF_EXCLOSE cleared.
+ */
+int
+finishdup(fdp, old, new, retval)
+	register struct filedesc *fdp;
+	register int old, new, *retval;
+{
+	register struct file *shared = fdp->fd_ofiles[old];
+
+	fdp->fd_ofiles[new] = shared;
+	fdp->fd_ofileflags[new] = fdp->fd_ofileflags[old] & ~UF_EXCLOSE;
+	shared->f_count++;
+	if (fdp->fd_lastfile < new)
+		fdp->fd_lastfile = new;
+	*retval = new;
+	return (0);
+}
+
+/*
+ * Close a file descriptor and release its reference on the open file.
+ */
+struct close_args {
+	int	fd;
+};
+/* ARGSUSED */
+close(p, uap, retval)
+	struct proc *p;
+	struct close_args *uap;
+	int *retval;
+{
+	register struct filedesc *fdp = p->p_fd;
+	register struct file *fp;
+	register int fd = uap->fd;
+	register u_char *flagp;
+
+	fp = (unsigned)fd < fdp->fd_nfiles ? fdp->fd_ofiles[fd] : NULL;
+	if (fp == NULL)
+		return (EBADF);
+	flagp = (u_char *)&fdp->fd_ofileflags[fd];
+	if ((*flagp & UF_MAPPED) != 0)
+		(void) munmapfd(p, fd);
+	fdp->fd_ofiles[fd] = NULL;
+	while (fdp->fd_lastfile > 0 && fdp->fd_ofiles[fdp->fd_lastfile] == NULL)
+		fdp->fd_lastfile--;
+	if (fdp->fd_freefile > fd)
+		fdp->fd_freefile = fd;
+	*flagp = 0;
+	return (closef(fp, p));
+}
+
+#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
+/*
+ * Old-format fstat: return descriptor status as a struct ostat.
+ */
+struct ofstat_args {
+	int	fd;
+	struct	ostat *sb;
+};
+/* ARGSUSED */
+ofstat(p, uap, retval)
+	struct proc *p;
+	register struct ofstat_args *uap;
+	int *retval;
+{
+	register struct filedesc *fdp = p->p_fd;
+	register struct file *fp;
+	struct stat ub;
+	struct ostat oub;
+	int error;
+
+	if ((unsigned)uap->fd >= fdp->fd_nfiles ||
+	    (fp = fdp->fd_ofiles[uap->fd]) == NULL)
+		return (EBADF);
+	switch (fp->f_type) {
+
+	case DTYPE_VNODE:
+		error = vn_stat((struct vnode *)fp->f_data, &ub, p);
+		break;
+
+	case DTYPE_SOCKET:
+		error = soo_stat((struct socket *)fp->f_data, &ub);
+		break;
+
+	default:
+		panic("ofstat");
+		/*NOTREACHED*/
+	}
+	if (error)
+		return (error);	/* ub may be unset: skip the conversion */
+	cvtstat(&ub, &oub);
+	return (copyout((caddr_t)&oub, (caddr_t)uap->sb, sizeof (oub)));
+}
+#endif /* COMPAT_43 || COMPAT_SUNOS */
+
+/*
+ * Return status information about a file descriptor.  The stat data
+ * comes from vn_stat() for vnodes and soo_stat() for sockets, and is
+ * copied out to uap->sb.
+ */
+struct fstat_args {
+	int	fd;
+	struct	stat *sb;
+};
+/* ARGSUSED */
+fstat(p, uap, retval)
+	struct proc *p;
+	register struct fstat_args *uap;
+	int *retval;
+{
+	register struct filedesc *fdp = p->p_fd;
+	register struct file *fp;
+	struct stat sbuf;
+	int error;
+
+	if ((unsigned)uap->fd >= fdp->fd_nfiles)
+		return (EBADF);
+	fp = fdp->fd_ofiles[uap->fd];
+	if (fp == NULL)
+		return (EBADF);
+
+	if (fp->f_type == DTYPE_VNODE)
+		error = vn_stat((struct vnode *)fp->f_data, &sbuf, p);
+	else if (fp->f_type == DTYPE_SOCKET)
+		error = soo_stat((struct socket *)fp->f_data, &sbuf);
+	else {
+		panic("fstat");
+		/*NOTREACHED*/
+	}
+	/* Only a successful stat is copied out. */
+	if (error != 0)
+		return (error);
+	return (copyout((caddr_t)&sbuf, (caddr_t)uap->sb, sizeof (sbuf)));
+}
+
+/*
+ * Return pathconf information about a file descriptor.  Sockets only
+ * answer _PC_PIPE_BUF; vnodes defer to VOP_PATHCONF().
+ */
+struct fpathconf_args {
+	int	fd;
+	int	name;
+};
+/* ARGSUSED */
+fpathconf(p, uap, retval)
+	struct proc *p;
+	register struct fpathconf_args *uap;
+	int *retval;
+{
+	struct filedesc *fdp = p->p_fd;
+	struct file *fp;
+	struct vnode *vp;
+
+	if ((unsigned)uap->fd >= fdp->fd_nfiles)
+		return (EBADF);
+	fp = fdp->fd_ofiles[uap->fd];
+	if (fp == NULL)
+		return (EBADF);
+
+	if (fp->f_type == DTYPE_VNODE) {
+		vp = (struct vnode *)fp->f_data;
+		return (VOP_PATHCONF(vp, uap->name, retval));
+	}
+	if (fp->f_type == DTYPE_SOCKET) {
+		if (uap->name != _PC_PIPE_BUF)
+			return (EINVAL);
+		*retval = PIPE_BUF;
+		return (0);
+	}
+	panic("fpathconf");
+	/*NOTREACHED*/
+}
+
+/*
+ * Allocate a descriptor >= want for p: *result gets it, else EMFILE.
+ */
+int fdexpand;		/* number of times an ofile table has been expanded */
+
+fdalloc(p, want, result)
+	struct proc *p;
+	int want;
+	int *result;
+{
+	register struct filedesc *fdp = p->p_fd;
+	register int i;
+	int lim, last, nfiles;	/* nfiles: new table size; hides the global */
+	struct file **newofile;
+	char *newofileflags;
+
+	/*
+	 * Search for a free descriptor starting at the higher
+	 * of want or fd_freefile.  If that fails, consider
+	 * expanding the ofile array and then search again.
+	 */
+	lim = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfiles);
+	for (;;) {
+		last = min(fdp->fd_nfiles, lim);
+		if ((i = want) < fdp->fd_freefile)
+			i = fdp->fd_freefile;
+		for (; i < last; i++) {
+			if (fdp->fd_ofiles[i] == NULL) {
+				fdp->fd_ofileflags[i] = 0;
+				if (i > fdp->fd_lastfile)
+					fdp->fd_lastfile = i;
+				if (want <= fdp->fd_freefile)
+					fdp->fd_freefile = i;
+				*result = i;	/* slot itself is left NULL */
+				return (0);
+			}
+		}
+
+		/*
+		 * No space in current array.  Expand, unless at the limit.
+		 */
+		if (fdp->fd_nfiles >= lim)
+			return (EMFILE);
+		if (fdp->fd_nfiles < NDEXTENT)
+			nfiles = NDEXTENT;
+		else
+			nfiles = 2 * fdp->fd_nfiles;
+		MALLOC(newofile, struct file **, nfiles * OFILESIZE,
+		    M_FILEDESC, M_WAITOK);
+		newofileflags = (char *) &newofile[nfiles];
+		/*
+		 * Copy the existing ofile and ofileflags arrays
+		 * and zero the new portion of each array (i = bytes copied).
+		 */
+		bcopy(fdp->fd_ofiles, newofile,
+			(i = sizeof(struct file *) * fdp->fd_nfiles));
+		bzero((char *)newofile + i, nfiles * sizeof(struct file *) - i);
+		bcopy(fdp->fd_ofileflags, newofileflags,
+			(i = sizeof(char) * fdp->fd_nfiles));
+		bzero(newofileflags + i, nfiles * sizeof(char) - i);
+		if (fdp->fd_nfiles > NDFILE)	/* old table was MALLOCed */
+			FREE(fdp->fd_ofiles, M_FILEDESC);
+		fdp->fd_ofiles = newofile;
+		fdp->fd_ofileflags = newofileflags;
+		fdp->fd_nfiles = nfiles;
+		fdexpand++;
+	}
+}
+
+/*
+ * Report (1 or 0) whether process p could still obtain n descriptors,
+ * counting room to grow the table plus empty slots from fd_freefile up.
+ */
+fdavail(p, n)
+	struct proc *p;
+	register int n;
+{
+	register struct filedesc *fdp = p->p_fd;
+	register int idx, lim, growth;
+
+	lim = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfiles);
+	growth = lim - fdp->fd_nfiles;
+	if (growth > 0 && (n -= growth) <= 0)
+		return (1);
+	/* Otherwise count empty slots in the existing table. */
+	for (idx = fdp->fd_freefile; idx < fdp->fd_nfiles; idx++)
+		if (fdp->fd_ofiles[idx] == NULL && --n <= 0)
+			return (1);
+	return (0);
+}
+
+/*
+ * Create a new open file structure and allocate
+ * a file descriptor for the process that refers to it.
+ */
+falloc(p, resultfp, resultfd)
+	register struct proc *p;
+	struct file **resultfp;
+	int *resultfd;
+{
+	register struct file *fp, *fq, **fpp;
+	int error, i;
+
+	if (error = fdalloc(p, 0, &i))
+		return (error);
+	if (nfiles >= maxfiles) {	/* global open file count at its cap */
+		tablefull("file");
+		return (ENFILE);
+	}
+	/*
+	 * Allocate a new file structure and link it into the open file list.
+	 * If the process has file descriptor zero open, add to the list
+	 * of open files at that point, otherwise put it at the front of
+	 * the list of open files.
+	 */
+	nfiles++;
+	MALLOC(fp, struct file *, sizeof(struct file), M_FILE, M_WAITOK);
+	bzero(fp, sizeof(struct file));
+	if (fq = p->p_fd->fd_ofiles[0])
+		fpp = &fq->f_filef;	/* insert after descriptor 0's file */
+	else
+		fpp = &filehead;	/* insert at the head of the list */
+	p->p_fd->fd_ofiles[i] = fp;
+	if (fq = *fpp)			/* fq: the entry that will follow fp */
+		fq->f_fileb = &fp->f_filef;
+	fp->f_filef = fq;
+	fp->f_fileb = fpp;		/* address of the pointer to fp */
+	*fpp = fp;
+	fp->f_count = 1;
+	fp->f_cred = p->p_ucred;
+	crhold(fp->f_cred);		/* matched by crfree() in ffree() */
+	if (resultfp)			/* both result pointers are optional */
+		*resultfp = fp;
+	if (resultfd)
+		*resultfd = i;
+	return (0);
+}
+
+/*
+ * Unlink a file structure from the open file list and free it.
+ */
+ffree(fp)
+	register struct file *fp;
+{
+	register struct file *next = fp->f_filef;
+
+	if (next != NULL)
+		next->f_fileb = fp->f_fileb;
+	*fp->f_fileb = next;
+	crfree(fp->f_cred);
+#ifdef DIAGNOSTIC
+	fp->f_count = 0;
+	fp->f_fileb = NULL;
+	fp->f_filef = NULL;
+#endif
+	nfiles--;
+	FREE(fp, M_FILE);
+}
+
+/*
+ * Copy a filedesc structure.  The copy starts with a single reference
+ * and shares p's open files, each of which gains a reference.
+ */
+struct filedesc *
+fdcopy(p)
+	struct proc *p;
+{
+	register struct filedesc *newfdp, *fdp = p->p_fd;
+	register struct file **fpp;
+	register int i;
+
+	MALLOC(newfdp, struct filedesc *, sizeof(struct filedesc0),
+	    M_FILEDESC, M_WAITOK);
+	bcopy(fdp, newfdp, sizeof(struct filedesc));
+	VREF(newfdp->fd_cdir);
+	if (newfdp->fd_rdir)
+		VREF(newfdp->fd_rdir);
+	newfdp->fd_refcnt = 1;
+
+	/*
+	 * If the descriptors in use fit in the arrays embedded in
+	 * struct filedesc0, use them; otherwise allocate a separate
+	 * table sized for the descriptors currently in use.
+	 */
+	if (newfdp->fd_lastfile < NDFILE) {
+		newfdp->fd_ofiles = ((struct filedesc0 *) newfdp)->fd_dfiles;
+		newfdp->fd_ofileflags =
+		    ((struct filedesc0 *) newfdp)->fd_dfileflags;
+		i = NDFILE;
+	} else {
+		/*
+		 * Halve the copied table size while it exceeds both
+		 * 2 * NDEXTENT and twice fd_lastfile, so the table may shrink.
+		 */
+		i = newfdp->fd_nfiles;
+		while (i > 2 * NDEXTENT && i > newfdp->fd_lastfile * 2)
+			i /= 2;
+		MALLOC(newfdp->fd_ofiles, struct file **, i * OFILESIZE,
+		    M_FILEDESC, M_WAITOK);
+		newfdp->fd_ofileflags = (char *) &newfdp->fd_ofiles[i];
+	}
+	newfdp->fd_nfiles = i;
+	bcopy(fdp->fd_ofiles, newfdp->fd_ofiles, i * sizeof(struct file *));
+	bcopy(fdp->fd_ofileflags, newfdp->fd_ofileflags, i * sizeof(char));
+	fpp = newfdp->fd_ofiles;
+	for (i = newfdp->fd_lastfile; i-- >= 0; fpp++)
+		if (*fpp != NULL)
+			(*fpp)->f_count++;
+	return (newfdp);
+}
+
+/*
+ * Release a filedesc structure once its last reference is dropped.
+ */
+void
+fdfree(p)
+	struct proc *p;
+{
+	register struct filedesc *fdp = p->p_fd;
+	struct file **slot;
+	register int left;
+
+	if (--fdp->fd_refcnt > 0)
+		return;
+	slot = fdp->fd_ofiles;
+	for (left = fdp->fd_lastfile + 1; left > 0; left--, slot++)
+		if (*slot != NULL)
+			(void) closef(*slot, p);
+	if (fdp->fd_nfiles > NDFILE)
+		FREE(fdp->fd_ofiles, M_FILEDESC);
+	vrele(fdp->fd_cdir);
+	if (fdp->fd_rdir != NULL)
+		vrele(fdp->fd_rdir);
+	FREE(fdp, M_FILEDESC);
+}
+
+/*
+ * Internal form of close.
+ * Drops one reference on the file structure; the last reference
+ * closes the underlying object and frees the structure.
+ * Note: p may be NULL when the file was being passed in a message.
+ */
+closef(fp, p)
+	register struct file *fp;
+	register struct proc *p;
+{
+	struct vnode *vp;
+	struct flock lf;
+	int error = 0;
+
+	if (fp == NULL)
+		return (0);
+	/*
+	 * POSIX record locking dictates that any close releases ALL
+	 * locks owned by this process.  This is handled by setting
+	 * a flag in the unlock to free ONLY locks obeying POSIX
+	 * semantics, and not to free BSD-style file locks.
+	 * If the descriptor was in a message, POSIX-style locks
+	 * aren't passed with the descriptor.
+	 */
+	if (p != NULL && (p->p_flag & P_ADVLOCK) != 0 &&
+	    fp->f_type == DTYPE_VNODE) {
+		vp = (struct vnode *)fp->f_data;
+		lf.l_type = F_UNLCK;
+		lf.l_whence = SEEK_SET;
+		lf.l_start = 0;
+		lf.l_len = 0;
+		(void) VOP_ADVLOCK(vp, (caddr_t)p, F_UNLCK, &lf, F_POSIX);
+	}
+	if (--fp->f_count > 0)
+		return (0);
+	if (fp->f_count < 0)
+		panic("closef: count < 0");
+	/* Last reference: drop any flock()-style lock held through fp. */
+	if ((fp->f_flag & FHASLOCK) != 0 && fp->f_type == DTYPE_VNODE) {
+		vp = (struct vnode *)fp->f_data;
+		lf.l_type = F_UNLCK;
+		lf.l_whence = SEEK_SET;
+		lf.l_start = 0;
+		lf.l_len = 0;
+		(void) VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, F_FLOCK);
+	}
+	if (fp->f_ops != NULL)
+		error = (*fp->f_ops->fo_close)(fp, p);
+	ffree(fp);
+	return (error);
+}
+
+/*
+ * Apply or remove an advisory lock on a file descriptor.  The lock is
+ * a whole-file record lock (l_whence = SEEK_SET, l_start = 0,
+ * l_len = 0) identified by the file structure.
+ */
+struct flock_args {
+	int	fd;
+	int	how;
+};
+/* ARGSUSED */
+flock(p, uap, retval)
+	struct proc *p;
+	register struct flock_args *uap;
+	int *retval;
+{
+	register struct filedesc *fdp = p->p_fd;
+	register struct file *fp;
+	struct vnode *vp;
+	struct flock lf;
+	int how = uap->how, flags = F_FLOCK;
+
+	if ((unsigned)uap->fd >= fdp->fd_nfiles ||
+	    (fp = fdp->fd_ofiles[uap->fd]) == NULL)
+		return (EBADF);
+	if (fp->f_type != DTYPE_VNODE)
+		return (EOPNOTSUPP);
+	vp = (struct vnode *)fp->f_data;
+	lf.l_whence = SEEK_SET;
+	lf.l_start = 0;
+	lf.l_len = 0;
+	if (how & LOCK_UN) {
+		lf.l_type = F_UNLCK;
+		fp->f_flag &= ~FHASLOCK;
+		return (VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, F_FLOCK));
+	}
+	if (how & LOCK_EX)
+		lf.l_type = F_WRLCK;
+	else if (how & LOCK_SH)
+		lf.l_type = F_RDLCK;
+	else
+		return (EBADF);
+	fp->f_flag |= FHASLOCK;
+	if ((how & LOCK_NB) == 0)
+		flags |= F_WAIT;	/* no LOCK_NB: the request may wait */
+	return (VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, flags));
+}
+
+/*
+ * File Descriptor pseudo-device driver (/dev/fd/).
+ *
+ * Opening minor device N dup()s the file (if any) connected to file
+ * descriptor N belonging to the calling process.  Note that this driver
+ * consists of only the ``open()'' routine, because all subsequent
+ * references to this file will be direct to the other driver.
+ */
+/* ARGSUSED */
+fdopen(dev, mode, type, p)
+	dev_t dev;
+	int mode, type;
+	struct proc *p;
+{
+
+	/*
+	 * XXX Kludge: set p->p_dupfd to contain the value of the
+	 * file descriptor being sought for duplication. The error
+	 * return ensures that the vnode for this device will be released
+	 * by vn_open. Open will detect this special error and take the
+	 * actions in dupfdopen below. Other callers of vn_open or VOP_OPEN
+	 * will simply report the error.
+	 */
+	p->p_dupfd = minor(dev);	/* minor number names the fd to dup */
+	return (ENODEV);
+}
+
+/*
+ * Duplicate the specified descriptor to a free descriptor, on behalf of
+ * an open that returned one of the special errors ENODEV or ENXIO.
+ */
+dupfdopen(fdp, indx, dfd, mode, error)
+	register struct filedesc *fdp;
+	register int indx, dfd;
+	int mode;
+	int error;
+{
+	register struct file *wfp;
+	struct file *fp;
+
+	/*
+	 * If the to-be-dup'd fd number is greater than the allowed number
+	 * of file descriptors, or the fd to be dup'd has already been
+	 * closed, reject.  Note, check for new == old is necessary as
+	 * falloc could allocate an already closed to-be-dup'd descriptor
+	 * as the new descriptor.
+	 */
+	fp = fdp->fd_ofiles[indx];
+	if ((u_int)dfd >= fdp->fd_nfiles ||
+	    (wfp = fdp->fd_ofiles[dfd]) == NULL || fp == wfp)
+		return (EBADF);
+
+	/*
+	 * There are two cases of interest here.
+	 *
+	 * For ENODEV simply dup (dfd) to file descriptor
+	 * (indx) and return.
+	 *
+	 * For ENXIO steal away the file structure from (dfd) and
+	 * store it in (indx).  (dfd) is effectively closed by
+	 * this operation.
+	 *
+	 * Any other error code is just returned.
+	 */
+	switch (error) {
+	case ENODEV:
+		/*
+		 * Check that the mode the file is being opened for is a
+		 * subset of the mode of the existing descriptor.
+		 */
+		if (((mode & (FREAD|FWRITE)) | wfp->f_flag) != wfp->f_flag)
+			return (EACCES);
+		fdp->fd_ofiles[indx] = wfp;
+		fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd];
+		wfp->f_count++;
+		if (indx > fdp->fd_lastfile)
+			fdp->fd_lastfile = indx;
+		return (0);
+
+	case ENXIO:
+		/* Steal the file pointer from dfd and store it in indx. */
+		fdp->fd_ofiles[indx] = fdp->fd_ofiles[dfd];
+		fdp->fd_ofiles[dfd] = NULL;
+		fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd];
+		fdp->fd_ofileflags[dfd] = 0;
+		/*
+		 * Recompute the hints.  The freefile check runs on both
+		 * lastfile paths, because dfd has just become free.
+		 */
+		if (indx > fdp->fd_lastfile) {
+			fdp->fd_lastfile = indx;
+		} else {
+			while (fdp->fd_lastfile > 0 &&
+			    fdp->fd_ofiles[fdp->fd_lastfile] == NULL)
+				fdp->fd_lastfile--;
+		}
+		if (dfd < fdp->fd_freefile)
+			fdp->fd_freefile = dfd;
+		return (0);
+
+	default:
+		return (error);
+	}
+	/* NOTREACHED */
+}
diff --git a/sys/kern/kern_exec.c b/sys/kern/kern_exec.c
new file mode 100644
index 00000000000..fbb4444d52b
--- /dev/null
+++ b/sys/kern/kern_exec.c
@@ -0,0 +1,64 @@
+/*-
+ * Copyright (c) 1982, 1986, 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	from: @(#)kern_exec.c	8.1 (Berkeley) 6/10/93
+ */
+
+#include <sys/param.h>
+#include <sys/errno.h>
+#include <sys/proc.h>
+
+/*
+ * exec system call (stub: the body was deleted, so it always fails).
+ */
+struct execve_args {
+	char	*fname;
+	char	**argp;
+	char	**envp;
+};
+/* ARGSUSED */
+execve(a1, a2, a3)
+	struct proc *a1;
+	struct execve_args *a2;
+	int *a3;
+{
+
+	/*
+	 * Body deleted.
+	 */
+	return (ENOSYS);	/* every call reports ENOSYS */
+}
diff --git a/sys/kern/kern_exit.c b/sys/kern/kern_exit.c
new file mode 100644
index 00000000000..03353c72d1d
--- /dev/null
+++ b/sys/kern/kern_exit.c
@@ -0,0 +1,492 @@
+/*
+ * Copyright (c) 1982, 1986, 1989, 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)kern_exit.c	8.7 (Berkeley) 2/12/94
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/map.h>
+#include <sys/ioctl.h>
+#include <sys/proc.h>
+#include <sys/tty.h>
+#include <sys/time.h>
+#include <sys/resource.h>
+#include <sys/kernel.h>
+#include <sys/buf.h>
+#include <sys/wait.h>
+#include <sys/file.h>
+#include <sys/vnode.h>
+#include <sys/syslog.h>
+#include <sys/malloc.h>
+#include <sys/resourcevar.h>
+#include <sys/ptrace.h>
+
+#include <machine/cpu.h>
+#ifdef COMPAT_43
+#include <machine/reg.h>
+#include <machine/psl.h>
+#endif
+
+#include <vm/vm.h>
+#include <vm/vm_kern.h>
+
+__dead void cpu_exit __P((struct proc *));	/* machine-dependent; last call made by exit1() */
+__dead void exit1 __P((struct proc *, int));	/* second argument is a wait-style status */
+
+/*
+ * exit -- system call: the calling process dies with exit code uap->rval.
+ */
+struct rexit_args {
+	int	rval;
+};
+__dead void
+exit(p, uap, retval)
+	struct proc *p;
+	struct rexit_args *uap;
+	int *retval;
+{
+	int status = W_EXITCODE(uap->rval, 0);	/* wait-style status word */
+
+	exit1(p, status);
+	/* NOTREACHED */
+}
+
+/*
+ * Exit: deallocate address space and other resources, change proc state
+ * to zombie, and unlink proc from allproc and parent's lists.  Save exit
+ * status and rusage for wait().  Check for child processes and orphan them.
+ * 'p' is the exiting process and 'rv' its wait-style status, saved in
+ * p_xstat.  Panics if p is init (pid 1); otherwise ends in cpu_exit().
+ */
+__dead void
+exit1(p, rv)
+	register struct proc *p;
+	int rv;
+{
+	register struct proc *q, *nq;
+	register struct proc **pp;
+	register struct vmspace *vm;
+
+	if (p->p_pid == 1)
+		panic("init died (signal %d, exit %d)",
+		    WTERMSIG(rv), WEXITSTATUS(rv));
+#ifdef PGINPROF
+	vmsizmon();
+#endif
+	if (p->p_flag & P_PROFIL)
+		stopprofclock(p);
+	MALLOC(p->p_ru, struct rusage *, sizeof(struct rusage),
+		M_ZOMBIE, M_WAITOK);
+	/*
+	 * If parent is waiting for us to exit or exec,
+	 * P_PPWAIT is set; we will wakeup the parent below.
+	 */
+	p->p_flag &= ~(P_TRACED | P_PPWAIT);
+	p->p_flag |= P_WEXIT;
+	p->p_sigignore = ~0;
+	p->p_siglist = 0;
+	untimeout(realitexpire, (caddr_t)p);
+
+	/*
+	 * Close open files and release open-file table.
+	 * This may block!
+	 */
+	fdfree(p);
+
+	/* The next two chunks should probably be moved to vmspace_exit. */
+	vm = p->p_vmspace;
+#ifdef SYSVSHM
+	if (vm->vm_shm)
+		shmexit(p);
+#endif
+	/*
+	 * Release user portion of address space.
+	 * This releases references to vnodes,
+	 * which could cause I/O if the file has been unlinked.
+	 * Need to do this early enough that we can still sleep.
+	 * Can't free the entire vmspace as the kernel stack
+	 * may be mapped within that space also.
+	 */
+	if (vm->vm_refcnt == 1)
+		(void) vm_map_remove(&vm->vm_map, VM_MIN_ADDRESS,
+		    VM_MAXUSER_ADDRESS);
+
+	if (SESS_LEADER(p)) {
+		register struct session *sp = p->p_session;
+
+		if (sp->s_ttyvp) {
+			/*
+			 * Controlling process.
+			 * Signal foreground pgrp,
+			 * drain controlling terminal
+			 * and revoke access to controlling terminal.
+			 */
+			if (sp->s_ttyp->t_session == sp) {
+				if (sp->s_ttyp->t_pgrp)
+					pgsignal(sp->s_ttyp->t_pgrp, SIGHUP, 1);
+				(void) ttywait(sp->s_ttyp);
+				/*
+				 * The tty could have been revoked
+				 * if we blocked.
+				 */
+				if (sp->s_ttyvp)
+					vgoneall(sp->s_ttyvp);
+			}
+			if (sp->s_ttyvp)
+				vrele(sp->s_ttyvp);
+			sp->s_ttyvp = NULL;
+			/*
+			 * s_ttyp is not zero'd; we use this to indicate
+			 * that the session once had a controlling terminal.
+			 * (for logging and informational purposes)
+			 */
+		}
+		sp->s_leader = NULL;
+	}
+	fixjobc(p, p->p_pgrp, 0);
+	p->p_rlimit[RLIMIT_FSIZE].rlim_cur = RLIM_INFINITY;
+	(void)acct_process(p);
+#ifdef KTRACE
+	/* Release trace file; clear p_tracep first, as vrele() may block. */
+	p->p_traceflag = 0;	/* don't trace the vrele() */
+	if (p->p_tracep) {
+		struct vnode *tracevp = p->p_tracep;
+		p->p_tracep = NULL;
+		vrele(tracevp);
+	}
+#endif
+	/*
+	 * Remove proc from allproc queue and pidhash chain.
+	 * Place onto zombproc.  Unlink from parent's child list.
+	 */
+	if (*p->p_prev = p->p_next)
+		p->p_next->p_prev = p->p_prev;
+	if (p->p_next = zombproc)
+		p->p_next->p_prev = &p->p_next;
+	p->p_prev = &zombproc;
+	zombproc = p;
+	p->p_stat = SZOMB;
+
+	for (pp = &pidhash[PIDHASH(p->p_pid)]; *pp; pp = &(*pp)->p_hash)
+		if (*pp == p) {
+			*pp = p->p_hash;
+			goto done;
+		}
+	panic("exit");
+done:
+	if (p->p_cptr)		/* only need this if any child is S_ZOMB */
+		wakeup((caddr_t) initproc);
+	for (q = p->p_cptr; q != NULL; q = nq) {
+		nq = q->p_osptr;
+		if (nq != NULL)
+			nq->p_ysptr = NULL;
+		if (initproc->p_cptr)
+			initproc->p_cptr->p_ysptr = q;
+		q->p_osptr = initproc->p_cptr;
+		q->p_ysptr = NULL;
+		initproc->p_cptr = q;
+
+		q->p_pptr = initproc;
+		/*
+		 * Traced processes are killed
+		 * since their existence means someone is screwing up.
+		 */
+		if (q->p_flag & P_TRACED) {
+			q->p_flag &= ~P_TRACED;
+			psignal(q, SIGKILL);
+		}
+	}
+	p->p_cptr = NULL;
+
+	/*
+	 * Save exit status and final rusage info, adding in child rusage
+	 * info and self times.
+	 */
+	p->p_xstat = rv;
+	*p->p_ru = p->p_stats->p_ru;
+	calcru(p, &p->p_ru->ru_utime, &p->p_ru->ru_stime, NULL);
+	ruadd(p->p_ru, &p->p_stats->p_cru);
+
+	/* Notify parent that we're gone. */
+	psignal(p->p_pptr, SIGCHLD);
+	wakeup((caddr_t)p->p_pptr);
+#if defined(tahoe)
+	/* move this to cpu_exit */
+	p->p_addr->u_pcb.pcb_savacc.faddr = (float *)NULL;
+#endif
+	/*
+	 * Clear curproc after we've done all operations
+	 * that could block, and before tearing down the rest
+	 * of the process state that might be used from clock, etc.
+	 * Also, can't clear curproc while we're still runnable,
+	 * as we're not on a run queue (we are current, just not
+	 * a proper proc any longer!).
+	 *
+	 * Other substructures are freed from wait().
+	 */
+	curproc = NULL;
+	if (--p->p_limit->p_refcnt == 0)
+		FREE(p->p_limit, M_SUBPROC);
+
+	/*
+	 * Finally, call machine-dependent code to release the remaining
+	 * resources including address space, the kernel stack and pcb.
+	 * The address space is released by "vmspace_free(p->p_vmspace)";
+	 * This is machine-dependent, as we may have to change stacks
+	 * or ensure that the current one isn't reallocated before we
+	 * finish.  cpu_exit will end with a call to cpu_swtch(), finishing
+	 * our execution (pun intended).
+	 */
+	cpu_exit(p);
+}
+
+struct wait_args {
+	int	pid;		/* WAIT_ANY, 0 (caller's pgrp), pid, or -pgid */
+	int	*status;	/* user address for the status, or NULL */
+	int	options;	/* WNOHANG and/or WUNTRACED */
+	struct	rusage *rusage;	/* user address for child rusage, or NULL */
+#ifdef COMPAT_43
+	int	compat;		/* pseudo: set by owait (1) and wait4 (0) */
+#endif
+};
+
+#ifdef COMPAT_43
+#if defined(hp300) || defined(luna68k)
+#include <machine/frame.h>
+#define GETPS(rp)	((struct frame *)(rp))->f_sr	/* PS read from the frame's f_sr */
+#else
+#define GETPS(rp)	(rp)[PS]	/* PS read from register slot PS */
+#endif
+
+owait(p, uap, retval)
+	struct proc *p;
+	register struct wait_args *uap;
+	int *retval;
+{
+	/*
+	 * Old-style wait: any child, status handed back in retval[1]
+	 * instead of being copied out, no options or rusage by default.
+	 */
+	uap->pid = WAIT_ANY;
+	uap->status = NULL;
+	uap->compat = 1;
+	uap->options = 0;
+	uap->rusage = NULL;
+#ifdef PSL_ALLCC
+	/* All condition codes set: options and rusage come from R0/R1. */
+	if ((GETPS(p->p_md.md_regs) & PSL_ALLCC) == PSL_ALLCC) {
+		uap->options = p->p_md.md_regs[R0];
+		uap->rusage = (struct rusage *)p->p_md.md_regs[R1];
+	}
+#endif
+	return (wait1(p, uap, retval));
+}
+
+wait4(p, uap, retval)
+	struct proc *p;
+	struct wait_args *uap;
+	int *retval;
+{
+	/* Native entry: status goes out through uap->status, not retval[1]. */
+	uap->compat = 0;
+	return (wait1(p, uap, retval));
+}
+#else
+#define	wait1	wait4	/* without COMPAT_43 the function below is wait4 itself */
+#endif
+
+int
+wait1(q, uap, retval)		/* common body of owait() and wait4() */
+	register struct proc *q;	/* the waiting (parent) process */
+	register struct wait_args *uap;
+	int retval[];			/* [0] = pid reported, 0 if WNOHANG found none */
+{
+	register int nfound;
+	register struct proc *p, *t;
+	int status, error;
+
+	if (uap->pid == 0)		/* 0 selects the caller's own pgrp */
+		uap->pid = -q->p_pgid;
+#ifdef notyet
+	if (uap->options &~ (WUNTRACED|WNOHANG))
+		return (EINVAL);
+#endif
+loop:
+	nfound = 0;
+	for (p = q->p_cptr; p; p = p->p_osptr) {
+		if (uap->pid != WAIT_ANY &&
+		    p->p_pid != uap->pid && p->p_pgid != -uap->pid)
+			continue;
+		nfound++;		/* child matches the pid/pgid filter */
+		if (p->p_stat == SZOMB) {
+			retval[0] = p->p_pid;
+#ifdef COMPAT_43
+			if (uap->compat)
+				retval[1] = p->p_xstat;
+			else
+#endif
+			if (uap->status) {
+				status = p->p_xstat;	/* convert to int */
+				if (error = copyout((caddr_t)&status,
+				    (caddr_t)uap->status, sizeof(status)))
+					return (error);
+			}
+			if (uap->rusage && (error = copyout((caddr_t)p->p_ru,
+			    (caddr_t)uap->rusage, sizeof (struct rusage))))
+				return (error);
+			/*
+			 * If we got the child via a ptrace 'attach',
+			 * we need to give it back to the old parent.
+			 */
+			if (p->p_oppid && (t = pfind(p->p_oppid))) {
+				p->p_oppid = 0;
+				proc_reparent(p, t);
+				psignal(t, SIGCHLD);
+				wakeup((caddr_t)t);
+				return (0);	/* zombie is left for t to reap */
+			}
+			p->p_xstat = 0;
+			ruadd(&q->p_stats->p_cru, p->p_ru);
+			FREE(p->p_ru, M_ZOMBIE);
+
+			/*
+			 * Decrement the count of procs running with this uid.
+			 */
+			(void)chgproccnt(p->p_cred->p_ruid, -1);
+
+			/*
+			 * Free up credentials.
+			 */
+			if (--p->p_cred->p_refcnt == 0) {
+				crfree(p->p_cred->pc_ucred);
+				FREE(p->p_cred, M_SUBPROC);
+			}
+
+			/*
+			 * Release reference to text vnode
+			 */
+			if (p->p_textvp)
+				vrele(p->p_textvp);
+
+			/*
+			 * Finally finished with old proc entry.
+			 * Unlink it from its process group and free it.
+			 */
+			leavepgrp(p);
+			if (*p->p_prev = p->p_next)	/* off zombproc */
+				p->p_next->p_prev = p->p_prev;
+			if (q = p->p_ysptr)	/* q is only scratch from here on */
+				q->p_osptr = p->p_osptr;
+			if (q = p->p_osptr)
+				q->p_ysptr = p->p_ysptr;
+			if ((q = p->p_pptr)->p_cptr == p)
+				q->p_cptr = p->p_osptr;
+
+			/*
+			 * Give machine-dependent layer a chance
+			 * to free anything that cpu_exit couldn't
+			 * release while still running in process context.
+			 */
+			cpu_wait(p);
+			FREE(p, M_PROC);
+			nprocs--;
+			return (0);
+		}
+		if (p->p_stat == SSTOP && (p->p_flag & P_WAITED) == 0 &&
+		    (p->p_flag & P_TRACED || uap->options & WUNTRACED)) {
+			p->p_flag |= P_WAITED;	/* report each stop only once */
+			retval[0] = p->p_pid;
+#ifdef COMPAT_43
+			if (uap->compat) {
+				retval[1] = W_STOPCODE(p->p_xstat);
+				error = 0;
+			} else
+#endif
+			if (uap->status) {
+				status = W_STOPCODE(p->p_xstat);
+				error = copyout((caddr_t)&status,
+					(caddr_t)uap->status, sizeof(status));
+			} else
+				error = 0;
+			return (error);
+		}
+	}
+	if (nfound == 0)		/* no child matches the request */
+		return (ECHILD);
+	if (uap->options & WNOHANG) {
+		retval[0] = 0;
+		return (0);
+	}
+	if (error = tsleep((caddr_t)q, PWAIT | PCATCH, "wait", 0))
+		return (error);
+	goto loop;		/* woken up: rescan the children */
+}
+
+/*
+ * Reparent 'child': unlink it from its old parent, push it onto 'parent'.
+ */
+void
+proc_reparent(child, parent)
+	register struct proc *child;
+	register struct proc *parent;
+{
+	register struct proc *oldparent = child->p_pptr;
+	register struct proc *older, *younger, *head;
+
+	if (oldparent == parent)
+		return;
+
+	/* Unlink the child from the old parent's sibling chain. */
+	older = child->p_osptr;
+	younger = child->p_ysptr;
+	if (younger != NULL)
+		younger->p_osptr = older;
+	if (older != NULL)
+		older->p_ysptr = younger;
+	if (oldparent->p_cptr == child)
+		oldparent->p_cptr = older;
+
+	/* Insert it at the front of the new parent's chain. */
+	head = parent->p_cptr;
+	child->p_pptr = parent;
+	child->p_ysptr = NULL;
+	child->p_osptr = head;
+	if (head != NULL)
+		head->p_ysptr = child;
+	parent->p_cptr = child;
+}
diff --git a/sys/kern/kern_fork.c b/sys/kern/kern_fork.c
new file mode 100644
index 00000000000..8bec2fa5d5f
--- /dev/null
+++ b/sys/kern/kern_fork.c
@@ -0,0 +1,315 @@
+/*
+ * Copyright (c) 1982, 1986, 1989, 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)kern_fork.c	8.6 (Berkeley) 4/8/94
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/map.h>
+#include <sys/filedesc.h>
+#include <sys/kernel.h>
+#include <sys/malloc.h>
+#include <sys/proc.h>
+#include <sys/resourcevar.h>
+#include <sys/vnode.h>
+#include <sys/file.h>
+#include <sys/acct.h>
+#include <sys/ktrace.h>
+
+struct fork_args {
+	int	dummy;		/* never read by fork() or vfork() */
+};
+/* ARGSUSED */
+fork(p, uap, retval)
+	struct proc *p;
+	struct fork_args *uap;
+	int retval[];
+{
+	/* isvfork = 0: the parent does not wait for the child. */
+	return (fork1(p, 0, retval));
+}
+
+/* ARGSUSED */
+vfork(p, uap, retval)
+	struct proc *p;
+	struct fork_args *uap;
+	int retval[];
+{
+	/* isvfork = 1: fork1() sleeps until the child clears P_PPWAIT. */
+	return (fork1(p, 1, retval));
+}
+
+int	nprocs = 1;		/* current number of processes; 1 = process 0 */
+
+fork1(p1, isvfork, retval)		/* common body of fork() and vfork() */
+	register struct proc *p1;	/* parent (the caller) */
+	int isvfork, retval[];		/* retval[1]: 1 in child, 0 in parent */
+{
+	register struct proc *p2;
+	register uid_t uid;
+	struct proc *newproc;
+	struct proc **hash;
+	int count;
+	static int nextpid, pidchecked = 0;	/* persist across calls */
+
+	/*
+	 * Although process entries are dynamically created, we still keep
+	 * a global limit on the maximum number we will create.  Don't allow
+	 * a nonprivileged user to use the last process; don't let root
+	 * exceed the limit. The variable nprocs is the current number of
+	 * processes, maxproc is the limit.
+	 */
+	uid = p1->p_cred->p_ruid;
+	if ((nprocs >= maxproc - 1 && uid != 0) || nprocs >= maxproc) {
+		tablefull("proc");
+		return (EAGAIN);
+	}
+	/*
+	 * Increment the count of procs running with this uid. Don't allow
+	 * a nonprivileged user to exceed their current limit.
+	 */
+	count = chgproccnt(uid, 1);
+	if (uid != 0 && count > p1->p_rlimit[RLIMIT_NPROC].rlim_cur) {
+		(void)chgproccnt(uid, -1);	/* undo the increment above */
+		return (EAGAIN);
+	}
+
+	/* Allocate new proc. */
+	MALLOC(newproc, struct proc *, sizeof(struct proc), M_PROC, M_WAITOK);
+
+	/*
+	 * Find an unused process ID.  We remember a range of unused IDs
+	 * ready to use (from nextpid+1 through pidchecked-1).
+	 */
+	nextpid++;
+retry:
+	/*
+	 * If the process ID prototype has wrapped around,
+	 * restart somewhat above 0, as the low-numbered procs
+	 * tend to include daemons that don't exit.
+	 */
+	if (nextpid >= PID_MAX) {
+		nextpid = 100;
+		pidchecked = 0;
+	}
+	if (nextpid >= pidchecked) {
+		int doingzomb = 0;
+
+		pidchecked = PID_MAX;
+		/*
+		 * Scan the active and zombie procs to check whether this pid
+		 * is in use.  Remember the lowest pid that's greater
+		 * than nextpid, so we can avoid checking for a while.
+		 */
+		p2 = (struct proc *)allproc;
+again:
+		for (; p2 != NULL; p2 = p2->p_next) {
+			while (p2->p_pid == nextpid ||
+			    p2->p_pgrp->pg_id == nextpid) {
+				nextpid++;
+				if (nextpid >= pidchecked)
+					goto retry;
+			}
+			if (p2->p_pid > nextpid && pidchecked > p2->p_pid)
+				pidchecked = p2->p_pid;
+			if (p2->p_pgrp->pg_id > nextpid && 
+			    pidchecked > p2->p_pgrp->pg_id)
+				pidchecked = p2->p_pgrp->pg_id;
+		}
+		if (!doingzomb) {
+			doingzomb = 1;	/* second pass: the zombie list */
+			p2 = zombproc;
+			goto again;
+		}
+	}
+
+
+	/*
+	 * Link onto allproc (this should probably be delayed).
+	 * Heavy use of volatile here to prevent the compiler from
+	 * rearranging code.  Yes, it *is* terribly ugly, but at least
+	 * it works.
+	 */
+	nprocs++;
+	p2 = newproc;
+#define	Vp2 ((volatile struct proc *)p2)
+	Vp2->p_stat = SIDL;			/* protect against others */
+	Vp2->p_pid = nextpid;
+	/*
+	 * This is really:
+	 *	p2->p_next = allproc;
+	 *	allproc->p_prev = &p2->p_next;
+	 *	p2->p_prev = &allproc;
+	 *	allproc = p2;
+	 * The assignment via allproc is legal since it is never NULL.
+	 */
+	*(volatile struct proc **)&Vp2->p_next = allproc;
+	*(volatile struct proc ***)&allproc->p_prev =
+	    (volatile struct proc **)&Vp2->p_next;
+	*(volatile struct proc ***)&Vp2->p_prev = &allproc;
+	allproc = Vp2;
+#undef Vp2
+	p2->p_forw = p2->p_back = NULL;		/* shouldn't be necessary */
+
+	/* Insert on the hash chain. */
+	hash = &pidhash[PIDHASH(p2->p_pid)];
+	p2->p_hash = *hash;
+	*hash = p2;
+
+	/*
+	 * Make a proc table entry for the new process.
+	 * Start by zeroing the section of proc that is zero-initialized,
+	 * then copy the section that is copied directly from the parent.
+	 */
+	bzero(&p2->p_startzero,
+	    (unsigned) ((caddr_t)&p2->p_endzero - (caddr_t)&p2->p_startzero));
+	bcopy(&p1->p_startcopy, &p2->p_startcopy,
+	    (unsigned) ((caddr_t)&p2->p_endcopy - (caddr_t)&p2->p_startcopy));
+
+	/*
+	 * Duplicate sub-structures as needed.
+	 * Increase reference counts on shared objects.
+	 * The p_stats and p_sigacts substructs are set in vm_fork.
+	 */
+	p2->p_flag = P_INMEM;
+	if (p1->p_flag & P_PROFIL)
+		startprofclock(p2);
+	MALLOC(p2->p_cred, struct pcred *, sizeof(struct pcred),
+	    M_SUBPROC, M_WAITOK);
+	bcopy(p1->p_cred, p2->p_cred, sizeof(*p2->p_cred));
+	p2->p_cred->p_refcnt = 1;
+	crhold(p1->p_ucred);
+
+	/* bump references to the text vnode (for procfs) */
+	p2->p_textvp = p1->p_textvp;
+	if (p2->p_textvp)
+		VREF(p2->p_textvp);
+
+	p2->p_fd = fdcopy(p1);
+	/*
+	 * If p_limit is still copy-on-write, bump refcnt,
+	 * otherwise get a copy that won't be modified.
+	 * (If PL_SHAREMOD is clear, the structure is shared
+	 * copy-on-write.)
+	 */
+	if (p1->p_limit->p_lflags & PL_SHAREMOD)
+		p2->p_limit = limcopy(p1->p_limit);
+	else {
+		p2->p_limit = p1->p_limit;
+		p2->p_limit->p_refcnt++;
+	}
+
+	if (p1->p_session->s_ttyvp != NULL && p1->p_flag & P_CONTROLT)
+		p2->p_flag |= P_CONTROLT;
+	if (isvfork)
+		p2->p_flag |= P_PPWAIT;
+	p2->p_pgrpnxt = p1->p_pgrpnxt;	/* link into the parent's pgrp chain */
+	p1->p_pgrpnxt = p2;
+	p2->p_pptr = p1;
+	p2->p_osptr = p1->p_cptr;	/* become the parent's youngest child */
+	if (p1->p_cptr)
+		p1->p_cptr->p_ysptr = p2;
+	p1->p_cptr = p2;
+#ifdef KTRACE
+	/*
+	 * Copy traceflag and tracefile if enabled.
+	 * If not inherited, these were zeroed above.
+	 */
+	if (p1->p_traceflag&KTRFAC_INHERIT) {
+		p2->p_traceflag = p1->p_traceflag;
+		if ((p2->p_tracep = p1->p_tracep) != NULL)
+			VREF(p2->p_tracep);
+	}
+#endif
+
+	/*
+	 * This begins the section where we must prevent the parent
+	 * from being swapped.
+	 */
+	p1->p_flag |= P_NOSWAP;
+	/*
+	 * Set return values for child before vm_fork,
+	 * so they can be copied to child stack.
+	 * We return parent pid, and mark as child in retval[1].
+	 * NOTE: the kernel stack may be at a different location in the child
+	 * process, and thus addresses of automatic variables (including retval)
+	 * may be invalid after vm_fork returns in the child process.
+	 */
+	retval[0] = p1->p_pid;
+	retval[1] = 1;
+	if (vm_fork(p1, p2, isvfork)) {
+		/*
+		 * Child process.  Set start time and get to work.
+		 */
+		(void) splclock();
+		p2->p_stats->p_start = time;
+		(void) spl0();
+		p2->p_acflag = AFORK;
+		return (0);
+	}
+
+	/*
+	 * Make child runnable and add to run queue.
+	 */
+	(void) splhigh();
+	p2->p_stat = SRUN;
+	setrunqueue(p2);
+	(void) spl0();
+
+	/*
+	 * Now can be swapped.
+	 */
+	p1->p_flag &= ~P_NOSWAP;
+
+	/*
+	 * Preserve synchronization semantics of vfork.  If waiting for
+	 * child to exec or exit, set P_PPWAIT on child, and sleep on our
+	 * proc (in case of exit).
+	 */
+	if (isvfork)
+		while (p2->p_flag & P_PPWAIT)
+			tsleep(p1, PWAIT, "ppwait", 0);
+
+	/*
+	 * Return child pid to parent process,
+	 * marking us as parent via retval[1].
+	 */
+	retval[0] = p2->p_pid;
+	retval[1] = 0;
+	return (0);
+}
diff --git a/sys/kern/kern_ktrace.c b/sys/kern/kern_ktrace.c
new file mode 100644
index 00000000000..763cfb257ff
--- /dev/null
+++ b/sys/kern/kern_ktrace.c
@@ -0,0 +1,466 @@
+/*
+ * Copyright (c) 1989, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)kern_ktrace.c	8.2 (Berkeley) 9/23/93
+ */
+
+#ifdef KTRACE
+
+#include <sys/param.h>
+#include <sys/proc.h>
+#include <sys/file.h>
+#include <sys/namei.h>
+#include <sys/vnode.h>
+#include <sys/ktrace.h>
+#include <sys/malloc.h>
+#include <sys/syslog.h>
+
+struct ktr_header *
+ktrgetheader(type)
+	int type;			/* KTR_* record type, stored in ktr_type */
+{
+	register struct ktr_header *kth;
+	struct proc *p = curproc;	/* XXX */
+
+	MALLOC(kth, struct ktr_header *, sizeof (*kth), M_TEMP, M_WAITOK);
+	bzero((caddr_t)kth, (unsigned)sizeof (*kth));	/* whole header is logged */
+	kth->ktr_type = type;
+	microtime(&kth->ktr_time);
+	kth->ktr_pid = p->p_pid;
+	bcopy(p->p_comm, kth->ktr_comm, MAXCOMLEN);
+	return (kth);		/* caller sets ktr_buf/ktr_len and FREEs it */
+}
+
+ktrsyscall(vp, code, narg, args)
+	struct vnode *vp;
+	int code, narg, args[];
+{
+	struct proc *p = curproc;	/* XXX */
+	struct ktr_header *kth;
+	struct ktr_syscall *rec;
+	register int reclen = sizeof(struct ktr_syscall) + narg * sizeof(int);
+	int *dst, n;		/* the args are copied right behind *rec */
+
+	p->p_traceflag |= KTRFAC_ACTIVE;
+	kth = ktrgetheader(KTR_SYSCALL);
+	MALLOC(rec, struct ktr_syscall *, reclen, M_TEMP, M_WAITOK);
+	rec->ktr_code = code;
+	rec->ktr_narg = narg;
+	dst = (int *)(rec + 1);
+	for (n = 0; n < narg; n++)
+		dst[n] = args[n];
+	kth->ktr_buf = (caddr_t)rec;
+	kth->ktr_len = reclen;
+	ktrwrite(vp, kth);
+	FREE(rec, M_TEMP);
+	FREE(kth, M_TEMP);
+	p->p_traceflag &= ~KTRFAC_ACTIVE;
+}
+
+ktrsysret(vp, code, error, retval)
+	struct vnode *vp;
+	int code, error, retval;
+{
+	struct proc *p = curproc;	/* XXX */
+	struct ktr_header *kth;
+	struct ktr_sysret rec;
+
+	/* The record lives on our stack; only the header is allocated. */
+	rec.ktr_code = code;
+	rec.ktr_error = error;
+	rec.ktr_retval = retval;		/* what about val2 ? */
+
+	p->p_traceflag |= KTRFAC_ACTIVE;
+	kth = ktrgetheader(KTR_SYSRET);
+	kth->ktr_len = sizeof(struct ktr_sysret);
+	kth->ktr_buf = (caddr_t)&rec;
+	ktrwrite(vp, kth);
+	FREE(kth, M_TEMP);
+	p->p_traceflag &= ~KTRFAC_ACTIVE;
+}
+
+ktrnamei(vp, path)
+	struct vnode *vp;
+	char *path;
+{
+	struct proc *p = curproc;	/* XXX */
+	struct ktr_header *hdr;
+
+	p->p_traceflag |= KTRFAC_ACTIVE;
+	hdr = ktrgetheader(KTR_NAMEI);
+	/* Payload is the path itself, logged without its terminating NUL. */
+	hdr->ktr_buf = path;
+	hdr->ktr_len = strlen(path);
+	ktrwrite(vp, hdr);
+	FREE(hdr, M_TEMP);
+	p->p_traceflag &= ~KTRFAC_ACTIVE;
+}
+
+ktrgenio(vp, fd, rw, iov, len, error)
+	struct vnode *vp;
+	int fd;
+	enum uio_rw rw;
+	register struct iovec *iov;
+	int len, error;
+{
+	struct proc *p = curproc;	/* XXX */
+	struct ktr_header *kth;
+	register struct ktr_genio *rec;
+	register caddr_t dst;
+	register int left, chunk;
+
+	/* Failed transfers are not logged. */
+	if (error != 0)
+		return;
+	p->p_traceflag |= KTRFAC_ACTIVE;
+	kth = ktrgetheader(KTR_GENIO);
+	MALLOC(rec, struct ktr_genio *, sizeof(struct ktr_genio) + len,
+		M_TEMP, M_WAITOK);
+	rec->ktr_fd = fd;
+	rec->ktr_rw = rw;
+	/* Gather 'len' bytes of user data right behind the fixed part. */
+	dst = (caddr_t)(rec + 1);
+	for (left = len; left > 0; left -= chunk, dst += chunk, iov++) {
+		chunk = iov->iov_len;
+		if (chunk > left)
+			chunk = left;
+		if (copyin(iov->iov_base, dst, (unsigned)chunk))
+			goto done;
+	}
+	kth->ktr_buf = (caddr_t)rec;
+	kth->ktr_len = sizeof (struct ktr_genio) + len;
+
+	ktrwrite(vp, kth);
+done:
+	FREE(kth, M_TEMP);
+	FREE(rec, M_TEMP);
+	p->p_traceflag &= ~KTRFAC_ACTIVE;
+}
+
+ktrpsig(vp, sig, action, mask, code)
+	struct vnode *vp;
+	int sig;
+	sig_t action;
+	int mask, code;
+{
+	struct proc *p = curproc;	/* XXX */
+	struct ktr_header *kth;
+	struct ktr_psig rec;
+
+	/* Build the record on the stack; signo is narrowed to a char. */
+	rec.signo = (char)sig;
+	rec.action = action;
+	rec.mask = mask;
+	rec.code = code;
+	p->p_traceflag |= KTRFAC_ACTIVE;
+	kth = ktrgetheader(KTR_PSIG);
+	kth->ktr_len = sizeof (struct ktr_psig);
+	kth->ktr_buf = (caddr_t)&rec;
+	ktrwrite(vp, kth);
+	FREE(kth, M_TEMP);
+	p->p_traceflag &= ~KTRFAC_ACTIVE;
+}
+
+ktrcsw(vp, out, user)
+	struct vnode *vp;
+	int out, user;
+{
+	struct proc *p = curproc;	/* XXX */
+	struct ktr_header *kth;
+	struct ktr_csw rec;
+
+	rec.out = out;
+	rec.user = user;
+	p->p_traceflag |= KTRFAC_ACTIVE;
+	kth = ktrgetheader(KTR_CSW);
+	kth->ktr_len = sizeof (struct ktr_csw);
+	kth->ktr_buf = (caddr_t)&rec;
+	/* The record is on our stack, so only the header needs freeing. */
+	ktrwrite(vp, kth);
+	FREE(kth, M_TEMP);
+	p->p_traceflag &= ~KTRFAC_ACTIVE;
+}
+
+/* Interface and common routines */
+
+/*
+ * ktrace system call
+ */
+struct ktrace_args {
+	char	*fname;		/* trace file path; unused for KTROP_CLEAR */
+	int	ops;		/* KTROP_* operation, optionally KTRFLAG_DESCEND */
+	int	facs;		/* KTRFAC_* trace points to set or clear */
+	int	pid;		/* target pid, or -pgid for a process group */
+};
+/* ARGSUSED */
+ktrace(curp, uap, retval)
+	struct proc *curp;
+	register struct ktrace_args *uap;
+	int *retval;
+{
+	register struct vnode *vp = NULL;
+	register struct proc *p;
+	struct pgrp *pg;
+	int facs = uap->facs & ~KTRFAC_ROOT;	/* callers cannot pass ROOT */
+	int ops = KTROP(uap->ops);
+	int descend = uap->ops & KTRFLAG_DESCEND;
+	int ret = 0;
+	int error = 0;
+	struct nameidata nd;
+
+	curp->p_traceflag |= KTRFAC_ACTIVE;
+	if (ops != KTROP_CLEAR) {
+		/*
+		 * an operation which requires a file argument.
+		 */
+		NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->fname, curp);
+		if (error = vn_open(&nd, FREAD|FWRITE, 0)) {
+			curp->p_traceflag &= ~KTRFAC_ACTIVE;
+			return (error);
+		}
+		vp = nd.ni_vp;
+		VOP_UNLOCK(vp);
+		if (vp->v_type != VREG) {
+			(void) vn_close(vp, FREAD|FWRITE, curp->p_ucred, curp);
+			curp->p_traceflag &= ~KTRFAC_ACTIVE;
+			return (EACCES);
+		}
+	}
+	/*
+	 * Clear all uses of the tracefile
+	 */
+	if (ops == KTROP_CLEARFILE) {
+		for (p = (struct proc *)allproc; p != NULL; p = p->p_next) {
+			if (p->p_tracep == vp) {
+				if (ktrcanset(curp, p)) {
+					/* drop the proc's VREF; it is not an open */
+					p->p_tracep = NULL;
+					p->p_traceflag = 0;
+					vrele(vp);
+				} else
+					error = EPERM;
+			}
+		}
+		goto done;
+	}
+	/*
+	 * need something to (un)trace (XXX - why is this here?)
+	 */
+	if (!facs) {
+		error = EINVAL;
+		goto done;
+	}
+	/*
+	 * do it
+	 */
+	if (uap->pid < 0) {
+		/*
+		 * by process group
+		 */
+		pg = pgfind(-uap->pid);
+		if (pg == NULL) {
+			error = ESRCH;
+			goto done;
+		}
+		for (p = pg->pg_mem; p != NULL; p = p->p_pgrpnxt)
+			if (descend)
+				ret |= ktrsetchildren(curp, p, ops, facs, vp);
+			else
+				ret |= ktrops(curp, p, ops, facs, vp);
+
+	} else {
+		/*
+		 * by pid
+		 */
+		p = pfind(uap->pid);
+		if (p == NULL) {
+			error = ESRCH;
+			goto done;
+		}
+		if (descend)
+			ret |= ktrsetchildren(curp, p, ops, facs, vp);
+		else
+			ret |= ktrops(curp, p, ops, facs, vp);
+	}
+	if (!ret)		/* no target accepted the change */
+		error = EPERM;
+done:
+	if (vp != NULL)		/* close our own open, with the open's flags */
+		(void) vn_close(vp, FREAD|FWRITE, curp->p_ucred, curp);
+	curp->p_traceflag &= ~KTRFAC_ACTIVE;
+	return (error);
+}
+
+int
+ktrops(curp, p, ops, facs, vp)
+	struct proc *p, *curp;
+	int ops, facs;
+	struct vnode *vp;
+{
+	/* Returns 1 if the change was applied, 0 if curp lacks permission. */
+	if (!ktrcanset(curp, p))
+		return (0);
+	if (ops != KTROP_SET) {
+		/* KTROP_CLEAR: drop the facilities, then the file if none left */
+		p->p_traceflag &= ~facs;
+		if ((p->p_traceflag & KTRFAC_MASK) != 0)
+			return (1);
+		p->p_traceflag = 0;
+		if (p->p_tracep != NULL) {
+			vrele(p->p_tracep);
+			p->p_tracep = NULL;
+		}
+		return (1);
+	}
+	/*
+	 * KTROP_SET: switch to the new trace file if it differs,
+	 * relinquishing any file already in use.
+	 */
+	if (p->p_tracep != vp) {
+		if (p->p_tracep != NULL)
+			vrele(p->p_tracep);
+		VREF(vp);
+		p->p_tracep = vp;
+	}
+	p->p_traceflag |= facs;
+	if (curp->p_ucred->cr_uid == 0)
+		p->p_traceflag |= KTRFAC_ROOT;
+	return (1);
+}
+
+ktrsetchildren(curp, top, ops, facs, vp)
+	struct proc *curp, *top;	/* caller; root of the subtree to walk */
+	int ops, facs;
+	struct vnode *vp;
+{
+	register struct proc *p;
+	register int ret = 0;		/* OR of every ktrops() result */
+
+	p = top;
+	for (;;) {
+		ret |= ktrops(curp, p, ops, facs, vp);
+		/*
+		 * If this process has children, descend to them next,
+		 * otherwise do any siblings, and if done with this level,
+		 * follow back up the tree (but not past top).
+		 */
+		if (p->p_cptr)
+			p = p->p_cptr;
+		else if (p == top)
+			return (ret);
+		else if (p->p_osptr)
+			p = p->p_osptr;
+		else for (;;) {
+			p = p->p_pptr;	/* climb until an older sibling exists */
+			if (p == top)
+				return (ret);
+			if (p->p_osptr) {
+				p = p->p_osptr;
+				break;
+			}
+		}
+	}
+	/*NOTREACHED*/
+}
+
+ktrwrite(vp, kth)
+	struct vnode *vp;
+	register struct ktr_header *kth;
+{
+	struct iovec aiov[2];
+	struct uio auio;
+	register struct proc *p = curproc;	/* XXX */
+	int error;
+
+	if (vp == NULL)
+		return;
+	/* First segment: the fixed-size header. */
+	aiov[0].iov_base = (caddr_t)kth;
+	aiov[0].iov_len = sizeof(struct ktr_header);
+	auio.uio_iov = aiov;
+	auio.uio_iovcnt = 1;
+	auio.uio_resid = sizeof(struct ktr_header);
+	/* Second segment, if any: the record payload. */
+	if (kth->ktr_len > 0) {
+		aiov[1].iov_base = kth->ktr_buf;
+		aiov[1].iov_len = kth->ktr_len;
+		auio.uio_iovcnt = 2;
+		auio.uio_resid += kth->ktr_len;
+	}
+	auio.uio_offset = 0;
+	auio.uio_segflg = UIO_SYSSPACE;
+	auio.uio_rw = UIO_WRITE;
+	auio.uio_procp = (struct proc *)0;
+	VOP_LOCK(vp);
+	error = VOP_WRITE(vp, &auio, IO_UNIT|IO_APPEND, p->p_ucred);
+	VOP_UNLOCK(vp);
+	if (error == 0)
+		return;
+	/* If error encountered, give up tracing on this vnode. */
+	log(LOG_NOTICE, "ktrace write failed, errno %d, tracing stopped\n",
+	    error);
+	for (p = (struct proc *)allproc; p != NULL; p = p->p_next) {
+		if (p->p_tracep != vp)
+			continue;
+		p->p_tracep = NULL;
+		p->p_traceflag = 0;
+		vrele(vp);
+	}
+}
+
+/*
+ * Return true if caller has permission to set the ktracing state
+ * of target.  Root always may.  Anyone else needs an effective uid and
+ * real gid matching a target whose real and saved ids agree, and the
+ * target must not carry KTRFAC_ROOT (tracing last set by root, so only
+ * root may further change it).
+ *
+ * TODO: check groups.  use caller effective gid.
+ */
+ktrcanset(callp, targetp)
+	struct proc *callp, *targetp;
+{
+	register struct pcred *caller = callp->p_cred;
+	register struct pcred *target = targetp->p_cred;
+
+	if (caller->pc_ucred->cr_uid == 0)
+		return (1);
+	if ((targetp->p_traceflag & KTRFAC_ROOT) != 0)
+		return (0);
+	if (caller->pc_ucred->cr_uid != target->p_ruid ||
+	    target->p_ruid != target->p_svuid)
+		return (0);
+	return (caller->p_rgid == target->p_rgid &&	/* XXX */
+	    target->p_rgid == target->p_svgid);
+}
+
+#endif
diff --git a/sys/kern/kern_malloc.c b/sys/kern/kern_malloc.c
new file mode 100644
index 00000000000..c6276bc73cf
--- /dev/null
+++ b/sys/kern/kern_malloc.c
@@ -0,0 +1,381 @@
+/*
+ * Copyright (c) 1987, 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)kern_malloc.c	8.3 (Berkeley) 1/4/94
+ */
+
+#include <sys/param.h>
+#include <sys/proc.h>
+#include <sys/map.h>
+#include <sys/kernel.h>
+#include <sys/malloc.h>
+
+#include <vm/vm.h>
+#include <vm/vm_kern.h>
+
+/* Per-size free lists; malloc() indexes this with BUCKETINDX(size). */
+struct kmembuckets bucket[MINBUCKET + 16];
+/* Per-type statistics and limits; indexed by malloc type. */
+struct kmemstats kmemstats[M_LAST];
+/* Usage records, one per page of the kmem submap; set up in kmeminit(). */
+struct kmemusage *kmemusage;
+/* Bounds of the kmem submap, filled in by kmem_suballoc() in kmeminit(). */
+char *kmembase, *kmemlimit;
+/* Printable names of the malloc types, indexed by type. */
+char *memname[] = INITKMEMNAMES;
+
+#ifdef DIAGNOSTIC
+/*
+ * This structure provides a set of masks to catch unaligned frees.
+ * Entry i has the low i bits set; free() panics if the address being
+ * freed has any of the bits selected for its bucket set.
+ */
+long addrmask[] = { 0,
+	0x00000001, 0x00000003, 0x00000007, 0x0000000f,
+	0x0000001f, 0x0000003f, 0x0000007f, 0x000000ff,
+	0x000001ff, 0x000003ff, 0x000007ff, 0x00000fff,
+	0x00001fff, 0x00003fff, 0x00007fff, 0x0000ffff,
+};
+
+/*
+ * The WEIRD_ADDR is used as known text to copy into free objects so
+ * that modifications after frees can be detected.
+ * MAX_COPY bounds how many bytes of each object are filled and checked.
+ */
+#define WEIRD_ADDR	0xdeadbeef
+#define MAX_COPY	32
+
+/*
+ * Normally the first word of the structure is used to hold the list
+ * pointer for free objects. However, when running with diagnostics,
+ * we use the third and fourth fields, so as to catch modifications
+ * in the most commonly trashed first two words.
+ */
+struct freelist {
+	long	spare0;		/* holds WEIRD_ADDR while the object is free */
+	short	type;		/* type the object had when it was freed */
+	long	spare1;
+	caddr_t	next;		/* next free object in the bucket */
+};
+#else /* !DIAGNOSTIC */
+struct freelist {
+	caddr_t	next;		/* next free object in the bucket */
+};
+#endif /* DIAGNOSTIC */
+
+/*
+ * Allocate a block of kernel memory.
+ *
+ *	size	number of bytes wanted; BUCKETINDX(size) selects the bucket.
+ *	type	malloc type (M_*) used for statistics and limits; must be
+ *		a valid index into kmemstats[]/memname[], i.e. < M_LAST.
+ *	flags	M_NOWAIT makes the call return NULL instead of sleeping.
+ *
+ * Returns the address of the block, or NULL when M_NOWAIT was given and
+ * the type is at its limit, or when kmem_malloc() returns no memory.
+ * The bucket lists are manipulated at splimp().
+ */
+void *
+malloc(size, type, flags)
+	unsigned long size;
+	int type, flags;
+{
+	register struct kmembuckets *kbp;
+	register struct kmemusage *kup;
+	register struct freelist *freep;
+	long indx, npg, allocsize;
+	int s;
+	caddr_t va, cp, savedlist;
+#ifdef DIAGNOSTIC
+	long *end, *lp;
+	int copysize;
+	char *savedtype;
+#endif
+#ifdef KMEMSTATS
+	register struct kmemstats *ksp = &kmemstats[type];
+
+	/*
+	 * kmemstats[] and memname[] have M_LAST entries, so M_LAST
+	 * itself is already out of range.
+	 */
+	if (((unsigned long)type) >= M_LAST)
+		panic("malloc - bogus type");
+#endif
+	indx = BUCKETINDX(size);
+	kbp = &bucket[indx];
+	s = splimp();
+#ifdef KMEMSTATS
+	/* Wait (or fail, for M_NOWAIT) while this type is over its limit. */
+	while (ksp->ks_memuse >= ksp->ks_limit) {
+		if (flags & M_NOWAIT) {
+			splx(s);
+			return ((void *) NULL);
+		}
+		if (ksp->ks_limblocks < 65535)
+			ksp->ks_limblocks++;
+		tsleep((caddr_t)ksp, PSWP+2, memname[type], 0);
+	}
+	ksp->ks_size |= 1 << indx;
+#endif
+#ifdef DIAGNOSTIC
+	copysize = 1 << indx < MAX_COPY ? 1 << indx : MAX_COPY;
+#endif
+	if (kbp->kb_next == NULL) {
+		/* Bucket is empty: get fresh pages and carve them up. */
+		kbp->kb_last = NULL;
+		if (size > MAXALLOCSAVE)
+			allocsize = roundup(size, CLBYTES);
+		else
+			allocsize = 1 << indx;
+		npg = clrnd(btoc(allocsize));
+		va = (caddr_t) kmem_malloc(kmem_map, (vm_size_t)ctob(npg),
+					   !(flags & M_NOWAIT));
+		if (va == NULL) {
+			splx(s);
+			return ((void *) NULL);
+		}
+#ifdef KMEMSTATS
+		kbp->kb_total += kbp->kb_elmpercl;
+#endif
+		kup = btokup(va);
+		kup->ku_indx = indx;
+		if (allocsize > MAXALLOCSAVE) {
+			/*
+			 * Large allocations are handed out directly and
+			 * never placed on a bucket free list.
+			 */
+			if (npg > 65535)
+				panic("malloc: allocation too large");
+			kup->ku_pagecnt = npg;
+#ifdef KMEMSTATS
+			ksp->ks_memuse += allocsize;
+#endif
+			goto out;
+		}
+#ifdef KMEMSTATS
+		kup->ku_freecnt = kbp->kb_elmpercl;
+		kbp->kb_totalfree += kbp->kb_elmpercl;
+#endif
+		/*
+		 * Just in case we blocked while allocating memory,
+		 * and someone else also allocated memory for this
+		 * bucket, don't assume the list is still empty.
+		 */
+		savedlist = kbp->kb_next;
+		/* Thread the new objects together from the top down. */
+		kbp->kb_next = cp = va + (npg * NBPG) - allocsize;
+		for (;;) {
+			freep = (struct freelist *)cp;
+#ifdef DIAGNOSTIC
+			/*
+			 * Copy in known text to detect modification
+			 * after freeing.
+			 */
+			end = (long *)&cp[copysize];
+			for (lp = (long *)cp; lp < end; lp++)
+				*lp = WEIRD_ADDR;
+			freep->type = M_FREE;
+#endif /* DIAGNOSTIC */
+			if (cp <= va)
+				break;
+			cp -= allocsize;
+			freep->next = cp;
+		}
+		freep->next = savedlist;
+		if (kbp->kb_last == NULL)
+			kbp->kb_last = (caddr_t)freep;
+	}
+	/* Take the first object off the bucket's free list. */
+	va = kbp->kb_next;
+	kbp->kb_next = ((struct freelist *)va)->next;
+#ifdef DIAGNOSTIC
+	freep = (struct freelist *)va;
+	savedtype = (unsigned)freep->type < M_LAST ?
+		memname[freep->type] : "???";
+	if (kbp->kb_next &&
+	    !kernacc(kbp->kb_next, sizeof(struct freelist), 0)) {
+		printf("%s of object 0x%x size %d %s %s (invalid addr 0x%x)\n",
+			"Data modified on freelist: word 2.5", va, size,
+			"previous type", savedtype, kbp->kb_next);
+		kbp->kb_next = NULL;
+	}
+	/*
+	 * Restore the fill pattern in the fields that were used while
+	 * the object was free, so the scan below sees only WEIRD_ADDR
+	 * in an unmodified object.
+	 */
+#if BYTE_ORDER == BIG_ENDIAN
+	freep->type = WEIRD_ADDR >> 16;
+#endif
+#if BYTE_ORDER == LITTLE_ENDIAN
+	freep->type = (short)WEIRD_ADDR;
+#endif
+	if (((long)(&freep->next)) & 0x2)
+		freep->next = (caddr_t)((WEIRD_ADDR >> 16)|(WEIRD_ADDR << 16));
+	else
+		freep->next = (caddr_t)WEIRD_ADDR;
+	end = (long *)&va[copysize];
+	for (lp = (long *)va; lp < end; lp++) {
+		if (*lp == WEIRD_ADDR)
+			continue;
+		printf("%s %d of object 0x%x size %d %s %s (0x%x != 0x%x)\n",
+			"Data modified on freelist: word", lp - (long *)va,
+			va, size, "previous type", savedtype, *lp, WEIRD_ADDR);
+		break;
+	}
+	/* Clear the marker free() uses as its quick "looks free" test. */
+	freep->spare0 = 0;
+#endif /* DIAGNOSTIC */
+#ifdef KMEMSTATS
+	kup = btokup(va);
+	if (kup->ku_indx != indx)
+		panic("malloc: wrong bucket");
+	if (kup->ku_freecnt == 0)
+		panic("malloc: lost data");
+	kup->ku_freecnt--;
+	kbp->kb_totalfree--;
+	ksp->ks_memuse += 1 << indx;
+out:
+	kbp->kb_calls++;
+	ksp->ks_inuse++;
+	ksp->ks_calls++;
+	if (ksp->ks_memuse > ksp->ks_maxused)
+		ksp->ks_maxused = ksp->ks_memuse;
+#else
+out:
+#endif
+	splx(s);
+	return ((void *) va);
+}
+
+/*
+ * Free a block of memory allocated by malloc.
+ *
+ *	addr	address previously returned by malloc().
+ *	type	malloc type (M_*) the block was allocated with; used for
+ *		the per-type statistics and recorded in the freed object
+ *		when DIAGNOSTIC is defined.
+ *
+ * Blocks larger than MAXALLOCSAVE are returned to kmem_map; smaller
+ * ones are appended to the tail of their bucket's free list.
+ * The bucket lists are manipulated at splimp().
+ */
+void
+free(addr, type)
+	void *addr;
+	int type;
+{
+	register struct kmembuckets *kbp;
+	register struct kmemusage *kup;
+	register struct freelist *freep;
+	long size;
+	int s;
+#ifdef DIAGNOSTIC
+	caddr_t cp;
+	long *end, *lp, alloc, copysize;
+#endif
+#ifdef KMEMSTATS
+	register struct kmemstats *ksp = &kmemstats[type];
+#endif
+
+	kup = btokup(addr);
+	size = 1 << kup->ku_indx;
+	kbp = &bucket[kup->ku_indx];
+	s = splimp();
+#ifdef DIAGNOSTIC
+	/*
+	 * Check for returns of data that do not point to the
+	 * beginning of the allocation.
+	 */
+	if (size > NBPG * CLSIZE)
+		alloc = addrmask[BUCKETINDX(NBPG * CLSIZE)];
+	else
+		alloc = addrmask[kup->ku_indx];
+	if (((u_long)addr & alloc) != 0)
+		panic("free: unaligned addr 0x%x, size %d, type %s, mask %d\n",
+			addr, size, memname[type], alloc);
+#endif /* DIAGNOSTIC */
+	if (size > MAXALLOCSAVE) {
+		/* Large block: give the pages straight back to the map. */
+		kmem_free(kmem_map, (vm_offset_t)addr, ctob(kup->ku_pagecnt));
+#ifdef KMEMSTATS
+		size = kup->ku_pagecnt << PGSHIFT;
+		ksp->ks_memuse -= size;
+		kup->ku_indx = 0;
+		kup->ku_pagecnt = 0;
+		/* Wake sleepers in malloc() if we just dropped below the limit. */
+		if (ksp->ks_memuse + size >= ksp->ks_limit &&
+		    ksp->ks_memuse < ksp->ks_limit)
+			wakeup((caddr_t)ksp);
+		ksp->ks_inuse--;
+		kbp->kb_total -= 1;
+#endif
+		splx(s);
+		return;
+	}
+	freep = (struct freelist *)addr;
+#ifdef DIAGNOSTIC
+	/*
+	 * Check for multiple frees. Use a quick check to see if
+	 * it looks free before laboriously searching the freelist.
+	 * The list must be followed through the `next' field: with
+	 * DIAGNOSTIC the first word of a free object is spare0 (the
+	 * fill pattern), not the link.
+	 */
+	if (freep->spare0 == WEIRD_ADDR) {
+		for (cp = kbp->kb_next; cp;
+		    cp = ((struct freelist *)cp)->next) {
+			if (addr != cp)
+				continue;
+			printf("multiply freed item 0x%x\n", addr);
+			panic("free: duplicated free");
+		}
+	}
+	/*
+	 * Copy in known text to detect modification after freeing
+	 * and to make it look free. Also, save the type being freed
+	 * so we can list likely culprit if modification is detected
+	 * when the object is reallocated.
+	 */
+	copysize = size < MAX_COPY ? size : MAX_COPY;
+	end = (long *)&((caddr_t)addr)[copysize];
+	for (lp = (long *)addr; lp < end; lp++)
+		*lp = WEIRD_ADDR;
+	freep->type = type;
+#endif /* DIAGNOSTIC */
+#ifdef KMEMSTATS
+	kup->ku_freecnt++;
+	if (kup->ku_freecnt >= kbp->kb_elmpercl) {
+		if (kup->ku_freecnt > kbp->kb_elmpercl)
+			panic("free: multiple frees");
+		else if (kbp->kb_totalfree > kbp->kb_highwat)
+			kbp->kb_couldfree++;
+	}
+	kbp->kb_totalfree++;
+	ksp->ks_memuse -= size;
+	/* Wake sleepers in malloc() if we just dropped below the limit. */
+	if (ksp->ks_memuse + size >= ksp->ks_limit &&
+	    ksp->ks_memuse < ksp->ks_limit)
+		wakeup((caddr_t)ksp);
+	ksp->ks_inuse--;
+#endif
+	/* Append to the tail of the bucket's free list. */
+	if (kbp->kb_next == NULL)
+		kbp->kb_next = addr;
+	else
+		((struct freelist *)kbp->kb_last)->next = addr;
+	freep->next = NULL;
+	kbp->kb_last = addr;
+	splx(s);
+}
+
+/*
+ * Set up the kernel memory allocator: allocate the kmemusage array,
+ * carve the kmem_map submap out of kernel_map and, when KMEMSTATS is
+ * defined, seed the per-bucket counts and the per-type limits.
+ */
+kmeminit()
+{
+	register long indx;
+	int npg;
+#ifdef KMEMSTATS
+	register struct kmembuckets *kbp;
+#endif
+
+	/* Compile-time sanity checks on MAXALLOCSAVE. */
+#if	((MAXALLOCSAVE & (MAXALLOCSAVE - 1)) != 0)
+		ERROR!_kmeminit:_MAXALLOCSAVE_not_power_of_2
+#endif
+#if	(MAXALLOCSAVE > MINALLOCSIZE * 32768)
+		ERROR!_kmeminit:_MAXALLOCSAVE_too_big
+#endif
+#if	(MAXALLOCSAVE < CLBYTES)
+		ERROR!_kmeminit:_MAXALLOCSAVE_too_small
+#endif
+	npg = VM_KMEM_SIZE/ NBPG;
+	kmemusage = (struct kmemusage *) kmem_alloc(kernel_map,
+		(vm_size_t)(npg * sizeof(struct kmemusage)));
+	kmem_map = kmem_suballoc(kernel_map, (vm_offset_t *)&kmembase,
+		(vm_offset_t *)&kmemlimit, (vm_size_t)(npg * NBPG), FALSE);
+#ifdef KMEMSTATS
+	/* Objects per cluster and high-water mark for every bucket. */
+	for (indx = 0, kbp = bucket; indx < MINBUCKET + 16; indx++, kbp++) {
+		kbp->kb_elmpercl =
+		    (1 << indx >= CLBYTES) ? 1 : CLBYTES / (1 << indx);
+		kbp->kb_highwat = 5 * kbp->kb_elmpercl;
+	}
+	/* Every type may use up to 6/10 of the submap. */
+	indx = M_LAST;
+	while (--indx >= 0)
+		kmemstats[indx].ks_limit = npg * NBPG * 6 / 10;
+#endif
+}
diff --git a/sys/kern/kern_physio.c b/sys/kern/kern_physio.c
new file mode 100644
index 00000000000..1eaae3599de
--- /dev/null
+++ b/sys/kern/kern_physio.c
@@ -0,0 +1,93 @@
+/*-
+ * Copyright (c) 1982, 1986, 1990, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	from: @(#)kern_physio.c	8.1 (Berkeley) 6/10/93
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/buf.h>
+#include <sys/conf.h>
+#include <sys/proc.h>
+
+/*
+ * Raw I/O entry point.  The body has been deleted from this source, so
+ * the routine is a stub that always fails with EIO.
+ *
+ * Judging from the callers rawread()/rawwrite(): a1 is the device
+ * strategy routine, a2 a buffer header (may be NULL), a3 the device,
+ * a4 B_READ or B_WRITE, a5 a transfer-size limiter such as minphys(),
+ * and a6 the uio describing the transfer.
+ */
+physio(a1, a2, a3, a4, a5, a6)
+	int (*a1)(); 
+	struct buf *a2;
+	dev_t a3;
+	int a4;
+	u_int (*a5)();
+	struct uio *a6;
+{
+
+	/*
+	 * Body deleted.
+	 */
+	return (EIO);
+}
+
+/*
+ * Stub: the body has been deleted from this source; always returns 0.
+ * Passed to physio() by rawread()/rawwrite().
+ */
+u_int
+minphys(a1)
+	struct buf *a1;
+{
+
+	/*
+	 * Body deleted.
+	 */
+	return (0);
+}
+
+/*
+ * Raw read from a character device on behalf of a user process:
+ * hand the request to physio() with the device's strategy routine.
+ * Returns whatever physio() returns.
+ */
+rawread(dev, uio)
+	dev_t dev;
+	struct uio *uio;
+{
+	register int maj = major(dev);
+
+	return (physio(cdevsw[maj].d_strategy, (struct buf *)NULL,
+	    dev, B_READ, minphys, uio));
+}
+
+/*
+ * Raw write to a character device on behalf of a user process:
+ * hand the request to physio() with the device's strategy routine.
+ * Returns whatever physio() returns.
+ */
+rawwrite(dev, uio)
+	dev_t dev;
+	struct uio *uio;
+{
+	register int maj = major(dev);
+
+	return (physio(cdevsw[maj].d_strategy, (struct buf *)NULL,
+	    dev, B_WRITE, minphys, uio));
+}
diff --git a/sys/kern/kern_proc.c b/sys/kern/kern_proc.c
new file mode 100644
index 00000000000..91d9e212d38
--- /dev/null
+++ b/sys/kern/kern_proc.c
@@ -0,0 +1,401 @@
+/*
+ * Copyright (c) 1982, 1986, 1989, 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)kern_proc.c	8.4 (Berkeley) 1/4/94
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/map.h>
+#include <sys/kernel.h>
+#include <sys/proc.h>
+#include <sys/buf.h>
+#include <sys/acct.h>
+#include <sys/wait.h>
+#include <sys/file.h>
+#include <ufs/ufs/quota.h>
+#include <sys/uio.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/ioctl.h>
+#include <sys/tty.h>
+
+/*
+ * Structure associated with user caching.
+ * One entry per uid that currently has a nonzero process count; entries
+ * hang off the uihashtbl hash chains and are created and destroyed by
+ * chgproccnt().
+ */
+struct uidinfo {
+	struct	uidinfo *ui_next;	/* next entry on the hash chain */
+	struct	uidinfo **ui_prev;	/* address of the pointer to this entry */
+	uid_t	ui_uid;			/* user id this entry counts for */
+	long	ui_proccnt;		/* number of processes charged to ui_uid */
+} **uihashtbl;
+u_long	uihash;		/* size of hash table - 1 */
+#define	UIHASH(uid)	((uid) & uihash)
+
+/*
+ * Allocate a hash table.
+ * Creates uihashtbl via hashinit(), sized from maxproc / 16, and stores
+ * the resulting mask in uihash for use by UIHASH().
+ */
+usrinfoinit()
+{
+
+	uihashtbl = hashinit(maxproc / 16, M_PROC, &uihash);
+}
+
+/*
+ * Adjust by diff the number of processes charged to uid and return
+ * the new count.  An entry is created the first time a uid is charged
+ * and released again when its count drops to zero.  Panics if the
+ * count would go negative, or if a negative diff is applied to a uid
+ * that has no entry.
+ */
+int
+chgproccnt(uid, diff)
+	uid_t	uid;
+	int	diff;
+{
+	register struct uidinfo **headp, *uip, *nextp;
+
+	headp = &uihashtbl[UIHASH(uid)];
+	uip = *headp;
+	while (uip != NULL && uip->ui_uid != uid)
+		uip = uip->ui_next;
+
+	if (uip == NULL) {
+		/* No entry yet: only a positive charge creates one. */
+		if (diff == 0)
+			return(0);
+		if (diff < 0)
+			panic("chgproccnt: lost user");
+		MALLOC(uip, struct uidinfo *, sizeof(*uip), M_PROC, M_WAITOK);
+		/* Insert at the head of the chain. */
+		nextp = *headp;
+		if (nextp != NULL)
+			nextp->ui_prev = &uip->ui_next;
+		uip->ui_next = nextp;
+		uip->ui_prev = headp;
+		*headp = uip;
+		uip->ui_uid = uid;
+		uip->ui_proccnt = diff;
+		return (diff);
+	}
+
+	uip->ui_proccnt += diff;
+	if (uip->ui_proccnt > 0)
+		return (uip->ui_proccnt);
+	if (uip->ui_proccnt < 0)
+		panic("chgproccnt: procs < 0");
+	/* Count reached zero: unlink the entry and release it. */
+	nextp = uip->ui_next;
+	if (nextp != NULL)
+		nextp->ui_prev = uip->ui_prev;
+	*uip->ui_prev = nextp;
+	FREE(uip, M_PROC);
+	return (0);
+}
+
+/*
+ * Is p an inferior of the current process?
+ * Follows the parent links from p; returns 1 if curproc is reached
+ * and 0 if the process with pid 0 is reached first.
+ */
+inferior(p)
+	register struct proc *p;
+{
+
+	while (p != curproc) {
+		if (p->p_pid == 0)
+			return (0);
+		p = p->p_pptr;
+	}
+	return (1);
+}
+
+/*
+ * Locate a process by number.
+ * Searches the pidhash chain for pid; returns the process, or NULL
+ * if no process on the chain has that pid.
+ */
+struct proc *
+pfind(pid)
+	register pid_t pid;
+{
+	register struct proc *p = pidhash[PIDHASH(pid)];
+
+	while (p != NULL && p->p_pid != pid)
+		p = p->p_hash;
+	return (p);
+}
+
+/*
+ * Locate a process group by number.
+ * Searches the pgrphash chain for pgid; returns the process group, or
+ * NULL if no group on the chain has that id.
+ */
+struct pgrp *
+pgfind(pgid)
+	register pid_t pgid;
+{
+	register struct pgrp *pgrp = pgrphash[PIDHASH(pgid)];
+
+	while (pgrp != NULL && pgrp->pg_id != pgid)
+		pgrp = pgrp->pg_hforw;
+	return (pgrp);
+}
+
+/*
+ * Move p to a new or existing process group (and session).
+ *
+ *	p	process to move.
+ *	pgid	id of the process group to enter; if no such group
+ *		exists one is created.
+ *	mksess	nonzero to also create a new session led by p.
+ *
+ * Returns 0 on success, or ESRCH if p could no longer be found after
+ * the (possibly sleeping) allocation of the new process group.
+ */
+enterpgrp(p, pgid, mksess)
+	register struct proc *p;
+	pid_t pgid;
+	int mksess;
+{
+	register struct pgrp *pgrp = pgfind(pgid);
+	register struct proc **pp;
+	int n;
+
+#ifdef DIAGNOSTIC
+	if (pgrp != NULL && mksess)	/* firewalls */
+		panic("enterpgrp: setsid into non-empty pgrp");
+	if (SESS_LEADER(p))
+		panic("enterpgrp: session leader attempted setpgrp");
+#endif
+	if (pgrp == NULL) {
+		pid_t savepid = p->p_pid;
+		struct proc *np;
+		/*
+		 * new process group
+		 */
+#ifdef DIAGNOSTIC
+		if (p->p_pid != pgid)
+			panic("enterpgrp: new pgrp and pid != pgid");
+#endif
+		MALLOC(pgrp, struct pgrp *, sizeof(struct pgrp), M_PGRP,
+		       M_WAITOK);
+		/*
+		 * The allocation may have slept; make sure p is still
+		 * around, and do not leak the new pgrp if it is not.
+		 */
+		if ((np = pfind(savepid)) == NULL || np != p) {
+			FREE(pgrp, M_PGRP);
+			return (ESRCH);
+		}
+		if (mksess) {
+			register struct session *sess;
+
+			/*
+			 * new session
+			 */
+			MALLOC(sess, struct session *, sizeof(struct session),
+				M_SESSION, M_WAITOK);
+			sess->s_leader = p;
+			sess->s_count = 1;
+			sess->s_ttyvp = NULL;
+			sess->s_ttyp = NULL;
+			bcopy(p->p_session->s_login, sess->s_login,
+			    sizeof(sess->s_login));
+			p->p_flag &= ~P_CONTROLT;
+			pgrp->pg_session = sess;
+#ifdef DIAGNOSTIC
+			if (p != curproc)
+				panic("enterpgrp: mksession and p != curproc");
+#endif
+		} else {
+			pgrp->pg_session = p->p_session;
+			pgrp->pg_session->s_count++;
+		}
+		pgrp->pg_id = pgid;
+		pgrp->pg_hforw = pgrphash[n = PIDHASH(pgid)];
+		pgrphash[n] = pgrp;
+		pgrp->pg_jobc = 0;
+		pgrp->pg_mem = NULL;
+	} else if (pgrp == p->p_pgrp)
+		return (0);
+
+	/*
+	 * Adjust eligibility of affected pgrps to participate in job control.
+	 * Increment eligibility counts before decrementing, otherwise we
+	 * could reach 0 spuriously during the first call.
+	 */
+	fixjobc(p, pgrp, 1);
+	fixjobc(p, p->p_pgrp, 0);
+
+	/*
+	 * unlink p from old process group
+	 */
+	pp = &p->p_pgrp->pg_mem;
+	while (*pp != NULL && *pp != p)
+		pp = &(*pp)->p_pgrpnxt;
+	if (*pp != NULL)
+		*pp = p->p_pgrpnxt;
+#ifdef DIAGNOSTIC
+	else
+		panic("enterpgrp: can't find p on old pgrp");
+#endif
+	/*
+	 * delete old if empty
+	 */
+	if (p->p_pgrp->pg_mem == 0)
+		pgdelete(p->p_pgrp);
+	/*
+	 * link into new one
+	 */
+	p->p_pgrp = pgrp;
+	p->p_pgrpnxt = pgrp->pg_mem;
+	pgrp->pg_mem = p;
+	return (0);
+}
+
+/*
+ * Remove process p from its process group.
+ * The group is deleted if p was its last member, and p->p_pgrp is
+ * cleared.  Always returns 0.  With DIAGNOSTIC, panics if p is not on
+ * the member list of its own group.
+ */
+leavepgrp(p)
+	register struct proc *p;
+{
+	register struct proc **pp = &p->p_pgrp->pg_mem;
+
+	/* Find the link that points at p, if there is one. */
+	while (*pp != NULL && *pp != p)
+		pp = &(*pp)->p_pgrpnxt;
+	if (*pp != NULL)
+		*pp = p->p_pgrpnxt;
+#ifdef DIAGNOSTIC
+	else
+		panic("leavepgrp: can't find p in pgrp");
+#endif
+	if (!p->p_pgrp->pg_mem)
+		pgdelete(p->p_pgrp);
+	p->p_pgrp = 0;
+	return (0);
+}
+
+/*
+ * Delete a process group.
+ * Clears the session terminal's reference to the group if it is the
+ * terminal's current group, removes the group from its hash chain,
+ * drops the session reference (freeing the session when the count
+ * reaches zero) and frees the group.  With DIAGNOSTIC, panics if the
+ * group is not on its hash chain.
+ */
+pgdelete(pgrp)
+	register struct pgrp *pgrp;
+{
+	register struct pgrp **pgp = &pgrphash[PIDHASH(pgrp->pg_id)];
+
+	if (pgrp->pg_session->s_ttyp != NULL &&
+	    pgrp->pg_session->s_ttyp->t_pgrp == pgrp)
+		pgrp->pg_session->s_ttyp->t_pgrp = NULL;
+	/* Find the link that points at pgrp, if there is one. */
+	while (*pgp != NULL && *pgp != pgrp)
+		pgp = &(*pgp)->pg_hforw;
+	if (*pgp != NULL)
+		*pgp = pgrp->pg_hforw;
+#ifdef DIAGNOSTIC
+	else
+		panic("pgdelete: can't find pgrp on hash chain");
+#endif
+	if (--pgrp->pg_session->s_count == 0)
+		FREE(pgrp->pg_session, M_SESSION);
+	FREE(pgrp, M_PGRP);
+}
+
+static void orphanpg();
+
+/*
+ * Update the job-control counts (pg_jobc) affected when p enters or
+ * leaves pgrp.  A process "qualifies" its group when its parent is in
+ * a different process group of the same session; a group whose count
+ * drops to zero is orphaned and handed to orphanpg().  Both p's own
+ * contribution and that of p's non-zombie children are adjusted.
+ *
+ *	entering == 0 => p is leaving pgrp.
+ *	entering == 1 => p is entering pgrp.
+ */
+fixjobc(p, pgrp, entering)
+	register struct proc *p;
+	register struct pgrp *pgrp;
+	int entering;
+{
+	register struct pgrp *other;
+	register struct session *sess = pgrp->pg_session;
+	register struct proc *child;
+
+	/*
+	 * p itself: does its parent's group make p count toward pgrp?
+	 */
+	other = p->p_pptr->p_pgrp;
+	if (other != pgrp && other->pg_session == sess) {
+		if (entering)
+			pgrp->pg_jobc++;
+		else if (--pgrp->pg_jobc == 0)
+			orphanpg(pgrp);
+	}
+
+	/*
+	 * p's children: each one in another group of this session
+	 * counts toward its own group because of p.
+	 */
+	for (child = p->p_cptr; child != NULL; child = child->p_osptr) {
+		other = child->p_pgrp;
+		if (other == pgrp || other->pg_session != sess ||
+		    child->p_stat == SZOMB)
+			continue;
+		if (entering)
+			other->pg_jobc++;
+		else if (--other->pg_jobc == 0)
+			orphanpg(other);
+	}
+}
+
+/*
+ * A process group has become orphaned.  If any member is stopped,
+ * send SIGHUP followed by SIGCONT to every member of the group;
+ * otherwise do nothing.
+ */
+static void
+orphanpg(pg)
+	struct pgrp *pg;
+{
+	register struct proc *p;
+
+	/* Look for a stopped member. */
+	for (p = pg->pg_mem; p != NULL; p = p->p_pgrpnxt)
+		if (p->p_stat == SSTOP)
+			break;
+	if (p == NULL)
+		return;
+
+	/* Found one: hang up and continue the whole group. */
+	for (p = pg->pg_mem; p != NULL; p = p->p_pgrpnxt) {
+		psignal(p, SIGHUP);
+		psignal(p, SIGCONT);
+	}
+}
+
+#ifdef debug
+/* DEBUG */
+/*
+ * Debugging aid: print every non-empty pgrphash chain, each process
+ * group on it, and each member process of those groups.
+ */
+pgrpdump()
+{
+	register struct pgrp *pgrp;
+	register struct proc *p;
+	register i;
+
+	for (i = 0; i < PIDHSZ; i++) {
+		if (!pgrphash[i])
+			continue;
+		printf("\tindx %d\n", i);
+		for (pgrp = pgrphash[i]; pgrp != NULL; pgrp = pgrp->pg_hforw) {
+			printf("\tpgrp %x, pgid %d, sess %x, sesscnt %d, mem %x\n",
+			    pgrp, pgrp->pg_id, pgrp->pg_session,
+			    pgrp->pg_session->s_count, pgrp->pg_mem);
+			for (p = pgrp->pg_mem; p != NULL; p = p->p_pgrpnxt)
+				printf("\t\tpid %d addr %x pgrp %x\n",
+				    p->p_pid, p, p->p_pgrp);
+		}
+	}
+}
+#endif /* debug */
diff --git a/sys/kern/kern_prot.c b/sys/kern/kern_prot.c
new file mode 100644
index 00000000000..ef400770e20
--- /dev/null
+++ b/sys/kern/kern_prot.c
@@ -0,0 +1,566 @@
+/*
+ * Copyright (c) 1982, 1986, 1989, 1990, 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)kern_prot.c	8.6 (Berkeley) 1/21/94
+ */
+
+/*
+ * System calls related to processes and protection
+ */
+
+#include <sys/param.h>
+#include <sys/acct.h>
+#include <sys/systm.h>
+#include <sys/ucred.h>
+#include <sys/proc.h>
+#include <sys/timeb.h>
+#include <sys/times.h>
+#include <sys/malloc.h>
+
+/*
+ * Argument structure used by the system calls below whose uap
+ * parameter is never read (getpid, getuid, setsid, ...).
+ */
+struct args {
+	int	dummy;
+};
+
+/*
+ * Return the process id of p in retval[0].  When compiled with
+ * COMPAT_43 or COMPAT_SUNOS the parent's pid is also returned in
+ * retval[1].  Always succeeds.
+ */
+/* ARGSUSED */
+getpid(p, uap, retval)
+	struct proc *p;
+	struct args *uap;
+	int *retval;
+{
+
+	retval[0] = p->p_pid;
+#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
+	retval[1] = p->p_pptr->p_pid;
+#endif
+	return (0);
+}
+
+/*
+ * Return the process id of p's parent in retval[0].  Always succeeds.
+ */
+/* ARGSUSED */
+getppid(p, uap, retval)
+	struct proc *p;
+	struct args *uap;
+	int *retval;
+{
+	register struct proc *parent = p->p_pptr;
+
+	retval[0] = parent->p_pid;
+	return (0);
+}
+
+/*
+ * Return the id of p's process group in retval[0].  Always succeeds.
+ * Note that POSIX getpgrp takes no parameter.
+ */
+getpgrp(p, uap, retval)
+	struct proc *p;
+	struct args *uap;
+	int *retval;
+{
+	register struct pgrp *pg = p->p_pgrp;
+
+	retval[0] = pg->pg_id;
+	return (0);
+}
+
+/*
+ * Return the real user id of p in retval[0].  When compiled with
+ * COMPAT_43 or COMPAT_SUNOS the effective uid is also returned in
+ * retval[1].  Always succeeds.
+ */
+/* ARGSUSED */
+getuid(p, uap, retval)
+	struct proc *p;
+	struct args *uap;
+	int *retval;
+{
+
+	retval[0] = p->p_cred->p_ruid;
+#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
+	retval[1] = p->p_ucred->cr_uid;
+#endif
+	return (0);
+}
+
+/*
+ * Return the effective user id of p in retval[0].  Always succeeds.
+ */
+/* ARGSUSED */
+geteuid(p, uap, retval)
+	struct proc *p;
+	struct args *uap;
+	int *retval;
+{
+	register struct ucred *cr = p->p_ucred;
+
+	retval[0] = cr->cr_uid;
+	return (0);
+}
+
+/*
+ * Return the real group id of p in retval[0].  When compiled with
+ * COMPAT_43 or COMPAT_SUNOS the effective gid (groups[0]) is also
+ * returned in retval[1].  Always succeeds.
+ */
+/* ARGSUSED */
+getgid(p, uap, retval)
+	struct proc *p;
+	struct args *uap;
+	int *retval;
+{
+
+	retval[0] = p->p_cred->p_rgid;
+#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
+	retval[1] = p->p_ucred->cr_groups[0];
+#endif
+	return (0);
+}
+
+/*
+ * Return the effective group id of p in retval[0].  The "egid" is the
+ * first entry of the credential's group list and could also be had
+ * through getgroups; a separate syscall exists because doing that
+ * correctly in a library function is somewhat painful.
+ * Always succeeds.
+ */
+/* ARGSUSED */
+getegid(p, uap, retval)
+	struct proc *p;
+	struct args *uap;
+	int *retval;
+{
+	register struct ucred *cr = p->p_ucred;
+
+	retval[0] = cr->cr_groups[0];
+	return (0);
+}
+
+struct getgroups_args {
+	u_int	gidsetsize;
+	gid_t	*gidset;
+};
+/*
+ * Copy the group list of p out to uap->gidset.
+ *
+ * With gidsetsize == 0 nothing is copied and the number of groups is
+ * returned in *retval.  Otherwise gidsetsize must be at least the
+ * number of groups (else EINVAL); the list is copied out and its
+ * length returned in *retval.  A copyout() failure is returned as is.
+ */
+getgroups(p, uap, retval)
+	struct proc *p;
+	register struct	getgroups_args *uap;
+	int *retval;
+{
+	register struct pcred *pc = p->p_cred;
+	register u_int ngrp = uap->gidsetsize;
+	int error;
+
+	/* Size query only. */
+	if (ngrp == 0) {
+		*retval = pc->pc_ucred->cr_ngroups;
+		return (0);
+	}
+	/* Caller's buffer is too small for the whole list. */
+	if (ngrp < pc->pc_ucred->cr_ngroups)
+		return (EINVAL);
+	ngrp = pc->pc_ucred->cr_ngroups;
+	error = copyout((caddr_t)pc->pc_ucred->cr_groups,
+	    (caddr_t)uap->gidset, ngrp * sizeof(gid_t));
+	if (error)
+		return (error);
+	*retval = ngrp;
+	return (0);
+}
+
+/*
+ * Make p the leader of a new session and of a new process group whose
+ * id is p's pid; that id is returned in *retval.  Fails with EPERM if
+ * p already leads its process group or a process group with that id
+ * already exists.
+ */
+/* ARGSUSED */
+setsid(p, uap, retval)
+	register struct proc *p;
+	struct args *uap;
+	int *retval;
+{
+
+	if (p->p_pgid == p->p_pid || pgfind(p->p_pid))
+		return (EPERM);
+	(void)enterpgrp(p, p->p_pid, 1);
+	*retval = p->p_pid;
+	return (0);
+}
+
+/*
+ * set process group (setpgid/old setpgrp)
+ *
+ * caller does setpgid(targpid, targpgid)
+ *
+ * A pid of 0 names the caller; a pgid of 0 names the target's own pid.
+ *
+ * pid must be caller or child of caller (ESRCH)
+ * if a child
+ *	pid must be in same session (EPERM)
+ *	pid can't have done an exec (EACCES)
+ * if pgid != pid
+ *	there must exist some pid in same session having pgid (EPERM)
+ * pid must not be session leader (EPERM)
+ *
+ * On success the result of enterpgrp() is returned.
+ */
+struct setpgid_args {
+	int	pid;	/* target process id */
+	int	pgid;	/* target pgrp id */
+};
+/* ARGSUSED */
+setpgid(curp, uap, retval)
+	struct proc *curp;
+	register struct setpgid_args *uap;
+	int *retval;
+{
+	register struct proc *targp;		/* target process */
+	register struct pgrp *pgrp;		/* target pgrp */
+
+	if (uap->pid == 0 || uap->pid == curp->p_pid) {
+		targp = curp;
+	} else {
+		targp = pfind(uap->pid);
+		if (targp == 0 || !inferior(targp))
+			return (ESRCH);
+		if (targp->p_session != curp->p_session)
+			return (EPERM);
+		if (targp->p_flag & P_EXEC)
+			return (EACCES);
+	}
+	if (SESS_LEADER(targp))
+		return (EPERM);
+	if (uap->pgid == 0) {
+		uap->pgid = targp->p_pid;
+	} else if (uap->pgid != targp->p_pid) {
+		/* Joining an existing group: it must be in our session. */
+		pgrp = pgfind(uap->pgid);
+		if (pgrp == 0 || pgrp->pg_session != curp->p_session)
+			return (EPERM);
+	}
+	return (enterpgrp(targp, uap->pgid, 0));
+}
+
+struct setuid_args {
+	uid_t	uid;
+};
+/*
+ * Set the real, effective and saved user ids of p to uap->uid.
+ * Permitted when uid equals the current real uid or suser() succeeds;
+ * otherwise the suser() error is returned.  The per-user process count
+ * is moved to the new uid and P_SUGID is set on the process.
+ */
+/* ARGSUSED */
+setuid(p, uap, retval)
+	struct proc *p;
+	struct setuid_args *uap;
+	int *retval;
+{
+	register struct pcred *pc = p->p_cred;
+	register uid_t uid = uap->uid;
+	int error;
+
+	if (uid != pc->p_ruid) {
+		error = suser(pc->pc_ucred, &p->p_acflag);
+		if (error)
+			return (error);
+	}
+	/*
+	 * Permission granted.  Move the process count over to the new
+	 * user, then modify a private copy of the credentials so that
+	 * other holders of the old ones are unaffected.
+	 */
+	(void)chgproccnt(pc->p_ruid, -1);
+	(void)chgproccnt(uid, 1);
+	pc->pc_ucred = crcopy(pc->pc_ucred);
+	pc->pc_ucred->cr_uid = uid;
+	pc->p_ruid = uid;
+	pc->p_svuid = uid;
+	p->p_flag |= P_SUGID;
+	return (0);
+}
+
+struct seteuid_args {
+	uid_t	euid;
+};
+/*
+ * Set the effective user id of p to uap->euid.
+ * Permitted when euid equals the real or saved uid or suser()
+ * succeeds; otherwise the suser() error is returned.  P_SUGID is set
+ * on the process.
+ */
+/* ARGSUSED */
+seteuid(p, uap, retval)
+	struct proc *p;
+	struct seteuid_args *uap;
+	int *retval;
+{
+	register struct pcred *pc = p->p_cred;
+	register uid_t euid = uap->euid;
+	int error;
+
+	if (euid != pc->p_ruid && euid != pc->p_svuid) {
+		error = suser(pc->pc_ucred, &p->p_acflag);
+		if (error)
+			return (error);
+	}
+	/*
+	 * Permission granted.  Modify a private copy of the credentials
+	 * so that other holders of the old ones are unaffected.
+	 */
+	pc->pc_ucred = crcopy(pc->pc_ucred);
+	pc->pc_ucred->cr_uid = euid;
+	p->p_flag |= P_SUGID;
+	return (0);
+}
+
+struct setgid_args {
+	gid_t	gid;
+};
+/*
+ * Set the real, effective (cr_groups[0]) and saved group ids of the
+ * calling process to uap->gid.  A gid other than the current real gid
+ * must pass suser().
+ */
+/* ARGSUSED */
+setgid(p, uap, retval)
+	struct proc *p;
+	struct setgid_args *uap;
+	int *retval;
+{
+	register struct pcred *cred = p->p_cred;
+	register gid_t newgid = uap->gid;
+	int error;
+
+	if (newgid != cred->p_rgid) {
+		error = suser(cred->pc_ucred, &p->p_acflag);
+		if (error)
+			return (error);
+	}
+	/* Unshare the credentials before modifying them. */
+	cred->pc_ucred = crcopy(cred->pc_ucred);
+	cred->pc_ucred->cr_groups[0] = newgid;
+	cred->p_rgid = newgid;
+	cred->p_svgid = newgid;		/* ??? */
+	p->p_flag |= P_SUGID;
+	return (0);
+}
+
+struct setegid_args {
+	gid_t	egid;
+};
+/*
+ * Set only the effective group id (cr_groups[0]) of the calling
+ * process.  Allowed without privilege when the new value equals the
+ * real or saved gid; otherwise suser() must succeed.
+ */
+/* ARGSUSED */
+setegid(p, uap, retval)
+	struct proc *p;
+	struct setegid_args *uap;
+	int *retval;
+{
+	register struct pcred *cred = p->p_cred;
+	register gid_t newegid = uap->egid;
+	int error;
+
+	if (newegid != cred->p_rgid && newegid != cred->p_svgid) {
+		error = suser(cred->pc_ucred, &p->p_acflag);
+		if (error)
+			return (error);
+	}
+	/* Unshare the credentials before modifying them. */
+	cred->pc_ucred = crcopy(cred->pc_ucred);
+	cred->pc_ucred->cr_groups[0] = newegid;
+	p->p_flag |= P_SUGID;
+	return (0);
+}
+
+struct setgroups_args {
+	u_int	gidsetsize;	/* number of entries in gidset */
+	gid_t	*gidset;	/* user-space array of group ids */
+};
+/*
+ * Replace the group list in the calling process's credentials with the
+ * gidsetsize entries at uap->gidset.
+ *
+ * Requires suser(); returns its error otherwise.  Returns EINVAL when
+ * gidsetsize exceeds NGROUPS, or the copyin() error if the user array
+ * cannot be read.  On any error the existing group list is left
+ * untouched.
+ */
+/* ARGSUSED */
+setgroups(p, uap, retval)
+	struct proc *p;
+	struct setgroups_args *uap;
+	int *retval;
+{
+	register struct pcred *pc = p->p_cred;
+	register u_int ngrp;
+	gid_t groups[NGROUPS];
+	int error;
+
+	if (error = suser(pc->pc_ucred, &p->p_acflag))
+		return (error);
+	if ((ngrp = uap->gidsetsize) > NGROUPS)
+		return (EINVAL);
+	/*
+	 * Stage the new list in a local buffer.  Copying straight into
+	 * cr_groups would leave the credentials partially overwritten
+	 * (with a stale cr_ngroups) if copyin() faulted midway.
+	 */
+	if (error = copyin((caddr_t)uap->gidset, (caddr_t)groups,
+	    ngrp * sizeof(gid_t)))
+		return (error);
+	/* Unshare the credentials, then commit the staged list. */
+	pc->pc_ucred = crcopy(pc->pc_ucred);
+	bcopy((caddr_t)groups, (caddr_t)pc->pc_ucred->cr_groups,
+	    ngrp * sizeof(gid_t));
+	pc->pc_ucred->cr_ngroups = ngrp;
+	p->p_flag |= P_SUGID;
+	return (0);
+}
+
+#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
+struct setreuid_args {
+	int	ruid;
+	int	euid;
+};
+/* ARGSUSED */
+osetreuid(p, uap, retval)
+	register struct proc *p;
+	struct setreuid_args *uap;
+	int *retval;
+{
+	register struct pcred *pc = p->p_cred;
+	struct seteuid_args args;
+
+	/*
+	 * A request to set the real uid is taken to mean "let me get
+	 * this uid's privilege back later".  We only verify that this
+	 * will be possible (the value is the current real or saved
+	 * uid); the real uid itself is never changed here.
+	 */
+	if (uap->ruid != (uid_t)-1) {
+		if (uap->ruid != pc->p_ruid && uap->ruid != pc->p_svuid)
+			return (EPERM);
+	}
+	/* An euid of -1 means "leave unchanged"; else defer to seteuid. */
+	if (uap->euid != (uid_t)-1) {
+		args.euid = uap->euid;
+		return (seteuid(p, &args, retval));
+	}
+	return (0);
+}
+
+struct setregid_args {
+	int	rgid;
+	int	egid;
+};
+/* ARGSUSED */
+osetregid(p, uap, retval)
+	register struct proc *p;
+	struct setregid_args *uap;
+	int *retval;
+{
+	register struct pcred *pc = p->p_cred;
+	struct setegid_args args;
+
+	/*
+	 * A request to set the real gid is taken to mean "let me get
+	 * this gid's privilege back later".  We only verify that this
+	 * will be possible (the value is the current real or saved
+	 * gid); the real gid itself is never changed here.
+	 */
+	if (uap->rgid != (gid_t)-1) {
+		if (uap->rgid != pc->p_rgid && uap->rgid != pc->p_svgid)
+			return (EPERM);
+	}
+	/* An egid of -1 means "leave unchanged"; else defer to setegid. */
+	if (uap->egid != (gid_t)-1) {
+		args.egid = uap->egid;
+		return (setegid(p, &args, retval));
+	}
+	return (0);
+}
+#endif /* defined(COMPAT_43) || defined(COMPAT_SUNOS) */
+
+/*
+ * Return 1 if gid appears among the first cr_ngroups entries of
+ * cred->cr_groups, 0 otherwise.
+ */
+groupmember(gid, cred)
+	gid_t gid;
+	register struct ucred *cred;
+{
+	register int i;
+
+	for (i = 0; i < cred->cr_ngroups; i++) {
+		if (cred->cr_groups[i] == gid)
+			return (1);
+	}
+	return (0);
+}
+
+/*
+ * Check whether the given credentials carry "super-user" privilege,
+ * i.e. cr_uid is 0.  On success, if the caller supplied an accounting
+ * flag word, ASU is set in it to record the use of super-powers.
+ * Returns 0 when privileged, EPERM otherwise.
+ */
+suser(cred, acflag)
+	struct ucred *cred;
+	short *acflag;
+{
+	if (cred->cr_uid != 0)
+		return (EPERM);
+	if (acflag)
+		*acflag |= ASU;
+	return (0);
+}
+
+/*
+ * Allocate a credential structure, zero it, and hand it back holding
+ * a single reference.
+ */
+struct ucred *
+crget()
+{
+	register struct ucred *newcr;
+
+	MALLOC(newcr, struct ucred *, sizeof(struct ucred), M_CRED, M_WAITOK);
+	bzero((caddr_t)newcr, sizeof(struct ucred));
+	newcr->cr_ref = 1;
+	return (newcr);
+}
+
+/*
+ * Drop one reference to a credential structure.
+ * The storage is released once the reference count reaches 0.
+ */
+crfree(cr)
+	struct ucred *cr;
+{
+	int s = splimp();			/* ??? */
+
+	cr->cr_ref--;
+	if (cr->cr_ref == 0)
+		FREE((caddr_t)cr, M_CRED);
+	(void) splx(s);
+}
+
+/*
+ * Return a credential structure the caller may modify privately.
+ * If cr has exactly one reference it is returned as is; otherwise its
+ * contents are copied into a fresh structure with one reference and
+ * the caller's reference to the old one is dropped.
+ */
+struct ucred *
+crcopy(cr)
+	struct ucred *cr;
+{
+	struct ucred *priv;
+
+	if (cr->cr_ref != 1) {
+		priv = crget();
+		*priv = *cr;
+		crfree(cr);
+		/* The structure copy also copied the old count. */
+		priv->cr_ref = 1;
+		return (priv);
+	}
+	return (cr);
+}
+
+/*
+ * Duplicate a credential structure into a newly allocated one that
+ * holds a single reference.  The original is left untouched.
+ */
+struct ucred *
+crdup(cr)
+	struct ucred *cr;
+{
+	struct ucred *dup = crget();
+
+	*dup = *cr;
+	/* The structure copy also copied the source's count. */
+	dup->cr_ref = 1;
+	return (dup);
+}
+
+/*
+ * Copy the session's login name out to the user buffer.
+ * At most sizeof(s_login) bytes are copied; a larger namelen is
+ * clamped to that size.  Returns the result of copyout().
+ */
+struct getlogin_args {
+	char	*namebuf;
+	u_int	namelen;
+};
+/* ARGSUSED */
+getlogin(p, uap, retval)
+	struct proc *p;
+	struct getlogin_args *uap;
+	int *retval;
+{
+	char *login = p->p_pgrp->pg_session->s_login;
+
+	if (uap->namelen > sizeof (p->p_pgrp->pg_session->s_login))
+		uap->namelen = sizeof (p->p_pgrp->pg_session->s_login);
+	return (copyout((caddr_t) login, (caddr_t) uap->namebuf,
+	    uap->namelen));
+}
+
+/*
+ * Set the login name of the calling process's session.
+ *
+ * Requires suser(); returns its error otherwise.  The name is read
+ * from user space with copyinstr(); a name that does not fit yields
+ * EINVAL and any other copyinstr() error is returned unchanged.  On
+ * any error the session's existing login name is left untouched.
+ */
+struct setlogin_args {
+	char	*namebuf;
+};
+/* ARGSUSED */
+setlogin(p, uap, retval)
+	struct proc *p;
+	struct setlogin_args *uap;
+	int *retval;
+{
+	char logintmp[sizeof (p->p_pgrp->pg_session->s_login)];
+	u_int len;
+	int error;
+
+	if (error = suser(p->p_ucred, &p->p_acflag))
+		return (error);
+	/*
+	 * Read the name into a scratch buffer first.  Copying straight
+	 * into s_login would leave a partially overwritten login name
+	 * behind whenever copyinstr() failed.
+	 */
+	error = copyinstr((caddr_t) uap->namebuf, (caddr_t) logintmp,
+	    sizeof (logintmp) - 1, &len);
+	if (error == ENAMETOOLONG)
+		return (EINVAL);
+	if (error)
+		return (error);
+	/* Commit exactly the bytes copyinstr() reported as copied. */
+	bcopy((caddr_t) logintmp,
+	    (caddr_t) p->p_pgrp->pg_session->s_login, len);
+	return (0);
+}
diff --git a/sys/kern/kern_resource.c b/sys/kern/kern_resource.c
new file mode 100644
index 00000000000..68e9dfbc86d
--- /dev/null
+++ b/sys/kern/kern_resource.c
@@ -0,0 +1,476 @@
+/*-
+ * Copyright (c) 1982, 1986, 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)kern_resource.c	8.5 (Berkeley) 1/21/94
+ */
+
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/file.h>
+#include <sys/resourcevar.h>
+#include <sys/malloc.h>
+#include <sys/proc.h>
+
+#include <vm/vm.h>
+
+/*
+ * Resource controls and accounting.
+ */
+
+struct getpriority_args {
+	int	which;
+	int	who;
+};
+/*
+ * Store in *retval the lowest p_nice value among the processes
+ * selected by (which, who).  A who of 0 selects the caller, the
+ * caller's process group, or the caller's cr_uid respectively.
+ * Returns EINVAL for an unknown "which" and ESRCH if nothing matched.
+ */
+getpriority(curp, uap, retval)
+	struct proc *curp;
+	register struct getpriority_args *uap;
+	int *retval;
+{
+	register struct proc *p;
+	register int best = PRIO_MAX + 1;	/* sentinel: none found */
+
+	switch (uap->which) {
+
+	case PRIO_PROCESS:
+		if (uap->who != 0)
+			p = pfind(uap->who);
+		else
+			p = curp;
+		if (p != 0)
+			best = p->p_nice;
+		break;
+
+	case PRIO_PGRP: {
+		register struct pgrp *pg;
+
+		if (uap->who != 0) {
+			pg = pgfind(uap->who);
+			if (pg == NULL)
+				break;
+		} else
+			pg = curp->p_pgrp;
+		p = pg->pg_mem;
+		while (p != NULL) {
+			if (p->p_nice < best)
+				best = p->p_nice;
+			p = p->p_pgrpnxt;
+		}
+		break;
+	}
+
+	case PRIO_USER:
+		if (uap->who == 0)
+			uap->who = curp->p_ucred->cr_uid;
+		p = (struct proc *)allproc;
+		while (p != NULL) {
+			if (p->p_ucred->cr_uid == uap->who &&
+			    p->p_nice < best)
+				best = p->p_nice;
+			p = p->p_next;
+		}
+		break;
+
+	default:
+		return (EINVAL);
+	}
+	if (best == PRIO_MAX + 1)
+		return (ESRCH);
+	*retval = best;
+	return (0);
+}
+
+struct setpriority_args {
+	int	which;
+	int	who;
+	int	prio;
+};
+/*
+ * Apply donice() with uap->prio to every process selected by
+ * (which, who).  A who of 0 selects the caller, the caller's process
+ * group, or the caller's cr_uid respectively.  Returns EINVAL for an
+ * unknown "which", ESRCH if nothing matched, and otherwise the result
+ * of the last donice() call made.
+ */
+/* ARGSUSED */
+setpriority(curp, uap, retval)
+	struct proc *curp;
+	register struct setpriority_args *uap;
+	int *retval;
+{
+	register struct proc *p;
+	int matched = 0, error = 0;
+
+	switch (uap->which) {
+
+	case PRIO_PROCESS:
+		if (uap->who != 0)
+			p = pfind(uap->who);
+		else
+			p = curp;
+		if (p != 0) {
+			error = donice(curp, p, uap->prio);
+			matched++;
+		}
+		break;
+
+	case PRIO_PGRP: {
+		register struct pgrp *pg;
+
+		if (uap->who != 0) {
+			pg = pgfind(uap->who);
+			if (pg == NULL)
+				break;
+		} else
+			pg = curp->p_pgrp;
+		p = pg->pg_mem;
+		while (p != NULL) {
+			error = donice(curp, p, uap->prio);
+			matched++;
+			p = p->p_pgrpnxt;
+		}
+		break;
+	}
+
+	case PRIO_USER:
+		if (uap->who == 0)
+			uap->who = curp->p_ucred->cr_uid;
+		p = (struct proc *)allproc;
+		while (p != NULL) {
+			if (p->p_ucred->cr_uid == uap->who) {
+				error = donice(curp, p, uap->prio);
+				matched++;
+			}
+			p = p->p_next;
+		}
+		break;
+
+	default:
+		return (EINVAL);
+	}
+	if (matched == 0)
+		return (ESRCH);
+	return (error);
+}
+
+/*
+ * Set the nice value of chgp to n on behalf of curp.
+ *
+ * Returns EPERM unless the caller's cr_uid or real uid is 0 or
+ * matches chgp's cr_uid.  n is clamped to [PRIO_MIN, PRIO_MAX].
+ * Lowering the nice value below its current setting requires suser()
+ * and yields EACCES otherwise.  On success the new value is stored
+ * and resetpriority() is called; returns 0.
+ */
+donice(curp, chgp, n)
+	register struct proc *curp, *chgp;
+	register int n;
+{
+	register struct pcred *pcred = curp->p_cred;
+	uid_t euid = pcred->pc_ucred->cr_uid;
+	uid_t ruid = pcred->p_ruid;
+
+	if (euid != 0 && ruid != 0 &&
+	    euid != chgp->p_ucred->cr_uid &&
+	    ruid != chgp->p_ucred->cr_uid)
+		return (EPERM);
+	if (n > PRIO_MAX)
+		n = PRIO_MAX;
+	else if (n < PRIO_MIN)
+		n = PRIO_MIN;
+	if (n < chgp->p_nice) {
+		if (suser(pcred->pc_ucred, &curp->p_acflag))
+			return (EACCES);
+	}
+	chgp->p_nice = n;
+	(void)resetpriority(chgp);
+	return (0);
+}
+
+#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
+struct setrlimit_args {
+	u_int	which;
+	struct	orlimit *lim;
+};
+/*
+ * Compatibility setrlimit: read an old-format limit from user space,
+ * widen it into a struct rlimit and hand it to dosetrlimit().
+ */
+/* ARGSUSED */
+osetrlimit(p, uap, retval)
+	struct proc *p;
+	register struct setrlimit_args *uap;
+	int *retval;
+{
+	struct orlimit oldfmt;
+	struct rlimit newfmt;
+	int error;
+
+	error = copyin((caddr_t)uap->lim, (caddr_t)&oldfmt,
+	    sizeof (struct orlimit));
+	if (error)
+		return (error);
+	newfmt.rlim_cur = oldfmt.rlim_cur;
+	newfmt.rlim_max = oldfmt.rlim_max;
+	return (dosetrlimit(p, uap->which, &newfmt));
+}
+
+struct getrlimit_args {
+	u_int	which;
+	struct	orlimit *rlp;
+};
+/*
+ * Compatibility getrlimit: narrow the selected limit into the old
+ * format and copy it out.  A narrowed value of -1 is reported as
+ * 0x7fffffff.  Returns EINVAL for an out-of-range "which".
+ */
+/* ARGSUSED */
+ogetrlimit(p, uap, retval)
+	struct proc *p;
+	register struct getrlimit_args *uap;
+	int *retval;
+{
+	struct orlimit oldfmt;
+	struct rlimit *cur;
+
+	if (uap->which >= RLIM_NLIMITS)
+		return (EINVAL);
+	cur = &p->p_rlimit[uap->which];
+	oldfmt.rlim_cur = cur->rlim_cur;
+	oldfmt.rlim_max = cur->rlim_max;
+	if (oldfmt.rlim_cur == -1)
+		oldfmt.rlim_cur = 0x7fffffff;
+	if (oldfmt.rlim_max == -1)
+		oldfmt.rlim_max = 0x7fffffff;
+	return (copyout((caddr_t)&oldfmt, (caddr_t)uap->rlp,
+	    sizeof(oldfmt)));
+}
+#endif /* COMPAT_43 || COMPAT_SUNOS */
+
+struct __setrlimit_args {
+	u_int	which;
+	struct	rlimit *lim;
+};
+/*
+ * Read a struct rlimit from user space and apply it to resource
+ * "which" through dosetrlimit().
+ */
+/* ARGSUSED */
+setrlimit(p, uap, retval)
+	struct proc *p;
+	register struct __setrlimit_args *uap;
+	int *retval;
+{
+	struct rlimit newlim;
+	int error;
+
+	error = copyin((caddr_t)uap->lim, (caddr_t)&newlim,
+	    sizeof (struct rlimit));
+	if (error)
+		return (error);
+	return (dosetrlimit(p, uap->which, &newlim));
+}
+
+/*
+ * Install *limp as the limit for resource "which" of process p.
+ *
+ * Returns EINVAL if which is out of range, or the suser() error if
+ * either new value exceeds the current hard limit and the caller is
+ * not privileged.  Otherwise returns 0.  Note that *limp itself is
+ * adjusted in place (soft limit capped at the hard limit, and both
+ * capped at system maxima for some resources) before being stored.
+ */
+dosetrlimit(p, which, limp)
+	struct proc *p;
+	u_int which;
+	struct rlimit *limp;
+{
+	register struct rlimit *alimp;
+	extern unsigned maxdmap;
+	int error;
+
+	if (which >= RLIM_NLIMITS)
+		return (EINVAL);
+	alimp = &p->p_rlimit[which];
+	/* Going above the current hard limit needs privilege. */
+	if (limp->rlim_cur > alimp->rlim_max || 
+	    limp->rlim_max > alimp->rlim_max)
+		if (error = suser(p->p_ucred, &p->p_acflag))
+			return (error);
+	if (limp->rlim_cur > limp->rlim_max)
+		limp->rlim_cur = limp->rlim_max;
+	/*
+	 * If the plimit structure has other references and is not
+	 * flagged PL_SHAREMOD, give up our reference and switch to a
+	 * private copy; alimp must then be recomputed to point into it.
+	 */
+	if (p->p_limit->p_refcnt > 1 &&
+	    (p->p_limit->p_lflags & PL_SHAREMOD) == 0) {
+		p->p_limit->p_refcnt--;
+		p->p_limit = limcopy(p->p_limit);
+		alimp = &p->p_rlimit[which];
+	}
+
+	switch (which) {
+
+	case RLIMIT_DATA:
+		if (limp->rlim_cur > maxdmap)
+			limp->rlim_cur = maxdmap;
+		if (limp->rlim_max > maxdmap)
+			limp->rlim_max = maxdmap;
+		break;
+
+	case RLIMIT_STACK:
+		/*
+		 * NOTE(review): the stack limit is clamped against
+		 * maxdmap, the same bound used for RLIMIT_DATA above --
+		 * confirm whether a separate stack maximum was intended.
+		 */
+		if (limp->rlim_cur > maxdmap)
+			limp->rlim_cur = maxdmap;
+		if (limp->rlim_max > maxdmap)
+			limp->rlim_max = maxdmap;
+		/*
+		 * Stack is allocated to the max at exec time with only
+		 * "rlim_cur" bytes accessible.  If stack limit is going
+		 * up make more accessible, if going down make inaccessible.
+		 */
+		if (limp->rlim_cur != alimp->rlim_cur) {
+			vm_offset_t addr;
+			vm_size_t size;
+			vm_prot_t prot;
+
+			/*
+			 * The affected range is the difference between the
+			 * old and new soft limits, starting at the lower
+			 * of the two stack bottoms below USRSTACK.
+			 */
+			if (limp->rlim_cur > alimp->rlim_cur) {
+				prot = VM_PROT_ALL;
+				size = limp->rlim_cur - alimp->rlim_cur;
+				addr = USRSTACK - limp->rlim_cur;
+			} else {
+				prot = VM_PROT_NONE;
+				size = alimp->rlim_cur - limp->rlim_cur;
+				addr = USRSTACK - alimp->rlim_cur;
+			}
+			addr = trunc_page(addr);
+			size = round_page(size);
+			/* The result of the protection change is ignored. */
+			(void) vm_map_protect(&p->p_vmspace->vm_map,
+					      addr, addr+size, prot, FALSE);
+		}
+		break;
+
+	case RLIMIT_NOFILE:
+		if (limp->rlim_cur > maxfiles)
+			limp->rlim_cur = maxfiles;
+		if (limp->rlim_max > maxfiles)
+			limp->rlim_max = maxfiles;
+		break;
+
+	case RLIMIT_NPROC:
+		if (limp->rlim_cur > maxproc)
+			limp->rlim_cur = maxproc;
+		if (limp->rlim_max > maxproc)
+			limp->rlim_max = maxproc;
+		break;
+	}
+	/* Store the (possibly adjusted) limit. */
+	*alimp = *limp;
+	return (0);
+}
+
+struct __getrlimit_args {
+	u_int	which;
+	struct	rlimit *rlp;
+};
+/*
+ * Copy the limit for resource "which" out to uap->rlp.
+ * Returns EINVAL for an out-of-range "which", else the copyout()
+ * result.
+ */
+/* ARGSUSED */
+getrlimit(p, uap, retval)
+	struct proc *p;
+	register struct __getrlimit_args *uap;
+	int *retval;
+{
+	register u_int which = uap->which;
+
+	if (which >= RLIM_NLIMITS)
+		return (EINVAL);
+	return (copyout((caddr_t)&p->p_rlimit[which], (caddr_t)uap->rlp,
+	    sizeof (struct rlimit)));
+}
+
+/*
+ * Transform the running time and tick information in proc p into user,
+ * system, and interrupt time usage.
+ *
+ * The process's accumulated run time (p_rtime, plus the current time
+ * slice when p is curproc) is divided among *up, *sp and *ip in
+ * proportion to the p_uticks, p_sticks and p_iticks counters.  ip may
+ * be NULL when the caller does not want interrupt time.  If no ticks
+ * have been recorded, all results are set to zero.
+ */
+calcru(p, up, sp, ip)
+	register struct proc *p;
+	register struct timeval *up;
+	register struct timeval *sp;
+	register struct timeval *ip;
+{
+	register u_quad_t u, st, ut, it, tot;
+	register long sec, usec;
+	register int s;
+	struct timeval tv;
+
+	/* Sample the tick counters with the statistics clock blocked. */
+	s = splstatclock();
+	st = p->p_sticks;
+	ut = p->p_uticks;
+	it = p->p_iticks;
+	splx(s);
+
+	tot = st + ut + it;
+	if (tot == 0) {
+		up->tv_sec = up->tv_usec = 0;
+		sp->tv_sec = sp->tv_usec = 0;
+		if (ip != NULL)
+			ip->tv_sec = ip->tv_usec = 0;
+		return;
+	}
+
+	sec = p->p_rtime.tv_sec;
+	usec = p->p_rtime.tv_usec;
+	if (p == curproc) {
+		/*
+		 * Adjust for the current time slice.  This is actually fairly
+		 * important since the error here is on the order of a time
+		 * quantum, which is much greater than the sampling error.
+		 */
+		microtime(&tv);
+		sec += tv.tv_sec - runtime.tv_sec;
+		usec += tv.tv_usec - runtime.tv_usec;
+	}
+	/*
+	 * Widen before multiplying: done in a 32-bit long, the product
+	 * sec * 1000000 overflows after roughly 4294 seconds of run time.
+	 * sec and usec are signed so that a negative microsecond delta
+	 * from the adjustment above simply borrows from the seconds term.
+	 */
+	u = (u_quad_t)sec * 1000000 + usec;
+	st = (u * st) / tot;
+	sp->tv_sec = st / 1000000;
+	sp->tv_usec = st % 1000000;
+	ut = (u * ut) / tot;
+	up->tv_sec = ut / 1000000;
+	up->tv_usec = ut % 1000000;
+	if (ip != NULL) {
+		it = (u * it) / tot;
+		ip->tv_sec = it / 1000000;
+		ip->tv_usec = it % 1000000;
+	}
+}
+
+struct getrusage_args {
+	int	who;
+	struct	rusage *rusage;
+};
+/*
+ * Copy resource usage out to uap->rusage.  RUSAGE_SELF reports the
+ * process's own p_ru record after refreshing its user and system
+ * times with calcru(); RUSAGE_CHILDREN reports the p_cru record.
+ * Any other selector yields EINVAL.
+ */
+/* ARGSUSED */
+getrusage(p, uap, retval)
+	register struct proc *p;
+	register struct getrusage_args *uap;
+	int *retval;
+{
+	register struct rusage *src;
+
+	if (uap->who == RUSAGE_SELF) {
+		src = &p->p_stats->p_ru;
+		calcru(p, &src->ru_utime, &src->ru_stime, NULL);
+	} else if (uap->who == RUSAGE_CHILDREN)
+		src = &p->p_stats->p_cru;
+	else
+		return (EINVAL);
+	return (copyout((caddr_t)src, (caddr_t)uap->rusage,
+	    sizeof (struct rusage)));
+}
+
+/*
+ * Accumulate the usage record ru2 into ru: the user and system times
+ * are summed, ru_maxrss becomes the larger of the two, and every long
+ * field from ru_first through ru_last (inclusive) is summed.
+ */
+ruadd(ru, ru2)
+	register struct rusage *ru, *ru2;
+{
+	register long *dst, *src, *last;
+
+	timevaladd(&ru->ru_utime, &ru2->ru_utime);
+	timevaladd(&ru->ru_stime, &ru2->ru_stime);
+	if (ru->ru_maxrss < ru2->ru_maxrss)
+		ru->ru_maxrss = ru2->ru_maxrss;
+	dst = &ru->ru_first;
+	src = &ru2->ru_first;
+	last = &ru->ru_last;
+	while (dst <= last)
+		*dst++ += *src++;
+}
+
+/*
+ * Allocate a new plimit structure holding a copy of lim's resource
+ * limits.  The copy starts with no flags set and a reference count
+ * of one.  These structures are shared copy-on-write after fork and
+ * copied when a limit is changed.
+ */
+struct plimit *
+limcopy(lim)
+	struct plimit *lim;
+{
+	register struct plimit *newlim;
+
+	MALLOC(newlim, struct plimit *, sizeof(struct plimit),
+	    M_SUBPROC, M_WAITOK);
+	newlim->p_refcnt = 1;
+	newlim->p_lflags = 0;
+	bcopy(lim->pl_rlimit, newlim->pl_rlimit,
+	    sizeof(struct rlimit) * RLIM_NLIMITS);
+	return (newlim);
+}
diff --git a/sys/kern/kern_sig.c b/sys/kern/kern_sig.c
new file mode 100644
index 00000000000..3dcff922c39
--- /dev/null
+++ b/sys/kern/kern_sig.c
@@ -0,0 +1,1197 @@
+/*
+ * Copyright (c) 1982, 1986, 1989, 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)kern_sig.c	8.7 (Berkeley) 4/18/94
+ */
+
+#define	SIGPROP		/* include signal properties table */
+#include <sys/param.h>
+#include <sys/signalvar.h>
+#include <sys/resourcevar.h>
+#include <sys/namei.h>
+#include <sys/vnode.h>
+#include <sys/proc.h>
+#include <sys/systm.h>
+#include <sys/timeb.h>
+#include <sys/times.h>
+#include <sys/buf.h>
+#include <sys/acct.h>
+#include <sys/file.h>
+#include <sys/kernel.h>
+#include <sys/wait.h>
+#include <sys/ktrace.h>
+#include <sys/syslog.h>
+#include <sys/stat.h>
+
+#include <machine/cpu.h>
+
+#include <vm/vm.h>
+#include <sys/user.h>		/* for coredump */
+
+/*
+ * Can process p, with pcred pc, send the signal signum to process q?
+ *
+ * Evaluates non-zero when any of the following holds:
+ *	- the sender's cr_uid is 0;
+ *	- the sender's p_ruid or cr_uid equals the target's p_ruid
+ *	  or cr_uid;
+ *	- signum is SIGCONT and both processes are in the same session.
+ * Arguments may be evaluated more than once, so pass expressions
+ * without side effects.
+ */
+#define CANSIGNAL(p, pc, q, signum) \
+	((pc)->pc_ucred->cr_uid == 0 || \
+	    (pc)->p_ruid == (q)->p_cred->p_ruid || \
+	    (pc)->pc_ucred->cr_uid == (q)->p_cred->p_ruid || \
+	    (pc)->p_ruid == (q)->p_ucred->cr_uid || \
+	    (pc)->pc_ucred->cr_uid == (q)->p_ucred->cr_uid || \
+	    ((signum) == SIGCONT && (q)->p_session == (p)->p_session))
+
+struct sigaction_args {
+	int	signum;
+	struct	sigaction *nsa;
+	struct	sigaction *osa;
+};
+/*
+ * Examine and/or change the action for a signal.  If osa is set the
+ * current action is copied out first; if nsa is set the new action is
+ * copied in and installed with setsigvec().  Returns EINVAL for an
+ * out-of-range signal number or for SIGKILL/SIGSTOP, or the
+ * copyin/copyout error.
+ */
+/* ARGSUSED */
+sigaction(p, uap, retval)
+	struct proc *p;
+	register struct sigaction_args *uap;
+	int *retval;
+{
+	struct sigaction vec;
+	register struct sigacts *ps = p->p_sigacts;
+	register int signum = uap->signum;
+	int bit, error;
+
+	if (signum <= 0 || signum >= NSIG)
+		return (EINVAL);
+	if (signum == SIGKILL || signum == SIGSTOP)
+		return (EINVAL);
+	if (uap->osa) {
+		/* Rebuild a sigaction from the per-signal state. */
+		bit = sigmask(signum);
+		vec.sa_handler = ps->ps_sigact[signum];
+		vec.sa_mask = ps->ps_catchmask[signum];
+		vec.sa_flags = 0;
+		if ((ps->ps_sigonstack & bit) != 0)
+			vec.sa_flags |= SA_ONSTACK;
+		if ((ps->ps_sigintr & bit) == 0)
+			vec.sa_flags |= SA_RESTART;
+		if (p->p_flag & P_NOCLDSTOP)
+			vec.sa_flags |= SA_NOCLDSTOP;
+		error = copyout((caddr_t)&vec, (caddr_t)uap->osa,
+		    sizeof (vec));
+		if (error)
+			return (error);
+	}
+	if (uap->nsa) {
+		error = copyin((caddr_t)uap->nsa, (caddr_t)&vec,
+		    sizeof (vec));
+		if (error)
+			return (error);
+		setsigvec(p, signum, &vec);
+	}
+	return (0);
+}
+
+/*
+ * Install the action described by sa for signal signum in process p.
+ *
+ * Records the handler and catch mask (minus sigcantmask), updates the
+ * per-signal interrupt/on-stack bits from sa_flags, tracks
+ * SA_NOCLDSTOP for SIGCHLD in p_flag, and then recomputes the
+ * process's p_sigignore / p_sigcatch bits for this signal.  No
+ * validation of signum is done here; callers check it first.
+ */
+setsigvec(p, signum, sa)
+	register struct proc *p;
+	int signum;
+	register struct sigaction *sa;
+{
+	register struct sigacts *ps = p->p_sigacts;
+	register int bit;
+
+	bit = sigmask(signum);
+	/*
+	 * Change setting atomically.
+	 * NOTE(review): the level returned by splhigh() is discarded and
+	 * the function leaves via spl0() rather than restoring it.
+	 */
+	(void) splhigh();
+	ps->ps_sigact[signum] = sa->sa_handler;
+	ps->ps_catchmask[signum] = sa->sa_mask &~ sigcantmask;
+	/* ps_sigintr holds the inverse sense of SA_RESTART. */
+	if ((sa->sa_flags & SA_RESTART) == 0)
+		ps->ps_sigintr |= bit;
+	else
+		ps->ps_sigintr &= ~bit;
+	if (sa->sa_flags & SA_ONSTACK)
+		ps->ps_sigonstack |= bit;
+	else
+		ps->ps_sigonstack &= ~bit;
+#ifdef COMPAT_SUNOS
+	if (sa->sa_flags & SA_USERTRAMP)
+		ps->ps_usertramp |= bit;
+	else
+		ps->ps_usertramp &= ~bit;
+#endif
+	/* SA_NOCLDSTOP is only honoured when given for SIGCHLD. */
+	if (signum == SIGCHLD) {
+		if (sa->sa_flags & SA_NOCLDSTOP)
+			p->p_flag |= P_NOCLDSTOP;
+		else
+			p->p_flag &= ~P_NOCLDSTOP;
+	}
+	/*
+	 * Set bit in p_sigignore for signals that are set to SIG_IGN,
+	 * and for signals set to SIG_DFL where the default is to ignore.
+	 * However, don't put SIGCONT in p_sigignore,
+	 * as we have to restart the process.
+	 */
+	if (sa->sa_handler == SIG_IGN ||
+	    (sigprop[signum] & SA_IGNORE && sa->sa_handler == SIG_DFL)) {
+		p->p_siglist &= ~bit;		/* never to be seen again */
+		if (signum != SIGCONT)
+			p->p_sigignore |= bit;	/* easier in psignal */
+		p->p_sigcatch &= ~bit;
+	} else {
+		p->p_sigignore &= ~bit;
+		if (sa->sa_handler == SIG_DFL)
+			p->p_sigcatch &= ~bit;
+		else
+			p->p_sigcatch |= bit;
+	}
+	(void) spl0();
+}
+
+/*
+ * Initialize signal state for process 0: mark as ignored every signal
+ * whose default action is to be ignored, except SIGCONT.
+ */
+void
+siginit(p)
+	struct proc *p;
+{
+	register int sig;
+
+	for (sig = 0; sig < NSIG; sig++) {
+		if ((sigprop[sig] & SA_IGNORE) == 0)
+			continue;
+		if (sig == SIGCONT)
+			continue;
+		p->p_sigignore |= sigmask(sig);
+	}
+}
+
+/*
+ * Reset signal state for an exec of the specified process: every
+ * caught signal reverts to SIG_DFL and the alternate signal stack
+ * is disabled.
+ */
+void
+execsigs(p)
+	register struct proc *p;
+{
+	register struct sigacts *ps = p->p_sigacts;
+	register int sig, bit;
+
+	/*
+	 * Walk the set of caught signals, lowest first, clearing each
+	 * one.  Held signals stay held through p_sigmask; a caught
+	 * signal whose default is "ignore" becomes ignored (SIGCONT
+	 * excepted) and any pending instance of it is discarded.
+	 */
+	while (p->p_sigcatch) {
+		sig = ffs((long)p->p_sigcatch);
+		bit = sigmask(sig);
+		ps->ps_sigact[sig] = SIG_DFL;
+		p->p_sigcatch &= ~bit;
+		if ((sigprop[sig] & SA_IGNORE) == 0)
+			continue;
+		if (sig != SIGCONT)
+			p->p_sigignore |= bit;
+		p->p_siglist &= ~bit;
+	}
+	/*
+	 * Back to the ordinary user stack: no alternate stack is
+	 * configured and no sigacts flags remain set.
+	 */
+	ps->ps_sigstk.ss_base = 0;
+	ps->ps_sigstk.ss_size = 0;
+	ps->ps_sigstk.ss_flags = SA_DISABLE;
+	ps->ps_flags = 0;
+}
+
+/*
+ * Manipulate the signal mask.
+ * The new mask arrives by value rather than through a pointer, and
+ * the previous mask is handed back as the system call's return
+ * value; the library stub does the rest.  Signals in sigcantmask can
+ * never be blocked.  Returns EINVAL for an unknown "how".
+ */
+struct sigprocmask_args {
+	int	how;
+	sigset_t mask;
+};
+sigprocmask(p, uap, retval)
+	register struct proc *p;
+	struct sigprocmask_args *uap;
+	int *retval;
+{
+	int error = 0;
+
+	*retval = p->p_sigmask;
+	(void) splhigh();
+
+	if (uap->how == SIG_BLOCK)
+		p->p_sigmask |= uap->mask &~ sigcantmask;
+	else if (uap->how == SIG_UNBLOCK)
+		p->p_sigmask &= ~uap->mask;
+	else if (uap->how == SIG_SETMASK)
+		p->p_sigmask = uap->mask &~ sigcantmask;
+	else
+		error = EINVAL;
+
+	(void) spl0();
+	return (error);
+}
+
+struct sigpending_args {
+	int	dummy;
+};
+/*
+ * Return the process's p_siglist as the system call's value.
+ * Always succeeds.
+ */
+/* ARGSUSED */
+sigpending(p, uap, retval)
+	struct proc *p;
+	struct sigpending_args *uap;
+	int *retval;
+{
+	int pending = p->p_siglist;
+
+	*retval = pending;
+	return (0);
+}
+
+#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
+/*
+ * Generalized interface signal handler, 4.3-compatible.
+ *
+ * If osv is set, the current action for signum is copied out in
+ * sigvec form; if nsv is set, a new action is copied in and installed
+ * through setsigvec().  Returns EINVAL for an out-of-range signal
+ * number or for SIGKILL/SIGSTOP, or the copyin/copyout error.
+ */
+struct osigvec_args {
+	int	signum;
+	struct	sigvec *nsv;
+	struct	sigvec *osv;
+};
+/* ARGSUSED */
+osigvec(p, uap, retval)
+	struct proc *p;
+	register struct osigvec_args *uap;
+	int *retval;
+{
+	struct sigvec vec;
+	register struct sigacts *ps = p->p_sigacts;
+	register struct sigvec *sv;
+	register int signum;
+	int bit, error;
+
+	signum = uap->signum;
+	if (signum <= 0 || signum >= NSIG ||
+	    signum == SIGKILL || signum == SIGSTOP)
+		return (EINVAL);
+	sv = &vec;
+	if (uap->osv) {
+		/* Rebuild a sigvec from the per-signal state. */
+		*(sig_t *)&sv->sv_handler = ps->ps_sigact[signum];
+		sv->sv_mask = ps->ps_catchmask[signum];
+		bit = sigmask(signum);
+		sv->sv_flags = 0;
+		if ((ps->ps_sigonstack & bit) != 0)
+			sv->sv_flags |= SV_ONSTACK;
+		if ((ps->ps_sigintr & bit) != 0)
+			sv->sv_flags |= SV_INTERRUPT;
+#ifndef COMPAT_SUNOS
+		if (p->p_flag & P_NOCLDSTOP)
+			sv->sv_flags |= SA_NOCLDSTOP;
+#endif
+		if (error = copyout((caddr_t)sv, (caddr_t)uap->osv,
+		    sizeof (vec)))
+			return (error);
+	}
+	if (uap->nsv) {
+		if (error = copyin((caddr_t)uap->nsv, (caddr_t)sv,
+		    sizeof (vec)))
+			return (error);
+#ifdef COMPAT_SUNOS
+		/*
+		 * SunOS uses this bit (4, aka SA_DISABLE) as SV_RESETHAND,
+		 * `reset to SIG_DFL on delivery'. We have no such option
+		 * now or ever!
+		 */
+		if (sv->sv_flags & SA_DISABLE)
+			return (EINVAL);
+		sv->sv_flags |= SA_USERTRAMP;
+#endif
+		/*
+		 * Flip the restart bit so the flags read correctly as
+		 * sigaction flags, then hand the sigvec to setsigvec()
+		 * through a cast.
+		 * NOTE(review): this presumes struct sigvec and struct
+		 * sigaction share a layout and that SV_INTERRUPT occupies
+		 * the SA_RESTART bit -- confirm against the headers.
+		 */
+		sv->sv_flags ^= SA_RESTART;	/* opposite of SV_INTERRUPT */
+		setsigvec(p, signum, (struct sigaction *)sv);
+	}
+	return (0);
+}
+
+struct osigblock_args {
+	int	mask;
+};
+/*
+ * Add the signals in uap->mask (other than those in sigcantmask) to
+ * the process's blocked set and return the previous mask as the
+ * system call's value.
+ */
+osigblock(p, uap, retval)
+	register struct proc *p;
+	struct osigblock_args *uap;
+	int *retval;
+{
+	int toblock = uap->mask &~ sigcantmask;
+
+	(void) splhigh();
+	*retval = p->p_sigmask;
+	p->p_sigmask |= toblock;
+	(void) spl0();
+	return (0);
+}
+
+struct osigsetmask_args {
+	int	mask;
+};
+/*
+ * Replace the process's blocked set with uap->mask (less the signals
+ * in sigcantmask) and return the previous mask as the system call's
+ * value.
+ */
+osigsetmask(p, uap, retval)
+	struct proc *p;
+	struct osigsetmask_args *uap;
+	int *retval;
+{
+	int newmask = uap->mask &~ sigcantmask;
+
+	(void) splhigh();
+	*retval = p->p_sigmask;
+	p->p_sigmask = newmask;
+	(void) spl0();
+	return (0);
+}
+#endif /* COMPAT_43 || COMPAT_SUNOS */
+
+/*
+ * Suspend the process until a signal arrives, with the supplied mask
+ * in effect while it waits.  Note the nonstandard calling convention:
+ * the libc stub passes the mask itself, not a pointer to it, which
+ * saves a copyin.
+ */
+struct sigsuspend_args {
+	sigset_t mask;
+};
+/* ARGSUSED */
+sigsuspend(p, uap, retval)
+	register struct proc *p;
+	struct sigsuspend_args *uap;
+	int *retval;
+{
+	register struct sigacts *acts = p->p_sigacts;
+
+	/*
+	 * The mask in force before the call has to come back once the
+	 * signal handler has run.  Stash it in the sigacts structure
+	 * and set SAS_OLDMASK to say that it is there.
+	 */
+	acts->ps_oldmask = p->p_sigmask;
+	acts->ps_flags |= SAS_OLDMASK;
+	p->p_sigmask = uap->mask &~ sigcantmask;
+	/* Go back to sleep for as long as tsleep() returns 0. */
+	for (;;) {
+		if (tsleep((caddr_t) acts, PPAUSE|PCATCH, "pause", 0) != 0)
+			break;
+	}
+	/* always return EINTR rather than ERESTART... */
+	return (EINTR);
+}
+
+#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
+struct osigstack_args {
+	struct	sigstack *nss;
+	struct	sigstack *oss;
+};
+/*
+ * Old-style signal stack call.  If oss is set, the current stack
+ * base and on-stack state are copied out; if nss is set, a new stack
+ * base is copied in and recorded with an unspecified (0) size.
+ * Returns 0 or the copyin/copyout error.
+ */
+/* ARGSUSED */
+osigstack(p, uap, retval)
+	struct proc *p;
+	register struct osigstack_args *uap;
+	int *retval;
+{
+	struct sigstack ss;
+	struct sigacts *acts = p->p_sigacts;
+	int error = 0;
+
+	ss.ss_sp = acts->ps_sigstk.ss_base;
+	ss.ss_onstack = acts->ps_sigstk.ss_flags & SA_ONSTACK;
+	if (uap->oss) {
+		error = copyout((caddr_t)&ss, (caddr_t)uap->oss,
+		    sizeof (struct sigstack));
+		if (error)
+			return (error);
+	}
+	if (uap->nss == 0)
+		return (error);
+	error = copyin((caddr_t)uap->nss, (caddr_t)&ss, sizeof (ss));
+	if (error)
+		return (error);
+	acts->ps_sigstk.ss_base = ss.ss_sp;
+	acts->ps_sigstk.ss_size = 0;
+	acts->ps_sigstk.ss_flags |= ss.ss_onstack & SA_ONSTACK;
+	acts->ps_flags |= SAS_ALTSTACK;
+	return (0);
+}
+
+struct sigaltstack_args {
+	struct	sigaltstack *nss;
+	struct	sigaltstack *oss;
+};
+/*
+ * Examine and/or change the alternate signal stack.  If oss is set
+ * the current description is copied out; if nss is set a new one is
+ * copied in.  Disabling the stack while it is in use yields EINVAL;
+ * a new stack smaller than MINSIGSTKSZ yields ENOMEM.
+ */
+/* ARGSUSED */
+sigaltstack(p, uap, retval)
+	struct proc *p;
+	register struct sigaltstack_args *uap;
+	int *retval;
+{
+	struct sigacts *acts = p->p_sigacts;
+	struct sigaltstack newstk;
+	int error;
+
+	/* With no alternate stack configured, report it as disabled. */
+	if ((acts->ps_flags & SAS_ALTSTACK) == 0)
+		acts->ps_sigstk.ss_flags |= SA_DISABLE;
+	if (uap->oss) {
+		error = copyout((caddr_t)&acts->ps_sigstk,
+		    (caddr_t)uap->oss, sizeof (struct sigaltstack));
+		if (error)
+			return (error);
+	}
+	if (uap->nss == 0)
+		return (0);
+	error = copyin((caddr_t)uap->nss, (caddr_t)&newstk,
+	    sizeof (newstk));
+	if (error)
+		return (error);
+	if ((newstk.ss_flags & SA_DISABLE) == 0) {
+		if (newstk.ss_size < MINSIGSTKSZ)
+			return (ENOMEM);
+		acts->ps_flags |= SAS_ALTSTACK;
+		acts->ps_sigstk = newstk;
+		return (0);
+	}
+	/* Request to disable: refuse while running on the stack. */
+	if (acts->ps_sigstk.ss_flags & SA_ONSTACK)
+		return (EINVAL);
+	acts->ps_flags &= ~SAS_ALTSTACK;
+	acts->ps_sigstk.ss_flags = newstk.ss_flags;
+	return (0);
+}
+
+struct kill_args {
+	int	pid;
+	int	signum;
+};
+/*
+ * Send signum to a process or set of processes chosen by pid:
+ *	pid > 0		that single process;
+ *	pid == 0	the caller's process group;
+ *	pid == -1	broadcast;
+ *	pid < -1	the process group -pid.
+ * A signum of 0 performs the existence and permission checks only.
+ * Returns EINVAL for an out-of-range signal, ESRCH if the process
+ * does not exist, EPERM if CANSIGNAL refuses, or killpg1()'s result.
+ */
+/* ARGSUSED */
+kill(cp, uap, retval)
+	register struct proc *cp;
+	register struct kill_args *uap;
+	int *retval;
+{
+	register struct proc *target;
+	register struct pcred *pc = cp->p_cred;
+
+	if ((u_int)uap->signum >= NSIG)
+		return (EINVAL);
+	if (uap->pid > 0) {
+		/* kill single process */
+		target = pfind(uap->pid);
+		if (target == NULL)
+			return (ESRCH);
+		if (!CANSIGNAL(cp, pc, target, uap->signum))
+			return (EPERM);
+		if (uap->signum)
+			psignal(target, uap->signum);
+		return (0);
+	}
+	if (uap->pid == -1)	/* broadcast signal */
+		return (killpg1(cp, uap->signum, 0, 1));
+	if (uap->pid == 0)	/* signal own process group */
+		return (killpg1(cp, uap->signum, 0, 0));
+	/* negative explicit process group */
+	return (killpg1(cp, uap->signum, -uap->pid, 0));
+}
+
+#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
+struct okillpg_args {
+	int	pgid;
+	int	signum;
+};
+/*
+ * Compatibility killpg: send signum to process group pgid through
+ * killpg1().  Returns EINVAL for an out-of-range signal number.
+ */
+/* ARGSUSED */
+okillpg(p, uap, retval)
+	struct proc *p;
+	register struct okillpg_args *uap;
+	int *retval;
+{
+	register int sig = uap->signum;
+
+	if ((u_int)sig >= NSIG)
+		return (EINVAL);
+	return (killpg1(p, sig, uap->pgid, 0));
+}
+
+/*
+ * Common code for kill process group/broadcast kill.
+ * cp is the calling process.  With "all" set the signal goes to every
+ * eligible process except the caller; otherwise it goes to process
+ * group pgid (0 meaning the caller's own group).  A signum of 0 only
+ * counts the eligible processes.  Returns ESRCH if the group does not
+ * exist or no eligible process was found, 0 otherwise.
+ */
+killpg1(cp, signum, pgid, all)
+	register struct proc *cp;
+	int signum, pgid, all;
+{
+	register struct proc *p;
+	register struct pcred *pc = cp->p_cred;
+	struct pgrp *pgrp;
+	int nfound = 0;
+
+	if (all) {
+		/*
+		 * broadcast
+		 */
+		for (p = (struct proc *)allproc; p != NULL; p = p->p_next) {
+			if (p->p_pid <= 1 || (p->p_flag & P_SYSTEM))
+				continue;
+			if (p == cp || !CANSIGNAL(cp, pc, p, signum))
+				continue;
+			nfound++;
+			if (signum)
+				psignal(p, signum);
+		}
+		return (nfound ? 0 : ESRCH);
+	}
+
+	if (pgid != 0) {
+		pgrp = pgfind(pgid);
+		if (pgrp == NULL)
+			return (ESRCH);
+	} else {
+		/*
+		 * zero pgid means send to my process group.
+		 */
+		pgrp = cp->p_pgrp;
+	}
+	for (p = pgrp->pg_mem; p != NULL; p = p->p_pgrpnxt) {
+		if (p->p_pid <= 1 || (p->p_flag & P_SYSTEM))
+			continue;
+		if (p->p_stat == SZOMB || !CANSIGNAL(cp, pc, p, signum))
+			continue;
+		nfound++;
+		if (signum)
+			psignal(p, signum);
+	}
+	return (nfound ? 0 : ESRCH);
+}
+
+/*
+ * Send a signal to a process group; pgid 0 or an unknown group is a no-op.
+ */
+void
+gsignal(pgid, signum)
+	int pgid, signum;
+{
+	struct pgrp *grp = pgid ? pgfind(pgid) : NULL;
+
+	if (grp != NULL)
+		pgsignal(grp, signum, 0);
+}
+
+/*
+ * Post signum to each member of pgrp (a null pgrp is ignored).  When
+ * checkctty is nonzero, only members with P_CONTROLT set are signalled.
+ */
+void
+pgsignal(pgrp, signum, checkctty)
+	struct pgrp *pgrp;
+	int signum, checkctty;
+{
+	register struct proc *q = pgrp ? pgrp->pg_mem : NULL;
+
+	for (; q != NULL; q = q->p_pgrpnxt) {
+		if (!checkctty || (q->p_flag & P_CONTROLT) != 0)
+			psignal(q, signum);
+	}
+}
+
+/*
+ * Send a signal caused by a trap to the current process.  If it will be
+ * caught immediately, deliver it with correct code.  Otherwise record the
+ * code (tagged with its signal, as postsig() checks) and post it normally.
+ */
+void
+trapsignal(p, signum, code)
+	struct proc *p;
+	register int signum;
+	u_int code;
+{
+	register struct sigacts *ps = p->p_sigacts;
+	int mask = sigmask(signum);
+
+	if ((p->p_flag & P_TRACED) == 0 && (p->p_sigcatch & mask) != 0 &&
+	    (p->p_sigmask & mask) == 0) {
+		p->p_stats->p_ru.ru_nsignals++;
+#ifdef KTRACE
+		if (KTRPOINT(p, KTR_PSIG))
+			ktrpsig(p->p_tracep, signum, ps->ps_sigact[signum],
+				p->p_sigmask, code);
+#endif
+		sendsig(ps->ps_sigact[signum], signum, p->p_sigmask, code);
+		p->p_sigmask |= ps->ps_catchmask[signum] | mask;
+	} else {
+		ps->ps_code = code;	/* XXX for core dump/debugger */
+		ps->ps_sig = signum;	/* postsig() uses ps_code only on a match */
+		psignal(p, signum);
+	}
+}
+
+/*
+ * Send the signal to the process.  If the signal has an action, the action
+ * is usually performed by the target process rather than the caller; we add
+ * the signal to the set of pending signals for the process.
+ *
+ * Exceptions:
+ *   o When a stop signal is sent to a sleeping process that takes the
+ *     default action, the process is stopped without awakening it.
+ *   o SIGCONT restarts stopped processes (or puts them back to sleep)
+ *     regardless of the signal action (eg, blocked or ignored).
+ *
+ * Other ignored signals are discarded immediately.  Panics on a bad signum.
+ */
+void
+psignal(p, signum)
+	register struct proc *p;
+	register int signum;
+{
+	register int s, prop;		/* saved spl; sigprop[] entry for signum */
+	register sig_t action;		/* SIG_DFL, SIG_HOLD or SIG_CATCH */
+	int mask;			/* sigmask(signum) */
+
+	if ((u_int)signum >= NSIG || signum == 0)
+		panic("psignal signal number");
+	mask = sigmask(signum);
+	prop = sigprop[signum];
+
+	/*
+	 * If proc is traced, always give parent a chance.
+	 */
+	if (p->p_flag & P_TRACED)
+		action = SIG_DFL;
+	else {
+		/*
+		 * If the signal is being ignored,
+		 * then we forget about it immediately.
+		 * (Note: we don't set SIGCONT in p_sigignore,
+		 * and if it is set to SIG_IGN,
+		 * action will be SIG_DFL here.)
+		 */
+		if (p->p_sigignore & mask)
+			return;
+		if (p->p_sigmask & mask)
+			action = SIG_HOLD;
+		else if (p->p_sigcatch & mask)
+			action = SIG_CATCH;
+		else
+			action = SIG_DFL;
+	}
+	/* Un-nice an untraced process taking a default-action SA_KILL signal. */
+	if (p->p_nice > NZERO && action == SIG_DFL && (prop & SA_KILL) &&
+	    (p->p_flag & P_TRACED) == 0)
+		p->p_nice = NZERO;
+	/* A continue signal removes the stopsigmask bits from the pending set. */
+	if (prop & SA_CONT)
+		p->p_siglist &= ~stopsigmask;
+
+	if (prop & SA_STOP) {
+		/*
+		 * If sending a tty stop signal to a member of an orphaned
+		 * process group, discard the signal here if the action
+		 * is default; don't stop the process below if sleeping,
+		 * and don't clear any pending SIGCONT.
+		 */
+		if (prop & SA_TTYSTOP && p->p_pgrp->pg_jobc == 0 &&
+		    action == SIG_DFL)
+		        return;
+		p->p_siglist &= ~contsigmask;
+	}
+	p->p_siglist |= mask;
+
+	/*
+	 * Defer further processing for signals which are held,
+	 * except that stopped processes must be continued by SIGCONT.
+	 */
+	if (action == SIG_HOLD && ((prop & SA_CONT) == 0 || p->p_stat != SSTOP))
+		return;
+	s = splhigh();		/* spl stays raised until splx(s) at out: */
+	switch (p->p_stat) {
+
+	case SSLEEP:
+		/*
+		 * If process is sleeping uninterruptibly
+		 * we can't interrupt the sleep... the signal will
+		 * be noticed when the process returns through
+		 * trap() or syscall().
+		 */
+		if ((p->p_flag & P_SINTR) == 0)
+			goto out;
+		/*
+		 * Process is sleeping and traced... make it runnable
+		 * so it can discover the signal in issignal() and stop
+		 * for the parent.
+		 */
+		if (p->p_flag & P_TRACED)
+			goto run;
+		/*
+		 * If SIGCONT is default (or ignored) and process is
+		 * asleep, we are finished; the process should not
+		 * be awakened.
+		 */
+		if ((prop & SA_CONT) && action == SIG_DFL) {
+			p->p_siglist &= ~mask;
+			goto out;
+		}
+		/*
+		 * When a sleeping process receives a stop
+		 * signal, process immediately if possible.
+		 * All other (caught or default) signals
+		 * cause the process to run.
+		 */
+		if (prop & SA_STOP) {
+			if (action != SIG_DFL)
+				goto runfast;
+			/*
+			 * If a child holding parent blocked,
+			 * stopping could cause deadlock.
+			 */
+			if (p->p_flag & P_PPWAIT)
+				goto out;
+			p->p_siglist &= ~mask;
+			p->p_xstat = signum;
+			if ((p->p_pptr->p_flag & P_NOCLDSTOP) == 0)
+				psignal(p->p_pptr, SIGCHLD);
+			stop(p);
+			goto out;
+		} else
+			goto runfast;
+		/*NOTREACHED*/
+
+	case SSTOP:
+		/*
+		 * If traced process is already stopped,
+		 * then no further action is necessary.
+		 */
+		if (p->p_flag & P_TRACED)
+			goto out;
+
+		/*
+		 * Kill signal always sets processes running.
+		 */
+		if (signum == SIGKILL)
+			goto runfast;
+
+		if (prop & SA_CONT) {
+			/*
+			 * If SIGCONT is default (or ignored), we continue the
+			 * process but don't leave the signal in p_siglist, as
+			 * it has no further action.  If SIGCONT is held, we
+			 * continue the process and leave the signal in
+			 * p_siglist.  If the process catches SIGCONT, let it
+			 * handle the signal itself.  If it isn't waiting on
+			 * an event, then it goes back to run state.
+			 * Otherwise, process goes back to sleep state.
+			 */
+			if (action == SIG_DFL)
+				p->p_siglist &= ~mask;
+			if (action == SIG_CATCH)
+				goto runfast;
+			if (p->p_wchan == 0)
+				goto run;
+			p->p_stat = SSLEEP;
+			goto out;
+		}
+
+		if (prop & SA_STOP) {
+			/*
+			 * Already stopped, don't need to stop again.
+			 * (If we did the shell could get confused.)
+			 */
+			p->p_siglist &= ~mask;		/* take it away */
+			goto out;
+		}
+
+		/*
+		 * If process is sleeping interruptibly, then simulate a
+		 * wakeup so that when it is continued, it will be made
+		 * runnable and can look at the signal.  But don't make
+		 * the process runnable, leave it stopped.
+		 */
+		if (p->p_wchan && p->p_flag & P_SINTR)
+			unsleep(p);
+		goto out;
+
+	default:
+		/*
+		 * SRUN, SIDL, SZOMB do nothing with the signal,
+		 * other than kicking ourselves if we are running.
+		 * It will either never be noticed, or noticed very soon.
+		 */
+		if (p == curproc)
+			signotify(p);
+		goto out;
+	}
+	/*NOTREACHED*/
+
+runfast:
+	/*
+	 * Raise priority to at least PUSER.
+	 */
+	if (p->p_priority > PUSER)
+		p->p_priority = PUSER;
+run:
+	setrunnable(p);
+out:
+	splx(s);
+}
+
+/*
+ * If the current process has received a signal (should be caught or cause
+ * termination, should interrupt current syscall), return the signal number.
+ * Stop signals with default action are processed immediately, then cleared;
+ * they aren't returned.  This is checked after each entry to the system for
+ * a syscall or trap (though this can usually be done without calling issignal
+ * by checking the pending signal masks in the CURSIG macro.)  Returns 0 when
+ * no signal is to be taken.  The normal call sequence is
+ *
+ *	while (signum = CURSIG(curproc))
+ *		postsig(signum);
+ */
+issignal(p)
+	register struct proc *p;
+{
+	register int signum, mask, prop;
+
+	for (;;) {
+		mask = p->p_siglist & ~p->p_sigmask;
+		if (p->p_flag & P_PPWAIT)
+			mask &= ~stopsigmask;
+		if (mask == 0)	 	/* no signal to send */
+			return (0);
+		signum = ffs((long)mask);
+		mask = sigmask(signum);
+		prop = sigprop[signum];
+		/*
+		 * We should see pending but ignored signals
+		 * only if P_TRACED was on when they were posted.
+		 */
+		if (mask & p->p_sigignore && (p->p_flag & P_TRACED) == 0) {
+			p->p_siglist &= ~mask;
+			continue;
+		}
+		if (p->p_flag & P_TRACED && (p->p_flag & P_PPWAIT) == 0) {
+			/*
+			 * If traced, always stop, and stay
+			 * stopped until released by the parent.
+			 */
+			p->p_xstat = signum;
+			psignal(p->p_pptr, SIGCHLD);
+			do {
+				stop(p);
+				mi_switch();
+			} while (!trace_req(p) && p->p_flag & P_TRACED);
+
+			/*
+			 * If the traced bit got turned off, go back up
+			 * to the top to rescan signals.  This ensures
+			 * that p_sig* and ps_sigact are consistent.
+			 */
+			if ((p->p_flag & P_TRACED) == 0)
+				continue;
+
+			/*
+			 * If parent wants us to take the signal,
+			 * then it will leave it in p->p_xstat;
+			 * otherwise we just look for signals again.
+			 */
+			p->p_siglist &= ~mask;	/* clear the old signal */
+			if ((signum = p->p_xstat) == 0)
+				continue;
+
+			/*
+			 * Put the new signal into p_siglist.  If the
+			 * signal is being masked, look for other signals.
+			 */
+			mask = sigmask(signum);
+			prop = sigprop[signum];	/* parent may have changed signum */
+			p->p_siglist |= mask;
+			if (p->p_sigmask & mask)
+				continue;
+		}
+
+		/*
+		 * Decide whether the signal should be returned.
+		 * Return the signal's number, or fall through
+		 * to clear it from the pending mask.
+		 */
+		switch ((int)p->p_sigacts->ps_sigact[signum]) {
+
+		case SIG_DFL:
+			/*
+			 * Don't take default actions on system processes.
+			 */
+			if (p->p_pid <= 1) {
+#ifdef DIAGNOSTIC
+				/*
+				 * Are you sure you want to ignore SIGSEGV
+				 * in init? XXX
+				 */
+				printf("Process (pid %d) got signal %d\n",
+					p->p_pid, signum);
+#endif
+				break;		/* == ignore */
+			}
+			/*
+			 * If there is a pending stop signal to process
+			 * with default action, stop here,
+			 * then clear the signal.  However,
+			 * if process is member of an orphaned
+			 * process group, ignore tty stop signals.
+			 */
+			if (prop & SA_STOP) {
+				if (p->p_flag & P_TRACED ||
+				    (p->p_pgrp->pg_jobc == 0 &&
+				    prop & SA_TTYSTOP))
+					break;	/* == ignore */
+				p->p_xstat = signum;
+				stop(p);
+				if ((p->p_pptr->p_flag & P_NOCLDSTOP) == 0)
+					psignal(p->p_pptr, SIGCHLD);
+				mi_switch();
+				break;
+			} else if (prop & SA_IGNORE) {
+				/*
+				 * Except for SIGCONT, shouldn't get here.
+				 * Default action is to ignore; drop it.
+				 */
+				break;		/* == ignore */
+			} else
+				return (signum);
+			/*NOTREACHED*/
+
+		case SIG_IGN:
+			/*
+			 * Masking above should prevent us ever trying
+			 * to take action on an ignored signal other
+			 * than SIGCONT, unless process is traced.
+			 */
+			if ((prop & SA_CONT) == 0 &&
+			    (p->p_flag & P_TRACED) == 0)
+				printf("issignal\n");
+			break;		/* == ignore */
+
+		default:
+			/*
+			 * This signal has an action, let
+			 * postsig() process it.
+			 */
+			return (signum);
+		}
+		p->p_siglist &= ~mask;		/* take the signal! */
+	}
+	/* NOTREACHED */
+}
+
+/*
+ * Put the argument process into the stopped state and notify the parent
+ * via wakeup.  Signals are handled elsewhere.  The process must not be
+ * on the run queue.
+ */
+stop(p)
+	register struct proc *p;
+{
+	/* P_WAITED is cleared too; presumably so a wait reports this stop. */
+	p->p_stat = SSTOP;
+	p->p_flag &= ~P_WAITED;
+	wakeup((caddr_t)p->p_pptr);	/* the parent proc is the wait channel */
+}
+
+/*
+ * Take the action for the specified signal
+ * from the current set of pending signals.
+ */
+void
+postsig(signum)
+	register int signum;
+{
+	register struct proc *p = curproc;
+	register struct sigacts *ps = p->p_sigacts;
+	register sig_t action;
+	int code, mask, returnmask;	/* returnmask: mask handed to sendsig() */
+
+#ifdef DIAGNOSTIC
+	if (signum == 0)
+		panic("postsig");
+#endif
+	mask = sigmask(signum);
+	p->p_siglist &= ~mask;		/* no longer pending */
+	action = ps->ps_sigact[signum];
+#ifdef KTRACE
+	if (KTRPOINT(p, KTR_PSIG))
+		ktrpsig(p->p_tracep,
+		    signum, action, ps->ps_flags & SAS_OLDMASK ?
+		    ps->ps_oldmask : p->p_sigmask, 0);
+#endif
+	if (action == SIG_DFL) {
+		/*
+		 * Default action, where the default is to kill
+		 * the process.  (Other cases were ignored above.)
+		 */
+		sigexit(p, signum);
+		/* NOTREACHED */
+	} else {
+		/*
+		 * If we get here, the signal must be caught.
+		 */
+#ifdef DIAGNOSTIC
+		if (action == SIG_IGN || (p->p_sigmask & mask))
+			panic("postsig action");
+#endif
+		/*
+		 * Set the new mask value and also defer further
+		 * occurrences of this signal.
+		 *
+		 * Special case: user has done a sigpause.  Here the
+		 * current mask is not of interest, but rather the
+		 * mask from before the sigpause is what we want
+		 * restored after the signal processing is completed.
+		 */
+		(void) splhigh();	/* spl raised around the mask update */
+		if (ps->ps_flags & SAS_OLDMASK) {
+			returnmask = ps->ps_oldmask;
+			ps->ps_flags &= ~SAS_OLDMASK;
+		} else
+			returnmask = p->p_sigmask;
+		p->p_sigmask |= ps->ps_catchmask[signum] | mask;
+		(void) spl0();
+		p->p_stats->p_ru.ru_nsignals++;
+		if (ps->ps_sig != signum) {
+			code = 0;	/* saved code belongs to another signal */
+		} else {
+			code = ps->ps_code;
+			ps->ps_code = 0;	/* the code is consumed once */
+		}
+		sendsig(action, signum, returnmask, code);
+	}
+}
+
+/*
+ * Kill the current process (passed as p) for the stated reason `why'.
+ */
+killproc(p, why)
+	struct proc *p;
+	char *why;
+{
+	/* Report through log() and uprintf() first, then post SIGKILL. */
+	log(LOG_ERR, "pid %d was killed: %s\n", p->p_pid, why);
+	uprintf("sorry, pid %d was killed: %s\n", p->p_pid, why);
+	psignal(p, SIGKILL);
+}
+
+/*
+ * Terminate the current process with the given signal, writing a core file
+ * first when the signal's properties include SA_CORE.  Masked and caught
+ * signal state is deliberately bypassed so that unrecoverable failures can
+ * end the process.  The accounting flags record the signal death, ps_sig
+ * keeps the signal number for the debugger, and WCOREFLAG is or'ed into the
+ * status when the dump succeeded.  Calls exit1() and does not return.
+ */
+sigexit(p, signum)
+	register struct proc *p;
+	int signum;
+{
+	int status = signum, dumps = sigprop[signum] & SA_CORE;
+
+	p->p_acflag |= AXSIG;
+	if (dumps)
+		p->p_sigacts->ps_sig = signum;
+	if (dumps && coredump(p) == 0)
+		status |= WCOREFLAG;
+	exit1(p, W_EXITCODE(0, status));
+	/* NOTREACHED */
+}
+
+/*
+ * Dump core, into a file named "progname.core", unless the process was
+ * setuid/setgid or the image reaches RLIMIT_CORE.  Returns 0 or an errno.
+ */
+coredump(p)
+	register struct proc *p;
+{
+	register struct vnode *vp;
+	register struct pcred *pcred = p->p_cred;
+	register struct ucred *cred = pcred->pc_ucred;
+	register struct vmspace *vm = p->p_vmspace;
+	struct nameidata nd;
+	struct vattr vattr;
+	int error, error1;		/* error1: result of vn_close() */
+	char name[MAXCOMLEN+6];		/* progname.core */
+
+	if (pcred->p_svuid != pcred->p_ruid || pcred->p_svgid != pcred->p_rgid)
+		return (EFAULT);	/* saved and real ids differ */
+	if (ctob(UPAGES + vm->vm_dsize + vm->vm_ssize) >=
+	    p->p_rlimit[RLIMIT_CORE].rlim_cur)
+		return (EFAULT);	/* would reach the core size limit */
+	sprintf(name, "%s.core", p->p_comm);
+	NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, name, p);
+	if (error = vn_open(&nd,
+	    O_CREAT | FWRITE, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH))
+		return (error);
+	vp = nd.ni_vp;
+
+	/* Don't dump to non-regular files or files with links. */
+	if (vp->v_type != VREG ||
+	    VOP_GETATTR(vp, &vattr, cred, p) || vattr.va_nlink != 1) {
+		error = EFAULT;
+		goto out;
+	}
+	VATTR_NULL(&vattr);
+	vattr.va_size = 0;		/* truncate whatever was there */
+	LEASE_CHECK(vp, p, cred, LEASE_WRITE);
+	VOP_SETATTR(vp, &vattr, cred, p);
+	p->p_acflag |= ACORE;
+	bcopy(p, &p->p_addr->u_kproc.kp_proc, sizeof(struct proc));
+	fill_eproc(p, &p->p_addr->u_kproc.kp_eproc);
+	error = cpu_coredump(p, vp, cred);
+	if (error == 0)		/* data segment, at file offset ctob(UPAGES) */
+		error = vn_rdwr(UIO_WRITE, vp, vm->vm_daddr,
+		    (int)ctob(vm->vm_dsize), (off_t)ctob(UPAGES), UIO_USERSPACE,
+		    IO_NODELOCKED|IO_UNIT, cred, (int *) NULL, p);
+	if (error == 0)		/* stack, placed right after the data segment */
+		error = vn_rdwr(UIO_WRITE, vp,
+		    (caddr_t) trunc_page(USRSTACK - ctob(vm->vm_ssize)),
+		    round_page(ctob(vm->vm_ssize)),
+		    (off_t)ctob(UPAGES) + ctob(vm->vm_dsize), UIO_USERSPACE,
+		    IO_NODELOCKED|IO_UNIT, cred, (int *) NULL, p);
+out:
+	VOP_UNLOCK(vp);
+	error1 = vn_close(vp, FWRITE, cred, p);
+	if (error == 0)		/* the first failure wins; else report close */
+		error = error1;
+	return (error);
+}
+
+/*
+ * Nonexistent system call-- signal process (may want to handle it).
+ * Flag error in case process won't see signal immediately (blocked or ignored).
+ */
+struct nosys_args {
+	int	dummy;		/* placeholder; nosys() reads no arguments */
+};
+/* ARGSUSED */
+nosys(p, args, retval)
+	struct proc *p;
+	struct nosys_args *args;
+	int *retval;
+{
+	/* SIGSYS goes to the caller; EINVAL is the call's own result. */
+	psignal(p, SIGSYS);
+	return (EINVAL);
+}
diff --git a/sys/kern/kern_subr.c b/sys/kern/kern_subr.c
new file mode 100644
index 00000000000..5c12afcba33
--- /dev/null
+++ b/sys/kern/kern_subr.c
@@ -0,0 +1,210 @@
+/*
+ * Copyright (c) 1982, 1986, 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)kern_subr.c	8.3 (Berkeley) 1/21/94
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/malloc.h>
+#include <sys/queue.h>
+
+uiomove(cp, n, uio)		/* move up to n bytes between cp and uio */
+	register caddr_t cp;		/* kernel-side buffer */
+	register int n;			/* most bytes to move in this call */
+	register struct uio *uio;	/* iovecs, residual and offset; updated */
+{
+	register struct iovec *iov;
+	u_int cnt;			/* bytes moved in the current pass */
+	int error = 0;			/* copyin/copyout failure, returned as is */
+	/* UIO_READ copies from cp into the iovecs; UIO_WRITE the reverse. */
+#ifdef DIAGNOSTIC
+	if (uio->uio_rw != UIO_READ && uio->uio_rw != UIO_WRITE)
+		panic("uiomove: mode");
+	if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != curproc)
+		panic("uiomove proc");
+#endif
+	while (n > 0 && uio->uio_resid) {
+		iov = uio->uio_iov;
+		cnt = iov->iov_len;
+		if (cnt == 0) {		/* this iovec is used up; step to the next */
+			uio->uio_iov++;
+			uio->uio_iovcnt--;
+			continue;
+		}
+		if (cnt > n)
+			cnt = n;
+		switch (uio->uio_segflg) {
+
+		case UIO_USERSPACE:
+		case UIO_USERISPACE:
+			if (uio->uio_rw == UIO_READ)
+				error = copyout(cp, iov->iov_base, cnt);
+			else
+				error = copyin(iov->iov_base, cp, cnt);
+			if (error)
+				return (error);	/* uio not advanced for this pass */
+			break;
+
+		case UIO_SYSSPACE:
+			if (uio->uio_rw == UIO_READ)
+				bcopy((caddr_t)cp, iov->iov_base, cnt);
+			else
+				bcopy(iov->iov_base, (caddr_t)cp, cnt);
+			break;
+		}
+		iov->iov_base += cnt;
+		iov->iov_len -= cnt;
+		uio->uio_resid -= cnt;
+		uio->uio_offset += cnt;
+		cp += cnt;
+		n -= cnt;
+	}
+	return (error);
+}
+
+/*
+ * Give next character to user as result of read; returns 0 or EFAULT.
+ */
+ureadc(c, uio)
+	register int c;
+	register struct uio *uio;
+{
+	register struct iovec *iov;
+
+again:
+	if (uio->uio_iovcnt == 0 || uio->uio_resid == 0)
+		panic("ureadc");	/* caller left no room in uio */
+	iov = uio->uio_iov;
+	if (iov->iov_len == 0) {	/* skip an exhausted iovec */
+		uio->uio_iovcnt--;
+		uio->uio_iov++;
+		goto again;
+	}
+	switch (uio->uio_segflg) {
+
+	case UIO_USERSPACE:
+		if (subyte(iov->iov_base, c) < 0)
+			return (EFAULT);
+		break;
+
+	case UIO_SYSSPACE:
+		*iov->iov_base = c;
+		break;
+
+	case UIO_USERISPACE:
+		if (suibyte(iov->iov_base, c) < 0)
+			return (EFAULT);
+		break;
+	}
+	iov->iov_base++;		/* account for the one byte stored */
+	iov->iov_len--;
+	uio->uio_resid--;
+	uio->uio_offset++;
+	return (0);
+}
+
+#ifdef vax	/* unused except by ct.c, other oddities XXX */
+/*
+ * Get next character written in by user from uio; -1 when none is left.
+ */
+uwritec(uio)
+	struct uio *uio;
+{
+	register struct iovec *iov;
+	register int c;
+
+	if (uio->uio_resid <= 0)
+		return (-1);
+again:
+	if (uio->uio_iovcnt <= 0)
+		panic("uwritec");
+	iov = uio->uio_iov;
+	if (iov->iov_len == 0) {	/* skip an exhausted iovec */
+		uio->uio_iov++;
+		if (--uio->uio_iovcnt == 0)
+			return (-1);
+		goto again;
+	}
+	switch (uio->uio_segflg) {
+	/* NOTE(review): no default case; c stays unset for other values. */
+	case UIO_USERSPACE:
+		c = fubyte(iov->iov_base);
+		break;
+
+	case UIO_SYSSPACE:
+		c = *(u_char *) iov->iov_base;
+		break;
+
+	case UIO_USERISPACE:
+		c = fuibyte(iov->iov_base);
+		break;
+	}
+	if (c < 0)			/* a failed fetch is reported as -1 too */
+		return (-1);
+	iov->iov_base++;
+	iov->iov_len--;
+	uio->uio_resid--;
+	uio->uio_offset++;
+	return (c);
+}
+#endif /* vax */
+
+/*
+ * General routine to allocate a hash table: the bucket count is the largest
+ * power of two not exceeding elements, and *hashmask is set to count - 1.
+ */
+void *
+hashinit(elements, type, hashmask)
+	int elements, type;
+	u_long *hashmask;
+{
+	LIST_HEAD(generic, generic) *table, *bucket;
+	long nbuckets = 1;
+
+	if (elements <= 0)
+		panic("hashinit: bad cnt");
+	/* Keep doubling while the doubled size still fits within elements. */
+	while ((nbuckets << 1) <= elements)
+		nbuckets <<= 1;
+	table = malloc((u_long)nbuckets * sizeof(*table), type, M_WAITOK);
+	for (bucket = table; bucket < table + nbuckets; bucket++)
+		LIST_INIT(bucket);
+	*hashmask = nbuckets - 1;
+	return (table);
+}
diff --git a/sys/kern/kern_synch.c b/sys/kern/kern_synch.c
new file mode 100644
index 00000000000..1c2a578f303
--- /dev/null
+++ b/sys/kern/kern_synch.c
@@ -0,0 +1,666 @@
+/*-
+ * Copyright (c) 1982, 1986, 1990, 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)kern_synch.c	8.6 (Berkeley) 1/21/94
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/kernel.h>
+#include <sys/buf.h>
+#include <sys/signalvar.h>
+#include <sys/resourcevar.h>
+#include <sys/vmmeter.h>
+#ifdef KTRACE
+#include <sys/ktrace.h>
+#endif
+
+#include <machine/cpu.h>
+
+u_char	curpriority;		/* usrpri of curproc */
+int	lbolt;			/* once a second sleep address; see schedcpu() */
+
+/*
+ * Force switch among equal priority processes every 100ms.
+ */
+/* ARGSUSED */
+void
+roundrobin(arg)
+	void *arg;
+{
+	/* Request a reschedule, then re-arm ourselves hz / 10 ticks ahead. */
+	need_resched();
+	timeout(roundrobin, NULL, hz / 10);
+}
+
+/*
+ * Constants for digital decay and forget:
+ *	90% of (p_estcpu) usage in 5 * loadav time
+ *	95% of (p_pctcpu) usage in 60 seconds (load insensitive)
+ *          Note that, as ps(1) mentions, this can let percentages
+ *          total over 100% (I've seen 137.9% for 3 processes).
+ *
+ * Note that hardclock updates p_estcpu and p_cpticks independently.
+ *
+ * We wish to decay away 90% of p_estcpu in (5 * loadavg) seconds.
+ * That is, the system wants to compute a value of decay such
+ * that the following for loop:
+ * 	for (i = 0; i < (5 * loadavg); i++)
+ * 		p_estcpu *= decay;
+ * will compute
+ * 	p_estcpu *= 0.1;
+ * for all values of loadavg:
+ *
+ * Mathematically this loop can be expressed by saying:
+ * 	decay ** (5 * loadavg) ~= .1
+ *
+ * The system computes decay as:
+ * 	decay = (2 * loadavg) / (2 * loadavg + 1)
+ *
+ * We wish to prove that the system's computation of decay
+ * will always fulfill the equation:
+ * 	decay ** (5 * loadavg) ~= .1
+ *
+ * If we compute b as:
+ * 	b = 2 * loadavg
+ * then
+ * 	decay = b / (b + 1)
+ *
+ * We now need to prove two things:
+ *	1) Given factor ** (5 * loadavg) ~= .1, prove factor == b/(b+1)
+ *	2) Given b/(b+1) ** power ~= .1, prove power == (5 * loadavg)
+ *	
+ * Facts:
+ *         For x close to zero, exp(x) =~ 1 + x, since
+ *              exp(x) = 0! + x**1/1! + x**2/2! + ... .
+ *              therefore exp(-1/b) =~ 1 - (1/b) = (b-1)/b.
+ *         For x close to zero, ln(1+x) =~ x, since
+ *              ln(1+x) = x - x**2/2 + x**3/3 - ...     -1 < x < 1
+ *              therefore ln(b/(b+1)) = ln(1 - 1/(b+1)) =~ -1/(b+1).
+ *         ln(.1) =~ -2.30
+ *
+ * Proof of (1):
+ *    Solve (factor)**(power) =~ .1 given power (5*loadav):
+ *	solving for factor,
+ *      ln(factor) =~ (-2.30/5*loadav), or
+ *      factor =~ exp(-1/((5/2.30)*loadav)) =~ exp(-1/(2*loadav)) =
+ *          exp(-1/b) =~ (b-1)/b =~ b/(b+1).                    QED
+ *
+ * Proof of (2):
+ *    Solve (factor)**(power) =~ .1 given factor == (b/(b+1)):
+ *	solving for power,
+ *      power*ln(b/(b+1)) =~ -2.30, or
+ *      power =~ 2.3 * (b + 1) = 4.6*loadav + 2.3 =~ 5*loadav.  QED
+ *
+ * Actual power values for the implemented algorithm are as follows:
+ *      loadav: 1       2       3       4
+ *      power:  5.68    10.32   14.94   19.55
+ */
+
+/* calculations for digital decay to forget 90% of usage in 5*loadav sec */
+#define	loadfactor(loadav)	(2 * (loadav))	/* `b' of the proof above */
+#define	decay_cpu(loadfac, cpu)	(((loadfac) * (cpu)) / ((loadfac) + FSCALE))
+
+/* decay 95% of `p_pctcpu' in 60 seconds; see CCPU_SHIFT before changing */
+fixpt_t	ccpu = 0.95122942450071400909 * FSCALE;		/* exp(-1/20) */
+
+/*
+ * If `ccpu' is not equal to `exp(-1/20)' and you still want to use the
+ * faster/more-accurate formula, you'll have to estimate CCPU_SHIFT below
+ * and possibly adjust FSHIFT in "param.h" so that (FSHIFT >= CCPU_SHIFT).
+ *
+ * To estimate CCPU_SHIFT for exp(-1/20), the following formula was used:
+ *	1 - exp(-1/20) ~= 0.0487 ~= 0.0488 == 1 (fixed pt, *11* bits).
+ *
+ * If you don't want to bother with the faster/more-accurate formula, you
+ * can set CCPU_SHIFT to (FSHIFT + 1) which will use a slower/less-accurate
+ * (more general) method of calculating the %age of CPU used by a process.
+ */
+#define	CCPU_SHIFT	11
+
+/*
+ * Recompute process priorities, every hz ticks.
+ */
+/* ARGSUSED */
+void
+schedcpu(arg)
+	void *arg;
+{
+	register fixpt_t loadfac = loadfactor(averunnable.ldavg[0]);
+	register struct proc *p;
+	register int s;			/* saved spl */
+	register unsigned int newcpu;	/* decayed p_estcpu before clamping */
+
+	wakeup((caddr_t)&lbolt);	/* wake the once-a-second sleepers */
+	for (p = (struct proc *)allproc; p != NULL; p = p->p_next) {
+		/*
+		 * Increment time in/out of memory and sleep time
+		 * (if sleeping).  We ignore overflow; with 16-bit int's
+		 * (remember them?) overflow takes 45 days.
+		 */
+		p->p_swtime++;
+		if (p->p_stat == SSLEEP || p->p_stat == SSTOP)
+			p->p_slptime++;
+		p->p_pctcpu = (p->p_pctcpu * ccpu) >> FSHIFT;
+		/*
+		 * If the process has slept the entire second,
+		 * stop recalculating its priority until it wakes up.
+		 */
+		if (p->p_slptime > 1)
+			continue;
+		s = splstatclock();	/* prevent state changes */
+		/*
+		 * p_pctcpu is only for ps.
+		 */
+#if	(FSHIFT >= CCPU_SHIFT)
+		p->p_pctcpu += (hz == 100)?
+			((fixpt_t) p->p_cpticks) << (FSHIFT - CCPU_SHIFT):
+                	100 * (((fixpt_t) p->p_cpticks)
+				<< (FSHIFT - CCPU_SHIFT)) / hz;
+#else
+		p->p_pctcpu += ((FSCALE - ccpu) *
+			(p->p_cpticks * FSCALE / hz)) >> FSHIFT;
+#endif
+		p->p_cpticks = 0;
+		newcpu = (u_int) decay_cpu(loadfac, p->p_estcpu) + p->p_nice;
+		p->p_estcpu = min(newcpu, UCHAR_MAX);
+		resetpriority(p);
+		if (p->p_priority >= PUSER) {
+#define	PPQ	(128 / NQS)		/* priorities per queue */
+			if ((p != curproc) &&
+			    p->p_stat == SRUN &&
+			    (p->p_flag & P_INMEM) &&
+			    (p->p_priority / PPQ) != (p->p_usrpri / PPQ)) {
+				remrq(p);	/* requeue under the new priority */
+				p->p_priority = p->p_usrpri;
+				setrunqueue(p);
+			} else
+				p->p_priority = p->p_usrpri;
+		}
+		splx(s);
+	}
+	vmmeter();
+	if (bclnlist != NULL)
+		wakeup((caddr_t)pageproc);
+	timeout(schedcpu, (void *)0, hz);	/* run again in hz ticks */
+}
+
+/*
+ * Recalculate the priority of a process after it has slept for a while.
+ * For all load averages >= 1 and max p_estcpu of 255, sleeping for at
+ * least six times the loadfactor will decay p_estcpu to zero.
+ */
+void
+updatepri(p)
+	register struct proc *p;
+{
+	register unsigned int newcpu = p->p_estcpu;
+	register fixpt_t loadfac = loadfactor(averunnable.ldavg[0]);
+	/* NOTE(review): loadfac looks FSCALE-scaled, p_slptime is not; confirm */
+	if (p->p_slptime > 5 * loadfac)
+		p->p_estcpu = 0;
+	else {
+		p->p_slptime--;	/* the first time was done in schedcpu */
+		while (newcpu && --p->p_slptime)	/* one decay per count */
+			newcpu = (int) decay_cpu(loadfac, newcpu);
+		p->p_estcpu = min(newcpu, UCHAR_MAX);
+	}
+	resetpriority(p);
+}
+
+/*
+ * We're only looking at 7 bits of the address; everything is
+ * aligned to 4, lots of things are aligned to greater powers
+ * of 2.  Shift right by 8, i.e. drop the bottom 256 worth.
+ */
+#define TABLESIZE	128	/* power of two, so LOOKUP can mask */
+#define LOOKUP(x)	(((int)(x) >> 8) & (TABLESIZE - 1))
+struct slpque {
+	struct proc *sq_head;	/* first sleeper in this bucket, 0 if none */
+	struct proc **sq_tailp;	/* presumably the tail link -- TODO confirm */
+} slpque[TABLESIZE];
+
+/*
+ * During autoconfiguration or after a panic, a sleep will simply
+ * lower the priority briefly to allow interrupts, then return.
+ * The priority to be used (safepri) is machine-dependent, thus this
+ * value is initialized and maintained in the machine-dependent layers.
+ * This priority will typically be 0, or the lowest priority
+ * that is safe for use on the interrupt stack; it can be made
+ * higher to block network software interrupts after panics.
+ */
+int safepri;
+
+/*
+ * General sleep call.  Suspends the current process until a wakeup is
+ * performed on the specified identifier.  The process will then be made
+ * runnable with the specified priority.  Sleeps at most timo/hz seconds
+ * (0 means no timeout).  If pri includes PCATCH flag, signals are checked
+ * before and after sleeping, else signals are not checked.  Returns 0 if
+ * awakened, EWOULDBLOCK if the timeout expires.  If PCATCH is set and a
+ * signal needs to be delivered, ERESTART is returned if the current system
+ * call should be restarted if possible, and EINTR is returned if the system
+ * call should be interrupted by the signal (return EINTR).
+ */
+int
+tsleep(ident, priority, wmesg, timo)
+	void *ident;
+	int priority, timo;
+	char *wmesg;
+{
+	register struct proc *p = curproc;
+	register struct slpque *qp;
+	register s;
+	int sig, catch = priority & PCATCH;	/* non-zero: check for signals */
+	extern int cold;
+	void endtsleep __P((void *));
+
+#ifdef KTRACE
+	if (KTRPOINT(p, KTR_CSW))
+		ktrcsw(p->p_tracep, 1, 0);
+#endif
+	s = splhigh();
+	if (cold || panicstr) {
+		/*
+		 * After a panic, or during autoconfiguration,
+		 * just give interrupts a chance, then just return;
+		 * don't run any other procs or panic below,
+		 * in case this is the idle process and already asleep.
+		 */
+		splx(safepri);
+		splx(s);
+		return (0);
+	}
+#ifdef DIAGNOSTIC
+	if (ident == NULL || p->p_stat != SRUN || p->p_back)
+		panic("tsleep");
+#endif
+	p->p_wchan = ident;
+	p->p_wmesg = wmesg;
+	p->p_slptime = 0;
+	p->p_priority = priority & PRIMASK;	/* drop the bits outside PRIMASK */
+	qp = &slpque[LOOKUP(ident)];
+	if (qp->sq_head == 0)		/* append p to its hash bucket */
+		qp->sq_head = p;
+	else
+		*qp->sq_tailp = p;
+	*(qp->sq_tailp = &p->p_forw) = 0;	/* p is the new tail; end the list */
+	if (timo)
+		timeout(endtsleep, (void *)p, timo);
+	/*
+	 * We put ourselves on the sleep queue and start our timeout
+	 * before calling CURSIG, as we could stop there, and a wakeup
+	 * or a SIGCONT (or both) could occur while we were stopped.
+	 * A SIGCONT would cause us to be marked as SSLEEP
+	 * without resuming us, thus we must be ready for sleep
+	 * when CURSIG is called.  If the wakeup happens while we're
+	 * stopped, p->p_wchan will be 0 upon return from CURSIG.
+	 */
+	if (catch) {
+		p->p_flag |= P_SINTR;
+		if (sig = CURSIG(p)) {	/* signal pending: do not sleep */
+			if (p->p_wchan)
+				unsleep(p);
+			p->p_stat = SRUN;
+			goto resume;
+		}
+		if (p->p_wchan == 0) {	/* already woken while in CURSIG */
+			catch = 0;	/* skip the signal check after resume */
+			goto resume;
+		}
+	} else
+		sig = 0;
+	p->p_stat = SSLEEP;
+	p->p_stats->p_ru.ru_nvcsw++;
+	mi_switch();
+resume:
+	curpriority = p->p_usrpri;
+	splx(s);
+	p->p_flag &= ~P_SINTR;
+	if (p->p_flag & P_TIMEOUT) {	/* flag is set by endtsleep() */
+		p->p_flag &= ~P_TIMEOUT;
+		if (sig == 0) {
+#ifdef KTRACE
+			if (KTRPOINT(p, KTR_CSW))
+				ktrcsw(p->p_tracep, 0, 0);
+#endif
+			return (EWOULDBLOCK);
+		}
+	} else if (timo)
+		untimeout(endtsleep, (void *)p);	/* no timeout recorded: cancel the callout */
+	if (catch && (sig != 0 || (sig = CURSIG(p)))) {
+#ifdef KTRACE
+		if (KTRPOINT(p, KTR_CSW))
+			ktrcsw(p->p_tracep, 0, 0);
+#endif
+		if (p->p_sigacts->ps_sigintr & sigmask(sig))
+			return (EINTR);
+		return (ERESTART);
+	}
+#ifdef KTRACE
+	if (KTRPOINT(p, KTR_CSW))
+		ktrcsw(p->p_tracep, 0, 0);
+#endif
+	return (0);
+}
+
+/*
+ * Implement timeout for tsleep.
+ * If process hasn't been awakened (wchan non-zero),
+ * set timeout flag and undo the sleep.  If proc
+ * is stopped, just unsleep so it will remain stopped.
+ */
+void
+endtsleep(arg)
+	void *arg;
+{
+	register struct proc *sleeper = (struct proc *)arg;
+	int ipl = splhigh();
+
+	/* A cleared wait channel means the process was already awakened. */
+	if (sleeper->p_wchan != 0) {
+		/* Not in SSLEEP (e.g. stopped): only take it off the queue. */
+		if (sleeper->p_stat != SSLEEP)
+			unsleep(sleeper);
+		else
+			setrunnable(sleeper);
+		sleeper->p_flag |= P_TIMEOUT;
+	}
+	splx(ipl);
+}
+
+/*
+ * Short-term, non-interruptable sleep.
+ */
+void
+sleep(ident, priority)
+	void *ident;
+	int priority;
+{
+	register struct proc *p = curproc;
+	register struct slpque *qp;
+	register s;
+	extern int cold;
+
+#ifdef DIAGNOSTIC
+	if (priority > PZERO) {
+		printf("sleep called with priority %d > PZERO, wchan: %x\n",
+		    priority, ident);
+		panic("old sleep");
+	}
+#endif
+	s = splhigh();
+	if (cold || panicstr) {
+		/*
+		 * After a panic, or during autoconfiguration,
+		 * just give interrupts a chance, then just return;
+		 * don't run any other procs or panic below,
+		 * in case this is the idle process and already asleep.
+		 */
+		splx(safepri);
+		splx(s);
+		return;
+	}
+#ifdef DIAGNOSTIC
+	if (ident == NULL || p->p_stat != SRUN || p->p_back)
+		panic("sleep");
+#endif
+	p->p_wchan = ident;
+	p->p_wmesg = NULL;	/* this interface carries no wait message */
+	p->p_slptime = 0;
+	p->p_priority = priority;	/* stored unmasked, unlike tsleep() */
+	qp = &slpque[LOOKUP(ident)];
+	if (qp->sq_head == 0)		/* append p to its hash bucket */
+		qp->sq_head = p;
+	else
+		*qp->sq_tailp = p;
+	*(qp->sq_tailp = &p->p_forw) = 0;	/* p is the new tail; end the list */
+	p->p_stat = SSLEEP;
+	p->p_stats->p_ru.ru_nvcsw++;
+#ifdef KTRACE
+	if (KTRPOINT(p, KTR_CSW))
+		ktrcsw(p->p_tracep, 1, 0);
+#endif
+	mi_switch();
+#ifdef KTRACE
+	if (KTRPOINT(p, KTR_CSW))
+		ktrcsw(p->p_tracep, 0, 0);
+#endif
+	curpriority = p->p_usrpri;
+	splx(s);
+}
+
+/*
+ * Remove a process from its wait queue
+ */
+void
+unsleep(p)
+	register struct proc *p;
+{
+	register struct slpque *qp;
+	register struct proc **hp;	/* address of the link that points at p */
+	int s;
+
+	s = splhigh();
+	if (p->p_wchan) {	/* nothing to do unless p is on a sleep queue */
+		hp = &(qp = &slpque[LOOKUP(p->p_wchan)])->sq_head;
+		while (*hp != p)	/* no end-of-list test: p must be in this bucket */
+			hp = &(*hp)->p_forw;
+		*hp = p->p_forw;	/* unlink p */
+		if (qp->sq_tailp == &p->p_forw)	/* p was the tail: back the tail up */
+			qp->sq_tailp = hp;
+		p->p_wchan = 0;
+	}
+	splx(s);
+}
+
+/*
+ * Make all processes sleeping on the specified identifier runnable.
+ */
+void
+wakeup(ident)
+	register void *ident;
+{
+	register struct slpque *qp;
+	register struct proc *p, **q;	/* q: address of the link that points at p */
+	int s;
+
+	s = splhigh();
+	qp = &slpque[LOOKUP(ident)];
+restart:
+	for (q = &qp->sq_head; p = *q; ) {
+#ifdef DIAGNOSTIC
+		if (p->p_back || p->p_stat != SSLEEP && p->p_stat != SSTOP)
+			panic("wakeup");
+#endif
+		if (p->p_wchan == ident) {	/* bucket may hold other channels too */
+			p->p_wchan = 0;
+			*q = p->p_forw;		/* unlink p; q now names its successor */
+			if (qp->sq_tailp == &p->p_forw)
+				qp->sq_tailp = q;
+			if (p->p_stat == SSLEEP) {	/* other states are dequeued only */
+				/* OPTIMIZED EXPANSION OF setrunnable(p); */
+				if (p->p_slptime > 1)
+					updatepri(p);
+				p->p_slptime = 0;
+				p->p_stat = SRUN;
+				if (p->p_flag & P_INMEM)
+					setrunqueue(p);
+				/*
+				 * Since curpriority is a user priority,
+				 * p->p_priority is always better than
+				 * curpriority.
+				 */
+				if ((p->p_flag & P_INMEM) == 0)
+					wakeup((caddr_t)&proc0);
+				else
+					need_resched();
+				/* END INLINE EXPANSION */
+				goto restart;	/* rescan the bucket from its head */
+			}
+		} else
+			q = &p->p_forw;
+	}
+	splx(s);
+}
+
+/*
+ * The machine independent parts of mi_switch().
+ * Must be called at splstatclock() or higher.
+ */
+void
+mi_switch()
+{
+	register struct proc *p = curproc;	/* XXX */
+	register struct rlimit *rlim;
+	register long s, u;	/* updated p_rtime: seconds, microseconds */
+	struct timeval tv;
+
+	/*
+	 * Compute the amount of time during which the current
+	 * process was running, and add that to its total so far.
+	 */
+	microtime(&tv);
+	u = p->p_rtime.tv_usec + (tv.tv_usec - runtime.tv_usec);
+	s = p->p_rtime.tv_sec + (tv.tv_sec - runtime.tv_sec);
+	if (u < 0) {		/* borrow from the seconds */
+		u += 1000000;
+		s--;
+	} else if (u >= 1000000) {	/* carry into the seconds */
+		u -= 1000000;
+		s++;
+	}
+	p->p_rtime.tv_usec = u;
+	p->p_rtime.tv_sec = s;
+
+	/*
+	 * Check if the process exceeds its cpu resource allocation.
+	 * If over max, kill it.  In any case, if it has run for more
+	 * than 10 minutes, reduce priority to give others a chance.
+	 */
+	rlim = &p->p_rlimit[RLIMIT_CPU];
+	if (s >= rlim->rlim_cur) {
+		if (s >= rlim->rlim_max)
+			psignal(p, SIGKILL);
+		else {
+			psignal(p, SIGXCPU);
+			if (rlim->rlim_cur < rlim->rlim_max)
+				rlim->rlim_cur += 5;	/* move the soft limit 5 further on */
+		}
+	}
+	if (s > 10 * 60 && p->p_ucred->cr_uid && p->p_nice == NZERO) {	/* uid != 0, nice untouched */
+		p->p_nice = NZERO + 4;
+		resetpriority(p);
+	}
+
+	/*
+	 * Pick a new current process and record its start time.
+	 */
+	cnt.v_swtch++;
+	cpu_switch(p);
+	microtime(&runtime);	/* baseline subtracted by the next mi_switch() */
+}
+
+/*
+ * Initialize the (doubly-linked) run queues
+ * to be empty.
+ */
+rqinit()
+{
+	register int q = NQS;
+
+	while (--q >= 0)	/* an empty queue links back to its own header */
+		qs[q].ph_rlink = qs[q].ph_link = (struct proc *)&qs[q];
+}
+
+/*
+ * Change process state to be runnable,
+ * placing it on the run queue if it is in memory,
+ * and awakening the swapper if it isn't in memory.
+ */
+void
+setrunnable(p)
+	register struct proc *p;
+{
+	register int s;
+
+	s = splhigh();
+	switch (p->p_stat) {
+	case 0:
+	case SRUN:
+	case SZOMB:
+	default:
+		panic("setrunnable");	/* only SSTOP, SSLEEP and SIDL are accepted */
+	case SSTOP:
+	case SSLEEP:
+		unsleep(p);		/* e.g. when sending signals */
+		break;
+
+	case SIDL:	/* nothing to dequeue for this state */
+		break;
+	}
+	p->p_stat = SRUN;
+	if (p->p_flag & P_INMEM)
+		setrunqueue(p);
+	splx(s);
+	if (p->p_slptime > 1)	/* done after splx(), unlike the copy in wakeup() */
+		updatepri(p);
+	p->p_slptime = 0;
+	if ((p->p_flag & P_INMEM) == 0)
+		wakeup((caddr_t)&proc0);	/* not in memory: wake whoever sleeps on &proc0 */
+	else if (p->p_priority < curpriority)
+		need_resched();
+}
+
+/*
+ * Compute the priority of a process when running in user mode.
+ * Arrange to reschedule if the resulting priority is better
+ * than that of the current process.
+ */
+void
+resetpriority(p)
+	register struct proc *p;
+{
+	register unsigned int pri;
+
+	/* Base user priority, worsened by recent CPU use and by nice. */
+	pri = PUSER + p->p_estcpu / 4 + 2 * p->p_nice;
+	p->p_usrpri = pri = min(pri, MAXPRI);
+	if (pri < curpriority)
+		need_resched();
+}
diff --git a/sys/kern/kern_sysctl.c b/sys/kern/kern_sysctl.c
new file mode 100644
index 00000000000..ae16decff81
--- /dev/null
+++ b/sys/kern/kern_sysctl.c
@@ -0,0 +1,787 @@
+/*-
+ * Copyright (c) 1982, 1986, 1989, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Mike Karels at Berkeley Software Design, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)kern_sysctl.c	8.4 (Berkeley) 4/14/94
+ */
+
+/*
+ * sysctl system call.
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/malloc.h>
+#include <sys/proc.h>
+#include <sys/file.h>
+#include <sys/vnode.h>
+#include <sys/unistd.h>
+#include <sys/buf.h>
+#include <sys/ioctl.h>
+#include <sys/tty.h>
+#include <vm/vm.h>
+#include <sys/sysctl.h>
+
+sysctlfn kern_sysctl;
+sysctlfn hw_sysctl;
+#ifdef DEBUG
+sysctlfn debug_sysctl;
+#endif
+extern sysctlfn vm_sysctl;
+extern sysctlfn fs_sysctl;
+extern sysctlfn net_sysctl;
+extern sysctlfn cpu_sysctl;
+
+/*
+ * Locking and stats
+ */
+static struct sysctl_lock {
+	int	sl_lock;	/* non-zero while a __sysctl() caller holds the lock */
+	int	sl_want;	/* set by a waiter so the holder issues a wakeup() */
+	int	sl_locked;	/* incremented each time a waiter returns from sleep() */
+} memlock;
+
+struct sysctl_args {
+	int	*name;		/* user address of the integer name vector */
+	u_int	namelen;	/* number of ints in name */
+	void	*old;		/* user buffer for the current value, or NULL */
+	size_t	*oldlenp;	/* user address of old's size (in/out), or NULL */
+	void	*new;		/* user address of the replacement value, or NULL */
+	size_t	newlen;		/* size of the replacement value */
+};
+
+int
+__sysctl(p, uap, retval)
+	struct proc *p;				/* calling process */
+	register struct sysctl_args *uap;	/* system call arguments */
+	int *retval;				/* out: length of the old value */
+{
+	int error, dolock = 1;		/* dolock: wire the old buffer with vslock() */
+	u_int savelen, oldlen = 0;
+	sysctlfn *fn;
+	int name[CTL_MAXNAME];
+
+	if (uap->new != NULL && (error = suser(p->p_ucred, &p->p_acflag)))
+		return (error);
+	/*
+	 * all top-level sysctl names are non-terminal, so at least two
+	 * components are required and name[1] is always copied in
+	 */
+	if (uap->namelen > CTL_MAXNAME || uap->namelen < 2)
+		return (EINVAL);
+	if (error = copyin(uap->name, &name, uap->namelen * sizeof(int)))
+		return (error);
+
+	switch (name[0]) {
+	case CTL_KERN:
+		fn = kern_sysctl;
+		if (name[1] != KERN_VNODE)	/* XXX; second-level id is name[1] */
+			dolock = 0;
+		break;
+	case CTL_HW:
+		fn = hw_sysctl;
+		break;
+	case CTL_VM:
+		fn = vm_sysctl;
+		break;
+	case CTL_NET:
+		fn = net_sysctl;
+		break;
+#ifdef notyet
+	case CTL_FS:
+		fn = fs_sysctl;
+		break;
+#endif
+	case CTL_MACHDEP:
+		fn = cpu_sysctl;
+		break;
+#ifdef DEBUG
+	case CTL_DEBUG:
+		fn = debug_sysctl;
+		break;
+#endif
+	default:
+		return (EOPNOTSUPP);
+	}
+
+	if (uap->oldlenp &&
+	    (error = copyin(uap->oldlenp, &oldlen, sizeof(oldlen))))
+		return (error);
+	if (uap->old != NULL) {
+		if (!useracc(uap->old, oldlen, B_WRITE))
+			return (EFAULT);
+		while (memlock.sl_lock) {	/* one caller at a time past this point */
+			memlock.sl_want = 1;
+			sleep((caddr_t)&memlock, PRIBIO+1);
+			memlock.sl_locked++;
+		}
+		memlock.sl_lock = 1;
+		if (dolock)
+			vslock(uap->old, oldlen);
+		savelen = oldlen;	/* the handler may change oldlen; unlock what was locked */
+	}
+	error = (*fn)(name + 1, uap->namelen - 1, uap->old, &oldlen,
+	    uap->new, uap->newlen, p);
+	if (uap->old != NULL) {
+		if (dolock)
+			vsunlock(uap->old, savelen, B_WRITE);
+		memlock.sl_lock = 0;
+		if (memlock.sl_want) {
+			memlock.sl_want = 0;
+			wakeup((caddr_t)&memlock);
+		}
+	}
+	if (error)
+		return (error);
+	if (uap->oldlenp)
+		error = copyout(&oldlen, uap->oldlenp, sizeof(oldlen));
+	*retval = oldlen;
+	return (error);		/* 0, or the failure from the copyout above */
+}
+
+/*
+ * Attributes stored in the kernel.
+ */
+char hostname[MAXHOSTNAMELEN];
+int hostnamelen;
+long hostid;
+int securelevel;
+
+/*
+ * kernel related system variables.
+ */
+kern_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p)
+	int *name;
+	u_int namelen;
+	void *oldp;
+	size_t *oldlenp;
+	void *newp;
+	size_t newlen;
+	struct proc *p;
+{
+	int error, level, inthostid;
+	extern char ostype[], osrelease[], version[];
+
+	/* all sysctl names at this level are terminal, bar KERN_PROC/KERN_PROF */
+	if (namelen != 1 && !(name[0] == KERN_PROC || name[0] == KERN_PROF))
+		return (ENOTDIR);		/* overloaded */
+
+	switch (name[0]) {
+	case KERN_OSTYPE:
+		return (sysctl_rdstring(oldp, oldlenp, newp, ostype));
+	case KERN_OSRELEASE:
+		return (sysctl_rdstring(oldp, oldlenp, newp, osrelease));
+	case KERN_OSREV:
+		return (sysctl_rdint(oldp, oldlenp, newp, BSD));
+	case KERN_VERSION:
+		return (sysctl_rdstring(oldp, oldlenp, newp, version));
+	case KERN_MAXVNODES:
+		return(sysctl_int(oldp, oldlenp, newp, newlen, &desiredvnodes));
+	case KERN_MAXPROC:
+		return (sysctl_int(oldp, oldlenp, newp, newlen, &maxproc));
+	case KERN_MAXFILES:
+		return (sysctl_int(oldp, oldlenp, newp, newlen, &maxfiles));
+	case KERN_ARGMAX:
+		return (sysctl_rdint(oldp, oldlenp, newp, ARG_MAX));
+	case KERN_SECURELVL:
+		level = securelevel;	/* work on a copy until the change is vetted */
+		if ((error = sysctl_int(oldp, oldlenp, newp, newlen, &level)) ||
+		    newp == NULL)
+			return (error);
+		if (level < securelevel && p->p_pid != 1)	/* only pid 1 may lower it */
+			return (EPERM);
+		securelevel = level;
+		return (0);
+	case KERN_HOSTNAME:
+		error = sysctl_string(oldp, oldlenp, newp, newlen,
+		    hostname, sizeof(hostname));
+		if (newp && !error)
+			hostnamelen = newlen;	/* bytes copied in; terminator not counted */
+		return (error);
+	case KERN_HOSTID:
+		inthostid = hostid;  /* XXX assumes sizeof long <= sizeof int */
+		error =  sysctl_int(oldp, oldlenp, newp, newlen, &inthostid);
+		hostid = inthostid;	/* written back whether or not a new value came in */
+		return (error);
+	case KERN_CLOCKRATE:
+		return (sysctl_clockrate(oldp, oldlenp));
+	case KERN_BOOTTIME:
+		return (sysctl_rdstruct(oldp, oldlenp, newp, &boottime,
+		    sizeof(struct timeval)));
+	case KERN_VNODE:
+		return (sysctl_vnode(oldp, oldlenp));
+	case KERN_PROC:
+		return (sysctl_doproc(name + 1, namelen - 1, oldp, oldlenp));	/* rest of name selects procs */
+	case KERN_FILE:
+		return (sysctl_file(oldp, oldlenp));
+#ifdef GPROF
+	case KERN_PROF:
+		return (sysctl_doprof(name + 1, namelen - 1, oldp, oldlenp,
+		    newp, newlen));
+#endif
+	case KERN_POSIX1:
+		return (sysctl_rdint(oldp, oldlenp, newp, _POSIX_VERSION));
+	case KERN_NGROUPS:
+		return (sysctl_rdint(oldp, oldlenp, newp, NGROUPS_MAX));
+	case KERN_JOB_CONTROL:
+		return (sysctl_rdint(oldp, oldlenp, newp, 1));	/* always reported as 1 */
+	case KERN_SAVED_IDS:
+#ifdef _POSIX_SAVED_IDS
+		return (sysctl_rdint(oldp, oldlenp, newp, 1));
+#else
+		return (sysctl_rdint(oldp, oldlenp, newp, 0));
+#endif
+	default:
+		return (EOPNOTSUPP);	/* includes KERN_PROF when GPROF is not defined */
+	}
+	/* NOTREACHED */
+}
+
+/*
+ * hardware related system variables.
+ */
+hw_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p)
+	int *name;
+	u_int namelen;
+	void *oldp;
+	size_t *oldlenp;
+	void *newp;
+	size_t newlen;		/* unused: every variable here is read-only */
+	struct proc *p;		/* unused */
+{
+	extern char machine[], cpu_model[];
+
+	/* all sysctl names at this level are terminal */
+	if (namelen != 1)
+		return (ENOTDIR);		/* overloaded */
+
+	switch (name[0]) {
+	case HW_MACHINE:
+		return (sysctl_rdstring(oldp, oldlenp, newp, machine));
+	case HW_MODEL:
+		return (sysctl_rdstring(oldp, oldlenp, newp, cpu_model));
+	case HW_NCPU:
+		return (sysctl_rdint(oldp, oldlenp, newp, 1));	/* XXX */
+	case HW_BYTEORDER:
+		return (sysctl_rdint(oldp, oldlenp, newp, BYTE_ORDER));
+	case HW_PHYSMEM:
+		return (sysctl_rdint(oldp, oldlenp, newp, ctob(physmem)));	/* reported as an int */
+	case HW_USERMEM:
+		return (sysctl_rdint(oldp, oldlenp, newp,
+		    ctob(physmem - cnt.v_wire_count)));	/* physmem less the wired count */
+	case HW_PAGESIZE:
+		return (sysctl_rdint(oldp, oldlenp, newp, PAGE_SIZE));
+	default:
+		return (EOPNOTSUPP);
+	}
+	/* NOTREACHED */
+}
+
+#ifdef DEBUG
+/*
+ * Debugging related system variables.
+ */
+struct ctldebug debug0, debug1, debug2, debug3, debug4;
+struct ctldebug debug5, debug6, debug7, debug8, debug9;
+struct ctldebug debug10, debug11, debug12, debug13, debug14;
+struct ctldebug debug15, debug16, debug17, debug18, debug19;
+static struct ctldebug *debugvars[CTL_DEBUG_MAXID] = {
+	&debug0, &debug1, &debug2, &debug3, &debug4,
+	&debug5, &debug6, &debug7, &debug8, &debug9,
+	&debug10, &debug11, &debug12, &debug13, &debug14,
+	&debug15, &debug16, &debug17, &debug18, &debug19,
+};
+int
+debug_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p)
+	int *name;		/* name[0]: debugvars[] slot, name[1]: field */
+	u_int namelen;
+	void *oldp;
+	size_t *oldlenp;
+	void *newp;
+	size_t newlen;
+	struct proc *p;		/* unused */
+{
+	struct ctldebug *cdp;
+
+	/* all sysctl names at this level are name and field */
+	if (namelen != 2)
+		return (ENOTDIR);		/* overloaded */
+	if (name[0] < 0 || name[0] >= CTL_DEBUG_MAXID ||	/* user-supplied index */
+	    (cdp = debugvars[name[0]]) == NULL || cdp->debugname == 0)
+		return (EOPNOTSUPP);	/* out of range, or slot not in use */
+	switch (name[1]) {
+	case CTL_DEBUG_NAME:
+		return (sysctl_rdstring(oldp, oldlenp, newp, cdp->debugname));
+	case CTL_DEBUG_VALUE:
+		return (sysctl_int(oldp, oldlenp, newp, newlen, cdp->debugvar));
+	default:
+		return (EOPNOTSUPP);
+	}
+	/* NOTREACHED */
+}
+#endif /* DEBUG */
+
+/*
+ * Validate parameters and get old / set new parameters
+ * for an integer-valued sysctl function.
+ */
+sysctl_int(oldp, oldlenp, newp, newlen, valp)
+	void *oldp;
+	size_t *oldlenp;
+	void *newp;
+	size_t newlen;
+	int *valp;
+{
+	int error;
+
+	if (oldp != NULL && *oldlenp < sizeof(*valp))
+		return (ENOMEM);	/* old-value buffer too small */
+	if (newp != NULL && newlen != sizeof(*valp))
+		return (EINVAL);	/* new value must be exactly one int */
+	*oldlenp = sizeof(*valp);
+	if (oldp != NULL && (error = copyout(valp, oldp, sizeof(*valp))))
+		return (error);		/* could not hand back the old value */
+	if (newp == NULL)
+		return (0);
+	return (copyin(newp, valp, sizeof(*valp)));
+}
+
+/*
+ * As above, but read-only.
+ */
+sysctl_rdint(oldp, oldlenp, newp, val)
+	void *oldp;
+	size_t *oldlenp;
+	void *newp;
+	int val;
+{
+	/* A supplied buffer must have room for one int. */
+	if (oldp != NULL && *oldlenp < sizeof(val))
+		return (ENOMEM);
+	/* Read-only: any attempt to set the value is refused. */
+	if (newp != NULL)
+		return (EPERM);
+	*oldlenp = sizeof(val);
+	if (oldp == NULL)
+		return (0);		/* size query only */
+	return (copyout((caddr_t)&val, oldp, sizeof(val)));
+}
+
+/*
+ * Validate parameters and get old / set new parameters
+ * for a string-valued sysctl function.
+ */
+sysctl_string(oldp, oldlenp, newp, newlen, str, maxlen)
+	void *oldp;
+	size_t *oldlenp;
+	void *newp;
+	size_t newlen;
+	char *str;
+	int maxlen;
+{
+	int error, len = strlen(str) + 1;	/* bytes in str, terminator included */
+
+	if (oldp != NULL && *oldlenp < len)
+		return (ENOMEM);
+	if (newp != NULL && newlen >= maxlen)
+		return (EINVAL);	/* no room left for the terminator */
+	if (oldp != NULL) {
+		*oldlenp = len;
+		if (error = copyout(str, oldp, len))
+			return (error);
+	}
+	if (newp == NULL)
+		return (0);
+	error = copyin(newp, str, newlen);
+	str[newlen] = 0;	/* terminated even when copyin reports an error */
+	return (error);
+}
+
+/*
+ * As above, but read-only.
+ */
+sysctl_rdstring(oldp, oldlenp, newp, str)
+	void *oldp;
+	size_t *oldlenp;
+	void *newp;
+	char *str;
+{
+	int len = strlen(str) + 1;	/* bytes in str, terminator included */
+
+	/* Validate the caller's buffer before reporting anything. */
+	if (oldp != NULL && *oldlenp < len)
+		return (ENOMEM);
+	if (newp != NULL)
+		return (EPERM);		/* read-only: refuse any new value */
+	*oldlenp = len;
+	if (oldp == NULL)
+		return (0);		/* size query only */
+	return (copyout(str, oldp, len));
+}
+
+/*
+ * Validate parameters and get old / set new parameters
+ * for a structure oriented sysctl function.
+ */
+sysctl_struct(oldp, oldlenp, newp, newlen, sp, len)
+	void *oldp;
+	size_t *oldlenp;
+	void *newp;
+	size_t newlen;
+	void *sp;
+	int len;
+{
+	int error;
+
+	if (oldp != NULL && *oldlenp < len)
+		return (ENOMEM);
+	if (newp != NULL && newlen > len)
+		return (EINVAL);
+	if (oldp != NULL) {
+		*oldlenp = len;
+		if (error = copyout(sp, oldp, len))
+			return (error);
+	}
+	/* NOTE(review): a full len bytes are read even when newlen < len. */
+	return (newp != NULL ? copyin(newp, sp, len) : 0);
+}
+
+/*
+ * Validate parameters and get old parameters
+ * for a structure oriented sysctl function.
+ */
+sysctl_rdstruct(oldp, oldlenp, newp, sp, len)
+	void *oldp;
+	size_t *oldlenp;
+	void *newp, *sp;
+	int len;
+{
+	/* A supplied buffer must have room for the whole structure. */
+	if (oldp != NULL && *oldlenp < len)
+		return (ENOMEM);
+	/* Read-only: any attempt to set the structure is refused. */
+	if (newp != NULL)
+		return (EPERM);
+	*oldlenp = len;
+	if (oldp == NULL)
+		return (0);		/* size query only */
+	return (copyout(sp, oldp, len));
+}
+
+/*
+ * Get file structures.
+ */
+sysctl_file(where, sizep)
+	char *where;
+	size_t *sizep;
+{
+	int buflen, error;
+	struct file *fp;
+	char *start = where;	/* kept to compute the byte count written */
+
+	buflen = *sizep;	/* bytes available at where */
+	if (where == NULL) {
+		/*
+		 * overestimate by 10 files
+		 */
+		*sizep = sizeof(filehead) + (nfiles + 10) * sizeof(struct file);
+		return (0);
+	}
+
+	/*
+	 * first copyout filehead
+	 */
+	if (buflen < sizeof(filehead)) {	/* no room even for the head pointer */
+		*sizep = 0;
+		return (0);	/* reported as success with nothing copied */
+	}
+	if (error = copyout((caddr_t)&filehead, where, sizeof(filehead)))
+		return (error);	/* *sizep is left untouched on a copyout failure */
+	buflen -= sizeof(filehead);
+	where += sizeof(filehead);
+
+	/*
+	 * followed by an array of file structures
+	 */
+	for (fp = filehead; fp != NULL; fp = fp->f_filef) {
+		if (buflen < sizeof(struct file)) {
+			*sizep = where - start;	/* bytes copied before space ran out */
+			return (ENOMEM);
+		}
+		if (error = copyout((caddr_t)fp, where, sizeof (struct file)))
+			return (error);
+		buflen -= sizeof(struct file);
+		where += sizeof(struct file);
+	}
+	*sizep = where - start;
+	return (0);
+}
+
+/*
+ * try over estimating by 5 procs
+ */
+#define KERN_PROCSLOP	(5 * sizeof (struct kinfo_proc))
+
+sysctl_doproc(name, namelen, where, sizep)
+	int *name;		/* name[0]: selector kind, name[1]: its argument */
+	u_int namelen;
+	char *where;		/* output buffer, or NULL for a size estimate */
+	size_t *sizep;
+{
+	register struct proc *p;
+	register struct kinfo_proc *dp = (struct kinfo_proc *)where;
+	register int needed = 0;	/* bytes required for every matching process */
+	int buflen = where != NULL ? *sizep : 0;	/* 0: count only, copy nothing */
+	int doingzomb;
+	struct eproc eproc;
+	int error = 0;
+
+	if (namelen != 2 && !(namelen == 1 && name[0] == KERN_PROC_ALL))
+		return (EINVAL);
+	p = (struct proc *)allproc;	/* first pass walks the allproc list */
+	doingzomb = 0;
+again:
+	for (; p != NULL; p = p->p_next) {
+		/*
+		 * Skip embryonic processes.
+		 */
+		if (p->p_stat == SIDL)
+			continue;
+		/*
+		 * TODO - make more efficient (see notes below).
+		 * do by session.
+		 */
+		switch (name[0]) {	/* no default: other selectors match every process */
+
+		case KERN_PROC_PID:
+			/* could do this with just a lookup */
+			if (p->p_pid != (pid_t)name[1])
+				continue;
+			break;
+
+		case KERN_PROC_PGRP:
+			/* could do this by traversing pgrp */
+			if (p->p_pgrp->pg_id != (pid_t)name[1])
+				continue;
+			break;
+
+		case KERN_PROC_TTY:
+			if ((p->p_flag & P_CONTROLT) == 0 ||
+			    p->p_session->s_ttyp == NULL ||
+			    p->p_session->s_ttyp->t_dev != (dev_t)name[1])
+				continue;
+			break;
+
+		case KERN_PROC_UID:
+			if (p->p_ucred->cr_uid != (uid_t)name[1])
+				continue;
+			break;
+
+		case KERN_PROC_RUID:
+			if (p->p_cred->p_ruid != (uid_t)name[1])
+				continue;
+			break;
+		}
+		if (buflen >= sizeof(struct kinfo_proc)) {	/* copy while room remains */
+			fill_eproc(p, &eproc);
+			if (error = copyout((caddr_t)p, &dp->kp_proc,
+			    sizeof(struct proc)))
+				return (error);
+			if (error = copyout((caddr_t)&eproc, &dp->kp_eproc,
+			    sizeof(eproc)))
+				return (error);
+			dp++;
+			buflen -= sizeof(struct kinfo_proc);
+		}
+		needed += sizeof(struct kinfo_proc);	/* counted even when not copied */
+	}
+	if (doingzomb == 0) {
+		p = zombproc;	/* second pass walks the zombproc list */
+		doingzomb++;
+		goto again;
+	}
+	if (where != NULL) {
+		*sizep = (caddr_t)dp - where;	/* bytes actually copied out */
+		if (needed > *sizep)
+			return (ENOMEM);
+	} else {
+		needed += KERN_PROCSLOP;
+		*sizep = needed;
+	}
+	return (0);
+}
+
+/*
+ * Fill in an eproc structure for the specified process.
+ */
+void
+fill_eproc(p, ep)
+	register struct proc *p;
+	register struct eproc *ep;	/* NOTE(review): not cleared here; some fields may stay unset */
+{
+	register struct tty *tp;
+
+	ep->e_paddr = p;
+	ep->e_sess = p->p_pgrp->pg_session;
+	ep->e_pcred = *p->p_cred;
+	ep->e_ucred = *p->p_ucred;
+	if (p->p_stat == SIDL || p->p_stat == SZOMB) {	/* p_vmspace is not consulted for these */
+		ep->e_vm.vm_rssize = 0;
+		ep->e_vm.vm_tsize = 0;
+		ep->e_vm.vm_dsize = 0;
+		ep->e_vm.vm_ssize = 0;
+#ifndef sparc
+		/* ep->e_vm.vm_pmap = XXX; */
+#endif
+	} else {
+		register struct vmspace *vm = p->p_vmspace;
+
+#ifdef pmap_resident_count
+		ep->e_vm.vm_rssize = pmap_resident_count(&vm->vm_pmap); /*XXX*/
+#else
+		ep->e_vm.vm_rssize = vm->vm_rssize;
+#endif
+		ep->e_vm.vm_tsize = vm->vm_tsize;
+		ep->e_vm.vm_dsize = vm->vm_dsize;
+		ep->e_vm.vm_ssize = vm->vm_ssize;
+#ifndef sparc
+		ep->e_vm.vm_pmap = vm->vm_pmap;
+#endif
+	}
+	if (p->p_pptr)
+		ep->e_ppid = p->p_pptr->p_pid;
+	else
+		ep->e_ppid = 0;		/* no parent pointer */
+	ep->e_pgid = p->p_pgrp->pg_id;
+	ep->e_jobc = p->p_pgrp->pg_jobc;
+	if ((p->p_flag & P_CONTROLT) &&
+	     (tp = ep->e_sess->s_ttyp)) {
+		ep->e_tdev = tp->t_dev;
+		ep->e_tpgid = tp->t_pgrp ? tp->t_pgrp->pg_id : NO_PID;
+		ep->e_tsess = tp->t_session;
+	} else
+		ep->e_tdev = NODEV;	/* e_tpgid and e_tsess are left as found */
+	ep->e_flag = ep->e_sess->s_ttyvp ? EPROC_CTTY : 0;
+	if (SESS_LEADER(p))
+		ep->e_flag |= EPROC_SLEADER;
+	if (p->p_wmesg)		/* e_wmesg is left as found when p_wmesg is NULL */
+		strncpy(ep->e_wmesg, p->p_wmesg, WMESGLEN);	/* NOTE(review): no explicit NUL added */
+	ep->e_xsize = ep->e_xrssize = 0;
+	ep->e_xccount = ep->e_xswrss = 0;
+}
+
+#ifdef COMPAT_43
+#include <sys/socket.h>
+#define	KINFO_PROC		(0<<8)
+#define	KINFO_RT		(1<<8)
+#define	KINFO_VNODE		(2<<8)
+#define	KINFO_FILE		(3<<8)
+#define	KINFO_METER		(4<<8)
+#define	KINFO_LOADAVG		(5<<8)
+#define	KINFO_CLOCKRATE		(6<<8)
+
+struct getkerninfo_args {
+	int	op;
+	char	*where;
+	int	*size;
+	int	arg;
+};
+
+ogetkerninfo(p, uap, retval)
+	struct proc *p;
+	register struct getkerninfo_args *uap;
+	int *retval;		/* out: size reported by the handler */
+{
+	int error, name[5];
+	u_int size = 0;		/* stays 0 when the caller passes no size pointer */
+
+	if (uap->size &&
+	    (error = copyin((caddr_t)uap->size, (caddr_t)&size, sizeof(size))))
+		return (error);
+
+	switch (uap->op & 0xff00) {	/* bits 8-15 of op pick the request */
+
+	case KINFO_RT:
+		name[0] = PF_ROUTE;
+		name[1] = 0;
+		name[2] = (uap->op & 0xff0000) >> 16;
+		name[3] = uap->op & 0xff;
+		name[4] = uap->arg;
+		error = net_sysctl(name, 5, uap->where, &size, NULL, 0, p);
+		break;
+
+	case KINFO_VNODE:
+		name[0] = KERN_VNODE;
+		error = kern_sysctl(name, 1, uap->where, &size, NULL, 0, p);
+		break;
+
+	case KINFO_PROC:
+		name[0] = KERN_PROC;
+		name[1] = uap->op & 0xff;	/* low byte of op is the process selector */
+		name[2] = uap->arg;
+		error = kern_sysctl(name, 3, uap->where, &size, NULL, 0, p);
+		break;
+
+	case KINFO_FILE:
+		name[0] = KERN_FILE;
+		error = kern_sysctl(name, 1, uap->where, &size, NULL, 0, p);
+		break;
+
+	case KINFO_METER:
+		name[0] = VM_METER;
+		error = vm_sysctl(name, 1, uap->where, &size, NULL, 0, p);
+		break;
+
+	case KINFO_LOADAVG:
+		name[0] = VM_LOADAVG;
+		error = vm_sysctl(name, 1, uap->where, &size, NULL, 0, p);
+		break;
+
+	case KINFO_CLOCKRATE:
+		name[0] = KERN_CLOCKRATE;
+		error = kern_sysctl(name, 1, uap->where, &size, NULL, 0, p);
+		break;
+
+	default:
+		return (EOPNOTSUPP);
+	}
+	if (error)
+		return (error);
+	*retval = size;
+	if (uap->size)
+		error = copyout((caddr_t)&size, (caddr_t)uap->size,
+		    sizeof(size));
+	return (error);
+}
+#endif /* COMPAT_43 */
diff --git a/sys/kern/kern_tc.c b/sys/kern/kern_tc.c
new file mode 100644
index 00000000000..f42900cb75d
--- /dev/null
+++ b/sys/kern/kern_tc.c
@@ -0,0 +1,528 @@
+/*-
+ * Copyright (c) 1982, 1986, 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)kern_clock.c	8.5 (Berkeley) 1/21/94
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/dkstat.h>
+#include <sys/callout.h>
+#include <sys/kernel.h>
+#include <sys/proc.h>
+#include <sys/resourcevar.h>
+
+#include <machine/cpu.h>
+
+#ifdef GPROF
+#include <sys/gmon.h>
+#endif
+
+/*
+ * Clock handling routines.
+ *
+ * This code is written to operate with two timers that run independently of
+ * each other.  The main clock, running hz times per second, is used to keep
+ * track of real time.  The second timer handles kernel and user profiling,
+ * and does resource use estimation.  If the second timer is programmable,
+ * it is randomized to avoid aliasing between the two clocks.  For example,
+ * the randomization prevents an adversary from always giving up the cpu
+ * just before its quantum expires.  Otherwise, it would never accumulate
+ * cpu ticks.  The mean frequency of the second timer is stathz.
+ *
+ * If no second timer exists, stathz will be zero; in this case we drive
+ * profiling and statistics off the main clock.  This WILL NOT be accurate;
+ * do not do it unless absolutely necessary.
+ *
+ * The statistics clock may (or may not) be run at a higher rate while
+ * profiling.  This profile clock runs at profhz.  We require that profhz
+ * be an integral multiple of stathz.
+ *
+ * If the statistics clock is running fast, it must be divided by the ratio
+ * profhz/stathz for statistics.  (For profiling, every tick counts.)
+ */
+
+/*
+ * TODO:
+ *	allocate more timeout table slots when table overflows.
+ */
+
+/*
+ * Bump a timeval by a small number of usec's (carries at most one second).
+ */
+#define BUMPTIME(t, usec) do { \
+	register volatile struct timeval *tp = (t); \
+	register long us; \
+ \
+	tp->tv_usec = us = tp->tv_usec + (usec); \
+	if (us >= 1000000) { \
+		tp->tv_usec = us - 1000000; \
+		tp->tv_sec++; \
+	} \
+} while (0)
+
+int	stathz;			/* statistics clock rate; 0 if no second timer */
+int	profhz;			/* profiling clock rate; see initclocks() */
+int	profprocs;		/* number of processes being profiled */
+int	ticks;			/* incremented once per hardclock() call */
+static int psdiv, pscnt;	/* prof => stat divider */
+int	psratio;		/* ratio: prof / stat */
+
+volatile struct	timeval time;		/* time of day, bumped by hardclock() */
+volatile struct	timeval mono_time;	/* bumped in step with time */
+
+/*
+ * Set up the clock rates and start the clocks running.
+ */
+void
+initclocks()
+{
+	register int statrate;
+
+	/* No prof => stat division yet; machine-specific code does the rest. */
+	pscnt = 1;
+	psdiv = 1;
+	cpu_initclocks();
+
+	/*
+	 * The statistics rate falls back to hz when there is no separate
+	 * statistics timer, and profhz defaults to the statistics rate.
+	 * psratio is the resulting profhz/stathz quotient.
+	 */
+	statrate = (stathz != 0) ? stathz : hz;
+	if (profhz == 0)
+		profhz = statrate;
+	psratio = profhz / statrate;
+}
+
+/*
+ * Real-time clock interrupt, hz times a second: callouts, itimers, time.
+ */
+void
+hardclock(frame)
+	register struct clockframe *frame;
+{
+	register struct callout *p1;
+	register struct proc *p;
+	register int delta, needsoft;	/* usec to add; a callout is due */
+	extern int tickdelta;		/* set by adjtime() */
+	extern long timedelta;		/* set by adjtime() */
+
+	/*
+	 * Update real-time timeout queue.
+	 * At front of queue are some number of events which are ``due''.
+	 * The time to these is <= 0 and if negative represents the
+	 * number of ticks which have passed since it was supposed to happen.
+	 * The rest of the q elements (times > 0) are events yet to happen,
+	 * where the time for each is given as a delta from the previous.
+	 * Decrementing just the first of these serves to decrement the time
+	 * to all events.
+	 */
+	needsoft = 0;
+	for (p1 = calltodo.c_next; p1 != NULL; p1 = p1->c_next) {
+		if (--p1->c_time > 0)	/* first pending entry: done */
+			break;
+		needsoft = 1;		/* something is due or overdue */
+		if (p1->c_time == 0)	/* just came due; later ones wait */
+			break;
+	}
+
+	p = curproc;
+	if (p) {
+		register struct pstats *pstats;
+
+		/*
+		 * Age the virtual (user mode only) and profiling timers.
+		 */
+		pstats = p->p_stats;
+		if (CLKF_USERMODE(frame) &&
+		    timerisset(&pstats->p_timer[ITIMER_VIRTUAL].it_value) &&
+		    itimerdecr(&pstats->p_timer[ITIMER_VIRTUAL], tick) == 0)
+			psignal(p, SIGVTALRM);
+		if (timerisset(&pstats->p_timer[ITIMER_PROF].it_value) &&
+		    itimerdecr(&pstats->p_timer[ITIMER_PROF], tick) == 0)
+			psignal(p, SIGPROF);
+	}
+
+	/*
+	 * If no separate statistics clock is available, run it from here.
+	 */
+	if (stathz == 0)
+		statclock(frame);
+
+	/*
+	 * Increment the time-of-day.  The increment is just ``tick'' unless
+	 * we are still adjusting the clock; see adjtime().
+	 */
+	ticks++;
+	if (timedelta == 0)
+		delta = tick;
+	else {
+		delta = tick + tickdelta;	/* tickdelta may be negative */
+		timedelta -= tickdelta;		/* correction still owed */
+	}
+	BUMPTIME(&time, delta);
+	BUMPTIME(&mono_time, delta);
+
+	/*
+	 * Process callouts at a very low cpu priority, so we don't keep the
+	 * relatively high clock interrupt priority any longer than necessary.
+	 */
+	if (needsoft) {
+		if (CLKF_BASEPRI(frame)) {
+			/*
+			 * Save the overhead of a software interrupt;
+			 * it will happen as soon as we return, so do it now.
+			 */
+			(void)splsoftclock();
+			softclock();
+		} else
+			setsoftclock();
+	}
+}
+
+/*
+ * Software (low priority) clock interrupt.
+ * Run each due (c_time <= 0) event at the head of the timeout queue.
+ */
+/*ARGSUSED*/
+void
+softclock()
+{
+	register struct callout *c;
+	register void *arg;
+	register void (*func) __P((void *));
+	register int s;
+
+	s = splhigh();
+	while ((c = calltodo.c_next) != NULL && c->c_time <= 0) {
+		func = c->c_func;	/* save: the entry is freed below */
+		arg = c->c_arg;
+		calltodo.c_next = c->c_next;	/* unlink from the queue */
+		c->c_next = callfree;		/* and push on the free list */
+		callfree = c;
+		splx(s);		/* call the handler at entry level */
+		(*func)(arg);
+		(void) splhigh();	/* re-block before touching the queue */
+	}
+	splx(s);
+}
+
+/*
+ * timeout --
+ *	Execute a function after a specified length of time.
+ *
+ * untimeout --
+ *	Cancel previous timeout function call.
+ *
+ *	See AT&T BCI Driver Reference Manual for specification.  This
+ *	implementation differs from that one in that no identification
+ *	value is returned from timeout, rather, the original arguments
+ *	to timeout are used to identify entries for untimeout.
+ */
+void
+timeout(ftn, arg, ticks)	/* queue ftn(arg) to be called in ticks ticks */
+	void (*ftn) __P((void *));
+	void *arg;
+	register int ticks;	/* shadows the global ticks counter */
+{
+	register struct callout *new, *p, *t;
+	register int s;
+
+	if (ticks <= 0)		/* always wait for at least one tick */
+		ticks = 1;
+
+	/* Lock out the clock. */
+	s = splhigh();
+
+	/* Fill in the next free callout structure. */
+	if (callfree == NULL)
+		panic("timeout table full");
+	new = callfree;
+	callfree = new->c_next;
+	new->c_arg = arg;
+	new->c_func = ftn;
+
+	/*
+	 * The time for each event is stored as a difference from the time
+	 * of the previous event on the queue.  Walk the queue, correcting
+	 * the ticks argument for queue entries passed.  Correct the ticks
+	 * value for the queue entry immediately after the insertion point
+	 * as well.  Watch out for negative c_time values; these represent
+	 * overdue events.
+	 */
+	for (p = &calltodo;
+	    (t = p->c_next) != NULL && ticks > t->c_time; p = t)
+		if (t->c_time > 0)
+			ticks -= t->c_time;
+	new->c_time = ticks;	/* now relative to predecessor p */
+	if (t != NULL)
+		t->c_time -= ticks;	/* successor is relative to new */
+
+	/* Insert the new entry into the queue. */
+	p->c_next = new;
+	new->c_next = t;
+	splx(s);
+}
+
+void
+untimeout(ftn, arg)	/* cancel the first queued call matching ftn and arg */
+	void (*ftn) __P((void *));
+	void *arg;
+{
+	register struct callout *p, *t;
+	register int s;
+
+	s = splhigh();
+	for (p = &calltodo; (t = p->c_next) != NULL; p = t)
+		if (t->c_func == ftn && t->c_arg == arg) {
+			/* Give a pending entry's delta to its successor. */
+			if (t->c_next && t->c_time > 0)
+				t->c_next->c_time += t->c_time;
+
+			/* Move entry from callout queue to callfree queue. */
+			p->c_next = t->c_next;
+			t->c_next = callfree;
+			callfree = t;
+			break;
+		}
+	splx(s);	/* no match leaves the queue untouched */
+}
+
+/*
+ * Compute number of hz until specified time.  Used to
+ * compute third argument to timeout() from an absolute time.
+ */
+int
+hzto(tv)
+	struct timeval *tv;
+{
+	register long ticks, sec;	/* ticks shadows the global counter */
+	int s;
+
+	/*
+	 * If number of milliseconds will fit in 32 bit arithmetic,
+	 * then compute number of milliseconds to time and scale to
+	 * ticks.  Otherwise just compute number of hz in time, rounding
+	 * times greater than representable to maximum value.
+	 * NOTE(review): tick / 1000 below is 0 (a zero divide) if tick < 1000.
+	 * Delta times less than 25 days can be computed ``exactly''.
+	 * Maximum value for any timeout in 10ms ticks is 250 days.
+	 */
+	s = splhigh();
+	sec = tv->tv_sec - time.tv_sec;
+	if (sec <= 0x7fffffff / 1000 - 1000)
+		ticks = ((tv->tv_sec - time.tv_sec) * 1000 +
+			(tv->tv_usec - time.tv_usec) / 1000) / (tick / 1000);
+	else if (sec <= 0x7fffffff / hz)
+		ticks = sec * hz;	/* whole seconds only */
+	else
+		ticks = 0x7fffffff;	/* clamp to the largest value */
+	splx(s);
+	return (ticks);
+}
+
+/*
+ * Start profiling on a process; the first profiled process switches a
+ * separate statistics clock (if there is one) over to profhz.
+ * Kernel profiling passes proc0 which never exits and hence
+ * keeps the profile clock running constantly.
+ */
+void
+startprofclock(p)
+	register struct proc *p;
+{
+	int opri;
+
+	if (p->p_flag & P_PROFIL)
+		return;			/* already being profiled */
+	p->p_flag |= P_PROFIL;
+	if (++profprocs != 1 || stathz == 0)
+		return;
+	opri = splstatclock();
+	psdiv = pscnt = psratio;
+	setstatclockrate(profhz);
+	splx(opri);
+}
+
+/*
+ * Stop profiling on a process; the last one restores the stathz rate.
+ */
+void
+stopprofclock(p)
+	register struct proc *p;
+{
+	int opri;
+
+	if ((p->p_flag & P_PROFIL) == 0)
+		return;			/* was not being profiled */
+	p->p_flag &= ~P_PROFIL;
+	if (--profprocs != 0 || stathz == 0)
+		return;
+	opri = splstatclock();
+	psdiv = pscnt = 1;
+	setstatclockrate(stathz);
+	splx(opri);
+}
+
+int	dk_ndrive = DK_NDRIVE;	/* DK_NDRIVE held in a variable */
+
+/*
+ * Statistics clock.  Grab profile sample, and if divider reaches 0,
+ * do process and kernel statistics.
+ */
+void
+statclock(frame)
+	register struct clockframe *frame;
+{
+#ifdef GPROF
+	register struct gmonparam *g;
+#endif
+	register struct proc *p;
+	register int i;
+
+	if (CLKF_USERMODE(frame)) {
+		p = curproc;
+		if (p->p_flag & P_PROFIL)
+			addupc_intr(p, CLKF_PC(frame), 1);
+		if (--pscnt > 0)	/* profiling-only tick: no statistics */
+			return;
+		/*
+		 * Came from user mode; CPU was in user state.
+		 * Charge the tick to the process and to nice or user time.
+		 */
+		p->p_uticks++;
+		if (p->p_nice > NZERO)
+			cp_time[CP_NICE]++;
+		else
+			cp_time[CP_USER]++;
+	} else {
+#ifdef GPROF
+		/*
+		 * Kernel statistics are just like addupc_intr, only easier.
+		 */
+		g = &_gmonparam;
+		if (g->state == GMON_PROF_ON) {
+			i = CLKF_PC(frame) - g->lowpc;
+			if (i < g->textsize) {
+				i /= HISTFRACTION * sizeof(*g->kcount);
+				g->kcount[i]++;
+			}
+		}
+#endif
+		if (--pscnt > 0)	/* profiling-only tick: no statistics */
+			return;
+		/*
+		 * Came from kernel mode, so we were:
+		 * - handling an interrupt,
+		 * - doing syscall or trap work on behalf of the current
+		 *   user process, or
+		 * - spinning in the idle loop.
+		 * Whichever it is, charge the time as appropriate.
+		 * Note that we charge interrupts to the current process,
+		 * regardless of whether they are ``for'' that process,
+		 * so that we know how much of its real time was spent
+		 * in ``non-process'' (i.e., interrupt) work.
+		 */
+		p = curproc;
+		if (CLKF_INTR(frame)) {
+			if (p != NULL)
+				p->p_iticks++;
+			cp_time[CP_INTR]++;
+		} else if (p != NULL) {
+			p->p_sticks++;
+			cp_time[CP_SYS]++;
+		} else
+			cp_time[CP_IDLE]++;
+	}
+	pscnt = psdiv;		/* rearm the prof => stat divider */
+
+	/*
+	 * We maintain statistics shown by user-level statistics
+	 * programs:  the amount of time in each cpu state, and
+	 * the amount of time each of DK_NDRIVE ``drives'' is busy.
+	 *
+	 * XXX	should either run linked list of drives, or (better)
+	 *	grab timestamps in the start & done code.
+	 */
+	for (i = 0; i < DK_NDRIVE; i++)
+		if (dk_busy & (1 << i))	/* bit i set: charge drive i */
+			dk_time[i]++;
+
+	/*
+	 * We adjust the priority of the current process.  The priority of
+	 * a process gets worse as it accumulates CPU time.  The cpu usage
+	 * estimator (p_estcpu) is increased here.  The formula for computing
+	 * priorities (in kern_synch.c) will compute a different value each
+	 * time p_estcpu increases by 4.  The cpu usage estimator ramps up
+	 * quite quickly when the process is running (linearly), and decays
+	 * away exponentially, at a rate which is proportionally slower when
+	 * the system is busy.  The basic principle is that the system will
+	 * 90% forget that the process used a lot of CPU time in 5 * loadav
+	 * seconds.  This causes the system to favor processes which haven't
+	 * run much recently, and to round-robin among other processes.
+	 */
+	if (p != NULL) {
+		p->p_cpticks++;
+		if (++p->p_estcpu == 0)	/* wrapped: back off to saturate */
+			p->p_estcpu--;
+		if ((p->p_estcpu & 3) == 0) {	/* every 4th increment */
+			resetpriority(p);
+			if (p->p_priority >= PUSER)
+				p->p_priority = p->p_usrpri;
+		}
+	}
+}
+
+/*
+ * Report the system clock rates as a read-only clockinfo structure.
+ */
+sysctl_clockrate(where, sizep)
+	register char *where;
+	size_t *sizep;
+{
+	struct clockinfo ci;
+
+	/*
+	 * With no separate statistics clock the rate reported is hz.
+	 */
+	ci.stathz = (stathz == 0) ? hz : stathz;
+	ci.profhz = profhz;
+	ci.tick = tick;
+	ci.hz = hz;
+	return (sysctl_rdstruct(where, sizep, NULL, &ci, sizeof(ci)));
+}
diff --git a/sys/kern/kern_time.c b/sys/kern/kern_time.c
new file mode 100644
index 00000000000..4dadcb8e0b9
--- /dev/null
+++ b/sys/kern/kern_time.c
@@ -0,0 +1,416 @@
+/*
+ * Copyright (c) 1982, 1986, 1989, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)kern_time.c	8.1 (Berkeley) 6/10/93
+ */
+
+#include <sys/param.h>
+#include <sys/resourcevar.h>
+#include <sys/kernel.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/vnode.h>
+
+#include <machine/cpu.h>
+
+/* 
+ * Time of day and interval timer support.
+ *
+ * These routines provide the kernel entry points to get and set
+ * the time-of-day and per-process interval timers.  Subroutines
+ * here provide support for adding and subtracting timeval structures
+ * and decrementing interval timers, optionally reloading the interval
+ * timers when they expire.
+ */
+
+struct gettimeofday_args {
+	struct	timeval *tp;		/* user buffer for the time, or 0 */
+	struct	timezone *tzp;		/* user buffer for the zone, or 0 */
+};
+/* ARGSUSED */
+gettimeofday(p, uap, retval)
+	struct proc *p;
+	register struct gettimeofday_args *uap;
+	int *retval;
+{
+	struct timeval now;
+	int error;
+
+	if (uap->tp != 0) {
+		microtime(&now);
+		error = copyout((caddr_t)&now, (caddr_t)uap->tp, sizeof (now));
+		if (error != 0)
+			return (error);
+	}
+	if (uap->tzp == 0)
+		return (0);
+	/* Copy out the global tz record as well. */
+	return (copyout((caddr_t)&tz, (caddr_t)uap->tzp, sizeof (tz)));
+}
+
+struct settimeofday_args {
+	struct	timeval *tv;		/* new time (user pointer), or 0 */
+	struct	timezone *tzp;		/* new zone (user pointer), or 0 */
+};
+/* ARGSUSED */
+settimeofday(p, uap, retval)	/* set time and/or tz; requires suser() */
+	struct proc *p;
+	struct settimeofday_args *uap;
+	int *retval;
+{
+	struct timeval atv, delta;
+	struct timezone atz;
+	int error, s;
+
+	if (error = suser(p->p_ucred, &p->p_acflag))
+		return (error);
+	/* Verify all parameters before changing time. */
+	if (uap->tv &&
+	    (error = copyin((caddr_t)uap->tv, (caddr_t)&atv, sizeof(atv))))
+		return (error);
+	if (uap->tzp &&
+	    (error = copyin((caddr_t)uap->tzp, (caddr_t)&atz, sizeof(atz))))
+		return (error);
+	if (uap->tv) {
+		/* WHAT DO WE DO ABOUT PENDING REAL-TIME TIMEOUTS??? */
+		s = splclock();
+		/* nb. delta.tv_usec may be < 0, but this is OK here */
+		delta.tv_sec = atv.tv_sec - time.tv_sec;
+		delta.tv_usec = atv.tv_usec - time.tv_usec;
+		time = atv;
+		(void) splsoftclock();	/* presumably lowers level -- confirm */
+		timevaladd(&boottime, &delta);	/* shift by the same step */
+		timevalfix(&boottime);
+		timevaladd(&runtime, &delta);	/* shift by the same step */
+		timevalfix(&runtime);
+		LEASE_UPDATETIME(delta.tv_sec);
+		splx(s);
+		resettodr();
+	}
+	if (uap->tzp)
+		tz = atz;
+	return (0);
+}
+
+extern	int tickadj;			/* "standard" clock skew, us./tick */
+int	tickdelta;			/* current clock skew, us. per tick */
+long	timedelta;			/* unapplied time correction, us. */
+long	bigadj = 1000000;		/* use 10x skew beyond +-bigadj us. */
+
+struct adjtime_args {
+	struct timeval *delta;		/* requested correction (user ptr) */
+	struct timeval *olddelta;	/* if set, gets the unapplied part */
+};
+/* ARGSUSED */
+adjtime(p, uap, retval)		/* skew the clock gradually; needs suser() */
+	struct proc *p;
+	register struct adjtime_args *uap;
+	int *retval;
+{
+	struct timeval atv;
+	register long ndelta, ntickdelta, odelta;
+	int s, error;
+
+	if (error = suser(p->p_ucred, &p->p_acflag))
+		return (error);
+	if (error =
+	    copyin((caddr_t)uap->delta, (caddr_t)&atv, sizeof(struct timeval)))
+		return (error);
+
+	/*
+	 * Compute the total correction and the rate at which to apply it.
+	 * Round the adjustment down to a whole multiple of the per-tick
+	 * delta, so that after some number of incremental changes in
+	 * hardclock(), timedelta will become zero, lest the correction
+	 * overshoot and start taking us away from the desired final time.
+	 */
+	ndelta = atv.tv_sec * 1000000 + atv.tv_usec;
+	if (ndelta > bigadj || ndelta < -bigadj)	/* big either way */
+		ntickdelta = 10 * tickadj;
+	else
+		ntickdelta = tickadj;
+	if (ndelta % ntickdelta)
+		ndelta = ndelta / ntickdelta * ntickdelta;
+
+	/*
+	 * To make hardclock()'s job easier, make the per-tick delta negative
+	 * if we want time to run slower; then hardclock can simply compute
+	 * tick + tickdelta, and subtract tickdelta from timedelta.
+	 */
+	if (ndelta < 0)
+		ntickdelta = -ntickdelta;
+	s = splclock();
+	odelta = timedelta;	/* correction that was still outstanding */
+	timedelta = ndelta;
+	tickdelta = ntickdelta;
+	splx(s);
+
+	if (uap->olddelta) {
+		atv.tv_sec = odelta / 1000000;
+		atv.tv_usec = odelta % 1000000;
+		error = copyout((caddr_t)&atv, (caddr_t)uap->olddelta,
+		    sizeof(struct timeval));	/* report a bad olddelta */
+	}
+	return (error);		/* still 0 here unless that copyout failed */
+}
+
+/*
+ * Get value of an interval timer.  The process virtual and
+ * profiling virtual time timers are kept in the p_stats area, since
+ * they can be swapped out.  These are kept internally in the
+ * way they are specified externally: in time until they expire.
+ *
+ * The real time interval timer is kept in the process table slot
+ * for the process, and its value (it_value) is kept as an
+ * absolute time rather than as a delta, so that it is easy to keep
+ * periodic real-time signals from drifting.
+ *
+ * Virtual time timers are processed in the hardclock() routine of
+ * kern_clock.c.  The real time timer is processed by a timeout
+ * routine, called from the softclock() routine.  Since a callout
+ * may be delayed in real time due to interrupt processing in the system,
+ * it is possible for the real time timeout routine (realitexpire, given below),
+ * to be delayed in real time past when it is supposed to occur.  It
+ * does not suffice, therefore, to reload the real timer .it_value from the
+ * real time timer's .it_interval.  Rather, we compute the next time in
+ * absolute time the timer should go off.
+ */
+struct getitimer_args {
+	u_int	which;			/* ITIMER_* selector */
+	struct	itimerval *itv;		/* user buffer for the result */
+};
+/* ARGSUSED */
+getitimer(p, uap, retval)
+	struct proc *p;
+	register struct getitimer_args *uap;
+	int *retval;
+{
+	struct itimerval cur;
+	int opri;
+
+	if (uap->which > ITIMER_PROF)
+		return (EINVAL);
+	opri = splclock();
+	if (uap->which != ITIMER_REAL)
+		cur = p->p_stats->p_timer[uap->which];
+	else {
+		/*
+		 * The real timer's .it_value is kept as an absolute time.
+		 * Report it relative to the current time, or as zero when
+		 * that time has passed; an unset value is left alone.
+		 */
+		cur = p->p_realtimer;
+		if (timerisset(&cur.it_value)) {
+			if (timercmp(&cur.it_value, &time, <))
+				timerclear(&cur.it_value);
+			else
+				timevalsub(&cur.it_value,
+				    (struct timeval *)&time);
+		}
+	}
+	splx(opri);
+	return (copyout((caddr_t)&cur, (caddr_t)uap->itv, sizeof (cur)));
+}
+
+struct setitimer_args {
+	u_int	which;			/* ITIMER_* selector */
+	struct	itimerval *itv, *oitv;	/* new value in, old value out */
+};
+/* ARGSUSED */
+setitimer(p, uap, retval)	/* load a timer, returning the old value first */
+	struct proc *p;
+	register struct setitimer_args *uap;
+	int *retval;
+{
+	struct itimerval aitv;
+	register struct itimerval *itvp;
+	int s, error;
+
+	if (uap->which > ITIMER_PROF)
+		return (EINVAL);
+	itvp = uap->itv;	/* keep it: uap->itv is overwritten below */
+	if (itvp && (error = copyin((caddr_t)itvp, (caddr_t)&aitv,
+	    sizeof(struct itimerval))))
+		return (error);
+	/* Reuse uap as getitimer()'s arguments, with itv aimed at oitv. */
+	if ((uap->itv = uap->oitv) && (error = getitimer(p, uap, retval)))
+		return (error);
+	if (itvp == 0)		/* no new value: this was only a query */
+		return (0);
+	if (itimerfix(&aitv.it_value) || itimerfix(&aitv.it_interval))
+		return (EINVAL);
+	s = splclock();
+	if (uap->which == ITIMER_REAL) {
+		untimeout(realitexpire, (caddr_t)p);	/* drop the old one */
+		if (timerisset(&aitv.it_value)) {
+			/* Stored as an absolute expiry time. */
+			timevaladd(&aitv.it_value, (struct timeval *)&time);
+			timeout(realitexpire, (caddr_t)p, hzto(&aitv.it_value));
+		}
+		p->p_realtimer = aitv;
+	} else
+		p->p_stats->p_timer[uap->which] = aitv;
+	splx(s);
+	return (0);
+}
+
+/*
+ * Real interval timer expired:
+ * send process whose timer expired an alarm signal.
+ * If time is not set up to reload, then just return.
+ * Else compute next time timer should go off which is > current time.
+ * This is where delay in processing this timeout causes multiple
+ * SIGALRM calls to be compressed into one.
+ */
+void
+realitexpire(arg)
+	void *arg;		/* the struct proc whose timer expired */
+{
+	register struct proc *p;
+	int s;
+
+	p = (struct proc *)arg;
+	psignal(p, SIGALRM);
+	if (!timerisset(&p->p_realtimer.it_interval)) {
+		timerclear(&p->p_realtimer.it_value);	/* one-shot: disarm */
+		return;
+	}
+	for (;;) {
+		s = splclock();
+		/* Step by whole intervals until the expiry is in the future. */
+		timevaladd(&p->p_realtimer.it_value,
+		    &p->p_realtimer.it_interval);
+		if (timercmp(&p->p_realtimer.it_value, &time, >)) {
+			timeout(realitexpire, (caddr_t)p,
+			    hzto(&p->p_realtimer.it_value));
+			splx(s);
+			return;
+		}
+		splx(s);
+	}
+}
+
+/*
+ * Validate a timeval destined for the .it_value or .it_interval
+ * of an interval timer.  Returns EINVAL when it is out of range,
+ * else 0 after rounding a non-zero value shorter than one clock
+ * tick up to a full tick.
+ */
+itimerfix(tv)
+	struct timeval *tv;
+{
+	if (tv->tv_sec < 0 || tv->tv_sec > 100000000)
+		return (EINVAL);
+	if (tv->tv_usec < 0 || tv->tv_usec >= 1000000)
+		return (EINVAL);
+	if (tv->tv_sec == 0 && tv->tv_usec > 0 && tv->tv_usec < tick)
+		tv->tv_usec = tick;
+	return (0);
+}
+
+/*
+ * Decrement an interval timer by a specified number
+ * of microseconds, which must be less than a second,
+ * i.e. < 1000000.  If the timer expires, then reload
+ * it.  In this case, carry over (usec - old value) to
+ * reduce the value reloaded into the timer so that
+ * the timer does not drift.  This routine assumes
+ * that it is called in a context where the timers
+ * on which it is operating cannot change in value.
+ */
+itimerdecr(itp, usec)		/* returns 0 on expiry, 1 if still running */
+	register struct itimerval *itp;
+	int usec;
+{
+
+	if (itp->it_value.tv_usec < usec) {
+		if (itp->it_value.tv_sec == 0) {
+			/* expired, and already in next interval */
+			usec -= itp->it_value.tv_usec;	/* the overshoot */
+			goto expire;
+		}
+		itp->it_value.tv_usec += 1000000;	/* borrow a second */
+		itp->it_value.tv_sec--;
+	}
+	itp->it_value.tv_usec -= usec;
+	usec = 0;		/* nothing to carry into a reload */
+	if (timerisset(&itp->it_value))
+		return (1);
+	/* expired, exactly at end of interval */
+expire:
+	if (timerisset(&itp->it_interval)) {
+		itp->it_value = itp->it_interval;	/* reload */
+		itp->it_value.tv_usec -= usec;	/* less the overshoot */
+		if (itp->it_value.tv_usec < 0) {
+			itp->it_value.tv_usec += 1000000;
+			itp->it_value.tv_sec--;
+		}
+	} else
+		itp->it_value.tv_usec = 0;		/* sec is already 0 */
+	return (0);
+}
+
+/*
+ * Add and subtract routines for timevals.
+ * N.B.: subtract routine doesn't deal with
+ * results which are before the beginning,
+ * it just gets very confused in this case.
+ * Caveat emptor.
+ */
+timevaladd(t1, t2)		/* *t1 += *t2, then renormalize *t1 */
+	struct timeval *t1, *t2;
+{
+
+	t1->tv_usec = t1->tv_usec + t2->tv_usec;
+	t1->tv_sec = t1->tv_sec + t2->tv_sec;
+	timevalfix(t1);
+}
+
+timevalsub(t1, t2)		/* *t1 -= *t2, then renormalize *t1 */
+	struct timeval *t1, *t2;
+{
+
+	t1->tv_usec = t1->tv_usec - t2->tv_usec;
+	t1->tv_sec = t1->tv_sec - t2->tv_sec;
+	timevalfix(t1);
+}
+
+timevalfix(t1)
+	struct timeval *t1;
+{
+
+	/* Carry at most one second between tv_usec and tv_sec. */
+	if (t1->tv_usec >= 1000000) {
+		t1->tv_usec -= 1000000;
+		t1->tv_sec++;
+	} else if (t1->tv_usec < 0) {
+		t1->tv_usec += 1000000;
+		t1->tv_sec--;
+	}
+}
diff --git a/sys/kern/kern_timeout.c b/sys/kern/kern_timeout.c
new file mode 100644
index 00000000000..f42900cb75d
--- /dev/null
+++ b/sys/kern/kern_timeout.c
@@ -0,0 +1,528 @@
+/*-
+ * Copyright (c) 1982, 1986, 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)kern_clock.c	8.5 (Berkeley) 1/21/94
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/dkstat.h>
+#include <sys/callout.h>
+#include <sys/kernel.h>
+#include <sys/proc.h>
+#include <sys/resourcevar.h>
+
+#include <machine/cpu.h>
+
+#ifdef GPROF
+#include <sys/gmon.h>
+#endif
+
+/*
+ * Clock handling routines.
+ *
+ * This code is written to operate with two timers that run independently of
+ * each other.  The main clock, running hz times per second, is used to keep
+ * track of real time.  The second timer handles kernel and user profiling,
+ * and does resource use estimation.  If the second timer is programmable,
+ * it is randomized to avoid aliasing between the two clocks.  For example,
+ * the randomization prevents an adversary from always giving up the cpu
+ * just before its quantum expires.  Otherwise, it would never accumulate
+ * cpu ticks.  The mean frequency of the second timer is stathz.
+ *
+ * If no second timer exists, stathz will be zero; in this case we drive
+ * profiling and statistics off the main clock.  This WILL NOT be accurate;
+ * do not do it unless absolutely necessary.
+ *
+ * The statistics clock may (or may not) be run at a higher rate while
+ * profiling.  This profile clock runs at profhz.  We require that profhz
+ * be an integral multiple of stathz.
+ *
+ * If the statistics clock is running fast, it must be divided by the ratio
+ * profhz/stathz for statistics.  (For profiling, every tick counts.)
+ */
+
+/*
+ * TODO:
+ *	allocate more timeout table slots when table overflows.
+ */
+
+/*
+ * Bump a timeval by a small number of usec's; expands to one statement.
+ */
+#define BUMPTIME(t, usec) do { \
+	register volatile struct timeval *tp = (t); \
+	register long us; \
+	/* at most one carry into tv_sec, so usec must stay under a second */ \
+	tp->tv_usec = us = tp->tv_usec + (usec); \
+	if (us >= 1000000) { \
+		tp->tv_usec = us - 1000000; \
+		tp->tv_sec++; \
+	} \
+} while (0)
+
+int	stathz;			/* mean statistics clock rate; 0 if no second timer */
+int	profhz;			/* profiling clock rate; defaulted in initclocks() */
+int	profprocs;		/* count of processes with P_PROFIL set */
+int	ticks;			/* bumped once per hardclock() call */
+static int psdiv, pscnt;	/* prof => stat divider */
+int	psratio;		/* ratio: prof / stat */
+
+volatile struct	timeval time;		/* advanced by hardclock() */
+volatile struct	timeval mono_time;	/* advanced in step with time */
+
+/*
+ * Initialize clock frequencies and start both clocks running.
+ */
+void
+initclocks()
+{
+	register int statrate;
+
+	/* No prof => stat division yet; machine-dependent setup is next. */
+	pscnt = 1;
+	psdiv = 1;
+	cpu_initclocks();
+
+	/*
+	 * With stathz == 0 the main clock rate stands in for it.
+	 */
+	statrate = stathz;
+	if (statrate == 0)
+		statrate = hz;
+	if (profhz == 0)
+		profhz = statrate;
+	psratio = profhz / statrate;
+}
+
+/*
+ * The real-time timer, interrupting hz times per second.
+ */
+void
+hardclock(frame)
+	register struct clockframe *frame;
+{
+	register struct callout *p1;
+	register struct proc *p;
+	register int delta, needsoft;	/* time step in usec; callouts due */
+	extern int tickdelta;
+	extern long timedelta;
+
+	/*
+	 * Update real-time timeout queue.
+	 * At front of queue are some number of events which are ``due''.
+	 * The time to these is <= 0 and if negative represents the
+	 * number of ticks which have passed since it was supposed to happen.
+	 * The rest of the q elements (times > 0) are events yet to happen,
+	 * where the time for each is given as a delta from the previous.
+	 * Decrementing just the first of these serves to decrement the time
+	 * to all events.
+	 */
+	needsoft = 0;
+	for (p1 = calltodo.c_next; p1 != NULL; p1 = p1->c_next) {
+		if (--p1->c_time > 0)		/* first pending entry: stop */
+			break;
+		needsoft = 1;			/* at least one callout is due */
+		if (p1->c_time == 0)		/* just came due; go no further */
+			break;
+	}
+
+	p = curproc;
+	if (p) {
+		register struct pstats *pstats;
+
+		/*
+		 * Run current process's virtual and profile time, as needed.
+		 */
+		pstats = p->p_stats;
+		if (CLKF_USERMODE(frame) &&
+		    timerisset(&pstats->p_timer[ITIMER_VIRTUAL].it_value) &&
+		    itimerdecr(&pstats->p_timer[ITIMER_VIRTUAL], tick) == 0)
+			psignal(p, SIGVTALRM);
+		if (timerisset(&pstats->p_timer[ITIMER_PROF].it_value) &&
+		    itimerdecr(&pstats->p_timer[ITIMER_PROF], tick) == 0)
+			psignal(p, SIGPROF);
+	}
+
+	/*
+	 * If no separate statistics clock is available, run it from here.
+	 */
+	if (stathz == 0)
+		statclock(frame);
+
+	/*
+	 * Increment the time-of-day.  The increment is just ``tick'' unless
+	 * we are still adjusting the clock; see adjtime().
+	 */
+	ticks++;
+	if (timedelta == 0)
+		delta = tick;
+	else {
+		delta = tick + tickdelta;
+		timedelta -= tickdelta;		/* this much is now applied */
+	}
+	BUMPTIME(&time, delta);
+	BUMPTIME(&mono_time, delta);
+
+	/*
+	 * Process callouts at a very low cpu priority, so we don't keep the
+	 * relatively high clock interrupt priority any longer than necessary.
+	 */
+	if (needsoft) {
+		if (CLKF_BASEPRI(frame)) {
+			/*
+			 * Save the overhead of a software interrupt;
+			 * it will happen as soon as we return, so do it now.
+			 */
+			(void)splsoftclock();
+			softclock();
+		} else
+			setsoftclock();
+	}
+}
+
+/*
+ * Software (low priority) clock interrupt.
+ * Run periodic events from timeout queue.
+ */
+/*ARGSUSED*/
+void
+softclock()
+{
+	register struct callout *c;
+	register void *arg;
+	register void (*func) __P((void *));
+	register int s;
+
+	s = splhigh();
+	while ((c = calltodo.c_next) != NULL && c->c_time <= 0) {
+		func = c->c_func;		/* copy out before recycling c */
+		arg = c->c_arg;
+		calltodo.c_next = c->c_next;	/* unlink from head of queue */
+		c->c_next = callfree;		/* and push on the free list */
+		callfree = c;
+		splx(s);			/* restore the level saved above */
+		(*func)(arg);
+		(void) splhigh();		/* block again before re-reading */
+	}
+	splx(s);
+}
+
+/*
+ * timeout --
+ *	Execute a function after a specified length of time.
+ *
+ * untimeout --
+ *	Cancel previous timeout function call.
+ *
+ *	See AT&T BCI Driver Reference Manual for specification.  This
+ *	implementation differs from that one in that no identification
+ *	value is returned from timeout, rather, the original arguments
+ *	to timeout are used to identify entries for untimeout.
+ */
+void
+timeout(ftn, arg, ticks)
+	void (*ftn) __P((void *));
+	void *arg;
+	register int ticks;
+{
+	register struct callout *new, *p, *t;
+	register int s;
+
+	if (ticks <= 0)			/* always wait at least one tick */
+		ticks = 1;
+
+	/* Lock out the clock. */
+	s = splhigh();
+
+	/* Fill in the next free callout structure. */
+	if (callfree == NULL)
+		panic("timeout table full");
+	new = callfree;
+	callfree = new->c_next;		/* pop it off the free list */
+	new->c_arg = arg;
+	new->c_func = ftn;
+
+	/*
+	 * The time for each event is stored as a difference from the time
+	 * of the previous event on the queue.  Walk the queue, correcting
+	 * the ticks argument for queue entries passed.  Correct the ticks
+	 * value for the queue entry immediately after the insertion point
+	 * as well.  Watch out for negative c_time values; these represent
+	 * overdue events.
+	 */
+	for (p = &calltodo;
+	    (t = p->c_next) != NULL && ticks > t->c_time; p = t)
+		if (t->c_time > 0)	/* overdue entries consume no time */
+			ticks -= t->c_time;
+	new->c_time = ticks;
+	if (t != NULL)
+		t->c_time -= ticks;	/* successor is now relative to new */
+
+	/* Insert the new entry into the queue. */
+	p->c_next = new;
+	new->c_next = t;
+	splx(s);
+}
+
+void
+untimeout(ftn, arg)
+	void (*ftn) __P((void *));
+	void *arg;
+{
+	register struct callout *p, *t;
+	register int s;
+
+	s = splhigh();
+	for (p = &calltodo; (t = p->c_next) != NULL; p = t)
+		if (t->c_func == ftn && t->c_arg == arg) {	/* first match only */
+			/* Increment next entry's tick count. */
+			if (t->c_next && t->c_time > 0)
+				t->c_next->c_time += t->c_time;
+
+			/* Move entry from callout queue to callfree queue. */
+			p->c_next = t->c_next;
+			t->c_next = callfree;
+			callfree = t;
+			break;
+		}
+	splx(s);
+}
+
+/*
+ * Compute number of hz until specified time.  Used to
+ * compute third argument to timeout() from an absolute time.
+ */
+int
+hzto(tv)
+	struct timeval *tv;
+{
+	register long ticks, sec;
+	int s;
+
+	/*
+	 * If number of milliseconds will fit in 32 bit arithmetic,
+	 * then compute number of milliseconds to time and scale to
+	 * ticks.  Otherwise just compute number of hz in time, rounding
+	 * times greater than representable to maximum value.
+	 *
+	 * Delta times less than 25 days can be computed ``exactly''.
+	 * Maximum value for any timeout in 10ms ticks is 250 days.
+	 */
+	s = splhigh();			/* lock out the clock while reading time */
+	sec = tv->tv_sec - time.tv_sec;
+	if (sec <= 0x7fffffff / 1000 - 1000)
+		ticks = ((tv->tv_sec - time.tv_sec) * 1000 +
+			(tv->tv_usec - time.tv_usec) / 1000) / (tick / 1000);	/* NOTE(review): divisor is 0 if tick < 1000 */
+	else if (sec <= 0x7fffffff / hz)
+		ticks = sec * hz;	/* sub-second part is dropped here */
+	else
+		ticks = 0x7fffffff;
+	splx(s);
+	return (ticks);
+}
+
+/*
+ * Start profiling on a process.
+ *
+ * Kernel profiling passes proc0 which never exits and hence
+ * keeps the profile clock running constantly.
+ */
+void
+startprofclock(p)
+	register struct proc *p;
+{
+	int ipl;
+
+	if (p->p_flag & P_PROFIL)	/* already being profiled */
+		return;
+	p->p_flag |= P_PROFIL;
+	if (++profprocs != 1 || stathz == 0)	/* not the first, or no stat clock */
+		return;
+	ipl = splstatclock();
+	psdiv = pscnt = psratio;
+	setstatclockrate(profhz);
+	splx(ipl);
+}
+
+/*
+ * Stop profiling on a process.
+ */
+void
+stopprofclock(p)
+	register struct proc *p;
+{
+	int ipl;
+
+	if ((p->p_flag & P_PROFIL) == 0)	/* was not being profiled */
+		return;
+	p->p_flag &= ~P_PROFIL;
+	if (--profprocs != 0 || stathz == 0)	/* others remain, or no stat clock */
+		return;
+	ipl = splstatclock();
+	psdiv = pscnt = 1;
+	setstatclockrate(stathz);
+	splx(ipl);
+}
+
+int	dk_ndrive = DK_NDRIVE;	/* run-time copy of the DK_NDRIVE constant */
+
+/*
+ * Statistics clock.  Grab profile sample, and if divider reaches 0,
+ * do process and kernel statistics.
+ */
+void
+statclock(frame)
+	register struct clockframe *frame;
+{
+#ifdef GPROF
+	register struct gmonparam *g;
+#endif
+	register struct proc *p;
+	register int i;
+
+	if (CLKF_USERMODE(frame)) {
+		p = curproc;
+		if (p->p_flag & P_PROFIL)
+			addupc_intr(p, CLKF_PC(frame), 1);
+		if (--pscnt > 0)	/* profiling-only tick: no statistics */
+			return;
+		/*
+		 * Came from user mode; CPU was in user state.
+		 * Charge the tick to the process and to nice or user time.
+		 */
+		p->p_uticks++;
+		if (p->p_nice > NZERO)
+			cp_time[CP_NICE]++;
+		else
+			cp_time[CP_USER]++;
+	} else {
+#ifdef GPROF
+		/*
+		 * Kernel statistics are just like addupc_intr, only easier.
+		 */
+		g = &_gmonparam;
+		if (g->state == GMON_PROF_ON) {
+			i = CLKF_PC(frame) - g->lowpc;
+			if (i < g->textsize) {
+				i /= HISTFRACTION * sizeof(*g->kcount);
+				g->kcount[i]++;
+			}
+		}
+#endif
+		if (--pscnt > 0)	/* profiling-only tick: no statistics */
+			return;
+		/*
+		 * Came from kernel mode, so we were:
+		 * - handling an interrupt,
+		 * - doing syscall or trap work on behalf of the current
+		 *   user process, or
+		 * - spinning in the idle loop.
+		 * Whichever it is, charge the time as appropriate.
+		 * Note that we charge interrupts to the current process,
+		 * regardless of whether they are ``for'' that process,
+		 * so that we know how much of its real time was spent
+		 * in ``non-process'' (i.e., interrupt) work.
+		 */
+		p = curproc;
+		if (CLKF_INTR(frame)) {
+			if (p != NULL)
+				p->p_iticks++;
+			cp_time[CP_INTR]++;
+		} else if (p != NULL) {
+			p->p_sticks++;
+			cp_time[CP_SYS]++;
+		} else
+			cp_time[CP_IDLE]++;
+	}
+	pscnt = psdiv;			/* restart the prof => stat countdown */
+
+	/*
+	 * We maintain statistics shown by user-level statistics
+	 * programs:  the amount of time in each cpu state, and
+	 * the amount of time each of DK_NDRIVE ``drives'' is busy.
+	 *
+	 * XXX	should either run linked list of drives, or (better)
+	 *	grab timestamps in the start & done code.
+	 */
+	for (i = 0; i < DK_NDRIVE; i++)
+		if (dk_busy & (1 << i))
+			dk_time[i]++;
+
+	/*
+	 * We adjust the priority of the current process.  The priority of
+	 * a process gets worse as it accumulates CPU time.  The cpu usage
+	 * estimator (p_estcpu) is increased here.  The formula for computing
+	 * priorities (in kern_synch.c) will compute a different value each
+	 * time p_estcpu increases by 4.  The cpu usage estimator ramps up
+	 * quite quickly when the process is running (linearly), and decays
+	 * away exponentially, at a rate which is proportionally slower when
+	 * the system is busy.  The basic principle is that the system will
+	 * 90% forget that the process used a lot of CPU time in 5 * loadav
+	 * seconds.  This causes the system to favor processes which haven't
+	 * run much recently, and to round-robin among other processes.
+	 */
+	if (p != NULL) {
+		p->p_cpticks++;
+		if (++p->p_estcpu == 0)		/* wrapped to 0: step back */
+			p->p_estcpu--;
+		if ((p->p_estcpu & 3) == 0) {	/* every fourth increment */
+			resetpriority(p);
+			if (p->p_priority >= PUSER)
+				p->p_priority = p->p_usrpri;
+		}
+	}
+}
+
+/*
+ * Return information about system clocks.
+ */
+sysctl_clockrate(where, sizep)
+	register char *where;
+	size_t *sizep;
+{
+	struct clockinfo ci;
+
+	/* Snapshot the clock rates; stathz falls back to hz when zero. */
+	ci.stathz = (stathz != 0) ? stathz : hz;
+	ci.profhz = profhz;
+	ci.tick = tick;
+	ci.hz = hz;
+
+	/* Copy the snapshot out through sysctl_rdstruct(). */
+	return (sysctl_rdstruct(where, sizep, NULL, &ci, sizeof(ci)));
+}
diff --git a/sys/kern/kern_xxx.c b/sys/kern/kern_xxx.c
new file mode 100644
index 00000000000..64fac9105d7
--- /dev/null
+++ b/sys/kern/kern_xxx.c
@@ -0,0 +1,138 @@
+/*
+ * Copyright (c) 1982, 1986, 1989, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)kern_xxx.c	8.2 (Berkeley) 11/14/93
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/proc.h>
+#include <sys/reboot.h>
+#include <vm/vm.h>
+#include <sys/sysctl.h>
+
+struct reboot_args {
+	int	opt;		/* value passed through to boot() */
+};
+/* ARGSUSED */
+reboot(p, uap, retval)
+	struct proc *p;
+	struct reboot_args *uap;
+	int *retval;
+{
+	int error = suser(p->p_ucred, &p->p_acflag);
+
+	if (error != 0)		/* nonzero from suser() is the error to return */
+		return (error);
+	boot(uap->opt);
+	return (0);
+}
+
+#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
+
+struct gethostname_args {
+	char	*hostname;
+	u_int	len;
+};
+/* ARGSUSED */
+ogethostname(p, uap, retval)
+	struct proc *p;
+	struct gethostname_args *uap;
+	int *retval;
+{
+	int mib = KERN_HOSTNAME;
+
+	/* One-element name; only the first buffer/length pair is supplied. */
+	return (kern_sysctl(&mib, 1, uap->hostname, &uap->len, 0, 0));
+}
+
+struct sethostname_args {
+	char	*hostname;
+	u_int	len;
+};
+/* ARGSUSED */
+osethostname(p, uap, retval)
+	struct proc *p;
+	register struct sethostname_args *uap;
+	int *retval;
+{
+	int mib = KERN_HOSTNAME;
+	int error;
+
+	error = suser(p->p_ucred, &p->p_acflag);
+	if (error != 0)
+		return (error);
+	return (kern_sysctl(&mib, 1, 0, 0, uap->hostname, uap->len));
+}
+
+extern long hostid;
+
+struct gethostid_args {
+	int	dummy;
+};
+/* ARGSUSED */
+ogethostid(p, uap, retval)
+	struct proc *p;
+	struct gethostid_args *uap;
+	int *retval;
+{
+	long *result = (long *)retval;	/* return slot, stored through as a long */
+	*result = hostid;
+	return (0);
+}
+#endif /* COMPAT_43 || COMPAT_SUNOS */
+
+#ifdef COMPAT_43
+struct sethostid_args {
+	long	hostid;		/* new value for the global hostid */
+};
+/* ARGSUSED */
+osethostid(p, uap, retval)
+	struct proc *p;
+	struct sethostid_args *uap;
+	int *retval;
+{
+	int error = suser(p->p_ucred, &p->p_acflag);
+
+	if (error != 0)		/* nonzero from suser() is the error to return */
+		return (error);
+	hostid = uap->hostid;
+	return (0);
+}
+
+oquota()
+{
+	/* Takes no arguments and always returns ENOSYS. */
+	return (ENOSYS);
+}
+#endif /* COMPAT_43 */
diff --git a/sys/kern/makesyscalls.sh b/sys/kern/makesyscalls.sh
new file mode 100644
index 00000000000..0ddea0c28fa
--- /dev/null
+++ b/sys/kern/makesyscalls.sh
@@ -0,0 +1,171 @@
+#! /bin/sh -
+#	@(#)makesyscalls.sh	8.1 (Berkeley) 6/10/93
+
+set -e
+
+# name of compat option (guards the old-call declarations in $syssw):
+compat=COMPAT_43
+
+# output files:
+sysnames="syscalls.c"
+syshdr="../sys/syscall.h"
+syssw="init_sysent.c"
+
+# tmp files (pieces of $syssw, concatenated at the end of the script):
+sysdcl="sysent.dcl"
+syscompat="sysent.compat"
+sysent="sysent.switch"
+# -f: the trap also fires on the usage exit, before the tmp files exist
+trap "rm -f $sysdcl $syscompat $sysent" 0
+
+case $# in
+    0)	echo "Usage: $0 input-file" 1>&2
+	exit 1
+	;;
+esac
+
+awk < $1 "
+	BEGIN {
+		sysdcl = \"$sysdcl\"
+		syscompat = \"$syscompat\"
+		sysent = \"$sysent\"
+		sysnames = \"$sysnames\"
+		syshdr = \"$syshdr\"
+		compat = \"$compat\"
+		infile = \"$1\"
+		"'
+		# start each generated file with the top of its header comment
+		printf "/*\n * System call switch table.\n *\n" > sysdcl
+		printf " * DO NOT EDIT-- this file is automatically generated.\n" > sysdcl
+
+		printf "\n#ifdef %s\n", compat > syscompat
+		printf "#define compat(n, name) n, __CONCAT(o,name)\n\n" > syscompat
+
+		printf "/*\n * System call names.\n *\n" > sysnames
+		printf " * DO NOT EDIT-- this file is automatically generated.\n" > sysnames
+
+		printf "/*\n * System call numbers.\n *\n" > syshdr
+		printf " * DO NOT EDIT-- this file is automatically generated.\n" > syshdr
+	}
+	NR == 1 {	# first input line is quoted in each header, then the tables open
+		printf " * created from%s\n */\n\n", $0 > sysdcl
+		printf "#include <sys/param.h>\n" > sysdcl
+		printf "#include <sys/systm.h>\n\n" > sysdcl
+		printf "int\tnosys();\n\n" > sysdcl
+
+		printf "struct sysent sysent[] = {\n" > sysent
+
+		printf " * created from%s\n */\n\n", $0 > sysnames
+		printf "char *syscallnames[] = {\n" > sysnames
+
+		printf " * created from%s\n */\n\n", $0 > syshdr
+		next
+	}
+	NF == 0 || $1 ~ /^;/ {	# skip blank lines and lines starting with ;
+		next
+	}
+	$1 ~ /^#[ 	]*if/ {	# copy #if through and remember the current number
+		print > sysent
+		print > sysdcl
+		print > syscompat
+		print > sysnames
+		savesyscall = syscall
+		next
+	}
+	$1 ~ /^#[ 	]*else/ {	# copy #else through and rewind to the saved number
+		print > sysent
+		print > sysdcl
+		print > syscompat
+		print > sysnames
+		syscall = savesyscall
+		next
+	}
+	$1 ~ /^#/ {	# any other preprocessor line is copied through unchanged
+		print > sysent
+		print > sysdcl
+		print > syscompat
+		print > sysnames
+		next
+	}
+	syscall != $1 {	# column 1 must equal the running syscall number
+		printf "%s: line %d: syscall number out of sync at %d\n", \
+		   infile, NR, syscall
+		printf "line is:\n"
+		print
+		exit 1
+	}
+	{	comment = $4
+		for (i = 5; i <= NF; i++)
+			comment = comment " " $i
+		if (NF < 5)	# no separate name column: reuse column 4
+			$5 = $4
+	}
+	$2 == "STD" {	# declared, dispatched, named and given a SYS_ number
+		printf("int\t%s();\n", $4) > sysdcl
+		printf("\t{ %d, %s },\t\t\t/* %d = %s */\n", \
+		    $3, $4, syscall, $5) > sysent
+		printf("\t\"%s\",\t\t\t/* %d = %s */\n", \
+		    $5, syscall, $5) > sysnames
+		printf("#define\tSYS_%s\t%d\n", \
+		    $5, syscall) > syshdr
+		syscall++
+		next
+	}
+	$2 == "COMPAT" {	# handler is o<name> via compat(); no SYS_ define
+		printf("int\to%s();\n", $4) > syscompat
+		printf("\t{ compat(%d,%s) },\t\t/* %d = old %s */\n", \
+		    $3, $4, syscall, $5) > sysent
+		printf("\t\"old.%s\",\t\t/* %d = old %s */\n", \
+		    $5, syscall, $5) > sysnames
+		printf("\t\t\t\t/* %d is old %s */\n", \
+		    syscall, comment) > syshdr
+		syscall++
+		next
+	}
+	$2 == "LIBCOMPAT" {	# as COMPAT, but SYS_<name> is still defined
+		printf("int\to%s();\n", $4) > syscompat
+		printf("\t{ compat(%d,%s) },\t\t/* %d = old %s */\n", \
+		    $3, $4, syscall, $5) > sysent
+		printf("\t\"old.%s\",\t\t/* %d = old %s */\n", \
+		    $5, syscall, $5) > sysnames
+		printf("#define\tSYS_%s\t%d\t/* compatibility; still used by libc */\n", \
+		    $5, syscall) > syshdr
+		syscall++
+		next
+	}
+	$2 == "OBSOL" {	# slot kept; dispatches to nosys
+		printf("\t{ 0, nosys },\t\t\t/* %d = obsolete %s */\n", \
+		    syscall, comment) > sysent
+		printf("\t\"obs_%s\",\t\t\t/* %d = obsolete %s */\n", \
+		    $4, syscall, comment) > sysnames
+		printf("\t\t\t\t/* %d is obsolete %s */\n", \
+		    syscall, comment) > syshdr
+		syscall++
+		next
+	}
+	$2 == "UNIMPL" {	# slot kept; dispatches to nosys, nothing in syshdr
+		printf("\t{ 0, nosys },\t\t\t/* %d = %s */\n", \
+		    syscall, comment) > sysent
+		printf("\t\"#%d\",\t\t\t/* %d = %s */\n", \
+		    syscall, syscall, comment) > sysnames
+		syscall++
+		next
+	}
+	{	# any other keyword in column 2 is an error
+		printf "%s: line %d: unrecognized keyword %s\n", infile, NR, $2
+		exit 1
+	}
+	END {	# close the compat block and both tables
+		printf("\n#else /* %s */\n", compat) > syscompat
+		printf("#define compat(n, name) 0, nosys\n") > syscompat
+		printf("#endif /* %s */\n\n", compat) > syscompat
+
+		printf("};\n\n") > sysent
+		printf("int\tnsysent = sizeof(sysent) / sizeof(sysent[0]);\n") > sysent
+
+		printf("};\n") > sysnames
+	} '
+
+cat $sysdcl $syscompat $sysent >$syssw
+# the three generated files end up read-only
+chmod 444 $sysnames $syshdr $syssw
diff --git a/sys/kern/subr_autoconf.c b/sys/kern/subr_autoconf.c
new file mode 100644
index 00000000000..af17988c935
--- /dev/null
+++ b/sys/kern/subr_autoconf.c
@@ -0,0 +1,342 @@
+/*
+ * Copyright (c) 1992, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This software was developed by the Computer Systems Engineering group
+ * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
+ * contributed to Berkeley.
+ *
+ * All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Lawrence Berkeley Laboratories.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)subr_autoconf.c	8.1 (Berkeley) 6/10/93
+ *
+ * from: $Header: subr_autoconf.c,v 1.12 93/02/01 19:31:48 torek Exp $ (LBL)
+ */
+
+#include <sys/param.h>
+#include <sys/device.h>
+#include <sys/malloc.h>
+
+/*
+ * Autoconfiguration subroutines.
+ */
+
+/*
+ * ioconf.c exports exactly two names: cfdata and cfroots.  All system
+ * devices and drivers are found via these tables.
+ */
+extern struct cfdata cfdata[];	/* scanned until an entry has a NULL cf_driver */
+extern short cfroots[];		/* indices into cfdata[], ended by a negative value */
+
+#define	ROOT ((struct device *)NULL)	/* parent value used for root devices */
+
+struct matchinfo {
+	cfmatch_t fn;			/* override match function, or NULL */
+	struct	device *parent;		/* parent passed to each match call */
+	void	*aux;			/* opaque argument passed through */
+	struct	cfdata *match;		/* best candidate so far, or NULL */
+	int	pri;			/* match value returned for `match' */
+};
+
+/*
+ * Apply the matching function and choose the best.  This is used
+ * a few times and we want to keep the code small.
+ */
+static void
+mapply(m, cf)
+	register struct matchinfo *m;
+	register struct cfdata *cf;
+{
+	register int score;
+
+	if (m->fn == NULL)
+		score = (*cf->cf_driver->cd_match)(m->parent, cf, m->aux);
+	else
+		score = (*m->fn)(m->parent, cf, m->aux);
+	if (score <= m->pri)
+		return;		/* no better than the best so far */
+	m->pri = score;
+	m->match = cf;
+}
+
+/*
+ * Iterate over all potential children of some device, calling the given
+ * function (default being the child's match function) for each one.
+ * Nonzero returns are matches; the highest value returned is considered
+ * the best match.  Return the `found child' if we got a match, or NULL
+ * otherwise.  The `aux' pointer is simply passed on through.
+ *
+ * Note that this function is designed so that it can be used to apply
+ * an arbitrary function to all potential children (its return value
+ * can be ignored).
+ */
+struct cfdata *
+config_search(fn, parent, aux)
+	cfmatch_t fn;
+	register struct device *parent;
+	void *aux;
+{
+	register struct cfdata *cand;
+	register short *pidx;
+	struct matchinfo best;
+
+	best.pri = 0;
+	best.match = NULL;
+	best.aux = aux;
+	best.parent = parent;
+	best.fn = fn;
+	for (cand = cfdata; cand->cf_driver; cand++) {
+		/*
+		 * Entries already marked found are skipped; the rest are
+		 * tried once per parent slot that names `parent's entry.
+		 */
+		if (cand->cf_fstate != FSTATE_FOUND) {
+			for (pidx = cand->cf_parents; *pidx >= 0; pidx++)
+				if (&cfdata[*pidx] == parent->dv_cfdata)
+					mapply(&best, cand);
+		}
+	}
+	return (best.match);
+}
+
+/*
+ * Find the given root device.
+ * This is much like config_search, but there is no parent.
+ */
+struct cfdata *
+config_rootsearch(fn, rootname, aux)
+	register cfmatch_t fn;
+	register char *rootname;
+	register void *aux;
+{
+	register struct cfdata *cand;
+	register short *ridx;
+	struct matchinfo best;
+
+	best.pri = 0;
+	best.match = NULL;
+	best.aux = aux;
+	best.parent = ROOT;
+	best.fn = fn;
+	/*
+	 * Try every root entry whose driver name equals rootname.  The
+	 * found-state is not consulted: only one root should ever be
+	 * searched, and that search has to come first.
+	 */
+	for (ridx = cfroots; *ridx >= 0; ridx++) {
+		cand = cfdata + *ridx;
+		if (!strcmp(cand->cf_driver->cd_name, rootname))
+			mapply(&best, cand);
+	}
+	return (best.match);
+}
+
+static char *msgs[3] = { "", " not configured\n", " unsupported\n" };
+
+/*
+ * The given `aux' argument describes a device that has been found
+ * on the given parent, but not necessarily configured.  Locate its
+ * configuration data (config_search with the driver's cd_match) and
+ * attach it, returning 1.  Otherwise call `print', use its result to
+ * pick a trailing message from msgs[] (printed as data), and return 0.
+ */
+int
+config_found(parent, aux, print)
+	struct device *parent;
+	void *aux;
+	cfprint_t print;
+{
+	struct cfdata *cf;
+
+	if ((cf = config_search((cfmatch_t)NULL, parent, aux)) != NULL) {
+		config_attach(parent, cf, aux, print);
+		return (1);
+	}
+	printf("%s", msgs[(*print)(aux, parent->dv_xname)]);	/* never as format */
+	return (0);
+}
+
+/*
+ * As above, but for root devices.
+ */
+int
+config_rootfound(rootname, aux)
+	char *rootname;
+	void *aux;
+{
+	struct cfdata *root = config_rootsearch((cfmatch_t)NULL, rootname, aux);
+
+	if (root == NULL) {
+		printf("root device %s not configured\n", rootname);
+		return (0);
+	}
+	config_attach(ROOT, root, aux, (cfprint_t)NULL);
+	return (1);
+}
+
+/* just like sprintf(buf, "%d") except that it works from the end */
+static char *
+number(ep, n)
+	register char *ep;
+	register int n;
+{
+	/* Terminator first, then digits from least to most significant. */
+	ep[-1] = '\0';
+	for (ep--; n >= 10; n /= 10)
+		*--ep = '0' + (n % 10);
+	/* what remains (n < 10) is the leading digit for non-negative n */
+	ep--;
+	*ep = '0' + n;
+	return (ep);
+}
+
+/*
+ * Attach a found device.  Allocates memory for device variables.
+ */
+void
+config_attach(parent, cf, aux, print)
+	register struct device *parent;
+	register struct cfdata *cf;
+	register void *aux;
+	cfprint_t print;		/* not called when parent is ROOT */
+{
+	register struct device *dev;
+	register struct cfdriver *cd;
+	register size_t lname, lunit;
+	register char *xunit;
+	int myunit;			/* unit number given to this instance */
+	char num[10];
+	static struct device **nextp = &alldevs;
+
+	cd = cf->cf_driver;
+	if (cd->cd_devsize < sizeof(struct device))
+		panic("config_attach");
+	myunit = cf->cf_unit;
+	if (cf->cf_fstate == FSTATE_NOTFOUND)
+		cf->cf_fstate = FSTATE_FOUND;
+	else
+		cf->cf_unit++;		/* entry stays usable for the next unit */
+
+	/* compute length of name and decimal expansion of unit number */
+	lname = strlen(cd->cd_name);
+	xunit = number(&num[sizeof num], myunit);
+	lunit = &num[sizeof num] - xunit;	/* includes the terminating 0 */
+	if (lname + lunit >= sizeof(dev->dv_xname))
+		panic("config_attach: device name too long");
+
+	/* get memory for all device vars */
+	dev = (struct device *)malloc(cd->cd_devsize, M_DEVBUF, M_WAITOK);
+					/* XXX cannot wait! */
+	bzero(dev, cd->cd_devsize);
+	*nextp = dev;			/* link up */
+	nextp = &dev->dv_next;
+	dev->dv_class = cd->cd_class;
+	dev->dv_cfdata = cf;
+	dev->dv_unit = myunit;
+	bcopy(cd->cd_name, dev->dv_xname, lname);
+	bcopy(xunit, dev->dv_xname + lname, lunit);
+	dev->dv_parent = parent;
+	if (parent == ROOT)
+		printf("%s (root)", dev->dv_xname);
+	else {
+		printf("%s at %s", dev->dv_xname, parent->dv_xname);
+		(void) (*print)(aux, (char *)0);
+	}
+
+	/* put this device in the devices array */
+	if (dev->dv_unit >= cd->cd_ndevs) {
+		/*
+		 * Need to expand the array: start at MINALLOCSIZE bytes of
+		 * slots (or twice the old count) and double until dv_unit
+		 * fits, even when the very first unit number is large.
+		 */
+		int old = cd->cd_ndevs, oldbytes, new, newbytes;
+		void **nsp;
+
+		if (old == 0)
+			new = MINALLOCSIZE / sizeof(void *);
+		else
+			new = old * 2;
+		while (new <= dev->dv_unit)
+			new *= 2;
+		oldbytes = old * sizeof(void *);
+		newbytes = new * sizeof(void *);
+		nsp = malloc(newbytes, M_DEVBUF, M_WAITOK);	/*XXX*/
+		bzero(&nsp[old], newbytes - oldbytes);
+		if (old != 0) {
+			bcopy(cd->cd_devs, nsp, oldbytes);
+			free(cd->cd_devs, M_DEVBUF);
+		}
+		cd->cd_ndevs = new;
+		cd->cd_devs = nsp;
+	}
+	if (cd->cd_devs[dev->dv_unit])
+		panic("config_attach: duplicate %s", dev->dv_xname);
+	cd->cd_devs[dev->dv_unit] = dev;
+
+	/*
+	 * Before attaching, clobber any unfound devices that are
+	 * otherwise identical.
+	 */
+	for (cf = cfdata; cf->cf_driver; cf++)
+		if (cf->cf_driver == cd && cf->cf_unit == dev->dv_unit &&
+		    cf->cf_fstate == FSTATE_NOTFOUND)
+			cf->cf_fstate = FSTATE_FOUND;
+	(*cd->cd_attach)(parent, dev, aux);
+}
+
+/*
+ * Attach an event.  These must come from initially-zero space (see
+ * commented-out assignments below), but that occurs naturally for
+ * device instance variables.
+ */
+void
+evcnt_attach(dev, name, ev)
+	struct device *dev;
+	const char *name;
+	struct evcnt *ev;
+{
+	static struct evcnt **tailp = &allevents;
+
+#ifdef DIAGNOSTIC
+	if (sizeof(ev->ev_name) <= strlen(name))
+		panic("evcnt_attach");
+#endif
+	strcpy(ev->ev_name, name);
+	ev->ev_dev = dev;
+	/* ev->ev_count = 0; */
+	/* ev->ev_next = NULL; */
+	*tailp = ev;		/* append to the list rooted at allevents */
+	tailp = &ev->ev_next;
+}
diff --git a/sys/kern/subr_clist.c b/sys/kern/subr_clist.c
new file mode 100644
index 00000000000..fe8f000f87d
--- /dev/null
+++ b/sys/kern/subr_clist.c
@@ -0,0 +1,159 @@
+/*-
+ * Copyright (c) 1982, 1986, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	from: @(#)tty_subr.c	8.2 (Berkeley) 9/5/93
+ */
+
+#include <sys/param.h>
+#include <sys/ioctl.h>
+#include <sys/tty.h>
+
+char cwaiting;
+struct cblock *cfree, *cfreelist;
+int cfreecount, nclist;
+
+void
+clist_init()
+{
+	/*
+	 * Body deleted.
+	 *
+	 * Stub: nothing is initialized here.
+	 */
+}
+
+int
+getc(a1)
+	struct clist *a1;
+{
+	/*
+	 * Body deleted; this stub ignores a1 and always returns 0.
+	 */
+	return ((char)0);
+}
+
+int
+q_to_b(a1, a2, a3)
+	struct clist *a1;
+	char *a2;
+	int a3;
+{
+	/*
+	 * Body deleted; this stub ignores its arguments and returns 0.
+	 */
+	return (0);
+}
+
+int
+ndqb(a1, a2)
+	struct clist *a1;
+	int a2;
+{
+	/*
+	 * Body deleted; this stub ignores its arguments and returns 0.
+	 */
+	return (0);
+}
+
+void
+ndflush(a1, a2)
+	struct clist *a1;
+	int a2;
+{
+	/*
+	 * Body deleted.
+	 *
+	 * Stub: both arguments are ignored and nothing is flushed.
+	 */
+}
+
+int
+putc(a1, a2)
+	char a1;
+	struct clist *a2;
+{
+	/*
+	 * Body deleted; this stub ignores its arguments and returns 0.
+	 */
+	return (0);
+}
+
+int
+b_to_q(a1, a2, a3)
+	char *a1;
+	int a2;
+	struct clist *a3;
+{
+	/*
+	 * Body deleted; this stub ignores its arguments and returns 0.
+	 */
+	return (0);
+}
+
+char *
+nextc(a1, a2, a3)
+	struct clist *a1;
+	char *a2;
+	int *a3;
+{
+	/*
+	 * Body deleted.
+	 * Stub: the arguments are ignored; a null pointer is handed back.
+	 */
+	return ((char *)0);
+}
+
+int
+unputc(a1)
+	struct clist *a1;
+{
+	/*
+	 * Body deleted; this stub ignores a1 and always returns 0.
+	 */
+	return ((char)0);
+}
+
+void
+catq(a1, a2)
+	struct clist *a1, *a2;
+{
+	/*
+	 * Body deleted.
+	 *
+	 * Stub: neither queue is touched.
+	 */
+}
diff --git a/sys/kern/subr_disklabel.c b/sys/kern/subr_disklabel.c
new file mode 100644
index 00000000000..78dede4da77
--- /dev/null
+++ b/sys/kern/subr_disklabel.c
@@ -0,0 +1,364 @@
+/*
+ * Copyright (c) 1982, 1986, 1988, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)ufs_disksubr.c	8.5 (Berkeley) 1/21/94
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/buf.h>
+#include <sys/disklabel.h>
+#include <sys/syslog.h>
+
+/*
+ * Seek sort for disks.  We depend on the driver which calls us using b_resid
+ * as the current cylinder number.
+ *
+ * The argument ap structure holds a b_actf activity chain pointer on which we
+ * keep two queues, sorted in ascending cylinder order.  The first queue holds
+ * those requests which are positioned after the current cylinder (in the first
+ * request); the second holds requests which came in after their cylinder number
+ * was passed.  Thus we implement a one way scan, retracting after reaching the
+ * end of the drive to the first request on the second queue, at which time it
+ * becomes the first queue.
+ *
+ * A one-way scan is natural because of the way UNIX read-ahead blocks are
+ * allocated.
+ */
+
+/*
+ * For portability with historic industry practice, the
+ * cylinder number has to be maintained in the `b_resid'
+ * field.
+ */
+#define	b_cylinder	b_resid
+
+void
+disksort(ap, bp)		/* ap: queue header; bp: request to queue */
+	register struct buf *ap, *bp;
+{
+	register struct buf *bq;
+
+	/* If the queue is empty, then it's easy. */
+	if (ap->b_actf == NULL) {
+		bp->b_actf = NULL;
+		ap->b_actf = bp;
+		return;
+	}
+
+	/*
+	 * If we lie before the first (currently active) request, then we
+	 * must locate the second request list and add ourselves to it.
+	 */
+	bq = ap->b_actf;
+	if (bp->b_cylinder < bq->b_cylinder) {
+		while (bq->b_actf) {
+			/*
+			 * Check for an ``inversion'' in the normally ascending
+			 * cylinder numbers, indicating the start of the second
+			 * request list.
+			 */
+			if (bq->b_actf->b_cylinder < bq->b_cylinder) {
+				/*
+				 * Search the second request list for the first
+				 * request at a larger cylinder number.  We go
+				 * before that; if there is no such request, we
+				 * go at end.
+				 */
+				do {
+					if (bp->b_cylinder <
+					    bq->b_actf->b_cylinder)
+						goto insert;
+					if (bp->b_cylinder ==
+					    bq->b_actf->b_cylinder &&
+					    bp->b_blkno < bq->b_actf->b_blkno)
+						goto insert;
+					bq = bq->b_actf;
+				} while (bq->b_actf);
+				goto insert;		/* after last */
+			}
+			bq = bq->b_actf;
+		}
+		/*
+		 * No inversions... we will go after the last, and
+		 * be the first request in the second request list.
+		 */
+		goto insert;
+	}
+	/*
+	 * Request is at/after the current request...
+	 * sort in the first request list.
+	 */
+	while (bq->b_actf) {
+		/*
+		 * We want to go after the current request if there is an
+		 * inversion after it (i.e. it is the end of the first
+		 * request list), or if the next request is a larger cylinder
+		 * than our request.
+		 */
+		if (bq->b_actf->b_cylinder < bq->b_cylinder ||
+		    bp->b_cylinder < bq->b_actf->b_cylinder ||
+		    (bp->b_cylinder == bq->b_actf->b_cylinder &&
+		    bp->b_blkno < bq->b_actf->b_blkno))
+			goto insert;
+		bq = bq->b_actf;
+	}
+	/*
+	 * Neither a second list nor a larger request... we go at the end of
+	 * the first list, which is the same as the end of the whole shebang.
+	 */
+insert:	bp->b_actf = bq->b_actf;	/* link bp in directly after bq */
+	bq->b_actf = bp;
+}
+
+/*
+ * Attempt to read a disk label from a device using the indicated strategy
+ * routine.  The label must be partly set up before this: secpercyl and
+ * anything required in the strategy routine (e.g., sector size) must be
+ * filled in before calling us.  The label sector is searched at every
+ * long-aligned offset.  Returns NULL on success, else an error string.
+ */
+char *
+readdisklabel(dev, strat, lp)
+	dev_t dev;
+	int (*strat)();
+	register struct disklabel *lp;
+{
+	register struct buf *bp;
+	struct disklabel *dlp;
+	char *msg = NULL;
+
+	if (lp->d_secperunit == 0)
+		lp->d_secperunit = 0x1fffffff;	/* placeholder size */
+	lp->d_npartitions = 1;
+	if (lp->d_partitions[0].p_size == 0)
+		lp->d_partitions[0].p_size = 0x1fffffff;
+	lp->d_partitions[0].p_offset = 0;
+
+	bp = geteblk((int)lp->d_secsize);
+	bp->b_dev = dev;
+	bp->b_blkno = LABELSECTOR;
+	bp->b_bcount = lp->d_secsize;
+	bp->b_flags = B_BUSY | B_READ;
+	bp->b_cylinder = LABELSECTOR / lp->d_secpercyl;
+	(*strat)(bp);
+	if (biowait(bp))
+		msg = "I/O error";
+	else for (dlp = (struct disklabel *)bp->b_data;
+	    dlp <= (struct disklabel *)((char *)bp->b_data +
+	    lp->d_secsize - sizeof(*dlp));	/* bound by the bytes read */
+	    dlp = (struct disklabel *)((char *)dlp + sizeof(long))) {
+		if (dlp->d_magic != DISKMAGIC || dlp->d_magic2 != DISKMAGIC) {
+			if (msg == NULL)
+				msg = "no disk label";
+		} else if (dlp->d_npartitions > MAXPARTITIONS ||
+			   dkcksum(dlp) != 0)
+			msg = "disk label corrupted";
+		else {
+			*lp = *dlp;		/* valid label: hand it back */
+			msg = NULL;
+			break;
+		}
+	}
+	bp->b_flags = B_INVAL | B_AGE;
+	brelse(bp);
+	return (msg);
+}
+
+/*
+ * Check new disk label for sensibility, then install it over the old one.
+ */
+int
+setdisklabel(olp, nlp, openmask)
+	register struct disklabel *olp, *nlp;
+	u_long openmask;
+{
+	register int i;
+	register struct partition *opp, *npp;
+
+	if (nlp->d_magic != DISKMAGIC || nlp->d_magic2 != DISKMAGIC ||
+	    nlp->d_npartitions > MAXPARTITIONS || dkcksum(nlp) != 0)
+		return (EINVAL);	/* bound the count before summing */
+	while ((i = ffs((long)openmask)) != 0) {
+		i--;
+		openmask &= ~((u_long)1 << i);	/* this partition is handled */
+		if (nlp->d_npartitions <= i)
+			return (EBUSY);	/* open partition would vanish */
+		opp = &olp->d_partitions[i];
+		npp = &nlp->d_partitions[i];
+		if (npp->p_offset != opp->p_offset || npp->p_size < opp->p_size)
+			return (EBUSY);	/* open partition moved or shrank */
+		/*
+		 * Copy internally-set partition information
+		 * if new label doesn't include it.		XXX
+		 */
+		if (npp->p_fstype == FS_UNUSED && opp->p_fstype != FS_UNUSED) {
+			npp->p_fstype = opp->p_fstype;
+			npp->p_fsize = opp->p_fsize;
+			npp->p_frag = opp->p_frag;
+			npp->p_cpg = opp->p_cpg;
+		}
+	}
+	nlp->d_checksum = 0;		/* field must be 0 while summing */
+	nlp->d_checksum = dkcksum(nlp);
+	*olp = *nlp;
+	return (0);
+}
+
+/* encoding of disk minor numbers, should be elsewhere... */
+#define dkunit(dev)		(minor(dev) >> 3)	/* bits above low 3 */
+#define dkpart(dev)		(minor(dev) & 07)	/* low 3 bits */
+#define dkminor(unit, part)	(((unit) << 3) | (part))	/* recombine */
+
+/*
+ * Write disk label back to device after modification.
+ */
+int
+writedisklabel(dev, strat, lp)
+	dev_t dev;
+	int (*strat)();
+	register struct disklabel *lp;
+{
+	struct buf *bp;
+	struct disklabel *dlp;
+	int labelpart;
+	int error = 0;
+
+	labelpart = dkpart(dev);
+	if (lp->d_partitions[labelpart].p_offset != 0) {
+		if (lp->d_partitions[0].p_offset != 0)
+			return (EXDEV);			/* not quite right */
+		labelpart = 0;		/* go through partition 0 instead */
+	}
+	bp = geteblk((int)lp->d_secsize);
+	bp->b_dev = makedev(major(dev), dkminor(dkunit(dev), labelpart));
+	bp->b_blkno = LABELSECTOR;
+	bp->b_bcount = lp->d_secsize;
+	bp->b_flags = B_BUSY | B_READ;	/* keep B_BUSY, as readdisklabel */
+	(*strat)(bp);
+	if ((error = biowait(bp)) != 0)
+		goto done;
+	for (dlp = (struct disklabel *)bp->b_data;
+	    dlp <= (struct disklabel *)
+	      ((char *)bp->b_data + lp->d_secsize - sizeof(*dlp));
+	    dlp = (struct disklabel *)((char *)dlp + sizeof(long))) {
+		if (dlp->d_magic == DISKMAGIC && dlp->d_magic2 == DISKMAGIC &&
+		    dlp->d_npartitions <= MAXPARTITIONS && dkcksum(dlp) == 0) {
+			*dlp = *lp;	/* overwrite the old label in place */
+			bp->b_flags = B_BUSY | B_WRITE;
+			(*strat)(bp);
+			error = biowait(bp);
+			goto done;
+		}
+	}
+	error = ESRCH;			/* no valid label found to replace */
+done:
+	brelse(bp);
+	return (error);
+}
+
+/*
+ * Compute checksum for disk label (XOR of its u_short words).
+ */
+int
+dkcksum(lp)
+	register struct disklabel *lp;
+{
+	register u_short *start = (u_short *)lp;
+	register u_short *end, sum = 0;
+
+	end = (u_short *)&lp->d_partitions[lp->d_npartitions];
+	while (start < end)
+		sum ^= *start++;
+	return (sum);
+}
+
+/*
+ * Disk error is the preface to plaintive error messages
+ * about failing disk transfers.  It prints messages of the form
+
+hp0g: hard error reading fsbn 12345 of 12344-12347 (hp0 bn %d; cn %d tn %d sn %d)
+
+ * if the offset of the error in the transfer and a disk label
+ * are both available.  blkdone should be -1 if the position of the error
+ * is unknown; the disklabel pointer may be null from drivers that have not
+ * been converted to use them.  The message is printed with printf
+ * if pri is LOG_PRINTF, otherwise it uses log at the specified priority.
+ * The message should be completed (with at least a newline) with printf
+ * or addlog, respectively.  There is no trailing space.
+ */
+void
+diskerr(bp, dname, what, pri, blkdone, lp)
+	register struct buf *bp;
+	char *dname, *what;
+	int pri, blkdone;
+	register struct disklabel *lp;
+{
+	int unit = dkunit(bp->b_dev), part = dkpart(bp->b_dev);
+	register void (*pr) __P((const char *, ...));
+	char partname = 'a' + part;
+	int sn;
+
+	if (pri != LOG_PRINTF) {
+		log(pri, "");		/* empty format: only log's prefix */
+		pr = addlog;
+	} else
+		pr = printf;
+	(*pr)("%s%d%c: %s %sing fsbn ", dname, unit, partname, what,
+	    bp->b_flags & B_READ ? "read" : "writ");
+	sn = bp->b_blkno;
+	if (bp->b_bcount <= DEV_BSIZE)
+		(*pr)("%d", sn);
+	else {
+		if (blkdone >= 0) {
+			sn += blkdone;	/* block at which the error occurred */
+			(*pr)("%d of ", sn);
+		}
+		(*pr)("%d-%d", bp->b_blkno,
+		    bp->b_blkno + (bp->b_bcount - 1) / DEV_BSIZE);
+	}
+	if (lp && (blkdone >= 0 || bp->b_bcount <= lp->d_secsize)) {
+#ifdef tahoe
+		sn *= DEV_BSIZE / lp->d_secsize;		/* XXX */
+#endif
+		sn += lp->d_partitions[part].p_offset;
+		(*pr)(" (%s%d bn %d; cn %d", dname, unit, sn,
+		    sn / lp->d_secpercyl);	/* NOTE(review): needs d_secpercyl != 0 */
+		sn %= lp->d_secpercyl;
+		(*pr)(" tn %d sn %d)", sn / lp->d_nsectors, sn % lp->d_nsectors);
+	}
+}
diff --git a/sys/kern/subr_log.c b/sys/kern/subr_log.c
new file mode 100644
index 00000000000..f065761d756
--- /dev/null
+++ b/sys/kern/subr_log.c
@@ -0,0 +1,232 @@
+/*
+ * Copyright (c) 1982, 1986, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)subr_log.c	8.1 (Berkeley) 6/10/93
+ */
+
+/*
+ * Error log buffer for kernel printf's.
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/vnode.h>
+#include <sys/ioctl.h>
+#include <sys/msgbuf.h>
+#include <sys/file.h>
+
+#define LOG_RDPRI	(PZERO + 1)	/* tsleep priority used by logread */
+
+#define LOG_ASYNC	0x04		/* sc_state: logwakeup sends SIGIO */
+#define LOG_RDWAIT	0x08		/* sc_state: logread is in tsleep */
+
+struct logsoftc {
+	int	sc_state;		/* see above for possibilities */
+	struct	selinfo sc_selp;	/* process waiting on select call */
+	int	sc_pgid;		/* process/group for async I/O */
+} logsoftc;
+
+int	log_open;			/* also used in log() */
+
+/*ARGSUSED*/
+int
+logopen(dev, flags, mode, p)
+	dev_t dev;
+	int flags, mode;
+	struct proc *p;
+{
+	register struct msgbuf *mbp = msgbufp;
+	register int i;
+
+	if (log_open)
+		return (EBUSY);		/* only one opener at a time */
+	log_open = 1;
+	logsoftc.sc_pgid = p->p_pid;		/* signal process only */
+	/*
+	 * Potential race here with putchar() but since putchar should be
+	 * called by autoconf, msg_magic should be initialized by the time
+	 * we get here.
+	 */
+	if (mbp->msg_magic != MSG_MAGIC) {
+		mbp->msg_magic = MSG_MAGIC;
+		mbp->msg_bufx = mbp->msg_bufr = 0;
+		for (i = 0; i < MSG_BSIZE; i++)
+			mbp->msg_bufc[i] = 0;
+	}
+	return (0);
+}
+
+/*ARGSUSED*/
+int
+logclose(dev, flag, mode, p)
+	dev_t dev;
+	int flag, mode;
+	struct proc *p;
+{
+	log_open = 0;			/* let the next logopen succeed */
+	logsoftc.sc_state = 0;		/* drop LOG_ASYNC and LOG_RDWAIT */
+	return (0);
+}
+
+/*ARGSUSED*/
+int
+logread(dev, uio, flag)
+	dev_t dev;
+	struct uio *uio;
+	int flag;
+{
+	register struct msgbuf *mbp = msgbufp;
+	register long l;
+	int s, error = 0;
+
+	s = splhigh();
+	while (mbp->msg_bufr == mbp->msg_bufx) {	/* nothing buffered */
+		if (flag & IO_NDELAY) {
+			splx(s);
+			return (EWOULDBLOCK);
+		}
+		logsoftc.sc_state |= LOG_RDWAIT;	/* ask logwakeup to wake us */
+		if ((error = tsleep((caddr_t)mbp, LOG_RDPRI | PCATCH,
+		    "klog", 0)) != 0) {
+			splx(s);
+			return (error);
+		}
+	}
+	splx(s);
+	logsoftc.sc_state &= ~LOG_RDWAIT;
+
+	while (uio->uio_resid > 0) {
+		l = mbp->msg_bufx - mbp->msg_bufr;
+		if (l < 0)		/* writer wrapped: copy up to the end */
+			l = MSG_BSIZE - mbp->msg_bufr;
+		l = min(l, uio->uio_resid);
+		if (l == 0)
+			break;
+		error = uiomove((caddr_t)&mbp->msg_bufc[mbp->msg_bufr],
+			(int)l, uio);
+		if (error)
+			break;
+		mbp->msg_bufr += l;
+		if (mbp->msg_bufr < 0 || mbp->msg_bufr >= MSG_BSIZE)
+			mbp->msg_bufr = 0;
+	}
+	return (error);
+}
+
+/*ARGSUSED*/
+int
+logselect(dev, rw, p)
+	dev_t dev;
+	int rw;
+	struct proc *p;
+{
+	int s = splhigh();
+
+	switch (rw) {
+	case FREAD:
+		if (msgbufp->msg_bufr != msgbufp->msg_bufx) {
+			splx(s);
+			return (1);	/* unread data is buffered */
+		}
+		selrecord(p, &logsoftc.sc_selp);
+		break;
+	}
+	splx(s);
+	return (0);
+}
+
+logwakeup()
+{
+	struct proc *sigp;	/* SIGIO target when sc_pgid is not negative */
+
+	if (log_open == 0)
+		return;
+	selwakeup(&logsoftc.sc_selp);
+	if ((logsoftc.sc_state & LOG_ASYNC) != 0) {
+		if (logsoftc.sc_pgid < 0)
+			gsignal(-logsoftc.sc_pgid, SIGIO);
+		else if ((sigp = pfind(logsoftc.sc_pgid)) != 0)
+			psignal(sigp, SIGIO);
+	}
+	if ((logsoftc.sc_state & LOG_RDWAIT) == 0)
+		return;
+	wakeup((caddr_t)msgbufp);
+	logsoftc.sc_state &= ~LOG_RDWAIT;
+}
+
+/*ARGSUSED*/
+int
+logioctl(dev, com, data, flag, p)
+	dev_t dev;
+	int com;
+	caddr_t data;
+	int flag;
+	struct proc *p;
+{
+	long l;
+	int s;
+
+	switch (com) {
+	/* return number of characters immediately available */
+	case FIONREAD:
+		s = splhigh();
+		l = msgbufp->msg_bufx - msgbufp->msg_bufr;
+		splx(s);
+		if (l < 0)
+			l += MSG_BSIZE;
+		*(int *)data = l;
+		break;
+
+	case FIONBIO:
+		break;
+
+	case FIOASYNC:
+		if (*(int *)data)
+			logsoftc.sc_state |= LOG_ASYNC;
+		else
+			logsoftc.sc_state &= ~LOG_ASYNC;
+		break;
+
+	case TIOCSPGRP:
+		logsoftc.sc_pgid = *(int *)data;
+		break;
+
+	case TIOCGPGRP:
+		*(int *)data = logsoftc.sc_pgid;
+		break;
+
+	default:
+		return (-1);		/* request not handled here */
+	}
+	return (0);
+}
diff --git a/sys/kern/subr_param.c b/sys/kern/subr_param.c
new file mode 100644
index 00000000000..9f4e2cae857
--- /dev/null
+++ b/sys/kern/subr_param.c
@@ -0,0 +1,145 @@
+/*
+ * Copyright (c) 1980, 1986, 1989, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)param.c	8.2 (Berkeley) 1/21/94
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/socket.h>
+#include <sys/proc.h>
+#include <sys/vnode.h>
+#include <sys/file.h>
+#include <sys/callout.h>
+#include <sys/clist.h>
+#include <sys/mbuf.h>
+#include <sys/kernel.h>
+
+#include <ufs/ufs/quota.h>
+
+#ifdef SYSVSHM
+#include <machine/vmparam.h>
+#include <sys/shm.h>
+#endif
+
+/*
+ * System parameter formulae.
+ *
+ * This file is copied into each directory where we compile
+ * the kernel; it should be modified there to suit local taste
+ * if necessary.
+ *
+ * Compiled with -DHZ=xx -DTIMEZONE=x -DDST=x -DMAXUSERS=xx
+ */
+
+#ifndef HZ
+#define	HZ 100				/* used when -DHZ=xx is not given */
+#endif
+int	hz = HZ;
+int	tick = 1000000 / HZ;		/* presumably usec per tick; confirm */
+int	tickadj = 30000 / (60 * HZ);		/* can adjust 30ms in 60s */
+struct	timezone tz = { TIMEZONE, DST };
+#define	NPROC (20 + 16 * MAXUSERS)
+int	maxproc = NPROC;
+#define	NTEXT (80 + NPROC / 8)			/* actually the object cache */
+#define	NVNODE (NPROC + NTEXT + 100)
+int	desiredvnodes = NVNODE;
+int	maxfiles = 3 * (NPROC + MAXUSERS) + 80;
+int	ncallout = 16 + NPROC;
+int	nclist = 60 + 12 * MAXUSERS;
+int	nmbclusters = NMBCLUSTERS;
+int	fscale = FSCALE;	/* kernel uses `FSCALE', user uses `fscale' */
+
+/*
+ * Values in support of System V compatible shared memory.	XXX
+ */
+#ifdef SYSVSHM
+#define	SHMMAX	(SHMMAXPGS*NBPG)
+#define	SHMMIN	1
+#define	SHMMNI	32			/* <= SHMMMNI in shm.h */
+#define	SHMSEG	8
+#define	SHMALL	(SHMMAXPGS/CLSIZE)
+
+struct	shminfo shminfo = {
+	SHMMAX,
+	SHMMIN,
+	SHMMNI,
+	SHMSEG,
+	SHMALL
+};
+#endif
+
+/*
+ * These are initialized at bootstrap time
+ * to values dependent on memory size
+ */
+int	nbuf, nswbuf;
+
+/*
+ * These have to be allocated somewhere; allocating
+ * them here forces loader errors if this file is omitted
+ * (if they've been externed everywhere else; hah!).
+ */
+struct 	callout *callout;
+struct	cblock *cfree;
+struct	buf *buf, *swbuf;
+char	*buffers;
+
+/*
+ * Proc/pgrp hashing.
+ * Here so that hash table sizes can depend on MAXUSERS/NPROC.
+ * Hash size must be a power of two.
+ * NOW omission of this file will cause loader errors!
+ */
+
+/* Choose the hash size from NPROC; every choice is a power of two. */
+#if NPROC > 1024
+#define	PIDHSZ		512
+#elif NPROC > 512
+#define	PIDHSZ		256
+#elif NPROC > 256
+#define	PIDHSZ		128
+#else
+#define	PIDHSZ		64
+#endif
+
+/*
+ * Both tables have PIDHSZ slots; pidhashmask is PIDHSZ - 1.
+ */
+struct	proc *pidhash[PIDHSZ];
+struct	pgrp *pgrphash[PIDHSZ];
+int	pidhashmask = PIDHSZ - 1;
diff --git a/sys/kern/subr_prf.c b/sys/kern/subr_prf.c
new file mode 100644
index 00000000000..2adb7793a3c
--- /dev/null
+++ b/sys/kern/subr_prf.c
@@ -0,0 +1,601 @@
+/*-
+ * Copyright (c) 1986, 1988, 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)subr_prf.c	8.3 (Berkeley) 1/21/94
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/buf.h>
+#include <sys/conf.h>
+#include <sys/reboot.h>
+#include <sys/msgbuf.h>
+#include <sys/proc.h>
+#include <sys/ioctl.h>
+#include <sys/vnode.h>
+#include <sys/file.h>
+#include <sys/tty.h>
+#include <sys/tprintf.h>
+#include <sys/syslog.h>
+#include <sys/malloc.h>
+
+/*
+ * Note that stdarg.h and the ANSI style va_start macro is used for both
+ * ANSI and traditional C compilers.
+ */
+#include <machine/stdarg.h>
+
+#ifdef KADB
+#include <machine/kdbparam.h>
+#endif
+
+#define TOCONS	0x01
+#define TOTTY	0x02
+#define TOLOG	0x04
+
+struct	tty *constty;			/* pointer to console "window" tty */
+
+extern	cnputc();			/* standard console putc */
+int	(*v_putc)() = cnputc;		/* routine to putc on virtual console */
+
+void  logpri __P((int level));
+static void  putchar __P((int ch, int flags, struct tty *tp));
+static char *ksprintn __P((u_long num, int base, int *len));
+void kprintf __P((const char *fmt, int flags, struct tty *tp, va_list ap));
+
+int consintr = 1;			/* Ok to handle console interrupts? */
+
+/*
+ * Variable panicstr contains argument to first call to panic; used as flag
+ * to indicate that the kernel has already called panic.
+ */
+const char *panicstr;
+
+/*
+ * Panic is called on unresolvable fatal errors.  It prints "panic: mesg",
+ * and then reboots.  If we are called twice, then we avoid trying to sync
+ * the disks as this often leads to recursive panics.
+ */
+#ifdef __GNUC__
+volatile void boot(int flags);	/* boot() does not return */
+volatile			/* panic() does not return */
+#endif
+void
+#ifdef __STDC__
+panic(const char *fmt, ...)
+#else
+panic(fmt, va_alist)
+	char *fmt;
+#endif
+{
+	int bootopt;
+	va_list ap;
+
+	bootopt = RB_AUTOBOOT | RB_DUMP;
+	if (panicstr)
+		bootopt |= RB_NOSYNC;	/* already panicking: skip the sync */
+	else
+		panicstr = fmt;		/* first panic: remember its message */
+
+	va_start(ap, fmt);
+	printf("panic: %r\n", fmt, ap);	/* %r: presumably expands fmt with ap */
+	va_end(ap);
+
+#ifdef KGDB
+	kgdb_panic();
+#endif
+#ifdef KADB
+	if (boothowto & RB_KDB)
+		kdbpanic();
+#endif
+	boot(bootopt);
+}
+
+/*
+ * Warn that a system table is full.
+ */
+void
+tablefull(tab)
+	const char *tab;
+{
+	/* tab is printed ahead of the fixed text, at LOG_ERR priority. */
+	log(LOG_ERR, "%s: table is full\n", tab);
+}
+
+/*
+ * Uprintf prints to the controlling terminal for the current process.
+ * It may block if the tty queue is overfull.  No message is printed if
+ * the queue does not clear in a reasonable time.
+ */
+void
+#ifdef __STDC__
+uprintf(const char *fmt, ...)
+#else
+uprintf(fmt, va_alist)
+	char *fmt;
+#endif
+{
+	register struct proc *cp = curproc;
+	va_list ap;
+
+	if ((cp->p_flag & P_CONTROLT) == 0 || !cp->p_session->s_ttyvp)
+		return;			/* no controlling terminal */
+	va_start(ap, fmt);
+	kprintf(fmt, TOTTY, cp->p_session->s_ttyp, ap);
+	va_end(ap);
+}
+
+tpr_t
+tprintf_open(p)
+	register struct proc *p;
+{
+	/* A null handle is returned unless P_CONTROLT and s_ttyvp are set. */
+	if ((p->p_flag & P_CONTROLT) == 0 || !p->p_session->s_ttyvp)
+		return ((tpr_t) NULL);
+	/* Hold the session; tprintf_close() does the matching SESSRELE. */
+	SESSHOLD(p->p_session);
+	return ((tpr_t) p->p_session);
+}
+
+void
+tprintf_close(sess)
+	tpr_t sess;
+{
+	if (!sess)
+		return;			/* null handle: nothing to release */
+	SESSRELE((struct session *) sess);
+}
+
+/*
+ * tprintf prints on the controlling terminal associated
+ * with the given session.
+ */
+void
+#ifdef __STDC__
+tprintf(tpr_t tpr, const char *fmt, ...)
+#else
+tprintf(tpr, fmt, va_alist)
+	tpr_t tpr;
+	char *fmt;
+#endif
+{
+	register struct session *sp = (struct session *)tpr;
+	struct tty *ttyp = NULL;	/* stays NULL when only logging */
+	int dest = TOLOG;		/* the log buffer is always written */
+	va_list ap;
+
+	logpri(LOG_INFO);
+	if (sp && sp->s_ttyvp && ttycheckoutq(sp->s_ttyp, 0) != 0) {
+		ttyp = sp->s_ttyp;
+		dest |= TOTTY;
+	}
+	va_start(ap, fmt);
+	kprintf(fmt, dest, ttyp, ap);
+	va_end(ap);
+	logwakeup();
+}
+
+/*
+ * Ttyprintf displays a message on a tty; it should be used only by
+ * the tty driver, or anything that knows the underlying tty will not
+ * be revoke(2)'d away.  Other callers should use tprintf.
+ */
+void
+#ifdef __STDC__
+ttyprintf(struct tty *tp, const char *fmt, ...)
+#else
+ttyprintf(tp, fmt, va_alist)
+	struct tty *tp;
+	char *fmt;
+#endif
+{
+	va_list ap;
+
+	va_start(ap, fmt);
+	kprintf(fmt, TOTTY, tp, ap);	/* tty only: no console, no log */
+	va_end(ap);
+}
+
+extern	int log_open;
+
+/*
+ * Log writes to the log buffer, and guarantees not to sleep (so can be
+ * called by interrupt routines).  If there is no process reading the
+ * log yet, it writes to the console also.
+ */
+void
+#ifdef __STDC__
+log(int level, const char *fmt, ...)
+#else
+log(level, fmt, va_alist)
+	int level;
+	char *fmt;
+#endif
+{
+	register int s;
+	va_list ap;
+
+	s = splhigh();		/* presumably keeps prefix and text contiguous */
+	logpri(level);		/* "<level>" prefix */
+	va_start(ap, fmt);
+	kprintf(fmt, TOLOG, NULL, ap);
+	splx(s);
+	va_end(ap);
+	if (!log_open) {	/* no reader on the log: copy to the console */
+		va_start(ap, fmt);	/* restart the argument list */
+		kprintf(fmt, TOCONS, NULL, ap);
+		va_end(ap);
+	}
+	logwakeup();
+}
+
+void
+logpri(level)
+	int level;
+{
+	register char *digit;
+
+	putchar('<', TOLOG, NULL);
+	digit = ksprintn((u_long)level, 10, NULL);	/* digits, reversed */
+	while (*digit != '\0')
+		putchar(*digit--, TOLOG, NULL);
+	putchar('>', TOLOG, NULL);
+}
+
+void
+#ifdef __STDC__
+addlog(const char *fmt, ...)
+#else
+addlog(fmt, va_alist)
+	char *fmt;
+#endif
+{
+	register int s;
+	va_list ap;
+
+	s = splhigh();		/* same as log(), but no "<pri>" prefix */
+	va_start(ap, fmt);
+	kprintf(fmt, TOLOG, NULL, ap);
+	splx(s);
+	va_end(ap);
+	if (!log_open) {	/* no reader on the log: copy to the console */
+		va_start(ap, fmt);	/* restart the argument list */
+		kprintf(fmt, TOCONS, NULL, ap);
+		va_end(ap);
+	}
+	logwakeup();
+}
+
+void
+#ifdef __STDC__
+printf(const char *fmt, ...)
+#else
+printf(fmt, va_alist)
+	char *fmt;
+#endif
+{
+	va_list ap;
+	register int savintr;
+
+	savintr = consintr;		/* disable interrupts */
+	consintr = 0;
+	va_start(ap, fmt);
+	kprintf(fmt, TOCONS | TOLOG, NULL, ap);	/* console and log both */
+	va_end(ap);
+	if (!panicstr)			/* no log wakeup once panicstr is set */
+		logwakeup();
+	consintr = savintr;		/* reenable interrupts */
+}
+
+/*
+ * Scaled down version of printf(3).
+ *
+ * Two additional formats:
+ *
+ * The format %b is supported to decode error registers.
+ * Its usage is:
+ *
+ *	printf("reg=%b\n", regval, "<base><arg>*");
+ *
+ * where <base> is the output base expressed as a control character, e.g.
+ * \10 gives octal; \20 gives hex.  Each arg is a sequence of characters,
+ * the first of which gives the bit number to be inspected (origin 1), and
+ * the next characters (up to a control character, i.e. a character <= 32),
+ * give the name of the register.  Thus:
+ *
+ *	printf("reg=%b\n", 3, "\10\2BITTWO\1BITONE\n");
+ *
+ * would produce output:
+ *
+ *	reg=3<BITTWO,BITONE>
+ *
+ * The format %r passes an additional format string and argument list
+ * recursively.  Its usage is:
+ *
+ * fn(char *fmt, ...)
+ * {
+ *	va_list ap;
+ *	va_start(ap, fmt);
+ *	printf("prefix: %r: suffix\n", fmt, ap);
+ *	va_end(ap);
+ * }
+ *
+ * Space or zero padding and a field width are supported for the numeric
+ * formats only.
+ */
+void
+kprintf(fmt, flags, tp, ap)
+	register const char *fmt;
+	int flags;
+	struct tty *tp;
+	va_list ap;
+{
+	register char *p, *q;
+	register int ch, n;
+	u_long ul;
+	int base, lflag, tmp, width;
+	char padc;
+
+	for (;;) {
+		padc = ' ';		/* pad and width reset per conversion */
+		width = 0;
+		while ((ch = *(u_char *)fmt++) != '%') {
+			if (ch == '\0')
+				return;	/* end of format: the only exit */
+			putchar(ch, flags, tp);
+		}
+		lflag = 0;
+reswitch:	switch (ch = *(u_char *)fmt++) {
+		case '0':		/* leading 0 selects zero padding */
+			padc = '0';
+			goto reswitch;
+		case '1': case '2': case '3': case '4':
+		case '5': case '6': case '7': case '8': case '9':
+			/* ch is the first digit; fmt ends on the first non-digit */
+			for (width = 0;; ++fmt) {
+				width = width * 10 + ch - '0';
+				ch = *fmt;
+				if (ch < '0' || ch > '9')
+					break;
+			}
+			goto reswitch;
+		case 'l':
+			lflag = 1;
+			goto reswitch;
+		case 'b':		/* value, then a bit-description string */
+			ul = va_arg(ap, int);
+			p = va_arg(ap, char *);
+			/* The first byte of the description is the output base. */
+			for (q = ksprintn(ul, *p++, NULL); ch = *q--;)
+				putchar(ch, flags, tp);
+
+			if (!ul)
+				break;
+
+			for (tmp = 0; n = *p++;) {
+				if (ul & (1 << (n - 1))) {	/* bits are origin 1 */
+					putchar(tmp ? ',' : '<', flags, tp);
+					for (; (n = *p) > ' '; ++p)
+						putchar(n, flags, tp);
+					tmp = 1;
+				} else
+					for (; *p > ' '; ++p)	/* bit clear: skip name */
+						continue;
+			}
+			if (tmp)
+				putchar('>', flags, tp);
+			break;
+		case 'c':
+			putchar(va_arg(ap, int), flags, tp);
+			break;
+		case 'r':		/* nested format plus its own va_list */
+			p = va_arg(ap, char *);
+			kprintf(p, flags, tp, va_arg(ap, va_list));
+			break;
+		case 's':
+			p = va_arg(ap, char *);
+			while (ch = *p++)
+				putchar(ch, flags, tp);
+			break;
+		case 'd':
+			ul = lflag ? va_arg(ap, long) : va_arg(ap, int);
+			if ((long)ul < 0) {
+				putchar('-', flags, tp);
+				ul = -(long)ul;	/* sign is out; keep magnitude */
+			}
+			base = 10;
+			goto number;
+		case 'o':
+			ul = lflag ? va_arg(ap, u_long) : va_arg(ap, u_int);
+			base = 8;
+			goto number;
+		case 'u':
+			ul = lflag ? va_arg(ap, u_long) : va_arg(ap, u_int);
+			base = 10;
+			goto number;
+		case 'x':
+			ul = lflag ? va_arg(ap, u_long) : va_arg(ap, u_int);
+			base = 16;
+number:			p = ksprintn(ul, base, &tmp);	/* tmp = digit count */
+			if (width && (width -= tmp) > 0)	/* left padding */
+				while (width--)
+					putchar(padc, flags, tp);
+			while (ch = *p--)	/* digits are stored reversed */
+				putchar(ch, flags, tp);
+			break;
+		default:		/* unknown conversion: echo it literally */
+			putchar('%', flags, tp);
+			if (lflag)
+				putchar('l', flags, tp);
+			/* FALLTHROUGH */
+		case '%':
+			putchar(ch, flags, tp);
+		}
+	}
+}
+
+/*
+ * Print a character on the console or a user's terminal.  When TOLOG is
+ * set the character is also stored in the message buffer, a ring of
+ * MSG_BSIZE characters kept for inspection later.
+ */
+static void
+putchar(c, flags, tp)
+	register int c;
+	int flags;
+	struct tty *tp;
+{
+	extern int msgbufmapped;
+	register struct msgbuf *mbp;
+
+	if (panicstr)
+		constty = NULL;		/* panicstr set: drop any redirection */
+	if ((flags & TOCONS) && tp == NULL && constty) {
+		tp = constty;		/* console output goes to constty */
+		flags |= TOTTY;
+	}
+	if ((flags & TOTTY) && tp && tputchar(c, tp) < 0 &&
+	    (flags & TOCONS) && tp == constty)
+		constty = NULL;		/* tputchar() failed on constty */
+	if ((flags & TOLOG) &&
+	    c != '\0' && c != '\r' && c != 0177 && msgbufmapped) {
+		mbp = msgbufp;
+		if (mbp->msg_magic != MSG_MAGIC) {	/* not set up yet */
+			bzero((caddr_t)mbp, sizeof(*mbp));
+			mbp->msg_magic = MSG_MAGIC;
+		}
+		mbp->msg_bufc[mbp->msg_bufx++] = c;
+		if (mbp->msg_bufx < 0 || mbp->msg_bufx >= MSG_BSIZE)
+			mbp->msg_bufx = 0;	/* wrap around */
+	}
+	if ((flags & TOCONS) && constty == NULL && c != '\0')
+		(*v_putc)(c);		/* not redirected: call through v_putc */
+}
+
+/*
+ * Scaled down version of sprintf(3): only %c, %s, %d, %o, %u, %x, %% and
+ * the 'l' modifier.  Returns the length stored, excluding the final NUL.
+ */
+int
+#ifdef __STDC__
+sprintf(char *buf, const char *cfmt, ...)
+#else
+sprintf(buf, cfmt, va_alist)
+	char *buf, *cfmt;
+#endif
+{
+	register const char *fmt = cfmt;
+	register char *p, *bp;
+	register int ch, base;
+	u_long ul;
+	int lflag;
+	va_list ap;
+
+	va_start(ap, cfmt);
+	for (bp = buf; ; ) {
+		while ((ch = *(u_char *)fmt++) != '%')
+			if ((*bp++ = ch) == '\0') {
+				va_end(ap);	/* the only exit: pair va_start */
+				return ((bp - buf) - 1);
+			}
+
+		lflag = 0;
+reswitch:	switch (ch = *(u_char *)fmt++) {
+		case 'l':
+			lflag = 1;
+			goto reswitch;
+		case 'c':
+			*bp++ = va_arg(ap, int);
+			break;
+		case 's':
+			p = va_arg(ap, char *);
+			while (*bp++ = *p++)
+				continue;
+			--bp;			/* back up over the copied NUL */
+			break;
+		case 'd':
+			ul = lflag ? va_arg(ap, long) : va_arg(ap, int);
+			if ((long)ul < 0) {
+				*bp++ = '-';
+				ul = -ul;	/* unsigned negate: no overflow */
+			}
+			base = 10;
+			goto number;
+		case 'o':
+			ul = lflag ? va_arg(ap, u_long) : va_arg(ap, u_int);
+			base = 8;
+			goto number;
+		case 'u':
+			ul = lflag ? va_arg(ap, u_long) : va_arg(ap, u_int);
+			base = 10;
+			goto number;
+		case 'x':
+			ul = lflag ? va_arg(ap, u_long) : va_arg(ap, u_int);
+			base = 16;
+number:			for (p = ksprintn(ul, base, NULL); ch = *p--;)
+				*bp++ = ch;	/* digits come back reversed */
+			break;
+		default:		/* unknown conversion: copy it literally */
+			*bp++ = '%';
+			if (lflag)
+				*bp++ = 'l';
+			/* FALLTHROUGH */
+		case '%':
+			*bp++ = ch;
+		}
+	}
+}
+
+/*
+ * Put a number (base <= 16) in a static buffer in reverse order.  Returns
+ * a pointer to the last (most significant) digit stored; the digits are
+ * preceded by a NUL.  The digit count is stored through lenp if non-null.
+ */
+static char *
+ksprintn(ul, base, lenp)
+	register u_long ul;
+	register int base, *lenp;
+{					/* A long in base 8, plus NUL. */
+	static char buf[sizeof(long) * NBBY / 3 + 2];
+	register char *end = buf;	/* buf[0] is never written: the NUL */
+
+	do {
+		*++end = "0123456789abcdef"[ul % base];
+		ul /= base;
+	} while (ul != 0);
+	if (lenp != NULL)
+		*lenp = end - buf;
+	return (end);
+}
diff --git a/sys/kern/subr_prof.c b/sys/kern/subr_prof.c
new file mode 100644
index 00000000000..4fb81d823ca
--- /dev/null
+++ b/sys/kern/subr_prof.c
@@ -0,0 +1,256 @@
+/*-
+ * Copyright (c) 1982, 1986, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)subr_prof.c	8.3 (Berkeley) 9/23/93
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/proc.h>
+#include <sys/user.h>
+#include <machine/cpu.h>
+
+#ifdef GPROF
+#include <sys/malloc.h>
+#include <sys/gmon.h>
+
+/*
+ * Froms is actually a bunch of unsigned shorts indexing tos
+ */
+struct gmonparam _gmonparam = { GMON_PROF_OFF };
+
+extern char etext[];
+
+kmstartup()
+{			/* Size and allocate the tables in _gmonparam. */
+	char *cp;
+	struct gmonparam *p = &_gmonparam;
+	/*
+	 * Round lowpc and highpc to multiples of the density we're using
+	 * so the rest of the scaling (here and in gprof) stays in ints.
+	 */
+	p->lowpc = ROUNDDOWN(KERNBASE, HISTFRACTION * sizeof(HISTCOUNTER));
+	p->highpc = ROUNDUP((u_long)etext, HISTFRACTION * sizeof(HISTCOUNTER));
+	p->textsize = p->highpc - p->lowpc;
+	printf("Profiling kernel, textsize=%d [%x..%x]\n",
+	       p->textsize, p->lowpc, p->highpc);
+	p->kcountsize = p->textsize / HISTFRACTION;
+	p->hashfraction = HASHFRACTION;
+	p->fromssize = p->textsize / HASHFRACTION;
+	p->tolimit = p->textsize * ARCDENSITY / 100;
+	if (p->tolimit < MINARCS)	/* clamp to [MINARCS, MAXARCS] */
+		p->tolimit = MINARCS;
+	else if (p->tolimit > MAXARCS)
+		p->tolimit = MAXARCS;
+	p->tossize = p->tolimit * sizeof(struct tostruct);
+	cp = (char *)malloc(p->kcountsize + p->fromssize + p->tossize,
+	    M_GPROF, M_NOWAIT);		/* one block for all three tables */
+	if (cp == 0) {
+		printf("No memory for profiling.\n");
+		return;			/* tables are left unallocated */
+	}
+	bzero(cp, p->kcountsize + p->tossize + p->fromssize);
+	p->tos = (struct tostruct *)cp;	/* carve up: tos, kcount, froms */
+	cp += p->tossize;
+	p->kcount = (u_short *)cp;
+	cp += p->kcountsize;
+	p->froms = (u_short *)cp;
+}
+
+/*
+ * Return kernel profiling information; name[0] selects the GPROF_* item.
+ */
+sysctl_doprof(name, namelen, oldp, oldlenp, newp, newlen, p)
+	int *name;
+	u_int namelen;
+	void *oldp;
+	size_t *oldlenp;
+	void *newp;
+	size_t newlen;	/* NOTE(review): p is undeclared (int) and unused */
+{
+	struct gmonparam *gp = &_gmonparam;
+	int error;
+
+	/* all sysctl names at this level are terminal */
+	if (namelen != 1)
+		return (ENOTDIR);		/* overloaded */
+
+	switch (name[0]) {
+	case GPROF_STATE:
+		error = sysctl_int(oldp, oldlenp, newp, newlen, &gp->state);
+		if (error)
+			return (error);
+		/* Bring proc0's profiling clock in line with the state. */
+		if (gp->state == GMON_PROF_OFF)
+			stopprofclock(&proc0);
+		else
+			startprofclock(&proc0);
+		return (0);
+	case GPROF_COUNT:
+		return (sysctl_struct(oldp, oldlenp, newp, newlen,
+		    gp->kcount, gp->kcountsize));
+	case GPROF_FROMS:
+		return (sysctl_struct(oldp, oldlenp, newp, newlen,
+		    gp->froms, gp->fromssize));
+	case GPROF_TOS:
+		return (sysctl_struct(oldp, oldlenp, newp, newlen,
+		    gp->tos, gp->tossize));
+	case GPROF_GMONPARAM:	/* rdstruct: presumably read-only; confirm */
+		return (sysctl_rdstruct(oldp, oldlenp, newp, gp, sizeof *gp));
+	default:
+		return (EOPNOTSUPP);
+	}
+	/* NOTREACHED */
+}
+#endif /* GPROF */
+
+/*
+ * Profiling system call.
+ *
+ * The scale factor is a fixed point number with 16 bits of fraction, so that
+ * 1.0 is represented as 0x10000.  A scale factor of 0 turns off profiling.
+ */
+struct profil_args {
+	caddr_t	samples;
+	u_int	size;
+	u_int	offset;
+	u_int	scale;
+};
+/* ARGSUSED */
+profil(p, uap, retval)
+	struct proc *p;
+	register struct profil_args *uap;
+	int *retval;
+{
+	register struct uprof *prof = &p->p_stats->p_prof;
+	int opri;
+
+	/* A zero scale turns profiling off; above 1.0 (1 << 16) is invalid. */
+	if (uap->scale == 0) {
+		stopprofclock(p);
+		return (0);
+	}
+	if (uap->scale > (1 << 16))
+		return (EINVAL);
+
+	/* Install the new parameters with profile interrupts blocked. */
+	opri = splstatclock();
+	prof->pr_base = uap->samples;
+	prof->pr_size = uap->size;
+	prof->pr_off = uap->offset;
+	prof->pr_scale = uap->scale;
+	startprofclock(p);
+	splx(opri);
+
+	return (0);
+}
+
+/*
+ * Scale is a fixed-point number with the binary point 16 bits into the
+ * value, and is <= 1.0.  pc is at most 32 bits, so the intermediate result
+ * is at most 48 bits.  "& ~1" clears the low bit: offsets are always even.
+ */
+#define	PC_TO_INDEX(pc, prof) \
+	((int)(((u_quad_t)((pc) - (prof)->pr_off) * \
+	    (u_quad_t)((prof)->pr_scale)) >> 16) & ~1)
+
+/*
+ * Collect user-level profiling statistics; called on a profiling tick,
+ * when a process is running in user-mode.  This routine may be called
+ * from an interrupt context.  We try to update the user profiling buffers
+ * cheaply with fuswintr() and suswintr().  If that fails, we revert to
+ * an AST that will vector us to trap() with a context in which copyin
+ * and copyout will work.  Trap will then call addupc_task().
+ *
+ * Note that we may (rarely) not get around to the AST soon enough, and
+ * lose profile ticks when the next tick overwrites this one, but in this
+ * case the system is overloaded and the profile is probably already
+ * inaccurate.
+ */
+void
+addupc_intr(p, pc, ticks)
+	register struct proc *p;
+	register u_long pc;
+	u_int ticks;
+{
+	register struct uprof *prof;
+	register caddr_t addr;
+	register u_int off;
+	register int count;
+
+	if (ticks == 0)
+		return;
+	prof = &p->p_stats->p_prof;
+	if (pc < prof->pr_off)
+		return;			/* below the profiled range; ignore */
+	if ((off = PC_TO_INDEX(pc, prof)) >= prof->pr_size)
+		return;			/* past the sample buffer; ignore */
+	addr = prof->pr_base + off;
+	if ((count = fuswintr(addr)) == -1 ||
+	    suswintr(addr, count + ticks) == -1) {
+		prof->pr_addr = pc;	/* remember the tick for later */
+		prof->pr_ticks = ticks;
+		need_proftick(p);	/* defer the update to the AST path */
+	}
+}
+
+/*
+ * Much like before, but we can afford to take faults here.  If the
+ * update fails, we simply turn off profiling.
+ */
+void
+addupc_task(p, pc, ticks)
+	register struct proc *p;
+	register u_long pc;
+	u_int ticks;
+{
+	register struct uprof *prof;
+	register caddr_t addr;
+	register u_int off;
+	u_short count;
+
+	/* Testing P_PROFIL may be unnecessary, but is certainly safe. */
+	if ((p->p_flag & P_PROFIL) == 0 || ticks == 0)
+		return;
+	prof = &p->p_stats->p_prof;
+	if (pc < prof->pr_off)
+		return;
+	if ((off = PC_TO_INDEX(pc, prof)) >= prof->pr_size)
+		return;
+	addr = prof->pr_base + off;
+	if (copyin(addr, (caddr_t)&count, sizeof(count)) != 0) {
+		stopprofclock(p);	/* counter unreadable: stop profiling */
+		return;
+	}
+	count += ticks;
+	if (copyout((caddr_t)&count, addr, sizeof(count)) != 0)
+		stopprofclock(p);	/* counter unwritable: stop profiling */
+}
diff --git a/sys/kern/subr_rmap.c b/sys/kern/subr_rmap.c
new file mode 100644
index 00000000000..2f31173321d
--- /dev/null
+++ b/sys/kern/subr_rmap.c
@@ -0,0 +1,81 @@
+/*-
+ * Copyright (c) 1982, 1986, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	from: @(#)subr_rmap.c	8.1 (Berkeley) 6/10/93
+ */
+
+#include <sys/param.h>
+#include <sys/map.h>
+#include <sys/proc.h>
+
+void
+rminit(a1, a2, a3, a4, a5)
+	struct map *a1;
+	long a2, a3;
+	char *a4;
+	int a5;
+{
+
+	/*
+	 * Body deleted: this stub ignores its arguments and does nothing.
+	 */
+	return;
+}
+
+long
+rmalloc(a1, a2)
+	struct map *a1;
+	long a2;
+{
+
+	/*
+	 * Body deleted: this stub allocates nothing and always returns 0.
+	 */
+	return (0);
+}
+
+void
+rmfree(a1, a2, a3)
+	struct map *a1;
+	long a2, a3;
+{
+
+	/*
+	 * Body deleted: this stub ignores its arguments and does nothing.
+	 */
+	return;
+}
diff --git a/sys/kern/subr_xxx.c b/sys/kern/subr_xxx.c
new file mode 100644
index 00000000000..c692ec11a3b
--- /dev/null
+++ b/sys/kern/subr_xxx.c
@@ -0,0 +1,100 @@
+/*
+ * Copyright (c) 1982, 1986, 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)subr_xxx.c	8.1 (Berkeley) 6/10/93
+ */
+
+/*
+ * Miscellaneous trivial functions, including many
+ * that are often inline-expanded or done in assembler.
+ */
+#include <sys/param.h>
+#include <sys/systm.h>
+
+#include <machine/cpu.h>
+
+/*
+ * Unsupported device function (e.g. writing to read-only device): ENODEV.
+ */
+int
+enodev()
+{
+	return (ENODEV);
+}
+
+/*
+ * Unconfigured device function; driver not configured.  Returns ENXIO.
+ */
+int
+enxio()
+{
+	return (ENXIO);
+}
+
+/*
+ * Unsupported ioctl function.  Returns ENOTTY.
+ */
+int
+enoioctl()
+{
+	return (ENOTTY);
+}
+
+/*
+ * Unsupported system function.
+ * This is used for an otherwise-reasonable operation
+ * that is not supported by the current system binary.  Returns ENOSYS.
+ */
+int
+enosys()
+{
+	return (ENOSYS);
+}
+
+/*
+ * Return error (EOPNOTSUPP) for operation not supported
+ * on a specific object or file type.
+ */
+int
+eopnotsupp()
+{
+	return (EOPNOTSUPP);
+}
+
+/*
+ * Generic null operation, always returns success (0).
+ */
+int
+nullop()
+{
+	return (0);
+}
diff --git a/sys/kern/sys_generic.c b/sys/kern/sys_generic.c
new file mode 100644
index 00000000000..a121209f9fe
--- /dev/null
+++ b/sys/kern/sys_generic.c
@@ -0,0 +1,683 @@
+/*
+ * Copyright (c) 1982, 1986, 1989, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)sys_generic.c	8.5 (Berkeley) 1/21/94
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/filedesc.h>
+#include <sys/ioctl.h>
+#include <sys/file.h>
+#include <sys/proc.h>
+#include <sys/socketvar.h>
+#include <sys/uio.h>
+#include <sys/kernel.h>
+#include <sys/stat.h>
+#include <sys/malloc.h>
+#ifdef KTRACE
+#include <sys/ktrace.h>
+#endif
+
+/*
+ * Read system call.
+ */
+struct read_args {
+	int	fd;
+	char	*buf;
+	u_int	nbyte;
+};
+/* ARGSUSED */
+read(p, uap, retval)
+	struct proc *p;
+	register struct read_args *uap;
+	int *retval;
+{
+	register struct file *fp;
+	register struct filedesc *fdp = p->p_fd;
+	struct uio auio;
+	struct iovec aiov;
+	long cnt, error = 0;
+#ifdef KTRACE
+	struct iovec ktriov;
+#endif
+
+	if (((u_int)uap->fd) >= fdp->fd_nfiles ||
+	    (fp = fdp->fd_ofiles[uap->fd]) == NULL ||
+	    (fp->f_flag & FREAD) == 0)
+		return (EBADF);
+	aiov.iov_base = (caddr_t)uap->buf;
+	aiov.iov_len = uap->nbyte;
+	auio.uio_iov = &aiov;
+	auio.uio_iovcnt = 1;
+	auio.uio_resid = uap->nbyte;
+	if (auio.uio_resid < 0)		/* count too large, as in readv() */
+		return (EINVAL);
+	auio.uio_rw = UIO_READ;
+	auio.uio_segflg = UIO_USERSPACE;
+	auio.uio_procp = p;
+#ifdef KTRACE
+	/* Always saved: KTRPOINT is re-tested after the I/O and may differ. */
+	ktriov = aiov;
+#endif
+	/* A partial transfer cut short by a signal or EWOULDBLOCK succeeds. */
+	cnt = uap->nbyte;
+	if (error = (*fp->f_ops->fo_read)(fp, &auio, fp->f_cred))
+		if (auio.uio_resid != cnt && (error == ERESTART ||
+		    error == EINTR || error == EWOULDBLOCK))
+			error = 0;
+	cnt -= auio.uio_resid;
+#ifdef KTRACE
+	if (KTRPOINT(p, KTR_GENIO) && error == 0)
+		ktrgenio(p->p_tracep, uap->fd, UIO_READ, &ktriov, cnt, error);
+#endif
+	*retval = cnt;
+	return (error);
+}
+
+/*
+ * Scatter read system call.
+ */
+struct readv_args {
+	int	fdes;
+	struct	iovec *iovp;
+	u_int	iovcnt;
+};
+readv(p, uap, retval)
+	struct proc *p;
+	register struct readv_args *uap;
+	int *retval;
+{
+	register struct file *fp;
+	register struct filedesc *fdp = p->p_fd;
+	struct uio auio;
+	register struct iovec *iov;
+	struct iovec *needfree;
+	struct iovec aiov[UIO_SMALLIOV];
+	long i, cnt, error = 0;
+	u_int iovlen;
+#ifdef KTRACE
+	struct iovec *ktriov = NULL;
+#endif
+
+	if (((u_int)uap->fdes) >= fdp->fd_nfiles ||
+	    (fp = fdp->fd_ofiles[uap->fdes]) == NULL ||
+	    (fp->f_flag & FREAD) == 0)
+		return (EBADF);
+	/* note: can't use iovlen until iovcnt is validated */
+	iovlen = uap->iovcnt * sizeof (struct iovec);
+	if (uap->iovcnt > UIO_SMALLIOV) {
+		if (uap->iovcnt > UIO_MAXIOV)
+			return (EINVAL);
+		/* Too many entries for aiov[]: use a heap copy instead. */
+		MALLOC(iov, struct iovec *, iovlen, M_IOV, M_WAITOK);
+		needfree = iov;
+	} else {
+		iov = aiov;
+		needfree = NULL;
+	}
+	auio.uio_iov = iov;
+	auio.uio_iovcnt = uap->iovcnt;
+	auio.uio_rw = UIO_READ;
+	auio.uio_segflg = UIO_USERSPACE;
+	auio.uio_procp = p;
+	if (error = copyin((caddr_t)uap->iovp, (caddr_t)iov, iovlen))
+		goto done;
+	auio.uio_resid = 0;
+	/* Total the lengths, rejecting a negative entry or a negative sum. */
+	for (i = 0; i < uap->iovcnt; i++) {
+		if (iov->iov_len < 0) {
+			error = EINVAL;
+			goto done;
+		}
+		auio.uio_resid += iov->iov_len;
+		if (auio.uio_resid < 0) {
+			error = EINVAL;
+			goto done;
+		}
+		iov++;
+	}
+#ifdef KTRACE
+	/*
+	 * if tracing, save a copy of iovec
+	 */
+	if (KTRPOINT(p, KTR_GENIO))  {
+		MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK);
+		bcopy((caddr_t)auio.uio_iov, (caddr_t)ktriov, iovlen);
+	}
+#endif
+	cnt = auio.uio_resid;		/* bytes requested */
+	if (error = (*fp->f_ops->fo_read)(fp, &auio, fp->f_cred))
+		if (auio.uio_resid != cnt && (error == ERESTART ||
+		    error == EINTR || error == EWOULDBLOCK))
+			error = 0;	/* partial transfer counts as success */
+	cnt -= auio.uio_resid;		/* bytes actually transferred */
+#ifdef KTRACE
+	if (ktriov != NULL) {
+		if (error == 0)
+			ktrgenio(p->p_tracep, uap->fdes, UIO_READ, ktriov,
+			    cnt, error);
+		FREE(ktriov, M_TEMP);
+	}
+#endif
+	*retval = cnt;
+done:
+	if (needfree)			/* only set when MALLOC was used */
+		FREE(needfree, M_IOV);
+	return (error);
+}
+
+/*
+ * Write system call
+ */
+struct write_args {
+	int	fd;
+	char	*buf;
+	u_int	nbyte;
+};
+write(p, uap, retval)
+	struct proc *p;
+	register struct write_args *uap;
+	int *retval;
+{
+	register struct file *fp;
+	register struct filedesc *fdp = p->p_fd;
+	struct uio auio;
+	struct iovec aiov;
+	long cnt, error = 0;
+#ifdef KTRACE
+	struct iovec ktriov;
+#endif
+
+	if (((u_int)uap->fd) >= fdp->fd_nfiles ||
+	    (fp = fdp->fd_ofiles[uap->fd]) == NULL ||
+	    (fp->f_flag & FWRITE) == 0)
+		return (EBADF);
+	aiov.iov_base = (caddr_t)uap->buf;
+	aiov.iov_len = uap->nbyte;
+	auio.uio_iov = &aiov;
+	auio.uio_iovcnt = 1;
+	auio.uio_resid = uap->nbyte;
+	if (auio.uio_resid < 0)		/* count too large, as in writev() */
+		return (EINVAL);
+	auio.uio_rw = UIO_WRITE;
+	auio.uio_segflg = UIO_USERSPACE;
+	auio.uio_procp = p;
+#ifdef KTRACE
+	/* Always saved: KTRPOINT is re-tested after the I/O and may differ. */
+	ktriov = aiov;
+#endif
+	/* A partial transfer cut short by a signal or EWOULDBLOCK succeeds. */
+	cnt = uap->nbyte;
+	if (error = (*fp->f_ops->fo_write)(fp, &auio, fp->f_cred)) {
+		if (auio.uio_resid != cnt && (error == ERESTART ||
+		    error == EINTR || error == EWOULDBLOCK))
+			error = 0;
+		if (error == EPIPE)
+			psignal(p, SIGPIPE);
+	}
+	cnt -= auio.uio_resid;
+#ifdef KTRACE
+	if (KTRPOINT(p, KTR_GENIO) && error == 0)
+		ktrgenio(p->p_tracep, uap->fd, UIO_WRITE,
+		    &ktriov, cnt, error);
+#endif
+	*retval = cnt;
+	return (error);
+}
+
+/*
+ * Gather write system call
+ */
+struct writev_args {
+	int	fd;
+	struct	iovec *iovp;
+	u_int	iovcnt;
+};
+writev(p, uap, retval)
+	struct proc *p;
+	register struct writev_args *uap;
+	int *retval;
+{
+	register struct file *fp;
+	register struct filedesc *fdp = p->p_fd;
+	struct uio auio;
+	register struct iovec *iov;
+	struct iovec *needfree;
+	struct iovec aiov[UIO_SMALLIOV];
+	long i, cnt, error = 0;
+	u_int iovlen;
+#ifdef KTRACE
+	struct iovec *ktriov = NULL;
+#endif
+
+	if (((u_int)uap->fd) >= fdp->fd_nfiles ||
+	    (fp = fdp->fd_ofiles[uap->fd]) == NULL ||
+	    (fp->f_flag & FWRITE) == 0)
+		return (EBADF);
+	/* note: can't use iovlen until iovcnt is validated */
+	iovlen = uap->iovcnt * sizeof (struct iovec);
+	if (uap->iovcnt > UIO_SMALLIOV) {
+		if (uap->iovcnt > UIO_MAXIOV)
+			return (EINVAL);
+		/* Too many entries for aiov[]: use a heap copy instead. */
+		MALLOC(iov, struct iovec *, iovlen, M_IOV, M_WAITOK);
+		needfree = iov;
+	} else {
+		iov = aiov;
+		needfree = NULL;
+	}
+	auio.uio_iov = iov;
+	auio.uio_iovcnt = uap->iovcnt;
+	auio.uio_rw = UIO_WRITE;
+	auio.uio_segflg = UIO_USERSPACE;
+	auio.uio_procp = p;
+	if (error = copyin((caddr_t)uap->iovp, (caddr_t)iov, iovlen))
+		goto done;
+	auio.uio_resid = 0;
+	/* Total the lengths, rejecting a negative entry or a negative sum. */
+	for (i = 0; i < uap->iovcnt; i++) {
+		if (iov->iov_len < 0) {
+			error = EINVAL;
+			goto done;
+		}
+		auio.uio_resid += iov->iov_len;
+		if (auio.uio_resid < 0) {
+			error = EINVAL;
+			goto done;
+		}
+		iov++;
+	}
+#ifdef KTRACE
+	/*
+	 * if tracing, save a copy of iovec
+	 */
+	if (KTRPOINT(p, KTR_GENIO))  {
+		MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK);
+		bcopy((caddr_t)auio.uio_iov, (caddr_t)ktriov, iovlen);
+	}
+#endif
+	cnt = auio.uio_resid;		/* bytes requested */
+	if (error = (*fp->f_ops->fo_write)(fp, &auio, fp->f_cred)) {
+		if (auio.uio_resid != cnt && (error == ERESTART ||
+		    error == EINTR || error == EWOULDBLOCK))
+			error = 0;	/* partial transfer counts as success */
+		if (error == EPIPE)
+			psignal(p, SIGPIPE);
+	}
+	cnt -= auio.uio_resid;		/* bytes actually transferred */
+#ifdef KTRACE
+	if (ktriov != NULL) {
+		if (error == 0)
+			ktrgenio(p->p_tracep, uap->fd, UIO_WRITE,
+				ktriov, cnt, error);
+		FREE(ktriov, M_TEMP);
+	}
+#endif
+	*retval = cnt;
+done:
+	if (needfree)			/* only set when MALLOC was used */
+		FREE(needfree, M_IOV);
+	return (error);
+}
+
+/*
+ * Ioctl system call
+ */
+struct ioctl_args {
+	int	fd;
+	int	com;
+	caddr_t	data;
+};
+/*
+ * Perform the ioctl system call on descriptor uap->fd.
+ *
+ * FIOCLEX and FIONCLEX only toggle the descriptor's close-on-exec flag
+ * and return at once.  For every other command the parameter length
+ * encoded in the command word selects a kernel buffer: the on-stack
+ * stkbuf when it fits, a malloc'ed buffer otherwise.  The buffer is
+ * filled from user space for IOC_IN commands with a non-zero length,
+ * zeroed for IOC_OUT commands, and loaded with the raw uap->data value
+ * for IOC_VOID and zero-length IOC_IN commands.  The request is then
+ * handed to the file's fo_ioctl routine; FIONBIO, FIOASYNC, FIOSETOWN
+ * and FIOGETOWN get generic handling first.  Finally, for IOC_OUT
+ * commands that succeeded, the buffer is copied back to uap->data.
+ *
+ * Returns 0 on success, otherwise an errno value:
+ *	EBADF	fd is out of range, not open, or open for neither
+ *		reading nor writing;
+ *	ENOTTY	the encoded parameter length exceeds IOCPARM_MAX;
+ *	ESRCH	FIOSETOWN on a non-socket named a process pfind() could
+ *		not locate;
+ *	or whatever copyin(), copyout() or fo_ioctl report.
+ * retval is not used.
+ */
+/* ARGSUSED */
+ioctl(p, uap, retval)
+	struct proc *p;
+	register struct ioctl_args *uap;
+	int *retval;
+{
+	register struct file *fp;
+	register struct filedesc *fdp;
+	register int com, error;
+	register u_int size;
+	caddr_t data, memp;
+	int tmp;
+#define STK_PARAMS	128
+	char stkbuf[STK_PARAMS];
+
+	fdp = p->p_fd;
+	if ((u_int)uap->fd >= fdp->fd_nfiles ||
+	    (fp = fdp->fd_ofiles[uap->fd]) == NULL)
+		return (EBADF);
+
+	if ((fp->f_flag & (FREAD | FWRITE)) == 0)
+		return (EBADF);
+
+	/* Close-on-exec is a property of the descriptor, not the file. */
+	switch (com = uap->com) {
+	case FIONCLEX:
+		fdp->fd_ofileflags[uap->fd] &= ~UF_EXCLOSE;
+		return (0);
+	case FIOCLEX:
+		fdp->fd_ofileflags[uap->fd] |= UF_EXCLOSE;
+		return (0);
+	}
+
+	/*
+	 * Interpret high order word to find amount of data to be
+	 * copied to/from the user's address space.
+	 */
+	size = IOCPARM_LEN(com);
+	if (size > IOCPARM_MAX)
+		return (ENOTTY);
+	memp = NULL;
+	if (size > sizeof (stkbuf)) {
+		memp = (caddr_t)malloc((u_long)size, M_IOCTLOPS, M_WAITOK);
+		data = memp;
+	} else
+		data = stkbuf;
+	if (com&IOC_IN) {
+		if (size) {
+			error = copyin(uap->data, data, (u_int)size);
+			if (error) {
+				if (memp)
+					free(memp, M_IOCTLOPS);
+				return (error);
+			}
+		} else
+			*(caddr_t *)data = uap->data;
+	} else if ((com&IOC_OUT) && size)
+		/*
+		 * Zero the buffer so the user always
+		 * gets back something deterministic.
+		 */
+		bzero(data, size);
+	else if (com&IOC_VOID)
+		*(caddr_t *)data = uap->data;
+
+	switch (com) {
+
+	case FIONBIO:
+		/* Mirror the setting in f_flag, then tell the object. */
+		if (tmp = *(int *)data)
+			fp->f_flag |= FNONBLOCK;
+		else
+			fp->f_flag &= ~FNONBLOCK;
+		error = (*fp->f_ops->fo_ioctl)(fp, FIONBIO, (caddr_t)&tmp, p);
+		break;
+
+	case FIOASYNC:
+		/* Mirror the setting in f_flag, then tell the object. */
+		if (tmp = *(int *)data)
+			fp->f_flag |= FASYNC;
+		else
+			fp->f_flag &= ~FASYNC;
+		error = (*fp->f_ops->fo_ioctl)(fp, FIOASYNC, (caddr_t)&tmp, p);
+		break;
+
+	case FIOSETOWN:
+		tmp = *(int *)data;
+		if (fp->f_type == DTYPE_SOCKET) {
+			((struct socket *)fp->f_data)->so_pgid = tmp;
+			error = 0;
+			break;
+		}
+		/*
+		 * Not a socket: a value <= 0 is negated and used as a
+		 * process group id, a positive value is a pid that is
+		 * mapped to its process group; the result is passed on
+		 * as TIOCSPGRP.
+		 */
+		if (tmp <= 0) {
+			tmp = -tmp;
+		} else {
+			struct proc *p1 = pfind(tmp);
+			if (p1 == 0) {
+				error = ESRCH;
+				break;
+			}
+			tmp = p1->p_pgrp->pg_id;
+		}
+		error = (*fp->f_ops->fo_ioctl)
+			(fp, (int)TIOCSPGRP, (caddr_t)&tmp, p);
+		break;
+
+	case FIOGETOWN:
+		if (fp->f_type == DTYPE_SOCKET) {
+			error = 0;
+			*(int *)data = ((struct socket *)fp->f_data)->so_pgid;
+			break;
+		}
+		/* Process group ids are reported negated (cf. FIOSETOWN). */
+		error = (*fp->f_ops->fo_ioctl)(fp, (int)TIOCGPGRP, data, p);
+		*(int *)data = -*(int *)data;
+		break;
+
+	default:
+		error = (*fp->f_ops->fo_ioctl)(fp, com, data, p);
+		break;
+	}
+	/*
+	 * Copy any data to user, size was already set and checked above.
+	 * This is done after the switch rather than only in its default
+	 * case, so that commands completed generically above (FIOGETOWN
+	 * leaves its answer in the buffer) also reach the caller.
+	 */
+	if (error == 0 && (com&IOC_OUT) && size)
+		error = copyout(data, uap->data, (u_int)size);
+	if (memp)
+		free(memp, M_IOCTLOPS);
+	return (error);
+}
+
+int	selwait, nselcoll;	/* select() sleep channel; count of select collisions */
+
+/*
+ * Select system call.
+ *
+ * Argument block for select().  Each of in, ou and ex may be null, in
+ * which case that set is neither read nor written back.  selscan()
+ * polls the three sets with FREAD, FWRITE and 0 respectively.
+ */
+struct select_args {
+	u_int	nd;			/* number of descriptors to examine */
+	fd_set	*in, *ou, *ex;		/* user-space descriptor sets */
+	struct	timeval *tv;		/* user-space timeout; null for none */
+};
+select(p, uap, retval)
+	register struct proc *p;
+	register struct select_args *uap;
+	int *retval;		/* out: count of ready descriptors, set by selscan() */
+{
+	fd_set ibits[3], obits[3];	/* requested / ready sets; 0=in, 1=ou, 2=ex */
+	struct timeval atv;	/* timeout; made absolute by adding `time' below */
+	int s, ncoll, error = 0, timo;
+	u_int ni;		/* bytes of each set copied to and from user space */
+
+	bzero((caddr_t)ibits, sizeof(ibits));
+	bzero((caddr_t)obits, sizeof(obits));
+	if (uap->nd > FD_SETSIZE)
+		return (EINVAL);
+	if (uap->nd > p->p_fd->fd_nfiles)
+		uap->nd = p->p_fd->fd_nfiles;	/* forgiving; slightly wrong */
+	ni = howmany(uap->nd, NFDBITS) * sizeof(fd_mask);
+
+#define	getbits(name, x) \
+	if (uap->name && \
+	    (error = copyin((caddr_t)uap->name, (caddr_t)&ibits[x], ni))) \
+		goto done;
+	getbits(in, 0);
+	getbits(ou, 1);
+	getbits(ex, 2);
+#undef	getbits
+
+	if (uap->tv) {
+		error = copyin((caddr_t)uap->tv, (caddr_t)&atv,
+			sizeof (atv));
+		if (error)
+			goto done;
+		if (itimerfix(&atv)) {
+			error = EINVAL;
+			goto done;
+		}
+		s = splclock();
+		timevaladd(&atv, (struct timeval *)&time);
+		timo = hzto(&atv);
+		/*
+		 * Avoid inadvertently sleeping forever.
+		 * A zero tick count is what the no-timeout path below
+		 * hands to tsleep(), so a timeout that hzto() rounds
+		 * down to zero is bumped to one tick instead.
+		 */
+		if (timo == 0)
+			timo = 1;
+		splx(s);
+	} else
+		timo = 0;	/* no timeout supplied */
+retry:
+	ncoll = nselcoll;	/* snapshot; selwakeup() bumps it on a collision */
+	p->p_flag |= P_SELECT;	/* selwakeup() clears this if an event hits mid-scan */
+	error = selscan(p, ibits, obits, uap->nd, retval);
+	if (error || *retval)
+		goto done;
+	s = splhigh();
+	/* this should be timercmp(&time, &atv, >=) */
+	if (uap->tv && (time.tv_sec > atv.tv_sec ||
+	    time.tv_sec == atv.tv_sec && time.tv_usec >= atv.tv_usec)) {
+		splx(s);
+		goto done;	/* timeout already expired; nothing was ready */
+	}
+	if ((p->p_flag & P_SELECT) == 0 || nselcoll != ncoll) {
+		splx(s);
+		goto retry;	/* a wakeup or collision raced the scan; rescan */
+	}
+	p->p_flag &= ~P_SELECT;
+	error = tsleep((caddr_t)&selwait, PSOCK | PCATCH, "select", timo);
+	splx(s);
+	if (error == 0)
+		goto retry;	/* woken normally; rescan for ready descriptors */
+done:
+	p->p_flag &= ~P_SELECT;
+	/* select is not restarted after signals... */
+	if (error == ERESTART)
+		error = EINTR;
+	if (error == EWOULDBLOCK)	/* presumably tsleep()'s timeout result -- TODO confirm */
+		error = 0;
+#define	putbits(name, x) \
+	if (uap->name && \
+	    (error2 = copyout((caddr_t)&obits[x], (caddr_t)uap->name, ni))) \
+		error = error2;
+	if (error == 0) {
+		int error2;	/* referenced by the putbits() expansions */
+
+		putbits(in, 0);
+		putbits(ou, 1);
+		putbits(ex, 2);
+#undef putbits
+	}
+	return (error);
+}
+
+/*
+ * Poll every descriptor named in the three input sets.
+ *
+ * ibits[0], ibits[1] and ibits[2] are polled through fo_select with
+ * FREAD, FWRITE and 0 respectively; each descriptor that reports ready
+ * is set in the matching obits[] set.  Only descriptors below nfd are
+ * considered.  On success the number of ready (descriptor, set) pairs
+ * is stored in *retval and 0 is returned.  If a selected descriptor is
+ * not open, EBADF is returned at once and *retval is left untouched.
+ */
+selscan(p, ibits, obits, nfd, retval)
+	struct proc *p;
+	fd_set *ibits, *obits;
+	int nfd, *retval;
+{
+	static int mode[3] = { FREAD, FWRITE, 0 };
+	register struct filedesc *fdp;
+	register fd_mask pending;
+	register int set, base, bit, fd;
+	struct file *fp;
+	int nready;
+
+	fdp = p->p_fd;
+	nready = 0;
+	for (set = 0; set < 3; set++) {
+		/* Walk the set one fd_mask word at a time. */
+		for (base = 0; base < nfd; base += NFDBITS) {
+			pending = ibits[set].fds_bits[base / NFDBITS];
+			for (;;) {
+				/* ffs() is 1-based; 0 means no bits remain. */
+				bit = ffs(pending);
+				if (bit == 0)
+					break;
+				bit--;
+				fd = base + bit;
+				if (fd >= nfd)
+					break;
+				pending &= ~(1 << bit);
+				fp = fdp->fd_ofiles[fd];
+				if (fp == NULL)
+					return (EBADF);
+				if ((*fp->f_ops->fo_select)(fp, mode[set], p)) {
+					FD_SET(fd, &obits[set]);
+					nready++;
+				}
+			}
+		}
+	}
+	*retval = nready;
+	return (0);
+}
+
+/*
+ * Select routine that unconditionally reports "ready": all three
+ * arguments are ignored and 1 is returned.
+ */
+/*ARGSUSED*/
+seltrue(dev, flag, p)
+	dev_t dev;
+	int flag;
+	struct proc *p;
+{
+	return (1);
+}
+
+/*
+ * Note that process `selector' is selecting on the object that owns
+ * `sip'.  If another process is already recorded there and is asleep
+ * on selwait, mark a collision (SI_COLL); otherwise record the
+ * selector's pid as the one to wake.
+ */
+void
+selrecord(selector, sip)
+	struct proc *selector;
+	struct selinfo *sip;
+{
+	struct proc *holder;
+	pid_t self;
+
+	self = selector->p_pid;
+	if (sip->si_pid == self)
+		return;			/* already recorded */
+	if (sip->si_pid == 0) {
+		sip->si_pid = self;	/* nobody recorded yet */
+		return;
+	}
+	holder = pfind(sip->si_pid);
+	if (holder != NULL && holder->p_wchan == (caddr_t)&selwait)
+		sip->si_flags |= SI_COLL;
+	else
+		sip->si_pid = self;	/* stale or non-sleeping holder */
+}
+
+/*
+ * A selectable event has occurred on the object that owns `sip'.
+ * After a recorded collision, bump nselcoll and wake everything
+ * sleeping on selwait.  Then deal with the single recorded process:
+ * make it runnable (or take it off the sleep queue) if it is waiting
+ * on selwait, or else clear its P_SELECT flag so that a select() still
+ * scanning notices and rescans.  The recorded pid is always cleared.
+ */
+void
+selwakeup(sip)
+	register struct selinfo *sip;
+{
+	register struct proc *waiter;
+	int s;
+
+	if (sip->si_pid == 0)
+		return;
+	if (sip->si_flags & SI_COLL) {
+		nselcoll++;
+		sip->si_flags &= ~SI_COLL;
+		wakeup((caddr_t)&selwait);
+	}
+	waiter = pfind(sip->si_pid);
+	sip->si_pid = 0;
+	if (waiter == NULL)
+		return;
+
+	s = splhigh();
+	if (waiter->p_wchan != (caddr_t)&selwait) {
+		if (waiter->p_flag & P_SELECT)
+			waiter->p_flag &= ~P_SELECT;
+	} else if (waiter->p_stat == SSLEEP)
+		setrunnable(waiter);
+	else
+		unsleep(waiter);
+	splx(s);
+}
diff --git a/sys/kern/sys_process.c b/sys/kern/sys_process.c
new file mode 100644
index 00000000000..4cc40baf582
--- /dev/null
+++ b/sys/kern/sys_process.c
@@ -0,0 +1,74 @@
+/*-
+ * Copyright (c) 1982, 1986, 1989, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	from: @(#)sys_process.c	8.1 (Berkeley) 6/10/93
+ */
+
+#include <sys/param.h>
+#include <sys/proc.h>
+#include <sys/errno.h>
+
+/*
+ * Process debugging system call.
+ *
+ * Argument block for ptrace().  The ptrace() body in this file has
+ * been deleted and simply fails, so none of these fields is
+ * interpreted here.
+ */
+struct ptrace_args {
+	int	req;
+	pid_t	pid;
+	caddr_t	addr;
+	int	data;
+};
+ptrace(a1, a2, a3)
+	struct proc *a1;
+	struct ptrace_args *a2;
+	int *a3;
+{
+	/*
+	 * The implementation has been removed from this file; the
+	 * arguments are ignored and every request fails with ENOSYS.
+	 */
+	return (ENOSYS);
+}
+
+trace_req(a1)
+	struct proc *a1;
+{
+	/*
+	 * The implementation has been removed from this file; the
+	 * argument is ignored and 0 is always returned.
+	 */
+	return (0);
+}
diff --git a/sys/kern/sys_socket.c b/sys/kern/sys_socket.c
new file mode 100644
index 00000000000..a93ae86df85
--- /dev/null
+++ b/sys/kern/sys_socket.c
@@ -0,0 +1,197 @@
+/*
+ * Copyright (c) 1982, 1986, 1990, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)sys_socket.c	8.1 (Berkeley) 6/10/93
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/file.h>
+#include <sys/mbuf.h>
+#include <sys/protosw.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/ioctl.h>
+#include <sys/stat.h>
+
+#include <net/if.h>
+#include <net/route.h>
+
+struct	fileops socketops =	/* file operations vector for sockets */
+    { soo_read, soo_write, soo_ioctl, soo_select, soo_close };
+
+/*
+ * Read entry of socketops: receive into `uio' from the socket behind
+ * `fp' via soreceive(), passing null for all of its optional
+ * arguments.  Returns whatever soreceive() returns; `cred' is unused.
+ */
+/* ARGSUSED */
+soo_read(fp, uio, cred)
+	struct file *fp;
+	struct uio *uio;
+	struct ucred *cred;
+{
+	struct socket *so;
+
+	so = (struct socket *)fp->f_data;
+	return (soreceive(so, (struct mbuf **)0, uio, (struct mbuf **)0,
+	    (struct mbuf **)0, (int *)0));
+}
+
+/*
+ * Write entry of socketops: send the data described by `uio' on the
+ * socket behind `fp' via sosend(), passing null mbuf arguments and
+ * flags of 0.  Returns whatever sosend() returns; `cred' is unused.
+ */
+/* ARGSUSED */
+soo_write(fp, uio, cred)
+	struct file *fp;
+	struct uio *uio;
+	struct ucred *cred;
+{
+	struct socket *so;
+
+	so = (struct socket *)fp->f_data;
+	return (sosend(so, (struct mbuf *)0, uio, (struct mbuf *)0,
+	    (struct mbuf *)0, 0));
+}
+
+/*
+ * Ioctl entry of socketops.
+ *
+ * The generic socket requests (FIONBIO, FIOASYNC, FIONREAD, SIOCSPGRP,
+ * SIOCGPGRP, SIOCATMARK) are answered here from the socket structure
+ * and return 0.  Requests in ioctl group 'i' are passed to ifioctl(),
+ * those in group 'r' to rtioctl(), and anything else goes to the
+ * protocol as a PRU_CONTROL request; the result of that call is
+ * returned.  `data' points at the kernel copy of the argument.
+ */
+soo_ioctl(fp, cmd, data, p)
+	struct file *fp;
+	int cmd;
+	register caddr_t data;
+	struct proc *p;
+{
+	register struct socket *so;
+	register int *ival;
+
+	so = (struct socket *)fp->f_data;
+	ival = (int *)data;	/* integer view of the argument */
+
+	switch (cmd) {
+
+	case FIONBIO:
+		if (*ival == 0)
+			so->so_state &= ~SS_NBIO;
+		else
+			so->so_state |= SS_NBIO;
+		return (0);
+
+	case FIOASYNC:
+		if (*ival == 0) {
+			so->so_state &= ~SS_ASYNC;
+			so->so_rcv.sb_flags &= ~SB_ASYNC;
+			so->so_snd.sb_flags &= ~SB_ASYNC;
+		} else {
+			so->so_state |= SS_ASYNC;
+			so->so_rcv.sb_flags |= SB_ASYNC;
+			so->so_snd.sb_flags |= SB_ASYNC;
+		}
+		return (0);
+
+	case FIONREAD:
+		*ival = so->so_rcv.sb_cc;
+		return (0);
+
+	case SIOCSPGRP:
+		so->so_pgid = *ival;
+		return (0);
+
+	case SIOCGPGRP:
+		*ival = so->so_pgid;
+		return (0);
+
+	case SIOCATMARK:
+		*ival = (so->so_state & SS_RCVATMARK) ? 1 : 0;
+		return (0);
+	}
+
+	/*
+	 * Interface-, routing- and protocol-specific ioctls.
+	 * The interface and routing ioctls should really have their
+	 * own entry point, since they do not need a socket.
+	 */
+	switch (IOCGROUP(cmd)) {
+	case 'i':
+		return (ifioctl(so, cmd, data, p));
+	case 'r':
+		return (rtioctl(cmd, data, p));
+	}
+	return ((*so->so_proto->pr_usrreq)(so, PRU_CONTROL,
+	    (struct mbuf *)cmd, (struct mbuf *)data, (struct mbuf *)0));
+}
+
+/*
+ * Select entry of socketops.
+ *
+ * `which' is FREAD, FWRITE or 0.  Returns 1 when the socket behind
+ * `fp' already satisfies the condition: readable, writeable, or (for
+ * 0) having an out-of-band mark or being at the mark.  Otherwise `p'
+ * is recorded on the relevant socket buffer's selinfo, that buffer is
+ * flagged SB_SEL, and 0 is returned.  Any other value of `which'
+ * returns 0 without recording anything.  Runs at splnet.
+ */
+soo_select(fp, which, p)
+	struct file *fp;
+	int which;
+	struct proc *p;
+{
+	register struct socket *so = (struct socket *)fp->f_data;
+	register int s;
+	int ready = 0;
+
+	s = splnet();
+	if (which == FREAD) {
+		if (soreadable(so))
+			ready = 1;
+		else {
+			selrecord(p, &so->so_rcv.sb_sel);
+			so->so_rcv.sb_flags |= SB_SEL;
+		}
+	} else if (which == FWRITE) {
+		if (sowriteable(so))
+			ready = 1;
+		else {
+			selrecord(p, &so->so_snd.sb_sel);
+			so->so_snd.sb_flags |= SB_SEL;
+		}
+	} else if (which == 0) {
+		if (so->so_oobmark || (so->so_state & SS_RCVATMARK))
+			ready = 1;
+		else {
+			selrecord(p, &so->so_rcv.sb_sel);
+			so->so_rcv.sb_flags |= SB_SEL;
+		}
+	}
+	splx(s);
+	return (ready);
+}
+
+/*
+ * Fill in a stat structure for a socket: clear it, set the mode to
+ * S_IFSOCK, then hand it to the protocol as a PRU_SENSE request.
+ * Returns the result of that request.
+ */
+soo_stat(so, ub)
+	register struct socket *so;
+	register struct stat *ub;
+{
+	int error;
+
+	bzero((caddr_t)ub, sizeof (*ub));
+	ub->st_mode = S_IFSOCK;
+	error = (*so->so_proto->pr_usrreq)(so, PRU_SENSE,
+	    (struct mbuf *)ub, (struct mbuf *)0, (struct mbuf *)0);
+	return (error);
+}
+
+/*
+ * Close entry of socketops: if the file still refers to a socket,
+ * close it with soclose(), then clear f_data.  Returns soclose()'s
+ * result, or 0 when there was no socket; `p' is unused.
+ */
+/* ARGSUSED */
+soo_close(fp, p)
+	struct file *fp;
+	struct proc *p;
+{
+	struct socket *so;
+	int error;
+
+	so = (struct socket *)fp->f_data;
+	error = so ? soclose(so) : 0;
+	fp->f_data = 0;
+	return (error);
+}
diff --git a/sys/kern/syscalls.c b/sys/kern/syscalls.c
new file mode 100644
index 00000000000..1809905a4f6
--- /dev/null
+++ b/sys/kern/syscalls.c
@@ -0,0 +1,251 @@
+/*
+ * System call names.
+ *
+ * DO NOT EDIT-- this file is automatically generated.
+ * created from	@(#)syscalls.master	8.2 (Berkeley) 1/13/94
+ */
+
+char *syscallnames[] = {
+	"syscall",			/* 0 = syscall */
+	"exit",			/* 1 = exit */
+	"fork",			/* 2 = fork */
+	"read",			/* 3 = read */
+	"write",			/* 4 = write */
+	"open",			/* 5 = open */
+	"close",			/* 6 = close */
+	"wait4",			/* 7 = wait4 */
+	"old.creat",		/* 8 = old creat */
+	"link",			/* 9 = link */
+	"unlink",			/* 10 = unlink */
+	"obs_execv",			/* 11 = obsolete execv */
+	"chdir",			/* 12 = chdir */
+	"fchdir",			/* 13 = fchdir */
+	"mknod",			/* 14 = mknod */
+	"chmod",			/* 15 = chmod */
+	"chown",			/* 16 = chown */
+	"break",			/* 17 = break */
+	"getfsstat",			/* 18 = getfsstat */
+	"old.lseek",		/* 19 = old lseek */
+	"getpid",			/* 20 = getpid */
+	"mount",			/* 21 = mount */
+	"unmount",			/* 22 = unmount */
+	"setuid",			/* 23 = setuid */
+	"getuid",			/* 24 = getuid */
+	"geteuid",			/* 25 = geteuid */
+	"ptrace",			/* 26 = ptrace */
+	"recvmsg",			/* 27 = recvmsg */
+	"sendmsg",			/* 28 = sendmsg */
+	"recvfrom",			/* 29 = recvfrom */
+	"accept",			/* 30 = accept */
+	"getpeername",			/* 31 = getpeername */
+	"getsockname",			/* 32 = getsockname */
+	"access",			/* 33 = access */
+	"chflags",			/* 34 = chflags */
+	"fchflags",			/* 35 = fchflags */
+	"sync",			/* 36 = sync */
+	"kill",			/* 37 = kill */
+	"old.stat",		/* 38 = old stat */
+	"getppid",			/* 39 = getppid */
+	"old.lstat",		/* 40 = old lstat */
+	"dup",			/* 41 = dup */
+	"pipe",			/* 42 = pipe */
+	"getegid",			/* 43 = getegid */
+	"profil",			/* 44 = profil */
+#ifdef KTRACE
+	"ktrace",			/* 45 = ktrace */
+#else
+	"#45",			/* 45 = ktrace */
+#endif
+	"sigaction",			/* 46 = sigaction */
+	"getgid",			/* 47 = getgid */
+	"sigprocmask",			/* 48 = sigprocmask */
+	"getlogin",			/* 49 = getlogin */
+	"setlogin",			/* 50 = setlogin */
+	"acct",			/* 51 = acct */
+	"sigpending",			/* 52 = sigpending */
+	"sigaltstack",			/* 53 = sigaltstack */
+	"ioctl",			/* 54 = ioctl */
+	"reboot",			/* 55 = reboot */
+	"revoke",			/* 56 = revoke */
+	"symlink",			/* 57 = symlink */
+	"readlink",			/* 58 = readlink */
+	"execve",			/* 59 = execve */
+	"umask",			/* 60 = umask */
+	"chroot",			/* 61 = chroot */
+	"old.fstat",		/* 62 = old fstat */
+	"old.getkerninfo",		/* 63 = old getkerninfo */
+	"old.getpagesize",		/* 64 = old getpagesize */
+	"msync",			/* 65 = msync */
+	"vfork",			/* 66 = vfork */
+	"obs_vread",			/* 67 = obsolete vread */
+	"obs_vwrite",			/* 68 = obsolete vwrite */
+	"sbrk",			/* 69 = sbrk */
+	"sstk",			/* 70 = sstk */
+	"old.mmap",		/* 71 = old mmap */
+	"vadvise",			/* 72 = vadvise */
+	"munmap",			/* 73 = munmap */
+	"mprotect",			/* 74 = mprotect */
+	"madvise",			/* 75 = madvise */
+	"obs_vhangup",			/* 76 = obsolete vhangup */
+	"obs_vlimit",			/* 77 = obsolete vlimit */
+	"mincore",			/* 78 = mincore */
+	"getgroups",			/* 79 = getgroups */
+	"setgroups",			/* 80 = setgroups */
+	"getpgrp",			/* 81 = getpgrp */
+	"setpgid",			/* 82 = setpgid */
+	"setitimer",			/* 83 = setitimer */
+	"old.wait",		/* 84 = old wait */
+	"swapon",			/* 85 = swapon */
+	"getitimer",			/* 86 = getitimer */
+	"old.gethostname",		/* 87 = old gethostname */
+	"old.sethostname",		/* 88 = old sethostname */
+	"getdtablesize",			/* 89 = getdtablesize */
+	"dup2",			/* 90 = dup2 */
+	"#91",			/* 91 = getdopt */
+	"fcntl",			/* 92 = fcntl */
+	"select",			/* 93 = select */
+	"#94",			/* 94 = setdopt */
+	"fsync",			/* 95 = fsync */
+	"setpriority",			/* 96 = setpriority */
+	"socket",			/* 97 = socket */
+	"connect",			/* 98 = connect */
+	"old.accept",		/* 99 = old accept */
+	"getpriority",			/* 100 = getpriority */
+	"old.send",		/* 101 = old send */
+	"old.recv",		/* 102 = old recv */
+	"sigreturn",			/* 103 = sigreturn */
+	"bind",			/* 104 = bind */
+	"setsockopt",			/* 105 = setsockopt */
+	"listen",			/* 106 = listen */
+	"obs_vtimes",			/* 107 = obsolete vtimes */
+	"old.sigvec",		/* 108 = old sigvec */
+	"old.sigblock",		/* 109 = old sigblock */
+	"old.sigsetmask",		/* 110 = old sigsetmask */
+	"sigsuspend",			/* 111 = sigsuspend */
+	"old.sigstack",		/* 112 = old sigstack */
+	"old.recvmsg",		/* 113 = old recvmsg */
+	"old.sendmsg",		/* 114 = old sendmsg */
+#ifdef TRACE
+	"vtrace",			/* 115 = vtrace */
+#else
+	"obs_vtrace",			/* 115 = obsolete vtrace */
+#endif
+	"gettimeofday",			/* 116 = gettimeofday */
+	"getrusage",			/* 117 = getrusage */
+	"getsockopt",			/* 118 = getsockopt */
+#ifdef vax
+	"resuba",			/* 119 = resuba */
+#else
+	"#119",			/* 119 = nosys */
+#endif
+	"readv",			/* 120 = readv */
+	"writev",			/* 121 = writev */
+	"settimeofday",			/* 122 = settimeofday */
+	"fchown",			/* 123 = fchown */
+	"fchmod",			/* 124 = fchmod */
+	"old.recvfrom",		/* 125 = old recvfrom */
+	"old.setreuid",		/* 126 = old setreuid */
+	"old.setregid",		/* 127 = old setregid */
+	"rename",			/* 128 = rename */
+	"old.truncate",		/* 129 = old truncate */
+	"old.ftruncate",		/* 130 = old ftruncate */
+	"flock",			/* 131 = flock */
+	"mkfifo",			/* 132 = mkfifo */
+	"sendto",			/* 133 = sendto */
+	"shutdown",			/* 134 = shutdown */
+	"socketpair",			/* 135 = socketpair */
+	"mkdir",			/* 136 = mkdir */
+	"rmdir",			/* 137 = rmdir */
+	"utimes",			/* 138 = utimes */
+	"obs_4.2",			/* 139 = obsolete 4.2 sigreturn */
+	"adjtime",			/* 140 = adjtime */
+	"old.getpeername",		/* 141 = old getpeername */
+	"old.gethostid",		/* 142 = old gethostid */
+	"old.sethostid",		/* 143 = old sethostid */
+	"old.getrlimit",		/* 144 = old getrlimit */
+	"old.setrlimit",		/* 145 = old setrlimit */
+	"old.killpg",		/* 146 = old killpg */
+	"setsid",			/* 147 = setsid */
+	"quotactl",			/* 148 = quotactl */
+	"old.quota",		/* 149 = old quota */
+	"old.getsockname",		/* 150 = old getsockname */
+	"#151",			/* 151 = nosys */
+	"#152",			/* 152 = nosys */
+	"#153",			/* 153 = nosys */
+	"#154",			/* 154 = nosys */
+#ifdef NFS
+	"nfssvc",			/* 155 = nfssvc */
+#else
+	"#155",			/* 155 = nosys */
+#endif
+	"old.getdirentries",		/* 156 = old getdirentries */
+	"statfs",			/* 157 = statfs */
+	"fstatfs",			/* 158 = fstatfs */
+	"#159",			/* 159 = nosys */
+	"#160",			/* 160 = nosys */
+#ifdef NFS
+	"getfh",			/* 161 = getfh */
+#else
+	"#161",			/* 161 = nosys */
+#endif
+	"#162",			/* 162 = nosys */
+	"#163",			/* 163 = nosys */
+	"#164",			/* 164 = nosys */
+	"#165",			/* 165 = nosys */
+	"#166",			/* 166 = nosys */
+	"#167",			/* 167 = nosys */
+	"#168",			/* 168 = nosys */
+	"#169",			/* 169 = nosys */
+	"#170",			/* 170 = nosys */
+#ifdef SYSVSHM
+	"shmsys",			/* 171 = shmsys */
+#else
+	"#171",			/* 171 = nosys */
+#endif
+	"#172",			/* 172 = nosys */
+	"#173",			/* 173 = nosys */
+	"#174",			/* 174 = nosys */
+	"#175",			/* 175 = nosys */
+	"#176",			/* 176 = nosys */
+	"#177",			/* 177 = nosys */
+	"#178",			/* 178 = nosys */
+	"#179",			/* 179 = nosys */
+	"#180",			/* 180 = nosys */
+	"setgid",			/* 181 = setgid */
+	"setegid",			/* 182 = setegid */
+	"seteuid",			/* 183 = seteuid */
+#ifdef LFS
+	"lfs_bmapv",			/* 184 = lfs_bmapv */
+	"lfs_markv",			/* 185 = lfs_markv */
+	"lfs_segclean",			/* 186 = lfs_segclean */
+	"lfs_segwait",			/* 187 = lfs_segwait */
+#else
+	"#184",			/* 184 = nosys */
+	"#185",			/* 185 = nosys */
+	"#186",			/* 186 = nosys */
+	"#187",			/* 187 = nosys */
+#endif
+	"stat",			/* 188 = stat */
+	"fstat",			/* 189 = fstat */
+	"lstat",			/* 190 = lstat */
+	"pathconf",			/* 191 = pathconf */
+	"fpathconf",			/* 192 = fpathconf */
+	"#193",			/* 193 = nosys */
+	"getrlimit",			/* 194 = getrlimit */
+	"setrlimit",			/* 195 = setrlimit */
+	"getdirentries",			/* 196 = getdirentries */
+	"mmap",			/* 197 = mmap */
+	"__syscall",			/* 198 = __syscall */
+	"lseek",			/* 199 = lseek */
+	"truncate",			/* 200 = truncate */
+	"ftruncate",			/* 201 = ftruncate */
+	"__sysctl",			/* 202 = __sysctl */
+	"mlock",			/* 203 = mlock */
+	"munlock",			/* 204 = munlock */
+	"#205",			/* 205 = nosys */
+	"#206",			/* 206 = nosys */
+	"#207",			/* 207 = nosys */
+	"#208",			/* 208 = nosys */
+	"#209",			/* 209 = nosys */
+	"#210",			/* 210 = nosys */
+};
diff --git a/sys/kern/syscalls.master b/sys/kern/syscalls.master
new file mode 100644
index 00000000000..1b8de145fba
--- /dev/null
+++ b/sys/kern/syscalls.master
@@ -0,0 +1,276 @@
+	@(#)syscalls.master	8.2 (Berkeley) 1/13/94
+; System call name/number master file.
+; Processed to create init_sysent.c, syscalls.c and syscall.h.
+
+; Columns: number type nargs name altname/comments
+;	number	system call number, must be in order
+;	type	one of STD, OBSOL, UNIMPL, COMPAT, LIBCOMPAT
+;	nargs	number of arguments
+;	name	name of syscall routine
+;	altname	name of system call if different
+;		for UNIMPL/OBSOL, name continues with comments
+
+; types:
+;	STD	always included
+;	COMPAT	included on COMPAT #ifdef
+;	LIBCOMPAT included on COMPAT #ifdef, and placed in syscall.h
+;	OBSOL	obsolete, not included in system, only specifies name
+;	UNIMPL	not implemented, placeholder only
+
+; #ifdef's, etc. may be included, and are copied to the output files.
+
+; Reserved/unimplemented system calls in the range 0-150 inclusive
+; are reserved for use in future Berkeley releases.
+; Additional system calls implemented in vendor and other
+; redistributions should be placed in the reserved range at the end
+; of the current calls.
+
+0	STD	0 nosys syscall
+1	STD	1 exit
+2	STD	0 fork
+3	STD	3 read
+4	STD	3 write
+5	STD	3 open
+6	STD	1 close
+7	STD	4 wait4
+8	COMPAT	2 creat
+9	STD	2 link
+10	STD	1 unlink
+11	OBSOL	2 execv
+12	STD	1 chdir
+13	STD	1 fchdir
+14	STD	3 mknod
+15	STD	2 chmod
+16	STD	3 chown
+17	STD	1 obreak break
+18	STD	3 getfsstat
+19	COMPAT	3 lseek
+20	STD	0 getpid
+21	STD	4 mount
+22	STD	2 unmount
+23	STD	1 setuid
+24	STD	0 getuid
+25	STD	0 geteuid
+26	STD	4 ptrace
+27	STD	3 recvmsg
+28	STD	3 sendmsg
+29	STD	6 recvfrom
+30	STD	3 accept
+31	STD	3 getpeername
+32	STD	3 getsockname
+33	STD	2 access
+34	STD	2 chflags
+35	STD	2 fchflags
+36	STD	0 sync
+37	STD	2 kill
+38	COMPAT	2 stat
+39	STD	0 getppid
+40	COMPAT	2 lstat
+41	STD	2 dup
+42	STD	0 pipe
+43	STD	0 getegid
+44	STD	4 profil
+#ifdef KTRACE
+45	STD	4 ktrace
+#else
+45	UNIMPL	0 ktrace
+#endif
+46	STD	3 sigaction
+47	STD	0 getgid
+48	STD	2 sigprocmask
+49	STD	2 getlogin
+50	STD	1 setlogin
+51	STD	1 acct
+52	STD	0 sigpending
+53	STD	2 sigaltstack
+54	STD	3 ioctl
+55	STD	1 reboot
+56	STD	1 revoke
+57	STD	2 symlink
+58	STD	3 readlink
+59	STD	3 execve
+60	STD	1 umask
+61	STD	1 chroot
+62	COMPAT	2 fstat
+63	COMPAT	4 getkerninfo
+64	COMPAT	0 getpagesize
+65	STD	2 msync
+66	STD	0 vfork
+67	OBSOL	0 vread
+68	OBSOL	0 vwrite
+69	STD	1 sbrk
+70	STD	1 sstk
+71	COMPAT	7 mmap
+72	STD	1 ovadvise vadvise
+73	STD	2 munmap
+74	STD	3 mprotect
+75	STD	3 madvise
+76	OBSOL	0 vhangup
+77	OBSOL	0 vlimit
+78	STD	3 mincore
+79	STD	2 getgroups
+80	STD	2 setgroups
+81	STD	0 getpgrp
+82	STD	2 setpgid
+83	STD	3 setitimer
+84	COMPAT	0 wait
+85	STD	1 swapon
+86	STD	2 getitimer
+87	COMPAT	2 gethostname
+88	COMPAT	2 sethostname
+89	STD	0 getdtablesize
+90	STD	2 dup2
+91	UNIMPL	2 getdopt
+92	STD	3 fcntl
+93	STD	5 select
+94	UNIMPL	2 setdopt
+95	STD	1 fsync
+96	STD	3 setpriority
+97	STD	3 socket
+98	STD	3 connect
+99	COMPAT	3 accept
+100	STD	2 getpriority
+101	COMPAT	4 send
+102	COMPAT	4 recv
+103	STD	1 sigreturn
+104	STD	3 bind
+105	STD	5 setsockopt
+106	STD	2 listen
+107	OBSOL	0 vtimes
+108	COMPAT	3 sigvec
+109	COMPAT	1 sigblock
+110	COMPAT	1 sigsetmask
+111	STD	1 sigsuspend
+112	COMPAT	2 sigstack
+113	COMPAT	3 recvmsg
+114	COMPAT	3 sendmsg
+#ifdef TRACE
+115	STD	2 vtrace
+#else
+115	OBSOL	2 vtrace
+#endif
+116	STD	2 gettimeofday
+117	STD	2 getrusage
+118	STD	5 getsockopt
+#ifdef vax
+119	STD	1 resuba
+#else
+119	UNIMPL	0 nosys
+#endif
+120	STD	3 readv
+121	STD	3 writev
+122	STD	2 settimeofday
+123	STD	3 fchown
+124	STD	2 fchmod
+125	COMPAT	6 recvfrom
+126	COMPAT	2 setreuid
+127	COMPAT	2 setregid
+128	STD	2 rename
+129	COMPAT	2 truncate
+130	COMPAT	2 ftruncate
+131	STD	2 flock
+132	STD	2 mkfifo
+133	STD	6 sendto
+134	STD	2 shutdown
+135	STD	5 socketpair
+136	STD	2 mkdir
+137	STD	1 rmdir
+138	STD	2 utimes
+139	OBSOL	0 4.2 sigreturn
+140	STD	2 adjtime
+141	COMPAT	3 getpeername
+142	COMPAT	0 gethostid
+143	COMPAT	1 sethostid
+144	COMPAT	2 getrlimit
+145	COMPAT	2 setrlimit
+146	COMPAT	2 killpg
+147	STD	0 setsid
+148	STD	4 quotactl
+149	COMPAT	4 quota
+150	COMPAT	3 getsockname
+
+; Syscalls 151-180 inclusive are reserved for vendor-specific
+; system calls.  (This includes various calls added for compatibility
+; with other Unix variants.)
+; Some of these calls are now supported by BSD...
+151	UNIMPL	0 nosys
+152	UNIMPL	0 nosys
+153	UNIMPL	0 nosys
+154	UNIMPL	0 nosys
+#ifdef NFS
+155	STD	2 nfssvc
+#else
+155	UNIMPL	0 nosys
+#endif
+156	COMPAT	4 getdirentries
+157	STD	2 statfs
+158	STD	2 fstatfs
+159	UNIMPL	0 nosys
+160	UNIMPL	0 nosys
+#ifdef NFS
+161	STD	2 getfh
+#else
+161	UNIMPL	0 nosys
+#endif
+162	UNIMPL	0 nosys
+163	UNIMPL	0 nosys
+164	UNIMPL	0 nosys
+165	UNIMPL	0 nosys
+166	UNIMPL	0 nosys
+167	UNIMPL	0 nosys
+168	UNIMPL	0 nosys
+169	UNIMPL	0 nosys
+170	UNIMPL	0 nosys
+#ifdef SYSVSHM
+171	STD	4 shmsys
+#else
+171	UNIMPL	0 nosys
+#endif
+172	UNIMPL	0 nosys
+173	UNIMPL	0 nosys
+174	UNIMPL	0 nosys
+175	UNIMPL	0 nosys
+176	UNIMPL	0 nosys
+177	UNIMPL	0 nosys
+178	UNIMPL	0 nosys
+179	UNIMPL	0 nosys
+180	UNIMPL	0 nosys
+
+; Syscalls 181-199 are used by/reserved for BSD
+181	STD	1 setgid
+182	STD	1 setegid
+183	STD	1 seteuid
+#ifdef LFS
+184	STD	3 lfs_bmapv
+185	STD	3 lfs_markv
+186	STD	2 lfs_segclean
+187	STD	2 lfs_segwait
+#else
+184	UNIMPL	0 nosys
+185	UNIMPL	0 nosys
+186	UNIMPL	0 nosys
+187	UNIMPL	0 nosys
+#endif
+188	STD	2 stat
+189	STD	2 fstat
+190	STD	2 lstat
+191	STD	2 pathconf
+192	STD	2 fpathconf
+193	UNIMPL	0 nosys
+194	STD	2 getrlimit
+195	STD	2 setrlimit
+196	STD	4 getdirentries
+197	STD	8 mmap
+198	STD	0 nosys __syscall
+199	STD	5 lseek
+200	STD	4 truncate
+201	STD	4 ftruncate
+202	STD	6 __sysctl
+203	STD	2 mlock
+204	STD	2 munlock
+205	UNIMPL	0 nosys
+206	UNIMPL	0 nosys
+207	UNIMPL	0 nosys
+208	UNIMPL	0 nosys
+209	UNIMPL	0 nosys
+210	UNIMPL	0 nosys
diff --git a/sys/kern/tty.c b/sys/kern/tty.c
new file mode 100644
index 00000000000..6cc7be23700
--- /dev/null
+++ b/sys/kern/tty.c
@@ -0,0 +1,1923 @@
+/*-
+ * Copyright (c) 1982, 1986, 1990, 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)tty.c	8.8 (Berkeley) 1/21/94
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/ioctl.h>
+#include <sys/proc.h>
+#define	TTYDEFCHARS
+#include <sys/tty.h>
+#undef	TTYDEFCHARS
+#include <sys/file.h>
+#include <sys/conf.h>
+#include <sys/dkstat.h>
+#include <sys/uio.h>
+#include <sys/kernel.h>
+#include <sys/vnode.h>
+#include <sys/syslog.h>
+
+#include <vm/vm.h>
+
+static int	proc_compare __P((struct proc *p1, struct proc *p2));
+static int	ttnread __P((struct tty *));
+static void	ttyblock __P((struct tty *tp));
+static void	ttyecho __P((int, struct tty *tp));
+static void	ttyrubo __P((struct tty *, int));
+
+/*
+ * Symbolic sleep message strings.  The routines in this file pass them
+ * to ttysleep() as the wait-message argument.
+ */
+char ttclos[]	= "ttycls";
+char ttopen[]	= "ttyopn";	/* sleeping until carrier appears */
+char ttybg[]	= "ttybg";	/* background process stopped on tty access */
+char ttybuf[]	= "ttybuf";
+char ttyin[]	= "ttyin";	/* sleeping until input arrives */
+char ttyout[]	= "ttyout";	/* sleeping until output drains */
+
+/*
+ * Table with character classes and parity. The 8th bit indicates parity,
+ * the 7th bit indicates the character is an alphameric or underscore (for
+ * ALTWERASE), and the low 6 bits indicate delay type.  If the low 6 bits
+ * are 0 then the character needs no special processing on output; classes
+ * other than 0 might be translated or (not currently) require delays.
+ *
+ * In the ASCII half of the table (0x00 - 0x7f) an entry is tagged O when
+ * the character code contains an odd number of one bits and E when it
+ * contains an even number.  The meta half (0x80 - 0xff) carries no parity
+ * tag at all.
+ *
+ * The two-letter class names below are local shorthands that exist only
+ * for the duration of the table and are #undef'ed right after it.
+ */
+#define	E	0x00	/* Even parity. */
+#define	O	0x80	/* Odd parity. */
+#define	PARITY(c)	(char_type[c] & O)
+
+#define	ALPHA	0x40	/* Alpha or underscore. */
+#define	ISALPHA(c)	(char_type[(c) & TTY_CHARMASK] & ALPHA)
+
+#define	CCLASSMASK	0x3f
+#define	CCLASS(c)	(char_type[c] & CCLASSMASK)
+
+#define	BS	BACKSPACE
+#define	CC	CONTROL
+#define	CR	RETURN
+#define	NA	ORDINARY | ALPHA
+#define	NL	NEWLINE
+#define	NO	ORDINARY
+#define	TB	TAB
+#define	VT	VTAB
+
+char const char_type[] = {
+	E|CC, O|CC, O|CC, E|CC, O|CC, E|CC, E|CC, O|CC,	/* nul - bel */
+	O|BS, E|TB, E|NL, O|CC, E|VT, O|CR, O|CC, E|CC, /* bs - si */
+	O|CC, E|CC, E|CC, O|CC, E|CC, O|CC, O|CC, E|CC, /* dle - etb */
+	E|CC, O|CC, O|CC, E|CC, O|CC, E|CC, E|CC, O|CC, /* can - us */
+	O|NO, E|NO, E|NO, O|NO, E|NO, O|NO, O|NO, E|NO, /* sp - ' */
+	E|NO, O|NO, O|NO, E|NO, O|NO, E|NO, E|NO, O|NO, /* ( - / */
+	E|NA, O|NA, O|NA, E|NA, O|NA, E|NA, E|NA, O|NA, /* 0 - 7 */
+	O|NA, E|NA, E|NO, O|NO, E|NO, O|NO, O|NO, E|NO, /* 8 - ? */
+	O|NO, E|NA, E|NA, O|NA, E|NA, O|NA, O|NA, E|NA, /* @ - G */
+	E|NA, O|NA, O|NA, E|NA, O|NA, E|NA, E|NA, O|NA, /* H - O */
+	E|NA, O|NA, O|NA, E|NA, O|NA, E|NA, E|NA, O|NA, /* P - W */
+	/* '_' is 0x5f: six one bits, hence even parity. */
+	O|NA, E|NA, E|NA, O|NO, E|NO, O|NO, O|NO, E|NA, /* X - _ */
+	E|NO, O|NA, O|NA, E|NA, O|NA, E|NA, E|NA, O|NA, /* ` - g */
+	O|NA, E|NA, E|NA, O|NA, E|NA, O|NA, O|NA, E|NA, /* h - o */
+	O|NA, E|NA, E|NA, O|NA, E|NA, O|NA, O|NA, E|NA, /* p - w */
+	E|NA, O|NA, O|NA, E|NO, O|NO, E|NO, E|NO, O|CC, /* x - del */
+	/*
+	 * Meta chars; should be settable per character set;
+	 * for now, treat them all as normal characters.
+	 */
+	NA,   NA,   NA,   NA,   NA,   NA,   NA,   NA,
+	NA,   NA,   NA,   NA,   NA,   NA,   NA,   NA,
+	NA,   NA,   NA,   NA,   NA,   NA,   NA,   NA,
+	NA,   NA,   NA,   NA,   NA,   NA,   NA,   NA,
+	NA,   NA,   NA,   NA,   NA,   NA,   NA,   NA,
+	NA,   NA,   NA,   NA,   NA,   NA,   NA,   NA,
+	NA,   NA,   NA,   NA,   NA,   NA,   NA,   NA,
+	NA,   NA,   NA,   NA,   NA,   NA,   NA,   NA,
+	NA,   NA,   NA,   NA,   NA,   NA,   NA,   NA,
+	NA,   NA,   NA,   NA,   NA,   NA,   NA,   NA,
+	NA,   NA,   NA,   NA,   NA,   NA,   NA,   NA,
+	NA,   NA,   NA,   NA,   NA,   NA,   NA,   NA,
+	NA,   NA,   NA,   NA,   NA,   NA,   NA,   NA,
+	NA,   NA,   NA,   NA,   NA,   NA,   NA,   NA,
+	NA,   NA,   NA,   NA,   NA,   NA,   NA,   NA,
+	NA,   NA,   NA,   NA,   NA,   NA,   NA,   NA,
+};
+#undef	BS
+#undef	CC
+#undef	CR
+#undef	NA
+#undef	NL
+#undef	NO
+#undef	TB
+#undef	VT
+
+/*
+ * Macros to clear/set/test flags.  SET and CLR expand to a bare
+ * compound assignment with no enclosing parentheses, so use them as
+ * complete statements rather than inside larger expressions.  ISSET
+ * yields the masked bits themselves (non-zero when any bit of f is set
+ * in t), not a normalized 0/1.
+ */
+#define	SET(t, f)	(t) |= (f)
+#define	CLR(t, f)	(t) &= ~(f)
+#define	ISSET(t, f)	((t) & (f))
+
+/*
+ * Initial open of a tty, or (re)entry to the standard tty line discipline.
+ * Records the device number in the tty, marks the tty open (zeroing the
+ * window size the first time through) and clears TS_WOPEN.  The whole
+ * update runs at spltty().  Always returns 0.
+ */
+int
+ttyopen(device, tp)
+	dev_t device;
+	register struct tty *tp;
+{
+	int ipl;
+
+	ipl = spltty();
+	tp->t_dev = device;
+	if (ISSET(tp->t_state, TS_ISOPEN) == 0) {
+		/* Not open yet: start from a zeroed window size. */
+		bzero(&tp->t_winsize, sizeof(tp->t_winsize));
+		SET(tp->t_state, TS_ISOPEN);
+	}
+	CLR(tp->t_state, TS_WOPEN);
+	splx(ipl);
+	return (0);
+}
+
+/*
+ * Handle close() on a tty line.  Both queues are flushed and the tty is
+ * returned to its initial state; the generation number is bumped so that
+ * pending read/write calls can detect that the tty was recycled.
+ * Always returns 0.
+ */
+int
+ttyclose(tp)
+	register struct tty *tp;
+{
+	extern struct tty *constty;	/* Temporary virtual console. */
+
+	/* A tty being closed stops being the virtual console. */
+	if (tp == constty)
+		constty = NULL;
+
+	ttyflush(tp, FREAD | FWRITE);
+
+	/* Forget process group, session and state; start a new generation. */
+	tp->t_state = 0;
+	tp->t_session = NULL;
+	tp->t_pgrp = NULL;
+	++tp->t_gen;
+	return (0);
+}
+
+/* Empty clist 'q': if it holds any characters, ndflush() all of them. */
+#define	FLUSHQ(q) {							\
+	if ((q)->c_cc != 0)						\
+		ndflush((q), (q)->c_cc);				\
+}
+
+/*
+ * Is 'c' a line delimiter ("break" character)?  True for newline, and
+ * for the EOF, EOL and EOL2 control characters unless 'c' equals
+ * _POSIX_VDISABLE.  Expects a control-character array named 'cc' to be
+ * in scope at the point of use.
+ */
+#define	TTBREAKC(c)							\
+	((c) == '\n' ||							\
+	(((c) == cc[VEOF] || (c) == cc[VEOL] || (c) == cc[VEOL2]) &&	\
+	(c) != _POSIX_VDISABLE))
+
+/*
+ * Process input of a single character received on a tty.
+ *
+ * 'c' is the received character, possibly with TTY_ERRORMASK status bits
+ * (TTY_FE, TTY_PE) or'ed in; 'tp' is the tty it arrived on.  The
+ * character is examined in this order: error conditions, then (unless
+ * EXTPROC is set) literal-next quoting, the IEXTEN, ISIG and IXON special
+ * characters and CR/NL mapping, then (in canonical mode) the line-editing
+ * characters.  Whatever is left is appended to the raw queue and echoed.
+ *
+ * The three labels at the bottom form the common exit: ``endcase''
+ * returns 0 without restarting output if output is stopped and may not
+ * be restarted by this character; ``restartoutput'' clears FLUSHO and
+ * TS_TTSTOP; ``startoutput'' returns the value of ttstart().
+ */
+int
+ttyinput(c, tp)
+	register int c;
+	register struct tty *tp;
+{
+	register int iflag, lflag;
+	register u_char *cc;
+	int i, err;
+
+	/*
+	 * If input is pending take it first.
+	 */
+	lflag = tp->t_lflag;
+	if (ISSET(lflag, PENDIN))
+		ttypend(tp);
+	/*
+	 * Gather stats.
+	 */
+	if (ISSET(lflag, ICANON)) {
+		++tk_cancc;
+		++tp->t_cancc;
+	} else {
+		++tk_rawcc;
+		++tp->t_rawcc;
+	}
+	++tk_nin;
+
+	/* Handle exceptional conditions (break, parity, framing). */
+	cc = tp->t_cc;
+	iflag = tp->t_iflag;
+	if (err = (ISSET(c, TTY_ERRORMASK))) {
+		CLR(c, TTY_ERRORMASK);
+		if (ISSET(err, TTY_FE) && !c) {	/* Break. */
+			if (ISSET(iflag, IGNBRK))
+				goto endcase;
+			else if (ISSET(iflag, BRKINT) &&
+			    ISSET(lflag, ISIG) &&
+			    (cc[VINTR] != _POSIX_VDISABLE))
+				c = cc[VINTR];
+			else if (ISSET(iflag, PARMRK))
+				goto parmrk;
+		} else if (ISSET(err, TTY_PE) &&
+		    ISSET(iflag, INPCK) || ISSET(err, TTY_FE)) {
+			if (ISSET(iflag, IGNPAR))
+				goto endcase;
+			else if (ISSET(iflag, PARMRK)) {
+parmrk:				(void)putc(0377 | TTY_QUOTE, &tp->t_rawq);
+				(void)putc(0 | TTY_QUOTE, &tp->t_rawq);
+				(void)putc(c | TTY_QUOTE, &tp->t_rawq);
+				goto endcase;
+			} else
+				c = 0;	/* a NUL is processed in its place */
+		}
+	}
+	/*
+	 * In tandem mode, check high water mark.
+	 */
+	if (ISSET(iflag, IXOFF))
+		ttyblock(tp);
+	if (!ISSET(tp->t_state, TS_TYPEN) && ISSET(iflag, ISTRIP))
+		CLR(c, 0x80);
+	if (!ISSET(lflag, EXTPROC)) {
+		/*
+		 * Check for literal nexting very first
+		 */
+		if (ISSET(tp->t_state, TS_LNCH)) {
+			SET(c, TTY_QUOTE);
+			CLR(tp->t_state, TS_LNCH);
+		}
+		/*
+		 * Scan for special characters.  This code
+		 * is really just a big case statement with
+		 * non-constant cases.  The bottom of the
+		 * case statement is labeled ``endcase'', so goto
+		 * it after a case match, or similar.
+		 */
+
+		/*
+		 * Control chars which aren't controlled
+		 * by ICANON, ISIG, or IXON.
+		 */
+		if (ISSET(lflag, IEXTEN)) {
+			if (CCEQ(cc[VLNEXT], c)) {
+				if (ISSET(lflag, ECHO)) {
+					if (ISSET(lflag, ECHOE)) {
+						(void)ttyoutput('^', tp);
+						(void)ttyoutput('\b', tp);
+					} else
+						ttyecho(c, tp);
+				}
+				SET(tp->t_state, TS_LNCH);
+				goto endcase;
+			}
+			if (CCEQ(cc[VDISCARD], c)) {
+				if (ISSET(lflag, FLUSHO))
+					CLR(tp->t_lflag, FLUSHO);
+				else {
+					ttyflush(tp, FWRITE);
+					ttyecho(c, tp);
+					if (tp->t_rawq.c_cc + tp->t_canq.c_cc)
+						ttyretype(tp);
+					SET(tp->t_lflag, FLUSHO);
+				}
+				goto startoutput;	/* leaves FLUSHO as set here */
+			}
+		}
+		/*
+		 * Signals.
+		 */
+		if (ISSET(lflag, ISIG)) {
+			if (CCEQ(cc[VINTR], c) || CCEQ(cc[VQUIT], c)) {
+				if (!ISSET(lflag, NOFLSH))
+					ttyflush(tp, FREAD | FWRITE);
+				ttyecho(c, tp);
+				pgsignal(tp->t_pgrp,
+				    CCEQ(cc[VINTR], c) ? SIGINT : SIGQUIT, 1);
+				goto endcase;
+			}
+			if (CCEQ(cc[VSUSP], c)) {
+				if (!ISSET(lflag, NOFLSH))
+					ttyflush(tp, FREAD);
+				ttyecho(c, tp);
+				pgsignal(tp->t_pgrp, SIGTSTP, 1);
+				goto endcase;
+			}
+		}
+		/*
+		 * Handle start/stop characters.
+		 */
+		if (ISSET(iflag, IXON)) {
+			if (CCEQ(cc[VSTOP], c)) {
+				if (!ISSET(tp->t_state, TS_TTSTOP)) {
+					SET(tp->t_state, TS_TTSTOP);
+#ifdef sun4c						/* XXX */
+					(*tp->t_stop)(tp, 0);
+#else
+					(*cdevsw[major(tp->t_dev)].d_stop)(tp,
+					   0);
+#endif
+					return (0);
+				}
+				if (!CCEQ(cc[VSTART], c))
+					return (0);
+				/*
+				 * if VSTART == VSTOP then toggle
+				 */
+				goto endcase;
+			}
+			if (CCEQ(cc[VSTART], c))
+				goto restartoutput;
+		}
+		/*
+		 * IGNCR, ICRNL, & INLCR
+		 */
+		if (c == '\r') {
+			if (ISSET(iflag, IGNCR))
+				goto endcase;
+			else if (ISSET(iflag, ICRNL))
+				c = '\n';
+		} else if (c == '\n' && ISSET(iflag, INLCR))
+			c = '\r';
+	}
+	if (!ISSET(tp->t_lflag, EXTPROC) && ISSET(lflag, ICANON)) {
+		/*
+		 * From here on down canonical mode character
+		 * processing takes place.
+		 */
+		/*
+		 * erase (^H / ^?)
+		 */
+		if (CCEQ(cc[VERASE], c)) {
+			if (tp->t_rawq.c_cc)
+				ttyrub(unputc(&tp->t_rawq), tp);
+			goto endcase;
+		}
+		/*
+		 * kill (^U)
+		 */
+		if (CCEQ(cc[VKILL], c)) {
+			if (ISSET(lflag, ECHOKE) &&
+			    tp->t_rawq.c_cc == tp->t_rocount &&
+			    !ISSET(lflag, ECHOPRT))
+				while (tp->t_rawq.c_cc)
+					ttyrub(unputc(&tp->t_rawq), tp);
+			else {
+				ttyecho(c, tp);
+				if (ISSET(lflag, ECHOK) ||
+				    ISSET(lflag, ECHOKE))
+					ttyecho('\n', tp);
+				FLUSHQ(&tp->t_rawq);
+				tp->t_rocount = 0;
+			}
+			CLR(tp->t_state, TS_LOCAL);
+			goto endcase;
+		}
+		/*
+		 * word erase (^W)
+		 */
+		if (CCEQ(cc[VWERASE], c)) {
+			int alt = ISSET(lflag, ALTWERASE);
+			int ctype;
+
+			/*
+			 * erase whitespace
+			 */
+			while ((c = unputc(&tp->t_rawq)) == ' ' || c == '\t')
+				ttyrub(c, tp);
+			if (c == -1)
+				goto endcase;
+			/*
+			 * erase last char of word and remember the
+			 * next chars type (for ALTWERASE)
+			 */
+			ttyrub(c, tp);
+			c = unputc(&tp->t_rawq);
+			if (c == -1)
+				goto endcase;
+			if (c == ' ' || c == '\t') {
+				(void)putc(c, &tp->t_rawq);
+				goto endcase;
+			}
+			ctype = ISALPHA(c);
+			/*
+			 * erase rest of word
+			 */
+			do {
+				ttyrub(c, tp);
+				c = unputc(&tp->t_rawq);
+				if (c == -1)
+					goto endcase;
+			} while (c != ' ' && c != '\t' &&
+			    (alt == 0 || ISALPHA(c) == ctype));
+			(void)putc(c, &tp->t_rawq);	/* put the delimiter back */
+			goto endcase;
+		}
+		/*
+		 * reprint line (^R)
+		 */
+		if (CCEQ(cc[VREPRINT], c)) {
+			ttyretype(tp);
+			goto endcase;
+		}
+		/*
+		 * ^T - kernel info and generate SIGINFO
+		 */
+		if (CCEQ(cc[VSTATUS], c)) {
+			if (ISSET(lflag, ISIG))
+				pgsignal(tp->t_pgrp, SIGINFO, 1);
+			if (!ISSET(lflag, NOKERNINFO))
+				ttyinfo(tp);
+			goto endcase;
+		}
+	}
+	/*
+	 * Check for input buffer overflow.  With IMAXBEL a bell is sent
+	 * (only while the output queue is below the high water mark);
+	 * otherwise both directions are flushed.  Either way the
+	 * character itself is not queued.
+	 */
+	if (tp->t_rawq.c_cc + tp->t_canq.c_cc >= TTYHOG) {
+		if (ISSET(iflag, IMAXBEL)) {
+			if (tp->t_outq.c_cc < tp->t_hiwat)
+				(void)ttyoutput(CTRL('g'), tp);
+		} else
+			ttyflush(tp, FREAD | FWRITE);
+		goto endcase;
+	}
+	/*
+	 * Put data char in q for user and
+	 * wakeup on seeing a line delimiter.
+	 */
+	if (putc(c, &tp->t_rawq) >= 0) {
+		if (!ISSET(lflag, ICANON)) {
+			ttwakeup(tp);
+			ttyecho(c, tp);
+			goto endcase;
+		}
+		if (TTBREAKC(c)) {
+			tp->t_rocount = 0;
+			catq(&tp->t_rawq, &tp->t_canq);
+			ttwakeup(tp);
+		} else if (tp->t_rocount++ == 0)
+			tp->t_rocol = tp->t_column;
+		if (ISSET(tp->t_state, TS_ERASE)) {
+			/*
+			 * end of prterase \.../
+			 */
+			CLR(tp->t_state, TS_ERASE);
+			(void)ttyoutput('/', tp);
+		}
+		i = tp->t_column;	/* column before the echo */
+		ttyecho(c, tp);
+		if (CCEQ(cc[VEOF], c) && ISSET(lflag, ECHO)) {
+			/*
+			 * Place the cursor over the '^' of the ^D.
+			 */
+			i = min(2, tp->t_column - i);
+			while (i > 0) {
+				(void)ttyoutput('\b', tp);
+				i--;
+			}
+		}
+	}
+endcase:
+	/*
+	 * IXANY means allow any character to restart output.
+	 */
+	if (ISSET(tp->t_state, TS_TTSTOP) &&
+	    !ISSET(iflag, IXANY) && cc[VSTART] != cc[VSTOP])
+		return (0);
+restartoutput:
+	CLR(tp->t_lflag, FLUSHO);
+	CLR(tp->t_state, TS_TTSTOP);
+startoutput:
+	return (ttstart(tp));
+}
+
+/*
+ * Output a single character on a tty, doing output processing
+ * as needed (expanding tabs, newline processing, etc.).
+ * Returns < 0 if succeeds, otherwise returns char to resend.
+ * Must be recursive.
+ *
+ * Without OPOST the character is queued untouched (or silently dropped
+ * while FLUSHO is set).  With OPOST the character is first masked with
+ * TTY_CHARMASK, then subjected to tab expansion, ONOEOT and ONLCR
+ * handling, and finally tp->t_column is updated from the character's
+ * class in char_type[].
+ */
+int
+ttyoutput(c, tp)
+	register int c;
+	register struct tty *tp;
+{
+	register long oflag;
+	register int col, s;
+
+	oflag = tp->t_oflag;
+	if (!ISSET(oflag, OPOST)) {
+		if (ISSET(tp->t_lflag, FLUSHO))
+			return (-1);
+		if (putc(c, &tp->t_outq))
+			return (c);
+		tk_nout++;
+		tp->t_outcc++;
+		return (-1);
+	}
+	/*
+	 * Do tab expansion if OXTABS is set.  Special case if we do external
+	 * processing, we don't do the tab expansion because we'll probably
+	 * get it wrong.  If tab expansion needs to be done, let it happen
+	 * externally.
+	 *
+	 * Inside the expansion 'c' is reused as a count: first the number
+	 * of blanks needed to reach the next multiple of 8, then (after
+	 * subtracting b_to_q()'s result) the number accounted as output.
+	 * A count of zero makes the caller resend the tab.
+	 */
+	CLR(c, ~TTY_CHARMASK);
+	if (c == '\t' &&
+	    ISSET(oflag, OXTABS) && !ISSET(tp->t_lflag, EXTPROC)) {
+		c = 8 - (tp->t_column & 7);
+		if (!ISSET(tp->t_lflag, FLUSHO)) {
+			s = spltty();		/* Don't interrupt tabs. */
+			c -= b_to_q("        ", c, &tp->t_outq);
+			tk_nout += c;
+			tp->t_outcc += c;
+			splx(s);
+		}
+		tp->t_column += c;
+		return (c ? -1 : '\t');
+	}
+	if (c == CEOT && ISSET(oflag, ONOEOT))
+		return (-1);
+
+	/*
+	 * Newline translation: if ONLCR is set,
+	 * translate newline into "\r\n".
+	 * Note that the counters are bumped before the putc() of the
+	 * '\r' is known to have succeeded.
+	 */
+	if (c == '\n' && ISSET(tp->t_oflag, ONLCR)) {
+		tk_nout++;
+		tp->t_outcc++;
+		if (putc('\r', &tp->t_outq))
+			return (c);
+	}
+	tk_nout++;
+	tp->t_outcc++;
+	if (!ISSET(tp->t_lflag, FLUSHO) && putc(c, &tp->t_outq))
+		return (c);
+
+	col = tp->t_column;
+	switch (CCLASS(c)) {	/* classes not listed leave col unchanged */
+	case BACKSPACE:
+		if (col > 0)
+			--col;
+		break;
+	case CONTROL:
+		break;
+	case NEWLINE:
+	case RETURN:
+		col = 0;
+		break;
+	case ORDINARY:
+		++col;
+		break;
+	case TAB:
+		col = (col + 8) & ~7;	/* next multiple of 8 */
+		break;
+	}
+	tp->t_column = col;
+	return (-1);
+}
+
+/*
+ * Ioctls for all tty devices.  Called after line-discipline specific ioctl
+ * has been called to do discipline-specific functions and/or reject any
+ * of these ioctl commands.
+ *
+ * 'tp' is the tty, 'cmd' the ioctl command, 'data' the argument buffer
+ * (interpreted per command) and 'flag' the open-file flags.
+ * Returns 0 on success or an errno value on failure.  A command not
+ * handled here is passed to ttcompat() when COMPAT_43 or COMPAT_SUNOS is
+ * defined; otherwise -1 is returned for it.
+ */
+/* ARGSUSED */
+int
+ttioctl(tp, cmd, data, flag)
+	register struct tty *tp;
+	int cmd, flag;
+	void *data;
+{
+	extern struct tty *constty;	/* Temporary virtual console. */
+	extern int nlinesw;
+	register struct proc *p;
+	int s, error;
+
+	p = curproc;			/* XXX */
+
+	/*
+	 * If the ioctl involves modification, hang if in the background:
+	 * signal the process group with SIGTTOU and sleep on lbolt, then
+	 * test again.  The wait is skipped when pg_jobc is zero, when
+	 * P_PPWAIT is set, or when SIGTTOU is ignored or masked.
+	 */
+	switch (cmd) {
+	case  TIOCFLUSH:
+	case  TIOCSETA:
+	case  TIOCSETD:
+	case  TIOCSETAF:
+	case  TIOCSETAW:
+#ifdef notdef
+	case  TIOCSPGRP:
+#endif
+	case  TIOCSTI:
+	case  TIOCSWINSZ:
+#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
+	case  TIOCLBIC:
+	case  TIOCLBIS:
+	case  TIOCLSET:
+	case  TIOCSETC:
+	case OTIOCSETD:
+	case  TIOCSETN:
+	case  TIOCSETP:
+	case  TIOCSLTC:
+#endif
+		while (isbackground(curproc, tp) &&
+		    p->p_pgrp->pg_jobc && (p->p_flag & P_PPWAIT) == 0 &&
+		    (p->p_sigignore & sigmask(SIGTTOU)) == 0 &&
+		    (p->p_sigmask & sigmask(SIGTTOU)) == 0) {
+			pgsignal(p->p_pgrp, SIGTTOU, 1);
+			if (error = ttysleep(tp,
+			    &lbolt, TTOPRI | PCATCH, ttybg, 0))
+				return (error);
+		}
+		break;
+	}
+
+	switch (cmd) {			/* Process the ioctl. */
+	case FIOASYNC:			/* set/clear async i/o */
+		s = spltty();
+		if (*(int *)data)
+			SET(tp->t_state, TS_ASYNC);
+		else
+			CLR(tp->t_state, TS_ASYNC);
+		splx(s);
+		break;
+	case FIONBIO:			/* set/clear non-blocking i/o */
+		break;			/* XXX: delete. */
+	case FIONREAD:			/* get # bytes to read */
+		*(int *)data = ttnread(tp);
+		break;
+	case TIOCEXCL:			/* set exclusive use of tty */
+		s = spltty();
+		SET(tp->t_state, TS_XCLUDE);
+		splx(s);
+		break;
+	case TIOCFLUSH: {		/* flush buffers */
+		register int flags = *(int *)data;
+
+		if (flags == 0)		/* zero means both directions */
+			flags = FREAD | FWRITE;
+		else
+			flags &= FREAD | FWRITE;
+		ttyflush(tp, flags);
+		break;
+	}
+	case TIOCCONS:			/* become virtual console */
+		if (*(int *)data) {
+			if (constty && constty != tp &&
+			    ISSET(constty->t_state, TS_CARR_ON | TS_ISOPEN) ==
+			    (TS_CARR_ON | TS_ISOPEN))
+				return (EBUSY);
+#ifndef	UCONSOLE
+			if (error = suser(p->p_ucred, &p->p_acflag))
+				return (error);
+#endif
+			constty = tp;
+		} else if (tp == constty)
+			constty = NULL;
+		break;
+	case TIOCDRAIN:			/* wait till output drained */
+		if (error = ttywait(tp))
+			return (error);
+		break;
+	case TIOCGETA: {		/* get termios struct */
+		struct termios *t = (struct termios *)data;
+
+		bcopy(&tp->t_termios, t, sizeof(struct termios));
+		break;
+	}
+	case TIOCGETD:			/* get line discipline */
+		*(int *)data = tp->t_line;
+		break;
+	case TIOCGWINSZ:		/* get window size */
+		*(struct winsize *)data = tp->t_winsize;
+		break;
+	case TIOCGPGRP:			/* get pgrp of tty */
+		if (!isctty(p, tp))
+			return (ENOTTY);
+		*(int *)data = tp->t_pgrp ? tp->t_pgrp->pg_id : NO_PID;
+		break;
+#ifdef TIOCHPCL
+	case TIOCHPCL:			/* hang up on last close */
+		s = spltty();
+		SET(tp->t_cflag, HUPCL);
+		splx(s);
+		break;
+#endif
+	case TIOCNXCL:			/* reset exclusive use of tty */
+		s = spltty();
+		CLR(tp->t_state, TS_XCLUDE);
+		splx(s);
+		break;
+	case TIOCOUTQ:			/* output queue size */
+		*(int *)data = tp->t_outq.c_cc;
+		break;
+	case TIOCSETA:			/* set termios struct */
+	case TIOCSETAW:			/* drain output, set */
+	case TIOCSETAF: {		/* drn out, fls in, set */
+		register struct termios *t = (struct termios *)data;
+
+		s = spltty();
+		if (cmd == TIOCSETAW || cmd == TIOCSETAF) {
+			if (error = ttywait(tp)) {
+				splx(s);
+				return (error);
+			}
+			if (cmd == TIOCSETAF)
+				ttyflush(tp, FREAD);
+		}
+		if (!ISSET(t->c_cflag, CIGNORE)) {
+			/*
+			 * Set device hardware.  A failing t_param routine
+			 * aborts the ioctl.  Dropping CLOCAL while carrier
+			 * is absent puts the tty back into the TS_WOPEN
+			 * state.
+			 */
+			if (tp->t_param && (error = (*tp->t_param)(tp, t))) {
+				splx(s);
+				return (error);
+			} else {
+				if (!ISSET(tp->t_state, TS_CARR_ON) &&
+				    ISSET(tp->t_cflag, CLOCAL) &&
+				    !ISSET(t->c_cflag, CLOCAL)) {
+					CLR(tp->t_state, TS_ISOPEN);
+					SET(tp->t_state, TS_WOPEN);
+					ttwakeup(tp);
+				}
+				tp->t_cflag = t->c_cflag;
+				tp->t_ispeed = t->c_ispeed;
+				tp->t_ospeed = t->c_ospeed;
+			}
+			ttsetwater(tp);
+		}
+		if (cmd != TIOCSETAF) {
+			if (ISSET(t->c_lflag, ICANON) !=
+			    ISSET(tp->t_lflag, ICANON))
+				if (ISSET(t->c_lflag, ICANON)) {
+					SET(tp->t_lflag, PENDIN);
+					ttwakeup(tp);
+				} else {	/* pairs with the inner if */
+					struct clist tq;
+
+					catq(&tp->t_rawq, &tp->t_canq);
+					tq = tp->t_rawq;
+					tp->t_rawq = tp->t_canq;
+					tp->t_canq = tq;
+					CLR(tp->t_lflag, PENDIN);
+				}
+		}
+		tp->t_iflag = t->c_iflag;
+		tp->t_oflag = t->c_oflag;
+		/*
+		 * Make the EXTPROC bit read only.
+		 */
+		if (ISSET(tp->t_lflag, EXTPROC))
+			SET(t->c_lflag, EXTPROC);
+		else
+			CLR(t->c_lflag, EXTPROC);
+		tp->t_lflag = t->c_lflag | ISSET(tp->t_lflag, PENDIN);
+		bcopy(t->c_cc, tp->t_cc, sizeof(t->c_cc));
+		splx(s);
+		break;
+	}
+	case TIOCSETD: {		/* set line discipline */
+		register int t = *(int *)data;
+		dev_t device = tp->t_dev;
+
+		if ((u_int)t >= nlinesw)
+			return (ENXIO);
+		if (t != tp->t_line) {
+			s = spltty();
+			(*linesw[tp->t_line].l_close)(tp, flag);
+			error = (*linesw[t].l_open)(device, tp);
+			if (error) {	/* refused: reopen the old one */
+				(void)(*linesw[tp->t_line].l_open)(device, tp);
+				splx(s);
+				return (error);
+			}
+			tp->t_line = t;
+			splx(s);
+		}
+		break;
+	}
+	case TIOCSTART:			/* start output, like ^Q */
+		s = spltty();
+		if (ISSET(tp->t_state, TS_TTSTOP) ||
+		    ISSET(tp->t_lflag, FLUSHO)) {
+			CLR(tp->t_lflag, FLUSHO);
+			CLR(tp->t_state, TS_TTSTOP);
+			ttstart(tp);
+		}
+		splx(s);
+		break;
+	case TIOCSTI:			/* simulate terminal input */
+		if (p->p_ucred->cr_uid && (flag & FREAD) == 0)
+			return (EPERM);
+		if (p->p_ucred->cr_uid && !isctty(p, tp))
+			return (EACCES);
+		(*linesw[tp->t_line].l_rint)(*(u_char *)data, tp);
+		break;
+	case TIOCSTOP:			/* stop output, like ^S */
+		s = spltty();
+		if (!ISSET(tp->t_state, TS_TTSTOP)) {
+			SET(tp->t_state, TS_TTSTOP);
+#ifdef sun4c				/* XXX */
+			(*tp->t_stop)(tp, 0);
+#else
+			(*cdevsw[major(tp->t_dev)].d_stop)(tp, 0);
+#endif
+		}
+		splx(s);
+		break;
+	case TIOCSCTTY:			/* become controlling tty */
+		/* Session ctty vnode pointer set in vnode layer. */
+		if (!SESS_LEADER(p) ||
+		    (p->p_session->s_ttyvp || tp->t_session) &&
+		    (tp->t_session != p->p_session))
+			return (EPERM);
+		tp->t_session = p->p_session;
+		tp->t_pgrp = p->p_pgrp;
+		p->p_session->s_ttyp = tp;
+		p->p_flag |= P_CONTROLT;
+		break;
+	case TIOCSPGRP: {		/* set pgrp of tty */
+		register struct pgrp *pgrp = pgfind(*(int *)data);
+
+		if (!isctty(p, tp))
+			return (ENOTTY);
+		else if (pgrp == NULL || pgrp->pg_session != p->p_session)
+			return (EPERM);
+		tp->t_pgrp = pgrp;
+		break;
+	}
+	case TIOCSWINSZ:		/* set window size */
+		if (bcmp((caddr_t)&tp->t_winsize, data,
+		    sizeof (struct winsize))) {	/* only if it changed */
+			tp->t_winsize = *(struct winsize *)data;
+			pgsignal(tp->t_pgrp, SIGWINCH, 1);
+		}
+		break;
+	default:
+#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
+		return (ttcompat(tp, cmd, data, flag));
+#else
+		return (-1);
+#endif
+	}
+	return (0);
+}
+
+/*
+ * Select routine for tty devices.  Looks up the tty for 'device' and
+ * reports whether it is ready for the operation 'rw':
+ *   FREAD  - input is available (ttnread() > 0), or the line is not
+ *            CLOCAL and carrier is absent;
+ *   FWRITE - the output queue is at or below the low water mark.
+ * Returns 1 when ready.  Otherwise the process 'p' is recorded on the
+ * matching selinfo and 0 is returned.  Any other 'rw' value returns 0.
+ */
+int
+ttselect(device, rw, p)
+	dev_t device;
+	int rw;
+	struct proc *p;
+{
+	register struct tty *tp;
+	int nread, s;
+
+	tp = &cdevsw[major(device)].d_ttys[minor(device)];
+
+	s = spltty();
+	if (rw == FREAD) {
+		nread = ttnread(tp);
+		if (nread > 0 || (!ISSET(tp->t_cflag, CLOCAL) &&
+		    !ISSET(tp->t_state, TS_CARR_ON))) {
+			splx(s);
+			return (1);
+		}
+		selrecord(p, &tp->t_rsel);
+	} else if (rw == FWRITE) {
+		if (tp->t_outq.c_cc <= tp->t_lowat) {
+			splx(s);
+			return (1);
+		}
+		selrecord(p, &tp->t_wsel);
+	}
+	splx(s);
+	return (0);
+}
+
+/*
+ * Return the number of characters a read could obtain right now:
+ * the canonical queue's count, plus the raw queue's count when the tty
+ * is not in canonical mode.  Pending input is re-processed first.
+ */
+static int
+ttnread(tp)
+	struct tty *tp;
+{
+	int avail;
+
+	if (ISSET(tp->t_lflag, PENDIN))
+		ttypend(tp);
+
+	avail = tp->t_canq.c_cc;
+	if (ISSET(tp->t_lflag, ICANON))
+		return (avail);
+	avail += tp->t_rawq.c_cc;
+	return (avail);
+}
+
+/*
+ * Wait for output to drain.
+ * Keeps kicking the output routine and sleeping on the output queue
+ * while there is still output queued or in progress, the line is usable
+ * (carrier present or CLOCAL) and an output routine exists.
+ * Returns 0, or the error from an interrupted ttysleep().
+ */
+int
+ttywait(tp)
+	register struct tty *tp;
+{
+	int error, ipl;
+
+	error = 0;
+	ipl = spltty();
+	for (;;) {
+		/* Nothing queued and the device is idle: drained. */
+		if (tp->t_outq.c_cc == 0 && !ISSET(tp->t_state, TS_BUSY))
+			break;
+		/* No carrier on a non-local line: output cannot progress. */
+		if (!ISSET(tp->t_state, TS_CARR_ON) &&
+		    !ISSET(tp->t_cflag, CLOCAL))
+			break;
+		if (tp->t_oproc == NULL)
+			break;
+		(*tp->t_oproc)(tp);
+		SET(tp->t_state, TS_ASLEEP);
+		error = ttysleep(tp,
+		    &tp->t_outq, TTOPRI | PCATCH, ttyout, 0);
+		if (error)
+			break;
+	}
+	splx(ipl);
+	return (error);
+}
+
+/*
+ * Wait for output to drain and, if the wait succeeded, flush the input
+ * queues.  Returns the result of ttywait().
+ */
+int
+ttywflush(tp)
+	struct tty *tp;
+{
+	int error;
+
+	error = ttywait(tp);
+	if (error != 0)
+		return (error);
+	ttyflush(tp, FREAD);
+	return (0);
+}
+
+/*
+ * Flush tty read and/or write queues, notifying anyone waiting.
+ * 'rw' is a mask of FREAD and/or FWRITE selecting which side(s) to
+ * flush.  Runs at spltty().
+ */
+void
+ttyflush(tp, rw)
+	register struct tty *tp;
+	int rw;
+{
+	register int ipl;
+
+	ipl = spltty();
+	if (ISSET(rw, FREAD)) {
+		/* Input side: both queues plus the t_rocount/t_rocol state. */
+		FLUSHQ(&tp->t_canq);
+		FLUSHQ(&tp->t_rawq);
+		tp->t_rocol = 0;
+		tp->t_rocount = 0;
+		CLR(tp->t_state, TS_LOCAL);
+		ttwakeup(tp);
+	}
+	if (ISSET(rw, FWRITE)) {
+		/* Output side: un-stop, tell the device, then drop the queue. */
+		CLR(tp->t_state, TS_TTSTOP);
+#ifdef sun4c						/* XXX */
+		(*tp->t_stop)(tp, rw);
+#else
+		(*cdevsw[major(tp->t_dev)].d_stop)(tp, rw);
+#endif
+		FLUSHQ(&tp->t_outq);
+		wakeup((caddr_t)&tp->t_outq);
+		selwakeup(&tp->t_wsel);
+	}
+	splx(ipl);
+}
+
+/*
+ * Copy in the default termios characters: tp->t_cc is overwritten with
+ * the contents of ttydefchars (sizeof(ttydefchars) bytes are copied).
+ */
+void
+ttychars(tp)
+	struct tty *tp;
+{
+
+	bcopy(ttydefchars, tp->t_cc, sizeof(ttydefchars));
+}
+
+/*
+ * Send stop character on input overflow.
+ *
+ * Called from ttyinput() when IXOFF is set.  If the raw queue has grown
+ * beyond TTYHOG, everything queued in both directions is flushed and
+ * TS_TBLOCK is cleared.  Then the VSTOP character is queued for output,
+ * TS_TBLOCK is set and output is started, provided that ALL of the
+ * following hold:
+ *   - the input queues together hold at least TTYHOG / 2 characters;
+ *   - the sender has not already been blocked (TS_TBLOCK clear);
+ *   - input is available to the reading program, i.e. the tty is not in
+ *     canonical mode or the canonical queue is non-empty;
+ *   - a stop character is configured (VSTOP != _POSIX_VDISABLE).
+ */
+static void
+ttyblock(tp)
+	register struct tty *tp;
+{
+	register int total;
+
+	/* Note: sampled before the overflow flush below. */
+	total = tp->t_rawq.c_cc + tp->t_canq.c_cc;
+	if (tp->t_rawq.c_cc > TTYHOG) {
+		ttyflush(tp, FREAD | FWRITE);
+		CLR(tp->t_state, TS_TBLOCK);
+	}
+	/*
+	 * Block further input iff: current input > threshold
+	 * AND input is available to user program.
+	 * The ICANON/canq test is parenthesized so that every condition
+	 * must hold.  Left to operator precedence the "||" would split
+	 * the test in two, resending VSTOP for every character once a
+	 * canonical line is pending, and queueing a disabled VSTOP in
+	 * non-canonical mode.
+	 */
+	if (total >= TTYHOG / 2 &&
+	    !ISSET(tp->t_state, TS_TBLOCK) &&
+	    (!ISSET(tp->t_lflag, ICANON) || tp->t_canq.c_cc > 0) &&
+	    tp->t_cc[VSTOP] != _POSIX_VDISABLE) {
+		if (putc(tp->t_cc[VSTOP], &tp->t_outq) == 0) {
+			SET(tp->t_state, TS_TBLOCK);
+			ttstart(tp);
+		}
+	}
+}
+
+/*
+ * Clear TS_TIMEOUT on a tty and start its output again, at spltty().
+ * The tty is passed as an untyped pointer; presumably this routine is
+ * scheduled through timeout() by drivers (confirm against callers).
+ * With DIAGNOSTIC, a NULL argument panics.
+ */
+void
+ttrstrt(tp_arg)
+	void *tp_arg;
+{
+	struct tty *tp;
+	int ipl;
+
+#ifdef DIAGNOSTIC
+	if (tp_arg == NULL)
+		panic("ttrstrt");
+#endif
+	tp = tp_arg;
+
+	ipl = spltty();
+	CLR(tp->t_state, TS_TIMEOUT);
+	ttstart(tp);
+	splx(ipl);
+}
+
+/*
+ * Start output on a tty by calling its output routine, if it has one.
+ * Always returns 0.
+ */
+int
+ttstart(tp)
+	struct tty *tp;
+{
+
+	/* XXX: Kludge for pty. */
+	if (tp->t_oproc == NULL)
+		return (0);
+	(*tp->t_oproc)(tp);
+	return (0);
+}
+
+/*
+ * "close" a line discipline.
+ * With IO_NDELAY in 'flag' both queues are flushed at once; otherwise
+ * output is allowed to drain first and then input is flushed.
+ * Always returns 0.
+ */
+int
+ttylclose(tp, flag)
+	struct tty *tp;
+	int flag;
+{
+
+	if ((flag & IO_NDELAY) == 0)
+		(void)ttywflush(tp);
+	else
+		ttyflush(tp, FREAD | FWRITE);
+	return (0);
+}
+
+/*
+ * Handle a modem control (carrier) transition on a tty.
+ * 'flag' is the new carrier state: non-zero for on, zero for off.
+ * With MDMBUF set, and the tty not waiting for open, carrier acts as
+ * output flow control.  Otherwise TS_CARR_ON tracks carrier, and losing
+ * it on an open, non-CLOCAL line sends SIGHUP to the session leader and
+ * flushes both queues.
+ * Returns 0 if the line should be turned off, otherwise 1.
+ */
+int
+ttymodem(tp, flag)
+	register struct tty *tp;
+	int flag;
+{
+
+	if (!ISSET(tp->t_state, TS_WOPEN) && ISSET(tp->t_cflag, MDMBUF)) {
+		/*
+		 * MDMBUF: do flow control according to carrier flag
+		 */
+		if (flag) {
+			CLR(tp->t_state, TS_TTSTOP);
+			ttstart(tp);
+			return (1);
+		}
+		if (!ISSET(tp->t_state, TS_TTSTOP)) {
+			SET(tp->t_state, TS_TTSTOP);
+#ifdef sun4c						/* XXX */
+			(*tp->t_stop)(tp, 0);
+#else
+			(*cdevsw[major(tp->t_dev)].d_stop)(tp, 0);
+#endif
+		}
+		return (1);
+	}
+
+	if (flag != 0) {
+		/*
+		 * Carrier now on.
+		 */
+		SET(tp->t_state, TS_CARR_ON);
+		ttwakeup(tp);
+		return (1);
+	}
+
+	/*
+	 * Lost carrier.
+	 */
+	CLR(tp->t_state, TS_CARR_ON);
+	if (!ISSET(tp->t_state, TS_ISOPEN) || ISSET(tp->t_cflag, CLOCAL))
+		return (1);
+	if (tp->t_session && tp->t_session->s_leader)
+		psignal(tp->t_session->s_leader, SIGHUP);
+	ttyflush(tp, FREAD | FWRITE);
+	return (0);
+}
+
+/*
+ * Default modem control routine (for other line disciplines).
+ * Tracks carrier in TS_CARR_ON.  When carrier drops on a non-CLOCAL
+ * line the session leader, if any, gets SIGHUP and 0 is returned so
+ * that the device is turned off; in every other case 1 is returned.
+ */
+int
+nullmodem(tp, flag)
+	register struct tty *tp;
+	int flag;
+{
+
+	if (flag) {
+		SET(tp->t_state, TS_CARR_ON);
+		return (1);
+	}
+
+	CLR(tp->t_state, TS_CARR_ON);
+	if (ISSET(tp->t_cflag, CLOCAL))
+		return (1);
+	if (tp->t_session && tp->t_session->s_leader)
+		psignal(tp->t_session->s_leader, SIGHUP);
+	return (0);
+}
+
+/*
+ * Reinput pending characters after state switch
+ * call at spltty().
+ * The raw queue's contents are taken over and every character is fed
+ * through ttyinput() again; TS_TYPEN is set for the duration.
+ */
+void
+ttypend(tp)
+	register struct tty *tp;
+{
+	struct clist pending;
+	register int ch;
+
+	CLR(tp->t_lflag, PENDIN);
+	SET(tp->t_state, TS_TYPEN);
+
+	/* Take a copy of the raw queue header and leave the queue empty. */
+	pending = tp->t_rawq;
+	tp->t_rawq.c_cc = 0;
+	tp->t_rawq.c_cf = tp->t_rawq.c_cl = 0;
+
+	for (ch = getc(&pending); ch >= 0; ch = getc(&pending))
+		ttyinput(ch, tp);
+
+	CLR(tp->t_state, TS_TYPEN);
+}
+
+/*
+ * Process a read call on a tty device.
+ *
+ * 'tp' is the tty, 'uio' describes the user's buffer and 'flag' carries
+ * the I/O flags (IO_NDELAY is examined here).
+ * Returns 0 on success, including the end-of-file case where carrier is
+ * gone on an open line; EIO for a background read that cannot be
+ * stopped with SIGTTIN; EWOULDBLOCK when IO_NDELAY is set and no input
+ * is queued; otherwise the error from ttysleep() or ureadc().
+ */
+int
+ttread(tp, uio, flag)
+	register struct tty *tp;
+	struct uio *uio;
+	int flag;
+{
+	register struct clist *qp;
+	register int c;
+	register long lflag;
+	register u_char *cc = tp->t_cc;
+	register struct proc *p = curproc;
+	int s, first, error = 0;
+
+loop:	lflag = tp->t_lflag;	/* snapshot, taken anew on every retry */
+	s = spltty();
+	/*
+	 * take pending input first
+	 */
+	if (ISSET(lflag, PENDIN))
+		ttypend(tp);
+	splx(s);
+
+	/*
+	 * Hang process if it's in the background.
+	 * If SIGTTIN is ignored or masked, P_PPWAIT is set, or pg_jobc
+	 * is zero, the read fails with EIO instead.
+	 */
+	if (isbackground(p, tp)) {
+		if ((p->p_sigignore & sigmask(SIGTTIN)) ||
+		   (p->p_sigmask & sigmask(SIGTTIN)) ||
+		    p->p_flag & P_PPWAIT || p->p_pgrp->pg_jobc == 0)
+			return (EIO);
+		pgsignal(p->p_pgrp, SIGTTIN, 1);
+		if (error = ttysleep(tp, &lbolt, TTIPRI | PCATCH, ttybg, 0))
+			return (error);
+		goto loop;
+	}
+
+	/*
+	 * If canonical, use the canonical queue,
+	 * else use the raw queue.
+	 *
+	 * (should get rid of clists...)
+	 */
+	qp = ISSET(lflag, ICANON) ? &tp->t_canq : &tp->t_rawq;
+
+	/*
+	 * If there is no input, sleep on rawq
+	 * awaiting hardware receipt and notification.
+	 * If we have data, we don't need to check for carrier.
+	 */
+	s = spltty();
+	if (qp->c_cc <= 0) {
+		int carrier;
+
+		carrier = ISSET(tp->t_state, TS_CARR_ON) ||
+		    ISSET(tp->t_cflag, CLOCAL);
+		if (!carrier && ISSET(tp->t_state, TS_ISOPEN)) {
+			splx(s);
+			return (0);	/* EOF */
+		}
+		if (flag & IO_NDELAY) {
+			splx(s);
+			return (EWOULDBLOCK);
+		}
+		error = ttysleep(tp, &tp->t_rawq, TTIPRI | PCATCH,
+		    carrier ? ttyin : ttopen, 0);
+		splx(s);
+		if (error)
+			return (error);
+		goto loop;
+	}
+	splx(s);
+
+	/*
+	 * Input present, check for input mapping and processing.
+	 * 'first' stays set until one character has been handed to the
+	 * user and the loop has gone round again.
+	 */
+	first = 1;
+	while ((c = getc(qp)) >= 0) {
+		/*
+		 * delayed suspend (^Y)
+		 * If nothing has been delivered yet, wait and start the
+		 * read over; otherwise return what has been read so far.
+		 */
+		if (CCEQ(cc[VDSUSP], c) && ISSET(lflag, ISIG)) {
+			pgsignal(tp->t_pgrp, SIGTSTP, 1);
+			if (first) {
+				if (error = ttysleep(tp,
+				    &lbolt, TTIPRI | PCATCH, ttybg, 0))
+					break;
+				goto loop;
+			}
+			break;
+		}
+		/*
+		 * Interpret EOF only in canonical mode.
+		 */
+		if (CCEQ(cc[VEOF], c) && ISSET(lflag, ICANON))
+			break;
+		/*
+		 * Give user character.
+		 */
+ 		error = ureadc(c, uio);
+		if (error)
+			break;
+ 		if (uio->uio_resid == 0)
+			break;
+		/*
+		 * In canonical mode check for a "break character"
+		 * marking the end of a "line of input".
+		 */
+		if (ISSET(lflag, ICANON) && TTBREAKC(c))
+			break;
+		first = 0;
+	}
+	/*
+	 * Look to unblock output now that (presumably)
+	 * the input queue has gone down.
+	 * If the sender was blocked and the raw queue has dropped below
+	 * TTYHOG/5, the VSTART character (when configured) is queued for
+	 * output and TS_TBLOCK is cleared.
+	 */
+	s = spltty();
+	if (ISSET(tp->t_state, TS_TBLOCK) && tp->t_rawq.c_cc < TTYHOG/5) {
+		if (cc[VSTART] != _POSIX_VDISABLE &&
+		    putc(cc[VSTART], &tp->t_outq) == 0) {
+			CLR(tp->t_state, TS_TBLOCK);
+			ttstart(tp);
+		}
+	}
+	splx(s);
+	return (error);
+}
+
+/*
+ * Check the output queue on tp for space for a kernel message (from uprintf
+ * or tprintf).  Allow some space over the normal hiwater mark so we don't
+ * lose messages due to normal flow control, but don't let the tty run amok.
+ * Sleeps here are not interruptible, but we return prematurely if new signals
+ * arrive.
+ *
+ * Returns 1 when there is room.  Returns 0 when the queue is too full
+ * and either 'wait' is zero or the process's pending-signal set changed
+ * while waiting.
+ */
+int
+ttycheckoutq(tp, wait)
+	register struct tty *tp;
+	int wait;
+{
+	int limit, ipl, sigs_before;
+
+	limit = tp->t_hiwat;
+	ipl = spltty();
+	if (wait)
+		sigs_before = curproc->p_siglist;
+	else
+		sigs_before = 0;
+
+	/* Up to 200 characters over the high water mark are tolerated. */
+	if (tp->t_outq.c_cc <= limit + 200) {
+		splx(ipl);
+		return (1);
+	}
+
+	/* Too full: drain back down to the high water mark itself. */
+	while (tp->t_outq.c_cc > limit) {
+		ttstart(tp);
+		if (wait == 0 || curproc->p_siglist != sigs_before) {
+			splx(ipl);
+			return (0);
+		}
+		/* Arrange a wakeup in hz ticks so the sleep is bounded. */
+		timeout((void (*)__P((void *)))wakeup,
+		    (void *)&tp->t_outq, hz);
+		SET(tp->t_state, TS_ASLEEP);
+		sleep((caddr_t)&tp->t_outq, PZERO - 1);
+	}
+	splx(ipl);
+	return (1);
+}
+
+/*
+ * Process a write call on a tty device.
+ *
+ *	tp	terminal being written to
+ *	uio	description of the user's data; uio_resid is updated to
+ *		reflect what was actually consumed
+ *	flag	I/O flags; only IO_NDELAY is examined here
+ *
+ * Returns 0 or an errno value: EIO if carrier is lost on an open line,
+ * EWOULDBLOCK for a non-blocking write that could make no progress, or
+ * whatever ttysleep/uiomove report.  Data already copied from the user
+ * but not yet queued (cc bytes) is added back to uio_resid on the way out.
+ */
+int
+ttwrite(tp, uio, flag)
+	register struct tty *tp;
+	register struct uio *uio;
+	int flag;
+{
+	register char *cp;
+	register int cc, ce;
+	register struct proc *p;
+	int i, hiwat, cnt, error, s;
+	char obuf[OBUFSIZ];
+
+	hiwat = tp->t_hiwat;
+	cnt = uio->uio_resid;
+	error = 0;
+	cc = 0;
+loop:
+	s = spltty();
+	if (!ISSET(tp->t_state, TS_CARR_ON) &&
+	    !ISSET(tp->t_cflag, CLOCAL)) {
+		if (ISSET(tp->t_state, TS_ISOPEN)) {
+			splx(s);
+			return (EIO);
+		} else if (flag & IO_NDELAY) {
+			splx(s);
+			error = EWOULDBLOCK;
+			goto out;
+		} else {
+			/* Sleep awaiting carrier. */
+			error = ttysleep(tp,
+			    &tp->t_rawq, TTIPRI | PCATCH, ttopen, 0);
+			splx(s);
+			if (error)
+				goto out;
+			goto loop;
+		}
+	}
+	splx(s);
+	/*
+	 * Hang the process if it's in the background.
+	 */
+	p = curproc;
+	if (isbackground(p, tp) &&
+	    ISSET(tp->t_lflag, TOSTOP) && (p->p_flag & P_PPWAIT) == 0 &&
+	    (p->p_sigignore & sigmask(SIGTTOU)) == 0 &&
+	    (p->p_sigmask & sigmask(SIGTTOU)) == 0 &&
+	     p->p_pgrp->pg_jobc) {
+		pgsignal(p->p_pgrp, SIGTTOU, 1);
+		error = ttysleep(tp, &lbolt, TTIPRI | PCATCH, ttybg, 0);
+		if (error)
+			goto out;
+		goto loop;
+	}
+	/*
+	 * Process the user's data in at most OBUFSIZ chunks.  Perform any
+	 * output translation.  Keep track of high water mark, sleep on
+	 * overflow awaiting device aid in acquiring new space.
+	 */
+	while (uio->uio_resid > 0 || cc > 0) {
+		if (ISSET(tp->t_lflag, FLUSHO)) {
+			/* Output is being discarded: pretend it all went. */
+			uio->uio_resid = 0;
+			return (0);
+		}
+		if (tp->t_outq.c_cc > hiwat)
+			goto ovhiwat;
+		/*
+		 * Grab a hunk of data from the user, unless we have some
+		 * leftover from last time.
+		 */
+		if (cc == 0) {
+			cc = min(uio->uio_resid, OBUFSIZ);
+			cp = obuf;
+			error = uiomove(cp, cc, uio);
+			if (error) {
+				cc = 0;
+				break;
+			}
+		}
+		/*
+		 * If nothing fancy need be done, grab those characters we
+		 * can handle without any of ttyoutput's processing and
+		 * just transfer them to the output q.  For those chars
+		 * which require special processing (as indicated by the
+		 * bits in char_type), call ttyoutput.  After processing
+		 * a hunk of data, look for FLUSHO so ^O's will take effect
+		 * immediately.
+		 */
+		while (cc > 0) {
+			if (!ISSET(tp->t_oflag, OPOST))
+				ce = cc;
+			else {
+				ce = cc - scanc((u_int)cc, (u_char *)cp,
+				   (u_char *)char_type, CCLASSMASK);
+				/*
+				 * If ce is zero, then we're processing
+				 * a special character through ttyoutput.
+				 */
+				if (ce == 0) {
+					tp->t_rocount = 0;
+					if (ttyoutput(*cp, tp) >= 0) {
+						/* No Clists, wait a bit. */
+						ttstart(tp);
+						/*
+						 * A failed sleep must end
+						 * the whole write; leaving
+						 * only the inner loop would
+						 * retry and lose the error.
+						 */
+						error = ttysleep(tp, &lbolt,
+						    TTOPRI | PCATCH, ttybuf, 0);
+						if (error)
+							goto out;
+						goto loop;
+					}
+					cp++;
+					cc--;
+					if (ISSET(tp->t_lflag, FLUSHO) ||
+					    tp->t_outq.c_cc > hiwat)
+						goto ovhiwat;
+					continue;
+				}
+			}
+			/*
+			 * A bunch of normal characters have been found.
+			 * Transfer them en masse to the output queue and
+			 * continue processing at the top of the loop.
+			 * If there are any further characters in this
+			 * <= OBUFSIZ chunk, the first should be a character
+			 * requiring special handling by ttyoutput.
+			 */
+			tp->t_rocount = 0;
+			i = b_to_q(cp, ce, &tp->t_outq);
+			/* i is the count b_to_q could not queue. */
+			ce -= i;
+			tp->t_column += ce;
+			cp += ce, cc -= ce, tk_nout += ce;
+			tp->t_outcc += ce;
+			if (i > 0) {
+				/* No Clists, wait a bit. */
+				ttstart(tp);
+				/* As above: abandon the write on error. */
+				error = ttysleep(tp,
+				    &lbolt, TTOPRI | PCATCH, ttybuf, 0);
+				if (error)
+					goto out;
+				goto loop;
+			}
+			if (ISSET(tp->t_lflag, FLUSHO) ||
+			    tp->t_outq.c_cc > hiwat)
+				break;
+		}
+		ttstart(tp);
+	}
+out:
+	/*
+	 * If cc is nonzero, we leave the uio structure inconsistent, as the
+	 * offset and iov pointers have moved forward, but it doesn't matter
+	 * (the call will either return short or restart with a new uio).
+	 */
+	uio->uio_resid += cc;
+	return (error);
+
+ovhiwat:
+	ttstart(tp);
+	s = spltty();
+	/*
+	 * This can only occur if FLUSHO is set in t_lflag,
+	 * or if ttstart/oproc is synchronous (or very fast).
+	 */
+	if (tp->t_outq.c_cc <= hiwat) {
+		splx(s);
+		goto loop;
+	}
+	if (flag & IO_NDELAY) {
+		splx(s);
+		uio->uio_resid += cc;
+		/* Only report EWOULDBLOCK if nothing at all was written. */
+		return (uio->uio_resid == cnt ? EWOULDBLOCK : 0);
+	}
+	SET(tp->t_state, TS_ASLEEP);
+	error = ttysleep(tp, &tp->t_outq, TTOPRI | PCATCH, ttyout, 0);
+	splx(s);
+	if (error)
+		goto out;
+	goto loop;
+}
+
+/*
+ * Rubout one character from the rawq of tp
+ * as cleanly as possible.
+ *
+ *	c	the character being erased (may carry TTY_QUOTE)
+ *	tp	the terminal
+ *
+ * Only the echo is handled here; nothing is done when ECHO is off or
+ * EXTPROC is on.  How the erase is displayed depends on ECHOE / ECHOPRT:
+ * ECHOE backs over the character on screen, ECHOPRT echoes the erased
+ * character after a '\', and otherwise the VERASE character is echoed.
+ */
+void
+ttyrub(c, tp)
+	register int c;
+	register struct tty *tp;
+{
+	register char *cp;
+	register int savecol;
+	int tabc, s;
+
+	if (!ISSET(tp->t_lflag, ECHO) || ISSET(tp->t_lflag, EXTPROC))
+		return;
+	CLR(tp->t_lflag, FLUSHO);
+	if (ISSET(tp->t_lflag, ECHOE)) {
+		if (tp->t_rocount == 0) {
+			/*
+			 * Screwed by ttwrite; retype
+			 */
+			ttyretype(tp);
+			return;
+		}
+		/* A quoted tab or newline is backed over as two columns. */
+		if (c == ('\t' | TTY_QUOTE) || c == ('\n' | TTY_QUOTE))
+			ttyrubo(tp, 2);
+		else {
+			CLR(c, ~TTY_CHARMASK);
+			switch (CCLASS(c)) {
+			case ORDINARY:
+				ttyrubo(tp, 1);
+				break;
+			case BACKSPACE:
+			case CONTROL:
+			case NEWLINE:
+			case RETURN:
+			case VTAB:
+				/* With ECHOCTL these were echoed as "^X". */
+				if (ISSET(tp->t_lflag, ECHOCTL))
+					ttyrubo(tp, 2);
+				break;
+			case TAB:
+				/* Not all of rawq was echoed by us: retype. */
+				if (tp->t_rocount < tp->t_rawq.c_cc) {
+					ttyretype(tp);
+					return;
+				}
+				/*
+				 * Work out how wide the tab was by re-echoing
+				 * the raw queue from column t_rocol with
+				 * TS_CNTTB and FLUSHO set.
+				 * NOTE(review): presumably FLUSHO suppresses
+				 * the real output while t_column is still
+				 * advanced -- confirm in ttyoutput.
+				 */
+				s = spltty();
+				savecol = tp->t_column;
+				SET(tp->t_state, TS_CNTTB);
+				SET(tp->t_lflag, FLUSHO);
+				tp->t_column = tp->t_rocol;
+				cp = tp->t_rawq.c_cf;
+				if (cp)
+					tabc = *cp;	/* XXX FIX NEXTC */
+				for (; cp; cp = nextc(&tp->t_rawq, cp, &tabc))
+					ttyecho(tabc, tp);
+				CLR(tp->t_lflag, FLUSHO);
+				CLR(tp->t_state, TS_CNTTB);
+				splx(s);
+
+				/* savecol will now be length of the tab. */
+				savecol -= tp->t_column;
+				tp->t_column += savecol;
+				if (savecol > 8)
+					savecol = 8;	/* overflow screw */
+				while (--savecol >= 0)
+					(void)ttyoutput('\b', tp);
+				break;
+			default:			/* XXX */
+#define	PANICSTR	"ttyrub: would panic c = %d, val = %d\n"
+				(void)printf(PANICSTR, c, CCLASS(c));
+#ifdef notdef
+				panic(PANICSTR, c, CCLASS(c));
+#endif
+			}
+		}
+	} else if (ISSET(tp->t_lflag, ECHOPRT)) {
+		/* First erase of a run: emit the leading '\'. */
+		if (!ISSET(tp->t_state, TS_ERASE)) {
+			SET(tp->t_state, TS_ERASE);
+			(void)ttyoutput('\\', tp);
+		}
+		ttyecho(c, tp);
+	} else
+		ttyecho(tp->t_cc[VERASE], tp);
+	/* One fewer character of rawq is now accounted for on screen. */
+	--tp->t_rocount;
+}
+
+/*
+ * Erase cnt character positions on the display: for each one, step
+ * back, overwrite with a blank, and step back again.
+ */
+static void
+ttyrubo(tp, cnt)
+	register struct tty *tp;
+	int cnt;
+{
+	register int left;
+
+	for (left = cnt; left > 0; left--) {
+		(void)ttyoutput('\b', tp);
+		(void)ttyoutput(' ', tp);
+		(void)ttyoutput('\b', tp);
+	}
+}
+
+/*
+ * ttyretype --
+ *	Redisplay the pending input line: echo the reprint character, start
+ *	a new line, then echo everything in the canonical and raw queues.
+ *	The caller is assumed to have checked c_cc already.
+ */
+void
+ttyretype(tp)
+	register struct tty *tp;
+{
+	register char *pos;
+	int s, ch;
+
+	/* Show the reprint character itself, unless it is disabled. */
+	if (tp->t_cc[VREPRINT] != _POSIX_VDISABLE)
+		ttyecho(tp->t_cc[VREPRINT], tp);
+
+	(void)ttyoutput('\n', tp);
+
+	/*
+	 * XXX
+	 * nextc is broken: it does not check the quote bit of the first
+	 * character, so the first character of each queue is fetched by
+	 * hand before walking the rest with nextc.
+	 */
+	s = spltty();
+
+	pos = tp->t_canq.c_cf;
+	ch = (pos != NULL) ? *pos : 0;
+	while (pos != NULL) {
+		ttyecho(ch, tp);
+		pos = nextc(&tp->t_canq, pos, &ch);
+	}
+
+	pos = tp->t_rawq.c_cf;
+	ch = (pos != NULL) ? *pos : 0;
+	while (pos != NULL) {
+		ttyecho(ch, tp);
+		pos = nextc(&tp->t_rawq, pos, &ch);
+	}
+
+	CLR(tp->t_state, TS_ERASE);
+	splx(s);
+
+	/* Everything in rawq has now been echoed, starting at column 0. */
+	tp->t_rocount = tp->t_rawq.c_cc;
+	tp->t_rocol = 0;
+}
+
+/*
+ * Echo a typed character to the terminal.
+ *
+ *	c	character to echo (may carry bits outside TTY_CHARMASK)
+ *	tp	the terminal
+ *
+ * Nothing is echoed when EXTPROC is set, or when ECHO is clear -- with
+ * the one exception that a newline is still echoed if ECHONL is set.
+ * With ECHOCTL, control characters other than tab and newline are shown
+ * as "^X" and DEL as "^?".
+ */
+static void
+ttyecho(c, tp)
+	register int c;
+	register struct tty *tp;
+{
+
+	if (!ISSET(tp->t_state, TS_CNTTB))
+		CLR(tp->t_lflag, FLUSHO);
+	/*
+	 * With ECHO off, the only thing still echoed is '\n' under ECHONL;
+	 * i.e. suppress the echo unless (ECHONL is set and c is '\n').
+	 */
+	if ((!ISSET(tp->t_lflag, ECHO) &&
+	    (!ISSET(tp->t_lflag, ECHONL) || c != '\n')) ||
+	    ISSET(tp->t_lflag, EXTPROC))
+		return;
+	if (ISSET(tp->t_lflag, ECHOCTL) &&
+	    ((ISSET(c, TTY_CHARMASK) <= 037 && c != '\t' && c != '\n') ||
+	    ISSET(c, TTY_CHARMASK) == 0177)) {
+		(void)ttyoutput('^', tp);
+		CLR(c, ~TTY_CHARMASK);
+		if (c == 0177)
+			c = '?';
+		else
+			c += 'A' - 1;
+	}
+	(void)ttyoutput(c, tp);
+}
+
+/*
+ * Notify everyone waiting for input on a tty.
+ */
+void
+ttwakeup(tp)
+	register struct tty *tp;
+{
+
+	/* Selectors recorded in t_rsel. */
+	selwakeup(&tp->t_rsel);
+
+	/* Async mode: post SIGIO to the tty's process group. */
+	if (ISSET(tp->t_state, TS_ASYNC)) {
+		pgsignal(tp->t_pgrp, SIGIO, 1);
+	}
+
+	/* Processes sleeping on the raw input queue. */
+	wakeup((caddr_t)&tp->t_rawq);
+}
+
+/*
+ * Translate a speed through a conversion table.  Drivers use this to
+ * turn a software speed value into a hardware parameter.  The table is
+ * terminated by an entry whose sp_speed is -1; the matching sp_code is
+ * returned, or -1 when the speed is not in the table.
+ */
+int
+ttspeedtab(speed, table)
+	int speed;
+	register struct speedtab *table;
+{
+	register struct speedtab *ent;
+
+	ent = table;
+	while (ent->sp_speed != -1) {
+		if (ent->sp_speed == speed)
+			return (ent->sp_code);
+		ent++;
+	}
+	return (-1);
+}
+
+/*
+ * Compute the tty's low and high water marks from its output speed.
+ *
+ * The aim is roughly one second of output between the high and the low
+ * mark: low water is half a second's worth of characters, high water a
+ * further second above that, each held within its TTMIN / TTMAX bounds.
+ * High water is finally rounded up to a multiple of CBSIZE.
+ */
+void
+ttsetwater(tp)
+	struct tty *tp;
+{
+	register int cps, mark;
+
+	/* Characters per second, at ten bits per character. */
+	cps = tp->t_ospeed / 10;
+
+	mark = cps / 2;
+	if (mark > TTMAXLOWAT)
+		mark = TTMAXLOWAT;
+	else if (mark < TTMINLOWAT)
+		mark = TTMINLOWAT;
+	tp->t_lowat = mark;
+
+	mark += cps;
+	if (mark > TTMAXHIWAT)
+		mark = TTMAXHIWAT;
+	else if (mark < TTMINHIWAT)
+		mark = TTMINHIWAT;
+	tp->t_hiwat = roundup(mark, CBSIZE);
+}
+
+/*
+ * Report on state of foreground process group.
+ *
+ * Prints, on tp itself, the load average followed by one line about the
+ * most "interesting" process of the foreground process group (as chosen
+ * by proc_compare): command name, pid, state or wait message, user and
+ * system time, %cpu and resident set size in kilobytes.  Nothing is
+ * printed when ttycheckoutq() reports no room in the output queue.
+ */
+void
+ttyinfo(tp)
+	register struct tty *tp;
+{
+	register struct proc *p, *pick;
+	struct timeval utime, stime;
+	int tmp;
+
+	if (ttycheckoutq(tp, 0) == 0)
+		return;
+
+	/* Print load average. */
+	tmp = (averunnable.ldavg[0] * 100 + FSCALE / 2) >> FSHIFT;
+	ttyprintf(tp, "load: %d.%02d ", tmp / 100, tmp % 100);
+
+	if (tp->t_session == NULL)
+		ttyprintf(tp, "not a controlling terminal\n");
+	else if (tp->t_pgrp == NULL)
+		ttyprintf(tp, "no foreground process group\n");
+	else if ((p = tp->t_pgrp->pg_mem) == NULL)
+		ttyprintf(tp, "empty foreground process group\n");
+	else {
+		/* Pick interesting process. */
+		for (pick = NULL; p != NULL; p = p->p_pgrpnxt)
+			if (proc_compare(pick, p))
+				pick = p;
+
+		ttyprintf(tp, " cmd: %s %d [%s] ", pick->p_comm, pick->p_pid,
+		    pick->p_stat == SRUN ? "running" :
+		    pick->p_wmesg ? pick->p_wmesg : "iowait");
+
+		calcru(pick, &utime, &stime, NULL);
+
+		/*
+		 * Round user time to hundredths of a second.  The rounding
+		 * may carry into the seconds; without the carry a value such
+		 * as 1.996s would be printed as "1.100".
+		 */
+		utime.tv_usec += 5000;
+		if (utime.tv_usec >= 1000000) {
+			utime.tv_sec += 1;
+			utime.tv_usec -= 1000000;
+		}
+		ttyprintf(tp, "%d.%02du ",
+		    utime.tv_sec, utime.tv_usec / 10000);
+
+		/* Same rounding for system time. */
+		stime.tv_usec += 5000;
+		if (stime.tv_usec >= 1000000) {
+			stime.tv_sec += 1;
+			stime.tv_usec -= 1000000;
+		}
+		ttyprintf(tp, "%d.%02ds ",
+		    stime.tv_sec, stime.tv_usec / 10000);
+
+#define	pgtok(a)	(((a) * NBPG) / 1024)
+		/* Print percentage cpu, resident set size. */
+		tmp = (pick->p_pctcpu * 10000 + FSCALE / 2) >> FSHIFT;
+		ttyprintf(tp, "%d%% %dk\n",
+		    tmp / 100,
+		    pick->p_stat == SIDL || pick->p_stat == SZOMB ? 0 :
+#ifdef pmap_resident_count
+			pgtok(pmap_resident_count(&pick->p_vmspace->vm_pmap))
+#else
+			pgtok(pick->p_vmspace->vm_rssize)
+#endif
+			);
+	}
+	tp->t_rocount = 0;	/* so pending input will be retyped if BS */
+}
+
+/*
+ * Returns 1 if p2 is "better" than p1
+ *
+ * The algorithm for picking the "interesting" process is thus:
+ *
+ *	1) Only foreground processes are eligible - implied.
+ *	2) Runnable processes are favored over anything else.  The runner
+ *	   with the highest cpu utilization is picked (p_estcpu).  Ties are
+ *	   broken by picking the highest pid.
+ *	3) The sleeper with the shortest sleep time is next.  With ties,
+ *	   we pick out just "short-term" sleepers (P_SINTR == 0).
+ *	4) Further ties are broken by picking the highest pid.
+ */
+#define ISRUN(p)	(((p)->p_stat == SRUN) || ((p)->p_stat == SIDL))
+#define TESTAB(a, b)    ((a)<<1 | (b))
+#define ONLYA   2
+#define ONLYB   1
+#define BOTH    3
+
+static int
+proc_compare(p1, p2)
+	register struct proc *p1, *p2;
+{
+	register int which;
+	int intr1, intr2;
+
+	/* Anything beats no candidate at all. */
+	if (p1 == NULL)
+		return (1);
+
+	/*
+	 * Runnable processes come first.
+	 */
+	which = TESTAB(ISRUN(p1), ISRUN(p2));
+	if (which == ONLYA)
+		return (0);
+	if (which == ONLYB)
+		return (1);
+	if (which == BOTH) {
+		/* Both runnable: higher recent cpu use wins, then pid. */
+		if (p1->p_estcpu != p2->p_estcpu)
+			return (p2->p_estcpu > p1->p_estcpu);
+		return (p2->p_pid > p1->p_pid);
+	}
+
+	/*
+	 * Neither is runnable; a zombie loses to a non-zombie.
+	 */
+	which = TESTAB(p1->p_stat == SZOMB, p2->p_stat == SZOMB);
+	if (which == ONLYA)
+		return (1);
+	if (which == ONLYB)
+		return (0);
+	if (which == BOTH)
+		return (p2->p_pid > p1->p_pid);	/* two zombies: highest pid */
+
+	/*
+	 * Two sleepers: the one that has slept least is preferred.
+	 */
+	if (p1->p_slptime != p2->p_slptime)
+		return (p1->p_slptime > p2->p_slptime);
+
+	/*
+	 * Same sleep time: prefer the one whose sleep is not interruptible.
+	 */
+	intr1 = (p1->p_flag & P_SINTR) != 0;
+	intr2 = (p2->p_flag & P_SINTR) != 0;
+	if (intr1 != intr2)
+		return (intr1);
+
+	/* Still tied: highest pid. */
+	return (p2->p_pid > p1->p_pid);
+}
+
+/*
+ * Put one character on a tty in the manner of a console putchar: a
+ * newline is preceded by a carriage return and output is started at
+ * once.  Returns -1 without writing anything unless the tty is both
+ * open and has carrier, 0 otherwise.
+ */
+int
+tputchar(c, tp)
+	int c;
+	struct tty *tp;
+{
+	register int s;
+	int rv;
+
+	s = spltty();
+	if (ISSET(tp->t_state,
+	    TS_CARR_ON | TS_ISOPEN) == (TS_CARR_ON | TS_ISOPEN)) {
+		if (c == '\n')
+			(void)ttyoutput('\r', tp);
+		(void)ttyoutput(c, tp);
+		ttstart(tp);
+		rv = 0;
+	} else
+		rv = -1;
+	splx(s);
+	return (rv);
+}
+
+/*
+ * tsleep() on chan on behalf of a tty operation.  Any error from tsleep
+ * (e.g. EINTR/ETIMEDOUT) is passed straight back.  Otherwise, if the
+ * tty's generation number changed while we slept, ERESTART is returned
+ * so that the pending call is restarted and redoes the validation done
+ * at its start (this is what happens when the tty is revoked).
+ */
+int
+ttysleep(tp, chan, pri, wmesg, timo)
+	struct tty *tp;
+	void *chan;
+	int pri, timo;
+	char *wmesg;
+{
+	int error;
+	short gen;
+
+	gen = tp->t_gen;
+	error = tsleep(chan, pri, wmesg, timo);
+	if (error == 0 && tp->t_gen != gen)
+		error = ERESTART;
+	return (error);
+}
diff --git a/sys/kern/tty_compat.c b/sys/kern/tty_compat.c
new file mode 100644
index 00000000000..a6a39d9d7bf
--- /dev/null
+++ b/sys/kern/tty_compat.c
@@ -0,0 +1,411 @@
+/*-
+ * Copyright (c) 1982, 1986, 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)tty_compat.c	8.1 (Berkeley) 6/10/93
+ */
+
+/* 
+ * mapping routines for old line discipline (yuck)
+ */
+#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/ioctl.h>
+#include <sys/proc.h>
+#include <sys/tty.h>
+#include <sys/termios.h>
+#include <sys/file.h>
+#include <sys/conf.h>
+#include <sys/kernel.h>
+#include <sys/syslog.h>
+
+/* When nonzero, TIOCLGET and ttcompatgetflags print the flags they return. */
+int ttydebug = 0;
+
+/*
+ * Baud rate -> old sgtty speed code (0..15), searched with ttspeedtab().
+ * The { -1, -1 } entry terminates the table.
+ */
+static struct speedtab compatspeeds[] = {
+	{ 38400, 15 },
+	{ 19200, 14 },
+	{ 9600,	13 },
+	{ 4800,	12 },
+	{ 2400,	11 },
+	{ 1800,	10 },
+	{ 1200,	9 },
+	{ 600,	8 },
+	{ 300,	7 },
+	{ 200,	6 },
+	{ 150,	5 },
+	{ 134,	4 },
+	{ 110,	3 },
+	{ 75,	2 },
+	{ 50,	1 },
+	{ 0,	0 },
+	{ -1,	-1 },
+};
+/* Inverse mapping: old sgtty speed code (the index) -> baud rate. */
+static int compatspcodes[16] = { 
+	0, 50, 75, 110, 134, 150, 200, 300, 600, 1200,
+	1800, 2400, 4800, 9600, 19200, 38400,
+};
+
+/*
+ * Handle the old (sgtty-style) terminal ioctls by translating them to
+ * and from the termios state of tp.
+ *
+ *	tp	the terminal
+ *	com	ioctl command
+ *	data	command argument (already in kernel space)
+ *	flag	passed through to ttioctl()
+ *
+ * Returns 0 on success, the result of ttioctl() for commands that are
+ * forwarded to it, or -1 when com is not one of the commands handled here.
+ */
+/*ARGSUSED*/
+ttcompat(tp, com, data, flag)
+	register struct tty *tp;
+	int com;
+	caddr_t data;
+	int flag;
+{
+
+	switch (com) {
+	/* Report speeds, erase/kill characters and flags in sgttyb form. */
+	case TIOCGETP: {
+		register struct sgttyb *sg = (struct sgttyb *)data;
+		register u_char *cc = tp->t_cc;
+		register speed;
+
+		/* Speeds with no old-style code are reported as code 15. */
+		speed = ttspeedtab(tp->t_ospeed, compatspeeds);
+		sg->sg_ospeed = (speed == -1) ? 15 : speed;
+		if (tp->t_ispeed == 0)
+			sg->sg_ispeed = sg->sg_ospeed;
+		else {
+			speed = ttspeedtab(tp->t_ispeed, compatspeeds);
+			sg->sg_ispeed = (speed == -1) ? 15 : speed;
+		}
+		sg->sg_erase = cc[VERASE];
+		sg->sg_kill = cc[VKILL];
+		sg->sg_flags = ttcompatgetflags(tp);
+		break;
+	}
+
+	/*
+	 * Set state from an sgttyb: build a termios and hand it to
+	 * ttioctl() as TIOCSETAF (for TIOCSETP) or TIOCSETA (for TIOCSETN).
+	 */
+	case TIOCSETP:
+	case TIOCSETN: {
+		register struct sgttyb *sg = (struct sgttyb *)data;
+		struct termios term;
+		int speed;
+
+		term = tp->t_termios;
+		/* Codes outside 0..15 are used as the speed itself. */
+		if ((speed = sg->sg_ispeed) > 15 || speed < 0)
+			term.c_ispeed = speed;
+		else
+			term.c_ispeed = compatspcodes[speed];
+		if ((speed = sg->sg_ospeed) > 15 || speed < 0)
+			term.c_ospeed = speed;
+		else
+			term.c_ospeed = compatspcodes[speed];
+		term.c_cc[VERASE] = sg->sg_erase;
+		term.c_cc[VKILL] = sg->sg_kill;
+		/* sg_flags replaces only the low 16 bits of t_flags. */
+		tp->t_flags = tp->t_flags&0xffff0000 | sg->sg_flags&0xffff;
+		ttcompatsetflags(tp, &term);
+		return (ttioctl(tp, com == TIOCSETP ? TIOCSETAF : TIOCSETA, 
+			&term, flag));
+	}
+
+	/* Fetch the "tchars" special characters. */
+	case TIOCGETC: {
+		struct tchars *tc = (struct tchars *)data;
+		register u_char *cc = tp->t_cc;
+
+		tc->t_intrc = cc[VINTR];
+		tc->t_quitc = cc[VQUIT];
+		tc->t_startc = cc[VSTART];
+		tc->t_stopc = cc[VSTOP];
+		tc->t_eofc = cc[VEOF];
+		tc->t_brkc = cc[VEOL];
+		break;
+	}
+	/* Store the "tchars" special characters directly into t_cc. */
+	case TIOCSETC: {
+		struct tchars *tc = (struct tchars *)data;
+		register u_char *cc = tp->t_cc;
+
+		cc[VINTR] = tc->t_intrc;
+		cc[VQUIT] = tc->t_quitc;
+		cc[VSTART] = tc->t_startc;
+		cc[VSTOP] = tc->t_stopc;
+		cc[VEOF] = tc->t_eofc;
+		cc[VEOL] = tc->t_brkc;
+		/*
+		 * NOTE(review): this comparison can only be true if the
+		 * type of t_brkc is signed -- confirm against struct tchars.
+		 */
+		if (tc->t_brkc == -1)
+			cc[VEOL2] = _POSIX_VDISABLE;
+		break;
+	}
+	/* Store the "ltchars" special characters directly into t_cc. */
+	case TIOCSLTC: {
+		struct ltchars *ltc = (struct ltchars *)data;
+		register u_char *cc = tp->t_cc;
+
+		cc[VSUSP] = ltc->t_suspc;
+		cc[VDSUSP] = ltc->t_dsuspc;
+		cc[VREPRINT] = ltc->t_rprntc;
+		cc[VDISCARD] = ltc->t_flushc;
+		cc[VWERASE] = ltc->t_werasc;
+		cc[VLNEXT] = ltc->t_lnextc;
+		break;
+	}
+	/* Fetch the "ltchars" special characters. */
+	case TIOCGLTC: {
+		struct ltchars *ltc = (struct ltchars *)data;
+		register u_char *cc = tp->t_cc;
+
+		ltc->t_suspc = cc[VSUSP];
+		ltc->t_dsuspc = cc[VDSUSP];
+		ltc->t_rprntc = cc[VREPRINT];
+		ltc->t_flushc = cc[VDISCARD];
+		ltc->t_werasc = cc[VWERASE];
+		ltc->t_lnextc = cc[VLNEXT];
+		break;
+	}
+	/*
+	 * Set, clear or replace the "local" flag word, which lives in
+	 * the upper 16 bits of t_flags, then push the result to termios.
+	 */
+	case TIOCLBIS:
+	case TIOCLBIC:
+	case TIOCLSET: {
+		struct termios term;
+
+		term = tp->t_termios;
+		if (com == TIOCLSET)
+			tp->t_flags = (tp->t_flags&0xffff) | *(int *)data<<16;
+		else {
+			/* Start from the current termios-derived upper half. */
+			tp->t_flags = 
+			 (ttcompatgetflags(tp)&0xffff0000)|(tp->t_flags&0xffff);
+			if (com == TIOCLBIS)
+				tp->t_flags |= *(int *)data<<16;
+			else
+				tp->t_flags &= ~(*(int *)data<<16);
+		}
+		ttcompatsetlflags(tp, &term);
+		return (ttioctl(tp, TIOCSETA, &term, flag));
+	}
+	/* Return the "local" flag word (upper 16 bits of the flags). */
+	case TIOCLGET:
+		*(int *)data = ttcompatgetflags(tp)>>16;
+		if (ttydebug)
+			printf("CLGET: returning %x\n", *(int *)data);
+		break;
+
+	/* Old get-discipline: discipline 0 is reported as 2. */
+	case OTIOCGETD:
+		*(int *)data = tp->t_line ? tp->t_line : 2;
+		break;
+
+	/* Old set-discipline: a request for 2 selects discipline 0. */
+	case OTIOCSETD: {
+		int ldisczero = 0;
+
+		return (ttioctl(tp, TIOCSETD, 
+			*(int *)data == 2 ? (caddr_t)&ldisczero : data, flag));
+	    }
+
+	/* Old TIOCCONS took no argument; supply 1 for the new one. */
+	case OTIOCCONS:
+		*(int *)data = 1;
+		return (ttioctl(tp, TIOCCONS, data, flag));
+
+	default:
+		/* Not a compatibility ioctl. */
+		return (-1);
+	}
+	return (0);
+}
+
+/*
+ * Build the old sgtty flag word for tp from its current termios flags
+ * (and from the LITOUT / PASS8 bits remembered in t_flags).  The result
+ * is returned; tp is not modified.
+ */
+ttcompatgetflags(tp)
+	register struct tty *tp;
+{
+	register long iflag = tp->t_iflag;
+	register long lflag = tp->t_lflag;
+	register long oflag = tp->t_oflag;
+	register long cflag = tp->t_cflag;
+	register flags = 0;
+
+	if (iflag&IXOFF)
+		flags |= TANDEM;
+	if (iflag&ICRNL || oflag&ONLCR)
+		flags |= CRMOD;
+	if (cflag&PARENB) {
+		/* Parity enabled: checked parity is odd or even, else both. */
+		if (iflag&INPCK) {
+			if (cflag&PARODD)
+				flags |= ODDP;
+			else
+				flags |= EVENP;
+		} else
+			flags |= EVENP | ODDP;
+	} else {
+		/* LITOUT is only reported while output processing is off. */
+		if ((tp->t_flags&LITOUT) && !(oflag&OPOST))
+			flags |= LITOUT;
+		if (tp->t_flags&PASS8)
+			flags |= PASS8;
+	}
+	
+	if ((lflag&ICANON) == 0) {	
+		/* fudge */
+		/* Non-canonical: CBREAK if any processing remains, else RAW. */
+		if (iflag&IXON || lflag&ISIG || lflag&IEXTEN || cflag&PARENB)
+			flags |= CBREAK;
+		else
+			flags |= RAW;
+	}
+	if (cflag&MDMBUF)
+		flags |= MDMBUF;
+	if ((cflag&HUPCL) == 0)
+		flags |= NOHANG;
+	if (oflag&OXTABS)
+		flags |= XTABS;
+	if (lflag&ECHOE)
+		flags |= CRTERA|CRTBS;
+	if (lflag&ECHOKE)
+		flags |= CRTKIL|CRTBS;
+	if (lflag&ECHOPRT)
+		flags |= PRTERA;
+	if (lflag&ECHOCTL)
+		flags |= CTLECH;
+	if ((iflag&IXANY) == 0)
+		flags |= DECCTQ;
+	/* These lflag bits are copied into the old flag word unchanged. */
+	flags |= lflag&(ECHO|TOSTOP|FLUSHO|PENDIN|NOFLSH);
+if (ttydebug)
+	printf("getflags: %x\n", flags);
+	return (flags);
+}
+
+/*
+ * Translate the old sgtty flags held in tp->t_flags (RAW, CBREAK, CRMOD,
+ * ECHO, parity, ...) into the termios flag words of *t.  Only *t is
+ * written; tp is read but not changed.  No value is returned.
+ */
+ttcompatsetflags(tp, t)
+	register struct tty *tp;
+	register struct termios *t;
+{
+	register flags = tp->t_flags;
+	register long iflag = t->c_iflag;
+	register long oflag = t->c_oflag;
+	register long lflag = t->c_lflag;
+	register long cflag = t->c_cflag;
+
+	if (flags & RAW) {
+		/* RAW: drop all input flags except IXOFF, no processing. */
+		iflag &= IXOFF;
+		oflag &= ~OPOST;
+		lflag &= ~(ECHOCTL|ISIG|ICANON|IEXTEN);
+	} else {
+		iflag |= BRKINT|IXON|IMAXBEL;
+		oflag |= OPOST;
+		lflag |= ISIG|IEXTEN|ECHOCTL;	/* XXX was echoctl on ? */
+		if (flags & XTABS)
+			oflag |= OXTABS;
+		else
+			oflag &= ~OXTABS;
+		if (flags & CBREAK)
+			lflag &= ~ICANON;
+		else
+			lflag |= ICANON;
+		if (flags&CRMOD) {
+			iflag |= ICRNL;
+			oflag |= ONLCR;
+		} else {
+			iflag &= ~ICRNL;
+			oflag &= ~ONLCR;
+		}
+	}
+	if (flags&ECHO)
+		lflag |= ECHO;
+	else
+		lflag &= ~ECHO;
+		
+	/* Character size and parity: 8 bits/no parity vs. 7 bits/parity. */
+	if (flags&(RAW|LITOUT|PASS8)) {
+		cflag &= ~(CSIZE|PARENB);
+		cflag |= CS8;
+		/* LITOUT alone still strips input to 7 bits. */
+		if ((flags&(RAW|PASS8)) == 0)
+			iflag |= ISTRIP;
+		else
+			iflag &= ~ISTRIP;
+	} else {
+		cflag &= ~CSIZE;
+		cflag |= CS7|PARENB;
+		iflag |= ISTRIP;
+	}
+	/* Input parity checking only when exactly one of EVENP/ODDP is set. */
+	if ((flags&(EVENP|ODDP)) == EVENP) {
+		iflag |= INPCK;
+		cflag &= ~PARODD;
+	} else if ((flags&(EVENP|ODDP)) == ODDP) {
+		iflag |= INPCK;
+		cflag |= PARODD;
+	} else 
+		iflag &= ~INPCK;
+	/* Must follow the OPOST setting in the non-RAW branch above. */
+	if (flags&LITOUT)
+		oflag &= ~OPOST;	/* move earlier ? */
+	if (flags&TANDEM)
+		iflag |= IXOFF;
+	else
+		iflag &= ~IXOFF;
+	t->c_iflag = iflag;
+	t->c_oflag = oflag;
+	t->c_lflag = lflag;
+	t->c_cflag = cflag;
+}
+
+/*
+ * Translate the old "local mode" flags held in tp->t_flags (CRTERA,
+ * CRTKIL, PRTERA, CTLECH, DECCTQ, MDMBUF, NOHANG, LITOUT, PASS8, ...)
+ * into the termios flag words of *t.  Only *t is written; no value is
+ * returned.
+ */
+ttcompatsetlflags(tp, t)
+	register struct tty *tp;
+	register struct termios *t;
+{
+	register flags = tp->t_flags;
+	register long iflag = t->c_iflag;
+	register long oflag = t->c_oflag;
+	register long lflag = t->c_lflag;
+	register long cflag = t->c_cflag;
+
+	if (flags&CRTERA)
+		lflag |= ECHOE;
+	else
+		lflag &= ~ECHOE;
+	if (flags&CRTKIL)
+		lflag |= ECHOKE;
+	else
+		lflag &= ~ECHOKE;
+	if (flags&PRTERA)
+		lflag |= ECHOPRT;
+	else
+		lflag &= ~ECHOPRT;
+	if (flags&CTLECH)
+		lflag |= ECHOCTL;
+	else
+		lflag &= ~ECHOCTL;
+	/* DECCTQ is the inverse of IXANY. */
+	if ((flags&DECCTQ) == 0)
+		iflag |= IXANY;
+	else
+		iflag &= ~IXANY;
+	if (flags & MDMBUF)
+		cflag |= MDMBUF;
+	else
+		cflag &= ~MDMBUF;
+	/* NOHANG is the inverse of HUPCL. */
+	if (flags&NOHANG)
+		cflag &= ~HUPCL;
+	else
+		cflag |= HUPCL;
+	/* These bits are copied from the old flag word unchanged. */
+	lflag &= ~(TOSTOP|FLUSHO|PENDIN|NOFLSH);
+	lflag |= flags&(TOSTOP|FLUSHO|PENDIN|NOFLSH);
+	if (flags&(LITOUT|PASS8)) {
+		/* 8-bit path: no parity, no stripping unless LITOUT alone. */
+		iflag &= ~ISTRIP;
+		cflag &= ~(CSIZE|PARENB);
+		cflag |= CS8;
+		if (flags&LITOUT)
+			oflag &= ~OPOST;
+		if ((flags&(PASS8|RAW)) == 0)
+			iflag |= ISTRIP;
+	} else if ((flags&RAW) == 0) {
+		/* Ordinary cooked/cbreak line: 7 bits with parity. */
+		cflag &= ~CSIZE;
+		cflag |= CS7|PARENB;
+		oflag |= OPOST;
+	}
+	t->c_iflag = iflag;
+	t->c_oflag = oflag;
+	t->c_lflag = lflag;
+	t->c_cflag = cflag;
+}
+#endif	/* COMPAT_43 || COMPAT_SUNOS */
diff --git a/sys/kern/tty_conf.c b/sys/kern/tty_conf.c
new file mode 100644
index 00000000000..b53edb42975
--- /dev/null
+++ b/sys/kern/tty_conf.c
@@ -0,0 +1,126 @@
+/*-
+ * Copyright (c) 1982, 1986, 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)tty_conf.c	8.4 (Berkeley) 1/21/94
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/buf.h>
+#include <sys/ioctl.h>
+#include <sys/proc.h>
+#include <sys/tty.h>
+#include <sys/conf.h>
+
+/*
+ * enodev cast to the signature of each linesw entry point; used below to
+ * fill the slots of line disciplines that are not configured.
+ */
+#define	ttynodisc ((int (*) __P((dev_t, struct tty *)))enodev)
+#define	ttyerrclose ((int (*) __P((struct tty *, int flags)))enodev)
+#define	ttyerrio ((int (*) __P((struct tty *, struct uio *, int)))enodev)
+#define	ttyerrinput ((int (*) __P((int c, struct tty *)))enodev)
+#define	ttyerrstart ((int (*) __P((struct tty *)))enodev)
+
+int	nullioctl __P((struct tty *tp, int cmd, caddr_t data,
+			int flag, struct proc *p));
+
+#include "tb.h"
+#if NTB > 0
+int	tbopen __P((dev_t dev, struct tty *tp));
+int	tbclose __P((struct tty *tp, int flags));
+int	tbread __P((struct tty *, struct uio *, int flags));
+int	tbioctl __P((struct tty *tp, int cmd, caddr_t data,
+			int flag, struct proc *p));
+int	tbinput __P((int c, struct tty *tp));
+#endif
+
+#include "sl.h"
+#if NSL > 0
+int	slopen __P((dev_t dev, struct tty *tp));
+int	slclose __P((struct tty *tp, int flags));
+int	sltioctl __P((struct tty *tp, int cmd, caddr_t data,
+			int flag, struct proc *p));
+int	slinput __P((int c, struct tty *tp));
+int	slstart __P((struct tty *tp));
+#endif
+
+
+/*
+ * Line discipline switch, indexed by line discipline number.  Each entry
+ * lists, in order: open, close, read, write, ioctl, input, start and
+ * modem routines.  Slots for disciplines that are defunct or not
+ * configured are filled with the enodev-based stubs defined above.
+ */
+struct	linesw linesw[] =
+{
+	{ ttyopen, ttylclose, ttread, ttwrite, nullioctl,
+	  ttyinput, ttstart, ttymodem },		/* 0- termios */
+
+	{ ttynodisc, ttyerrclose, ttyerrio, ttyerrio, nullioctl,
+	  ttyerrinput, ttyerrstart, nullmodem },	/* 1- defunct */
+
+	{ ttynodisc, ttyerrclose, ttyerrio, ttyerrio, nullioctl,
+	  ttyerrinput, ttyerrstart, nullmodem },	/* 2- defunct */
+
+#if NTB > 0
+	{ tbopen, tbclose, tbread, enodev, tbioctl,
+	  tbinput, ttstart, nullmodem },		/* 3- TABLDISC */
+#else
+	/* 3- TABLDISC not configured */
+	{ ttynodisc, ttyerrclose, ttyerrio, ttyerrio, nullioctl,
+	  ttyerrinput, ttyerrstart, nullmodem },
+#endif
+
+#if NSL > 0
+	{ slopen, slclose, ttyerrio, ttyerrio, sltioctl,
+	  slinput, slstart, nullmodem },		/* 4- SLIPDISC */
+#else
+	/* 4- SLIPDISC not configured */
+	{ ttynodisc, ttyerrclose, ttyerrio, ttyerrio, nullioctl,
+	  ttyerrinput, ttyerrstart, nullmodem },
+#endif
+};
+
+/* Number of entries in linesw. */
+int	nlinesw = sizeof (linesw) / sizeof (linesw[0]);
+
+/*
+ * Do nothing specific version of line
+ * discipline specific ioctl command.
+ *
+ * Always returns -1; all arguments are ignored.  The return type is
+ * spelled out so the definition agrees with the prototype declared
+ * earlier in this file.
+ */
+/*ARGSUSED*/
+int
+nullioctl(tp, cmd, data, flags, p)
+	struct tty *tp;
+	int cmd;
+	char *data;
+	int flags;
+	struct proc *p;
+{
+
+#ifdef lint
+	tp = tp; data = data; flags = flags; p = p;
+#endif
+	return (-1);
+}
diff --git a/sys/kern/tty_pty.c b/sys/kern/tty_pty.c
new file mode 100644
index 00000000000..0e6911b63e1
--- /dev/null
+++ b/sys/kern/tty_pty.c
@@ -0,0 +1,691 @@
+/*
+ * Copyright (c) 1982, 1986, 1989, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)tty_pty.c	8.2 (Berkeley) 9/23/93
+ */
+
+/*
+ * Pseudo-teletype Driver
+ * (Actually two drivers, requiring two entries in 'cdevsw')
+ */
+#include "pty.h"		/* XXX */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/ioctl.h>
+#include <sys/proc.h>
+#include <sys/tty.h>
+#include <sys/conf.h>
+#include <sys/file.h>
+#include <sys/uio.h>
+#include <sys/kernel.h>
+#include <sys/vnode.h>
+
+#if NPTY == 1
+#undef NPTY
+#define	NPTY	32		/* crude XXX */
+#endif
+
+#define BUFSIZ 100		/* Chunk size iomoved to/from user */
+
+/*
+ * pts == /dev/tty[pqrs]?
+ * ptc == /dev/pty[pqrs]?
+ *
+ * The pts* routines below serve the slave side and the ptc* routines
+ * the controller side; both sides of one pty share the pt_tty[] and
+ * pt_ioctl[] entries selected by the device minor number.
+ */
+struct	tty pt_tty[NPTY];	/* XXX */
+struct	pt_ioctl {
+	int	pt_flags;			/* PF_* bits defined below */
+	struct	selinfo pt_selr, pt_selw;	/* controller read/write select records (ptcselect) */
+	u_char	pt_send;			/* pending TIOCPKT_* status byte, returned by ptcread */
+	u_char	pt_ucntl;			/* pending user-control byte, returned by ptcread */
+} pt_ioctl[NPTY];		/* XXX */
+int	npty = NPTY;		/* for pstat -t */
+
+#define	PF_PKT		0x08		/* packet mode */
+#define	PF_STOPPED	0x10		/* user told stopped */
+#define	PF_REMOTE	0x20		/* remote and flow controlled input */
+#define	PF_NOSTOP	0x40		/* TIOCPKT_NOSTOP last reported (see end of ptyioctl) */
+#define PF_UCNTL	0x80		/* user control mode */
+
+void	ptsstop __P((struct tty *, int));
+
+/*
+ * Establish n (or default if n is 1) ptys in the system.
+ *
+ * XXX cdevsw & pstat require the array `pty[]' to be an array
+ *
+ * The dynamic allocation below is compiled only when `notyet' is
+ * defined.  Otherwise this routine does nothing and the statically
+ * sized pt_tty[] and pt_ioctl[] arrays (NPTY entries each) are used.
+ */
+void
+ptyattach(n)
+	int n;
+{
+#ifdef notyet
+	char *mem;
+	register u_long ntb;
+#define	DEFAULT_NPTY	32
+
+	/* maybe should allow 0 => none? */
+	if (n <= 1)
+		n = DEFAULT_NPTY;
+	ntb = n * sizeof(struct tty);
+	mem = malloc(ntb + ALIGNBYTES + n * sizeof(struct pt_ioctl),
+	    M_DEVBUF, M_WAITOK);
+	pt_tty = (struct tty *)mem;
+	mem = (char *)ALIGN(mem + ntb);
+	pt_ioctl = (struct pt_ioctl *)mem;
+	npty = n;
+#endif
+}
+
+/*
+ * Open the slave side of a pty.
+ *
+ * On the first open the tty gets the default control characters,
+ * flags, speed and watermarks.  If it is already open with TS_XCLUDE
+ * set, only uid 0 may open it again; anyone else gets EBUSY.
+ * Carrier is asserted at once when the controller is still around
+ * (t_oproc set); otherwise the open sleeps until carrier comes up,
+ * unless FNONBLOCK was requested.
+ *
+ * Returns ENXIO for an out-of-range minor, an error from ttysleep(),
+ * or the result of the line discipline's open routine.
+ */
+/*ARGSUSED*/
+int
+ptsopen(dev, flag, devtype, p)
+	dev_t dev;
+	int flag, devtype;
+	struct proc *p;
+{
+	register struct tty *tp;
+	int error;
+
+	if (minor(dev) >= npty)
+		return (ENXIO);
+	tp = &pt_tty[minor(dev)];
+	if (tp->t_state & TS_ISOPEN) {
+		if ((tp->t_state & TS_XCLUDE) && p->p_ucred->cr_uid != 0)
+			return (EBUSY);
+	} else {
+		tp->t_state |= TS_WOPEN;
+		ttychars(tp);		/* Set up default chars */
+		tp->t_iflag = TTYDEF_IFLAG;
+		tp->t_oflag = TTYDEF_OFLAG;
+		tp->t_lflag = TTYDEF_LFLAG;
+		tp->t_cflag = TTYDEF_CFLAG;
+		tp->t_ispeed = tp->t_ospeed = TTYDEF_SPEED;
+		ttsetwater(tp);		/* would be done in xxparam() */
+	}
+	if (tp->t_oproc)			/* Ctrlr still around. */
+		tp->t_state |= TS_CARR_ON;
+	while ((tp->t_state & TS_CARR_ON) == 0) {
+		tp->t_state |= TS_WOPEN;
+		if (flag & FNONBLOCK)
+			break;
+		error = ttysleep(tp, (caddr_t)&tp->t_rawq, TTIPRI | PCATCH,
+		    ttopen, 0);
+		if (error)
+			return (error);
+	}
+	error = (*linesw[tp->t_line].l_open)(dev, tp);
+	/* let a controller blocked in read/write/select notice the slave */
+	ptcwakeup(tp, FREAD|FWRITE);
+	return (error);
+}
+
+/*
+ * Close the slave side of a pty: run the line discipline's close
+ * routine, then ttyclose(), and wake up the controller side.
+ * The two close results are OR-ed together to form the return value.
+ */
+int
+ptsclose(dev, flag, mode, p)
+	dev_t dev;
+	int flag, mode;
+	struct proc *p;
+{
+	register struct tty *tp = &pt_tty[minor(dev)];
+	int err;
+
+	err = (*linesw[tp->t_line].l_close)(tp, flag);
+	err |= ttyclose(tp);
+	ptcwakeup(tp, FREAD|FWRITE);
+	return (err);
+}
+
+/*
+ * Read from the slave side of a pty.
+ *
+ * Normal mode: the line discipline's read routine does the work
+ * (skipped, returning 0, when no controller is attached).
+ *
+ * Remote mode (PF_REMOTE): data is taken directly from the canonical
+ * queue.  A background reader is sent SIGTTIN and sleeps, or gets EIO
+ * when SIGTTIN is ignored or masked, pg_jobc is 0, or P_PPWAIT is set.
+ * An empty queue returns EWOULDBLOCK with IO_NDELAY, else sleeps and
+ * retries.  The last byte left in the queue is discarded rather than
+ * returned; ptcwrite() appends a 0 byte after each remote-mode write.
+ *
+ * The controller is woken for writing unless remote-mode data remains.
+ */
+int
+ptsread(dev, uio, flag)
+	dev_t dev;
+	struct uio *uio;
+	int flag;
+{
+	struct proc *p = curproc;
+	register struct tty *tp = &pt_tty[minor(dev)];
+	register struct pt_ioctl *pti = &pt_ioctl[minor(dev)];
+	int error = 0;
+
+again:
+	if ((pti->pt_flags & PF_REMOTE) == 0) {
+		if (tp->t_oproc)
+			error = (*linesw[tp->t_line].l_read)(tp, uio, flag);
+		ptcwakeup(tp, FWRITE);
+		return (error);
+	}
+
+	while (isbackground(p, tp)) {
+		if ((p->p_sigignore & sigmask(SIGTTIN)) ||
+		    (p->p_sigmask & sigmask(SIGTTIN)) ||
+		    p->p_pgrp->pg_jobc == 0 ||
+		    p->p_flag & P_PPWAIT)
+			return (EIO);
+		pgsignal(p->p_pgrp, SIGTTIN, 1);
+		error = ttysleep(tp, (caddr_t)&lbolt,
+		    TTIPRI | PCATCH, ttybg, 0);
+		if (error)
+			return (error);
+	}
+	if (tp->t_canq.c_cc == 0) {
+		if (flag & IO_NDELAY)
+			return (EWOULDBLOCK);
+		error = ttysleep(tp, (caddr_t)&tp->t_canq,
+		    TTIPRI | PCATCH, ttyin, 0);
+		if (error)
+			return (error);
+		goto again;
+	}
+	/* hand over everything except the final byte in the queue */
+	while (tp->t_canq.c_cc > 1 && uio->uio_resid > 0) {
+		if (ureadc(getc(&tp->t_canq), uio) < 0) {
+			error = EFAULT;
+			break;
+		}
+	}
+	if (tp->t_canq.c_cc == 1)
+		(void) getc(&tp->t_canq);
+	if (tp->t_canq.c_cc)
+		return (error);
+	ptcwakeup(tp, FWRITE);
+	return (error);
+}
+
+/*
+ * Write to pseudo-tty.
+ * Wakeups of controlling tty will happen
+ * indirectly, when tty driver calls ptsstart.
+ *
+ * Returns EIO when no controller is attached (t_oproc is clear),
+ * otherwise whatever the line discipline's write routine returns.
+ */
+int
+ptswrite(dev, uio, flag)
+	dev_t dev;
+	struct uio *uio;
+	int flag;
+{
+	register struct tty *tp = &pt_tty[minor(dev)];
+
+	if (tp->t_oproc == 0)
+		return (EIO);
+	return ((*linesw[tp->t_line].l_write)(tp, uio, flag));
+}
+
+/*
+ * Start output on pseudo-tty.
+ * Wake up process selecting or sleeping for input from controlling tty.
+ *
+ * Nothing happens while output is stopped (TS_TTSTOP).  If the
+ * controller had been told the slave was stopped, PF_STOPPED is
+ * cleared and TIOCPKT_START is queued in pt_send.
+ */
+void
+ptsstart(tp)
+	struct tty *tp;
+{
+	register struct pt_ioctl *pti;
+
+	if (tp->t_state & TS_TTSTOP)
+		return;
+	pti = &pt_ioctl[minor(tp->t_dev)];
+	if (pti->pt_flags & PF_STOPPED) {
+		pti->pt_flags &= ~PF_STOPPED;
+		pti->pt_send = TIOCPKT_START;
+	}
+	ptcwakeup(tp, FREAD);
+}
+
+/*
+ * Wake up the controller side of a pty.
+ * FREAD wakes selectors recorded in pt_selr and sleepers on
+ * &t_outq.c_cf (where ptcread sleeps); FWRITE wakes selectors in
+ * pt_selw and sleepers on &t_rawq.c_cf (where ptcwrite sleeps).
+ */
+ptcwakeup(tp, flag)
+	struct tty *tp;
+	int flag;
+{
+	struct pt_ioctl *pti = &pt_ioctl[minor(tp->t_dev)];
+
+	if ((flag & FREAD) != 0) {
+		selwakeup(&pti->pt_selr);
+		wakeup((caddr_t)&tp->t_outq.c_cf);
+	}
+	if ((flag & FWRITE) != 0) {
+		selwakeup(&pti->pt_selw);
+		wakeup((caddr_t)&tp->t_rawq.c_cf);
+	}
+}
+
+/*
+ * Open the controller side of a pty.
+ *
+ * Only one controller open is allowed at a time: a non-zero t_oproc
+ * marks the pty as in use and yields EIO.  Otherwise the output start
+ * routine is installed, the line discipline is told carrier is up,
+ * EXTPROC is cleared and the per-pty mode state is reset.
+ * Returns ENXIO for an out-of-range minor, 0 on success.
+ */
+/*ARGSUSED*/
+int
+#ifdef __STDC__
+ptcopen(dev_t dev, int flag, int devtype, struct proc *p)
+#else
+ptcopen(dev, flag, devtype, p)
+	dev_t dev;
+	int flag, devtype;
+	struct proc *p;
+#endif
+{
+	register struct tty *tp;
+	struct pt_ioctl *pti;
+
+	if (minor(dev) >= npty)
+		return (ENXIO);
+	tp = &pt_tty[minor(dev)];
+	if (tp->t_oproc)
+		return (EIO);
+
+	tp->t_oproc = ptsstart;
+#ifdef sun4c
+	tp->t_stop = ptsstop;
+#endif
+	(void)(*linesw[tp->t_line].l_modem)(tp, 1);
+	tp->t_lflag &= ~EXTPROC;
+
+	/* start with no packet/remote/user-control mode and nothing pending */
+	pti = &pt_ioctl[minor(dev)];
+	pti->pt_flags = 0;
+	pti->pt_send = 0;
+	pti->pt_ucntl = 0;
+	return (0);
+}
+
+/*
+ * Close the controller side of a pty: tell the line discipline that
+ * carrier dropped, clear TS_CARR_ON, and clear t_oproc so the pty is
+ * seen as having no controller.  The session pointer is reset too.
+ * Always returns 0.
+ */
+int
+ptcclose(dev)
+	dev_t dev;
+{
+	register struct tty *tp = &pt_tty[minor(dev)];
+
+	(void)(*linesw[tp->t_line].l_modem)(tp, 0);
+	tp->t_state &= ~TS_CARR_ON;
+	tp->t_oproc = 0;		/* mark closed */
+	tp->t_session = 0;
+	return (0);
+}
+
+/*
+ * Read from the controller side of a pty, i.e. collect slave output.
+ *
+ * With the slave open, a pending packet-mode status byte (pt_send) or
+ * user-control byte (pt_ucntl) is returned on its own; a status byte
+ * carrying TIOCPKT_IOCTL is followed by as much of the termios
+ * structure as fits.  Otherwise the call waits for output-queue data.
+ * It returns 0 (EOF) when carrier is down and EWOULDBLOCK under
+ * IO_NDELAY.  In packet or user-control mode the data is preceded by
+ * a 0 byte.
+ *
+ * Once the output queue drains to the low-water mark, writers
+ * sleeping on it and selectors in t_wsel are woken.
+ */
+int
+ptcread(dev, uio, flag)
+	dev_t dev;
+	struct uio *uio;
+	int flag;
+{
+	register struct tty *tp = &pt_tty[minor(dev)];
+	struct pt_ioctl *pti = &pt_ioctl[minor(dev)];
+	char buf[BUFSIZ];
+	int error = 0, cc;
+
+	/*
+	 * We want to block until the slave
+	 * is open, and there's something to read;
+	 * but if we lost the slave or we're NBIO,
+	 * then return the appropriate error instead.
+	 */
+	for (;;) {
+		if (tp->t_state & TS_ISOPEN) {
+			if ((pti->pt_flags & PF_PKT) && pti->pt_send) {
+				error = ureadc((int)pti->pt_send, uio);
+				if (error)
+					return (error);
+				if (pti->pt_send & TIOCPKT_IOCTL) {
+					cc = min(uio->uio_resid,
+						sizeof(tp->t_termios));
+					uiomove(&tp->t_termios, cc, uio);
+				}
+				pti->pt_send = 0;
+				return (0);
+			}
+			if ((pti->pt_flags & PF_UCNTL) && pti->pt_ucntl) {
+				error = ureadc((int)pti->pt_ucntl, uio);
+				if (error)
+					return (error);
+				pti->pt_ucntl = 0;
+				return (0);
+			}
+			if (tp->t_outq.c_cc &&
+			    (tp->t_state & TS_TTSTOP) == 0)
+				break;
+		}
+		if ((tp->t_state & TS_CARR_ON) == 0)
+			return (0);	/* EOF */
+		if (flag & IO_NDELAY)
+			return (EWOULDBLOCK);
+		error = tsleep((caddr_t)&tp->t_outq.c_cf, TTIPRI | PCATCH,
+		    ttyin, 0);
+		if (error)
+			return (error);
+	}
+	if (pti->pt_flags & (PF_PKT|PF_UCNTL))
+		error = ureadc(0, uio);
+	/* drain the output queue through buf, BUFSIZ bytes at a time */
+	while (uio->uio_resid > 0 && error == 0) {
+		cc = q_to_b(&tp->t_outq, buf, min(uio->uio_resid, BUFSIZ));
+		if (cc <= 0)
+			break;
+		error = uiomove(buf, cc, uio);
+	}
+	if (tp->t_outq.c_cc <= tp->t_lowat) {
+		if (tp->t_state & TS_ASLEEP) {
+			tp->t_state &= ~TS_ASLEEP;
+			wakeup((caddr_t)&tp->t_outq);
+		}
+		selwakeup(&tp->t_wsel);
+	}
+	return (error);
+}
+
+/*
+ * Stop or flush notification for the slave side.
+ * A zero `flush' means "output stopped": PF_STOPPED is set and
+ * TIOCPKT_STOP is queued for the controller.  A non-zero `flush'
+ * clears PF_STOPPED and its bits are queued as they are.  The
+ * controller is then woken with read and write swapped, since the
+ * slave's read side is the controller's write side and vice versa.
+ */
+void
+ptsstop(tp, flush)
+	register struct tty *tp;
+	int flush;
+{
+	struct pt_ioctl *pti = &pt_ioctl[minor(tp->t_dev)];
+	int flag;
+
+	/* note: FLUSHREAD and FLUSHWRITE already ok */
+	if (flush != 0)
+		pti->pt_flags &= ~PF_STOPPED;
+	else {
+		flush = TIOCPKT_STOP;
+		pti->pt_flags |= PF_STOPPED;
+	}
+	pti->pt_send |= flush;
+	/* change of perspective */
+	flag = ((flush & FREAD) ? FWRITE : 0) |
+	    ((flush & FWRITE) ? FREAD : 0);
+	ptcwakeup(tp, flag);
+}
+
+/*
+ * select() support for the controller side of a pty.
+ * Returns 1 when the requested condition already holds (and always
+ * when carrier is down); otherwise records the selecting process in
+ * pt_selr (read/exceptional) or pt_selw (write) and returns 0.
+ */
+int
+ptcselect(dev, rw, p)
+	dev_t dev;
+	int rw;
+	struct proc *p;
+{
+	register struct tty *tp = &pt_tty[minor(dev)];
+	struct pt_ioctl *pti = &pt_ioctl[minor(dev)];
+	int s, ready;
+
+	if ((tp->t_state & TS_CARR_ON) == 0)
+		return (1);
+	switch (rw) {
+
+	case FREAD:
+		/*
+		 * Need to block timeouts (ttrstart).
+		 */
+		s = spltty();
+		ready = (tp->t_state & TS_ISOPEN) &&
+		    tp->t_outq.c_cc && (tp->t_state & TS_TTSTOP) == 0;
+		splx(s);
+		if (ready)
+			return (1);
+		/* FALLTHROUGH */
+
+	case 0:					/* exceptional */
+		if ((tp->t_state & TS_ISOPEN) &&
+		    (((pti->pt_flags & PF_PKT) && pti->pt_send) ||
+		     ((pti->pt_flags & PF_UCNTL) && pti->pt_ucntl)))
+			return (1);
+		selrecord(p, &pti->pt_selr);
+		break;
+
+
+	case FWRITE:
+		if (tp->t_state & TS_ISOPEN) {
+			if (pti->pt_flags & PF_REMOTE) {
+				/* remote mode takes one write at a time */
+				if (tp->t_canq.c_cc == 0)
+					return (1);
+			} else {
+				if (tp->t_rawq.c_cc + tp->t_canq.c_cc <
+				    TTYHOG-2)
+					return (1);
+				if (tp->t_canq.c_cc == 0 &&
+				    (tp->t_iflag & ICANON))
+					return (1);
+			}
+		}
+		selrecord(p, &pti->pt_selw);
+		break;
+
+	}
+	return (0);
+}
+
+/*
+ * Write to the controller side of a pty, i.e. supply slave input.
+ *
+ * Remote mode (PF_REMOTE): the data is placed directly on the
+ * canonical queue, followed by a 0 marker byte, and readers are woken.
+ * Only one such write may be outstanding; a non-empty canonical queue
+ * blocks.
+ *
+ * Normal mode: the data is copied in BUFSIZ chunks into locbuf and fed
+ * one character at a time to the line discipline's input routine.
+ * When the input queues are full the writer blocks (or, with
+ * IO_NDELAY, returns after giving the unconsumed `cc' bytes back to
+ * uio_resid).
+ *
+ * Returns EIO if carrier is down or the slave closed after data was
+ * copied in, EWOULDBLOCK if nothing could be written in non-blocking
+ * mode, an error from uiomove()/tsleep(), or 0.
+ */
+int
+ptcwrite(dev, uio, flag)
+	dev_t dev;
+	register struct uio *uio;
+	int flag;
+{
+	register struct tty *tp = &pt_tty[minor(dev)];
+	register u_char *cp;
+	register int cc = 0;
+	u_char locbuf[BUFSIZ];
+	int cnt = 0;
+	struct pt_ioctl *pti = &pt_ioctl[minor(dev)];
+	int error = 0;
+
+again:
+	if ((tp->t_state&TS_ISOPEN) == 0)
+		goto block;
+	if (pti->pt_flags & PF_REMOTE) {
+		if (tp->t_canq.c_cc)
+			goto block;
+		while (uio->uio_resid > 0 && tp->t_canq.c_cc < TTYHOG - 1) {
+			if (cc == 0) {
+				cc = min(uio->uio_resid, BUFSIZ);
+				cc = min(cc, TTYHOG - 1 - tp->t_canq.c_cc);
+				cp = locbuf;
+				error = uiomove((caddr_t)cp, cc, uio);
+				if (error)
+					return (error);
+				/* check again for safety */
+				if ((tp->t_state&TS_ISOPEN) == 0)
+					return (EIO);
+			}
+			if (cc)
+				(void) b_to_q((char *)cp, cc, &tp->t_canq);
+			cc = 0;
+		}
+		(void) putc(0, &tp->t_canq);
+		ttwakeup(tp);
+		wakeup((caddr_t)&tp->t_canq);
+		return (0);
+	}
+	/*
+	 * Keep going while there is more to copy in OR while bytes
+	 * already copied into locbuf are still waiting (cc > 0).  The
+	 * second condition matters after a sleep in `block': the last
+	 * chunk may have been copied in (uio_resid is 0) but not yet
+	 * delivered, and testing uio_resid alone would drop it.
+	 */
+	while (uio->uio_resid > 0 || cc > 0) {
+		if (cc == 0) {
+			cc = min(uio->uio_resid, BUFSIZ);
+			cp = locbuf;
+			error = uiomove((caddr_t)cp, cc, uio);
+			if (error)
+				return (error);
+			/* check again for safety */
+			if ((tp->t_state&TS_ISOPEN) == 0)
+				return (EIO);
+		}
+		while (cc > 0) {
+			if ((tp->t_rawq.c_cc + tp->t_canq.c_cc) >= TTYHOG - 2 &&
+			   (tp->t_canq.c_cc > 0 || !(tp->t_iflag&ICANON))) {
+				wakeup((caddr_t)&tp->t_rawq);
+				goto block;
+			}
+			(*linesw[tp->t_line].l_rint)(*cp++, tp);
+			cnt++;
+			cc--;
+		}
+		cc = 0;
+	}
+	return (0);
+block:
+	/*
+	 * Come here to wait for slave to open, for space
+	 * in outq, or space in rawq.
+	 */
+	if ((tp->t_state&TS_CARR_ON) == 0)
+		return (EIO);
+	if (flag & IO_NDELAY) {
+		/* adjust for data copied in but not written */
+		uio->uio_resid += cc;
+		if (cnt == 0)
+			return (EWOULDBLOCK);
+		return (0);
+	}
+	error = tsleep((caddr_t)&tp->t_rawq.c_cf, TTOPRI | PCATCH,
+	    ttyout, 0);
+	if (error) {
+		/* adjust for data copied in but not written */
+		uio->uio_resid += cc;
+		return (error);
+	}
+	goto again;
+}
+
+/*
+ * ioctl entry point shared by both sides of a pty.
+ *
+ * TIOCEXT is handled here for either side.  When called through the
+ * controller device (the cdevsw open routine is ptcopen), the
+ * controller-only commands TIOCGPGRP, TIOCPKT, TIOCUCNTL, TIOCREMOTE
+ * and TIOCSIG are handled directly, and the terminal-setting commands
+ * first discard pending output so they cannot hang.  Everything else
+ * goes to the line discipline and then to ttioctl(); a negative result
+ * from both means "not handled" and becomes ENOTTY, unless user
+ * control mode claims the command as a UIOCCMD.
+ *
+ * Afterwards, packet-mode status is queued for the controller: an
+ * ioctl notification when EXTPROC is on, and DOSTOP/NOSTOP when the
+ * ^S/^Q flow-control setting changed.
+ */
+/*ARGSUSED*/
+ptyioctl(dev, cmd, data, flag, p)
+	dev_t dev;
+	int cmd;
+	caddr_t data;
+	int flag;
+	struct proc *p;
+{
+	register struct tty *tp = &pt_tty[minor(dev)];
+	register struct pt_ioctl *pti = &pt_ioctl[minor(dev)];
+	register u_char *cc = tp->t_cc;
+	int stop, error;
+
+	/*
+	 * IF CONTROLLER STTY THEN MUST FLUSH TO PREVENT A HANG.
+	 * ttywflush(tp) will hang if there are characters in the outq.
+	 */
+	if (cmd == TIOCEXT) {
+		/*
+		 * When the EXTPROC bit is being toggled, we need
+		 * to send a TIOCPKT_IOCTL if the packet driver
+		 * is turned on.
+		 */
+		if (*(int *)data) {
+			if (pti->pt_flags & PF_PKT) {
+				pti->pt_send |= TIOCPKT_IOCTL;
+				ptcwakeup(tp, FREAD);
+			}
+			tp->t_lflag |= EXTPROC;
+		} else {
+			/* EXTPROC lives in t_lflag, not t_state */
+			if ((tp->t_lflag & EXTPROC) &&
+			    (pti->pt_flags & PF_PKT)) {
+				pti->pt_send |= TIOCPKT_IOCTL;
+				ptcwakeup(tp, FREAD);
+			}
+			tp->t_lflag &= ~EXTPROC;
+		}
+		return(0);
+	} else
+	if (cdevsw[major(dev)].d_open == ptcopen)
+		switch (cmd) {
+
+		case TIOCGPGRP:
+			/*
+			 * We avoid calling ttioctl on the controller since,
+			 * in that case, tp must be the controlling terminal.
+			 */
+			*(int *)data = tp->t_pgrp ? tp->t_pgrp->pg_id : 0;
+			return (0);
+
+		case TIOCPKT:
+			/* packet and user-control modes are mutually exclusive */
+			if (*(int *)data) {
+				if (pti->pt_flags & PF_UCNTL)
+					return (EINVAL);
+				pti->pt_flags |= PF_PKT;
+			} else
+				pti->pt_flags &= ~PF_PKT;
+			return (0);
+
+		case TIOCUCNTL:
+			if (*(int *)data) {
+				if (pti->pt_flags & PF_PKT)
+					return (EINVAL);
+				pti->pt_flags |= PF_UCNTL;
+			} else
+				pti->pt_flags &= ~PF_UCNTL;
+			return (0);
+
+		case TIOCREMOTE:
+			if (*(int *)data)
+				pti->pt_flags |= PF_REMOTE;
+			else
+				pti->pt_flags &= ~PF_REMOTE;
+			ttyflush(tp, FREAD|FWRITE);
+			return (0);
+
+#ifdef COMPAT_43
+		case TIOCSETP:		
+		case TIOCSETN:
+#endif
+		case TIOCSETD:
+		case TIOCSETA:
+		case TIOCSETAW:
+		case TIOCSETAF:
+			/* discard queued output, then fall out to the common path */
+			ndflush(&tp->t_outq, tp->t_outq.c_cc);
+			break;
+
+		case TIOCSIG:
+			/*
+			 * Signal numbers run from 1 to NSIG-1; 0 is not a
+			 * deliverable signal and must not reach pgsignal().
+			 */
+			if (*(unsigned int *)data >= NSIG ||
+			    *(unsigned int *)data == 0)
+				return(EINVAL);
+			if ((tp->t_lflag&NOFLSH) == 0)
+				ttyflush(tp, FREAD|FWRITE);
+			pgsignal(tp->t_pgrp, *(unsigned int *)data, 1);
+			if ((*(unsigned int *)data == SIGINFO) &&
+			    ((tp->t_lflag&NOKERNINFO) == 0))
+				ttyinfo(tp);
+			return(0);
+		}
+	error = (*linesw[tp->t_line].l_ioctl)(tp, cmd, data, flag, p);
+	if (error < 0)
+		 error = ttioctl(tp, cmd, data, flag);
+	if (error < 0) {
+		/* user control mode: pass the low byte of a UIOCCMD to the controller */
+		if (pti->pt_flags & PF_UCNTL &&
+		    (cmd & ~0xff) == UIOCCMD(0)) {
+			if (cmd & 0xff) {
+				pti->pt_ucntl = (u_char)cmd;
+				ptcwakeup(tp, FREAD);
+			}
+			return (0);
+		}
+		error = ENOTTY;
+	}
+	/*
+	 * If external processing and packet mode send ioctl packet.
+	 */
+	if ((tp->t_lflag&EXTPROC) && (pti->pt_flags & PF_PKT)) {
+		switch(cmd) {
+		case TIOCSETA:
+		case TIOCSETAW:
+		case TIOCSETAF:
+#ifdef COMPAT_43
+		case TIOCSETP:
+		case TIOCSETN:
+#endif
+#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
+		case TIOCSETC:
+		case TIOCSLTC:
+		case TIOCLBIS:
+		case TIOCLBIC:
+		case TIOCLSET:
+#endif
+			pti->pt_send |= TIOCPKT_IOCTL;
+			ptcwakeup(tp, FREAD);
+			break;
+		default:
+			break;
+		}
+	}
+	/*
+	 * Report a change in whether ^S/^Q flow control is in effect;
+	 * PF_NOSTOP remembers which state was reported last.
+	 */
+	stop = (tp->t_iflag & IXON) && CCEQ(cc[VSTOP], CTRL('s')) 
+		&& CCEQ(cc[VSTART], CTRL('q'));
+	if (pti->pt_flags & PF_NOSTOP) {
+		if (stop) {
+			pti->pt_send &= ~TIOCPKT_NOSTOP;
+			pti->pt_send |= TIOCPKT_DOSTOP;
+			pti->pt_flags &= ~PF_NOSTOP;
+			ptcwakeup(tp, FREAD);
+		}
+	} else {
+		if (!stop) {
+			pti->pt_send &= ~TIOCPKT_DOSTOP;
+			pti->pt_send |= TIOCPKT_NOSTOP;
+			pti->pt_flags |= PF_NOSTOP;
+			ptcwakeup(tp, FREAD);
+		}
+	}
+	return (error);
+}
diff --git a/sys/kern/tty_subr.c b/sys/kern/tty_subr.c
new file mode 100644
index 00000000000..fe8f000f87d
--- /dev/null
+++ b/sys/kern/tty_subr.c
@@ -0,0 +1,159 @@
+/*-
+ * Copyright (c) 1982, 1986, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	from: @(#)tty_subr.c	8.2 (Berkeley) 9/5/93
+ */
+
+#include <sys/param.h>
+#include <sys/ioctl.h>
+#include <sys/tty.h>
+
+char cwaiting;				/* NOTE(review): presumably clist bookkeeping; none of */
+struct cblock *cfree, *cfreelist;	/* these globals is referenced by the stubbed routines */
+int cfreecount, nclist;			/* below -- confirm against the real implementation */
+
+void
+clist_init()
+{
+
+	/*
+	 * Body deleted.
+	 * This stub performs no initialization at all.
+	 */
+	return;
+}
+
+getc(a1)
+	struct clist *a1;
+{
+
+	/*
+	 * Body deleted.
+	 * This stub leaves the clist untouched and always returns 0.
+	 */
+	return ((char)0);
+}
+
+q_to_b(a1, a2, a3)
+	struct clist *a1;
+	char *a2;
+	int a3;
+{
+
+	/*
+	 * Body deleted.
+	 * This stub copies nothing into the buffer and always returns 0.
+	 */
+	return (0);
+}
+
+ndqb(a1, a2)
+	struct clist *a1;
+	int a2;
+{
+
+	/*
+	 * Body deleted.
+	 * This stub ignores its arguments and always returns 0.
+	 */
+	return (0);
+}
+
+void
+ndflush(a1, a2)
+	struct clist *a1;
+	int a2;
+{
+
+	/*
+	 * Body deleted.
+	 * This stub removes nothing from the clist.
+	 */
+	return;
+}
+
+putc(a1, a2)
+	char a1;
+	struct clist *a2;
+{
+
+	/*
+	 * Body deleted.
+	 * This stub stores nothing in the clist and always returns 0.
+	 */
+	return (0);
+}
+
+b_to_q(a1, a2, a3)
+	char *a1;
+	int a2;
+	struct clist *a3;
+{
+
+	/*
+	 * Body deleted.
+	 * This stub queues nothing and always returns 0.
+	 */
+	return (0);
+}
+
+char *
+nextc(a1, a2, a3)
+	struct clist *a1;
+	char *a2;
+	int *a3;
+{
+
+	/*
+	 * Body deleted.
+	 * This stub always returns a null pointer and never writes *a3.
+	 */
+	return ((char *)0);
+}
+
+unputc(a1)
+	struct clist *a1;
+{
+
+	/*
+	 * Body deleted.
+	 * This stub leaves the clist untouched and always returns 0.
+	 */
+	return ((char)0);
+}
+
+void
+catq(a1, a2)
+	struct clist *a1, *a2;
+{
+
+	/*
+	 * Body deleted.
+	 * This stub moves nothing between the two clists.
+	 */
+	return;
+}
diff --git a/sys/kern/tty_tb.c b/sys/kern/tty_tb.c
new file mode 100644
index 00000000000..242301a52e8
--- /dev/null
+++ b/sys/kern/tty_tb.c
@@ -0,0 +1,366 @@
+/*-
+ * Copyright (c) 1982, 1986, 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)tty_tb.c	8.1 (Berkeley) 6/10/93
+ */
+
+#include "tb.h"
+#if NTB > 0
+
+/*
+ * Line discipline for RS232 tablets;
+ * supplies binary coordinate data.
+ */
+#include <sys/param.h>
+#include <sys/tablet.h>
+#include <sys/tty.h>
+
+/*
+ * Tablet configuration table.
+ *
+ * One entry describes one tablet type.  tbinput() collects
+ * tbc_recsize bytes and then calls tbc_decode; tbread() returns
+ * tbc_uiosize bytes; tbioctl() sends the run/point/stop/start
+ * strings with ttyout() when the mode changes (a null string pointer
+ * means nothing is sent).
+ */
+struct	tbconf {
+	short	tbc_recsize;	/* input record size in bytes */
+	short	tbc_uiosize;	/* size of data record returned user */
+	int	tbc_sync;	/* mask for finding sync byte/bit */
+	int	(*tbc_decode)();/* decoding routine */
+	char	*tbc_run;	/* enter run mode sequence */
+	char	*tbc_point;	/* enter point mode sequence */
+	char	*tbc_stop;	/* stop sequence */
+	char	*tbc_start;	/* start/restart sequence */
+	int	tbc_flags;	/* TBF_* bits below */
+#define	TBF_POL		0x1	/* polhemus hack */
+#define	TBF_INPROX	0x2	/* tablet has proximity info */
+};
+
+static	int tbdecode(), gtcodecode(), poldecode();	/* tbc_decode routines, */
+static	int tblresdecode(), tbhresdecode();		/* defined further down */
+
+/*
+ * Per-type tablet configuration, indexed by (tbflags & TBTYPE).
+ *
+ * The array is sized TBTYPE + 1 because the index is formed by
+ * masking with TBTYPE and can therefore be as large as TBTYPE itself;
+ * with only TBTYPE elements that index would be one past the end.
+ * Entries without an initializer are zero, so tbioctl(BIOSTYPE)
+ * rejects them (tbc_recsize == 0) and tbinput() ignores them.
+ */
+struct	tbconf tbconf[TBTYPE + 1] = {
+{ 0 },						/* type 0: unused */
+{ 5, sizeof (struct tbpos), 0200, tbdecode, "6", "4" },
+{ 5, sizeof (struct tbpos), 0200, tbdecode, "\1CN", "\1RT", "\2", "\4" },
+{ 8, sizeof (struct gtcopos), 0200, gtcodecode },
+{17, sizeof (struct polpos), 0200, poldecode, 0, 0, "\21", "\5\22\2\23",
+  TBF_POL },
+{ 5, sizeof (struct tbpos), 0100, tblresdecode, "\1CN", "\1PT", "\2", "\4",
+  TBF_INPROX },
+{ 6, sizeof (struct tbpos), 0200, tbhresdecode, "\1CN", "\1PT", "\2", "\4",
+  TBF_INPROX },
+{ 5, sizeof (struct tbpos), 0100, tblresdecode, "\1CL\33", "\1PT\33", 0, 0},
+{ 6, sizeof (struct tbpos), 0200, tbhresdecode, "\1CL\33", "\1PT\33", 0, 0},
+};
+
+/*
+ * Tablet state
+ *
+ * One slot per tty currently using the tablet discipline.  tbopen()
+ * claims the first slot whose tbflags is 0 and tbclose() releases it
+ * by zeroing tbflags again.
+ */
+struct tb {
+	int	tbflags;		/* mode & type bits */
+#define	TBMAXREC	17	/* max input record size */
+	char	cbuf[TBMAXREC];		/* input buffer */
+	union {
+		struct	tbpos tbpos;
+		struct	gtcopos gtcopos;
+		struct	polpos polpos;
+	} rets;				/* processed state */
+#define NTBS	16			/* number of tablet state slots */
+} tb[NTBS];
+
+/*
+ * Open as tablet discipline; called on discipline change.
+ *
+ * Fails with ENODEV if the tty is already in the tablet discipline
+ * and with EBUSY if all NTBS state slots are in use.  Otherwise
+ * pending output is flushed, a free slot is claimed with the default
+ * type and mode, and the tty's input pointer, input count and
+ * discipline pointer are attached to it.
+ */
+/*ARGSUSED*/
+int
+tbopen(dev, tp)
+	dev_t dev;
+	register struct tty *tp;
+{
+	register struct tb *tbp;
+	register int slot;
+
+	if (tp->t_line == TABLDISC)
+		return (ENODEV);
+	ttywflush(tp);
+	/* a slot is free when its flag word is zero */
+	for (slot = 0; slot < NTBS; slot++) {
+		if (tb[slot].tbflags == 0)
+			break;
+	}
+	if (slot >= NTBS)
+		return (EBUSY);
+	tbp = &tb[slot];
+	tbp->tbflags = TBTIGER|TBPOINT;		/* default */
+	tp->t_cp = tbp->cbuf;
+	tp->t_inbuf = 0;
+	bzero((caddr_t)&tbp->rets, sizeof (tbp->rets));
+	tp->T_LINEP = (caddr_t)tbp;
+	tp->t_flags |= LITOUT;
+	return (0);
+}
+
+/*
+ * Line discipline change or last device close.
+ *
+ * Puts the tablet into point mode and stops it, then, at spltty,
+ * releases the state slot and resets the tty's input bookkeeping and
+ * line discipline.
+ *
+ * Returns 0.  The routine is installed as an l_close entry, and
+ * callers such as ptsclose() use the result, so it must return a
+ * defined value rather than fall off the end.
+ */
+int
+tbclose(tp)
+	register struct tty *tp;
+{
+	register int s;
+	int modebits = TBPOINT|TBSTOP;
+
+	tbioctl(tp, BIOSMODE, &modebits, 0);
+	s = spltty();
+	((struct tb *)tp->T_LINEP)->tbflags = 0;	/* free the slot */
+	tp->t_cp = 0;
+	tp->t_inbuf = 0;
+	tp->t_rawq.c_cc = 0;		/* clear queues -- paranoid */
+	tp->t_canq.c_cc = 0;
+	tp->t_line = 0;			/* paranoid: avoid races */
+	splx(s);
+	return (0);
+}
+
+/*
+ * Read from a tablet line.
+ * Characters have been buffered in a buffer and decoded.
+ *
+ * Copies out the most recently decoded record (tbc_uiosize bytes for
+ * the current tablet type).  Returns EIO when carrier is down,
+ * otherwise the result of uiomove().  For Polhemus-type tablets the
+ * stored key is reset to a blank after each read.
+ */
+int
+tbread(tp, uio)
+	register struct tty *tp;
+	struct uio *uio;
+{
+	register struct tb *state = (struct tb *)tp->T_LINEP;
+	register struct tbconf *conf = &tbconf[state->tbflags & TBTYPE];
+	int error;
+
+	if ((tp->t_state & TS_CARR_ON) == 0)
+		return (EIO);
+	error = uiomove(&state->rets, conf->tbc_uiosize, uio);
+	if (conf->tbc_flags & TBF_POL)
+		state->rets.polpos.p_key = ' ';
+	return (error);
+}
+
+/*
+ * Low level character input routine.
+ * Stuff the character in the buffer, and decode
+ * if all the chars are there.
+ *
+ * This routine could be expanded in-line in the receiver
+ * interrupt routine to make it run as fast as possible.
+ */
+tbinput(c, tp)
+	register int c;
+	register struct tty *tp;
+{
+	register struct tb *state = (struct tb *)tp->T_LINEP;
+	register struct tbconf *conf = &tbconf[state->tbflags & TBTYPE];
+
+	/* paranoid?  ignore input for a type with no record size/decoder */
+	if (conf->tbc_recsize == 0 || conf->tbc_decode == 0)
+		return;
+	/*
+	 * Locate sync bit/byte or reset input buffer.
+	 */
+	if ((c & conf->tbc_sync) || tp->t_inbuf == conf->tbc_recsize) {
+		tp->t_cp = state->cbuf;
+		tp->t_inbuf = 0;
+	}
+	/* only the low seven bits of each byte are stored */
+	*tp->t_cp++ = c & 0177;
+	/*
+	 * Call decode routine only if a full record has been collected.
+	 */
+	if (++tp->t_inbuf == conf->tbc_recsize)
+		(*conf->tbc_decode)(conf, state->cbuf, &state->rets);
+}
+
+/*
+ * Decode GTCO 8 byte format (high res, tilt, and pressure).
+ *
+ * Byte 0 holds the pressure (upper bits) and the top two x bits;
+ * bytes 1-2 the rest of x; bytes 3-5 y in the same layout; bytes 6
+ * and 7 the x and y tilt.  The sample count is bumped per record.
+ */
+static
+gtcodecode(tc, cp, tbpos)
+	struct tbconf *tc;
+	register char *cp;
+	register struct gtcopos *tbpos;
+{
+
+	tbpos->pressure = cp[0] >> 2;
+	tbpos->status = (tbpos->pressure > 16) | TBINPROX; /* half way down */
+	tbpos->xpos = (cp[0] & 03) << 14;
+	tbpos->xpos |= cp[1] << 7;
+	tbpos->xpos |= cp[2];
+	tbpos->ypos = (cp[3] & 03) << 14;
+	tbpos->ypos |= cp[4] << 7;
+	tbpos->ypos |= cp[5];
+	tbpos->xtilt = cp[6];
+	tbpos->ytilt = cp[7];
+	tbpos->scount++;
+}
+
+/*
+ * Decode old Hitachi 5 byte format (low res).
+ *
+ * Byte 0 carries the proximity bit (0100) and a button code; bytes
+ * 1-2 hold x and bytes 3-4 hold y, seven bits per byte, high part
+ * first.  The sample count is bumped per record.
+ */
+static
+tbdecode(tc, cp, tbpos)
+	struct tbconf *tc;
+	register char *cp;
+	register struct tbpos *tbpos;
+{
+	register char code;
+
+	code = cp[0];
+	tbpos->status = (code & 0100) ? TBINPROX : 0;
+	code &= ~0100;
+	if (code > 036)
+		tbpos->status |= 1 << ((code - 040) / 2);
+	tbpos->xpos = cp[1] << 7;
+	tbpos->xpos |= cp[2];
+	if (tbpos->xpos < 256)			/* tablet wraps around at 256 */
+		tbpos->status &= ~TBINPROX;	/* make it out of proximity */
+	tbpos->ypos = cp[3] << 7;
+	tbpos->ypos |= cp[4];
+	tbpos->scount++;
+}
+
+/*
+ * Decode new Hitachi 5-byte format (low res).
+ *
+ * Byte 0 (after its sync bit is cleared in place) supplies the status
+ * bits; bytes 1-2 hold x and bytes 3-4 hold y, low six bits first.
+ * For tablets flagged TBF_INPROX, status bit 020 means "out of
+ * proximity" and clears both itself and TBINPROX.
+ */
+static
+tblresdecode(tc, cp, tbpos)
+	struct tbconf *tc;
+	register char *cp;
+	register struct tbpos *tbpos;
+{
+
+	cp[0] &= ~0100;		/* mask sync bit */
+	tbpos->status = (cp[0] >> 2) | TBINPROX;
+	if ((tc->tbc_flags & TBF_INPROX) && (tbpos->status & 020))
+		tbpos->status &= ~(020|TBINPROX);
+	tbpos->xpos = cp[1];
+	tbpos->xpos |= cp[2] << 6;
+	tbpos->ypos = cp[3];
+	tbpos->ypos |= cp[4] << 6;
+	tbpos->scount++;
+}
+
+/*
+ * Decode new Hitachi 6-byte format (high res).
+ *
+ * Byte 0 supplies the status bits and the top two x bits; bytes 1-2
+ * the rest of x; bytes 3-5 hold y, high part first.  For tablets
+ * flagged TBF_INPROX, status bit 020 means "out of proximity" and
+ * clears both itself and TBINPROX.
+ */
+static
+tbhresdecode(tc, cp, tbpos)
+	struct tbconf *tc;
+	register char *cp;
+	register struct tbpos *tbpos;
+{
+	char first;
+
+	first = cp[0];
+	tbpos->xpos = (first & 03) << 14;
+	tbpos->xpos |= cp[1] << 7;
+	tbpos->xpos |= cp[2];
+	tbpos->ypos = cp[3] << 14;
+	tbpos->ypos |= cp[4] << 7;
+	tbpos->ypos |= cp[5];
+	tbpos->status = (first >> 2) | TBINPROX;
+	if ((tc->tbc_flags & TBF_INPROX) && (tbpos->status & 020))
+		tbpos->status &= ~(020|TBINPROX);
+	tbpos->scount++;
+}
+
+/*
+ * Polhemus decode.
+ *
+ * Each of the six coordinates is assembled from two 7-bit bytes plus
+ * two extra high bits taken from byte 9 (x, y, z) or byte 16
+ * (azimuth, pitch, roll).  Bytes 0-1 form the status word.  The key
+ * in byte 2 is recorded only when it is not a blank, so the last key
+ * seen stays put until tbread() resets it.
+ */
+static
+poldecode(tc, cp, polpos)
+	struct tbconf *tc;
+	register char *cp;
+	register struct polpos *polpos;
+{
+
+	polpos->p_x = cp[4] | (cp[3] << 7) | ((cp[9] & 0x03) << 14);
+	polpos->p_y = cp[6] | (cp[5] << 7) | ((cp[9] & 0x0c) << 12);
+	polpos->p_z = cp[8] | (cp[7] << 7) | ((cp[9] & 0x30) << 10);
+	polpos->p_azi = cp[11] | (cp[10] << 7) | ((cp[16] & 0x03) << 14);
+	polpos->p_pit = cp[13] | (cp[12] << 7) | ((cp[16] & 0x0c) << 12);
+	polpos->p_rol = cp[15] | (cp[14] << 7) | ((cp[16] & 0x30) << 10);
+	polpos->p_stat = cp[1] | (cp[0] << 7);
+	if (cp[2] != ' ')
+		polpos->p_key = cp[2];
+}
+
+/*
+ * Tablet discipline ioctl routine.
+ *
+ * BIOGMODE/BIOGTYPE return the mode or type bits of tbflags.
+ * BIOSTYPE selects a tablet type (EINVAL if the table entry has no
+ * record size or decoder) and then, like BIOSMODE, installs the mode
+ * bits from *data and sends the matching stop/start and point/run
+ * strings to the tablet.  TIOCSETD, TIOCGETD, TIOCGETP and TIOCGETC
+ * return -1 so the caller passes them on to the generic tty code;
+ * anything else is ENOTTY.
+ */
+/*ARGSUSED*/
+int
+tbioctl(tp, cmd, data, flag)
+	struct tty *tp;
+	int cmd;
+	caddr_t data;
+	int flag;
+{
+	register struct tb *tbp = (struct tb *)tp->T_LINEP;
+
+	switch (cmd) {
+
+	case BIOGMODE:
+		*(int *)data = tbp->tbflags & TBMODE;
+		break;
+
+	case BIOSTYPE:
+		if (tbconf[*(int *)data & TBTYPE].tbc_recsize == 0 ||
+		    tbconf[*(int *)data & TBTYPE].tbc_decode == 0)
+			return (EINVAL);
+		tbp->tbflags &= ~TBTYPE;
+		tbp->tbflags |= *(int *)data & TBTYPE;
+		/* fall thru... to set mode bits */
+
+	case BIOSMODE: {
+		register struct tbconf *tc;
+
+		tbp->tbflags &= ~TBMODE;
+		tbp->tbflags |= *(int *)data & TBMODE;
+		tc = &tbconf[tbp->tbflags & TBTYPE];
+		/* stop or (re)start the tablet, if it has such a string */
+		if (tbp->tbflags & TBSTOP) {
+			if (tc->tbc_stop)
+				ttyout(tc->tbc_stop, tp);
+		} else if (tc->tbc_start) {
+			ttyout(tc->tbc_start, tp);
+		}
+		/* then select point or run mode the same way */
+		if (tbp->tbflags & TBPOINT) {
+			if (tc->tbc_point)
+				ttyout(tc->tbc_point, tp);
+		} else if (tc->tbc_run) {
+			ttyout(tc->tbc_run, tp);
+		}
+		ttstart(tp);
+		break;
+	}
+
+	case BIOGTYPE:
+		*(int *)data = tbp->tbflags & TBTYPE;
+		break;
+
+	case TIOCSETD:
+	case TIOCGETD:
+	case TIOCGETP:
+	case TIOCGETC:
+		return (-1);		/* pass thru... */
+
+	default:
+		return (ENOTTY);
+	}
+	return (0);
+}
+#endif
diff --git a/sys/kern/tty_tty.c b/sys/kern/tty_tty.c
new file mode 100644
index 00000000000..964fc6f6d5e
--- /dev/null
+++ b/sys/kern/tty_tty.c
@@ -0,0 +1,147 @@
+/*-
+ * Copyright (c) 1982, 1986, 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)tty_tty.c	8.2 (Berkeley) 9/23/93
+ */
+
+/*
+ * Indirect driver for controlling tty.
+ */
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/conf.h>
+#include <sys/ioctl.h>
+#include <sys/proc.h>
+#include <sys/tty.h>
+#include <sys/vnode.h>
+#include <sys/file.h>
+
+/*
+ * Controlling-terminal vnode of process p: the session's s_ttyvp when
+ * P_CONTROLT is set in p_flag, otherwise NULL.  The argument is
+ * evaluated twice.
+ */
+#define cttyvp(p) ((p)->p_flag & P_CONTROLT ? (p)->p_session->s_ttyvp : NULL)
+
+/*
+ * Open the controlling terminal of process p.
+ *
+ * Returns ENXIO when cttyvp(p) yields no vnode; otherwise the result
+ * of VOP_OPEN() on that vnode, issued with the vnode locked.  When
+ * PARANOID is defined, VOP_ACCESS() is consulted first and VOP_OPEN()
+ * is only attempted if the access check succeeds.
+ */
+/*ARGSUSED*/
+cttyopen(dev, flag, mode, p)
+	dev_t dev;
+	int flag, mode;
+	struct proc *p;
+{
+	struct vnode *ttyvp = cttyvp(p);
+	int error;
+
+	if (ttyvp == NULL)
+		return (ENXIO);
+	VOP_LOCK(ttyvp);
+#ifdef PARANOID
+	/*
+	 * Since group is tty and mode is 620 on most terminal lines
+	 * and since sessions protect terminals from processes outside
+	 * your session, this check is probably no longer necessary.
+	 * Since it inhibits setuid root programs that later switch
+	 * to another user from accessing /dev/tty, we have decided
+	 * to delete this test. (mckusick 5/93)
+	 */
+	error = VOP_ACCESS(ttyvp,
+	  (flag&FREAD ? VREAD : 0) | (flag&FWRITE ? VWRITE : 0), p->p_ucred, p);
+	if (!error)
+#endif /* PARANOID */
+		/* unconditional unless PARANOID supplies the "if" above */
+		error = VOP_OPEN(ttyvp, flag, NOCRED, p);
+	VOP_UNLOCK(ttyvp);
+	return (error);
+}
+
+/*
+ * Read from the controlling terminal of the process that owns uio.
+ * Returns EIO if that process has no controlling terminal vnode,
+ * otherwise whatever VOP_READ() returns; the vnode is locked for the
+ * duration of the read.
+ */
+/*ARGSUSED*/
+cttyread(dev, uio, flag)
+	dev_t dev;
+	struct uio *uio;
+	int flag;
+{
+	register struct vnode *vp;
+	int rv;
+
+	vp = cttyvp(uio->uio_procp);
+	if (vp != NULL) {
+		VOP_LOCK(vp);
+		rv = VOP_READ(vp, uio, flag, NOCRED);
+		VOP_UNLOCK(vp);
+	} else
+		rv = EIO;
+	return (rv);
+}
+
+/*
+ * Write to the controlling terminal of the process that owns uio.
+ * Returns EIO if that process has no controlling terminal vnode,
+ * otherwise whatever VOP_WRITE() returns; the vnode is locked for the
+ * duration of the write.
+ */
+/*ARGSUSED*/
+cttywrite(dev, uio, flag)
+	dev_t dev;
+	struct uio *uio;
+	int flag;
+{
+	register struct vnode *vp;
+	int rv;
+
+	vp = cttyvp(uio->uio_procp);
+	if (vp != NULL) {
+		VOP_LOCK(vp);
+		rv = VOP_WRITE(vp, uio, flag, NOCRED);
+		VOP_UNLOCK(vp);
+	} else
+		rv = EIO;
+	return (rv);
+}
+
+/*
+ * ioctl on the controlling terminal of process p.
+ *
+ * Returns EIO when there is no controlling terminal vnode.
+ * TIOCNOTTY is handled here: it clears P_CONTROLT for any process that
+ * is not a session leader and is refused with EINVAL for a leader.
+ * Every other request is forwarded to the terminal vnode with
+ * VOP_IOCTL().
+ */
+/*ARGSUSED*/
+cttyioctl(dev, cmd, addr, flag, p)
+	dev_t dev;
+	int cmd;
+	caddr_t addr;
+	int flag;
+	struct proc *p;
+{
+	struct vnode *vp;
+
+	vp = cttyvp(p);
+	if (vp == NULL)
+		return (EIO);
+	if (cmd != TIOCNOTTY)
+		return (VOP_IOCTL(vp, cmd, addr, flag, NOCRED, p));
+	if (SESS_LEADER(p))
+		return (EINVAL);
+	p->p_flag &= ~P_CONTROLT;
+	return (0);
+}
+
+/*
+ * select on the controlling terminal of process p.
+ * With no controlling terminal vnode this reports "ready" (1) so that
+ * the caller goes on to try the operation and sees the EOF/failure
+ * there; otherwise the answer comes from VOP_SELECT().
+ */
+/*ARGSUSED*/
+cttyselect(dev, flag, p)
+	dev_t dev;
+	int flag;
+	struct proc *p;
+{
+	struct vnode *vp;
+
+	vp = cttyvp(p);
+	if (vp != NULL)
+		return (VOP_SELECT(vp, flag, FREAD|FWRITE, NOCRED, p));
+	return (1);	/* try operation to get EOF/failure */
+}
diff --git a/sys/kern/uipc_domain.c b/sys/kern/uipc_domain.c
new file mode 100644
index 00000000000..8834dbf4442
--- /dev/null
+++ b/sys/kern/uipc_domain.c
@@ -0,0 +1,220 @@
+/*
+ * Copyright (c) 1982, 1986, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)uipc_domain.c	8.2 (Berkeley) 10/18/93
+ */
+
+#include <sys/param.h>
+#include <sys/socket.h>
+#include <sys/protosw.h>
+#include <sys/domain.h>
+#include <sys/mbuf.h>
+#include <sys/time.h>
+#include <sys/kernel.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <vm/vm.h>
+#include <sys/sysctl.h>
+
+void	pffasttimo __P((void *));
+void	pfslowtimo __P((void *));
+
+/*
+ * Push the domain named <x>domain onto the front of the global
+ * "domains" list.  The extern declaration is emitted inside the
+ * expansion, so the macro can only be used where a block is legal.
+ */
+#define	ADDDOMAIN(x)	{ \
+	extern struct domain __CONCAT(x,domain); \
+	__CONCAT(x,domain.dom_next) = domains; \
+	domains = &__CONCAT(x,domain); \
+}
+
+/*
+ * Build the list of communication domains and initialize them.
+ *
+ * The unix and route domains are always added; inet, ns, iso, ccitt
+ * and imp are added when the matching option is configured.  Each
+ * domain's dom_init hook and every protocol's pr_init hook are then
+ * called, the header-size globals are computed, and the fast and slow
+ * protocol timers are started.
+ */
+domaininit()
+{
+	register struct domain *dp;
+	register struct protosw *pr;
+
+	/* "unix" may be a predefined macro; it must not expand in ADDDOMAIN */
+#undef unix
+#ifndef lint
+	ADDDOMAIN(unix);
+	ADDDOMAIN(route);
+#ifdef INET
+	ADDDOMAIN(inet);
+#endif
+#ifdef NS
+	ADDDOMAIN(ns);
+#endif
+#ifdef ISO
+	ADDDOMAIN(iso);
+#endif
+#ifdef CCITT
+	ADDDOMAIN(ccitt);
+#endif
+#include "imp.h"
+#if NIMP > 0
+	ADDDOMAIN(imp);
+#endif
+#endif
+
+	/* run the per-domain hook, then each protocol's init hook */
+	for (dp = domains; dp; dp = dp->dom_next) {
+		if (dp->dom_init)
+			(*dp->dom_init)();
+		for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++)
+			if (pr->pr_init)
+				(*pr->pr_init)();
+	}
+
+	/* enforce a floor of 16 on max_linkhdr before deriving the rest */
+if (max_linkhdr < 16)		/* XXX */
+max_linkhdr = 16;
+	max_hdr = max_linkhdr + max_protohdr;
+	max_datalen = MHLEN - max_hdr;
+	/* first timer runs after one tick; the handlers re-arm themselves */
+	timeout(pffasttimo, (void *)0, 1);
+	timeout(pfslowtimo, (void *)0, 1);
+}
+
+/*
+ * Find the first protocol of the given socket type in the domain whose
+ * dom_family equals family.  Returns a null pointer when the family is
+ * unknown or has no such protocol.  Entries with a zero pr_type never
+ * match.
+ */
+struct protosw *
+pffindtype(family, type)
+	int family, type;
+{
+	register struct domain *dp;
+	register struct protosw *pr;
+
+	for (dp = domains; dp; dp = dp->dom_next)
+		if (dp->dom_family == family)
+			break;
+	if (dp == 0)
+		return (0);
+
+	pr = dp->dom_protosw;
+	while (pr < dp->dom_protoswNPROTOSW) {
+		if (pr->pr_type && pr->pr_type == type)
+			return (pr);
+		pr++;
+	}
+	return (0);
+}
+
+/*
+ * Find the protocol switch entry matching both protocol number and
+ * socket type within the given family.  For SOCK_RAW requests with no
+ * exact match, the first raw entry with protocol 0 in that family is
+ * returned instead.  Returns a null pointer for family 0, an unknown
+ * family, or no match at all.
+ */
+struct protosw *
+pffindproto(family, protocol, type)
+	int family, protocol, type;
+{
+	register struct domain *dp;
+	register struct protosw *pr;
+	struct protosw *wild = 0;	/* raw wildcard fallback, if any */
+
+	if (family == 0)
+		return (0);
+	for (dp = domains; dp; dp = dp->dom_next)
+		if (dp->dom_family == family)
+			break;
+	if (dp == 0)
+		return (0);
+
+	pr = dp->dom_protosw;
+	while (pr < dp->dom_protoswNPROTOSW) {
+		if (pr->pr_type == type && pr->pr_protocol == protocol)
+			return (pr);
+		/* remember only the first wildcard raw entry */
+		if (wild == (struct protosw *)0 && type == SOCK_RAW &&
+		    pr->pr_type == SOCK_RAW && pr->pr_protocol == 0)
+			wild = pr;
+		pr++;
+	}
+	return (wild);
+}
+
+/*
+ * sysctl dispatcher for the network subtree.
+ *
+ * name[0] selects the protocol family and name[1] the protocol; the
+ * remaining components (name + 2, namelen - 2) are handed to that
+ * protocol's pr_sysctl routine together with the old/new buffers.
+ * Returns EISDIR when fewer than three components are supplied, 0 for
+ * family 0, ENOPROTOOPT when the family is unknown or no protocol with
+ * that number has a pr_sysctl handler, otherwise the handler's result.
+ * The proc pointer p is not used here.
+ */
+net_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p)
+	int *name;
+	u_int namelen;
+	void *oldp;
+	size_t *oldlenp;
+	void *newp;
+	size_t newlen;
+	struct proc *p;
+{
+	register struct domain *dp;
+	register struct protosw *pr;
+	int family, protocol;
+
+	/*
+	 * All sysctl names at this level are nonterminal;
+	 * next two components are protocol family and protocol number,
+	 * then at least one additional component.
+	 */
+	if (namelen < 3)
+		return (EISDIR);		/* overloaded */
+	family = name[0];
+	protocol = name[1];
+
+	if (family == 0)
+		return (0);
+	for (dp = domains; dp; dp = dp->dom_next)
+		if (dp->dom_family == family)
+			goto found;
+	return (ENOPROTOOPT);
+found:
+	/* the first entry with this protocol number and a handler wins */
+	for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++)
+		if (pr->pr_protocol == protocol && pr->pr_sysctl)
+			return ((*pr->pr_sysctl)(name + 2, namelen - 2,
+			    oldp, oldlenp, newp, newlen));
+	return (ENOPROTOOPT);
+}
+
+/*
+ * Deliver a control-input event (cmd, sa) to every protocol in every
+ * domain that provides a pr_ctlinput routine.  The third argument
+ * passed to each routine is always a null caddr_t.
+ */
+pfctlinput(cmd, sa)
+	int cmd;
+	struct sockaddr *sa;
+{
+	register struct domain *dp;
+	register struct protosw *pr;
+
+	for (dp = domains; dp; dp = dp->dom_next) {
+		pr = dp->dom_protosw;
+		while (pr < dp->dom_protoswNPROTOSW) {
+			if (pr->pr_ctlinput)
+				(*pr->pr_ctlinput)(cmd, sa, (caddr_t)0);
+			pr++;
+		}
+	}
+}
+
+/*
+ * Slow protocol timer: call the pr_slowtimo routine of every protocol
+ * that has one, then re-arm via timeout() with a delay of hz/2.
+ * arg is ignored.
+ */
+void
+pfslowtimo(arg)
+	void *arg;
+{
+	register struct domain *dp;
+	register struct protosw *pr;
+
+	for (dp = domains; dp; dp = dp->dom_next) {
+		pr = dp->dom_protosw;
+		while (pr < dp->dom_protoswNPROTOSW) {
+			if (pr->pr_slowtimo)
+				(*pr->pr_slowtimo)();
+			pr++;
+		}
+	}
+	timeout(pfslowtimo, (void *)0, hz/2);
+}
+
+/*
+ * Fast protocol timer: call the pr_fasttimo routine of every protocol
+ * that has one, then re-arm via timeout() with a delay of hz/5.
+ * arg is ignored.
+ */
+void
+pffasttimo(arg)
+	void *arg;
+{
+	register struct domain *dp;
+	register struct protosw *pr;
+
+	for (dp = domains; dp; dp = dp->dom_next) {
+		pr = dp->dom_protosw;
+		while (pr < dp->dom_protoswNPROTOSW) {
+			if (pr->pr_fasttimo)
+				(*pr->pr_fasttimo)();
+			pr++;
+		}
+	}
+	timeout(pffasttimo, (void *)0, hz/5);
+}
diff --git a/sys/kern/uipc_mbuf.c b/sys/kern/uipc_mbuf.c
new file mode 100644
index 00000000000..b71c6345e36
--- /dev/null
+++ b/sys/kern/uipc_mbuf.c
@@ -0,0 +1,655 @@
+/*
+ * Copyright (c) 1982, 1986, 1988, 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)uipc_mbuf.c	8.2 (Berkeley) 1/4/94
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/malloc.h>
+#include <sys/map.h>
+#define MBTYPES
+#include <sys/mbuf.h>
+#include <sys/kernel.h>
+#include <sys/syslog.h>
+#include <sys/domain.h>
+#include <sys/protosw.h>
+
+#include <vm/vm.h>
+
+extern	vm_map_t mb_map;
+struct	mbuf *mbutl;
+char	*mclrefcnt;
+
+/*
+ * Prime the mbuf cluster pool at boot: allocate NCL_INIT clusters
+ * (4096 bytes' worth when CLBYTES is smaller than that, otherwise one)
+ * at splimp, and panic if that first allocation cannot be satisfied.
+ */
+mbinit()
+{
+	int s;
+
+#if CLBYTES < 4096
+#define NCL_INIT	(4096/CLBYTES)
+#else
+#define NCL_INIT	1
+#endif
+	s = splimp();
+	if (m_clalloc(NCL_INIT, M_DONTWAIT) != 0) {
+		splx(s);
+		return;
+	}
+	panic("mbinit");
+}
+
+/*
+ * Allocate some number of mbuf clusters
+ * and place on cluster free list.
+ * Must be called at splimp.
+ *
+ * ncl is counted in units of CLBYTES; the memory comes from mb_map via
+ * kmem_malloc(), which is allowed to wait only when nowait is zero.
+ * Returns 1 on success and 0 when the map cannot supply the space (the
+ * "mb_map full" message is logged only the first time).
+ */
+/* ARGSUSED */
+m_clalloc(ncl, nowait)
+	register int ncl;
+	int nowait;
+{
+	static int logged;
+	register caddr_t p;
+	register int i;
+	int npg;
+
+	npg = ncl * CLSIZE;
+	p = (caddr_t)kmem_malloc(mb_map, ctob(npg), !nowait);
+	if (p == NULL) {
+		if (logged == 0) {
+			logged++;
+			log(LOG_ERR, "mb_map full\n");
+		}
+		return (0);
+	}
+	/* convert the count from CLBYTES units to MCLBYTES-sized clusters */
+	ncl = ncl * CLBYTES / MCLBYTES;
+	/* thread each new cluster onto the head of the free list */
+	for (i = 0; i < ncl; i++) {
+		((union mcluster *)p)->mcl_next = mclfree;
+		mclfree = (union mcluster *)p;
+		p += MCLBYTES;
+		mbstat.m_clfree++;
+	}
+	mbstat.m_clusters += ncl;
+	return (1);
+}
+
+/*
+ * When MGET fails, ask protocols to free space when short of memory,
+ * then re-attempt to allocate an mbuf.
+ *
+ * i and t are passed straight through to MGET as its "how" and "type"
+ * arguments.  Returns whatever the second MGET attempt produces.
+ */
+struct mbuf *
+m_retry(i, t)
+	int i, t;
+{
+	register struct mbuf *m;
+
+	m_reclaim();
+	/*
+	 * Presumably MGET expands to a call to m_retry() when it fails;
+	 * the temporary macro makes that nested call evaluate to a null
+	 * pointer so the retry happens only once -- confirm against the
+	 * MGET definition in <sys/mbuf.h>.
+	 */
+#define m_retry(i, t)	(struct mbuf *)0
+	MGET(m, i, t);
+#undef m_retry
+	return (m);
+}
+
+/*
+ * As above; retry an MGETHDR.
+ *
+ * Calls m_reclaim() and then makes one further MGETHDR attempt with
+ * the same "how" (i) and "type" (t) arguments, returning its result.
+ */
+struct mbuf *
+m_retryhdr(i, t)
+	int i, t;
+{
+	register struct mbuf *m;
+
+	m_reclaim();
+	/*
+	 * Presumably MGETHDR expands to a call to m_retryhdr() when it
+	 * fails; the temporary macro turns that nested call into a null
+	 * pointer -- confirm against <sys/mbuf.h>.
+	 */
+#define m_retryhdr(i, t) (struct mbuf *)0
+	MGETHDR(m, i, t);
+#undef m_retryhdr
+	return (m);
+}
+
+/*
+ * Ask every protocol that has a pr_drain routine to release what it
+ * can.  The drain routines run at splimp; the m_drain statistic is
+ * bumped after the previous priority level has been restored.
+ */
+m_reclaim()
+{
+	register struct domain *dp;
+	register struct protosw *pr;
+	int s;
+
+	s = splimp();
+	for (dp = domains; dp; dp = dp->dom_next) {
+		pr = dp->dom_protosw;
+		while (pr < dp->dom_protoswNPROTOSW) {
+			if (pr->pr_drain)
+				(*pr->pr_drain)();
+			pr++;
+		}
+	}
+	splx(s);
+	mbstat.m_drain++;
+}
+
+/*
+ * Space allocation routines.
+ * These are also available as macros
+ * for critical paths.
+ */
+/*
+ * Function form of MGET: allocate one mbuf of the given type and
+ * return whatever MGET leaves in the pointer.
+ */
+struct mbuf *
+m_get(nowait, type)
+	int nowait, type;
+{
+	register struct mbuf *mb;
+
+	MGET(mb, nowait, type);
+	return (mb);
+}
+
+/*
+ * Function form of MGETHDR: allocate one packet-header mbuf of the
+ * given type and return whatever MGETHDR leaves in the pointer.
+ */
+struct mbuf *
+m_gethdr(nowait, type)
+	int nowait, type;
+{
+	register struct mbuf *mb;
+
+	MGETHDR(mb, nowait, type);
+	return (mb);
+}
+
+/*
+ * Like m_get(), but the first MLEN bytes of the new mbuf's data area
+ * are zeroed.  Returns a null pointer when the allocation fails.
+ */
+struct mbuf *
+m_getclr(nowait, type)
+	int nowait, type;
+{
+	register struct mbuf *mb;
+
+	MGET(mb, nowait, type);
+	if (mb != 0)
+		bzero(mtod(mb, caddr_t), MLEN);
+	return (mb);
+}
+
+/*
+ * Function form of MFREE: release the single mbuf m and return the
+ * pointer MFREE stores in its second argument (presumably the next
+ * mbuf of the chain -- confirm against <sys/mbuf.h>).
+ */
+struct mbuf *
+m_free(m)
+	struct mbuf *m;
+{
+	register struct mbuf *nxt;
+
+	MFREE(m, nxt);
+	return (nxt);
+}
+
+/*
+ * Free an entire mbuf chain, one MFREE at a time.
+ * A null chain is accepted and ignored.
+ */
+void
+m_freem(m)
+	register struct mbuf *m;
+{
+	register struct mbuf *nxt;
+
+	while (m != NULL) {
+		MFREE(m, nxt);
+		m = nxt;
+	}
+}
+
+/*
+ * Mbuffer utility routines.
+ */
+
+/*
+ * Slow path of M_PREPEND: put a freshly allocated mbuf in front of
+ * the chain m and give it a length of len bytes.
+ *
+ * Any packet header moves from m to the new mbuf.  When len is
+ * smaller than MHLEN the new mbuf's data is positioned with MH_ALIGN.
+ * On allocation failure the whole chain is freed and a null pointer
+ * is returned; otherwise the new head of the chain is returned.
+ */
+struct mbuf *
+m_prepend(m, len, how)
+	register struct mbuf *m;
+	int len, how;
+{
+	struct mbuf *head;
+
+	MGET(head, how, m->m_type);
+	if (head == (struct mbuf *)NULL) {
+		m_freem(m);
+		return ((struct mbuf *)NULL);
+	}
+	if (m->m_flags & M_PKTHDR) {
+		/* only the first mbuf of a chain carries the header */
+		M_COPY_PKTHDR(head, m);
+		m->m_flags &= ~M_PKTHDR;
+	}
+	head->m_next = m;
+	if (len < MHLEN)
+		MH_ALIGN(head, len);
+	head->m_len = len;
+	return (head);
+}
+
+/*
+ * Make a copy of an mbuf chain starting "off0" bytes from the beginning,
+ * continuing for "len" bytes.  If len is M_COPYALL, copy to end of mbuf.
+ * The wait parameter is a choice of M_WAIT/M_DONTWAIT from caller.
+ *
+ * Data held in external storage (M_EXT) is not copied: the new mbuf
+ * points at the same buffer and the buffer's reference count is raised.
+ * Returns the new chain, or a null pointer if an mbuf could not be
+ * allocated (the partial copy is freed).  Panics on negative arguments
+ * or when the chain is shorter than requested.  MCFail counts the
+ * calls that returned a null pointer.
+ */
+int MCFail;
+
+struct mbuf *
+m_copym(m, off0, len, wait)
+	register struct mbuf *m;
+	int off0, wait;
+	register int len;
+{
+	register struct mbuf *n, **np;
+	register int off = off0;
+	struct mbuf *top;
+	int copyhdr = 0;
+
+	if (off < 0 || len < 0)
+		panic("m_copym");
+	/* the packet header is duplicated only for a copy from offset 0 */
+	if (off == 0 && m->m_flags & M_PKTHDR)
+		copyhdr = 1;
+	/* skip the mbufs that lie entirely before the offset */
+	while (off > 0) {
+		if (m == 0)
+			panic("m_copym");
+		if (off < m->m_len)
+			break;
+		off -= m->m_len;
+		m = m->m_next;
+	}
+	np = &top;
+	top = 0;
+	while (len > 0) {
+		if (m == 0) {
+			/* running off the end is only legal for M_COPYALL */
+			if (len != M_COPYALL)
+				panic("m_copym");
+			break;
+		}
+		MGET(n, wait, m->m_type);
+		*np = n;
+		if (n == 0)
+			goto nospace;
+		if (copyhdr) {
+			M_COPY_PKTHDR(n, m);
+			if (len == M_COPYALL)
+				n->m_pkthdr.len -= off0;
+			else
+				n->m_pkthdr.len = len;
+			copyhdr = 0;
+		}
+		n->m_len = min(len, m->m_len - off);
+		if (m->m_flags & M_EXT) {
+			/* share the external buffer instead of copying it */
+			n->m_data = m->m_data + off;
+			mclrefcnt[mtocl(m->m_ext.ext_buf)]++;
+			n->m_ext = m->m_ext;
+			n->m_flags |= M_EXT;
+		} else
+			bcopy(mtod(m, caddr_t)+off, mtod(n, caddr_t),
+			    (unsigned)n->m_len);
+		if (len != M_COPYALL)
+			len -= n->m_len;
+		/* the offset applies to the first copied mbuf only */
+		off = 0;
+		m = m->m_next;
+		np = &n->m_next;
+	}
+	if (top == 0)
+		MCFail++;
+	return (top);
+nospace:
+	m_freem(top);
+	MCFail++;
+	return (0);
+}
+
+/*
+ * Copy "len" bytes of data, starting "off" bytes into the mbuf chain
+ * m, to the flat buffer cp.  Panics if either count is negative or
+ * the chain ends before the request has been satisfied.
+ */
+m_copydata(m, off, len, cp)
+	register struct mbuf *m;
+	register int off;
+	register int len;
+	caddr_t cp;
+{
+	register unsigned nbytes;
+
+	if (off < 0 || len < 0)
+		panic("m_copydata");
+	/* step over the mbufs that lie entirely before the offset */
+	for (; off > 0; m = m->m_next) {
+		if (m == 0)
+			panic("m_copydata");
+		if (off < m->m_len)
+			break;
+		off -= m->m_len;
+	}
+	/* copy piecewise; the offset only applies to the first piece */
+	for (; len > 0; m = m->m_next) {
+		if (m == 0)
+			panic("m_copydata");
+		nbytes = min(m->m_len - off, len);
+		bcopy(mtod(m, caddr_t) + off, cp, nbytes);
+		cp += nbytes;
+		len -= nbytes;
+		off = 0;
+	}
+}
+
+/*
+ * Append mbuf chain n to the end of chain m.
+ * Both chains must be of the same type (e.g. MT_DATA).
+ * No m_pkthdr is updated.
+ *
+ * While the last mbuf of m uses internal storage and has room, the
+ * leading mbufs of n are copied into it and freed; as soon as that is
+ * not possible the remainder of n is simply linked on.
+ */
+m_cat(m, n)
+	register struct mbuf *m, *n;
+{
+	/* find the tail of the first chain */
+	for (; m->m_next; m = m->m_next)
+		continue;
+	for (; n; n = m_free(n)) {
+		if ((m->m_flags & M_EXT) ||
+		    m->m_data + m->m_len + n->m_len >= &m->m_dat[MLEN]) {
+			/* no room to merge: link the rest and stop */
+			m->m_next = n;
+			return;
+		}
+		/* fold n's data into the tail mbuf */
+		bcopy(mtod(n, caddr_t), mtod(m, caddr_t) + m->m_len,
+		    (u_int)n->m_len);
+		m->m_len += n->m_len;
+	}
+}
+
+/*
+ * Trim data from an mbuf chain: a non-negative req_len removes that
+ * many bytes from the head, a negative one removes -req_len bytes
+ * from the tail.  No mbuf is freed; emptied mbufs stay on the chain
+ * with m_len set to 0.  The packet header length of the first mbuf,
+ * if present, is adjusted to match.  A null chain is ignored.
+ */
+m_adj(mp, req_len)
+	struct mbuf *mp;
+	int req_len;
+{
+	register int len = req_len;
+	register struct mbuf *m;
+	register count;
+
+	if ((m = mp) == NULL)
+		return;
+	if (len >= 0) {
+		/*
+		 * Trim from head.
+		 */
+		while (m != NULL && len > 0) {
+			if (m->m_len <= len) {
+				len -= m->m_len;
+				m->m_len = 0;
+				m = m->m_next;
+			} else {
+				m->m_len -= len;
+				m->m_data += len;
+				len = 0;
+			}
+		}
+		m = mp;
+		/* len is what could not be trimmed; subtract what was */
+		if (mp->m_flags & M_PKTHDR)
+			m->m_pkthdr.len -= (req_len - len);
+	} else {
+		/*
+		 * Trim from tail.  Scan the mbuf chain,
+		 * calculating its length and finding the last mbuf.
+		 * If the adjustment only affects this mbuf, then just
+		 * adjust and return.  Otherwise, rescan and truncate
+		 * after the remaining size.
+		 */
+		len = -len;
+		count = 0;
+		for (;;) {
+			count += m->m_len;
+			if (m->m_next == (struct mbuf *)0)
+				break;
+			m = m->m_next;
+		}
+		if (m->m_len >= len) {
+			m->m_len -= len;
+			if (mp->m_flags & M_PKTHDR)
+				mp->m_pkthdr.len -= len;
+			return;
+		}
+		count -= len;
+		if (count < 0)
+			count = 0;
+		/*
+		 * Correct length for chain is "count".
+		 * Find the mbuf with last data, adjust its length,
+		 * and toss data from remaining mbufs on chain.
+		 */
+		m = mp;
+		if (m->m_flags & M_PKTHDR)
+			m->m_pkthdr.len = count;
+		for (; m; m = m->m_next) {
+			if (m->m_len >= count) {
+				m->m_len = count;
+				break;
+			}
+			count -= m->m_len;
+		}
+		/* everything after the new last mbuf becomes empty */
+		while (m = m->m_next)
+			m->m_len = 0;
+	}
+}
+
+/*
+ * Rearrange an mbuf chain so that len bytes are contiguous
+ * and in the data area of an mbuf (so that mtod and dtom
+ * will work for a structure of size len).  Returns the resulting
+ * mbuf chain on success, frees it and returns null on failure.
+ * If there is room, it will add up to max_protohdr-len extra bytes to the
+ * contiguous region in an attempt to avoid being called next time.
+ *
+ * Requests larger than MHLEN that need a new mbuf fail.  MPFail
+ * counts the failures.
+ */
+int MPFail;
+
+struct mbuf *
+m_pullup(n, len)
+	register struct mbuf *n;
+	int len;
+{
+	register struct mbuf *m;
+	register int count;
+	int space;
+
+	/*
+	 * If first mbuf has no cluster, and has room for len bytes
+	 * without shifting current data, pullup into it,
+	 * otherwise allocate a new mbuf to prepend to the chain.
+	 */
+	if ((n->m_flags & M_EXT) == 0 &&
+	    n->m_data + len < &n->m_dat[MLEN] && n->m_next) {
+		if (n->m_len >= len)
+			return (n);
+		m = n;
+		n = n->m_next;
+		/* from here on len is the amount still to be gathered */
+		len -= m->m_len;
+	} else {
+		if (len > MHLEN)
+			goto bad;
+		MGET(m, M_DONTWAIT, n->m_type);
+		if (m == 0)
+			goto bad;
+		m->m_len = 0;
+		if (n->m_flags & M_PKTHDR) {
+			M_COPY_PKTHDR(m, n);
+			n->m_flags &= ~M_PKTHDR;
+		}
+	}
+	/* room left behind the data already held by m */
+	space = &m->m_dat[MLEN] - (m->m_data + m->m_len);
+	do {
+		/* take at least len (or max_protohdr) if space and n allow */
+		count = min(min(max(len, max_protohdr), space), n->m_len);
+		bcopy(mtod(n, caddr_t), mtod(m, caddr_t) + m->m_len,
+		  (unsigned)count);
+		len -= count;
+		m->m_len += count;
+		n->m_len -= count;
+		space -= count;
+		if (n->m_len)
+			n->m_data += count;
+		else
+			n = m_free(n);
+	} while (len > 0 && n);
+	if (len > 0) {
+		/* chain was too short; m is released here, n below */
+		(void) m_free(m);
+		goto bad;
+	}
+	m->m_next = n;
+	return (m);
+bad:
+	m_freem(n);
+	MPFail++;
+	return (0);
+}
+
+/*
+ * Partition an mbuf chain in two pieces, returning the tail --
+ * all but the first len0 bytes.  In case of failure, it returns NULL and
+ * attempts to restore the chain to its original state.
+ *
+ * m0 is the chain to split and wait is the M_WAIT/M_DONTWAIT choice
+ * used for any mbuf that has to be allocated.  A null pointer is also
+ * returned when the chain holds fewer than len0 bytes.  When m0
+ * carries a packet header, the tail gets a header of its own (same
+ * rcvif, remaining length) and m0's header length becomes len0.  Data
+ * in external storage is shared between both halves rather than
+ * copied.
+ */
+struct mbuf *
+m_split(m0, len0, wait)
+	register struct mbuf *m0;
+	int len0, wait;
+{
+	register struct mbuf *m, *n;
+	unsigned len = len0, remain;
+
+	/* find the mbuf containing the split point; len becomes local to it */
+	for (m = m0; m && len > m->m_len; m = m->m_next)
+		len -= m->m_len;
+	if (m == 0)
+		return (0);
+	remain = m->m_len - len;
+	if (m0->m_flags & M_PKTHDR) {
+		MGETHDR(n, wait, m0->m_type);
+		if (n == 0)
+			return (0);
+		n->m_pkthdr.rcvif = m0->m_pkthdr.rcvif;
+		n->m_pkthdr.len = m0->m_pkthdr.len - len0;
+		m0->m_pkthdr.len = len0;
+		if (m->m_flags & M_EXT)
+			goto extpacket;
+		if (remain > MHLEN) {
+			/* m can't be the lead packet */
+			MH_ALIGN(n, 0);
+			n->m_next = m_split(m, len, wait);
+			if (n->m_next == 0) {
+				/*
+				 * Nothing but the header length of m0 has
+				 * been changed so far; put it back so the
+				 * chain really is in its original state.
+				 */
+				m0->m_pkthdr.len += n->m_pkthdr.len;
+				(void) m_free(n);
+				return (0);
+			}
+			/* n only carries the packet header, no data */
+			n->m_len = 0;
+			return (n);
+		} else
+			MH_ALIGN(n, remain);
+	} else if (remain == 0) {
+		/* split falls on an mbuf boundary: just unlink the tail */
+		n = m->m_next;
+		m->m_next = 0;
+		return (n);
+	} else {
+		MGET(n, wait, m->m_type);
+		if (n == 0)
+			return (0);
+		M_ALIGN(n, remain);
+	}
+extpacket:
+	if (m->m_flags & M_EXT) {
+		/* both halves reference the same external buffer */
+		n->m_flags |= M_EXT;
+		n->m_ext = m->m_ext;
+		mclrefcnt[mtocl(m->m_ext.ext_buf)]++;
+		m->m_ext.ext_size = 0; /* For Accounting XXXXXX danger */
+		n->m_data = m->m_data + len;
+	} else {
+		bcopy(mtod(m, caddr_t) + len, mtod(n, caddr_t), remain);
+	}
+	n->m_len = remain;
+	m->m_len = len;
+	n->m_next = m->m_next;
+	m->m_next = 0;
+	return (n);
+}
+/*
+ * Routine to copy from device local memory into mbufs.
+ *
+ * buf/totlen describe the received packet and ifp is recorded as the
+ * receiving interface in the packet header.  When off0 is non-zero,
+ * copying starts off0 + 2 * sizeof(u_short) bytes into buf, totlen is
+ * reduced by those two shorts, and the copy wraps around to the start
+ * of buf on reaching the end of the packet.  If copy is non-null it is
+ * used in place of bcopy() to move the data.
+ *
+ * Returns the new chain, or a null pointer if an mbuf could not be
+ * allocated or there is nothing to copy.
+ */
+struct mbuf *
+m_devget(buf, totlen, off0, ifp, copy)
+	char *buf;
+	int totlen, off0;
+	struct ifnet *ifp;
+	void (*copy)();
+{
+	register struct mbuf *m;
+	struct mbuf *top = 0, **mp = &top;
+	register int off = off0, len;
+	register char *cp;
+	char *epkt;
+
+	cp = buf;
+	epkt = cp + totlen;
+	if (off) {
+		cp += off + 2 * sizeof(u_short);
+		totlen -= 2 * sizeof(u_short);
+	}
+	/*
+	 * With nothing to copy the loop below never runs and a null
+	 * pointer is returned; bail out before allocating the header
+	 * mbuf so that it is not leaked.
+	 */
+	if (totlen <= 0)
+		return (0);
+	MGETHDR(m, M_DONTWAIT, MT_DATA);
+	if (m == 0)
+		return (0);
+	m->m_pkthdr.rcvif = ifp;
+	m->m_pkthdr.len = totlen;
+	m->m_len = MHLEN;
+
+	while (totlen > 0) {
+		/* the header mbuf serves the first pass; later ones allocate */
+		if (top) {
+			MGET(m, M_DONTWAIT, MT_DATA);
+			if (m == 0) {
+				m_freem(top);
+				return (0);
+			}
+			m->m_len = MLEN;
+		}
+		/* never copy past the end of the packet in one go */
+		len = min(totlen, epkt - cp);
+		if (len >= MINCLSIZE) {
+			MCLGET(m, M_DONTWAIT);
+			if (m->m_flags & M_EXT)
+				m->m_len = len = min(len, MCLBYTES);
+			else
+				/* no cluster: fill the plain mbuf instead */
+				len = m->m_len;
+		} else {
+			/*
+			 * Place initial small packet/header at end of mbuf.
+			 */
+			if (len < m->m_len) {
+				if (top == 0 && len + max_linkhdr <= m->m_len)
+					m->m_data += max_linkhdr;
+				m->m_len = len;
+			} else
+				len = m->m_len;
+		}
+		if (copy)
+			copy(cp, mtod(m, caddr_t), (unsigned)len);
+		else
+			bcopy(cp, mtod(m, caddr_t), (unsigned)len);
+		cp += len;
+		*mp = m;
+		mp = &m->m_next;
+		totlen -= len;
+		/* wrap around to the data in front of the offset */
+		if (cp == epkt)
+			cp = buf;
+	}
+	return (top);
+}
diff --git a/sys/kern/uipc_proto.c b/sys/kern/uipc_proto.c
new file mode 100644
index 00000000000..da9828aa267
--- /dev/null
+++ b/sys/kern/uipc_proto.c
@@ -0,0 +1,72 @@
+/*-
+ * Copyright (c) 1982, 1986, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)uipc_proto.c	8.1 (Berkeley) 6/10/93
+ */
+
+#include <sys/param.h>
+#include <sys/socket.h>
+#include <sys/protosw.h>
+#include <sys/domain.h>
+#include <sys/mbuf.h>
+
+/*
+ * Definitions of protocols supported in the UNIX domain.
+ */
+
+int	uipc_usrreq(), raw_usrreq();
+void	raw_init(),raw_input(),raw_ctlinput();
+extern	struct domain unixdomain;		/* or at least forward */
+
+/*
+ * Protocol switch for the UNIX domain: a stream entry and a datagram
+ * entry, both served by uipc_usrreq(), followed by a type-0 entry
+ * wired to the raw_* routines.  The initializers are positional; the
+ * member order is that of struct protosw in <sys/protosw.h>, which is
+ * not visible here -- check it before adding or moving a field.
+ */
+struct protosw unixsw[] = {
+{ SOCK_STREAM,	&unixdomain,	0,	PR_CONNREQUIRED|PR_WANTRCVD|PR_RIGHTS,
+  0,		0,		0,		0,
+  uipc_usrreq,
+  0,		0,		0,		0,
+},
+{ SOCK_DGRAM,	&unixdomain,	0,		PR_ATOMIC|PR_ADDR|PR_RIGHTS,
+  0,		0,		0,		0,
+  uipc_usrreq,
+  0,		0,		0,		0,
+},
+{ 0,		0,		0,		0,
+  raw_input,	0,		raw_ctlinput,	0,
+  raw_usrreq,
+  raw_init,	0,		0,		0,
+}
+};
+
+int	unp_externalize(), unp_dispose();
+
+/*
+ * The UNIX domain itself: family AF_UNIX, name "unix", the
+ * unp_externalize and unp_dispose hooks, and the bounds of unixsw[]
+ * (first entry and one past the last).  The initializers are
+ * positional; presumably the zero is dom_init and the last two are
+ * dom_protosw and dom_protoswNPROTOSW -- confirm against struct domain
+ * in <sys/domain.h>.
+ */
+struct domain unixdomain =
+    { AF_UNIX, "unix", 0, unp_externalize, unp_dispose,
+      unixsw, &unixsw[sizeof(unixsw)/sizeof(unixsw[0])] };
diff --git a/sys/kern/uipc_sockbuf.c b/sys/kern/uipc_sockbuf.c
new file mode 100644
index 00000000000..d4af592d79b
--- /dev/null
+++ b/sys/kern/uipc_sockbuf.c
@@ -0,0 +1,755 @@
+/*
+ * Copyright (c) 1982, 1986, 1988, 1990, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)uipc_socket2.c	8.1 (Berkeley) 6/10/93
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/file.h>
+#include <sys/buf.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/protosw.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+
+/*
+ * Primitive routines for operating on sockets and socket buffers
+ */
+
+/* strings for sleep message: */
+char	netio[] = "netio";		/* passed to tsleep() by sbwait() and sb_lock() */
+char	netcon[] = "netcon";		/* not referenced in this file */
+char	netcls[] = "netcls";		/* not referenced in this file */
+
+u_long	sb_max = SB_MAX;		/* patchable; limit applied by sbreserve() */
+
+/*
+ * Procedures to manipulate state flags of socket
+ * and do appropriate wakeups.  Normal sequence from the
+ * active (originating) side is that soisconnecting() is
+ * called during processing of connect() call,
+ * resulting in an eventual call to soisconnected() if/when the
+ * connection is established.  When the connection is torn down
+ * soisdisconnecting() is called during processing of disconnect() call,
+ * and soisdisconnected() is called when the connection to the peer
+ * is totally severed.  The semantics of these routines are such that
+ * connectionless protocols can call soisconnected() and soisdisconnected()
+ * only, bypassing the in-progress calls when setting up a ``connection''
+ * takes no time.
+ *
+ * From the passive side, a socket is created with
+ * two queues of sockets: so_q0 for connections in progress
+ * and so_q for connections already made and awaiting user acceptance.
+ * As a protocol is preparing incoming connections, it creates a socket
+ * structure queued on so_q0 by calling sonewconn().  When the connection
+ * is established, soisconnected() is called, and transfers the
+ * socket structure to so_q, making it available to accept().
+ * 
+ * If a socket is closed with sockets on either
+ * so_q0 or so_q, these sockets are dropped.
+ *
+ * If higher level protocols are implemented in
+ * the kernel, the wakeups done here will sometimes
+ * cause software-interrupt process scheduling.
+ */
+
+soisconnecting(so)
+	register struct socket *so;
+{
+	/* Drop the connected/disconnecting bits and flag the attempt in progress. */
+	so->so_state = (so->so_state & ~(SS_ISCONNECTED|SS_ISDISCONNECTING)) |
+	    SS_ISCONNECTING;
+}
+
+soisconnected(so)
+	register struct socket *so;	/* socket whose connection is now up */
+{
+	register struct socket *head = so->so_head;
+	/* Mark so connected; requeue it for accept() if pending, else wake sleepers. */
+	so->so_state &= ~(SS_ISCONNECTING|SS_ISDISCONNECTING|SS_ISCONFIRMING);
+	so->so_state |= SS_ISCONNECTED;
+	if (head && soqremque(so, 0)) {	/* so was pending on head's so_q0 */
+		soqinsque(head, so, 1);	/* move it to the completed queue so_q */
+		sorwakeup(head);
+		wakeup((caddr_t)&head->so_timeo);
+	} else {			/* no head, or so was not on so_q0 */
+		wakeup((caddr_t)&so->so_timeo);
+		sorwakeup(so);
+		sowwakeup(so);
+	}
+}
+
+soisdisconnecting(so)
+	register struct socket *so;	/* socket starting to disconnect */
+{
+	/* Flag the teardown, set both CANT*MORE bits, then issue the wakeups. */
+	so->so_state &= ~SS_ISCONNECTING;
+	so->so_state |= (SS_ISDISCONNECTING|SS_CANTRCVMORE|SS_CANTSENDMORE);
+	wakeup((caddr_t)&so->so_timeo);	/* anyone sleeping on so_timeo */
+	sowwakeup(so);
+	sorwakeup(so);
+}
+
+soisdisconnected(so)
+	register struct socket *so;	/* socket whose connection is gone */
+{
+	/* Clear all three connection-state bits, set both CANT*MORE bits, wake up. */
+	so->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING);
+	so->so_state |= (SS_CANTRCVMORE|SS_CANTSENDMORE);
+	wakeup((caddr_t)&so->so_timeo);	/* anyone sleeping on so_timeo */
+	sowwakeup(so);
+	sorwakeup(so);
+}
+
+/*
+ * When an attempt at a new connection is noted on a socket
+ * which accepts connections, sonewconn is called.  If the
+ * connection is possible (subject to space constraints, etc.)
+ * then we allocate a new structure, properly linked into the
+ * data structure of the original socket, and return this.
+ * Connstatus may be 0, or SS_ISCONFIRMING, or SS_ISCONNECTED.
+ *
+ * Currently, sonewconn() is defined as sonewconn1() in socketvar.h
+ * to catch calls that are missing the (new) second parameter.
+ */
+struct socket *
+sonewconn1(head, connstatus)
+	register struct socket *head;	/* socket the connection arrived on */
+	int connstatus;			/* 0, or state bits to OR into the new socket */
+{
+	register struct socket *so;
+	int soqueue = connstatus ? 1 : 0;	/* 1: queue on so_q, 0: on so_q0 */
+	/* NOTE(review): the soreserve() result below is discarded. */
+	if (head->so_qlen + head->so_q0len > 3 * head->so_qlimit / 2)
+		return ((struct socket *)0);	/* both queues together over limit */
+	MALLOC(so, struct socket *, sizeof(*so), M_SOCKET, M_DONTWAIT);
+	if (so == NULL) 
+		return ((struct socket *)0);
+	bzero((caddr_t)so, sizeof(*so));
+	so->so_type = head->so_type;	/* copy settings from head */
+	so->so_options = head->so_options &~ SO_ACCEPTCONN;
+	so->so_linger = head->so_linger;
+	so->so_state = head->so_state | SS_NOFDREF;
+	so->so_proto = head->so_proto;
+	so->so_timeo = head->so_timeo;
+	so->so_pgid = head->so_pgid;
+	(void) soreserve(so, head->so_snd.sb_hiwat, head->so_rcv.sb_hiwat);
+	soqinsque(head, so, soqueue);
+	if ((*so->so_proto->pr_usrreq)(so, PRU_ATTACH,
+	    (struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0)) {
+		(void) soqremque(so, soqueue);	/* attach failed: unqueue, discard */
+		(void) free((caddr_t)so, M_SOCKET);
+		return ((struct socket *)0);
+	}
+	if (connstatus) {
+		sorwakeup(head);	/* so is already on so_q: wake head */
+		wakeup((caddr_t)&head->so_timeo);
+		so->so_state |= connstatus;
+	}
+	return (so);
+}
+
+soqinsque(head, so, q)
+	register struct socket *head, *so;
+	int q;
+{
+	register struct socket **linkp;	/* slot that will point at so */
+
+	so->so_head = head;
+	if (q != 0) {			/* so_q: connections already made */
+		head->so_qlen++;
+		so->so_q = 0;
+		for (linkp = &head->so_q; *linkp != 0; linkp = &(*linkp)->so_q)
+			continue;
+	} else {			/* so_q0: connections in progress */
+		head->so_q0len++;
+		so->so_q0 = 0;
+		for (linkp = &head->so_q0; *linkp != 0; linkp = &(*linkp)->so_q0)
+			continue;
+	}
+	*linkp = so;			/* append at the tail of the chosen queue */
+}
+
+soqremque(so, q)
+	register struct socket *so;	/* socket to unlink from so->so_head */
+	int q;				/* 0: search so_q0, nonzero: search so_q */
+{
+	register struct socket *head, *prev, *next;
+	/* Returns 1 if so was found and unlinked, 0 if it is not on that queue. */
+	head = so->so_head;
+	prev = head;			/* the head itself anchors the chain */
+	for (;;) {
+		next = q ? prev->so_q : prev->so_q0;
+		if (next == so)
+			break;
+		if (next == 0)
+			return (0);	/* end of queue: nothing was changed */
+		prev = next;
+	}
+	if (q == 0) {
+		prev->so_q0 = next->so_q0;
+		head->so_q0len--;
+	} else {
+		prev->so_q = next->so_q;
+		head->so_qlen--;
+	}
+	next->so_q0 = next->so_q = 0;
+	next->so_head = 0;		/* so is no longer tied to head */
+	return (1);
+}
+
+/*
+ * Socantsendmore indicates that no more data will be sent on the
+ * socket; it would normally be applied to a socket when the user
+ * informs the system that no more data is to be sent, by the protocol
+ * code (in case PRU_SHUTDOWN).  Socantrcvmore indicates that no more data
+ * will be received, and will normally be applied to the socket by a
+ * protocol when it detects that the peer will send no more data.
+ * Data queued for reading in the socket may yet be read.
+ */
+
+socantsendmore(so)
+	struct socket *so;
+{
+	/* Record the condition first, then call sowwakeup(). */
+	so->so_state |= SS_CANTSENDMORE;
+	sowwakeup(so);
+}
+
+socantrcvmore(so)
+	struct socket *so;
+{
+	/* Record the condition first, then call sorwakeup(). */
+	so->so_state |= SS_CANTRCVMORE;
+	sorwakeup(so);
+}
+
+/*
+ * Wait for data to arrive at/drain from a socket buffer.
+ */
+sbwait(sb)
+	struct sockbuf *sb;		/* buffer to wait on */
+{
+	/* Returns tsleep()'s result; sowakeup() clears SB_WAIT and wakes &sb_cc. */
+	sb->sb_flags |= SB_WAIT;
+	return (tsleep((caddr_t)&sb->sb_cc,
+	    (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK | PCATCH, netio,
+	    sb->sb_timeo));
+}
+
+/* 
+ * Lock a sockbuf already known to be locked;
+ * return any error returned from sleep (EINTR).
+ */
+sb_lock(sb)
+	register struct sockbuf *sb;
+{
+	int error;
+	/* Sleep until SB_LOCK is clear, then set it; returns 0 once it is held. */
+	while (sb->sb_flags & SB_LOCK) {
+		sb->sb_flags |= SB_WANT;	/* note that someone is waiting */
+		if (error = tsleep((caddr_t)&sb->sb_flags, 
+		    (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK|PCATCH,
+		    netio, 0))
+			return (error);		/* sleep failed: lock not taken */
+	}
+	sb->sb_flags |= SB_LOCK;
+	return (0);
+}
+
+/*
+ * Wakeup processes waiting on a socket buffer.
+ * Do asynchronous notification via SIGIO
+ * if the socket has the SS_ASYNC flag set.
+ */
+sowakeup(so, sb)
+	register struct socket *so;	/* consulted for SS_ASYNC and so_pgid */
+	register struct sockbuf *sb;	/* buffer whose waiters are woken */
+{
+	struct proc *p;
+	/* Order: select waiters, then sbwait() sleepers, then SIGIO delivery. */
+	selwakeup(&sb->sb_sel);
+	sb->sb_flags &= ~SB_SEL;
+	if (sb->sb_flags & SB_WAIT) {	/* set by sbwait() */
+		sb->sb_flags &= ~SB_WAIT;
+		wakeup((caddr_t)&sb->sb_cc);
+	}
+	if (so->so_state & SS_ASYNC) {
+		if (so->so_pgid < 0)	/* negative: gsignal() gets -so_pgid */
+			gsignal(-so->so_pgid, SIGIO);
+		else if (so->so_pgid > 0 && (p = pfind(so->so_pgid)) != 0)
+			psignal(p, SIGIO);	/* positive: looked up via pfind() */
+	}
+}
+
+/*
+ * Socket buffer (struct sockbuf) utility routines.
+ *
+ * Each socket contains two socket buffers: one for sending data and
+ * one for receiving data.  Each buffer contains a queue of mbufs,
+ * information about the number of mbufs and amount of data in the
+ * queue, and other fields allowing select() statements and notification
+ * on data availability to be implemented.
+ *
+ * Data stored in a socket buffer is maintained as a list of records.
+ * Each record is a list of mbufs chained together with the m_next
+ * field.  Records are chained together with the m_nextpkt field. The upper
+ * level routine soreceive() expects the following conventions to be
+ * observed when placing information in the receive buffer:
+ *
+ * 1. If the protocol requires each message be preceded by the sender's
+ *    name, then a record containing that name must be present before
+ *    any associated data (mbuf's must be of type MT_SONAME).
+ * 2. If the protocol supports the exchange of ``access rights'' (really
+ *    just additional data associated with the message), and there are
+ *    ``rights'' to be received, then a record containing this data
+ *    should be present (mbuf's must be of type MT_RIGHTS).
+ * 3. If a name or rights record exists, then it must be followed by
+ *    a data record, perhaps of zero length.
+ *
+ * Before using a new socket structure it is first necessary to reserve
+ * buffer space to the socket, by calling sbreserve().  This should commit
+ * some of the available buffer space in the system buffer pool for the
+ * socket (currently, it does nothing but enforce limits).  The space
+ * should be released by calling sbrelease() when the socket is destroyed.
+ */
+
+soreserve(so, sndcc, rcvcc)
+	register struct socket *so;
+	u_long sndcc, rcvcc;
+{
+	register struct sockbuf *snd = &so->so_snd, *rcv = &so->so_rcv;
+
+	/* Reserve send space first; nothing to undo if that fails. */
+	if (!sbreserve(snd, sndcc))
+		return (ENOBUFS);
+	if (!sbreserve(rcv, rcvcc)) {
+		sbrelease(snd);		/* give back the send reservation */
+		return (ENOBUFS);
+	}
+	if (rcv->sb_lowat == 0)		/* fill in unset low-water marks */
+		rcv->sb_lowat = 1;
+	if (snd->sb_lowat == 0)
+		snd->sb_lowat = MCLBYTES;
+	if (snd->sb_lowat > snd->sb_hiwat)
+		snd->sb_lowat = snd->sb_hiwat;
+	return (0);
+}
+
+/*
+ * Allot mbufs to a sockbuf.
+ * Attempt to scale mbmax so that mbcnt doesn't become limiting
+ * if buffering efficiency is near the normal case.
+ */
+sbreserve(sb, cc)
+	struct sockbuf *sb;		/* buffer to size */
+	u_long cc;			/* requested high-water mark */
+{
+	/* Compare in 64 bits: sb_max is patchable and sb_max * MCLBYTES may wrap. */
+	if ((u_quad_t)cc > (u_quad_t)sb_max * MCLBYTES / (MSIZE + MCLBYTES))
+		return (0);		/* over the limit: sb is left unchanged */
+	sb->sb_hiwat = cc;
+	sb->sb_mbmax = min(cc * 2, sb_max);
+	if (sb->sb_lowat > sb->sb_hiwat)	/* keep lowat <= hiwat */
+		sb->sb_lowat = sb->sb_hiwat;
+	return (1);			/* limits recorded */
+}
+
+/*
+ * Free mbufs held by a socket, and reserved mbuf space.
+ */
+sbrelease(sb)
+	struct sockbuf *sb;
+{
+	/* Flush whatever is queued, then zero both limits. */
+	sbflush(sb);
+	sb->sb_hiwat = sb->sb_mbmax = 0;
+}
+
+/*
+ * Routines to add and remove
+ * data from an mbuf queue.
+ *
+ * The routines sbappend() or sbappendrecord() are normally called to
+ * append new mbufs to a socket buffer, after checking that adequate
+ * space is available, comparing the function sbspace() with the amount
+ * of data to be added.  sbappendrecord() differs from sbappend() in
+ * that data supplied is treated as the beginning of a new record.
+ * To place a sender's address, optional access rights, and data in a
+ * socket receive buffer, sbappendaddr() should be used.  To place
+ * access rights and data in a socket receive buffer, sbappendcontrol()
+ * should be used.  In either case, the new data begins a new record.
+ * Note that unlike sbappend() and sbappendrecord(), these routines check
+ * for the caller that there will be enough space to store the data.
+ * Each fails if there is not enough space, or if it cannot find mbufs
+ * to store additional information in.
+ *
+ * Reliable protocols may use the socket send buffer to hold data
+ * awaiting acknowledgement.  Data is normally copied from a socket
+ * send buffer in a protocol with m_copy for output to a peer,
+ * and then removing the data from the socket buffer with sbdrop()
+ * or sbdroprecord() when the data is acknowledged by the peer.
+ */
+
+/*
+ * Append mbuf chain m to the last record in the
+ * socket buffer sb.  The additional space associated
+ * with the mbuf chain is recorded in sb.  Empty mbufs are
+ * discarded and mbufs are compacted where possible.
+ */
+sbappend(sb, m)
+	struct sockbuf *sb;
+	struct mbuf *m;			/* chain to append; null is a no-op */
+{
+	register struct mbuf *n;
+
+	if (m == 0)
+		return;
+	if (n = sb->sb_mb) {
+		while (n->m_nextpkt)	/* find the last record */
+			n = n->m_nextpkt;
+		do {			/* then walk to its last mbuf */
+			if (n->m_flags & M_EOR) {	/* M_EOR seen: new record */
+				sbappendrecord(sb, m); /* XXXXXX!!!! */
+				return;
+			}
+		} while (n->m_next && (n = n->m_next));
+	}
+	sbcompress(sb, m, n);		/* n is null when the buffer was empty */
+}
+
+#ifdef SOCKBUF_DEBUG
+sbcheck(sb)
+	register struct sockbuf *sb;
+{
+	register struct mbuf *m;
+	register int len = 0, mbcnt = 0;
+	/* Recount bytes and mbuf space; panic if sb_cc or sb_mbcnt disagree. */
+	for (m = sb->sb_mb; m; m = m->m_next) {
+		len += m->m_len;
+		mbcnt += MSIZE;
+		if (m->m_flags & M_EXT)
+			mbcnt += m->m_ext.ext_size;
+		if (m->m_nextpkt)	/* any further record is fatal here */
+			panic("sbcheck nextpkt");
+	}
+	if (len != sb->sb_cc || mbcnt != sb->sb_mbcnt) {
+		printf("cc %d != %d || mbcnt %d != %d\n", len, sb->sb_cc,
+		    mbcnt, sb->sb_mbcnt);	/* NOTE(review): confirm %d fits the field types */
+		panic("sbcheck");
+	}
+}
+#endif /* SOCKBUF_DEBUG */
+
+/*
+ * As above, except the mbuf chain
+ * begins a new record.
+ */
+sbappendrecord(sb, m0)
+	register struct sockbuf *sb;
+	register struct mbuf *m0;	/* first mbuf of the new record */
+{
+	register struct mbuf *m;
+
+	if (m0 == 0)
+		return;
+	if (m = sb->sb_mb)
+		while (m->m_nextpkt)	/* m ends up at the last record */
+			m = m->m_nextpkt;
+	/*
+	 * Put the first mbuf on the queue.
+	 * Note this permits zero length records.
+	 */
+	sballoc(sb, m0);
+	if (m)
+		m->m_nextpkt = m0;
+	else
+		sb->sb_mb = m0;
+	m = m0->m_next;			/* detach the rest of the chain */
+	m0->m_next = 0;
+	if (m && (m0->m_flags & M_EOR)) {	/* move M_EOR from m0 to the rest */
+		m0->m_flags &= ~M_EOR;
+		m->m_flags |= M_EOR;
+	}
+	sbcompress(sb, m, m0);		/* append the rest after m0 */
+}
+
+/*
+ * As above except that OOB data
+ * is inserted at the beginning of the sockbuf,
+ * but after any other OOB data.
+ */
+sbinsertoob(sb, m0)
+	register struct sockbuf *sb;
+	register struct mbuf *m0;	/* chain to insert; null is a no-op */
+{
+	register struct mbuf *m;
+	register struct mbuf **mp;	/* link where m0 will be spliced in */
+
+	if (m0 == 0)
+		return;
+	for (mp = &sb->sb_mb; m = *mp; mp = &((*mp)->m_nextpkt)) {
+	    again:
+		switch (m->m_type) {
+
+		case MT_OOBDATA:
+			continue;		/* WANT next train */
+
+		case MT_CONTROL:
+			if (m = m->m_next)
+				goto again;	/* inspect THIS train further */
+		}
+		break;			/* stop here: m0 goes in front of *mp */
+	}
+	/*
+	 * Put the first mbuf on the queue.
+	 * Note this permits zero length records.
+	 */
+	sballoc(sb, m0);
+	m0->m_nextpkt = *mp;
+	*mp = m0;
+	m = m0->m_next;			/* detach the rest of the chain */
+	m0->m_next = 0;
+	if (m && (m0->m_flags & M_EOR)) {	/* move M_EOR from m0 to the rest */
+		m0->m_flags &= ~M_EOR;
+		m->m_flags |= M_EOR;
+	}
+	sbcompress(sb, m, m0);		/* append the rest after m0 */
+}
+
+/*
+ * Append address and data, and optionally, control (ancillary) data
+ * to the receive queue of a socket.  If present,
+ * m0 must include a packet header with total length.
+ * Returns 0 if no space in sockbuf or insufficient mbufs.
+ */
+sbappendaddr(sb, asa, m0, control)
+	register struct sockbuf *sb;
+	struct sockaddr *asa;		/* address, copied into a new mbuf */
+	struct mbuf *m0, *control;	/* data and control chains; may be null */
+{
+	register struct mbuf *m, *n;
+	int space = asa->sa_len;	/* running total the record will need */
+
+if (m0 && (m0->m_flags & M_PKTHDR) == 0)
+panic("sbappendaddr");
+	if (m0)
+		space += m0->m_pkthdr.len;
+	for (n = control; n; n = n->m_next) {
+		space += n->m_len;
+		if (n->m_next == 0)	/* keep pointer to last control buf */
+			break;
+	}
+	if (space > sbspace(sb))
+		return (0);
+	if (asa->sa_len > MLEN)		/* sa_len bytes are copied into m below */
+		return (0);
+	MGET(m, M_DONTWAIT, MT_SONAME);
+	if (m == 0)
+		return (0);
+	m->m_len = asa->sa_len;
+	bcopy((caddr_t)asa, mtod(m, caddr_t), asa->sa_len);
+	if (n)
+		n->m_next = m0;		/* concatenate data to control */
+	else
+		control = m0;
+	m->m_next = control;		/* record is: address, control, data */
+	for (n = m; n; n = n->m_next)
+		sballoc(sb, n);
+	if (n = sb->sb_mb) {
+		while (n->m_nextpkt)
+			n = n->m_nextpkt;
+		n->m_nextpkt = m;	/* link after the last record */
+	} else
+		sb->sb_mb = m;
+	return (1);
+}
+
+sbappendcontrol(sb, m0, control)
+	struct sockbuf *sb;
+	struct mbuf *control, *m0;	/* control is required; m0 may be null */
+{
+	register struct mbuf *m, *n;
+	int space = 0;
+	/* Queue control followed by m0 as a new record; 1 if queued, 0 if no room. */
+	if (control == 0)
+		panic("sbappendcontrol");
+	for (m = control; ; m = m->m_next) {
+		space += m->m_len;
+		if (m->m_next == 0)
+			break;
+	}
+	n = m;			/* save pointer to last control buffer */
+	for (m = m0; m; m = m->m_next)
+		space += m->m_len;
+	if (space > sbspace(sb))
+		return (0);		/* nothing has been modified yet */
+	n->m_next = m0;			/* concatenate data to control */
+	for (m = control; m; m = m->m_next)
+		sballoc(sb, m);
+	if (n = sb->sb_mb) {
+		while (n->m_nextpkt)
+			n = n->m_nextpkt;
+		n->m_nextpkt = control;	/* link after the last record */
+	} else
+		sb->sb_mb = control;
+	return (1);
+}
+
+/*
+ * Compress mbuf chain m into the socket
+ * buffer sb following mbuf n.  If n
+ * is null, the buffer is presumed empty.
+ */
+sbcompress(sb, m, n)
+	register struct sockbuf *sb;
+	register struct mbuf *m, *n;	/* chain to add; mbuf to add it after */
+{
+	register int eor = 0;		/* M_EOR collected from the mbufs of m */
+	register struct mbuf *o;
+
+	while (m) {
+		eor |= m->m_flags & M_EOR;
+		if (m->m_len == 0 &&
+		    (eor == 0 ||
+		     (((o = m->m_next) || (o = n)) &&
+		      o->m_type == m->m_type))) {
+			m = m_free(m);	/* discard the empty mbuf */
+			continue;
+		}
+		if (n && (n->m_flags & (M_EXT | M_EOR)) == 0 &&
+		    (n->m_data + n->m_len + m->m_len) < &n->m_dat[MLEN] &&
+		    n->m_type == m->m_type) {
+			bcopy(mtod(m, caddr_t), mtod(n, caddr_t) + n->m_len,
+			    (unsigned)m->m_len);
+			n->m_len += m->m_len;	/* m's bytes now live in n */
+			sb->sb_cc += m->m_len;
+			m = m_free(m);
+			continue;
+		}
+		if (n)
+			n->m_next = m;
+		else
+			sb->sb_mb = m;
+		sballoc(sb, m);
+		n = m;
+		m->m_flags &= ~M_EOR;	/* eor is re-applied to the last mbuf below */
+		m = m->m_next;
+		n->m_next = 0;
+	}
+	if (eor) {
+		if (n)
+			n->m_flags |= eor;
+		else
+			printf("semi-panic: sbcompress\n");
+	}
+}
+
+/*
+ * Free all mbufs in a sockbuf.
+ * Check that all resources are reclaimed.
+ */
+sbflush(sb)
+	register struct sockbuf *sb;
+{
+	/* Panics if sb is locked, or if anything is still accounted afterwards. */
+	if (sb->sb_flags & SB_LOCK)
+		panic("sbflush");
+	while (sb->sb_mbcnt)
+		sbdrop(sb, (int)sb->sb_cc);
+	if (sb->sb_cc || sb->sb_mb)
+		panic("sbflush 2");
+}
+
+/*
+ * Drop data from (the front of) a sockbuf.
+ */
+sbdrop(sb, len)
+	register struct sockbuf *sb;
+	register int len;		/* amount to remove, counted in m_len units */
+{
+	register struct mbuf *m, *mn;
+	struct mbuf *next;		/* record after the one being consumed */
+
+	next = (m = sb->sb_mb) ? m->m_nextpkt : 0;
+	while (len > 0) {
+		if (m == 0) {		/* record used up: go on to the next */
+			if (next == 0)
+				panic("sbdrop");
+			m = next;
+			next = m->m_nextpkt;
+			continue;
+		}
+		if (m->m_len > len) {	/* partial mbuf: trim its front */
+			m->m_len -= len;
+			m->m_data += len;
+			sb->sb_cc -= len;
+			break;
+		}
+		len -= m->m_len;
+		sbfree(sb, m);
+		MFREE(m, mn);
+		m = mn;
+	}
+	while (m && m->m_len == 0) {	/* also drop empty mbufs left in front */
+		sbfree(sb, m);
+		MFREE(m, mn);
+		m = mn;
+	}
+	if (m) {
+		sb->sb_mb = m;		/* remainder becomes the first record */
+		m->m_nextpkt = next;
+	} else
+		sb->sb_mb = next;
+}
+
+/*
+ * Drop a record off the front of a sockbuf
+ * and move the next record to the front.
+ */
+sbdroprecord(sb)
+	register struct sockbuf *sb;
+{
+	register struct mbuf *cur, *rest;
+
+	if ((cur = sb->sb_mb) == 0)
+		return;			/* nothing queued */
+	sb->sb_mb = cur->m_nextpkt;	/* next record becomes the head */
+	while (cur != 0) {
+		sbfree(sb, cur);
+		MFREE(cur, rest);	/* rest receives the mbuf to free next */
+		cur = rest;
+	}
+}
diff --git a/sys/kern/uipc_socket.c b/sys/kern/uipc_socket.c
new file mode 100644
index 00000000000..ed09ee63b9f
--- /dev/null
+++ b/sys/kern/uipc_socket.c
@@ -0,0 +1,1024 @@
+/*
+ * Copyright (c) 1982, 1986, 1988, 1990, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)uipc_socket.c	8.3 (Berkeley) 4/15/94
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/file.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/domain.h>
+#include <sys/kernel.h>
+#include <sys/protosw.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/resourcevar.h>
+
+/*
+ * Socket operation routines.
+ * These routines are called by the routines in
+ * sys_socket.c or from a system process, and
+ * implement the semantics of socket operations by
+ * switching out to the protocol specific routines.
+ */
+/*ARGSUSED*/
+socreate(dom, aso, type, proto)
+	int dom;
+	struct socket **aso;		/* out: the new socket, set on success only */
+	register int type;
+	int proto;			/* 0: look the protocol up by type alone */
+{
+	struct proc *p = curproc;		/* XXX */
+	register struct protosw *prp;
+	register struct socket *so;
+	register int error;
+
+	if (proto)
+		prp = pffindproto(dom, proto, type);
+	else
+		prp = pffindtype(dom, type);
+	if (prp == 0 || prp->pr_usrreq == 0)
+		return (EPROTONOSUPPORT);
+	if (prp->pr_type != type)
+		return (EPROTOTYPE);
+	MALLOC(so, struct socket *, sizeof(*so), M_SOCKET, M_WAIT);
+	bzero((caddr_t)so, sizeof(*so));
+	so->so_type = type;
+	if (p->p_ucred->cr_uid == 0)	/* creator's credential has uid 0 */
+		so->so_state = SS_PRIV;
+	so->so_proto = prp;
+	error =
+	    (*prp->pr_usrreq)(so, PRU_ATTACH,
+		(struct mbuf *)0, (struct mbuf *)proto, (struct mbuf *)0);
+	if (error) {
+		so->so_state |= SS_NOFDREF;	/* sofree() returns early without it */
+		sofree(so);
+		return (error);
+	}
+	*aso = so;
+	return (0);
+}
+
+sobind(so, nam)
+	struct socket *so;
+	struct mbuf *nam;		/* handed to the protocol with PRU_BIND */
+{
+	int s = splnet();
+	int error;
+	/* Issue PRU_BIND at splnet and return the protocol's result. */
+	error =
+	    (*so->so_proto->pr_usrreq)(so, PRU_BIND,
+		(struct mbuf *)0, nam, (struct mbuf *)0);
+	splx(s);
+	return (error);
+}
+
+solisten(so, backlog)
+	register struct socket *so;
+	int backlog;			/* negative becomes 0; capped at SOMAXCONN */
+{
+	int s = splnet(), error;
+	/* Issue PRU_LISTEN; on success set SO_ACCEPTCONN and the queue limit. */
+	error =
+	    (*so->so_proto->pr_usrreq)(so, PRU_LISTEN,
+		(struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0);
+	if (error) {
+		splx(s);
+		return (error);
+	}
+	if (so->so_q == 0)		/* only while nothing is queued on so_q */
+		so->so_options |= SO_ACCEPTCONN;
+	if (backlog < 0)
+		backlog = 0;
+	so->so_qlimit = min(backlog, SOMAXCONN);
+	splx(s);
+	return (0);
+}
+
+sofree(so)
+	register struct socket *so;
+{
+	/* Does nothing while so_pcb is set or SS_NOFDREF is clear. */
+	if (so->so_pcb || (so->so_state & SS_NOFDREF) == 0)
+		return;
+	if (so->so_head) {		/* still queued on another socket */
+		if (!soqremque(so, 0) && !soqremque(so, 1))
+			panic("sofree dq");
+		so->so_head = 0;
+	}
+	sbrelease(&so->so_snd);
+	sorflush(so);
+	FREE(so, M_SOCKET);
+}
+
+/*
+ * Close a socket on last file table reference removal.
+ * Initiate disconnect if connected.
+ * Free socket when disconnect complete.
+ */
+soclose(so)
+	register struct socket *so;
+{
+	int s = splnet();		/* conservative */
+	int error = 0;
+
+	if (so->so_options & SO_ACCEPTCONN) {
+		while (so->so_q0)	/* abort everything queued on so_q0 */
+			(void) soabort(so->so_q0);
+		while (so->so_q)	/* and everything queued on so_q */
+			(void) soabort(so->so_q);
+	}
+	if (so->so_pcb == 0)
+		goto discard;
+	if (so->so_state & SS_ISCONNECTED) {
+		if ((so->so_state & SS_ISDISCONNECTING) == 0) {
+			error = sodisconnect(so);
+			if (error)
+				goto drop;
+		}
+		if (so->so_options & SO_LINGER) {
+			if ((so->so_state & SS_ISDISCONNECTING) &&
+			    (so->so_state & SS_NBIO))
+				goto drop;	/* SS_NBIO set: skip the wait */
+			while (so->so_state & SS_ISCONNECTED)
+				if (error = tsleep((caddr_t)&so->so_timeo,
+				    PSOCK | PCATCH, netcls, so->so_linger))
+					break;
+		}
+	}
+drop:
+	if (so->so_pcb) {
+		int error2 =
+		    (*so->so_proto->pr_usrreq)(so, PRU_DETACH,
+			(struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0);
+		if (error == 0)		/* an earlier error takes precedence */
+			error = error2;
+	}
+discard:
+	if (so->so_state & SS_NOFDREF)
+		panic("soclose: NOFDREF");
+	so->so_state |= SS_NOFDREF;	/* lets sofree() go ahead */
+	sofree(so);
+	splx(s);
+	return (error);
+}
+
+/*
+ * Must be called at splnet...
+ */
+soabort(so)
+	struct socket *so;
+{
+	/* Returns whatever the protocol's PRU_ABORT request returns. */
+	return (
+	    (*so->so_proto->pr_usrreq)(so, PRU_ABORT,
+		(struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0));
+}
+
+soaccept(so, nam)
+	register struct socket *so;
+	struct mbuf *nam;		/* handed to the protocol with PRU_ACCEPT */
+{
+	int s = splnet();
+	int error;
+	/* so must still have SS_NOFDREF set; the flag is cleared here. */
+	if ((so->so_state & SS_NOFDREF) == 0)
+		panic("soaccept: !NOFDREF");
+	so->so_state &= ~SS_NOFDREF;
+	error = (*so->so_proto->pr_usrreq)(so, PRU_ACCEPT,
+	    (struct mbuf *)0, nam, (struct mbuf *)0);
+	splx(s);
+	return (error);
+}
+
+soconnect(so, nam)
+	register struct socket *so;
+	struct mbuf *nam;		/* handed to the protocol with PRU_CONNECT */
+{
+	int s;
+	int error;
+
+	if (so->so_options & SO_ACCEPTCONN)
+		return (EOPNOTSUPP);	/* refused when SO_ACCEPTCONN is set */
+	s = splnet();
+	/*
+	 * If protocol is connection-based, can only connect once.
+	 * Otherwise, if connected, try to disconnect first.
+	 * This allows user to disconnect by connecting to, e.g.,
+	 * a null address.
+	 */
+	if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING) &&
+	    ((so->so_proto->pr_flags & PR_CONNREQUIRED) ||
+	    (error = sodisconnect(so))))
+		error = EISCONN;	/* also replaces any sodisconnect() error */
+	else
+		error = (*so->so_proto->pr_usrreq)(so, PRU_CONNECT,
+		    (struct mbuf *)0, nam, (struct mbuf *)0);
+	splx(s);
+	return (error);
+}
+
+soconnect2(so1, so2)
+	register struct socket *so1;
+	struct socket *so2;		/* passed to so1's protocol cast to an mbuf pointer */
+{
+	int s = splnet();
+	int error;
+	/* Issue PRU_CONNECT2 on so1 at splnet and return the protocol's result. */
+	error = (*so1->so_proto->pr_usrreq)(so1, PRU_CONNECT2,
+	    (struct mbuf *)0, (struct mbuf *)so2, (struct mbuf *)0);
+	splx(s);
+	return (error);
+}
+
+sodisconnect(so)
+	register struct socket *so;
+{
+	int error;
+	int s;
+
+	/*
+	 * Refuse unless connected and not already disconnecting.
+	 */
+	s = splnet();
+	if (!(so->so_state & SS_ISCONNECTED))
+		error = ENOTCONN;
+	else if (so->so_state & SS_ISDISCONNECTING)
+		error = EALREADY;
+	else
+		error = (*so->so_proto->pr_usrreq)(so, PRU_DISCONNECT,
+		    (struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0);
+	splx(s);
+	return (error);
+}
+
+#define	SBLOCKWAIT(f)	(((f) & MSG_DONTWAIT) ? M_NOWAIT : M_WAITOK)	/* second argument for sblock() */
+/*
+ * Send on a socket.
+ * If send must go all at once and message is larger than
+ * send buffering, then hard error.
+ * Lock against other senders.
+ * If must go all at once and not enough room now, then
+ * inform user that this would block and do nothing.
+ * Otherwise, if nonblocking, send as much as possible.
+ * The data to be sent is described by "uio" if nonzero,
+ * otherwise by the mbuf chain "top" (which must be null
+ * if uio is not).  Data provided in mbuf chain must be small
+ * enough to send all at once.
+ *
+ * Returns nonzero on error, timeout or signal; callers
+ * must check for short counts if EINTR/ERESTART are returned.
+ * Data and control buffers are freed on return.
+ */
+sosend(so, addr, uio, top, control, flags)
+	register struct socket *so;
+	struct mbuf *addr;		/* destination, or null */
+	struct uio *uio;		/* user data, or null when top is given */
+	struct mbuf *top;		/* prepackaged data chain, or null */
+	struct mbuf *control;		/* control data, or null */
+	int flags;			/* MSG_* flags */
+{
+	struct proc *p = curproc;		/* XXX */
+	struct mbuf **mp;		/* where the next mbuf gets linked */
+	register struct mbuf *m;
+	register long space, len, resid;
+	int clen = 0, error, s, dontroute, mlen;
+	int atomic = sosendallatonce(so) || top;
+
+	if (uio)
+		resid = uio->uio_resid;
+	else
+		resid = top->m_pkthdr.len;
+	/*
+	 * resid is signed (it is compared with space, which may go
+	 * negative); a negative resid would loop sending 0-length
+	 * segments.  Leave via "out" so top and control are freed.
+	 */
+	if (resid < 0) {
+		error = EINVAL;
+		goto out;
+	}
+	dontroute =
+	    (flags & MSG_DONTROUTE) && (so->so_options & SO_DONTROUTE) == 0 &&
+	    (so->so_proto->pr_flags & PR_ATOMIC);
+	p->p_stats->p_ru.ru_msgsnd++;
+	if (control)
+		clen = control->m_len;
+#define	snderr(errno)	{ error = errno; splx(s); goto release; }
+
+restart:
+	if (error = sblock(&so->so_snd, SBLOCKWAIT(flags)))
+		goto out;
+	do {
+		s = splnet();
+		if (so->so_state & SS_CANTSENDMORE)
+			snderr(EPIPE);
+		if (so->so_error)
+			snderr(so->so_error);
+		if ((so->so_state & SS_ISCONNECTED) == 0) {
+			if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
+				if ((so->so_state & SS_ISCONFIRMING) == 0 &&
+				    !(resid == 0 && clen != 0))
+					snderr(ENOTCONN);
+			} else if (addr == 0)
+				snderr(EDESTADDRREQ);
+		}
+		space = sbspace(&so->so_snd);
+		if (flags & MSG_OOB)
+			space += 1024;
+		if (atomic && resid > so->so_snd.sb_hiwat ||
+		    clen > so->so_snd.sb_hiwat)
+			snderr(EMSGSIZE);
+		if (space < resid + clen && uio &&
+		    (atomic || space < so->so_snd.sb_lowat || space < clen)) {
+			if (so->so_state & SS_NBIO)
+				snderr(EWOULDBLOCK);
+			sbunlock(&so->so_snd);	/* drop the lock while asleep */
+			error = sbwait(&so->so_snd);
+			splx(s);
+			if (error)
+				goto out;	/* lock already released above */
+			goto restart;
+		}
+		splx(s);
+		mp = &top;
+		space -= clen;
+		do {
+		    if (uio == NULL) {
+			/*
+			 * Data is prepackaged in "top".
+			 */
+			resid = 0;
+			if (flags & MSG_EOR)
+				top->m_flags |= M_EOR;
+		    } else do {
+			if (top == 0) {
+				MGETHDR(m, M_WAIT, MT_DATA);
+				mlen = MHLEN;
+				m->m_pkthdr.len = 0;
+				m->m_pkthdr.rcvif = (struct ifnet *)0;
+			} else {
+				MGET(m, M_WAIT, MT_DATA);
+				mlen = MLEN;
+			}
+			if (resid >= MINCLSIZE && space >= MCLBYTES) {
+				MCLGET(m, M_WAIT);
+				if ((m->m_flags & M_EXT) == 0)
+					goto nopages;
+				mlen = MCLBYTES;
+#ifdef	MAPPED_MBUFS
+				len = min(MCLBYTES, resid);
+#else
+				if (atomic && top == 0) {
+					len = min(MCLBYTES - max_hdr, resid);
+					m->m_data += max_hdr;
+				} else
+					len = min(MCLBYTES, resid);
+#endif
+				space -= MCLBYTES;
+			} else {
+nopages:
+				len = min(min(mlen, resid), space);
+				space -= len;
+				/*
+				 * For datagram protocols, leave room
+				 * for protocol headers in first mbuf.
+				 */
+				if (atomic && top == 0 && len < mlen)
+					MH_ALIGN(m, len);
+			}
+			error = uiomove(mtod(m, caddr_t), (int)len, uio);
+			resid = uio->uio_resid;
+			m->m_len = len;
+			*mp = m;		/* linked first, so "out" frees it */
+			top->m_pkthdr.len += len;
+			if (error)
+				goto release;
+			mp = &m->m_next;
+			if (resid <= 0) {
+				if (flags & MSG_EOR)
+					top->m_flags |= M_EOR;
+				break;
+			}
+		    } while (space > 0 && atomic);
+		    if (dontroute)
+			    so->so_options |= SO_DONTROUTE;
+		    s = splnet();				/* XXX */
+		    error = (*so->so_proto->pr_usrreq)(so,
+			(flags & MSG_OOB) ? PRU_SENDOOB : PRU_SEND,
+			top, addr, control);
+		    splx(s);
+		    if (dontroute)
+			    so->so_options &= ~SO_DONTROUTE;
+		    clen = 0;
+		    control = 0;		/* given to the protocol: not freed at "out" */
+		    top = 0;			/* likewise */
+		    mp = &top;
+		    if (error)
+			goto release;
+		} while (resid && space > 0);
+	} while (resid);
+
+release:
+	sbunlock(&so->so_snd);
+out:
+	if (top)
+		m_freem(top);
+	if (control)
+		m_freem(control);
+	return (error);
+}
+
+/*
+ * Implement receive operations on a socket.
+ * We depend on the way that records are added to the sockbuf
+ * by sbappend*.  In particular, each record (mbufs linked through m_next)
+ * must begin with an address if the protocol so specifies,
+ * followed by an optional mbuf or mbufs containing ancillary data,
+ * and then zero or more mbufs of data.
+ * In order to avoid blocking network interrupts for the entire time here,
+ * we splx() while doing the actual copy to user space.
+ * Although the sockbuf is locked, new data may still be appended,
+ * and thus we must maintain consistency of the sockbuf during that time.
+ *
+ * The caller may receive the data as a single mbuf chain by supplying
+ * an mbuf **mp0 for use in returning the chain.  The uio is then used
+ * only for the count in uio_resid.
+ */
+soreceive(so, paddr, uio, mp0, controlp, flagsp)
+	register struct socket *so;	/* socket whose so_rcv is read */
+	struct mbuf **paddr;		/* out: sender address mbuf; may be null */
+	struct uio *uio;		/* copy destination; uio_resid bounds the read */
+	struct mbuf **mp0;		/* out: data as an mbuf chain; may be null */
+	struct mbuf **controlp;		/* out: control mbufs; may be null */
+	int *flagsp;			/* in/out: MSG_* flags; may be null */
+{
+	register struct mbuf *m, **mp;
+	register int flags, len, error, s, offset;
+	struct protosw *pr = so->so_proto;
+	struct mbuf *nextrecord;
+	int moff, type;
+	int orig_resid = uio->uio_resid;
+
+	mp = mp0;
+	if (paddr)
+		*paddr = 0;
+	if (controlp)
+		*controlp = 0;
+	if (flagsp)
+		flags = *flagsp &~ MSG_EOR;
+	else
+		flags = 0;
+	if (flags & MSG_OOB) {		/* out-of-band read: ask the protocol */
+		m = m_get(M_WAIT, MT_DATA);
+		error = (*pr->pr_usrreq)(so, PRU_RCVOOB,
+		    m, (struct mbuf *)(flags & MSG_PEEK), (struct mbuf *)0);
+		if (error)
+			goto bad;
+		do {
+			error = uiomove(mtod(m, caddr_t),
+			    (int) min(uio->uio_resid, m->m_len), uio);
+			m = m_free(m);
+		} while (uio->uio_resid && error == 0 && m);
+bad:
+		if (m)
+			m_freem(m);
+		return (error);
+	}
+	if (mp)
+		*mp = (struct mbuf *)0;
+	if (so->so_state & SS_ISCONFIRMING && uio->uio_resid)
+		(*pr->pr_usrreq)(so, PRU_RCVD, (struct mbuf *)0,
+		    (struct mbuf *)0, (struct mbuf *)0);
+
+restart:
+	if (error = sblock(&so->so_rcv, SBLOCKWAIT(flags)))
+		return (error);
+	s = splnet();
+
+	m = so->so_rcv.sb_mb;
+	/*
+	 * If we have less data than requested, block awaiting more
+	 * (subject to any timeout) if:
+	 *   1. the current count is less than the low water mark, or
+	 *   2. MSG_WAITALL is set, and it is possible to do the entire
+	 *	receive operation at once if we block (resid <= hiwat).
+	 *   3. MSG_DONTWAIT is not set
+	 * If MSG_WAITALL is set but resid is larger than the receive buffer,
+	 * we have to do the receive in sections, and thus risk returning
+	 * a short count if a timeout or signal occurs after we start.
+	 */
+	if (m == 0 || ((flags & MSG_DONTWAIT) == 0 &&
+	    so->so_rcv.sb_cc < uio->uio_resid) &&
+	    (so->so_rcv.sb_cc < so->so_rcv.sb_lowat ||
+	    ((flags & MSG_WAITALL) && uio->uio_resid <= so->so_rcv.sb_hiwat)) &&
+	    m->m_nextpkt == 0 && (pr->pr_flags & PR_ATOMIC) == 0) {
+#ifdef DIAGNOSTIC
+		if (m == 0 && so->so_rcv.sb_cc)
+			panic("receive 1");
+#endif
+		if (so->so_error) {
+			if (m)
+				goto dontblock;
+			error = so->so_error;
+			if ((flags & MSG_PEEK) == 0)
+				so->so_error = 0;
+			goto release;
+		}
+		if (so->so_state & SS_CANTRCVMORE) {
+			if (m)
+				goto dontblock;
+			else
+				goto release;
+		}
+		for (; m; m = m->m_next)
+			if (m->m_type == MT_OOBDATA  || (m->m_flags & M_EOR)) {
+				m = so->so_rcv.sb_mb;
+				goto dontblock;
+			}
+		if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 &&
+		    (so->so_proto->pr_flags & PR_CONNREQUIRED)) {
+			error = ENOTCONN;
+			goto release;
+		}
+		if (uio->uio_resid == 0)
+			goto release;
+		if ((so->so_state & SS_NBIO) || (flags & MSG_DONTWAIT)) {
+			error = EWOULDBLOCK;
+			goto release;
+		}
+		sbunlock(&so->so_rcv);
+		error = sbwait(&so->so_rcv);
+		splx(s);
+		if (error)
+			return (error);
+		goto restart;
+	}
+dontblock:
+	if (uio->uio_procp)
+		uio->uio_procp->p_stats->p_ru.ru_msgrcv++;
+	nextrecord = m->m_nextpkt;
+	if (pr->pr_flags & PR_ADDR) {	/* record starts with an address mbuf */
+#ifdef DIAGNOSTIC
+		if (m->m_type != MT_SONAME)
+			panic("receive 1a");
+#endif
+		orig_resid = 0;
+		if (flags & MSG_PEEK) {
+			if (paddr)
+				*paddr = m_copy(m, 0, m->m_len);
+			m = m->m_next;
+		} else {
+			sbfree(&so->so_rcv, m);
+			if (paddr) {
+				*paddr = m;
+				so->so_rcv.sb_mb = m->m_next;
+				m->m_next = 0;
+				m = so->so_rcv.sb_mb;
+			} else {
+				MFREE(m, so->so_rcv.sb_mb);
+				m = so->so_rcv.sb_mb;
+			}
+		}
+	}
+	while (m && m->m_type == MT_CONTROL && error == 0) {
+		if (flags & MSG_PEEK) {
+			if (controlp)
+				*controlp = m_copy(m, 0, m->m_len);
+			m = m->m_next;
+		} else {
+			sbfree(&so->so_rcv, m);
+			if (controlp) {
+				if (pr->pr_domain->dom_externalize &&
+				    mtod(m, struct cmsghdr *)->cmsg_type ==
+				    SCM_RIGHTS)
+				   error = (*pr->pr_domain->dom_externalize)(m);
+				*controlp = m;
+				so->so_rcv.sb_mb = m->m_next;
+				m->m_next = 0;
+				m = so->so_rcv.sb_mb;
+			} else {
+				MFREE(m, so->so_rcv.sb_mb);
+				m = so->so_rcv.sb_mb;
+			}
+		}
+		if (controlp) {	/* NOTE(review): assumes *controlp is non-null here */
+			orig_resid = 0;
+			controlp = &(*controlp)->m_next;
+		}
+	}
+	if (m) {
+		if ((flags & MSG_PEEK) == 0)
+			m->m_nextpkt = nextrecord;
+		type = m->m_type;
+		if (type == MT_OOBDATA)
+			flags |= MSG_OOB;
+	}
+	moff = 0;	/* offset into the current mbuf (advanced when peeking) */
+	offset = 0;	/* bytes peeked so far, compared against so_oobmark */
+	while (m && uio->uio_resid > 0 && error == 0) {
+		if (m->m_type == MT_OOBDATA) {
+			if (type != MT_OOBDATA)
+				break;
+		} else if (type == MT_OOBDATA)
+			break;
+#ifdef DIAGNOSTIC
+		else if (m->m_type != MT_DATA && m->m_type != MT_HEADER)
+			panic("receive 3");
+#endif
+		so->so_state &= ~SS_RCVATMARK;
+		len = uio->uio_resid;
+		if (so->so_oobmark && len > so->so_oobmark - offset)
+			len = so->so_oobmark - offset;
+		if (len > m->m_len - moff)
+			len = m->m_len - moff;
+		/*
+		 * If mp is set, just pass back the mbufs.
+		 * Otherwise copy them out via the uio, then free.
+		 * Sockbuf must be consistent here (points to current mbuf,
+		 * it points to next record) when we drop priority;
+		 * we must note any additions to the sockbuf when we
+		 * block interrupts again.
+		 */
+		if (mp == 0) {
+			splx(s);
+			error = uiomove(mtod(m, caddr_t) + moff, (int)len, uio);
+			s = splnet();
+		} else
+			uio->uio_resid -= len;
+		if (len == m->m_len - moff) {	/* whole mbuf consumed */
+			if (m->m_flags & M_EOR)
+				flags |= MSG_EOR;
+			if (flags & MSG_PEEK) {
+				m = m->m_next;
+				moff = 0;
+			} else {
+				nextrecord = m->m_nextpkt;
+				sbfree(&so->so_rcv, m);
+				if (mp) {
+					*mp = m;
+					mp = &m->m_next;
+					so->so_rcv.sb_mb = m = m->m_next;
+					*mp = (struct mbuf *)0;
+				} else {
+					MFREE(m, so->so_rcv.sb_mb);
+					m = so->so_rcv.sb_mb;
+				}
+				if (m)
+					m->m_nextpkt = nextrecord;
+			}
+		} else {			/* only part of the mbuf consumed */
+			if (flags & MSG_PEEK)
+				moff += len;
+			else {
+				if (mp)
+					*mp = m_copym(m, 0, len, M_WAIT);
+				m->m_data += len;
+				m->m_len -= len;
+				so->so_rcv.sb_cc -= len;
+			}
+		}
+		if (so->so_oobmark) {
+			if ((flags & MSG_PEEK) == 0) {
+				so->so_oobmark -= len;
+				if (so->so_oobmark == 0) {
+					so->so_state |= SS_RCVATMARK;
+					break;
+				}
+			} else {
+				offset += len;
+				if (offset == so->so_oobmark)
+					break;
+			}
+		}
+		if (flags & MSG_EOR)
+			break;
+		/*
+		 * If the MSG_WAITALL flag is set (for non-atomic socket),
+		 * we must not quit until "uio->uio_resid == 0" or an error
+		 * termination.  If a signal/timeout occurs, return
+		 * with a short count but without error.
+		 * Keep sockbuf locked against other readers.
+		 */
+		while (flags & MSG_WAITALL && m == 0 && uio->uio_resid > 0 &&
+		    !sosendallatonce(so) && !nextrecord) {
+			if (so->so_error || so->so_state & SS_CANTRCVMORE)
+				break;
+			error = sbwait(&so->so_rcv);
+			if (error) {
+				sbunlock(&so->so_rcv);
+				splx(s);
+				return (0);
+			}
+			if (m = so->so_rcv.sb_mb)
+				nextrecord = m->m_nextpkt;
+		}
+	}
+
+	if (m && pr->pr_flags & PR_ATOMIC) {	/* unread remainder of a record */
+		flags |= MSG_TRUNC;
+		if ((flags & MSG_PEEK) == 0)
+			(void) sbdroprecord(&so->so_rcv);
+	}
+	if ((flags & MSG_PEEK) == 0) {
+		if (m == 0)
+			so->so_rcv.sb_mb = nextrecord;
+		if (pr->pr_flags & PR_WANTRCVD && so->so_pcb)
+			/* pr_usrreq takes five arguments, as at every other call. */
+			(*pr->pr_usrreq)(so, PRU_RCVD, (struct mbuf *)0,
+			    (struct mbuf *)flags, (struct mbuf *)0);
+	}
+	if (orig_resid == uio->uio_resid && orig_resid &&
+	    (flags & MSG_EOR) == 0 && (so->so_state & SS_CANTRCVMORE) == 0) {
+		sbunlock(&so->so_rcv);
+		splx(s);
+		goto restart;
+	}
+		
+	if (flagsp)
+		*flagsp |= flags;
+release:
+	sbunlock(&so->so_rcv);
+	splx(s);
+	return (error);
+}
+
+soshutdown(so, how)
+	register struct socket *so;
+	register int how;
+{
+	/* Bias how by one so it can be tested against the FREAD/FWRITE bits. */
+	register int dirs = how + 1;
+
+	if (dirs & FREAD)
+		sorflush(so);
+	if ((dirs & FWRITE) == 0)
+		return (0);
+	return ((*so->so_proto->pr_usrreq)(so, PRU_SHUTDOWN,
+	    (struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0));
+}
+
+sorflush(so)
+	register struct socket *so;
+{
+	register struct sockbuf *rcv = &so->so_rcv;
+	register struct protosw *proto = so->so_proto;
+	register int ipl;
+	struct sockbuf snap;	/* private copy of the buffer being torn down */
+
+	rcv->sb_flags |= SB_NOINTR;
+	(void) sblock(rcv, M_WAITOK);
+	ipl = splimp();
+	socantrcvmore(so);
+	sbunlock(rcv);
+	snap = *rcv;		/* keep the old contents, then clear the original */
+	bzero((caddr_t)rcv, sizeof (*rcv));
+	splx(ipl);
+	if ((proto->pr_flags & PR_RIGHTS) && proto->pr_domain->dom_dispose)
+		(*proto->pr_domain->dom_dispose)(snap.sb_mb);
+	sbrelease(&snap);
+}
+
+sosetopt(so, level, optname, m0)	/* set a socket option; returns 0 or an errno */
+	register struct socket *so;
+	int level, optname;
+	struct mbuf *m0;		/* option value; freed here or passed to the protocol */
+{
+	int error = 0;
+	register struct mbuf *m = m0;
+
+	if (level != SOL_SOCKET) {	/* not a socket-level option: protocol decides */
+		if (so->so_proto && so->so_proto->pr_ctloutput)
+			return ((*so->so_proto->pr_ctloutput)
+				  (PRCO_SETOPT, so, level, optname, &m0));
+		error = ENOPROTOOPT;
+	} else {
+		switch (optname) {
+
+		case SO_LINGER:
+			if (m == NULL || m->m_len != sizeof (struct linger)) {
+				error = EINVAL;
+				goto bad;
+			}
+			so->so_linger = mtod(m, struct linger *)->l_linger;
+			/* fall thru... */
+
+		case SO_DEBUG:
+		case SO_KEEPALIVE:
+		case SO_DONTROUTE:
+		case SO_USELOOPBACK:
+		case SO_BROADCAST:
+		case SO_REUSEADDR:
+		case SO_REUSEPORT:
+		case SO_OOBINLINE:
+			if (m == NULL || m->m_len < sizeof (int)) {
+				error = EINVAL;
+				goto bad;
+			}
+			if (*mtod(m, int *))	/* first int of the value turns the bit on/off */
+				so->so_options |= optname;
+			else
+				so->so_options &= ~optname;
+			break;
+
+		case SO_SNDBUF:
+		case SO_RCVBUF:
+		case SO_SNDLOWAT:
+		case SO_RCVLOWAT:
+			if (m == NULL || m->m_len < sizeof (int)) {
+				error = EINVAL;
+				goto bad;
+			}
+			switch (optname) {
+
+			case SO_SNDBUF:
+			case SO_RCVBUF:
+				if (sbreserve(optname == SO_SNDBUF ?
+				    &so->so_snd : &so->so_rcv,
+				    (u_long) *mtod(m, int *)) == 0) {
+					error = ENOBUFS;
+					goto bad;
+				}
+				break;
+
+			case SO_SNDLOWAT:
+				so->so_snd.sb_lowat = *mtod(m, int *);
+				break;
+			case SO_RCVLOWAT:
+				so->so_rcv.sb_lowat = *mtod(m, int *);
+				break;
+			}
+			break;
+
+		case SO_SNDTIMEO:
+		case SO_RCVTIMEO:
+		    {
+			struct timeval *tv;
+			short val;		/* timeout in clock ticks */
+
+			if (m == NULL || m->m_len < sizeof (*tv)) {
+				error = EINVAL;
+				goto bad;
+			}
+			tv = mtod(m, struct timeval *);
+			if (tv->tv_sec > SHRT_MAX / hz - hz) {	/* would not fit in val */
+				error = EDOM;
+				goto bad;
+			}
+			val = tv->tv_sec * hz + tv->tv_usec / tick;
+
+			switch (optname) {
+
+			case SO_SNDTIMEO:
+				so->so_snd.sb_timeo = val;
+				break;
+			case SO_RCVTIMEO:
+				so->so_rcv.sb_timeo = val;
+				break;
+			}
+			break;
+		    }
+
+		default:
+			error = ENOPROTOOPT;
+			break;
+		}
+		if (error == 0 && so->so_proto && so->so_proto->pr_ctloutput) {
+			(void) ((*so->so_proto->pr_ctloutput)
+				  (PRCO_SETOPT, so, level, optname, &m0));
+			m = NULL;	/* freed by protocol */
+		}
+	}
+bad:
+	if (m)			/* value mbuf still owned here: release it */
+		(void) m_free(m);
+	return (error);
+}
+
+sogetopt(so, level, optname, mp)	/* fetch a socket option; returns 0 or an errno */
+	register struct socket *so;
+	int level, optname;
+	struct mbuf **mp;		/* out: mbuf holding the option value */
+{
+	register struct mbuf *m;
+
+	if (level != SOL_SOCKET) {	/* not a socket-level option: protocol decides */
+		if (so->so_proto && so->so_proto->pr_ctloutput) {
+			return ((*so->so_proto->pr_ctloutput)
+				  (PRCO_GETOPT, so, level, optname, mp));
+		} else
+			return (ENOPROTOOPT);
+	} else {
+		m = m_get(M_WAIT, MT_SOOPTS);
+		m->m_len = sizeof (int);	/* default size; some cases override it */
+
+		switch (optname) {
+
+		case SO_LINGER:
+			m->m_len = sizeof (struct linger);
+			mtod(m, struct linger *)->l_onoff =
+				so->so_options & SO_LINGER;
+			mtod(m, struct linger *)->l_linger = so->so_linger;
+			break;
+
+		case SO_USELOOPBACK:
+		case SO_DONTROUTE:
+		case SO_DEBUG:
+		case SO_KEEPALIVE:
+		case SO_REUSEADDR:
+		case SO_REUSEPORT:
+		case SO_BROADCAST:
+		case SO_OOBINLINE:
+			*mtod(m, int *) = so->so_options & optname;
+			break;
+
+		case SO_TYPE:
+			*mtod(m, int *) = so->so_type;
+			break;
+
+		case SO_ERROR:
+			*mtod(m, int *) = so->so_error;
+			so->so_error = 0;	/* reading the error also clears it */
+			break;
+
+		case SO_SNDBUF:
+			*mtod(m, int *) = so->so_snd.sb_hiwat;
+			break;
+
+		case SO_RCVBUF:
+			*mtod(m, int *) = so->so_rcv.sb_hiwat;
+			break;
+
+		case SO_SNDLOWAT:
+			*mtod(m, int *) = so->so_snd.sb_lowat;
+			break;
+
+		case SO_RCVLOWAT:
+			*mtod(m, int *) = so->so_rcv.sb_lowat;
+			break;
+
+		case SO_SNDTIMEO:
+		case SO_RCVTIMEO:
+		    {
+			int val = (optname == SO_SNDTIMEO ?
+			     so->so_snd.sb_timeo : so->so_rcv.sb_timeo);
+			/* sosetopt() stored tv_sec * hz + tv_usec / tick; invert that. */
+			m->m_len = sizeof(struct timeval);
+			mtod(m, struct timeval *)->tv_sec = val / hz;
+			mtod(m, struct timeval *)->tv_usec =
+			    (val % hz) * tick;
+			break;
+		    }
+
+		default:
+			(void)m_free(m);
+			return (ENOPROTOOPT);
+		}
+		*mp = m;
+		return (0);
+	}
+}
+
+sohasoutofband(so)
+	register struct socket *so;
+{
+	struct proc *owner;	/* process found for a positive so_pgid */
+
+	if (so->so_pgid < 0)		/* negative: gsignal() gets the negated id */
+		gsignal(-so->so_pgid, SIGURG);
+	else if (so->so_pgid != 0 && (owner = pfind(so->so_pgid)) != 0)
+		psignal(owner, SIGURG);
+	selwakeup(&so->so_rcv.sb_sel);
+}
diff --git a/sys/kern/uipc_socket2.c b/sys/kern/uipc_socket2.c
new file mode 100644
index 00000000000..d4af592d79b
--- /dev/null
+++ b/sys/kern/uipc_socket2.c
@@ -0,0 +1,755 @@
+/*
+ * Copyright (c) 1982, 1986, 1988, 1990, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)uipc_socket2.c	8.1 (Berkeley) 6/10/93
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/file.h>
+#include <sys/buf.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/protosw.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+
+/*
+ * Primitive routines for operating on sockets and socket buffers
+ */
+
+/* strings for sleep message: */
+char	netio[] = "netio";	/* passed to tsleep() by sbwait() and sb_lock() */
+char	netcon[] = "netcon";
+char	netcls[] = "netcls";
+
+u_long	sb_max = SB_MAX;		/* patchable; limit checked by sbreserve() */
+
+/*
+ * Procedures to manipulate state flags of socket
+ * and do appropriate wakeups.  Normal sequence from the
+ * active (originating) side is that soisconnecting() is
+ * called during processing of connect() call,
+ * resulting in an eventual call to soisconnected() if/when the
+ * connection is established.  When the connection is torn down
+ * soisdisconnecting() is called during processing of disconnect() call,
+ * and soisdisconnected() is called when the connection to the peer
+ * is totally severed.  The semantics of these routines are such that
+ * connectionless protocols can call soisconnected() and soisdisconnected()
+ * only, bypassing the in-progress calls when setting up a ``connection''
+ * takes no time.
+ *
+ * From the passive side, a socket is created with
+ * two queues of sockets: so_q0 for connections in progress
+ * and so_q for connections already made and awaiting user acceptance.
+ * As a protocol is preparing incoming connections, it creates a socket
+ * structure queued on so_q0 by calling sonewconn().  When the connection
+ * is established, soisconnected() is called, and transfers the
+ * socket structure to so_q, making it available to accept().
+ * 
+ * If a socket is closed with sockets on either
+ * so_q0 or so_q, these sockets are dropped.
+ *
+ * If higher level protocols are implemented in
+ * the kernel, the wakeups done here will sometimes
+ * cause software-interrupt process scheduling.
+ */
+
+soisconnecting(so)
+	register struct socket *so;
+{
+	/* Drop the connected/disconnecting bits, then mark as connecting. */
+	so->so_state = (so->so_state &
+	    ~(SS_ISCONNECTED | SS_ISDISCONNECTING)) | SS_ISCONNECTING;
+}
+
+soisconnected(so)
+	register struct socket *so;
+{
+	register struct socket *listener = so->so_head;
+
+	so->so_state &= ~(SS_ISCONNECTING|SS_ISDISCONNECTING|SS_ISCONFIRMING);
+	so->so_state |= SS_ISCONNECTED;
+	if (listener == 0 || soqremque(so, 0) == 0) {
+		wakeup((caddr_t)&so->so_timeo);
+		sorwakeup(so);
+		sowwakeup(so);
+	} else {		/* was on the listener's so_q0: move it to so_q */
+		soqinsque(listener, so, 1);
+		sorwakeup(listener);
+		wakeup((caddr_t)&listener->so_timeo);
+	}
+}
+
+soisdisconnecting(so)
+	register struct socket *so;
+{
+	/* No longer connecting; mark disconnecting and both directions shut. */
+	so->so_state = (so->so_state & ~SS_ISCONNECTING) |
+	    SS_ISDISCONNECTING | SS_CANTRCVMORE | SS_CANTSENDMORE;
+	wakeup((caddr_t)&so->so_timeo);
+	sowwakeup(so);
+	sorwakeup(so);
+}
+
+soisdisconnected(so)
+	register struct socket *so;
+{
+	/* Clear every connection-progress bit and mark both directions shut. */
+	so->so_state = (so->so_state & ~(SS_ISCONNECTING | SS_ISCONNECTED |
+	    SS_ISDISCONNECTING)) | SS_CANTRCVMORE | SS_CANTSENDMORE;
+	wakeup((caddr_t)&so->so_timeo);
+	sowwakeup(so);
+	sorwakeup(so);
+}
+
+/*
+ * When an attempt at a new connection is noted on a socket
+ * which accepts connections, sonewconn is called.  If the
+ * connection is possible (subject to space constraints, etc.)
+ * then we allocate a new structure, properly linked into the
+ * data structure of the original socket, and return this.
+ * Connstatus may be 0, or SS_ISCONFIRMING, or SS_ISCONNECTED.
+ *
+ * Currently, sonewconn() is defined as sonewconn1() in socketvar.h
+ * to catch calls that are missing the (new) second parameter.
+ */
+struct socket *
+sonewconn1(head, connstatus)
+	register struct socket *head;
+	int connstatus;
+{
+	register struct socket *nso;
+	int onq = (connstatus != 0);	/* 1: queue on so_q, 0: queue on so_q0 */
+
+	if (head->so_qlen + head->so_q0len > 3 * head->so_qlimit / 2)
+		return ((struct socket *)0);
+	MALLOC(nso, struct socket *, sizeof(*nso), M_SOCKET, M_DONTWAIT);
+	if (nso == NULL)
+		return ((struct socket *)0);
+	bzero((caddr_t)nso, sizeof(*nso));
+	nso->so_type = head->so_type;
+	nso->so_proto = head->so_proto;
+	nso->so_options = head->so_options &~ SO_ACCEPTCONN;
+	nso->so_state = head->so_state | SS_NOFDREF;
+	nso->so_linger = head->so_linger;
+	nso->so_timeo = head->so_timeo;
+	nso->so_pgid = head->so_pgid;
+	(void) soreserve(nso, head->so_snd.sb_hiwat, head->so_rcv.sb_hiwat);
+	soqinsque(head, nso, onq);
+	if ((*nso->so_proto->pr_usrreq)(nso, PRU_ATTACH,
+	    (struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0)) {
+		(void) soqremque(nso, onq);	/* attach failed: undo and free */
+		(void) free((caddr_t)nso, M_SOCKET);
+		return ((struct socket *)0);
+	}
+	if (connstatus == 0)
+		return (nso);
+	sorwakeup(head);
+	wakeup((caddr_t)&head->so_timeo);
+	nso->so_state |= connstatus;
+	return (nso);
+}
+
+soqinsque(head, so, q)
+	register struct socket *head, *so;
+	int q;
+{
+	register struct socket **link;	/* slot that will point at so */
+
+	so->so_head = head;
+	if (q != 0) {
+		head->so_qlen++;
+		so->so_q = 0;
+		for (link = &head->so_q; *link != 0; link = &(*link)->so_q)
+			;
+	} else {
+		head->so_q0len++;
+		so->so_q0 = 0;
+		for (link = &head->so_q0; *link != 0; link = &(*link)->so_q0)
+			;
+	}
+	*link = so;		/* link now addresses the null at the queue's end */
+}
+
+soqremque(so, q)
+	register struct socket *so;
+	int q;
+{
+	register struct socket *head, *pred, *cur;
+
+	head = so->so_head;
+	/* Walk the chosen queue until pred is the entry just before so. */
+	for (pred = head; ; pred = cur) {
+		cur = q ? pred->so_q : pred->so_q0;
+		if (cur == so)
+			break;
+		if (cur == 0)
+			return (0);		/* so is not on this queue */
+	}
+	/* Unlink so and adjust the matching queue length. */
+	if (q != 0) {
+		pred->so_q = so->so_q;
+		head->so_qlen--;
+	} else {
+		pred->so_q0 = so->so_q0;
+		head->so_q0len--;
+	}
+	so->so_q0 = so->so_q = 0;
+	so->so_head = 0;
+	return (1);
+}
+
+/*
+ * Socantsendmore indicates that no more data will be sent on the
+ * socket; it would normally be applied to a socket when the user
+ * informs the system that no more data is to be sent, by the protocol
+ * code (in case PRU_SHUTDOWN).  Socantrcvmore indicates that no more data
+ * will be received, and will normally be applied to the socket by a
+ * protocol when it detects that the peer will send no more data.
+ * Data queued for reading in the socket may yet be read.
+ */
+
+socantsendmore(so)
+	struct socket *so;
+{
+	/* Flag the send side as shut, then issue the write-side wakeup. */
+	so->so_state = so->so_state | SS_CANTSENDMORE;
+	sowwakeup(so);
+}
+
+socantrcvmore(so)
+	struct socket *so;
+{
+	/* Flag the receive side as shut, then issue the read-side wakeup. */
+	so->so_state = so->so_state | SS_CANTRCVMORE;
+	sorwakeup(so);
+}
+
+/*
+ * Wait for data to arrive at/drain from a socket buffer.
+ */
+sbwait(sb)
+	struct sockbuf *sb;
+{
+	int pri = (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK | PCATCH;
+
+	sb->sb_flags |= SB_WAIT;
+	/* Sleep on sb_cc, the channel sowakeup() wakes when SB_WAIT is set. */
+	return (tsleep((caddr_t)&sb->sb_cc, pri, netio, sb->sb_timeo));
+}
+
+/* 
+ * Lock a sockbuf already known to be locked;
+ * return any error returned from sleep (EINTR).
+ */
+sb_lock(sb)
+	register struct sockbuf *sb;
+{
+	int rc, pri;
+
+	while (sb->sb_flags & SB_LOCK) {
+		sb->sb_flags |= SB_WANT;
+		pri = (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK|PCATCH;
+		rc = tsleep((caddr_t)&sb->sb_flags, pri, netio, 0);
+		if (rc != 0)
+			return (rc);	/* sleep failed; lock not taken */
+	}
+	sb->sb_flags |= SB_LOCK;
+	return (0);
+}
+
+/*
+ * Wakeup processes waiting on a socket buffer.
+ * Do asynchronous notification via SIGIO
+ * if the socket has the SS_ASYNC flag set.
+ */
+sowakeup(so, sb)
+	register struct socket *so;
+	register struct sockbuf *sb;
+{
+	struct proc *owner;	/* process found for a positive so_pgid */
+
+	selwakeup(&sb->sb_sel);
+	sb->sb_flags &= ~SB_SEL;
+	if ((sb->sb_flags & SB_WAIT) != 0) {
+		sb->sb_flags &= ~SB_WAIT;
+		wakeup((caddr_t)&sb->sb_cc);
+	}
+	if ((so->so_state & SS_ASYNC) == 0)
+		return;			/* no SIGIO delivery requested */
+	if (so->so_pgid < 0)
+		gsignal(-so->so_pgid, SIGIO);
+	else if (so->so_pgid != 0 && (owner = pfind(so->so_pgid)) != 0)
+		psignal(owner, SIGIO);
+}
+
+/*
+ * Socket buffer (struct sockbuf) utility routines.
+ *
+ * Each socket contains two socket buffers: one for sending data and
+ * one for receiving data.  Each buffer contains a queue of mbufs,
+ * information about the number of mbufs and amount of data in the
+ * queue, and other fields allowing select() statements and notification
+ * on data availability to be implemented.
+ *
+ * Data stored in a socket buffer is maintained as a list of records.
+ * Each record is a list of mbufs chained together with the m_next
+ * field.  Records are chained together with the m_nextpkt field. The upper
+ * level routine soreceive() expects the following conventions to be
+ * observed when placing information in the receive buffer:
+ *
+ * 1. If the protocol requires each message be preceded by the sender's
+ *    name, then a record containing that name must be present before
+ *    any associated data (mbuf's must be of type MT_SONAME).
+ * 2. If the protocol supports the exchange of ``access rights'' (really
+ *    just additional data associated with the message), and there are
+ *    ``rights'' to be received, then a record containing this data
+ *    should be present (mbuf's must be of type MT_RIGHTS).
+ * 3. If a name or rights record exists, then it must be followed by
+ *    a data record, perhaps of zero length.
+ *
+ * Before using a new socket structure it is first necessary to reserve
+ * buffer space to the socket, by calling sbreserve().  This should commit
+ * some of the available buffer space in the system buffer pool for the
+ * socket (currently, it does nothing but enforce limits).  The space
+ * should be released by calling sbrelease() when the socket is destroyed.
+ */
+
+soreserve(so, sndcc, rcvcc)
+	register struct socket *so;
+	u_long sndcc, rcvcc;
+{
+	register struct sockbuf *snd = &so->so_snd, *rcv = &so->so_rcv;
+
+	if (sbreserve(snd, sndcc) == 0)
+		return (ENOBUFS);
+	if (sbreserve(rcv, rcvcc) == 0) {
+		sbrelease(snd);		/* undo the send-side reservation */
+		return (ENOBUFS);
+	}
+	/* Fill in default low-water marks where none has been set. */
+	if (rcv->sb_lowat == 0)
+		rcv->sb_lowat = 1;
+	if (snd->sb_lowat == 0)
+		snd->sb_lowat = MCLBYTES;
+	if (snd->sb_lowat > snd->sb_hiwat)
+		snd->sb_lowat = snd->sb_hiwat;
+	return (0);
+}
+
+/*
+ * Allot mbufs to a sockbuf.
+ * Attempt to scale mbmax so that mbcnt doesn't become limiting
+ * if buffering efficiency is near the normal case.
+ */
+sbreserve(sb, cc)
+	struct sockbuf *sb;
+	u_long cc;
+{
+	/* Refuse requests above the cap derived from sb_max. */
+	if (sb_max * MCLBYTES / (MSIZE + MCLBYTES) < cc)
+		return (0);
+	sb->sb_hiwat = cc;
+	sb->sb_mbmax = min(cc * 2, sb_max);
+	if (sb->sb_hiwat < sb->sb_lowat)	/* keep lowat at or below hiwat */
+		sb->sb_lowat = sb->sb_hiwat;
+	return (1);
+}
+
+/*
+ * Free mbufs held by a socket, and reserved mbuf space.
+ */
+sbrelease(sb)
+	struct sockbuf *sb;
+{
+	sbflush(sb);
+	sb->sb_mbmax = 0;	/* give up the reservation: both limits to zero */
+	sb->sb_hiwat = 0;
+}
+
+/*
+ * Routines to add and remove
+ * data from an mbuf queue.
+ *
+ * The routines sbappend() or sbappendrecord() are normally called to
+ * append new mbufs to a socket buffer, after checking that adequate
+ * space is available, comparing the function sbspace() with the amount
+ * of data to be added.  sbappendrecord() differs from sbappend() in
+ * that data supplied is treated as the beginning of a new record.
+ * To place a sender's address, optional access rights, and data in a
+ * socket receive buffer, sbappendaddr() should be used.  To place
+ * access rights and data in a socket receive buffer, sbappendrights()
+ * should be used.  In either case, the new data begins a new record.
+ * Note that unlike sbappend() and sbappendrecord(), these routines check
+ * for the caller that there will be enough space to store the data.
+ * Each fails if there is not enough space, or if it cannot find mbufs
+ * to store additional information in.
+ *
+ * Reliable protocols may use the socket send buffer to hold data
+ * awaiting acknowledgement.  Data is normally copied from a socket
+ * send buffer in a protocol with m_copy for output to a peer,
+ * and then removing the data from the socket buffer with sbdrop()
+ * or sbdroprecord() when the data is acknowledged by the peer.
+ */
+
+/*
+ * Append mbuf chain m to the last record in the
+ * socket buffer sb.  The additional space associated with
+ * the mbuf chain is recorded in sb.  Empty mbufs are
+ * discarded and mbufs are compacted where possible.
+ */
+sbappend(sb, m)
+	struct sockbuf *sb;
+	struct mbuf *m;
+{
+	register struct mbuf *tail;
+
+	if (m == 0)
+		return;
+	if ((tail = sb->sb_mb) != 0) {
+		while (tail->m_nextpkt != 0)	/* step to the last record */
+			tail = tail->m_nextpkt;
+		while ((tail->m_flags & M_EOR) == 0 && tail->m_next != 0)
+			tail = tail->m_next;
+		if (tail->m_flags & M_EOR) {
+			sbappendrecord(sb, m); /* XXXXXX!!!! */
+			return;
+		}
+	}
+	sbcompress(sb, m, tail);
+}
+
+#ifdef SOCKBUF_DEBUG
+sbcheck(sb)
+	register struct sockbuf *sb;
+{
+	register struct mbuf *cur;
+	register int bytes = 0, storage = 0;	/* recomputed sb_cc and sb_mbcnt */
+
+	for (cur = sb->sb_mb; cur != 0; cur = cur->m_next) {
+		if (cur->m_nextpkt)
+			panic("sbcheck nextpkt");
+		bytes += cur->m_len;
+		storage += MSIZE;
+		if (cur->m_flags & M_EXT)
+			storage += cur->m_ext.ext_size;
+	}
+	if (bytes != sb->sb_cc || storage != sb->sb_mbcnt) {
+		printf("cc %d != %d || mbcnt %d != %d\n", bytes, sb->sb_cc,
+		    storage, sb->sb_mbcnt);
+		panic("sbcheck");
+	}
+}
+#endif
+
+/*
+ * As above, except the mbuf chain
+ * begins a new record.
+ */
+sbappendrecord(sb, m0)
+	register struct sockbuf *sb;
+	register struct mbuf *m0;
+{
+	register struct mbuf *last, *rest;
+
+	if (m0 == 0)
+		return;
+	/* Locate the head of the final record already queued, if any. */
+	for (last = sb->sb_mb; last && last->m_nextpkt; )
+		last = last->m_nextpkt;
+	/*
+	 * Queue the leading mbuf by itself as a new record;
+	 * this is what allows a record of zero length.
+	 */
+	sballoc(sb, m0);
+	if (last == 0)
+		sb->sb_mb = m0;
+	else
+		last->m_nextpkt = m0;
+	rest = m0->m_next;
+	m0->m_next = 0;
+	if (rest != 0 && (m0->m_flags & M_EOR)) {
+		m0->m_flags &= ~M_EOR;
+		rest->m_flags |= M_EOR;
+	}
+	sbcompress(sb, rest, m0);
+}
+
+/*
+ * As above except that OOB data
+ * is inserted at the beginning of the sockbuf,
+ * but after any other OOB data.
+ */
+sbinsertoob(sb, m0)
+	register struct sockbuf *sb;
+	register struct mbuf *m0;	/* chain to insert as a new record */
+{
+	register struct mbuf *m;
+	register struct mbuf **mp;	/* link through which m0 gets inserted */
+
+	if (m0 == 0)
+		return;
+	for (mp = &sb->sb_mb; m = *mp; mp = &((*mp)->m_nextpkt)) {
+	    again:
+		switch (m->m_type) {
+
+		case MT_OOBDATA:
+			continue;		/* WANT next train */
+
+		case MT_CONTROL:
+			if (m = m->m_next)
+				goto again;	/* inspect THIS train further */
+		}
+		break;		/* record is not OOB data: insert in front of it */
+	}
+	/*
+	 * Put the first mbuf on the queue.
+	 * Note this permits zero length records.
+	 */
+	sballoc(sb, m0);
+	m0->m_nextpkt = *mp;
+	*mp = m0;
+	m = m0->m_next;		/* the rest of the chain is added by sbcompress() */
+	m0->m_next = 0;
+	if (m && (m0->m_flags & M_EOR)) {	/* move M_EOR off the first mbuf */
+		m0->m_flags &= ~M_EOR;
+		m->m_flags |= M_EOR;
+	}
+	sbcompress(sb, m, m0);
+}
+
+/*
+ * Append address and data, and optionally, control (ancillary) data
+ * to the receive queue of a socket.  If present,
+ * m0 must include a packet header with total length.
+ * Returns 0 if no space in sockbuf or insufficient mbufs.
+ */
+sbappendaddr(sb, asa, m0, control)
+	register struct sockbuf *sb;
+	struct sockaddr *asa;
+	struct mbuf *m0, *control;
+{
+	register struct mbuf *name, *n;
+	int need = asa->sa_len;		/* bytes the whole record will occupy */
+
+	if (m0) {
+		if ((m0->m_flags & M_PKTHDR) == 0)
+			panic("sbappendaddr");
+		need += m0->m_pkthdr.len;
+	}
+	for (n = control; n; n = n->m_next) {
+		need += n->m_len;
+		if (n->m_next == 0)	/* stop with n at the last control mbuf */
+			break;
+	}
+	if (need > sbspace(sb) || asa->sa_len > MLEN)
+		return (0);
+	MGET(name, M_DONTWAIT, MT_SONAME);
+	if (name == 0)
+		return (0);
+	name->m_len = asa->sa_len;
+	bcopy((caddr_t)asa, mtod(name, caddr_t), asa->sa_len);
+	if (n)
+		n->m_next = m0;		/* data goes after the control mbufs */
+	else
+		control = m0;
+	name->m_next = control;		/* address mbuf leads the record */
+	for (n = name; n; n = n->m_next)
+		sballoc(sb, n);
+	if ((n = sb->sb_mb) == 0)
+		sb->sb_mb = name;
+	else {
+		while (n->m_nextpkt)
+			n = n->m_nextpkt;
+		n->m_nextpkt = name;	/* append after the last queued record */
+	}
+	return (1);
+}
+
+sbappendcontrol(sb, m0, control)
+	struct sockbuf *sb;
+	struct mbuf *control, *m0;
+{
+	register struct mbuf *m, *lastctl;
+	int need;
+
+	if (control == 0)
+		panic("sbappendcontrol");
+	/* Measure the control chain and remember its final mbuf. */
+	need = control->m_len;
+	for (lastctl = control; lastctl->m_next; need += lastctl->m_len)
+		lastctl = lastctl->m_next;
+	for (m = m0; m; m = m->m_next)
+		need += m->m_len;
+	if (need > sbspace(sb))
+		return (0);
+	lastctl->m_next = m0;		/* data follows the control mbufs */
+	for (m = control; m; m = m->m_next)
+		sballoc(sb, m);
+	/* Append the record after the last one queued, or start the queue. */
+	if ((m = sb->sb_mb) == 0)
+		sb->sb_mb = control;
+	else {
+		while (m->m_nextpkt)
+			m = m->m_nextpkt;
+		m->m_nextpkt = control;
+	}
+	return (1);
+}
+
+/*
+ * Compress mbuf chain m into the socket
+ * buffer sb following mbuf n.  If n
+ * is null, the buffer is presumed empty.
+ */
+sbcompress(sb, m, n)
+	register struct sockbuf *sb;
+	register struct mbuf *m, *n;	/* m: chain to add; n: mbuf to add it after */
+{
+	register int eor = 0;		/* M_EOR seen on any mbuf of the chain */
+	register struct mbuf *o;
+
+	while (m) {
+		eor |= m->m_flags & M_EOR;
+		if (m->m_len == 0 &&	/* empty mbuf: free it when ... */
+		    (eor == 0 ||	/* ... no M_EOR seen so far, or */
+		     (((o = m->m_next) || (o = n)) &&	/* ... a neighbour */
+		      o->m_type == m->m_type))) {	/* has the same type */
+			m = m_free(m);
+			continue;
+		}
+		if (n && (n->m_flags & (M_EXT | M_EOR)) == 0 &&
+		    (n->m_data + n->m_len + m->m_len) < &n->m_dat[MLEN] &&
+		    n->m_type == m->m_type) {	/* m's data fits in n: copy it in */
+			bcopy(mtod(m, caddr_t), mtod(n, caddr_t) + n->m_len,
+			    (unsigned)m->m_len);
+			n->m_len += m->m_len;
+			sb->sb_cc += m->m_len;
+			m = m_free(m);
+			continue;
+		}
+		if (n)			/* otherwise link m in after n */
+			n->m_next = m;
+		else
+			sb->sb_mb = m;
+		sballoc(sb, m);
+		n = m;
+		m->m_flags &= ~M_EOR;	/* put back on the final mbuf below */
+		m = m->m_next;
+		n->m_next = 0;
+	}
+	if (eor) {
+		if (n)
+			n->m_flags |= eor;
+		else
+			printf("semi-panic: sbcompress\n");
+	}
+}
+
+/*
+ * Free all mbufs in a sockbuf.
+ * Check that all resources are reclaimed.
+ */
+sbflush(sb)
+	register struct sockbuf *sb;
+{
+	/* Flushing a sockbuf that is marked locked is treated as fatal. */
+	if ((sb->sb_flags & SB_LOCK) != 0)
+		panic("sbflush");
+	for (; sb->sb_mbcnt != 0; )
+		sbdrop(sb, (int)sb->sb_cc);
+	if (sb->sb_mb != 0 || sb->sb_cc != 0)
+		panic("sbflush 2");
+}
+
+/*
+ * Drop data from (the front of) a sockbuf.
+ */
+sbdrop(sb, len)
+	register struct sockbuf *sb;
+	register int len;		/* number of bytes to discard */
+{
+	register struct mbuf *m, *mn;
+	struct mbuf *next;		/* record after the one being consumed */
+
+	next = (m = sb->sb_mb) ? m->m_nextpkt : 0;
+	while (len > 0) {
+		if (m == 0) {		/* record used up: continue with the next */
+			if (next == 0)
+				panic("sbdrop");
+			m = next;
+			next = m->m_nextpkt;
+			continue;
+		}
+		if (m->m_len > len) {	/* drop ends inside this mbuf: trim it */
+			m->m_len -= len;
+			m->m_data += len;
+			sb->sb_cc -= len;
+			break;
+		}
+		len -= m->m_len;
+		sbfree(sb, m);
+		MFREE(m, mn);
+		m = mn;
+	}
+	while (m && m->m_len == 0) {	/* also free empty mbufs left in front */
+		sbfree(sb, m);
+		MFREE(m, mn);
+		m = mn;
+	}
+	if (m) {			/* re-link what remains as the first record */
+		sb->sb_mb = m;
+		m->m_nextpkt = next;
+	} else
+		sb->sb_mb = next;
+}
+
+/*
+ * Drop a record off the front of a sockbuf
+ * and move the next record to the front.
+ */
+sbdroprecord(sb)
+	register struct sockbuf *sb;
+{
+	register struct mbuf *cur, *rest;
+
+	if ((cur = sb->sb_mb) == 0)
+		return;
+	sb->sb_mb = cur->m_nextpkt;	/* next record moves to the front */
+	while (cur != 0) {
+		sbfree(sb, cur);
+		MFREE(cur, rest);
+		cur = rest;
+	}
+}
diff --git a/sys/kern/uipc_syscalls.c b/sys/kern/uipc_syscalls.c
new file mode 100644
index 00000000000..89b7ffdf196
--- /dev/null
+++ b/sys/kern/uipc_syscalls.c
@@ -0,0 +1,1217 @@
+/*
+ * Copyright (c) 1982, 1986, 1989, 1990, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)uipc_syscalls.c	8.4 (Berkeley) 2/21/94
+ */
+
+#include <sys/param.h>
+#include <sys/filedesc.h>
+#include <sys/proc.h>
+#include <sys/file.h>
+#include <sys/buf.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/protosw.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#ifdef KTRACE
+#include <sys/ktrace.h>
+#endif
+
+/*
+ * System call interface to the socket abstraction.
+ */
+#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
+#define COMPAT_OLDSOCK
+#endif
+
+extern	struct fileops socketops;
+
+/*
+ * socket system call.
+ * Allocates a file entry and descriptor with falloc(), marks the entry
+ * as a read/write socket and creates the socket with socreate().  On
+ * success the descriptor is returned through retval; if socreate()
+ * fails the descriptor slot is cleared and the file entry released.
+ */
+struct socket_args {
+	int	domain;
+	int	type;
+	int	protocol;
+};
+socket(p, uap, retval)
+	struct proc *p;
+	register struct socket_args *uap;
+	int *retval;
+{
+	struct filedesc *fdp = p->p_fd;
+	struct socket *so;
+	struct file *fp;
+	int fd, error;
+
+	error = falloc(p, &fp, &fd);
+	if (error)
+		return (error);
+	fp->f_flag = FREAD|FWRITE;
+	fp->f_type = DTYPE_SOCKET;
+	fp->f_ops = &socketops;
+	error = socreate(uap->domain, &so, uap->type, uap->protocol);
+	if (error == 0) {
+		fp->f_data = (caddr_t)so;
+		*retval = fd;
+		return (0);
+	}
+	/* Creation failed: give the descriptor and file entry back. */
+	fdp->fd_ofiles[fd] = 0;
+	ffree(fp);
+	return (error);
+}
+
+/*
+ * bind system call.
+ * Looks up the socket for descriptor uap->s, copies the caller's name
+ * into an MT_SONAME mbuf with sockargs() and passes it to sobind().
+ * The name mbuf is freed before returning sobind()'s result.
+ */
+struct bind_args {
+	int	s;
+	caddr_t	name;
+	int	namelen;
+};
+/* ARGSUSED */
+bind(p, uap, retval)
+	struct proc *p;
+	register struct bind_args *uap;
+	int *retval;
+{
+	struct file *fp;
+	struct mbuf *nam;
+	int error;
+
+	error = getsock(p->p_fd, uap->s, &fp);
+	if (error)
+		return (error);
+	error = sockargs(&nam, uap->name, uap->namelen, MT_SONAME);
+	if (error)
+		return (error);
+	error = sobind((struct socket *)fp->f_data, nam);
+	m_freem(nam);
+	return (error);
+}
+
+/*
+ * listen system call.
+ * Looks up the socket for descriptor uap->s and passes it, together
+ * with the requested backlog, to solisten().
+ */
+struct listen_args {
+	int	s;
+	int	backlog;
+};
+/* ARGSUSED */
+listen(p, uap, retval)
+	struct proc *p;
+	register struct listen_args *uap;
+	int *retval;
+{
+	struct file *fp;
+	int error;
+
+	error = getsock(p->p_fd, uap->s, &fp);
+	if (error == 0)
+		error = solisten((struct socket *)fp->f_data, uap->backlog);
+	return (error);
+}
+
+/*
+ * Arguments for accept.  When COMPAT_OLDSOCK is defined the structure
+ * has an extra compat_43 member that the accept/oaccept wrappers below
+ * store into before calling the common code in accept1.
+ */
+struct accept_args {
+	int	s;
+	caddr_t	name;
+	int	*anamelen;
+#ifdef COMPAT_OLDSOCK
+	int	compat_43;	/* pseudo */
+#endif
+};
+
+#ifdef COMPAT_OLDSOCK
+/*
+ * accept: clear compat_43 and run the common code.
+ */
+accept(p, uap, retval)
+	struct proc *p;
+	struct accept_args *uap;
+	int *retval;
+{
+
+	uap->compat_43 = 0;
+	return (accept1(p, uap, retval));
+}
+
+/*
+ * oaccept: set compat_43, which makes accept1 store the address family
+ * through struct osockaddr before copying the name out.
+ */
+oaccept(p, uap, retval)
+	struct proc *p;
+	struct accept_args *uap;
+	int *retval;
+{
+
+	uap->compat_43 = 1;
+	return (accept1(p, uap, retval));
+}
+#else /* COMPAT_OLDSOCK */
+
+/* Without the compatibility code, accept1 itself is compiled as accept. */
+#define	accept1	accept
+#endif
+
+/*
+ * Common body of accept (and of oaccept when COMPAT_OLDSOCK is defined).
+ *
+ * Waits at splnet for a connection to appear on the listening socket
+ * uap->s, allocates a new descriptor for it (returned through retval)
+ * and, when uap->name is non-null, copies the peer address and its
+ * length out to uap->name / uap->anamelen.
+ *
+ * Returns 0 or an errno value: an error from copyin, getsock, tsleep,
+ * falloc or copyout; EINVAL if the socket is not accepting connections
+ * or the supplied name length is negative; EWOULDBLOCK for a
+ * non-blocking socket with an empty queue; or a pending so_error.
+ */
+accept1(p, uap, retval)
+	struct proc *p;
+	register struct accept_args *uap;
+	int *retval;
+{
+	struct file *fp;
+	struct mbuf *nam;
+	int namelen, error, s;
+	register struct socket *so;
+
+	if (uap->name) {
+		if (error = copyin((caddr_t)uap->anamelen,
+		    (caddr_t)&namelen, sizeof (namelen)))
+			return (error);
+		/*
+		 * Reject a negative length up front.  It would pass the
+		 * "namelen > nam->m_len" clamp below and then be handed
+		 * to copyout as a huge unsigned count, after the
+		 * connection has been dequeued and a descriptor allocated.
+		 */
+		if (namelen < 0)
+			return (EINVAL);
+	}
+	if (error = getsock(p->p_fd, uap->s, &fp))
+		return (error);
+	s = splnet();
+	so = (struct socket *)fp->f_data;
+	if ((so->so_options & SO_ACCEPTCONN) == 0) {
+		splx(s);
+		return (EINVAL);
+	}
+	if ((so->so_state & SS_NBIO) && so->so_qlen == 0) {
+		splx(s);
+		return (EWOULDBLOCK);
+	}
+	/* Sleep until a connection is queued or an error is posted. */
+	while (so->so_qlen == 0 && so->so_error == 0) {
+		if (so->so_state & SS_CANTRCVMORE) {
+			so->so_error = ECONNABORTED;
+			break;
+		}
+		if (error = tsleep((caddr_t)&so->so_timeo, PSOCK | PCATCH,
+		    netcon, 0)) {
+			splx(s);
+			return (error);
+		}
+	}
+	if (so->so_error) {
+		/* Report the pending error once and clear it. */
+		error = so->so_error;
+		so->so_error = 0;
+		splx(s);
+		return (error);
+	}
+	/* fp is reused: from here on it is the new connection's file. */
+	if (error = falloc(p, &fp, retval)) {
+		splx(s);
+		return (error);
+	}
+	/* Take the first queued connection; so now refers to it. */
+	{ struct socket *aso = so->so_q;
+	  if (soqremque(aso, 1) == 0)
+		panic("accept");
+	  so = aso;
+	}
+	fp->f_type = DTYPE_SOCKET;
+	fp->f_flag = FREAD|FWRITE;
+	fp->f_ops = &socketops;
+	fp->f_data = (caddr_t)so;
+	nam = m_get(M_WAIT, MT_SONAME);
+	(void) soaccept(so, nam);
+	if (uap->name) {
+#ifdef COMPAT_OLDSOCK
+		if (uap->compat_43)
+			mtod(nam, struct osockaddr *)->sa_family =
+			    mtod(nam, struct sockaddr *)->sa_family;
+#endif
+		/* Never copy out more than the address actually holds. */
+		if (namelen > nam->m_len)
+			namelen = nam->m_len;
+		/* SHOULD COPY OUT A CHAIN HERE */
+		if ((error = copyout(mtod(nam, caddr_t), (caddr_t)uap->name,
+		    (u_int)namelen)) == 0)
+			error = copyout((caddr_t)&namelen,
+			    (caddr_t)uap->anamelen, sizeof (*uap->anamelen));
+	}
+	m_freem(nam);
+	splx(s);
+	return (error);
+}
+
+/*
+ * connect system call.
+ * Copies the destination name into an mbuf and starts the connection
+ * with soconnect().  A non-blocking socket whose connection is still in
+ * progress gets EINPROGRESS (or EALREADY if one was already under way
+ * on entry); otherwise the caller sleeps at splnet until the socket
+ * leaves SS_ISCONNECTING or so_error is set.  ERESTART is returned to
+ * the caller as EINTR.
+ */
+struct connect_args {
+	int	s;
+	caddr_t	name;
+	int	namelen;
+};
+/* ARGSUSED */
+connect(p, uap, retval)
+	struct proc *p;
+	register struct connect_args *uap;
+	int *retval;
+{
+	struct file *fp;
+	register struct socket *so;
+	struct mbuf *nam;
+	int error, s;
+
+	error = getsock(p->p_fd, uap->s, &fp);
+	if (error)
+		return (error);
+	so = (struct socket *)fp->f_data;
+	if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING))
+		return (EALREADY);
+	error = sockargs(&nam, uap->name, uap->namelen, MT_SONAME);
+	if (error)
+		return (error);
+	error = soconnect(so, nam);
+	if (error == 0) {
+		if ((so->so_state & SS_NBIO) &&
+		    (so->so_state & SS_ISCONNECTING)) {
+			m_freem(nam);
+			return (EINPROGRESS);
+		}
+		s = splnet();
+		while ((so->so_state & SS_ISCONNECTING) &&
+		    so->so_error == 0) {
+			error = tsleep((caddr_t)&so->so_timeo,
+			    PSOCK | PCATCH, netcon, 0);
+			if (error)
+				break;
+		}
+		if (error == 0) {
+			/* Pick up and clear the socket's pending error. */
+			error = so->so_error;
+			so->so_error = 0;
+		}
+		splx(s);
+	}
+	/* Common exit, also taken when soconnect() failed. */
+	so->so_state &= ~SS_ISCONNECTING;
+	m_freem(nam);
+	if (error == ERESTART)
+		error = EINTR;
+	return (error);
+}
+
+/*
+ * socketpair system call.
+ * Creates two sockets of the requested domain/type/protocol, gives each
+ * a read/write file entry and descriptor, connects them with
+ * soconnect2() (in both directions for SOCK_DGRAM) and copies the two
+ * descriptors out to uap->rsv.  The labels at the end undo the steps
+ * already taken, in reverse order, when a later step fails.
+ */
+struct socketpair_args {
+	int	domain;
+	int	type;
+	int	protocol;
+	int	*rsv;
+};
+socketpair(p, uap, retval)
+	struct proc *p;
+	register struct socketpair_args *uap;
+	int retval[];
+{
+	register struct filedesc *fdp = p->p_fd;
+	struct file *fp1, *fp2;
+	struct socket *so1, *so2;
+	int fd, error, sv[2];
+
+	error = socreate(uap->domain, &so1, uap->type, uap->protocol);
+	if (error)
+		return (error);
+	error = socreate(uap->domain, &so2, uap->type, uap->protocol);
+	if (error)
+		goto free1;
+	error = falloc(p, &fp1, &fd);
+	if (error)
+		goto free2;
+	sv[0] = fd;
+	fp1->f_flag = FREAD|FWRITE;
+	fp1->f_type = DTYPE_SOCKET;
+	fp1->f_ops = &socketops;
+	fp1->f_data = (caddr_t)so1;
+	error = falloc(p, &fp2, &fd);
+	if (error)
+		goto free3;
+	fp2->f_flag = FREAD|FWRITE;
+	fp2->f_type = DTYPE_SOCKET;
+	fp2->f_ops = &socketops;
+	fp2->f_data = (caddr_t)so2;
+	sv[1] = fd;
+	error = soconnect2(so1, so2);
+	if (error)
+		goto free4;
+	if (uap->type == SOCK_DGRAM) {
+		/*
+		 * Datagram socket connection is asymmetric.
+		 */
+		error = soconnect2(so2, so1);
+		if (error)
+			goto free4;
+	}
+	error = copyout((caddr_t)sv, (caddr_t)uap->rsv, 2 * sizeof (int));
+	retval[0] = sv[0];		/* XXX ??? */
+	retval[1] = sv[1];		/* XXX ??? */
+	return (error);
+free4:
+	ffree(fp2);
+	fdp->fd_ofiles[sv[1]] = 0;
+free3:
+	ffree(fp1);
+	fdp->fd_ofiles[sv[0]] = 0;
+free2:
+	(void)soclose(so2);
+free1:
+	(void)soclose(so1);
+	return (error);
+}
+
+/*
+ * sendto system call.
+ * Wraps the caller's buffer in a one-element iovec, builds a msghdr
+ * naming the destination uap->to (no control data) and hands it to
+ * sendit().
+ */
+struct sendto_args {
+	int	s;
+	caddr_t	buf;
+	size_t	len;
+	int	flags;
+	caddr_t	to;
+	int	tolen;
+};
+sendto(p, uap, retval)
+	struct proc *p;
+	register struct sendto_args *uap;
+	int *retval;
+{
+	struct msghdr msg;
+	struct iovec aiov;
+
+	/* The single data segment. */
+	aiov.iov_base = uap->buf;
+	aiov.iov_len = uap->len;
+
+	/* The message header describing it. */
+	msg.msg_iov = &aiov;
+	msg.msg_iovlen = 1;
+	msg.msg_name = uap->to;
+	msg.msg_namelen = uap->tolen;
+	msg.msg_control = 0;
+#ifdef COMPAT_OLDSOCK
+	msg.msg_flags = 0;
+#endif
+	return (sendit(p, uap->s, &msg, uap->flags, retval));
+}
+
+#ifdef COMPAT_OLDSOCK
+/*
+ * osend (compiled only with COMPAT_OLDSOCK).
+ * Sends uap->buf on an already connected socket: builds a msghdr with
+ * no name and no control data around a one-element iovec and passes it
+ * to sendit().
+ */
+struct osend_args {
+	int	s;
+	caddr_t	buf;
+	int	len;
+	int	flags;
+};
+osend(p, uap, retval)
+	struct proc *p;
+	register struct osend_args *uap;
+	int *retval;
+{
+	struct msghdr msg;
+	struct iovec aiov;
+
+	/* The single data segment. */
+	aiov.iov_base = uap->buf;
+	aiov.iov_len = uap->len;
+
+	/* The message header describing it. */
+	msg.msg_iov = &aiov;
+	msg.msg_iovlen = 1;
+	msg.msg_name = 0;
+	msg.msg_namelen = 0;
+	msg.msg_control = 0;
+	msg.msg_flags = 0;
+	return (sendit(p, uap->s, &msg, uap->flags, retval));
+}
+
+/*
+ * osendmsg (compiled only with COMPAT_OLDSOCK).
+ * Copies in a struct omsghdr over the front of a struct msghdr, brings
+ * in the iovec array (on the stack when it has fewer than
+ * UIO_SMALLIOV entries, otherwise from MALLOC; UIO_MAXIOV or more
+ * entries are refused with EMSGSIZE), marks the header with
+ * MSG_COMPAT and calls sendit().
+ */
+#define MSG_COMPAT	0x8000
+struct osendmsg_args {
+	int	s;
+	caddr_t	msg;
+	int	flags;
+};
+osendmsg(p, uap, retval)
+	struct proc *p;
+	register struct osendmsg_args *uap;
+	int *retval;
+{
+	struct msghdr msg;
+	struct iovec aiov[UIO_SMALLIOV], *iov;
+	int error;
+
+	error = copyin(uap->msg, (caddr_t)&msg, sizeof (struct omsghdr));
+	if (error)
+		return (error);
+	if ((u_int)msg.msg_iovlen < UIO_SMALLIOV)
+		iov = aiov;
+	else if ((u_int)msg.msg_iovlen >= UIO_MAXIOV)
+		return (EMSGSIZE);
+	else {
+		MALLOC(iov, struct iovec *,
+		      sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV,
+		      M_WAITOK);
+	}
+	error = copyin((caddr_t)msg.msg_iov, (caddr_t)iov,
+	    (unsigned)(msg.msg_iovlen * sizeof (struct iovec)));
+	if (error == 0) {
+		msg.msg_flags = MSG_COMPAT;
+		msg.msg_iov = iov;
+		error = sendit(p, uap->s, &msg, uap->flags, retval);
+	}
+	/* Release the iovec array only if it was not the stack copy. */
+	if (iov != aiov)
+		FREE(iov, M_IOV);
+	return (error);
+}
+#endif
+
+/*
+ * sendmsg system call.
+ * Copies in the caller's msghdr and its iovec array (on the stack when
+ * it has fewer than UIO_SMALLIOV entries, otherwise from MALLOC;
+ * UIO_MAXIOV or more entries are refused with EMSGSIZE), points the
+ * header at the kernel copy of the array and calls sendit().
+ */
+struct sendmsg_args {
+	int	s;
+	caddr_t	msg;
+	int	flags;
+};
+sendmsg(p, uap, retval)
+	struct proc *p;
+	register struct sendmsg_args *uap;
+	int *retval;
+{
+	struct msghdr msg;
+	struct iovec aiov[UIO_SMALLIOV], *iov;
+	int error;
+
+	error = copyin(uap->msg, (caddr_t)&msg, sizeof (msg));
+	if (error)
+		return (error);
+	if ((u_int)msg.msg_iovlen < UIO_SMALLIOV)
+		iov = aiov;
+	else if ((u_int)msg.msg_iovlen >= UIO_MAXIOV)
+		return (EMSGSIZE);
+	else {
+		MALLOC(iov, struct iovec *,
+		       sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV,
+		       M_WAITOK);
+	}
+	/* An empty iovec array is not copied in at all. */
+	if (msg.msg_iovlen)
+		error = copyin((caddr_t)msg.msg_iov, (caddr_t)iov,
+		    (unsigned)(msg.msg_iovlen * sizeof (struct iovec)));
+	if (error == 0) {
+		msg.msg_iov = iov;
+#ifdef COMPAT_OLDSOCK
+		msg.msg_flags = 0;
+#endif
+		error = sendit(p, uap->s, &msg, uap->flags, retval);
+	}
+	/* Release the iovec array only if it was not the stack copy. */
+	if (iov != aiov)
+		FREE(iov, M_IOV);
+	return (error);
+}
+
+/*
+ * Common send code for sendto, sendmsg and the COMPAT_OLDSOCK variants.
+ *
+ *	p	calling process
+ *	s	socket descriptor
+ *	mp	message header; msg_iov must already point at a kernel
+ *		copy of the iovec array, while msg_name and msg_control
+ *		are still user addresses and are copied in here
+ *	flags	passed through to sosend()
+ *	retsize	receives the number of bytes sent when 0 is returned
+ *
+ * Returns 0 or an errno value.  A transfer interrupted by ERESTART,
+ * EINTR or EWOULDBLOCK after some data has moved is reported as
+ * success; EPIPE additionally posts SIGPIPE to the process.
+ */
+sendit(p, s, mp, flags, retsize)
+	register struct proc *p;
+	int s;
+	register struct msghdr *mp;
+	int flags, *retsize;
+{
+	struct file *fp;
+	struct uio auio;
+	register struct iovec *iov;
+	register int i;
+	struct mbuf *to, *control;
+	int len, error;
+#ifdef KTRACE
+	struct iovec *ktriov = NULL;
+#endif
+	
+	if (error = getsock(p->p_fd, s, &fp))
+		return (error);
+	/* Describe the user data as a write uio over the iovec array. */
+	auio.uio_iov = mp->msg_iov;
+	auio.uio_iovcnt = mp->msg_iovlen;
+	auio.uio_segflg = UIO_USERSPACE;
+	auio.uio_rw = UIO_WRITE;
+	auio.uio_procp = p;
+	auio.uio_offset = 0;			/* XXX */
+	auio.uio_resid = 0;
+	/* Sum the segment lengths, refusing negative or overflowing totals. */
+	iov = mp->msg_iov;
+	for (i = 0; i < mp->msg_iovlen; i++, iov++) {
+		if (iov->iov_len < 0)
+			return (EINVAL);
+		if ((auio.uio_resid += iov->iov_len) < 0)
+			return (EINVAL);
+	}
+	/* Copy in the destination address, if one was given. */
+	if (mp->msg_name) {
+		if (error = sockargs(&to, mp->msg_name, mp->msg_namelen,
+		    MT_SONAME))
+			return (error);
+	} else
+		to = 0;
+	if (mp->msg_control) {
+		/*
+		 * Control data must hold at least a cmsghdr, except for
+		 * MSG_COMPAT callers, whose data gets a header prepended
+		 * below.
+		 */
+		if (mp->msg_controllen < sizeof(struct cmsghdr)
+#ifdef COMPAT_OLDSOCK
+		    && mp->msg_flags != MSG_COMPAT
+#endif
+		) {
+			error = EINVAL;
+			goto bad;
+		}
+		if (error = sockargs(&control, mp->msg_control,
+		    mp->msg_controllen, MT_CONTROL))
+			goto bad;
+#ifdef COMPAT_OLDSOCK
+		if (mp->msg_flags == MSG_COMPAT) {
+			register struct cmsghdr *cm;
+
+			/* Prepend an SOL_SOCKET/SCM_RIGHTS header. */
+			M_PREPEND(control, sizeof(*cm), M_WAIT);
+			if (control == 0) {
+				error = ENOBUFS;
+				goto bad;
+			} else {
+				cm = mtod(control, struct cmsghdr *);
+				cm->cmsg_len = control->m_len;
+				cm->cmsg_level = SOL_SOCKET;
+				cm->cmsg_type = SCM_RIGHTS;
+			}
+		}
+#endif
+	} else
+		control = 0;
+#ifdef KTRACE
+	/* Keep a copy of the iovec array for the ktrgenio() call below. */
+	if (KTRPOINT(p, KTR_GENIO)) {
+		int iovlen = auio.uio_iovcnt * sizeof (struct iovec);
+
+		MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK);
+		bcopy((caddr_t)auio.uio_iov, (caddr_t)ktriov, iovlen);
+	}
+#endif
+	len = auio.uio_resid;
+	if (error = sosend((struct socket *)fp->f_data, to, &auio,
+	    (struct mbuf *)0, control, flags)) {
+		/* A partial transfer cut short by these errors is a success. */
+		if (auio.uio_resid != len && (error == ERESTART ||
+		    error == EINTR || error == EWOULDBLOCK))
+			error = 0;
+		if (error == EPIPE)
+			psignal(p, SIGPIPE);
+	}
+	if (error == 0)
+		*retsize = len - auio.uio_resid;
+#ifdef KTRACE
+	if (ktriov != NULL) {
+		if (error == 0)
+			ktrgenio(p->p_tracep, s, UIO_WRITE,
+				ktriov, *retsize, error);
+		FREE(ktriov, M_TEMP);
+	}
+#endif
+bad:
+	/* Only the address mbuf is released here. */
+	if (to)
+		m_freem(to);
+	return (error);
+}
+
+/*
+ * Arguments shared by recvfrom and (with COMPAT_OLDSOCK) orecvfrom.
+ * fromlenaddr points at the length of the from buffer; recvfrom reads
+ * it with copyin and passes the address on to recvit.
+ */
+struct recvfrom_args {
+	int	s;
+	caddr_t	buf;
+	size_t	len;
+	int	flags;
+	caddr_t	from;
+	int	*fromlenaddr;
+};
+
+#ifdef COMPAT_OLDSOCK
+/*
+ * orecvfrom: recvfrom with MSG_COMPAT or'ed into the caller's flags.
+ */
+orecvfrom(p, uap, retval)
+	struct proc *p;
+	struct recvfrom_args *uap;
+	int *retval;
+{
+
+	uap->flags |= MSG_COMPAT;
+	return (recvfrom(p, uap, retval));
+}
+#endif
+
+/*
+ * recvfrom system call.
+ * Fetches the size of the caller's address buffer (0 when no length
+ * pointer was supplied), wraps the data buffer in a one-element iovec
+ * and calls recvit(), which also receives the length pointer.
+ */
+recvfrom(p, uap, retval)
+	struct proc *p;
+	register struct recvfrom_args *uap;
+	int *retval;
+{
+	struct msghdr msg;
+	struct iovec aiov;
+	int error;
+
+	msg.msg_namelen = 0;
+	if (uap->fromlenaddr) {
+		error = copyin((caddr_t)uap->fromlenaddr,
+		    (caddr_t)&msg.msg_namelen, sizeof (msg.msg_namelen));
+		if (error)
+			return (error);
+	}
+
+	/* The single data segment. */
+	aiov.iov_base = uap->buf;
+	aiov.iov_len = uap->len;
+
+	/* The message header describing it. */
+	msg.msg_iov = &aiov;
+	msg.msg_iovlen = 1;
+	msg.msg_name = uap->from;
+	msg.msg_control = 0;
+	msg.msg_flags = uap->flags;
+	return (recvit(p, uap->s, &msg, (caddr_t)uap->fromlenaddr, retval));
+}
+
+#ifdef COMPAT_OLDSOCK
+/*
+ * orecv (compiled only with COMPAT_OLDSOCK).
+ * Receives into uap->buf without returning a source address: builds a
+ * msghdr with no name and no control data around a one-element iovec
+ * and passes it to recvit().
+ */
+struct orecv_args {
+	int	s;
+	caddr_t	buf;
+	int	len;
+	int	flags;
+};
+orecv(p, uap, retval)
+	struct proc *p;
+	register struct orecv_args *uap;
+	int *retval;
+{
+	struct msghdr msg;
+	struct iovec aiov;
+
+	/* The single data segment. */
+	aiov.iov_base = uap->buf;
+	aiov.iov_len = uap->len;
+
+	/* The message header describing it. */
+	msg.msg_iov = &aiov;
+	msg.msg_iovlen = 1;
+	msg.msg_name = 0;
+	msg.msg_namelen = 0;
+	msg.msg_control = 0;
+	msg.msg_flags = uap->flags;
+	return (recvit(p, uap->s, &msg, (caddr_t)0, retval));
+}
+
+/*
+ * Old recvmsg.  This code takes advantage of the fact that the old msghdr
+ * overlays the new one, missing only the flags, and with the (old) access
+ * rights where the control fields are now.
+ *
+ * The iovec array is brought in as in recvmsg, MSG_COMPAT is added to
+ * the flags, and after a successful recvit() a non-zero control length
+ * is copied back into the caller's msg_accrightslen.
+ */
+struct orecvmsg_args {
+	int	s;
+	struct	omsghdr *msg;
+	int	flags;
+};
+orecvmsg(p, uap, retval)
+	struct proc *p;
+	register struct orecvmsg_args *uap;
+	int *retval;
+{
+	struct msghdr msg;
+	struct iovec aiov[UIO_SMALLIOV], *iov;
+	int error;
+
+	error = copyin((caddr_t)uap->msg, (caddr_t)&msg,
+	    sizeof (struct omsghdr));
+	if (error)
+		return (error);
+	if ((u_int)msg.msg_iovlen < UIO_SMALLIOV)
+		iov = aiov;
+	else if ((u_int)msg.msg_iovlen >= UIO_MAXIOV)
+		return (EMSGSIZE);
+	else {
+		MALLOC(iov, struct iovec *,
+		      sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV,
+		      M_WAITOK);
+	}
+	msg.msg_flags = uap->flags | MSG_COMPAT;
+	error = copyin((caddr_t)msg.msg_iov, (caddr_t)iov,
+	    (unsigned)(msg.msg_iovlen * sizeof (struct iovec)));
+	if (error == 0) {
+		msg.msg_iov = iov;
+		error = recvit(p, uap->s, &msg,
+		    (caddr_t)&uap->msg->msg_namelen, retval);
+
+		if (msg.msg_controllen && error == 0)
+			error = copyout((caddr_t)&msg.msg_controllen,
+			    (caddr_t)&uap->msg->msg_accrightslen,
+			    sizeof (int));
+	}
+	/* Release the iovec array only if it was not the stack copy. */
+	if (iov != aiov)
+		FREE(iov, M_IOV);
+	return (error);
+}
+#endif
+
+/*
+ * recvmsg system call.
+ * Copies in the caller's msghdr and iovec array (on the stack when it
+ * has fewer than UIO_SMALLIOV entries, otherwise from MALLOC;
+ * UIO_MAXIOV or more entries are refused with EMSGSIZE) and calls
+ * recvit().  On success the header, with msg_iov restored to the
+ * caller's own pointer, is copied back out.
+ */
+struct recvmsg_args {
+	int	s;
+	struct	msghdr *msg;
+	int	flags;
+};
+recvmsg(p, uap, retval)
+	struct proc *p;
+	register struct recvmsg_args *uap;
+	int *retval;
+{
+	struct msghdr msg;
+	struct iovec aiov[UIO_SMALLIOV], *uiov, *iov;
+	register int error;
+
+	error = copyin((caddr_t)uap->msg, (caddr_t)&msg, sizeof (msg));
+	if (error)
+		return (error);
+	if ((u_int)msg.msg_iovlen < UIO_SMALLIOV)
+		iov = aiov;
+	else if ((u_int)msg.msg_iovlen >= UIO_MAXIOV)
+		return (EMSGSIZE);
+	else {
+		MALLOC(iov, struct iovec *,
+		       sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV,
+		       M_WAITOK);
+	}
+#ifdef COMPAT_OLDSOCK
+	msg.msg_flags = uap->flags &~ MSG_COMPAT;
+#else
+	msg.msg_flags = uap->flags;
+#endif
+	/* Remember the user's iovec pointer; work on the kernel copy. */
+	uiov = msg.msg_iov;
+	msg.msg_iov = iov;
+	error = copyin((caddr_t)uiov, (caddr_t)iov,
+	    (unsigned)(msg.msg_iovlen * sizeof (struct iovec)));
+	if (error == 0) {
+		error = recvit(p, uap->s, &msg, (caddr_t)0, retval);
+		if (error == 0) {
+			msg.msg_iov = uiov;
+			error = copyout((caddr_t)&msg, (caddr_t)uap->msg,
+			    sizeof(msg));
+		}
+	}
+	/* Release the iovec array only if it was not the stack copy. */
+	if (iov != aiov)
+		FREE(iov, M_IOV);
+	return (error);
+}
+
+/*
+ * Common receive code for recvfrom, recvmsg and the COMPAT_OLDSOCK
+ * variants.
+ *
+ *	p	 calling process
+ *	s	 socket descriptor
+ *	mp	 message header; msg_iov must already point at a kernel
+ *		 copy of the iovec array, while msg_name and msg_control
+ *		 are user addresses that results are copied out to.
+ *		 msg_namelen, msg_controllen and msg_flags are updated
+ *	namelenp if non-null, user address that also receives the
+ *		 resulting name length
+ *	retsize	 receives the number of bytes received on success
+ *
+ * Returns 0 or an errno value.  A transfer interrupted by ERESTART,
+ * EINTR or EWOULDBLOCK after some data has moved is reported as
+ * success.
+ */
+recvit(p, s, mp, namelenp, retsize)
+	register struct proc *p;
+	int s;
+	register struct msghdr *mp;
+	caddr_t namelenp;
+	int *retsize;
+{
+	struct file *fp;
+	struct uio auio;
+	register struct iovec *iov;
+	register int i;
+	int len, error;
+	struct mbuf *from = 0, *control = 0;
+#ifdef KTRACE
+	struct iovec *ktriov = NULL;
+#endif
+	
+	if (error = getsock(p->p_fd, s, &fp))
+		return (error);
+	/* Describe the user buffers as a read uio over the iovec array. */
+	auio.uio_iov = mp->msg_iov;
+	auio.uio_iovcnt = mp->msg_iovlen;
+	auio.uio_segflg = UIO_USERSPACE;
+	auio.uio_rw = UIO_READ;
+	auio.uio_procp = p;
+	auio.uio_offset = 0;			/* XXX */
+	auio.uio_resid = 0;
+	/* Sum the segment lengths, refusing negative or overflowing totals. */
+	iov = mp->msg_iov;
+	for (i = 0; i < mp->msg_iovlen; i++, iov++) {
+		if (iov->iov_len < 0)
+			return (EINVAL);
+		if ((auio.uio_resid += iov->iov_len) < 0)
+			return (EINVAL);
+	}
+#ifdef KTRACE
+	/* Keep a copy of the iovec array for the ktrgenio() call below. */
+	if (KTRPOINT(p, KTR_GENIO)) {
+		int iovlen = auio.uio_iovcnt * sizeof (struct iovec);
+
+		MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK);
+		bcopy((caddr_t)auio.uio_iov, (caddr_t)ktriov, iovlen);
+	}
+#endif
+	len = auio.uio_resid;
+	/* Control data is requested only if the caller gave a buffer. */
+	if (error = soreceive((struct socket *)fp->f_data, &from, &auio,
+	    (struct mbuf **)0, mp->msg_control ? &control : (struct mbuf **)0,
+	    &mp->msg_flags)) {
+		/* A partial transfer cut short by these errors is a success. */
+		if (auio.uio_resid != len && (error == ERESTART ||
+		    error == EINTR || error == EWOULDBLOCK))
+			error = 0;
+	}
+#ifdef KTRACE
+	if (ktriov != NULL) {
+		if (error == 0)
+			ktrgenio(p->p_tracep, s, UIO_READ,
+				ktriov, len - auio.uio_resid, error);
+		FREE(ktriov, M_TEMP);
+	}
+#endif
+	if (error)
+		goto out;
+	*retsize = len - auio.uio_resid;
+	if (mp->msg_name) {
+		/* len is reused from here on as the name length to return. */
+		len = mp->msg_namelen;
+		if (len <= 0 || from == 0)
+			len = 0;
+		else {
+#ifdef COMPAT_OLDSOCK
+			if (mp->msg_flags & MSG_COMPAT)
+				mtod(from, struct osockaddr *)->sa_family =
+				    mtod(from, struct sockaddr *)->sa_family;
+#endif
+			if (len > from->m_len)
+				len = from->m_len;
+			/* else if len < from->m_len ??? */
+			if (error = copyout(mtod(from, caddr_t),
+			    (caddr_t)mp->msg_name, (unsigned)len))
+				goto out;
+		}
+		mp->msg_namelen = len;
+		if (namelenp &&
+		    (error = copyout((caddr_t)&len, namelenp, sizeof (int)))) {
+#ifdef COMPAT_OLDSOCK
+			if (mp->msg_flags & MSG_COMPAT)
+				error = 0;	/* old recvfrom didn't check */
+			else
+#endif
+			goto out;
+		}
+	}
+	if (mp->msg_control) {
+#ifdef COMPAT_OLDSOCK
+		/*
+		 * We assume that old recvmsg calls won't receive access
+		 * rights and other control info, esp. as control info
+		 * is always optional and those options didn't exist in 4.3.
+		 * If we receive rights, trim the cmsghdr; anything else
+		 * is tossed.
+		 */
+		if (control && mp->msg_flags & MSG_COMPAT) {
+			if (mtod(control, struct cmsghdr *)->cmsg_level !=
+			    SOL_SOCKET ||
+			    mtod(control, struct cmsghdr *)->cmsg_type !=
+			    SCM_RIGHTS) {
+				mp->msg_controllen = 0;
+				goto out;
+			}
+			control->m_len -= sizeof (struct cmsghdr);
+			control->m_data += sizeof (struct cmsghdr);
+		}
+#endif
+		/* len is reused again, now as the control length to return. */
+		len = mp->msg_controllen;
+		if (len <= 0 || control == 0)
+			len = 0;
+		else {
+			/* Flag truncation when the buffer is too small. */
+			if (len >= control->m_len)
+				len = control->m_len;
+			else
+				mp->msg_flags |= MSG_CTRUNC;
+			error = copyout((caddr_t)mtod(control, caddr_t),
+			    (caddr_t)mp->msg_control, (unsigned)len);
+		}
+		mp->msg_controllen = len;
+	}
+out:
+	/* Free whatever soreceive() handed back. */
+	if (from)
+		m_freem(from);
+	if (control)
+		m_freem(control);
+	return (error);
+}
+
+/*
+ * shutdown system call.
+ * Looks up the socket for descriptor uap->s and passes it, together
+ * with uap->how, to soshutdown().
+ */
+struct shutdown_args {
+	int	s;
+	int	how;
+};
+/* ARGSUSED */
+shutdown(p, uap, retval)
+	struct proc *p;
+	register struct shutdown_args *uap;
+	int *retval;
+{
+	struct file *fp;
+	int error;
+
+	error = getsock(p->p_fd, uap->s, &fp);
+	if (error == 0)
+		error = soshutdown((struct socket *)fp->f_data, uap->how);
+	return (error);
+}
+
+/*
+ * setsockopt system call.
+ * Copies the option value (if any) from user space into a single
+ * MT_SOOPTS mbuf and hands it to sosetopt() together with the level
+ * and option name.  A null uap->val results in a null mbuf pointer
+ * being passed.
+ *
+ * Returns 0 or an errno value: EINVAL if valsize does not fit in one
+ * mbuf (this includes negative sizes), ENOBUFS if no mbuf could be
+ * obtained, or an error from getsock, copyin or sosetopt.
+ */
+struct setsockopt_args {
+	int	s;
+	int	level;
+	int	name;
+	caddr_t	val;
+	int	valsize;
+};
+/* ARGSUSED */
+setsockopt(p, uap, retval)
+	struct proc *p;
+	register struct setsockopt_args *uap;
+	int *retval;
+{
+	struct file *fp;
+	struct mbuf *m = NULL;
+	int error;
+
+	if (error = getsock(p->p_fd, uap->s, &fp))
+		return (error);
+	/*
+	 * Compare as unsigned, as sockargs() does for its length: a
+	 * negative valsize would pass a signed test and then be used as
+	 * a huge copyin count into the mbuf below.
+	 */
+	if ((u_int)uap->valsize > MLEN)
+		return (EINVAL);
+	if (uap->val) {
+		m = m_get(M_WAIT, MT_SOOPTS);
+		if (m == NULL)
+			return (ENOBUFS);
+		if (error = copyin(uap->val, mtod(m, caddr_t),
+		    (u_int)uap->valsize)) {
+			(void) m_free(m);
+			return (error);
+		}
+		m->m_len = uap->valsize;
+	}
+	/* The mbuf is not freed here once it has been handed over. */
+	return (sosetopt((struct socket *)fp->f_data, uap->level,
+	    uap->name, m));
+}
+
+/*
+ * getsockopt system call.
+ * Obtains the option value from sogetopt() as an mbuf and, when the
+ * caller supplied a buffer, copies out at most *uap->avalsize bytes of
+ * it followed by the number of bytes actually returned.
+ *
+ * Returns 0 or an errno value: EINVAL if the supplied buffer size is
+ * negative, or an error from getsock, copyin, sogetopt or copyout.
+ */
+struct getsockopt_args {
+	int	s;
+	int	level;
+	int	name;
+	caddr_t	val;
+	int	*avalsize;
+};
+/* ARGSUSED */
+getsockopt(p, uap, retval)
+	struct proc *p;
+	register struct getsockopt_args *uap;
+	int *retval;
+{
+	struct file *fp;
+	struct mbuf *m = NULL;
+	int valsize, error;
+
+	if (error = getsock(p->p_fd, uap->s, &fp))
+		return (error);
+	if (uap->val) {
+		if (error = copyin((caddr_t)uap->avalsize, (caddr_t)&valsize,
+		    sizeof (valsize)))
+			return (error);
+		/*
+		 * A negative size would slip past the "valsize > m->m_len"
+		 * clamp below and reach copyout as a huge unsigned count.
+		 */
+		if (valsize < 0)
+			return (EINVAL);
+	} else
+		valsize = 0;
+	if ((error = sogetopt((struct socket *)fp->f_data, uap->level,
+	    uap->name, &m)) == 0 && uap->val && valsize && m != NULL) {
+		/* Never copy out more than the option value holds. */
+		if (valsize > m->m_len)
+			valsize = m->m_len;
+		error = copyout(mtod(m, caddr_t), uap->val, (u_int)valsize);
+		if (error == 0)
+			error = copyout((caddr_t)&valsize,
+			    (caddr_t)uap->avalsize, sizeof (valsize));
+	}
+	if (m != NULL)
+		(void) m_free(m);
+	return (error);
+}
+
+/*
+ * pipe system call.
+ * Creates two AF_UNIX stream sockets, attaches the first to a
+ * read-only file entry (retval[0]) and the second to a write-only one
+ * (retval[1]), and connects them with unp_connect2().  The labels at
+ * the end undo the steps already taken, in reverse order, when a later
+ * step fails.
+ */
+struct pipe_args {
+	int	dummy;
+};
+/* ARGSUSED */
+pipe(p, uap, retval)
+	struct proc *p;
+	struct pipe_args *uap;
+	int retval[];
+{
+	register struct filedesc *fdp = p->p_fd;
+	struct file *rf, *wf;
+	struct socket *rso, *wso;
+	int fd, error;
+
+	error = socreate(AF_UNIX, &rso, SOCK_STREAM, 0);
+	if (error)
+		return (error);
+	error = socreate(AF_UNIX, &wso, SOCK_STREAM, 0);
+	if (error)
+		goto free1;
+	error = falloc(p, &rf, &fd);
+	if (error)
+		goto free2;
+	retval[0] = fd;
+	rf->f_flag = FREAD;
+	rf->f_type = DTYPE_SOCKET;
+	rf->f_ops = &socketops;
+	rf->f_data = (caddr_t)rso;
+	error = falloc(p, &wf, &fd);
+	if (error)
+		goto free3;
+	wf->f_flag = FWRITE;
+	wf->f_type = DTYPE_SOCKET;
+	wf->f_ops = &socketops;
+	wf->f_data = (caddr_t)wso;
+	retval[1] = fd;
+	error = unp_connect2(wso, rso);
+	if (error == 0)
+		return (0);
+	/* Connecting failed: release the write side first. */
+	ffree(wf);
+	fdp->fd_ofiles[retval[1]] = 0;
+free3:
+	ffree(rf);
+	fdp->fd_ofiles[retval[0]] = 0;
+free2:
+	(void)soclose(wso);
+free1:
+	(void)soclose(rso);
+	return (error);
+}
+
+/*
+ * Get socket name.
+ *
+ * When COMPAT_OLDSOCK is defined the argument structure has an extra
+ * compat_43 member that the getsockname/ogetsockname wrappers below
+ * store into before calling the common code in getsockname1.
+ */
+struct getsockname_args {
+	int	fdes;
+	caddr_t	asa;
+	int	*alen;
+#ifdef COMPAT_OLDSOCK
+	int	compat_43;	/* pseudo */
+#endif
+};
+#ifdef COMPAT_OLDSOCK
+/*
+ * getsockname: clear compat_43 and run the common code.
+ */
+getsockname(p, uap, retval)
+	struct proc *p;
+	struct getsockname_args *uap;
+	int *retval;
+{
+
+	uap->compat_43 = 0;
+	return (getsockname1(p, uap, retval));
+}
+
+/*
+ * ogetsockname: set compat_43, which makes getsockname1 store the
+ * address family through struct osockaddr before copying the name out.
+ */
+ogetsockname(p, uap, retval)
+	struct proc *p;
+	struct getsockname_args *uap;
+	int *retval;
+{
+
+	uap->compat_43 = 1;
+	return (getsockname1(p, uap, retval));
+}
+#else /* COMPAT_OLDSOCK */
+
+/* Without the compatibility code, getsockname1 is compiled as getsockname. */
+#define	getsockname1	getsockname
+#endif
+
+/*
+ * Common body of getsockname (and of ogetsockname when COMPAT_OLDSOCK
+ * is defined).
+ * Asks the protocol for the socket's local address (PRU_SOCKADDR) and
+ * copies out at most *uap->alen bytes of it to uap->asa, followed by
+ * the number of bytes actually returned.
+ *
+ * Returns 0 or an errno value: EINVAL if the supplied length is
+ * negative, ENOBUFS if no mbuf could be obtained, or an error from
+ * getsock, copyin, the protocol request or copyout.
+ */
+/* ARGSUSED */
+getsockname1(p, uap, retval)
+	struct proc *p;
+	register struct getsockname_args *uap;
+	int *retval;
+{
+	struct file *fp;
+	register struct socket *so;
+	struct mbuf *m;
+	int len, error;
+
+	if (error = getsock(p->p_fd, uap->fdes, &fp))
+		return (error);
+	if (error = copyin((caddr_t)uap->alen, (caddr_t)&len, sizeof (len)))
+		return (error);
+	/*
+	 * A negative length would slip past the "len > m->m_len" clamp
+	 * below and reach copyout as a huge unsigned count.
+	 */
+	if (len < 0)
+		return (EINVAL);
+	so = (struct socket *)fp->f_data;
+	m = m_getclr(M_WAIT, MT_SONAME);
+	if (m == NULL)
+		return (ENOBUFS);
+	if (error = (*so->so_proto->pr_usrreq)(so, PRU_SOCKADDR, 0, m, 0))
+		goto bad;
+	/* Never copy out more than the address actually holds. */
+	if (len > m->m_len)
+		len = m->m_len;
+#ifdef COMPAT_OLDSOCK
+	if (uap->compat_43)
+		mtod(m, struct osockaddr *)->sa_family =
+		    mtod(m, struct sockaddr *)->sa_family;
+#endif
+	error = copyout(mtod(m, caddr_t), (caddr_t)uap->asa, (u_int)len);
+	if (error == 0)
+		error = copyout((caddr_t)&len, (caddr_t)uap->alen,
+		    sizeof (len));
+bad:
+	m_freem(m);
+	return (error);
+}
+
+/*
+ * Get name of peer for connected socket.
+ *
+ * When COMPAT_OLDSOCK is defined the argument structure has an extra
+ * compat_43 member that the getpeername/ogetpeername wrappers below
+ * store into before calling the common code in getpeername1.
+ */
+struct getpeername_args {
+	int	fdes;
+	caddr_t	asa;
+	int	*alen;
+#ifdef COMPAT_OLDSOCK
+	int	compat_43;	/* pseudo */
+#endif
+};
+
+#ifdef COMPAT_OLDSOCK
+/*
+ * getpeername: clear compat_43 and run the common code.
+ */
+getpeername(p, uap, retval)
+	struct proc *p;
+	struct getpeername_args *uap;
+	int *retval;
+{
+
+	uap->compat_43 = 0;
+	return (getpeername1(p, uap, retval));
+}
+
+/*
+ * ogetpeername: set compat_43, which makes getpeername1 store the
+ * address family through struct osockaddr before copying the name out.
+ */
+ogetpeername(p, uap, retval)
+	struct proc *p;
+	struct getpeername_args *uap;
+	int *retval;
+{
+
+	uap->compat_43 = 1;
+	return (getpeername1(p, uap, retval));
+}
+#else /* COMPAT_OLDSOCK */
+
+/* Without the compatibility code, getpeername1 is compiled as getpeername. */
+#define	getpeername1	getpeername
+#endif
+
+/*
+ * Common body of getpeername (and of ogetpeername when COMPAT_OLDSOCK
+ * is defined).
+ * Requires the socket to be connected or confirming, asks the protocol
+ * for the peer's address (PRU_PEERADDR) and copies out at most
+ * *uap->alen bytes of it to uap->asa, followed by the number of bytes
+ * actually returned.
+ *
+ * Returns 0 or an errno value: ENOTCONN if the socket has no peer,
+ * EINVAL if the supplied length is negative, ENOBUFS if no mbuf could
+ * be obtained, or an error from getsock, copyin, the protocol request
+ * or copyout.
+ */
+/* ARGSUSED */
+getpeername1(p, uap, retval)
+	struct proc *p;
+	register struct getpeername_args *uap;
+	int *retval;
+{
+	struct file *fp;
+	register struct socket *so;
+	struct mbuf *m;
+	int len, error;
+
+	if (error = getsock(p->p_fd, uap->fdes, &fp))
+		return (error);
+	so = (struct socket *)fp->f_data;
+	if ((so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0)
+		return (ENOTCONN);
+	if (error = copyin((caddr_t)uap->alen, (caddr_t)&len, sizeof (len)))
+		return (error);
+	/*
+	 * A negative length would slip past the "len > m->m_len" clamp
+	 * below and reach copyout as a huge unsigned count.
+	 */
+	if (len < 0)
+		return (EINVAL);
+	m = m_getclr(M_WAIT, MT_SONAME);
+	if (m == NULL)
+		return (ENOBUFS);
+	if (error = (*so->so_proto->pr_usrreq)(so, PRU_PEERADDR, 0, m, 0))
+		goto bad;
+	/* Never copy out more than the address actually holds. */
+	if (len > m->m_len)
+		len = m->m_len;
+#ifdef COMPAT_OLDSOCK
+	if (uap->compat_43)
+		mtod(m, struct osockaddr *)->sa_family =
+		    mtod(m, struct sockaddr *)->sa_family;
+#endif
+	if (error = copyout(mtod(m, caddr_t), (caddr_t)uap->asa, (u_int)len))
+		goto bad;
+	error = copyout((caddr_t)&len, (caddr_t)uap->alen, sizeof (len));
+bad:
+	m_freem(m);
+	return (error);
+}
+
+/*
+ * Copy a user-supplied socket argument into a freshly allocated mbuf.
+ *
+ *	mp	receives the new mbuf on success
+ *	buf	user address of the data
+ *	buflen	number of bytes to copy; must not exceed MLEN
+ *	type	mbuf type to allocate (the callers in this file pass
+ *		MT_SONAME or MT_CONTROL)
+ *
+ * Returns 0, EINVAL for an oversized (or negative) buflen, ENOBUFS if
+ * no mbuf could be obtained, or the error from copyin, in which case
+ * the mbuf is freed and *mp is left untouched.
+ */
+sockargs(mp, buf, buflen, type)
+	struct mbuf **mp;
+	caddr_t buf;
+	int buflen, type;
+{
+	register struct sockaddr *sa;
+	register struct mbuf *m;
+	int error;
+
+	/* The unsigned comparison also rejects negative lengths. */
+	if ((u_int)buflen > MLEN) {
+#ifdef COMPAT_OLDSOCK
+		/* Names of up to 112 bytes are cut down to MLEN instead. */
+		if (type == MT_SONAME && (u_int)buflen <= 112)
+			buflen = MLEN;		/* unix domain compat. hack */
+		else
+#endif
+		return (EINVAL);
+	}
+	m = m_get(M_WAIT, type);
+	if (m == NULL)
+		return (ENOBUFS);
+	m->m_len = buflen;
+	error = copyin(buf, mtod(m, caddr_t), (u_int)buflen);
+	if (error)
+		(void) m_free(m);
+	else {
+		*mp = m;
+		if (type == MT_SONAME) {
+			sa = mtod(m, struct sockaddr *);
+
+#if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN
+			/*
+			 * With a zero sa_family and an sa_len below AF_MAX,
+			 * the sa_len byte is taken to be the family
+			 * (presumably a name in the old sockaddr layout;
+			 * confirm against struct osockaddr).
+			 */
+			if (sa->sa_family == 0 && sa->sa_len < AF_MAX)
+				sa->sa_family = sa->sa_len;
+#endif
+			/* sa_len is always overwritten with the copied length. */
+			sa->sa_len = buflen;
+		}
+	}
+	return (error);
+}
+
+/*
+ * Translate a descriptor into its file entry, insisting on a socket.
+ * Returns EBADF if fdes is outside the descriptor table or the slot is
+ * empty, ENOTSOCK if the file is not of type DTYPE_SOCKET; otherwise
+ * stores the file pointer through fpp and returns 0.
+ */
+getsock(fdp, fdes, fpp)
+	struct filedesc *fdp;
+	int fdes;
+	struct file **fpp;
+{
+	register struct file *fp;
+
+	/* The unsigned comparison also rejects negative descriptors. */
+	if ((unsigned)fdes >= fdp->fd_nfiles)
+		return (EBADF);
+	fp = fdp->fd_ofiles[fdes];
+	if (fp == NULL)
+		return (EBADF);
+	if (fp->f_type != DTYPE_SOCKET)
+		return (ENOTSOCK);
+	*fpp = fp;
+	return (0);
+}
diff --git a/sys/kern/uipc_usrreq.c b/sys/kern/uipc_usrreq.c
new file mode 100644
index 00000000000..94bf8f744c8
--- /dev/null
+++ b/sys/kern/uipc_usrreq.c
@@ -0,0 +1,823 @@
+/*
+ * Copyright (c) 1982, 1986, 1989, 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)uipc_usrreq.c	8.3 (Berkeley) 1/4/94
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/filedesc.h>
+#include <sys/domain.h>
+#include <sys/protosw.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/unpcb.h>
+#include <sys/un.h>
+#include <sys/namei.h>
+#include <sys/vnode.h>
+#include <sys/file.h>
+#include <sys/stat.h>
+#include <sys/mbuf.h>
+
+/*
+ * Unix communications domain.
+ *
+ * TODO:
+ *	SEQPACKET, RDM
+ *	rethink name space problems
+ *	need a proper out-of-band
+ */
+/*
+ * Placeholder address for sockets that have no bound name: used as the
+ * source address of datagrams sent from an unbound socket and as the
+ * name returned by PRU_ACCEPT when the peer has none.
+ */
+struct	sockaddr sun_noname = { sizeof(sun_noname), AF_UNIX };
+/* Next fake inode number; PRU_SENSE assigns one per pcb on first use. */
+ino_t	unp_ino;			/* prototype for fake inode numbers */
+
+/*
+ * User-request entry point for the Unix domain: dispatches the PRU_*
+ * request req for socket so.
+ *
+ *	m	data mbuf chain; for PRU_SENSE it is really a struct stat *
+ *	nam	address mbuf; for PRU_CONNECT2 it is really a struct socket *
+ *	control	control mbuf chain (only accepted with PRU_SEND)
+ *
+ * Returns 0 or an errno value.  Every path that leaves through the
+ * "release" label frees whatever is still referenced by control and m;
+ * the cases that have handed an mbuf on clear the pointer first.
+ * PRU_CONTROL, PRU_SENSE and PRU_RCVOOB return directly and free nothing.
+ */
+/*ARGSUSED*/
+uipc_usrreq(so, req, m, nam, control)
+	struct socket *so;
+	int req;
+	struct mbuf *m, *nam, *control;
+{
+	struct unpcb *unp = sotounpcb(so);
+	register struct socket *so2;
+	register int error = 0;
+	struct proc *p = curproc;	/* XXX */
+
+	if (req == PRU_CONTROL)
+		return (EOPNOTSUPP);
+	/* Control data is only meaningful on a send. */
+	if (req != PRU_SEND && control && control->m_len) {
+		error = EOPNOTSUPP;
+		goto release;
+	}
+	/* Everything except attach needs an existing pcb. */
+	if (unp == 0 && req != PRU_ATTACH) {
+		error = EINVAL;
+		goto release;
+	}
+	switch (req) {
+
+	case PRU_ATTACH:
+		if (unp) {
+			error = EISCONN;
+			break;
+		}
+		error = unp_attach(so);
+		break;
+
+	case PRU_DETACH:
+		unp_detach(unp);
+		break;
+
+	case PRU_BIND:
+		error = unp_bind(unp, nam, p);
+		break;
+
+	case PRU_LISTEN:
+		/* Listening requires a bound (vnode-backed) socket. */
+		if (unp->unp_vnode == 0)
+			error = EINVAL;
+		break;
+
+	case PRU_CONNECT:
+		error = unp_connect(so, nam, p);
+		break;
+
+	case PRU_CONNECT2:
+		error = unp_connect2(so, (struct socket *)nam);
+		break;
+
+	case PRU_DISCONNECT:
+		unp_disconnect(unp);
+		break;
+
+	case PRU_ACCEPT:
+		/*
+		 * Pass back name of connected socket,
+		 * if it was bound and we are still connected
+		 * (our peer may have closed already!).
+		 */
+		if (unp->unp_conn && unp->unp_conn->unp_addr) {
+			nam->m_len = unp->unp_conn->unp_addr->m_len;
+			bcopy(mtod(unp->unp_conn->unp_addr, caddr_t),
+			    mtod(nam, caddr_t), (unsigned)nam->m_len);
+		} else {
+			nam->m_len = sizeof(sun_noname);
+			*(mtod(nam, struct sockaddr *)) = sun_noname;
+		}
+		break;
+
+	case PRU_SHUTDOWN:
+		socantsendmore(so);
+		unp_shutdown(unp);
+		break;
+
+	case PRU_RCVD:
+		switch (so->so_type) {
+
+		case SOCK_DGRAM:
+			panic("uipc 1");
+			/*NOTREACHED*/
+
+		case SOCK_STREAM:
+#define	rcv (&so->so_rcv)
+#define snd (&so2->so_snd)
+			if (unp->unp_conn == 0)
+				break;
+			so2 = unp->unp_conn->unp_socket;
+			/*
+			 * Adjust backpressure on sender
+			 * and wakeup any waiting to write.
+			 */
+			snd->sb_mbmax += unp->unp_mbcnt - rcv->sb_mbcnt;
+			unp->unp_mbcnt = rcv->sb_mbcnt;
+			snd->sb_hiwat += unp->unp_cc - rcv->sb_cc;
+			unp->unp_cc = rcv->sb_cc;
+			sowwakeup(so2);
+#undef snd
+#undef rcv
+			break;
+
+		default:
+			panic("uipc 2");
+		}
+		break;
+
+	case PRU_SEND:
+		/* Turn passed descriptors into file pointers before queueing. */
+		if (control && (error = unp_internalize(control, p)))
+			break;
+		switch (so->so_type) {
+
+		case SOCK_DGRAM: {
+			struct sockaddr *from;
+
+			/*
+			 * A destination in nam means a temporary connection
+			 * for this one datagram; it is undone again below.
+			 */
+			if (nam) {
+				if (unp->unp_conn) {
+					error = EISCONN;
+					break;
+				}
+				error = unp_connect(so, nam, p);
+				if (error)
+					break;
+			} else {
+				if (unp->unp_conn == 0) {
+					error = ENOTCONN;
+					break;
+				}
+			}
+			so2 = unp->unp_conn->unp_socket;
+			if (unp->unp_addr)
+				from = mtod(unp->unp_addr, struct sockaddr *);
+			else
+				from = &sun_noname;
+			/* On success the receiver owns m and control. */
+			if (sbappendaddr(&so2->so_rcv, from, m, control)) {
+				sorwakeup(so2);
+				m = 0;
+				control = 0;
+			} else
+				error = ENOBUFS;
+			if (nam)
+				unp_disconnect(unp);
+			break;
+		}
+
+		case SOCK_STREAM:
+#define	rcv (&so2->so_rcv)
+#define	snd (&so->so_snd)
+			if (so->so_state & SS_CANTSENDMORE) {
+				error = EPIPE;
+				break;
+			}
+			if (unp->unp_conn == 0)
+				panic("uipc 3");
+			so2 = unp->unp_conn->unp_socket;
+			/*
+			 * Send to paired receive port, and then reduce
+			 * send buffer hiwater marks to maintain backpressure.
+			 * Wake up readers.
+			 */
+			/*
+			 * NOTE(review): when sbappendcontrol() returns 0, m is
+			 * still cleared below and so is not freed at "release";
+			 * whether it was consumed cannot be told from this
+			 * file -- confirm against sbappendcontrol().
+			 */
+			if (control) {
+				if (sbappendcontrol(rcv, m, control))
+					control = 0;
+			} else
+				sbappend(rcv, m);
+			snd->sb_mbmax -=
+			    rcv->sb_mbcnt - unp->unp_conn->unp_mbcnt;
+			unp->unp_conn->unp_mbcnt = rcv->sb_mbcnt;
+			snd->sb_hiwat -= rcv->sb_cc - unp->unp_conn->unp_cc;
+			unp->unp_conn->unp_cc = rcv->sb_cc;
+			sorwakeup(so2);
+			m = 0;
+#undef snd
+#undef rcv
+			break;
+
+		default:
+			panic("uipc 4");
+		}
+		break;
+
+	case PRU_ABORT:
+		unp_drop(unp, ECONNABORTED);
+		break;
+
+	case PRU_SENSE:
+		/* m is a struct stat * here, not an mbuf; it is not freed. */
+		((struct stat *) m)->st_blksize = so->so_snd.sb_hiwat;
+		if (so->so_type == SOCK_STREAM && unp->unp_conn != 0) {
+			so2 = unp->unp_conn->unp_socket;
+			((struct stat *) m)->st_blksize += so2->so_rcv.sb_cc;
+		}
+		((struct stat *) m)->st_dev = NODEV;
+		/* Hand out a fake inode number the first time we are asked. */
+		if (unp->unp_ino == 0)
+			unp->unp_ino = unp_ino++;
+		((struct stat *) m)->st_ino = unp->unp_ino;
+		return (0);
+
+	case PRU_RCVOOB:
+		return (EOPNOTSUPP);
+
+	case PRU_SENDOOB:
+		error = EOPNOTSUPP;
+		break;
+
+	case PRU_SOCKADDR:
+		/* An unbound socket reports a zero-length name. */
+		if (unp->unp_addr) {
+			nam->m_len = unp->unp_addr->m_len;
+			bcopy(mtod(unp->unp_addr, caddr_t),
+			    mtod(nam, caddr_t), (unsigned)nam->m_len);
+		} else
+			nam->m_len = 0;
+		break;
+
+	case PRU_PEERADDR:
+		if (unp->unp_conn && unp->unp_conn->unp_addr) {
+			nam->m_len = unp->unp_conn->unp_addr->m_len;
+			bcopy(mtod(unp->unp_conn->unp_addr, caddr_t),
+			    mtod(nam, caddr_t), (unsigned)nam->m_len);
+		} else
+			nam->m_len = 0;
+		break;
+
+	case PRU_SLOWTIMO:
+		break;
+
+	default:
+		panic("piusrreq");
+	}
+release:
+	if (control)
+		m_freem(control);
+	if (m)
+		m_freem(m);
+	return (error);
+}
+
+/*
+ * Both send and receive buffers are allocated PIPSIZ bytes of buffering
+ * for stream sockets, although the total for sender and receiver is
+ * actually only PIPSIZ.
+ * Datagram sockets really use the sendspace as the maximum datagram size,
+ * and don't really want to reserve the sendspace.  Their recvspace should
+ * be large enough for at least one max-size datagram plus address.
+ */
+#define	PIPSIZ	4096
+u_long	unpst_sendspace = PIPSIZ;	/* stream send reservation (unp_attach) */
+u_long	unpst_recvspace = PIPSIZ;	/* stream receive reservation (unp_attach) */
+u_long	unpdg_sendspace = 2*1024;	/* really max datagram size */
+u_long	unpdg_recvspace = 4*1024;	/* datagram receive reservation (unp_attach) */
+
+/* Raised by unp_internalize, lowered by unp_externalize/unp_discard. */
+int	unp_rights;			/* file descriptors in flight */
+
+/*
+ * Give socket so a Unix-domain protocol control block.
+ *
+ * If either socket buffer has no high-water mark yet, buffer space is
+ * reserved first using the per-type defaults.  The pcb lives in a
+ * zeroed MT_PCB mbuf and is cross-linked with the socket.
+ * Returns 0, the soreserve() error, or ENOBUFS.
+ */
+unp_attach(so)
+	struct socket *so;
+{
+	register struct mbuf *pcbm;
+	register struct unpcb *unp;
+	int error;
+
+	if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
+		if (so->so_type == SOCK_STREAM)
+			error = soreserve(so, unpst_sendspace, unpst_recvspace);
+		else if (so->so_type == SOCK_DGRAM)
+			error = soreserve(so, unpdg_sendspace, unpdg_recvspace);
+		else
+			panic("unp_attach");
+		if (error != 0)
+			return (error);
+	}
+	/* Must not sleep here, hence M_DONTWAIT. */
+	if ((pcbm = m_getclr(M_DONTWAIT, MT_PCB)) == NULL)
+		return (ENOBUFS);
+	unp = mtod(pcbm, struct unpcb *);
+	unp->unp_socket = so;
+	so->so_pcb = (caddr_t)unp;
+	return (0);
+}
+
+/*
+ * Tear down the protocol control block unp: drop the bound vnode (if
+ * any), disconnect from the peer, reset every datagram socket still
+ * connected to us, mark the socket disconnected, and free the pcb and
+ * its address.  If descriptors are in flight anywhere, flush our
+ * receive buffer and run the garbage collector.
+ */
+unp_detach(unp)
+	register struct unpcb *unp;
+{
+	/*
+	 * Remember the socket up front: the pcb's mbuf is released below
+	 * and must not be dereferenced after that point.
+	 */
+	register struct socket *so = unp->unp_socket;
+
+	if (unp->unp_vnode) {
+		unp->unp_vnode->v_socket = 0;
+		vrele(unp->unp_vnode);
+		unp->unp_vnode = 0;
+	}
+	if (unp->unp_conn)
+		unp_disconnect(unp);
+	/* Each unp_drop unlinks the head of unp_refs via unp_disconnect. */
+	while (unp->unp_refs)
+		unp_drop(unp->unp_refs, ECONNRESET);
+	soisdisconnected(so);
+	so->so_pcb = 0;
+	m_freem(unp->unp_addr);
+	(void) m_free(dtom(unp));
+	if (unp_rights) {
+		/*
+		 * Normally the receive buffer is flushed later,
+		 * in sofree, but if our receive buffer holds references
+		 * to descriptors that are now garbage, we will dispose
+		 * of those descriptor references after the garbage collector
+		 * gets them (resulting in a "panic: closef: count < 0").
+		 */
+		sorflush(so);
+		unp_gc();
+	}
+}
+
+/*
+ * Bind the pcb unp to the filesystem name held in nam (a sockaddr_un)
+ * by creating a VSOCK vnode at that path on behalf of process p.
+ *
+ * Returns 0 on success; EINVAL if the pcb is already bound or the path
+ * fills the mbuf without a terminating NUL; EADDRINUSE if the path
+ * already exists; otherwise the error from namei() or VOP_CREATE().
+ */
+unp_bind(unp, nam, p)
+	struct unpcb *unp;
+	struct mbuf *nam;
+	struct proc *p;
+{
+	struct sockaddr_un *soun = mtod(nam, struct sockaddr_un *);
+	register struct vnode *vp;
+	struct vattr vattr;
+	int error;
+	struct nameidata nd;
+
+	NDINIT(&nd, CREATE, FOLLOW | LOCKPARENT, UIO_SYSSPACE,
+		soun->sun_path, p);
+	if (unp->unp_vnode != NULL)
+		return (EINVAL);
+	/*
+	 * Make sure the path is NUL-terminated: a full mbuf must already
+	 * end in NUL, otherwise a terminator is stored just past the name.
+	 */
+	if (nam->m_len == MLEN) {
+		if (*(mtod(nam, caddr_t) + nam->m_len - 1) != 0)
+			return (EINVAL);
+	} else
+		*(mtod(nam, caddr_t) + nam->m_len) = 0;
+/* SHOULD BE ABLE TO ADOPT EXISTING AND wakeup() ALA FIFO's */
+	if (error = namei(&nd))
+		return (error);
+	vp = nd.ni_vp;
+	if (vp != NULL) {
+		/* Name exists: abandon the create and release both vnodes. */
+		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
+		if (nd.ni_dvp == vp)
+			vrele(nd.ni_dvp);
+		else
+			vput(nd.ni_dvp);
+		vrele(vp);
+		return (EADDRINUSE);
+	}
+	VATTR_NULL(&vattr);
+	vattr.va_type = VSOCK;
+	vattr.va_mode = ACCESSPERMS;
+	LEASE_CHECK(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
+	if (error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr))
+		return (error);
+	/* Cross-link vnode and socket, and remember the bound name. */
+	vp = nd.ni_vp;
+	vp->v_socket = unp->unp_socket;
+	unp->unp_vnode = vp;
+	unp->unp_addr = m_copy(nam, 0, (int)M_COPYALL);
+	VOP_UNLOCK(vp);
+	return (0);
+}
+
+/*
+ * Connect socket so to the socket bound at the path in nam, acting for
+ * process p.
+ *
+ * The path is looked up and must name a VSOCK vnode that p may write
+ * and that still has a socket of the same type attached.  For protocols
+ * with PR_CONNREQUIRED the target must be accepting connections; a new
+ * socket is spawned from it with sonewconn(), inherits a copy of the
+ * listener's address, and becomes the actual peer.
+ *
+ * Returns 0 or EMSGSIZE, ENOTSOCK, ECONNREFUSED, EPROTOTYPE, or the
+ * error from namei(), VOP_ACCESS() or unp_connect2().  Once the lookup
+ * has succeeded the vnode is released with vput() on every path.
+ */
+unp_connect(so, nam, p)
+	struct socket *so;
+	struct mbuf *nam;
+	struct proc *p;
+{
+	register struct sockaddr_un *soun = mtod(nam, struct sockaddr_un *);
+	register struct vnode *vp;
+	register struct socket *so2, *so3;
+	struct unpcb *unp2, *unp3;
+	int error;
+	struct nameidata nd;
+
+	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, soun->sun_path, p);
+	/*
+	 * NUL-terminate the path.  If the name runs to the very end of the
+	 * mbuf's data area there is no room for a terminator, so its last
+	 * byte must already be NUL.
+	 */
+	if (nam->m_data + nam->m_len == &nam->m_dat[MLEN]) {	/* XXX */
+		if (*(mtod(nam, caddr_t) + nam->m_len - 1) != 0)
+			return (EMSGSIZE);
+	} else
+		*(mtod(nam, caddr_t) + nam->m_len) = 0;
+	if (error = namei(&nd))
+		return (error);
+	vp = nd.ni_vp;
+	if (vp->v_type != VSOCK) {
+		error = ENOTSOCK;
+		goto bad;
+	}
+	if (error = VOP_ACCESS(vp, VWRITE, p->p_ucred, p))
+		goto bad;
+	so2 = vp->v_socket;
+	if (so2 == 0) {
+		/* Name exists but no socket is attached to it any more. */
+		error = ECONNREFUSED;
+		goto bad;
+	}
+	if (so->so_type != so2->so_type) {
+		error = EPROTOTYPE;
+		goto bad;
+	}
+	if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
+		if ((so2->so_options & SO_ACCEPTCONN) == 0 ||
+		    (so3 = sonewconn(so2, 0)) == 0) {
+			error = ECONNREFUSED;
+			goto bad;
+		}
+		unp2 = sotounpcb(so2);
+		unp3 = sotounpcb(so3);
+		if (unp2->unp_addr)
+			unp3->unp_addr =
+				  m_copy(unp2->unp_addr, 0, (int)M_COPYALL);
+		so2 = so3;
+	}
+	error = unp_connect2(so, so2);
+bad:
+	vput(vp);
+	return (error);
+}
+
+/*
+ * Join two sockets of the same type.
+ *
+ * A datagram socket points at its peer and is pushed onto the peer's
+ * list of referencing pcbs; only so becomes connected.  Stream sockets
+ * point at each other and both become connected.
+ * Returns 0, or EPROTOTYPE when the socket types differ.
+ */
+unp_connect2(so, so2)
+	register struct socket *so;
+	register struct socket *so2;
+{
+	register struct unpcb *self, *peer;
+
+	if (so->so_type != so2->so_type)
+		return (EPROTOTYPE);
+	self = sotounpcb(so);
+	peer = sotounpcb(so2);
+	self->unp_conn = peer;
+	if (so->so_type == SOCK_DGRAM) {
+		/* One-way link: add ourselves to the peer's reference list. */
+		self->unp_nextref = peer->unp_refs;
+		peer->unp_refs = self;
+		soisconnected(so);
+	} else if (so->so_type == SOCK_STREAM) {
+		/* Two-way link. */
+		peer->unp_conn = self;
+		soisconnected(so);
+		soisconnected(so2);
+	} else
+		panic("unp_connect2");
+	return (0);
+}
+
+/*
+ * Break the connection from unp to its peer, if it has one.
+ *
+ * Datagram: unlink unp from the peer's reference list and clear
+ * SS_ISCONNECTED on our socket only.  Stream: clear the peer's back
+ * pointer and mark both sockets disconnected.
+ */
+unp_disconnect(unp)
+	struct unpcb *unp;
+{
+	register struct unpcb *peer = unp->unp_conn;
+	register struct unpcb *ref;
+
+	if (peer == 0)
+		return;
+	unp->unp_conn = 0;
+	switch (unp->unp_socket->so_type) {
+
+	case SOCK_STREAM:
+		soisdisconnected(unp->unp_socket);
+		peer->unp_conn = 0;
+		soisdisconnected(peer->unp_socket);
+		break;
+
+	case SOCK_DGRAM:
+		if (peer->unp_refs == unp)
+			peer->unp_refs = unp->unp_nextref;
+		else {
+			/* Find our predecessor; we must be on the list. */
+			for (ref = peer->unp_refs; ; ref = ref->unp_nextref) {
+				if (ref == 0)
+					panic("unp_disconnect");
+				if (ref->unp_nextref == unp)
+					break;
+			}
+			ref->unp_nextref = unp->unp_nextref;
+		}
+		unp->unp_nextref = 0;
+		unp->unp_socket->so_state &= ~SS_ISCONNECTED;
+		break;
+	}
+}
+
+#ifdef notdef
+/*
+ * Compiled out (notdef): would abort a socket simply by detaching its
+ * pcb.  PRU_ABORT in uipc_usrreq calls unp_drop instead.
+ */
+unp_abort(unp)
+	struct unpcb *unp;
+{
+
+	unp_detach(unp);
+}
+#endif
+
+/*
+ * Our side will send no more: for a connected stream socket, tell the
+ * peer's socket that it can receive no more data.  Any other socket
+ * type, or an unconnected socket, is left alone.
+ */
+unp_shutdown(unp)
+	struct unpcb *unp;
+{
+	struct socket *peerso;
+
+	if (unp->unp_socket->so_type != SOCK_STREAM)
+		return;
+	if (unp->unp_conn == 0)
+		return;
+	peerso = unp->unp_conn->unp_socket;
+	if (peerso != 0)
+		socantrcvmore(peerso);
+}
+
+/*
+ * Drop the connection on unp, recording errno as the socket's pending
+ * error.  If the socket has a so_head (it is linked to a listening
+ * socket), the pcb and its address are freed here and the socket is
+ * handed to sofree().
+ */
+unp_drop(unp, errno)
+	struct unpcb *unp;
+	int errno;
+{
+	struct socket *sock;
+
+	sock = unp->unp_socket;
+	sock->so_error = errno;
+	unp_disconnect(unp);
+	if (sock->so_head == 0)
+		return;
+	sock->so_pcb = (caddr_t) 0;
+	m_freem(unp->unp_addr);
+	(void) m_free(dtom(unp));
+	sofree(sock);
+}
+
+#ifdef notdef
+/*
+ * Compiled out (notdef): empty placeholder.
+ */
+unp_drain()
+{
+
+}
+#endif
+
+/*
+ * Deliver passed access rights to the current process: replace each
+ * struct file pointer stored after the cmsghdr in rights with a newly
+ * allocated descriptor number that refers to that file.
+ *
+ * If the process cannot take that many descriptors, every right in the
+ * message is discarded and EMSGSIZE is returned; otherwise returns 0.
+ *
+ * NOTE(review): the count is computed with sizeof (int) while the
+ * array is stepped as struct file *, and each slot is rewritten in
+ * place as an int -- this looks like it assumes the two sizes are
+ * equal; confirm for the target machine.
+ */
+unp_externalize(rights)
+	struct mbuf *rights;
+{
+	struct proc *p = curproc;		/* XXX */
+	register int i;
+	register struct cmsghdr *cm = mtod(rights, struct cmsghdr *);
+	register struct file **rp = (struct file **)(cm + 1);
+	register struct file *fp;
+	int newfds = (cm->cmsg_len - sizeof(*cm)) / sizeof (int);
+	int f;
+
+	if (!fdavail(p, newfds)) {
+		/* No room: release every in-flight reference in the message. */
+		for (i = 0; i < newfds; i++) {
+			fp = *rp;
+			unp_discard(fp);
+			*rp++ = 0;
+		}
+		return (EMSGSIZE);
+	}
+	for (i = 0; i < newfds; i++) {
+		/* fdavail() said there was room, so this must succeed. */
+		if (fdalloc(p, 0, &f))
+			panic("unp_externalize");
+		fp = *rp;
+		p->p_fd->fd_ofiles[f] = fp;
+		/* The reference is no longer in flight. */
+		fp->f_msgcount--;
+		unp_rights--;
+		*(int *)rp++ = f;
+	}
+	return (0);
+}
+
+/*
+ * Prepare access rights for sending: replace each descriptor number
+ * stored after the cmsghdr in control with the struct file pointer it
+ * names in process p, taking a reference on each file and counting it
+ * as in flight.
+ *
+ * Returns EINVAL unless the message is exactly one SOL_SOCKET /
+ * SCM_RIGHTS cmsg filling the mbuf, EBADF if any descriptor is out of
+ * range or not open, else 0.  The descriptors are validated in a first
+ * pass that only reads, so nothing has been modified when an error is
+ * returned.
+ *
+ * NOTE(review): as in unp_externalize, the count uses sizeof (int) but
+ * the array is stepped as struct file * -- presumably relies on the two
+ * sizes being equal; confirm.
+ */
+unp_internalize(control, p)
+	struct mbuf *control;
+	struct proc *p;
+{
+	struct filedesc *fdp = p->p_fd;
+	register struct cmsghdr *cm = mtod(control, struct cmsghdr *);
+	register struct file **rp;
+	register struct file *fp;
+	register int i, fd;
+	int oldfds;
+
+	if (cm->cmsg_type != SCM_RIGHTS || cm->cmsg_level != SOL_SOCKET ||
+	    cm->cmsg_len != control->m_len)
+		return (EINVAL);
+	oldfds = (cm->cmsg_len - sizeof (*cm)) / sizeof (int);
+	/* Pass 1: check every descriptor before touching anything. */
+	rp = (struct file **)(cm + 1);
+	for (i = 0; i < oldfds; i++) {
+		fd = *(int *)rp++;
+		if ((unsigned)fd >= fdp->fd_nfiles ||
+		    fdp->fd_ofiles[fd] == NULL)
+			return (EBADF);
+	}
+	/* Pass 2: convert in place and account for the new references. */
+	rp = (struct file **)(cm + 1);
+	for (i = 0; i < oldfds; i++) {
+		fp = fdp->fd_ofiles[*(int *)rp];
+		*rp++ = fp;
+		fp->f_count++;
+		fp->f_msgcount++;
+		unp_rights++;
+	}
+	return (0);
+}
+
+/*
+ * Garbage collector state: unp_defer counts files flagged FDEFER that
+ * still await a scan; unp_gcing is set while unp_gc is running so that
+ * a nested call returns at once.
+ */
+int	unp_defer, unp_gcing;
+int	unp_mark();
+/* unp_gc compares each socket's protocol domain against this. */
+extern	struct domain unixdomain;
+
+/*
+ * Garbage-collect file structures that are referenced only by access
+ * rights sitting in socket receive buffers.
+ *
+ * Mark phase: starting from every file that has a reference other than
+ * in-flight messages (f_count != f_msgcount), mark it and, if it is a
+ * Unix domain socket able to carry rights, mark every file named in its
+ * receive buffer (unp_mark flags those FDEFER so that they get scanned
+ * on a later pass).  Repeat until no deferred files remain.
+ *
+ * Sweep phase: every unmarked file whose references are all in flight
+ * is unreachable; its queued rights are flushed and it is closed.
+ *
+ * Not reentrant: a nested call while a collection is running returns
+ * immediately.
+ */
+unp_gc()
+{
+	register struct file *fp, *nextfp;
+	register struct socket *so;
+	struct file **extra_ref, **fpp;
+	int nunref, i;
+
+	if (unp_gcing)
+		return;
+	unp_gcing = 1;
+	unp_defer = 0;
+	for (fp = filehead; fp; fp = fp->f_filef)
+		fp->f_flag &= ~(FMARK|FDEFER);
+	do {
+		for (fp = filehead; fp; fp = fp->f_filef) {
+			if (fp->f_count == 0)
+				continue;
+			if (fp->f_flag & FDEFER) {
+				/* Marked earlier by unp_mark; scan it now. */
+				fp->f_flag &= ~FDEFER;
+				unp_defer--;
+			} else {
+				if (fp->f_flag & FMARK)
+					continue;
+				/* Only in-flight references: not a root. */
+				if (fp->f_count == fp->f_msgcount)
+					continue;
+				fp->f_flag |= FMARK;
+			}
+			if (fp->f_type != DTYPE_SOCKET ||
+			    (so = (struct socket *)fp->f_data) == 0)
+				continue;
+			if (so->so_proto->pr_domain != &unixdomain ||
+			    (so->so_proto->pr_flags&PR_RIGHTS) == 0)
+				continue;
+#ifdef notdef
+			if (so->so_rcv.sb_flags & SB_LOCK) {
+				/*
+				 * This is problematical; it's not clear
+				 * we need to wait for the sockbuf to be
+				 * unlocked (on a uniprocessor, at least),
+				 * and it's also not clear what to do
+				 * if sbwait returns an error due to receipt
+				 * of a signal.  If sbwait does return
+				 * an error, we'll go into an infinite
+				 * loop.  Delete all of this for now.
+				 */
+				(void) sbwait(&so->so_rcv);
+				goto restart;
+			}
+#endif
+			unp_scan(so->so_rcv.sb_mb, unp_mark);
+		}
+	} while (unp_defer);
+	/*
+	 * We grab an extra reference to each of the file table entries
+	 * that are not otherwise accessible and then free the rights
+	 * that are stored in messages on them.
+	 *
+	 * The bug in the original code is a little tricky, so I'll describe
+	 * what's wrong with it here.
+	 *
+	 * It is incorrect to simply unp_discard each entry for f_msgcount
+	 * times -- consider the case of sockets A and B that contain
+	 * references to each other.  On a last close of some other socket,
+	 * we trigger a gc since the number of outstanding rights (unp_rights)
+	 * is non-zero.  If during the sweep phase the gc code unp_discards,
+	 * we end up doing a (full) closef on the descriptor.  A closef on A
+	 * results in the following chain.  Closef calls soo_close, which
+	 * calls soclose.   Soclose calls first (through the switch
+	 * uipc_usrreq) unp_detach, which re-invokes unp_gc.  Unp_gc simply
+	 * returns because the previous instance had set unp_gcing, and
+	 * we return all the way back to soclose, which marks the socket
+	 * with SS_NOFDREF, and then calls sofree.  Sofree calls sorflush
+	 * to free up the rights that are queued in messages on the socket A,
+	 * i.e., the reference on B.  The sorflush calls via the dom_dispose
+	 * switch unp_dispose, which unp_scans with unp_discard.  This second
+	 * instance of unp_discard just calls closef on B.
+	 *
+	 * Well, a similar chain occurs on B, resulting in a sorflush on B,
+	 * which results in another closef on A.  Unfortunately, A is already
+	 * being closed, and the descriptor has already been marked with
+	 * SS_NOFDREF, and soclose panics at this point.
+	 *
+	 * Here, we first take an extra reference to each inaccessible
+	 * descriptor.  Then, we call sorflush ourself on each one that
+	 * is a socket.  After we destroy all the
+	 * rights carried in messages, we do a last closef to get rid
+	 * of our extra reference.  This is the last close, and the
+	 * unp_detach etc will shut down the socket.
+	 *
+	 * 91/09/19, bsy@cs.cmu.edu
+	 */
+	extra_ref = malloc(nfiles * sizeof(struct file *), M_FILE, M_WAITOK);
+	for (nunref = 0, fp = filehead, fpp = extra_ref; fp; fp = nextfp) {
+		nextfp = fp->f_filef;
+		if (fp->f_count == 0)
+			continue;
+		if (fp->f_count == fp->f_msgcount && !(fp->f_flag & FMARK)) {
+			*fpp++ = fp;
+			nunref++;
+			fp->f_count++;
+		}
+	}
+	for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp) {
+		/*
+		 * Any kind of file can be passed as a right, so an
+		 * unreachable entry need not be a socket; only sockets
+		 * have a receive buffer to flush.
+		 */
+		if ((*fpp)->f_type == DTYPE_SOCKET && (*fpp)->f_data != 0)
+			sorflush((struct socket *)(*fpp)->f_data);
+	}
+	/* No process context for these closes, as in unp_discard. */
+	for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp)
+		(void) closef(*fpp, (struct proc *)NULL);
+	free((caddr_t)extra_ref, M_FILE);
+	unp_gcing = 0;
+}
+
+/*
+ * Throw away any access rights carried in the mbuf chain m by running
+ * unp_discard over every file it references.  A null chain is ignored.
+ */
+unp_dispose(m)
+	struct mbuf *m;
+{
+	int unp_discard();
+
+	if (m == 0)
+		return;
+	unp_scan(m, unp_discard);
+}
+
+/*
+ * Walk the records chained through m_act starting at m0 and call
+ * (*op)(fp) for every struct file pointer found in an SOL_SOCKET /
+ * SCM_RIGHTS control mbuf.  At most one rights message is processed
+ * per record.
+ */
+unp_scan(m0, op)
+	register struct mbuf *m0;
+	int (*op)();
+{
+	register struct mbuf *m;
+	register struct file **rp;
+	register struct cmsghdr *cm;
+	register int i;
+	int qfds;
+
+	while (m0) {
+		for (m = m0; m; m = m->m_next)
+			if (m->m_type == MT_CONTROL &&
+			    m->m_len >= sizeof(*cm)) {
+				cm = mtod(m, struct cmsghdr *);
+				/* Not a rights message: try the next mbuf. */
+				if (cm->cmsg_level != SOL_SOCKET ||
+				    cm->cmsg_type != SCM_RIGHTS)
+					continue;
+				qfds = (cm->cmsg_len - sizeof *cm)
+						/ sizeof (struct file *);
+				rp = (struct file **)(cm + 1);
+				for (i = 0; i < qfds; i++)
+					(*op)(*rp++);
+				break;		/* XXX, but saves time */
+			}
+		m0 = m0->m_act;
+	}
+}
+
+/*
+ * Mark-phase callback for unp_gc: flag a file not yet marked as both
+ * reachable (FMARK) and awaiting its own scan (FDEFER), and count it
+ * in unp_defer.  Files already marked are left alone.
+ */
+unp_mark(fp)
+	struct file *fp;
+{
+
+	if ((fp->f_flag & FMARK) == 0) {
+		fp->f_flag |= (FMARK|FDEFER);
+		unp_defer++;
+	}
+}
+
+/*
+ * Give up one in-flight reference to fp: fix up the global and per-file
+ * in-flight counts, then close the file with no process context.
+ */
+unp_discard(fp)
+	struct file *fp;
+{
+
+	unp_rights--;
+	fp->f_msgcount--;
+	/* Must come last: fp is not touched again after the close. */
+	(void) closef(fp, (struct proc *)NULL);
+}
diff --git a/sys/kern/vfs_bio.c b/sys/kern/vfs_bio.c
new file mode 100644
index 00000000000..ec5c962f7df
--- /dev/null
+++ b/sys/kern/vfs_bio.c
@@ -0,0 +1,339 @@
+/*-
+ * Copyright (c) 1982, 1986, 1989, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	from: @(#)vfs_bio.c	8.6 (Berkeley) 1/11/94
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/buf.h>
+#include <sys/vnode.h>
+#include <sys/mount.h>
+#include <sys/trace.h>
+#include <sys/malloc.h>
+#include <sys/resourcevar.h>
+
+/*
+ * Definitions for the buffer hash lists.
+ */
+/* Hash chain for (vnode pointer, logical block); bufhash is used as a mask. */
+#define	BUFHASH(dvp, lbn)	\
+	(&bufhashtbl[((int)(dvp) / sizeof(*(dvp)) + (int)(lbn)) & bufhash])
+/* bufhashtbl is allocated in bufinit; invalhash receives every buffer there. */
+LIST_HEAD(bufhashhdr, buf) *bufhashtbl, invalhash;
+u_long	bufhash;		/* filled in by hashinit() in bufinit */
+
+/*
+ * Insq/Remq for the buffer hash lists.
+ */
+#define	binshash(bp, dp)	LIST_INSERT_HEAD(dp, bp, b_hash)
+#define	bremhash(bp)		LIST_REMOVE(bp, b_hash)
+
+/*
+ * Definitions for the buffer free lists.
+ */
+#define	BQUEUES		4		/* number of free buffer queues */
+
+#define	BQ_LOCKED	0		/* super-blocks &c */
+#define	BQ_LRU		1		/* lru, useful buffers */
+#define	BQ_AGE		2		/* rubbish */
+#define	BQ_EMPTY	3		/* buffer headers with no memory */
+
+/* One free list per BQ_* index above. */
+TAILQ_HEAD(bqueues, buf) bufqueues[BQUEUES];
+/* NOTE(review): not referenced by the code that remains in this file. */
+int needbuffer;
+
+/*
+ * Insq/Remq for the buffer free lists.
+ */
+#define	binsheadfree(bp, dp)	TAILQ_INSERT_HEAD(dp, bp, b_freelist)
+#define	binstailfree(bp, dp)	TAILQ_INSERT_TAIL(dp, bp, b_freelist)
+
+/*
+ * Remove buffer bp from whichever free list it is on.  Panics if bp is
+ * the last element of a list but no queue's tail pointer refers to it.
+ */
+void
+bremfree(bp)
+	struct buf *bp;
+{
+	struct bqueues *dp = NULL;
+
+	/*
+	 * We only calculate the head of the freelist when removing
+	 * the last element of the list as that is the only time that
+	 * it is needed (e.g. to reset the tail pointer).
+	 *
+	 * NB: This makes an assumption about how tailq's are implemented.
+	 */
+	if (bp->b_freelist.tqe_next == NULL) {
+		/* bp is a tail: find the queue whose tail pointer names it. */
+		for (dp = bufqueues; dp < &bufqueues[BQUEUES]; dp++)
+			if (dp->tqh_last == &bp->b_freelist.tqe_next)
+				break;
+		if (dp == &bufqueues[BQUEUES])
+			panic("bremfree: lost tail");
+	}
+	/* dp is still NULL here unless bp was the last element (see above). */
+	TAILQ_REMOVE(dp, bp, b_freelist);
+}
+
+/*
+ * Set up the buffer cache: empty free queues, the hash table, and all
+ * nbuf buffer headers.
+ *
+ * The bufpages pages are shared out evenly, with the first
+ * (bufpages % nbuf) buffers getting one page more.  Every header starts
+ * out invalid, is placed at the head of the AGE queue (or the EMPTY
+ * queue if it was given no memory), and is hashed onto invalhash.
+ */
+void
+bufinit()
+{
+	register struct buf *bp;
+	struct bqueues *freeq;
+	register int idx, q;
+	int pages_each, extra;
+
+	for (q = 0; q < BQUEUES; q++)
+		TAILQ_INIT(&bufqueues[q]);
+	bufhashtbl = hashinit(nbuf, M_CACHE, &bufhash);
+	pages_each = bufpages / nbuf;
+	extra = bufpages % nbuf;
+	for (idx = 0, bp = buf; idx < nbuf; idx++, bp++) {
+		bzero((char *)bp, sizeof *bp);
+		bp->b_flags = B_INVAL;
+		bp->b_dev = NODEV;
+		bp->b_rcred = bp->b_wcred = NOCRED;
+		bp->b_vnbufs.le_next = NOLIST;
+		/* Each header owns a MAXBSIZE-spaced slot in "buffers". */
+		bp->b_data = buffers + idx * MAXBSIZE;
+		bp->b_bufsize =
+		    (idx < extra ? pages_each + 1 : pages_each) * CLBYTES;
+		if (bp->b_bufsize)
+			freeq = &bufqueues[BQ_AGE];
+		else
+			freeq = &bufqueues[BQ_EMPTY];
+		binsheadfree(bp, freeq);
+		binshash(bp, &invalhash);
+	}
+}
+
+/*
+ * Stub: the implementation was removed from this distribution; always
+ * returns EIO and does not touch its arguments.
+ * NOTE(review): the a1..a5 names are placeholders; judging by type only
+ * they look like (vnode, block number, size, credentials, result
+ * buffer) -- confirm against callers.
+ */
+bread(a1, a2, a3, a4, a5)
+	struct vnode *a1;
+	daddr_t a2;
+	int a3;
+	struct ucred *a4;
+	struct buf **a5;
+{
+
+	/*
+	 * Body deleted.
+	 */
+	return (EIO);
+}
+
+/*
+ * Stub: the implementation was removed from this distribution; always
+ * returns EIO and does not touch its arguments.
+ * NOTE(review): a1..a8 are placeholder names; their roles cannot be
+ * determined from this file -- confirm against callers.
+ */
+breadn(a1, a2, a3, a4, a5, a6, a7, a8)
+	struct vnode *a1;
+	daddr_t a2; int a3;
+	daddr_t a4[]; int a5[];
+	int a6;
+	struct ucred *a7;
+	struct buf **a8;
+{
+
+	/*
+	 * Body deleted.
+	 */
+	return (EIO);
+}
+
+/*
+ * Stub: the implementation was removed from this distribution; always
+ * returns EIO without using the buffer a1.
+ */
+bwrite(a1)
+	struct buf *a1;
+{
+
+	/*
+	 * Body deleted.
+	 */
+	return (EIO);
+}
+
+/*
+ * Vnode-operation wrapper: unpack the buffer from the vop_bwrite
+ * argument structure and pass it to bwrite(), returning its result.
+ */
+int
+vn_bwrite(ap)
+	struct vop_bwrite_args *ap;
+{
+	return (bwrite(ap->a_bp));
+}
+
+/*
+ * Stub: the implementation was removed from this distribution; does
+ * nothing with the buffer a1.
+ */
+bdwrite(a1)
+	struct buf *a1;
+{
+
+	/*
+	 * Body deleted.
+	 */
+	return;
+}
+
+/*
+ * Stub: the implementation was removed from this distribution; does
+ * nothing with the buffer a1.
+ */
+bawrite(a1)
+	struct buf *a1;
+{
+
+	/*
+	 * Body deleted.
+	 */
+	return;
+}
+
+/*
+ * Stub: the implementation was removed from this distribution; does
+ * nothing with the buffer a1.
+ */
+brelse(a1)
+	struct buf *a1;
+{
+
+	/*
+	 * Body deleted.
+	 */
+	return;
+}
+
+/*
+ * Stub: the implementation was removed from this distribution; always
+ * returns a null buffer pointer.
+ * NOTE(review): by type the arguments look like (vnode, block number)
+ * -- confirm against callers.
+ */
+struct buf *
+incore(a1, a2)
+	struct vnode *a1;
+	daddr_t a2;
+{
+
+	/*
+	 * Body deleted.
+	 */
+	return (0);
+}
+
+/*
+ * Stub: the implementation was removed from this distribution; always
+ * returns a null buffer pointer.
+ * NOTE(review): a1..a5 are placeholder names; the meaning of the three
+ * int arguments cannot be determined from this file.
+ */
+struct buf *
+getblk(a1, a2, a3, a4, a5)
+	struct vnode *a1;
+	daddr_t a2;
+	int a3, a4, a5;
+{
+
+	/*
+	 * Body deleted.
+	 */
+	return ((struct buf *)0);
+}
+
+/*
+ * Stub: the implementation was removed from this distribution; always
+ * returns a null buffer pointer, whatever a1 is.
+ */
+struct buf *
+geteblk(a1)
+	int a1;
+{
+
+	/*
+	 * Body deleted.
+	 */
+	return ((struct buf *)0);
+}
+
+/*
+ * Stub: the implementation was removed from this distribution; always
+ * returns 0 and leaves the buffer a1 unchanged.
+ */
+allocbuf(a1, a2)
+	struct buf *a1;
+	int a2;
+{
+
+	/*
+	 * Body deleted.
+	 */
+	return (0);
+}
+
+/*
+ * Stub: the implementation was removed from this distribution; always
+ * returns a null buffer pointer.
+ */
+struct buf *
+getnewbuf(a1, a2)
+	int a1, a2;
+{
+
+	/*
+	 * Body deleted.
+	 */
+	return ((struct buf *)0);
+}
+
+/*
+ * Stub: the implementation was removed from this distribution; always
+ * returns EIO without waiting on the buffer a1.
+ */
+biowait(a1)
+	struct buf *a1;
+{
+
+	/*
+	 * Body deleted.
+	 */
+	return (EIO);
+}
+
+/*
+ * Stub: the implementation was removed from this distribution; does
+ * nothing with the buffer a1.
+ */
+void
+biodone(a1)
+	struct buf *a1;
+{
+
+	/*
+	 * Body deleted.
+	 */
+	return;
+}
+
+/*
+ * Stub: the implementation was removed from this distribution; always
+ * reports 0.
+ */
+int
+count_lock_queue()
+{
+
+	/*
+	 * Body deleted.
+	 */
+	return (0);
+}
+
+#ifdef DIAGNOSTIC
+/*
+ * Report how the buffer pool is currently distributed.  For each free
+ * queue, print the number of buffers on it followed by a breakdown by
+ * buffer size (only sizes that actually occur are listed).
+ * Can be enabled to print out on every ``sync'' by setting "syncprt"
+ * in vfs_syscalls.c using sysctl.
+ */
+void
+vfs_bufstats()
+{
+	int s, q, slot, total;
+	register struct buf *bp;
+	register struct bqueues *dp;
+	int counts[MAXBSIZE/CLBYTES+1];
+	static char *bname[BQUEUES] = { "LOCKED", "LRU", "AGE", "EMPTY" };
+
+	for (q = 0; q < BQUEUES; q++) {
+		dp = &bufqueues[q];
+		total = 0;
+		for (slot = 0; slot <= MAXBSIZE/CLBYTES; slot++)
+			counts[slot] = 0;
+		/* Walk the queue with buffer I/O interrupts blocked. */
+		s = splbio();
+		for (bp = dp->tqh_first; bp != NULL;
+		    bp = bp->b_freelist.tqe_next) {
+			counts[bp->b_bufsize/CLBYTES]++;
+			total++;
+		}
+		splx(s);
+		printf("%s: total-%d", bname[q], total);
+		for (slot = 0; slot <= MAXBSIZE/CLBYTES; slot++) {
+			if (counts[slot] == 0)
+				continue;
+			printf(", %d-%d", slot * CLBYTES, counts[slot]);
+		}
+		printf("\n");
+	}
+}
+#endif /* DIAGNOSTIC */
diff --git a/sys/kern/vfs_cache.c b/sys/kern/vfs_cache.c
new file mode 100644
index 00000000000..4ccfd7289a0
--- /dev/null
+++ b/sys/kern/vfs_cache.c
@@ -0,0 +1,328 @@
+/*
+ * Copyright (c) 1989, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)vfs_cache.c	8.1 (Berkeley) 6/10/93
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/time.h>
+#include <sys/mount.h>
+#include <sys/vnode.h>
+#include <sys/namei.h>
+#include <sys/errno.h>
+#include <sys/malloc.h>
+
+/*
+ * Name caching works as follows:
+ *
+ * Names found by directory scans are retained in a cache
+ * for future reference.  It is managed LRU, so frequently
+ * used names will hang around.  Cache is indexed by hash value
+ * obtained from (vp, name) where vp refers to the directory
+ * containing name.
+ *
+ * For simplicity (and economy of storage), names longer than
+ * a maximum length of NCHNAMLEN are not cached; they occur
+ * infrequently in any case, and are almost never of interest.
+ *
+ * Upon reaching the last segment of a path, if the reference
+ * is for DELETE, or NOCACHE is set (rewrite), and the
+ * name is located in the cache, it will be dropped.
+ */
+
+/*
+ * Structures associated with name caching.
+ */
+struct namecache **nchashtbl;		/* hash chain heads (see nchinit) */
+u_long	nchash;				/* size of hash table - 1; index mask */
+long	numcache;			/* number of cache entries allocated */
+struct	namecache *nchhead, **nchtail;	/* LRU chain: first entry, tail link */
+struct	nchstats nchstats;		/* cache effectiveness statistics */
+
+int doingcache = 1;			/* 1 => enable the cache */
+
+/*
+ * Move an entry to the tail of the LRU chain (the most recently used
+ * end), if not already there.  Shared by both hit paths of cache_lookup.
+ */
+static void
+cache_lru_requeue(ncp)
+	register struct namecache *ncp;
+{
+
+	if (ncp->nc_nxt == NULL)
+		return;
+	/* remove from LRU chain */
+	*ncp->nc_prev = ncp->nc_nxt;
+	ncp->nc_nxt->nc_prev = ncp->nc_prev;
+	/* and replace at end of it */
+	ncp->nc_nxt = NULL;
+	ncp->nc_prev = nchtail;
+	*nchtail = ncp;
+	nchtail = &ncp->nc_nxt;
+}
+
+/*
+ * Look for the name in the cache. We don't do this
+ * if the segment name is long, simply so the cache can avoid
+ * holding long names (which would either waste space, or
+ * add greatly to the complexity).
+ *
+ * Lookup is called with dvp pointing to the directory to search,
+ * cn_nameptr pointing to the name of the entry being sought, cn_namelen
+ * giving the length of the name, and cn_hash containing a hash of
+ * the name. If the lookup succeeds, the vnode is returned in *vpp
+ * and a status of -1 is returned. If the lookup determines that
+ * the name does not exist (negative caching), a status of ENOENT
+ * is returned. If the lookup fails, a status of zero is returned.
+ */
+int
+cache_lookup(dvp, vpp, cnp)
+	struct vnode *dvp;
+	struct vnode **vpp;
+	struct componentname *cnp;
+{
+	register struct namecache *ncp, *ncq, **ncpp;
+
+	if (!doingcache)
+		return (0);
+	if (cnp->cn_namelen > NCHNAMLEN) {
+		/* Too long to cache; clear MAKEENTRY as well. */
+		nchstats.ncs_long++;
+		cnp->cn_flags &= ~MAKEENTRY;
+		return (0);
+	}
+	/* Scan the hash chain for an entry matching (dvp, name). */
+	ncpp = &nchashtbl[cnp->cn_hash & nchash];
+	for (ncp = *ncpp; ncp; ncp = ncp->nc_forw) {
+		if (ncp->nc_dvp == dvp &&
+		    ncp->nc_dvpid == dvp->v_id &&
+		    ncp->nc_nlen == cnp->cn_namelen &&
+		    !bcmp(ncp->nc_name, cnp->cn_nameptr, (u_int)ncp->nc_nlen))
+			break;
+	}
+	if (ncp == NULL) {
+		nchstats.ncs_miss++;
+		return (0);
+	}
+	/* Classify the hit; the two usable cases return from here. */
+	if (!(cnp->cn_flags & MAKEENTRY)) {
+		nchstats.ncs_badhits++;
+	} else if (ncp->nc_vp == NULL) {
+		if (cnp->cn_nameiop != CREATE) {
+			nchstats.ncs_neghits++;
+			cache_lru_requeue(ncp);
+			return (ENOENT);
+		}
+	} else if (ncp->nc_vpid != ncp->nc_vp->v_id) {
+		nchstats.ncs_falsehits++;
+	} else {
+		nchstats.ncs_goodhits++;
+		cache_lru_requeue(ncp);
+		*vpp = ncp->nc_vp;
+		return (-1);
+	}
+
+	/*
+	 * Last component and we are renaming or deleting,
+	 * the cache entry is invalid, or otherwise don't
+	 * want cache entry to exist.  Unhash the entry and put it at the
+	 * head of the LRU chain so that it is reused first.
+	 */
+	/* remove from LRU chain */
+	if (ncq = ncp->nc_nxt)
+		ncq->nc_prev = ncp->nc_prev;
+	else
+		nchtail = ncp->nc_prev;
+	*ncp->nc_prev = ncq;
+	/* remove from hash chain */
+	if (ncq = ncp->nc_forw)
+		ncq->nc_back = ncp->nc_back;
+	*ncp->nc_back = ncq;
+	/* and make a dummy hash chain */
+	ncp->nc_forw = NULL;
+	ncp->nc_back = NULL;
+	/* insert at head of LRU list (first to grab) */
+	if (ncq = nchhead)
+		ncq->nc_prev = &ncp->nc_nxt;
+	else
+		nchtail = &ncp->nc_nxt;
+	nchhead = ncp;
+	ncp->nc_nxt = ncq;
+	ncp->nc_prev = &nchhead;
+	return (0);
+}
+
+/*
+ * Add an entry to the cache: name cnp in directory dvp maps to vp, or
+ * is known not to exist when vp is NULL (a negative entry).  Names
+ * longer than NCHNAMLEN are never entered.  A new slot is allocated
+ * while fewer than desiredvnodes exist; after that the slot at the
+ * head of the LRU chain is reused.
+ */
+cache_enter(dvp, vp, cnp)
+	struct vnode *dvp;
+	struct vnode *vp;
+	struct componentname *cnp;
+{
+	register struct namecache *ncp, *ncq, **ncpp;
+
+#ifdef DIAGNOSTIC
+	if (cnp->cn_namelen > NCHNAMLEN)
+		panic("cache_enter: name too long");
+#endif
+	/* Over-long names must never reach the bcopy into nc_name below. */
+	if (!doingcache || cnp->cn_namelen > NCHNAMLEN)
+		return;
+	/* Get a slot: a fresh one, or the one at the head of the lru chain. */
+	if (numcache < desiredvnodes) {
+		ncp = (struct namecache *)
+			malloc((u_long)sizeof *ncp, M_CACHE, M_WAITOK);
+		bzero((char *)ncp, sizeof *ncp);
+		numcache++;
+	} else if (ncp = nchhead) {
+		/* remove from lru chain */
+		if (ncq = ncp->nc_nxt)
+			ncq->nc_prev = ncp->nc_prev;
+		else
+			nchtail = ncp->nc_prev;
+		*ncp->nc_prev = ncq;
+		/* remove from old hash chain, if on one */
+		if (ncp->nc_back) {
+			if (ncq = ncp->nc_forw)
+				ncq->nc_back = ncp->nc_back;
+			*ncp->nc_back = ncq;
+			ncp->nc_forw = NULL;
+			ncp->nc_back = NULL;
+		}
+	} else
+		return;
+	/* grab the vnode we just found */
+	ncp->nc_vp = vp;
+	ncp->nc_vpid = vp ? vp->v_id : 0;
+	/* fill in cache info */
+	ncp->nc_dvp = dvp;
+	ncp->nc_dvpid = dvp->v_id;
+	ncp->nc_nlen = cnp->cn_namelen;
+	bcopy(cnp->cn_nameptr, ncp->nc_name, (unsigned)ncp->nc_nlen);
+	/* link at end of lru chain */
+	ncp->nc_nxt = NULL;
+	ncp->nc_prev = nchtail;
+	*nchtail = ncp;
+	nchtail = &ncp->nc_nxt;
+	/* and insert on hash chain */
+	ncpp = &nchashtbl[cnp->cn_hash & nchash];
+	if (ncq = *ncpp)
+		ncq->nc_back = &ncp->nc_forw;
+	ncp->nc_forw = ncq;
+	ncp->nc_back = ncpp;
+	*ncpp = ncp;
+}
+
+/*
+ * Name cache initialization, from vfs_init() when we are booting
+ */
+nchinit()
+{
+	/* Empty LRU chain: the tail link points at the head pointer. */
+	nchtail = &nchhead;
+	nchashtbl = hashinit(desiredvnodes, M_CACHE, &nchash);
+}
+
+/*
+ * Cache flush, a particular vnode; called when a vnode is renamed to
+ * hide entries that would now be invalid.  The vnode gets a new v_id,
+ * so entries recorded under the old id no longer match.
+ */
+cache_purge(vp)
+	struct vnode *vp;
+{
+	struct namecache *ncp;
+	u_long i;
+
+	vp->v_id = ++nextvnodeid;
+	if (nextvnodeid != 0)
+		return;
+	/* Id counter wrapped: clear cached ids (walk by index, not pointer). */
+	for (i = 0; i <= nchash; i++)
+		for (ncp = nchashtbl[i]; ncp; ncp = ncp->nc_forw)
+			ncp->nc_vpid = ncp->nc_dvpid = 0;
+	vp->v_id = ++nextvnodeid;
+}
+
+/*
+ * Cache flush, a whole filesystem; called when filesys is umounted to
+ * remove entries that would now be invalid
+ *
+ * The line "nxtcp = nchhead" near the end is to avoid potential problems
+ * if the cache lru chain is modified while we are dumping the
+ * inode.  This makes the algorithm O(n^2), but do you think I care?
+ */
+cache_purgevfs(mp)
+	struct mount *mp;
+{
+	register struct namecache *ncp, *nxtcp;
+
+	for (ncp = nchhead; ncp; ncp = nxtcp) {
+		if (ncp->nc_dvp == NULL || ncp->nc_dvp->v_mount != mp) {
+			/* no directory, or one on another mount: step on */
+			nxtcp = ncp->nc_nxt;
+			continue;
+		}
+		/* free the resources we had */
+		ncp->nc_vp = NULL;
+		ncp->nc_dvp = NULL;
+		/* remove from old hash chain, if on one */
+		if (ncp->nc_back) {
+			if (nxtcp = ncp->nc_forw)
+				nxtcp->nc_back = ncp->nc_back;
+			*ncp->nc_back = nxtcp;
+			ncp->nc_forw = NULL;
+			ncp->nc_back = NULL;
+		}
+		/* delete this entry from LRU chain */
+		if (nxtcp = ncp->nc_nxt)
+			nxtcp->nc_prev = ncp->nc_prev;
+		else
+			nchtail = ncp->nc_prev;
+		*ncp->nc_prev = nxtcp;
+		/* relink at head of LRU; nxtcp = old head forces a rescan */
+		if (nxtcp = nchhead)
+			nxtcp->nc_prev = &ncp->nc_nxt;
+		else
+			nchtail = &ncp->nc_nxt;
+		nchhead = ncp;
+		ncp->nc_nxt = nxtcp;
+		ncp->nc_prev = &nchhead;
+	}
+}
diff --git a/sys/kern/vfs_cluster.c b/sys/kern/vfs_cluster.c
new file mode 100644
index 00000000000..c34fbc34a67
--- /dev/null
+++ b/sys/kern/vfs_cluster.c
@@ -0,0 +1,746 @@
+/*-
+ * Copyright (c) 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)vfs_cluster.c	8.7 (Berkeley) 2/13/94
+ */
+
+#include <sys/param.h>
+#include <sys/proc.h>
+#include <sys/buf.h>
+#include <sys/vnode.h>
+#include <sys/mount.h>
+#include <sys/trace.h>
+#include <sys/malloc.h>
+#include <sys/resourcevar.h>
+#include <libkern/libkern.h>
+
+#ifdef DEBUG
+#include <vm/vm.h>
+#include <sys/sysctl.h>
+int doreallocblks = 1;		/* nonzero lets cluster_write try VOP_REALLOCBLKS */
+struct ctldebug debug13 = { "doreallocblks", &doreallocblks };
+#else
+/* XXX for cluster_write: without DEBUG, reallocation is always enabled */
+#define doreallocblks 1
+#endif /* DEBUG */
+
+/*
+ * Local declarations: helpers defined later in this file.
+ */
+struct buf *cluster_newbuf __P((struct vnode *, struct buf *, long, daddr_t,
+	    daddr_t, long, int));
+struct buf *cluster_rbuild __P((struct vnode *, u_quad_t, struct buf *,
+	    daddr_t, daddr_t, long, int, long));
+void	    cluster_wbuild __P((struct vnode *, struct buf *, long,
+	    daddr_t, int, daddr_t));
+struct cluster_save *cluster_collectbufs __P((struct vnode *, struct buf *));
+
+#ifdef DIAGNOSTIC
+/*
+ * ISSEQREAD(vp, blk): blk is the block last read (v_lastr) or the next one.
+ * doclusterraz: 1 lets a read of block zero count as sequential and cause
+ * readahead; 0 treats a read of block zero as a non-sequential read.
+ * Setting to one assumes that most reads of block zero of files are due to
+ * sequential passes over the files (e.g. cat, sum) where additional blocks
+ * will soon be needed.  Setting to zero assumes that the majority are
+ * surgical strikes to get particular info (e.g. size, file) where readahead
+ * blocks will not be used and, in fact, push out other potentially useful
+ * blocks from the cache.  The former seems intuitive, but some quick tests
+ * showed that the latter performed better from a system-wide point of view.
+ */
+int	doclusterraz = 0;
+#define ISSEQREAD(vp, blk) \
+	(((blk) != 0 || doclusterraz) && \
+	 ((blk) == (vp)->v_lastr + 1 || (blk) == (vp)->v_lastr))
+#else	/* !DIAGNOSTIC: a read of block zero is never sequential */
+#define ISSEQREAD(vp, blk) \
+	((blk) != 0 && ((blk) == (vp)->v_lastr + 1 || (blk) == (vp)->v_lastr))
+#endif	/* DIAGNOSTIC */
+
+/*
+ * This replaces bread.  If this is a bread at the beginning of a file and
+ * lastr is 0, we assume this is the first read and we'll read up to two
+ * blocks if they are sequential.  After that, we'll do regular read ahead
+ * in clustered chunks.
+ *
+ * There are 4 or 5 cases depending on how you count:
+ *	Desired block is in the cache:
+ *	    1 Not sequential access (0 I/Os).
+ *	    2 Access is sequential, do read-ahead (1 ASYNC).
+ *	Desired block is not in cache:
+ *	    3 Not sequential access (1 SYNC).
+ *	    4 Sequential access, next block is contiguous (1 SYNC).
+ *	    5 Sequential access, next block is not contiguous (1 SYNC, 1 ASYNC)
+ *
+ * There are potentially two buffers that require I/O.
+ * 	bp is the block requested.
+ *	rbp is the read-ahead block.
+ *	If either is NULL, then you don't have to do the I/O.
+ */
+cluster_read(vp, filesize, lblkno, size, cred, bpp)
+	struct vnode *vp;
+	u_quad_t filesize;
+	daddr_t lblkno;
+	long size;
+	struct ucred *cred;
+	struct buf **bpp;
+{
+	struct buf *bp, *rbp;
+	daddr_t blkno, ioblkno;
+	long flags;
+	int error, num_ra, alreadyincore;
+
+#ifdef DIAGNOSTIC
+	if (size == 0)
+		panic("cluster_read: size = 0");
+#endif
+	error = 0;
+	flags = B_READ;
+	*bpp = bp = getblk(vp, lblkno, size, 0, 0);
+	if (bp->b_flags & B_CACHE) {
+		/*
+		 * Desired block is in cache; do any readahead ASYNC.
+		 * Case 1, 2.
+		 */
+		trace(TR_BREADHIT, pack(vp, size), lblkno);
+		flags |= B_ASYNC;
+		ioblkno = lblkno + (vp->v_ralen ? vp->v_ralen : 1);
+		/* Only presence matters; a pointer need not fit in an int. */
+		alreadyincore = incore(vp, ioblkno) != NULL;
+		bp = NULL;
+	} else {
+		/* Block wasn't in cache, case 3, 4, 5. */
+		trace(TR_BREADMISS, pack(vp, size), lblkno);
+		bp->b_flags |= B_READ;
+		ioblkno = lblkno;
+		alreadyincore = 0;
+		curproc->p_stats->p_ru.ru_inblock++;		/* XXX */
+	}
+	/*
+	 * XXX
+	 * Replace 1 with a window size based on some permutation of
+	 * maxcontig and rot_delay.  This will let you figure out how
+	 * many blocks you should read-ahead (case 2, 4, 5).
+	 *
+	 * If the access isn't sequential, reset the window to 1.  A read
+	 * to the same block is considered sequential; this catches files
+	 * read sequentially at smaller than the filesystem block size.
+	 * Offsets are compared as u_quad_t so large files do not overflow.
+	 */
+	rbp = NULL;
+	if (!ISSEQREAD(vp, lblkno)) {
+		vp->v_ralen = 0;
+		vp->v_maxra = lblkno;
+	} else if ((u_quad_t)(ioblkno + 1) * size <= filesize &&
+	    !alreadyincore && !(error = VOP_BMAP(vp, ioblkno, NULL, &blkno,
+	    &num_ra)) && blkno != -1) {
+		/*
+		 * Reading sequentially, and the next block is not in the
+		 * cache.  We are going to try reading ahead.
+		 */
+		if (num_ra) {
+			/*
+			 * If our desired readahead block had been read
+			 * in a previous readahead but is no longer in
+			 * core, then we may be reading ahead too far
+			 * or are not using our readahead very rapidly.
+			 * In this case we scale back the window.
+			 */
+			if (!alreadyincore && ioblkno <= vp->v_maxra)
+				vp->v_ralen = max(vp->v_ralen >> 1, 1);
+			/*
+			 * There are more sequential blocks than our current
+			 * window allows, scale up.  Ideally we want to get
+			 * in sync with the filesystem maxcontig value.
+			 */
+			else if (num_ra > vp->v_ralen && lblkno != vp->v_lastr)
+				vp->v_ralen = vp->v_ralen ?
+					min(num_ra, vp->v_ralen << 1) : 1;
+
+			if (num_ra > vp->v_ralen)
+				num_ra = vp->v_ralen;
+		}
+
+		if (num_ra)				/* case 2, 4 */
+			rbp = cluster_rbuild(vp, filesize,
+			    bp, ioblkno, blkno, size, num_ra, flags);
+		else if (ioblkno == lblkno) {
+			bp->b_blkno = blkno;
+			/* Case 5: check how many blocks to read ahead */
+			++ioblkno;
+			if ((u_quad_t)(ioblkno + 1) * size > filesize ||
+			    incore(vp, ioblkno) || (error = VOP_BMAP(vp,
+			     ioblkno, NULL, &blkno, &num_ra)) || blkno == -1)
+				goto skip_readahead;
+			/*
+			 * Adjust readahead as above
+			 */
+			if (num_ra) {
+				if (!alreadyincore && ioblkno <= vp->v_maxra)
+					vp->v_ralen = max(vp->v_ralen >> 1, 1);
+				else if (num_ra > vp->v_ralen &&
+					 lblkno != vp->v_lastr)
+					vp->v_ralen = vp->v_ralen ?
+						min(num_ra,vp->v_ralen<<1) : 1;
+				if (num_ra > vp->v_ralen)
+					num_ra = vp->v_ralen;
+			}
+			flags |= B_ASYNC;
+			if (num_ra)
+				rbp = cluster_rbuild(vp, filesize,
+				    NULL, ioblkno, blkno, size, num_ra, flags);
+			else {
+				rbp = getblk(vp, ioblkno, size, 0, 0);
+				rbp->b_flags |= flags;
+				rbp->b_blkno = blkno;
+			}
+		} else {
+			/* case 2; read ahead single block */
+			rbp = getblk(vp, ioblkno, size, 0, 0);
+			rbp->b_flags |= flags;
+			rbp->b_blkno = blkno;
+		}
+
+		if (rbp == bp)			/* case 4 */
+			rbp = NULL;
+		else if (rbp) {			/* case 2, 5 */
+			trace(TR_BREADMISSRA,
+			    pack(vp, (num_ra + 1) * size), ioblkno);
+			curproc->p_stats->p_ru.ru_inblock++;	/* XXX */
+		}
+	}
+
+	/* XXX Kirk, do we need to make sure the bp has creds? */
+skip_readahead:
+	if (bp) {
+		if (bp->b_flags & (B_DONE | B_DELWRI))
+			panic("cluster_read: DONE bp");
+		else
+			error = VOP_STRATEGY(bp);
+	}
+
+	if (rbp) {
+		if (error || rbp->b_flags & (B_DONE | B_DELWRI)) {
+			rbp->b_flags &= ~(B_ASYNC | B_READ);
+			brelse(rbp);
+		} else
+			(void) VOP_STRATEGY(rbp);
+	}
+
+	/* Recalculate our maximum readahead */
+	if (rbp == NULL)
+		rbp = bp;
+	if (rbp)
+		vp->v_maxra = rbp->b_lblkno + (rbp->b_bufsize / size) - 1;
+
+	if (bp)
+		return(biowait(bp));
+	return(error);
+}
+
+/*
+ * If blocks are contiguous on disk, use this to provide clustered
+ * read ahead.  We will read as many blocks as possible sequentially
+ * and then parcel them up into logical blocks in the buffer hash table.
+ */
+struct buf *
+cluster_rbuild(vp, filesize, bp, lbn, blkno, size, run, flags)
+	struct vnode *vp;
+	u_quad_t filesize;
+	struct buf *bp;
+	daddr_t lbn;
+	daddr_t blkno;
+	long size;
+	int run;
+	long flags;
+{
+	struct cluster_save *b_save;
+	struct buf *tbp;
+	daddr_t bn;
+	int i, inc;
+
+#ifdef DIAGNOSTIC
+	if (size != vp->v_mount->mnt_stat.f_iosize)
+		panic("cluster_rbuild: size %d != filesize %d\n",
+			size, vp->v_mount->mnt_stat.f_iosize);
+#endif
+	if ((u_quad_t)(lbn + run + 1) * size > filesize)  /* 64-bit compare */
+		--run;		/* run + 1 blocks would end beyond filesize */
+	if (run == 0) {
+		if (!bp) {
+			bp = getblk(vp, lbn, size, 0, 0);
+			bp->b_blkno = blkno;
+			bp->b_flags |= flags;
+		}
+		return(bp);
+	}
+
+	bp = cluster_newbuf(vp, bp, flags, blkno, lbn, size, run + 1);
+	if (bp->b_flags & (B_DONE | B_DELWRI))
+		return (bp);
+	/* Hang a cluster_save with room for run children off b_saveaddr. */
+	b_save = malloc(sizeof(struct buf *) * run + sizeof(struct cluster_save),
+	    M_SEGMENT, M_WAITOK);
+	b_save->bs_bufsize = b_save->bs_bcount = size;
+	b_save->bs_nchildren = 0;
+	b_save->bs_children = (struct buf **)(b_save + 1);
+	b_save->bs_saveaddr = bp->b_saveaddr;
+	bp->b_saveaddr = (caddr_t) b_save;
+
+	inc = btodb(size);
+	for (bn = blkno + inc, i = 1; i <= run; ++i, bn += inc) {
+		if (incore(vp, lbn + i)) {
+			if (i == 1) {
+				bp->b_saveaddr = b_save->bs_saveaddr;
+				bp->b_flags &= ~B_CALL;
+				bp->b_iodone = NULL;
+				allocbuf(bp, size);
+				free(b_save, M_SEGMENT);
+			} else
+				allocbuf(bp, size * i);
+			break;
+		}
+		tbp = getblk(vp, lbn + i, 0, 0, 0);
+		/*
+		 * getblk may return some memory in the buffer if there were
+		 * no empty buffers to shed it to.  If there is currently
+		 * memory in the buffer, we move it down size bytes to make
+		 * room for the valid pages that cluster_callback will insert.
+		 * We do this now so we don't have to do it at interrupt time
+		 * in the callback routine.
+		 */
+		if (tbp->b_bufsize != 0) {
+			caddr_t bdata = (char *)tbp->b_data;
+
+			if (tbp->b_bufsize + size > MAXBSIZE)
+				panic("cluster_rbuild: too much memory");
+			if (tbp->b_bufsize > size) {
+				/*
+				 * XXX if the source and destination regions
+				 * overlap we have to copy backward to avoid
+				 * clobbering any valid pages (i.e. pagemove
+				 * implementations typically can't handle
+				 * overlap).
+				 */
+				bdata += tbp->b_bufsize;
+				while (bdata > (char *)tbp->b_data) {
+					bdata -= CLBYTES;
+					pagemove(bdata, bdata + size, CLBYTES);
+				}
+			} else
+				pagemove(bdata, bdata + size, tbp->b_bufsize);
+		}
+		tbp->b_blkno = bn;
+		tbp->b_flags |= flags | B_READ | B_ASYNC;
+		++b_save->bs_nchildren;
+		b_save->bs_children[i - 1] = tbp;
+	}
+	return(bp);
+}
+
+/*
+ * Either get a new buffer or grow the existing one.
+ */
+struct buf *
+cluster_newbuf(vp, bp, flags, blkno, lblkno, size, run)
+	struct vnode *vp;
+	struct buf *bp;
+	long flags;
+	daddr_t blkno;
+	daddr_t lblkno;
+	long size;
+	int run;
+{
+	if (bp == NULL) {
+		bp = getblk(vp, lblkno, size, 0, 0);
+		if ((bp->b_flags & (B_DONE | B_DELWRI)) != 0)
+			goto setblk;	/* DONE or DELWRI: only record blkno */
+	}
+	/* Size bp for the whole run and register the cluster callback. */
+	allocbuf(bp, run * size);
+	bp->b_iodone = cluster_callback;
+	bp->b_flags |= flags | B_CALL;
+setblk:
+	bp->b_blkno = blkno;
+	return (bp);
+}
+
+/*
+ * Cleanup after a clustered read or write.
+ * This is complicated by the fact that any of the buffers might have
+ * extra memory (if there were no empty buffer headers at allocbuf time)
+ * that we will need to shift around.
+ */
+void
+cluster_callback(bp)
+	struct buf *bp;
+{
+	struct cluster_save *b_save;
+	struct buf **bpp, *tbp;
+	long bsize;
+	caddr_t cp;
+	int error = 0;
+
+	/*
+	 * Must propagate errors to all the components.
+	 */
+	if (bp->b_flags & B_ERROR)
+		error = bp->b_error;
+	/* Unhook the cluster_save record and restore the saved b_saveaddr. */
+	b_save = (struct cluster_save *)(bp->b_saveaddr);
+	bp->b_saveaddr = b_save->bs_saveaddr;
+	/* cp walks the cluster data that follows the first bsize bytes. */
+	bsize = b_save->bs_bufsize;
+	cp = (char *)bp->b_data + bsize;
+	/*
+	 * Move memory from the large cluster buffer into the component
+	 * buffers and mark IO as done on these.
+	 */
+	for (bpp = b_save->bs_children; b_save->bs_nchildren--; ++bpp) {
+		tbp = *bpp;
+		pagemove(cp, tbp->b_data, bsize);
+		tbp->b_bufsize += bsize;
+		tbp->b_bcount = bsize;
+		if (error) {
+			tbp->b_flags |= B_ERROR;
+			tbp->b_error = error;
+		}
+		biodone(tbp);
+		bp->b_bufsize -= bsize;
+		cp += bsize;
+	}
+	/*
+	 * If there was excess memory in the cluster buffer,
+	 * slide it up adjacent to the remaining valid data.
+	 */
+	if (bp->b_bufsize != bsize) {
+		if (bp->b_bufsize < bsize)
+			panic("cluster_callback: too little memory");
+		pagemove(cp, (char *)bp->b_data + bsize, bp->b_bufsize - bsize);
+	}
+	bp->b_bcount = bsize;
+	bp->b_iodone = NULL;
+	free(b_save, M_SEGMENT);
+	if (bp->b_flags & B_ASYNC)
+		brelse(bp);
+	else {
+		bp->b_flags &= ~B_WANTED;
+		wakeup((caddr_t)bp);
+	}
+}
+
+/*
+ * Do clustered write for FFS.
+ *
+ * Three cases:
+ *	1. Write is not sequential (write asynchronously)
+ *	Write is sequential:
+ *	2.	beginning of cluster - begin cluster
+ *	3.	middle of a cluster - add to cluster
+ *	4.	end of a cluster - asynchronously write cluster
+ */
+void
+cluster_write(bp, filesize)
+	struct buf *bp;
+	u_quad_t filesize;
+{
+	struct vnode *vp;
+	daddr_t lbn;
+	int maxclen, cursize;
+
+	vp = bp->b_vp;
+	lbn = bp->b_lblkno;
+
+	/* Initialize vnode to beginning of file. */
+	if (lbn == 0)
+		vp->v_lasta = vp->v_clen = vp->v_cstart = vp->v_lastw = 0;
+	/* No cluster in progress, or bp does not extend it contiguously. */
+	if (vp->v_clen == 0 || lbn != vp->v_lastw + 1 ||
+	    (bp->b_blkno != vp->v_lasta + btodb(bp->b_bcount))) {
+		maxclen = MAXBSIZE / vp->v_mount->mnt_stat.f_iosize - 1;
+		if (vp->v_clen != 0) {
+			/*
+			 * Next block is not sequential.
+			 *
+			 * If we are not writing at end of file, the process
+			 * seeked to another point in the file since its
+			 * last write, or we have reached our maximum
+			 * cluster size, then push the previous cluster.
+			 * Otherwise try reallocating to make it sequential.
+			 */
+			cursize = vp->v_lastw - vp->v_cstart + 1;
+			if (!doreallocblks ||
+			    (u_quad_t)(lbn + 1) * bp->b_bcount != filesize ||
+			    lbn != vp->v_lastw + 1 || vp->v_clen <= cursize) {
+				cluster_wbuild(vp, NULL, bp->b_bcount,
+				    vp->v_cstart, cursize, lbn);
+			} else {
+				struct buf **bpp, **endbp;
+				struct cluster_save *buflist;
+
+				buflist = cluster_collectbufs(vp, bp);
+				endbp = &buflist->bs_children
+				    [buflist->bs_nchildren - 1];
+				if (VOP_REALLOCBLKS(vp, buflist)) {
+					/*
+					 * Failed, push the previous cluster.
+					 */
+					for (bpp = buflist->bs_children;
+					     bpp < endbp; bpp++)
+						brelse(*bpp);
+					free(buflist, M_SEGMENT);
+					cluster_wbuild(vp, NULL, bp->b_bcount,
+					    vp->v_cstart, cursize, lbn);
+				} else {
+					/*
+					 * Succeeded, keep building cluster.
+					 */
+					for (bpp = buflist->bs_children;
+					     bpp <= endbp; bpp++)
+						bdwrite(*bpp);
+					free(buflist, M_SEGMENT);
+					vp->v_lastw = lbn;
+					vp->v_lasta = bp->b_blkno;
+					return;
+				}
+			}
+		}
+		/*
+		 * Consider beginning a cluster.  If at end of file (tested
+		 * in u_quad_t so large offsets cannot overflow), make cluster
+		 * as large as possible, otherwise find size of existing cluster.
+		 */
+		if ((u_quad_t)(lbn + 1) * bp->b_bcount != filesize &&
+		    (VOP_BMAP(vp, lbn, NULL, &bp->b_blkno, &maxclen) ||
+		     bp->b_blkno == -1)) {
+			bawrite(bp);
+			vp->v_clen = 0;
+			vp->v_lasta = bp->b_blkno;
+			vp->v_cstart = lbn + 1;
+			vp->v_lastw = lbn;
+			return;
+		}
+		vp->v_clen = maxclen;
+		if (maxclen == 0) {		/* I/O not contiguous */
+			vp->v_cstart = lbn + 1;
+			bawrite(bp);
+		} else {			/* Wait for rest of cluster */
+			vp->v_cstart = lbn;
+			bdwrite(bp);
+		}
+	} else if (lbn == vp->v_cstart + vp->v_clen) {
+		/*
+		 * At end of cluster, write it out.
+		 */
+		cluster_wbuild(vp, bp, bp->b_bcount, vp->v_cstart,
+		    vp->v_clen + 1, lbn);
+		vp->v_clen = 0;
+		vp->v_cstart = lbn + 1;
+	} else
+		/*
+		 * In the middle of a cluster, so just delay the
+		 * I/O for now.
+		 */
+		bdwrite(bp);
+	vp->v_lastw = lbn;
+	vp->v_lasta = bp->b_blkno;
+}
+
+
+/*
+ * This is an awful lot like cluster_rbuild...wish they could be combined.
+ * The last lbn argument is the current block on which I/O is being
+ * performed.  Check to see that it doesn't fall in the middle of
+ * the current block (if last_bp == NULL).
+ */
+void
+cluster_wbuild(vp, last_bp, size, start_lbn, len, lbn)
+	struct vnode *vp;
+	struct buf *last_bp;
+	long size;
+	daddr_t start_lbn;
+	int len;
+	daddr_t	lbn;
+{
+	struct cluster_save *b_save;
+	struct buf *bp, *tbp;
+	caddr_t	cp;
+	int i, s;
+
+#ifdef DIAGNOSTIC
+	if (size != vp->v_mount->mnt_stat.f_iosize)
+		panic("cluster_wbuild: size %d != filesize %d\n",
+			size, vp->v_mount->mnt_stat.f_iosize);
+#endif
+redo:
+	while ((!incore(vp, start_lbn) || start_lbn == lbn) && len) {
+		++start_lbn;
+		--len;
+	}
+
+	/* At most one block left: nothing to cluster, just write it */
+	if (len <= 1) {
+		if (last_bp) {
+			bawrite(last_bp);
+		} else if (len) {
+			bp = getblk(vp, start_lbn, size, 0, 0);
+			bawrite(bp);
+		}
+		return;
+	}
+
+	bp = getblk(vp, start_lbn, size, 0, 0);
+	if (!(bp->b_flags & B_DELWRI)) {
+		++start_lbn;
+		--len;
+		brelse(bp);
+		goto redo;
+	}
+
+	/*
+	 * Extra memory in the buffer, punt on this buffer.
+	 * XXX we could handle this in most cases, but we would have to
+	 * push the extra memory down to after our max possible cluster
+	 * size and then potentially pull it back up if the cluster was
+	 * terminated prematurely--too much hassle.
+	 */
+	if (bp->b_bcount != bp->b_bufsize) {
+		++start_lbn;
+		--len;
+		bawrite(bp);
+		goto redo;
+	}
+
+	--len;
+	b_save = malloc(sizeof(struct buf *) * len + sizeof(struct cluster_save),
+	    M_SEGMENT, M_WAITOK);
+	b_save->bs_bcount = bp->b_bcount;
+	b_save->bs_bufsize = bp->b_bufsize;
+	b_save->bs_nchildren = 0;
+	b_save->bs_children = (struct buf **)(b_save + 1);
+	b_save->bs_saveaddr = bp->b_saveaddr;
+	bp->b_saveaddr = (caddr_t) b_save;
+
+	bp->b_flags |= B_CALL;
+	bp->b_iodone = cluster_callback;
+	cp = (char *)bp->b_data + size;
+	for (++start_lbn, i = 0; i < len; ++i, ++start_lbn) {
+		/*
+		 * Block is not in core or the non-sequential block
+		 * ending our cluster was part of the cluster (in which
+		 * case we don't want to write it twice).
+		 */
+		if (!incore(vp, start_lbn) ||
+		    last_bp == NULL && start_lbn == lbn)
+			break;
+
+		/*
+		 * Get the desired block buffer (unless it is the final
+		 * sequential block whose buffer was passed in explicitly
+		 * as last_bp).
+		 */
+		if (last_bp == NULL || start_lbn != lbn) {
+			tbp = getblk(vp, start_lbn, size, 0, 0);
+			if (!(tbp->b_flags & B_DELWRI)) {
+				brelse(tbp);
+				break;
+			}
+		} else
+			tbp = last_bp;
+
+		++b_save->bs_nchildren;
+
+		/* Move memory from children to parent */
+		if (tbp->b_blkno != (bp->b_blkno + btodb(bp->b_bufsize))) {
+			printf("Clustered Block: %d addr %x bufsize: %d\n",
+			    bp->b_lblkno, bp->b_blkno, bp->b_bufsize);
+			printf("Child Block: %d addr: %x\n", tbp->b_lblkno,
+			    tbp->b_blkno);
+			panic("Clustered write to wrong blocks");
+		}
+
+		pagemove(tbp->b_data, cp, size);
+		bp->b_bcount += size;
+		bp->b_bufsize += size;
+
+		tbp->b_bufsize -= size;
+		tbp->b_flags &= ~(B_READ | B_DONE | B_ERROR | B_DELWRI);
+		tbp->b_flags |= (B_ASYNC | B_AGE);
+		s = splbio();
+		reassignbuf(tbp, tbp->b_vp);		/* put on clean list */
+		++tbp->b_vp->v_numoutput;
+		splx(s);
+		b_save->bs_children[i] = tbp;
+
+		cp += size;
+	}
+
+	if (i == 0) {
+		/* None to cluster */
+		bp->b_saveaddr = b_save->bs_saveaddr;
+		bp->b_flags &= ~B_CALL;
+		bp->b_iodone = NULL;
+		free(b_save, M_SEGMENT);
+	}
+	bawrite(bp);
+	if (i < len) {
+		len -= i + 1;
+		start_lbn += 1;		/* step past the block that stopped us */
+		goto redo;
+	}
+}
+
+/*
+ * Collect together all the buffers in a cluster.
+ * Plus add one additional buffer.
+ */
+struct cluster_save *
+cluster_collectbufs(vp, last_bp)
+	struct vnode *vp;
+	struct buf *last_bp;
+{
+	struct cluster_save *save;
+	struct buf **slot;
+	daddr_t blk;
+	int n, count;
+
+	count = vp->v_lastw - vp->v_cstart + 1;
+	save = malloc(sizeof(*save) + sizeof(struct buf *) * (count + 1),
+	    M_SEGMENT, M_WAITOK);
+	save->bs_nchildren = 0;
+	slot = save->bs_children = (struct buf **)(save + 1);
+	for (blk = vp->v_cstart, n = 0; n < count; blk++, n++, slot++)
+		(void)bread(vp, blk, last_bp->b_bcount, NOCRED, slot);
+	*slot = last_bp;	/* the extra buffer goes in the final slot */
+	save->bs_nchildren = n + 1;
+	return (save);
+}
diff --git a/sys/kern/vfs_conf.c b/sys/kern/vfs_conf.c
new file mode 100644
index 00000000000..2fe39eb674b
--- /dev/null
+++ b/sys/kern/vfs_conf.c
@@ -0,0 +1,260 @@
+/*
+ * Copyright (c) 1989, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)vfs_conf.c	8.8 (Berkeley) 3/31/94
+ */
+
+#include <sys/param.h>
+#include <sys/mount.h>
+#include <sys/vnode.h>
+
+#ifdef FFS
+#include <ufs/ffs/ffs_extern.h>
+
+/*
+ * Root filesystem mount routine; defined (as ffs_mountroot) only with FFS.
+ * This specification should be done by /etc/config.
+ */
+int (*mountroot)() = ffs_mountroot;
+#endif
+
+/*
+ * These define the root filesystem and device.
+ */
+struct mount *rootfs;
+struct vnode *rootvnode;
+
+/*
+ * Each XXX_VFSOPS below is the address of that filesystem's vfsops when the
+ * option is configured, else NULL.  The types are defined in mount.h.
+ */
+#ifdef FFS
+extern	struct vfsops ufs_vfsops;
+#define	UFS_VFSOPS	&ufs_vfsops
+#else
+#define	UFS_VFSOPS	NULL
+#endif
+
+#ifdef LFS
+extern	struct vfsops lfs_vfsops;
+#define	LFS_VFSOPS	&lfs_vfsops
+#else
+#define	LFS_VFSOPS	NULL
+#endif
+
+#ifdef MFS
+extern	struct vfsops mfs_vfsops;
+#define	MFS_VFSOPS	&mfs_vfsops
+#else
+#define	MFS_VFSOPS	NULL
+#endif
+
+#ifdef NFS
+extern	struct vfsops nfs_vfsops;
+#define	NFS_VFSOPS	&nfs_vfsops
+#else
+#define	NFS_VFSOPS	NULL
+#endif
+
+#ifdef FDESC
+extern	struct vfsops fdesc_vfsops;
+#define	FDESC_VFSOPS	&fdesc_vfsops
+#else
+#define	FDESC_VFSOPS	NULL
+#endif
+
+#ifdef PORTAL
+extern	struct vfsops portal_vfsops;
+#define	PORTAL_VFSOPS	&portal_vfsops
+#else
+#define	PORTAL_VFSOPS	NULL
+#endif
+
+#ifdef NULLFS
+extern	struct vfsops null_vfsops;
+#define NULL_VFSOPS	&null_vfsops
+#else
+#define NULL_VFSOPS	NULL
+#endif
+
+#ifdef UMAPFS
+extern	struct vfsops umap_vfsops;
+#define UMAP_VFSOPS	&umap_vfsops
+#else
+#define UMAP_VFSOPS	NULL
+#endif
+
+#ifdef KERNFS
+extern	struct vfsops kernfs_vfsops;
+#define KERNFS_VFSOPS	&kernfs_vfsops
+#else
+#define KERNFS_VFSOPS	NULL
+#endif
+
+#ifdef PROCFS
+extern	struct vfsops procfs_vfsops;
+#define PROCFS_VFSOPS	&procfs_vfsops
+#else
+#define PROCFS_VFSOPS	NULL
+#endif
+
+#ifdef AFS
+extern	struct vfsops afs_vfsops;
+#define AFS_VFSOPS	&afs_vfsops
+#else
+#define AFS_VFSOPS	NULL
+#endif
+
+#ifdef CD9660
+extern	struct vfsops cd9660_vfsops;
+#define CD9660_VFSOPS	&cd9660_vfsops
+#else
+#define CD9660_VFSOPS	NULL
+#endif
+
+#ifdef UNION
+extern	struct vfsops union_vfsops;
+#define	UNION_VFSOPS	&union_vfsops
+#else
+#define	UNION_VFSOPS	NULL
+#endif
+
+struct vfsops *vfssw[] = {
+	NULL,			/* 0 = MOUNT_NONE */
+	UFS_VFSOPS,		/* 1 = MOUNT_UFS */
+	NFS_VFSOPS,		/* 2 = MOUNT_NFS */
+	MFS_VFSOPS,		/* 3 = MOUNT_MFS */
+	NULL,			/* 4 = MOUNT_PC */
+	LFS_VFSOPS,		/* 5 = MOUNT_LFS */
+	NULL,			/* 6 = MOUNT_LOFS */
+	FDESC_VFSOPS,		/* 7 = MOUNT_FDESC */
+	PORTAL_VFSOPS,		/* 8 = MOUNT_PORTAL */
+	NULL_VFSOPS,		/* 9 = MOUNT_NULL */
+	UMAP_VFSOPS,		/* 10 = MOUNT_UMAP */
+	KERNFS_VFSOPS,		/* 11 = MOUNT_KERNFS */
+	PROCFS_VFSOPS,		/* 12 = MOUNT_PROCFS */
+	AFS_VFSOPS,		/* 13 = MOUNT_AFS */
+	CD9660_VFSOPS,		/* 14 = MOUNT_CD9660 */
+	UNION_VFSOPS,		/* 15 = MOUNT_UNION */
+	0			/* trailing null entry after the last type */
+};
+
+
+/*
+ *
+ * vfs_opv_descs enumerates the list of vnode classes, each with its own
+ * vnode operation vector.  It is consulted at system boot to build operation
+ * vectors.  It is NULL terminated.
+ *
+ */
+extern struct vnodeopv_desc ffs_vnodeop_opv_desc;
+extern struct vnodeopv_desc ffs_specop_opv_desc;
+extern struct vnodeopv_desc ffs_fifoop_opv_desc;
+extern struct vnodeopv_desc lfs_vnodeop_opv_desc;
+extern struct vnodeopv_desc lfs_specop_opv_desc;
+extern struct vnodeopv_desc lfs_fifoop_opv_desc;
+extern struct vnodeopv_desc mfs_vnodeop_opv_desc;
+extern struct vnodeopv_desc dead_vnodeop_opv_desc;
+extern struct vnodeopv_desc fifo_vnodeop_opv_desc;
+extern struct vnodeopv_desc spec_vnodeop_opv_desc;
+extern struct vnodeopv_desc nfsv2_vnodeop_opv_desc;
+extern struct vnodeopv_desc spec_nfsv2nodeop_opv_desc;
+extern struct vnodeopv_desc fifo_nfsv2nodeop_opv_desc;
+extern struct vnodeopv_desc fdesc_vnodeop_opv_desc;
+extern struct vnodeopv_desc portal_vnodeop_opv_desc;
+extern struct vnodeopv_desc null_vnodeop_opv_desc;
+extern struct vnodeopv_desc umap_vnodeop_opv_desc;
+extern struct vnodeopv_desc kernfs_vnodeop_opv_desc;
+extern struct vnodeopv_desc procfs_vnodeop_opv_desc;
+extern struct vnodeopv_desc cd9660_vnodeop_opv_desc;
+extern struct vnodeopv_desc cd9660_specop_opv_desc;
+extern struct vnodeopv_desc cd9660_fifoop_opv_desc;
+extern struct vnodeopv_desc union_vnodeop_opv_desc;
+
+struct vnodeopv_desc *vfs_opv_descs[] = {
+	&ffs_vnodeop_opv_desc,
+	&ffs_specop_opv_desc,
+#ifdef FIFO
+	&ffs_fifoop_opv_desc,
+#endif
+	&dead_vnodeop_opv_desc,
+#ifdef FIFO
+	&fifo_vnodeop_opv_desc,
+#endif
+	&spec_vnodeop_opv_desc,
+#ifdef LFS
+	&lfs_vnodeop_opv_desc,
+	&lfs_specop_opv_desc,
+#ifdef FIFO
+	&lfs_fifoop_opv_desc,
+#endif
+#endif
+#ifdef MFS
+	&mfs_vnodeop_opv_desc,
+#endif
+#ifdef NFS
+	&nfsv2_vnodeop_opv_desc,
+	&spec_nfsv2nodeop_opv_desc,
+#ifdef FIFO
+	&fifo_nfsv2nodeop_opv_desc,
+#endif
+#endif
+#ifdef FDESC
+	&fdesc_vnodeop_opv_desc,
+#endif
+#ifdef PORTAL
+	&portal_vnodeop_opv_desc,
+#endif
+#ifdef NULLFS
+	&null_vnodeop_opv_desc,
+#endif
+#ifdef UMAPFS
+	&umap_vnodeop_opv_desc,
+#endif
+#ifdef KERNFS
+	&kernfs_vnodeop_opv_desc,
+#endif
+#ifdef PROCFS
+	&procfs_vnodeop_opv_desc,
+#endif
+#ifdef CD9660
+	&cd9660_vnodeop_opv_desc,
+	&cd9660_specop_opv_desc,
+#ifdef FIFO
+	&cd9660_fifoop_opv_desc,
+#endif
+#endif
+#ifdef UNION
+	&union_vnodeop_opv_desc,
+#endif
+	NULL
+};
diff --git a/sys/kern/vfs_export.c b/sys/kern/vfs_export.c
new file mode 100644
index 00000000000..9891fe61c19
--- /dev/null
+++ b/sys/kern/vfs_export.c
@@ -0,0 +1,1322 @@
+/*
+ * Copyright (c) 1989, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)vfs_subr.c	8.13 (Berkeley) 4/18/94
+ */
+
+/*
+ * External virtual filesystem routines
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/mount.h>
+#include <sys/time.h>
+#include <sys/vnode.h>
+#include <sys/stat.h>
+#include <sys/namei.h>
+#include <sys/ucred.h>
+#include <sys/buf.h>
+#include <sys/errno.h>
+#include <sys/malloc.h>
+#include <sys/domain.h>
+#include <sys/mbuf.h>
+
+#include <vm/vm.h>
+#include <sys/sysctl.h>
+
+#include <miscfs/specfs/specdev.h>
+
+enum vtype iftovt_tab[16] = {	/* NOTE(review): presumably indexed by S_IFMT bits >> 12 */
+	VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
+	VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD,
+};
+int	vttoif_tab[9] = {	/* one S_IF* value per vnode type, in typename[] order */
+	0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK,
+	S_IFSOCK, S_IFIFO, S_IFMT,
+};
+
+/*
+ * Insq/Remq for the vnode usage lists (bufremvn leaves le_next == NOLIST).
+ */
+#define	bufinsvn(bp, dp)	LIST_INSERT_HEAD(dp, bp, b_vnbufs)
+#define	bufremvn(bp) do {  \
+	LIST_REMOVE(bp, b_vnbufs); \
+	(bp)->b_vnbufs.le_next = NOLIST; \
+} while (0)
+
+TAILQ_HEAD(freelst, vnode) vnode_free_list;	/* vnode free list */
+struct mntlist mountlist;			/* mounted filesystem list */
+
+/*
+ * Set up the list heads used for vnode management (both start empty).
+ */
+vntblinit()
+{
+
+	TAILQ_INIT(&mountlist);
+	TAILQ_INIT(&vnode_free_list);
+}
+
+/*
+ * Take the mount lock on mp, sleeping on mp while MNT_MLOCK is held.
+ * Guards the filesystem during mount and unmount; always returns 0.
+ */
+vfs_lock(mp)
+	register struct mount *mp;
+{
+
+	/* Flag that someone is waiting, then sleep until it is released. */
+	for (; mp->mnt_flag & MNT_MLOCK; sleep((caddr_t)mp, PVFS))
+		mp->mnt_flag |= MNT_MWAIT;
+
+	mp->mnt_flag |= MNT_MLOCK;
+	return (0);
+}
+
+/*
+ * Drop the mount lock taken by vfs_lock and wake any waiters on mp.
+ * Panics if MNT_MLOCK is not currently set.
+ */
+void
+vfs_unlock(mp)
+	register struct mount *mp;
+{
+	if (!(mp->mnt_flag & MNT_MLOCK))
+		panic("vfs_unlock: not locked");
+	if (!(mp->mnt_flag & MNT_MWAIT)) {
+		mp->mnt_flag &= ~MNT_MLOCK;
+		return;
+	}
+	mp->mnt_flag &= ~(MNT_MLOCK | MNT_MWAIT);
+	wakeup((caddr_t)mp);
+}
+
+/*
+ * Mark a mount point busy (synchronizes access and delays unmounting),
+ * waiting out any other holder.  Returns 1 if MNT_UNMOUNT is set, else 0.
+ */
+vfs_busy(mp)
+	register struct mount *mp;
+{
+	while ((mp->mnt_flag & MNT_MPBUSY) != 0) {
+		mp->mnt_flag |= MNT_MPWANT;
+		sleep((caddr_t)&mp->mnt_flag, PVFS);
+	}
+	if ((mp->mnt_flag & MNT_UNMOUNT) == 0) {
+		mp->mnt_flag |= MNT_MPBUSY;
+		return (0);
+	}
+	return (1);
+}
+
+/*
+ * Clear the busy mark set by vfs_busy and wake anyone waiting for it.
+ * Panics if the mount point is not marked busy.
+ */
+vfs_unbusy(mp)
+	register struct mount *mp;
+{
+	if (!(mp->mnt_flag & MNT_MPBUSY))
+		panic("vfs_unbusy: not busy");
+	if (!(mp->mnt_flag & MNT_MPWANT)) {
+		mp->mnt_flag &= ~MNT_MPBUSY;
+		return;
+	}
+	mp->mnt_flag &= ~(MNT_MPBUSY | MNT_MPWANT);
+	wakeup((caddr_t)&mp->mnt_flag);
+}
+
+/*
+ * Walk mountlist for the mount whose f_fsid matches *fsid; NULL if none.
+ */
+struct mount *
+getvfs(fsid)
+	fsid_t *fsid;
+{
+	register struct mount *cur;
+
+	cur = mountlist.tqh_first;
+	while (cur != NULL &&
+	    (cur->mnt_stat.f_fsid.val[0] != fsid->val[0] ||
+	     cur->mnt_stat.f_fsid.val[1] != fsid->val[1]))
+		cur = cur->mnt_list.tqe_next;
+	return (cur);
+}
+
+/*
+ * Give mp an fsid for filesystem type mtype that no current mount uses.
+ */
+void
+getnewfsid(mp, mtype)
+	struct mount *mp;
+	int mtype;
+{
+static u_short xxxfs_mntid;	/* counter kept across calls */
+
+	fsid_t tfsid;
+
+	mp->mnt_stat.f_fsid.val[0] = makedev(nblkdev + mtype, 0);
+	mp->mnt_stat.f_fsid.val[1] = mtype;
+	if (xxxfs_mntid == 0)
+		++xxxfs_mntid;		/* 0 is the provisional value above */
+	tfsid.val[0] = makedev(nblkdev + mtype, xxxfs_mntid);
+	tfsid.val[1] = mtype;
+	if (mountlist.tqh_first != NULL) {
+		while (getvfs(&tfsid)) {	/* advance until unused */
+			tfsid.val[0]++;
+			xxxfs_mntid++;
+		}
+	}
+	mp->mnt_stat.f_fsid.val[0] = tfsid.val[0];
+}
+
+/*
+ * Reset *vap: type VNON, va_vaflags 0, every other listed field VNOVAL.
+ */
+void vattr_null(vap)
+	register struct vattr *vap;
+{
+
+	vap->va_type = VNON;
+	vap->va_size = vap->va_bytes = VNOVAL;
+	vap->va_mode = vap->va_nlink = vap->va_uid = vap->va_gid =
+		vap->va_fsid = vap->va_fileid =
+		vap->va_blocksize = vap->va_rdev =
+		vap->va_atime.ts_sec = vap->va_atime.ts_nsec =
+		vap->va_mtime.ts_sec = vap->va_mtime.ts_nsec =
+		vap->va_ctime.ts_sec = vap->va_ctime.ts_nsec =
+		vap->va_flags = vap->va_gen = VNOVAL;
+	vap->va_vaflags = 0;	/* the one field cleared rather than VNOVAL */
+}
+
+/*
+ * Routines having to do with the management of the vnode table.
+ */
+extern int (**dead_vnodeop_p)();
+extern void vclean();
+long numvnodes;			/* vnodes malloc'ed by getnewvnode */
+extern struct vattr va_null;
+
+/*
+ * Return a vnode in *vpp, newly allocated or recycled from the free list.
+ */
+getnewvnode(tag, mp, vops, vpp)
+	enum vtagtype tag;
+	struct mount *mp;
+	int (**vops)();
+	struct vnode **vpp;
+{
+	register struct vnode *vp;
+	int s;
+
+	if ((vnode_free_list.tqh_first == NULL &&
+	     numvnodes < 2 * desiredvnodes) ||
+	    numvnodes < desiredvnodes) {
+		vp = (struct vnode *)malloc((u_long)sizeof *vp,
+		    M_VNODE, M_WAITOK);
+		bzero((char *)vp, sizeof *vp);
+		numvnodes++;
+	} else {
+		if ((vp = vnode_free_list.tqh_first) == NULL) {
+			tablefull("vnode");	/* nothing left to recycle */
+			*vpp = 0;
+			return (ENFILE);
+		}
+		if (vp->v_usecount)
+			panic("free vnode isn't");
+		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
+		/* see comment on why 0xdeadb is set at end of vgone (below) */
+		vp->v_freelist.tqe_prev = (struct vnode **)0xdeadb;
+		vp->v_lease = NULL;
+		if (vp->v_type != VBAD)		/* vgone leaves v_type VBAD */
+			vgone(vp);
+#ifdef DIAGNOSTIC
+		if (vp->v_data)
+			panic("cleaned vnode isn't");
+		s = splbio();
+		if (vp->v_numoutput)
+			panic("Clean vnode has pending I/O's");
+		splx(s);
+#endif
+		vp->v_flag = 0;
+		vp->v_lastr = 0;
+		vp->v_ralen = 0;
+		vp->v_maxra = 0;
+		vp->v_lastw = 0;
+		vp->v_lasta = 0;
+		vp->v_cstart = 0;
+		vp->v_clen = 0;
+		vp->v_socket = 0;
+	}
+	vp->v_type = VNON;
+	cache_purge(vp);
+	vp->v_tag = tag;
+	vp->v_op = vops;
+	insmntque(vp, mp);
+	*vpp = vp;
+	vp->v_usecount = 1;		/* returned holding one reference */
+	vp->v_data = 0;
+	return (0);
+}
+
+/*
+ * Detach vp from its current mount's vnode list and attach it to mp's.
+ */
+insmntque(vp, mp)
+	register struct vnode *vp;
+	register struct mount *mp;
+{
+
+	/*
+	 * Unlink from the previous mount point's list, if it was on one.
+	 */
+	if (vp->v_mount != NULL) {
+		LIST_REMOVE(vp, v_mntvnodes);
+	}
+	/* Record the new owner; a NULL mp leaves vp on no list. */
+	vp->v_mount = mp;
+	if (mp != NULL) {
+		LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes);
+	}
+}
+
+/*
+ * Account for one finished write on bp's vnode and wake a VBWAIT sleeper
+ * once no output remains.  Also clears B_WRITEINPROG on bp.
+ */
+vwakeup(bp)
+	register struct buf *bp;
+{
+	register struct vnode *vp;
+
+	bp->b_flags &= ~B_WRITEINPROG;
+	vp = bp->b_vp;
+	if (vp == NULL)
+		return;
+	if (--vp->v_numoutput < 0)
+		panic("vwakeup: neg numoutput");
+	/* v_numoutput cannot be negative here, so "<= 0" means zero. */
+	if ((vp->v_flag & VBWAIT) && vp->v_numoutput <= 0) {
+		vp->v_flag &= ~VBWAIT;
+		wakeup((caddr_t)&vp->v_numoutput);
+	}
+}
+
+/*
+ * Flush (if V_SAVE) and invalidate the buffers associated with a vnode.
+ * Called with the underlying object locked.
+ */
+int
+vinvalbuf(vp, flags, cred, p, slpflag, slptimeo)
+	register struct vnode *vp;
+	int flags;
+	struct ucred *cred;
+	struct proc *p;
+	int slpflag, slptimeo;
+{
+	register struct buf *bp;
+	struct buf *nbp, *blist;
+	int s, error;
+
+	if (flags & V_SAVE) {
+		if (error = VOP_FSYNC(vp, cred, MNT_WAIT, p))
+			return (error);
+		if (vp->v_dirtyblkhd.lh_first != NULL)
+			panic("vinvalbuf: dirty bufs");
+	}
+	for (;;) {
+		if ((blist = vp->v_cleanblkhd.lh_first) && flags & V_SAVEMETA)
+			while (blist && blist->b_lblkno < 0)
+				blist = blist->b_vnbufs.le_next;
+		if (!blist && (blist = vp->v_dirtyblkhd.lh_first) && 
+		    (flags & V_SAVEMETA))
+			while (blist && blist->b_lblkno < 0)
+				blist = blist->b_vnbufs.le_next;
+		if (!blist)
+			break;
+
+		for (bp = blist; bp; bp = nbp) {
+			nbp = bp->b_vnbufs.le_next;
+			if (flags & V_SAVEMETA && bp->b_lblkno < 0)
+				continue;
+			s = splbio();
+			if (bp->b_flags & B_BUSY) {
+				bp->b_flags |= B_WANTED;
+				error = tsleep((caddr_t)bp,
+					slpflag | (PRIBIO + 1), "vinvalbuf",
+					slptimeo);
+				splx(s);
+				if (error)
+					return (error);
+				break;	/* slept: rescan from the list heads */
+			}
+			bremfree(bp);
+			bp->b_flags |= B_BUSY;
+			splx(s);
+			/*
+			 * XXX Since there are no node locks for NFS, I believe
+			 * there is a slight chance that a delayed write will
+			 * occur while sleeping just above, so check for it.
+			 */
+			if ((bp->b_flags & B_DELWRI) && (flags & V_SAVE)) {
+				(void) VOP_BWRITE(bp);
+				break;	/* rescan from the list heads */
+			}
+			bp->b_flags |= B_INVAL;
+			brelse(bp);
+		}
+	}
+	if (!(flags & V_SAVEMETA) &&
+	    (vp->v_dirtyblkhd.lh_first || vp->v_cleanblkhd.lh_first))
+		panic("vinvalbuf: flush failed");
+	return (0);
+}
+
+/*
+ * Attach bp to vp: take a hold on vp, record it in bp, derive b_dev,
+ * and put bp on vp's clean buffer list.  Panics if bp has a vnode.
+ */
+bgetvp(vp, bp)
+	register struct vnode *vp;
+	register struct buf *bp;
+{
+	int isdev;
+
+	if (bp->b_vp != NULL)
+		panic("bgetvp: not free");
+	VHOLD(vp);
+	bp->b_vp = vp;
+
+	/* Only block and character device vnodes supply a device number. */
+	isdev = (vp->v_type == VBLK || vp->v_type == VCHR);
+	bp->b_dev = isdev ? vp->v_rdev : NODEV;
+	/* A new association always starts on the clean list. */
+	bufinsvn(bp, &vp->v_cleanblkhd);
+}
+
+/*
+ * Detach bp from its vnode: unlink it from the vnode's buffer list,
+ * clear b_vp and release the hold.  Panics if bp has no vnode.
+ */
+brelvp(bp)
+	register struct buf *bp;
+{
+	struct vnode *oldvp;
+
+	oldvp = bp->b_vp;
+	if (oldvp == NULL)
+		panic("brelvp: NULL");
+	/* le_next == NOLIST means bp is on no vnode list. */
+	if (bp->b_vnbufs.le_next != NOLIST) {
+		bufremvn(bp);
+	}
+	bp->b_vp = NULL;
+	HOLDRELE(oldvp);
+}
+
+/*
+ * Move bp onto one of newvp's buffer lists, chosen by B_DELWRI.
+ * Used to assign file specific control information
+ * (indirect blocks) to the vnode to which they belong.
+ */
+reassignbuf(bp, newvp)
+	register struct buf *bp;
+	register struct vnode *newvp;
+{
+	register struct buflists *target;
+
+	if (newvp == NULL) {
+		printf("reassignbuf: NULL");
+		return;
+	}
+
+	/* Pick the destination: dirty list for delayed writes, else clean. */
+	target = (bp->b_flags & B_DELWRI) ?
+	    &newvp->v_dirtyblkhd : &newvp->v_cleanblkhd;
+
+	/*
+	 * Unlink from whatever vnode list currently holds the buffer
+	 * (le_next == NOLIST means it is on none), then link it in.
+	 */
+	if (bp->b_vnbufs.le_next != NOLIST) {
+		bufremvn(bp);
+	}
+
+	bufinsvn(bp, target);
+}
+
+/*
+ * Create a vnode for a block device.
+ * Used for root filesystem, argdev, and swap areas.
+ * Also used for memory file system special devices.
+ */
+bdevvp(dev, vpp)
+	dev_t dev;
+	struct vnode **vpp;
+{
+	register struct vnode *vp;
+	struct vnode *nvp;
+	int error;
+
+	if (dev == NODEV)
+		return (0);		/* note: *vpp is left untouched */
+	error = getnewvnode(VT_NON, (struct mount *)0, spec_vnodeop_p, &nvp);
+	if (error) {
+		*vpp = 0;
+		return (error);
+	}
+	vp = nvp;
+	vp->v_type = VBLK;
+	if (nvp = checkalias(vp, dev, (struct mount *)0)) {
+		vput(vp);		/* alias found: drop ours, use it */
+		vp = nvp;
+	}
+	*vpp = vp;
+	return (0);
+}
+
+/*
+ * Check to see if the new vnode represents a special device
+ * for which we already have a vnode (either because of
+ * bdevvp() or because of a different vnode representing
+ * the same block device). If such an alias exists, deallocate
+ * the existing contents and return the aliased vnode. The
+ * caller is responsible for filling it with its new contents.
+ */
+struct vnode *
+checkalias(nvp, nvp_rdev, mp)
+	register struct vnode *nvp;
+	dev_t nvp_rdev;
+	struct mount *mp;
+{
+	register struct vnode *vp;
+	struct vnode **vpp;
+
+	if (nvp->v_type != VBLK && nvp->v_type != VCHR)
+		return (NULLVP);
+
+	vpp = &speclisth[SPECHASH(nvp_rdev)];
+loop:
+	for (vp = *vpp; vp; vp = vp->v_specnext) {
+		if (nvp_rdev != vp->v_rdev || nvp->v_type != vp->v_type)
+			continue;
+		/*
+		 * Alias, but not in use, so flush it out.
+		 */
+		if (vp->v_usecount == 0) {
+			vgone(vp);
+			goto loop;	/* chain changed; rescan */
+		}
+		if (vget(vp, 1))	/* nonzero: vp was being cleaned */
+			goto loop;
+		break;
+	}
+	if (vp == NULL || vp->v_tag != VT_NON) {	/* nothing to reuse */
+		MALLOC(nvp->v_specinfo, struct specinfo *,
+			sizeof(struct specinfo), M_VNODE, M_WAITOK);
+		nvp->v_rdev = nvp_rdev;
+		nvp->v_hashchain = vpp;
+		nvp->v_specnext = *vpp;
+		nvp->v_specflags = 0;
+		*vpp = nvp;
+		if (vp != NULL) {
+			nvp->v_flag |= VALIASED;
+			vp->v_flag |= VALIASED;
+			vput(vp);
+		}
+		return (NULLVP);
+	}
+	VOP_UNLOCK(vp);
+	vclean(vp, 0);
+	vp->v_op = nvp->v_op;
+	vp->v_tag = nvp->v_tag;
+	nvp->v_type = VNON;
+	insmntque(vp, mp);
+	return (vp);
+}
+
+/*
+ * Grab a particular vnode from the free list, increment its
+ * reference count and lock it. The vnode lock bit is set if the
+ * vnode is being eliminated in vgone. The process is awakened
+ * when the transition is completed, and an error returned to
+ * indicate that the vnode is no longer usable (possibly having
+ * been changed to a new file system type).
+ */
+vget(vp, lockflag)
+	register struct vnode *vp;
+	int lockflag;
+{
+
+	/*
+	 * If the vnode is in the process of being cleaned out for
+	 * another use, we wait for the cleaning to finish and then
+	 * return failure. Cleaning is determined either by checking
+	 * that the VXLOCK flag is set, or that the use count is
+	 * zero with the back pointer set to show that it has been
+	 * removed from the free list by getnewvnode. The VXLOCK
+	 * flag may not have been set yet because vclean is blocked in
+	 * the VOP_LOCK call waiting for the VOP_INACTIVE to complete.
+	 */
+	if ((vp->v_flag & VXLOCK) ||
+	    (vp->v_usecount == 0 &&
+	     vp->v_freelist.tqe_prev == (struct vnode **)0xdeadb)) {
+		vp->v_flag |= VXWANT;
+		sleep((caddr_t)vp, PINOD);
+		return (1);
+	}
+	if (vp->v_usecount == 0)	/* unreferenced: still on the free list */
+		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
+	vp->v_usecount++;
+	if (lockflag)
+		VOP_LOCK(vp);
+	return (0);
+}
+
+/*
+ * Add a reference to a vnode that is already referenced (count > 0).
+ */
+void vref(vp)
+	struct vnode *vp;
+{
+
+	if (vp->v_usecount < 1)
+		panic("vref used where vget required");
+	vp->v_usecount += 1;
+}
+
+/*
+ * Unlock vp with VOP_UNLOCK, then drop one reference with vrele.
+ */
+void vput(vp)
+	register struct vnode *vp;
+{
+
+	VOP_UNLOCK(vp);
+	vrele(vp);
+}
+
+/*
+ * Vnode release.
+ * If count drops to zero, call inactive routine and return to freelist.
+ */
+void vrele(vp)
+	register struct vnode *vp;
+{
+
+#ifdef DIAGNOSTIC
+	if (vp == NULL)
+		panic("vrele: null vp");
+#endif
+	vp->v_usecount--;
+	if (vp->v_usecount > 0)
+		return;			/* still referenced elsewhere */
+#ifdef DIAGNOSTIC
+	if (vp->v_usecount != 0 || vp->v_writecount != 0) {
+		vprint("vrele: bad ref count", vp);
+		panic("vrele: ref cnt");
+	}
+#endif
+	/*
+	 * Insert at tail of LRU list, then run the inactive routine.
+	 */
+	TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
+	VOP_INACTIVE(vp);
+}
+
+/*
+ * Take a hold on vp (v_holdcnt) on behalf of a page or buffer structure.
+ */
+void vhold(vp)
+	register struct vnode *vp;
+{
+
+	vp->v_holdcnt += 1;
+}
+
+/*
+ * Drop a hold taken with vhold; panics if no hold is outstanding.
+ */
+void holdrele(vp)
+	register struct vnode *vp;
+{
+
+	if (vp->v_holdcnt < 1)
+		panic("holdrele: holdcnt");
+	vp->v_holdcnt -= 1;
+}
+
+/*
+ * Remove any vnodes in the vnode table belonging to mount point mp.
+ *
+ * If MNT_NOFORCE is specified, there should not be any active ones,
+ * return error if any are found (nb: this is a user error, not a
+ * system error). If MNT_FORCE is specified, detach any active vnodes
+ * that are found.
+ */
+#ifdef DIAGNOSTIC
+int busyprt = 0;	/* print out busy vnodes */
+struct ctldebug debug1 = { "busyprt", &busyprt };
+#endif
+
+vflush(mp, skipvp, flags)
+	struct mount *mp;
+	struct vnode *skipvp;
+	int flags;
+{
+	register struct vnode *vp, *nvp;
+	int busy = 0;
+
+	if ((mp->mnt_flag & MNT_MPBUSY) == 0)
+		panic("vflush: not busy");
+loop:
+	for (vp = mp->mnt_vnodelist.lh_first; vp; vp = nvp) {
+		if (vp->v_mount != mp)
+			goto loop;	/* vp left this mount; rescan */
+		nvp = vp->v_mntvnodes.le_next;
+		/*
+		 * Skip over a selected vnode.
+		 */
+		if (vp == skipvp)
+			continue;
+		/*
+		 * Skip over vnodes marked VSYSTEM.
+		 */
+		if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM))
+			continue;
+		/*
+		 * If WRITECLOSE is set, only flush out regular file
+		 * vnodes open for writing.
+		 */
+		if ((flags & WRITECLOSE) &&
+		    (vp->v_writecount == 0 || vp->v_type != VREG))
+			continue;
+		/*
+		 * With v_usecount == 0, all we need to do is clear
+		 * out the vnode data structures and we are done.
+		 */
+		if (vp->v_usecount == 0) {
+			vgone(vp);
+			continue;
+		}
+		/*
+		 * If FORCECLOSE is set, forcibly close the vnode.
+		 * For block or character devices, revert to an
+		 * anonymous device. For all other files, just kill them.
+		 */
+		if (flags & FORCECLOSE) {
+			if (vp->v_type != VBLK && vp->v_type != VCHR) {
+				vgone(vp);
+			} else {
+				vclean(vp, 0);
+				vp->v_op = spec_vnodeop_p;
+				insmntque(vp, (struct mount *)0);
+			}
+			continue;
+		}
+#ifdef DIAGNOSTIC
+		if (busyprt)
+			vprint("vflush: busy vnode", vp);
+#endif
+		busy++;
+	}
+	if (busy)
+		return (EBUSY);
+	return (0);
+}
+
+/*
+ * Disassociate the underlying file system from a vnode (it becomes VT_NON).
+ */
+void
+vclean(vp, flags)
+	register struct vnode *vp;
+	int flags;
+{
+	int active;
+
+	/*
+	 * Check to see if the vnode is in use.
+	 * If so we have to reference it before we clean it out
+	 * so that its count cannot fall to zero and generate a
+	 * race against ourselves to recycle it.
+	 */
+	if (active = vp->v_usecount)
+		VREF(vp);
+	/*
+	 * Even if the count is zero, the VOP_INACTIVE routine may still
+	 * have the object locked while it cleans it out. The VOP_LOCK
+	 * ensures that the VOP_INACTIVE routine is done with its work.
+	 * For active vnodes, it ensures that no other activity can
+	 * occur while the underlying object is being cleaned out.
+	 */
+	VOP_LOCK(vp);
+	/*
+	 * Prevent the vnode from being recycled or
+	 * brought into use while we clean it out.
+	 */
+	if (vp->v_flag & VXLOCK)
+		panic("vclean: deadlock");
+	vp->v_flag |= VXLOCK;
+	/*
+	 * Clean out any buffers associated with the vnode.
+	 */
+	if (flags & DOCLOSE)
+		vinvalbuf(vp, V_SAVE, NOCRED, NULL, 0, 0);
+	/*
+	 * Any other processes trying to obtain this lock must first
+	 * wait for VXLOCK to clear, then call the new lock operation.
+	 */
+	VOP_UNLOCK(vp);
+	/*
+	 * If purging an active vnode, it must be closed and
+	 * deactivated before being reclaimed.
+	 */
+	if (active) {
+		if (flags & DOCLOSE)
+			VOP_CLOSE(vp, IO_NDELAY, NOCRED, NULL);
+		VOP_INACTIVE(vp);
+	}
+	/*
+	 * Reclaim the vnode.
+	 */
+	if (VOP_RECLAIM(vp))
+		panic("vclean: cannot reclaim");
+	if (active)
+		vrele(vp);		/* drop the reference taken above */
+
+	/*
+	 * Done with purge, notify sleepers of the grim news.
+	 */
+	vp->v_op = dead_vnodeop_p;
+	vp->v_tag = VT_NON;
+	vp->v_flag &= ~VXLOCK;
+	if (vp->v_flag & VXWANT) {
+		vp->v_flag &= ~VXWANT;
+		wakeup((caddr_t)vp);
+	}
+}
+
+/*
+ * Eliminate all activity associated with the requested vnode
+ * and with all vnodes aliased to the requested vnode.
+ */
+void vgoneall(vp)
+	register struct vnode *vp;
+{
+	register struct vnode *vq;
+
+	if (vp->v_flag & VALIASED) {
+		/*
+		 * If a vgone (or vclean) is already in progress,
+		 * wait until it is done and return.
+		 */
+		if (vp->v_flag & VXLOCK) {
+			vp->v_flag |= VXWANT;
+			sleep((caddr_t)vp, PINOD);
+			return;
+		}
+		/*
+		 * Ensure that vp will not be vgone'd while we
+		 * are eliminating its aliases.
+		 */
+		vp->v_flag |= VXLOCK;
+		while (vp->v_flag & VALIASED) {
+			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
+				if (vq->v_rdev != vp->v_rdev ||
+				    vq->v_type != vp->v_type || vp == vq)
+					continue;
+				vgone(vq);
+				break;	/* one alias per pass; rescan chain */
+			}
+		}
+		/*
+		 * Remove the lock so that vgone below will
+		 * really eliminate the vnode after which time
+		 * vgone will awaken any sleepers.
+		 */
+		vp->v_flag &= ~VXLOCK;
+	}
+	vgone(vp);
+}
+
+/*
+ * Eliminate all activity associated with a vnode
+ * in preparation for reuse.
+ */
+void vgone(vp)
+	register struct vnode *vp;
+{
+	register struct vnode *vq;
+	struct vnode *vx;
+
+	/*
+	 * If a vgone (or vclean) is already in progress,
+	 * wait until it is done and return.
+	 */
+	if (vp->v_flag & VXLOCK) {
+		vp->v_flag |= VXWANT;
+		sleep((caddr_t)vp, PINOD);
+		return;
+	}
+	/*
+	 * Clean out the filesystem specific data.
+	 */
+	vclean(vp, DOCLOSE);
+	/*
+	 * Delete from old mount point vnode list, if on one.
+	 */
+	if (vp->v_mount != NULL) {
+		LIST_REMOVE(vp, v_mntvnodes);
+		vp->v_mount = NULL;
+	}
+	/*
+	 * If special device, remove it from special device alias list.
+	 */
+	if (vp->v_type == VBLK || vp->v_type == VCHR) {
+		if (*vp->v_hashchain == vp) {
+			*vp->v_hashchain = vp->v_specnext;
+		} else {
+			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
+				if (vq->v_specnext != vp)
+					continue;
+				vq->v_specnext = vp->v_specnext;
+				break;
+			}
+			if (vq == NULL)
+				panic("missing bdev");
+		}
+		if (vp->v_flag & VALIASED) {
+			vx = NULL;	/* first remaining alias found */
+			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
+				if (vq->v_rdev != vp->v_rdev ||
+				    vq->v_type != vp->v_type)
+					continue;
+				if (vx)
+					break;	/* second alias: vx stays aliased */
+				vx = vq;
+			}
+			if (vx == NULL)
+				panic("missing alias");
+			if (vq == NULL)	/* vx was the only one left */
+				vx->v_flag &= ~VALIASED;
+			vp->v_flag &= ~VALIASED;
+		}
+		FREE(vp->v_specinfo, M_VNODE);
+		vp->v_specinfo = NULL;
+	}
+	/*
+	 * If it is on the freelist and not already at the head,
+	 * move it to the head of the list. The test of the back
+	 * pointer and the reference count of zero is because
+	 * it will be removed from the free list by getnewvnode,
+	 * but will not have its reference count incremented until
+	 * after calling vgone. If the reference count were
+	 * incremented first, vgone would (incorrectly) try to
+	 * close the previous instance of the underlying object.
+	 * So, the back pointer is explicitly set to `0xdeadb' in
+	 * getnewvnode after removing it from the freelist to ensure
+	 * that we do not try to move it here.
+	 */
+	if (vp->v_usecount == 0 &&
+	    vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb &&
+	    vnode_free_list.tqh_first != vp) {
+		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
+		TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
+	}
+	vp->v_type = VBAD;	/* getnewvnode skips vgone for VBAD vnodes */
+}
+
+/*
+ * Find the special vnode with device number dev and the given type;
+ * on a match store it in *vpp and return 1, otherwise return 0.
+ */
+vfinddev(dev, type, vpp)
+	dev_t dev;
+	enum vtype type;
+	struct vnode **vpp;
+{
+	register struct vnode *cand = speclisth[SPECHASH(dev)];
+	while (cand && (cand->v_rdev != dev || cand->v_type != type))
+		cand = cand->v_specnext;
+	if (cand == NULL)
+		return (0);
+
+	*vpp = cand;
+	return (1);
+}
+
+/*
+ * Calculate the total number of references to a special device.
+ */
+vcount(vp)
+	register struct vnode *vp;
+{
+	register struct vnode *vq, *vnext;
+	int count;
+
+loop:
+	if ((vp->v_flag & VALIASED) == 0)
+		return (vp->v_usecount);	/* no aliases to add in */
+	for (count = 0, vq = *vp->v_hashchain; vq; vq = vnext) {
+		vnext = vq->v_specnext;
+		if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type)
+			continue;
+		/*
+		 * Alias, but not in use, so flush it out.
+		 */
+		if (vq->v_usecount == 0 && vq != vp) {
+			vgone(vq);
+			goto loop;	/* chain changed; restart the count */
+		}
+		count += vq->v_usecount;
+	}
+	return (count);
+}
+
+/*
+ * Print a vnode's type, counts and set flags, then any VOP_PRINT output.
+ */
+static char *typename[] =
+   { "VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD" };
+
+vprint(label, vp)
+	char *label;
+	register struct vnode *vp;
+{
+	static struct {
+		long	bit;
+		char	*text;
+	} names[] = {
+		{ VROOT, "|VROOT" }, { VTEXT, "|VTEXT" },
+		{ VSYSTEM, "|VSYSTEM" }, { VXLOCK, "|VXLOCK" },
+		{ VXWANT, "|VXWANT" }, { VBWAIT, "|VBWAIT" },
+		{ VALIASED, "|VALIASED" }
+	};
+	char buf[64];
+	int i;
+
+	if (label != NULL)
+		printf("%s: ", label);
+	printf("type %s, usecount %d, writecount %d, refcount %d,",
+	    typename[vp->v_type], vp->v_usecount, vp->v_writecount,
+	    vp->v_holdcnt);
+	/* Each name carries a leading '|'; the first one is skipped below. */
+	buf[0] = '\0';
+	for (i = 0; i < sizeof(names) / sizeof(names[0]); i++)
+		if (vp->v_flag & names[i].bit)
+			strcat(buf, names[i].text);
+	if (buf[0] != '\0')
+		printf(" flags (%s)", &buf[1]);
+	if (vp->v_data != NULL) {
+		printf("\n\t");
+		VOP_PRINT(vp);
+	} else {
+		printf("\n");
+	}
+}
+
+#ifdef DEBUG
+/*
+ * List all of the locked vnodes in the system (DEBUG kernels only): walk
+ * each mount's vnode list and vprint() those VOP_ISLOCKED() reports locked.
+ */
+printlockedvnodes()
+{
+	register struct mount *mp;
+	register struct vnode *vp;
+
+	printf("Locked vnodes\n");
+	for (mp = mountlist.tqh_first; mp != NULL; mp = mp->mnt_list.tqe_next) {
+		for (vp = mp->mnt_vnodelist.lh_first;
+		     vp != NULL;
+		     vp = vp->v_mntvnodes.le_next)
+			if (VOP_ISLOCKED(vp))
+				vprint((char *)0, vp);	/* no label */
+	}
+}
+#endif
+
+int kinfo_vdebug = 1;
+int kinfo_vgetfailed;
+#define KINFO_VNODESLOP	10
+/*
+ * Dump vnode list (via sysctl).  Copyout address of vnode followed by
+ * vnode.  A mount busied here is unbusied again on every return path.
+ */
+/* ARGSUSED */
+sysctl_vnode(where, sizep)
+	char *where;
+	size_t *sizep;
+{
+	register struct mount *mp, *nmp;
+	struct vnode *vp;
+	register char *bp = where, *savebp;
+	char *ewhere;
+	int error;
+
+#define VPTRSZ	sizeof (struct vnode *)
+#define VNODESZ	sizeof (struct vnode)
+	if (where == NULL) {		/* size estimate only, with slop */
+		*sizep = (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ);
+		return (0);
+	}
+	ewhere = where + *sizep;
+	for (mp = mountlist.tqh_first; mp != NULL; mp = nmp) {
+		nmp = mp->mnt_list.tqe_next;
+		if (vfs_busy(mp))
+			continue;
+		savebp = bp;		/* restart point for this mount */
+again:
+		for (vp = mp->mnt_vnodelist.lh_first;
+		     vp != NULL;
+		     vp = vp->v_mntvnodes.le_next) {
+			/*
+			 * Check that the vp is still associated with this
+			 * filesystem.  RACE: could have been recycled onto
+			 * the same filesystem. */
+			if (vp->v_mount != mp) {
+				if (kinfo_vdebug)
+					printf("kinfo: vp changed\n");
+				bp = savebp;
+				goto again;
+			}
+			if (bp + VPTRSZ + VNODESZ > ewhere) {
+				vfs_unbusy(mp);	/* do not leave the mount busy */
+				*sizep = bp - where;
+				return (ENOMEM);
+			}
+			if ((error = copyout((caddr_t)&vp, bp, VPTRSZ)) ||
+			   (error = copyout((caddr_t)vp, bp + VPTRSZ, VNODESZ))) {
+				vfs_unbusy(mp);	/* do not leave the mount busy */
+				return (error);
+			}
+			bp += VPTRSZ + VNODESZ;
+		}
+		vfs_unbusy(mp);
+	}
+	*sizep = bp - where;
+	return (0);
+}
+
+/*
+ * Return EBUSY if a filesystem is mounted on the block device vp refers
+ * to, whether through vp itself or through any alias of it; otherwise 0.
+ */
+int
+vfs_mountedon(vp)
+	register struct vnode *vp;
+{
+	register struct vnode *alias;
+
+	if ((vp->v_specflags & SI_MOUNTEDON) != 0)
+		return (EBUSY);
+	if ((vp->v_flag & VALIASED) == 0)
+		return (0);
+	/* Walk the hash chain; only same-device, same-type nodes count. */
+	for (alias = *vp->v_hashchain; alias != NULL; alias = alias->v_specnext)
+		if (alias->v_rdev == vp->v_rdev &&
+		    alias->v_type == vp->v_type &&
+		    (alias->v_specflags & SI_MOUNTEDON) != 0)
+			return (EBUSY);
+	return (0);
+}
+
+/*
+ * Record one export entry from argp on nep: the default export when no
+ * address is given, otherwise a netcred keyed by address (and optional
+ * mask) in the radix tree of that address family.  Returns 0 or an errno. */
+static int
+vfs_hang_addrlist(mp, nep, argp)
+	struct mount *mp;
+	struct netexport *nep;
+	struct export_args *argp;
+{
+	register struct netcred *np;
+	register struct radix_node_head *rnh;
+	register int i;
+	struct radix_node *rn;
+	struct sockaddr *saddr, *smask = 0;
+	struct domain *dom;
+	int error;
+
+	if (argp->ex_addrlen == 0) {	/* no address: the default export */
+		if (mp->mnt_flag & MNT_DEFEXPORTED)
+			return (EPERM);	/* a default entry already exists */
+		np = &nep->ne_defexported;
+		np->netc_exflags = argp->ex_flags;
+		np->netc_anon = argp->ex_anon;
+		np->netc_anon.cr_ref = 1;
+		mp->mnt_flag |= MNT_DEFEXPORTED;
+		return (0);
+	}
+	i = sizeof(struct netcred) + argp->ex_addrlen + argp->ex_masklen;
+	np = (struct netcred *)malloc(i, M_NETADDR, M_WAITOK);
+	bzero((caddr_t)np, i);
+	saddr = (struct sockaddr *)(np + 1);	/* address follows the netcred */
+	if (error = copyin(argp->ex_addr, (caddr_t)saddr, argp->ex_addrlen))
+		goto out;
+	if (saddr->sa_len > argp->ex_addrlen)
+		saddr->sa_len = argp->ex_addrlen;
+	if (argp->ex_masklen) {
+		smask = (struct sockaddr *)((caddr_t)saddr + argp->ex_addrlen);
+		error = copyin(argp->ex_mask, (caddr_t)smask, argp->ex_masklen);
+		if (error)
+			goto out;
+		if (smask->sa_len > argp->ex_masklen)
+			smask->sa_len = argp->ex_masklen;
+	}
+	if ((i = saddr->sa_family) > AF_MAX) {	/* family came from user space */
+		error = EINVAL;
+		goto out;
+	}
+	if ((rnh = nep->ne_rtable[i]) == 0) {
+		/* Initialize this AF's table on demand; most are never used. */
+		for (dom = domains; dom; dom = dom->dom_next)
+			if (dom->dom_family == i && dom->dom_rtattach) {
+				dom->dom_rtattach((void **)&nep->ne_rtable[i],
+					dom->dom_rtoffset);
+				break;
+			}
+		if ((rnh = nep->ne_rtable[i]) == 0) {
+			error = ENOBUFS;
+			goto out;
+		}
+	}
+	rn = (*rnh->rnh_addaddr)((caddr_t)saddr, (caddr_t)smask, rnh,
+		np->netc_rnodes);
+	if (rn == 0 || np != (struct netcred *)rn) { /* already exists */
+		error = EPERM;
+		goto out;
+	}
+	np->netc_exflags = argp->ex_flags;
+	np->netc_anon = argp->ex_anon;
+	np->netc_anon.cr_ref = 1;
+	return (0);
+out:
+	free(np, M_NETADDR);
+	return (error);
+}
+
+/* ARGSUSED */
+static int
+vfs_free_netcred(rn, w)		/* handed to rnh_walktree by vfs_free_addrlist */
+	struct radix_node *rn;
+	caddr_t w;		/* the radix_node_head, passed as an opaque pointer */
+{
+	register struct radix_node_head *rnh = (struct radix_node_head *)w;
+
+	(*rnh->rnh_deladdr)(rn->rn_key, rn->rn_mask, rnh);
+	free((caddr_t)rn, M_NETADDR);	/* same malloc type as the netcred */
+	return (0);
+}
+	
+/*
+ * Free every per-address-family export tree in nep and zero its table slot.
+ */
+static void
+vfs_free_addrlist(nep)
+	struct netexport *nep;
+{
+	register int i;
+	register struct radix_node_head *rnh;
+
+	for (i = 0; i <= AF_MAX; i++)
+		if (rnh = nep->ne_rtable[i]) {
+			(*rnh->rnh_walktree)(rnh, vfs_free_netcred,
+			    (caddr_t)rnh);	/* frees each entry */
+			free((caddr_t)rnh, M_RTABLE);	/* then the head */
+			nep->ne_rtable[i] = 0;
+		}
+}
+
+int
+vfs_export(mp, nep, argp)
+	struct mount *mp;
+	struct netexport *nep;
+	struct export_args *argp;
+{
+	int error = 0;
+
+	/* MNT_DELEXPORT: drop the address lists and both export flags. */
+	if ((argp->ex_flags & MNT_DELEXPORT) != 0) {
+		vfs_free_addrlist(nep);
+		mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED);
+	}
+	/* MNT_EXPORTED: hang the new entry; flag the mount only on success. */
+	if ((argp->ex_flags & MNT_EXPORTED) != 0 &&
+	    (error = vfs_hang_addrlist(mp, nep, argp)) == 0)
+		mp->mnt_flag |= MNT_EXPORTED;
+	return (error);
+}
+
+struct netcred *
+vfs_export_lookup(mp, nep, nam)
+	register struct mount *mp;
+	struct netexport *nep;
+	struct mbuf *nam;
+{
+	register struct netcred *cred = NULL;
+	register struct radix_node_head *tree = NULL;
+	struct sockaddr *peer;
+
+	/*
+	 * Find the export credentials that apply to the address held in
+	 * nam.  A mount that is not exported at all yields NULL; so does
+	 * an exported mount with neither an address match nor a default.
+	 */
+	if ((mp->mnt_flag & MNT_EXPORTED) == 0)
+		return (NULL);
+	/* Try the export tree of the address's family first. */
+	if (nam != NULL) {
+		peer = mtod(nam, struct sockaddr *);
+		tree = nep->ne_rtable[peer->sa_family];
+	}
+	if (tree != NULL) {
+		cred = (struct netcred *)
+		    (*tree->rnh_matchaddr)((caddr_t)peer, tree);
+		/* An entry flagged RNF_ROOT is not treated as a match. */
+		if (cred != NULL && (cred->netc_rnodes->rn_flags & RNF_ROOT))
+			cred = NULL;
+	}
+	/* No address match: fall back on the default export, if any. */
+	if (cred == NULL && (mp->mnt_flag & MNT_DEFEXPORTED))
+		cred = &nep->ne_defexported;
+	return (cred);
+}
diff --git a/sys/kern/vfs_extattr.c b/sys/kern/vfs_extattr.c
new file mode 100644
index 00000000000..345c7a79bf2
--- /dev/null
+++ b/sys/kern/vfs_extattr.c
@@ -0,0 +1,2107 @@
+/*
+ * Copyright (c) 1989, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)vfs_syscalls.c	8.13 (Berkeley) 4/15/94
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/namei.h>
+#include <sys/filedesc.h>
+#include <sys/kernel.h>
+#include <sys/file.h>
+#include <sys/stat.h>
+#include <sys/vnode.h>
+#include <sys/mount.h>
+#include <sys/proc.h>
+#include <sys/uio.h>
+#include <sys/malloc.h>
+#include <sys/dirent.h>
+
+#include <vm/vm.h>
+#include <sys/sysctl.h>
+
+static int change_dir __P((struct nameidata *ndp, struct proc *p));
+
+/*
+ * Virtual File System System Calls
+ */
+
+/*
+ * Mount a file system, or update an existing mount when MNT_UPDATE is set.
+ */
+struct mount_args {
+	int	type;
+	char	*path;
+	int	flags;
+	caddr_t	data;
+};
+/* ARGSUSED */
+mount(p, uap, retval)
+	struct proc *p;
+	register struct mount_args *uap;
+	int *retval;
+{
+	register struct vnode *vp;
+	register struct mount *mp;
+	int error, flag;
+	struct nameidata nd;
+
+	/* Must be super user */
+	if (error = suser(p->p_ucred, &p->p_acflag))
+		return (error);
+	/*
+	 * Get vnode to be covered
+	 */
+	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, uap->path, p);
+	if (error = namei(&nd))
+		return (error);
+	vp = nd.ni_vp;
+	if (uap->flags & MNT_UPDATE) {
+		if ((vp->v_flag & VROOT) == 0) {
+			vput(vp);
+			return (EINVAL);
+		}
+		mp = vp->v_mount;
+		flag = mp->mnt_flag;	/* saved to roll back a failed update */
+		/*
+		 * We only allow the filesystem to be reloaded if it
+		 * is currently mounted read-only.
+		 */
+		if ((uap->flags & MNT_RELOAD) &&
+		    ((mp->mnt_flag & MNT_RDONLY) == 0)) {
+			vput(vp);
+			return (EOPNOTSUPP);	/* Needs translation */
+		}
+		mp->mnt_flag |=
+		    uap->flags & (MNT_RELOAD | MNT_FORCE | MNT_UPDATE);
+		VOP_UNLOCK(vp);
+		goto update;
+	}
+	if (error = vinvalbuf(vp, V_SAVE, p->p_ucred, p, 0, 0)) {
+		vput(vp);	/* release the locked vnode, as other errors do */
+		return (error);
+	}
+	if (vp->v_type != VDIR) {
+		vput(vp);
+		return (ENOTDIR);
+	}
+	if ((u_long)uap->type > MOUNT_MAXTYPE || vfssw[uap->type] == NULL) {
+		vput(vp);
+		return (ENODEV);
+	}
+
+	/*
+	 * Allocate and initialize the file system.
+	 */
+	mp = (struct mount *)malloc((u_long)sizeof(struct mount),
+		M_MOUNT, M_WAITOK);
+	bzero((char *)mp, (u_long)sizeof(struct mount));
+	mp->mnt_op = vfssw[uap->type];
+	if (error = vfs_lock(mp)) {
+		free((caddr_t)mp, M_MOUNT);
+		vput(vp);
+		return (error);
+	}
+	if (vp->v_mountedhere != NULL) {	/* something already mounted */
+		vfs_unlock(mp);
+		free((caddr_t)mp, M_MOUNT);
+		vput(vp);
+		return (EBUSY);
+	}
+	vp->v_mountedhere = mp;
+	mp->mnt_vnodecovered = vp;
+update:
+	/*
+	 * Set the mount level flags.
+	 */
+	if (uap->flags & MNT_RDONLY)
+		mp->mnt_flag |= MNT_RDONLY;
+	else if (mp->mnt_flag & MNT_RDONLY)
+		mp->mnt_flag |= MNT_WANTRDWR;
+	mp->mnt_flag &=~ (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
+	    MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC);
+	mp->mnt_flag |= uap->flags & (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
+	    MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC);
+	/*
+	 * Mount the filesystem.
+	 */
+	error = VFS_MOUNT(mp, uap->path, uap->data, &nd, p);
+	if (mp->mnt_flag & MNT_UPDATE) {
+		vrele(vp);	/* already unlocked before `goto update' */
+		if (mp->mnt_flag & MNT_WANTRDWR)
+			mp->mnt_flag &= ~MNT_RDONLY;
+		mp->mnt_flag &=~
+		    (MNT_UPDATE | MNT_RELOAD | MNT_FORCE | MNT_WANTRDWR);
+		if (error)
+			mp->mnt_flag = flag;	/* restore the pre-update flags */
+		return (error);
+	}
+	/*
+	 * Put the new filesystem on the mount list after root.
+	 */
+	cache_purge(vp);
+	if (!error) {
+		TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list);
+		VOP_UNLOCK(vp);
+		vfs_unlock(mp);
+		error = VFS_START(mp, 0, p);
+	} else {
+		mp->mnt_vnodecovered->v_mountedhere = (struct mount *)0;
+		vfs_unlock(mp);
+		free((caddr_t)mp, M_MOUNT);
+		vput(vp);
+	}
+	return (error);
+}
+
+/*
+ * Unmount a file system: check privilege and VROOT, then call dounmount().
+ *
+ * Note: unmount takes a path to the vnode mounted on as argument,
+ * not special file (as before).
+ */
+struct unmount_args {
+	char	*path;
+	int	flags;
+};
+/* ARGSUSED */
+unmount(p, uap, retval)
+	struct proc *p;
+	register struct unmount_args *uap;
+	int *retval;
+{
+	register struct vnode *vp;
+	struct mount *mp;
+	int error;
+	struct nameidata nd;
+
+	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, uap->path, p);
+	if (error = namei(&nd))
+		return (error);
+	vp = nd.ni_vp;
+
+	/*
+	 * Unless this is a user mount, then must
+	 * have suser privilege.
+	 */
+	if (((vp->v_mount->mnt_flag & MNT_USER) == 0) &&
+	    (error = suser(p->p_ucred, &p->p_acflag))) {
+		vput(vp);
+		return (error);
+	}
+
+	/*
+	 * Must be the root of the filesystem
+	 */
+	if ((vp->v_flag & VROOT) == 0) {
+		vput(vp);
+		return (EINVAL);
+	}
+	mp = vp->v_mount;
+	vput(vp);		/* vnode is released before the unmount proper */
+	return (dounmount(mp, uap->flags, p));
+}
+
+/*
+ * Do the actual file system unmount: sync, VFS_UNMOUNT, then free the mount.
+ */
+dounmount(mp, flags, p)
+	register struct mount *mp;
+	int flags;
+	struct proc *p;
+{
+	struct vnode *coveredvp;
+	int error;
+
+	coveredvp = mp->mnt_vnodecovered;
+	if (vfs_busy(mp))
+		return (EBUSY);
+	mp->mnt_flag |= MNT_UNMOUNT;
+	if (error = vfs_lock(mp))	/* NOTE(review): returns while still busy */
+		return (error);
+
+	mp->mnt_flag &=~ MNT_ASYNC;
+	vnode_pager_umount(mp);	/* release cached vnodes */
+	cache_purgevfs(mp);	/* remove cache entries for this file sys */
+	if ((error = VFS_SYNC(mp, MNT_WAIT, p->p_ucred, p)) == 0 ||
+	    (flags & MNT_FORCE))	/* MNT_FORCE proceeds despite sync error */
+		error = VFS_UNMOUNT(mp, flags, p);
+	mp->mnt_flag &= ~MNT_UNMOUNT;
+	vfs_unbusy(mp);
+	if (error) {
+		vfs_unlock(mp);
+	} else {
+		vrele(coveredvp);	/* NOTE(review): vnode is used again below */
+		TAILQ_REMOVE(&mountlist, mp, mnt_list);
+		mp->mnt_vnodecovered->v_mountedhere = (struct mount *)0;
+		vfs_unlock(mp);
+		if (mp->mnt_vnodelist.lh_first != NULL)
+			panic("unmount: dangling vnode");
+		free((caddr_t)mp, M_MOUNT);
+	}
+	return (error);
+}
+
+/*
+ * Sync each mounted filesystem that is not locked, read-only or busy.
+ */
+#ifdef DIAGNOSTIC
+int syncprt = 0;
+struct ctldebug debug0 = { "syncprt", &syncprt };
+#endif
+
+struct sync_args {
+	int	dummy;
+};
+/* ARGSUSED */
+sync(p, uap, retval)
+	struct proc *p;
+	struct sync_args *uap;
+	int *retval;
+{
+	register struct mount *mp, *nmp;
+	int asyncflag;
+
+	for (mp = mountlist.tqh_first; mp != NULL; mp = nmp) {
+		nmp = mp->mnt_list.tqe_next;
+		/*
+		 * The lock check below is to avoid races with mount
+		 * and unmount.
+		 */
+		if ((mp->mnt_flag & (MNT_MLOCK|MNT_RDONLY|MNT_MPBUSY)) == 0 &&
+		    !vfs_busy(mp)) {
+			asyncflag = mp->mnt_flag & MNT_ASYNC;
+			mp->mnt_flag &= ~MNT_ASYNC;	/* off during the sync */
+			VFS_SYNC(mp, MNT_NOWAIT, p->p_ucred, p);
+			if (asyncflag)
+				mp->mnt_flag |= MNT_ASYNC;	/* put it back */
+			vfs_unbusy(mp);
+		}
+	}
+#ifdef DIAGNOSTIC
+	if (syncprt)
+		vfs_bufstats();
+#endif /* DIAGNOSTIC */
+	return (0);		/* VFS_SYNC results are not reported */
+}
+
+/*
+ * Change filesystem quotas: hand cmd/uid/arg to the mount holding uap->path.
+ */
+struct quotactl_args {
+	char *path;
+	int cmd;
+	int uid;
+	caddr_t arg;
+};
+/* ARGSUSED */
+quotactl(p, uap, retval)
+	struct proc *p;
+	register struct quotactl_args *uap;
+	int *retval;
+{
+	register struct mount *mp;
+	int error;
+	struct nameidata nd;
+
+	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, p);
+	if (error = namei(&nd))
+		return (error);
+	mp = nd.ni_vp->v_mount;
+	vrele(nd.ni_vp);	/* only the mount pointer is needed from here */
+	return (VFS_QUOTACTL(mp, uap->cmd, uap->uid, uap->arg, p));
+}
+
+/*
+ * Get statistics for the filesystem holding uap->path; copy to uap->buf.
+ */
+struct statfs_args {
+	char *path;
+	struct statfs *buf;
+};
+/* ARGSUSED */
+statfs(p, uap, retval)
+	struct proc *p;
+	register struct statfs_args *uap;
+	int *retval;
+{
+	register struct mount *mp;
+	register struct statfs *sp;
+	int error;
+	struct nameidata nd;
+
+	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, p);
+	if (error = namei(&nd))
+		return (error);
+	mp = nd.ni_vp->v_mount;
+	sp = &mp->mnt_stat;	/* refreshed in place inside the mount */
+	vrele(nd.ni_vp);
+	if (error = VFS_STATFS(mp, sp, p))
+		return (error);
+	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
+	return (copyout((caddr_t)sp, (caddr_t)uap->buf, sizeof(*sp)));
+}
+
+/*
+ * Get statistics for the filesystem holding the file open on descriptor fd.
+ */
+struct fstatfs_args {
+	int fd;
+	struct statfs *buf;
+};
+/* ARGSUSED */
+fstatfs(p, uap, retval)
+	struct proc *p;
+	register struct fstatfs_args *uap;
+	int *retval;
+{
+	struct file *fp;
+	struct mount *mp;
+	register struct statfs *sp;
+	int error;
+
+	if (error = getvnode(p->p_fd, uap->fd, &fp))
+		return (error);
+	mp = ((struct vnode *)fp->f_data)->v_mount;
+	sp = &mp->mnt_stat;	/* refreshed in place inside the mount */
+	if (error = VFS_STATFS(mp, sp, p))
+		return (error);
+	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
+	return (copyout((caddr_t)sp, (caddr_t)uap->buf, sizeof(*sp)));
+}
+
+/*
+ * Get statistics on all filesystems; *retval is the number of entries.
+ */
+struct getfsstat_args {
+	struct statfs *buf;
+	long bufsize;
+	int flags;
+};
+getfsstat(p, uap, retval)
+	struct proc *p;
+	register struct getfsstat_args *uap;
+	int *retval;
+{
+	register struct mount *mp, *nmp;
+	register struct statfs *sp;
+	caddr_t sfsp;
+	long count, maxcount, error;
+
+	maxcount = uap->bufsize / sizeof(struct statfs);
+	sfsp = (caddr_t)uap->buf;	/* NULL buf: count only, copy nothing */
+	for (count = 0, mp = mountlist.tqh_first; mp != NULL; mp = nmp) {
+		nmp = mp->mnt_list.tqe_next;
+		if (sfsp && count < maxcount &&
+		    ((mp->mnt_flag & MNT_MLOCK) == 0)) {
+			sp = &mp->mnt_stat;
+			/*
+			 * If MNT_NOWAIT is specified, do not refresh the
+			 * fsstat cache. MNT_WAIT overrides MNT_NOWAIT.
+			 */
+			if (((uap->flags & MNT_NOWAIT) == 0 ||
+			    (uap->flags & MNT_WAIT)) &&
+			    (error = VFS_STATFS(mp, sp, p)))
+				continue;	/* skipped, and not counted */
+			sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
+			if (error = copyout((caddr_t)sp, sfsp, sizeof(*sp)))
+				return (error);
+			sfsp += sizeof(*sp);
+		}
+		count++;
+	}
+	if (sfsp && count > maxcount)
+		*retval = maxcount;	/* buffer held fewer than were found */
+	else
+		*retval = count;
+	return (0);
+}
+
+/*
+ * Change current working directory to a given file descriptor.
+ */
+struct fchdir_args {
+	int	fd;
+};
+/* ARGSUSED */
+fchdir(p, uap, retval)
+	struct proc *p;
+	struct fchdir_args *uap;
+	int *retval;
+{
+	register struct filedesc *fdp = p->p_fd;
+	register struct vnode *vp;
+	struct file *fp;
+	int error;
+
+	if (error = getvnode(fdp, uap->fd, &fp))
+		return (error);
+	vp = (struct vnode *)fp->f_data;
+	VOP_LOCK(vp);
+	if (vp->v_type != VDIR)
+		error = ENOTDIR;
+	else
+		error = VOP_ACCESS(vp, VEXEC, p->p_ucred, p);	/* search perm */
+	VOP_UNLOCK(vp);
+	if (error)
+		return (error);
+	VREF(vp);		/* new reference held by fd_cdir */
+	vrele(fdp->fd_cdir);	/* drop the old working directory */
+	fdp->fd_cdir = vp;
+	return (0);
+}
+
+/*
+ * Change current working directory (``.'') to the directory uap->path.
+ */
+struct chdir_args {
+	char	*path;
+};
+/* ARGSUSED */
+chdir(p, uap, retval)
+	struct proc *p;
+	struct chdir_args *uap;
+	int *retval;
+{
+	register struct filedesc *fdp = p->p_fd;
+	int error;
+	struct nameidata nd;
+
+	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, uap->path, p);
+	if (error = change_dir(&nd, p))
+		return (error);
+	vrele(fdp->fd_cdir);	/* drop the old working directory */
+	fdp->fd_cdir = nd.ni_vp;
+	return (0);
+}
+
+/*
+ * Change notion of root (``/'') directory; super user only.
+ */
+struct chroot_args {
+	char	*path;
+};
+/* ARGSUSED */
+chroot(p, uap, retval)
+	struct proc *p;
+	struct chroot_args *uap;
+	int *retval;
+{
+	register struct filedesc *fdp = p->p_fd;
+	int error;
+	struct nameidata nd;
+
+	if (error = suser(p->p_ucred, &p->p_acflag))
+		return (error);
+	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, uap->path, p);
+	if (error = change_dir(&nd, p))
+		return (error);
+	if (fdp->fd_rdir != NULL)	/* fd_rdir may be unset, unlike fd_cdir */
+		vrele(fdp->fd_rdir);
+	fdp->fd_rdir = nd.ni_vp;
+	return (0);
+}
+
+/*
+ * Common routine for chroot and chdir: look up a searchable directory.
+ */
+static int
+change_dir(ndp, p)
+	register struct nameidata *ndp;
+	struct proc *p;
+{
+	struct vnode *vp;
+	int error;
+
+	if (error = namei(ndp))
+		return (error);
+	vp = ndp->ni_vp;
+	if (vp->v_type != VDIR)
+		error = ENOTDIR;
+	else
+		error = VOP_ACCESS(vp, VEXEC, p->p_ucred, p);
+	VOP_UNLOCK(vp);		/* both callers pass LOCKLEAF to NDINIT */
+	if (error)
+		vrele(vp);	/* on success the caller keeps the reference */
+	return (error);
+}
+
+/*
+ * Open the file uap->path: allocate a file structure and descriptor, call
+ * vn_open(), optionally take an flock-style lock; *retval is the descriptor.
+ */
+struct open_args {
+	char	*path;
+	int	flags;
+	int	mode;
+};
+open(p, uap, retval)
+	struct proc *p;
+	register struct open_args *uap;
+	int *retval;
+{
+	register struct filedesc *fdp = p->p_fd;
+	register struct file *fp;
+	register struct vnode *vp;
+	int flags, cmode;
+	struct file *nfp;
+	int type, indx, error;
+	struct flock lf;
+	struct nameidata nd;
+	extern struct fileops vnops;
+
+	if (error = falloc(p, &nfp, &indx))
+		return (error);
+	fp = nfp;
+	flags = FFLAGS(uap->flags);
+	cmode = ((uap->mode &~ fdp->fd_cmask) & ALLPERMS) &~ S_ISTXT;
+	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, p);
+	p->p_dupfd = -indx - 1;			/* XXX check for fdopen */
+	if (error = vn_open(&nd, flags, cmode)) {
+		ffree(fp);
+		if ((error == ENODEV || error == ENXIO) &&
+		    p->p_dupfd >= 0 && 			/* XXX from fdopen */
+		    (error =
+		        dupfdopen(fdp, indx, p->p_dupfd, flags, error)) == 0) {
+			*retval = indx;
+			return (0);
+		}
+		if (error == ERESTART)
+			error = EINTR;
+		fdp->fd_ofiles[indx] = NULL;	/* give the descriptor slot back */
+		return (error);
+	}
+	p->p_dupfd = 0;
+	vp = nd.ni_vp;
+	fp->f_flag = flags & FMASK;
+	fp->f_type = DTYPE_VNODE;
+	fp->f_ops = &vnops;
+	fp->f_data = (caddr_t)vp;
+	if (flags & (O_EXLOCK | O_SHLOCK)) {
+		lf.l_whence = SEEK_SET;
+		lf.l_start = 0;
+		lf.l_len = 0;
+		if (flags & O_EXLOCK)
+			lf.l_type = F_WRLCK;
+		else
+			lf.l_type = F_RDLCK;
+		type = F_FLOCK;
+		if ((flags & FNONBLOCK) == 0)
+			type |= F_WAIT;
+		VOP_UNLOCK(vp);		/* vnode is unlocked across VOP_ADVLOCK */
+		if (error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type)) {
+			(void) vn_close(vp, fp->f_flag, fp->f_cred, p);
+			ffree(fp);
+			fdp->fd_ofiles[indx] = NULL;
+			return (error);
+		}
+		VOP_LOCK(vp);
+		fp->f_flag |= FHASLOCK;
+	}
+	VOP_UNLOCK(vp);
+	*retval = indx;
+	return (0);
+}
+
+#ifdef COMPAT_43
+/*
+ * Old creat() entry: an open() with O_WRONLY | O_CREAT | O_TRUNC.
+ */
+struct ocreat_args {
+	char	*path;
+	int	mode;
+};
+ocreat(p, uap, retval)
+	struct proc *p;
+	register struct ocreat_args *uap;
+	int *retval;
+{
+	struct open_args args;
+
+	args.flags = O_WRONLY | O_CREAT | O_TRUNC;
+	args.mode = uap->mode;
+	args.path = uap->path;
+	return (open(p, &args, retval));
+}
+#endif /* COMPAT_43 */
+
+/*
+ * Create a special file (character, block, or bad-sector); super user only.
+ */
+struct mknod_args {
+	char	*path;
+	int	mode;
+	int	dev;
+};
+/* ARGSUSED */
+mknod(p, uap, retval)
+	struct proc *p;
+	register struct mknod_args *uap;
+	int *retval;
+{
+	register struct vnode *vp;
+	struct vattr vattr;
+	int error;
+	struct nameidata nd;
+
+	if (error = suser(p->p_ucred, &p->p_acflag))
+		return (error);
+	NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, uap->path, p);
+	if (error = namei(&nd))
+		return (error);
+	vp = nd.ni_vp;
+	if (vp != NULL)
+		error = EEXIST;
+	else {
+		VATTR_NULL(&vattr);
+		vattr.va_mode = (uap->mode & ALLPERMS) &~ p->p_fd->fd_cmask;
+		vattr.va_rdev = uap->dev;
+
+		switch (uap->mode & S_IFMT) {
+		case S_IFMT:	/* used by badsect to flag bad sectors */
+			vattr.va_type = VBAD;
+			break;
+		case S_IFCHR:
+			vattr.va_type = VCHR;
+			break;
+		case S_IFBLK:
+			vattr.va_type = VBLK;
+			break;
+		default:	/* any other file type is rejected */
+			error = EINVAL;
+			break;
+		}
+	}
+	if (!error) {
+		LEASE_CHECK(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
+		error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
+	} else {
+		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
+		if (nd.ni_dvp == vp)
+			vrele(nd.ni_dvp);
+		else
+			vput(nd.ni_dvp);
+		if (vp)
+			vrele(vp);
+	}
+	return (error);
+}
+
+/*
+ * Create named pipe; returns EOPNOTSUPP when built without the FIFO option.
+ */
+struct mkfifo_args {
+	char	*path;
+	int	mode;
+};
+/* ARGSUSED */
+mkfifo(p, uap, retval)
+	struct proc *p;
+	register struct mkfifo_args *uap;
+	int *retval;
+{
+	struct vattr vattr;
+	int error;
+	struct nameidata nd;
+
+#ifndef FIFO
+	return (EOPNOTSUPP);
+#else
+	NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, uap->path, p);
+	if (error = namei(&nd))
+		return (error);
+	if (nd.ni_vp != NULL) {		/* the name already exists */
+		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
+		if (nd.ni_dvp == nd.ni_vp)
+			vrele(nd.ni_dvp);
+		else
+			vput(nd.ni_dvp);
+		vrele(nd.ni_vp);
+		return (EEXIST);
+	}
+	VATTR_NULL(&vattr);
+	vattr.va_type = VFIFO;
+	vattr.va_mode = (uap->mode & ALLPERMS) &~ p->p_fd->fd_cmask;
+	LEASE_CHECK(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
+	return (VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr));
+#endif /* FIFO */
+}
+
+/*
+ * Make a hard file link named uap->link to the existing file uap->path.
+ */
+struct link_args {
+	char	*path;
+	char	*link;
+};
+/* ARGSUSED */
+link(p, uap, retval)
+	struct proc *p;
+	register struct link_args *uap;
+	int *retval;
+{
+	register struct vnode *vp;
+	struct nameidata nd;
+	int error;
+
+	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, p);
+	if (error = namei(&nd))
+		return (error);
+	vp = nd.ni_vp;
+	if (vp->v_type != VDIR ||	/* linking a directory needs suser */
+	    (error = suser(p->p_ucred, &p->p_acflag)) == 0) {
+		nd.ni_cnd.cn_nameiop = CREATE;	/* reuse nd for the new name */
+		nd.ni_cnd.cn_flags = LOCKPARENT;
+		nd.ni_dirp = uap->link;
+		if ((error = namei(&nd)) == 0) {
+			if (nd.ni_vp != NULL)
+				error = EEXIST;
+			if (!error) {
+				LEASE_CHECK(nd.ni_dvp,
+				    p, p->p_ucred, LEASE_WRITE);
+				LEASE_CHECK(vp,
+				    p, p->p_ucred, LEASE_WRITE);
+				error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
+			} else {
+				VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
+				if (nd.ni_dvp == nd.ni_vp)
+					vrele(nd.ni_dvp);
+				else
+					vput(nd.ni_dvp);
+				if (nd.ni_vp)
+					vrele(nd.ni_vp);
+			}
+		}
+	}
+	vrele(vp);		/* reference from the first namei() */
+	return (error);
+}
+
+/*
+ * Make a symbolic link at uap->link whose contents are the string uap->path.
+ */
+struct symlink_args {
+	char	*path;
+	char	*link;
+};
+/* ARGSUSED */
+symlink(p, uap, retval)
+	struct proc *p;
+	register struct symlink_args *uap;
+	int *retval;
+{
+	struct vattr vattr;
+	char *path;
+	int error;
+	struct nameidata nd;
+
+	MALLOC(path, char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
+	if (error = copyinstr(uap->path, path, MAXPATHLEN, NULL))
+		goto out;
+	NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, uap->link, p);
+	if (error = namei(&nd))
+		goto out;
+	if (nd.ni_vp) {			/* the link name already exists */
+		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
+		if (nd.ni_dvp == nd.ni_vp)
+			vrele(nd.ni_dvp);
+		else
+			vput(nd.ni_dvp);
+		vrele(nd.ni_vp);
+		error = EEXIST;
+		goto out;
+	}
+	VATTR_NULL(&vattr);
+	vattr.va_mode = ACCESSPERMS &~ p->p_fd->fd_cmask;
+	LEASE_CHECK(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
+	error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, path);
+out:
+	FREE(path, M_NAMEI);	/* the kernel copy of the target is always freed */
+	return (error);
+}
+
+/*
+ * Delete a name from the filesystem; a directory needs super user rights.
+ */
+struct unlink_args {
+	char	*path;
+};
+/* ARGSUSED */
+unlink(p, uap, retval)
+	struct proc *p;
+	struct unlink_args *uap;
+	int *retval;
+{
+	register struct vnode *vp;
+	int error;
+	struct nameidata nd;
+
+	NDINIT(&nd, DELETE, LOCKPARENT, UIO_USERSPACE, uap->path, p);
+	if (error = namei(&nd))
+		return (error);
+	vp = nd.ni_vp;
+	LEASE_CHECK(vp, p, p->p_ucred, LEASE_WRITE);
+	VOP_LOCK(vp);
+
+	if (vp->v_type != VDIR ||
+	    (error = suser(p->p_ucred, &p->p_acflag)) == 0) {
+		/*
+		 * The root of a mounted filesystem cannot be deleted.
+		 */
+		if (vp->v_flag & VROOT)
+			error = EBUSY;
+		else
+			(void)vnode_pager_uncache(vp);
+	}
+
+	if (!error) {
+		LEASE_CHECK(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
+		error = VOP_REMOVE(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
+	} else {
+		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
+		if (nd.ni_dvp == vp)
+			vrele(nd.ni_dvp);
+		else
+			vput(nd.ni_dvp);
+		vput(vp);	/* undoes the VOP_LOCK above and the reference */
+	}
+	return (error);
+}
+
+/*
+ * Reposition read/write file offset; the new offset is returned via retval.
+ */
+struct lseek_args {
+	int	fd;
+	int	pad;
+	off_t	offset;
+	int	whence;
+};
+lseek(p, uap, retval)
+	struct proc *p;
+	register struct lseek_args *uap;
+	int *retval;
+{
+	struct ucred *cred = p->p_ucred;
+	register struct filedesc *fdp = p->p_fd;
+	register struct file *fp;
+	struct vattr vattr;
+	int error;
+
+	if ((u_int)uap->fd >= fdp->fd_nfiles ||
+	    (fp = fdp->fd_ofiles[uap->fd]) == NULL)
+		return (EBADF);
+	if (fp->f_type != DTYPE_VNODE)
+		return (ESPIPE);
+	switch (uap->whence) {
+	case L_INCR:		/* relative to the current offset */
+		fp->f_offset += uap->offset;
+		break;
+	case L_XTND:		/* relative to the file size from VOP_GETATTR */
+		if (error =
+		    VOP_GETATTR((struct vnode *)fp->f_data, &vattr, cred, p))
+			return (error);
+		fp->f_offset = uap->offset + vattr.va_size;
+		break;
+	case L_SET:		/* absolute */
+		fp->f_offset = uap->offset;
+		break;
+	default:
+		return (EINVAL);
+	}
+	*(off_t *)retval = fp->f_offset;	/* result is stored as an off_t */
+	return (0);
+}
+
+#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
+/*
+ * Old lseek(): long offset in, long result out; wraps the off_t lseek().
+ */
+struct olseek_args {
+	int	fd;
+	long	offset;
+	int	whence;
+};
+olseek(p, uap, retval)
+	struct proc *p;
+	register struct olseek_args *uap;
+	int *retval;
+{
+	struct lseek_args nuap;
+	off_t qret = 0;		/* lseek() leaves it untouched on failure */
+	int error;
+
+	nuap.fd = uap->fd;
+	nuap.offset = uap->offset;
+	nuap.whence = uap->whence;
+	error = lseek(p, &nuap, &qret);
+	*(long *)retval = qret;	/* narrow the off_t result to a long */
+	return (error);
+}
+#endif /* COMPAT_43 || COMPAT_SUNOS */
+
+/*
+ * Check access permissions using the process's real uid and gid.
+ */
+struct access_args {
+	char	*path;
+	int	flags;
+};
+access(p, uap, retval)
+	struct proc *p;
+	register struct access_args *uap;
+	int *retval;
+{
+	register struct ucred *cred = p->p_ucred;
+	register struct vnode *vp;
+	int error, flags, t_gid, t_uid;
+	struct nameidata nd;
+
+	t_uid = cred->cr_uid;		/* saved; restored at out1 */
+	t_gid = cred->cr_groups[0];
+	cred->cr_uid = p->p_cred->p_ruid;	/* p_ucred is patched in place */
+	cred->cr_groups[0] = p->p_cred->p_rgid;
+	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, uap->path, p);
+	if (error = namei(&nd))
+		goto out1;
+	vp = nd.ni_vp;
+
+	/* Flags == 0 means only check for existence. */
+	if (uap->flags) {
+		flags = 0;
+		if (uap->flags & R_OK)
+			flags |= VREAD;
+		if (uap->flags & W_OK)
+			flags |= VWRITE;
+		if (uap->flags & X_OK)
+			flags |= VEXEC;
+		if ((flags & VWRITE) == 0 || (error = vn_writechk(vp)) == 0)
+			error = VOP_ACCESS(vp, flags, cred, p);
+	}
+	vput(vp);
+out1:
+	cred->cr_uid = t_uid;
+	cred->cr_groups[0] = t_gid;
+	return (error);
+}
+
+#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
+/*
+ * Get file status in the old (struct ostat) layout; this version follows links.
+ */
+struct ostat_args {
+	char	*path;
+	struct ostat *ub;
+};
+/* ARGSUSED */
+ostat(p, uap, retval)
+	struct proc *p;
+	register struct ostat_args *uap;
+	int *retval;
+{
+	struct stat sb;
+	struct ostat osb;
+	int error;
+	struct nameidata nd;
+
+	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, uap->path, p);
+	if (error = namei(&nd))
+		return (error);
+	error = vn_stat(nd.ni_vp, &sb, p);
+	vput(nd.ni_vp);
+	if (error)
+		return (error);
+	cvtstat(&sb, &osb);	/* struct stat -> struct ostat */
+	error = copyout((caddr_t)&osb, (caddr_t)uap->ub, sizeof (osb));
+	return (error);
+}
+
+/*
+ * Get file status in the old (struct ostat) layout; does not follow links.
+ */
+struct olstat_args {
+	char	*path;
+	struct ostat *ub;
+};
+/* ARGSUSED */
+olstat(p, uap, retval)
+	struct proc *p;
+	register struct olstat_args *uap;
+	int *retval;
+{
+	struct stat sb;
+	struct ostat osb;
+	int error;
+	struct nameidata nd;
+
+	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF, UIO_USERSPACE, uap->path, p);
+	if (error = namei(&nd))
+		return (error);
+	error = vn_stat(nd.ni_vp, &sb, p);
+	vput(nd.ni_vp);
+	if (error)
+		return (error);
+	cvtstat(&sb, &osb);	/* struct stat -> struct ostat */
+	error = copyout((caddr_t)&osb, (caddr_t)uap->ub, sizeof (osb));
+	return (error);
+}
+
+/*
+ * Convert from a new to an old stat structure: each field of the
+ * current-format "st" is copied into the old-format "ost".
+ * A file size of 2^32 bytes or more is reported as -2 in the old
+ * structure.
+ */
+cvtstat(st, ost)
+	struct stat *st;
+	struct ostat *ost;
+{
+
+	ost->st_dev = st->st_dev;
+	ost->st_ino = st->st_ino;
+	ost->st_mode = st->st_mode;
+	ost->st_nlink = st->st_nlink;
+	ost->st_uid = st->st_uid;
+	ost->st_gid = st->st_gid;
+	ost->st_rdev = st->st_rdev;
+	if (st->st_size < (quad_t)1 << 32)	/* below 2^32: copy as is */
+		ost->st_size = st->st_size;
+	else
+		ost->st_size = -2;		/* marker for "too large" */
+	ost->st_atime = st->st_atime;
+	ost->st_mtime = st->st_mtime;
+	ost->st_ctime = st->st_ctime;
+	ost->st_blksize = st->st_blksize;
+	ost->st_blocks = st->st_blocks;
+	ost->st_flags = st->st_flags;
+	ost->st_gen = st->st_gen;
+}
+#endif /* COMPAT_43 || COMPAT_SUNOS */
+
+/*
+ * Look up "path", following symbolic links, and copy the file's
+ * status out to the user's struct stat at "ub".
+ */
+struct stat_args {
+	char	*path;
+	struct stat *ub;
+};
+/* ARGSUSED */
+stat(p, uap, retval)
+	struct proc *p;
+	register struct stat_args *uap;
+	int *retval;
+{
+	struct nameidata nd;
+	struct stat sb;
+	int error;
+
+	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, uap->path, p);
+	error = namei(&nd);
+	if (error != 0)
+		return (error);
+	error = vn_stat(nd.ni_vp, &sb, p);
+	/* The vnode is no longer needed once its status has been read. */
+	vput(nd.ni_vp);
+	if (error == 0)
+		error = copyout((caddr_t)&sb, (caddr_t)uap->ub, sizeof (sb));
+	return (error);
+}
+
+/*
+ * Get file status; this version does not follow links.
+ *
+ * The lookup asks for both the leaf and its parent directory to be
+ * locked (LOCKLEAF | LOCKPARENT).  For anything but a symbolic link the
+ * parent is dropped at once and the leaf's own status is returned.  For
+ * a symbolic link the parent directory's status is used as the base,
+ * and only the file type, link count, size and block count are taken
+ * from the link itself.
+ */
+struct lstat_args {
+	char	*path;
+	struct stat *ub;
+};
+/* ARGSUSED */
+lstat(p, uap, retval)
+	struct proc *p;
+	register struct lstat_args *uap;
+	int *retval;
+{
+	int error;
+	struct vnode *vp, *dvp;
+	struct stat sb, sb1;	/* sb: result; sb1: the link's own status */
+	struct nameidata nd;
+
+	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | LOCKPARENT, UIO_USERSPACE,
+	    uap->path, p);
+	if (error = namei(&nd))
+		return (error);
+	/*
+	 * For symbolic links, always return the attributes of its
+	 * containing directory, except for mode, size, and links.
+	 */
+	vp = nd.ni_vp;
+	dvp = nd.ni_dvp;
+	if (vp->v_type != VLNK) {
+		if (dvp == vp)
+			vrele(dvp);	/* same vnode: drop only the extra reference */
+		else
+			vput(dvp);
+		error = vn_stat(vp, &sb, p);
+		vput(vp);
+		if (error)
+			return (error);
+	} else {
+		error = vn_stat(dvp, &sb, p);
+		vput(dvp);
+		if (error) {
+			vput(vp);	/* still holding the link itself */
+			return (error);
+		}
+		error = vn_stat(vp, &sb1, p);
+		vput(vp);
+		if (error)
+			return (error);
+		sb.st_mode &= ~S_IFDIR;		/* turn the directory's type ... */
+		sb.st_mode |= S_IFLNK;		/* ... into that of a link */
+		sb.st_nlink = sb1.st_nlink;
+		sb.st_size = sb1.st_size;
+		sb.st_blocks = sb1.st_blocks;
+	}
+	error = copyout((caddr_t)&sb, (caddr_t)uap->ub, sizeof (sb));
+	return (error);
+}
+
+/*
+ * Look up "path" (following symbolic links) and ask the file system
+ * for the configurable pathname variable "name"; VOP_PATHCONF stores
+ * the answer through retval.
+ */
+struct pathconf_args {
+	char	*path;
+	int	name;
+};
+/* ARGSUSED */
+pathconf(p, uap, retval)
+	struct proc *p;
+	register struct pathconf_args *uap;
+	int *retval;
+{
+	struct nameidata nd;
+	int error;
+
+	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, uap->path, p);
+	error = namei(&nd);
+	if (error == 0) {
+		error = VOP_PATHCONF(nd.ni_vp, uap->name, retval);
+		vput(nd.ni_vp);
+	}
+	return (error);
+}
+
+/*
+ * Return target name of a symbolic link.
+ *
+ * The object named by "path" is looked up without following a trailing
+ * link and, if it is a symbolic link, its contents are read into the
+ * user buffer "buf" (at most "count" bytes).  After VOP_READLINK has
+ * run, *retval holds the number of bytes transferred.  If "path" does
+ * not name a symbolic link, EINVAL is returned and *retval is left
+ * untouched.
+ */
+struct readlink_args {
+	char	*path;
+	char	*buf;
+	int	count;
+};
+/* ARGSUSED */
+readlink(p, uap, retval)
+	struct proc *p;
+	register struct readlink_args *uap;
+	int *retval;
+{
+	register struct vnode *vp;
+	struct iovec aiov;
+	struct uio auio;
+	int error;
+	struct nameidata nd;
+
+	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF, UIO_USERSPACE, uap->path, p);
+	if (error = namei(&nd))
+		return (error);
+	vp = nd.ni_vp;
+	if (vp->v_type != VLNK)
+		error = EINVAL;
+	else {
+		/* Describe the user's buffer as a single-segment read. */
+		aiov.iov_base = uap->buf;
+		aiov.iov_len = uap->count;
+		auio.uio_iov = &aiov;
+		auio.uio_iovcnt = 1;
+		auio.uio_offset = 0;
+		auio.uio_rw = UIO_READ;
+		auio.uio_segflg = UIO_USERSPACE;
+		auio.uio_procp = p;
+		auio.uio_resid = uap->count;
+		error = VOP_READLINK(vp, &auio, p->p_ucred);
+		/*
+		 * Compute the transfer count only here: auio is not
+		 * initialized on the EINVAL path above, so reading
+		 * uio_resid there would use an indeterminate value.
+		 */
+		*retval = uap->count - auio.uio_resid;
+	}
+	vput(vp);
+	return (error);
+}
+
+/*
+ * Set the flags of the file named by "path" to "flags".
+ * Fails with EROFS if the file lives on a read-only mount.
+ */
+struct chflags_args {
+	char	*path;
+	int	flags;
+};
+/* ARGSUSED */
+chflags(p, uap, retval)
+	struct proc *p;
+	register struct chflags_args *uap;
+	int *retval;
+{
+	register struct vnode *vp;
+	struct nameidata nd;
+	struct vattr vattr;
+	int error;
+
+	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, p);
+	error = namei(&nd);
+	if (error != 0)
+		return (error);
+	vp = nd.ni_vp;
+	LEASE_CHECK(vp, p, p->p_ucred, LEASE_WRITE);
+	VOP_LOCK(vp);
+	if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0) {
+		/* Only the flags field is set; the rest stays "no change". */
+		VATTR_NULL(&vattr);
+		vattr.va_flags = uap->flags;
+		error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
+	} else
+		error = EROFS;
+	vput(vp);
+	return (error);
+}
+
+/*
+ * Set the flags of the file open on descriptor "fd" to "flags".
+ * Fails with EROFS if the file lives on a read-only mount.
+ */
+struct fchflags_args {
+	int	fd;
+	int	flags;
+};
+/* ARGSUSED */
+fchflags(p, uap, retval)
+	struct proc *p;
+	register struct fchflags_args *uap;
+	int *retval;
+{
+	struct file *fp;
+	struct vnode *vp;
+	struct vattr vattr;
+	int error;
+
+	error = getvnode(p->p_fd, uap->fd, &fp);
+	if (error != 0)
+		return (error);
+	vp = (struct vnode *)fp->f_data;
+	LEASE_CHECK(vp, p, p->p_ucred, LEASE_WRITE);
+	VOP_LOCK(vp);
+	if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0) {
+		/* Only the flags field is set; the rest stays "no change". */
+		VATTR_NULL(&vattr);
+		vattr.va_flags = uap->flags;
+		error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
+	} else
+		error = EROFS;
+	/* The descriptor keeps its reference; just drop the lock. */
+	VOP_UNLOCK(vp);
+	return (error);
+}
+
+/*
+ * Set the mode of the file named by "path"; only the ALLPERMS bits
+ * of "mode" are used.  Fails with EROFS on a read-only mount.
+ */
+struct chmod_args {
+	char	*path;
+	int	mode;
+};
+/* ARGSUSED */
+chmod(p, uap, retval)
+	struct proc *p;
+	register struct chmod_args *uap;
+	int *retval;
+{
+	register struct vnode *vp;
+	struct nameidata nd;
+	struct vattr vattr;
+	int error;
+
+	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, p);
+	error = namei(&nd);
+	if (error != 0)
+		return (error);
+	vp = nd.ni_vp;
+	LEASE_CHECK(vp, p, p->p_ucred, LEASE_WRITE);
+	VOP_LOCK(vp);
+	if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0) {
+		VATTR_NULL(&vattr);
+		vattr.va_mode = uap->mode & ALLPERMS;
+		error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
+	} else
+		error = EROFS;
+	vput(vp);
+	return (error);
+}
+
+/*
+ * Set the mode of the file open on descriptor "fd"; only the ALLPERMS
+ * bits of "mode" are used.  Fails with EROFS on a read-only mount.
+ */
+struct fchmod_args {
+	int	fd;
+	int	mode;
+};
+/* ARGSUSED */
+fchmod(p, uap, retval)
+	struct proc *p;
+	register struct fchmod_args *uap;
+	int *retval;
+{
+	struct file *fp;
+	struct vnode *vp;
+	struct vattr vattr;
+	int error;
+
+	error = getvnode(p->p_fd, uap->fd, &fp);
+	if (error != 0)
+		return (error);
+	vp = (struct vnode *)fp->f_data;
+	LEASE_CHECK(vp, p, p->p_ucred, LEASE_WRITE);
+	VOP_LOCK(vp);
+	if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0) {
+		VATTR_NULL(&vattr);
+		vattr.va_mode = uap->mode & ALLPERMS;
+		error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
+	} else
+		error = EROFS;
+	/* The descriptor keeps its reference; just drop the lock. */
+	VOP_UNLOCK(vp);
+	return (error);
+}
+
+/*
+ * Set the owner and group of the file named by "path" to "uid" and
+ * "gid".  Fails with EROFS on a read-only mount.
+ */
+struct chown_args {
+	char	*path;
+	int	uid;
+	int	gid;
+};
+/* ARGSUSED */
+chown(p, uap, retval)
+	struct proc *p;
+	register struct chown_args *uap;
+	int *retval;
+{
+	register struct vnode *vp;
+	struct nameidata nd;
+	struct vattr vattr;
+	int error;
+
+	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, p);
+	error = namei(&nd);
+	if (error != 0)
+		return (error);
+	vp = nd.ni_vp;
+	LEASE_CHECK(vp, p, p->p_ucred, LEASE_WRITE);
+	VOP_LOCK(vp);
+	if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0) {
+		VATTR_NULL(&vattr);
+		vattr.va_uid = uap->uid;
+		vattr.va_gid = uap->gid;
+		error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
+	} else
+		error = EROFS;
+	vput(vp);
+	return (error);
+}
+
+/*
+ * Set the owner and group of the file open on descriptor "fd" to
+ * "uid" and "gid".  Fails with EROFS on a read-only mount.
+ */
+struct fchown_args {
+	int	fd;
+	int	uid;
+	int	gid;
+};
+/* ARGSUSED */
+fchown(p, uap, retval)
+	struct proc *p;
+	register struct fchown_args *uap;
+	int *retval;
+{
+	struct file *fp;
+	struct vnode *vp;
+	struct vattr vattr;
+	int error;
+
+	error = getvnode(p->p_fd, uap->fd, &fp);
+	if (error != 0)
+		return (error);
+	vp = (struct vnode *)fp->f_data;
+	LEASE_CHECK(vp, p, p->p_ucred, LEASE_WRITE);
+	VOP_LOCK(vp);
+	if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0) {
+		VATTR_NULL(&vattr);
+		vattr.va_uid = uap->uid;
+		vattr.va_gid = uap->gid;
+		error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
+	} else
+		error = EROFS;
+	/* The descriptor keeps its reference; just drop the lock. */
+	VOP_UNLOCK(vp);
+	return (error);
+}
+
+/*
+ * Set the access and modification times of the file named by "path".
+ * "tptr" points at two timevals (access, then modification); if it is
+ * NULL both times are set to the current time and VA_UTIMES_NULL is
+ * flagged in the attribute request.
+ */
+struct utimes_args {
+	char	*path;
+	struct	timeval *tptr;
+};
+/* ARGSUSED */
+utimes(p, uap, retval)
+	struct proc *p;
+	register struct utimes_args *uap;
+	int *retval;
+{
+	register struct vnode *vp;
+	struct nameidata nd;
+	struct timeval tv[2];
+	struct vattr vattr;
+	int error;
+
+	VATTR_NULL(&vattr);
+	if (uap->tptr != NULL) {
+		error = copyin((caddr_t)uap->tptr, (caddr_t)tv, sizeof (tv));
+		if (error != 0)
+			return (error);
+	} else {
+		microtime(&tv[0]);
+		tv[1] = tv[0];
+		vattr.va_vaflags |= VA_UTIMES_NULL;
+	}
+	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, p);
+	error = namei(&nd);
+	if (error != 0)
+		return (error);
+	vp = nd.ni_vp;
+	LEASE_CHECK(vp, p, p->p_ucred, LEASE_WRITE);
+	VOP_LOCK(vp);
+	if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0) {
+		/* timeval carries microseconds; the vattr wants nanoseconds. */
+		vattr.va_atime.ts_sec = tv[0].tv_sec;
+		vattr.va_atime.ts_nsec = tv[0].tv_usec * 1000;
+		vattr.va_mtime.ts_sec = tv[1].tv_sec;
+		vattr.va_mtime.ts_nsec = tv[1].tv_usec * 1000;
+		error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
+	} else
+		error = EROFS;
+	vput(vp);
+	return (error);
+}
+
+/*
+ * Set the size of the file named by "path" to "length".
+ * Directories are refused with EISDIR; otherwise vn_writechk() and a
+ * VWRITE access check must both pass before the size is changed.
+ */
+struct truncate_args {
+	char	*path;
+	int	pad;
+	off_t	length;
+};
+/* ARGSUSED */
+truncate(p, uap, retval)
+	struct proc *p;
+	register struct truncate_args *uap;
+	int *retval;
+{
+	register struct vnode *vp;
+	struct nameidata nd;
+	struct vattr vattr;
+	int error;
+
+	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, p);
+	error = namei(&nd);
+	if (error != 0)
+		return (error);
+	vp = nd.ni_vp;
+	LEASE_CHECK(vp, p, p->p_ucred, LEASE_WRITE);
+	VOP_LOCK(vp);
+	if (vp->v_type == VDIR)
+		error = EISDIR;
+	else {
+		error = vn_writechk(vp);
+		if (error == 0)
+			error = VOP_ACCESS(vp, VWRITE, p->p_ucred, p);
+		if (error == 0) {
+			VATTR_NULL(&vattr);
+			vattr.va_size = uap->length;
+			error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
+		}
+	}
+	vput(vp);
+	return (error);
+}
+
+/*
+ * Set the size of the file open on descriptor "fd" to "length".
+ * The descriptor must be open for writing (else EINVAL); directories
+ * are refused with EISDIR.  The attribute change is made with the
+ * credential saved in the file structure.
+ */
+struct ftruncate_args {
+	int	fd;
+	int	pad;
+	off_t	length;
+};
+/* ARGSUSED */
+ftruncate(p, uap, retval)
+	struct proc *p;
+	register struct ftruncate_args *uap;
+	int *retval;
+{
+	struct file *fp;
+	struct vnode *vp;
+	struct vattr vattr;
+	int error;
+
+	error = getvnode(p->p_fd, uap->fd, &fp);
+	if (error != 0)
+		return (error);
+	if (!(fp->f_flag & FWRITE))
+		return (EINVAL);
+	vp = (struct vnode *)fp->f_data;
+	LEASE_CHECK(vp, p, p->p_ucred, LEASE_WRITE);
+	VOP_LOCK(vp);
+	if (vp->v_type == VDIR)
+		error = EISDIR;
+	else {
+		error = vn_writechk(vp);
+		if (error == 0) {
+			VATTR_NULL(&vattr);
+			vattr.va_size = uap->length;
+			error = VOP_SETATTR(vp, &vattr, fp->f_cred, p);
+		}
+	}
+	VOP_UNLOCK(vp);
+	return (error);
+}
+
+#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
+/*
+ * Compatibility truncate: takes the length as a long, repackages the
+ * arguments in the current layout and hands them to truncate().
+ */
+struct otruncate_args {
+	char	*path;
+	long	length;
+};
+/* ARGSUSED */
+otruncate(p, uap, retval)
+	struct proc *p;
+	register struct otruncate_args *uap;
+	int *retval;
+{
+	struct truncate_args newargs;
+	int error;
+
+	newargs.length = uap->length;
+	newargs.path = uap->path;
+	error = truncate(p, &newargs, retval);
+	return (error);
+}
+
+/*
+ * Compatibility ftruncate: takes the length as a long, repackages the
+ * arguments in the current layout and hands them to ftruncate().
+ */
+struct oftruncate_args {
+	int	fd;
+	long	length;
+};
+/* ARGSUSED */
+oftruncate(p, uap, retval)
+	struct proc *p;
+	register struct oftruncate_args *uap;
+	int *retval;
+{
+	struct ftruncate_args newargs;
+	int error;
+
+	newargs.length = uap->length;
+	newargs.fd = uap->fd;
+	error = ftruncate(p, &newargs, retval);
+	return (error);
+}
+#endif /* COMPAT_43 || COMPAT_SUNOS */
+
+/*
+ * Flush the file open on descriptor "fd": VOP_FSYNC is called with
+ * MNT_WAIT and the credential saved in the file structure, with the
+ * vnode locked around the call.
+ */
+struct fsync_args {
+	int	fd;
+};
+/* ARGSUSED */
+fsync(p, uap, retval)
+	struct proc *p;
+	struct fsync_args *uap;
+	int *retval;
+{
+	register struct vnode *vp;
+	struct file *fp;
+	int error;
+
+	error = getvnode(p->p_fd, uap->fd, &fp);
+	if (error == 0) {
+		vp = (struct vnode *)fp->f_data;
+		VOP_LOCK(vp);
+		error = VOP_FSYNC(vp, fp->f_cred, MNT_WAIT, p);
+		VOP_UNLOCK(vp);
+	}
+	return (error);
+}
+
+/*
+ * Rename files.  Source and destination must either both be directories,
+ * or both not be directories.  If target is a directory, it must be empty.
+ *
+ * Both names are looked up with SAVESTART; the saved start directories
+ * and the pathname buffers are released at the bottom of the function.
+ * Internally an error value of -1 marks the "source and destination are
+ * the same directory entry" case: the operation is aborted like any
+ * other failure, but 0 is returned to the caller.
+ */
+struct rename_args {
+	char	*from;
+	char	*to;
+};
+/* ARGSUSED */
+rename(p, uap, retval)
+	struct proc *p;
+	register struct rename_args *uap;
+	int *retval;
+{
+	register struct vnode *tvp, *fvp, *tdvp;
+	struct nameidata fromnd, tond;
+	int error;
+
+	NDINIT(&fromnd, DELETE, WANTPARENT | SAVESTART, UIO_USERSPACE,
+		uap->from, p);
+	if (error = namei(&fromnd))
+		return (error);
+	fvp = fromnd.ni_vp;
+	NDINIT(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART,
+		UIO_USERSPACE, uap->to, p);
+	if (error = namei(&tond)) {
+		VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd);
+		vrele(fromnd.ni_dvp);
+		vrele(fvp);
+		goto out1;	/* skip the cleanup of tond */
+	}
+	tdvp = tond.ni_dvp;
+	tvp = tond.ni_vp;
+	if (tvp != NULL) {	/* target exists: types must agree */
+		if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
+			error = ENOTDIR;
+			goto out;
+		} else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
+			error = EISDIR;
+			goto out;
+		}
+	}
+	if (fvp == tdvp)	/* source is the target's parent directory */
+		error = EINVAL;
+	/*
+	 * If source is the same as the destination (that is the
+	 * same inode number with the same name in the same directory),
+	 * then there is nothing to do.
+	 */
+	if (fvp == tvp && fromnd.ni_dvp == tdvp &&
+	    fromnd.ni_cnd.cn_namelen == tond.ni_cnd.cn_namelen &&
+	    !bcmp(fromnd.ni_cnd.cn_nameptr, tond.ni_cnd.cn_nameptr,
+	      fromnd.ni_cnd.cn_namelen))
+		error = -1;	/* translated to 0 before returning */
+out:
+	if (!error) {
+		LEASE_CHECK(tdvp, p, p->p_ucred, LEASE_WRITE);
+		if (fromnd.ni_dvp != tdvp)
+			LEASE_CHECK(fromnd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
+		if (tvp)
+			LEASE_CHECK(tvp, p, p->p_ucred, LEASE_WRITE);
+		error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd,
+				   tond.ni_dvp, tond.ni_vp, &tond.ni_cnd);
+	} else {
+		/* Abort both pending operations and drop every vnode held. */
+		VOP_ABORTOP(tond.ni_dvp, &tond.ni_cnd);
+		if (tdvp == tvp)
+			vrele(tdvp);
+		else
+			vput(tdvp);
+		if (tvp)
+			vput(tvp);
+		VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd);
+		vrele(fromnd.ni_dvp);
+		vrele(fvp);
+	}
+	vrele(tond.ni_startdir);
+	FREE(tond.ni_cnd.cn_pnbuf, M_NAMEI);
+out1:
+	if (fromnd.ni_startdir)
+		vrele(fromnd.ni_startdir);
+	FREE(fromnd.ni_cnd.cn_pnbuf, M_NAMEI);
+	if (error == -1)	/* same entry: report success */
+		return (0);
+	return (error);
+}
+
+/*
+ * Create the directory "path".  The requested mode is limited to the
+ * ACCESSPERMS bits and then filtered through the process's file
+ * creation mask.  EEXIST is returned if the name is already present.
+ */
+struct mkdir_args {
+	char	*path;
+	int	mode;
+};
+/* ARGSUSED */
+mkdir(p, uap, retval)
+	struct proc *p;
+	register struct mkdir_args *uap;
+	int *retval;
+{
+	register struct vnode *vp;
+	struct nameidata nd;
+	struct vattr vattr;
+	int error;
+
+	NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, uap->path, p);
+	error = namei(&nd);
+	if (error != 0)
+		return (error);
+	vp = nd.ni_vp;
+	if (vp != NULL) {
+		/* Name already exists: back out of the create. */
+		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
+		if (nd.ni_dvp != vp)
+			vput(nd.ni_dvp);
+		else
+			vrele(nd.ni_dvp);
+		vrele(vp);
+		return (EEXIST);
+	}
+	VATTR_NULL(&vattr);
+	vattr.va_type = VDIR;
+	vattr.va_mode = (uap->mode & ACCESSPERMS) &~ p->p_fd->fd_cmask;
+	LEASE_CHECK(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
+	error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
+	if (error == 0)
+		vput(nd.ni_vp);
+	return (error);
+}
+
+/*
+ * Remove a directory file.
+ *
+ * The name is looked up with both parent and leaf locked.  ENOTDIR is
+ * returned if the leaf is not a directory, EINVAL if the leaf and its
+ * parent are the same vnode, and EBUSY if the leaf has VROOT set.
+ * On any of these errors the pending operation is aborted and both
+ * vnodes are released here; otherwise VOP_RMDIR is called.
+ */
+struct rmdir_args {
+	char	*path;
+};
+/* ARGSUSED */
+rmdir(p, uap, retval)
+	struct proc *p;
+	struct rmdir_args *uap;
+	int *retval;
+{
+	register struct vnode *vp;
+	int error;
+	struct nameidata nd;
+
+	NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF, UIO_USERSPACE, uap->path, p);
+	if (error = namei(&nd))
+		return (error);
+	vp = nd.ni_vp;
+	if (vp->v_type != VDIR) {
+		error = ENOTDIR;
+		goto out;
+	}
+	/*
+	 * No rmdir "." please.
+	 */
+	if (nd.ni_dvp == vp) {
+		error = EINVAL;
+		goto out;
+	}
+	/*
+	 * The root of a mounted filesystem cannot be deleted.
+	 */
+	if (vp->v_flag & VROOT)
+		error = EBUSY;
+out:
+	if (!error) {
+		LEASE_CHECK(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
+		LEASE_CHECK(vp, p, p->p_ucred, LEASE_WRITE);
+		error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
+	} else {
+		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
+		if (nd.ni_dvp == vp)
+			vrele(nd.ni_dvp);	/* same vnode: drop only the extra reference */
+		else
+			vput(nd.ni_dvp);
+		vput(vp);
+	}
+	return (error);
+}
+
+#ifdef COMPAT_43
+/*
+ * Read a block of directory entries in a file system independent format.
+ *
+ * Compatibility version.  Except in the non-little-endian case with
+ * mnt_maxsymlinklen <= 0, which reads straight into the user buffer,
+ * the entries are read into a temporary kernel buffer, the d_type and
+ * d_namlen bytes of each entry are rewritten to the layout the old
+ * interface expects, and the result is then copied out to the user
+ * buffer.  The file offset in effect before the read is stored through
+ * "basep" and the number of bytes transferred is returned in *retval.
+ */
+struct ogetdirentries_args {
+	int	fd;
+	char	*buf;
+	u_int	count;
+	long	*basep;
+};
+ogetdirentries(p, uap, retval)
+	struct proc *p;
+	register struct ogetdirentries_args *uap;
+	int *retval;
+{
+	register struct vnode *vp;
+	struct file *fp;
+	struct uio auio, kuio;		/* user-space and kernel-space transfers */
+	struct iovec aiov, kiov;
+	struct dirent *dp, *edp;	/* current entry and end of data read */
+	caddr_t dirbuf;			/* temporary kernel copy of the entries */
+	int error, readcnt;
+	long loff;			/* offset at which this read started */
+
+	if (error = getvnode(p->p_fd, uap->fd, &fp))
+		return (error);
+	if ((fp->f_flag & FREAD) == 0)
+		return (EBADF);
+	vp = (struct vnode *)fp->f_data;
+	if (vp->v_type != VDIR)
+		return (EINVAL);
+	aiov.iov_base = uap->buf;
+	aiov.iov_len = uap->count;
+	auio.uio_iov = &aiov;
+	auio.uio_iovcnt = 1;
+	auio.uio_rw = UIO_READ;
+	auio.uio_segflg = UIO_USERSPACE;
+	auio.uio_procp = p;
+	auio.uio_resid = uap->count;
+	VOP_LOCK(vp);
+	loff = auio.uio_offset = fp->f_offset;
+#	if (BYTE_ORDER != LITTLE_ENDIAN)
+		if (vp->v_mount->mnt_maxsymlinklen <= 0) {
+			error = VOP_READDIR(vp, &auio, fp->f_cred);
+			fp->f_offset = auio.uio_offset;
+		} else
+#	endif
+	{
+		kuio = auio;		/* same request, redirected to dirbuf */
+		kuio.uio_iov = &kiov;
+		kuio.uio_segflg = UIO_SYSSPACE;
+		kiov.iov_len = uap->count;
+		MALLOC(dirbuf, caddr_t, uap->count, M_TEMP, M_WAITOK);
+		kiov.iov_base = dirbuf;
+		error = VOP_READDIR(vp, &kuio, fp->f_cred);
+		fp->f_offset = kuio.uio_offset;
+		if (error == 0) {
+			readcnt = uap->count - kuio.uio_resid;
+			edp = (struct dirent *)&dirbuf[readcnt];
+			for (dp = (struct dirent *)dirbuf; dp < edp; ) {
+#				if (BYTE_ORDER == LITTLE_ENDIAN)
+					/*
+					 * The expected low byte of
+					 * dp->d_namlen is our dp->d_type.
+					 * The high MBZ byte of dp->d_namlen
+					 * is our dp->d_namlen.
+					 */
+					dp->d_type = dp->d_namlen;
+					dp->d_namlen = 0;
+#				else
+					/*
+					 * The dp->d_type is the high byte
+					 * of the expected dp->d_namlen,
+					 * so must be zero'ed.
+					 */
+					dp->d_type = 0;
+#				endif
+				if (dp->d_reclen > 0) {
+					dp = (struct dirent *)
+					    ((char *)dp + dp->d_reclen);
+				} else {
+					error = EIO;	/* zero-length record: cannot advance */
+					break;
+				}
+			}
+			if (dp >= edp)	/* whole buffer converted without error */
+				error = uiomove(dirbuf, readcnt, &auio);
+		}
+		FREE(dirbuf, M_TEMP);
+	}
+	VOP_UNLOCK(vp);
+	if (error)
+		return (error);
+	error = copyout((caddr_t)&loff, (caddr_t)uap->basep, sizeof(long));
+	*retval = uap->count - auio.uio_resid;
+	return (error);
+}
+#endif
+
+/*
+ * Read a block of directory entries in a file system independent format.
+ *
+ * Up to "count" bytes of entries are read from the directory open on
+ * "fd" into the user buffer "buf".  The file offset in effect before
+ * the read is stored through "basep" and the number of bytes
+ * transferred is returned in *retval.
+ *
+ * When a read returns no data, two cases switch the descriptor over to
+ * another directory and retry from offset 0: with UNION configured, a
+ * union vnode that has a lower vnode; and the root of a MNT_UNION
+ * mount, which falls through to the vnode it covers.
+ */
+struct getdirentries_args {
+	int	fd;
+	char	*buf;
+	u_int	count;
+	long	*basep;
+};
+getdirentries(p, uap, retval)
+	struct proc *p;
+	register struct getdirentries_args *uap;
+	int *retval;
+{
+	register struct vnode *vp;
+	struct file *fp;
+	struct uio auio;
+	struct iovec aiov;
+	long loff;
+	int error;
+
+	if (error = getvnode(p->p_fd, uap->fd, &fp))
+		return (error);
+	if ((fp->f_flag & FREAD) == 0)
+		return (EBADF);
+	vp = (struct vnode *)fp->f_data;
+unionread:
+	if (vp->v_type != VDIR)
+		return (EINVAL);
+	aiov.iov_base = uap->buf;
+	aiov.iov_len = uap->count;
+	auio.uio_iov = &aiov;
+	auio.uio_iovcnt = 1;
+	auio.uio_rw = UIO_READ;
+	auio.uio_segflg = UIO_USERSPACE;
+	auio.uio_procp = p;
+	auio.uio_resid = uap->count;
+	VOP_LOCK(vp);
+	loff = auio.uio_offset = fp->f_offset;
+	error = VOP_READDIR(vp, &auio, fp->f_cred);
+	fp->f_offset = auio.uio_offset;
+	VOP_UNLOCK(vp);
+	if (error)
+		return (error);
+
+#ifdef UNION
+{
+	extern int (**union_vnodeop_p)();
+	extern struct vnode *union_lowervp __P((struct vnode *));
+
+	/* Nothing was read from a union vnode: try its lower vnode. */
+	if ((uap->count == auio.uio_resid) &&
+	    (vp->v_op == union_vnodeop_p)) {
+		struct vnode *tvp = vp;
+
+		vp = union_lowervp(vp);
+		if (vp != NULLVP) {
+			VOP_LOCK(vp);
+			/*
+			 * Pass the credential and process as well; the
+			 * open was previously issued with only the vnode
+			 * and mode.
+			 */
+			error = VOP_OPEN(vp, FREAD, fp->f_cred, p);
+			VOP_UNLOCK(vp);
+
+			if (error) {
+				vrele(vp);
+				return (error);
+			}
+			fp->f_data = (caddr_t) vp;
+			fp->f_offset = 0;
+			error = vn_close(tvp, FREAD, fp->f_cred, p);
+			if (error)
+				return (error);
+			goto unionread;
+		}
+	}
+}
+#endif
+
+	/* Nothing was read from the root of a union mount: go below it. */
+	if ((uap->count == auio.uio_resid) &&
+	    (vp->v_flag & VROOT) &&
+	    (vp->v_mount->mnt_flag & MNT_UNION)) {
+		struct vnode *tvp = vp;
+		vp = vp->v_mount->mnt_vnodecovered;
+		VREF(vp);
+		fp->f_data = (caddr_t) vp;
+		fp->f_offset = 0;
+		vrele(tvp);
+		goto unionread;
+	}
+	error = copyout((caddr_t)&loff, (caddr_t)uap->basep, sizeof(long));
+	*retval = uap->count - auio.uio_resid;
+	return (error);
+}
+
+/*
+ * Replace the process's file creation mask with the ALLPERMS bits of
+ * "newmask"; the previous mask is handed back through retval.
+ */
+struct umask_args {
+	int	newmask;
+};
+mode_t				/* XXX */
+umask(p, uap, retval)
+	struct proc *p;
+	struct umask_args *uap;
+	int *retval;
+{
+	register struct filedesc *fdp = p->p_fd;
+
+	/* Report the old mask before it is overwritten. */
+	*retval = fdp->fd_cmask;
+	fdp->fd_cmask = uap->newmask & ALLPERMS;
+	return (0);
+}
+
+/*
+ * Void all references to the file named by "path" by ripping the
+ * underlying filesystem away from its vnode.  Only character and
+ * block devices are accepted (else EINVAL), and the caller must
+ * either own the file or pass the suser() check.
+ */
+struct revoke_args {
+	char	*path;
+};
+/* ARGSUSED */
+revoke(p, uap, retval)
+	struct proc *p;
+	register struct revoke_args *uap;
+	int *retval;
+{
+	register struct vnode *vp;
+	struct nameidata nd;
+	struct vattr vattr;
+	int error;
+
+	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, p);
+	error = namei(&nd);
+	if (error != 0)
+		return (error);
+	vp = nd.ni_vp;
+	if (vp->v_type != VCHR && vp->v_type != VBLK)
+		error = EINVAL;
+	else if ((error = VOP_GETATTR(vp, &vattr, p->p_ucred, p)) == 0 &&
+	    (p->p_ucred->cr_uid == vattr.va_uid ||
+	    (error = suser(p->p_ucred, &p->p_acflag)) == 0)) {
+		/* Nothing to do unless someone else is using the vnode. */
+		if (vp->v_usecount > 1 || (vp->v_flag & VALIASED))
+			vgoneall(vp);
+	}
+	vrele(vp);
+	return (error);
+}
+
+/*
+ * Map descriptor "fd" in descriptor table "fdp" to its file structure,
+ * stored through "fpp".  Returns EBADF if the descriptor is out of
+ * range or not open, and EINVAL if it does not refer to a vnode.
+ */
+getvnode(fdp, fd, fpp)
+	struct filedesc *fdp;
+	struct file **fpp;
+	int fd;
+{
+	struct file *fp;
+
+	/* The unsigned compare also rejects negative descriptors. */
+	if ((u_int)fd >= fdp->fd_nfiles)
+		return (EBADF);
+	fp = fdp->fd_ofiles[fd];
+	if (fp == NULL)
+		return (EBADF);
+	if (fp->f_type != DTYPE_VNODE)
+		return (EINVAL);
+	*fpp = fp;
+	return (0);
+}
diff --git a/sys/kern/vfs_init.c b/sys/kern/vfs_init.c
new file mode 100644
index 00000000000..1ce7347bdc8
--- /dev/null
+++ b/sys/kern/vfs_init.c
@@ -0,0 +1,246 @@
+/*
+ * Copyright (c) 1989, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed
+ * to Berkeley by John Heidemann of the UCLA Ficus project.
+ *
+ * Source: * @(#)i405_init.c 2.10 92/04/27 UCLA Ficus project
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)vfs_init.c	8.3 (Berkeley) 1/4/94
+ */
+
+
+#include <sys/param.h>
+#include <sys/mount.h>
+#include <sys/time.h>
+#include <sys/vnode.h>
+#include <sys/stat.h>
+#include <sys/namei.h>
+#include <sys/ucred.h>
+#include <sys/buf.h>
+#include <sys/errno.h>
+#include <sys/malloc.h>
+
+/*
+ * Sigh, such primitive tools are these...
+ *
+ * DODEBUG(A) expands to its argument when the "#if 0" below is changed
+ * to a true condition, and to nothing otherwise.
+ */
+#if 0
+#define DODEBUG(A) A
+#else
+#define DODEBUG(A)
+#endif
+
+extern struct vnodeopv_desc *vfs_opv_descs[];
+				/* a list of lists of vnodeops defns */
+extern struct vnodeop_desc *vfs_op_descs[];
+				/* and the operations they perform */
+/*
+ * This code doesn't work if the defn is **vnodop_defns with cc.
+ * The problem is because of the compiler sometimes putting in an
+ * extra level of indirection for arrays.  It's an interesting
+ * "feature" of C.
+ */
+int vfs_opv_numops;	/* number of entries in vfs_op_descs; set by vfs_op_init() */
+
+typedef (*PFI)();   /* the standard Pointer to a Function returning an Int */
+
+/*
+ * Generic fallback for a vnode operation: takes no arguments and
+ * always fails with EOPNOTSUPP.
+ */
+int
+vn_default_error()
+{
+	int error = EOPNOTSUPP;
+
+	return (error);
+}
+
+/*
+ * vfs_init.c
+ *
+ * Allocate and fill in operations vectors.
+ *
+ * An undocumented feature of this approach to defining operations is that
+ * there can be multiple entries in vfs_opv_descs for the same operations
+ * vector. This allows third parties to extend the set of operations
+ * supported by another layer in a binary compatible way. For example,
+ * assume that NFS needed to be modified to support Ficus. NFS has an entry
+ * (probably nfs_vnopdeop_decls) declaring all the operations NFS supports by
+ * default. Ficus could add another entry (ficus_nfs_vnodeop_decl_extensions)
+ * listing those new operations Ficus adds to NFS, all without modifying the
+ * NFS code. (Of course, the OTW NFS protocol still needs to be munged, but
+ * that is a(whole)nother story.) This is a feature.
+ *
+ * Works in two passes over vfs_opv_descs: the first allocates each
+ * vector that is still NULL and stores every listed implementation at
+ * its operation's vdesc_offset; the second fills each slot that is
+ * still NULL with that vector's vop_default entry.  Panics if a listed
+ * operation fails the offset sanity check or a vector has no default.
+ */
+void
+vfs_opv_init()
+{
+	int i, j, k;
+	int (***opv_desc_vector_p)();
+	int (**opv_desc_vector)();
+	struct vnodeopv_entry_desc *opve_descp;
+
+	/*
+	 * Allocate the dynamic vectors and fill them in.
+	 */
+	for (i=0; vfs_opv_descs[i]; i++) {
+		opv_desc_vector_p = vfs_opv_descs[i]->opv_desc_vector_p;
+		/*
+		 * Allocate and init the vector, if it needs it.
+		 * Also handle backwards compatibility.
+		 */
+		if (*opv_desc_vector_p == NULL) {
+			/* XXX - shouldn't be M_VNODE */
+			MALLOC(*opv_desc_vector_p, PFI*,
+			       vfs_opv_numops*sizeof(PFI), M_VNODE, M_WAITOK);
+			bzero (*opv_desc_vector_p, vfs_opv_numops*sizeof(PFI));
+			DODEBUG(printf("vector at %x allocated\n",
+			    opv_desc_vector_p));
+		}
+		opv_desc_vector = *opv_desc_vector_p;
+		for (j=0; vfs_opv_descs[i]->opv_desc_ops[j].opve_op; j++) {
+			opve_descp = &(vfs_opv_descs[i]->opv_desc_ops[j]);
+
+			/*
+			 * Sanity check:  is this operation listed
+			 * in the list of operations?  We check this
+			 * by seeing if its offset is zero.  Since
+			 * the default routine should always be listed
+			 * first, it should be the only one with a zero
+			 * offset.  Any other operation with a zero
+			 * offset is probably not listed in
+			 * vfs_op_descs, and so is probably an error.
+			 *
+			 * A panic here means the layer programmer
+			 * has committed the all-too common bug
+			 * of adding a new operation to the layer's
+			 * list of vnode operations but
+			 * not adding the operation to the system-wide
+			 * list of supported operations.
+			 */
+			if (opve_descp->opve_op->vdesc_offset == 0 &&
+				    opve_descp->opve_op->vdesc_offset !=
+				    	VOFFSET(vop_default)) {
+				printf("operation %s not listed in %s.\n",
+				    opve_descp->opve_op->vdesc_name,
+				    "vfs_op_descs");
+				panic ("vfs_opv_init: bad operation");
+			}
+			/*
+			 * Fill in this entry.
+			 */
+			opv_desc_vector[opve_descp->opve_op->vdesc_offset] =
+					opve_descp->opve_impl;
+		}
+	}
+	/*
+	 * Finally, go back and replace unfilled routines
+	 * with their default.  (Sigh, an O(n^3) algorithm.  I
+	 * could make it better, but that'd be work, and n is small.)
+	 */
+	for (i = 0; vfs_opv_descs[i]; i++) {
+		opv_desc_vector = *(vfs_opv_descs[i]->opv_desc_vector_p);
+		/*
+		 * Force every operations vector to have a default routine.
+		 */
+		if (opv_desc_vector[VOFFSET(vop_default)]==NULL) {
+			panic("vfs_opv_init: operation vector without default routine.");
+		}
+		for (k = 0; k<vfs_opv_numops; k++)
+			if (opv_desc_vector[k] == NULL)
+				opv_desc_vector[k] = 
+					opv_desc_vector[VOFFSET(vop_default)];
+	}
+}
+
+/*
+ * First stage of operations-vector setup: clear every vector pointer
+ * named in vfs_opv_descs, then number the operations in vfs_op_descs
+ * consecutively from zero, leaving the total in vfs_opv_numops.
+ */
+void
+vfs_op_init()
+{
+	int i;
+
+	DODEBUG(printf("Vnode_interface_init.\n"));
+	/*
+	 * Start every vector pointer out as NULL.
+	 */
+	for (i = 0; vfs_opv_descs[i]; i++)
+		*(vfs_opv_descs[i]->opv_desc_vector_p) = NULL;
+	/*
+	 * Walk the operation table, handing each entry the next
+	 * free offset; the final count is the number of operations.
+	 */
+	vfs_opv_numops = 0;
+	for (i = 0; vfs_op_descs[i]; i++)
+		vfs_op_descs[i]->vdesc_offset = vfs_opv_numops++;
+	DODEBUG(printf ("vfs_opv_numops=%d\n", vfs_opv_numops));
+}
+
+/*
+ * Routines having to do with the management of the vnode table.
+ */
+extern struct vnodeops dead_vnodeops;
+extern struct vnodeops spec_vnodeops;
+extern void vclean();
+struct vattr va_null;	/* filled in by vattr_null() from vfsinit() */
+
+/*
+ * Bring up the vnode layer: the vnode table, the name cache and the
+ * operations vectors are initialized first, then the vfs_init routine
+ * of every file system type present in vfssw is called.
+ */
+vfsinit()
+{
+	struct vfsops **vfsp;
+
+	/* Vnode table, then the vnode name cache. */
+	vntblinit();
+	nchinit();
+	/*
+	 * Vnode operation vectors: number the operations, then
+	 * allocate and fill in the vectors themselves.
+	 */
+	vfs_op_init();
+	vfs_opv_init();
+	/*
+	 * Per-type initialization; empty vfssw slots are skipped.
+	 */
+	vattr_null(&va_null);
+	for (vfsp = &vfssw[0]; vfsp <= &vfssw[MOUNT_MAXTYPE]; vfsp++) {
+		if (*vfsp != NULL)
+			(*(*vfsp)->vfs_init)();
+	}
+}
diff --git a/sys/kern/vfs_lookup.c b/sys/kern/vfs_lookup.c
new file mode 100644
index 00000000000..0fa5aa19b78
--- /dev/null
+++ b/sys/kern/vfs_lookup.c
@@ -0,0 +1,506 @@
+/*
+ * Copyright (c) 1982, 1986, 1989, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)vfs_lookup.c	8.4 (Berkeley) 2/16/94
+ */
+
+#include <sys/param.h>
+#include <sys/syslimits.h>
+#include <sys/time.h>
+#include <sys/namei.h>
+#include <sys/vnode.h>
+#include <sys/mount.h>
+#include <sys/errno.h>
+#include <sys/malloc.h>
+#include <sys/filedesc.h>
+#include <sys/proc.h>
+
+#ifdef KTRACE
+#include <sys/ktrace.h>
+#endif
+
+/*
+ * Convert a pathname into a vnode, returned in ndp->ni_vp.
+ *
+ * The FOLLOW flag is set when symbolic links are to be followed
+ * when they occur at the end of the name translation process.
+ * Symbolic links are always followed for all other pathname
+ * components other than the last.
+ *
+ * The segflg defines whether the name is to be copied from user
+ * space or kernel space.
+ *
+ * Overall outline of namei:
+ *
+ *	copy in name
+ *	get starting directory
+ *	while (!done && !error) {
+ *		call lookup to search path.
+ *		if symbolic link, massage name in buffer and continue
+ *	}
+ *
+ * Arguments:
+ *	ndp	lookup state.  ni_dirp/ni_segflg name the path to
+ *		translate; ni_cnd carries the operation, flags and the
+ *		calling process.  cn_cred is filled in here from
+ *		cn_proc->p_ucred.
+ *
+ * Returns 0 on success or an errno value.  On every error return made
+ * directly by this function the pathname buffer has been freed; where
+ * namei itself cleans up (copy failure, symlink errors) ni_vp is also
+ * set to NULL.
+ *
+ * On success the pathname buffer is freed unless SAVENAME or SAVESTART
+ * was requested, in which case HASBUF is set in cn_flags and the buffer
+ * stays in cn_pnbuf.
+ */
+int
+namei(ndp)
+	register struct nameidata *ndp;
+{
+	register struct filedesc *fdp;	/* pointer to file descriptor state */
+	register char *cp;		/* buffer receiving symbolic link text */
+	register struct vnode *dp;	/* the directory we are searching */
+	struct iovec aiov;		/* uio for reading symbolic links */
+	struct uio auio;
+	int error, linklen;
+	struct componentname *cnp = &ndp->ni_cnd;
+
+	ndp->ni_cnd.cn_cred = ndp->ni_cnd.cn_proc->p_ucred;	/* NOTE(review): cn_proc is dereferenced before the DIAGNOSTIC NULL check below */
+#ifdef DIAGNOSTIC
+	if (!cnp->cn_cred || !cnp->cn_proc)
+		panic ("namei: bad cred/proc");
+	if (cnp->cn_nameiop & (~OPMASK))
+		panic ("namei: nameiop contaminated with flags");
+	if (cnp->cn_flags & OPMASK)
+		panic ("namei: flags contaminated with nameiops");
+#endif
+	fdp = cnp->cn_proc->p_fd;
+
+	/*
+	 * Get a buffer for the name to be translated, and copy the
+	 * name into the buffer.  A caller that set HASBUF has already
+	 * supplied cn_pnbuf, so no allocation is done for it.
+	 * The copy also records the string length in ni_pathlen.
+	 */
+	if ((cnp->cn_flags & HASBUF) == 0)
+		MALLOC(cnp->cn_pnbuf, caddr_t, MAXPATHLEN, M_NAMEI, M_WAITOK);
+	if (ndp->ni_segflg == UIO_SYSSPACE)
+		error = copystr(ndp->ni_dirp, cnp->cn_pnbuf,
+			    MAXPATHLEN, &ndp->ni_pathlen);
+	else
+		error = copyinstr(ndp->ni_dirp, cnp->cn_pnbuf,
+			    MAXPATHLEN, &ndp->ni_pathlen);
+	if (error) {
+		free(cnp->cn_pnbuf, M_NAMEI);	/* NOTE(review): freed even when the caller supplied it via HASBUF */
+		ndp->ni_vp = NULL;
+		return (error);
+	}
+	ndp->ni_loopcnt = 0;	/* number of symbolic links followed so far */
+#ifdef KTRACE
+	if (KTRPOINT(cnp->cn_proc, KTR_NAMEI))
+		ktrnamei(cnp->cn_proc->p_tracep, cnp->cn_pnbuf);
+#endif
+
+	/*
+	 * Get starting point for the translation.
+	 * The root is the process's fd_rdir if it has one, else the
+	 * global rootvnode; the walk starts at the current directory
+	 * with an extra reference taken on it.
+	 */
+	if ((ndp->ni_rootdir = fdp->fd_rdir) == NULL)
+		ndp->ni_rootdir = rootvnode;
+	dp = fdp->fd_cdir;
+	VREF(dp);
+	for (;;) {
+		/*
+		 * Check if root directory should replace current directory.
+		 * Done at start of translation and after symbolic link.
+		 * Leading slashes are skipped and deducted from ni_pathlen.
+		 */
+		cnp->cn_nameptr = cnp->cn_pnbuf;
+		if (*(cnp->cn_nameptr) == '/') {
+			vrele(dp);
+			while (*(cnp->cn_nameptr) == '/') {
+				cnp->cn_nameptr++;
+				ndp->ni_pathlen--;
+			}
+			dp = ndp->ni_rootdir;
+			VREF(dp);
+		}
+		ndp->ni_startdir = dp;
+		if (error = lookup(ndp)) {
+			FREE(cnp->cn_pnbuf, M_NAMEI);
+			return (error);
+		}
+		/*
+		 * Check for symbolic link.  If lookup did not stop on
+		 * one, the translation is complete.
+		 */
+		if ((cnp->cn_flags & ISSYMLINK) == 0) {
+			if ((cnp->cn_flags & (SAVENAME | SAVESTART)) == 0)
+				FREE(cnp->cn_pnbuf, M_NAMEI);
+			else
+				cnp->cn_flags |= HASBUF;	/* caller now owns cn_pnbuf */
+			return (0);
+		}
+		if ((cnp->cn_flags & LOCKPARENT) && ndp->ni_pathlen == 1)
+			VOP_UNLOCK(ndp->ni_dvp);
+		if (ndp->ni_loopcnt++ >= MAXSYMLINKS) {
+			error = ELOOP;
+			break;
+		}
+		if (ndp->ni_pathlen > 1)	/* more path follows the link: read it into a fresh buffer */
+			MALLOC(cp, char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
+		else				/* link is the last component: overwrite the path in place */
+			cp = cnp->cn_pnbuf;
+		aiov.iov_base = cp;
+		aiov.iov_len = MAXPATHLEN;
+		auio.uio_iov = &aiov;
+		auio.uio_iovcnt = 1;
+		auio.uio_offset = 0;
+		auio.uio_rw = UIO_READ;
+		auio.uio_segflg = UIO_SYSSPACE;
+		auio.uio_procp = (struct proc *)0;
+		auio.uio_resid = MAXPATHLEN;
+		if (error = VOP_READLINK(ndp->ni_vp, &auio, cnp->cn_cred)) {
+			if (ndp->ni_pathlen > 1)
+				free(cp, M_NAMEI);
+			break;
+		}
+		linklen = MAXPATHLEN - auio.uio_resid;	/* bytes of link text actually read */
+		if (linklen + ndp->ni_pathlen >= MAXPATHLEN) {
+			if (ndp->ni_pathlen > 1)
+				free(cp, M_NAMEI);
+			error = ENAMETOOLONG;
+			break;
+		}
+		if (ndp->ni_pathlen > 1) {
+			bcopy(ndp->ni_next, cp + linklen, ndp->ni_pathlen);	/* append the untranslated remainder after the link text */
+			FREE(cnp->cn_pnbuf, M_NAMEI);
+			cnp->cn_pnbuf = cp;
+		} else
+			cnp->cn_pnbuf[linklen] = '\0';
+		ndp->ni_pathlen += linklen;
+		vput(ndp->ni_vp);
+		dp = ndp->ni_dvp;	/* resume from the directory that held the link */
+	}
+	FREE(cnp->cn_pnbuf, M_NAMEI);	/* reached only via the error breaks above */
+	vrele(ndp->ni_dvp);
+	vput(ndp->ni_vp);
+	ndp->ni_vp = NULL;
+	return (error);
+}
+
+/*
+ * Search a pathname.
+ * This is a very central and rather complicated routine.
+ *
+ * The pathname is pointed to by cn_nameptr and is of length ni_pathlen.
+ * The starting directory is taken from ni_startdir. The pathname is
+ * descended until done, or a symbolic link is encountered. The ISSYMLINK
+ * bit in cn_flags is clear if the path is completed; it is set if a
+ * symbolic link needing interpretation is encountered.
+ *
+ * The operation in cn_nameiop is LOOKUP, CREATE, RENAME, or DELETE
+ * depending on whether the name is to be looked up, created, renamed, or
+ * deleted. When CREATE, RENAME, or DELETE is specified, information usable
+ * in creating, renaming, or deleting a directory entry may be calculated.
+ * If cn_flags has LOCKPARENT or'ed into it, the parent directory is returned
+ * locked. If cn_flags has WANTPARENT or'ed into it, the parent directory is
+ * returned unlocked. Otherwise the parent directory is not returned. If
+ * the target of the pathname exists and LOCKLEAF is or'ed into cn_flags
+ * the target is returned locked, otherwise it is returned unlocked.
+ * When creating or renaming and LOCKPARENT is specified, the target may not
+ * be ".".  When deleting and LOCKPARENT is specified, the target may be ".".
+ *
+ * Overall outline of lookup:
+ *
+ * dirloop:
+ *	identify next component of name at cnp->cn_nameptr
+ *	handle degenerate case where name is null string
+ *	if .. and crossing mount points and on mounted filesys, find parent
+ *	call VOP_LOOKUP routine for next component name
+ *	    directory vnode returned in ni_dvp, unlocked unless LOCKPARENT set
+ *	    component vnode returned in ni_vp (if it exists), locked.
+ *	if result vnode is mounted on and crossing mount points,
+ *	    find mounted on vnode
+ *	if more components of name, do next level at dirloop
+ *	return the answer in ni_vp, locked if LOCKLEAF set
+ *	    if LOCKPARENT set, return locked parent in ni_dvp
+ *	    if WANTPARENT set, return unlocked parent in ni_dvp
+ *
+ * Returns 0 on success (including the "stopped at a symbolic link" and
+ * "entry does not exist but may be created" cases) or an errno value.
+ * On the error exits through bad/bad2 the current vnode is released
+ * with vput() and ni_vp is set to NULL.
+ */
+int
+lookup(ndp)
+	register struct nameidata *ndp;
+{
+	register char *cp;		/* pointer into pathname argument */
+	register struct vnode *dp = 0;	/* the directory we are searching */
+	struct vnode *tdp;		/* saved dp */
+	struct mount *mp;		/* mount table entry */
+	int docache;			/* == 0 do not cache last component */
+	int wantparent;			/* 1 => wantparent or lockparent flag */
+	int rdonly;			/* lookup read-only flag bit */
+	int error = 0;
+	struct componentname *cnp = &ndp->ni_cnd;
+
+	/*
+	 * Setup: break out flag bits into variables.
+	 * docache ends up non-zero only when NOCACHE is clear, and is
+	 * forced to zero for DELETE and for any non-CREATE operation
+	 * that wants the parent back.
+	 */
+	wantparent = cnp->cn_flags & (LOCKPARENT | WANTPARENT);
+	docache = (cnp->cn_flags & NOCACHE) ^ NOCACHE;
+	if (cnp->cn_nameiop == DELETE ||
+	    (wantparent && cnp->cn_nameiop != CREATE))
+		docache = 0;
+	rdonly = cnp->cn_flags & RDONLY;
+	ndp->ni_dvp = NULL;
+	cnp->cn_flags &= ~ISSYMLINK;
+	dp = ndp->ni_startdir;
+	ndp->ni_startdir = NULLVP;	/* only set again on return if SAVESTART is requested */
+	VOP_LOCK(dp);
+
+dirloop:
+	/*
+	 * Search a new directory.
+	 *
+	 * The cn_hash value is for use by vfs_cache.
+	 * The last component of the filename is left accessible via
+	 * cnp->cn_nameptr for callers that need the name. Callers needing
+	 * the name set the SAVENAME flag. When done, they assume
+	 * responsibility for freeing the pathname buffer.
+	 *
+	 * The loop below stops cp at the end of the current component
+	 * (a '/' or the terminating NUL); the hash is the plain sum of
+	 * the component's bytes.
+	 */
+	cnp->cn_consume = 0;
+	cnp->cn_hash = 0;
+	for (cp = cnp->cn_nameptr; *cp != 0 && *cp != '/'; cp++)
+		cnp->cn_hash += (unsigned char)*cp;
+	cnp->cn_namelen = cp - cnp->cn_nameptr;
+	if (cnp->cn_namelen > NAME_MAX) {
+		error = ENAMETOOLONG;
+		goto bad;
+	}
+#ifdef NAMEI_DIAGNOSTIC
+	{ char c = *cp;
+	*cp = '\0';
+	printf("{%s}: ", cnp->cn_nameptr);
+	*cp = c; }
+#endif
+	ndp->ni_pathlen -= cnp->cn_namelen;
+	ndp->ni_next = cp;	/* first character after this component */
+	cnp->cn_flags |= MAKEENTRY;
+	if (*cp == '\0' && docache == 0)	/* last component and caching was ruled out above */
+		cnp->cn_flags &= ~MAKEENTRY;
+	if (cnp->cn_namelen == 2 &&
+	    cnp->cn_nameptr[1] == '.' && cnp->cn_nameptr[0] == '.')
+		cnp->cn_flags |= ISDOTDOT;
+	else
+		cnp->cn_flags &= ~ISDOTDOT;
+	if (*ndp->ni_next == 0)
+		cnp->cn_flags |= ISLASTCN;
+	else
+		cnp->cn_flags &= ~ISLASTCN;
+
+
+	/*
+	 * Check for degenerate name (e.g. / or "")
+	 * which is a way of talking about a directory,
+	 * e.g. like "/." or ".".
+	 *
+	 * Only LOOKUP is allowed on such a name (EISDIR otherwise) and
+	 * the current vnode must be a directory (ENOTDIR otherwise).
+	 * The directory itself becomes the result; it is left locked
+	 * only if LOCKPARENT or LOCKLEAF was requested.
+	 */
+	if (cnp->cn_nameptr[0] == '\0') {
+		if (cnp->cn_nameiop != LOOKUP) {
+			error = EISDIR;
+			goto bad;
+		}
+		if (dp->v_type != VDIR) {
+			error = ENOTDIR;
+			goto bad;
+		}
+		if (wantparent) {
+			ndp->ni_dvp = dp;
+			VREF(dp);
+		}
+		ndp->ni_vp = dp;
+		if (!(cnp->cn_flags & (LOCKPARENT | LOCKLEAF)))
+			VOP_UNLOCK(dp);
+		if (cnp->cn_flags & SAVESTART)
+			panic("lookup: SAVESTART");
+		return (0);
+	}
+
+	/*
+	 * Handle "..": two special cases.
+	 * 1. If at root directory (e.g. after chroot)
+	 *    or at absolute root directory
+	 *    then ignore it so can't get out.
+	 * 2. If this vnode is the root of a mounted
+	 *    filesystem, then replace it with the
+	 *    vnode which was mounted on so we take the
+	 *    .. in the other file system.
+	 *
+	 * Case 2 is skipped when NOCROSSMOUNT is set, and repeats for
+	 * as long as the covered vnode is itself a filesystem root.
+	 */
+	if (cnp->cn_flags & ISDOTDOT) {
+		for (;;) {
+			if (dp == ndp->ni_rootdir || dp == rootvnode) {
+				ndp->ni_dvp = dp;
+				ndp->ni_vp = dp;
+				VREF(dp);
+				goto nextname;
+			}
+			if ((dp->v_flag & VROOT) == 0 ||
+			    (cnp->cn_flags & NOCROSSMOUNT))
+				break;
+			tdp = dp;
+			dp = dp->v_mount->mnt_vnodecovered;
+			vput(tdp);
+			VREF(dp);
+			VOP_LOCK(dp);
+		}
+	}
+
+	/*
+	 * We now have a segment name to search for, and a directory to search.
+	 */
+unionlookup:
+	ndp->ni_dvp = dp;
+	if (error = VOP_LOOKUP(dp, &ndp->ni_vp, cnp)) {
+#ifdef DIAGNOSTIC
+		if (ndp->ni_vp != NULL)
+			panic("leaf should be empty");
+#endif
+#ifdef NAMEI_DIAGNOSTIC
+		printf("not found\n");
+#endif
+		if ((error == ENOENT) &&
+		    (dp->v_flag & VROOT) &&
+		    (dp->v_mount->mnt_flag & MNT_UNION)) {	/* not found at the root of a union mount: retry in the covered directory */
+			tdp = dp;
+			dp = dp->v_mount->mnt_vnodecovered;
+			vput(tdp);
+			VREF(dp);
+			VOP_LOCK(dp);
+			goto unionlookup;
+		}
+
+		if (error != EJUSTRETURN)
+			goto bad;
+		/*
+		 * If creating and at end of pathname, then can consider
+		 * allowing file to be created.
+		 * (This point is reached only for EJUSTRETURN.)
+		 */
+		if (rdonly || (ndp->ni_dvp->v_mount->mnt_flag & MNT_RDONLY)) {
+			error = EROFS;
+			goto bad;
+		}
+		/*
+		 * We return with ni_vp NULL to indicate that the entry
+		 * doesn't currently exist, leaving a pointer to the
+		 * (possibly locked) directory inode in ndp->ni_dvp.
+		 */
+		if (cnp->cn_flags & SAVESTART) {
+			ndp->ni_startdir = ndp->ni_dvp;
+			VREF(ndp->ni_startdir);
+		}
+		return (0);
+	}
+#ifdef NAMEI_DIAGNOSTIC
+	printf("found\n");
+#endif
+
+	/*
+	 * Take into account any additional components consumed by
+	 * the underlying filesystem.
+	 */
+	if (cnp->cn_consume > 0) {
+		cnp->cn_nameptr += cnp->cn_consume;
+		ndp->ni_next += cnp->cn_consume;
+		ndp->ni_pathlen -= cnp->cn_consume;
+		cnp->cn_consume = 0;
+	}
+
+	dp = ndp->ni_vp;
+	/*
+	 * Check for symbolic link.  A link is handed back to the caller
+	 * (with ISSYMLINK set) when FOLLOW was requested or when more
+	 * path follows it; otherwise it is treated as the final result.
+	 */
+	if ((dp->v_type == VLNK) &&
+	    ((cnp->cn_flags & FOLLOW) || *ndp->ni_next == '/')) {
+		cnp->cn_flags |= ISSYMLINK;
+		return (0);
+	}
+
+	/*
+	 * Check to see if the vnode has been mounted on;
+	 * if so find the root of the mounted file system.
+	 * While the mount is marked MNT_MLOCK, flag MNT_MWAIT, sleep
+	 * on the mount structure and re-test afterwards.
+	 */
+	while (dp->v_type == VDIR && (mp = dp->v_mountedhere) &&
+	       (cnp->cn_flags & NOCROSSMOUNT) == 0) {
+		if (mp->mnt_flag & MNT_MLOCK) {
+			mp->mnt_flag |= MNT_MWAIT;
+			sleep((caddr_t)mp, PVFS);
+			continue;
+		}
+		if (error = VFS_ROOT(dp->v_mountedhere, &tdp))
+			goto bad2;
+		vput(dp);
+		ndp->ni_vp = dp = tdp;
+	}
+
+nextname:
+	/*
+	 * Not a symbolic link.  If more pathname,
+	 * continue at next component, else return.
+	 * Runs of '/' between components are skipped here.
+	 */
+	if (*ndp->ni_next == '/') {
+		cnp->cn_nameptr = ndp->ni_next;
+		while (*cnp->cn_nameptr == '/') {
+			cnp->cn_nameptr++;
+			ndp->ni_pathlen--;
+		}
+		vrele(ndp->ni_dvp);
+		goto dirloop;
+	}
+	/*
+	 * Check for read-only file systems.
+	 */
+	if (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME) {
+		/*
+		 * Disallow directory write attempts on read-only
+		 * file systems.
+		 */
+		if (rdonly || (dp->v_mount->mnt_flag & MNT_RDONLY) ||
+		    (wantparent &&
+		     (ndp->ni_dvp->v_mount->mnt_flag & MNT_RDONLY))) {
+			error = EROFS;
+			goto bad2;
+		}
+	}
+	if (cnp->cn_flags & SAVESTART) {
+		ndp->ni_startdir = ndp->ni_dvp;
+		VREF(ndp->ni_startdir);
+	}
+	if (!wantparent)
+		vrele(ndp->ni_dvp);
+	if ((cnp->cn_flags & LOCKLEAF) == 0)
+		VOP_UNLOCK(dp);
+	return (0);
+
+bad2:	/* error after a successful VOP_LOOKUP: also let go of the parent in ni_dvp */
+	if ((cnp->cn_flags & LOCKPARENT) && *ndp->ni_next == '\0')
+		VOP_UNLOCK(ndp->ni_dvp);
+	vrele(ndp->ni_dvp);
+bad:	/* common error exit: unlock and release the current vnode */
+	vput(dp);
+	ndp->ni_vp = NULL;
+	return (error);
+}
+
+
diff --git a/sys/kern/vfs_mount.c b/sys/kern/vfs_mount.c
new file mode 100644
index 00000000000..2fe39eb674b
--- /dev/null
+++ b/sys/kern/vfs_mount.c
@@ -0,0 +1,260 @@
+/*
+ * Copyright (c) 1989, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)vfs_conf.c	8.8 (Berkeley) 3/31/94
+ */
+
+#include <sys/param.h>
+#include <sys/mount.h>
+#include <sys/vnode.h>
+
+#ifdef FFS
+#include <ufs/ffs/ffs_extern.h>
+
+/*
+ * This specifies the filesystem used to mount the root.
+ * This specification should be done by /etc/config.
+ *
+ * As written, mountroot is hard-wired to ffs_mountroot and is defined
+ * only when the kernel is configured with FFS.
+ */
+int (*mountroot)() = ffs_mountroot;
+#endif
+
+/*
+ * These define the root filesystem and device.
+ * rootvnode is the vnode pathname translation falls back on when a
+ * process has no root directory of its own.
+ */
+struct mount *rootfs;
+struct vnode *rootvnode;
+
+/*
+ * Set up the filesystem operations for vnodes.
+ * The types are defined in mount.h.
+ *
+ * Each filesystem gets an XXX_VFSOPS macro that expands to the address
+ * of its vfsops structure when the corresponding kernel option is
+ * defined, and to NULL otherwise.  The macros are then used to fill the
+ * vfssw[] table below, so a filesystem that is not configured simply
+ * leaves a NULL slot at its mount type number.
+ */
+#ifdef FFS
+extern	struct vfsops ufs_vfsops;
+#define	UFS_VFSOPS	&ufs_vfsops
+#else
+#define	UFS_VFSOPS	NULL
+#endif
+
+#ifdef LFS
+extern	struct vfsops lfs_vfsops;
+#define	LFS_VFSOPS	&lfs_vfsops
+#else
+#define	LFS_VFSOPS	NULL
+#endif
+
+#ifdef MFS
+extern	struct vfsops mfs_vfsops;
+#define	MFS_VFSOPS	&mfs_vfsops
+#else
+#define	MFS_VFSOPS	NULL
+#endif
+
+#ifdef NFS
+extern	struct vfsops nfs_vfsops;
+#define	NFS_VFSOPS	&nfs_vfsops
+#else
+#define	NFS_VFSOPS	NULL
+#endif
+
+#ifdef FDESC
+extern	struct vfsops fdesc_vfsops;
+#define	FDESC_VFSOPS	&fdesc_vfsops
+#else
+#define	FDESC_VFSOPS	NULL
+#endif
+
+#ifdef PORTAL
+extern	struct vfsops portal_vfsops;
+#define	PORTAL_VFSOPS	&portal_vfsops
+#else
+#define	PORTAL_VFSOPS	NULL
+#endif
+
+#ifdef NULLFS
+extern	struct vfsops null_vfsops;
+#define NULL_VFSOPS	&null_vfsops
+#else
+#define NULL_VFSOPS	NULL
+#endif
+
+#ifdef UMAPFS
+extern	struct vfsops umap_vfsops;
+#define UMAP_VFSOPS	&umap_vfsops
+#else
+#define UMAP_VFSOPS	NULL
+#endif
+
+#ifdef KERNFS
+extern	struct vfsops kernfs_vfsops;
+#define KERNFS_VFSOPS	&kernfs_vfsops
+#else
+#define KERNFS_VFSOPS	NULL
+#endif
+
+#ifdef PROCFS
+extern	struct vfsops procfs_vfsops;
+#define PROCFS_VFSOPS	&procfs_vfsops
+#else
+#define PROCFS_VFSOPS	NULL
+#endif
+
+#ifdef AFS
+extern	struct vfsops afs_vfsops;
+#define AFS_VFSOPS	&afs_vfsops
+#else
+#define AFS_VFSOPS	NULL
+#endif
+
+#ifdef CD9660
+extern	struct vfsops cd9660_vfsops;
+#define CD9660_VFSOPS	&cd9660_vfsops
+#else
+#define CD9660_VFSOPS	NULL
+#endif
+
+#ifdef UNION
+extern	struct vfsops union_vfsops;
+#define	UNION_VFSOPS	&union_vfsops
+#else
+#define	UNION_VFSOPS	NULL
+#endif
+
+struct vfsops *vfssw[] = {	/* indexed by mount type number; NULL = type not configured */
+	NULL,			/* 0 = MOUNT_NONE */
+	UFS_VFSOPS,		/* 1 = MOUNT_UFS */
+	NFS_VFSOPS,		/* 2 = MOUNT_NFS */
+	MFS_VFSOPS,		/* 3 = MOUNT_MFS */
+	NULL,			/* 4 = MOUNT_PC */
+	LFS_VFSOPS,		/* 5 = MOUNT_LFS */
+	NULL,			/* 6 = MOUNT_LOFS */
+	FDESC_VFSOPS,		/* 7 = MOUNT_FDESC */
+	PORTAL_VFSOPS,		/* 8 = MOUNT_PORTAL */
+	NULL_VFSOPS,		/* 9 = MOUNT_NULL */
+	UMAP_VFSOPS,		/* 10 = MOUNT_UMAP */
+	KERNFS_VFSOPS,		/* 11 = MOUNT_KERNFS */
+	PROCFS_VFSOPS,		/* 12 = MOUNT_PROCFS */
+	AFS_VFSOPS,		/* 13 = MOUNT_AFS */
+	CD9660_VFSOPS,		/* 14 = MOUNT_CD9660 */
+	UNION_VFSOPS,		/* 15 = MOUNT_UNION */
+	0			/* extra trailing null slot */
+};
+
+
+/*
+ *
+ * vfs_opv_descs enumerates the list of vnode classes, each with its own
+ * vnode operation vector.  It is consulted at system boot to build operation
+ * vectors.  It is NULL terminated.
+ *
+ * The ffs, dead and spec descriptors are listed unconditionally; every
+ * other entry is compiled in only when its filesystem option (and, for
+ * the fifo variants, FIFO as well) is defined.
+ *
+ */
+extern struct vnodeopv_desc ffs_vnodeop_opv_desc;
+extern struct vnodeopv_desc ffs_specop_opv_desc;
+extern struct vnodeopv_desc ffs_fifoop_opv_desc;
+extern struct vnodeopv_desc lfs_vnodeop_opv_desc;
+extern struct vnodeopv_desc lfs_specop_opv_desc;
+extern struct vnodeopv_desc lfs_fifoop_opv_desc;
+extern struct vnodeopv_desc mfs_vnodeop_opv_desc;
+extern struct vnodeopv_desc dead_vnodeop_opv_desc;
+extern struct vnodeopv_desc fifo_vnodeop_opv_desc;
+extern struct vnodeopv_desc spec_vnodeop_opv_desc;
+extern struct vnodeopv_desc nfsv2_vnodeop_opv_desc;
+extern struct vnodeopv_desc spec_nfsv2nodeop_opv_desc;
+extern struct vnodeopv_desc fifo_nfsv2nodeop_opv_desc;
+extern struct vnodeopv_desc fdesc_vnodeop_opv_desc;
+extern struct vnodeopv_desc portal_vnodeop_opv_desc;
+extern struct vnodeopv_desc null_vnodeop_opv_desc;
+extern struct vnodeopv_desc umap_vnodeop_opv_desc;
+extern struct vnodeopv_desc kernfs_vnodeop_opv_desc;
+extern struct vnodeopv_desc procfs_vnodeop_opv_desc;
+extern struct vnodeopv_desc cd9660_vnodeop_opv_desc;
+extern struct vnodeopv_desc cd9660_specop_opv_desc;
+extern struct vnodeopv_desc cd9660_fifoop_opv_desc;
+extern struct vnodeopv_desc union_vnodeop_opv_desc;
+
+struct vnodeopv_desc *vfs_opv_descs[] = {
+	&ffs_vnodeop_opv_desc,	/* NOTE(review): the ffs entries are not guarded by #ifdef FFS */
+	&ffs_specop_opv_desc,
+#ifdef FIFO
+	&ffs_fifoop_opv_desc,
+#endif
+	&dead_vnodeop_opv_desc,
+#ifdef FIFO
+	&fifo_vnodeop_opv_desc,
+#endif
+	&spec_vnodeop_opv_desc,
+#ifdef LFS
+	&lfs_vnodeop_opv_desc,
+	&lfs_specop_opv_desc,
+#ifdef FIFO
+	&lfs_fifoop_opv_desc,
+#endif
+#endif
+#ifdef MFS
+	&mfs_vnodeop_opv_desc,
+#endif
+#ifdef NFS
+	&nfsv2_vnodeop_opv_desc,
+	&spec_nfsv2nodeop_opv_desc,
+#ifdef FIFO
+	&fifo_nfsv2nodeop_opv_desc,
+#endif
+#endif
+#ifdef FDESC
+	&fdesc_vnodeop_opv_desc,
+#endif
+#ifdef PORTAL
+	&portal_vnodeop_opv_desc,
+#endif
+#ifdef NULLFS
+	&null_vnodeop_opv_desc,
+#endif
+#ifdef UMAPFS
+	&umap_vnodeop_opv_desc,
+#endif
+#ifdef KERNFS
+	&kernfs_vnodeop_opv_desc,
+#endif
+#ifdef PROCFS
+	&procfs_vnodeop_opv_desc,
+#endif
+#ifdef CD9660
+	&cd9660_vnodeop_opv_desc,
+	&cd9660_specop_opv_desc,
+#ifdef FIFO
+	&cd9660_fifoop_opv_desc,
+#endif
+#endif
+#ifdef UNION
+	&union_vnodeop_opv_desc,
+#endif
+	NULL			/* terminator */
+};
diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c
new file mode 100644
index 00000000000..9891fe61c19
--- /dev/null
+++ b/sys/kern/vfs_subr.c
@@ -0,0 +1,1322 @@
+/*
+ * Copyright (c) 1989, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)vfs_subr.c	8.13 (Berkeley) 4/18/94
+ */
+
+/*
+ * External virtual filesystem routines
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/mount.h>
+#include <sys/time.h>
+#include <sys/vnode.h>
+#include <sys/stat.h>
+#include <sys/namei.h>
+#include <sys/ucred.h>
+#include <sys/buf.h>
+#include <sys/errno.h>
+#include <sys/malloc.h>
+#include <sys/domain.h>
+#include <sys/mbuf.h>
+
+#include <vm/vm.h>
+#include <sys/sysctl.h>
+
+#include <miscfs/specfs/specdev.h>
+
+enum vtype iftovt_tab[16] = {	/* NOTE(review): presumably maps the 4-bit file-type field of a mode word to a vnode type - confirm against the IFTOVT users */
+	VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
+	VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD,
+};
+int	vttoif_tab[9] = {	/* NOTE(review): presumably indexed by enum vtype, giving the matching S_IF* bits - confirm against vnode.h */
+	0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK,
+	S_IFSOCK, S_IFIFO, S_IFMT,
+};
+
+/*
+ * Insq/Remq for the vnode usage lists.
+ *
+ * bufinsvn(bp, dp) puts buffer bp at the head of list dp, linked through
+ * its b_vnbufs entry.  bufremvn(bp) unlinks bp and then stores NOLIST in
+ * b_vnbufs.le_next, so that "le_next != NOLIST" can later be used to
+ * tell whether the buffer is still on a list.
+ *
+ * bufremvn expands to a brace-enclosed block, not a single expression:
+ * written as "bufremvn(bp);" directly under an "if", it cannot be
+ * followed by an "else".
+ */
+#define	bufinsvn(bp, dp)	LIST_INSERT_HEAD(dp, bp, b_vnbufs)
+#define	bufremvn(bp) {  \
+	LIST_REMOVE(bp, b_vnbufs); \
+	(bp)->b_vnbufs.le_next = NOLIST; \
+}
+
+TAILQ_HEAD(freelst, vnode) vnode_free_list;	/* vnode free list */
+struct mntlist mountlist;			/* mounted filesystem list */
+
+/*
+ * Initialize the vnode management data structures.
+ *
+ * Sets both tail queues defined above - the vnode free list and the
+ * list of mounted filesystems - to empty.
+ */
+vntblinit()
+{
+
+	TAILQ_INIT(&vnode_free_list);
+	TAILQ_INIT(&mountlist);
+}
+
+/*
+ * Take the mount lock (MNT_MLOCK) on a filesystem.
+ * Used to keep others away from it while it is being mounted or
+ * unmounted.  If the lock is already held, mark the mount as having a
+ * waiter (MNT_MWAIT) and sleep on the mount structure until it is free.
+ * Always returns 0.
+ */
+vfs_lock(mp)
+	register struct mount *mp;
+{
+
+	for (;;) {
+		if ((mp->mnt_flag & MNT_MLOCK) == 0)
+			break;
+		mp->mnt_flag |= MNT_MWAIT;
+		sleep((caddr_t)mp, PVFS);
+	}
+	mp->mnt_flag |= MNT_MLOCK;
+	return (0);
+}
+
+/*
+ * Drop the mount lock (MNT_MLOCK) on a filesystem and wake anybody
+ * who went to sleep waiting for it.
+ * Panics if the filesystem was not locked.
+ */
+void
+vfs_unlock(mp)
+	register struct mount *mp;
+{
+
+	if (!(mp->mnt_flag & MNT_MLOCK))
+		panic("vfs_unlock: not locked");
+	mp->mnt_flag &= ~MNT_MLOCK;
+
+	/* Nobody waiting: nothing more to do. */
+	if ((mp->mnt_flag & MNT_MWAIT) == 0)
+		return;
+	mp->mnt_flag &= ~MNT_MWAIT;
+	wakeup((caddr_t)mp);
+}
+
+/*
+ * Mark a mount point as busy (MNT_MPBUSY).
+ * Used to synchronize access and to delay unmounting.
+ *
+ * Waits, sleeping on &mp->mnt_flag, while somebody else holds the busy
+ * mark.  Returns 1 without marking anything if the filesystem is being
+ * unmounted (MNT_UNMOUNT), otherwise sets the busy mark and returns 0.
+ */
+vfs_busy(mp)
+	register struct mount *mp;
+{
+
+	while ((mp->mnt_flag & MNT_MPBUSY) != 0) {
+		mp->mnt_flag |= MNT_MPWANT;
+		sleep((caddr_t)&mp->mnt_flag, PVFS);
+	}
+	if ((mp->mnt_flag & MNT_UNMOUNT) == 0) {
+		mp->mnt_flag |= MNT_MPBUSY;
+		return (0);
+	}
+	return (1);
+}
+
+/*
+ * Remove the busy mark (MNT_MPBUSY) from a filesystem and, if anyone
+ * registered interest with MNT_MPWANT, clear that flag and wake the
+ * sleepers on &mp->mnt_flag.
+ * Panics if the filesystem was not busy.
+ */
+vfs_unbusy(mp)
+	register struct mount *mp;
+{
+
+	if (!(mp->mnt_flag & MNT_MPBUSY))
+		panic("vfs_unbusy: not busy");
+	mp->mnt_flag &= ~MNT_MPBUSY;
+	if ((mp->mnt_flag & MNT_MPWANT) != 0) {
+		mp->mnt_flag &= ~MNT_MPWANT;
+		wakeup((caddr_t)&mp->mnt_flag);
+	}
+}
+
+/*
+ * Find the mounted filesystem whose fsid matches *fsid.
+ * Walks mountlist comparing both words of the identifier; returns the
+ * matching mount structure, or a null pointer if there is none.
+ */
+struct mount *
+getvfs(fsid)
+	fsid_t *fsid;
+{
+	register struct mount *mp;
+
+	mp = mountlist.tqh_first;
+	while (mp != NULL) {
+		if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] &&
+		    mp->mnt_stat.f_fsid.val[1] == fsid->val[1])
+			break;
+		mp = mp->mnt_list.tqe_next;
+	}
+	/* mp is the match, or null once the list has been exhausted. */
+	return (mp);
+}
+
+/*
+ * Assign a filesystem identifier to mp that no mounted filesystem uses.
+ *
+ * The second fsid word is the filesystem type.  The first word is built
+ * with makedev() from (nblkdev + mtype) and a running counter kept
+ * across calls; the candidate is bumped until getvfs() no longer finds
+ * a mounted filesystem carrying it.
+ */
+void
+getnewfsid(mp, mtype)
+	struct mount *mp;
+	int mtype;
+{
+	static u_short xxxfs_mntid;	/* running counter, never used as 0 */
+	fsid_t candidate;
+
+	/* Provisional identifier while the search below runs. */
+	mp->mnt_stat.f_fsid.val[0] = makedev(nblkdev + mtype, 0);
+	mp->mnt_stat.f_fsid.val[1] = mtype;
+
+	if (xxxfs_mntid == 0)
+		++xxxfs_mntid;
+	candidate.val[0] = makedev(nblkdev + mtype, xxxfs_mntid);
+	candidate.val[1] = mtype;
+
+	/* With nothing mounted there is nothing to collide with. */
+	while (mountlist.tqh_first != NULL && getvfs(&candidate)) {
+		candidate.val[0]++;
+		xxxfs_mntid++;
+	}
+	mp->mnt_stat.f_fsid.val[0] = candidate.val[0];
+}
+
+/*
+ * Set vnode attributes to VNOVAL
+ *
+ * Fills *vap so that every attribute reads as "no value": va_type
+ * becomes VNON, va_vaflags becomes 0, and all other fields listed below
+ * receive VNOVAL.
+ *
+ * The fields are set through chained assignments, so each one receives
+ * the value of the assignment to its right (converted to that field's
+ * type) rather than VNOVAL directly.  Keep that in mind before
+ * reordering or splitting the chain.
+ */
+void vattr_null(vap)
+	register struct vattr *vap;
+{
+
+	vap->va_type = VNON;
+	vap->va_size = vap->va_bytes = VNOVAL;
+	vap->va_mode = vap->va_nlink = vap->va_uid = vap->va_gid =
+		vap->va_fsid = vap->va_fileid =
+		vap->va_blocksize = vap->va_rdev =
+		vap->va_atime.ts_sec = vap->va_atime.ts_nsec =
+		vap->va_mtime.ts_sec = vap->va_mtime.ts_nsec =
+		vap->va_ctime.ts_sec = vap->va_ctime.ts_nsec =
+		vap->va_flags = vap->va_gen = VNOVAL;
+	vap->va_vaflags = 0;
+}
+
+/*
+ * Routines having to do with the management of the vnode table.
+ */
+extern int (**dead_vnodeop_p)();
+extern void vclean();
+long numvnodes;		/* number of vnodes allocated so far (bumped in getnewvnode) */
+extern struct vattr va_null;
+
+/*
+ * Return the next vnode from the free list.
+ *
+ * A brand new, zeroed vnode is allocated instead when the total is
+ * still below desiredvnodes, or when the free list is empty and the
+ * total is below twice desiredvnodes.  Otherwise the vnode at the head
+ * of the free list is recycled; if the free list is empty at that
+ * point the call fails.
+ *
+ * Arguments:
+ *	tag	stored in the vnode's v_tag
+ *	mp	mount point whose vnode list the vnode is put on
+ *		(via insmntque)
+ *	vops	operations vector stored in v_op
+ *	vpp	receives the vnode, or 0 on failure
+ *
+ * Returns 0 on success with *vpp holding a vnode of type VNON, use
+ * count 1 and no private data; returns ENFILE (after reporting
+ * "vnode" through tablefull) when no vnode can be had.
+ */
+getnewvnode(tag, mp, vops, vpp)
+	enum vtagtype tag;
+	struct mount *mp;
+	int (**vops)();
+	struct vnode **vpp;
+{
+	register struct vnode *vp;
+	int s;
+
+	if ((vnode_free_list.tqh_first == NULL &&
+	     numvnodes < 2 * desiredvnodes) ||
+	    numvnodes < desiredvnodes) {
+		vp = (struct vnode *)malloc((u_long)sizeof *vp,
+		    M_VNODE, M_WAITOK);
+		bzero((char *)vp, sizeof *vp);
+		numvnodes++;
+	} else {
+		if ((vp = vnode_free_list.tqh_first) == NULL) {
+			tablefull("vnode");
+			*vpp = 0;
+			return (ENFILE);
+		}
+		if (vp->v_usecount)
+			panic("free vnode isn't");
+		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
+		/* see comment on why 0xdeadb is set at end of vgone (below) */
+		vp->v_freelist.tqe_prev = (struct vnode **)0xdeadb;
+		vp->v_lease = NULL;
+		if (vp->v_type != VBAD)	/* VBAD is skipped; anything else goes through vgone() before reuse */
+			vgone(vp);
+#ifdef DIAGNOSTIC
+		if (vp->v_data)
+			panic("cleaned vnode isn't");
+		s = splbio();
+		if (vp->v_numoutput)
+			panic("Clean vnode has pending I/O's");
+		splx(s);
+#endif
+		vp->v_flag = 0;		/* reset the per-use state left over from the previous owner */
+		vp->v_lastr = 0;
+		vp->v_ralen = 0;
+		vp->v_maxra = 0;
+		vp->v_lastw = 0;
+		vp->v_lasta = 0;
+		vp->v_cstart = 0;
+		vp->v_clen = 0;
+		vp->v_socket = 0;
+	}
+	vp->v_type = VNON;
+	cache_purge(vp);
+	vp->v_tag = tag;
+	vp->v_op = vops;
+	insmntque(vp, mp);
+	*vpp = vp;
+	vp->v_usecount = 1;
+	vp->v_data = 0;
+	return (0);
+}
+
+/*
+ * Move a vnode onto the vnode list of mount point mp.
+ * The vnode is first taken off the list of whatever mount point it
+ * currently belongs to.  mp may be NULL, in which case the vnode ends
+ * up on no list at all.
+ */
+insmntque(vp, mp)
+	register struct vnode *vp;
+	register struct mount *mp;
+{
+
+	/* Leave the old mount point's list, if we are on one. */
+	if (vp->v_mount != NULL)
+		LIST_REMOVE(vp, v_mntvnodes);
+
+	/* Record the new owner and, when there is one, join its list. */
+	vp->v_mount = mp;
+	if (mp != NULL)
+		LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes);
+}
+
+/*
+ * Update outstanding I/O count and do wakeup if requested.
+ *
+ * Clears B_WRITEINPROG on bp.  If the buffer is attached to a vnode,
+ * that vnode's v_numoutput is decremented; once it has reached zero
+ * and VBWAIT is set on the vnode, the flag is cleared and sleepers on
+ * &vp->v_numoutput are woken.
+ *
+ * Panics if the count goes negative.
+ */
+vwakeup(bp)
+	register struct buf *bp;
+{
+	register struct vnode *vp;
+
+	bp->b_flags &= ~B_WRITEINPROG;
+	if ((vp = bp->b_vp) != NULL) {
+		vp->v_numoutput--;
+		if (vp->v_numoutput < 0)
+			panic("vwakeup: neg numoutput");
+		/*
+		 * A negative count has been ruled out just above, so no
+		 * second check is needed inside this branch.
+		 */
+		if ((vp->v_flag & VBWAIT) && vp->v_numoutput <= 0) {
+			vp->v_flag &= ~VBWAIT;
+			wakeup((caddr_t)&vp->v_numoutput);
+		}
+	}
+}
+
+/*
+ * Flush out and invalidate all buffers associated with a vnode.
+ * Called with the underlying object locked.
+ *
+ * Arguments:
+ *	vp		vnode whose clean and dirty buffer lists are emptied
+ *	flags		V_SAVE: push dirty data out with VOP_FSYNC first
+ *			instead of discarding it.
+ *			V_SAVEMETA: leave buffers with a negative logical
+ *			block number (b_lblkno < 0) alone.
+ *	cred, p		passed through to VOP_FSYNC
+ *	slpflag,
+ *	slptimeo	priority flag and timeout used when sleeping on a
+ *			busy buffer
+ *
+ * Returns 0 on success, the VOP_FSYNC error, or the tsleep error if a
+ * wait for a busy buffer was cut short.  Panics if V_SAVE was given and
+ * dirty buffers remain after the fsync, or if buffers are still
+ * attached at the end without V_SAVEMETA.
+ *
+ * The scan restarts from the list heads whenever it had to sleep or
+ * write a buffer.
+ */
+int
+vinvalbuf(vp, flags, cred, p, slpflag, slptimeo)
+	register struct vnode *vp;
+	int flags;
+	struct ucred *cred;
+	struct proc *p;
+	int slpflag, slptimeo;
+{
+	register struct buf *bp;
+	struct buf *nbp, *blist;
+	int s, error;
+
+	if (flags & V_SAVE) {
+		if (error = VOP_FSYNC(vp, cred, MNT_WAIT, p))
+			return (error);
+		if (vp->v_dirtyblkhd.lh_first != NULL)
+			panic("vinvalbuf: dirty bufs");
+	}
+	for (;;) {
+		if ((blist = vp->v_cleanblkhd.lh_first) && flags & V_SAVEMETA)	/* start with the clean list, skipping leading preserved buffers */
+			while (blist && blist->b_lblkno < 0)
+				blist = blist->b_vnbufs.le_next;
+		if (!blist && (blist = vp->v_dirtyblkhd.lh_first) && 
+		    (flags & V_SAVEMETA))	/* nothing left on the clean list: do the same for the dirty list */
+			while (blist && blist->b_lblkno < 0)
+				blist = blist->b_vnbufs.le_next;
+		if (!blist)	/* neither list has anything left to invalidate */
+			break;
+
+		for (bp = blist; bp; bp = nbp) {
+			nbp = bp->b_vnbufs.le_next;	/* fetched now; bp is handed to brelse() below */
+			if (flags & V_SAVEMETA && bp->b_lblkno < 0)
+				continue;
+			s = splbio();
+			if (bp->b_flags & B_BUSY) {
+				bp->b_flags |= B_WANTED;
+				error = tsleep((caddr_t)bp,
+					slpflag | (PRIBIO + 1), "vinvalbuf",
+					slptimeo);
+				splx(s);
+				if (error)
+					return (error);
+				break;	/* we slept: restart the scan from the list heads */
+			}
+			bremfree(bp);
+			bp->b_flags |= B_BUSY;
+			splx(s);
+			/*
+			 * XXX Since there are no node locks for NFS, I believe
+			 * there is a slight chance that a delayed write will
+			 * occur while sleeping just above, so check for it.
+			 */
+			if ((bp->b_flags & B_DELWRI) && (flags & V_SAVE)) {
+				(void) VOP_BWRITE(bp);
+				break;	/* restart the scan after the write */
+			}
+			bp->b_flags |= B_INVAL;
+			brelse(bp);
+		}
+	}
+	if (!(flags & V_SAVEMETA) &&
+	    (vp->v_dirtyblkhd.lh_first || vp->v_cleanblkhd.lh_first))
+		panic("vinvalbuf: flush failed");
+	return (0);
+}
+
+/*
+ * Attach buffer bp to vnode vp.  The buffer must not already belong
+ * to a vnode; VHOLD is applied to vp on behalf of the buffer.
+ */
+bgetvp(vp, bp)
+	struct vnode *vp;
+	struct buf *bp;
+{
+	int isdev;
+
+	if (bp->b_vp != NULL)
+		panic("bgetvp: not free");
+	VHOLD(vp);
+	bp->b_vp = vp;
+	/* Only device vnodes supply a device number for the buffer. */
+	isdev = (vp->v_type == VBLK || vp->v_type == VCHR);
+	bp->b_dev = isdev ? vp->v_rdev : NODEV;
+
+	/* Put the buffer on the vnode's clean list. */
+	bufinsvn(bp, &vp->v_cleanblkhd);
+}
+
+/*
+ * Detach buffer bp from the vnode it is associated with, undoing
+ * the list linkage and applying HOLDRELE to that vnode.
+ */
+brelvp(bp)
+	struct buf *bp;
+{
+	struct vnode *oldvp = bp->b_vp;
+
+	if (oldvp == NULL)
+		panic("brelvp: NULL");
+
+	/* NOLIST marks a buffer that is on no vnode list. */
+	if (bp->b_vnbufs.le_next != NOLIST)
+		bufremvn(bp);
+
+	bp->b_vp = NULL;
+	HOLDRELE(oldvp);
+}
+
+/*
+ * Take buffer bp off the vnode buffer list it is on (if any) and
+ * put it on one of newvp's lists.
+ * Used to assign file specific control information
+ * (indirect blocks) to the vnode to which they belong.
+ * A NULL newvp is reported with printf and otherwise ignored.
+ */
+reassignbuf(bp, newvp)
+	struct buf *bp;
+	struct vnode *newvp;
+{
+	struct buflists *dest;
+
+	if (newvp == NULL) {
+		printf("reassignbuf: NULL");
+		return;
+	}
+
+	/* NOLIST marks a buffer that is on no vnode list. */
+	if (bp->b_vnbufs.le_next != NOLIST)
+		bufremvn(bp);
+
+	/*
+	 * Delayed-write (dirty) buffers go on the dirty list;
+	 * everything else goes on the clean list.
+	 */
+	dest = (bp->b_flags & B_DELWRI) ?
+	    &newvp->v_dirtyblkhd : &newvp->v_cleanblkhd;
+	bufinsvn(bp, dest);
+}
+
+/*
+ * Create a vnode for block device dev and return it through *vpp (used
+ * for root filesystem, argdev, swap areas and memory file system devices).
+ * Returns 0 or getnewvnode's error; *vpp is NULLVP whenever no vnode
+ * results, including the dev == NODEV case, which still returns 0. */
+bdevvp(dev, vpp)
+	dev_t dev;
+	struct vnode **vpp;
+{
+	register struct vnode *vp;
+	struct vnode *nvp;
+	int error;
+
+	*vpp = NULLVP;		/* never leave the caller's pointer unset */
+	if (dev == NODEV)
+		return (0);
+	error = getnewvnode(VT_NON, (struct mount *)0, spec_vnodeop_p, &nvp);
+	if (error)
+		return (error);
+	vp = nvp;
+	vp->v_type = VBLK;
+	/* An existing alias for this device supersedes the fresh vnode. */
+	if (nvp = checkalias(vp, dev, (struct mount *)0)) {
+		vput(vp);
+		vp = nvp;
+	}
+	*vpp = vp;
+	return (0);
+}
+
+/*
+ * Check whether the new vnode nvp represents a special device for
+ * which a vnode already exists.  Returns NULLVP when nvp is not a
+ * device, or after nvp has been entered on the device hash chain
+ * (it and any in-use alias found are then flagged VALIASED).  Otherwise
+ * an alias still tagged VT_NON was found: it is cleaned, given nvp's
+ * v_op and v_tag, moved to mount mp and returned for the caller to fill.
+ */
+struct vnode *
+checkalias(nvp, nvp_rdev, mp)
+	register struct vnode *nvp;
+	dev_t nvp_rdev;
+	struct mount *mp;
+{
+	register struct vnode *vp;
+	struct vnode **vpp;
+
+	if (nvp->v_type != VBLK && nvp->v_type != VCHR)
+		return (NULLVP);
+
+	vpp = &speclisth[SPECHASH(nvp_rdev)];	/* chain for this device */
+loop:
+	for (vp = *vpp; vp; vp = vp->v_specnext) {
+		if (nvp_rdev != vp->v_rdev || nvp->v_type != vp->v_type)
+			continue;
+		/*
+		 * Alias, but not in use, so flush it out and rescan.
+		 */
+		if (vp->v_usecount == 0) {
+			vgone(vp);
+			goto loop;
+		}
+		if (vget(vp, 1))	/* vp was being cleaned: rescan */
+			goto loop;
+		break;			/* vp is an in-use alias, now held */
+	}
+	if (vp == NULL || vp->v_tag != VT_NON) {
+		MALLOC(nvp->v_specinfo, struct specinfo *,
+			sizeof(struct specinfo), M_VNODE, M_WAITOK);
+		nvp->v_rdev = nvp_rdev;
+		nvp->v_hashchain = vpp;
+		nvp->v_specnext = *vpp;
+		nvp->v_specflags = 0;
+		*vpp = nvp;			/* nvp becomes the chain head */
+		if (vp != NULL) {
+			nvp->v_flag |= VALIASED;
+			vp->v_flag |= VALIASED;
+			vput(vp);
+		}
+		return (NULLVP);
+	}
+	VOP_UNLOCK(vp);
+	vclean(vp, 0);
+	vp->v_op = nvp->v_op;
+	vp->v_tag = nvp->v_tag;
+	nvp->v_type = VNON;
+	insmntque(vp, mp);
+	return (vp);
+}
+
+/*
+ * Acquire a reference to vnode vp, taking it off the free list if it
+ * was unused, and lock it when lockflag is nonzero.  Returns 0 on
+ * success.  If the vnode is being eliminated the caller sleeps until
+ * that is finished and gets 1 back: the vnode is no longer usable
+ * (possibly having been changed to a new file system type).
+ */
+vget(vp, lockflag)
+	struct vnode *vp;
+	int lockflag;
+{
+	int xlocked, reclaiming;
+
+	/*
+	 * A vnode is being cleaned out for another use when either
+	 * VXLOCK is set, or its use count is zero and its free list
+	 * back pointer holds the 0xdeadb marker showing getnewvnode
+	 * has unlinked it.  The second test matters because VXLOCK
+	 * may not be set yet: vclean can be blocked in VOP_LOCK
+	 * waiting for a VOP_INACTIVE to complete.  In both cases
+	 * wait for the cleaning to finish, then report failure.
+	 */
+	xlocked = (vp->v_flag & VXLOCK) != 0;
+	reclaiming = vp->v_usecount == 0 &&
+	    vp->v_freelist.tqe_prev == (struct vnode **)0xdeadb;
+	if (xlocked || reclaiming) {
+		vp->v_flag |= VXWANT;
+		sleep((caddr_t)vp, PINOD);
+		return (1);
+	}
+	if (vp->v_usecount == 0)
+		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
+	vp->v_usecount++;
+	if (lockflag)
+		VOP_LOCK(vp);
+	return (0);
+}
+
+/*
+ * Add a reference to a vnode that is already in use.
+ * An unreferenced vnode must be obtained with vget instead.
+ */
+void vref(vp)
+	struct vnode *vp;
+{
+	if (vp->v_usecount < 1)
+		panic("vref used where vget required");
+	++vp->v_usecount;
+}
+
+/*
+ * Release a locked, referenced vnode: drop the lock first,
+ * then give up the reference through vrele().
+ */
+void vput(vp)
+	struct vnode *vp;
+{
+	VOP_UNLOCK(vp);
+	vrele(vp);
+}
+
+/*
+ * Drop one reference to a vnode.  When the last reference goes away
+ * the vnode is queued on the free list and its inactive routine runs.
+ */
+void vrele(vp)
+	struct vnode *vp;
+{
+
+#ifdef DIAGNOSTIC
+	if (vp == NULL)
+		panic("vrele: null vp");
+#endif
+	if (--vp->v_usecount > 0)
+		return;		/* still referenced elsewhere */
+#ifdef DIAGNOSTIC
+	if (vp->v_usecount != 0 || vp->v_writecount != 0) {
+		vprint("vrele: bad ref count", vp);
+		panic("vrele: ref cnt");
+	}
+#endif
+	/*
+	 * The free list is kept in LRU order, so a vnode that has
+	 * just gone idle belongs at the tail.
+	 */
+	TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
+	VOP_INACTIVE(vp);
+}
+
+/*
+ * Note that a page or buffer structure now refers to this vnode
+ * by bumping its hold count.
+ */
+void vhold(vp)
+	struct vnode *vp;
+{
+	vp->v_holdcnt += 1;
+}
+
+/*
+ * A page or buffer structure no longer refers to this vnode:
+ * give back one hold.  Releasing a hold that was never taken is fatal.
+ */
+void holdrele(vp)
+	struct vnode *vp;
+{
+	if (vp->v_holdcnt < 1)
+		panic("holdrele: holdcnt");
+	vp->v_holdcnt -= 1;
+}
+
+/*
+ * Remove the vnodes belonging to mount point mp, except skipvp and any
+ * excluded by the SKIPSYSTEM or WRITECLOSE flags.  Unused vnodes are
+ * always eliminated.  Without FORCECLOSE, vnodes still in use are left
+ * alone and EBUSY is returned (nb: this is a user error, not a
+ * system error); with FORCECLOSE they are forcibly detached.
+ * Returns 0 otherwise.  Panics unless mp is marked MNT_MPBUSY.
+ */
+#ifdef DIAGNOSTIC
+int busyprt = 0;	/* print out busy vnodes */
+struct ctldebug debug1 = { "busyprt", &busyprt };
+#endif
+
+vflush(mp, skipvp, flags)
+	struct mount *mp;
+	struct vnode *skipvp;	/* vnode to leave untouched */
+	int flags;		/* SKIPSYSTEM, WRITECLOSE, FORCECLOSE */
+{
+	register struct vnode *vp, *nvp;
+	int busy = 0;		/* in-use vnodes that were left alone */
+
+	if ((mp->mnt_flag & MNT_MPBUSY) == 0)
+		panic("vflush: not busy");
+loop:
+	for (vp = mp->mnt_vnodelist.lh_first; vp; vp = nvp) {
+		if (vp->v_mount != mp)
+			goto loop;	/* vp left this mount: restart the scan */
+		nvp = vp->v_mntvnodes.le_next;
+		/*
+		 * Skip over a selected vnode.
+		 */
+		if (vp == skipvp)
+			continue;
+		/*
+		 * Skip over vnodes marked VSYSTEM when SKIPSYSTEM is set.
+		 */
+		if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM))
+			continue;
+		/*
+		 * If WRITECLOSE is set, only flush out regular file
+		 * vnodes open for writing.
+		 */
+		if ((flags & WRITECLOSE) &&
+		    (vp->v_writecount == 0 || vp->v_type != VREG))
+			continue;
+		/*
+		 * With v_usecount == 0, all we need to do is clear
+		 * out the vnode data structures and we are done.
+		 */
+		if (vp->v_usecount == 0) {
+			vgone(vp);
+			continue;
+		}
+		/*
+		 * If FORCECLOSE is set, forcibly close the vnode.
+		 * For block or character devices, revert to an
+		 * anonymous device. For all other files, just kill them.
+		 */
+		if (flags & FORCECLOSE) {
+			if (vp->v_type != VBLK && vp->v_type != VCHR) {
+				vgone(vp);
+			} else {
+				vclean(vp, 0);
+				vp->v_op = spec_vnodeop_p;
+				insmntque(vp, (struct mount *)0);
+			}
+			continue;
+		}
+#ifdef DIAGNOSTIC
+		if (busyprt)
+			vprint("vflush: busy vnode", vp);
+#endif
+		busy++;
+	}
+	if (busy)
+		return (EBUSY);
+	return (0);
+}
+
+/* vclean: disassociate the underlying file system from vnode vp, leaving
+ * it with dead_vnodeop_p operations and tag VT_NON.  With DOCLOSE in flags
+ * its buffers are flushed and, if it was in use, it is closed first. */
+void
+vclean(vp, flags)
+	register struct vnode *vp;
+	int flags;		/* DOCLOSE or 0 */
+{
+	int active;		/* use count on entry; nonzero means in use */
+
+	/*
+	 * Check to see if the vnode is in use.
+	 * If so we have to reference it before we clean it out
+	 * so that its count cannot fall to zero and generate a
+	 * race against ourselves to recycle it.
+	 */
+	if (active = vp->v_usecount)
+		VREF(vp);
+	/*
+	 * Even if the count is zero, the VOP_INACTIVE routine may still
+	 * have the object locked while it cleans it out. The VOP_LOCK
+	 * ensures that the VOP_INACTIVE routine is done with its work.
+	 * For active vnodes, it ensures that no other activity can
+	 * occur while the underlying object is being cleaned out.
+	 */
+	VOP_LOCK(vp);
+	/*
+	 * Prevent the vnode from being recycled or
+	 * brought into use while we clean it out.
+	 */
+	if (vp->v_flag & VXLOCK)
+		panic("vclean: deadlock");
+	vp->v_flag |= VXLOCK;
+	/*
+	 * Clean out any buffers associated with the vnode (DOCLOSE only).
+	 */
+	if (flags & DOCLOSE)
+		vinvalbuf(vp, V_SAVE, NOCRED, NULL, 0, 0);
+	/*
+	 * Any other processes trying to obtain this lock must first
+	 * wait for VXLOCK to clear, then call the new lock operation.
+	 */
+	VOP_UNLOCK(vp);
+	/*
+	 * If purging an active vnode, it must be closed and
+	 * deactivated before being reclaimed.
+	 */
+	if (active) {
+		if (flags & DOCLOSE)
+			VOP_CLOSE(vp, IO_NDELAY, NOCRED, NULL);
+		VOP_INACTIVE(vp);
+	}
+	/*
+	 * Reclaim the vnode; a failure to reclaim is fatal.
+	 */
+	if (VOP_RECLAIM(vp))
+		panic("vclean: cannot reclaim");
+	if (active)
+		vrele(vp);	/* drop the reference taken by VREF above */
+
+	/*
+	 * Done with purge, notify sleepers of the grim news.
+	 */
+	vp->v_op = dead_vnodeop_p;
+	vp->v_tag = VT_NON;
+	vp->v_flag &= ~VXLOCK;
+	if (vp->v_flag & VXWANT) {
+		vp->v_flag &= ~VXWANT;
+		wakeup((caddr_t)vp);
+	}
+}
+
+/*
+ * Eliminate all activity associated with the requested vnode
+ * and with all vnodes aliased to the requested vnode.
+ */
+void vgoneall(vp)
+	register struct vnode *vp;
+{
+	register struct vnode *vq;	/* alias currently being examined */
+
+	if (vp->v_flag & VALIASED) {
+		/*
+		 * If a vgone (or vclean) is already in progress,
+		 * wait until it is done and return.
+		 */
+		if (vp->v_flag & VXLOCK) {
+			vp->v_flag |= VXWANT;
+			sleep((caddr_t)vp, PINOD);
+			return;
+		}
+		/*
+		 * Ensure that vp will not be vgone'd while we
+		 * are eliminating its aliases.
+		 */
+		vp->v_flag |= VXLOCK;
+		while (vp->v_flag & VALIASED) {	/* one alias per pass */
+			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
+				if (vq->v_rdev != vp->v_rdev ||
+				    vq->v_type != vp->v_type || vp == vq)
+					continue;
+				vgone(vq);
+				break;	/* rescan the chain from its head */
+			}
+		}
+		/*
+		 * Remove the lock so that vgone below will
+		 * really eliminate the vnode after which time
+		 * vgone will awaken any sleepers.
+		 */
+		vp->v_flag &= ~VXLOCK;
+	}
+	vgone(vp);
+}
+
+/*
+ * Eliminate all activity associated with a vnode in preparation for
+ * reuse; on return it is cleaned, on no mount list, and typed VBAD.
+ */
+void vgone(vp)
+	register struct vnode *vp;
+{
+	register struct vnode *vq;	/* cursor along the device hash chain */
+	struct vnode *vx;		/* first remaining alias found */
+
+	/*
+	 * If a vgone (or vclean) is already in progress,
+	 * wait until it is done and return.
+	 */
+	if (vp->v_flag & VXLOCK) {
+		vp->v_flag |= VXWANT;
+		sleep((caddr_t)vp, PINOD);
+		return;
+	}
+	/*
+	 * Clean out the filesystem specific data.
+	 */
+	vclean(vp, DOCLOSE);
+	/*
+	 * Delete from old mount point vnode list, if on one.
+	 */
+	if (vp->v_mount != NULL) {
+		LIST_REMOVE(vp, v_mntvnodes);
+		vp->v_mount = NULL;
+	}
+	/*
+	 * If special device, remove it from special device alias list.
+	 */
+	if (vp->v_type == VBLK || vp->v_type == VCHR) {
+		if (*vp->v_hashchain == vp) {
+			*vp->v_hashchain = vp->v_specnext;
+		} else {
+			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
+				if (vq->v_specnext != vp)
+					continue;
+				vq->v_specnext = vp->v_specnext;
+				break;
+			}
+			if (vq == NULL)
+				panic("missing bdev");
+		}
+		if (vp->v_flag & VALIASED) {
+			vx = NULL;
+			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
+				if (vq->v_rdev != vp->v_rdev ||
+				    vq->v_type != vp->v_type)
+					continue;
+				if (vx)
+					break;	/* a second alias remains */
+				vx = vq;
+			}
+			if (vx == NULL)
+				panic("missing alias");
+			if (vq == NULL)	/* vx is the only one left: not aliased */
+				vx->v_flag &= ~VALIASED;
+			vp->v_flag &= ~VALIASED;
+		}
+		FREE(vp->v_specinfo, M_VNODE);
+		vp->v_specinfo = NULL;
+	}
+	/*
+	 * If it is on the freelist and not already at the head,
+	 * move it to the head of the list. The test of the back
+	 * pointer and the reference count of zero is because
+	 * it will be removed from the free list by getnewvnode,
+	 * but will not have its reference count incremented until
+	 * after calling vgone. If the reference count were
+	 * incremented first, vgone would (incorrectly) try to
+	 * close the previous instance of the underlying object.
+	 * So, the back pointer is explicitly set to `0xdeadb' in
+	 * getnewvnode after removing it from the freelist to ensure
+	 * that we do not try to move it here.
+	 */
+	if (vp->v_usecount == 0 &&
+	    vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb &&
+	    vnode_free_list.tqh_first != vp) {
+		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
+		TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
+	}
+	vp->v_type = VBAD;
+}
+
+/*
+ * Find the special-device vnode for (dev, type).  On a hit the vnode
+ * is stored in *vpp and 1 is returned; otherwise 0 is returned.
+ */
+vfinddev(dev, type, vpp)
+	dev_t dev;
+	enum vtype type;
+	struct vnode **vpp;
+{
+	struct vnode *cand = speclisth[SPECHASH(dev)];
+
+	for (; cand != NULL; cand = cand->v_specnext)
+		if (cand->v_rdev == dev && cand->v_type == type) {
+			*vpp = cand;
+			return (1);
+		}
+	return (0);
+}
+
+/*
+ * Sum the use counts of every vnode that refers to the same special
+ * device as vp.  Unused aliases met along the way are discarded.
+ */
+vcount(vp)
+	struct vnode *vp;
+{
+	struct vnode *alias, *following;
+	int total;
+
+rescan:
+	if (!(vp->v_flag & VALIASED))
+		return (vp->v_usecount);
+	total = 0;
+	for (alias = *vp->v_hashchain; alias != NULL; alias = following) {
+		following = alias->v_specnext;
+		if (alias->v_rdev != vp->v_rdev || alias->v_type != vp->v_type)
+			continue;
+		/* vgone edits the chain and may clear VALIASED: start over. */
+		if (alias != vp && alias->v_usecount == 0) {
+			vgone(alias);
+			goto rescan;
+		}
+		total += alias->v_usecount;
+	}
+	return (total);
+}
+
+/*
+ * Print a vnode's type, counts and flags, plus VOP_PRINT output if v_data is set.
+ */
+static char *typename[] =
+   { "VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD" };
+
+vprint(label, vp)
+	char *label;
+	struct vnode *vp;
+{
+	char flagstr[64];	/* "|NAME" appended for each flag that is set */
+
+	if (label != NULL)
+		printf("%s: ", label);
+	printf("type %s, usecount %d, writecount %d, refcount %d,",
+		typename[vp->v_type], vp->v_usecount, vp->v_writecount,
+		vp->v_holdcnt);
+	flagstr[0] = '\0';
+	if ((vp->v_flag & VROOT) != 0)
+		strcat(flagstr, "|VROOT");
+	if ((vp->v_flag & VTEXT) != 0)
+		strcat(flagstr, "|VTEXT");
+	if ((vp->v_flag & VSYSTEM) != 0)
+		strcat(flagstr, "|VSYSTEM");
+	if ((vp->v_flag & VXLOCK) != 0)
+		strcat(flagstr, "|VXLOCK");
+	if ((vp->v_flag & VXWANT) != 0)
+		strcat(flagstr, "|VXWANT");
+	if ((vp->v_flag & VBWAIT) != 0)
+		strcat(flagstr, "|VBWAIT");
+	if ((vp->v_flag & VALIASED) != 0)
+		strcat(flagstr, "|VALIASED");
+	if (flagstr[0] != '\0')
+		printf(" flags (%s)", flagstr + 1);	/* skip the leading '|' */
+	if (vp->v_data != NULL) {
+		printf("\n\t");
+		VOP_PRINT(vp);
+	} else {
+		printf("\n");
+	}
+}
+
+#ifdef DEBUG
+/*
+ * Debugging aid: walk every mounted filesystem and print each
+ * vnode on it that VOP_ISLOCKED reports as locked.
+ */
+printlockedvnodes()
+{
+	struct mount *fs;
+	struct vnode *node;
+
+	printf("Locked vnodes\n");
+	for (fs = mountlist.tqh_first; fs != NULL; fs = fs->mnt_list.tqe_next)
+		for (node = fs->mnt_vnodelist.lh_first; node != NULL;
+		    node = node->v_mntvnodes.le_next) {
+			if (!VOP_ISLOCKED(node))
+				continue;
+			vprint((char *)0, node);
+		}
+}
+#endif
+
+int kinfo_vdebug = 1;
+int kinfo_vgetfailed;
+#define KINFO_VNODESLOP	10
+/*
+ * Dump vnode list (via sysctl), copying out each vnode's address and then
+ * the vnode.  Returns 0, ENOMEM or a copyout error with no mount left busy.
+ */
+/* ARGSUSED */
+sysctl_vnode(where, sizep)
+	char *where;
+	size_t *sizep;
+{
+	register struct mount *mp, *nmp;
+	struct vnode *vp;
+	register char *bp = where, *savebp;
+	char *ewhere;
+	int error;
+
+#define VPTRSZ	sizeof (struct vnode *)
+#define VNODESZ	sizeof (struct vnode)
+	if (where == NULL) {
+		*sizep = (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ);
+		return (0);
+	}
+	ewhere = where + *sizep;
+	for (mp = mountlist.tqh_first; mp != NULL; mp = nmp) {
+		nmp = mp->mnt_list.tqe_next;
+		if (vfs_busy(mp))
+			continue;
+		savebp = bp;
+again:
+		for (vp = mp->mnt_vnodelist.lh_first; vp != NULL;
+		     vp = vp->v_mntvnodes.le_next) {
+			/*
+			 * Rescan if vp has left this filesystem.  RACE: it
+			 * could have been recycled onto the same filesystem.
+			 */
+			if (vp->v_mount != mp) {
+				if (kinfo_vdebug)
+					printf("kinfo: vp changed\n");
+				bp = savebp;
+				goto again;
+			}
+			if (bp + VPTRSZ + VNODESZ > ewhere) {
+				vfs_unbusy(mp);	/* taken by vfs_busy above */
+				*sizep = bp - where;
+				return (ENOMEM);
+			}
+			if ((error = copyout((caddr_t)&vp, bp, VPTRSZ)) ||
+			    (error = copyout((caddr_t)vp, bp + VPTRSZ,
+			    VNODESZ))) {
+				vfs_unbusy(mp);	/* taken by vfs_busy above */
+				return (error);
+			}
+			bp += VPTRSZ + VNODESZ;
+		}
+		vfs_unbusy(mp);
+	}
+	*sizep = bp - where;
+	return (0);
+}
+
+/*
+ * Report whether a filesystem is mounted on the device behind vp,
+ * looking at vp itself and at every alias of it.
+ * Returns EBUSY if so and 0 if not.
+ */
+int
+vfs_mountedon(vp)
+	struct vnode *vp;
+{
+	struct vnode *alias;
+
+	if ((vp->v_specflags & SI_MOUNTEDON) != 0)
+		return (EBUSY);
+	if ((vp->v_flag & VALIASED) == 0)
+		return (0);
+	for (alias = *vp->v_hashchain; alias != NULL; alias = alias->v_specnext)
+		if (alias->v_rdev == vp->v_rdev &&
+		    alias->v_type == vp->v_type &&
+		    (alias->v_specflags & SI_MOUNTEDON) != 0)
+			return (EBUSY);
+	return (0);
+}
+
+/*
+ * Record one export entry for mount point mp.  With no address given
+ * the default export is set up (EPERM if there already is one);
+ * otherwise the address and optional mask are copied in and added to
+ * the radix tree for that address family.  Returns 0 or an errno.
+ */
+static int
+vfs_hang_addrlist(mp, nep, argp)
+	struct mount *mp;
+	struct netexport *nep;
+	struct export_args *argp;
+{
+	register struct netcred *np;
+	register struct radix_node_head *rnh;
+	register int i;
+	struct radix_node *rn;
+	struct sockaddr *saddr, *smask = 0;
+	struct domain *dom;
+	int error;
+
+	if (argp->ex_addrlen == 0) {
+		if (mp->mnt_flag & MNT_DEFEXPORTED)
+			return (EPERM);
+		np = &nep->ne_defexported;
+		np->netc_exflags = argp->ex_flags;
+		np->netc_anon = argp->ex_anon;
+		np->netc_anon.cr_ref = 1;
+		mp->mnt_flag |= MNT_DEFEXPORTED;
+		return (0);
+	}
+	i = sizeof(struct netcred) + argp->ex_addrlen + argp->ex_masklen;
+	np = (struct netcred *)malloc(i, M_NETADDR, M_WAITOK);
+	bzero((caddr_t)np, i);
+	saddr = (struct sockaddr *)(np + 1);
+	if (error = copyin(argp->ex_addr, (caddr_t)saddr, argp->ex_addrlen))
+		goto out;
+	if (saddr->sa_len > argp->ex_addrlen)
+		saddr->sa_len = argp->ex_addrlen;
+	if (argp->ex_masklen) {
+		/* The mask comes from its own user buffer, not ex_addr. */
+		smask = (struct sockaddr *)((caddr_t)saddr + argp->ex_addrlen);
+		error = copyin(argp->ex_mask, (caddr_t)smask, argp->ex_masklen);
+		if (error)
+			goto out;
+		if (smask->sa_len > argp->ex_masklen)
+			smask->sa_len = argp->ex_masklen;
+	}
+	i = saddr->sa_family;
+	if ((unsigned)i <= AF_MAX && nep->ne_rtable[i] == 0) {
+		/* Attach this family's table on demand, not all up front. */
+		for (dom = domains; dom; dom = dom->dom_next)
+			if (dom->dom_family == i && dom->dom_rtattach) {
+				dom->dom_rtattach((void **)&nep->ne_rtable[i],
+					dom->dom_rtoffset);
+				break;
+			}
+	}
+	if ((unsigned)i > AF_MAX || (rnh = nep->ne_rtable[i]) == 0) {
+		error = ENOBUFS;	/* bad family, or no table for it */
+		goto out;
+	}
+	rn = (*rnh->rnh_addaddr)((caddr_t)saddr, (caddr_t)smask, rnh,
+		np->netc_rnodes);
+	if (rn == 0 || np != (struct netcred *)rn) { /* already exists */
+		error = EPERM;
+		goto out;
+	}
+	np->netc_exflags = argp->ex_flags;
+	np->netc_anon = argp->ex_anon;
+	np->netc_anon.cr_ref = 1;
+	return (0);
+out:
+	free(np, M_NETADDR);
+	return (error);
+}
+
+/* ARGSUSED */
+static int
+vfs_free_netcred(rn, w)
+	struct radix_node *rn;	/* entry to unlink and free */
+	caddr_t w;		/* the radix_node_head that holds rn */
+{
+	struct radix_node_head *head = (struct radix_node_head *)w;
+
+	(*head->rnh_deladdr)(rn->rn_key, rn->rn_mask, head);
+	free((caddr_t)rn, M_NETADDR);
+	return (0);
+}
+	
+/*
+ * Free every per-address-family export table hanging off nep, entries first.
+ */
+static void
+vfs_free_addrlist(nep)
+	struct netexport *nep;
+{
+	struct radix_node_head *head;
+	int af;
+
+	for (af = 0; af <= AF_MAX; af++) {
+		if ((head = nep->ne_rtable[af]) == NULL)
+			continue;
+		(*head->rnh_walktree)(head, vfs_free_netcred, (caddr_t)head);
+		free((caddr_t)head, M_RTABLE);
+		nep->ne_rtable[af] = NULL;
+	}
+}
+
+/* Apply an export request to mp: delete all entries and/or add one. */
+int vfs_export(mp, nep, argp)
+	struct mount *mp;
+	struct netexport *nep;
+	struct export_args *argp;
+{
+	int error = 0;
+
+	if ((argp->ex_flags & MNT_DELEXPORT) != 0) {
+		vfs_free_addrlist(nep);
+		mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED);
+	}
+	if ((argp->ex_flags & MNT_EXPORTED) != 0) {
+		error = vfs_hang_addrlist(mp, nep, argp);
+		if (error == 0)
+			mp->mnt_flag |= MNT_EXPORTED;
+	}
+	return (error);
+}
+
+/*
+ * Find the export credentials that apply to a client address.
+ * Returns NULL when mp is not exported or nothing matches.
+ */
+struct netcred *
+vfs_export_lookup(mp, nep, nam)
+	struct mount *mp;
+	struct netexport *nep;
+	struct mbuf *nam;
+{
+	struct netcred *match = NULL;
+	struct radix_node_head *head;
+	struct sockaddr *addr;
+
+	if ((mp->mnt_flag & MNT_EXPORTED) == 0)
+		return (NULL);
+
+	/* An address-specific entry takes precedence over the default. */
+	if (nam != NULL) {
+		addr = mtod(nam, struct sockaddr *);
+		head = nep->ne_rtable[addr->sa_family];
+		if (head != NULL) {
+			match = (struct netcred *)
+			    (*head->rnh_matchaddr)((caddr_t)addr, head);
+			/* A match flagged RNF_ROOT does not count. */
+			if (match != NULL &&
+			    (match->netc_rnodes->rn_flags & RNF_ROOT))
+				match = NULL;
+		}
+	}
+	if (match == NULL && (mp->mnt_flag & MNT_DEFEXPORTED) != 0)
+		match = &nep->ne_defexported;	/* fall back on the default */
+	return (match);
+}
diff --git a/sys/kern/vfs_syscalls.c b/sys/kern/vfs_syscalls.c
new file mode 100644
index 00000000000..345c7a79bf2
--- /dev/null
+++ b/sys/kern/vfs_syscalls.c
@@ -0,0 +1,2107 @@
+/*
+ * Copyright (c) 1989, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)vfs_syscalls.c	8.13 (Berkeley) 4/15/94
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/namei.h>
+#include <sys/filedesc.h>
+#include <sys/kernel.h>
+#include <sys/file.h>
+#include <sys/stat.h>
+#include <sys/vnode.h>
+#include <sys/mount.h>
+#include <sys/proc.h>
+#include <sys/uio.h>
+#include <sys/malloc.h>
+#include <sys/dirent.h>
+
+#include <vm/vm.h>
+#include <sys/sysctl.h>
+
+static int change_dir __P((struct nameidata *ndp, struct proc *p));
+
+/*
+ * Virtual File System System Calls
+ */
+
+/*
+ * Mount a file system (system call; superuser only).
+ * Returns 0 or an errno.  Every failure after the lookup releases the
+ * locked vnode that namei() returned for the mount point.
+ */
+struct mount_args {
+	int	type;
+	char	*path;
+	int	flags;
+	caddr_t	data;
+};
+/* ARGSUSED */
+mount(p, uap, retval)
+	struct proc *p;
+	register struct mount_args *uap;
+	int *retval;
+{
+	register struct vnode *vp;
+	register struct mount *mp;
+	int error, flag;
+	struct nameidata nd;
+
+	/* Must be super user */
+	if (error = suser(p->p_ucred, &p->p_acflag))
+		return (error);
+	/* Get vnode to be covered */
+	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, uap->path, p);
+	if (error = namei(&nd))
+		return (error);
+	vp = nd.ni_vp;
+	if (uap->flags & MNT_UPDATE) {
+		if ((vp->v_flag & VROOT) == 0) {
+			vput(vp);
+			return (EINVAL);
+		}
+		mp = vp->v_mount;
+		flag = mp->mnt_flag;
+		/*
+		 * We only allow the filesystem to be reloaded if it
+		 * is currently mounted read-only.
+		 */
+		if ((uap->flags & MNT_RELOAD) &&
+		    ((mp->mnt_flag & MNT_RDONLY) == 0)) {
+			vput(vp);
+			return (EOPNOTSUPP);	/* Needs translation */
+		}
+		mp->mnt_flag |=
+		    uap->flags & (MNT_RELOAD | MNT_FORCE | MNT_UPDATE);
+		VOP_UNLOCK(vp);
+		goto update;
+	}
+	if (error = vinvalbuf(vp, V_SAVE, p->p_ucred, p, 0, 0)) {
+		vput(vp);	/* do not leak the locked, referenced vnode */
+		return (error);
+	}
+	if (vp->v_type != VDIR) {
+		vput(vp);
+		return (ENOTDIR);
+	}
+	if ((u_long)uap->type > MOUNT_MAXTYPE || vfssw[uap->type] == NULL) {
+		vput(vp);
+		return (ENODEV);
+	}
+
+	/*
+	 * Allocate and initialize the file system.
+	 */
+	mp = (struct mount *)malloc((u_long)sizeof(struct mount),
+		M_MOUNT, M_WAITOK);
+	bzero((char *)mp, (u_long)sizeof(struct mount));
+	mp->mnt_op = vfssw[uap->type];
+	if (error = vfs_lock(mp)) {
+		free((caddr_t)mp, M_MOUNT);
+		vput(vp);
+		return (error);
+	}
+	if (vp->v_mountedhere != NULL) {
+		vfs_unlock(mp);
+		free((caddr_t)mp, M_MOUNT);
+		vput(vp);
+		return (EBUSY);
+	}
+	vp->v_mountedhere = mp;
+	mp->mnt_vnodecovered = vp;
+update:
+	/*
+	 * Set the mount level flags.
+	 */
+	if (uap->flags & MNT_RDONLY)
+		mp->mnt_flag |= MNT_RDONLY;
+	else if (mp->mnt_flag & MNT_RDONLY)
+		mp->mnt_flag |= MNT_WANTRDWR;
+	mp->mnt_flag &=~ (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
+	    MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC);
+	mp->mnt_flag |= uap->flags & (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
+	    MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC);
+	/*
+	 * Mount the filesystem.
+	 */
+	error = VFS_MOUNT(mp, uap->path, uap->data, &nd, p);
+	if (mp->mnt_flag & MNT_UPDATE) {
+		vrele(vp);
+		if (mp->mnt_flag & MNT_WANTRDWR)
+			mp->mnt_flag &= ~MNT_RDONLY;
+		mp->mnt_flag &=~
+		    (MNT_UPDATE | MNT_RELOAD | MNT_FORCE | MNT_WANTRDWR);
+		if (error)
+			mp->mnt_flag = flag;
+		return (error);
+	}
+	/*
+	 * Put the new filesystem on the mount list after root.
+	 */
+	cache_purge(vp);
+	if (!error) {
+		TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list);
+		VOP_UNLOCK(vp);
+		vfs_unlock(mp);
+		error = VFS_START(mp, 0, p);
+	} else {
+		mp->mnt_vnodecovered->v_mountedhere = (struct mount *)0;
+		vfs_unlock(mp);
+		free((caddr_t)mp, M_MOUNT);
+		vput(vp);
+	}
+	return (error);
+}
+
+/*
+ * Unmount a file system (system call).
+ *
+ * Note: the argument is a path to the vnode mounted on,
+ * not to the special file (as before).
+ */
+struct unmount_args {
+	char	*path;
+	int	flags;
+};
+/* ARGSUSED */
+unmount(p, uap, retval)
+	struct proc *p;
+	struct unmount_args *uap;
+	int *retval;
+{
+	struct nameidata nd;
+	struct vnode *rootvp;
+	struct mount *mp;
+	int error;
+
+	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, uap->path, p);
+	error = namei(&nd);
+	if (error != 0)
+		return (error);
+	rootvp = nd.ni_vp;
+	mp = rootvp->v_mount;
+
+	/* Only user mounts may be removed without suser privilege. */
+	if ((mp->mnt_flag & MNT_USER) == 0) {
+		error = suser(p->p_ucred, &p->p_acflag);
+		if (error != 0) {
+			vput(rootvp);
+			return (error);
+		}
+	}
+
+	/* The path must name the root of the filesystem. */
+	if ((rootvp->v_flag & VROOT) == 0) {
+		vput(rootvp);
+		return (EINVAL);
+	}
+
+	/* The lookup's lock and reference are not needed past this point. */
+	vput(rootvp);
+	return (dounmount(mp, uap->flags, p));
+}
+
+/* Do the actual file system unmount.  Returns 0 or an errno; when it
+ * fails, mp stays mounted and nothing this call acquired is left held. */
+dounmount(mp, flags, p)
+	register struct mount *mp;
+	int flags;
+	struct proc *p;
+{
+	struct vnode *coveredvp = mp->mnt_vnodecovered;
+	int error;
+
+	if (vfs_busy(mp))
+		return (EBUSY);
+	mp->mnt_flag |= MNT_UNMOUNT;
+	if (error = vfs_lock(mp)) {
+		mp->mnt_flag &= ~MNT_UNMOUNT;	/* back out of the unmount */
+		vfs_unbusy(mp);
+		return (error);
+	}
+	mp->mnt_flag &=~ MNT_ASYNC;
+	vnode_pager_umount(mp);	/* release cached vnodes */
+	cache_purgevfs(mp);	/* remove cache entries for this file sys */
+	if ((error = VFS_SYNC(mp, MNT_WAIT, p->p_ucred, p)) == 0 ||
+	    (flags & MNT_FORCE))
+		error = VFS_UNMOUNT(mp, flags, p);
+	mp->mnt_flag &= ~MNT_UNMOUNT;
+	vfs_unbusy(mp);
+	if (error) {
+		vfs_unlock(mp);
+	} else {
+		TAILQ_REMOVE(&mountlist, mp, mnt_list);
+		coveredvp->v_mountedhere = (struct mount *)0;
+		vrele(coveredvp);	/* last use of the covered vnode */
+		vfs_unlock(mp);
+		if (mp->mnt_vnodelist.lh_first != NULL)
+			panic("unmount: dangling vnode");
+		free((caddr_t)mp, M_MOUNT);
+	}
+	return (error);
+}
+
+/*
+ * Sync each mounted filesystem (system call).  Always returns 0.
+ */
+#ifdef DIAGNOSTIC
+int syncprt = 0;
+struct ctldebug debug0 = { "syncprt", &syncprt };
+#endif
+
+struct sync_args {
+	int	dummy;
+};
+/* ARGSUSED */
+sync(p, uap, retval)
+	struct proc *p;
+	struct sync_args *uap;
+	int *retval;
+{
+	struct mount *fs, *nextfs;
+	int was_async;
+
+	for (fs = mountlist.tqh_first; fs != NULL; fs = nextfs) {
+		nextfs = fs->mnt_list.tqe_next;
+		/*
+		 * Leave alone anything locked, read-only or busy; the
+		 * lock test avoids races with mount and unmount.
+		 */
+		if ((fs->mnt_flag & (MNT_MLOCK|MNT_RDONLY|MNT_MPBUSY)) != 0 ||
+		    vfs_busy(fs))
+			continue;
+		was_async = fs->mnt_flag & MNT_ASYNC;
+		fs->mnt_flag &= ~MNT_ASYNC;
+		VFS_SYNC(fs, MNT_NOWAIT, p->p_ucred, p);
+		if (was_async)
+			fs->mnt_flag |= MNT_ASYNC;
+		vfs_unbusy(fs);
+	}
+#ifdef DIAGNOSTIC
+	if (syncprt)
+		vfs_bufstats();
+#endif /* DIAGNOSTIC */
+	return (0);
+}
+
+/*
+ * Change quotas on the filesystem that holds the named path (system call).
+ */
+struct quotactl_args {
+	char *path;
+	int cmd;
+	int uid;
+	caddr_t arg;
+};
+/* ARGSUSED */
+quotactl(p, uap, retval)
+	struct proc *p;
+	struct quotactl_args *uap;
+	int *retval;
+{
+	struct nameidata nd;
+	struct mount *fs;
+	int error;
+
+	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, p);
+	if ((error = namei(&nd)) != 0)
+		return (error);
+	fs = nd.ni_vp->v_mount;		/* only the mount is needed */
+	vrele(nd.ni_vp);
+	return (VFS_QUOTACTL(fs, uap->cmd, uap->uid, uap->arg, p));
+}
+
+/*
+ * Get statistics for the filesystem containing uap->path.
+ */
+struct statfs_args {
+	char *path;
+	struct statfs *buf;
+};
+/* ARGSUSED */
+statfs(p, uap, retval)
+	struct proc *p;
+	register struct statfs_args *uap;
+	int *retval;
+{
+	register struct mount *mp;
+	register struct statfs *sp;
+	int error;
+	struct nameidata nd;
+
+	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, p);
+	if (error = namei(&nd))
+		return (error);
+	mp = nd.ni_vp->v_mount;
+	sp = &mp->mnt_stat;	/* the mount's own statfs buffer */
+	vrele(nd.ni_vp);
+	if (error = VFS_STATFS(mp, sp, p))
+		return (error);
+	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;	/* masked mount flags */
+	return (copyout((caddr_t)sp, (caddr_t)uap->buf, sizeof(*sp)));
+}
+
+/*
+ * Get statistics for the filesystem holding the open file uap->fd.
+ */
+struct fstatfs_args {
+	int fd;
+	struct statfs *buf;
+};
+/* ARGSUSED */
+fstatfs(p, uap, retval)
+	struct proc *p;
+	register struct fstatfs_args *uap;
+	int *retval;
+{
+	struct file *fp;
+	struct mount *mp;
+	register struct statfs *sp;
+	int error;
+
+	if (error = getvnode(p->p_fd, uap->fd, &fp))
+		return (error);
+	mp = ((struct vnode *)fp->f_data)->v_mount;
+	sp = &mp->mnt_stat;	/* the mount's own statfs buffer */
+	if (error = VFS_STATFS(mp, sp, p))
+		return (error);
+	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;	/* masked mount flags */
+	return (copyout((caddr_t)sp, (caddr_t)uap->buf, sizeof(*sp)));
+}
+
+/*
+ * Get statistics on all filesystems; *retval is a filesystem count.
+ */
+struct getfsstat_args {
+	struct statfs *buf;
+	long bufsize;
+	int flags;
+};
+getfsstat(p, uap, retval)
+	struct proc *p;
+	register struct getfsstat_args *uap;
+	int *retval;
+{
+	register struct mount *mp, *nmp;
+	register struct statfs *sp;
+	caddr_t sfsp;
+	long count, maxcount, error;
+
+	maxcount = uap->bufsize / sizeof(struct statfs);	/* records that fit */
+	sfsp = (caddr_t)uap->buf;
+	for (count = 0, mp = mountlist.tqh_first; mp != NULL; mp = nmp) {
+		nmp = mp->mnt_list.tqe_next;
+		if (sfsp && count < maxcount &&
+		    ((mp->mnt_flag & MNT_MLOCK) == 0)) {
+			sp = &mp->mnt_stat;
+			/*
+			 * If MNT_NOWAIT is specified, do not refresh the
+			 * fsstat cache. MNT_WAIT overrides MNT_NOWAIT.
+			 */
+			if (((uap->flags & MNT_NOWAIT) == 0 ||
+			    (uap->flags & MNT_WAIT)) &&
+			    (error = VFS_STATFS(mp, sp, p)))
+				continue;	/* failed fs: not copied, not counted */
+			sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
+			if (error = copyout((caddr_t)sp, sfsp, sizeof(*sp)))
+				return (error);
+			sfsp += sizeof(*sp);	/* next user-buffer slot */
+		}
+		count++;	/* counted even when nothing was copied out */
+	}
+	if (sfsp && count > maxcount)
+		*retval = maxcount;	/* buffer given: capped at its capacity */
+	else
+		*retval = count;
+	return (0);
+}
+
+/*
+ * Change current working directory to a given file descriptor.
+ */
+struct fchdir_args {
+	int	fd;
+};
+/* ARGSUSED */
+fchdir(p, uap, retval)
+	struct proc *p;
+	struct fchdir_args *uap;
+	int *retval;
+{
+	register struct filedesc *fdp = p->p_fd;
+	register struct vnode *vp;
+	struct file *fp;
+	int error;
+
+	if (error = getvnode(fdp, uap->fd, &fp))
+		return (error);
+	vp = (struct vnode *)fp->f_data;
+	VOP_LOCK(vp);	/* held across the type and VEXEC checks */
+	if (vp->v_type != VDIR)
+		error = ENOTDIR;
+	else
+		error = VOP_ACCESS(vp, VEXEC, p->p_ucred, p);
+	VOP_UNLOCK(vp);
+	if (error)
+		return (error);
+	VREF(vp);	/* reference that fd_cdir will hold */
+	vrele(fdp->fd_cdir);	/* drop the previous cwd */
+	fdp->fd_cdir = vp;
+	return (0);
+}
+
+/*
+ * Change current working directory (``.'') to the directory uap->path.
+ */
+struct chdir_args {
+	char	*path;
+};
+/* ARGSUSED */
+chdir(p, uap, retval)
+	struct proc *p;
+	struct chdir_args *uap;
+	int *retval;
+{
+	register struct filedesc *fdp = p->p_fd;
+	int error;
+	struct nameidata nd;
+
+	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, uap->path, p);
+	if (error = change_dir(&nd, p))	/* lookup + VDIR/VEXEC checks */
+		return (error);
+	vrele(fdp->fd_cdir);	/* drop the previous cwd */
+	fdp->fd_cdir = nd.ni_vp;
+	return (0);
+}
+
+/*
+ * Change notion of root (``/'') directory; the caller must pass suser().
+ */
+struct chroot_args {
+	char	*path;
+};
+/* ARGSUSED */
+chroot(p, uap, retval)
+	struct proc *p;
+	struct chroot_args *uap;
+	int *retval;
+{
+	register struct filedesc *fdp = p->p_fd;
+	int error;
+	struct nameidata nd;
+
+	if (error = suser(p->p_ucred, &p->p_acflag))
+		return (error);
+	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, uap->path, p);
+	if (error = change_dir(&nd, p))	/* lookup + VDIR/VEXEC checks */
+		return (error);
+	if (fdp->fd_rdir != NULL)	/* fd_rdir may be unset */
+		vrele(fdp->fd_rdir);
+	fdp->fd_rdir = nd.ni_vp;
+	return (0);
+}
+
+/*
+ * Common routine for chroot and chdir: namei(), then VDIR and VEXEC checks.
+ */
+static int
+change_dir(ndp, p)
+	register struct nameidata *ndp;
+	struct proc *p;
+{
+	struct vnode *vp;
+	int error;
+
+	if (error = namei(ndp))
+		return (error);
+	vp = ndp->ni_vp;
+	if (vp->v_type != VDIR)
+		error = ENOTDIR;
+	else
+		error = VOP_ACCESS(vp, VEXEC, p->p_ucred, p);
+	VOP_UNLOCK(vp);	/* done on success and on failure alike */
+	if (error)
+		vrele(vp);	/* failure: vnode released before returning */
+	return (error);
+}
+
+/*
+ * Check permissions, allocate an open file structure,
+ * and call the device open routine if any; *retval gets the descriptor.
+ */
+struct open_args {
+	char	*path;
+	int	flags;
+	int	mode;
+};
+open(p, uap, retval)
+	struct proc *p;
+	register struct open_args *uap;
+	int *retval;
+{
+	register struct filedesc *fdp = p->p_fd;
+	register struct file *fp;
+	register struct vnode *vp;
+	int flags, cmode;
+	struct file *nfp;
+	int type, indx, error;
+	struct flock lf;
+	struct nameidata nd;
+	extern struct fileops vnops;
+
+	if (error = falloc(p, &nfp, &indx))
+		return (error);
+	fp = nfp;
+	flags = FFLAGS(uap->flags);
+	cmode = ((uap->mode &~ fdp->fd_cmask) & ALLPERMS) &~ S_ISTXT;
+	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, p);
+	p->p_dupfd = -indx - 1;			/* XXX check for fdopen */
+	if (error = vn_open(&nd, flags, cmode)) {
+		ffree(fp);
+		if ((error == ENODEV || error == ENXIO) &&
+		    p->p_dupfd >= 0 && 			/* XXX from fdopen */
+		    (error =
+		        dupfdopen(fdp, indx, p->p_dupfd, flags, error)) == 0) {
+			*retval = indx;
+			return (0);
+		}
+		if (error == ERESTART)
+			error = EINTR;	/* ERESTART is reported as EINTR */
+		fdp->fd_ofiles[indx] = NULL;	/* give the slot back */
+		return (error);
+	}
+	p->p_dupfd = 0;
+	vp = nd.ni_vp;
+	fp->f_flag = flags & FMASK;
+	fp->f_type = DTYPE_VNODE;
+	fp->f_ops = &vnops;
+	fp->f_data = (caddr_t)vp;
+	if (flags & (O_EXLOCK | O_SHLOCK)) {
+		lf.l_whence = SEEK_SET;
+		lf.l_start = 0;
+		lf.l_len = 0;
+		if (flags & O_EXLOCK)
+			lf.l_type = F_WRLCK;
+		else
+			lf.l_type = F_RDLCK;
+		type = F_FLOCK;
+		if ((flags & FNONBLOCK) == 0)
+			type |= F_WAIT;	/* F_WAIT unless FNONBLOCK was given */
+		VOP_UNLOCK(vp);	/* vnode unlocked around VOP_ADVLOCK */
+		if (error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type)) {
+			(void) vn_close(vp, fp->f_flag, fp->f_cred, p);
+			ffree(fp);
+			fdp->fd_ofiles[indx] = NULL;
+			return (error);
+		}
+		VOP_LOCK(vp);
+		fp->f_flag |= FHASLOCK;
+	}
+	VOP_UNLOCK(vp);
+	*retval = indx;
+	return (0);
+}
+
+#ifdef COMPAT_43
+/*
+ * Old creat(): open() with O_WRONLY | O_CREAT | O_TRUNC and the given mode.
+ */
+struct ocreat_args {
+	char	*path;
+	int	mode;
+};
+ocreat(p, uap, retval)
+	struct proc *p;
+	register struct ocreat_args *uap;
+	int *retval;
+{
+	struct open_args oargs;
+
+	oargs.flags = O_WRONLY | O_CREAT | O_TRUNC;
+	oargs.mode = uap->mode;
+	oargs.path = uap->path;
+	return (open(p, &oargs, retval));
+}
+#endif /* COMPAT_43 */
+
+/*
+ * Create a special file (VBAD, VCHR or VBLK); the caller must pass suser().
+ */
+struct mknod_args {
+	char	*path;
+	int	mode;
+	int	dev;
+};
+/* ARGSUSED */
+mknod(p, uap, retval)
+	struct proc *p;
+	register struct mknod_args *uap;
+	int *retval;
+{
+	register struct vnode *vp;
+	struct vattr vattr;
+	int error;
+	struct nameidata nd;
+
+	if (error = suser(p->p_ucred, &p->p_acflag))
+		return (error);
+	NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, uap->path, p);
+	if (error = namei(&nd))
+		return (error);
+	vp = nd.ni_vp;
+	if (vp != NULL)
+		error = EEXIST;	/* the name is already present */
+	else {
+		VATTR_NULL(&vattr);
+		vattr.va_mode = (uap->mode & ALLPERMS) &~ p->p_fd->fd_cmask;
+		vattr.va_rdev = uap->dev;
+
+		switch (uap->mode & S_IFMT) {
+		case S_IFMT:	/* used by badsect to flag bad sectors */
+			vattr.va_type = VBAD;
+			break;
+		case S_IFCHR:
+			vattr.va_type = VCHR;
+			break;
+		case S_IFBLK:
+			vattr.va_type = VBLK;
+			break;
+		default:
+			error = EINVAL;	/* any other file type is refused */
+			break;
+		}
+	}
+	if (!error) {
+		LEASE_CHECK(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
+		error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
+	} else {
+		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
+		if (nd.ni_dvp == vp)
+			vrele(nd.ni_dvp);
+		else
+			vput(nd.ni_dvp);
+		if (vp)
+			vrele(vp);
+	}
+	return (error);
+}
+
+/*
+ * Create named pipe (a VFIFO node made through VOP_MKNOD).
+ */
+struct mkfifo_args {
+	char	*path;
+	int	mode;
+};
+/* ARGSUSED */
+mkfifo(p, uap, retval)
+	struct proc *p;
+	register struct mkfifo_args *uap;
+	int *retval;
+{
+	struct vattr vattr;
+	int error;
+	struct nameidata nd;
+
+#ifndef FIFO
+	return (EOPNOTSUPP);	/* kernel built without FIFO */
+#else
+	NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, uap->path, p);
+	if (error = namei(&nd))
+		return (error);
+	if (nd.ni_vp != NULL) {
+		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
+		if (nd.ni_dvp == nd.ni_vp)
+			vrele(nd.ni_dvp);
+		else
+			vput(nd.ni_dvp);
+		vrele(nd.ni_vp);
+		return (EEXIST);	/* the name is already present */
+	}
+	VATTR_NULL(&vattr);
+	vattr.va_type = VFIFO;
+	vattr.va_mode = (uap->mode & ALLPERMS) &~ p->p_fd->fd_cmask;
+	LEASE_CHECK(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
+	return (VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr));
+#endif /* FIFO */
+}
+
+/*
+ * Make a hard file link; a directory source additionally needs suser().
+ */
+struct link_args {
+	char	*path;
+	char	*link;
+};
+/* ARGSUSED */
+link(p, uap, retval)
+	struct proc *p;
+	register struct link_args *uap;
+	int *retval;
+{
+	register struct vnode *vp;
+	struct nameidata nd;
+	int error;
+
+	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, p);
+	if (error = namei(&nd))
+		return (error);
+	vp = nd.ni_vp;	/* existing file to be linked to */
+	if (vp->v_type != VDIR ||
+	    (error = suser(p->p_ucred, &p->p_acflag)) == 0) {
+		nd.ni_cnd.cn_nameiop = CREATE;	/* nd reused for uap->link */
+		nd.ni_cnd.cn_flags = LOCKPARENT;
+		nd.ni_dirp = uap->link;
+		if ((error = namei(&nd)) == 0) {
+			if (nd.ni_vp != NULL)
+				error = EEXIST;
+			if (!error) {
+				LEASE_CHECK(nd.ni_dvp,
+				    p, p->p_ucred, LEASE_WRITE);
+				LEASE_CHECK(vp,
+				    p, p->p_ucred, LEASE_WRITE);
+				error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
+			} else {
+				VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
+				if (nd.ni_dvp == nd.ni_vp)
+					vrele(nd.ni_dvp);
+				else
+					vput(nd.ni_dvp);
+				if (nd.ni_vp)
+					vrele(nd.ni_vp);
+			}
+		}
+	}
+	vrele(vp);	/* reference from the first namei() */
+	return (error);
+}
+
+/*
+ * Make a symbolic link at uap->link; uap->path is handed to VOP_SYMLINK.
+ */
+struct symlink_args {
+	char	*path;
+	char	*link;
+};
+/* ARGSUSED */
+symlink(p, uap, retval)
+	struct proc *p;
+	register struct symlink_args *uap;
+	int *retval;
+{
+	struct vattr vattr;
+	char *path;
+	int error;
+	struct nameidata nd;
+
+	MALLOC(path, char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
+	if (error = copyinstr(uap->path, path, MAXPATHLEN, NULL))
+		goto out;
+	NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, uap->link, p);
+	if (error = namei(&nd))
+		goto out;
+	if (nd.ni_vp) {
+		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
+		if (nd.ni_dvp == nd.ni_vp)
+			vrele(nd.ni_dvp);
+		else
+			vput(nd.ni_dvp);
+		vrele(nd.ni_vp);
+		error = EEXIST;	/* the link name is already present */
+		goto out;
+	}
+	VATTR_NULL(&vattr);
+	vattr.va_mode = ACCESSPERMS &~ p->p_fd->fd_cmask;
+	LEASE_CHECK(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
+	error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, path);
+out:
+	FREE(path, M_NAMEI);	/* every exit path frees the copied string */
+	return (error);
+}
+
+/*
+ * Delete a name from the filesystem; a directory target needs suser().
+ */
+struct unlink_args {
+	char	*path;
+};
+/* ARGSUSED */
+unlink(p, uap, retval)
+	struct proc *p;
+	struct unlink_args *uap;
+	int *retval;
+{
+	register struct vnode *vp;
+	int error;
+	struct nameidata nd;
+
+	NDINIT(&nd, DELETE, LOCKPARENT, UIO_USERSPACE, uap->path, p);
+	if (error = namei(&nd))
+		return (error);
+	vp = nd.ni_vp;
+	LEASE_CHECK(vp, p, p->p_ucred, LEASE_WRITE);
+	VOP_LOCK(vp);
+
+	if (vp->v_type != VDIR ||
+	    (error = suser(p->p_ucred, &p->p_acflag)) == 0) {
+		/*
+		 * The root of a mounted filesystem cannot be deleted.
+		 */
+		if (vp->v_flag & VROOT)
+			error = EBUSY;
+		else
+			(void)vnode_pager_uncache(vp);	/* result ignored */
+	}
+
+	if (!error) {
+		LEASE_CHECK(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
+		error = VOP_REMOVE(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
+	} else {
+		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
+		if (nd.ni_dvp == vp)
+			vrele(nd.ni_dvp);
+		else
+			vput(nd.ni_dvp);
+		vput(vp);
+	}
+	return (error);
+}
+
+/*
+ * Reposition read/write file offset; the new offset is stored via retval.
+ */
+struct lseek_args {
+	int	fd;
+	int	pad;
+	off_t	offset;
+	int	whence;
+};
+lseek(p, uap, retval)
+	struct proc *p;
+	register struct lseek_args *uap;
+	int *retval;
+{
+	struct ucred *cred = p->p_ucred;
+	register struct filedesc *fdp = p->p_fd;
+	register struct file *fp;
+	struct vattr vattr;
+	int error;
+
+	if ((u_int)uap->fd >= fdp->fd_nfiles ||
+	    (fp = fdp->fd_ofiles[uap->fd]) == NULL)
+		return (EBADF);
+	if (fp->f_type != DTYPE_VNODE)
+		return (ESPIPE);	/* only vnode-backed files can seek */
+	switch (uap->whence) {
+	case L_INCR:	/* relative to the current offset */
+		fp->f_offset += uap->offset;
+		break;
+	case L_XTND:	/* relative to va_size from VOP_GETATTR */
+		if (error =
+		    VOP_GETATTR((struct vnode *)fp->f_data, &vattr, cred, p))
+			return (error);
+		fp->f_offset = uap->offset + vattr.va_size;
+		break;
+	case L_SET:	/* absolute */
+		fp->f_offset = uap->offset;
+		break;
+	default:
+		return (EINVAL);
+	}
+	*(off_t *)retval = fp->f_offset;	/* stored as a full off_t */
+	return (0);
+}
+
+#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
+/*
+ * Old lseek(): long offset in, long offset out, implemented via lseek().
+ */
+struct olseek_args {
+	int	fd;
+	long	offset;
+	int	whence;
+};
+olseek(p, uap, retval)
+	struct proc *p;
+	register struct olseek_args *uap;
+	int *retval;
+{
+	struct lseek_args largs;
+	off_t newoff;
+	int error;
+
+	largs.whence = uap->whence;
+	largs.offset = uap->offset;
+	largs.fd = uap->fd;
+	error = lseek(p, &largs, &newoff);
+	*(long *)retval = newoff;
+	return (error);
+}
+#endif /* COMPAT_43 || COMPAT_SUNOS */
+
+/*
+ * Check access permissions with cr_uid/cr_groups[0] set to p_ruid/p_rgid.
+ */
+struct access_args {
+	char	*path;
+	int	flags;
+};
+access(p, uap, retval)
+	struct proc *p;
+	register struct access_args *uap;
+	int *retval;
+{
+	register struct ucred *cred = p->p_ucred;
+	register struct vnode *vp;
+	int error, flags, t_gid, t_uid;
+	struct nameidata nd;
+
+	t_uid = cred->cr_uid;	/* saved; put back at out1 */
+	t_gid = cred->cr_groups[0];
+	cred->cr_uid = p->p_cred->p_ruid;
+	cred->cr_groups[0] = p->p_cred->p_rgid;
+	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, uap->path, p);
+	if (error = namei(&nd))
+		goto out1;
+	vp = nd.ni_vp;
+
+	/* Flags == 0 means only check for existence. */
+	if (uap->flags) {
+		flags = 0;	/* R_OK/W_OK/X_OK become VREAD/VWRITE/VEXEC */
+		if (uap->flags & R_OK)
+			flags |= VREAD;
+		if (uap->flags & W_OK)
+			flags |= VWRITE;
+		if (uap->flags & X_OK)
+			flags |= VEXEC;
+		if ((flags & VWRITE) == 0 || (error = vn_writechk(vp)) == 0)
+			error = VOP_ACCESS(vp, flags, cred, p);
+	}
+	vput(vp);
+out1:
+	cred->cr_uid = t_uid;	/* restore the ids saved on entry */
+	cred->cr_groups[0] = t_gid;
+	return (error);
+}
+
+#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
+/*
+ * Get file status as a struct ostat; this version follows links.
+ */
+struct ostat_args {
+	char	*path;
+	struct ostat *ub;
+};
+/* ARGSUSED */
+ostat(p, uap, retval)
+	struct proc *p;
+	register struct ostat_args *uap;
+	int *retval;
+{
+	struct stat sb;
+	struct ostat osb;
+	int error;
+	struct nameidata nd;
+
+	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, uap->path, p);
+	if (error = namei(&nd))
+		return (error);
+	error = vn_stat(nd.ni_vp, &sb, p);
+	vput(nd.ni_vp);	/* vnode is finished with once sb is filled */
+	if (error)
+		return (error);
+	cvtstat(&sb, &osb);	/* struct stat -> struct ostat */
+	error = copyout((caddr_t)&osb, (caddr_t)uap->ub, sizeof (osb));
+	return (error);
+}
+
+/*
+ * Get file status as a struct ostat; this version does not follow links.
+ */
+struct olstat_args {
+	char	*path;
+	struct ostat *ub;
+};
+/* ARGSUSED */
+olstat(p, uap, retval)
+	struct proc *p;
+	register struct olstat_args *uap;
+	int *retval;
+{
+	struct stat sb;
+	struct ostat osb;
+	int error;
+	struct nameidata nd;
+
+	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF, UIO_USERSPACE, uap->path, p);
+	if (error = namei(&nd))
+		return (error);
+	error = vn_stat(nd.ni_vp, &sb, p);
+	vput(nd.ni_vp);	/* vnode is finished with once sb is filled */
+	if (error)
+		return (error);
+	cvtstat(&sb, &osb);	/* struct stat -> struct ostat */
+	error = copyout((caddr_t)&osb, (caddr_t)uap->ub, sizeof (osb));
+	return (error);
+}
+
+/*
+ * Convert a new stat structure (st) into an old ostat structure (ost).
+ */
+cvtstat(st, ost)
+	struct stat *st;
+	struct ostat *ost;
+{
+
+	/* A size of 2^32 or more is reported to old callers as -2. */
+	if (st->st_size >= (quad_t)1 << 32)
+		ost->st_size = -2;
+	else
+		ost->st_size = st->st_size;
+	ost->st_dev = st->st_dev;
+	ost->st_rdev = st->st_rdev;
+	ost->st_ino = st->st_ino;
+	ost->st_mode = st->st_mode;
+	ost->st_nlink = st->st_nlink;
+	ost->st_uid = st->st_uid;
+	ost->st_gid = st->st_gid;
+	ost->st_atime = st->st_atime;
+	ost->st_mtime = st->st_mtime;
+	ost->st_ctime = st->st_ctime;
+	ost->st_blocks = st->st_blocks;
+	ost->st_blksize = st->st_blksize;
+	ost->st_gen = st->st_gen; ost->st_flags = st->st_flags;
+}
+#endif /* COMPAT_43 || COMPAT_SUNOS */
+
+/*
+ * Get file status into the user's struct stat; this version follows links.
+ */
+struct stat_args {
+	char	*path;
+	struct stat *ub;
+};
+/* ARGSUSED */
+stat(p, uap, retval)
+	struct proc *p;
+	register struct stat_args *uap;
+	int *retval;
+{
+	struct stat sb;
+	int error;
+	struct nameidata nd;
+
+	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, uap->path, p);
+	if (error = namei(&nd))
+		return (error);
+	error = vn_stat(nd.ni_vp, &sb, p);
+	vput(nd.ni_vp);	/* vnode is finished with once sb is filled */
+	if (error)
+		return (error);
+	error = copyout((caddr_t)&sb, (caddr_t)uap->ub, sizeof (sb));
+	return (error);
+}
+
+/*
+ * Get file status; this version does not follow links.
+ */
+struct lstat_args {
+	char	*path;
+	struct stat *ub;
+};
+/* ARGSUSED */
+lstat(p, uap, retval)
+	struct proc *p;
+	register struct lstat_args *uap;
+	int *retval;
+{
+	int error;
+	struct vnode *vp, *dvp;
+	struct stat sb, sb1;
+	struct nameidata nd;
+
+	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | LOCKPARENT, UIO_USERSPACE,
+	    uap->path, p);
+	if (error = namei(&nd))
+		return (error);
+	/*
+	 * For symbolic links, always return the attributes of its containing
+	 * directory, except for file type, link count, size, and block count.
+	 */
+	vp = nd.ni_vp;
+	dvp = nd.ni_dvp;
+	if (vp->v_type != VLNK) {
+		if (dvp == vp)
+			vrele(dvp);
+		else
+			vput(dvp);
+		error = vn_stat(vp, &sb, p);
+		vput(vp);
+		if (error)
+			return (error);
+	} else {
+		error = vn_stat(dvp, &sb, p);	/* base: the directory's stat */
+		vput(dvp);
+		if (error) {
+			vput(vp);
+			return (error);
+		}
+		error = vn_stat(vp, &sb1, p);	/* the link's own stat */
+		vput(vp);
+		if (error)
+			return (error);
+		sb.st_mode &= ~S_IFDIR;	/* only the type bits are replaced */
+		sb.st_mode |= S_IFLNK;
+		sb.st_nlink = sb1.st_nlink;
+		sb.st_size = sb1.st_size;
+		sb.st_blocks = sb1.st_blocks;
+	}
+	error = copyout((caddr_t)&sb, (caddr_t)uap->ub, sizeof (sb));
+	return (error);
+}
+
+/*
+ * Get configurable pathname variables; VOP_PATHCONF stores into retval.
+ */
+struct pathconf_args {
+	char	*path;
+	int	name;
+};
+/* ARGSUSED */
+pathconf(p, uap, retval)
+	struct proc *p;
+	register struct pathconf_args *uap;
+	int *retval;
+{
+	int error;
+	struct nameidata nd;
+
+	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, uap->path, p);
+	if (error = namei(&nd))
+		return (error);
+	error = VOP_PATHCONF(nd.ni_vp, uap->name, retval);
+	vput(nd.ni_vp);
+	return (error);
+}
+
+/*
+ * Return target name of a symbolic link; *retval is the byte count read.
+ */
+struct readlink_args {
+	char	*path;
+	char	*buf;
+	int	count;
+};
+/* ARGSUSED */
+readlink(p, uap, retval)
+	struct proc *p;
+	register struct readlink_args *uap;
+	int *retval;
+{
+	register struct vnode *vp;
+	struct iovec aiov;
+	struct uio auio;
+	int error;
+	struct nameidata nd;
+
+	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF, UIO_USERSPACE, uap->path, p);
+	if (error = namei(&nd))
+		return (error);
+	vp = nd.ni_vp;
+	if (vp->v_type != VLNK)
+		error = EINVAL;	/* auio stays unset: *retval is not touched */
+	else {
+		aiov.iov_base = uap->buf;	/* single user-space segment */
+		aiov.iov_len = uap->count;
+		auio.uio_iov = &aiov;
+		auio.uio_iovcnt = 1;
+		auio.uio_offset = 0;
+		auio.uio_rw = UIO_READ;
+		auio.uio_segflg = UIO_USERSPACE;
+		auio.uio_procp = p;
+		auio.uio_resid = uap->count;
+		error = VOP_READLINK(vp, &auio, p->p_ucred);
+		*retval = uap->count - auio.uio_resid;	/* bytes transferred */
+	}
+	vput(vp);
+	return (error);
+}
+
+/*
+ * Change flags of a file given a path name; EROFS on a read-only mount.
+ */
+struct chflags_args {
+	char	*path;
+	int	flags;
+};
+/* ARGSUSED */
+chflags(p, uap, retval)
+	struct proc *p;
+	register struct chflags_args *uap;
+	int *retval;
+{
+	register struct vnode *vp;
+	struct vattr vattr;
+	int error;
+	struct nameidata nd;
+
+	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, p);
+	if (error = namei(&nd))
+		return (error);
+	vp = nd.ni_vp;
+	LEASE_CHECK(vp, p, p->p_ucred, LEASE_WRITE);
+	VOP_LOCK(vp);
+	if (vp->v_mount->mnt_flag & MNT_RDONLY)
+		error = EROFS;
+	else {
+		VATTR_NULL(&vattr);
+		vattr.va_flags = uap->flags;	/* the one field being set */
+		error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
+	}
+	vput(vp);
+	return (error);
+}
+
+/*
+ * Change flags of a file given a file descriptor; EROFS if read-only.
+ */
+struct fchflags_args {
+	int	fd;
+	int	flags;
+};
+/* ARGSUSED */
+fchflags(p, uap, retval)
+	struct proc *p;
+	register struct fchflags_args *uap;
+	int *retval;
+{
+	struct vattr vattr;
+	struct vnode *vp;
+	struct file *fp;
+	int error;
+
+	if (error = getvnode(p->p_fd, uap->fd, &fp))
+		return (error);
+	vp = (struct vnode *)fp->f_data;
+	LEASE_CHECK(vp, p, p->p_ucred, LEASE_WRITE);
+	VOP_LOCK(vp);
+	if (vp->v_mount->mnt_flag & MNT_RDONLY)
+		error = EROFS;
+	else {
+		VATTR_NULL(&vattr);
+		vattr.va_flags = uap->flags;	/* the one field being set */
+		error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
+	}
+	VOP_UNLOCK(vp);	/* unlock only: no vnode reference is dropped here */
+	return (error);
+}
+
+/*
+ * Change mode of a file given path name; EROFS on a read-only mount.
+ */
+struct chmod_args {
+	char	*path;
+	int	mode;
+};
+/* ARGSUSED */
+chmod(p, uap, retval)
+	struct proc *p;
+	register struct chmod_args *uap;
+	int *retval;
+{
+	register struct vnode *vp;
+	struct vattr vattr;
+	int error;
+	struct nameidata nd;
+
+	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, p);
+	if (error = namei(&nd))
+		return (error);
+	vp = nd.ni_vp;
+	LEASE_CHECK(vp, p, p->p_ucred, LEASE_WRITE);
+	VOP_LOCK(vp);
+	if (vp->v_mount->mnt_flag & MNT_RDONLY)
+		error = EROFS;
+	else {
+		VATTR_NULL(&vattr);
+		vattr.va_mode = uap->mode & ALLPERMS;	/* masked by ALLPERMS */
+		error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
+	}
+	vput(vp);
+	return (error);
+}
+
+/*
+ * Change mode of a file given a file descriptor; EROFS if read-only.
+ */
+struct fchmod_args {
+	int	fd;
+	int	mode;
+};
+/* ARGSUSED */
+fchmod(p, uap, retval)
+	struct proc *p;
+	register struct fchmod_args *uap;
+	int *retval;
+{
+	struct vattr vattr;
+	struct vnode *vp;
+	struct file *fp;
+	int error;
+
+	if (error = getvnode(p->p_fd, uap->fd, &fp))
+		return (error);
+	vp = (struct vnode *)fp->f_data;
+	LEASE_CHECK(vp, p, p->p_ucred, LEASE_WRITE);
+	VOP_LOCK(vp);
+	if (vp->v_mount->mnt_flag & MNT_RDONLY)
+		error = EROFS;
+	else {
+		VATTR_NULL(&vattr);
+		vattr.va_mode = uap->mode & ALLPERMS;	/* masked by ALLPERMS */
+		error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
+	}
+	VOP_UNLOCK(vp);	/* unlock only: no vnode reference is dropped here */
+	return (error);
+}
+
+/*
+ * Set ownership given a path name; EROFS on a read-only mount.
+ */
+struct chown_args {
+	char	*path;
+	int	uid;
+	int	gid;
+};
+/* ARGSUSED */
+chown(p, uap, retval)
+	struct proc *p;
+	register struct chown_args *uap;
+	int *retval;
+{
+	register struct vnode *vp;
+	struct vattr vattr;
+	int error;
+	struct nameidata nd;
+
+	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, p);
+	if (error = namei(&nd))
+		return (error);
+	vp = nd.ni_vp;
+	LEASE_CHECK(vp, p, p->p_ucred, LEASE_WRITE);
+	VOP_LOCK(vp);
+	if (vp->v_mount->mnt_flag & MNT_RDONLY)
+		error = EROFS;
+	else {
+		VATTR_NULL(&vattr);
+		vattr.va_uid = uap->uid;	/* uid and gid are set together */
+		vattr.va_gid = uap->gid;
+		error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
+	}
+	vput(vp);
+	return (error);
+}
+
+/*
+ * Set ownership given a file descriptor; EROFS on a read-only mount.
+ */
+struct fchown_args {
+	int	fd;
+	int	uid;
+	int	gid;
+};
+/* ARGSUSED */
+fchown(p, uap, retval)
+	struct proc *p;
+	register struct fchown_args *uap;
+	int *retval;
+{
+	struct vattr vattr;
+	struct vnode *vp;
+	struct file *fp;
+	int error;
+
+	if (error = getvnode(p->p_fd, uap->fd, &fp))
+		return (error);
+	vp = (struct vnode *)fp->f_data;
+	LEASE_CHECK(vp, p, p->p_ucred, LEASE_WRITE);
+	VOP_LOCK(vp);
+	if (vp->v_mount->mnt_flag & MNT_RDONLY)
+		error = EROFS;
+	else {
+		VATTR_NULL(&vattr);
+		vattr.va_uid = uap->uid;	/* uid and gid are set together */
+		vattr.va_gid = uap->gid;
+		error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
+	}
+	VOP_UNLOCK(vp);	/* unlock only: no vnode reference is dropped here */
+	return (error);
+}
+
+/*
+ * Set the access (tv[0]) and modification (tv[1]) times of a file.
+ */
+struct utimes_args {
+	char	*path;
+	struct	timeval *tptr;
+};
+/* ARGSUSED */
+utimes(p, uap, retval)
+	struct proc *p;
+	register struct utimes_args *uap;
+	int *retval;
+{
+	register struct vnode *vp;
+	struct timeval tv[2];
+	struct vattr vattr;
+	int error;
+	struct nameidata nd;
+
+	VATTR_NULL(&vattr);
+	if (uap->tptr == NULL) {
+		microtime(&tv[0]);	/* no times given: microtime() for both */
+		tv[1] = tv[0];
+		vattr.va_vaflags |= VA_UTIMES_NULL;
+	} else if (error = copyin((caddr_t)uap->tptr, (caddr_t)tv, sizeof (tv)))
+  		return (error);
+	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, p);
+	if (error = namei(&nd))
+		return (error);
+	vp = nd.ni_vp;
+	LEASE_CHECK(vp, p, p->p_ucred, LEASE_WRITE);
+	VOP_LOCK(vp);
+	if (vp->v_mount->mnt_flag & MNT_RDONLY)
+		error = EROFS;
+	else {
+		vattr.va_atime.ts_sec = tv[0].tv_sec;
+		vattr.va_atime.ts_nsec = tv[0].tv_usec * 1000;	/* usec -> nsec */
+		vattr.va_mtime.ts_sec = tv[1].tv_sec;
+		vattr.va_mtime.ts_nsec = tv[1].tv_usec * 1000;	/* usec -> nsec */
+		error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
+	}
+	vput(vp);
+	return (error);
+}
+
+/*
+ * Truncate a file given its path name; directories yield EISDIR.
+ */
+struct truncate_args {
+	char	*path;
+	int	pad;
+	off_t	length;
+};
+/* ARGSUSED */
+truncate(p, uap, retval)
+	struct proc *p;
+	register struct truncate_args *uap;
+	int *retval;
+{
+	register struct vnode *vp;
+	struct vattr vattr;
+	int error;
+	struct nameidata nd;
+
+	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, p);
+	if (error = namei(&nd))
+		return (error);
+	vp = nd.ni_vp;
+	LEASE_CHECK(vp, p, p->p_ucred, LEASE_WRITE);
+	VOP_LOCK(vp);
+	if (vp->v_type == VDIR)
+		error = EISDIR;
+	else if ((error = vn_writechk(vp)) == 0 &&
+	    (error = VOP_ACCESS(vp, VWRITE, p->p_ucred, p)) == 0) {
+		VATTR_NULL(&vattr);
+		vattr.va_size = uap->length;	/* size set via VOP_SETATTR */
+		error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
+	}
+	vput(vp);
+	return (error);
+}
+
+/*
+ * Truncate a file given a file descriptor that is open for writing.
+ */
+struct ftruncate_args {
+	int	fd;
+	int	pad;
+	off_t	length;
+};
+/* ARGSUSED */
+ftruncate(p, uap, retval)
+	struct proc *p;
+	register struct ftruncate_args *uap;
+	int *retval;
+{
+	struct vattr vattr;
+	struct vnode *vp;
+	struct file *fp;
+	int error;
+
+	if (error = getvnode(p->p_fd, uap->fd, &fp))
+		return (error);
+	if ((fp->f_flag & FWRITE) == 0)
+		return (EINVAL);	/* descriptor lacks FWRITE */
+	vp = (struct vnode *)fp->f_data;
+	LEASE_CHECK(vp, p, p->p_ucred, LEASE_WRITE);
+	VOP_LOCK(vp);
+	if (vp->v_type == VDIR)
+		error = EISDIR;
+	else if ((error = vn_writechk(vp)) == 0) {
+		VATTR_NULL(&vattr);
+		vattr.va_size = uap->length;
+		error = VOP_SETATTR(vp, &vattr, fp->f_cred, p);	/* file's cred */
+	}
+	VOP_UNLOCK(vp);
+	return (error);
+}
+
+#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
+/*
+ * Old truncate(): long length widened into truncate()'s off_t argument.
+ */
+struct otruncate_args {
+	char	*path;
+	long	length;
+};
+/* ARGSUSED */
+otruncate(p, uap, retval)
+	struct proc *p;
+	register struct otruncate_args *uap;
+	int *retval;
+{
+	struct truncate_args targs;
+
+	targs.length = uap->length;
+	targs.path = uap->path;
+	return (truncate(p, &targs, retval));
+}
+
+/*
+ * Old ftruncate(): long length widened into ftruncate()'s off_t argument.
+ */
+struct oftruncate_args {
+	int	fd;
+	long	length;
+};
+/* ARGSUSED */
+oftruncate(p, uap, retval)
+	struct proc *p;
+	register struct oftruncate_args *uap;
+	int *retval;
+{
+	struct ftruncate_args fargs;
+
+	fargs.length = uap->length;
+	fargs.fd = uap->fd;
+	return (ftruncate(p, &fargs, retval));
+}
+#endif /* COMPAT_43 || COMPAT_SUNOS */
+
+/*
+ * Sync an open file: VOP_FSYNC(MNT_WAIT) on the descriptor's vnode.
+ */
+struct fsync_args {
+	int	fd;
+};
+/* ARGSUSED */
+fsync(p, uap, retval)
+	struct proc *p;
+	struct fsync_args *uap;
+	int *retval;
+{
+	register struct vnode *vp;
+	struct file *fp;
+	int error;
+
+	if (error = getvnode(p->p_fd, uap->fd, &fp))
+		return (error);
+	vp = (struct vnode *)fp->f_data;
+	VOP_LOCK(vp);	/* vnode locked across VOP_FSYNC */
+	error = VOP_FSYNC(vp, fp->f_cred, MNT_WAIT, p);
+	VOP_UNLOCK(vp);
+	return (error);
+}
+
+/*
+ * Rename files.  Source and destination must either both be directories,
+ * or both not be directories.  If target is a directory, it must be empty.
+ */
+struct rename_args {
+	char	*from;
+	char	*to;
+};
+/* ARGSUSED */
+rename(p, uap, retval)
+	struct proc *p;
+	register struct rename_args *uap;
+	int *retval;
+{
+	register struct vnode *tvp, *fvp, *tdvp;
+	struct nameidata fromnd, tond;
+	int error;
+
+	NDINIT(&fromnd, DELETE, WANTPARENT | SAVESTART, UIO_USERSPACE,
+		uap->from, p);
+	if (error = namei(&fromnd))
+		return (error);
+	fvp = fromnd.ni_vp;
+	NDINIT(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART,
+		UIO_USERSPACE, uap->to, p);
+	if (error = namei(&tond)) {
+		VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd);
+		vrele(fromnd.ni_dvp);
+		vrele(fvp);
+		goto out1;	/* tond holds nothing to clean up */
+	}
+	tdvp = tond.ni_dvp;
+	tvp = tond.ni_vp;
+	if (tvp != NULL) {	/* target exists: file types must agree */
+		if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
+			error = ENOTDIR;
+			goto out;
+		} else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
+			error = EISDIR;
+			goto out;
+		}
+	}
+	if (fvp == tdvp)
+		error = EINVAL;	/* source vnode is the target's ni_dvp */
+	/*
+	 * If source is the same as the destination (that is the
+	 * same inode number with the same name in the same directory),
+	 * then there is nothing to do.
+	 */
+	if (fvp == tvp && fromnd.ni_dvp == tdvp &&
+	    fromnd.ni_cnd.cn_namelen == tond.ni_cnd.cn_namelen &&
+	    !bcmp(fromnd.ni_cnd.cn_nameptr, tond.ni_cnd.cn_nameptr,
+	      fromnd.ni_cnd.cn_namelen))
+		error = -1;	/* sentinel: turned into 0 before returning */
+out:
+	if (!error) {
+		LEASE_CHECK(tdvp, p, p->p_ucred, LEASE_WRITE);
+		if (fromnd.ni_dvp != tdvp)
+			LEASE_CHECK(fromnd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
+		if (tvp)
+			LEASE_CHECK(tvp, p, p->p_ucred, LEASE_WRITE);
+		error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd,
+				   tond.ni_dvp, tond.ni_vp, &tond.ni_cnd);
+	} else {
+		VOP_ABORTOP(tond.ni_dvp, &tond.ni_cnd);
+		if (tdvp == tvp)
+			vrele(tdvp);
+		else
+			vput(tdvp);
+		if (tvp)
+			vput(tvp);
+		VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd);
+		vrele(fromnd.ni_dvp);
+		vrele(fvp);
+	}
+	vrele(tond.ni_startdir);
+	FREE(tond.ni_cnd.cn_pnbuf, M_NAMEI);
+out1:
+	if (fromnd.ni_startdir)
+		vrele(fromnd.ni_startdir);
+	FREE(fromnd.ni_cnd.cn_pnbuf, M_NAMEI);
+	if (error == -1)
+		return (0);	/* the "nothing to do" case is a success */
+	return (error);
+}
+
+/*
+ * Make a directory file; EEXIST if the name is already present.
+ */
+struct mkdir_args {
+	char	*path;
+	int	mode;
+};
+/* ARGSUSED */
+mkdir(p, uap, retval)
+	struct proc *p;
+	register struct mkdir_args *uap;
+	int *retval;
+{
+	register struct vnode *vp;
+	struct vattr vattr;
+	int error;
+	struct nameidata nd;
+
+	NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, uap->path, p);
+	if (error = namei(&nd))
+		return (error);
+	vp = nd.ni_vp;
+	if (vp != NULL) {
+		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
+		if (nd.ni_dvp == vp)
+			vrele(nd.ni_dvp);
+		else
+			vput(nd.ni_dvp);
+		vrele(vp);
+		return (EEXIST);
+	}
+	VATTR_NULL(&vattr);
+	vattr.va_type = VDIR;
+	vattr.va_mode = (uap->mode & ACCESSPERMS) &~ p->p_fd->fd_cmask;
+	LEASE_CHECK(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
+	error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
+	if (!error)
+		vput(nd.ni_vp);	/* the new directory vnode is not kept */
+	return (error);
+}
+
+/*
+ * Remove a directory file; refuses non-directories, "." and mount roots.
+ */
+struct rmdir_args {
+	char	*path;
+};
+/* ARGSUSED */
+rmdir(p, uap, retval)
+	struct proc *p;
+	struct rmdir_args *uap;
+	int *retval;
+{
+	register struct vnode *vp;
+	int error;
+	struct nameidata nd;
+
+	NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF, UIO_USERSPACE, uap->path, p);
+	if (error = namei(&nd))
+		return (error);
+	vp = nd.ni_vp;
+	if (vp->v_type != VDIR) {
+		error = ENOTDIR;
+		goto out;
+	}
+	/*
+	 * No rmdir "." please.
+	 */
+	if (nd.ni_dvp == vp) {
+		error = EINVAL;
+		goto out;
+	}
+	/*
+	 * The root of a mounted filesystem cannot be deleted.
+	 */
+	if (vp->v_flag & VROOT)
+		error = EBUSY;
+out:
+	if (!error) {
+		LEASE_CHECK(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
+		LEASE_CHECK(vp, p, p->p_ucred, LEASE_WRITE);
+		error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
+	} else {
+		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);	/* abandon the DELETE */
+		if (nd.ni_dvp == vp)
+			vrele(nd.ni_dvp);
+		else
+			vput(nd.ni_dvp);
+		vput(vp);
+	}
+	return (error);
+}
+
+#ifdef COMPAT_43
+/*
+ * Read a block of directory entries in a file system independent format.
+ */
+struct ogetdirentries_args {
+	int	fd;
+	char	*buf;
+	u_int	count;
+	long	*basep;
+};
+/*
+ * COMPAT_43 directory read.  Reads up to uap->count bytes of directory
+ * entries from the directory open on uap->fd into the user buffer
+ * uap->buf, after rewriting the d_type/d_namlen bytes of every entry
+ * (see the comments in the loop below).  The file offset at which the
+ * read started is copied out to *uap->basep and the number of bytes
+ * delivered is returned through *retval.
+ *
+ * Errors: EBADF if the descriptor is not open for reading, EINVAL if it
+ * is not a directory, EIO if an entry has a non-positive record length,
+ * or whatever getvnode(), VOP_READDIR(), uiomove() or copyout() return.
+ */
+ogetdirentries(p, uap, retval)
+	struct proc *p;
+	register struct ogetdirentries_args *uap;
+	int *retval;
+{
+	register struct vnode *vp;
+	struct file *fp;
+	struct uio auio, kuio;
+	struct iovec aiov, kiov;
+	struct dirent *dp, *edp;
+	caddr_t dirbuf;
+	int error, readcnt;
+	long loff;
+
+	if (error = getvnode(p->p_fd, uap->fd, &fp))
+		return (error);
+	if ((fp->f_flag & FREAD) == 0)
+		return (EBADF);
+	vp = (struct vnode *)fp->f_data;
+	if (vp->v_type != VDIR)
+		return (EINVAL);
+	/* auio describes the caller's buffer in user space. */
+	aiov.iov_base = uap->buf;
+	aiov.iov_len = uap->count;
+	auio.uio_iov = &aiov;
+	auio.uio_iovcnt = 1;
+	auio.uio_rw = UIO_READ;
+	auio.uio_segflg = UIO_USERSPACE;
+	auio.uio_procp = p;
+	auio.uio_resid = uap->count;
+	VOP_LOCK(vp);
+	/* Remember where the read starts; this is what *basep reports. */
+	loff = auio.uio_offset = fp->f_offset;
+#	if (BYTE_ORDER != LITTLE_ENDIAN)
+		/*
+		 * Not little-endian and mnt_maxsymlinklen <= 0: read
+		 * straight into the user buffer with no per-entry fixup.
+		 * NOTE(review): presumably this identifies a filesystem
+		 * already using the old entry layout -- confirm.
+		 */
+		if (vp->v_mount->mnt_maxsymlinklen <= 0) {
+			error = VOP_READDIR(vp, &auio, fp->f_cred);
+			fp->f_offset = auio.uio_offset;
+		} else
+#	endif
+	{
+		/*
+		 * Read into a temporary kernel buffer of uap->count bytes
+		 * (kuio is auio redirected to that buffer), patch every
+		 * entry in place, then copy the result out to the user.
+		 */
+		kuio = auio;
+		kuio.uio_iov = &kiov;
+		kuio.uio_segflg = UIO_SYSSPACE;
+		kiov.iov_len = uap->count;
+		MALLOC(dirbuf, caddr_t, uap->count, M_TEMP, M_WAITOK);
+		kiov.iov_base = dirbuf;
+		error = VOP_READDIR(vp, &kuio, fp->f_cred);
+		fp->f_offset = kuio.uio_offset;
+		if (error == 0) {
+			/* Bytes actually produced by VOP_READDIR. */
+			readcnt = uap->count - kuio.uio_resid;
+			edp = (struct dirent *)&dirbuf[readcnt];
+			for (dp = (struct dirent *)dirbuf; dp < edp; ) {
+#				if (BYTE_ORDER == LITTLE_ENDIAN)
+					/*
+					 * The expected low byte of
+					 * dp->d_namlen is our dp->d_type.
+					 * The high MBZ byte of dp->d_namlen
+					 * is our dp->d_namlen.
+					 */
+					dp->d_type = dp->d_namlen;
+					dp->d_namlen = 0;
+#				else
+					/*
+					 * The dp->d_type is the high byte
+					 * of the expected dp->d_namlen,
+					 * so must be zero'ed.
+					 */
+					dp->d_type = 0;
+#				endif
+				/*
+				 * Step to the next entry.  A record length
+				 * of zero would never advance dp, so it is
+				 * reported as EIO.
+				 */
+				if (dp->d_reclen > 0) {
+					dp = (struct dirent *)
+					    ((char *)dp + dp->d_reclen);
+				} else {
+					error = EIO;
+					break;
+				}
+			}
+			/*
+			 * dp reaches edp only when the loop was not left
+			 * through the EIO break; only then copy out.
+			 */
+			if (dp >= edp)
+				error = uiomove(dirbuf, readcnt, &auio);
+		}
+		FREE(dirbuf, M_TEMP);
+	}
+	VOP_UNLOCK(vp);
+	if (error)
+		return (error);
+	error = copyout((caddr_t)&loff, (caddr_t)uap->basep, sizeof(long));
+	*retval = uap->count - auio.uio_resid;
+	return (error);
+}
+#endif
+
+/*
+ * Read a block of directory entries in a file system independent format.
+ */
+struct getdirentries_args {
+	int	fd;
+	char	*buf;
+	u_int	count;
+	long	*basep;
+};
+/*
+ * getdirentries system call.  Reads up to uap->count bytes of directory
+ * entries from the directory open on uap->fd into the user buffer
+ * uap->buf.  The file offset at which the read started is copied out to
+ * *uap->basep and the number of bytes read is returned through *retval.
+ *
+ * When a read returns no data, the descriptor may be switched to an
+ * underlying directory (the lower vnode of a union vnode when UNION is
+ * configured, or the vnode covered by an MNT_UNION mount) and the read
+ * retried from offset 0.
+ *
+ * Errors: EBADF if the descriptor is not open for reading, EINVAL if it
+ * is not a directory, or whatever getvnode(), VOP_READDIR(), VOP_OPEN(),
+ * vn_close() or copyout() return.
+ */
+getdirentries(p, uap, retval)
+	struct proc *p;
+	register struct getdirentries_args *uap;
+	int *retval;
+{
+	register struct vnode *vp;
+	struct file *fp;
+	struct uio auio;
+	struct iovec aiov;
+	long loff;
+	int error;
+
+	if (error = getvnode(p->p_fd, uap->fd, &fp))
+		return (error);
+	if ((fp->f_flag & FREAD) == 0)
+		return (EBADF);
+	vp = (struct vnode *)fp->f_data;
+unionread:
+	if (vp->v_type != VDIR)
+		return (EINVAL);
+	/* Describe the caller's buffer; rebuilt on every retry. */
+	aiov.iov_base = uap->buf;
+	aiov.iov_len = uap->count;
+	auio.uio_iov = &aiov;
+	auio.uio_iovcnt = 1;
+	auio.uio_rw = UIO_READ;
+	auio.uio_segflg = UIO_USERSPACE;
+	auio.uio_procp = p;
+	auio.uio_resid = uap->count;
+	VOP_LOCK(vp);
+	/* Remember where the read starts; this is what *basep reports. */
+	loff = auio.uio_offset = fp->f_offset;
+	error = VOP_READDIR(vp, &auio, fp->f_cred);
+	fp->f_offset = auio.uio_offset;
+	VOP_UNLOCK(vp);
+	if (error)
+		return (error);
+
+#ifdef UNION
+{
+	extern int (**union_vnodeop_p)();
+	extern struct vnode *union_lowervp __P((struct vnode *));
+
+	/*
+	 * Nothing was read from a union vnode: continue with its
+	 * lower vnode, if it has one.
+	 */
+	if ((uap->count == auio.uio_resid) &&
+	    (vp->v_op == union_vnodeop_p)) {
+		struct vnode *tvp = vp;
+
+		vp = union_lowervp(vp);
+		if (vp != NULLVP) {
+			VOP_LOCK(vp);
+			/*
+			 * VOP_OPEN takes (vp, mode, cred, p), as in
+			 * vn_open(); supply the credentials the file
+			 * was opened with and the calling process.
+			 */
+			error = VOP_OPEN(vp, FREAD, fp->f_cred, p);
+			VOP_UNLOCK(vp);
+
+			if (error) {
+				vrele(vp);
+				return (error);
+			}
+			/* Point the descriptor at the lower directory. */
+			fp->f_data = (caddr_t) vp;
+			fp->f_offset = 0;
+			error = vn_close(tvp, FREAD, fp->f_cred, p);
+			if (error)
+				return (error);
+			goto unionread;
+		}
+	}
+}
+#endif
+
+	/*
+	 * Nothing was read from the root of a union mount: continue
+	 * with the directory that the mount covers.
+	 */
+	if ((uap->count == auio.uio_resid) &&
+	    (vp->v_flag & VROOT) &&
+	    (vp->v_mount->mnt_flag & MNT_UNION)) {
+		struct vnode *tvp = vp;
+		vp = vp->v_mount->mnt_vnodecovered;
+		VREF(vp);
+		fp->f_data = (caddr_t) vp;
+		fp->f_offset = 0;
+		vrele(tvp);
+		goto unionread;
+	}
+	error = copyout((caddr_t)&loff, (caddr_t)uap->basep, sizeof(long));
+	*retval = uap->count - auio.uio_resid;
+	return (error);
+}
+
+/*
+ * Set the mode mask for creation of filesystem nodes.
+ */
+struct umask_args {
+	int	newmask;
+};
+/*
+ * umask system call: install uap->newmask (restricted to ALLPERMS) as
+ * the process's file-creation mask and hand the previous mask back
+ * through *retval.  Always succeeds.
+ */
+mode_t				/* XXX */
+umask(p, uap, retval)
+	struct proc *p;
+	struct umask_args *uap;
+	int *retval;
+{
+	register struct filedesc *fdp = p->p_fd;
+	int previous;
+
+	previous = fdp->fd_cmask;
+	fdp->fd_cmask = uap->newmask & ALLPERMS;
+	*retval = previous;
+	return (0);
+}
+
+/*
+ * Void all references to file by ripping underlying filesystem
+ * away from vnode.
+ */
+struct revoke_args {
+	char	*path;
+};
+/*
+ * revoke system call: for the character or block device named by
+ * uap->path, call vgoneall() on its vnode when the vnode has other
+ * users or is aliased.  The caller must own the file or pass suser().
+ * Returns EINVAL for anything that is not a device node.
+ */
+/* ARGSUSED */
+revoke(p, uap, retval)
+	struct proc *p;
+	register struct revoke_args *uap;
+	int *retval;
+{
+	struct nameidata nd;
+	struct vattr vattr;
+	register struct vnode *vp;
+	int error;
+
+	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, p);
+	error = namei(&nd);
+	if (error)
+		return (error);
+	vp = nd.ni_vp;
+
+	/* Each step runs only if everything before it succeeded. */
+	if (vp->v_type != VCHR && vp->v_type != VBLK)
+		error = EINVAL;
+	else
+		error = VOP_GETATTR(vp, &vattr, p->p_ucred, p);
+	if (error == 0 && p->p_ucred->cr_uid != vattr.va_uid)
+		error = suser(p->p_ucred, &p->p_acflag);
+	if (error == 0 && (vp->v_usecount > 1 || (vp->v_flag & VALIASED)))
+		vgoneall(vp);
+
+	vrele(vp);
+	return (error);
+}
+
+/*
+ * Convert a user file descriptor to a kernel file entry.
+ */
+getvnode(fdp, fd, fpp)
+	struct filedesc *fdp;
+	int fd;
+	struct file **fpp;
+{
+	struct file *fp;
+
+	/* The descriptor must be within the table and refer to an open file. */
+	if ((u_int)fd >= fdp->fd_nfiles)
+		return (EBADF);
+	fp = fdp->fd_ofiles[fd];
+	if (fp == NULL)
+		return (EBADF);
+	/* Only vnode-backed files qualify. */
+	if (fp->f_type != DTYPE_VNODE)
+		return (EINVAL);
+	*fpp = fp;
+	return (0);
+}
diff --git a/sys/kern/vfs_vnops.c b/sys/kern/vfs_vnops.c
new file mode 100644
index 00000000000..d104bb9de77
--- /dev/null
+++ b/sys/kern/vfs_vnops.c
@@ -0,0 +1,422 @@
+/*
+ * Copyright (c) 1982, 1986, 1989, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)vfs_vnops.c	8.2 (Berkeley) 1/21/94
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/file.h>
+#include <sys/stat.h>
+#include <sys/buf.h>
+#include <sys/proc.h>
+#include <sys/mount.h>
+#include <sys/namei.h>
+#include <sys/vnode.h>
+#include <sys/ioctl.h>
+#include <sys/tty.h>
+
+#include <vm/vm.h>
+
+/*
+ * File operations vector for vnode-backed files: the read, write,
+ * ioctl, select and close entry points are the vn_* routines defined
+ * later in this file.
+ */
+struct 	fileops vnops =
+	{ vn_read, vn_write, vn_ioctl, vn_select, vn_closefile };
+
+/*
+ * Common code for vnode open operations.
+ * Check permissions, and call the VOP_OPEN or VOP_CREATE routine.
+ */
+/*
+ * ndp   - nameidata describing the path; its cn_nameiop and cn_flags are
+ *         set here and ndp->ni_vp names the vnode on success.
+ * fmode - open flags (FREAD, FWRITE, O_CREAT, O_EXCL, O_TRUNC, ...).
+ * cmode - mode used when a new file is created.
+ *
+ * Returns 0 on success, otherwise an errno; on the "bad" path the
+ * vnode is released with vput().
+ * NOTE(review): the lookups use LOCKLEAF and nothing here unlocks the
+ * vnode on success, so it appears to be returned locked -- confirm
+ * against callers.
+ */
+vn_open(ndp, fmode, cmode)
+	register struct nameidata *ndp;
+	int fmode, cmode;
+{
+	register struct vnode *vp;
+	register struct proc *p = ndp->ni_cnd.cn_proc;
+	register struct ucred *cred = p->p_ucred;
+	struct vattr vat;
+	struct vattr *vap = &vat;
+	int error;
+
+	if (fmode & O_CREAT) {
+		/* Look up for creation; follow a final symlink unless O_EXCL. */
+		ndp->ni_cnd.cn_nameiop = CREATE;
+		ndp->ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF;
+		if ((fmode & O_EXCL) == 0)
+			ndp->ni_cnd.cn_flags |= FOLLOW;
+		if (error = namei(ndp))
+			return (error);
+		if (ndp->ni_vp == NULL) {
+			/* No such file: create a regular file with mode cmode. */
+			VATTR_NULL(vap);
+			vap->va_type = VREG;
+			vap->va_mode = cmode;
+			LEASE_CHECK(ndp->ni_dvp, p, cred, LEASE_WRITE);
+			if (error = VOP_CREATE(ndp->ni_dvp, &ndp->ni_vp,
+			    &ndp->ni_cnd, vap))
+				return (error);
+			/* A freshly created file needs no truncation. */
+			fmode &= ~O_TRUNC;
+			vp = ndp->ni_vp;
+		} else {
+			/* File exists: cancel the create and drop the parent. */
+			VOP_ABORTOP(ndp->ni_dvp, &ndp->ni_cnd);
+			if (ndp->ni_dvp == ndp->ni_vp)
+				vrele(ndp->ni_dvp);
+			else
+				vput(ndp->ni_dvp);
+			ndp->ni_dvp = NULL;
+			vp = ndp->ni_vp;
+			if (fmode & O_EXCL) {
+				error = EEXIST;
+				goto bad;
+			}
+			/*
+			 * Clearing O_CREAT makes the access checks below
+			 * apply to the pre-existing file.
+			 */
+			fmode &= ~O_CREAT;
+		}
+	} else {
+		/* Plain open of an existing name. */
+		ndp->ni_cnd.cn_nameiop = LOOKUP;
+		ndp->ni_cnd.cn_flags = FOLLOW | LOCKLEAF;
+		if (error = namei(ndp))
+			return (error);
+		vp = ndp->ni_vp;
+	}
+	/* Sockets cannot be opened through this path. */
+	if (vp->v_type == VSOCK) {
+		error = EOPNOTSUPP;
+		goto bad;
+	}
+	/* Permission checks are skipped for a file this call just created. */
+	if ((fmode & O_CREAT) == 0) {
+		if (fmode & FREAD) {
+			if (error = VOP_ACCESS(vp, VREAD, cred, p))
+				goto bad;
+		}
+		if (fmode & (FWRITE | O_TRUNC)) {
+			if (vp->v_type == VDIR) {
+				error = EISDIR;
+				goto bad;
+			}
+			if ((error = vn_writechk(vp)) ||
+			    (error = VOP_ACCESS(vp, VWRITE, cred, p)))
+				goto bad;
+		}
+	}
+	if (fmode & O_TRUNC) {
+		/* The vnode is unlocked around LEASE_CHECK, then re-locked. */
+		VOP_UNLOCK(vp);				/* XXX */
+		LEASE_CHECK(vp, p, cred, LEASE_WRITE);
+		VOP_LOCK(vp);				/* XXX */
+		/* Truncate by setting only the size attribute to 0. */
+		VATTR_NULL(vap);
+		vap->va_size = 0;
+		if (error = VOP_SETATTR(vp, vap, cred, p))
+			goto bad;
+	}
+	if (error = VOP_OPEN(vp, fmode, cred, p))
+		goto bad;
+	/* Count writers; vn_close() decrements this for FWRITE closes. */
+	if (fmode & FWRITE)
+		vp->v_writecount++;
+	return (0);
+bad:
+	vput(vp);
+	return (error);
+}
+
+/*
+ * Check for write permissions on the specified vnode.
+ * The read-only status of the file system is checked.
+ * Also, prototype text segments cannot be written.
+ */
+vn_writechk(vp)
+	register struct vnode *vp;
+{
+	/*
+	 * On a read-only filesystem, regular files, directories and
+	 * symbolic links may not be written; other vnode types (such
+	 * as sockets and devices) are not refused here.
+	 */
+	if ((vp->v_mount->mnt_flag & MNT_RDONLY) != 0 &&
+	    (vp->v_type == VREG || vp->v_type == VDIR || vp->v_type == VLNK))
+		return (EROFS);
+
+	/* No shared text attached: writing is fine. */
+	if ((vp->v_flag & VTEXT) == 0)
+		return (0);
+
+	/*
+	 * Shared text is attached; make one attempt to free it up.
+	 * If that fails, writing cannot be allowed.
+	 */
+	return (vnode_pager_uncache(vp) ? 0 : ETXTBSY);
+}
+
+/*
+ * Vnode close call
+ */
+vn_close(vp, flags, cred, p)
+	register struct vnode *vp;
+	int flags;
+	struct ucred *cred;
+	struct proc *p;
+{
+	int status;
+
+	/* A file that was open for writing gives up its writer count. */
+	if ((flags & FWRITE) != 0)
+		--vp->v_writecount;
+	status = VOP_CLOSE(vp, flags, cred, p);
+	/* The reference is dropped whether or not VOP_CLOSE succeeded. */
+	vrele(vp);
+	return (status);
+}
+
+/*
+ * Package up an I/O request on a vnode into a uio and do it.
+ */
+vn_rdwr(rw, vp, base, len, offset, segflg, ioflg, cred, aresid, p)
+	enum uio_rw rw;
+	struct vnode *vp;
+	caddr_t base;
+	int len;
+	off_t offset;
+	enum uio_seg segflg;
+	int ioflg;
+	struct ucred *cred;
+	int *aresid;
+	struct proc *p;
+{
+	struct iovec iov;
+	struct uio io;
+	int error;
+	/* The caller sets IO_NODELOCKED when it already holds the lock. */
+	int need_lock = ((ioflg & IO_NODELOCKED) == 0);
+
+	if (need_lock)
+		VOP_LOCK(vp);
+
+	/* A single-segment request covering base[0..len) at offset. */
+	iov.iov_base = base;
+	iov.iov_len = len;
+	io.uio_iov = &iov;
+	io.uio_iovcnt = 1;
+	io.uio_offset = offset;
+	io.uio_resid = len;
+	io.uio_segflg = segflg;
+	io.uio_rw = rw;
+	io.uio_procp = p;
+
+	if (rw == UIO_READ)
+		error = VOP_READ(vp, &io, ioflg, cred);
+	else
+		error = VOP_WRITE(vp, &io, ioflg, cred);
+
+	/*
+	 * Report the untransferred count if the caller asked for it;
+	 * otherwise a short transfer without an error becomes EIO.
+	 */
+	if (aresid != NULL)
+		*aresid = io.uio_resid;
+	else if (error == 0 && io.uio_resid != 0)
+		error = EIO;
+
+	if (need_lock)
+		VOP_UNLOCK(vp);
+	return (error);
+}
+
+/*
+ * File table vnode read routine.
+ */
+vn_read(fp, uio, cred)
+	struct file *fp;
+	struct uio *uio;
+	struct ucred *cred;
+{
+	register struct vnode *vp = (struct vnode *)fp->f_data;
+	int before, error, ioflag;
+
+	LEASE_CHECK(vp, uio->uio_procp, cred, LEASE_READ);
+	VOP_LOCK(vp);
+	/* Read from the file's current offset. */
+	uio->uio_offset = fp->f_offset;
+	before = uio->uio_resid;
+	ioflag = (fp->f_flag & FNONBLOCK) ? IO_NDELAY : 0;
+	error = VOP_READ(vp, uio, ioflag, cred);
+	/* Advance the offset by the number of bytes transferred. */
+	fp->f_offset += before - uio->uio_resid;
+	VOP_UNLOCK(vp);
+	return (error);
+}
+
+/*
+ * File table vnode write routine.
+ */
+vn_write(fp, uio, cred)
+	struct file *fp;
+	struct uio *uio;
+	struct ucred *cred;
+{
+	register struct vnode *vp = (struct vnode *)fp->f_data;
+	int before, error, ioflag;
+
+	/* Translate the file flags into I/O flags for VOP_WRITE. */
+	ioflag = 0;
+	if ((fp->f_flag & O_APPEND) && vp->v_type == VREG)
+		ioflag |= IO_APPEND;
+	if (fp->f_flag & FNONBLOCK)
+		ioflag |= IO_NDELAY;
+
+	LEASE_CHECK(vp, uio->uio_procp, cred, LEASE_WRITE);
+	VOP_LOCK(vp);
+	uio->uio_offset = fp->f_offset;
+	before = uio->uio_resid;
+	error = VOP_WRITE(vp, uio, ioflag, cred);
+	/*
+	 * An appending write takes the new offset from the uio;
+	 * otherwise advance by the number of bytes transferred.
+	 */
+	if ((ioflag & IO_APPEND) == 0)
+		fp->f_offset += before - uio->uio_resid;
+	else
+		fp->f_offset = uio->uio_offset;
+	VOP_UNLOCK(vp);
+	return (error);
+}
+
+/*
+ * File table vnode stat routine.
+ */
+vn_stat(vp, sb, p)
+	struct vnode *vp;
+	register struct stat *sb;
+	struct proc *p;
+{
+	struct vattr va;
+	u_short mode;
+	int error;
+
+	if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) != 0)
+		return (error);
+
+	/* Fill in the stat structure from the vnode attributes. */
+	sb->st_dev = va.va_fsid;
+	sb->st_ino = va.va_fileid;
+
+	/* Combine the permission bits with the file-type bits. */
+	mode = va.va_mode;
+	switch (vp->v_type) {
+	case VREG:	mode |= S_IFREG;	break;
+	case VDIR:	mode |= S_IFDIR;	break;
+	case VBLK:	mode |= S_IFBLK;	break;
+	case VCHR:	mode |= S_IFCHR;	break;
+	case VLNK:	mode |= S_IFLNK;	break;
+	case VSOCK:	mode |= S_IFSOCK;	break;
+	case VFIFO:	mode |= S_IFIFO;	break;
+	default:
+		/* Any other vnode type cannot be described. */
+		return (EBADF);
+	}
+	sb->st_mode = mode;
+
+	sb->st_nlink = va.va_nlink;
+	sb->st_uid = va.va_uid;
+	sb->st_gid = va.va_gid;
+	sb->st_rdev = va.va_rdev;
+	sb->st_size = va.va_size;
+	sb->st_atimespec = va.va_atime;
+	sb->st_mtimespec = va.va_mtime;
+	sb->st_ctimespec = va.va_ctime;
+	sb->st_blksize = va.va_blocksize;
+	sb->st_flags = va.va_flags;
+	sb->st_gen = va.va_gen;
+	/* Size on disk expressed in S_BLKSIZE units. */
+	sb->st_blocks = va.va_bytes / S_BLKSIZE;
+	return (0);
+}
+
+/*
+ * File table vnode ioctl routine.
+ */
+/*
+ * fp   - open file whose f_data is the vnode.
+ * com  - ioctl command.
+ * data - command argument, already in kernel space.
+ * p    - calling process.
+ *
+ * Regular files and directories answer FIONREAD (bytes between the
+ * current offset and the file size), accept FIONBIO and FIOASYNC as
+ * no-ops, and reject everything else with ENOTTY.  FIFOs and devices
+ * pass the command to VOP_IOCTL.  Other vnode types get ENOTTY.
+ */
+vn_ioctl(fp, com, data, p)
+	struct file *fp;
+	int com;
+	caddr_t data;
+	struct proc *p;
+{
+	register struct vnode *vp = ((struct vnode *)fp->f_data);
+	struct vnode *ovp;
+	struct vattr vattr;
+	int error;
+
+	switch (vp->v_type) {
+
+	case VREG:
+	case VDIR:
+		if (com == FIONREAD) {
+			if (error = VOP_GETATTR(vp, &vattr, p->p_ucred, p))
+				return (error);
+			*(int *)data = vattr.va_size - fp->f_offset;
+			return (0);
+		}
+		if (com == FIONBIO || com == FIOASYNC)	/* XXX */
+			return (0);			/* XXX */
+		/* fall into ... */
+
+	default:
+		return (ENOTTY);
+
+	case VFIFO:
+	case VCHR:
+	case VBLK:
+		error = VOP_IOCTL(vp, com, data, fp->f_flag, p->p_ucred, p);
+		if (error == 0 && com == TIOCSCTTY) {
+			/*
+			 * The session holds a reference on its controlling
+			 * terminal vnode.  If that vnode is unchanged there
+			 * is nothing to do; otherwise install the new vnode
+			 * with its own reference first and only then release
+			 * the reference held on the old one, so that it is
+			 * not leaked.
+			 */
+			ovp = p->p_session->s_ttyvp;
+			if (ovp != vp) {
+				p->p_session->s_ttyvp = vp;
+				VREF(vp);
+				if (ovp != NULL)
+					vrele(ovp);
+			}
+		}
+		return (error);
+	}
+}
+
+/*
+ * File table vnode select routine.
+ */
+vn_select(fp, which, p)
+	struct file *fp;
+	int which;
+	struct proc *p;
+{
+	struct vnode *vp = (struct vnode *)fp->f_data;
+
+	/* Forward to the vnode layer with the file's flags and credentials. */
+	return (VOP_SELECT(vp, which, fp->f_flag, fp->f_cred, p));
+}
+
+/*
+ * File table vnode close routine.
+ */
+vn_closefile(fp, p)
+	struct file *fp;
+	struct proc *p;
+{
+	struct vnode *vp = (struct vnode *)fp->f_data;
+
+	/* Close with the flags and credentials recorded in the file. */
+	return (vn_close(vp, fp->f_flag, fp->f_cred, p));
+}
diff --git a/sys/kern/vnode_if.pl b/sys/kern/vnode_if.pl
new file mode 100644
index 00000000000..e190fa04836
--- /dev/null
+++ b/sys/kern/vnode_if.pl
@@ -0,0 +1,433 @@
+#!/bin/sh -
+#
+# Copyright (c) 1992, 1993
+#	The Regents of the University of California.  All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# 1. Redistributions of source code must retain the above copyright
+#    notice, this list of conditions and the following disclaimer.
+# 2. Redistributions in binary form must reproduce the above copyright
+#    notice, this list of conditions and the following disclaimer in the
+#    documentation and/or other materials provided with the distribution.
+# 3. All advertising materials mentioning features or use of this software
+#    must display the following acknowledgement:
+#	This product includes software developed by the University of
+#	California, Berkeley and its contributors.
+# 4. Neither the name of the University nor the names of its contributors
+#    may be used to endorse or promote products derived from this software
+#    without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+# SUCH DAMAGE.
+#
+#	@(#)vnode_if.sh	8.1 (Berkeley) 6/10/93
+#
+
+# Script to produce VFS front-end sugar.
+#
+# usage: vnode_if.sh srcfile
+#	(where srcfile is currently /sys/kern/vnode_if.src)
+#
+# These awk scripts are not particularly well written, specifically they
+# don't use arrays well and figure out the same information repeatedly.
+# Please rewrite them if you actually understand how to use awk.  Note,
+# they use nawk extensions and gawk's toupper.
+
+if [ $# -ne 1 ] ; then
+	echo 'usage: vnode_if.sh srcfile'
+	exit 1
+fi
+
+# Name of the source file.
+SRC=$1
+
+# Names of the created files.
+CFILE=vnode_if.c
+HEADER=vnode_if.h
+
+# Awk program (must support nawk extensions and gawk's "toupper")
+# Use "awk" at Berkeley, "gawk" elsewhere.
+AWK=awk
+
+# Print out header information for vnode_if.h.
+cat << END_OF_LEADING_COMMENT > $HEADER
+/*
+ * This file is produced automatically.
+ * Do not modify anything in here by hand.
+ *
+ * Created from @(#)vnode_if.sh	8.1 (Berkeley) 6/10/93
+ */
+
+extern struct vnodeop_desc vop_default_desc;
+END_OF_LEADING_COMMENT
+
+# Awk script to take vnode_if.src and turn it into vnode_if.h.
+$AWK '
+	NF == 0 || $0 ~ "^#" {
+		next;
+	}
+	{
+		# Get the function name.
+		name = $1;
+		uname = toupper(name);
+
+		# Get the function arguments.
+		for (c1 = 0;; ++c1) {
+			if (getline <= 0)
+				exit
+			if ($0 ~ "^};")
+				break;
+			a[c1] = $0;
+		}
+
+		# Print out the vop_F_args structure.
+		printf("struct %s_args {\n\tstruct vnodeop_desc *a_desc;\n",
+		    name);
+		for (c2 = 0; c2 < c1; ++c2) {
+			c3 = split(a[c2], t);
+			printf("\t");
+			if (t[2] ~ "WILLRELE")
+				c4 = 3;
+			else 
+				c4 = 2;
+			for (; c4 < c3; ++c4)
+				printf("%s ", t[c4]);
+			beg = match(t[c3], "[^*]");
+			printf("%sa_%s\n",
+			    substr(t[c4], 0, beg - 1), substr(t[c4], beg));
+		}
+		printf("};\n");
+
+		# Print out extern declaration.
+		printf("extern struct vnodeop_desc %s_desc;\n", name);
+
+		# Print out inline struct.
+		printf("static inline int %s(", uname);
+		sep = ", ";
+		for (c2 = 0; c2 < c1; ++c2) {
+			if (c2 == c1 - 1)
+				sep = ")\n";
+			c3 = split(a[c2], t);
+			beg = match(t[c3], "[^*]");
+			end = match(t[c3], ";");
+			printf("%s%s", substr(t[c3], beg, end - beg), sep);
+		}
+		for (c2 = 0; c2 < c1; ++c2) {
+			c3 = split(a[c2], t);
+			printf("\t");
+			if (t[2] ~ "WILLRELE")
+				c4 = 3;
+			else
+				c4 = 2;
+			for (; c4 < c3; ++c4)
+				printf("%s ", t[c4]);
+			beg = match(t[c3], "[^*]");
+			printf("%s%s\n",
+			    substr(t[c4], 0, beg - 1), substr(t[c4], beg));
+		}
+		printf("{\n\tstruct %s_args a;\n\n", name);
+		printf("\ta.a_desc = VDESC(%s);\n", name);
+		for (c2 = 0; c2 < c1; ++c2) {
+			c3 = split(a[c2], t);
+			printf("\t");
+			beg = match(t[c3], "[^*]");
+			end = match(t[c3], ";");
+			printf("a.a_%s = %s\n",
+			    substr(t[c3], beg, end - beg), substr(t[c3], beg));
+		}
+		c1 = split(a[0], t);
+		beg = match(t[c1], "[^*]");
+		end = match(t[c1], ";");
+		printf("\treturn (VCALL(%s, VOFFSET(%s), &a));\n}\n",
+		    substr(t[c1], beg, end - beg), name);
+	}' < $SRC >> $HEADER
+
+# Print out header information for vnode_if.c.
+cat << END_OF_LEADING_COMMENT > $CFILE
+/*
+ * This file is produced automatically.
+ * Do not modify anything in here by hand.
+ *
+ * Created from @(#)vnode_if.sh	8.1 (Berkeley) 6/10/93
+ */
+
+#include <sys/param.h>
+#include <sys/mount.h>
+#include <sys/vnode.h>
+
+struct vnodeop_desc vop_default_desc = {
+	0,
+	"default",
+	0,
+	NULL,
+	VDESC_NO_OFFSET,
+	VDESC_NO_OFFSET,
+	VDESC_NO_OFFSET,
+	VDESC_NO_OFFSET,
+	NULL,
+};
+
+END_OF_LEADING_COMMENT
+
+# Awk script to take vnode_if.src and turn it into vnode_if.c.
+$AWK 'function kill_surrounding_ws (s) {
+		sub (/^[ \t]*/, "", s);
+		sub (/[ \t]*$/, "", s);
+		return s;
+	}
+
+	function read_args() {
+		numargs = 0;
+		while (getline ln) {
+			if (ln ~ /}/) {
+				break;
+			};
+	
+			# Delete comments, if any.
+			gsub (/\/\*.*\*\//, "", ln);
+			
+			# Delete leading/trailing space.
+			ln = kill_surrounding_ws(ln);
+	
+			# Pick off direction.
+			if (1 == sub(/^INOUT[ \t]+/, "", ln))
+				dir = "INOUT";
+			else if (1 == sub(/^IN[ \t]+/, "", ln))
+				dir = "IN";
+			else if (1 == sub(/^OUT[ \t]+/, "", ln))
+				dir = "OUT";
+			else
+				bail("No IN/OUT direction for \"" ln "\".");
+
+			# check for "WILLRELE"
+			if (1 == sub(/^WILLRELE[ \t]+/, "", ln)) {
+				rele = "WILLRELE";
+			} else {
+				rele = "WONTRELE";
+			};
+	
+			# kill trailing ;
+			if (1 != sub (/;$/, "", ln)) {
+				bail("Missing end-of-line ; in \"" ln "\".");
+			};
+	
+			# pick off variable name
+			if (!(i = match(ln, /[A-Za-z0-9_]+$/))) {
+				bail("Missing var name \"a_foo\" in \"" ln "\".");
+			};
+			arg = substr (ln, i);
+			# Want to <<substr(ln, i) = "";>>, but nawk cannot.
+			# Hack around this.
+			ln = substr(ln, 1, i-1);
+	
+			# what is left must be type
+			# (put clean it up some)
+			type = ln;
+			gsub (/[ \t]+/, " ", type);   # condense whitespace
+			type = kill_surrounding_ws(type);
+	
+			# (boy this was easier in Perl)
+	
+			numargs++;
+			dirs[numargs] = dir;
+			reles[numargs] = rele;
+			types[numargs] = type;
+			args[numargs] = arg;
+		};
+	}
+
+	function generate_operation_vp_offsets() {
+		printf ("int %s_vp_offsets[] = {\n", name);
+		# as a side effect, figure out the releflags
+		releflags = "";
+		vpnum = 0;
+		for (i=1; i<=numargs; i++) {
+			if (types[i] == "struct vnode *") {
+				printf ("\tVOPARG_OFFSETOF(struct %s_args,a_%s),\n",
+					name, args[i]);
+				if (reles[i] == "WILLRELE") {
+					releflags = releflags "|VDESC_VP" vpnum "_WILLRELE";
+				};
+				vpnum++;
+			};
+		};
+		sub (/^\|/, "", releflags);
+		print "\tVDESC_NO_OFFSET";
+		print "};";
+	}
+	
+	function find_arg_with_type (type) {
+		for (i=1; i<=numargs; i++) {
+			if (types[i] == type) {
+				return "VOPARG_OFFSETOF(struct " name "_args,a_" args[i] ")";
+			};
+		};
+		return "VDESC_NO_OFFSET";
+	}
+	
+	function generate_operation_desc() {
+		printf ("struct vnodeop_desc %s_desc = {\n", name);
+		# offset
+		printf ("\t0,\n");
+		# printable name
+		printf ("\t\"%s\",\n", name);
+		# flags
+		vppwillrele = "";
+		for (i=1; i<=numargs; i++) {
+			if (types[i] == "struct vnode **" &&
+				(reles[i] == "WILLRELE")) {
+				vppwillrele = "|VDESC_VPP_WILLRELE";
+			};
+		};
+		if (releflags == "") {
+			printf ("\t0%s,\n", vppwillrele);
+		} else {
+			printf ("\t%s%s,\n", releflags, vppwillrele);
+		};
+		# vp offsets
+		printf ("\t%s_vp_offsets,\n", name);
+		# vpp (if any)
+		printf ("\t%s,\n", find_arg_with_type("struct vnode **"));
+		# cred (if any)
+		printf ("\t%s,\n", find_arg_with_type("struct ucred *"));
+		# proc (if any)
+		printf ("\t%s,\n", find_arg_with_type("struct proc *"));
+		# componentname
+		printf ("\t%s,\n", find_arg_with_type("struct componentname *"));
+		# transport layer information
+		printf ("\tNULL,\n};\n");
+	}
+
+	NF == 0 || $0 ~ "^#" {
+		next;
+	}
+	{
+		# get the function name
+		name = $1;
+
+		# get the function arguments
+		read_args();
+
+		# Print out the vop_F_vp_offsets structure.  This all depends
+		# on naming conventions and nothing else.
+		generate_operation_vp_offsets();
+
+		# Print out the vnodeop_desc structure.
+		generate_operation_desc();
+
+		printf "\n";
+
+	}' < $SRC >> $CFILE
+# THINGS THAT DON'T WORK RIGHT YET.
+# 
+# Two existing BSD vnodeops (bwrite and strategy) don't take any vnodes as
+# arguments.  This means that these operations can't function successfully
+# through a bypass routine.
+#
+# Bwrite and strategy will be replaced when the VM page/buffer cache
+# integration happens.
+#
+# To get around this problem for now we handle these ops as special cases.
+
+cat << END_OF_SPECIAL_CASES >> $HEADER
+#include <sys/buf.h>
+struct vop_strategy_args {
+	struct vnodeop_desc *a_desc;
+	struct buf *a_bp;
+};
+extern struct vnodeop_desc vop_strategy_desc;
+static inline int VOP_STRATEGY(bp)
+	struct buf *bp;
+{
+	struct vop_strategy_args a;
+
+	a.a_desc = VDESC(vop_strategy);
+	a.a_bp = bp;
+	return (VCALL((bp)->b_vp, VOFFSET(vop_strategy), &a));
+}
+
+struct vop_bwrite_args {
+	struct vnodeop_desc *a_desc;
+	struct buf *a_bp;
+};
+extern struct vnodeop_desc vop_bwrite_desc;
+static inline int VOP_BWRITE(bp)
+	struct buf *bp;
+{
+	struct vop_bwrite_args a;
+
+	a.a_desc = VDESC(vop_bwrite);
+	a.a_bp = bp;
+	return (VCALL((bp)->b_vp, VOFFSET(vop_bwrite), &a));
+}
+END_OF_SPECIAL_CASES
+
+cat << END_OF_SPECIAL_CASES >> $CFILE
+int vop_strategy_vp_offsets[] = {
+	VDESC_NO_OFFSET
+};
+struct vnodeop_desc vop_strategy_desc = {
+	0,
+	"vop_strategy",
+	0,
+	vop_strategy_vp_offsets,
+	VDESC_NO_OFFSET,
+	VDESC_NO_OFFSET,
+	VDESC_NO_OFFSET,
+	VDESC_NO_OFFSET,
+	NULL,
+};
+int vop_bwrite_vp_offsets[] = {
+	VDESC_NO_OFFSET
+};
+struct vnodeop_desc vop_bwrite_desc = {
+	0,
+	"vop_bwrite",
+	0,
+	vop_bwrite_vp_offsets,
+	VDESC_NO_OFFSET,
+	VDESC_NO_OFFSET,
+	VDESC_NO_OFFSET,
+	VDESC_NO_OFFSET,
+	NULL,
+};
+END_OF_SPECIAL_CASES
+
+# Append the vfs_op_descs[] table to the C file: the default and the two
+# special-case descriptors first, then one entry per operation found in
+# the source file, then a terminating NULL.
+$AWK '
+	BEGIN {
+		print "";
+		print "struct vnodeop_desc *vfs_op_descs[] = {";
+		print "\t&vop_default_desc,	/* MUST BE FIRST */";
+		print "\t&vop_strategy_desc,	/* XXX: SPECIAL CASE */";
+		print "\t&vop_bwrite_desc,	/* XXX: SPECIAL CASE */";
+	}
+	NF == 0 || $0 ~ "^#" {
+		next;
+	}
+	{
+		# The first field is the operation name; emit its entry.
+		printf("\t&%s_desc,\n", $1);
+
+		# Consume the argument lines through the closing "};".
+		# Running out of input ends the main loop as well.
+		while ((getline) > 0 && $0 !~ "^};")
+			continue;
+	}
+	END {
+		print "\tNULL";
+		print "};";
+	}' < $SRC >> $CFILE
+
diff --git a/sys/kern/vnode_if.sh b/sys/kern/vnode_if.sh
new file mode 100644
index 00000000000..e190fa04836
--- /dev/null
+++ b/sys/kern/vnode_if.sh
@@ -0,0 +1,433 @@
+#!/bin/sh -
+#
+# Copyright (c) 1992, 1993
+#	The Regents of the University of California.  All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# 1. Redistributions of source code must retain the above copyright
+#    notice, this list of conditions and the following disclaimer.
+# 2. Redistributions in binary form must reproduce the above copyright
+#    notice, this list of conditions and the following disclaimer in the
+#    documentation and/or other materials provided with the distribution.
+# 3. All advertising materials mentioning features or use of this software
+#    must display the following acknowledgement:
+#	This product includes software developed by the University of
+#	California, Berkeley and its contributors.
+# 4. Neither the name of the University nor the names of its contributors
+#    may be used to endorse or promote products derived from this software
+#    without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+# SUCH DAMAGE.
+#
+#	@(#)vnode_if.sh	8.1 (Berkeley) 6/10/93
+#
+
+# Script to produce VFS front-end sugar.
+#
+# usage: vnode_if.sh srcfile
+#	(where srcfile is currently /sys/kern/vnode_if.src)
+#
+# These awk scripts are not particularly well written, specifically they
+# don't use arrays well and figure out the same information repeatedly.
+# Please rewrite them if you actually understand how to use awk.  Note,
+# they use nawk extensions and gawk's toupper.
+
+# Exactly one argument, the source file, is required.
+if [ $# -ne 1 ] ; then
+	echo 'usage: vnode_if.sh srcfile' 1>&2
+	exit 1
+fi
+
+# Name of the source file.
+SRC=$1
+# Names of the created files (relative, so made in the current directory).
+CFILE=vnode_if.c
+HEADER=vnode_if.h
+
+# Awk program (must support nawk extensions and gawk's "toupper")
+# Use "awk" at Berkeley, "gawk" elsewhere.
+AWK=awk
+
+# Print out header information for vnode_if.h.
+cat << END_OF_LEADING_COMMENT > $HEADER
+/*
+ * This file is produced automatically.
+ * Do not modify anything in here by hand.
+ *
+ * Created from @(#)vnode_if.sh	8.1 (Berkeley) 6/10/93
+ */
+
+extern struct vnodeop_desc vop_default_desc;
+END_OF_LEADING_COMMENT
+
+# Awk script to take vnode_if.src and turn it into vnode_if.h.
+$AWK '
+	NF == 0 || $0 ~ "^#" {
+		next;
+	}
+	{
+		# Get the operation name and the upper-case name of its wrapper.
+		name = $1;
+		uname = toupper(name);
+		# Collect the argument lines, up to the closing "};", in a[].
+		for (c1 = 0;; ++c1) {
+			if (getline <= 0)
+				exit
+			if ($0 ~ "^};")
+				break;
+			a[c1] = $0;
+		}
+
+		# Print out the vop_F_args structure.  The "*"s leading the
+		# declarator are cut with substr(s, 1, n); awk counts from 1.
+		printf("struct %s_args {\n\tstruct vnodeop_desc *a_desc;\n",
+		    name);
+		for (c2 = 0; c2 < c1; ++c2) {
+			c3 = split(a[c2], t);
+			printf("\t");
+			if (t[2] ~ "WILLRELE")
+				c4 = 3;
+			else
+				c4 = 2;
+			for (; c4 < c3; ++c4)
+				printf("%s ", t[c4]);
+			beg = match(t[c3], "[^*]");
+			printf("%sa_%s\n",
+			    substr(t[c4], 1, beg - 1), substr(t[c4], beg));
+		}
+		printf("};\n");
+		# Print out extern declaration.
+		printf("extern struct vnodeop_desc %s_desc;\n", name);
+
+		# Print out the inline wrapper: its name and parameter list.
+		printf("static inline int %s(", uname);
+		sep = ", ";
+		for (c2 = 0; c2 < c1; ++c2) {
+			if (c2 == c1 - 1)
+				sep = ")\n";
+			c3 = split(a[c2], t);
+			beg = match(t[c3], "[^*]");
+			end = match(t[c3], ";");
+			printf("%s%s", substr(t[c3], beg, end - beg), sep);
+		}
+		# Then the old-style parameter declarations, one per argument.
+		for (c2 = 0; c2 < c1; ++c2) {
+			c3 = split(a[c2], t);
+			printf("\t");
+			if (t[2] ~ "WILLRELE")
+				c4 = 3;
+			else
+				c4 = 2;
+			for (; c4 < c3; ++c4)
+				printf("%s ", t[c4]);
+			beg = match(t[c3], "[^*]");
+			printf("%s%s\n",
+			    substr(t[c4], 1, beg - 1), substr(t[c4], beg));
+		}
+		printf("{\n\tstruct %s_args a;\n\n", name);
+		printf("\ta.a_desc = VDESC(%s);\n", name);
+		for (c2 = 0; c2 < c1; ++c2) {
+			c3 = split(a[c2], t);
+			printf("\t");
+			beg = match(t[c3], "[^*]");
+			end = match(t[c3], ";");
+			printf("a.a_%s = %s\n",
+			    substr(t[c3], beg, end - beg), substr(t[c3], beg));
+		}
+		c1 = split(a[0], t);
+		beg = match(t[c1], "[^*]");
+		end = match(t[c1], ";");
+		printf("\treturn (VCALL(%s, VOFFSET(%s), &a));\n}\n",
+		    substr(t[c1], beg, end - beg), name);
+	}' < $SRC >> $HEADER
+
+# Print out header information for vnode_if.c.
+cat << END_OF_LEADING_COMMENT > $CFILE
+/*
+ * This file is produced automatically.
+ * Do not modify anything in here by hand.
+ *
+ * Created from @(#)vnode_if.sh	8.1 (Berkeley) 6/10/93
+ */
+
+#include <sys/param.h>
+#include <sys/mount.h>
+#include <sys/vnode.h>
+
+struct vnodeop_desc vop_default_desc = {
+	0,
+	"default",
+	0,
+	NULL,
+	VDESC_NO_OFFSET,
+	VDESC_NO_OFFSET,
+	VDESC_NO_OFFSET,
+	VDESC_NO_OFFSET,
+	NULL,
+};
+
+END_OF_LEADING_COMMENT
+
+# Awk script to take vnode_if.src and turn it into vnode_if.c.
+$AWK 'function kill_surrounding_ws (s) {
+		# Return s without its leading and trailing blanks and tabs.
+		sub (/^[ \t]*/, "", s);
+		sub (/[ \t]*$/, "", s);
+		return s;
+	}
+
+	# Report a fatal error in the source file on stderr and give up.
+	function bail (msg) {
+		print "vnode_if.sh: " msg | "cat 1>&2";
+		exit 1;
+	}
+
+	# Read argument lines up to the closing brace, filling dirs[],
+	# reles[], types[] and args[] (indexed 1..numargs).
+	function read_args() {
+		numargs = 0;
+		while ((getline ln) > 0) {	# > 0: stop on read errors (-1) too
+			if (ln ~ /}/)
+				break;
+
+			# Delete comments, if any.
+			gsub (/\/\*.*\*\//, "", ln);
+
+			# Delete leading/trailing space.
+			ln = kill_surrounding_ws(ln);
+
+			# Pick off direction.
+			if (1 == sub(/^INOUT[ \t]+/, "", ln))
+				dir = "INOUT";
+			else if (1 == sub(/^IN[ \t]+/, "", ln))
+				dir = "IN";
+			else if (1 == sub(/^OUT[ \t]+/, "", ln))
+				dir = "OUT";
+			else
+				bail("No IN/OUT direction for \"" ln "\".");
+
+			# check for "WILLRELE"
+			rele = (1 == sub(/^WILLRELE[ \t]+/, "", ln)) ? "WILLRELE" : "WONTRELE";
+
+			# kill trailing ;
+			if (1 != sub (/;$/, "", ln))
+				bail("Missing end-of-line ; in \"" ln "\".");
+
+			# pick off variable name
+			if (!(i = match(ln, /[A-Za-z0-9_]+$/)))
+				bail("Missing var name \"a_foo\" in \"" ln "\".");
+			arg = substr (ln, i);
+			# Want to <<substr(ln, i) = "";>>, but nawk cannot; hack around it.
+			ln = substr(ln, 1, i-1);
+
+			# what is left must be type
+			# (but clean it up some)
+			type = ln;
+			gsub (/[ \t]+/, " ", type);   # condense whitespace
+			type = kill_surrounding_ws(type);
+
+			# (boy this was easier in Perl)
+			numargs++;
+			dirs[numargs] = dir;
+			reles[numargs] = rele;
+			types[numargs] = type;
+			args[numargs] = arg;
+		};
+	}
+
+	# Emit the <name>_vp_offsets[] table: one VOPARG_OFFSETOF entry per
+	# "struct vnode *" argument, closed by VDESC_NO_OFFSET.  Also leaves
+	# in releflags the "|"-joined VDESC_VPn_WILLRELE flags of those args.
+	function generate_operation_vp_offsets() {
+		releflags = "";
+		vpnum = 0;
+		printf ("int %s_vp_offsets[] = {\n", name);
+		for (i = 1; i <= numargs; i++) {
+			if (types[i] != "struct vnode *")
+				continue;
+			printf ("\tVOPARG_OFFSETOF(struct %s_args,a_%s),\n",
+				name, args[i]);
+			if (reles[i] == "WILLRELE")
+				releflags = releflags "|VDESC_VP" vpnum "_WILLRELE";
+			vpnum++;
+		}
+		sub (/^\|/, "", releflags);
+		printf ("\tVDESC_NO_OFFSET\n};\n");
+	}
+	
+	# Offset expression for the first argument whose type is exactly the
+	# given string, or VDESC_NO_OFFSET when no argument has that type.
+	function find_arg_with_type (type) {
+		for (i = 1; i <= numargs; i++)
+			if (type == types[i])
+				return "VOPARG_OFFSETOF(struct " name "_args,a_" args[i] ")";
+		return "VDESC_NO_OFFSET";
+	}
+	
+	# Emit the struct vnodeop_desc initializer for the current operation:
+	# offset, name, flags, vp offsets table, vpp/cred/proc/componentname
+	# offsets.  Uses releflags as set by generate_operation_vp_offsets().
+	function generate_operation_desc() {
+		# A WILLRELE "struct vnode **" argument adds the VPP flag.
+		vppwillrele = "";
+		for (i = 1; i <= numargs; i++)
+			if (reles[i] == "WILLRELE" && types[i] == "struct vnode **")
+				vppwillrele = "|VDESC_VPP_WILLRELE";
+		printf ("struct vnodeop_desc %s_desc = {\n", name);
+		# offset
+		printf ("\t0,\n");
+		# printable name
+		printf ("\t\"%s\",\n", name);
+		# flags
+		if (releflags != "")
+			printf ("\t%s%s,\n", releflags, vppwillrele);
+		else
+			printf ("\t0%s,\n", vppwillrele);
+		# vp offsets
+		printf ("\t%s_vp_offsets,\n", name);
+		# vpp (if any)
+		printf ("\t%s,\n", find_arg_with_type("struct vnode **"));
+		# cred (if any)
+		printf ("\t%s,\n", find_arg_with_type("struct ucred *"));
+		# proc (if any)
+		printf ("\t%s,\n", find_arg_with_type("struct proc *"));
+		# componentname
+		printf ("\t%s,\n", find_arg_with_type("struct componentname *"));
+		# transport layer information
+		printf ("\tNULL,\n};\n");
+	}
+
+	# Skip blank lines and comments; any other line starts an operation.
+	NF == 0 || /^#/ {
+		next;
+	}
+	{
+		# The operation name is the first field, e.g. vop_lookup.
+		name = $1;
+
+		# Consume the argument lines up to the closing brace.
+		read_args();
+
+		# Print out the vop_F_vp_offsets structure.  This all depends
+		# on naming conventions and nothing else.
+		generate_operation_vp_offsets();
+
+		# Print out the vnodeop_desc structure, then a separator line.
+		generate_operation_desc();
+		printf ("\n");
+
+	}' < $SRC >> $CFILE
+# THINGS THAT DON'T WORK RIGHT YET.
+# 
+# Two existing BSD vnodeops (bwrite and strategy) don't take any vnodes as
+# arguments.  This means that these operations can't function successfully
+# through a bypass routine.
+#
+# Bwrite and strategy will be replaced when the VM page/buffer cache
+# integration happens.
+#
+# To get around this problem for now we handle these ops as special cases.
+
+cat << END_OF_SPECIAL_CASES >> $HEADER
+#include <sys/buf.h>
+struct vop_strategy_args {
+	struct vnodeop_desc *a_desc;
+	struct buf *a_bp;
+};
+extern struct vnodeop_desc vop_strategy_desc;
+static inline int VOP_STRATEGY(bp)
+	struct buf *bp;
+{
+	struct vop_strategy_args a;
+
+	a.a_desc = VDESC(vop_strategy);
+	a.a_bp = bp;
+	return (VCALL((bp)->b_vp, VOFFSET(vop_strategy), &a));
+}
+
+struct vop_bwrite_args {
+	struct vnodeop_desc *a_desc;
+	struct buf *a_bp;
+};
+extern struct vnodeop_desc vop_bwrite_desc;
+static inline int VOP_BWRITE(bp)
+	struct buf *bp;
+{
+	struct vop_bwrite_args a;
+
+	a.a_desc = VDESC(vop_bwrite);
+	a.a_bp = bp;
+	return (VCALL((bp)->b_vp, VOFFSET(vop_bwrite), &a));
+}
+END_OF_SPECIAL_CASES
+
+cat << END_OF_SPECIAL_CASES >> $CFILE
+int vop_strategy_vp_offsets[] = {
+	VDESC_NO_OFFSET
+};
+struct vnodeop_desc vop_strategy_desc = {
+	0,
+	"vop_strategy",
+	0,
+	vop_strategy_vp_offsets,
+	VDESC_NO_OFFSET,
+	VDESC_NO_OFFSET,
+	VDESC_NO_OFFSET,
+	VDESC_NO_OFFSET,
+	NULL,
+};
+int vop_bwrite_vp_offsets[] = {
+	VDESC_NO_OFFSET
+};
+struct vnodeop_desc vop_bwrite_desc = {
+	0,
+	"vop_bwrite",
+	0,
+	vop_bwrite_vp_offsets,
+	VDESC_NO_OFFSET,
+	VDESC_NO_OFFSET,
+	VDESC_NO_OFFSET,
+	VDESC_NO_OFFSET,
+	NULL,
+};
+END_OF_SPECIAL_CASES
+
+# Add the vfs_op_descs array to the C file.
+$AWK '
+	# Open the table; the default and special-case descriptors lead it.
+	BEGIN {
+		printf("\nstruct vnodeop_desc *vfs_op_descs[] = {\n");
+		printf("\t&vop_default_desc,	/* MUST BE FIRST */\n");
+		printf("\t&vop_strategy_desc,	/* XXX: SPECIAL CASE */\n");
+		printf("\t&vop_bwrite_desc,	/* XXX: SPECIAL CASE */\n");
+	}
+	# Blank lines and comments contribute nothing.
+	NF == 0 || /^#/ {
+		next;
+	}
+	# An operation header: emit its descriptor entry.
+	{
+		printf("\t&%s_desc,\n", $1);
+		# Skip the function arguments.
+		do {
+			if ((getline) <= 0)
+				exit;
+		} while ($0 !~ "^};");
+	}
+	# Close the table with its NULL terminator (also runs after exit).
+	END {
+		printf("\tNULL\n};\n");
+	}' < $SRC >> $CFILE
+
diff --git a/sys/kern/vnode_if.src b/sys/kern/vnode_if.src
new file mode 100644
index 00000000000..caee21dce0b
--- /dev/null
+++ b/sys/kern/vnode_if.src
@@ -0,0 +1,296 @@
+#
+# Copyright (c) 1992, 1993
+#	The Regents of the University of California.  All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# 1. Redistributions of source code must retain the above copyright
+#    notice, this list of conditions and the following disclaimer.
+# 2. Redistributions in binary form must reproduce the above copyright
+#    notice, this list of conditions and the following disclaimer in the
+#    documentation and/or other materials provided with the distribution.
+# 3. All advertising materials mentioning features or use of this software
+#    must display the following acknowledgement:
+#	This product includes software developed by the University of
+#	California, Berkeley and its contributors.
+# 4. Neither the name of the University nor the names of its contributors
+#    may be used to endorse or promote products derived from this software
+#    without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+# SUCH DAMAGE.
+#
+#	@(#)vnode_if.src	8.3 (Berkeley) 2/3/94
+#
+vop_lookup {
+	IN struct vnode *dvp;
+	INOUT struct vnode **vpp;
+	IN struct componentname *cnp;
+};
+
+vop_create {
+	IN WILLRELE struct vnode *dvp;
+	OUT struct vnode **vpp;
+	IN struct componentname *cnp;
+	IN struct vattr *vap;
+};
+
+vop_mknod {
+	IN WILLRELE struct vnode *dvp;
+	OUT WILLRELE struct vnode **vpp;
+	IN struct componentname *cnp;
+	IN struct vattr *vap;
+};
+
+vop_open {
+	IN struct vnode *vp;
+	IN int mode;
+	IN struct ucred *cred;
+	IN struct proc *p;
+};
+
+vop_close {
+	IN struct vnode *vp;
+	IN int fflag;
+	IN struct ucred *cred;
+	IN struct proc *p;
+};
+
+vop_access {
+	IN struct vnode *vp;
+	IN int mode;
+	IN struct ucred *cred;
+	IN struct proc *p;
+};
+
+vop_getattr {
+	IN struct vnode *vp;
+	IN struct vattr *vap;
+	IN struct ucred *cred;
+	IN struct proc *p;
+};
+
+vop_setattr {
+	IN struct vnode *vp;
+	IN struct vattr *vap;
+	IN struct ucred *cred;
+	IN struct proc *p;
+};
+
+vop_read {
+	IN struct vnode *vp;
+	INOUT struct uio *uio;
+	IN int ioflag;
+	IN struct ucred *cred;
+};
+
+vop_write {
+	IN struct vnode *vp;
+	INOUT struct uio *uio;
+	IN int ioflag;
+	IN struct ucred *cred;
+};
+
+vop_ioctl {
+	IN struct vnode *vp;
+	IN int command;
+	IN caddr_t data;
+	IN int fflag;
+	IN struct ucred *cred;
+	IN struct proc *p;
+};
+
+# Needs work?  (fflags)
+vop_select {
+	IN struct vnode *vp;
+	IN int which;
+	IN int fflags;
+	IN struct ucred *cred;
+	IN struct proc *p;
+};
+
+vop_mmap {
+	IN struct vnode *vp;
+	IN int fflags;
+	IN struct ucred *cred;
+	IN struct proc *p;
+};
+
+vop_fsync {
+	IN struct vnode *vp;
+	IN struct ucred *cred;
+	IN int waitfor;
+	IN struct proc *p;
+};
+
+# Needs work: Is newoff right?  What's it mean?
+vop_seek {
+	IN struct vnode *vp;
+	IN off_t oldoff;
+	IN off_t newoff;
+	IN struct ucred *cred;
+};
+
+vop_remove {
+	IN WILLRELE struct vnode *dvp;
+	IN WILLRELE struct vnode *vp;
+	IN struct componentname *cnp;
+};
+
+vop_link {
+	IN WILLRELE struct vnode *vp;
+	IN struct vnode *tdvp;
+	IN struct componentname *cnp;
+};
+
+vop_rename {
+	IN WILLRELE struct vnode *fdvp;
+	IN WILLRELE struct vnode *fvp;
+	IN struct componentname *fcnp;
+	IN WILLRELE struct vnode *tdvp;
+	IN WILLRELE struct vnode *tvp;
+	IN struct componentname *tcnp;
+};
+
+vop_mkdir {
+	IN WILLRELE struct vnode *dvp;
+	OUT struct vnode **vpp;
+	IN struct componentname *cnp;
+	IN struct vattr *vap;
+};
+
+vop_rmdir {
+	IN WILLRELE struct vnode *dvp;
+	IN WILLRELE struct vnode *vp;
+	IN struct componentname *cnp;
+};
+
+vop_symlink {
+	IN WILLRELE struct vnode *dvp;
+	OUT WILLRELE struct vnode **vpp;
+	IN struct componentname *cnp;
+	IN struct vattr *vap;
+	IN char *target;
+};
+
+vop_readdir {
+	IN struct vnode *vp;
+	INOUT struct uio *uio;
+	IN struct ucred *cred;
+};
+
+vop_readlink {
+	IN struct vnode *vp;
+	INOUT struct uio *uio;
+	IN struct ucred *cred;
+};
+
+vop_abortop {
+	IN struct vnode *dvp;
+	IN struct componentname *cnp;
+};
+
+vop_inactive {
+	IN struct vnode *vp;
+};
+
+vop_reclaim {
+	IN struct vnode *vp;
+};
+
+vop_lock {
+	IN struct vnode *vp;
+};
+
+vop_unlock {
+	IN struct vnode *vp;
+};
+
+vop_bmap {
+	IN struct vnode *vp;
+	IN daddr_t bn;
+	OUT struct vnode **vpp;
+	IN daddr_t *bnp;
+	OUT int *runp;
+};
+
+#vop_strategy {
+#	IN struct buf *bp;
+#};
+
+vop_print {
+	IN struct vnode *vp;
+};
+
+vop_islocked {
+	IN struct vnode *vp;
+};
+
+vop_pathconf {
+	IN struct vnode *vp;
+	IN int name;
+	OUT int *retval;
+};
+
+vop_advlock {
+	IN struct vnode *vp;
+	IN caddr_t id;
+	IN int op;
+	IN struct flock *fl;
+	IN int flags;
+};
+
+vop_blkatoff {
+	IN struct vnode *vp;
+	IN off_t offset;
+	OUT char **res;
+	OUT struct buf **bpp;
+};
+
+vop_valloc {
+	IN struct vnode *pvp;
+	IN int mode;
+	IN struct ucred *cred;
+	OUT struct vnode **vpp;
+};
+
+vop_reallocblks {
+	IN struct vnode *vp;
+	IN struct cluster_save *buflist;
+};
+
+vop_vfree {
+	IN struct vnode *pvp;
+	IN ino_t ino;
+	IN int mode;
+};
+
+vop_truncate {
+	IN struct vnode *vp;
+	IN off_t length;
+	IN int flags;
+	IN struct ucred *cred;
+	IN struct proc *p;
+};
+
+vop_update {
+	IN struct vnode *vp;
+	IN struct timeval *access;
+	IN struct timeval *modify;
+	IN int waitfor;
+};
+
+# Needs work: no vp?
+#vop_bwrite {
+#	IN struct buf *bp;
+#};
diff --git a/sys/libkern/Makefile b/sys/libkern/Makefile
new file mode 100644
index 00000000000..991a4350144
--- /dev/null
+++ b/sys/libkern/Makefile
@@ -0,0 +1,20 @@
+#	@(#)Makefile	7.9 (Berkeley) 6/1/93
+
+LIB=	kern
+CFLAGS+= -I${.CURDIR} -I${.CURDIR}/..
+SRCS=	adddi3.c anddi3.c ashldi3.c ashrdi3.c bcmp.c cmpdi2.c divdi3.c \
+	ffs.c iordi3.c locc.c lshldi3.c lshrdi3.c mcount.c moddi3.c \
+	muldi3.c negdi2.c notdi2.c qdivrem.c random.c rindex.c scanc.c \
+	skpc.c strcat.c strcmp.c strcpy.c strlen.c strncpy.c subdi3.c \
+	ucmpdi2.c udivdi3.c umoddi3.c xordi3.c
+
+.if exists(${.CURDIR}/${MACHINE}/Makefile.inc)
+.PATH: ${.CURDIR}/${MACHINE}
+.include "${.CURDIR}/${MACHINE}/Makefile.inc"
+.endif
+
+# mcount cannot be compiled with profiling
+mcount.po: mcount.o
+	cp mcount.o mcount.po
+
+.include <bsd.lib.mk>
diff --git a/sys/libkern/adddi3.c b/sys/libkern/adddi3.c
new file mode 100644
index 00000000000..d10da47e0cf
--- /dev/null
+++ b/sys/libkern/adddi3.c
@@ -0,0 +1,60 @@
+/*-
+ * Copyright (c) 1992, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This software was developed by the Computer Systems Engineering group
+ * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
+ * contributed to Berkeley.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static char sccsid[] = "@(#)adddi3.c	8.1 (Berkeley) 6/4/93";
+#endif /* LIBC_SCCS and not lint */
+
+#include "quad.h"
+
+/*
+ * Add two quads; the low sum wrapped (carry) iff it is below an addend.
+ */
+quad_t
+__adddi3(a, b)
+	quad_t a, b;
+{
+	union uu x, y, r;
+
+	x.q = a;
+	y.q = b;
+	r.ul[L] = x.ul[L] + y.ul[L];
+	r.ul[H] = x.ul[H] + y.ul[H];
+	if (r.ul[L] < y.ul[L])
+		r.ul[H]++;		/* carry out of the low word */
+	return (r.q);
+}
diff --git a/sys/libkern/anddi3.c b/sys/libkern/anddi3.c
new file mode 100644
index 00000000000..5ae45ac1a86
--- /dev/null
+++ b/sys/libkern/anddi3.c
@@ -0,0 +1,58 @@
+/*-
+ * Copyright (c) 1992, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This software was developed by the Computer Systems Engineering group
+ * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
+ * contributed to Berkeley.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static char sccsid[] = "@(#)anddi3.c	8.1 (Berkeley) 6/4/93";
+#endif /* LIBC_SCCS and not lint */
+
+#include "quad.h"
+
+/*
+ * Bitwise AND of two quads, computed on each ul[] word in turn.
+ */
+quad_t
+__anddi3(a, b)
+	quad_t a, b;
+{
+	union uu lhs, rhs;
+
+	lhs.q = a;
+	rhs.q = b;
+	lhs.ul[0] = lhs.ul[0] & rhs.ul[0];
+	lhs.ul[1] = lhs.ul[1] & rhs.ul[1];
+	return (lhs.q);
+}
diff --git a/sys/libkern/ashldi3.c b/sys/libkern/ashldi3.c
new file mode 100644
index 00000000000..72501adfaed
--- /dev/null
+++ b/sys/libkern/ashldi3.c
@@ -0,0 +1,66 @@
+/*-
+ * Copyright (c) 1992, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This software was developed by the Computer Systems Engineering group
+ * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
+ * contributed to Berkeley.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static char sccsid[] = "@(#)ashldi3.c	8.1 (Berkeley) 6/4/93";
+#endif /* LIBC_SCCS and not lint */
+
+#include "quad.h"
+
+/*
+ * Arithmetic left shift of a signed quad; same bits as a logical one.
+ */
+quad_t
+__ashldi3(a, shift)
+	quad_t a;
+	qshift_t shift;
+{
+	union uu v;
+
+	v.q = a;
+	if (shift >= LONG_BITS) {
+		if (shift >= QUAD_BITS)
+			v.ul[H] = 0;
+		else
+			v.ul[H] = v.ul[L] << (shift - LONG_BITS);
+		v.ul[L] = 0;
+	} else if (shift > 0) {
+		v.ul[H] = (v.ul[H] << shift) | (v.ul[L] >> (LONG_BITS - shift));
+		v.ul[L] <<= shift;
+	}
+	return (v.q);
+}
diff --git a/sys/libkern/ashrdi3.c b/sys/libkern/ashrdi3.c
new file mode 100644
index 00000000000..9ffa5ed06b5
--- /dev/null
+++ b/sys/libkern/ashrdi3.c
@@ -0,0 +1,75 @@
+/*-
+ * Copyright (c) 1992, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This software was developed by the Computer Systems Engineering group
+ * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
+ * contributed to Berkeley.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static char sccsid[] = "@(#)ashrdi3.c	8.1 (Berkeley) 6/4/93";
+#endif /* LIBC_SCCS and not lint */
+
+#include "quad.h"
+
+/*
+ * Arithmetic right shift of a signed quad.
+ */
+quad_t
+__ashrdi3(a, shift)
+	quad_t a;
+	qshift_t shift;
+{
+	union uu v;
+
+	v.q = a;
+	if (shift >= LONG_BITS) {
+		long sign;
+
+		/*
+		 * Build the sign word with the native right shift (sign
+		 * extending or zero filling, whichever the machine does).
+		 * A shift by LONG_BITS is undefined, so shift by
+		 * LONG_BITS - 1 and then by 1 more.
+		 */
+		sign = (v.sl[H] >> (LONG_BITS - 1)) >> 1;
+		if (shift >= QUAD_BITS)
+			v.ul[L] = sign;
+		else
+			v.ul[L] = v.sl[H] >> (shift - LONG_BITS);
+		v.ul[H] = sign;
+	} else if (shift > 0) {
+		v.ul[L] = (v.ul[L] >> shift) | (v.ul[H] << (LONG_BITS - shift));
+		v.sl[H] >>= shift;
+	}
+	return (v.q);
+}
diff --git a/sys/libkern/bcmp.c b/sys/libkern/bcmp.c
new file mode 100644
index 00000000000..5a3ae616800
--- /dev/null
+++ b/sys/libkern/bcmp.c
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 1987, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static char sccsid[] = "@(#)bcmp.c	8.1 (Berkeley) 6/4/93";
+#endif /* LIBC_SCCS and not lint */
+
+#include <string.h>
+
+/*
+ * bcmp -- compare byte strings (cf. the vax cmpc3 instruction).
+ *
+ * Returns 0 when the first length bytes of b1 and b2 are equal (always
+ * the case for length 0) and 1 otherwise.  The count of bytes left is
+ * deliberately not returned: squeezing a size_t through the int return
+ * type can turn a non-zero count into 0 and so hide a mismatch.
+ */
+int
+bcmp(b1, b2, length)
+	const void *b1, *b2;
+	register size_t length;
+{
+	register const char *p1 = b1, *p2 = b2;
+
+	for (; length != 0; --length)
+		if (*p1++ != *p2++)
+			return (1);
+	return (0);
+}
diff --git a/sys/libkern/cmpdi2.c b/sys/libkern/cmpdi2.c
new file mode 100644
index 00000000000..f6e4bdd6a4d
--- /dev/null
+++ b/sys/libkern/cmpdi2.c
@@ -0,0 +1,59 @@
+/*-
+ * Copyright (c) 1992, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This software was developed by the Computer Systems Engineering group
+ * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
+ * contributed to Berkeley.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static char sccsid[] = "@(#)cmpdi2.c	8.1 (Berkeley) 6/4/93";
+#endif /* LIBC_SCCS and not lint */
+
+#include "quad.h"
+
+/*
+ * Three-way signed quad compare: 0 if a < b, 1 if a == b, 2 if a > b.
+ * The high words are compared through the signed sl[] view; only when
+ * they are equal do the low words decide, through the ul[] view.
+ */
+int
+__cmpdi2(a, b)
+	quad_t a, b;
+{
+	union uu x, y;
+
+	x.q = a, y.q = b;
+	if (x.sl[H] != y.sl[H])		/* high words decide first */
+		return (x.sl[H] < y.sl[H] ? 0 : 2);
+	return (x.ul[L] < y.ul[L] ? 0 : x.ul[L] > y.ul[L] ? 2 : 1);
+}
diff --git a/sys/libkern/divdi3.c b/sys/libkern/divdi3.c
new file mode 100644
index 00000000000..da7b2fccd01
--- /dev/null
+++ b/sys/libkern/divdi3.c
@@ -0,0 +1,65 @@
+/*-
+ * Copyright (c) 1992, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This software was developed by the Computer Systems Engineering group
+ * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
+ * contributed to Berkeley.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static char sccsid[] = "@(#)divdi3.c	8.1 (Berkeley) 6/4/93";
+#endif /* LIBC_SCCS and not lint */
+
+#include "quad.h"
+
+/*
+ * Divide two signed quads: divide the magnitudes with __qdivrem() and
+ * negate the quotient when exactly one operand was negative.
+ * ??? if -1/2 should produce -1 on this machine, this code is wrong
+ */
+quad_t
+__divdi3(a, b)
+	quad_t a, b;
+{
+	u_quad_t num, den, quo;
+	int flip;
+
+	/* Negate in the unsigned type so the negation cannot overflow. */
+	num = a < 0 ? -(u_quad_t)a : (u_quad_t)a;
+	den = b < 0 ? -(u_quad_t)b : (u_quad_t)b;
+	flip = (a < 0) != (b < 0);
+
+	quo = __qdivrem(num, den, (u_quad_t *)0);
+	if (flip)
+		quo = -quo;
+	return (quo);
+}
diff --git a/sys/libkern/ffs.c b/sys/libkern/ffs.c
new file mode 100644
index 00000000000..099ff8e4c91
--- /dev/null
+++ b/sys/libkern/ffs.c
@@ -0,0 +1,54 @@
+/*-
+ * Copyright (c) 1990, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static char sccsid[] = "@(#)ffs.c	8.1 (Berkeley) 6/4/93";
+#endif /* LIBC_SCCS and not lint */
+
+#include <string.h>
+
+/*
+ * ffs -- find first set bit: returns the 1-based position of the
+ * lowest-order 1 bit in mask, or 0 when mask has no bits set.
+ */
+int
+ffs(mask)
+	register int mask;
+{
+	register int pos = 0;
+
+	if (mask != 0)
+		for (pos = 1; (mask & 1) == 0; mask >>= 1)
+			pos++;
+	return(pos);
+}
diff --git a/sys/libkern/iordi3.c b/sys/libkern/iordi3.c
new file mode 100644
index 00000000000..e225005d414
--- /dev/null
+++ b/sys/libkern/iordi3.c
@@ -0,0 +1,58 @@
+/*-
+ * Copyright (c) 1992, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This software was developed by the Computer Systems Engineering group
+ * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
+ * contributed to Berkeley.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static char sccsid[] = "@(#)iordi3.c	8.1 (Berkeley) 6/4/93";
+#endif /* LIBC_SCCS and not lint */
+
+#include "quad.h"
+
+/*
+ * Bitwise inclusive OR of two quads, done one ul[] word at a time.
+ */
+quad_t
+__iordi3(a, b)
+	quad_t a, b;
+{
+	union uu lhs, rhs;
+	int w;
+
+	lhs.q = a, rhs.q = b;
+	for (w = 0; w < 2; w++)		/* both ul[] halves of the quad */
+		lhs.ul[w] |= rhs.ul[w];
+	return (lhs.q);
+}
diff --git a/sys/libkern/libkern.h b/sys/libkern/libkern.h
new file mode 100644
index 00000000000..0e465e03dfd
--- /dev/null
+++ b/sys/libkern/libkern.h
@@ -0,0 +1,98 @@
+/*-
+ * Copyright (c) 1992, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)libkern.h	8.1 (Berkeley) 6/10/93
+ */
+
+#include <sys/types.h>
+
+static inline int
+imax(a, b)
+	int a, b;		/* returns the larger of the two ints */
+{
+	return (b < a ? a : b);
+}
+static inline int
+imin(a, b)
+	int a, b;		/* returns the smaller of the two ints */
+{
+	return (b > a ? a : b);
+}
+static inline long
+lmax(a, b)
+	long a, b;		/* returns the larger of the two longs */
+{
+	return (b < a ? a : b);
+}
+static inline long
+lmin(a, b)
+	long a, b;		/* returns the smaller of the two longs */
+{
+	return (b > a ? a : b);
+}
+static inline u_int
+max(a, b)
+	u_int a, b;		/* returns the larger, compared unsigned */
+{
+	return (b < a ? a : b);
+}
+static inline u_int
+min(a, b)
+	u_int a, b;		/* returns the smaller, compared unsigned */
+{
+	return (b > a ? a : b);
+}
+static inline u_long
+ulmax(a, b)
+	u_long a, b;		/* returns the larger, compared unsigned */
+{
+	return (b < a ? a : b);
+}
+static inline u_long
+ulmin(a, b)
+	u_long a, b;		/* returns the smaller, compared unsigned */
+{
+	return (b > a ? a : b);
+}
+
+/* Prototypes for non-quad routines. */
+int	 bcmp __P((const void *, const void *, size_t));	/* 0 if equal */
+int	 ffs __P((int));		/* 1-based lowest set bit, 0 if none */
+int	 locc __P((int, char *, u_int));	/* bytes left at first match */
+u_long	 random __P((void));
+char	*rindex __P((const char *, int));
+int	 scanc __P((u_int, u_char *, u_char *, int));
+int	 skpc __P((int, int, char *));
+char	*strcat __P((char *, const char *));
+char	*strcpy __P((char *, const char *));
+size_t	 strlen __P((const char *));
+char	*strncpy __P((char *, const char *, size_t));
diff --git a/sys/libkern/locc.c b/sys/libkern/locc.c
new file mode 100644
index 00000000000..3767222c5f0
--- /dev/null
+++ b/sys/libkern/locc.c
@@ -0,0 +1,50 @@
+/*-
+ * Copyright (c) 1992, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)locc.c	8.1 (Berkeley) 6/10/93
+ */
+
+#include <libkern/libkern.h>
+
+int
+locc(mask0, cp0, size)
+	int mask0;		/* byte value to look for */
+	char *cp0;		/* start of the buffer to scan */
+	u_int size;		/* number of bytes in the buffer */
+{
+	register u_char *scan = (u_char *)cp0, *limit = scan + size;
+	register u_char want = mask0;
+
+	while (scan < limit && *scan != want)
+		scan++;
+	return (limit - scan);	/* bytes left from the match on; 0 if none */
+}
diff --git a/sys/libkern/lshldi3.c b/sys/libkern/lshldi3.c
new file mode 100644
index 00000000000..0af6051c1a6
--- /dev/null
+++ b/sys/libkern/lshldi3.c
@@ -0,0 +1,66 @@
+/*-
+ * Copyright (c) 1992, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This software was developed by the Computer Systems Engineering group
+ * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
+ * contributed to Berkeley.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static char sccsid[] = "@(#)lshldi3.c	8.1 (Berkeley) 6/4/93";
+#endif /* LIBC_SCCS and not lint */
+
+#include "quad.h"
+
+/*
+ * Logical left shift of a quad by shift bits, built from word-sized
+ * shifts.  This is the same as arithmetic shift left!
+ */
+quad_t
+__lshldi3(a, shift)
+	quad_t a;
+	qshift_t shift;
+{
+	union uu v;
+
+	v.q = a;
+	if (shift >= QUAD_BITS)
+		v.ul[H] = v.ul[L] = 0;		/* everything shifted out */
+	else if (shift >= LONG_BITS) {
+		v.ul[H] = v.ul[L] << (shift - LONG_BITS);
+		v.ul[L] = 0;
+	} else if (shift > 0) {		/* 0 would shift by LONG_BITS below */
+		v.ul[H] = (v.ul[H] << shift) | (v.ul[L] >> (LONG_BITS - shift));
+		v.ul[L] <<= shift;
+	}
+	return (v.q);
+}
diff --git a/sys/libkern/lshrdi3.c b/sys/libkern/lshrdi3.c
new file mode 100644
index 00000000000..add2eda988c
--- /dev/null
+++ b/sys/libkern/lshrdi3.c
@@ -0,0 +1,65 @@
+/*-
+ * Copyright (c) 1992, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This software was developed by the Computer Systems Engineering group
+ * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
+ * contributed to Berkeley.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static char sccsid[] = "@(#)lshrdi3.c	8.1 (Berkeley) 6/4/93";
+#endif /* LIBC_SCCS and not lint */
+
+#include "quad.h"
+
+/*
+ * Shift an (unsigned) quad value right by shift bits (logical shift right).
+ */
+quad_t
+__lshrdi3(a, shift)
+	quad_t a;
+	qshift_t shift;
+{
+	union uu v;
+
+	v.q = a;
+	if (shift >= QUAD_BITS)
+		v.ul[L] = v.ul[H] = 0;		/* everything shifted out */
+	else if (shift >= LONG_BITS) {
+		v.ul[L] = v.ul[H] >> (shift - LONG_BITS);
+		v.ul[H] = 0;
+	} else if (shift > 0) {		/* 0 would shift by LONG_BITS below */
+		v.ul[L] = (v.ul[L] >> shift) | (v.ul[H] << (LONG_BITS - shift));
+		v.ul[H] >>= shift;
+	}
+	return (v.q);
+}
diff --git a/sys/libkern/mcount.c b/sys/libkern/mcount.c
new file mode 100644
index 00000000000..523217d1d2d
--- /dev/null
+++ b/sys/libkern/mcount.c
@@ -0,0 +1,178 @@
+/*-
+ * Copyright (c) 1983, 1992, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#if !defined(lint) && !defined(KERNEL) && defined(LIBC_SCCS)
+static char sccsid[] = "@(#)mcount.c	8.1 (Berkeley) 6/4/93";
+#endif
+
+#include <sys/param.h>
+#include <sys/gmon.h>
+
+/*
+ * mcount is called on entry to each function compiled with the profiling
+ * switch set.  _mcount(), which is declared in a machine-dependent way
+ * with _MCOUNT_DECL, does the actual work and is either inlined into a
+ * C routine or called by an assembly stub.  In any case, this magic is
+ * taken care of by the MCOUNT definition in <machine/profile.h>.
+ *
+ * _mcount updates data structures that represent traversals of the
+ * program's call graph edges.  frompc and selfpc are the return
+ * address and function address that represents the given call graph edge.
+ * 
+ * Note: the original BSD code used the same variable (frompcindex) for
+ * both frompcindex and frompc.  Any reasonable, modern compiler will
+ * perform this optimization.
+ */
+_MCOUNT_DECL(frompc, selfpc)	/* _mcount; may be static, inline, etc */
+	register u_long frompc, selfpc;	/* return address, function address */
+{
+	register u_short *frompcindex;
+	register struct tostruct *top, *prevtop;
+	register struct gmonparam *p;
+	register long toindex;
+#ifdef KERNEL
+	register int s;
+#endif
+
+	p = &_gmonparam;
+	/*
+	 * check that we are profiling
+	 * and that we aren't recursively invoked.
+	 */
+	if (p->state != GMON_PROF_ON)
+		return;
+#ifdef KERNEL
+	MCOUNT_ENTER;
+#else
+	p->state = GMON_PROF_BUSY;
+#endif
+	/*
+	 * check that frompc is a reasonable pc value.
+	 * for example:	signal catchers get called from the stack,
+	 *		not from text space.  too bad.
+	 */
+	frompc -= p->lowpc;		/* make frompc relative to p->lowpc */
+	if (frompc > p->textsize)
+		goto done;
+
+	frompcindex = &p->froms[frompc / (p->hashfraction * sizeof(*p->froms))];
+	toindex = *frompcindex;
+	if (toindex == 0) {
+		/*
+		 *	first time traversing this arc
+		 */
+		toindex = ++p->tos[0].link;	/* tos[0].link: last index handed out */
+		if (toindex >= p->tolimit)
+			/* halt further profiling */
+			goto overflow;
+
+		*frompcindex = toindex;
+		top = &p->tos[toindex];
+		top->selfpc = selfpc;
+		top->count = 1;
+		top->link = 0;
+		goto done;
+	}
+	top = &p->tos[toindex];
+	if (top->selfpc == selfpc) {
+		/*
+		 * arc at front of chain; usual case.
+		 */
+		top->count++;
+		goto done;
+	}
+	/*
+	 * have to go looking down chain for it.
+	 * top points to what we are looking at,
+	 * prevtop points to previous top.
+	 * we know it is not at the head of the chain.
+	 */
+	for (; /* goto done */; ) {
+		if (top->link == 0) {
+			/*
+			 * top is end of the chain and none of the chain
+			 * had top->selfpc == selfpc.
+			 * so we allocate a new tostruct
+			 * and link it to the head of the chain.
+			 */
+			toindex = ++p->tos[0].link;
+			if (toindex >= p->tolimit)
+				goto overflow;
+
+			top = &p->tos[toindex];
+			top->selfpc = selfpc;
+			top->count = 1;
+			top->link = *frompcindex;
+			*frompcindex = toindex;
+			goto done;
+		}
+		/*
+		 * otherwise, check the next arc on the chain.
+		 */
+		prevtop = top;
+		top = &p->tos[top->link];
+		if (top->selfpc == selfpc) {
+			/*
+			 * there it is.
+			 * increment its count
+			 * move it to the head of the chain.
+			 */
+			top->count++;
+			toindex = prevtop->link;	/* index of the entry just found */
+			prevtop->link = top->link;	/* unlink it from mid-chain */
+			top->link = *frompcindex;
+			*frompcindex = toindex;
+			goto done;
+		}
+		
+	}
+done:
+#ifdef KERNEL
+	MCOUNT_EXIT;
+#else
+	p->state = GMON_PROF_ON;
+#endif
+	return;
+overflow:
+	p->state = GMON_PROF_ERROR;	/* not GMON_PROF_ON: later calls return early */
+#ifdef KERNEL
+	MCOUNT_EXIT;
+#endif
+	return;
+}
+
+/*
+ * Actual definition of mcount function.  Defined in <machine/profile.h>,
+ * which is included by <sys/gmon.h>.
+ */
+MCOUNT
diff --git a/sys/libkern/moddi3.c b/sys/libkern/moddi3.c
new file mode 100644
index 00000000000..f31c6e84f2b
--- /dev/null
+++ b/sys/libkern/moddi3.c
@@ -0,0 +1,67 @@
+/*-
+ * Copyright (c) 1992, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This software was developed by the Computer Systems Engineering group
+ * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
+ * contributed to Berkeley.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static char sccsid[] = "@(#)moddi3.c	8.1 (Berkeley) 6/4/93";
+#endif /* LIBC_SCCS and not lint */
+
+#include "quad.h"
+
+/*
+ * Return remainder after dividing two signed quads.
+ *
+ * The remainder takes the sign of the dividend a; the sign of b does not
+ * matter.  XXX if -1/2 should produce -1 on this machine, this is wrong.
+ */
+quad_t
+__moddi3(a, b)
+	quad_t a, b;
+{
+	u_quad_t ua, ub, ur;
+	int neg;
+
+	if (a < 0)
+		ua = -(u_quad_t)a, neg = 1;
+	else
+		ua = a, neg = 0;
+	if (b < 0)
+		ub = -(u_quad_t)b;	/* divisor sign never flips the result */
+	else
+		ub = b;
+	(void)__qdivrem(ua, ub, &ur);
+	return (neg ? -ur : ur);
+}
diff --git a/sys/libkern/muldi3.c b/sys/libkern/muldi3.c
new file mode 100644
index 00000000000..a8d7cfc7eab
--- /dev/null
+++ b/sys/libkern/muldi3.c
@@ -0,0 +1,246 @@
+/*-
+ * Copyright (c) 1992, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This software was developed by the Computer Systems Engineering group
+ * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
+ * contributed to Berkeley.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static char sccsid[] = "@(#)muldi3.c	8.1 (Berkeley) 6/4/93";
+#endif /* LIBC_SCCS and not lint */
+
+#include "quad.h"
+
+/*
+ * Multiply two quads.
+ *
+ * Our algorithm is based on the following.  Split incoming quad values
+ * u and v (where u,v >= 0) into
+ *
+ *	u = 2^n u1  +  u0	(n = number of bits in `u_long', usu. 32)
+ *
+ * and
+ *
+ *	v = 2^n v1  +  v0
+ *
+ * Then
+ *
+ *	uv = 2^2n u1 v1  +  2^n u1 v0  +  2^n v1 u0  +  u0 v0
+ *	   = 2^2n u1 v1  +     2^n (u1 v0 + v1 u0)   +  u0 v0
+ *
+ * Now add 2^n u1 v1 to the first term and subtract it from the middle,
+ * and add 2^n u0 v0 to the last term and subtract it from the middle.
+ * This gives:
+ *
+ *	uv = (2^2n + 2^n) (u1 v1)  +
+ *	         (2^n)    (u1 v0 - u1 v1 + u0 v1 - u0 v0)  +
+ *	       (2^n + 1)  (u0 v0)
+ *
+ * Factoring the middle a bit gives us:
+ *
+ *	uv = (2^2n + 2^n) (u1 v1)  +			[u1v1 = high]
+ *		 (2^n)    (u1 - u0) (v0 - v1)  +	[(u1-u0)... = mid]
+ *	       (2^n + 1)  (u0 v0)			[u0v0 = low]
+ *
+ * The terms (u1 v1), (u1 - u0) (v0 - v1), and (u0 v0) can all be done
+ * in just half the precision of the original.  (Note that either or both
+ * of (u1 - u0) or (v0 - v1) may be negative.)
+ *
+ * This algorithm is from Knuth vol. 2 (2nd ed), section 4.3.3, p. 278.
+ *
+ * Since C does not give us a `long * long = quad' operator, we split
+ * our input quads into two longs, then split the two longs into two
+ * shorts.  We can then calculate `short * short = long' in native
+ * arithmetic.
+ *
+ * Our product should, strictly speaking, be a `long quad', with 128
+ * bits, but we are going to discard the upper 64.  In other words,
+ * we are not interested in uv, but rather in (uv mod 2^2n).  This
+ * makes some of the terms above vanish, and we get:
+ *
+ *	(2^n)(high) + (2^n)(mid) + (2^n + 1)(low)
+ *
+ * or
+ *
+ *	(2^n)(high + mid + low) + low
+ *
+ * Furthermore, `high' and `mid' can be computed mod 2^n, as any factor
+ * of 2^n in either one will also vanish.  Only `low' need be computed
+ * mod 2^2n, and only because of the final term above.
+ */
+static quad_t __lmulq(u_long, u_long);	/* full-width product of two longs */
+/* __muldi3(a, b): return the signed product a * b, truncated to a quad. */
+quad_t
+__muldi3(a, b)
+	quad_t a, b;
+{
+	union uu u, v, low, prod;
+	register u_long high, mid, udiff, vdiff;
+	register int negall, negmid;	/* negate whole result / middle term */
+#define	u1	u.ul[H]
+#define	u0	u.ul[L]
+#define	v1	v.ul[H]
+#define	v0	v.ul[L]
+
+	/*
+	 * Get u and v such that u, v >= 0.  When this is finished,
+	 * u1, u0, v1, and v0 will be directly accessible through the
+	 * longword fields.
+	 */
+	if (a >= 0)
+		u.q = a, negall = 0;
+	else
+		u.q = -a, negall = 1;	/* NB: -a overflows for the minimum quad */
+	if (b >= 0)
+		v.q = b;
+	else
+		v.q = -b, negall ^= 1;	/* signs differ => negative product */
+
+	if (u1 == 0 && v1 == 0) {
+		/*
+		 * An (I hope) important optimization occurs when u1 and v1
+		 * are both 0.  This should be common since most numbers
+		 * are small.  Here the product is just u0*v0.
+		 */
+		prod.q = __lmulq(u0, v0);
+	} else {
+		/*
+		 * Compute the three intermediate products, remembering
+		 * whether the middle term is negative.  We can discard
+		 * any upper bits in high and mid, so we can use native
+		 * u_long * u_long => u_long arithmetic.
+		 */
+		low.q = __lmulq(u0, v0);
+
+		if (u1 >= u0)
+			negmid = 0, udiff = u1 - u0;
+		else
+			negmid = 1, udiff = u0 - u1;
+		if (v0 >= v1)
+			vdiff = v0 - v1;
+		else
+			vdiff = v1 - v0, negmid ^= 1;
+		mid = udiff * vdiff;	/* |u1 - u0| * |v0 - v1|, low long only */
+
+		high = u1 * v1;		/* low long only */
+
+		/*
+		 * Assemble the final product: (2^n)(high + mid + low) + low.
+		 */
+		prod.ul[H] = high + (negmid ? -mid : mid) + low.ul[L] +
+		    low.ul[H];
+		prod.ul[L] = low.ul[L];
+	}
+	return (negall ? -prod.q : prod.q);
+#undef u1
+#undef u0
+#undef v1
+#undef v0
+}
+
+/*
+ * Multiply two 2N-bit longs to produce a 4N-bit quad, where N is half
+ * the number of bits in a long (whatever that is---the code below
+ * does not care as long as quad.h does its part of the bargain---but
+ * typically N==16).
+ *
+ * We use the same algorithm from Knuth, but this time the modulo refinement
+ * does not apply.  On the other hand, since N is half the size of a long,
+ * we can get away with native multiplication---none of our input terms
+ * exceeds (ULONG_MAX >> 1).
+ *
+ * Note that, for u_long l, the quad-precision result
+ *
+ *	l << N
+ *
+ * splits into high and low longs as HHALF(l) and LHUP(l) respectively.
+ */
+static quad_t		/* 4N-bit product of the two 2N-bit longs u and v */
+__lmulq(u_long u, u_long v)
+{
+	u_long u1, u0, v1, v0, udiff, vdiff, high, mid, low;
+	u_long prodh, prodl, was;	/* was: prodl before an add/subtract */
+	union uu prod;
+	int neg;			/* nonzero if the middle term is negative */
+
+	u1 = HHALF(u);
+	u0 = LHALF(u);
+	v1 = HHALF(v);
+	v0 = LHALF(v);
+
+	low = u0 * v0;
+
+	/* This is the same small-number optimization as before. */
+	if (u1 == 0 && v1 == 0)
+		return (low);
+
+	if (u1 >= u0)
+		udiff = u1 - u0, neg = 0;
+	else
+		udiff = u0 - u1, neg = 1;
+	if (v0 >= v1)
+		vdiff = v0 - v1;
+	else
+		vdiff = v1 - v0, neg ^= 1;
+	mid = udiff * vdiff;
+
+	high = u1 * v1;
+
+	/* prod = (high << 2N) + (high << N); */
+	prodh = high + HHALF(high);
+	prodl = LHUP(high);
+
+	/* if (neg) prod -= mid << N; else prod += mid << N; */
+	if (neg) {
+		was = prodl;
+		prodl -= LHUP(mid);
+		prodh -= HHALF(mid) + (prodl > was);	/* borrow if prodl wrapped */
+	} else {
+		was = prodl;
+		prodl += LHUP(mid);
+		prodh += HHALF(mid) + (prodl < was);	/* carry if prodl wrapped */
+	}
+
+	/* prod += low << N */
+	was = prodl;
+	prodl += LHUP(low);
+	prodh += HHALF(low) + (prodl < was);	/* carry if prodl wrapped */
+	/* ... + low; */
+	if ((prodl += low) < low)	/* sum wrapped: carry into prodh */
+		prodh++;
+
+	/* return 4N-bit product */
+	prod.ul[H] = prodh;
+	prod.ul[L] = prodl;
+	return (prod.q);
+}
diff --git a/sys/libkern/negdi2.c b/sys/libkern/negdi2.c
new file mode 100644
index 00000000000..bb8670d8e2c
--- /dev/null
+++ b/sys/libkern/negdi2.c
@@ -0,0 +1,57 @@
+/*-
+ * Copyright (c) 1992, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This software was developed by the Computer Systems Engineering group
+ * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
+ * contributed to Berkeley.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static char sccsid[] = "@(#)negdi2.c	8.1 (Berkeley) 6/4/93";
+#endif /* LIBC_SCCS and not lint */
+
+#include "quad.h"
+
+/*
+ * Negate a quad as 0 - a, one longword at a time.  See subdi3.c.
+ */
+quad_t
+__negdi2(a)
+	quad_t a;
+{
+	union uu v;
+
+	v.q = a;
+	v.ul[L] = 0 - v.ul[L];
+	v.ul[H] = 0 - v.ul[H] - (v.ul[L] != 0);	/* borrow unless low is 0 */
+	return (v.q);
+}
diff --git a/sys/libkern/notdi2.c b/sys/libkern/notdi2.c
new file mode 100644
index 00000000000..d6247339a80
--- /dev/null
+++ b/sys/libkern/notdi2.c
@@ -0,0 +1,58 @@
+/*-
+ * Copyright (c) 1992, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This software was developed by the Computer Systems Engineering group
+ * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
+ * contributed to Berkeley.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static char sccsid[] = "@(#)notdi2.c	8.1 (Berkeley) 6/4/93";
+#endif /* LIBC_SCCS and not lint */
+
+#include "quad.h"
+
+/*
+ * Return ~a (gcc calls this `one's complement' rather than `not').
+ */
+quad_t
+__one_cmpldi2(a)
+	quad_t a;
+{
+	union uu aa;
+	int i;
+
+	aa.q = a;
+	for (i = 0; i < 2; i++)
+		aa.ul[i] = ~aa.ul[i];
+	return (aa.q);
+}
diff --git a/sys/libkern/qdivrem.c b/sys/libkern/qdivrem.c
new file mode 100644
index 00000000000..34b94ceaab2
--- /dev/null
+++ b/sys/libkern/qdivrem.c
@@ -0,0 +1,279 @@
+/*-
+ * Copyright (c) 1992, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This software was developed by the Computer Systems Engineering group
+ * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
+ * contributed to Berkeley.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static char sccsid[] = "@(#)qdivrem.c	8.1 (Berkeley) 6/4/93";
+#endif /* LIBC_SCCS and not lint */
+
+/*
+ * Multiprecision divide.  This algorithm is from Knuth vol. 2 (2nd ed),
+ * section 4.3.1, pp. 257--259.
+ */
+
+#include "quad.h"
+
+#define	B	((u_long)1 << HALF_BITS)	/* digit base; shifted as u_long, not int */
+
+/* Combine two `digits' to make a single two-digit number. */
+#define	COMBINE(a, b) (((u_long)(a) << HALF_BITS) | (b))
+
+/* select a type for digits in base B: use unsigned short if they fit */
+#if ULONG_MAX == 0xffffffff && USHRT_MAX >= 0xffff
+typedef unsigned short digit;
+#else
+typedef u_long digit;
+#endif
+
+/*
+ * Multi-digit left shift: move the len+1 digits p[0]..p[len] (most
+ * significant first) left by `sh' bits in place.  Bits pushed out of
+ * p[0] are dropped.  Requires len >= 0; ALL len+1 DIGITS ARE WRITTEN.
+ */
+static void
+shl(register digit *p, register int len, register int sh)
+{
+	register digit *last = p + len;
+
+	for (; p < last; p++)
+		*p = LHALF(*p << sh) | (p[1] >> (HALF_BITS - sh));
+	*p = LHALF(*p << sh);
+}
+
+/*
+ * __qdivrem(u, v, rem) returns u/v and, optionally, sets *rem to u%v.
+ * If rem is null, no remainder is stored.
+ * We do this in base 2-sup-HALF_BITS, so that all intermediate products
+ * fit within u_long.  As a consequence, the maximum length dividend and
+ * divisor are 4 `digits' in this base (they are shorter if they have
+ * leading zeros).
+ */
+u_quad_t
+__qdivrem(uq, vq, arq)
+	u_quad_t uq, vq, *arq;
+{
+	union uu tmp;
+	digit *u, *v, *q;
+	register digit v1, v2;
+	u_long qhat, rhat, t;
+	int m, n, d, j, i;
+	digit uspace[5], vspace[5], qspace[5];
+
+	/*
+	 * Take care of special cases: divide by zero, and u < v.
+	 */
+	if (vq == 0) {
+		/* divide by zero: really do one; if it returns, *arq = uq. */
+		static volatile const unsigned int zero = 0;
+
+		tmp.ul[H] = tmp.ul[L] = 1 / zero;
+		if (arq)
+			*arq = uq;
+		return (tmp.q);
+	}
+	if (uq < vq) {			/* quotient 0, remainder u */
+		if (arq)
+			*arq = uq;
+		return (0);
+	}
+	u = &uspace[0];
+	v = &vspace[0];
+	q = &qspace[0];
+
+	/*
+	 * Break dividend and divisor into digits in base B, then
+	 * count leading zeros to determine m and n.  When done, we
+	 * will have:
+	 *	u = (u[1]u[2]...u[m+n]) sub B
+	 *	v = (v[1]v[2]...v[n]) sub B
+	 *	v[1] != 0
+	 *	1 < n <= 4 (if n = 1, we use a different division algorithm)
+	 *	m >= 0 (otherwise u < v, which we already checked)
+	 *	m + n = 4
+	 * and thus
+	 *	m = 4 - n <= 2
+	 */
+	tmp.uq = uq;
+	u[0] = 0;
+	u[1] = HHALF(tmp.ul[H]);
+	u[2] = LHALF(tmp.ul[H]);
+	u[3] = HHALF(tmp.ul[L]);
+	u[4] = LHALF(tmp.ul[L]);
+	tmp.uq = vq;
+	v[1] = HHALF(tmp.ul[H]);
+	v[2] = LHALF(tmp.ul[H]);
+	v[3] = HHALF(tmp.ul[L]);
+	v[4] = LHALF(tmp.ul[L]);
+	for (n = 4; v[1] == 0; v++) {
+		if (--n == 1) {
+			u_long rbj;	/* r*B+u[j] (not root boy jim) */
+			digit q1, q2, q3, q4;
+
+			/*
+			 * Change of plan, per exercise 16.
+			 *	r = 0;
+			 *	for j = 1..4:
+			 *		q[j] = floor((r*B + u[j]) / v),
+			 *		r = (r*B + u[j]) % v;
+			 * We unroll this completely here.
+			 */
+			t = v[2];	/* nonzero, by definition */
+			q1 = u[1] / t;
+			rbj = COMBINE(u[1] % t, u[2]);
+			q2 = rbj / t;
+			rbj = COMBINE(rbj % t, u[3]);
+			q3 = rbj / t;
+			rbj = COMBINE(rbj % t, u[4]);
+			q4 = rbj / t;
+			if (arq)
+				*arq = rbj % t;
+			tmp.ul[H] = COMBINE(q1, q2);
+			tmp.ul[L] = COMBINE(q3, q4);
+			return (tmp.q);
+		}
+	}
+
+	/*
+	 * By adjusting q once we determine m, we can guarantee that
+	 * there is a complete four-digit quotient at &qspace[1] when
+	 * we finally stop.
+	 */
+	for (m = 4 - n; u[1] == 0; u++)
+		m--;
+	for (i = 4 - m; --i >= 0;)
+		q[i] = 0;
+	q += 4 - m;
+
+	/*
+	 * Here we run Program D, translated from MIX to C and acquiring
+	 * a few minor changes.
+	 *
+	 * D1: choose multiplier 1 << d to ensure v[1] >= B/2.
+	 */
+	d = 0;
+	for (t = v[1]; t < B / 2; t <<= 1)
+		d++;
+	if (d > 0) {
+		shl(&u[0], m + n, d);		/* u <<= d */
+		shl(&v[1], n - 1, d);		/* v <<= d */
+	}
+	/*
+	 * D2: j = 0.
+	 */
+	j = 0;
+	v1 = v[1];	/* for D3 -- note that v[1..n] are constant */
+	v2 = v[2];	/* for D3 */
+	do {
+		register digit uj0, uj1, uj2;
+		
+		/*
+		 * D3: Calculate qhat (\^q, in TeX notation).
+		 * Let qhat = min((u[j]*B + u[j+1])/v[1], B-1), and
+		 * let rhat = (u[j]*B + u[j+1]) mod v[1].
+		 * While rhat < B and v[2]*qhat > rhat*B+u[j+2],
+		 * decrement qhat and increase rhat correspondingly.
+		 * Note that if rhat >= B, v[2]*qhat < rhat*B.
+		 */
+		uj0 = u[j + 0];	/* for D3 only -- note that u[j+...] change */
+		uj1 = u[j + 1];	/* for D3 only */
+		uj2 = u[j + 2];	/* for D3 only */
+		if (uj0 == v1) {
+			qhat = B;
+			rhat = uj1;
+			goto qhat_too_big;
+		} else {
+			u_long n = COMBINE(uj0, uj1);	/* shadows outer n here */
+			qhat = n / v1;
+			rhat = n % v1;
+		}
+		while (v2 * qhat > COMBINE(rhat, uj2)) {
+	qhat_too_big:
+			qhat--;
+			if ((rhat += v1) >= B)
+				break;
+		}
+		/*
+		 * D4: Multiply and subtract.
+		 * The variable `t' holds any borrows across the loop.
+		 * We split this up so that we do not require v[0] = 0,
+		 * and to eliminate a final special case.
+		 */
+		for (t = 0, i = n; i > 0; i--) {
+			t = u[i + j] - v[i] * qhat - t;
+			u[i + j] = LHALF(t);
+			t = (B - HHALF(t)) & (B - 1);
+		}
+		t = u[j] - t;
+		u[j] = LHALF(t);
+		/*
+		 * D5: test remainder.
+		 * There is a borrow if and only if HHALF(t) is nonzero;
+		 * in that (rare) case, qhat was too large (by exactly 1).
+		 * Fix it by adding v[1..n] to u[j..j+n].
+		 */
+		if (HHALF(t)) {
+			qhat--;
+			for (t = 0, i = n; i > 0; i--) { /* D6: add back. */
+				t += u[i + j] + v[i];
+				u[i + j] = LHALF(t);
+				t = HHALF(t);
+			}
+			u[j] = LHALF(u[j] + t);
+		}
+		q[j] = qhat;
+	} while (++j <= m);		/* D7: loop on j. */
+
+	/*
+	 * If caller wants the remainder, we have to calculate it as
+	 * u[m..m+n] >> d (this is at most n digits and thus fits in
+	 * u[m+1..m+n], but we may need more source digits).
+	 */
+	if (arq) {
+		if (d) {
+			for (i = m + n; i > m; --i)
+				u[i] = (u[i] >> d) |
+				    LHALF(u[i - 1] << (HALF_BITS - d));
+			u[i] = 0;
+		}
+		tmp.ul[H] = COMBINE(uspace[1], uspace[2]);
+		tmp.ul[L] = COMBINE(uspace[3], uspace[4]);
+		*arq = tmp.q;
+	}
+
+	tmp.ul[H] = COMBINE(qspace[1], qspace[2]);
+	tmp.ul[L] = COMBINE(qspace[3], qspace[4]);
+	return (tmp.q);
+}
diff --git a/sys/libkern/quad.h b/sys/libkern/quad.h
new file mode 100644
index 00000000000..bc6a2f83632
--- /dev/null
+++ b/sys/libkern/quad.h
@@ -0,0 +1,110 @@
+/*-
+ * Copyright (c) 1992, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This software was developed by the Computer Systems Engineering group
+ * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
+ * contributed to Berkeley.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)quad.h	8.1 (Berkeley) 6/4/93
+ */
+
+/*
+ * Quad arithmetic.
+ *
+ * This library makes the following assumptions:
+ *
+ *  - The type long long (aka quad_t) exists.
+ *
+ *  - A quad variable is exactly twice as long as `long'.
+ *
+ *  - The machine's arithmetic is two's complement.
+ *
+ * This library can provide 128-bit arithmetic on a machine with 128-bit
+ * quads and 64-bit longs, for instance, or 96-bit arithmetic on machines
+ * with 48-bit longs.
+ */
+
+#include <sys/types.h>
+#include <limits.h>
+
+/*
+ * Depending on the desired operation, we view a `long long' (aka quad_t) in
+ * one or more of the following formats.  All members share the same storage.
+ */
+union uu {
+	quad_t	q;		/* as a (signed) quad */
+	u_quad_t uq;		/* as an unsigned quad */
+	long	sl[2];		/* as two signed longs */
+	u_long	ul[2];		/* as two unsigned longs */
+};
+
+/*
+ * Indices of the high and low longwords in the sl[]/ul[] views of union uu.
+ */
+#define	H		_QUAD_HIGHWORD
+#define	L		_QUAD_LOWWORD
+
+/*
+ * Total number of bits in a quad_t and in the pieces that make it up.
+ * These are used for shifting, and also below for halfword extraction
+ * and assembly.
+ */
+#define	QUAD_BITS	(sizeof(quad_t) * CHAR_BIT)	/* bits in a quad_t */
+#define	LONG_BITS	(sizeof(long) * CHAR_BIT)	/* bits in a long */
+#define	HALF_BITS	(sizeof(long) * CHAR_BIT / 2)	/* bits in half a long */
+
+/*
+ * Extract high and low shortwords from longword, and move low shortword of
+ * longword to upper half of long, i.e., produce the lower longword of
+ * ((quad_t)(x) << (number_of_bits_in_long/2)).  (`x' must actually be u_long.)
+ *
+ * These are used in the multiply code, to split a longword into upper
+ * and lower halves, and to reassemble a product as a quad_t, shifted left
+ * (sizeof(long)*CHAR_BIT/2).  LHALF builds its mask in u_long, not int.
+ */
+#define	HHALF(x)	((x) >> HALF_BITS)
+#define	LHALF(x)	((x) & (((u_long)1 << HALF_BITS) - 1))
+#define	LHUP(x)		((x) << HALF_BITS)
+
+extern u_quad_t __qdivrem __P((u_quad_t u, u_quad_t v, u_quad_t *rem));
+/* __qdivrem returns u / v and stores u % v through rem when rem is non-null. */
+/*
+ * XXX
+ * Compensate for gcc 1 vs gcc 2.  Gcc 1 defines ?sh?di3's second argument
+ * as u_quad_t, while gcc 2 correctly uses int.  Unfortunately, we still use
+ * both compilers.
+ */
+#if __GNUC__ >= 2
+typedef unsigned int	qshift_t;	/* shift count type, gcc 2 and later */
+#else
+typedef u_quad_t	qshift_t;	/* shift count type, gcc 1 */
+#endif /* __GNUC__ >= 2 */
diff --git a/sys/libkern/random.c b/sys/libkern/random.c
new file mode 100644
index 00000000000..5153124e3fd
--- /dev/null
+++ b/sys/libkern/random.c
@@ -0,0 +1,63 @@
+/*-
+ * Copyright (c) 1992, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)random.c	8.1 (Berkeley) 6/10/93
+ */
+
+#include <libkern/libkern.h>
+
+/*
+ * Pseudo-random number generator for randomizing the profiling clock,
+ * and whatever else we might use it for.  Every call advances a private
+ * static seed and returns the new seed, which lies in [1, 2^31 - 1].
+ */
+u_long
+random()
+{
+	static u_long randseed = 1;
+	register long prev, quot, rem, next;
+
+	/*
+	 * One step of x[n + 1] = (7^5 * x[n]) mod (2^31 - 1), after
+	 * "Random number generators: good ones are hard to find",
+	 * Park and Miller, Communications of the ACM, vol. 31, no. 10,
+	 * October 1988, p. 1195.  Splitting x at 127773 keeps every
+	 * intermediate value within a 32-bit long.
+	 */
+	prev = randseed;
+	quot = prev / 127773;
+	rem = prev % 127773;
+	next = 16807 * rem - 2836 * quot;
+	if (next <= 0)
+		next += 0x7fffffff;
+	return (randseed = next);
+}
diff --git a/sys/libkern/rindex.c b/sys/libkern/rindex.c
new file mode 100644
index 00000000000..69dced4c46d
--- /dev/null
+++ b/sys/libkern/rindex.c
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 1988, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static char sccsid[] = "@(#)rindex.c	8.1 (Berkeley) 6/4/93";
+#endif /* LIBC_SCCS and not lint */
+
+#include <stddef.h>
+#include <string.h>
+
+char *
+#ifdef STRRCHR
+strrchr(p, ch)
+#else
+rindex(p, ch)
+#endif
+	register const char *p;
+	register int ch;
+{
+	register char *last = NULL;
+
+	/* The terminating NUL is compared too, so ch == '\0' finds it. */
+	do {
+		if (*p == ch)
+			last = (char *)p;
+	} while (*p++ != '\0');
+
+	return (last);
+}
diff --git a/sys/libkern/scanc.c b/sys/libkern/scanc.c
new file mode 100644
index 00000000000..2d8b6a06dd6
--- /dev/null
+++ b/sys/libkern/scanc.c
@@ -0,0 +1,50 @@
+/*-
+ * Copyright (c) 1992, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)scanc.c	8.1 (Berkeley) 6/10/93
+ */
+
+#include <libkern/libkern.h>
+
+int	/* bytes left in the buffer counting from the first match; 0 if nothing matched */
+scanc(size, cp, table, mask0)
+	u_int size;	/* number of bytes at cp to examine */
+	register u_char *cp, table[];	/* cp: buffer to scan; table: indexed by each byte value */
+	int mask0;	/* bits to test in the table entries */
+{
+	register u_char *end;	/* one past the last byte of the buffer */
+	register u_char mask;
+
+	mask = mask0;	/* narrows mask0 to u_char */
+	for (end = &cp[size]; cp < end && (table[*cp] & mask) == 0; ++cp);	/* empty body: stops at the first byte whose table entry has a mask bit set */
+	return (end - cp);
+}
diff --git a/sys/libkern/skpc.c b/sys/libkern/skpc.c
new file mode 100644
index 00000000000..11b269ee7e7
--- /dev/null
+++ b/sys/libkern/skpc.c
@@ -0,0 +1,50 @@
+/*-
+ * Copyright (c) 1992, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)skpc.c	8.1 (Berkeley) 6/10/93
+ */
+
+#include <libkern/libkern.h>
+
+int	/* bytes left in the buffer counting from the first byte != mask0; 0 if all were equal */
+skpc(mask0, size, cp0)
+	int mask0;	/* byte value to skip over */
+	int size;	/* number of bytes at cp0 to examine */
+	char *cp0;	/* buffer to scan */
+{
+	register u_char *cp, *end, mask;
+
+	mask = mask0;	/* narrows mask0 to u_char */
+	cp = (u_char *)cp0;	/* bytes are compared as unsigned */
+	for (end = &cp[size]; cp < end && *cp == mask; ++cp);	/* empty body: stops at the first byte that differs from mask */
+	return (end - cp);
+}
diff --git a/sys/libkern/strcat.c b/sys/libkern/strcat.c
new file mode 100644
index 00000000000..343696719b7
--- /dev/null
+++ b/sys/libkern/strcat.c
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 1988, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static char sccsid[] = "@(#)strcat.c	8.1 (Berkeley) 6/4/93";
+#endif /* LIBC_SCCS and not lint */
+
+#include <string.h>
+
+char *	/* returns the original value of s */
+strcat(s, append)
+	register char *s;	/* destination; extended in place */
+	register const char *append;	/* NUL-terminated string copied onto the end of s */
+{
+	char *save = s;	/* s is advanced below, so keep the start for the return */
+
+	for (; *s; ++s);	/* empty body: leaves s on the terminating NUL */
+	while (*s++ = *append++);	/* assignment is intended; ends after the NUL is copied; no length limit */
+	return(save);
+}
diff --git a/sys/libkern/strcmp.c b/sys/libkern/strcmp.c
new file mode 100644
index 00000000000..79cfaa831b2
--- /dev/null
+++ b/sys/libkern/strcmp.c
@@ -0,0 +1,55 @@
+/*-
+ * Copyright (c) 1990, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Chris Torek.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static char sccsid[] = "@(#)strcmp.c	8.1 (Berkeley) 6/4/93";
+#endif /* LIBC_SCCS and not lint */
+
+#include <sys/cdefs.h>
+#include <string.h>
+
+/*
+ * Compare strings: 0 if equal, else the difference of the first differing chars.
+ */
+int
+strcmp(s1, s2)
+	register const char *s1, *s2;	/* the two NUL-terminated strings */
+{
+	while (*s1 == *s2++)	/* s2 always advances here; s1 advances in the body */
+		if (*s1++ == 0)	/* matched through the terminator: strings are equal */
+			return (0);
+	return (*(unsigned char *)s1 - *(unsigned char *)--s2);	/* step s2 back to the mismatch; chars are compared as unsigned char */
+}
diff --git a/sys/libkern/strcpy.c b/sys/libkern/strcpy.c
new file mode 100644
index 00000000000..d1791dd00c3
--- /dev/null
+++ b/sys/libkern/strcpy.c
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 1988, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static char sccsid[] = "@(#)strcpy.c	8.1 (Berkeley) 6/4/93";
+#endif /* LIBC_SCCS and not lint */
+
+#include <sys/cdefs.h>
+#include <string.h>
+
+char *	/* returns the original value of to */
+strcpy(to, from)
+	register char *to;	/* destination buffer */
+	register const char *from;	/* NUL-terminated source string */
+{
+	char *save = to;	/* to is advanced below, so keep the start for the return */
+
+	for (; *to = *from; ++from, ++to);	/* assignment is intended; the NUL is copied before the loop ends; no length limit */
+	return(save);
+}
diff --git a/sys/libkern/strlen.c b/sys/libkern/strlen.c
new file mode 100644
index 00000000000..323fbe48452
--- /dev/null
+++ b/sys/libkern/strlen.c
@@ -0,0 +1,50 @@
+/*-
+ * Copyright (c) 1990, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static char sccsid[] = "@(#)strlen.c	8.1 (Berkeley) 6/4/93";
+#endif /* LIBC_SCCS and not lint */
+
+#include <sys/cdefs.h>
+#include <string.h>
+
+size_t	/* number of chars in str before its terminating NUL */
+strlen(str)
+	const char *str;	/* NUL-terminated string */
+{
+	register const char *s;	/* scan pointer */
+
+	for (s = str; *s; ++s);	/* empty body: leaves s on the terminating NUL */
+	return(s - str);
+}
+
diff --git a/sys/libkern/strncpy.c b/sys/libkern/strncpy.c
new file mode 100644
index 00000000000..9e72740b8b9
--- /dev/null
+++ b/sys/libkern/strncpy.c
@@ -0,0 +1,68 @@
+/*-
+ * Copyright (c) 1990, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Chris Torek.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static char sccsid[] = "@(#)strncpy.c	8.1 (Berkeley) 6/4/93";
+#endif /* LIBC_SCCS and not lint */
+
+#include <sys/cdefs.h>
+#include <string.h>
+
+/*
+ * Copy src to dst, truncating or NUL-padding so that exactly n bytes are written.
+ * Return dst; dst gets no terminating NUL when src has none in its first n bytes.
+ */
+char *
+strncpy(dst, src, n)
+	char *dst;
+	const char *src;
+	register size_t n;	/* bytes still to be written to dst */
+{
+	if (n != 0) {	/* n == 0: nothing is written */
+		register char *d = dst;
+		register const char *s = src;
+
+		do {
+			if ((*d++ = *s++) == 0) {	/* just copied src's NUL */
+				/* NUL pad the remaining n-1 bytes */
+				while (--n != 0)
+					*d++ = 0;
+				break;
+			}
+		} while (--n != 0);	/* falls out here when n runs out before a NUL is seen */
+	}
+	return (dst);
+}
diff --git a/sys/libkern/subdi3.c b/sys/libkern/subdi3.c
new file mode 100644
index 00000000000..e9763452e4f
--- /dev/null
+++ b/sys/libkern/subdi3.c
@@ -0,0 +1,59 @@
+/*-
+ * Copyright (c) 1992, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This software was developed by the Computer Systems Engineering group
+ * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
+ * contributed to Berkeley.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static char sccsid[] = "@(#)subdi3.c	8.1 (Berkeley) 6/4/93";
+#endif /* LIBC_SCCS and not lint */
+
+#include "quad.h"
+
+/*
+ * Subtract two quad values.  This is trivial since a one-bit carry (borrow)
+ * from a single u_long difference x-y occurs if and only if (x-y) > x.
+ */
+quad_t
+__subdi3(a, b)
+	quad_t a, b;	/* computes a - b */
+{
+	union uu aa, bb, diff;	/* q and ul[] views of one value; presumably declared in quad.h */
+
+	aa.q = a;
+	bb.q = b;
+	diff.ul[L] = aa.ul[L] - bb.ul[L];	/* L word first so a wrap can be detected below */
+	diff.ul[H] = aa.ul[H] - bb.ul[H] - (diff.ul[L] > aa.ul[L]);	/* the comparison is 1 exactly when the L subtraction wrapped */
+	return (diff.q);
+}
diff --git a/sys/libkern/ucmpdi2.c b/sys/libkern/ucmpdi2.c
new file mode 100644
index 00000000000..e5dfc435d9c
--- /dev/null
+++ b/sys/libkern/ucmpdi2.c
@@ -0,0 +1,58 @@
+/*-
+ * Copyright (c) 1992, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This software was developed by the Computer Systems Engineering group
+ * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
+ * contributed to Berkeley.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static char sccsid[] = "@(#)ucmpdi2.c	8.1 (Berkeley) 6/4/93";
+#endif /* LIBC_SCCS and not lint */
+
+#include "quad.h"
+
+/*
+ * Return 0, 1, or 2 as a <, =, > b respectively.
+ * Neither a nor b is considered signed.
+ */
+int
+__ucmpdi2(a, b)
+	u_quad_t a, b;
+{
+	union uu aa, bb;	/* uq and ul[] views of one value; presumably declared in quad.h */
+
+	aa.uq = a;
+	bb.uq = b;
+	return (aa.ul[H] < bb.ul[H] ? 0 : aa.ul[H] > bb.ul[H] ? 2 :	/* the H words decide unless they are equal */
+	    aa.ul[L] < bb.ul[L] ? 0 : aa.ul[L] > bb.ul[L] ? 2 : 1);	/* then the L words; 1 means both words match */
+}
diff --git a/sys/libkern/udivdi3.c b/sys/libkern/udivdi3.c
new file mode 100644
index 00000000000..8ddd5598911
--- /dev/null
+++ b/sys/libkern/udivdi3.c
@@ -0,0 +1,53 @@
+/*-
+ * Copyright (c) 1992, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This software was developed by the Computer Systems Engineering group
+ * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
+ * contributed to Berkeley.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static char sccsid[] = "@(#)udivdi3.c	8.1 (Berkeley) 6/4/93";
+#endif /* LIBC_SCCS and not lint */
+
+#include "quad.h"
+
+/*
+ * Divide two unsigned quads; the work is delegated to __qdivrem().
+ */
+u_quad_t
+__udivdi3(a, b)
+	u_quad_t a, b;	/* passed to __qdivrem in this order */
+{
+
+	return (__qdivrem(a, b, (u_quad_t *)0));	/* null third argument: presumably means the remainder is not wanted -- confirm in __qdivrem */
+}
diff --git a/sys/libkern/umoddi3.c b/sys/libkern/umoddi3.c
new file mode 100644
index 00000000000..2a85f7699a6
--- /dev/null
+++ b/sys/libkern/umoddi3.c
@@ -0,0 +1,55 @@
+/*-
+ * Copyright (c) 1992, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This software was developed by the Computer Systems Engineering group
+ * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
+ * contributed to Berkeley.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static char sccsid[] = "@(#)umoddi3.c	8.1 (Berkeley) 6/4/93";
+#endif /* LIBC_SCCS and not lint */
+
+#include "quad.h"
+
+/*
+ * Return remainder after dividing two unsigned quads.
+ */
+u_quad_t
+__umoddi3(a, b)
+	u_quad_t a, b;	/* passed to __qdivrem in this order */
+{
+	u_quad_t r;	/* not initialized here; expected to be stored by __qdivrem */
+
+	(void)__qdivrem(a, b, &r);	/* return value deliberately discarded; only r is used */
+	return (r);
+}
diff --git a/sys/libkern/xordi3.c b/sys/libkern/xordi3.c
new file mode 100644
index 00000000000..e3a85889360
--- /dev/null
+++ b/sys/libkern/xordi3.c
@@ -0,0 +1,58 @@
+/*-
+ * Copyright (c) 1992, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This software was developed by the Computer Systems Engineering group
+ * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
+ * contributed to Berkeley.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static char sccsid[] = "@(#)xordi3.c	8.1 (Berkeley) 6/4/93";
+#endif /* LIBC_SCCS and not lint */
+
+#include "quad.h"
+
+/*
+ * Return a ^ b, in quad.
+ */
+quad_t
+__xordi3(a, b)
+	quad_t a, b;
+{
+	union uu aa, bb;	/* q and ul[] views of one value; presumably declared in quad.h */
+
+	aa.q = a;
+	bb.q = b;
+	aa.ul[0] ^= bb.ul[0];	/* literal indices: each word is xored with its counterpart, so order is irrelevant */
+	aa.ul[1] ^= bb.ul[1];
+	return (aa.q);	/* aa now holds the result */
+}
diff --git a/sys/miscfs/deadfs/dead_vnops.c b/sys/miscfs/deadfs/dead_vnops.c
new file mode 100644
index 00000000000..9d04652b7fc
--- /dev/null
+++ b/sys/miscfs/deadfs/dead_vnops.c
@@ -0,0 +1,354 @@
+/*
+ * Copyright (c) 1989, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)dead_vnops.c	8.1 (Berkeley) 6/10/93
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/time.h>
+#include <sys/vnode.h>
+#include <sys/errno.h>
+#include <sys/namei.h>
+#include <sys/buf.h>
+
+/*
+ * Prototypes for dead vnode operations; ops with no routine are cast macros.
+ */
+int	dead_badop(),		/* panics when called */
+	dead_ebadf();		/* returns EBADF */
+int	dead_lookup __P((struct vop_lookup_args *));
+#define dead_create ((int (*) __P((struct  vop_create_args *)))dead_badop)
+#define dead_mknod ((int (*) __P((struct  vop_mknod_args *)))dead_badop)
+int	dead_open __P((struct vop_open_args *));
+#define dead_close ((int (*) __P((struct  vop_close_args *)))nullop)
+#define dead_access ((int (*) __P((struct  vop_access_args *)))dead_ebadf)
+#define dead_getattr ((int (*) __P((struct  vop_getattr_args *)))dead_ebadf)
+#define dead_setattr ((int (*) __P((struct  vop_setattr_args *)))dead_ebadf)
+int	dead_read __P((struct vop_read_args *));
+int	dead_write __P((struct vop_write_args *));
+int	dead_ioctl __P((struct vop_ioctl_args *));
+int	dead_select __P((struct vop_select_args *));
+#define dead_mmap ((int (*) __P((struct  vop_mmap_args *)))dead_badop)
+#define dead_fsync ((int (*) __P((struct  vop_fsync_args *)))nullop)
+#define dead_seek ((int (*) __P((struct  vop_seek_args *)))nullop)
+#define dead_remove ((int (*) __P((struct  vop_remove_args *)))dead_badop)
+#define dead_link ((int (*) __P((struct  vop_link_args *)))dead_badop)
+#define dead_rename ((int (*) __P((struct  vop_rename_args *)))dead_badop)
+#define dead_mkdir ((int (*) __P((struct  vop_mkdir_args *)))dead_badop)
+#define dead_rmdir ((int (*) __P((struct  vop_rmdir_args *)))dead_badop)
+#define dead_symlink ((int (*) __P((struct  vop_symlink_args *)))dead_badop)
+#define dead_readdir ((int (*) __P((struct  vop_readdir_args *)))dead_ebadf)
+#define dead_readlink ((int (*) __P((struct  vop_readlink_args *)))dead_ebadf)
+#define dead_abortop ((int (*) __P((struct  vop_abortop_args *)))dead_badop)
+#define dead_inactive ((int (*) __P((struct  vop_inactive_args *)))nullop)
+#define dead_reclaim ((int (*) __P((struct  vop_reclaim_args *)))nullop)
+int	dead_lock __P((struct vop_lock_args *));
+#define dead_unlock ((int (*) __P((struct  vop_unlock_args *)))nullop)
+int	dead_bmap __P((struct vop_bmap_args *));
+int	dead_strategy __P((struct vop_strategy_args *));
+int	dead_print __P((struct vop_print_args *));
+#define dead_islocked ((int (*) __P((struct  vop_islocked_args *)))nullop)
+#define dead_pathconf ((int (*) __P((struct  vop_pathconf_args *)))dead_ebadf)
+#define dead_advlock ((int (*) __P((struct  vop_advlock_args *)))dead_ebadf)
+#define dead_blkatoff ((int (*) __P((struct  vop_blkatoff_args *)))dead_badop)
+#define dead_valloc ((int (*) __P((struct  vop_valloc_args *)))dead_badop)
+#define dead_vfree ((int (*) __P((struct  vop_vfree_args *)))dead_badop)
+#define dead_truncate ((int (*) __P((struct  vop_truncate_args *)))nullop)
+#define dead_update ((int (*) __P((struct  vop_update_args *)))nullop)
+#define dead_bwrite ((int (*) __P((struct  vop_bwrite_args *)))nullop)
+
+int (**dead_vnodeop_p)();	/* vector named by dead_vnodeop_opv_desc */
+struct vnodeopv_entry_desc dead_vnodeop_entries[] = {
+	{ &vop_default_desc, vn_default_error },	/* default */
+	{ &vop_lookup_desc, dead_lookup },	/* lookup */
+	{ &vop_create_desc, dead_create },	/* create */
+	{ &vop_mknod_desc, dead_mknod },	/* mknod */
+	{ &vop_open_desc, dead_open },	/* open */
+	{ &vop_close_desc, dead_close },	/* close */
+	{ &vop_access_desc, dead_access },	/* access */
+	{ &vop_getattr_desc, dead_getattr },	/* getattr */
+	{ &vop_setattr_desc, dead_setattr },	/* setattr */
+	{ &vop_read_desc, dead_read },	/* read */
+	{ &vop_write_desc, dead_write },	/* write */
+	{ &vop_ioctl_desc, dead_ioctl },	/* ioctl */
+	{ &vop_select_desc, dead_select },	/* select */
+	{ &vop_mmap_desc, dead_mmap },	/* mmap */
+	{ &vop_fsync_desc, dead_fsync },	/* fsync */
+	{ &vop_seek_desc, dead_seek },	/* seek */
+	{ &vop_remove_desc, dead_remove },	/* remove */
+	{ &vop_link_desc, dead_link },	/* link */
+	{ &vop_rename_desc, dead_rename },	/* rename */
+	{ &vop_mkdir_desc, dead_mkdir },	/* mkdir */
+	{ &vop_rmdir_desc, dead_rmdir },	/* rmdir */
+	{ &vop_symlink_desc, dead_symlink },	/* symlink */
+	{ &vop_readdir_desc, dead_readdir },	/* readdir */
+	{ &vop_readlink_desc, dead_readlink },	/* readlink */
+	{ &vop_abortop_desc, dead_abortop },	/* abortop */
+	{ &vop_inactive_desc, dead_inactive },	/* inactive */
+	{ &vop_reclaim_desc, dead_reclaim },	/* reclaim */
+	{ &vop_lock_desc, dead_lock },	/* lock */
+	{ &vop_unlock_desc, dead_unlock },	/* unlock */
+	{ &vop_bmap_desc, dead_bmap },	/* bmap */
+	{ &vop_strategy_desc, dead_strategy },	/* strategy */
+	{ &vop_print_desc, dead_print },	/* print */
+	{ &vop_islocked_desc, dead_islocked },	/* islocked */
+	{ &vop_pathconf_desc, dead_pathconf },	/* pathconf */
+	{ &vop_advlock_desc, dead_advlock },	/* advlock */
+	{ &vop_blkatoff_desc, dead_blkatoff },	/* blkatoff */
+	{ &vop_valloc_desc, dead_valloc },	/* valloc */
+	{ &vop_vfree_desc, dead_vfree },	/* vfree */
+	{ &vop_truncate_desc, dead_truncate },	/* truncate */
+	{ &vop_update_desc, dead_update },	/* update */
+	{ &vop_bwrite_desc, dead_bwrite },	/* bwrite */
+	{ (struct vnodeop_desc*)NULL, (int(*)())NULL }	/* end of table */
+};
+struct vnodeopv_desc dead_vnodeop_opv_desc =
+	{ &dead_vnodeop_p, dead_vnodeop_entries };
+
+/*
+ * Trivial lookup routine that always fails: clears *a_vpp, returns ENOTDIR.
+ */
+/* ARGSUSED */
+int
+dead_lookup(ap)
+	struct vop_lookup_args /* {
+		struct vnode * a_dvp;
+		struct vnode ** a_vpp;
+		struct componentname * a_cnp;
+	} */ *ap;
+{
+
+	*ap->a_vpp = NULL;
+	return (ENOTDIR);
+}
+
+/*
+ * Open always fails with ENXIO, as if the device did not exist.
+ */
+/* ARGSUSED */
+dead_open(ap)
+	struct vop_open_args /* {
+		struct vnode *a_vp;
+		int  a_mode;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+
+	return (ENXIO);
+}
+
+/*
+ * Vnode op for read: no data is ever transferred from a dead vnode.
+ */
+/* ARGSUSED */
+dead_read(ap)
+	struct vop_read_args /* {
+		struct vnode *a_vp;
+		struct uio *a_uio;
+		int  a_ioflag;
+		struct ucred *a_cred;
+	} */ *ap;
+{
+
+	if (chkvnlock(ap->a_vp) != 0)
+		panic("dead_read: lock");
+	/*
+	 * Character devices read as end-of-file; any other type gets EIO.
+	 */
+	if (ap->a_vp->v_type == VCHR)
+		return (0);
+	return (EIO);
+}
+
+/*
+ * Vnode op for write: always EIO; panics if chkvnlock had to sleep.
+ */
+/* ARGSUSED */
+dead_write(ap)
+	struct vop_write_args /* {
+		struct vnode *a_vp;
+		struct uio *a_uio;
+		int  a_ioflag;
+		struct ucred *a_cred;
+	} */ *ap;
+{
+
+	if (chkvnlock(ap->a_vp))
+		panic("dead_write: lock");
+	return (EIO);
+}
+
+/*
+ * Device ioctl: EBADF, unless chkvnlock slept; then retry through VCALL.
+ */
+/* ARGSUSED */
+dead_ioctl(ap)
+	struct vop_ioctl_args /* {
+		struct vnode *a_vp;
+		int  a_command;
+		caddr_t  a_data;
+		int  a_fflag;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+
+	if (chkvnlock(ap->a_vp))
+		return (VCALL(ap->a_vp, VOFFSET(vop_ioctl), ap));
+	return (EBADF);
+}
+
+/* ARGSUSED */
+dead_select(ap)
+	struct vop_select_args /* {
+		struct vnode *a_vp;
+		int  a_which;
+		int  a_fflags;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+
+	/*
+	 * Always return 1 so the user finds out that the descriptor is gone.
+	 */
+	return (1);
+}
+
+/*
+ * Strategy: retry via VOP_STRATEGY if chkvnlock slept, else fail with EIO.
+ */
+dead_strategy(ap)
+	struct vop_strategy_args /* {
+		struct buf *a_bp;
+	} */ *ap;
+{
+	register struct buf *bp = ap->a_bp;
+
+	if (bp->b_vp != NULL && chkvnlock(bp->b_vp))
+		return (VOP_STRATEGY(bp));
+	bp->b_flags |= B_ERROR;
+	biodone(bp);
+	return (EIO);
+}
+
+/*
+ * Lock: if chkvnlock slept, retry through VCALL; otherwise return 0.
+ */
+dead_lock(ap)
+	struct vop_lock_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+
+	if (chkvnlock(ap->a_vp))
+		return (VCALL(ap->a_vp, VOFFSET(vop_lock), ap));
+	return (0);
+}
+
+/*
+ * Bmap: if chkvnlock slept, retry through VOP_BMAP; otherwise fail with EIO.
+ */
+dead_bmap(ap)
+	struct vop_bmap_args /* {
+		struct vnode *a_vp;
+		daddr_t  a_bn;
+		struct vnode **a_vpp;
+		daddr_t *a_bnp;
+		int *a_runp;
+	} */ *ap;
+{
+
+	if (chkvnlock(ap->a_vp))
+		return (VOP_BMAP(ap->a_vp, ap->a_bn, ap->a_vpp, ap->a_bnp, ap->a_runp));
+	return (EIO);
+}
+
+/*
+ * Print out the contents of a dead vnode; always returns 0.
+ */
+/* ARGSUSED */
+dead_print(ap)
+	struct vop_print_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+	printf("tag VT_NON, dead vnode\n");
+	return (0);	/* installed in an int-valued vector: define the result */
+}
+
+/*
+ * Empty vnode failed operation: always returns EBADF.
+ */
+dead_ebadf()
+{
+
+	return (EBADF);
+}
+
+/*
+ * Empty vnode bad operation: panics if it is ever called.
+ */
+dead_badop()
+{
+
+	panic("dead_badop called");
+	/* NOTREACHED */
+}
+
+/*
+ * Empty vnode null operation: returns 0 (the op macros use nullop, not this).
+ */
+dead_nullop()
+{
+
+	return (0);
+}
+
+/*
+ * Sleep for as long as the vnode is marked VXLOCK (in a state of
+ * change), setting VXWANT before each sleep.
+ * Returns 1 if we slept at least once, 0 if the vnode was not locked.
+ */
+chkvnlock(vp)
+	register struct vnode *vp;
+{
+	int slept;
+
+	for (slept = 0; (vp->v_flag & VXLOCK) != 0; slept = 1) {
+		vp->v_flag |= VXWANT;
+		sleep((caddr_t)vp, PINOD);
+	}
+	return (slept);
+}
diff --git a/sys/miscfs/fdesc/fdesc.h b/sys/miscfs/fdesc/fdesc.h
new file mode 100644
index 00000000000..4c682e7bd37
--- /dev/null
+++ b/sys/miscfs/fdesc/fdesc.h
@@ -0,0 +1,82 @@
+/*
+ * Copyright (c) 1992, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software donated to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)fdesc.h	8.5 (Berkeley) 1/21/94
+ *
+ * $Id: fdesc.h,v 1.8 1993/04/06 15:28:33 jsp Exp $
+ */
+
+#ifdef KERNEL
+struct fdescmount {
+	struct vnode	*f_root;	/* Root node */
+};
+
+#define FD_ROOT		2	/* node index (fd_ix) of the mount root */
+#define FD_DEVFD	3	/* node index of the "fd" directory */
+#define FD_STDIN	4	/* node index of the "stdin" link */
+#define FD_STDOUT	5	/* node index of the "stdout" link */
+#define FD_STDERR	6	/* node index of the "stderr" link */
+#define FD_CTTY		7	/* node index of the "tty" node */
+#define FD_DESC		8	/* descriptor n gets index FD_DESC + n */
+#define FD_MAX		12
+
+typedef enum {
+	Froot,		/* root directory of the mount */
+	Fdevfd,		/* the "fd" directory */
+	Fdesc,		/* node standing for one file descriptor */
+	Flink,		/* "stdin"/"stdout"/"stderr" symbolic link */
+	Fctty		/* the "tty" node */
+} fdntype;
+
+struct fdescnode {
+	struct fdescnode *fd_forw;	/* Hash chain */
+	struct fdescnode *fd_back;
+	struct vnode	*fd_vnode;	/* Back ptr to vnode */
+	fdntype		fd_type;	/* Type of this node */
+	unsigned	fd_fd;		/* Fd to be dup'ed */
+	char		*fd_link;	/* Link to fd/n */
+	int		fd_ix;		/* filesystem index */
+};
+
+#define VFSTOFDESC(mp)	((struct fdescmount *)((mp)->mnt_data))
+#define	VTOFDESC(vp) ((struct fdescnode *)(vp)->v_data)
+
+extern dev_t devctty;
+extern int fdesc_init __P((void));
+extern int fdesc_root __P((struct mount *, struct vnode **));
+extern int fdesc_allocvp __P((fdntype, int, struct mount *, struct vnode **));
+extern int (**fdesc_vnodeop_p)();
+extern struct vfsops fdesc_vfsops;
+#endif /* KERNEL */
diff --git a/sys/miscfs/fdesc/fdesc_vfsops.c b/sys/miscfs/fdesc/fdesc_vfsops.c
new file mode 100644
index 00000000000..80c543da655
--- /dev/null
+++ b/sys/miscfs/fdesc/fdesc_vfsops.c
@@ -0,0 +1,288 @@
+/*
+ * Copyright (c) 1992, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software donated to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)fdesc_vfsops.c	8.4 (Berkeley) 1/21/94
+ *
+ * $Id: fdesc_vfsops.c,v 1.9 1993/04/06 15:28:33 jsp Exp $
+ */
+
+/*
+ * /dev/fd Filesystem
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/proc.h>
+#include <sys/resourcevar.h>
+#include <sys/filedesc.h>
+#include <sys/vnode.h>
+#include <sys/mount.h>
+#include <sys/namei.h>
+#include <sys/malloc.h>
+#include <miscfs/fdesc/fdesc.h>
+
+/*
+ * Mount the per-process file descriptors (/dev/fd)
+ */
+int
+fdesc_mount(mp, path, data, ndp, p)
+	struct mount *mp;
+	char *path;
+	caddr_t data;
+	struct nameidata *ndp;
+	struct proc *p;
+{
+	int error = 0;
+	u_int size;
+	struct fdescmount *fmp;
+	struct vnode *rvp;
+
+	/*
+	 * Updating an existing mount is not supported
+	 */
+	if (mp->mnt_flag & MNT_UPDATE)
+		return (EOPNOTSUPP);
+
+	error = fdesc_allocvp(Froot, FD_ROOT, mp, &rvp);	/* root vnode */
+	if (error)
+		return (error);
+
+	MALLOC(fmp, struct fdescmount *, sizeof(struct fdescmount),
+				M_UFSMNT, M_WAITOK);	/* XXX */
+	rvp->v_type = VDIR;
+	rvp->v_flag |= VROOT;
+	fmp->f_root = rvp;
+	/* XXX -- don't mark as local to work around fts() problems */
+	/*mp->mnt_flag |= MNT_LOCAL;*/
+	mp->mnt_data = (qaddr_t) fmp;
+	getnewfsid(mp, MOUNT_FDESC);
+
+	(void) copyinstr(path, mp->mnt_stat.f_mntonname, MNAMELEN - 1, &size);
+	bzero(mp->mnt_stat.f_mntonname + size, MNAMELEN - size);
+	bzero(mp->mnt_stat.f_mntfromname, MNAMELEN);
+	bcopy("fdesc", mp->mnt_stat.f_mntfromname, sizeof("fdesc"));
+	return (0);
+}
+
+int
+fdesc_start(mp, flags, p)
+	struct mount *mp;
+	int flags;
+	struct proc *p;
+{
+	return (0);		/* nothing to do; always succeeds */
+}
+
+int
+fdesc_unmount(mp, mntflags, p)
+	struct mount *mp;
+	int mntflags;
+	struct proc *p;
+{
+	extern int doforce;
+	struct vnode *rvp = VFSTOFDESC(mp)->f_root;
+	int vflags = 0;
+	int error;
+
+	/* fdesc can never be rootfs so don't check for it */
+	if ((mntflags & MNT_FORCE) != 0) {
+		if (doforce == 0)
+			return (EINVAL);
+		vflags = FORCECLOSE;
+	}
+
+	/*
+	 * Busy if the root vnode has more than one reference; otherwise
+	 * vflush() the mount (nothing should be cached here, but who knows).
+	 */
+	if (rvp->v_usecount > 1)
+		return (EBUSY);
+	error = vflush(mp, rvp, vflags);
+	if (error != 0)
+		return (error);
+
+	/*
+	 * Drop the reference held on the root vnode...
+	 */
+	vrele(rvp);
+	/*
+	 * ...get rid of the vnode so it can be re-used...
+	 */
+	vgone(rvp);
+	/*
+	 * ...and last of all free the fdescmount structure.
+	 */
+	free(mp->mnt_data, M_UFSMNT);	/* XXX */
+	mp->mnt_data = 0;
+
+	return (0);
+}
+
+int
+fdesc_root(mp, vpp)
+	struct mount *mp;
+	struct vnode **vpp;
+{
+	struct vnode *rootvp = VFSTOFDESC(mp)->f_root;
+
+	/*
+	 * Take a new reference on the root vnode, lock it and hand it back.
+	 */
+	VREF(rootvp);
+	VOP_LOCK(rootvp);
+	*vpp = rootvp;
+
+	return (0);
+}
+
+int
+fdesc_quotactl(mp, cmd, uid, arg, p)
+	struct mount *mp;
+	int cmd;
+	uid_t uid;
+	caddr_t arg;
+	struct proc *p;
+{
+	/* Not supported: every request is rejected. */
+	return (EOPNOTSUPP);
+}
+
+int
+fdesc_statfs(mp, sbp, p)
+	struct mount *mp;
+	struct statfs *sbp;
+	struct proc *p;
+{
+	struct filedesc *fdp = p->p_fd;
+	int lim, last, freefd, i;
+
+	/*
+	 * Count the unused descriptor slots below the open file limit.
+	 * [ Strange results will ensue if the open file
+	 * limit is ever reduced below the current number
+	 * of open files... ]
+	 */
+	lim = p->p_rlimit[RLIMIT_NOFILE].rlim_cur;
+	last = min(fdp->fd_nfiles, lim);
+	freefd = 0;
+	i = fdp->fd_freefile;
+	while (i < last) {
+		if (fdp->fd_ofiles[i] == NULL)
+			freefd++;
+		i++;
+	}
+
+	/*
+	 * Slots between the allocated table size and the limit
+	 * have not been allocated yet; count them as free too.
+	 */
+	if (lim > fdp->fd_nfiles)
+		freefd += (lim - fdp->fd_nfiles);
+
+	sbp->f_type = MOUNT_FDESC;
+	sbp->f_flags = 0;
+	sbp->f_bsize = DEV_BSIZE;
+	sbp->f_iosize = DEV_BSIZE;
+	sbp->f_blocks = 2;		/* 1K to keep df happy */
+	sbp->f_bfree = 0;
+	sbp->f_bavail = 0;
+	sbp->f_files = lim + 1;		/* Allow for "." */
+	sbp->f_ffree = freefd;		/* See comments above */
+	/* Copy the id and names unless sbp is the mount's own buffer. */
+	if (sbp == &mp->mnt_stat)
+		return (0);
+	bcopy(&mp->mnt_stat.f_fsid, &sbp->f_fsid, sizeof(sbp->f_fsid));
+	bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN);
+	bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN);
+	return (0);
+}
+
+int
+fdesc_sync(mp, waitfor)
+	struct mount *mp;
+	int waitfor;
+{
+	/* Does nothing; always reports success. */
+	return (0);
+}
+
+/*
+ * Fdesc flat namespace lookup.
+ * Currently unsupported: always returns EOPNOTSUPP.
+ */
+int
+fdesc_vget(mp, ino, vpp)
+	struct mount *mp;
+	ino_t ino;
+	struct vnode **vpp;
+{
+
+	return (EOPNOTSUPP);
+}
+
+int
+fdesc_fhtovp(mp, fhp, setgen, vpp)
+	struct mount *mp;
+	struct fid *fhp;
+	int setgen;
+	struct vnode **vpp;
+{
+	return (EOPNOTSUPP);	/* not supported; *vpp is left untouched */
+}
+
+int
+fdesc_vptofh(vp, fhp)
+	struct vnode *vp;
+	struct fid *fhp;
+{
+	/* Not supported; *fhp is left untouched. */
+	return (EOPNOTSUPP);
+}
+
+struct vfsops fdesc_vfsops = {	/* positional: order follows struct vfsops */
+	fdesc_mount,
+	fdesc_start,
+	fdesc_unmount,
+	fdesc_root,
+	fdesc_quotactl,
+	fdesc_statfs,
+	fdesc_sync,
+	fdesc_vget,
+	fdesc_fhtovp,
+	fdesc_vptofh,
+	fdesc_init,		/* declared in fdesc.h, not defined in this file */
+};
diff --git a/sys/miscfs/fdesc/fdesc_vnops.c b/sys/miscfs/fdesc/fdesc_vnops.c
new file mode 100644
index 00000000000..00d8675aea2
--- /dev/null
+++ b/sys/miscfs/fdesc/fdesc_vnops.c
@@ -0,0 +1,974 @@
+/*
+ * Copyright (c) 1992, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software donated to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)fdesc_vnops.c	8.9 (Berkeley) 1/21/94
+ *
+ * $Id: fdesc_vnops.c,v 1.12 1993/04/06 16:17:17 jsp Exp $
+ */
+
+/*
+ * /dev/fd Filesystem
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/types.h>
+#include <sys/time.h>
+#include <sys/proc.h>
+#include <sys/kernel.h>	/* boottime */
+#include <sys/resourcevar.h>
+#include <sys/filedesc.h>
+#include <sys/vnode.h>
+#include <sys/malloc.h>
+#include <sys/file.h>
+#include <sys/stat.h>
+#include <sys/mount.h>
+#include <sys/namei.h>
+#include <sys/buf.h>
+#include <sys/dirent.h>
+#include <miscfs/fdesc/fdesc.h>
+
+#define cttyvp(p) ((p)->p_flag & P_CONTROLT ? (p)->p_session->s_ttyvp : NULL)
+
+#define FDL_WANT	0x01	/* someone is sleeping on fdcache_lock */
+#define FDL_LOCKED	0x02	/* fdesc_allocvp is creating a node */
+static int fdcache_lock;	/* FDL_* flags */
+
+dev_t devctty;			/* set by fdesc_init() */
+
+#if (FD_STDIN != FD_STDOUT-1) || (FD_STDOUT != FD_STDERR-1)
+FD_STDIN, FD_STDOUT, FD_STDERR must be a sequence n, n+1, n+2
+#endif
+
+#define	NFDCACHE 4		/* power of two: FD_NHASH masks with NFDCACHE-1 */
+#define	FD_NHASH(ix) ((ix) & (NFDCACHE - 1))	/* always < NFDCACHE */
+
+/*
+ * Cache head; fdescnodes are chained on it through fd_forw/fd_back
+ */
+struct fdcache {
+	struct fdescnode	*fc_forw;
+	struct fdescnode	*fc_back;
+};
+
+static struct fdcache fdcache[NFDCACHE];
+
+/*
+ * Initialise devctty and make each cache head an empty list; returns 0.
+ */
+fdesc_init()
+{
+	struct fdcache *fc;
+
+	devctty = makedev(nchrdev, 0);
+	for (fc = fdcache; fc < fdcache + NFDCACHE; fc++)
+		fc->fc_forw = fc->fc_back = (struct fdescnode *) fc;
+	return (0);	/* declared int in fdesc.h: give it a defined value */
+}
+
+/*
+ * Map a node index to the cache head of the hash list it belongs on
+ */
+static struct fdcache *
+fdesc_hash(ix)
+	int ix;
+{
+
+	return (fdcache + FD_NHASH(ix));
+}
+
+int
+fdesc_allocvp(ftype, ix, mp, vpp)
+	fdntype ftype;
+	int ix;
+	struct mount *mp;
+	struct vnode **vpp;
+{
+	struct fdcache *fc;
+	struct fdescnode *fd;
+	int error = 0;
+
+loop:
+	fc = fdesc_hash(ix);
+	for (fd = fc->fc_forw; fd != (struct fdescnode *) fc; fd = fd->fd_forw) {
+		if (fd->fd_ix == ix && fd->fd_vnode->v_mount == mp) {
+			if (vget(fd->fd_vnode, 0))	/* failed: rescan */
+				goto loop;
+			*vpp = fd->fd_vnode;
+			return (error);		/* cache hit; error is 0 */
+		}
+	}
+
+	/*
+	 * Not cached: take the fdcache lock (sleeping and rescanning if it
+	 * is already held) while we call getnewvnode since that can block.
+	 */
+	if (fdcache_lock & FDL_LOCKED) {
+		fdcache_lock |= FDL_WANT;
+		sleep((caddr_t) &fdcache_lock, PINOD);
+		goto loop;
+	}
+	fdcache_lock |= FDL_LOCKED;
+
+	error = getnewvnode(VT_FDESC, mp, fdesc_vnodeop_p, vpp);
+	if (error)
+		goto out;
+	MALLOC(fd, void *, sizeof(struct fdescnode), M_TEMP, M_WAITOK);
+	(*vpp)->v_data = fd;
+	fd->fd_vnode = *vpp;
+	fd->fd_type = ftype;
+	fd->fd_fd = -1;		/* no descriptor bound yet */
+	fd->fd_link = 0;
+	fd->fd_ix = ix;
+	fc = fdesc_hash(ix);
+	insque(fd, fc);
+
+out:;
+	fdcache_lock &= ~FDL_LOCKED;	/* unlock and wake any waiter */
+
+	if (fdcache_lock & FDL_WANT) {
+		fdcache_lock &= ~FDL_WANT;
+		wakeup((caddr_t) &fdcache_lock);
+	}
+
+	return (error);
+}
+
+/*
+ * ap->a_dvp is the current namei directory
+ * ap->a_cnp is the name to locate in that directory...
+ */
+int
+fdesc_lookup(ap)
+	struct vop_lookup_args /* {
+		struct vnode * a_dvp;
+		struct vnode ** a_vpp;
+		struct componentname * a_cnp;
+	} */ *ap;
+{
+	struct vnode **vpp = ap->a_vpp;
+	struct vnode *dvp = ap->a_dvp;
+	char *pname;
+	struct proc *p;
+	int nfiles;
+	unsigned fd;
+	int error;
+	struct vnode *fvp;
+	char *ln;
+
+	pname = ap->a_cnp->cn_nameptr;
+	if (ap->a_cnp->cn_namelen == 1 && *pname == '.') {
+		*vpp = dvp;
+		VREF(dvp);	/* "." is the directory itself */
+		VOP_LOCK(dvp);
+		return (0);
+	}
+
+	p = ap->a_cnp->cn_proc;
+	nfiles = p->p_fd->fd_nfiles;
+
+	switch (VTOFDESC(dvp)->fd_type) {
+	default:
+	case Flink:
+	case Fdesc:
+	case Fctty:
+		error = ENOTDIR;	/* only Froot and Fdevfd are directories */
+		goto bad;
+
+	case Froot:
+		if (ap->a_cnp->cn_namelen == 2 && bcmp(pname, "fd", 2) == 0) {
+			error = fdesc_allocvp(Fdevfd, FD_DEVFD, dvp->v_mount, &fvp);
+			if (error)
+				goto bad;
+			*vpp = fvp;
+			fvp->v_type = VDIR;
+			VOP_LOCK(fvp);
+			return (0);
+		}
+
+		if (ap->a_cnp->cn_namelen == 3 && bcmp(pname, "tty", 3) == 0) {
+			struct vnode *ttyvp = cttyvp(p);
+			if (ttyvp == NULL) {	/* cttyvp() gave no tty vnode */
+				error = ENXIO;
+				goto bad;
+			}
+			error = fdesc_allocvp(Fctty, FD_CTTY, dvp->v_mount, &fvp);
+			if (error)
+				goto bad;
+			*vpp = fvp;
+			fvp->v_type = VFIFO;
+			VOP_LOCK(fvp);
+			return (0);
+		}
+
+		ln = 0;		/* stays 0 unless the name is a std* link */
+		switch (ap->a_cnp->cn_namelen) {
+		case 5:
+			if (bcmp(pname, "stdin", 5) == 0) {
+				ln = "fd/0";
+				fd = FD_STDIN;
+			}
+			break;
+		case 6:
+			if (bcmp(pname, "stdout", 6) == 0) {
+				ln = "fd/1";
+				fd = FD_STDOUT;
+			} else
+			if (bcmp(pname, "stderr", 6) == 0) {
+				ln = "fd/2";
+				fd = FD_STDERR;
+			}
+			break;
+		}
+
+		if (ln) {
+			error = fdesc_allocvp(Flink, fd, dvp->v_mount, &fvp);
+			if (error)
+				goto bad;
+			VTOFDESC(fvp)->fd_link = ln;
+			*vpp = fvp;
+			fvp->v_type = VLNK;
+			VOP_LOCK(fvp);
+			return (0);
+		} else {
+			error = ENOENT;
+			goto bad;
+		}
+
+		/* NOTREACHED: both branches above leave the function */
+
+	case Fdevfd:
+		if (ap->a_cnp->cn_namelen == 2 && bcmp(pname, "..", 2) == 0) {
+			error = fdesc_root(dvp->v_mount, vpp);
+			return (error);
+		}
+
+		fd = 0;
+		while (*pname >= '0' && *pname <= '9') {
+			fd = 10 * fd + *pname++ - '0';
+			if (fd >= nfiles)	/* out of range: stop parsing */
+				break;
+		}
+
+		if (*pname != '\0') {	/* something other than digits left */
+			error = ENOENT;
+			goto bad;
+		}
+
+		if (fd >= nfiles || p->p_fd->fd_ofiles[fd] == NULL) {
+			error = EBADF;
+			goto bad;
+		}
+
+		error = fdesc_allocvp(Fdesc, FD_DESC+fd, dvp->v_mount, &fvp);
+		if (error)
+			goto bad;
+		VTOFDESC(fvp)->fd_fd = fd;
+		*vpp = fvp;
+		return (0);	/* NOTE(review): no VOP_LOCK(fvp) here, unlike above */
+	}
+
+bad:;
+	*vpp = NULL;
+	return (error);
+}
+
+int
+fdesc_open(ap)
+	struct vop_open_args /* {
+		struct vnode *a_vp;
+		int  a_mode;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct vnode *vp = ap->a_vp;
+	int error = 0;
+
+	switch (VTOFDESC(vp)->fd_type) {
+	case Fdesc:
+		/*
+		 * XXX Kludge: set p->p_dupfd to contain the value of the
+		 * file descriptor being sought for duplication. The error
+		 * return ensures that the vnode for this device will be
+		 * released by vn_open. Open will detect this special error and
+		 * take the actions in dupfdopen.  Other callers of vn_open or
+		 * VOP_OPEN will simply report the error.
+		 */
+		ap->a_p->p_dupfd = VTOFDESC(vp)->fd_fd;	/* XXX */
+		error = ENODEV;
+		break;
+
+	case Fctty:
+		error = cttyopen(devctty, ap->a_mode, 0, ap->a_p);
+		break;
+	}
+
+	return (error);		/* still 0 for every other node type */
+}
+
+/* Fill *vap with the attributes of the file open on descriptor fd of p. */
+static int
+fdesc_attr(fd, vap, cred, p)
+	int fd;
+	struct vattr *vap;
+	struct ucred *cred;
+	struct proc *p;
+{
+	struct filedesc *fdp = p->p_fd;
+	struct file *fp;
+	struct stat stb;
+	int error;
+
+	/* fd is signed here: reject negatives before indexing fd_ofiles[] */
+	if (fd < 0 || fd >= fdp->fd_nfiles || (fp = fdp->fd_ofiles[fd]) == NULL)
+		return (EBADF);
+
+	switch (fp->f_type) {
+	case DTYPE_VNODE:
+		error = VOP_GETATTR((struct vnode *) fp->f_data, vap, cred, p);
+		if (error == 0 && vap->va_type == VDIR) {
+			/*
+			 * don't allow directories to show up because
+			 * that causes loops in the namespace.
+			 */
+			vap->va_type = VFIFO;
+		}
+		break;
+
+	case DTYPE_SOCKET:
+		error = soo_stat((struct socket *)fp->f_data, &stb);
+		if (error == 0) {
+			/* translate the struct stat into a struct vattr */
+			vattr_null(vap);
+			vap->va_type = VSOCK;
+			vap->va_mode = stb.st_mode;
+			vap->va_nlink = stb.st_nlink;
+			vap->va_uid = stb.st_uid;
+			vap->va_gid = stb.st_gid;
+			vap->va_fsid = stb.st_dev;
+			vap->va_fileid = stb.st_ino;
+			vap->va_size = stb.st_size;
+			vap->va_blocksize = stb.st_blksize;
+			vap->va_atime = stb.st_atimespec;
+			vap->va_mtime = stb.st_mtimespec;
+			vap->va_ctime = stb.st_ctimespec;
+			vap->va_gen = stb.st_gen;
+			vap->va_flags = stb.st_flags;
+			vap->va_rdev = stb.st_rdev;
+			vap->va_bytes = stb.st_blocks * stb.st_blksize;
+		}
+		break;
+	default:
+		panic("fdesc attr");	/* unknown file type */
+	}
+	return (error);
+}
+
+int
+fdesc_getattr(ap)
+	struct vop_getattr_args /* {
+		struct vnode *a_vp;
+		struct vattr *a_vap;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct vnode *vp = ap->a_vp;
+	struct vattr *vap = ap->a_vap;
+	unsigned fd;
+	int error = 0;
+
+	switch (VTOFDESC(vp)->fd_type) {
+	case Froot:
+	case Fdevfd:
+	case Flink:
+	case Fctty:
+		/* these nodes have synthesized attributes */
+		bzero((caddr_t) vap, sizeof(*vap));
+		vattr_null(vap);
+		vap->va_fileid = VTOFDESC(vp)->fd_ix;
+
+		switch (VTOFDESC(vp)->fd_type) {
+		case Flink:
+			vap->va_mode = S_IRUSR|S_IXUSR|S_IRGRP|S_IXGRP|S_IROTH|S_IXOTH;
+			vap->va_type = VLNK;
+			vap->va_nlink = 1;
+			vap->va_size = strlen(VTOFDESC(vp)->fd_link);
+			break;
+
+		case Fctty:
+			vap->va_mode = S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH;
+			vap->va_type = VFIFO;
+			vap->va_nlink = 1;
+			vap->va_size = 0;
+			break;
+
+		default:	/* Froot, Fdevfd */
+			vap->va_mode = S_IRUSR|S_IXUSR|S_IRGRP|S_IXGRP|S_IROTH|S_IXOTH;
+			vap->va_type = VDIR;
+			vap->va_nlink = 2;
+			vap->va_size = DEV_BSIZE;
+			break;
+		}
+		vap->va_uid = 0;
+		vap->va_gid = 0;
+		vap->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0];
+		vap->va_blocksize = DEV_BSIZE;
+		vap->va_atime.ts_sec = boottime.tv_sec;	/* all times: boot */
+		vap->va_atime.ts_nsec = 0;
+		vap->va_mtime = vap->va_atime;
+		vap->va_ctime = vap->va_mtime;
+		vap->va_gen = 0;
+		vap->va_flags = 0;
+		vap->va_rdev = 0;
+		vap->va_bytes = 0;
+		break;
+
+	case Fdesc:
+		/* attributes come from the file open on the descriptor */
+		fd = VTOFDESC(vp)->fd_fd;
+		error = fdesc_attr(fd, vap, ap->a_cred, ap->a_p);
+		break;
+
+	default:
+		panic("fdesc_getattr");
+		break;	
+	}
+
+	if (error == 0)
+		vp->v_type = vap->va_type;	/* vnode type follows the result */
+
+	return (error);
+}
+
+/*
+ * Set attributes through an fdesc vnode.
+ *
+ * Only Fdesc nodes forward the request: a descriptor that refers to a
+ * vnode has VOP_SETATTR applied to that vnode, a socket descriptor is
+ * silently accepted.  Fctty nodes succeed without doing anything and
+ * every other node type is refused with EACCES.  Returns EBADF when the
+ * descriptor number is out of range or not open in the calling process.
+ */
+int
+fdesc_setattr(ap)
+	struct vop_setattr_args /* {
+		struct vnode *a_vp;
+		struct vattr *a_vap;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct filedesc *fdp = ap->a_p->p_fd;
+	struct file *fp;
+	unsigned fd;
+	int kind = VTOFDESC(ap->a_vp)->fd_type;
+
+	if (kind == Fctty)
+		return (0);
+
+	/* The root and the other synthetic nodes cannot be changed. */
+	if (kind != Fdesc)
+		return (EACCES);
+
+	fd = VTOFDESC(ap->a_vp)->fd_fd;
+	if (fd >= fdp->fd_nfiles)
+		return (EBADF);
+	fp = fdp->fd_ofiles[fd];
+	if (fp == NULL)
+		return (EBADF);
+
+	/* Sockets have nothing to set; vnodes get the real operation. */
+	if (fp->f_type == DTYPE_SOCKET)
+		return (0);
+	if (fp->f_type == DTYPE_VNODE)
+		return (VOP_SETATTR((struct vnode *) fp->f_data, ap->a_vap,
+		    ap->a_cred, ap->a_p));
+
+	panic("fdesc setattr");
+	/* NOTREACHED */
+}
+
+/*
+ * Size in bytes of every directory record handed out by fdesc_readdir().
+ * Because all records have this one size, a directory offset divided by
+ * UIO_MX is an entry index.
+ */
+#define UIO_MX 16
+
+/*
+ * Fixed entries of the fdesc root directory, in the order fdesc_readdir()
+ * walks them.  The final all-zero element (d_fileno == 0) ends the table.
+ */
+static struct dirtmp {
+	u_long d_fileno;	/* file number reported for the entry */
+	u_short d_reclen;	/* record length, always UIO_MX here */
+	u_short d_namlen;	/* length of d_name, excluding the NUL */
+	char d_name[8];		/* NUL-terminated entry name */
+} rootent[] = {
+	{ FD_DEVFD, UIO_MX, 2, "fd" },
+	{ FD_STDIN, UIO_MX, 5, "stdin" },
+	{ FD_STDOUT, UIO_MX, 6, "stdout" },
+	{ FD_STDERR, UIO_MX, 6, "stderr" },
+	{ FD_CTTY, UIO_MX, 3, "tty" },
+	{ 0 }
+};
+
+/*
+ * Read directory entries from an fdesc directory.
+ *
+ * For the root (Froot) the entries come from rootent[]; "tty" is left
+ * out when the process has no controlling terminal and stdin/stdout/
+ * stderr are left out when the matching descriptor is not open.  For
+ * any other directory node one entry, named by its number, is produced
+ * for each open descriptor of the calling process.
+ *
+ * Every record is UIO_MX bytes long, so uio_offset / UIO_MX is the index
+ * of the next entry.  The offset is validated before it is used as an
+ * index: a negative offset is rejected with EINVAL and an offset at or
+ * past the end of the directory reads as end-of-directory (0 bytes, no
+ * error) instead of indexing outside rootent[] or fd_ofiles[].
+ *
+ * Returns 0 for Fctty nodes, ENOTDIR for Fdesc nodes, otherwise 0 or
+ * the error reported by uiomove().
+ */
+int
+fdesc_readdir(ap)
+	struct vop_readdir_args /* {
+		struct vnode *a_vp;
+		struct uio *a_uio;
+		struct ucred *a_cred;
+	} */ *ap;
+{
+	struct uio *uio = ap->a_uio;
+	struct filedesc *fdp;
+	int i;
+	int error;
+
+	switch (VTOFDESC(ap->a_vp)->fd_type) {
+	case Fctty:
+		return (0);
+
+	case Fdesc:
+		return (ENOTDIR);
+
+	default:
+		break;
+	}
+
+	/* A negative offset would index in front of the tables below. */
+	if (uio->uio_offset < 0)
+		return (EINVAL);
+
+	fdp = uio->uio_procp->p_fd;
+
+	if (VTOFDESC(ap->a_vp)->fd_type == Froot) {
+		struct dirent d;
+		struct dirent *dp = &d;
+		struct dirtmp *dt;
+		/* Number of real entries; the zero terminator is not counted. */
+		int nrootent = sizeof(rootent) / sizeof(rootent[0]) - 1;
+
+		/*
+		 * Compare before narrowing to int so that a huge offset
+		 * cannot wrap into a small or negative index.
+		 */
+		if (uio->uio_offset / UIO_MX >= nrootent)
+			return (0);
+
+		i = uio->uio_offset / UIO_MX;
+		error = 0;
+
+		while (uio->uio_resid > 0) {
+			dt = &rootent[i];
+			if (dt->d_fileno == 0) {
+				/**eofflagp = 1;*/
+				break;
+			}
+			i++;
+
+			/* Skip entries that do not exist for this process. */
+			switch (dt->d_fileno) {
+			case FD_CTTY:
+				if (cttyvp(uio->uio_procp) == NULL)
+					continue;
+				break;
+
+			case FD_STDIN:
+			case FD_STDOUT:
+			case FD_STDERR:
+				if ((dt->d_fileno-FD_STDIN) >= fdp->fd_nfiles)
+					continue;
+				if (fdp->fd_ofiles[dt->d_fileno-FD_STDIN] == NULL)
+					continue;
+				break;
+			}
+			bzero((caddr_t) dp, UIO_MX);
+			dp->d_fileno = dt->d_fileno;
+			dp->d_namlen = dt->d_namlen;
+			dp->d_type = DT_UNKNOWN;
+			dp->d_reclen = dt->d_reclen;
+			bcopy(dt->d_name, dp->d_name, dp->d_namlen+1);
+			error = uiomove((caddr_t) dp, UIO_MX, uio);
+			if (error)
+				break;
+		}
+		uio->uio_offset = i * UIO_MX;
+		return (error);
+	}
+
+	/* Same overflow-safe bound check for the per-descriptor listing. */
+	if (uio->uio_offset / UIO_MX >= fdp->fd_nfiles)
+		return (0);
+
+	i = uio->uio_offset / UIO_MX;
+	error = 0;
+	while (uio->uio_resid > 0) {
+		if (i >= fdp->fd_nfiles)
+			break;
+
+		if (fdp->fd_ofiles[i] != NULL) {
+			struct dirent d;
+			struct dirent *dp = &d;
+
+			bzero((caddr_t) dp, UIO_MX);
+
+			/* The entry name is the descriptor number in decimal. */
+			dp->d_namlen = sprintf(dp->d_name, "%d", i);
+			dp->d_reclen = UIO_MX;
+			dp->d_type = DT_UNKNOWN;
+			dp->d_fileno = i + FD_STDIN;
+			/*
+			 * And ship to userland
+			 */
+			error = uiomove((caddr_t) dp, UIO_MX, uio);
+			if (error)
+				break;
+		}
+		i++;
+	}
+
+	uio->uio_offset = i * UIO_MX;
+	return (error);
+}
+
+/*
+ * Read the target of an fdesc symbolic link.
+ *
+ * Returns EPERM when the vnode is not of type VLNK and EOPNOTSUPP when
+ * it is a link but not an Flink node; otherwise the fd_link string
+ * (without its terminating NUL) is copied out through the uio.
+ */
+int
+fdesc_readlink(ap)
+	struct vop_readlink_args /* {
+		struct vnode *a_vp;
+		struct uio *a_uio;
+		struct ucred *a_cred;
+	} */ *ap;
+{
+	struct vnode *vp = ap->a_vp;
+	char *target;
+
+	if (vp->v_type != VLNK)
+		return (EPERM);
+
+	if (VTOFDESC(vp)->fd_type != Flink)
+		return (EOPNOTSUPP);
+
+	target = VTOFDESC(vp)->fd_link;
+	return (uiomove(target, strlen(target), ap->a_uio));
+}
+
+/*
+ * Read from an fdesc vnode.  Only the Fctty node is readable here; the
+ * request is passed to cttyread().  Every other node type returns
+ * EOPNOTSUPP.
+ */
+int
+fdesc_read(ap)
+	struct vop_read_args /* {
+		struct vnode *a_vp;
+		struct uio *a_uio;
+		int  a_ioflag;
+		struct ucred *a_cred;
+	} */ *ap;
+{
+
+	if (VTOFDESC(ap->a_vp)->fd_type != Fctty)
+		return (EOPNOTSUPP);
+
+	return (cttyread(devctty, ap->a_uio, ap->a_ioflag));
+}
+
+/*
+ * Write to an fdesc vnode.  Only the Fctty node is writable here; the
+ * request is passed to cttywrite().  Every other node type returns
+ * EOPNOTSUPP.
+ */
+int
+fdesc_write(ap)
+	struct vop_write_args /* {
+		struct vnode *a_vp;
+		struct uio *a_uio;
+		int  a_ioflag;
+		struct ucred *a_cred;
+	} */ *ap;
+{
+
+	if (VTOFDESC(ap->a_vp)->fd_type != Fctty)
+		return (EOPNOTSUPP);
+
+	return (cttywrite(devctty, ap->a_uio, ap->a_ioflag));
+}
+
+/*
+ * ioctl on an fdesc vnode.  Only the Fctty node supports it; the request
+ * is passed to cttyioctl().  Every other node type returns EOPNOTSUPP.
+ */
+int
+fdesc_ioctl(ap)
+	struct vop_ioctl_args /* {
+		struct vnode *a_vp;
+		int  a_command;
+		caddr_t  a_data;
+		int  a_fflag;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+
+	if (VTOFDESC(ap->a_vp)->fd_type != Fctty)
+		return (EOPNOTSUPP);
+
+	return (cttyioctl(devctty, ap->a_command, ap->a_data,
+	    ap->a_fflag, ap->a_p));
+}
+
+/*
+ * select on an fdesc vnode.  Only the Fctty node supports it; the
+ * request is passed to cttyselect().  Every other node type returns
+ * EOPNOTSUPP.
+ */
+int
+fdesc_select(ap)
+	struct vop_select_args /* {
+		struct vnode *a_vp;
+		int  a_which;
+		int  a_fflags;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+
+	if (VTOFDESC(ap->a_vp)->fd_type != Fctty)
+		return (EOPNOTSUPP);
+
+	/*
+	 * NOTE(review): a_fflags is what gets handed to cttyselect();
+	 * a_which is never used.  Confirm against cttyselect() that the
+	 * open flags, not the select mode, are what it expects.
+	 */
+	return (cttyselect(devctty, ap->a_fflags, ap->a_p));
+}
+
+/*
+ * Called when the vnode becomes inactive.  Always returns 0.
+ */
+int
+fdesc_inactive(ap)
+	struct vop_inactive_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+
+	/*
+	 * Reset the type to VNON so that vgone() does not
+	 * do nasty things with a stale v_type.
+	 */
+	ap->a_vp->v_type = VNON;
+
+	return (0);
+}
+
+/*
+ * Release the fdesc-private data hanging off a vnode: unlink the node
+ * with remque(), free the M_TEMP storage and clear v_data.
+ * Always returns 0.
+ */
+int
+fdesc_reclaim(ap)
+	struct vop_reclaim_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+	struct vnode *vp = ap->a_vp;
+
+	/* Take the node off its queue before its memory goes away. */
+	remque(VTOFDESC(vp));
+
+	FREE(vp->v_data, M_TEMP);
+	vp->v_data = NULL;
+
+	return (0);
+}
+
+/*
+ * Return POSIX pathconf information for fdesc vnodes.
+ *
+ * Stores the value for ap->a_name through ap->a_retval and returns 0,
+ * or returns EINVAL for a name that is not handled here.
+ */
+int
+fdesc_pathconf(ap)
+	struct vop_pathconf_args /* {
+		struct vnode *a_vp;
+		int a_name;
+		int *a_retval;
+	} */ *ap;
+{
+
+	switch (ap->a_name) {
+	case _PC_LINK_MAX:
+		*ap->a_retval = LINK_MAX;
+		return (0);
+	case _PC_MAX_CANON:
+		*ap->a_retval = MAX_CANON;
+		return (0);
+	case _PC_MAX_INPUT:
+		*ap->a_retval = MAX_INPUT;
+		return (0);
+	case _PC_PIPE_BUF:
+		*ap->a_retval = PIPE_BUF;
+		return (0);
+	case _PC_CHOWN_RESTRICTED:
+		*ap->a_retval = 1;
+		return (0);
+	case _PC_VDISABLE:
+		*ap->a_retval = _POSIX_VDISABLE;
+		return (0);
+	default:
+		return (EINVAL);
+	}
+	/* NOTREACHED */
+}
+
+/*
+ * Print out the contents of a /dev/fd vnode.
+ * The output is a fixed line; the argument is not examined.
+ * Always returns 0.
+ */
+/* ARGSUSED */
+int
+fdesc_print(ap)
+	struct vop_print_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+
+	printf("tag VT_NON, fdesc vnode\n");
+	return (0);
+}
+
+/*
+ * vfree operation for /dev/fd vnodes: does nothing and reports
+ * success (returns 0).  The arguments are not examined.
+ */
+int
+fdesc_vfree(ap)
+	struct vop_vfree_args /* {
+		struct vnode *a_pvp;
+		ino_t a_ino;
+		int a_mode;
+	} */ *ap;
+{
+
+	return (0);
+}
+
+/*
+ * /dev/fd vnode unsupported operation.
+ * Shared stub behind the fdesc_* macros for operations this filesystem
+ * does not provide; always returns EOPNOTSUPP.
+ */
+int
+fdesc_enotsupp()
+{
+
+	return (EOPNOTSUPP);
+}
+
+/*
+ * /dev/fd "should never get here" operation.
+ * Stub behind fdesc_bmap and fdesc_strategy; it panics instead of
+ * returning, so there is no return statement.
+ */
+int
+fdesc_badop()
+{
+
+	panic("fdesc: bad op");
+	/* NOTREACHED */
+}
+
+/*
+ * /dev/fd vnode null operation: does nothing and returns 0.
+ * NOTE(review): the fdesc_* stub macros in this part of the file use
+ * nullop rather than this function -- confirm whether it is still used.
+ */
+int
+fdesc_nullop()
+{
+
+	return (0);
+}
+
+/*
+ * Vnode operations that fdesc does not implement itself.  Each name is
+ * one of the shared stubs cast to the entry-point type of that
+ * operation (a function taking the operation's vop_*_args pointer):
+ *
+ *	fdesc_enotsupp	returns EOPNOTSUPP
+ *	nullop		used for operations that simply succeed
+ *	fdesc_badop	panics; the operation must never be reached
+ */
+#define fdesc_create ((int (*) __P((struct  vop_create_args *)))fdesc_enotsupp)
+#define fdesc_mknod ((int (*) __P((struct  vop_mknod_args *)))fdesc_enotsupp)
+#define fdesc_close ((int (*) __P((struct  vop_close_args *)))nullop)
+#define fdesc_access ((int (*) __P((struct  vop_access_args *)))nullop)
+#define fdesc_mmap ((int (*) __P((struct  vop_mmap_args *)))fdesc_enotsupp)
+#define fdesc_fsync ((int (*) __P((struct  vop_fsync_args *)))nullop)
+#define fdesc_seek ((int (*) __P((struct  vop_seek_args *)))nullop)
+#define fdesc_remove ((int (*) __P((struct  vop_remove_args *)))fdesc_enotsupp)
+#define fdesc_link ((int (*) __P((struct  vop_link_args *)))fdesc_enotsupp)
+#define fdesc_rename ((int (*) __P((struct  vop_rename_args *)))fdesc_enotsupp)
+#define fdesc_mkdir ((int (*) __P((struct  vop_mkdir_args *)))fdesc_enotsupp)
+#define fdesc_rmdir ((int (*) __P((struct  vop_rmdir_args *)))fdesc_enotsupp)
+#define fdesc_symlink ((int (*) __P((struct vop_symlink_args *)))fdesc_enotsupp)
+#define fdesc_abortop ((int (*) __P((struct  vop_abortop_args *)))nullop)
+#define fdesc_lock ((int (*) __P((struct  vop_lock_args *)))nullop)
+#define fdesc_unlock ((int (*) __P((struct  vop_unlock_args *)))nullop)
+#define fdesc_bmap ((int (*) __P((struct  vop_bmap_args *)))fdesc_badop)
+#define fdesc_strategy ((int (*) __P((struct  vop_strategy_args *)))fdesc_badop)
+#define fdesc_islocked ((int (*) __P((struct  vop_islocked_args *)))nullop)
+#define fdesc_advlock ((int (*) __P((struct vop_advlock_args *)))fdesc_enotsupp)
+#define fdesc_blkatoff \
+	((int (*) __P((struct  vop_blkatoff_args *)))fdesc_enotsupp)
+#define fdesc_vget ((int (*) __P((struct  vop_vget_args *)))fdesc_enotsupp)
+/* Cast to the vop_valloc_args form like every other entry point here. */
+#define fdesc_valloc ((int (*) __P((struct  vop_valloc_args *)))fdesc_enotsupp)
+#define fdesc_truncate \
+	((int (*) __P((struct  vop_truncate_args *)))fdesc_enotsupp)
+#define fdesc_update ((int (*) __P((struct  vop_update_args *)))fdesc_enotsupp)
+#define fdesc_bwrite ((int (*) __P((struct  vop_bwrite_args *)))fdesc_enotsupp)
+
+/*
+ * Vnode operations vector for fdesc.
+ *
+ * fdesc_vnodeop_entries pairs each operation descriptor with the fdesc
+ * routine (or stub macro) that implements it; operations not listed use
+ * the vop_default_desc entry, vn_default_error.  The list ends with a
+ * NULL/NULL pair.  fdesc_vnodeop_opv_desc ties the list to
+ * fdesc_vnodeop_p, the pointer to the operations vector.
+ */
+int (**fdesc_vnodeop_p)();
+struct vnodeopv_entry_desc fdesc_vnodeop_entries[] = {
+	{ &vop_default_desc, vn_default_error },
+	{ &vop_lookup_desc, fdesc_lookup },	/* lookup */
+	{ &vop_create_desc, fdesc_create },	/* create */
+	{ &vop_mknod_desc, fdesc_mknod },	/* mknod */
+	{ &vop_open_desc, fdesc_open },		/* open */
+	{ &vop_close_desc, fdesc_close },	/* close */
+	{ &vop_access_desc, fdesc_access },	/* access */
+	{ &vop_getattr_desc, fdesc_getattr },	/* getattr */
+	{ &vop_setattr_desc, fdesc_setattr },	/* setattr */
+	{ &vop_read_desc, fdesc_read },		/* read */
+	{ &vop_write_desc, fdesc_write },	/* write */
+	{ &vop_ioctl_desc, fdesc_ioctl },	/* ioctl */
+	{ &vop_select_desc, fdesc_select },	/* select */
+	{ &vop_mmap_desc, fdesc_mmap },		/* mmap */
+	{ &vop_fsync_desc, fdesc_fsync },	/* fsync */
+	{ &vop_seek_desc, fdesc_seek },		/* seek */
+	{ &vop_remove_desc, fdesc_remove },	/* remove */
+	{ &vop_link_desc, fdesc_link },		/* link */
+	{ &vop_rename_desc, fdesc_rename },	/* rename */
+	{ &vop_mkdir_desc, fdesc_mkdir },	/* mkdir */
+	{ &vop_rmdir_desc, fdesc_rmdir },	/* rmdir */
+	{ &vop_symlink_desc, fdesc_symlink },	/* symlink */
+	{ &vop_readdir_desc, fdesc_readdir },	/* readdir */
+	{ &vop_readlink_desc, fdesc_readlink },	/* readlink */
+	{ &vop_abortop_desc, fdesc_abortop },	/* abortop */
+	{ &vop_inactive_desc, fdesc_inactive },	/* inactive */
+	{ &vop_reclaim_desc, fdesc_reclaim },	/* reclaim */
+	{ &vop_lock_desc, fdesc_lock },		/* lock */
+	{ &vop_unlock_desc, fdesc_unlock },	/* unlock */
+	{ &vop_bmap_desc, fdesc_bmap },		/* bmap */
+	{ &vop_strategy_desc, fdesc_strategy },	/* strategy */
+	{ &vop_print_desc, fdesc_print },	/* print */
+	{ &vop_islocked_desc, fdesc_islocked },	/* islocked */
+	{ &vop_pathconf_desc, fdesc_pathconf },	/* pathconf */
+	{ &vop_advlock_desc, fdesc_advlock },	/* advlock */
+	{ &vop_blkatoff_desc, fdesc_blkatoff },	/* blkatoff */
+	{ &vop_valloc_desc, fdesc_valloc },	/* valloc */
+	{ &vop_vfree_desc, fdesc_vfree },	/* vfree */
+	{ &vop_truncate_desc, fdesc_truncate },	/* truncate */
+	{ &vop_update_desc, fdesc_update },	/* update */
+	{ &vop_bwrite_desc, fdesc_bwrite },	/* bwrite */
+	{ (struct vnodeop_desc*)NULL, (int(*)())NULL }
+};
+struct vnodeopv_desc fdesc_vnodeop_opv_desc =
+	{ &fdesc_vnodeop_p, fdesc_vnodeop_entries };
diff --git a/sys/miscfs/fifofs/fifo.h b/sys/miscfs/fifofs/fifo.h
new file mode 100644
index 00000000000..e89186d8b89
--- /dev/null
+++ b/sys/miscfs/fifofs/fifo.h
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)fifo.h	8.2 (Berkeley) 2/2/94
+ */
+
+#ifdef FIFO
+/*
+ * Prototypes for fifo operations on vnodes.
+ *
+ * Operations with their own implementation are declared as functions.
+ * The rest are macros that cast one of three shared routines to the
+ * entry-point type of the operation:
+ *	fifo_badop	for operations that must never be called on a fifo
+ *	fifo_ebadf	for operations that are refused with an error
+ *	nullop		for operations that simply succeed
+ */
+int	fifo_badop(),
+	fifo_ebadf();
+
+int	fifo_lookup __P((struct vop_lookup_args *));
+#define fifo_create ((int (*) __P((struct  vop_create_args *)))fifo_badop)
+#define fifo_mknod ((int (*) __P((struct  vop_mknod_args *)))fifo_badop)
+int	fifo_open __P((struct vop_open_args *));
+int	fifo_close __P((struct vop_close_args *));
+#define fifo_access ((int (*) __P((struct  vop_access_args *)))fifo_ebadf)
+#define fifo_getattr ((int (*) __P((struct  vop_getattr_args *)))fifo_ebadf)
+#define fifo_setattr ((int (*) __P((struct  vop_setattr_args *)))fifo_ebadf)
+int	fifo_read __P((struct vop_read_args *));
+int	fifo_write __P((struct vop_write_args *));
+int	fifo_ioctl __P((struct vop_ioctl_args *));
+int	fifo_select __P((struct vop_select_args *));
+#define fifo_mmap ((int (*) __P((struct  vop_mmap_args *)))fifo_badop)
+#define fifo_fsync ((int (*) __P((struct  vop_fsync_args *)))nullop)
+#define fifo_seek ((int (*) __P((struct  vop_seek_args *)))fifo_badop)
+#define fifo_remove ((int (*) __P((struct  vop_remove_args *)))fifo_badop)
+#define fifo_link ((int (*) __P((struct  vop_link_args *)))fifo_badop)
+#define fifo_rename ((int (*) __P((struct  vop_rename_args *)))fifo_badop)
+#define fifo_mkdir ((int (*) __P((struct  vop_mkdir_args *)))fifo_badop)
+#define fifo_rmdir ((int (*) __P((struct  vop_rmdir_args *)))fifo_badop)
+#define fifo_symlink ((int (*) __P((struct  vop_symlink_args *)))fifo_badop)
+#define fifo_readdir ((int (*) __P((struct  vop_readdir_args *)))fifo_badop)
+#define fifo_readlink ((int (*) __P((struct  vop_readlink_args *)))fifo_badop)
+#define fifo_abortop ((int (*) __P((struct  vop_abortop_args *)))fifo_badop)
+#define fifo_inactive ((int (*) __P((struct  vop_inactive_args *)))nullop)
+#define fifo_reclaim ((int (*) __P((struct  vop_reclaim_args *)))nullop)
+int	fifo_lock __P((struct vop_lock_args *));
+int	fifo_unlock __P((struct vop_unlock_args *));
+int	fifo_bmap __P((struct vop_bmap_args *));
+#define fifo_strategy ((int (*) __P((struct  vop_strategy_args *)))fifo_badop)
+int	fifo_print __P((struct vop_print_args *));
+#define fifo_islocked ((int (*) __P((struct  vop_islocked_args *)))nullop)
+int	fifo_pathconf __P((struct vop_pathconf_args *));
+int	fifo_advlock __P((struct vop_advlock_args *));
+#define fifo_blkatoff ((int (*) __P((struct  vop_blkatoff_args *)))fifo_badop)
+#define fifo_valloc ((int (*) __P((struct  vop_valloc_args *)))fifo_badop)
+/*
+ * NOTE(review): fifo_reallocblks has no matching entry in the
+ * fifo_vnodeop_entries table of fifo_vnops.c -- confirm it is used.
+ */
+#define fifo_reallocblks \
+	((int (*) __P((struct  vop_reallocblks_args *)))fifo_badop)
+#define fifo_vfree ((int (*) __P((struct  vop_vfree_args *)))fifo_badop)
+#define fifo_truncate ((int (*) __P((struct  vop_truncate_args *)))nullop)
+#define fifo_update ((int (*) __P((struct  vop_update_args *)))nullop)
+#define fifo_bwrite ((int (*) __P((struct  vop_bwrite_args *)))nullop)
+#endif /* FIFO */
diff --git a/sys/miscfs/fifofs/fifo_vnops.c b/sys/miscfs/fifofs/fifo_vnops.c
new file mode 100644
index 00000000000..bad33a430b6
--- /dev/null
+++ b/sys/miscfs/fifofs/fifo_vnops.c
@@ -0,0 +1,494 @@
+/*
+ * Copyright (c) 1990, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)fifo_vnops.c	8.2 (Berkeley) 1/4/94
+ */
+
+#include <sys/param.h>
+#include <sys/proc.h>
+#include <sys/time.h>
+#include <sys/namei.h>
+#include <sys/vnode.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/stat.h>
+#include <sys/systm.h>
+#include <sys/ioctl.h>
+#include <sys/file.h>
+#include <sys/errno.h>
+#include <sys/malloc.h>
+#include <miscfs/fifofs/fifo.h>
+
+/*
+ * This structure is associated with the FIFO vnode and stores
+ * the state associated with the FIFO.  It is allocated by the first
+ * fifo_open() and released by fifo_close() once the vnode has no
+ * other users.
+ */
+struct fifoinfo {
+	struct socket	*fi_readsock;	/* socket fifo_read() receives from */
+	struct socket	*fi_writesock;	/* socket fifo_write() sends on */
+	long		fi_readers;	/* opens for reading not yet closed */
+	long		fi_writers;	/* opens for writing not yet closed */
+};
+
+/*
+ * Vnode operations vector for fifos.
+ *
+ * fifo_vnodeop_entries pairs each operation descriptor with the fifo
+ * routine, or the stub macro from fifo.h, that implements it;
+ * operations not listed use the vop_default_desc entry,
+ * vn_default_error.  The list ends with a NULL/NULL pair.
+ * fifo_vnodeop_opv_desc ties the list to fifo_vnodeop_p, the pointer
+ * to the operations vector.
+ */
+int (**fifo_vnodeop_p)();
+struct vnodeopv_entry_desc fifo_vnodeop_entries[] = {
+	{ &vop_default_desc, vn_default_error },
+	{ &vop_lookup_desc, fifo_lookup },		/* lookup */
+	{ &vop_create_desc, fifo_create },		/* create */
+	{ &vop_mknod_desc, fifo_mknod },		/* mknod */
+	{ &vop_open_desc, fifo_open },			/* open */
+	{ &vop_close_desc, fifo_close },		/* close */
+	{ &vop_access_desc, fifo_access },		/* access */
+	{ &vop_getattr_desc, fifo_getattr },		/* getattr */
+	{ &vop_setattr_desc, fifo_setattr },		/* setattr */
+	{ &vop_read_desc, fifo_read },			/* read */
+	{ &vop_write_desc, fifo_write },		/* write */
+	{ &vop_ioctl_desc, fifo_ioctl },		/* ioctl */
+	{ &vop_select_desc, fifo_select },		/* select */
+	{ &vop_mmap_desc, fifo_mmap },			/* mmap */
+	{ &vop_fsync_desc, fifo_fsync },		/* fsync */
+	{ &vop_seek_desc, fifo_seek },			/* seek */
+	{ &vop_remove_desc, fifo_remove },		/* remove */
+	{ &vop_link_desc, fifo_link },			/* link */
+	{ &vop_rename_desc, fifo_rename },		/* rename */
+	{ &vop_mkdir_desc, fifo_mkdir },		/* mkdir */
+	{ &vop_rmdir_desc, fifo_rmdir },		/* rmdir */
+	{ &vop_symlink_desc, fifo_symlink },		/* symlink */
+	{ &vop_readdir_desc, fifo_readdir },		/* readdir */
+	{ &vop_readlink_desc, fifo_readlink },		/* readlink */
+	{ &vop_abortop_desc, fifo_abortop },		/* abortop */
+	{ &vop_inactive_desc, fifo_inactive },		/* inactive */
+	{ &vop_reclaim_desc, fifo_reclaim },		/* reclaim */
+	{ &vop_lock_desc, fifo_lock },			/* lock */
+	{ &vop_unlock_desc, fifo_unlock },		/* unlock */
+	{ &vop_bmap_desc, fifo_bmap },			/* bmap */
+	{ &vop_strategy_desc, fifo_strategy },		/* strategy */
+	{ &vop_print_desc, fifo_print },		/* print */
+	{ &vop_islocked_desc, fifo_islocked },		/* islocked */
+	{ &vop_pathconf_desc, fifo_pathconf },		/* pathconf */
+	{ &vop_advlock_desc, fifo_advlock },		/* advlock */
+	{ &vop_blkatoff_desc, fifo_blkatoff },		/* blkatoff */
+	{ &vop_valloc_desc, fifo_valloc },		/* valloc */
+	{ &vop_vfree_desc, fifo_vfree },		/* vfree */
+	{ &vop_truncate_desc, fifo_truncate },		/* truncate */
+	{ &vop_update_desc, fifo_update },		/* update */
+	{ &vop_bwrite_desc, fifo_bwrite },		/* bwrite */
+	{ (struct vnodeop_desc*)NULL, (int(*)())NULL }
+};
+struct vnodeopv_desc fifo_vnodeop_opv_desc =
+	{ &fifo_vnodeop_p, fifo_vnodeop_entries };
+
+/*
+ * Trivial lookup routine that always fails.
+ * Clears *ap->a_vpp and returns ENOTDIR.
+ */
+/* ARGSUSED */
+int
+fifo_lookup(ap)
+	struct vop_lookup_args /* {
+		struct vnode * a_dvp;
+		struct vnode ** a_vpp;
+		struct componentname * a_cnp;
+	} */ *ap;
+{
+
+	*ap->a_vpp = NULL;
+	return (ENOTDIR);
+}
+
+/*
+ * Open called to set up a new instance of a fifo or
+ * to find an active instance of a fifo.
+ *
+ * The first open of a vnode allocates its fifoinfo and a connected pair
+ * of AF_UNIX stream sockets: data written to fi_writesock is read from
+ * fi_readsock.  Every open then bumps the reader or writer count and,
+ * unless O_NONBLOCK is set, sleeps until the other side has been opened.
+ *
+ * Returns 0 on success; EINVAL for an open for both reading and
+ * writing; ENXIO for a non-blocking write open with no reader; or the
+ * error from socreate(), unp_connect2() or an interrupted tsleep().
+ * When the open fails after the counts were raised the vnode is closed
+ * again through VOP_CLOSE so the counts are put back.
+ */
+/* ARGSUSED */
+int
+fifo_open(ap)
+	struct vop_open_args /* {
+		struct vnode *a_vp;
+		int  a_mode;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	register struct vnode *vp = ap->a_vp;
+	register struct fifoinfo *fip;
+	struct socket *rso, *wso;
+	int error;
+	static char openstr[] = "fifo";
+
+	if ((ap->a_mode & (FREAD|FWRITE)) == (FREAD|FWRITE))
+		return (EINVAL);
+	if ((fip = vp->v_fifoinfo) == NULL) {
+		/* First open: build the state, undoing it on any failure. */
+		MALLOC(fip, struct fifoinfo *, sizeof(*fip), M_VNODE, M_WAITOK);
+		vp->v_fifoinfo = fip;
+		if ((error = socreate(AF_UNIX, &rso, SOCK_STREAM, 0)) != 0) {
+			free(fip, M_VNODE);
+			vp->v_fifoinfo = NULL;
+			return (error);
+		}
+		fip->fi_readsock = rso;
+		if ((error = socreate(AF_UNIX, &wso, SOCK_STREAM, 0)) != 0) {
+			(void)soclose(rso);
+			free(fip, M_VNODE);
+			vp->v_fifoinfo = NULL;
+			return (error);
+		}
+		fip->fi_writesock = wso;
+		if ((error = unp_connect2(wso, rso)) != 0) {
+			(void)soclose(wso);
+			(void)soclose(rso);
+			free(fip, M_VNODE);
+			vp->v_fifoinfo = NULL;
+			return (error);
+		}
+		fip->fi_readers = fip->fi_writers = 0;
+		/* Data only flows from the write socket to the read socket. */
+		wso->so_state |= SS_CANTRCVMORE;
+		rso->so_state |= SS_CANTSENDMORE;
+	}
+	error = 0;
+	if (ap->a_mode & FREAD) {
+		fip->fi_readers++;
+		if (fip->fi_readers == 1) {
+			/* First reader: writing is possible again. */
+			fip->fi_writesock->so_state &= ~SS_CANTSENDMORE;
+			if (fip->fi_writers > 0)
+				wakeup((caddr_t)&fip->fi_writers);
+		}
+		if (ap->a_mode & O_NONBLOCK)
+			return (0);
+		/* Sleep, with the vnode unlocked, until a writer opens. */
+		while (fip->fi_writers == 0) {
+			VOP_UNLOCK(vp);
+			error = tsleep((caddr_t)&fip->fi_readers,
+			    PCATCH | PSOCK, openstr, 0);
+			VOP_LOCK(vp);
+			if (error)
+				break;
+		}
+	} else {
+		fip->fi_writers++;
+		if (fip->fi_readers == 0 && (ap->a_mode & O_NONBLOCK)) {
+			error = ENXIO;
+		} else {
+			if (fip->fi_writers == 1) {
+				/* First writer: reading is possible again. */
+				fip->fi_readsock->so_state &= ~SS_CANTRCVMORE;
+				if (fip->fi_readers > 0)
+					wakeup((caddr_t)&fip->fi_readers);
+			}
+			/* Sleep, with the vnode unlocked, until a reader opens. */
+			while (fip->fi_readers == 0) {
+				VOP_UNLOCK(vp);
+				error = tsleep((caddr_t)&fip->fi_writers,
+				    PCATCH | PSOCK, openstr, 0);
+				VOP_LOCK(vp);
+				if (error)
+					break;
+			}
+		}
+	}
+	if (error)
+		VOP_CLOSE(vp, ap->a_mode, ap->a_cred, ap->a_p);
+	return (error);
+}
+
+/*
+ * Vnode op for read
+ *
+ * Receives from the fifo's read socket into the caller's uio, with the
+ * vnode unlocked for the duration of soreceive().  IO_NDELAY makes the
+ * socket non-blocking for this one call.  A zero-length request returns
+ * 0 at once; otherwise the result of soreceive() is returned.
+ */
+/* ARGSUSED */
+int
+fifo_read(ap)
+	struct vop_read_args /* {
+		struct vnode *a_vp;
+		struct uio *a_uio;
+		int  a_ioflag;
+		struct ucred *a_cred;
+	} */ *ap;
+{
+	register struct uio *uio = ap->a_uio;
+	register struct socket *rso = ap->a_vp->v_fifoinfo->fi_readsock;
+	int error, startresid;
+
+#ifdef DIAGNOSTIC
+	if (uio->uio_rw != UIO_READ)
+		panic("fifo_read mode");
+#endif
+	if (uio->uio_resid == 0)
+		return (0);
+	if (ap->a_ioflag & IO_NDELAY)
+		rso->so_state |= SS_NBIO;
+	startresid = uio->uio_resid;
+	VOP_UNLOCK(ap->a_vp);
+	error = soreceive(rso, (struct mbuf **)0, uio, (int *)0,
+		(struct mbuf **)0, (struct mbuf **)0);
+	VOP_LOCK(ap->a_vp);
+	/*
+	 * Clear EOF indication after first such return.
+	 * (Nothing was transferred if uio_resid is unchanged.)
+	 */
+	if (uio->uio_resid == startresid)
+		rso->so_state &= ~SS_CANTRCVMORE;
+	if (ap->a_ioflag & IO_NDELAY)
+		rso->so_state &= ~SS_NBIO;
+	return (error);
+}
+
+/*
+ * Vnode op for write
+ *
+ * Sends the caller's uio on the fifo's write socket, with the vnode
+ * unlocked for the duration of sosend().  IO_NDELAY makes the socket
+ * non-blocking for this one call.  Returns the result of sosend().
+ */
+/* ARGSUSED */
+int
+fifo_write(ap)
+	struct vop_write_args /* {
+		struct vnode *a_vp;
+		struct uio *a_uio;
+		int  a_ioflag;
+		struct ucred *a_cred;
+	} */ *ap;
+{
+	struct socket *wso = ap->a_vp->v_fifoinfo->fi_writesock;
+	int error;
+
+#ifdef DIAGNOSTIC
+	if (ap->a_uio->uio_rw != UIO_WRITE)
+		panic("fifo_write mode");
+#endif
+	if (ap->a_ioflag & IO_NDELAY)
+		wso->so_state |= SS_NBIO;
+	VOP_UNLOCK(ap->a_vp);
+	error = sosend(wso, (struct mbuf *)0, ap->a_uio, 0, (struct mbuf *)0, 0);
+	VOP_LOCK(ap->a_vp);
+	if (ap->a_ioflag & IO_NDELAY)
+		wso->so_state &= ~SS_NBIO;
+	return (error);
+}
+
+/*
+ * Fifo ioctl operation.
+ *
+ * FIONBIO is accepted and ignored.  Everything else goes to
+ * soo_ioctl() on the read socket when the file was opened for reading,
+ * otherwise on the write socket.  Only f_data of the temporary
+ * struct file is filled in before it is passed on.
+ */
+/* ARGSUSED */
+int
+fifo_ioctl(ap)
+	struct vop_ioctl_args /* {
+		struct vnode *a_vp;
+		int  a_command;
+		caddr_t  a_data;
+		int  a_fflag;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct file filetmp;
+
+	if (ap->a_command == FIONBIO)
+		return (0);
+	if (ap->a_fflag & FREAD)
+		filetmp.f_data = (caddr_t)ap->a_vp->v_fifoinfo->fi_readsock;
+	else
+		filetmp.f_data = (caddr_t)ap->a_vp->v_fifoinfo->fi_writesock;
+	return (soo_ioctl(&filetmp, ap->a_command, ap->a_data, ap->a_p));
+}
+
+/*
+ * Fifo select operation.
+ *
+ * Passes the request to soo_select() on the read socket when the file
+ * was opened for reading, otherwise on the write socket.  Only f_data
+ * of the temporary struct file is filled in before it is passed on.
+ */
+/* ARGSUSED */
+int
+fifo_select(ap)
+	struct vop_select_args /* {
+		struct vnode *a_vp;
+		int  a_which;
+		int  a_fflags;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct file filetmp;
+
+	if (ap->a_fflags & FREAD)
+		filetmp.f_data = (caddr_t)ap->a_vp->v_fifoinfo->fi_readsock;
+	else
+		filetmp.f_data = (caddr_t)ap->a_vp->v_fifoinfo->fi_writesock;
+	return (soo_select(&filetmp, ap->a_which, ap->a_p));
+}
+
+/*
+ * This is a noop, simply returning what one has been given:
+ * the vnode itself through a_vpp and the block number unchanged
+ * through a_bnp.  Either output pointer may be NULL.  Returns 0.
+ */
+int
+fifo_bmap(ap)
+	struct vop_bmap_args /* {
+		struct vnode *a_vp;
+		daddr_t  a_bn;
+		struct vnode **a_vpp;
+		daddr_t *a_bnp;
+	} */ *ap;
+{
+
+	if (ap->a_vpp != NULL)
+		*ap->a_vpp = ap->a_vp;
+	if (ap->a_bnp != NULL)
+		*ap->a_bnp = ap->a_bn;
+	return (0);
+}
+
+/*
+ * At the moment we do not do any locking.
+ * Always returns 0.
+ */
+/* ARGSUSED */
+int
+fifo_lock(ap)
+	struct vop_lock_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+
+	return (0);
+}
+
+/*
+ * Counterpart of fifo_lock(): nothing is locked, so there is nothing
+ * to release.  Always returns 0.
+ */
+/* ARGSUSED */
+int
+fifo_unlock(ap)
+	struct vop_unlock_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+
+	return (0);
+}
+
+/*
+ * Fifo close routine
+ *
+ * Drops one writer (when a_fflag has FWRITE) or one reader.  When the
+ * last writer goes the read socket is marked as unable to receive more;
+ * when the last reader goes the write socket is marked as unable to
+ * send more.  Once the vnode has no other users (v_usecount <= 1) both
+ * sockets are closed and the fifoinfo is freed.
+ *
+ * Returns 0, or the first error reported by soclose().
+ */
+/* ARGSUSED */
+int
+fifo_close(ap)
+	struct vop_close_args /* {
+		struct vnode *a_vp;
+		int  a_fflag;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	register struct vnode *vp = ap->a_vp;
+	register struct fifoinfo *fip = vp->v_fifoinfo;
+	int error1, error2;
+
+	if (ap->a_fflag & FWRITE) {
+		fip->fi_writers--;
+		if (fip->fi_writers == 0)
+			socantrcvmore(fip->fi_readsock);
+	} else {
+		fip->fi_readers--;
+		if (fip->fi_readers == 0)
+			socantsendmore(fip->fi_writesock);
+	}
+	/* Somebody else still has the fifo open: keep the state. */
+	if (vp->v_usecount > 1)
+		return (0);
+	error1 = soclose(fip->fi_readsock);
+	error2 = soclose(fip->fi_writesock);
+	FREE(fip, M_VNODE);
+	vp->v_fifoinfo = NULL;
+	if (error1)
+		return (error1);
+	return (error2);
+}
+
+/*
+ * Print out the contents of a fifo vnode.
+ * Always returns 0.
+ */
+int
+fifo_print(ap)
+	struct vop_print_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+
+	printf("tag VT_NON");
+	fifo_printinfo(ap->a_vp);
+	printf("\n");
+	/* The operation returns int; do not fall off the end. */
+	return (0);
+}
+
+/*
+ * Print out internal contents of a fifo vnode.
+ *
+ * Prints the reader and writer counts, without a trailing newline.
+ * Nothing is printed for a vnode that has no fifoinfo.
+ * Always returns 0.
+ */
+int
+fifo_printinfo(vp)
+	struct vnode *vp;
+{
+	register struct fifoinfo *fip = vp->v_fifoinfo;
+
+	/*
+	 * v_fifoinfo is only set between fifo_open() and the last
+	 * fifo_close(); there are no counts to report otherwise.
+	 */
+	if (fip == NULL)
+		return (0);
+
+	/* fi_readers and fi_writers are longs, hence %ld. */
+	printf(", fifo with %ld readers and %ld writers",
+		fip->fi_readers, fip->fi_writers);
+	return (0);
+}
+
+/*
+ * Return POSIX pathconf information applicable to fifo's.
+ *
+ * Stores the value for ap->a_name through ap->a_retval and returns 0,
+ * or returns EINVAL for a name that is not handled here.
+ */
+int
+fifo_pathconf(ap)
+	struct vop_pathconf_args /* {
+		struct vnode *a_vp;
+		int a_name;
+		int *a_retval;
+	} */ *ap;
+{
+
+	switch (ap->a_name) {
+	case _PC_LINK_MAX:
+		*ap->a_retval = LINK_MAX;
+		return (0);
+	case _PC_PIPE_BUF:
+		*ap->a_retval = PIPE_BUF;
+		return (0);
+	case _PC_CHOWN_RESTRICTED:
+		*ap->a_retval = 1;
+		return (0);
+	default:
+		return (EINVAL);
+	}
+	/* NOTREACHED */
+}
+
+/*
+ * Fifo failed operation
+ * Shared stub for operations that are refused; always returns EBADF.
+ */
+int
+fifo_ebadf()
+{
+
+	return (EBADF);
+}
+
+/*
+ * Fifo advisory byte-level locks.
+ * Not supported: always returns EOPNOTSUPP.
+ */
+/* ARGSUSED */
+int
+fifo_advlock(ap)
+	struct vop_advlock_args /* {
+		struct vnode *a_vp;
+		caddr_t  a_id;
+		int  a_op;
+		struct flock *a_fl;
+		int  a_flags;
+	} */ *ap;
+{
+
+	return (EOPNOTSUPP);
+}
+
+/*
+ * Fifo bad operation
+ * Shared stub for operations that must never be called on a fifo;
+ * it panics instead of returning.
+ */
+int
+fifo_badop()
+{
+
+	panic("fifo_badop called");
+	/* NOTREACHED */
+}
diff --git a/sys/miscfs/kernfs/kernfs.h b/sys/miscfs/kernfs/kernfs.h
new file mode 100644
index 00000000000..75ddecc6db1
--- /dev/null
+++ b/sys/miscfs/kernfs/kernfs.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 1992, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software donated to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)kernfs.h	8.4 (Berkeley) 1/21/94
+ */
+
+#define	_PATH_KERNFS	"/kern"		/* Default mountpoint */
+
+#ifdef KERNEL
+/*
+ * Per-mount private data, found through the mount's mnt_data
+ * (see VFSTOKERNFS below).
+ */
+struct kernfs_mount {
+	struct vnode	*kf_root;	/* Root node */
+};
+
+/*
+ * Per-vnode private data, found through the vnode's v_data
+ * (see VTOKERN below).
+ */
+struct kernfs_node {
+	struct kern_target *kf_kt;	/* struct kern_target is not defined in this header */
+};
+
+/* Convert a mount point / vnode to its kernfs private data. */
+#define VFSTOKERNFS(mp)	((struct kernfs_mount *)((mp)->mnt_data))
+#define	VTOKERN(vp) ((struct kernfs_node *)(vp)->v_data)
+
+extern int (**kernfs_vnodeop_p)();
+extern struct vfsops kernfs_vfsops;
+/* NOTE(review): presumably the vnode of the raw root device -- confirm. */
+extern struct vnode *rrootvp;
+#endif /* KERNEL */
diff --git a/sys/miscfs/kernfs/kernfs_vfsops.c b/sys/miscfs/kernfs/kernfs_vfsops.c
new file mode 100644
index 00000000000..b68d76eaddf
--- /dev/null
+++ b/sys/miscfs/kernfs/kernfs_vfsops.c
@@ -0,0 +1,329 @@
+/*
+ * Copyright (c) 1992, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software donated to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)kernfs_vfsops.c	8.4 (Berkeley) 1/21/94
+ */
+
+/*
+ * Kernel params Filesystem
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/conf.h>
+#include <sys/types.h>
+#include <sys/proc.h>
+#include <sys/vnode.h>
+#include <sys/mount.h>
+#include <sys/namei.h>
+#include <sys/malloc.h>
+
+#include <miscfs/specfs/specdev.h>
+#include <miscfs/kernfs/kernfs.h>
+
+struct vnode *rrootvp;	/* raw root device vnode: set by kernfs_init(), 0 if none was found */
+
+/* Create a spec-ops VCHR vnode for character device dev and store it in *vpp.
+ * Returns 0 on success, or getnewvnode()'s error with *vpp set to 0.
+ */
+int
+cdevvp(dev, vpp)
+	dev_t dev;
+	struct vnode **vpp;
+{
+	register struct vnode *vp;
+	struct vnode *nvp;
+	int error;
+
+	if (dev == NODEV)	/* NOTE(review): returns success without storing to *vpp */
+		return (0);
+	error = getnewvnode(VT_NON, (struct mount *)0, spec_vnodeop_p, &nvp);
+	if (error) {
+		*vpp = 0;
+		return (error);
+	}
+	vp = nvp;
+	vp->v_type = VCHR;
+	if (nvp = checkalias(vp, dev, (struct mount *)0)) {	/* non-zero: use that vnode instead */
+		vput(vp);	/* drop the vnode we just created */
+		vp = nvp;
+	}
+	*vpp = vp;
+	return (0);
+}
+
+kernfs_init()
+{
+	int root_bmaj = major(rootdev);
+	int status = ENXIO;	/* stays ENXIO when no character driver matches */
+	int cmajor;
+	dev_t rawdev;
+#ifdef KERNFS_DIAGNOSTIC
+	printf("kernfs_init\n");		/* printed during system boot */
+#endif
+	/* The raw root is a cdevsw entry sharing the root bdevsw open routine. */
+	for (cmajor = 0; cmajor < nchrdev; cmajor++) {
+		if (cdevsw[cmajor].d_open != bdevsw[root_bmaj].d_open)
+			continue;
+		rawdev = makedev(cmajor, minor(rootdev));
+		status = cdevvp(rawdev, &rrootvp);
+		if (status == 0)
+			break;
+	}
+
+	if (status != 0) {
+		printf("kernfs: no raw boot device\n");
+		rrootvp = 0;
+	}
+}
+
+/*
+ * Mount kernfs on mp: create the root vnode and the kernfs_mount record.
+ */
+kernfs_mount(mp, path, data, ndp, p)
+	struct mount *mp;
+	char *path;
+	caddr_t data;
+	struct nameidata *ndp;
+	struct proc *p;
+{
+	int error = 0;
+	u_int size;
+	struct kernfs_mount *fmp;
+	struct vnode *rvp;
+
+#ifdef KERNFS_DIAGNOSTIC
+	printf("kernfs_mount(mp = %x)\n", mp);
+#endif
+
+	/*
+	 * Updating an existing kernfs mount is not supported
+	 */
+	if (mp->mnt_flag & MNT_UPDATE)
+		return (EOPNOTSUPP);
+
+	error = getnewvnode(VT_KERNFS, mp, kernfs_vnodeop_p, &rvp);	/* XXX */
+	if (error)
+		return (error);
+
+	MALLOC(fmp, struct kernfs_mount *, sizeof(struct kernfs_mount),
+				M_UFSMNT, M_WAITOK);	/* XXX */
+	rvp->v_type = VDIR;
+	rvp->v_flag |= VROOT;	/* VROOT is how the vnode ops recognise the root */
+#ifdef KERNFS_DIAGNOSTIC
+	printf("kernfs_mount: root vp = %x\n", rvp);
+#endif
+	fmp->kf_root = rvp;
+	mp->mnt_flag |= MNT_LOCAL;
+	mp->mnt_data = (qaddr_t) fmp;	/* recovered later with VFSTOKERNFS(mp) */
+	getnewfsid(mp, MOUNT_KERNFS);
+
+	(void) copyinstr(path, mp->mnt_stat.f_mntonname, MNAMELEN - 1, &size);	/* result ignored */
+	bzero(mp->mnt_stat.f_mntonname + size, MNAMELEN - size);	/* zero-pad the tail */
+	bzero(mp->mnt_stat.f_mntfromname, MNAMELEN);
+	bcopy("kernfs", mp->mnt_stat.f_mntfromname, sizeof("kernfs"));
+#ifdef KERNFS_DIAGNOSTIC
+	printf("kernfs_mount: at %s\n", mp->mnt_stat.f_mntonname);
+#endif
+	return (0);
+}
+
+kernfs_start(mp, flags, p)	/* start hook: nothing to do, always returns 0 */
+	struct mount *mp;
+	int flags;
+	struct proc *p;
+{
+	return (0);
+}
+
+kernfs_unmount(mp, mntflags, p)	/* tear down a kernfs mount; 0 or an errno */
+	struct mount *mp;
+	int mntflags;
+	struct proc *p;
+{
+	int error;
+	int flags = 0;
+	extern int doforce;
+	struct vnode *rootvp = VFSTOKERNFS(mp)->kf_root;
+
+#ifdef KERNFS_DIAGNOSTIC
+	printf("kernfs_unmount(mp = %x)\n", mp);
+#endif
+
+	if (mntflags & MNT_FORCE) {
+		/* kernfs can never be rootfs so don't check for it */
+		if (!doforce)
+			return (EINVAL);
+		flags |= FORCECLOSE;
+	}
+
+	/*
+	 * More than one reference on the root vnode means someone
+	 * else is still using it: refuse, even for a forced unmount.
+	 * Otherwise flush the mount's vnodes with vflush().
+	 */
+	if (rootvp->v_usecount > 1)
+		return (EBUSY);
+#ifdef KERNFS_DIAGNOSTIC
+	printf("kernfs_unmount: calling vflush\n");
+#endif
+	if (error = vflush(mp, rootvp, flags))
+		return (error);
+
+#ifdef KERNFS_DIAGNOSTIC
+	vprint("kernfs root", rootvp);
+#endif
+	/*
+	 * Release the reference held on the kernfs root vnode
+	 */
+	vrele(rootvp);
+	/*
+	 * And blow it away for future re-use
+	 */
+	vgone(rootvp);
+	/*
+	 * Finally, throw away the kernfs_mount structure
+	 */
+	free(mp->mnt_data, M_UFSMNT);	/* XXX */
+	mp->mnt_data = 0;
+	return 0;
+}
+
+kernfs_root(mp, vpp)
+	struct mount *mp;
+	struct vnode **vpp;
+{
+	struct kernfs_mount *kmp = VFSTOKERNFS(mp);
+
+#ifdef KERNFS_DIAGNOSTIC
+	printf("kernfs_root(mp = %x)\n", mp);
+#endif
+
+	/*
+	 * Hand back the mount's root vnode with an extra reference
+	 * taken and the vnode lock operation applied.  Always succeeds.
+	 */
+	*vpp = kmp->kf_root;
+	VREF(*vpp);
+	VOP_LOCK(*vpp);
+	return (0);
+}
+
+kernfs_quotactl(mp, cmd, uid, arg, p)	/* quotas are not supported: always EOPNOTSUPP */
+	struct mount *mp;
+	int cmd;
+	uid_t uid;
+	caddr_t arg;
+	struct proc *p;
+{
+	return (EOPNOTSUPP);
+}
+
+kernfs_statfs(mp, sbp, p)
+	struct mount *mp;
+	struct statfs *sbp;
+	struct proc *p;
+{
+#ifdef KERNFS_DIAGNOSTIC
+	printf("kernfs_statfs(mp = %x)\n", mp);
+#endif
+	/* Fixed answers: nothing free, no files; 2 blocks (1K) keep df happy */
+	sbp->f_type = MOUNT_KERNFS;
+	sbp->f_flags = 0;
+	sbp->f_bsize = sbp->f_iosize = DEV_BSIZE;
+	sbp->f_blocks = 2;
+	sbp->f_bfree = sbp->f_bavail = 0;
+	sbp->f_files = sbp->f_ffree = 0;
+
+	/* When sbp is the mount's own statfs there is nothing left to copy */
+	if (sbp == &mp->mnt_stat)
+		return (0);
+
+	bcopy(&mp->mnt_stat.f_fsid, &sbp->f_fsid, sizeof(sbp->f_fsid));
+	bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN);
+	bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN);
+	return (0);
+}
+
+kernfs_sync(mp, waitfor)	/* sync hook: does nothing, always returns 0 */
+	struct mount *mp;
+	int waitfor;
+{
+	return (0);
+}
+
+/*
+ * Kernfs flat namespace lookup (vnode by inode number).
+ * Currently unsupported: always returns EOPNOTSUPP and leaves *vpp alone.
+ */
+kernfs_vget(mp, ino, vpp)
+	struct mount *mp;
+	ino_t ino;
+	struct vnode **vpp;
+{
+
+	return (EOPNOTSUPP);
+}
+
+
+kernfs_fhtovp(mp, fhp, setgen, vpp)	/* file handle -> vnode: unsupported, EOPNOTSUPP */
+	struct mount *mp;
+	struct fid *fhp;
+	int setgen;
+	struct vnode **vpp;
+{
+	return (EOPNOTSUPP);
+}
+
+kernfs_vptofh(vp, fhp)	/* vnode -> file handle: unsupported, EOPNOTSUPP */
+	struct vnode *vp;
+	struct fid *fhp;
+{
+	return (EOPNOTSUPP);
+}
+
+struct vfsops kernfs_vfsops = {	/* positional initializer: order must match struct vfsops */
+	kernfs_mount,
+	kernfs_start,
+	kernfs_unmount,
+	kernfs_root,
+	kernfs_quotactl,
+	kernfs_statfs,
+	kernfs_sync,
+	kernfs_vget,
+	kernfs_fhtovp,
+	kernfs_vptofh,
+	kernfs_init,
+};
diff --git a/sys/miscfs/kernfs/kernfs_vnops.c b/sys/miscfs/kernfs/kernfs_vnops.c
new file mode 100644
index 00000000000..10b7d7c0a64
--- /dev/null
+++ b/sys/miscfs/kernfs/kernfs_vnops.c
@@ -0,0 +1,759 @@
+/*
+ * Copyright (c) 1992, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software donated to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)kernfs_vnops.c	8.6 (Berkeley) 2/10/94
+ */
+
+/*
+ * Kernel parameter filesystem (/kern)
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/vmmeter.h>
+#include <sys/types.h>
+#include <sys/time.h>
+#include <sys/proc.h>
+#include <sys/vnode.h>
+#include <sys/malloc.h>
+#include <sys/file.h>
+#include <sys/stat.h>
+#include <sys/mount.h>
+#include <sys/namei.h>
+#include <sys/buf.h>
+#include <sys/dirent.h>
+#include <miscfs/kernfs/kernfs.h>
+
+#define KSTRING	256		/* Largest I/O available via this filesystem */
+#define	UIO_MX 32		/* size in bytes of each directory record kernfs_readdir emits */
+
+#define	READ_MODE	(S_IRUSR|S_IRGRP|S_IROTH)	/* va_mode for read-only targets */
+#define	WRITE_MODE	(S_IWUSR|S_IRUSR|S_IRGRP|S_IROTH)	/* va_mode for writable targets */
+#define DIR_MODE	(S_IRUSR|S_IXUSR|S_IRGRP|S_IXGRP|S_IROTH|S_IXOTH)	/* va_mode for the root directory */
+
+struct kern_target {
+	char *kt_name;		/* entry name inside the kernfs directory */
+	void *kt_data;		/* address of the kernel datum, or 0 when computed */
+#define	KTT_NULL 1
+#define	KTT_TIME 5
+#define KTT_INT	17
+#define	KTT_STRING 31
+#define KTT_HOSTNAME 47
+#define KTT_AVENRUN 53
+	int kt_tag;		/* KTT_* code: how kernfs_xread/xwrite treat the entry */
+	int kt_rw;		/* VREAD and/or VWRITE */
+	int kt_vtype;		/* vnode type given to the entry (VREG, VDIR, ...) */
+} kern_targets[] = {
+/* NOTE: The name must be less than UIO_MX-16 chars in length */
+	/* name		data		tag		ro/rw		vtype */
+	{ ".",		0,		KTT_NULL,	VREAD,		VDIR },
+	{ "..",		0,		KTT_NULL,	VREAD,		VDIR },
+	{ "boottime",	&boottime.tv_sec, KTT_INT,	VREAD,		VREG },
+	{ "copyright",	copyright,	KTT_STRING,	VREAD,		VREG },
+	{ "hostname",	0,		KTT_HOSTNAME,	VREAD|VWRITE,	VREG },
+	{ "hz",		&hz,		KTT_INT,	VREAD,		VREG },
+	{ "loadavg",	0,		KTT_AVENRUN,	VREAD,		VREG },
+	{ "pagesize",	&cnt.v_page_size, KTT_INT,	VREAD,		VREG },
+	{ "physmem",	&physmem,	KTT_INT,	VREAD,		VREG },
+#if 0
+	{ "root",	0,		KTT_NULL,	VREAD,		VDIR },
+#endif
+	{ "rootdev",	0,		KTT_NULL,	VREAD,		VBLK },
+	{ "rrootdev",	0,		KTT_NULL,	VREAD,		VCHR },
+	{ "time",	0,		KTT_TIME,	VREAD,		VREG },
+	{ "version",	version,	KTT_STRING,	VREAD,		VREG },
+};
+
+static int nkern_targets = sizeof(kern_targets) / sizeof(kern_targets[0]);	/* entry count */
+
+static int	/* 0 with *lenp = strlen(buf) on success, EINVAL otherwise */
+kernfs_xread(kt, buf, len, lenp)
+	struct kern_target *kt;	/* target whose value is rendered */
+	char *buf;		/* receives the NUL-terminated text */
+	int len;		/* size of buf; only the string/hostname cases check it */
+	int *lenp;		/* out: length of the text; not written on error */
+{
+	switch (kt->kt_tag) {
+	case KTT_TIME: {	/* current time as "seconds microseconds\n" */
+		struct timeval tv;
+		microtime(&tv);
+		sprintf(buf, "%d %d\n", tv.tv_sec, tv.tv_usec);
+		break;
+	}
+
+	case KTT_INT: {		/* kt_data is the address of an int */
+		int *ip = kt->kt_data;
+		sprintf(buf, "%d\n", *ip);
+		break;
+	}
+
+	case KTT_STRING: {	/* kt_data is a NUL-terminated string, copied verbatim */
+		char *cp = kt->kt_data;
+		int xlen = strlen(cp) + 1;	/* includes the terminating NUL */
+
+		if (xlen >= len)
+			return (EINVAL);
+
+		bcopy(cp, buf, xlen);
+		break;
+	}
+
+	case KTT_HOSTNAME: {	/* current hostname followed by a newline */
+		char *cp = hostname;
+		int xlen = hostnamelen;
+
+		if (xlen >= (len-2))	/* room for the name plus "\n" and NUL */
+			return (EINVAL);
+
+		bcopy(cp, buf, xlen);
+		buf[xlen] = '\n';
+		buf[xlen+1] = '\0';
+		break;
+	}
+
+	case KTT_AVENRUN:	/* three load averages then the scale factor */
+		sprintf(buf, "%ld %ld %ld %ld\n",
+				averunnable.ldavg[0],
+				averunnable.ldavg[1],
+				averunnable.ldavg[2],
+				averunnable.fscale);
+		break;
+
+	default:		/* KTT_NULL and unknown tags have no readable value */
+		return (EINVAL);
+	}
+
+	*lenp = strlen(buf);
+	return (0);
+}
+
+static int	/* 0 on success, EIO when the target cannot be written */
+kernfs_xwrite(kt, buf, len)
+	struct kern_target *kt;	/* target being written */
+	char *buf;		/* data supplied by the writer */
+	int len;		/* number of bytes in buf; may be 0 */
+{
+	switch (kt->kt_tag) {
+	case KTT_HOSTNAME: {
+		/* Strip one trailing newline; len == 0 must not index buf[-1]. */
+		if (len > 0 && buf[len-1] == '\n')
+			--len;
+		bcopy(buf, hostname, len);
+		hostname[len] = '\0';
+		hostnamelen = len;
+		return (0);
+	}
+	default:		/* no other target accepts writes */
+		return (EIO);
+	}
+}
+
+
+/*
+ * Look up the component named by ap->a_cnp in directory ap->a_dvp.
+ * The vnode found is stored through ap->a_vpp (NULL on error).
+ */
+kernfs_lookup(ap)
+	struct vop_lookup_args /* {
+		struct vnode * a_dvp;
+		struct vnode ** a_vpp;
+		struct componentname * a_cnp;
+	} */ *ap;
+{
+	struct vnode **vpp = ap->a_vpp;
+	struct vnode *dvp = ap->a_dvp;
+	struct componentname *cnp = ap->a_cnp;
+	struct vnode *fvp;
+	int error, i;
+	char *pname;
+
+#ifdef KERNFS_DIAGNOSTIC
+	printf("kernfs_lookup(%x)\n", ap);
+	printf("kernfs_lookup(dp = %x, vpp = %x, cnp = %x)\n", dvp, vpp, ap->a_cnp);
+#endif
+	pname = cnp->cn_nameptr;
+#ifdef KERNFS_DIAGNOSTIC
+	printf("kernfs_lookup(%s)\n", pname);
+#endif
+	if (cnp->cn_namelen == 1 && *pname == '.') {	/* "." is the directory itself */
+		*vpp = dvp;
+		VREF(dvp);
+		/*VOP_LOCK(dvp);*/
+		return (0);
+	}
+
+#if 0
+	if (cnp->cn_namelen == 4 && bcmp(pname, "root", 4) == 0) {
+		*vpp = rootdir;
+		VREF(rootdir);
+		VOP_LOCK(rootdir);
+		return (0);
+	}
+#endif
+
+	/*
+	 * /kern/rootdev is the root device
+	 */
+	if (cnp->cn_namelen == 7 && bcmp(pname, "rootdev", 7) == 0) {
+		*vpp = rootvp;	/* the existing vnode is returned, not a kernfs vnode */
+		VREF(rootvp);
+		VOP_LOCK(rootvp);
+		return (0);
+	}
+
+	/*
+	 * /kern/rrootdev is the raw root device
+	 */
+	if (cnp->cn_namelen == 8 && bcmp(pname, "rrootdev", 8) == 0) {
+		if (rrootvp) {
+			*vpp = rrootvp;
+			VREF(rrootvp);
+			VOP_LOCK(rrootvp);
+			return (0);
+		}
+		error = ENXIO;	/* no raw root device vnode is available */
+		goto bad;
+	}
+
+	error = ENOENT;
+
+	for (i = 0; i < nkern_targets; i++) {	/* exact-length, exact-bytes name match */
+		struct kern_target *kt = &kern_targets[i];
+		if (cnp->cn_namelen == strlen(kt->kt_name) &&
+		    bcmp(kt->kt_name, pname, cnp->cn_namelen) == 0) {
+			error = 0;
+			break;
+		}
+	}
+
+#ifdef KERNFS_DIAGNOSTIC
+	printf("kernfs_lookup: i = %d, error = %d\n", i, error);
+#endif
+
+	if (error)
+		goto bad;
+
+#ifdef KERNFS_DIAGNOSTIC
+	printf("kernfs_lookup: allocate new vnode\n");
+#endif
+	error = getnewvnode(VT_KERNFS, dvp->v_mount, kernfs_vnodeop_p, &fvp);
+	if (error)
+		goto bad;
+	MALLOC(fvp->v_data, void *, sizeof(struct kernfs_node), M_TEMP, M_WAITOK);	/* freed in kernfs_reclaim */
+	VTOKERN(fvp)->kf_kt = &kern_targets[i];
+	fvp->v_type = VTOKERN(fvp)->kf_kt->kt_vtype;
+	*vpp = fvp;
+#ifdef KERNFS_DIAGNOSTIC
+	printf("kernfs_lookup: newvp = %x\n", fvp);
+#endif
+	return (0);
+
+bad:;
+	*vpp = NULL;
+#ifdef KERNFS_DIAGNOSTIC
+	printf("kernfs_lookup: error = %d\n", error);
+#endif
+	return (error);
+}
+
+kernfs_open(ap)
+	struct vop_open_args /* {
+		struct vnode *a_vp;
+		int  a_mode;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct vnode *vp = ap->a_vp;
+
+	/*
+	 * The mount root may always be opened (permissions aside).
+	 */
+	if ((vp->v_flag & VROOT) != 0)
+		return (0);
+
+#ifdef KERNFS_DIAGNOSTIC
+	printf("kernfs_open, mode = %x, file = %s\n",
+			ap->a_mode, VTOKERN(vp)->kf_kt->kt_name);
+#endif
+
+	/* Opening for write needs a target whose kt_rw includes VWRITE. */
+	if ((ap->a_mode & FWRITE) == 0)
+		return (0);
+	return ((VTOKERN(vp)->kf_kt->kt_rw & VWRITE) ? 0 : EOPNOTSUPP);
+}
+
+static int
+kernfs_access(ap)
+	struct vop_access_args /* {
+		struct vnode *a_vp;
+		int  a_mode;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct vnode *vp = ap->a_vp;
+	struct ucred *cred = ap->a_cred;
+	mode_t want = ap->a_mode;
+	int isroot = (vp->v_flag & VROOT) != 0;
+
+	/* Execute (search) access exists only on the root directory. */
+	if (want & VEXEC)
+		return (isroot ? 0 : EACCES);
+
+	/* Everyone except uid 0 may read anything but write nothing. */
+	if (cred->cr_uid != 0)
+		return ((want & VWRITE) ? EACCES : 0);
+
+	/*
+	 * uid 0: the root directory is always accessible; writing a
+	 * target additionally requires VWRITE in that target's kt_rw.
+	 */
+	if (isroot || (want & VWRITE) == 0)
+		return (0);
+
+	if (VTOKERN(vp)->kf_kt->kt_rw & VWRITE)
+		return (0);
+
+	return (EROFS);
+}
+
+
+kernfs_getattr(ap)
+	struct vop_getattr_args /* {
+		struct vnode *a_vp;
+		struct vattr *a_vap;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct vnode *vp = ap->a_vp;
+	struct vattr *vap = ap->a_vap;
+	int error = 0;
+	char strbuf[KSTRING];
+
+	bzero((caddr_t) vap, sizeof(*vap));
+	vattr_null(vap);
+	vap->va_uid = 0;
+	vap->va_gid = 0;
+	vap->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0];
+	/* vap->va_qsize = 0; */
+	vap->va_blocksize = DEV_BSIZE;
+	microtime(&vap->va_atime);
+	vap->va_mtime = vap->va_atime;
+	vap->va_ctime = vap->va_ctime;
+	vap->va_gen = 0;
+	vap->va_flags = 0;
+	vap->va_rdev = 0;
+	/* vap->va_qbytes = 0; */
+	vap->va_bytes = 0;
+
+	if (vp->v_flag & VROOT) {
+#ifdef KERNFS_DIAGNOSTIC
+		printf("kernfs_getattr: stat rootdir\n");
+#endif
+		vap->va_type = VDIR;
+		vap->va_mode = DIR_MODE;
+		vap->va_nlink = 2;
+		vap->va_fileid = 2;
+		vap->va_size = DEV_BSIZE;
+	} else {
+		struct kern_target *kt = VTOKERN(vp)->kf_kt;
+		int nbytes;
+#ifdef KERNFS_DIAGNOSTIC
+		printf("kernfs_getattr: stat target %s\n", kt->kt_name);
+#endif
+		vap->va_type = kt->kt_vtype;
+		vap->va_mode = (kt->kt_rw & VWRITE ? WRITE_MODE : READ_MODE);
+		vap->va_nlink = 1;
+		vap->va_fileid = 3 + (kt - kern_targets) / sizeof(*kt);
+		error = kernfs_xread(kt, strbuf, sizeof(strbuf), &nbytes);
+		vap->va_size = nbytes;
+	}
+
+	vp->v_type = vap->va_type;
+#ifdef KERNFS_DIAGNOSTIC
+	printf("kernfs_getattr: return error %d\n", error);
+#endif
+	return (error);
+}
+
+kernfs_setattr(ap)	/* accepts and discards every attribute change; always returns 0 */
+	struct vop_setattr_args /* {
+		struct vnode *a_vp;
+		struct vattr *a_vap;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+
+	/*
+	 * Silently ignore attribute changes.
+	 * This allows for open with truncate to have no
+	 * effect until some data is written.  I want to
+	 * do it this way because all writes are atomic.
+	 */
+	return (0);
+}
+
+static int	/* copy a target's generated text, starting at uio_offset, to the caller */
+kernfs_read(ap)
+	struct vop_read_args /* {
+		struct vnode *a_vp;
+		struct uio *a_uio;
+		int  a_ioflag;
+		struct ucred *a_cred;
+	} */ *ap;
+{
+	struct vnode *vp = ap->a_vp;
+	struct uio *uio = ap->a_uio;
+	struct kern_target *kt;
+	char strbuf[KSTRING];
+	int error, len, off;
+
+	/* Reading the root directory through this entry point is not supported. */
+	if (vp->v_flag & VROOT)
+		return (EOPNOTSUPP);
+
+	kt = VTOKERN(vp)->kf_kt;
+#ifdef KERNFS_DIAGNOSTIC
+	printf("kern_read %s\n", kt->kt_name);
+#endif
+
+	/* The whole text is regenerated on every call; len is its length. */
+	len = 0;
+	if (error = kernfs_xread(kt, strbuf, sizeof(strbuf), &len))
+		return (error);
+	/* Check the un-narrowed offset: negative is EINVAL, at/past end is EOF. */
+	if (uio->uio_offset < 0 || uio->uio_offset >= len)
+		return (uio->uio_offset < 0 ? EINVAL : 0);
+	off = uio->uio_offset;	/* now known to lie in [0, len) */
+	return (uiomove(strbuf + off, len - off, uio));
+}
+
+static int	/* take one whole write at offset 0 and hand it to kernfs_xwrite() */
+kernfs_write(ap)
+	struct vop_write_args /* {
+		struct vnode *a_vp;
+		struct uio *a_uio;
+		int  a_ioflag;
+		struct ucred *a_cred;
+	} */ *ap;
+{
+	struct vnode *vp = ap->a_vp;
+	struct uio *uio = ap->a_uio;
+	struct kern_target *kt;
+	int error, xlen;
+	char strbuf[KSTRING];
+
+	if (vp->v_flag & VROOT)	/* writes to the root succeed without consuming data */
+		return (0);
+
+	kt = VTOKERN(vp)->kf_kt;
+
+	if (uio->uio_offset != 0)	/* only writes starting at offset 0 are accepted */
+		return (EINVAL);
+
+	xlen = min(uio->uio_resid, KSTRING-1);	/* leave room for the NUL added below */
+	error = uiomove(strbuf, xlen, uio);
+	if (error)
+		return (error);
+
+	if (uio->uio_resid != 0)	/* more data than fits in strbuf: reject */
+		return (EIO);
+
+	strbuf[xlen] = '\0';
+	xlen = strlen(strbuf);	/* an embedded NUL shortens what is passed on */
+	return (kernfs_xwrite(kt, strbuf, xlen));
+}
+
+
+kernfs_readdir(ap)
+	struct vop_readdir_args /* {
+		struct vnode *a_vp;
+		struct uio *a_uio;
+		struct ucred *a_cred;
+	} */ *ap;
+{
+	struct uio *uio = ap->a_uio;
+	int i, error = 0;
+
+	/*
+	 * Entries are fixed UIO_MX-byte records, so the offset selects a
+	 * kern_targets[] index.  Validate it before it is used as one.
+	 */
+	if (uio->uio_offset < 0)
+		return (EINVAL);
+	if (uio->uio_offset >= nkern_targets * UIO_MX)
+		return (0);		/* at or past the last entry */
+	i = uio->uio_offset / UIO_MX;
+
+	/* Emit whole records only; a short buffer gets none, not a torn one. */
+	while (uio->uio_resid >= UIO_MX && i < nkern_targets) {
+		struct dirent d;
+		struct dirent *dp = &d;
+		struct kern_target *kt = &kern_targets[i];
+#ifdef KERNFS_DIAGNOSTIC
+		printf("kernfs_readdir: i = %d\n", i);
+#endif
+		bzero((caddr_t) dp, UIO_MX);
+		dp->d_namlen = strlen(kt->kt_name);
+		bcopy(kt->kt_name, dp->d_name, dp->d_namlen+1);
+#ifdef KERNFS_DIAGNOSTIC
+		printf("kernfs_readdir: name = %s, len = %d\n",
+				dp->d_name, dp->d_namlen);
+#endif
+		/* Fill in the remaining fields */
+		dp->d_reclen = UIO_MX;
+		dp->d_fileno = i + 3;	/* 3 + index into kern_targets[] */
+		dp->d_type = DT_UNKNOWN;	/* XXX */
+		/* And ship to userland */
+		error = uiomove((caddr_t) dp, UIO_MX, uio);
+		if (error)
+			break;
+		i++;
+	}
+
+	uio->uio_offset = i * UIO_MX;	/* resume at the first entry not delivered */
+	return (error);
+}
+
+kernfs_inactive(ap)	/* resets the vnode's type to VNON; always returns 0 */
+	struct vop_inactive_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+	struct vnode *vp = ap->a_vp;
+
+	/*
+	 * Clear out the v_type field to avoid
+	 * nasty things happening in vgone().
+	 */
+	vp->v_type = VNON;
+#ifdef KERNFS_DIAGNOSTIC
+	printf("kernfs_inactive(%x)\n", vp);
+#endif
+	return (0);
+}
+
+kernfs_reclaim(ap)
+	struct vop_reclaim_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+	struct vnode *vp = ap->a_vp;
+#ifdef KERNFS_DIAGNOSTIC
+	printf("kernfs_reclaim(%x)\n", vp);
+#endif
+	if (vp->v_data == 0)	/* nothing was attached to this vnode */
+		return (0);
+	FREE(vp->v_data, M_TEMP);	/* the kernfs_node MALLOC'ed by kernfs_lookup */
+	vp->v_data = 0;
+	return (0);
+}
+
+/* Return POSIX pathconf information applicable to special devices. */
+kernfs_pathconf(ap)
+	struct vop_pathconf_args /* {
+		struct vnode *a_vp;
+		int a_name;
+		int *a_retval;
+	} */ *ap;
+{
+	int limit;	/* answer for the requested name */
+
+	switch (ap->a_name) {
+	case _PC_LINK_MAX:
+		limit = LINK_MAX;
+		break;
+	case _PC_MAX_CANON:
+		limit = MAX_CANON;
+		break;
+	case _PC_MAX_INPUT:
+		limit = MAX_INPUT;
+		break;
+	case _PC_PIPE_BUF:
+		limit = PIPE_BUF;
+		break;
+	case _PC_CHOWN_RESTRICTED:
+		limit = 1;
+		break;
+	case _PC_VDISABLE:
+		limit = _POSIX_VDISABLE;
+		break;
+	default:
+		return (EINVAL);	/* unknown name: *a_retval is left untouched */
+	}
+	*ap->a_retval = limit;
+	return (0);
+}
+
+/*
+ * Print a one-line description of a kernfs vnode.
+ */
+/* ARGSUSED */
+kernfs_print(ap)
+	struct vop_print_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+
+	printf("tag VT_KERNFS, kernfs vnode\n");
+	return (0);
+}
+
+/* vfree operation: nothing to release, always returns 0 */
+kernfs_vfree(ap)
+	struct vop_vfree_args /* {
+		struct vnode *a_pvp;
+		ino_t a_ino;
+		int a_mode;
+	} */ *ap;
+{
+
+	return (0);
+}
+
+/*
+ * kernfs vnode unsupported operation: always returns EOPNOTSUPP
+ */
+kernfs_enotsupp()
+{
+
+	return (EOPNOTSUPP);
+}
+
+/*
+ * kernfs "should never get here" operation: panics
+ */
+kernfs_badop()
+{
+
+	panic("kernfs: bad op");
+	/* NOTREACHED */
+}
+
+/*
+ * kernfs vnode null operation: does nothing and returns 0
+ */
+kernfs_nullop()
+{
+
+	return (0);
+}
+
+#define kernfs_create ((int (*) __P((struct  vop_create_args *)))kernfs_enotsupp)	/* unsupported ops alias kernfs_enotsupp */
+#define kernfs_mknod ((int (*) __P((struct  vop_mknod_args *)))kernfs_enotsupp)
+#define kernfs_close ((int (*) __P((struct  vop_close_args *)))nullop)	/* NOTE(review): uses nullop, not kernfs_nullop defined above */
+#define kernfs_ioctl ((int (*) __P((struct  vop_ioctl_args *)))kernfs_enotsupp)
+#define kernfs_select ((int (*) __P((struct  vop_select_args *)))kernfs_enotsupp)
+#define kernfs_mmap ((int (*) __P((struct  vop_mmap_args *)))kernfs_enotsupp)
+#define kernfs_fsync ((int (*) __P((struct  vop_fsync_args *)))nullop)
+#define kernfs_seek ((int (*) __P((struct  vop_seek_args *)))nullop)
+#define kernfs_remove ((int (*) __P((struct  vop_remove_args *)))kernfs_enotsupp)
+#define kernfs_link ((int (*) __P((struct  vop_link_args *)))kernfs_enotsupp)
+#define kernfs_rename ((int (*) __P((struct  vop_rename_args *)))kernfs_enotsupp)
+#define kernfs_mkdir ((int (*) __P((struct  vop_mkdir_args *)))kernfs_enotsupp)
+#define kernfs_rmdir ((int (*) __P((struct  vop_rmdir_args *)))kernfs_enotsupp)
+#define kernfs_symlink ((int (*) __P((struct vop_symlink_args *)))kernfs_enotsupp)
+#define kernfs_readlink \
+	((int (*) __P((struct  vop_readlink_args *)))kernfs_enotsupp)
+#define kernfs_abortop ((int (*) __P((struct  vop_abortop_args *)))nullop)
+#define kernfs_lock ((int (*) __P((struct  vop_lock_args *)))nullop)	/* lock/unlock/islocked all alias nullop */
+#define kernfs_unlock ((int (*) __P((struct  vop_unlock_args *)))nullop)
+#define kernfs_bmap ((int (*) __P((struct  vop_bmap_args *)))kernfs_badop)	/* kernfs_badop panics if this is ever called */
+#define kernfs_strategy ((int (*) __P((struct  vop_strategy_args *)))kernfs_badop)
+#define kernfs_islocked ((int (*) __P((struct  vop_islocked_args *)))nullop)
+#define kernfs_advlock ((int (*) __P((struct vop_advlock_args *)))kernfs_enotsupp)
+#define kernfs_blkatoff \
+	((int (*) __P((struct  vop_blkatoff_args *)))kernfs_enotsupp)
+#define kernfs_valloc ((int(*) __P(( \
+		struct vnode *pvp, \
+		int mode, \
+		struct ucred *cred, \
+		struct vnode **vpp))) kernfs_enotsupp)
+#define kernfs_truncate \
+	((int (*) __P((struct  vop_truncate_args *)))kernfs_enotsupp)
+#define kernfs_update ((int (*) __P((struct  vop_update_args *)))kernfs_enotsupp)
+#define kernfs_bwrite ((int (*) __P((struct  vop_bwrite_args *)))kernfs_enotsupp)
+
+int (**kernfs_vnodeop_p)();	/* kernfs operations vector; passed to getnewvnode() for kernfs vnodes */
+struct vnodeopv_entry_desc kernfs_vnodeop_entries[] = {	/* (operation descriptor, implementation) pairs */
+	{ &vop_default_desc, vn_default_error },
+	{ &vop_lookup_desc, kernfs_lookup },	/* lookup */
+	{ &vop_create_desc, kernfs_create },	/* create */
+	{ &vop_mknod_desc, kernfs_mknod },	/* mknod */
+	{ &vop_open_desc, kernfs_open },	/* open */
+	{ &vop_close_desc, kernfs_close },	/* close */
+	{ &vop_access_desc, kernfs_access },	/* access */
+	{ &vop_getattr_desc, kernfs_getattr },	/* getattr */
+	{ &vop_setattr_desc, kernfs_setattr },	/* setattr */
+	{ &vop_read_desc, kernfs_read },	/* read */
+	{ &vop_write_desc, kernfs_write },	/* write */
+	{ &vop_ioctl_desc, kernfs_ioctl },	/* ioctl */
+	{ &vop_select_desc, kernfs_select },	/* select */
+	{ &vop_mmap_desc, kernfs_mmap },	/* mmap */
+	{ &vop_fsync_desc, kernfs_fsync },	/* fsync */
+	{ &vop_seek_desc, kernfs_seek },	/* seek */
+	{ &vop_remove_desc, kernfs_remove },	/* remove */
+	{ &vop_link_desc, kernfs_link },	/* link */
+	{ &vop_rename_desc, kernfs_rename },	/* rename */
+	{ &vop_mkdir_desc, kernfs_mkdir },	/* mkdir */
+	{ &vop_rmdir_desc, kernfs_rmdir },	/* rmdir */
+	{ &vop_symlink_desc, kernfs_symlink },	/* symlink */
+	{ &vop_readdir_desc, kernfs_readdir },	/* readdir */
+	{ &vop_readlink_desc, kernfs_readlink },/* readlink */
+	{ &vop_abortop_desc, kernfs_abortop },	/* abortop */
+	{ &vop_inactive_desc, kernfs_inactive },/* inactive */
+	{ &vop_reclaim_desc, kernfs_reclaim },	/* reclaim */
+	{ &vop_lock_desc, kernfs_lock },	/* lock */
+	{ &vop_unlock_desc, kernfs_unlock },	/* unlock */
+	{ &vop_bmap_desc, kernfs_bmap },	/* bmap */
+	{ &vop_strategy_desc, kernfs_strategy },/* strategy */
+	{ &vop_print_desc, kernfs_print },	/* print */
+	{ &vop_islocked_desc, kernfs_islocked },/* islocked */
+	{ &vop_pathconf_desc, kernfs_pathconf },/* pathconf */
+	{ &vop_advlock_desc, kernfs_advlock },	/* advlock */
+	{ &vop_blkatoff_desc, kernfs_blkatoff },/* blkatoff */
+	{ &vop_valloc_desc, kernfs_valloc },	/* valloc */
+	{ &vop_vfree_desc, kernfs_vfree },	/* vfree */
+	{ &vop_truncate_desc, kernfs_truncate },/* truncate */
+	{ &vop_update_desc, kernfs_update },	/* update */
+	{ &vop_bwrite_desc, kernfs_bwrite },	/* bwrite */
+	{ (struct vnodeop_desc*)NULL, (int(*)())NULL }	/* terminator */
+};
+struct vnodeopv_desc kernfs_vnodeop_opv_desc =	/* ties the vector pointer to its entry table */
+	{ &kernfs_vnodeop_p, kernfs_vnodeop_entries };
diff --git a/sys/miscfs/nullfs/null.h b/sys/miscfs/nullfs/null.h
new file mode 100644
index 00000000000..14286ffeee0
--- /dev/null
+++ b/sys/miscfs/nullfs/null.h
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) 1992, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software donated to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)null.h	8.2 (Berkeley) 1/21/94
+ *
+ * $Id: lofs.h,v 1.8 1992/05/30 10:05:43 jsp Exp jsp $
+ */
+
+struct null_args {	/* presumably the mount-time argument block — confirm against the mount code */
+	char		*target;	/* Target of loopback  */
+};
+
+struct null_mount {	/* per-mount private data, reached via MOUNTTONULLMOUNT(mp) */
+	struct mount	*nullm_vfs;	/* NOTE(review): presumably the lower filesystem's mount — confirm */
+	struct vnode	*nullm_rootvp;	/* Reference to root null_node */
+};
+
+#ifdef KERNEL
+/*
+ * A cache of vnode references
+ */
+struct null_node {	/* per-vnode private data, reached via VTONULL(vp) */
+	struct null_node	*null_forw;	/* Hash chain */
+	struct null_node	*null_back;	/* Hash chain, other direction */
+	struct vnode	        *null_lowervp;	/* VREFed once */
+	struct vnode		*null_vnode;	/* Back pointer */
+};
+
+extern int null_node_create __P((struct mount *mp, struct vnode *target, struct vnode **vpp));
+
+#define	MOUNTTONULLMOUNT(mp) ((struct null_mount *)((mp)->mnt_data))	/* struct mount * -> null_mount */
+#define	VTONULL(vp) ((struct null_node *)(vp)->v_data)	/* struct vnode * -> null_node */
+#define	NULLTOV(xp) ((xp)->null_vnode)	/* null_node -> the vnode that owns it */
+#ifdef NULLFS_DIAGNOSTIC
+extern struct vnode *null_checkvp __P((struct vnode *vp, char *fil, int lno));
+#define	NULLVPTOLOWERVP(vp) null_checkvp((vp), __FILE__, __LINE__)	/* routed through null_checkvp() with the call site */
+#else
+#define	NULLVPTOLOWERVP(vp) (VTONULL(vp)->null_lowervp)	/* plain field access */
+#endif
+
+extern int (**null_vnodeop_p)();	/* nullfs vnode operations vector */
+extern struct vfsops null_vfsops;	/* nullfs filesystem operations table */
+#endif /* KERNEL */
diff --git a/sys/miscfs/nullfs/null_subr.c b/sys/miscfs/nullfs/null_subr.c
new file mode 100644
index 00000000000..a31723fe4c2
--- /dev/null
+++ b/sys/miscfs/nullfs/null_subr.c
@@ -0,0 +1,293 @@
+/*
+ * Copyright (c) 1992, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software donated to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)null_subr.c	8.4 (Berkeley) 1/21/94
+ *
+ * $Id: lofs_subr.c,v 1.11 1992/05/30 10:05:43 jsp Exp jsp $
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/vnode.h>
+#include <sys/mount.h>
+#include <sys/namei.h>
+#include <sys/malloc.h>
+#include <miscfs/nullfs/null.h>
+
+#define LOG2_SIZEVNODE 7		/* log2(sizeof struct vnode); hand-set, not computed */
+#define	NNULLNODECACHE 16		/* hash chains; power of two, used as a mask below */
+#define	NULL_NHASH(vp) ((((u_long)(vp))>>LOG2_SIZEVNODE) & (NNULLNODECACHE-1))
+
+/*
+ * Null layer cache:
+ * Each cache entry holds a reference to the lower vnode
+ * along with a pointer to the alias vnode.  When an
+ * entry is added the lower vnode is VREF'd.  When the
+ * alias is removed the lower vnode is vrele'd.
+ */
+
+/*
+ * Cache head: the sentinel of one circular hash chain
+ */
+struct null_node_cache {
+	struct null_node	*ac_forw;	/* used as null_forw when the head is cast */
+	struct null_node	*ac_back;	/* used as null_back when the head is cast */
+};
+
+static struct null_node_cache null_node_cache[NNULLNODECACHE];
+
+/* Initialise each hash chain as an empty ring; always returns 0. */
+int
+nullfs_init()
+{
+	struct null_node_cache *ac;
+#ifdef NULLFS_DIAGNOSTIC
+	printf("nullfs_init\n");		/* printed during system boot */
+#endif
+
+	for (ac = null_node_cache; ac < null_node_cache + NNULLNODECACHE; ac++)
+		ac->ac_forw = ac->ac_back = (struct null_node *) ac;
+	return (0);	/* the vfsops slot is declared as returning int */
+}
+
+/*
+ * Map a lower vnode to the cache chain it belongs on
+ */
+static struct null_node_cache *
+null_node_hash(lowervp)
+	struct vnode *lowervp;
+{
+	/* NULL_NHASH masks the shifted address down to a chain index. */
+	return (null_node_cache + NULL_NHASH(lowervp));
+}
+
+/*
+ * Return a vget'ed alias on mount mp for lowervp if one is cached, else NULL.
+ */
+static struct vnode *
+null_node_find(mp, lowervp)
+	struct mount *mp;
+	struct vnode *lowervp;
+{
+	struct null_node_cache *hd;
+	struct null_node *a;
+	struct vnode *vp;
+
+	/*
+	 * Find hash base, and then search the (two-way) linked
+	 * list looking for a null_node structure which is referencing
+	 * the lower vnode.  If found, then increment the null_node
+	 * reference count (but NOT the lower vnode's VREF counter).
+	 */
+	hd = null_node_hash(lowervp);
+loop:
+	for (a = hd->ac_forw; a != (struct null_node *) hd; a = a->null_forw) {
+		if (a->null_lowervp == lowervp && NULLTOV(a)->v_mount == mp) {
+			vp = NULLTOV(a);
+			/*
+			 * We need vget for the VXLOCK
+			 * stuff, but we don't want to lock
+			 * the lower node.
+			 */
+			if (vget(vp, 0)) {
+				printf ("null_node_find: vget failed.\n");
+				goto loop;	/* rescan the chain from its head */
+			};
+			return (vp);
+		}
+	}
+
+	return NULL;	/* walked back round to the head: no alias cached */
+}
+
+
+/*
+ * Make a new null_node node.
+ * *vpp receives the alias vnode, lowervp is the lower vnode.
+ * Maintain a reference to (lowervp).
+ */
+static int
+null_node_alloc(mp, lowervp, vpp)
+	struct mount *mp;
+	struct vnode *lowervp;
+	struct vnode **vpp;
+{
+	struct null_node_cache *hd;
+	struct null_node *xp;
+	struct vnode *othervp, *vp;
+	int error;
+
+	if (error = getnewvnode(VT_NULL, mp, null_vnodeop_p, vpp))
+		return (error);
+	vp = *vpp;
+
+	MALLOC(xp, struct null_node *, sizeof(struct null_node), M_TEMP, M_WAITOK);
+	vp->v_type = lowervp->v_type;
+	xp->null_vnode = vp;
+	vp->v_data = xp;
+	xp->null_lowervp = lowervp;
+	/*
+	 * Before we insert our new node onto the hash chains,
+	 * check to see if someone else has beaten us to it.
+	 * (We could have slept in MALLOC.)  Pass mp as well as lowervp.
+	 */
+	if (othervp = null_node_find(mp, lowervp)) {
+		FREE(xp, M_TEMP);	/* NOTE(review): vp->v_data still points here */
+		vp->v_type = VBAD;	/* node is discarded */
+		vp->v_usecount = 0;	/* XXX */
+		*vpp = othervp;
+		return 0;
+	};
+	VREF(lowervp);   /* Extra VREF will be vrele'd in null_node_create */
+	hd = null_node_hash(lowervp);
+	insque(xp, hd);
+	return 0;
+}
+
+
+/*
+ * Try to find an existing null_node vnode referring
+ * to it, otherwise make a new null_node vnode which
+ * contains a reference to the lower vnode.
+ */
+int
+null_node_create(mp, lowervp, newvpp)
+	struct mount *mp;
+	struct vnode *lowervp;
+	struct vnode **newvpp;
+{
+	struct vnode *aliasvp;
+
+	if (aliasvp = null_node_find(mp, lowervp)) {
+		/*
+		 * null_node_find has taken another reference
+		 * to the alias vnode.
+		 */
+#ifdef NULLFS_DIAGNOSTIC
+		vprint("null_node_create: exists", aliasvp);
+#endif
+		/* VREF(aliasvp); --- done in null_node_find */
+	} else {
+		int error;
+
+		/*
+		 * Get new vnode.
+		 */
+#ifdef NULLFS_DIAGNOSTIC
+		printf("null_node_create: create new alias vnode\n");
+#endif
+
+		/*
+		 * Make new vnode reference the null_node.
+		 */
+		if (error = null_node_alloc(mp, lowervp, &aliasvp))
+			return error;
+
+		/*
+		 * aliasvp is already VREF'd by getnewvnode()
+		 */
+	}
+
+	vrele(lowervp);	/* the alias keeps its own reference on lowervp */
+
+#ifdef DIAGNOSTIC
+	if (lowervp->v_usecount < 1) {
+		/* Should never happen... */
+		vprint ("null_node_create: alias ", aliasvp);
+		vprint ("null_node_create: lower ", lowervp);
+		printf ("null_node_create: lower has 0 usecount.\n");
+		panic ("null_node_create: lower has 0 usecount.");
+	};
+#endif
+
+#ifdef NULLFS_DIAGNOSTIC
+	vprint("null_node_create: alias", aliasvp);
+	vprint("null_node_create: lower", lowervp);
+#endif
+
+	*newvpp = aliasvp;
+	return (0);
+}
+#ifdef NULLFS_DIAGNOSTIC	/* checked NULLVPTOLOWERVP; fil/lno name the call site */
+struct vnode *
+null_checkvp(vp, fil, lno)
+	struct vnode *vp;
+	char *fil;
+	int lno;
+{
+	struct null_node *a = VTONULL(vp);
+#ifdef notyet
+	/*
+	 * Can't do this check because vop_reclaim runs
+	 * with a funny vop vector.
+	 */
+	if (vp->v_op != null_vnodeop_p) {
+		printf ("null_checkvp: on non-null-node\n");
+		while (null_checkvp_barrier) /*WAIT*/ ;
+		panic("null_checkvp");
+	};
+#endif
+	if (a->null_lowervp == NULL) {
+		/* Should never happen: the alias has lost its lower vnode */
+		int i; u_long *p;
+		printf("vp = %x, ZERO ptr\n", vp);
+		for (p = (u_long *) a, i = 0; i < 8; i++)	/* dump 8 words from a */
+			printf(" %x", p[i]);
+		printf("\n");
+		/* wait for debugger; NOTE(review): barrier not declared in view */
+		while (null_checkvp_barrier) /*WAIT*/ ;
+		panic("null_checkvp");
+	}
+	if (a->null_lowervp->v_usecount < 1) {
+		int i; u_long *p;
+		printf("vp = %x, unref'ed lowervp\n", vp);
+		for (p = (u_long *) a, i = 0; i < 8; i++)	/* dump 8 words from a */
+			printf(" %x", p[i]);
+		printf("\n");
+		/* wait for debugger */
+		while (null_checkvp_barrier) /*WAIT*/ ;
+		panic ("null with unref'ed lowervp");
+	};
+#ifdef notyet
+	printf("null %x/%d -> %x/%d [%s, %d]\n",
+	        NULLTOV(a), NULLTOV(a)->v_usecount,
+		a->null_lowervp, a->null_lowervp->v_usecount,
+		fil, lno);
+#endif
+	return a->null_lowervp;	/* both checks passed: hand back the lower vnode */
+}
+#endif
diff --git a/sys/miscfs/nullfs/null_vfsops.c b/sys/miscfs/nullfs/null_vfsops.c
new file mode 100644
index 00000000000..b0d2df75cda
--- /dev/null
+++ b/sys/miscfs/nullfs/null_vfsops.c
@@ -0,0 +1,366 @@
+/*
+ * Copyright (c) 1992, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software donated to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)null_vfsops.c	8.2 (Berkeley) 1/21/94
+ *
+ * @(#)lofs_vfsops.c	1.2 (Berkeley) 6/18/92
+ * $Id: lofs_vfsops.c,v 1.9 1992/05/30 10:26:24 jsp Exp jsp $
+ */
+
+/*
+ * Null Layer
+ * (See null_vnops.c for a description of what this does.)
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/vnode.h>
+#include <sys/mount.h>
+#include <sys/namei.h>
+#include <sys/malloc.h>
+#include <miscfs/nullfs/null.h>
+
+/*
+ * Mount null layer: alias the tree named by args.target at this mount
+ */
+int
+nullfs_mount(mp, path, data, ndp, p)
+	struct mount *mp;
+	char *path;
+	caddr_t data;
+	struct nameidata *ndp;
+	struct proc *p;
+{
+	int error = 0;
+	struct null_args args;
+	struct vnode *lowerrootvp, *vp;
+	struct vnode *nullm_rootvp;
+	struct null_mount *xmp;
+	u_int size;
+
+#ifdef NULLFS_DIAGNOSTIC
+	printf("nullfs_mount(mp = %x)\n", mp);
+#endif
+
+	/*
+	 * Update is not supported
+	 */
+	if (mp->mnt_flag & MNT_UPDATE) {
+		return (EOPNOTSUPP);
+		/* return VFS_MOUNT(MOUNTTONULLMOUNT(mp)->nullm_vfs, path, data, ndp, p);*/
+	}
+
+	/*
+	 * Get argument
+	 */
+	if (error = copyin(data, (caddr_t)&args, sizeof(struct null_args)))
+		return (error);
+
+	/*
+	 * Find lower node
+	 */
+	NDINIT(ndp, LOOKUP, FOLLOW|WANTPARENT|LOCKLEAF,
+		UIO_USERSPACE, args.target, p);
+	if (error = namei(ndp))
+		return (error);
+
+	/*
+	 * Sanity check on lower vnode
+	 */
+	lowerrootvp = ndp->ni_vp;
+
+	vrele(ndp->ni_dvp);
+	ndp->ni_dvp = NULL;
+
+	xmp = (struct null_mount *) malloc(sizeof(struct null_mount),
+				M_UFSMNT, M_WAITOK);	/* XXX */
+
+	/*
+	 * Save reference to underlying FS
+	 */
+	xmp->nullm_vfs = lowerrootvp->v_mount;
+
+	/*
+	 * Save reference.  Each mount also holds
+	 * a reference on the root vnode.
+	 */
+	error = null_node_create(mp, lowerrootvp, &vp);
+	/*
+	 * Make sure the node alias worked.  On failure vp was never
+	 * set, so unlock and release the lower vnode directly.
+	 */
+	if (error) {
+		VOP_UNLOCK(lowerrootvp);
+		vrele(lowerrootvp);
+		free(xmp, M_UFSMNT);	/* XXX */
+		return (error);
+	}
+	/* Unlock the node (the lookup above asked for LOCKLEAF) */
+	VOP_UNLOCK(vp);
+
+	/*
+	 * Keep a held reference to the root vnode.
+	 * It is vrele'd in nullfs_unmount.
+	 */
+	nullm_rootvp = vp;
+	nullm_rootvp->v_flag |= VROOT;
+	xmp->nullm_rootvp = nullm_rootvp;
+	if (NULLVPTOLOWERVP(nullm_rootvp)->v_mount->mnt_flag & MNT_LOCAL)
+		mp->mnt_flag |= MNT_LOCAL;
+	mp->mnt_data = (qaddr_t) xmp;
+	getnewfsid(mp, MOUNT_LOFS);	/* NOTE(review): lofs type id -- confirm */
+
+	(void) copyinstr(path, mp->mnt_stat.f_mntonname, MNAMELEN - 1, &size);
+	bzero(mp->mnt_stat.f_mntonname + size, MNAMELEN - size);
+	(void) copyinstr(args.target, mp->mnt_stat.f_mntfromname, MNAMELEN - 1,
+	    &size);
+	bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size);
+#ifdef NULLFS_DIAGNOSTIC
+	printf("nullfs_mount: lower %s, alias at %s\n",
+		mp->mnt_stat.f_mntfromname, mp->mnt_stat.f_mntonname);
+#endif
+	return (0);
+}
+
+/*
+ * VFS start.  Nothing needed here - the start routine
+ * on the underlying filesystem will have been called
+ * when that filesystem was mounted.  Always returns 0.
+ */
+int
+nullfs_start(mp, flags, p)
+	struct mount *mp;
+	int flags;
+	struct proc *p;
+{
+	return (0);
+	/* Not forwarded: VFS_START(MOUNTTONULLMOUNT(mp)->nullm_vfs, flags, p) */
+}
+
+/*
+ * Free reference to null layer: flush its vnodes, drop the root alias
+ */
+int
+nullfs_unmount(mp, mntflags, p)
+	struct mount *mp;
+	int mntflags;
+	struct proc *p;
+{
+	struct vnode *nullm_rootvp = MOUNTTONULLMOUNT(mp)->nullm_rootvp;
+	int error;
+	int flags = 0;
+	extern int doforce;
+
+#ifdef NULLFS_DIAGNOSTIC
+	printf("nullfs_unmount(mp = %x)\n", mp);
+#endif
+
+	if (mntflags & MNT_FORCE) {
+		/* lofs can never be rootfs so don't check for it */
+		if (!doforce)
+			return (EINVAL);
+		flags |= FORCECLOSE;
+	}
+
+	/*
+	 * Clear out buffer cache.  I don't think we
+	 * ever get anything cached at this level at the
+	 * moment, but who knows...
+	 */
+#if 0
+	mntflushbuf(mp, 0); 
+	if (mntinvalbuf(mp, 1))
+		return (EBUSY);
+#endif
+	if (nullm_rootvp->v_usecount > 1)	/* more than the mount's own hold */
+		return (EBUSY);
+	if (error = vflush(mp, nullm_rootvp, flags))	/* root alias is skipped */
+		return (error);
+
+#ifdef NULLFS_DIAGNOSTIC
+	vprint("alias root of lower", nullm_rootvp);
+#endif	 
+	/*
+	 * Release the reference on the root alias kept since nullfs_mount
+	 */
+	vrele(nullm_rootvp);
+	/*
+	 * And blow it away for future re-use
+	 */
+	vgone(nullm_rootvp);
+	/*
+	 * Finally, throw away the null_mount structure
+	 */
+	free(mp->mnt_data, M_UFSMNT);	/* XXX */
+	mp->mnt_data = 0;
+	return 0;
+}
+
+int
+nullfs_root(mp, vpp)
+	struct mount *mp;
+	struct vnode **vpp;
+{
+	struct null_mount *nmp = MOUNTTONULLMOUNT(mp);
+	struct vnode *rootvp = nmp->nullm_rootvp;
+#ifdef NULLFS_DIAGNOSTIC
+	printf("nullfs_root(mp = %x, vp = %x->%x)\n", mp,
+			rootvp,
+			NULLVPTOLOWERVP(rootvp)
+			);
+#endif
+
+	/*
+	 * Give the caller its own reference on the
+	 * root alias, and return it locked.
+	 */
+	VREF(rootvp);
+	VOP_LOCK(rootvp);
+	*vpp = rootvp;
+	return (0);
+}
+
+int
+nullfs_quotactl(mp, cmd, uid, arg, p)
+	struct mount *mp;
+	int cmd;
+	uid_t uid;
+	caddr_t arg;
+	struct proc *p;
+{	/* Quota requests are passed unchanged to the lower mount. */
+	return VFS_QUOTACTL(MOUNTTONULLMOUNT(mp)->nullm_vfs, cmd, uid, arg, p);
+}
+
+int
+nullfs_statfs(mp, sbp, p)
+	struct mount *mp;
+	struct statfs *sbp;
+	struct proc *p;
+{
+	struct null_mount *nmp = MOUNTTONULLMOUNT(mp);
+	struct statfs lower;
+	int error;
+#ifdef NULLFS_DIAGNOSTIC
+	printf("nullfs_statfs(mp = %x, vp = %x->%x)\n", mp,
+			nmp->nullm_rootvp,
+			NULLVPTOLOWERVP(nmp->nullm_rootvp)
+			);
+#endif
+
+	bzero(&lower, sizeof(lower));
+	if (error = VFS_STATFS(nmp->nullm_vfs, &lower, p))
+		return (error);
+
+	/*
+	 * Report the lower fs's numbers, but keep our own identity.
+	 */
+	sbp->f_type = lower.f_type;
+	sbp->f_flags = lower.f_flags;
+	sbp->f_bsize = lower.f_bsize;
+	sbp->f_iosize = lower.f_iosize;
+	sbp->f_blocks = lower.f_blocks;
+	sbp->f_bfree = lower.f_bfree;
+	sbp->f_bavail = lower.f_bavail;
+	sbp->f_files = lower.f_files;
+	sbp->f_ffree = lower.f_ffree;
+	if (sbp == &mp->mnt_stat)
+		return (0);
+	bcopy(&mp->mnt_stat.f_fsid, &sbp->f_fsid, sizeof(sbp->f_fsid));
+	bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN);
+	bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN);
+	return (0);
+}
+
+int
+nullfs_sync(mp, waitfor, cred, p)
+	struct mount *mp;
+	int waitfor;
+	struct ucred *cred;
+	struct proc *p;
+{
+	/*
+	 * XXX - Nothing to flush: assumes no data cached at null layer.
+	 */
+	return (0);
+}
+
+int
+nullfs_vget(mp, ino, vpp)
+	struct mount *mp;
+	ino_t ino;
+	struct vnode **vpp;
+{
+	/* NOTE(review): *vpp is the lower fs's result; no null_node is stacked */
+	return VFS_VGET(MOUNTTONULLMOUNT(mp)->nullm_vfs, ino, vpp);
+}
+
+int
+nullfs_fhtovp(mp, fidp, nam, vpp, exflagsp, credanonp)
+	struct mount *mp;
+	struct fid *fidp;
+	struct mbuf *nam;
+	struct vnode **vpp;
+	int *exflagsp;
+	struct ucred**credanonp;
+{
+	/* NOTE(review): *vpp is the lower fs's result; no null_node is stacked */
+	return VFS_FHTOVP(MOUNTTONULLMOUNT(mp)->nullm_vfs, fidp, nam, vpp, exflagsp,credanonp);
+}
+
+int
+nullfs_vptofh(vp, fhp)
+	struct vnode *vp;
+	struct fid *fhp;
+{	/* The file handle is built from the lower vnode, not the alias. */
+	return VFS_VPTOFH(NULLVPTOLOWERVP(vp), fhp);
+}
+
+int nullfs_init __P((void));
+/* VFS operations of the null layer; positional, so order follows struct vfsops. */
+struct vfsops null_vfsops = {
+	nullfs_mount,
+	nullfs_start,
+	nullfs_unmount,
+	nullfs_root,
+	nullfs_quotactl,
+	nullfs_statfs,
+	nullfs_sync,
+	nullfs_vget,
+	nullfs_fhtovp,
+	nullfs_vptofh,
+	nullfs_init,
+};
diff --git a/sys/miscfs/nullfs/null_vnops.c b/sys/miscfs/nullfs/null_vnops.c
new file mode 100644
index 00000000000..115ff6f4643
--- /dev/null
+++ b/sys/miscfs/nullfs/null_vnops.c
@@ -0,0 +1,462 @@
+/*
+ * Copyright (c) 1992, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * John Heidemann of the UCLA Ficus project.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)null_vnops.c	8.1 (Berkeley) 6/10/93
+ *
+ * Ancestors:
+ *	@(#)lofs_vnops.c	1.2 (Berkeley) 6/18/92
+ *	$Id: lofs_vnops.c,v 1.11 1992/05/30 10:05:43 jsp Exp jsp $
+ *	...and...
+ *	@(#)null_vnodeops.c 1.20 92/07/07 UCLA Ficus project
+ */
+
+/*
+ * Null Layer
+ *
+ * (See mount_null(8) for more information.)
+ *
+ * The null layer duplicates a portion of the file system
+ * name space under a new name.  In this respect, it is
+ * similar to the loopback file system.  It differs from
+ * the loopback fs in two respects:  it is implemented using
+ * a stackable layers technique, and its "null-node"s stack above
+ * all lower-layer vnodes, not just over directory vnodes.
+ *
+ * The null layer has two purposes.  First, it serves as a demonstration
+ * of layering by providing a layer which does nothing.  (It actually
+ * does everything the loopback file system does, which is slightly
+ * more than nothing.)  Second, the null layer can serve as a prototype
+ * layer.  Since it provides all necessary layer framework,
+ * new file system layers can be created very easily by starting
+ * with a null layer.
+ *
+ * The remainder of this man page examines the null layer as a basis
+ * for constructing new layers.
+ *
+ *
+ * INSTANTIATING NEW NULL LAYERS
+ *
+ * New null layers are created with mount_null(8).
+ * Mount_null(8) takes two arguments, the pathname
+ * of the lower vfs (target-pn) and the pathname where the null
+ * layer will appear in the namespace (alias-pn).  After
+ * the null layer is put into place, the contents
+ * of target-pn subtree will be aliased under alias-pn.
+ *
+ *
+ * OPERATION OF A NULL LAYER
+ *
+ * The null layer is the minimum file system layer,
+ * simply bypassing all possible operations to the lower layer
+ * for processing there.  The majority of its activity centers
+ * on the bypass routine, through which nearly all vnode operations
+ * pass.
+ *
+ * The bypass routine accepts arbitrary vnode operations for
+ * handling by the lower layer.  It begins by examining vnode
+ * operation arguments and replacing any null-nodes by their
+ * lower-layer equivalents.  It then invokes the operation
+ * on the lower layer.  Finally, it replaces the null-nodes
+ * in the arguments and, if a vnode is returned by the operation,
+ * stacks a null-node on top of the returned vnode.
+ *
+ * Although bypass handles most operations, 
+ * vop_getattr, _inactive, _reclaim, and _print are not bypassed.
+ * Vop_getattr must change the fsid being returned.
+ * Vop_inactive and vop_reclaim are not bypassed so that
+ * they can handle freeing null-layer specific data.
+ * Vop_print is not bypassed to avoid excessive debugging
+ * information.
+ *
+ *
+ * INSTANTIATING VNODE STACKS
+ *
+ * Mounting associates the null layer with a lower layer,
+ * in effect stacking two VFSes.  Vnode stacks are instead
+ * created on demand as files are accessed.
+ *
+ * The initial mount creates a single vnode stack for the
+ * root of the new null layer.  All other vnode stacks
+ * are created as a result of vnode operations on
+ * this or other null vnode stacks.
+ *
+ * New vnode stacks come into existence as a result of
+ * an operation which returns a vnode.
+ * The bypass routine stacks a null-node above the new
+ * vnode before returning it to the caller.
+ *
+ * For example, imagine mounting a null layer with
+ * "mount_null /usr/include /dev/layer/null".
+ * Changing directory to /dev/layer/null will assign
+ * the root null-node (which was created when the null layer was mounted).
+ * Now consider opening "sys".  A vop_lookup would be
+ * done on the root null-node.  This operation would bypass through
+ * to the lower layer which would return a vnode representing 
+ * the UFS "sys".  Null_bypass then builds a null-node
+ * aliasing the UFS "sys" and returns this to the caller.
+ * Later operations on the null-node "sys" will repeat this
+ * process when constructing other vnode stacks.
+ *
+ *
+ * CREATING OTHER FILE SYSTEM LAYERS
+ *
+ * One of the easiest ways to construct new file system layers is to make
+ * a copy of the null layer, rename all files and variables, and
+ * then begin modifying the copy.  Sed can be used to easily rename
+ * all variables.
+ *
+ * The umap layer is an example of a layer descended from the 
+ * null layer.
+ *
+ *
+ * INVOKING OPERATIONS ON LOWER LAYERS
+ *
+ * There are two techniques to invoke operations on a lower layer
+ * when the operation cannot be completely bypassed.  Each method
+ * is appropriate in different situations.  In both cases,
+ * it is the responsibility of the aliasing layer to make
+ * the operation arguments "correct" for the lower layer
+ * by mapping any vnode arguments to the lower layer.
+ *
+ * The first approach is to call the aliasing layer's bypass routine.
+ * This method is most suitable when you wish to invoke the operation
+ * currently being handled on the lower layer.  It has the advantage
+ * that the bypass routine already must do argument mapping.
+ * An example of this is null_getattr in the null layer.
+ *
+ * A second approach is to directly invoke vnode operations on
+ * the lower layer with the VOP_OPERATIONNAME interface.
+ * The advantage of this method is that it is easy to invoke
+ * arbitrary operations on the lower layer.  The disadvantage
+ * is that vnode arguments must be manually mapped.
+ *
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/vnode.h>
+#include <sys/mount.h>
+#include <sys/namei.h>
+#include <sys/malloc.h>
+#include <sys/buf.h>
+#include <miscfs/nullfs/null.h>
+
+
+int null_bug_bypass = 0;   /* debugging: nonzero makes null_bypass printf each op name */
+
+/*
+ * This is the 10-Apr-92 bypass routine.
+ *    This version has been optimized for speed, throwing away some
+ * safety checks.  It should still always work, but it's not as
+ * robust to programmer errors.
+ *    Define SAFETY to include some error checking code.
+ *
+ * In general, we map all vnodes going down and unmap them on the way back.
+ * As an exception to this, vnodes can be marked "unmapped" by setting
+ * the Nth bit in operation's vdesc_flags.
+ *
+ * Also, some BSD vnode operations have the side effect of vrele'ing
+ * their arguments.  With stacking, the reference counts are held
+ * by the upper node, not the lower one, so we must handle these
+ * side-effects here.  This is not of concern in Sun-derived systems
+ * since there are no such side-effects.
+ *
+ * This makes the following assumptions:
+ * - only one returned vpp
+ * - no INOUT vpp's (Sun's vop_open has one of these)
+ * - the vnode operation vector of the first vnode should be used
+ *   to determine what implementation of the op should be invoked
+ * - all mapped vnodes are of our vnode-type (NEEDSWORK:
+ *   problems on rmdir'ing mount points and renaming?)
+ */ 
+int
+null_bypass(ap)
+	struct vop_generic_args /* {
+		struct vnodeop_desc *a_desc;
+		<other random data follows, presumably>
+	} */ *ap;
+{
+	extern int (**null_vnodeop_p)();  /* not extern, really "forward" */
+	register struct vnode **this_vp_p;
+	int error;
+	struct vnode *old_vps[VDESC_MAX_VPS];	/* upper vnodes we replaced, or NULL */
+	struct vnode **vps_p[VDESC_MAX_VPS];	/* where each vnode lives inside *ap */
+	struct vnode ***vppp;
+	struct vnodeop_desc *descp = ap->a_desc;
+	int reles, i;
+
+	if (null_bug_bypass)
+		printf ("null_bypass: %s\n", descp->vdesc_name);
+
+#ifdef SAFETY
+	/*
+	 * We require at least one vp.
+	 */
+	if (descp->vdesc_vp_offsets == NULL ||
+	    descp->vdesc_vp_offsets[0] == VDESC_NO_OFFSET)
+		panic ("null_bypass: no vp's in map.\n");
+#endif
+
+	/*
+	 * Map the vnodes going in.
+	 * Later, we'll invoke the operation based on
+	 * the first mapped vnode's operation vector.
+	 */
+	reles = descp->vdesc_flags;	/* low bit is tested, then shifted, per vnode */
+	for (i = 0; i < VDESC_MAX_VPS; reles >>= 1, i++) {
+		if (descp->vdesc_vp_offsets[i] == VDESC_NO_OFFSET)
+			break;   /* bail out at end of list */
+		vps_p[i] = this_vp_p = 
+			VOPARG_OFFSETTO(struct vnode**,descp->vdesc_vp_offsets[i],ap);
+		/*
+		 * We're not guaranteed that any but the first vnode
+		 * are of our type.  Check for and don't map any
+		 * that aren't.  (We must always map first vp or vclean fails.)
+		 */
+		if (i && (*this_vp_p)->v_op != null_vnodeop_p) {
+			old_vps[i] = NULL;	/* left alone; nothing to restore */
+		} else {
+			old_vps[i] = *this_vp_p;
+			*(vps_p[i]) = NULLVPTOLOWERVP(*this_vp_p);
+			/*
+			 * XXX - Several operations have the side effect
+			 * of vrele'ing their vp's.  We must account for
+			 * that.  (This should go away in the future.)
+			 */
+			if (reles & 1)
+				VREF(*this_vp_p);	/* already the lower vnode here */
+		}
+			
+	}
+
+	/*
+	 * Call the operation on the lower layer
+	 * with the modified argument structure.
+	 */
+	error = VCALL(*(vps_p[0]), descp->vdesc_offset, ap);
+
+	/*
+	 * Maintain the illusion of call-by-value
+	 * by restoring vnodes in the argument structure
+	 * to their original value.
+	 */
+	reles = descp->vdesc_flags;
+	for (i = 0; i < VDESC_MAX_VPS; reles >>= 1, i++) {
+		if (descp->vdesc_vp_offsets[i] == VDESC_NO_OFFSET)
+			break;   /* bail out at end of list */
+		if (old_vps[i]) {
+			*(vps_p[i]) = old_vps[i];
+			if (reles & 1)
+				vrele(*(vps_p[i]));	/* the upper vnode again */
+		}
+	}
+
+	/*
+	 * Map the possible out-going vpp
+	 * (Assumes that the lower layer always returns
+	 * a VREF'ed vpp unless it gets an error.)
+	 */
+	if (descp->vdesc_vpp_offset != VDESC_NO_OFFSET &&
+	    !(descp->vdesc_flags & VDESC_NOMAP_VPP) &&
+	    !error) {
+		/*
+		 * XXX - even though some ops have vpp returned vp's,
+		 * several ops actually vrele this before returning.
+		 * We must avoid these ops.
+		 * (This should go away when these ops are regularized.)
+		 */
+		if (descp->vdesc_flags & VDESC_VPP_WILLRELE)
+			goto out;
+		vppp = VOPARG_OFFSETTO(struct vnode***,
+				 descp->vdesc_vpp_offset,ap);
+		error = null_node_create(old_vps[0]->v_mount, **vppp, *vppp);	/* in place */
+	}
+
+ out:
+	return (error);
+}
+
+
+/*
+ *  Getattr is intercepted only so the fsid can be replaced.
+ */
+int
+null_getattr(ap)
+	struct vop_getattr_args /* {
+		struct vnode *a_vp;
+		struct vattr *a_vap;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	int error = null_bypass(ap);
+
+	/* Relies on null_bypass having restored ap->a_vp to the alias. */
+	if (error == 0)
+		ap->a_vap->va_fsid = ap->a_vp->v_mount->mnt_stat.f_fsid.val[0];
+	return (error);
+}
+
+
+int
+null_inactive(ap)
+	struct vop_inactive_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+	/*
+	 * Do nothing (and _don't_ bypass); always returns 0.
+	 * Wait to vrele lowervp until reclaim,
+	 * so that until then our null_node is in the
+	 * cache and reusable.
+	 *
+	 * NEEDSWORK: Someday, consider inactive'ing
+	 * the lowervp and then trying to reactivate it
+	 * with capabilities (v_id)
+	 * like they do in the name lookup cache code.
+	 * That's too much work for now.
+	 */
+	return (0);
+}
+
+int
+null_reclaim(ap)
+	struct vop_reclaim_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+	struct vnode *vp = ap->a_vp;
+	struct null_node *xp = VTONULL(vp);
+	struct vnode *lowervp = xp->null_lowervp;	/* saved: xp is freed below */
+
+	/*
+	 * Note: in vop_reclaim, vp->v_op == dead_vnodeop_p,
+	 * so we can't call VOPs on ourself.
+	 */
+	/* After this assignment, this node will not be re-used. */
+	xp->null_lowervp = NULL;
+	remque(xp);			/* unhook from its hash chain */
+	FREE(vp->v_data, M_TEMP);	/* v_data is xp */
+	vp->v_data = NULL;
+	vrele (lowervp);		/* drop the reference this null_node held */
+	return (0);
+}
+
+
+int
+null_print(ap)
+	struct vop_print_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+	struct vnode *nullvp = ap->a_vp;	/* the alias being described */
+	printf ("\ttag VT_NULLFS, vp=%x, lowervp=%x\n", nullvp, NULLVPTOLOWERVP(nullvp));
+	return (0);
+}
+
+
+/*
+ * XXX - vop_strategy is written out by hand since its argument
+ * structure carries a buf and no vnode for null_bypass to map.
+ * This goes away with a merged VM/buffer cache.
+ */
+int
+null_strategy(ap)
+	struct vop_strategy_args /* {
+		struct buf *a_bp;
+	} */ *ap;
+{
+	struct buf *bp = ap->a_bp;
+	struct vnode *uppervp = bp->b_vp;
+	int error;
+
+	/*
+	 * Aim the buffer at the lower vnode, run the lower
+	 * layer's strategy routine, then put the alias back.
+	 */
+	bp->b_vp = NULLVPTOLOWERVP(uppervp);
+	error = VOP_STRATEGY(bp);
+	bp->b_vp = uppervp;
+	return (error);
+}
+
+
+/*
+ * XXX - vop_bwrite, like vop_strategy, is written out by hand since
+ * its argument structure carries a buf and no vnode to map.
+ * This goes away with a merged VM/buffer cache.
+ */
+int
+null_bwrite(ap)
+	struct vop_bwrite_args /* {
+		struct buf *a_bp;
+	} */ *ap;
+{
+	struct buf *bp = ap->a_bp;
+	struct vnode *uppervp = bp->b_vp;
+	int error;
+
+	/*
+	 * Aim the buffer at the lower vnode, let the lower
+	 * layer write it, then put the alias back.
+	 */
+	bp->b_vp = NULLVPTOLOWERVP(uppervp);
+	error = VOP_BWRITE(bp);
+	bp->b_vp = uppervp;
+	return (error);
+}
+
+/*
+ * Global vfs data structures: the null layer's vnode operation table
+ */
+int (**null_vnodeop_p)();
+struct vnodeopv_entry_desc null_vnodeop_entries[] = {
+	{ &vop_default_desc, null_bypass },	/* default: map and pass down */
+
+	{ &vop_getattr_desc, null_getattr },	/* bypass, then replace va_fsid */
+	{ &vop_inactive_desc, null_inactive },	/* no-op, not bypassed */
+	{ &vop_reclaim_desc, null_reclaim },	/* frees the null_node */
+	{ &vop_print_desc, null_print },	/* prints alias and lower vp */
+
+	{ &vop_strategy_desc, null_strategy },	/* hand coded: buf argument */
+	{ &vop_bwrite_desc, null_bwrite },	/* hand coded: buf argument */
+
+	{ (struct vnodeop_desc*)NULL, (int(*)())NULL }	/* presumably the end marker */
+};
+struct vnodeopv_desc null_vnodeop_opv_desc =
+	{ &null_vnodeop_p, null_vnodeop_entries };
diff --git a/sys/miscfs/portal/portal.h b/sys/miscfs/portal/portal.h
new file mode 100644
index 00000000000..38d7ee0cdd2
--- /dev/null
+++ b/sys/miscfs/portal/portal.h
@@ -0,0 +1,72 @@
+/*
+ * Copyright (c) 1992, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software donated to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)portal.h	8.4 (Berkeley) 1/21/94
+ *
+ * $Id: portal.h,v 1.3 1992/05/30 10:05:24 jsp Exp jsp $
+ */
+
+struct portal_args {
+	char		*pa_config;	/* Config file name (user string) */
+	int		pa_socket;	/* Descriptor of socket to server */
+};
+
+struct portal_cred {
+	int		pcr_flag;		/* File open mode (a_mode) */
+	uid_t		pcr_uid;		/* cr_uid from ucred */
+	short		pcr_ngroups;		/* cr_ngroups from ucred */
+	gid_t		pcr_groups[NGROUPS];	/* cr_groups from ucred */
+};
+
+#ifdef KERNEL
+struct portalmount {
+	struct vnode	*pm_root;	/* Root node */
+	struct file	*pm_server;	/* Held reference to server socket */
+};
+
+struct portalnode {
+	int		pt_size;	/* Length of pt_arg incl. NUL (0 for root) */
+	char		*pt_arg;	/* Arg to send to server (0 for root) */
+	int		pt_fileid;	/* cookie; reported as va_fileid */
+};
+
+#define VFSTOPORTAL(mp)	((struct portalmount *)((mp)->mnt_data))
+#define	VTOPORTAL(vp) ((struct portalnode *)(vp)->v_data)
+
+#define PORTAL_ROOTFILEID	2	/* pt_fileid of the root vnode */
+
+extern int (**portal_vnodeop_p)();
+extern struct vfsops portal_vfsops;
+#endif /* KERNEL */
diff --git a/sys/miscfs/portal/portal_vfsops.c b/sys/miscfs/portal/portal_vfsops.c
new file mode 100644
index 00000000000..39e8563009b
--- /dev/null
+++ b/sys/miscfs/portal/portal_vfsops.c
@@ -0,0 +1,313 @@
+/*
+ * Copyright (c) 1992, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software donated to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)portal_vfsops.c	8.6 (Berkeley) 1/21/94
+ *
+ * $Id: portal_vfsops.c,v 1.5 1992/05/30 10:25:27 jsp Exp jsp $
+ */
+
+/*
+ * Portal Filesystem
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/proc.h>
+#include <sys/filedesc.h>
+#include <sys/file.h>
+#include <sys/vnode.h>
+#include <sys/mount.h>
+#include <sys/namei.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/protosw.h>
+#include <sys/domain.h>
+#include <sys/un.h>
+#include <miscfs/portal/portal.h>
+
+int
+portal_init()
+{
+
+	return (0);		/* nothing to set up; always succeeds */
+}
+
+/*
+ * Mount a portal filesystem served through the socket in the mount args
+ */
+int
+portal_mount(mp, path, data, ndp, p)
+	struct mount *mp;
+	char *path;
+	caddr_t data;
+	struct nameidata *ndp;
+	struct proc *p;
+{
+	struct file *fp;
+	struct portal_args args;
+	struct portalmount *fmp;
+	struct socket *so;
+	struct vnode *rvp;
+	u_int size;
+	int error;
+
+	/*
+	 * Update is not supported
+	 */
+	if (mp->mnt_flag & MNT_UPDATE)
+		return (EOPNOTSUPP);
+
+	if (error = copyin(data, (caddr_t) &args, sizeof(struct portal_args)))
+		return (error);
+
+	if (error = getsock(p->p_fd, args.pa_socket, &fp))	/* server socket */
+		return (error);
+	so = (struct socket *) fp->f_data;
+	if (so->so_proto->pr_domain->dom_family != AF_UNIX)
+		return (ESOCKTNOSUPPORT);
+
+	error = getnewvnode(VT_PORTAL, mp, portal_vnodeop_p, &rvp); /* XXX */
+	if (error)
+		return (error);
+	MALLOC(rvp->v_data, void *, sizeof(struct portalnode),
+		M_TEMP, M_WAITOK);
+
+	fmp = (struct portalmount *) malloc(sizeof(struct portalmount),
+				 M_UFSMNT, M_WAITOK);	/* XXX */
+	rvp->v_type = VDIR;		/* the root is the only directory */
+	rvp->v_flag |= VROOT;
+	VTOPORTAL(rvp)->pt_arg = 0;	/* the root carries no argument */
+	VTOPORTAL(rvp)->pt_size = 0;
+	VTOPORTAL(rvp)->pt_fileid = PORTAL_ROOTFILEID;
+	fmp->pm_root = rvp;
+	fmp->pm_server = fp; fp->f_count++;	/* mount holds its own reference */
+
+	mp->mnt_flag |= MNT_LOCAL;
+	mp->mnt_data = (qaddr_t) fmp;
+	getnewfsid(mp, MOUNT_PORTAL);
+
+	(void)copyinstr(path, mp->mnt_stat.f_mntonname, MNAMELEN - 1, &size);
+	bzero(mp->mnt_stat.f_mntonname + size, MNAMELEN - size);
+	(void)copyinstr(args.pa_config,
+	    mp->mnt_stat.f_mntfromname, MNAMELEN - 1, &size);
+	bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size);
+
+#ifdef notdef
+	bzero(mp->mnt_stat.f_mntfromname, MNAMELEN);
+	bcopy("portal", mp->mnt_stat.f_mntfromname, sizeof("portal"));
+#endif
+
+	return (0);
+}
+
+int
+portal_start(mp, flags, p)
+	struct mount *mp;
+	int flags;
+	struct proc *p;
+{
+
+	return (0);		/* nothing to start; always succeeds */
+}
+
+int
+portal_unmount(mp, mntflags, p)
+	struct mount *mp;
+	int mntflags;
+	struct proc *p;
+{
+	extern int doforce;
+	struct portalmount *fmp = VFSTOPORTAL(mp);
+	struct vnode *rootvp = fmp->pm_root;
+	int error, flags = 0;
+
+	if (mntflags & MNT_FORCE) {
+		/* portal can never be rootfs so don't check for it */
+		if (!doforce)
+			return (EINVAL);
+		flags |= FORCECLOSE;
+	}
+
+	/*
+	 * Clear out buffer cache.  Nothing is believed to be
+	 * cached at this level at the moment, so the flush
+	 * below is not compiled in yet.
+	 */
+#ifdef notyet
+	mntflushbuf(mp, 0);
+	if (mntinvalbuf(mp, 1))
+		return (EBUSY);
+#endif
+	if (rootvp->v_usecount > 1)
+		return (EBUSY);
+	if ((error = vflush(mp, rootvp, flags)) != 0)
+		return (error);
+
+	/*
+	 * Drop the mount's reference on the root vnode
+	 */
+	vrele(rootvp);
+	/*
+	 * And blow it away for future re-use
+	 */
+	vgone(rootvp);
+	/*
+	 * Shutdown the socket.  This will cause the select in the
+	 * daemon to wake up, and then the accept will get ECONNABORTED
+	 * which it interprets as a request to go and bury itself.
+	 */
+	soshutdown((struct socket *) fmp->pm_server->f_data, 2);
+	/*
+	 * Discard reference to underlying file.  Must call closef because
+	 * this may be the last reference.
+	 */
+	closef(fmp->pm_server, (struct proc *) 0);
+	/*
+	 * Finally, throw away the portalmount structure
+	 */
+	free(mp->mnt_data, M_UFSMNT);	/* XXX */
+	mp->mnt_data = 0;
+	return (0);
+}
+
+int
+portal_root(mp, vpp)
+	struct mount *mp;
+	struct vnode **vpp;
+{
+	struct vnode *rootvp;
+
+	/*
+	 * Hand the root vnode back to the caller with a
+	 * fresh reference taken and the vnode locked.
+	 */
+	rootvp = VFSTOPORTAL(mp)->pm_root;
+	VREF(rootvp);
+	VOP_LOCK(rootvp);
+	*vpp = rootvp;
+	return (0);
+}
+
+int
+portal_quotactl(mp, cmd, uid, arg, p)
+	struct mount *mp;
+	int cmd;
+	uid_t uid;
+	caddr_t arg;
+	struct proc *p;
+{
+
+	return (EOPNOTSUPP);	/* quotas are not supported */
+}
+
+int
+portal_statfs(mp, sbp, p)
+	struct mount *mp;
+	struct statfs *sbp;
+	struct proc *p;
+{
+
+	sbp->f_type = MOUNT_PORTAL;
+	sbp->f_flags = 0;
+	sbp->f_bsize = DEV_BSIZE;
+	sbp->f_iosize = DEV_BSIZE;
+	sbp->f_blocks = 2;		/* two blocks, to keep df happy */
+	sbp->f_bfree = 0;
+	sbp->f_bavail = 0;
+	sbp->f_files = 1;		/* Allow for "." */
+	sbp->f_ffree = 0;		/* none free */
+	if (sbp == &mp->mnt_stat)	/* filling in the mount's own copy */
+		return (0);
+	bcopy(&mp->mnt_stat.f_fsid, &sbp->f_fsid, sizeof(sbp->f_fsid));
+	bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN);
+	bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN);
+	return (0);
+}
+
+int
+portal_sync(mp, waitfor)
+	struct mount *mp;
+	int waitfor;
+{
+
+	return (0);		/* nothing is written back here */
+}
+
+int
+portal_vget(mp, ino, vpp)
+	struct mount *mp;
+	ino_t ino;
+	struct vnode **vpp;
+{
+
+	return (EOPNOTSUPP);	/* no lookup by inode number */
+}
+
+int
+portal_fhtovp(mp, fhp, vpp)
+	struct mount *mp;
+	struct fid *fhp;
+	struct vnode **vpp;
+{
+
+	return (EOPNOTSUPP);	/* file handles are not supported */
+}
+
+int
+portal_vptofh(vp, fhp)
+	struct vnode *vp;
+	struct fid *fhp;
+{
+
+	return (EOPNOTSUPP);	/* file handles are not supported */
+}
+
+struct vfsops portal_vfsops = {	/* positional; order follows struct vfsops */
+	portal_mount,
+	portal_start,
+	portal_unmount,
+	portal_root,
+	portal_quotactl,
+	portal_statfs,
+	portal_sync,
+	portal_vget,
+	portal_fhtovp,
+	portal_vptofh,
+	portal_init,
+};
diff --git a/sys/miscfs/portal/portal_vnops.c b/sys/miscfs/portal/portal_vnops.c
new file mode 100644
index 00000000000..5e170261e71
--- /dev/null
+++ b/sys/miscfs/portal/portal_vnops.c
@@ -0,0 +1,707 @@
+/*
+ * Copyright (c) 1992, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software donated to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)portal_vnops.c	8.8 (Berkeley) 1/21/94
+ *
+ * $Id: portal_vnops.c,v 1.4 1992/05/30 10:05:24 jsp Exp jsp $
+ */
+
+/*
+ * Portal Filesystem
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/types.h>
+#include <sys/time.h>
+#include <sys/proc.h>
+#include <sys/filedesc.h>
+#include <sys/vnode.h>
+#include <sys/file.h>
+#include <sys/stat.h>
+#include <sys/mount.h>
+#include <sys/malloc.h>
+#include <sys/namei.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/un.h>
+#include <sys/unpcb.h>
+#include <miscfs/portal/portal.h>
+
+static int portal_fileid = PORTAL_ROOTFILEID+1;	/* next pt_fileid to hand out */
+
+static void
+portal_closefd(p, fd)
+	struct proc *p;
+	int fd;
+{
+	struct {
+		int fd;
+	} closeargs;
+	int retval;
+	int error;
+
+	closeargs.fd = fd;
+	error = close(p, &closeargs, &retval);
+	/*
+	 * A failure here is unexpected and there is no way to
+	 * recover from one, so it is only reported.
+	 */
+	if (error)
+		printf("portal_closefd: error = %d\n", error);
+}
+
+/*
+ * a_dvp is the current namei directory
+ * a_cnp is the name to locate in that directory...
+ */
+int
+portal_lookup(ap)
+	struct vop_lookup_args /* {
+		struct vnode * a_dvp;
+		struct vnode ** a_vpp;
+		struct componentname * a_cnp;
+	} */ *ap;
+{
+	struct componentname *cnp = ap->a_cnp;
+	struct vnode *dvp = ap->a_dvp;
+	struct vnode *fvp = 0;
+	struct portalnode *pt;
+	char *pname = cnp->cn_nameptr;
+	int error, size;
+
+	if (cnp->cn_namelen == 1 && *pname == '.') {
+		*ap->a_vpp = dvp;
+		VREF(dvp);
+		/*VOP_LOCK(dvp);*/
+		return (0);	/* "." is the directory itself */
+	}
+
+	/* Every other name gets a brand new regular-file vnode. */
+	error = getnewvnode(VT_PORTAL, dvp->v_mount, portal_vnodeop_p, &fvp);
+	if (error)
+		goto bad;
+	fvp->v_type = VREG;
+	MALLOC(fvp->v_data, void *, sizeof(struct portalnode),
+		M_TEMP, M_WAITOK);
+
+	pt = VTOPORTAL(fvp);
+	/*
+	 * Measure all of the remaining pathname, not just this
+	 * component, and have namei consume the excess so that
+	 * the whole remainder is saved as the portal argument.
+	 */
+	for (size = 0; pname[size] != '\0'; size++)
+		continue;
+	cnp->cn_consume = size - cnp->cn_namelen;
+
+	pt->pt_size = size+1;		/* room for the NUL as well */
+	pt->pt_arg = malloc(pt->pt_size, M_TEMP, M_WAITOK);
+	bcopy(pname, pt->pt_arg, pt->pt_size);
+	pt->pt_fileid = portal_fileid++;
+
+	*ap->a_vpp = fvp;
+	/*VOP_LOCK(fvp);*/
+	return (0);
+
+bad:;
+	if (fvp) {
+		vrele(fvp);
+	}
+	*ap->a_vpp = NULL;
+	return (error);
+}
+
+static int
+portal_connect(so, so2)
+	struct socket *so;
+	struct socket *so2;
+{
+	/* from unp_connect, bypassing the namei stuff... */
+	struct socket *newso;
+	struct unpcb *listenpcb;
+	struct unpcb *newpcb;
+
+	if (so2 == 0)
+		return (ECONNREFUSED);
+
+	if (so2->so_type != so->so_type)
+		return (EPROTOTYPE);
+
+	if (!(so2->so_options & SO_ACCEPTCONN))
+		return (ECONNREFUSED);
+
+	if ((newso = sonewconn(so2, 0)) == 0)
+		return (ECONNREFUSED);
+
+	listenpcb = sotounpcb(so2);
+	newpcb = sotounpcb(newso);
+	if (listenpcb->unp_addr)
+		newpcb->unp_addr = m_copy(listenpcb->unp_addr, 0, (int)M_COPYALL);
+
+	/*
+	 * Connect the caller to the socket just spawned from so2.
+	 */
+	return (unp_connect2(so, newso));
+}
+
+int
+portal_open(ap)
+	struct vop_open_args /* {
+		struct vnode *a_vp;
+		int  a_mode;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct socket *so = 0;		/* our end of the server connection */
+	struct portalnode *pt;
+	struct proc *p = ap->a_p;
+	struct vnode *vp = ap->a_vp;
+	int s;				/* saved spl */
+	struct uio auio;
+	struct iovec aiov[2];
+	int res;
+	struct mbuf *cm = 0;		/* control message from the server */
+	struct cmsghdr *cmsg;
+	int newfds;
+	int *ip;
+	int fd;
+	int error;
+	int len;
+	struct portalmount *fmp;
+	struct file *fp;
+	struct portal_cred pcred;
+
+	/*
+	 * Nothing to do when opening the root node.
+	 */
+	if (vp->v_flag & VROOT)
+		return (0);
+
+	/*
+	 * Can't be opened unless the caller is set up to deal
+	 * with the side effects.  A non-negative p_dupfd is
+	 * taken to mean that it is not.
+	 */
+	if (p->p_dupfd >= 0)
+		return (ENODEV);
+
+	pt = VTOPORTAL(vp);
+	fmp = VFSTOPORTAL(vp->v_mount);
+
+	/*
+	 * Create a new socket.
+	 */
+	error = socreate(AF_UNIX, &so, SOCK_STREAM, 0);
+	if (error)
+		goto bad;
+
+	/*
+	 * Reserve some buffer space
+	 */
+	res = pt->pt_size + sizeof(pcred) + 512;	/* XXX */
+	error = soreserve(so, res, res);
+	if (error)
+		goto bad;
+
+	/*
+	 * Kick off connection
+	 */
+	error = portal_connect(so, (struct socket *)fmp->pm_server->f_data);
+	if (error)
+		goto bad;
+
+	/*
+	 * Wait for connection to complete
+	 */
+	/*
+	 * XXX: Since the mount point is holding a reference on the
+	 * underlying server socket, it is not easy to find out whether
+	 * the server process is still running.  To handle this problem
+	 * we loop waiting for the new socket to be connected (something
+	 * which will only happen if the server is still running) or for
+	 * the reference count on the server socket to drop to 1, which
+	 * will happen if the server dies.  Sleep for 5 second intervals
+	 * and keep polling the reference count.   XXX.
+	 */
+	s = splnet();
+	while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
+		if (fmp->pm_server->f_count == 1) {
+			error = ECONNREFUSED;
+			splx(s);
+			goto bad;
+		}
+		(void) tsleep((caddr_t) &so->so_timeo, PSOCK, "portalcon", 5 * hz);
+	}
+	splx(s);
+
+	if (so->so_error) {
+		error = so->so_error;
+		goto bad;
+	}
+		
+	/*
+	 * Set miscellaneous flags
+	 */
+	so->so_rcv.sb_timeo = 0;
+	so->so_snd.sb_timeo = 0;
+	so->so_rcv.sb_flags |= SB_NOINTR;
+	so->so_snd.sb_flags |= SB_NOINTR;
+
+	/* Send the opener's credentials, then the saved pathname argument. */
+	pcred.pcr_flag = ap->a_mode;
+	pcred.pcr_uid = ap->a_cred->cr_uid;
+	pcred.pcr_ngroups = ap->a_cred->cr_ngroups;
+	bcopy(ap->a_cred->cr_groups, pcred.pcr_groups, NGROUPS * sizeof(gid_t));
+	aiov[0].iov_base = (caddr_t) &pcred;
+	aiov[0].iov_len = sizeof(pcred);
+	aiov[1].iov_base = pt->pt_arg;
+	aiov[1].iov_len = pt->pt_size;
+	auio.uio_iov = aiov;
+	auio.uio_iovcnt = 2;
+	auio.uio_rw = UIO_WRITE;
+	auio.uio_segflg = UIO_SYSSPACE;
+	auio.uio_procp = p;
+	auio.uio_offset = 0;
+	auio.uio_resid = aiov[0].iov_len + aiov[1].iov_len;
+
+	error = sosend(so, (struct mbuf *) 0, &auio,
+			(struct mbuf *) 0, (struct mbuf *) 0, 0);
+	if (error)
+		goto bad;
+
+	len = auio.uio_resid = sizeof(int);	/* reply carries an int error code */
+	do {
+		struct mbuf *m = 0;
+		int flags = MSG_WAITALL;
+		error = soreceive(so, (struct mbuf **) 0, &auio,
+					&m, &cm, &flags);
+		if (error)
+			goto bad;
+
+		/*
+		 * Grab an error code from the mbuf.
+		 */
+		if (m) {
+			m = m_pullup(m, sizeof(int));	/* Needed? */
+			if (m) {
+				error = *(mtod(m, int *));
+				m_freem(m);
+			} else {
+				error = EINVAL;
+			}
+		} else {
+			if (cm == 0) {
+				error = ECONNRESET;	 /* XXX */
+#ifdef notdef
+				break;
+#endif
+			}
+		}
+	} while (cm == 0 && auio.uio_resid == len && !error);
+
+	if (cm == 0)		/* no control message: return error as it stands */
+		goto bad;
+
+	if (auio.uio_resid) {
+		error = 0;	/* a short data reply is tolerated */
+#ifdef notdef
+		error = EMSGSIZE;
+		goto bad;
+#endif
+	}
+
+	/*
+	 * XXX: Break apart the control message, and retrieve the
+	 * received file descriptor.  Note that more than one descriptor
+	 * may have been received, or that the rights chain may have more
+	 * than a single mbuf in it.  What to do?
+	 */
+	cmsg = mtod(cm, struct cmsghdr *);
+	newfds = (cmsg->cmsg_len - sizeof(*cmsg)) / sizeof (int);
+	if (newfds == 0) {
+		error = ECONNREFUSED;
+		goto bad;
+	}
+	/*
+	 * At this point the rights message consists of a control message
+	 * header, followed by a data region containing a vector of
+	 * integer file descriptors.  The fds were allocated by the action
+	 * of receiving the control message.
+	 */
+	ip = (int *) (cmsg + 1);
+	fd = *ip++;		/* the first descriptor is the one we keep */
+	if (newfds > 1) {
+		/*
+		 * Close extra fds.
+		 */
+		int i;
+		printf("portal_open: %d extra fds\n", newfds - 1);
+		for (i = 1; i < newfds; i++) {
+			portal_closefd(p, *ip);
+			ip++;
+		}
+	}
+
+	/*
+	 * Check that the mode the file is being opened for is a subset
+	 * of the mode of the existing descriptor.
+	 */
+ 	fp = p->p_fd->fd_ofiles[fd];
+	if (((ap->a_mode & (FREAD|FWRITE)) | fp->f_flag) != fp->f_flag) {
+		portal_closefd(p, fd);
+		error = EACCES;
+		goto bad;
+	}
+
+	/*
+	 * Save the dup fd in the proc structure then return the
+	 * special error code (ENXIO) which causes magic things to
+	 * happen in vn_open.  The whole concept is, well, hmmm.
+	 */
+	p->p_dupfd = fd;
+	error = ENXIO;
+
+bad:;
+	/*
+	 * And discard the control message.
+	 */
+	if (cm) { 
+		m_freem(cm);
+	}
+
+	if (so) {		/* the connection is torn down on every path */
+		soshutdown(so, 2);
+		soclose(so);
+	}
+	return (error);
+}
+
+int
+portal_getattr(ap)
+	struct vop_getattr_args /* {
+		struct vnode *a_vp;
+		struct vattr *a_vap;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct vnode *vp = ap->a_vp;
+	struct vattr *vap = ap->a_vap;
+
+	bzero(vap, sizeof(*vap));
+	vattr_null(vap);
+	vap->va_uid = 0;
+	vap->va_gid = 0;
+	vap->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0];
+	vap->va_size = DEV_BSIZE;
+	vap->va_blocksize = DEV_BSIZE;
+	microtime(&vap->va_atime);	/* all three times read as "now" */
+	vap->va_mtime = vap->va_atime;
+	vap->va_ctime = vap->va_atime;
+	vap->va_gen = 0;
+	vap->va_flags = 0;
+	vap->va_rdev = 0;
+	/* vap->va_qbytes = 0; */
+	vap->va_bytes = 0;
+	/* vap->va_qsize = 0; */
+	if (vp->v_flag & VROOT) {
+		vap->va_type = VDIR;	/* root: rwx for everyone */
+		vap->va_mode = S_IRUSR|S_IWUSR|S_IXUSR|
+				S_IRGRP|S_IWGRP|S_IXGRP|
+				S_IROTH|S_IWOTH|S_IXOTH;
+		vap->va_nlink = 2;
+		vap->va_fileid = PORTAL_ROOTFILEID;
+	} else {
+		vap->va_type = VREG;	/* anything else: rw for everyone */
+		vap->va_mode = S_IRUSR|S_IWUSR|
+				S_IRGRP|S_IWGRP|
+				S_IROTH|S_IWOTH;
+		vap->va_nlink = 1;
+		vap->va_fileid = VTOPORTAL(vp)->pt_fileid;
+	}
+	return (0);
+}
+
+int
+portal_setattr(ap)
+	struct vop_setattr_args /* {
+		struct vnode *a_vp;
+		struct vattr *a_vap;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+
+	/*
+	 * Can't mess with the root vnode
+	 */
+	if (ap->a_vp->v_flag & VROOT)
+		return (EACCES);
+
+	return (0);	/* other vnodes: succeed, but a_vap is not applied */
+}
+
+/*
+ * Fake readdir: succeed without transferring anything through a_uio.
+ * It is hard to deal with '.' and '..' so don't bother.
+ */
+int
+portal_readdir(ap)
+	struct vop_readdir_args /* {
+		struct vnode *a_vp;
+		struct uio *a_uio;
+		struct ucred *a_cred;
+	} */ *ap;
+{
+
+	return (0);
+}
+
+int
+portal_inactive(ap)
+	struct vop_inactive_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+
+	return (0);	/* no work here; storage is freed in portal_reclaim */
+}
+
+int
+portal_reclaim(ap)
+	struct vop_reclaim_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+	struct vnode *vp = ap->a_vp;
+	struct portalnode *pt = VTOPORTAL(vp);
+
+	if (pt->pt_arg) {	/* release the saved pathname, if any */
+		free((caddr_t) pt->pt_arg, M_TEMP);
+		pt->pt_arg = 0;
+	}
+	FREE(vp->v_data, M_TEMP);	/* then the portalnode itself */
+	vp->v_data = 0;
+	return (0);
+}
+
+/*
+ * Return POSIX pathconf information applicable to special devices.
+ */
+int
+portal_pathconf(ap)
+	struct vop_pathconf_args /* {
+		struct vnode *a_vp;
+		int a_name;
+		int *a_retval;
+	} */ *ap;
+{
+	switch (ap->a_name) {
+	case _PC_LINK_MAX:
+		*ap->a_retval = LINK_MAX;
+		return (0);
+	case _PC_MAX_CANON:
+		*ap->a_retval = MAX_CANON;
+		return (0);
+	case _PC_MAX_INPUT:
+		*ap->a_retval = MAX_INPUT;
+		return (0);
+	case _PC_PIPE_BUF:
+		*ap->a_retval = PIPE_BUF;
+		return (0);
+	case _PC_CHOWN_RESTRICTED:
+		*ap->a_retval = 1;
+		return (0);
+	case _PC_VDISABLE:
+		*ap->a_retval = _POSIX_VDISABLE;
+		return (0);
+	default:
+		return (EINVAL);	/* a_name not handled; *a_retval untouched */
+	}
+	/* NOTREACHED */
+}
+
+/*
+ * Print a fixed tag line for a Portal vnode; no per-vnode state is shown.
+ */
+/* ARGSUSED */
+int
+portal_print(ap)
+	struct vop_print_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+
+	printf("tag VT_PORTAL, portal vnode\n");
+	return (0);
+}
+
+/*void*/
+int
+portal_vfree(ap)
+	struct vop_vfree_args /* {
+		struct vnode *a_pvp;
+		ino_t a_ino;
+		int a_mode;
+	} */ *ap;
+{
+
+	return (0);		/* nothing to free; always succeeds */
+}
+
+
+/*
+ * Portal vnode unsupported operation: always fails with EOPNOTSUPP
+ */
+int
+portal_enotsupp()
+{
+
+	return (EOPNOTSUPP);
+}
+
+/*
+ * Portal "should never get here" operation: panics if it is ever called
+ */
+int
+portal_badop()
+{
+
+	panic("portal: bad op");
+	/* NOTREACHED */
+}
+
+/*
+ * Portal vnode null operation: does nothing and reports success
+ */
+int
+portal_nullop()
+{
+
+	return (0);	/* NOTE(review): the op aliases in this file use nullop */
+}
+
+/*
+ * Operations with no portal-specific code are aliased to shared stubs.
+ */
+#define portal_create ((int (*) __P((struct vop_create_args *)))portal_enotsupp)
+#define portal_mknod ((int (*) __P((struct  vop_mknod_args *)))portal_enotsupp)
+#define portal_close ((int (*) __P((struct  vop_close_args *)))nullop)
+#define portal_access ((int (*) __P((struct  vop_access_args *)))nullop)
+#define portal_read ((int (*) __P((struct  vop_read_args *)))portal_enotsupp)
+#define portal_write ((int (*) __P((struct  vop_write_args *)))portal_enotsupp)
+#define portal_ioctl ((int (*) __P((struct  vop_ioctl_args *)))portal_enotsupp)
+#define portal_select ((int (*) __P((struct vop_select_args *)))portal_enotsupp)
+#define portal_mmap ((int (*) __P((struct  vop_mmap_args *)))portal_enotsupp)
+#define portal_fsync ((int (*) __P((struct  vop_fsync_args *)))nullop)
+#define portal_seek ((int (*) __P((struct  vop_seek_args *)))nullop)
+#define portal_remove ((int (*) __P((struct vop_remove_args *)))portal_enotsupp)
+#define portal_link ((int (*) __P((struct  vop_link_args *)))portal_enotsupp)
+#define portal_rename ((int (*) __P((struct vop_rename_args *)))portal_enotsupp)
+#define portal_mkdir ((int (*) __P((struct  vop_mkdir_args *)))portal_enotsupp)
+#define portal_rmdir ((int (*) __P((struct  vop_rmdir_args *)))portal_enotsupp)
+#define portal_symlink \
+	((int (*) __P((struct  vop_symlink_args *)))portal_enotsupp)
+#define portal_readlink \
+	((int (*) __P((struct  vop_readlink_args *)))portal_enotsupp)
+#define portal_abortop ((int (*) __P((struct  vop_abortop_args *)))nullop)
+#define portal_lock ((int (*) __P((struct  vop_lock_args *)))nullop)
+#define portal_unlock ((int (*) __P((struct  vop_unlock_args *)))nullop)
+#define portal_bmap ((int (*) __P((struct  vop_bmap_args *)))portal_badop)
+#define portal_strategy \
+	((int (*) __P((struct  vop_strategy_args *)))portal_badop)
+#define portal_islocked ((int (*) __P((struct  vop_islocked_args *)))nullop)
+#define portal_advlock \
+	((int (*) __P((struct  vop_advlock_args *)))portal_enotsupp)
+#define portal_blkatoff \
+	((int (*) __P((struct  vop_blkatoff_args *)))portal_enotsupp)
+#define portal_valloc \
+	((int (*) __P((struct  vop_valloc_args *)))portal_enotsupp)
+#define portal_truncate \
+	((int (*) __P((struct  vop_truncate_args *)))portal_enotsupp)
+#define portal_update ((int (*) __P((struct vop_update_args *)))portal_enotsupp)
+#define portal_bwrite ((int (*) __P((struct vop_bwrite_args *)))portal_enotsupp)
+
+int (**portal_vnodeop_p)();
+struct vnodeopv_entry_desc portal_vnodeop_entries[] = {
+	{ &vop_default_desc, vn_default_error },	/* default */
+	{ &vop_lookup_desc, portal_lookup },		/* lookup */
+	{ &vop_create_desc, portal_create },		/* create */
+	{ &vop_mknod_desc, portal_mknod },		/* mknod */
+	{ &vop_open_desc, portal_open },		/* open */
+	{ &vop_close_desc, portal_close },		/* close */
+	{ &vop_access_desc, portal_access },		/* access */
+	{ &vop_getattr_desc, portal_getattr },		/* getattr */
+	{ &vop_setattr_desc, portal_setattr },		/* setattr */
+	{ &vop_read_desc, portal_read },		/* read */
+	{ &vop_write_desc, portal_write },		/* write */
+	{ &vop_ioctl_desc, portal_ioctl },		/* ioctl */
+	{ &vop_select_desc, portal_select },		/* select */
+	{ &vop_mmap_desc, portal_mmap },		/* mmap */
+	{ &vop_fsync_desc, portal_fsync },		/* fsync */
+	{ &vop_seek_desc, portal_seek },		/* seek */
+	{ &vop_remove_desc, portal_remove },		/* remove */
+	{ &vop_link_desc, portal_link },		/* link */
+	{ &vop_rename_desc, portal_rename },		/* rename */
+	{ &vop_mkdir_desc, portal_mkdir },		/* mkdir */
+	{ &vop_rmdir_desc, portal_rmdir },		/* rmdir */
+	{ &vop_symlink_desc, portal_symlink },		/* symlink */
+	{ &vop_readdir_desc, portal_readdir },		/* readdir */
+	{ &vop_readlink_desc, portal_readlink },	/* readlink */
+	{ &vop_abortop_desc, portal_abortop },		/* abortop */
+	{ &vop_inactive_desc, portal_inactive },	/* inactive */
+	{ &vop_reclaim_desc, portal_reclaim },		/* reclaim */
+	{ &vop_lock_desc, portal_lock },		/* lock */
+	{ &vop_unlock_desc, portal_unlock },		/* unlock */
+	{ &vop_bmap_desc, portal_bmap },		/* bmap */
+	{ &vop_strategy_desc, portal_strategy },	/* strategy */
+	{ &vop_print_desc, portal_print },		/* print */
+	{ &vop_islocked_desc, portal_islocked },	/* islocked */
+	{ &vop_pathconf_desc, portal_pathconf },	/* pathconf */
+	{ &vop_advlock_desc, portal_advlock },		/* advlock */
+	{ &vop_blkatoff_desc, portal_blkatoff },	/* blkatoff */
+	{ &vop_valloc_desc, portal_valloc },		/* valloc */
+	{ &vop_vfree_desc, portal_vfree },		/* vfree */
+	{ &vop_truncate_desc, portal_truncate },	/* truncate */
+	{ &vop_update_desc, portal_update },		/* update */
+	{ &vop_bwrite_desc, portal_bwrite },		/* bwrite */
+	{ (struct vnodeop_desc*)NULL, (int(*)())NULL }	/* end of table */
+};
+struct vnodeopv_desc portal_vnodeop_opv_desc =
+	{ &portal_vnodeop_p, portal_vnodeop_entries };
diff --git a/sys/miscfs/procfs/README b/sys/miscfs/procfs/README
new file mode 100644
index 00000000000..38811b3f6e3
--- /dev/null
+++ b/sys/miscfs/procfs/README
@@ -0,0 +1,113 @@
+saute procfs lyonnais
+
+procfs supports two levels of directory.  the filesystem root
+directory contains a representation of the system process table.
+this consists of an entry for each active and zombie process, and
+an additional entry "curproc" which always represents the process
+making the lookup request.
+
+each of the sub-directories contains several files.  these files
+are used to control and interrogate processes.  the files implemented
+are:
+
+	file	- xxx.  the exec'ed file.
+
+	status  - r/o.  returns process status.
+
+	ctl	- w/o.  sends a control message to the process.
+			for example:
+				echo hup > /proc/curproc/ctl
+			will send a SIGHUP to the shell.
+			whereas
+				echo attach > /proc/1293/ctl
+			would set up process 1293 for debugging.
+			see below for more details.
+
+	mem	- r/w.  virtual memory image of the process.
+			parts of the address space are readable
+			only if they exist in the target process.
+			a more reasonable alternative might be
+			to return zero pages instead of an error.
+			comments?
+
+	note	- w/o.  writing a string here sends the
+			equivalent note to the process.
+			[ not implemented. ]
+
+	notepg	- w/o.  the same as note, but sends to all
+			members of the process group.
+			[ not implemented. ]
+
+	regs	- r/w.	process register set.  this can be read
+			or written any time even if the process
+			is not stopped.  since the bsd kernel
+			is single-processor, this implementation
+			will get the "right" register values.
+			a multi-proc kernel would need to do some
+			synchronisation.
+
+this then looks like:
+
+% ls -li /proc
+total 0
+   9 dr-xr-xr-x  2 root  wheel  0 Sep 21 15:06 0
+  17 dr-xr-xr-x  2 root  wheel  0 Sep 21 15:06 1
+  89 dr-xr-xr-x  2 root  wheel  0 Sep 21 15:06 10
+  25 dr-xr-xr-x  2 root  wheel  0 Sep 21 15:06 2
+2065 dr-xr-xr-x  2 root  wheel  0 Sep 21 15:06 257
+2481 dr-xr-xr-x  2 jsp   staff  0 Sep 21 15:06 309
+ 265 dr-xr-xr-x  2 root  wheel  0 Sep 21 15:06 32
+3129 dr-xr-xr-x  2 jsp   staff  0 Sep 21 15:06 390
+3209 dr-xr-xr-x  2 jsp   staff  0 Sep 21 15:06 400
+3217 dr-xr-xr-x  2 jsp   staff  0 Sep 21 15:06 401
+3273 dr-xr-xr-x  2 jsp   staff  0 Sep 21 15:06 408
+ 393 dr-xr-xr-x  2 root  wheel  0 Sep 21 15:06 48
+ 409 dr-xr-xr-x  2 root  wheel  0 Sep 21 15:06 50
+ 465 dr-xr-xr-x  2 root  wheel  0 Sep 21 15:06 57
+ 481 dr-xr-xr-x  2 root  wheel  0 Sep 21 15:06 59
+ 537 dr-xr-xr-x  2 root  kmem   0 Sep 21 15:06 66
+ 545 dr-xr-xr-x  2 root  wheel  0 Sep 21 15:06 67
+ 657 dr-xr-xr-x  2 jsp   staff  0 Sep 21 15:06 81
+ 665 dr-xr-xr-x  2 jsp   staff  0 Sep 21 15:06 82
+ 673 dr-xr-xr-x  2 jsp   staff  0 Sep 21 15:06 83
+ 681 dr-xr-xr-x  2 root  wheel  0 Sep 21 15:06 84
+3273 dr-xr-xr-x  2 jsp   staff  0 Sep 21 15:06 curproc
+% ls -li /proc/curproc
+total 408
+3341 --w-------  1 jsp  staff       0 Sep 21 15:06 ctl
+1554 -r-xr-xr-x  1 bin  bin     90112 Mar 29 04:52 file
+3339 -rw-------  1 jsp  staff  118784 Sep 21 15:06 mem
+3343 --w-------  1 jsp  staff       0 Sep 21 15:06 note
+3344 --w-------  1 jsp  staff       0 Sep 21 15:06 notepg
+3340 -rw-------  1 jsp  staff       0 Sep 21 15:06 regs
+3342 -r--r--r--  1 jsp  staff       0 Sep 21 15:06 status
+% df /proc/curproc /proc/curproc/file
+Filesystem  512-blocks    Used   Avail Capacity  Mounted on
+proc                 2       2       0   100%    /proc
+/dev/wd0a        16186   13548    1018    93%    /
+% cat /proc/curproc/status
+cat 446 439 400 81 12,0 ctty 748620684 270000 0 0 0 20000 nochan 11 20 20 20 0 21 117
+
+
+
+the basic sequence of commands written to "ctl" would be
+
+	attach		- this stops the target process and
+			  arranges for the sending process
+			  to become the debug control process
+	wait		- wait for the target process to come to
+			  a steady state ready for debugging.
+	step		- single step, with no signal delivery.
+	run		- continue running, with no signal delivery,
+			  until next trap or breakpoint.
+	<signame>	- deliver signal <signame> and continue running.
+	detach		- continue execution of the target process
+			  and remove it from control by the debug process
+
+in a normal debugging environment, where the target is fork/exec'd by
+the debugger, the debugger should fork and the child should stop itself
+(with a self-inflicted SIGSTOP).  the parent should do a "wait" then an
+"attach".  as before, the child will hit a breakpoint on the first
+instruction in any newly exec'd image.
+
+$Id: README,v 3.1 1993/12/15 09:40:17 jsp Exp $
diff --git a/sys/miscfs/procfs/procfs.h b/sys/miscfs/procfs/procfs.h
new file mode 100644
index 00000000000..f7b8fa3ef0e
--- /dev/null
+++ b/sys/miscfs/procfs/procfs.h
@@ -0,0 +1,186 @@
+/*
+ * Copyright (c) 1993 Jan-Simon Pendry
+ * Copyright (c) 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)procfs.h	8.6 (Berkeley) 2/3/94
+ *
+ * From:
+ *	$Id: procfs.h,v 3.2 1993/12/15 09:40:17 jsp Exp $
+ */
+
+/*
+ * The different types of node in a procfs filesystem.
+ *
+ * PROCFS_FILENO adds the numeric value of the node type to a
+ * pid-derived base to form a file id, so the enumerator values
+ * (and therefore their order here) feed into those ids.
+ */
+typedef enum {
+	Proot,		/* the filesystem root */
+	Pproc,		/* a process-specific sub-directory */
+	Pfile,		/* the executable file */
+	Pmem,		/* the process's memory image */
+	Pregs,		/* the process's register set */
+	Pfpregs,	/* the process's FP register set */
+	Pctl,		/* process control */
+	Pstatus,	/* process status */
+	Pnote,		/* process notifier */
+	Pnotepg		/* process group notifier */
+} pfstype;
+
+/*
+ * Control data for the proc file system.
+ *
+ * VTOPFS() recovers a pfsnode from a vnode's v_data, and PFSTOV()
+ * returns the vnode recorded in pfs_vnode.
+ */
+struct pfsnode {
+	struct pfsnode	*pfs_next;	/* next on list */
+	struct vnode	*pfs_vnode;	/* vnode associated with this pfsnode */
+	pfstype		pfs_type;	/* type of procfs node */
+	pid_t		pfs_pid;	/* associated process */
+	u_short		pfs_mode;	/* mode bits for stat() */
+	u_long		pfs_flags;	/* open flags */
+	u_long		pfs_fileno;	/* unique file id */
+};
+
+#define PROCFS_NOTELEN	64	/* max length of a note (/proc/$pid/note) */
+#define PROCFS_CTLLEN 	8	/* max length of a ctl msg (/proc/$pid/ctl) */
+
+/*
+ * Kernel stuff follows
+ */
+#ifdef KERNEL
+/*
+ * True iff the name held in (cnp) (cn_nameptr/cn_namelen) is exactly
+ * the (len) bytes found at (s).  Both (cnp) and (len) are expanded
+ * twice, so the arguments should be free of side effects.
+ */
+#define CNEQ(cnp, s, len) \
+	 ((cnp)->cn_namelen == (len) && \
+	  (bcmp((s), (cnp)->cn_nameptr, (len)) == 0))
+
+/*
+ * Format of a directory entry in /proc, ...
+ * This must map onto struct dirent (see <dirent.h>), so the order and
+ * types of the fields below must not be changed independently of it.
+ */
+#define PROCFS_NAMELEN 8	/* bytes reserved for d_name */
+struct pfsdent {
+	u_long	d_fileno;
+	u_short	d_reclen;
+	u_char	d_type;
+	u_char	d_namlen;
+	char	d_name[PROCFS_NAMELEN];
+};
+/* size in bytes of one struct pfsdent record */
+#define UIO_MX sizeof(struct pfsdent)
+/*
+ * Compute the file id for a node of the given type belonging to (pid).
+ * The root is always 2.  Every other node gets ((pid + 1) << 4) + type.
+ *
+ * pfstype has ten enumerators (Proot..Pnotepg, values 0..9), so each
+ * pid needs a window of 16 ids.  With a window of 8 (a shift of 3) the
+ * ids for Pnote and Pnotepg ran into the next pid's window, and
+ * /proc/N/notepg received the same id as the /proc/N+1 directory.
+ */
+#define PROCFS_FILENO(pid, type) \
+	(((type) == Proot) ? \
+			2 : \
+			((((pid)+1) << 4) + ((int) (type))))
+
+/*
+ * Convert between pfsnode and vnode
+ */
+#define VTOPFS(vp)	((struct pfsnode *)(vp)->v_data)
+#define PFSTOV(pfs)	((pfs)->pfs_vnode)
+
+/*
+ * One entry of a name -> value table of the kind passed to
+ * vfs_findname().  The tables built in procfs_ctl.c are ended by an
+ * all-zero entry.
+ */
+typedef struct vfs_namemap vfs_namemap_t;
+struct vfs_namemap {
+	const char *nm_name;	/* name to match */
+	int nm_val;		/* value associated with that name */
+};
+
+extern int vfs_getuserstr __P((struct uio *, char *, int *));
+extern vfs_namemap_t *vfs_findname __P((vfs_namemap_t *, char *, int));
+
+/* <machine/reg.h> */
+struct reg;
+struct fpreg;
+
+#define PFIND(pid) ((pid) ? pfind(pid) : &proc0)
+extern int procfs_freevp __P((struct vnode *));
+extern int procfs_allocvp __P((struct mount *, struct vnode **, long, pfstype));
+extern struct vnode *procfs_findtextvp __P((struct proc *));
+extern int procfs_sstep __P((struct proc *));
+extern void procfs_fix_sstep __P((struct proc *));
+extern int procfs_read_regs __P((struct proc *, struct reg *));
+extern int procfs_write_regs __P((struct proc *, struct reg *));
+extern int procfs_read_fpregs __P((struct proc *, struct fpreg *));
+extern int procfs_write_fpregs __P((struct proc *, struct fpreg *));
+extern int procfs_donote __P((struct proc *, struct proc *, struct pfsnode *pfsp, struct uio *uio));
+extern int procfs_doregs __P((struct proc *, struct proc *, struct pfsnode *pfsp, struct uio *uio));
+extern int procfs_dofpregs __P((struct proc *, struct proc *, struct pfsnode *pfsp, struct uio *uio));
+extern int procfs_domem __P((struct proc *, struct proc *, struct pfsnode *pfsp, struct uio *uio));
+extern int procfs_doctl __P((struct proc *, struct proc *, struct pfsnode *pfsp, struct uio *uio));
+extern int procfs_dostatus __P((struct proc *, struct proc *, struct pfsnode *pfsp, struct uio *uio));
+
+#define PROCFS_LOCKED	0x01
+#define PROCFS_WANT	0x02
+
+extern int (**procfs_vnodeop_p)();
+extern struct vfsops procfs_vfsops;
+
+/*
+ * Prototypes for procfs vnode ops
+ */
+int	procfs_badop();	/* varargs */
+int	procfs_rw __P((struct vop_read_args *));
+int	procfs_lookup __P((struct vop_lookup_args *));
+#define procfs_create ((int (*) __P((struct vop_create_args *))) procfs_badop)
+#define procfs_mknod ((int (*) __P((struct vop_mknod_args *))) procfs_badop)
+int	procfs_open __P((struct vop_open_args *));
+int	procfs_close __P((struct vop_close_args *));
+int	procfs_access __P((struct vop_access_args *));
+int	procfs_getattr __P((struct vop_getattr_args *));
+int	procfs_setattr __P((struct vop_setattr_args *));
+#define	procfs_read procfs_rw
+#define	procfs_write procfs_rw
+int	procfs_ioctl __P((struct vop_ioctl_args *));
+#define procfs_select ((int (*) __P((struct vop_select_args *))) procfs_badop)
+#define procfs_mmap ((int (*) __P((struct vop_mmap_args *))) procfs_badop)
+#define procfs_fsync ((int (*) __P((struct vop_fsync_args *))) procfs_badop)
+#define procfs_seek ((int (*) __P((struct vop_seek_args *))) procfs_badop)
+#define procfs_remove ((int (*) __P((struct vop_remove_args *))) procfs_badop)
+#define procfs_link ((int (*) __P((struct vop_link_args *))) procfs_badop)
+#define procfs_rename ((int (*) __P((struct vop_rename_args *))) procfs_badop)
+#define procfs_mkdir ((int (*) __P((struct vop_mkdir_args *))) procfs_badop)
+#define procfs_rmdir ((int (*) __P((struct vop_rmdir_args *))) procfs_badop)
+#define procfs_symlink ((int (*) __P((struct vop_symlink_args *))) procfs_badop)
+int	procfs_readdir __P((struct vop_readdir_args *));
+#define procfs_readlink ((int (*) __P((struct vop_readlink_args *))) procfs_badop)
+int	procfs_abortop __P((struct vop_abortop_args *));
+int	procfs_inactive __P((struct vop_inactive_args *));
+int	procfs_reclaim __P((struct vop_reclaim_args *));
+#define procfs_lock ((int (*) __P((struct vop_lock_args *))) nullop)
+#define procfs_unlock ((int (*) __P((struct vop_unlock_args *))) nullop)
+int	procfs_bmap __P((struct vop_bmap_args *));
+#define	procfs_strategy ((int (*) __P((struct vop_strategy_args *))) procfs_badop)
+int	procfs_print __P((struct vop_print_args *));
+#define procfs_islocked ((int (*) __P((struct vop_islocked_args *))) nullop)
+#define procfs_advlock ((int (*) __P((struct vop_advlock_args *))) procfs_badop)
+#define procfs_blkatoff ((int (*) __P((struct vop_blkatoff_args *))) procfs_badop)
+#define procfs_valloc ((int (*) __P((struct vop_valloc_args *))) procfs_badop)
+#define procfs_vfree ((int (*) __P((struct vop_vfree_args *))) nullop)
+#define procfs_truncate ((int (*) __P((struct vop_truncate_args *))) procfs_badop)
+#define procfs_update ((int (*) __P((struct vop_update_args *))) nullop)
+#endif /* KERNEL */
diff --git a/sys/miscfs/procfs/procfs_ctl.c b/sys/miscfs/procfs/procfs_ctl.c
new file mode 100644
index 00000000000..a42a03ce91c
--- /dev/null
+++ b/sys/miscfs/procfs/procfs_ctl.c
@@ -0,0 +1,302 @@
+/*
+ * Copyright (c) 1993 Jan-Simon Pendry
+ * Copyright (c) 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)procfs_ctl.c	8.3 (Berkeley) 1/21/94
+ *
+ * From:
+ *	$Id: procfs_ctl.c,v 3.2 1993/12/15 09:40:17 jsp Exp $
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/time.h>
+#include <sys/kernel.h>
+#include <sys/proc.h>
+#include <sys/vnode.h>
+#include <sys/ioctl.h>
+#include <sys/tty.h>
+#include <sys/resource.h>
+#include <sys/resourcevar.h>
+#include <miscfs/procfs/procfs.h>
+
+/*
+ * True iff process (p) is in trace wait state
+ * relative to process (curp): (p) is stopped, its parent
+ * is (curp), and it has P_TRACED set.
+ * (p) is expanded three times, so pass a side-effect free expression.
+ */
+#define TRACE_WAIT_P(curp, p) \
+	((p)->p_stat == SSTOP && \
+	 (p)->p_pptr == (curp) && \
+	 ((p)->p_flag & P_TRACED))
+
+#ifdef notdef
+#define FIX_SSTEP(p) { \
+		procfs_fix_sstep(p); \
+	} \
+}
+#else
+#define FIX_SSTEP(p)
+#endif
+
+/*
+ * Operation codes accepted by procfs_control().
+ */
+#define PROCFS_CTL_ATTACH	1
+#define PROCFS_CTL_DETACH	2
+#define PROCFS_CTL_STEP		3
+#define PROCFS_CTL_RUN		4
+#define PROCFS_CTL_WAIT		5
+
+/*
+ * Maps the command words written to the ctl file onto the
+ * PROCFS_CTL_* codes above.  The table ends with an all-zero entry.
+ */
+static vfs_namemap_t ctlnames[] = {
+	/* special /proc commands */
+	{ "attach",	PROCFS_CTL_ATTACH },
+	{ "detach",	PROCFS_CTL_DETACH },
+	{ "step",	PROCFS_CTL_STEP },
+	{ "run",	PROCFS_CTL_RUN },
+	{ "wait",	PROCFS_CTL_WAIT },
+	{ 0 },
+};
+
+/*
+ * Maps signal names (lower case, without the "SIG" prefix) onto signal
+ * numbers.  procfs_doctl() consults this table when the message is not
+ * one of the ctlnames commands.  The table ends with an all-zero entry.
+ */
+static vfs_namemap_t signames[] = {
+	/* regular signal names */
+	{ "hup",	SIGHUP },	{ "int",	SIGINT },
+	{ "quit",	SIGQUIT },	{ "ill",	SIGILL },
+	{ "trap",	SIGTRAP },	{ "abrt",	SIGABRT },
+	{ "iot",	SIGIOT },	{ "emt",	SIGEMT },
+	{ "fpe",	SIGFPE },	{ "kill",	SIGKILL },
+	{ "bus",	SIGBUS },	{ "segv",	SIGSEGV },
+	{ "sys",	SIGSYS },	{ "pipe",	SIGPIPE },
+	{ "alrm",	SIGALRM },	{ "term",	SIGTERM },
+	{ "urg",	SIGURG },	{ "stop",	SIGSTOP },
+	{ "tstp",	SIGTSTP },	{ "cont",	SIGCONT },
+	{ "chld",	SIGCHLD },	{ "ttin",	SIGTTIN },
+	{ "ttou",	SIGTTOU },	{ "io",		SIGIO },
+	{ "xcpu",	SIGXCPU },	{ "xfsz",	SIGXFSZ },
+	{ "vtalrm",	SIGVTALRM },	{ "prof",	SIGPROF },
+	{ "winch",	SIGWINCH },	{ "info",	SIGINFO },
+	{ "usr1",	SIGUSR1 },	{ "usr2",	SIGUSR2 },
+	{ 0 },
+};
+
+/*
+ * Carry out debug control operation (op), one of the PROCFS_CTL_*
+ * codes, on target process (p) on behalf of process (curp).
+ *
+ * Returns 0 on success, otherwise an errno value:
+ *	EBUSY	attach to a process that is already traced, or step/run
+ *		on a process that is not in trace wait state relative
+ *		to (curp); also a wait that ends with the target not in
+ *		that state
+ *	EINVAL	attempt to attach to oneself
+ *	other	whatever procfs_sstep() or tsleep() reported
+ * Panics if (op) is not a known operation.
+ */
+static int
+procfs_control(curp, p, op)
+	struct proc *curp;
+	struct proc *p;
+	int op;
+{
+	int error;
+
+	/*
+	 * Attach - attaches the target process for debugging
+	 * by the calling process.
+	 */
+	if (op == PROCFS_CTL_ATTACH) {
+		/* check whether already being traced */
+		if (p->p_flag & P_TRACED)
+			return (EBUSY);
+
+		/* can't trace yourself! */
+		if (p->p_pid == curp->p_pid)
+			return (EINVAL);
+
+		/*
+		 * Go ahead and set the trace flag.
+		 * Save the old parent (it's reset in
+		 *   _DETACH, and also in kern_exit.c:wait4()
+		 * Reparent the process so that the tracing
+		 *   proc gets to see all the action.
+		 * Stop the target.
+		 */
+		p->p_flag |= P_TRACED;
+		p->p_xstat = 0;		/* XXX ? */
+		if (p->p_pptr != curp) {
+			p->p_oppid = p->p_pptr->p_pid;
+			proc_reparent(p, curp);
+		}
+		psignal(p, SIGSTOP);
+		return (0);
+	}
+
+	/*
+	 * Target process must be stopped, owned by (curp) and
+	 * be set up for tracing (P_TRACED flag set).
+	 * Allow DETACH to take place at any time for sanity.
+	 * Allow WAIT any time, of course.
+	 */
+	switch (op) {
+	case PROCFS_CTL_DETACH:
+	case PROCFS_CTL_WAIT:
+		break;
+
+	default:
+		if (!TRACE_WAIT_P(curp, p))
+			return (EBUSY);
+	}
+
+	/*
+	 * do single-step fixup if needed
+	 */
+	FIX_SSTEP(p);
+
+	/*
+	 * Don't deliver any signal by default.
+	 * To continue with a signal, just send
+	 * the signal name to the ctl file
+	 */
+	p->p_xstat = 0;
+
+	switch (op) {
+	/*
+	 * Detach.  Cleans up the target process, reparent it if possible
+	 * and set it running once more.
+	 */
+	case PROCFS_CTL_DETACH:
+		/* if not being traced, then this is a painless no-op */
+		if ((p->p_flag & P_TRACED) == 0)
+			return (0);
+
+		/* not being traced any more */
+		p->p_flag &= ~P_TRACED;
+
+		/* give process back to original parent */
+		if (p->p_oppid != p->p_pptr->p_pid) {
+			struct proc *pp;
+
+			pp = pfind(p->p_oppid);
+			if (pp)
+				proc_reparent(p, pp);
+		}
+
+		p->p_oppid = 0;
+		p->p_flag &= ~P_WAITED;	/* XXX ? */
+		wakeup((caddr_t) curp);	/* XXX for CTL_WAIT below ? */
+
+		break;
+
+	/*
+	 * Step.  Let the target process execute a single instruction.
+	 * If the single step cannot be set up, report that to the
+	 * caller instead of resuming the target as if it had been.
+	 */
+	case PROCFS_CTL_STEP:
+		error = procfs_sstep(p);
+		if (error)
+			return (error);
+		break;
+
+	/*
+	 * Run.  Let the target process continue running until a breakpoint
+	 * or some other trap.
+	 */
+	case PROCFS_CTL_RUN:
+		break;
+
+	/*
+	 * Wait for the target process to stop.
+	 * If the target is being traced, also give up as soon as it is
+	 * no longer traced or no longer a child of (curp).
+	 * If the target is not being traced then just wait
+	 * for it to enter the stopped state.
+	 */
+	case PROCFS_CTL_WAIT:
+		error = 0;
+		if (p->p_flag & P_TRACED) {
+			while (error == 0 &&
+					(p->p_stat != SSTOP) &&
+					(p->p_flag & P_TRACED) &&
+					(p->p_pptr == curp)) {
+				error = tsleep((caddr_t) p,
+						PWAIT|PCATCH, "procfsx", 0);
+			}
+			if (error == 0 && !TRACE_WAIT_P(curp, p))
+				error = EBUSY;
+		} else {
+			while (error == 0 && p->p_stat != SSTOP) {
+				error = tsleep((caddr_t) p,
+						PWAIT|PCATCH, "procfs", 0);
+			}
+		}
+		return (error);
+
+	default:
+		panic("procfs_control");
+	}
+
+	/* detach, step and run all end by resuming a stopped target */
+	if (p->p_stat == SSTOP)
+		setrunnable(p);
+	return (0);
+}
+
+/*
+ * Handle a write to /proc/<pid>/ctl.
+ *
+ * The written text is looked up first as a debug control command and
+ * then as a signal name.  A command is handed to procfs_control().
+ * A signal is either used to continue a target that is in trace wait
+ * state relative to (curp), or simply posted to the target.
+ *
+ * Returns EOPNOTSUPP for anything but a write and for unknown
+ * commands/signals, the error from vfs_getuserstr() or
+ * procfs_control(), or 0.
+ */
+int
+procfs_doctl(curp, p, pfs, uio)
+	struct proc *curp;
+	struct proc *p;
+	struct pfsnode *pfs;
+	struct uio *uio;
+{
+	char msg[PROCFS_CTLLEN+1];
+	vfs_namemap_t *entry;
+	int msglen;
+	int error;
+
+	if (uio->uio_rw != UIO_WRITE)
+		return (EOPNOTSUPP);
+
+	msglen = PROCFS_CTLLEN;
+	error = vfs_getuserstr(uio, msg, &msglen);
+	if (error)
+		return (error);
+
+	/* debug control commands take precedence over signal names */
+	entry = vfs_findname(ctlnames, msg, msglen);
+	if (entry)
+		return (procfs_control(curp, p, entry->nm_val));
+
+	entry = vfs_findname(signames, msg, msglen);
+	if (!entry)
+		return (EOPNOTSUPP);
+
+	if (!TRACE_WAIT_P(curp, p)) {
+		/* not under our control: plain signal delivery */
+		psignal(p, entry->nm_val);
+		return (0);
+	}
+
+	/*
+	 * Sending a signal while the process is being debugged
+	 * also lets the target continue to run.  There is no way
+	 * to single-step a signal delivery.
+	 */
+	p->p_xstat = entry->nm_val;
+	FIX_SSTEP(p);
+	setrunnable(p);
+	return (0);
+}
diff --git a/sys/miscfs/procfs/procfs_fpregs.c b/sys/miscfs/procfs/procfs_fpregs.c
new file mode 100644
index 00000000000..6d850a6a881
--- /dev/null
+++ b/sys/miscfs/procfs/procfs_fpregs.c
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 1993 Jan-Simon Pendry
+ * Copyright (c) 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)procfs_fpregs.c	8.1 (Berkeley) 1/27/94
+ *
+ * From:
+ *	$Id: procfs_regs.c,v 3.2 1993/12/15 09:40:17 jsp Exp $
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/time.h>
+#include <sys/kernel.h>
+#include <sys/proc.h>
+#include <sys/vnode.h>
+#include <machine/reg.h>
+#include <miscfs/procfs/procfs.h>
+
+/*
+ * Read or write the floating point register set of process (p)
+ * through (uio).
+ *
+ * The registers are first fetched into a local struct fpreg; the uio
+ * offset selects where in that copy the transfer starts.  For a write,
+ * the modified copy is then stored back, which is only permitted while
+ * the target is stopped.
+ *
+ * Returns 0 on success; EINVAL if the offset lies outside the register
+ * set; EBUSY on a write to a process that is not stopped; otherwise the
+ * error from procfs_read_fpregs(), uiomove() or procfs_write_fpregs().
+ * uio_offset is reset to 0 on every return.
+ */
+int
+procfs_dofpregs(curp, p, pfs, uio)
+	struct proc *curp;
+	struct proc *p;
+	struct pfsnode *pfs;
+	struct uio *uio;
+{
+	int error;
+	struct fpreg r;
+	char *kv;
+	int kl;
+
+	/*
+	 * The offset is used to index into (r), so check it before doing
+	 * any arithmetic with it.  A negative offset, or one that only
+	 * looks small once narrowed to an int, would otherwise point
+	 * uiomove() at kernel stack outside (r).
+	 */
+	if (uio->uio_offset < 0 || uio->uio_offset > (off_t) sizeof(r)) {
+		uio->uio_offset = 0;
+		return (EINVAL);
+	}
+
+	kl = sizeof(r);
+	kv = (char *) &r;
+
+	kv += (int) uio->uio_offset;
+	kl -= (int) uio->uio_offset;
+	if (kl > uio->uio_resid)
+		kl = uio->uio_resid;
+
+	if (kl < 0)
+		error = EINVAL;
+	else
+		error = procfs_read_fpregs(p, &r);
+	if (error == 0)
+		error = uiomove(kv, kl, uio);
+	if (error == 0 && uio->uio_rw == UIO_WRITE) {
+		/* registers may only be changed while the target is stopped */
+		if (p->p_stat != SSTOP)
+			error = EBUSY;
+		else
+			error = procfs_write_fpregs(p, &r);
+	}
+
+	uio->uio_offset = 0;
+	return (error);
+}
diff --git a/sys/miscfs/procfs/procfs_mem.c b/sys/miscfs/procfs/procfs_mem.c
new file mode 100644
index 00000000000..039983da09c
--- /dev/null
+++ b/sys/miscfs/procfs/procfs_mem.c
@@ -0,0 +1,302 @@
+/*
+ * Copyright (c) 1993 Jan-Simon Pendry
+ * Copyright (c) 1993 Sean Eric Fagan
+ * Copyright (c) 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Jan-Simon Pendry and Sean Eric Fagan.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)procfs_mem.c	8.4 (Berkeley) 1/21/94
+ *
+ * From:
+ *	$Id: procfs_mem.c,v 3.2 1993/12/15 09:40:17 jsp Exp $
+ */
+
+/*
+ * This is a lightly hacked and merged version
+ * of sef's pread/pwrite functions
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/time.h>
+#include <sys/kernel.h>
+#include <sys/proc.h>
+#include <sys/vnode.h>
+#include <miscfs/procfs/procfs.h>
+#include <vm/vm.h>
+#include <vm/vm_kern.h>
+#include <vm/vm_page.h>
+
+/*
+ * Move data between (uio) and the address space of process (p).
+ *
+ * uio_offset is taken as the user virtual address in (p) and
+ * uio_rw gives the direction.  The transfer proceeds one page at a
+ * time: the page's VM object is mapped into kernel_map, uiomove()
+ * copies to or from that mapping, and the mapping is removed again.
+ * For a write to a page that is not writable, the protection is
+ * raised for the duration of the copy.
+ *
+ * Returns 0 when the request has been satisfied or the offset is
+ * beyond VM_MAXUSER_ADDRESS, otherwise the first error reported by
+ * the VM calls or by uiomove().
+ */
+static int
+procfs_rwmem(p, uio)
+	struct proc *p;
+	struct uio *uio;
+{
+	int error;
+	int writing;
+
+	writing = uio->uio_rw == UIO_WRITE;
+
+	/*
+	 * Only map in one page at a time.  We don't have to, but it
+	 * makes things easier.  This way is trivial - right?
+	 */
+	do {
+		vm_map_t map, tmap;
+		vm_object_t object;
+		vm_offset_t kva;
+		vm_offset_t uva;
+		int page_offset;		/* offset into page */
+		vm_offset_t pageno;		/* page number */
+		vm_map_entry_t out_entry;
+		vm_prot_t out_prot;
+		vm_page_t m;
+		boolean_t wired, single_use;
+		vm_offset_t off;
+		u_int len;
+		int fix_prot;
+
+		/*
+		 * An offset past the user address range ends the
+		 * transfer without an error and without moving data.
+		 */
+		uva = (vm_offset_t) uio->uio_offset;
+		if (uva > VM_MAXUSER_ADDRESS) {
+			error = 0;
+			break;
+		}
+
+		/*
+		 * Get the page number of this segment.
+		 * (pageno is the page-aligned address, not an index.)
+		 */
+		pageno = trunc_page(uva);
+		page_offset = uva - pageno;
+
+		/*
+		 * How many bytes to copy: up to the end of this page,
+		 * or whatever is left of the request if that is less.
+		 */
+		len = min(PAGE_SIZE - page_offset, uio->uio_resid);
+
+		/*
+		 * The map we want...
+		 */
+		map = &p->p_vmspace->vm_map;
+  
+		/*
+		 * Check the permissions for the area we're interested
+		 * in.
+		 */
+		fix_prot = 0;
+		if (writing)
+			fix_prot = !vm_map_check_protection(map, pageno,
+					pageno + PAGE_SIZE, VM_PROT_WRITE);
+
+		if (fix_prot) {
+			/*
+			 * If the page is not writable, we make it so.
+			 * XXX It is possible that a page may *not* be
+			 * read/executable, if a process changes that!
+			 * We will assume, for now, that a page is either
+			 * VM_PROT_ALL, or VM_PROT_READ|VM_PROT_EXECUTE.
+			 */
+			error = vm_map_protect(map, pageno,
+					pageno + PAGE_SIZE, VM_PROT_ALL, 0);
+			if (error)
+				break;
+		}
+
+		/*
+		 * Now we need to get the page.  out_entry, out_prot, wired,
+		 * and single_use aren't used.  One would think the vm code
+		 * would be a *bit* nicer...  We use tmap because
+		 * vm_map_lookup() can change the map argument.
+		 */
+		tmap = map;
+		error = vm_map_lookup(&tmap, pageno,
+				      writing ? VM_PROT_WRITE : VM_PROT_READ,
+				      &out_entry, &object, &off, &out_prot,
+				      &wired, &single_use);
+		/*
+		 * We're done with tmap now.
+		 */
+		if (!error)
+			vm_map_lookup_done(tmap, out_entry);
+  
+		/*
+		 * Fault the page in...
+		 * Only done for a write to an object that has a shadow,
+		 * when the page is absent or marked copy-on-write.
+		 */
+		if (!error && writing && object->shadow) {
+			m = vm_page_lookup(object, off);
+			if (m == 0 || (m->flags & PG_COPYONWRITE))
+				error = vm_fault(map, pageno,
+							VM_PROT_WRITE, FALSE);
+		}
+
+		/* Find space in kernel_map for the page we're interested in */
+		if (!error)
+			error = vm_map_find(kernel_map, object, off, &kva,
+					PAGE_SIZE, 1);
+
+		if (!error) {
+			/*
+			 * Neither vm_map_lookup() nor vm_map_find() appear
+			 * to add a reference count to the object, so we do
+			 * that here and now.
+			 */
+			vm_object_reference(object);
+
+			/*
+			 * Mark the page we just found as pageable.
+			 */
+			error = vm_map_pageable(kernel_map, kva,
+				kva + PAGE_SIZE, 0);
+
+			/*
+			 * Now do the i/o move.
+			 */
+			if (!error)
+				error = uiomove(kva + page_offset, len, uio);
+
+			/*
+			 * Drop the temporary kernel mapping, whether or
+			 * not the move succeeded.
+			 */
+			vm_map_remove(kernel_map, kva, kva + PAGE_SIZE);
+		}
+		/*
+		 * Undo the protection change made above.  This always
+		 * sets READ|EXECUTE, whatever the page had before (see
+		 * the XXX above); the result of the call is not checked.
+		 */
+		if (fix_prot)
+			vm_map_protect(map, pageno, pageno + PAGE_SIZE,
+					VM_PROT_READ|VM_PROT_EXECUTE, 0);
+	} while (error == 0 && uio->uio_resid > 0);
+
+	return (error);
+}
+
+/*
+ * Transfer data to or from the memory of the target process (p).
+ *
+ * The work is done by procfs_rwmem(), which maps the target's
+ * pages into the kernel one at a time and uiomove()s directly
+ * from the kernel address space.  An empty request succeeds
+ * immediately without touching the target.
+ */
+int
+procfs_domem(curp, p, pfs, uio)
+	struct proc *curp;
+	struct proc *p;
+	struct pfsnode *pfs;
+	struct uio *uio;
+{
+	/* nothing to move */
+	if (uio->uio_resid == 0)
+		return (0);
+
+	return (procfs_rwmem(p, uio));
+}
+
+/*
+ * Given process (p), find the vnode from which
+ * its text segment is being executed.
+ *
+ * It would be nice to grab this information from
+ * the VM system; however, there is no sure-fire
+ * way of doing that.  Instead, fork(), exec() and
+ * wait() all maintain the p_textvp field in the
+ * process proc structure, which contains a held
+ * reference to the exec'ed vnode.
+ */
+struct vnode *
+procfs_findtextvp(p)
+	struct proc *p;
+{
+	struct vnode *textvp;
+
+	textvp = p->p_textvp;
+	return (textvp);
+}
+
+
+#ifdef probably_never
+/*
+ * Given process (p), find the vnode from which
+ * its text segment is being mapped.
+ *
+ * This alternative is compiled only if probably_never is defined.
+ * It walks the user address range a page at a time and returns
+ * the vnode behind the first mapped object whose pager is of type
+ * PG_VNODE, or 0 if there is none.  It prints its progress
+ * with printf() as it goes.
+ *
+ * (This is here, rather than in procfs_subr in order
+ * to keep all the VM related code in one place.)
+ */
+struct vnode *
+procfs_findtextvp(p)
+	struct proc *p;
+{
+	int error;
+	vm_object_t object;
+	vm_offset_t pageno;		/* page number */
+
+	/* find a vnode pager for the user address space */
+
+	for (pageno = VM_MIN_ADDRESS;
+			pageno < VM_MAXUSER_ADDRESS;
+			pageno += PAGE_SIZE) {
+		vm_map_t map;
+		vm_map_entry_t out_entry;
+		vm_prot_t out_prot;
+		boolean_t wired, single_use;
+		vm_offset_t off;
+
+		/* map is reloaded each time round; vm_map_lookup() takes its address */
+		map = &p->p_vmspace->vm_map;
+		error = vm_map_lookup(&map, pageno,
+			      VM_PROT_READ,
+			      &out_entry, &object, &off, &out_prot,
+			      &wired, &single_use);
+
+		if (!error) {
+			vm_pager_t pager;
+
+			printf("procfs: found vm object\n");
+			vm_map_lookup_done(map, out_entry);
+			printf("procfs: vm object = %x\n", object);
+
+			/*
+			 * At this point, assuming no errors, object
+			 * is the VM object mapping UVA (pageno).
+			 * Ensure it has a vnode pager, then grab
+			 * the vnode from that pager's handle.
+			 */
+
+			pager = object->pager;
+			printf("procfs: pager = %x\n", pager);
+			if (pager)
+				printf("procfs: found pager, type = %d\n", pager->pg_type);
+			if (pager && pager->pg_type == PG_VNODE) {
+				struct vnode *vp;
+
+				vp = (struct vnode *) pager->pg_handle;
+				printf("procfs: vp = 0x%x\n", vp);
+				return (vp);
+			}
+		}
+	}
+
+	printf("procfs: text object not found\n");
+	return (0);
+}
+#endif /* probably_never */
diff --git a/sys/miscfs/procfs/procfs_note.c b/sys/miscfs/procfs/procfs_note.c
new file mode 100644
index 00000000000..bf2f160baa0
--- /dev/null
+++ b/sys/miscfs/procfs/procfs_note.c
@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) 1993 Jan-Simon Pendry
+ * Copyright (c) 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)procfs_note.c	8.2 (Berkeley) 1/21/94
+ *
+ * From:
+ *	$Id: procfs_note.c,v 3.2 1993/12/15 09:40:17 jsp Exp $
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/time.h>
+#include <sys/kernel.h>
+#include <sys/proc.h>
+#include <sys/vnode.h>
+#include <sys/signal.h>
+#include <miscfs/procfs/procfs.h>
+
+/*
+ * Handle a write to a process's note file.
+ *
+ * The note text is fetched from (uio), but delivery to the process
+ * is not implemented, so a well-formed write ends in EOPNOTSUPP.
+ * Anything but a write yields EINVAL; a failure to fetch the text
+ * returns the error from vfs_getuserstr().
+ */
+int
+procfs_donote(curp, p, pfs, uio)
+	struct proc *curp;
+	struct proc *p;
+	struct pfsnode *pfs;
+	struct uio *uio;
+{
+	char note[PROCFS_NOTELEN+1];
+	int notelen = PROCFS_NOTELEN;
+	int error;
+
+	if (uio->uio_rw != UIO_WRITE)
+		return (EINVAL);
+
+	error = vfs_getuserstr(uio, note, &notelen);
+	if (error != 0)
+		return (error);
+
+	/* send to process's notify function */
+	return (EOPNOTSUPP);
+}
diff --git a/sys/miscfs/procfs/procfs_regs.c b/sys/miscfs/procfs/procfs_regs.c
new file mode 100644
index 00000000000..fa95fef8f10
--- /dev/null
+++ b/sys/miscfs/procfs/procfs_regs.c
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 1993 Jan-Simon Pendry
+ * Copyright (c) 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)procfs_regs.c	8.3 (Berkeley) 1/27/94
+ *
+ * From:
+ *	$Id: procfs_regs.c,v 3.2 1993/12/15 09:40:17 jsp Exp $
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/time.h>
+#include <sys/kernel.h>
+#include <sys/proc.h>
+#include <sys/vnode.h>
+#include <machine/reg.h>
+#include <miscfs/procfs/procfs.h>
+
+/*
+ * read or write the register set of process (p) via (uio).
+ *
+ * the registers are first fetched into a local struct reg
+ * with procfs_read_regs(); uiomove() then transfers between
+ * that copy, starting uio_offset bytes into it, and the caller.
+ * for a write the updated copy is stored back with
+ * procfs_write_regs(), but only when the process is stopped
+ * (SSTOP); otherwise the result is EBUSY.
+ *
+ * returns EINVAL when uio_offset does not lie within
+ * [0, sizeof(struct reg)], otherwise the error from the
+ * register access or from uiomove().  uio_offset is always
+ * reset to zero before returning.
+ */
+int
+procfs_doregs(curp, p, pfs, uio)
+	struct proc *curp;
+	struct proc *p;
+	struct pfsnode *pfs;
+	struct uio *uio;
+{
+	int error;
+	struct reg r;
+	char *kv;
+	int kl;
+
+	kv = (char *) &r;
+	kl = 0;
+
+	/*
+	 * the offset is about to become a pointer adjustment into
+	 * the on-stack copy, so it must be range checked first: a
+	 * negative offset would otherwise place kv in front of (r)
+	 * and let uiomove touch unrelated kernel stack.
+	 */
+	if (uio->uio_offset < 0 || uio->uio_offset > (int) sizeof(r)) {
+		error = EINVAL;
+	} else {
+		kv += (int) uio->uio_offset;
+		kl = (int) sizeof(r) - (int) uio->uio_offset;
+		if (kl > uio->uio_resid)
+			kl = uio->uio_resid;
+		error = procfs_read_regs(p, &r);
+	}
+
+	if (error == 0)
+		error = uiomove(kv, kl, uio);
+	if (error == 0 && uio->uio_rw == UIO_WRITE) {
+		/* registers may only be changed while the process is stopped */
+		if (p->p_stat != SSTOP)
+			error = EBUSY;
+		else
+			error = procfs_write_regs(p, &r);
+	}
+
+	/* allow repeated transfers without an intervening seek */
+	uio->uio_offset = 0;
+	return (error);
+}
diff --git a/sys/miscfs/procfs/procfs_status.c b/sys/miscfs/procfs/procfs_status.c
new file mode 100644
index 00000000000..d88aaabdfb0
--- /dev/null
+++ b/sys/miscfs/procfs/procfs_status.c
@@ -0,0 +1,145 @@
+/*
+ * Copyright (c) 1993 Jan-Simon Pendry
+ * Copyright (c) 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)procfs_status.c	8.3 (Berkeley) 2/17/94
+ *
+ * From:
+ *	$Id: procfs_status.c,v 3.1 1993/12/15 09:40:17 jsp Exp $
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/time.h>
+#include <sys/kernel.h>
+#include <sys/proc.h>
+#include <sys/vnode.h>
+#include <sys/ioctl.h>
+#include <sys/tty.h>
+#include <sys/resource.h>
+#include <sys/resourcevar.h>
+#include <miscfs/procfs/procfs.h>
+
+/*
+ * produce the one-line text status of process (p) and copy
+ * the part selected by uio_offset/uio_resid out through (uio).
+ *
+ * only reads are supported (EOPNOTSUPP otherwise).  a negative
+ * offset is rejected with EINVAL; an offset at or beyond the
+ * end of the line transfers nothing and returns 0.  otherwise
+ * the result of uiomove() is returned.
+ */
+int
+procfs_dostatus(curp, p, pfs, uio)
+	struct proc *curp;
+	struct proc *p;
+	struct pfsnode *pfs;
+	struct uio *uio;
+{
+	struct session *sess;
+	struct tty *tp;
+	struct ucred *cr;
+	char *ps;
+	char *sep;
+	int pid, ppid, pgid, sid;
+	int i;
+	int xlen;
+	int error;
+	char psbuf[256];		/* XXX - conservative */
+
+	if (uio->uio_rw != UIO_READ)
+		return (EOPNOTSUPP);
+
+	/* a negative offset would index in front of psbuf below */
+	if (uio->uio_offset < 0)
+		return (EINVAL);
+
+	pid = p->p_pid;
+	ppid = p->p_pptr ? p->p_pptr->p_pid : 0;
+	pgid = p->p_pgrp->pg_id;
+	sess = p->p_pgrp->pg_session;
+	sid = sess->s_leader ? sess->s_leader->p_pid : 0;
+
+/* comm pid ppid pgid sid maj,min ctty,sldr start ut st wmsg uid groups ... */
+
+	/* command name: at most MAXCOMLEN characters, nul terminated here */
+	ps = psbuf;
+	bcopy(p->p_comm, ps, MAXCOMLEN);
+	ps[MAXCOMLEN] = '\0';
+	ps += strlen(ps);
+	ps += sprintf(ps, " %d %d %d %d ", pid, ppid, pgid, sid);
+
+	/* controlling terminal device, or -1,-1 if there is none */
+	if ((p->p_flag&P_CONTROLT) && (tp = sess->s_ttyp))
+		ps += sprintf(ps, "%d,%d ", major(tp->t_dev), minor(tp->t_dev));
+	else
+		ps += sprintf(ps, "%d,%d ", -1, -1);
+
+	/* comma separated flag names, or "noflags" when none apply */
+	sep = "";
+	if (sess->s_ttyvp) {
+		ps += sprintf(ps, "%sctty", sep);
+		sep = ",";
+	}
+	if (SESS_LEADER(p)) {
+		ps += sprintf(ps, "%ssldr", sep);
+		sep = ",";
+	}
+	if (*sep != ',')
+		ps += sprintf(ps, "noflags");
+
+	/* start time is only reported while P_INMEM is set */
+	if (p->p_flag & P_INMEM)
+		ps += sprintf(ps, " %d,%d",
+			p->p_stats->p_start.tv_sec,
+			p->p_stats->p_start.tv_usec);
+	else
+		ps += sprintf(ps, " -1,-1");
+
+	/* user and system time as computed by calcru() */
+	{
+		struct timeval ut, st;
+
+		calcru(p, &ut, &st, (void *) 0);
+		ps += sprintf(ps, " %d,%d %d,%d",
+			ut.tv_sec,
+			ut.tv_usec,
+			st.tv_sec,
+			st.tv_usec);
+	}
+
+	/* wait message, if the process is waiting on a channel */
+	ps += sprintf(ps, " %s",
+		(p->p_wchan && p->p_wmesg) ? p->p_wmesg : "nochan");
+
+	cr = p->p_ucred;
+
+	/*
+	 * uid followed by the group list.  the format has a single
+	 * conversion, so only the uid is passed to it.
+	 */
+	ps += sprintf(ps, " %d", cr->cr_uid);
+	for (i = 0; i < cr->cr_ngroups; i++)
+		ps += sprintf(ps, ",%d", cr->cr_groups[i]);
+	ps += sprintf(ps, "\n");
+
+	/*
+	 * work out which part of the line was asked for.  the
+	 * bounds are checked with plain signed comparisons so the
+	 * result does not depend on how a min() helper treats a
+	 * negative length.
+	 */
+	xlen = ps - psbuf;
+	if (uio->uio_offset >= xlen)
+		return (0);		/* at or past end of line */
+	ps = psbuf + (int) uio->uio_offset;
+	xlen -= (int) uio->uio_offset;
+	if (xlen > uio->uio_resid)
+		xlen = uio->uio_resid;
+	if (xlen <= 0)
+		error = 0;
+	else
+		error = uiomove(ps, xlen, uio);
+
+	return (error);
+}
diff --git a/sys/miscfs/procfs/procfs_subr.c b/sys/miscfs/procfs/procfs_subr.c
new file mode 100644
index 00000000000..b371af19af0
--- /dev/null
+++ b/sys/miscfs/procfs/procfs_subr.c
@@ -0,0 +1,314 @@
+/*
+ * Copyright (c) 1993 Jan-Simon Pendry
+ * Copyright (c) 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)procfs_subr.c	8.4 (Berkeley) 1/27/94
+ *
+ * From:
+ *	$Id: procfs_subr.c,v 3.2 1993/12/15 09:40:17 jsp Exp $
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/time.h>
+#include <sys/kernel.h>
+#include <sys/proc.h>
+#include <sys/vnode.h>
+#include <sys/malloc.h>
+#include <miscfs/procfs/procfs.h>
+
+/* head of the singly-linked list of all pfsnodes (linked by pfs_next) */
+static struct pfsnode *pfshead;
+/* PROCFS_LOCKED / PROCFS_WANT bits guarding allocation in procfs_allocvp */
+static int pfsvplock;
+
+/*
+ * allocate a pfsnode/vnode pair.  the vnode is
+ * referenced, but not locked.
+ *
+ * the pid, pfs_type, and mount point uniquely
+ * identify a pfsnode.  the mount point is needed
+ * because someone might mount this filesystem
+ * twice.
+ *
+ * all pfsnodes are maintained on a singly-linked
+ * list.  new nodes are only allocated when they cannot
+ * be found on this list.  entries on the list are
+ * removed when the vfs reclaim entry is called.
+ *
+ * a single lock is kept for the entire list.  this is
+ * needed because the getnewvnode() function can block
+ * waiting for a vnode to become free, in which case there
+ * may be more than one process trying to get the same
+ * vnode.  this lock is only taken if we are going to
+ * call getnewvnode, since the kernel itself is single-threaded.
+ *
+ * if an entry is found on the list, then call vget() to
+ * take a reference.  this is done because there may be
+ * zero references to it and so it needs to be removed from
+ * the vnode free list.
+ */
+int
+procfs_allocvp(mp, vpp, pid, pfs_type)
+	struct mount *mp;
+	struct vnode **vpp;
+	long pid;
+	pfstype pfs_type;
+{
+	struct pfsnode *node;
+	struct pfsnode **tailp;
+	int error;
+
+	for (;;) {
+		/* look for an existing node for (pid, pfs_type, mp) */
+		node = pfshead;
+		while (node != 0) {
+			if (node->pfs_pid == pid &&
+			    node->pfs_type == pfs_type &&
+			    PFSTOV(node)->v_mount == mp)
+				break;
+			node = node->pfs_next;
+		}
+
+		if (node != 0) {
+			/* found: take a reference, or rescan if that fails */
+			if (vget(node->pfs_vnode, 0))
+				continue;
+			*vpp = node->pfs_vnode;
+			return (0);
+		}
+
+		/*
+		 * not found.  the list lock must be held across
+		 * getnewvnode since that can block; if somebody
+		 * else holds it, wait and then rescan the list.
+		 */
+		if ((pfsvplock & PROCFS_LOCKED) == 0)
+			break;
+		pfsvplock |= PROCFS_WANT;
+		sleep((caddr_t) &pfsvplock, PINOD);
+	}
+	pfsvplock |= PROCFS_LOCKED;
+
+	error = getnewvnode(VT_PROCFS, mp, procfs_vnodeop_p, vpp);
+	if (error)
+		goto unlock;
+
+	MALLOC((*vpp)->v_data, void *, sizeof(struct pfsnode),
+		M_TEMP, M_WAITOK);
+
+	/* fill in the private part of the new node */
+	node = VTOPFS(*vpp);
+	node->pfs_next = 0;
+	node->pfs_pid = (pid_t) pid;
+	node->pfs_type = pfs_type;
+	node->pfs_vnode = *vpp;
+	node->pfs_flags = 0;
+	node->pfs_fileno = PROCFS_FILENO(pid, pfs_type);
+
+	/* permission bits depend only on the kind of node */
+	switch (pfs_type) {
+	case Proot:	/* /proc = dr-xr-xr-x */
+	case Pproc:
+		node->pfs_mode = (VREAD|VEXEC) |
+				 (VREAD|VEXEC) >> 3 |
+				 (VREAD|VEXEC) >> 6;
+		break;
+
+	case Pfile:
+	case Pmem:
+	case Pregs:
+	case Pfpregs:
+		node->pfs_mode = (VREAD|VWRITE);
+		break;
+
+	case Pctl:
+	case Pnote:
+	case Pnotepg:
+		node->pfs_mode = (VWRITE);
+		break;
+
+	case Pstatus:
+		node->pfs_mode = (VREAD) |
+				 (VREAD >> 3) |
+				 (VREAD >> 6);
+		break;
+
+	default:
+		panic("procfs_allocvp");
+	}
+
+	/* append the node to the end of the procfs vnode list */
+	tailp = &pfshead;
+	while (*tailp != 0)
+		tailp = &(*tailp)->pfs_next;
+	*tailp = node;
+
+unlock:
+	/* drop the list lock and wake anybody who was waiting for it */
+	pfsvplock &= ~PROCFS_LOCKED;
+
+	if (pfsvplock & PROCFS_WANT) {
+		pfsvplock &= ~PROCFS_WANT;
+		wakeup((caddr_t) &pfsvplock);
+	}
+
+	return (error);
+}
+
+/*
+ * release the pfsnode attached to (vp): unlink it from the
+ * pfsnode list if it is there, free the storage and clear
+ * vp->v_data.  always returns 0.
+ */
+int
+procfs_freevp(vp)
+	struct vnode *vp;
+{
+	struct pfsnode *target = VTOPFS(vp);
+	struct pfsnode **linkp = &pfshead;
+
+	/* advance to the link that points at the node, or to the list end */
+	while (*linkp != 0 && *linkp != target)
+		linkp = &(*linkp)->pfs_next;
+
+	if (*linkp != 0)
+		*linkp = target->pfs_next;
+
+	FREE(vp->v_data, M_TEMP);
+	vp->v_data = 0;
+	return (0);
+}
+
+/*
+ * common read/write entry point.  finds the process the
+ * pfsnode refers to and hands the uio to the handler for
+ * that type of node.
+ *
+ * returns EINVAL if the process cannot be found, EOPNOTSUPP
+ * for node types with no i/o handler, otherwise whatever the
+ * handler returns.
+ */
+int
+procfs_rw(ap)
+	struct vop_read_args *ap;
+{
+	struct pfsnode *pfs = VTOPFS(ap->a_vp);
+	struct uio *uio = ap->a_uio;
+	struct proc *curp = uio->uio_procp;
+	struct proc *p;
+	int error;
+
+	p = PFIND(pfs->pfs_pid);
+	if (p == 0)
+		return (EINVAL);
+
+	switch (pfs->pfs_type) {
+	case Pnote:
+	case Pnotepg:
+		error = procfs_donote(curp, p, pfs, uio);
+		break;
+
+	case Pregs:
+		error = procfs_doregs(curp, p, pfs, uio);
+		break;
+
+	case Pfpregs:
+		error = procfs_dofpregs(curp, p, pfs, uio);
+		break;
+
+	case Pctl:
+		error = procfs_doctl(curp, p, pfs, uio);
+		break;
+
+	case Pstatus:
+		error = procfs_dostatus(curp, p, pfs, uio);
+		break;
+
+	case Pmem:
+		error = procfs_domem(curp, p, pfs, uio);
+		break;
+
+	default:
+		error = EOPNOTSUPP;
+		break;
+	}
+
+	return (error);
+}
+
+/*
+ * Get a string from userland into (buf).  Strip a trailing
+ * nl character (to allow easy access from the shell).
+ * The buffer should be *buflenp + 1 chars long.  vfs_getuserstr
+ * will automatically add a nul char at the end.
+ *
+ * Returns 0 on success or the following errors
+ *
+ * EINVAL:    file offset is non-zero.
+ * EMSGSIZE:  message is longer than kernel buffer
+ * EFAULT:    user i/o buffer is not addressable
+ */
+int
+vfs_getuserstr(uio, buf, buflenp)
+	struct uio *uio;
+	char *buf;
+	int *buflenp;
+{
+	int len;
+	int error;
+
+	/* the string has to be written at the start of the file */
+	if (uio->uio_offset != 0)
+		return (EINVAL);
+
+	/* ... and has to fit into the buffer in a single transfer */
+	if (*buflenp < uio->uio_resid)
+		return (EMSGSIZE);
+	len = uio->uio_resid;
+
+	error = uiomove(buf, len, uio);
+	if (error)
+		return (error);
+
+	/* rewind so that further writes need no seek */
+	uio->uio_offset = 0;
+
+	/* terminate the string, then drop one trailing newline if present */
+	buf[len] = '\0';
+	len = strlen(buf);
+	if (len > 0 && buf[len - 1] == '\n') {
+		len--;
+		buf[len] = '\0';
+	}
+	*buflenp = len;
+
+	return (0);
+}
+
+/*
+ * look up the name held in (buf) in the name table (nm).
+ * the table ends at the first entry whose nm_name is null.
+ *
+ * (buflen) is the length of the name; this assumes
+ * buf[buflen] is its terminating nul, which is what
+ * vfs_getuserstr produces.
+ *
+ * returns the matching table entry, or 0 if there is none.
+ */
+vfs_namemap_t *
+vfs_findname(nm, buf, buflen)
+	vfs_namemap_t *nm;
+	char *buf;
+	int buflen;
+{
+	for (; nm->nm_name; nm++) {
+		/*
+		 * check the lengths first so that bcmp is never
+		 * asked to read beyond the terminator of a table
+		 * name that is shorter than (buf).
+		 */
+		if ((int) strlen((char *) nm->nm_name) != buflen)
+			continue;
+		if (bcmp(buf, (char *) nm->nm_name, buflen) == 0)
+			return (nm);
+	}
+
+	return (0);
+}
diff --git a/sys/miscfs/procfs/procfs_vfsops.c b/sys/miscfs/procfs/procfs_vfsops.c
new file mode 100644
index 00000000000..3938ca12357
--- /dev/null
+++ b/sys/miscfs/procfs/procfs_vfsops.c
@@ -0,0 +1,243 @@
+/*
+ * Copyright (c) 1993 Jan-Simon Pendry
+ * Copyright (c) 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)procfs_vfsops.c	8.4 (Berkeley) 1/21/94
+ *
+ * From:
+ *	$Id: procfs_vfsops.c,v 3.1 1993/12/15 09:40:17 jsp Exp $
+ */
+
+/*
+ * procfs VFS interface
+ */
+
+#include <sys/param.h>
+#include <sys/time.h>
+#include <sys/kernel.h>
+#include <sys/proc.h>
+#include <sys/buf.h>
+#include <sys/syslog.h>
+#include <sys/mount.h>
+#include <sys/signalvar.h>
+#include <sys/vnode.h>
+#include <miscfs/procfs/procfs.h>
+#include <vm/vm.h>			/* for PAGE_SIZE */
+
+/*
+ * VFS Operations.
+ *
+ * mount system call
+ */
+/* ARGSUSED */
+/*
+ * mount a procfs on (mp).
+ *
+ * there is no per-mount private data (mnt_data is zero);
+ * the mount is flagged MNT_LOCAL, given a new fsid, and has
+ * its "mounted on" and "mounted from" names filled in.
+ *
+ * returns EINVAL if UIO_MX is not a power of two, EOPNOTSUPP
+ * for an MNT_UPDATE request, otherwise 0.
+ */
+procfs_mount(mp, path, data, ndp, p)
+	struct mount *mp;
+	char *path;
+	caddr_t data;
+	struct nameidata *ndp;
+	struct proc *p;
+{
+	u_int size = 0;		/* stays 0 if copyinstr stores nothing */
+
+	if (UIO_MX & (UIO_MX-1)) {
+		log(LOG_ERR, "procfs: invalid directory entry size");
+		return (EINVAL);
+	}
+
+	if (mp->mnt_flag & MNT_UPDATE)
+		return (EOPNOTSUPP);
+
+	mp->mnt_flag |= MNT_LOCAL;
+	mp->mnt_data = 0;
+	getnewfsid(mp, MOUNT_PROCFS);
+
+	/*
+	 * copy at most MNAMELEN - 1 bytes so that the bzero below
+	 * always clears at least the final byte; the mount point
+	 * name is then nul terminated even when the path is too long.
+	 */
+	(void) copyinstr(path, (caddr_t)mp->mnt_stat.f_mntonname,
+	    MNAMELEN - 1, &size);
+	bzero(mp->mnt_stat.f_mntonname + size, MNAMELEN - size);
+
+	/* the "device" name is simply the string procfs, zero padded */
+	size = sizeof("procfs") - 1;
+	bcopy("procfs", mp->mnt_stat.f_mntfromname, size);
+	bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size);
+
+	return (0);
+}
+
+/*
+ * unmount system call
+ */
+procfs_unmount(mp, mntflags, p)
+	struct mount *mp;
+	int mntflags;
+	struct proc *p;
+{
+	extern int doforce;
+	int flags;
+	int error;
+
+	flags = 0;
+	if (mntflags & MNT_FORCE) {
+		/*
+		 * forced unmounts are only honoured when doforce is
+		 * set.  procfs can never be rootfs so there is no
+		 * need to check for that here.
+		 */
+		if (!doforce)
+			return (EINVAL);
+		flags |= FORCECLOSE;
+	}
+
+	/* flush the vnodes of this mount; the result is ours too */
+	error = vflush(mp, 0, flags);
+	return (error);
+}
+
+/*
+ * return the root vnode of the procfs mounted on (mp) in
+ * (*vpp).  the vnode comes from procfs_allocvp() with pid 0
+ * and type Proot, and is marked as a directory and as the
+ * root of its filesystem.
+ *
+ * returns 0, or the error from procfs_allocvp().
+ */
+procfs_root(mp, vpp)
+	struct mount *mp;
+	struct vnode **vpp;
+{
+	struct vnode *vp;
+	int error;
+
+	error = procfs_allocvp(mp, &vp, (pid_t) 0, Proot);
+	if (error)
+		return (error);
+
+	vp->v_type = VDIR;
+	/*
+	 * procfs_allocvp may hand back a vnode that already exists,
+	 * so add VROOT to the flags rather than overwriting any
+	 * that are currently set.
+	 */
+	vp->v_flag |= VROOT;
+
+	*vpp = vp;
+	return (0);
+}
+
+/*
+ * start entry of the procfs vfsops table.
+ * procfs has nothing to do here; always returns 0.
+ */
+/* ARGSUSED */
+procfs_start(mp, flags, p)
+	struct mount *mp;
+	int flags;
+	struct proc *p;
+{
+
+	return (0);
+}
+
+/*
+ * Get file system statistics.
+ */
+procfs_statfs(mp, sbp, p)
+	struct mount *mp;
+	struct statfs *sbp;
+	struct proc *p;
+{
+	/* block figures are invented: page sized blocks, none free */
+	sbp->f_type = MOUNT_PROCFS;
+	sbp->f_iosize = PAGE_SIZE;
+	sbp->f_bsize = PAGE_SIZE;
+	sbp->f_bavail = 0;
+	sbp->f_bfree = 0;
+	sbp->f_blocks = 1;	/* avoid divide by zero in some df's */
+
+	/* file counts follow the process table limits */
+	sbp->f_ffree = maxproc - nprocs;	/* approx */
+	sbp->f_files = maxproc;			/* approx */
+
+	/* nothing more to do when filling in the mount's own statfs */
+	if (sbp == &mp->mnt_stat)
+		return (0);
+
+	bcopy(&mp->mnt_stat.f_fsid, &sbp->f_fsid, sizeof(sbp->f_fsid));
+	bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN);
+	bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN);
+
+	return (0);
+}
+
+
+/*
+ * quotactl entry of the procfs vfsops table.
+ * no quota operation is supported; always returns EOPNOTSUPP.
+ */
+procfs_quotactl(mp, cmds, uid, arg, p)
+	struct mount *mp;
+	int cmds;
+	uid_t uid;
+	caddr_t arg;
+	struct proc *p;
+{
+
+	return (EOPNOTSUPP);
+}
+
+/*
+ * sync entry of the procfs vfsops table.
+ * does nothing and always returns 0.
+ */
+procfs_sync(mp, waitfor)
+	struct mount *mp;
+	int waitfor;
+{
+
+	return (0);
+}
+
+/*
+ * vget entry of the procfs vfsops table.
+ * fetching a vnode by (ino) is not supported;
+ * always returns EOPNOTSUPP and leaves (*vpp) untouched.
+ */
+procfs_vget(mp, ino, vpp)
+	struct mount *mp;
+	ino_t ino;
+	struct vnode **vpp;
+{
+
+	return (EOPNOTSUPP);
+}
+
+/*
+ * fhtovp entry of the procfs vfsops table.
+ * file handles are not supported; always returns EINVAL
+ * and leaves (*vpp) untouched.
+ */
+procfs_fhtovp(mp, fhp, vpp)
+	struct mount *mp;
+	struct fid *fhp;
+	struct vnode **vpp;
+{
+
+	return (EINVAL);
+}
+
+/*
+ * vptofh entry of the procfs vfsops table.
+ * file handles are not supported; always returns EINVAL
+ * and leaves (*fhp) untouched.
+ */
+procfs_vptofh(vp, fhp)
+	struct vnode *vp;
+	struct fid *fhp;
+{
+
+	return (EINVAL);
+}
+
+/*
+ * init entry of the procfs vfsops table.
+ * nothing needs setting up; always returns 0.
+ */
+procfs_init()
+{
+
+	return (0);
+}
+
+/*
+ * the VFS operations vector for procfs, built from the
+ * entry points defined above.  the initializers are
+ * positional, so their order must match the member order
+ * of struct vfsops.
+ */
+struct vfsops procfs_vfsops = {
+	procfs_mount,
+	procfs_start,		/* no-op */
+	procfs_unmount,
+	procfs_root,
+	procfs_quotactl,	/* always EOPNOTSUPP */
+	procfs_statfs,
+	procfs_sync,		/* no-op */
+	procfs_vget,		/* always EOPNOTSUPP */
+	procfs_fhtovp,		/* always EINVAL */
+	procfs_vptofh,		/* always EINVAL */
+	procfs_init,		/* no-op */
+};
diff --git a/sys/miscfs/procfs/procfs_vnops.c b/sys/miscfs/procfs/procfs_vnops.c
new file mode 100644
index 00000000000..4e1ee002bb9
--- /dev/null
+++ b/sys/miscfs/procfs/procfs_vnops.c
@@ -0,0 +1,814 @@
+/*
+ * Copyright (c) 1993 Jan-Simon Pendry
+ * Copyright (c) 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)procfs_vnops.c	8.6 (Berkeley) 2/7/94
+ *
+ * From:
+ *	$Id: procfs_vnops.c,v 3.2 1993/12/15 09:40:17 jsp Exp $
+ */
+
+/*
+ * procfs vnode interface
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/time.h>
+#include <sys/kernel.h>
+#include <sys/file.h>
+#include <sys/proc.h>
+#include <sys/vnode.h>
+#include <sys/namei.h>
+#include <sys/malloc.h>
+#include <sys/dirent.h>
+#include <sys/resourcevar.h>
+#include <miscfs/procfs/procfs.h>
+#include <vm/vm.h>	/* for PAGE_SIZE */
+
+/*
+ * Vnode Operations.
+ *
+ */
+
+/*
+ * This is a list of the valid names in the
+ * process-specific sub-directories.  It is
+ * used in procfs_lookup and procfs_readdir
+ */
+/*
+ * one table entry per name: the length of the name, the
+ * name itself, and the pfstype of the node it stands for.
+ */
+static struct pfsnames {
+	u_short	d_namlen;		/* length of d_name, without the nul */
+	char	d_name[PROCFS_NAMELEN];	/* the name */
+	pfstype	d_pfstype;		/* type of node the name maps to */
+} procent[] = {
+/*
+ * N(s) expands to the first two initializers of an entry:
+ * the length of the string literal (s), not counting its
+ * terminating nul, followed by the literal itself.
+ */
+#define N(s) sizeof(s)-1, s
+	/* namlen, nam, type */
+	{  N("file"),	Pfile },
+	{  N("mem"),	Pmem },
+	{  N("regs"),	Pregs },
+	{  N("fpregs"),	Pfpregs },
+	{  N("ctl"),	Pctl },
+	{  N("status"),	Pstatus },
+	{  N("note"),	Pnote },
+	{  N("notepg"),	Pnotepg },
+#undef N
+};
+/* number of entries in procent[] */
+#define Nprocent (sizeof(procent)/sizeof(procent[0]))
+
+static pid_t atopid __P((const char *, u_int));
+
+/*
+ * set things up for doing i/o on
+ * the pfsnode (vp).  (vp) is locked
+ * on entry, and should be left locked
+ * on exit.
+ *
+ * for procfs we don't need to do anything
+ * in particular for i/o.  all that is done
+ * is to support exclusive open on process
+ * memory images.
+ */
+procfs_open(ap)
+	struct vop_open_args *ap;
+{
+	struct pfsnode *pfs = VTOPFS(ap->a_vp);
+
+	/* only the memory image needs any work at open time */
+	if (pfs->pfs_type != Pmem)
+		return (0);
+
+	if (PFIND(pfs->pfs_pid) == 0)
+		return (ENOENT);	/* was ESRCH, jsp */
+
+	/*
+	 * refuse the open if the node is already open for writing
+	 * and this open asks for exclusive access, or if it is
+	 * already open exclusively and this open asks to write.
+	 */
+	if (((pfs->pfs_flags & FWRITE) && (ap->a_mode & O_EXCL)) ||
+	    ((pfs->pfs_flags & O_EXCL) && (ap->a_mode & FWRITE)))
+		return (EBUSY);
+
+	/* a writer records its write/exclusive bits on the node */
+	if (ap->a_mode & FWRITE)
+		pfs->pfs_flags = ap->a_mode & (FWRITE|O_EXCL);
+
+	return (0);
+}
+
+/*
+ * close the pfsnode (vp) after doing i/o.
+ * (vp) is not locked on entry or exit.
+ *
+ * nothing to do for procfs other than undo
+ * any exclusive open flag (see _open above).
+ */
+procfs_close(ap)
+	struct vop_close_args *ap;
+{
+	struct pfsnode *pfs = VTOPFS(ap->a_vp);
+
+	/*
+	 * when a writer closes a memory image that was opened
+	 * exclusively, forget the write/exclusive bits again.
+	 */
+	if (pfs->pfs_type == Pmem &&
+	    (ap->a_fflag & FWRITE) && (pfs->pfs_flags & O_EXCL))
+		pfs->pfs_flags &= ~(FWRITE|O_EXCL);
+
+	return (0);
+}
+
+/*
+ * do an ioctl operation on pfsnode (vp).
+ * (vp) is not locked on entry or exit.
+ */
+procfs_ioctl(ap)
+	struct vop_ioctl_args *ap;
+{
+
+	/* no ioctl is implemented for any procfs node */
+	return (ENOTTY);
+}
+
+/*
+ * do block mapping for pfsnode (vp).
+ * since we don't use the buffer cache
+ * for procfs this function should never
+ * be called.  in any case, it's not clear
+ * what part of the kernel ever makes use
+ * of this function.  for sanity, this is the
+ * usual no-op bmap, although returning
+ * (EIO) would be a reasonable alternative.
+ */
+procfs_bmap(ap)
+	struct vop_bmap_args *ap;
+{
+
+	/* identity mapping: same block number ... */
+	if (ap->a_bnp != NULL)
+		*ap->a_bnp = ap->a_bn;
+	/* ... on the very same vnode */
+	if (ap->a_vpp != NULL)
+		*ap->a_vpp = ap->a_vp;
+
+	return (0);
+}
+
+/*
+ * _inactive is called when the pfsnode
+ * is vrele'd and the reference count goes
+ * to zero.  (vp) will be on the vnode free
+ * list, so to get it back vget() must be
+ * used.
+ *
+ * for procfs, check if the process is still
+ * alive and if it isn't then just throw away
+ * the vnode by calling vgone().  this may
+ * be overkill and a waste of time since the
+ * chances are that the process will still be
+ * there and PFIND is not free.
+ *
+ * (vp) is not locked on entry or exit.
+ */
+procfs_inactive(ap)
+	struct vop_inactive_args *ap;
+{
+	struct vnode *vp = ap->a_vp;
+	struct pfsnode *pfs = VTOPFS(vp);
+
+	/* get rid of the vnode once its process has gone away */
+	if (PFIND(pfs->pfs_pid) == 0)
+		vgone(vp);
+
+	return (0);
+}
+
+/*
+ * _reclaim is called when getnewvnode()
+ * wants to make use of an entry on the vnode
+ * free list.  at this time the filesystem needs
+ * to free any private data and remove the node
+ * from any private lists.
+ */
+procfs_reclaim(ap)
+	struct vop_reclaim_args *ap;
+{
+
+	/* procfs_freevp unlinks and frees the private pfsnode */
+	return (procfs_freevp(ap->a_vp));
+}
+
+/*
+ * Return POSIX pathconf information applicable to special devices.
+ */
+procfs_pathconf(ap)
+	struct vop_pathconf_args /* {
+		struct vnode *a_vp;
+		int a_name;
+		int *a_retval;
+	} */ *ap;
+{
+	int value;
+
+	/* pick the limit that was asked for; unknown names are rejected */
+	switch (ap->a_name) {
+	case _PC_LINK_MAX:
+		value = LINK_MAX;
+		break;
+	case _PC_MAX_CANON:
+		value = MAX_CANON;
+		break;
+	case _PC_MAX_INPUT:
+		value = MAX_INPUT;
+		break;
+	case _PC_PIPE_BUF:
+		value = PIPE_BUF;
+		break;
+	case _PC_CHOWN_RESTRICTED:
+		value = 1;
+		break;
+	case _PC_VDISABLE:
+		value = _POSIX_VDISABLE;
+		break;
+	default:
+		return (EINVAL);
+	}
+
+	*ap->a_retval = value;
+	return (0);
+}
+
+/*
+ * _print is used for debugging.
+ * just print a readable description
+ * of (vp).
+ */
+procfs_print(ap)
+	struct vop_print_args *ap;
+{
+	struct pfsnode *pfs = VTOPFS(ap->a_vp);
+
+	printf("tag VT_PROCFS, pid %d, mode %x, flags %x\n",
+		pfs->pfs_pid,
+		pfs->pfs_mode, pfs->pfs_flags);
+
+	/*
+	 * the function is (implicitly) int valued, so return a
+	 * defined result instead of falling off the end.
+	 */
+	return (0);
+}
+
+/*
+ * _abortop is called when operations such as
+ * rename and create fail.  this entry is responsible
+ * for undoing any side-effects caused by the lookup.
+ * this will always include freeing the pathname buffer.
+ */
+procfs_abortop(ap)
+	struct vop_abortop_args *ap;
+{
+
+	/*
+	 * the pathname buffer is only ours to free when HASBUF is
+	 * set and SAVESTART is not.
+	 */
+	if ((ap->a_cnp->cn_flags & (HASBUF | SAVESTART)) != HASBUF)
+		return (0);
+
+	FREE(ap->a_cnp->cn_pnbuf, M_NAMEI);
+	return (0);
+}
+
+/*
+ * generic entry point for unsupported operations
+ */
+procfs_badop()
+{
+
+	/* whatever the operation was, it fails with an i/o error */
+	return (EIO);
+}
+
+/*
+ * Invent attributes for pfsnode (vp) and store
+ * them in (vap).
+ * Directory lengths are returned as zero since
+ * any real length would require the genuine size
+ * to be computed, and nothing cares anyway.
+ *
+ * this is relatively minimal for procfs.
+ */
+procfs_getattr(ap)
+	struct vop_getattr_args *ap;
+{
+	struct vattr *vap = ap->a_vap;
+	struct pfsnode *pfs = VTOPFS(ap->a_vp);
+	struct proc *procp;
+	int error;
+
+	/*
+	 * every node except the root directory stands for a
+	 * process; fail with ENOENT once that process has gone.
+	 */
+	if (pfs->pfs_type == Proot) {
+		procp = 0;
+	} else {
+		procp = PFIND(pfs->pfs_pid);
+		if (procp == 0)
+			return (ENOENT);
+	}
+
+	error = 0;
+
+	/* reset all attributes, then fill in the ones common to all nodes */
+	VATTR_NULL(vap);
+	vap->va_type = ap->a_vp->v_type;
+	vap->va_mode = pfs->pfs_mode;
+	vap->va_fileid = pfs->pfs_fileno;
+	vap->va_flags = 0;
+	vap->va_blocksize = PAGE_SIZE;
+	vap->va_size = 0;
+	vap->va_bytes = vap->va_size;
+
+	/*
+	 * A process that has exercised setuid or setgid privilege
+	 * loses all read/write permission bits on its register
+	 * and memory files, so that only root can gain access.
+	 */
+	if ((pfs->pfs_type == Pregs || pfs->pfs_type == Pfpregs ||
+	    pfs->pfs_type == Pmem) && (procp->p_flag & P_SUGID)) {
+		vap->va_mode &= ~((VREAD|VWRITE)|
+				  ((VREAD|VWRITE)>>3)|
+				  ((VREAD|VWRITE)>>6));
+	}
+
+	/*
+	 * All three time stamps are the current time of day.
+	 * The process start time could be used instead, but there
+	 * is no "file creation" time stamp anyway, and that
+	 * information is not addressable if u. gets swapped out
+	 * for that process.
+	 */
+	microtime(&vap->va_ctime);
+	vap->va_mtime = vap->va_ctime;
+	vap->va_atime = vap->va_mtime;
+
+	/*
+	 * Finally the fields that depend on the type of node.
+	 *
+	 * The size of the register files could be taken from
+	 * struct reg, but that is hardly worth the trouble and
+	 * would drag (potentially) machine dependent data into this
+	 * machine-independent code.  Should it become important,
+	 * this ought to turn into a per-file stat function in the
+	 * corresponding .c file.
+	 */
+	switch (pfs->pfs_type) {
+	case Proot:
+		/* the root directory belongs to uid 0, gid 0 */
+		vap->va_nlink = 2;
+		vap->va_uid = 0;
+		vap->va_gid = 0;
+		break;
+
+	case Pproc:
+		/* a process directory belongs to the process's user */
+		vap->va_nlink = 2;
+		vap->va_uid = procp->p_ucred->cr_uid;
+		vap->va_gid = procp->p_ucred->cr_gid;
+		break;
+
+	case Pfile:
+		error = EOPNOTSUPP;
+		break;
+
+	case Pmem:
+		/* size is text + data + stack of the process, in bytes */
+		vap->va_size =
+			ctob(procp->p_vmspace->vm_tsize +
+				    procp->p_vmspace->vm_dsize +
+				    procp->p_vmspace->vm_ssize);
+		vap->va_bytes = vap->va_size;
+		/* FALLTHROUGH */
+	case Pregs:
+	case Pfpregs:
+	case Pctl:
+	case Pstatus:
+	case Pnote:
+	case Pnotepg:
+		/* plain files owned by the process's user */
+		vap->va_nlink = 1;
+		vap->va_uid = procp->p_ucred->cr_uid;
+		vap->va_gid = procp->p_ucred->cr_gid;
+		break;
+
+	default:
+		panic("procfs_getattr");
+	}
+
+	return (error);
+}
+
+/*
+ * Attribute changes are accepted and silently ignored.
+ * Returning an error here is not a good idea: things
+ * like creat() would fail when they try to set the file
+ * length to 0, and as a result something as simple as
+ * echo $note > /proc/$pid/note would fail too.
+ */
+int
+procfs_setattr(ap)
+	struct vop_setattr_args *ap;
+{
+
+	return (0);
+}
+
+/*
+ * implement access checking.
+ *
+ * something very similar to this code is duplicated
+ * throughout the 4bsd kernel and should be moved
+ * into kern/vfs_subr.c sometime.
+ *
+ * actually, the check for super-user is slightly
+ * broken since it will allow read access to write-only
+ * objects.  this doesn't cause any particular trouble
+ * but does mean that the i/o entry points need to check
+ * that the operation really does make sense.
+ */
+procfs_access(ap)
+	struct vop_access_args *ap;
+{
+	struct vattr vattr;
+	struct vattr *vap = &vattr;
+	int mode = ap->a_mode;	/* local copy: never modify the caller's args */
+	int error;
+
+	/*
+	 * If you're the super-user,
+	 * you always get access.
+	 */
+	if (ap->a_cred->cr_uid == (uid_t) 0)
+		return (0);
+	if ((error = VOP_GETATTR(ap->a_vp, vap, ap->a_cred, ap->a_p)) != 0)
+		return (error);
+
+	/*
+	 * Access check is based on only one of owner, group, public.
+	 * If not owner, then check group. If not a member of the
+	 * group, then check public access.  The requested bits arrive
+	 * in the owner position and are shifted down to line up with
+	 * the group or other bits of va_mode.
+	 */
+	if (ap->a_cred->cr_uid != vap->va_uid) {
+		gid_t *gp = ap->a_cred->cr_groups;
+		int i;
+
+		mode >>= 3;
+		for (i = 0; i < ap->a_cred->cr_ngroups; i++, gp++)
+			if (vap->va_gid == *gp)
+				break;
+		if (i == ap->a_cred->cr_ngroups)
+			mode >>= 3;
+	}
+
+	if ((vap->va_mode & mode) == mode)
+		return (0);
+
+	return (EACCES);
+}
+
+/*
+ * lookup.  this is incredibly complicated in the
+ * general case, however for most pseudo-filesystems
+ * very little needs to be done.
+ *
+ * unless you want to get a migraine, just make sure your
+ * filesystem doesn't do any locking of its own.  otherwise
+ * read and inwardly digest ufs_lookup().
+ */
+procfs_lookup(ap)
+	struct vop_lookup_args *ap;
+{
+	struct componentname *cnp = ap->a_cnp;
+	struct vnode **vpp = ap->a_vpp;
+	struct vnode *dvp = ap->a_dvp;
+	char *pname = cnp->cn_nameptr;
+	int error = 0;
+	pid_t pid;
+	struct vnode *nvp;
+	struct pfsnode *pfs;
+	struct proc *procp;
+	pfstype pfs_type;
+	int i;
+	/* "." is the directory itself: return dvp after VREF()ing it */
+	if (cnp->cn_namelen == 1 && *pname == '.') {
+		*vpp = dvp;
+		VREF(dvp);
+		/*VOP_LOCK(dvp);*/
+		return (0);
+	}
+
+	*vpp = NULL;
+	/* what can be looked up depends on the type of the directory node */
+	pfs = VTOPFS(dvp);
+	switch (pfs->pfs_type) {
+	case Proot:
+		if (cnp->cn_flags & ISDOTDOT)
+			return (EIO);
+		/* "curproc" maps to cnp->cn_proc's pid; else a decimal pid */
+		if (CNEQ(cnp, "curproc", 7))
+			pid = cnp->cn_proc->p_pid;
+		else
+			pid = atopid(pname, cnp->cn_namelen);
+		if (pid == NO_PID)
+			return (ENOENT);
+		/* no process found for that pid: no such entry */
+		procp = PFIND(pid);
+		if (procp == 0)
+			return (ENOENT);
+
+		error = procfs_allocvp(dvp->v_mount, &nvp, pid, Pproc);
+		if (error)
+			return (error);
+
+		nvp->v_type = VDIR;
+		pfs = VTOPFS(nvp);	/* not used again before returning */
+
+		*vpp = nvp;
+		return (0);
+
+	case Pproc:
+		if (cnp->cn_flags & ISDOTDOT) {
+			error = procfs_root(dvp->v_mount, vpp);
+			return (error);
+		}
+		/* fail with ENOENT when PFIND() finds nothing for this pid */
+		procp = PFIND(pfs->pfs_pid);
+		if (procp == 0)
+			return (ENOENT);
+		/* search the procent[] table for an entry with this name */
+		for (i = 0; i < Nprocent; i++) {
+			struct pfsnames *dp = &procent[i];
+
+			if (cnp->cn_namelen == dp->d_namlen &&
+			    bcmp(pname, dp->d_name, dp->d_namlen) == 0) {
+			    	pfs_type = dp->d_pfstype;
+				goto found;
+			}
+		}
+		return (ENOENT);
+
+	found:
+		if (pfs_type == Pfile) {	/* Pfile: no pfsnode is allocated */
+			nvp = procfs_findtextvp(procp);
+			if (nvp) {
+				VREF(nvp);
+				VOP_LOCK(nvp);
+			} else {
+				error = ENXIO;	/* *vpp is set to NULL below */
+			}
+		} else {
+			error = procfs_allocvp(dvp->v_mount, &nvp,
+					pfs->pfs_pid, pfs_type);
+			if (error)
+				return (error);
+
+			nvp->v_type = VREG;
+			pfs = VTOPFS(nvp);	/* not used again */
+		}
+		*vpp = nvp;
+		return (error);
+
+	default:
+		return (ENOTDIR);
+	}
+}
+
+/*
+ * readdir returns directory entries from pfsnode (vp).
+ *
+ * the strategy here with procfs is to generate a single
+ * directory entry at a time (struct pfsdent) and then
+ * copy that out to userland using uiomove.  a more efficient
+ * though more complex implementation, would try to minimize
+ * the number of calls to uiomove().  for procfs, this is
+ * hardly worth the added code complexity.
+ *
+ * this should just be done through read()
+ */
+procfs_readdir(ap)
+	struct vop_readdir_args *ap;
+{
+	struct uio *uio = ap->a_uio;
+	struct pfsdent d;
+	struct pfsdent *dp = &d;
+	struct pfsnode *pfs;
+	int error;
+	int i;
+
+	pfs = VTOPFS(ap->a_vp);
+
+	/*
+	 * Every entry is UIO_MX bytes: require room for at least one
+	 * entry and a non-negative offset that is a multiple of UIO_MX.
+	 */
+	if (uio->uio_resid < UIO_MX)
+		return (EINVAL);
+	if (uio->uio_offset & (UIO_MX-1))
+		return (EINVAL);
+	if (uio->uio_offset < 0)
+		return (EINVAL);
+
+	error = 0;
+	i = uio->uio_offset / UIO_MX;
+
+	switch (pfs->pfs_type) {
+	/*
+	 * this is for the process-specific sub-directories.
+	 * all that is needed is to copy out all the entries
+	 * from the procent[] table (top of this file).
+	 */
+	case Pproc: {
+		while (uio->uio_resid >= UIO_MX) {
+			struct pfsnames *dt;
+
+			if (i >= Nprocent)
+				break;
+			/* clear first: all UIO_MX bytes go out to the user */
+			bzero((char *) dp, UIO_MX);
+			dt = &procent[i];
+			dp->d_reclen = UIO_MX;
+			dp->d_fileno = PROCFS_FILENO(pfs->pfs_pid, dt->d_pfstype);
+			dp->d_type = DT_REG;
+			dp->d_namlen = dt->d_namlen;
+			bcopy(dt->d_name, dp->d_name, sizeof(dt->d_name)-1);
+			error = uiomove((caddr_t) dp, UIO_MX, uio);
+			if (error)
+				break;
+			i++;
+		}
+
+	    	break;
+
+	    }
+
+	/*
+	 * this is for the root of the procfs filesystem
+	 * what is needed is a special entry for "curproc"
+	 * followed by an entry for each process on allproc
+#ifdef PROCFS_ZOMBIE
+	 * and zombproc.
+#endif
+	 */
+
+	case Proot: {
+		int pcnt;
+#ifdef PROCFS_ZOMBIE
+		int doingzomb = 0;
+#endif
+		volatile struct proc *p;
+
+		p = allproc;
+
+#define PROCFS_XFILES	1	/* number of other entries, like "curproc" */
+		pcnt = PROCFS_XFILES;	/* entry index of the first process */
+
+		while (p && uio->uio_resid >= UIO_MX) {
+			bzero((char *) dp, UIO_MX);
+			dp->d_type = DT_DIR;
+			dp->d_reclen = UIO_MX;
+
+			switch (i) {
+			case 0:
+				/* ship out entry for "curproc" */
+				dp->d_fileno = PROCFS_FILENO(PID_MAX+1, Pproc);
+				dp->d_namlen = sprintf(dp->d_name, "curproc");
+				break;
+
+			default:
+				if (pcnt >= i) {
+					dp->d_fileno = PROCFS_FILENO(p->p_pid, Pproc);
+					dp->d_namlen = sprintf(dp->d_name, "%ld", (long) p->p_pid);
+				}
+
+				p = p->p_next;
+
+#ifdef PROCFS_ZOMBIE
+				if (p == 0 && doingzomb == 0) {
+					doingzomb = 1;
+					p = zombproc;
+				}
+#endif
+				/* not yet at entry i: skip this process */
+				if (pcnt++ < i)
+					continue;
+
+				break;
+			}
+			error = uiomove((caddr_t) dp, UIO_MX, uio);
+			if (error)
+				break;
+			i++;
+		}
+
+		break;
+
+	    }
+
+	default:
+		error = ENOTDIR;
+		break;
+	}
+
+	uio->uio_offset = i * UIO_MX;
+
+	return (error);
+}
+
+/*
+ * convert decimal ascii to pid_t
+ */
+static pid_t
+atopid(b, len)
+	const char *b;
+	u_int len;
+{
+	const char *end = b + len;
+	pid_t pid;
+
+	/* accumulate digit by digit; bail out on junk or overflow */
+	for (pid = 0; b < end; b++) {
+		if (*b < '0' || *b > '9')
+			return (NO_PID);
+		pid = 10 * pid + (*b - '0');
+		if (pid > PID_MAX)
+			return (NO_PID);
+	}
+	return (pid);
+}
+
+/*
+ * procfs vnode operations: each vop descriptor paired with its handler.
+ */
+int (**procfs_vnodeop_p)();	/* referenced by procfs_vnodeop_opv_desc below */
+struct vnodeopv_entry_desc procfs_vnodeop_entries[] = {
+	{ &vop_default_desc, vn_default_error },	/* default entry */
+	{ &vop_lookup_desc, procfs_lookup },		/* lookup */
+	{ &vop_create_desc, procfs_create },		/* create */
+	{ &vop_mknod_desc, procfs_mknod },		/* mknod */
+	{ &vop_open_desc, procfs_open },		/* open */
+	{ &vop_close_desc, procfs_close },		/* close */
+	{ &vop_access_desc, procfs_access },		/* access */
+	{ &vop_getattr_desc, procfs_getattr },		/* getattr */
+	{ &vop_setattr_desc, procfs_setattr },		/* setattr */
+	{ &vop_read_desc, procfs_read },		/* read */
+	{ &vop_write_desc, procfs_write },		/* write */
+	{ &vop_ioctl_desc, procfs_ioctl },		/* ioctl */
+	{ &vop_select_desc, procfs_select },		/* select */
+	{ &vop_mmap_desc, procfs_mmap },		/* mmap */
+	{ &vop_fsync_desc, procfs_fsync },		/* fsync */
+	{ &vop_seek_desc, procfs_seek },		/* seek */
+	{ &vop_remove_desc, procfs_remove },		/* remove */
+	{ &vop_link_desc, procfs_link },		/* link */
+	{ &vop_rename_desc, procfs_rename },		/* rename */
+	{ &vop_mkdir_desc, procfs_mkdir },		/* mkdir */
+	{ &vop_rmdir_desc, procfs_rmdir },		/* rmdir */
+	{ &vop_symlink_desc, procfs_symlink },		/* symlink */
+	{ &vop_readdir_desc, procfs_readdir },		/* readdir */
+	{ &vop_readlink_desc, procfs_readlink },	/* readlink */
+	{ &vop_abortop_desc, procfs_abortop },		/* abortop */
+	{ &vop_inactive_desc, procfs_inactive },	/* inactive */
+	{ &vop_reclaim_desc, procfs_reclaim },		/* reclaim */
+	{ &vop_lock_desc, procfs_lock },		/* lock */
+	{ &vop_unlock_desc, procfs_unlock },		/* unlock */
+	{ &vop_bmap_desc, procfs_bmap },		/* bmap */
+	{ &vop_strategy_desc, procfs_strategy },	/* strategy */
+	{ &vop_print_desc, procfs_print },		/* print */
+	{ &vop_islocked_desc, procfs_islocked },	/* islocked */
+	{ &vop_pathconf_desc, procfs_pathconf },	/* pathconf */
+	{ &vop_advlock_desc, procfs_advlock },		/* advlock */
+	{ &vop_blkatoff_desc, procfs_blkatoff },	/* blkatoff */
+	{ &vop_valloc_desc, procfs_valloc },		/* valloc */
+	{ &vop_vfree_desc, procfs_vfree },		/* vfree */
+	{ &vop_truncate_desc, procfs_truncate },	/* truncate */
+	{ &vop_update_desc, procfs_update },		/* update */
+	{ (struct vnodeop_desc*)NULL, (int(*)())NULL }	/* end of table */
+};
+struct vnodeopv_desc procfs_vnodeop_opv_desc =
+	{ &procfs_vnodeop_p, procfs_vnodeop_entries };
diff --git a/sys/miscfs/specfs/spec_vnops.c b/sys/miscfs/specfs/spec_vnops.c
new file mode 100644
index 00000000000..111c517b162
--- /dev/null
+++ b/sys/miscfs/specfs/spec_vnops.c
@@ -0,0 +1,689 @@
+/*
+ * Copyright (c) 1989, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)spec_vnops.c	8.6 (Berkeley) 4/9/94
+ */
+
+#include <sys/param.h>
+#include <sys/proc.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/conf.h>
+#include <sys/buf.h>
+#include <sys/mount.h>
+#include <sys/namei.h>
+#include <sys/vnode.h>
+#include <sys/stat.h>
+#include <sys/errno.h>
+#include <sys/ioctl.h>
+#include <sys/file.h>
+#include <sys/disklabel.h>
+#include <miscfs/specfs/specdev.h>
+
+/* symbolic sleep message strings for devices */
+char	devopn[] = "devopn";
+char	devio[] = "devio";
+char	devwait[] = "devwait";
+char	devin[] = "devin";
+char	devout[] = "devout";
+char	devioc[] = "devioc";
+char	devcls[] = "devcls";
+
+int (**spec_vnodeop_p)();	/* referenced by spec_vnodeop_opv_desc below */
+struct vnodeopv_entry_desc spec_vnodeop_entries[] = {
+	{ &vop_default_desc, vn_default_error },	/* default entry */
+	{ &vop_lookup_desc, spec_lookup },		/* lookup */
+	{ &vop_create_desc, spec_create },		/* create */
+	{ &vop_mknod_desc, spec_mknod },		/* mknod */
+	{ &vop_open_desc, spec_open },			/* open */
+	{ &vop_close_desc, spec_close },		/* close */
+	{ &vop_access_desc, spec_access },		/* access */
+	{ &vop_getattr_desc, spec_getattr },		/* getattr */
+	{ &vop_setattr_desc, spec_setattr },		/* setattr */
+	{ &vop_read_desc, spec_read },			/* read */
+	{ &vop_write_desc, spec_write },		/* write */
+	{ &vop_ioctl_desc, spec_ioctl },		/* ioctl */
+	{ &vop_select_desc, spec_select },		/* select */
+	{ &vop_mmap_desc, spec_mmap },			/* mmap */
+	{ &vop_fsync_desc, spec_fsync },		/* fsync */
+	{ &vop_seek_desc, spec_seek },			/* seek */
+	{ &vop_remove_desc, spec_remove },		/* remove */
+	{ &vop_link_desc, spec_link },			/* link */
+	{ &vop_rename_desc, spec_rename },		/* rename */
+	{ &vop_mkdir_desc, spec_mkdir },		/* mkdir */
+	{ &vop_rmdir_desc, spec_rmdir },		/* rmdir */
+	{ &vop_symlink_desc, spec_symlink },		/* symlink */
+	{ &vop_readdir_desc, spec_readdir },		/* readdir */
+	{ &vop_readlink_desc, spec_readlink },		/* readlink */
+	{ &vop_abortop_desc, spec_abortop },		/* abortop */
+	{ &vop_inactive_desc, spec_inactive },		/* inactive */
+	{ &vop_reclaim_desc, spec_reclaim },		/* reclaim */
+	{ &vop_lock_desc, spec_lock },			/* lock */
+	{ &vop_unlock_desc, spec_unlock },		/* unlock */
+	{ &vop_bmap_desc, spec_bmap },			/* bmap */
+	{ &vop_strategy_desc, spec_strategy },		/* strategy */
+	{ &vop_print_desc, spec_print },		/* print */
+	{ &vop_islocked_desc, spec_islocked },		/* islocked */
+	{ &vop_pathconf_desc, spec_pathconf },		/* pathconf */
+	{ &vop_advlock_desc, spec_advlock },		/* advlock */
+	{ &vop_blkatoff_desc, spec_blkatoff },		/* blkatoff */
+	{ &vop_valloc_desc, spec_valloc },		/* valloc */
+	{ &vop_vfree_desc, spec_vfree },		/* vfree */
+	{ &vop_truncate_desc, spec_truncate },		/* truncate */
+	{ &vop_update_desc, spec_update },		/* update */
+	{ &vop_bwrite_desc, spec_bwrite },		/* bwrite */
+	{ (struct vnodeop_desc*)NULL, (int(*)())NULL }	/* end of table */
+};
+struct vnodeopv_desc spec_vnodeop_opv_desc =
+	{ &spec_vnodeop_p, spec_vnodeop_entries };
+
+/*
+ * Trivial lookup: nothing is ever found; clear *a_vpp and fail ENOTDIR.
+ */
+int
+spec_lookup(ap)
+	struct vop_lookup_args /* {
+		struct vnode *a_dvp;
+		struct vnode **a_vpp;
+		struct componentname *a_cnp;
+	} */ *ap;
+{
+	struct vnode **vpp = ap->a_vpp;	/* where the result goes */
+	*vpp = NULL;
+	return (ENOTDIR);
+}
+
+/*
+ * Open a special file: policy checks first, then the driver's d_open.
+ */
+/* ARGSUSED */
+spec_open(ap)
+	struct vop_open_args /* {
+		struct vnode *a_vp;
+		int  a_mode;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct vnode *bvp, *vp = ap->a_vp;
+	dev_t bdev, dev = (dev_t)vp->v_rdev;
+	register int maj = major(dev);	/* index into cdevsw[] / bdevsw[] */
+	int error;
+
+	/*
+	 * Don't allow open if fs is mounted -nodev.
+	 */
+	if (vp->v_mount && (vp->v_mount->mnt_flag & MNT_NODEV))
+		return (ENXIO);
+
+	switch (vp->v_type) {
+
+	case VCHR:
+		if ((u_int)maj >= nchrdev)	/* major out of range */
+			return (ENXIO);
+		if (ap->a_cred != FSCRED && (ap->a_mode & FWRITE)) {
+			/*
+			 * When running in very secure mode, do not allow
+			 * opens for writing of any disk character devices.
+			 */
+			if (securelevel >= 2 && isdisk(dev, VCHR))
+				return (EPERM);
+			/*
+			 * When running in secure mode, do not allow opens
+			 * for writing of /dev/mem, /dev/kmem, or character
+			 * devices whose corresponding block devices are
+			 * currently mounted.
+			 */
+			if (securelevel >= 1) {
+				if ((bdev = chrtoblk(dev)) != NODEV &&
+				    vfinddev(bdev, VBLK, &bvp) &&
+				    bvp->v_usecount > 0 &&
+				    (error = vfs_mountedon(bvp)))
+					return (error);
+				if (iskmemdev(dev))
+					return (EPERM);
+			}
+		}
+		VOP_UNLOCK(vp);	/* vnode is unlocked around the driver call */
+		error = (*cdevsw[maj].d_open)(dev, ap->a_mode, S_IFCHR, ap->a_p);
+		VOP_LOCK(vp);
+		return (error);
+
+	case VBLK:
+		if ((u_int)maj >= nblkdev)	/* major out of range */
+			return (ENXIO);
+		/*
+		 * When running in very secure mode, do not allow
+		 * opens for writing of any disk block devices.
+		 */
+		if (securelevel >= 2 && ap->a_cred != FSCRED &&
+		    (ap->a_mode & FWRITE) && isdisk(dev, VBLK))
+			return (EPERM);
+		/*
+		 * Do not allow opens of block devices that are
+		 * currently mounted.
+		 */
+		if (error = vfs_mountedon(vp))
+			return (error);
+		return ((*bdevsw[maj].d_open)(dev, ap->a_mode, S_IFBLK, ap->a_p));
+	}
+	return (0);	/* any other vnode type: no driver is called */
+}
+
+/*
+ * Vnode op for read (driver d_read for VCHR, bread/breadn for VBLK)
+ */
+/* ARGSUSED */
+spec_read(ap)
+	struct vop_read_args /* {
+		struct vnode *a_vp;
+		struct uio *a_uio;
+		int  a_ioflag;
+		struct ucred *a_cred;
+	} */ *ap;
+{
+	register struct vnode *vp = ap->a_vp;
+	register struct uio *uio = ap->a_uio;
+ 	struct proc *p = uio->uio_procp;
+	struct buf *bp;
+	daddr_t bn, nextbn;
+	long bsize, bscale;
+	struct partinfo dpart;
+	int n, on, majordev, (*ioctl)();
+	int error = 0;
+	dev_t dev;
+
+#ifdef DIAGNOSTIC
+	if (uio->uio_rw != UIO_READ)
+		panic("spec_read mode");
+	if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != curproc)
+		panic("spec_read proc");
+#endif
+	if (uio->uio_resid == 0)
+		return (0);
+
+	switch (vp->v_type) {
+
+	case VCHR:
+		VOP_UNLOCK(vp);	/* vnode is unlocked around the driver call */
+		error = (*cdevsw[major(vp->v_rdev)].d_read)
+			(vp->v_rdev, uio, ap->a_ioflag);
+		VOP_LOCK(vp);
+		return (error);
+
+	case VBLK:
+		if (uio->uio_offset < 0)
+			return (EINVAL);
+		bsize = BLKDEV_IOSIZE;	/* default; replaced below for FS_BSDFFS */
+		dev = vp->v_rdev;
+		if ((majordev = major(dev)) < nblkdev &&
+		    (ioctl = bdevsw[majordev].d_ioctl) != NULL &&
+		    (*ioctl)(dev, DIOCGPART, (caddr_t)&dpart, FREAD, p) == 0 &&
+		    dpart.part->p_fstype == FS_BSDFFS &&
+		    dpart.part->p_frag != 0 && dpart.part->p_fsize != 0)
+			bsize = dpart.part->p_frag * dpart.part->p_fsize;
+		bscale = bsize / DEV_BSIZE;	/* DEV_BSIZE units per transfer */
+		do {
+			bn = (uio->uio_offset / DEV_BSIZE) &~ (bscale - 1);
+			on = uio->uio_offset % bsize;	/* offset inside block */
+			n = min((unsigned)(bsize - on), uio->uio_resid);
+			if (vp->v_lastr + bscale == bn) { /* follows last read */
+				nextbn = bn + bscale;
+				error = breadn(vp, bn, (int)bsize, &nextbn,
+					(int *)&bsize, 1, NOCRED, &bp);
+					/* NOTE(review): long bsize passed as int * */
+			} else
+				error = bread(vp, bn, (int)bsize, NOCRED, &bp);
+			vp->v_lastr = bn;
+			/* NOTE(review): bp is used before error is tested */
+			n = min(n, bsize - bp->b_resid);
+			if (error) {
+				brelse(bp);
+				return (error);
+			}
+			error = uiomove((char *)bp->b_data + on, n, uio);
+			if (n + on == bsize)	/* copied through end of block */
+				bp->b_flags |= B_AGE;
+			brelse(bp);
+		} while (error == 0 && uio->uio_resid > 0 && n != 0);
+		return (error);
+
+	default:
+		panic("spec_read type");
+	}
+	/* NOTREACHED */
+}
+
+/*
+ * Vnode op for write (driver d_write for VCHR, buffer cache for VBLK)
+ */
+/* ARGSUSED */
+spec_write(ap)
+	struct vop_write_args /* {
+		struct vnode *a_vp;
+		struct uio *a_uio;
+		int  a_ioflag;
+		struct ucred *a_cred;
+	} */ *ap;
+{
+	register struct vnode *vp = ap->a_vp;
+	register struct uio *uio = ap->a_uio;
+	struct proc *p = uio->uio_procp;
+	struct buf *bp;
+	daddr_t bn;
+	int bsize, blkmask;
+	struct partinfo dpart;
+	register int n, on;
+	int error = 0;
+
+#ifdef DIAGNOSTIC
+	if (uio->uio_rw != UIO_WRITE)
+		panic("spec_write mode");
+	if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != curproc)
+		panic("spec_write proc");
+#endif
+
+	switch (vp->v_type) {
+
+	case VCHR:
+		VOP_UNLOCK(vp);	/* vnode is unlocked around the driver call */
+		error = (*cdevsw[major(vp->v_rdev)].d_write)
+			(vp->v_rdev, uio, ap->a_ioflag);
+		VOP_LOCK(vp);
+		return (error);
+
+	case VBLK:
+		if (uio->uio_resid == 0)
+			return (0);
+		if (uio->uio_offset < 0)
+			return (EINVAL);
+		bsize = BLKDEV_IOSIZE;	/* default; replaced below for FS_BSDFFS */
+		/* NOTE(review): no range/NULL check on d_ioctl as in spec_read */
+		if ((*bdevsw[major(vp->v_rdev)].d_ioctl)(vp->v_rdev, DIOCGPART,
+		    (caddr_t)&dpart, FREAD, p) == 0) {
+			if (dpart.part->p_fstype == FS_BSDFFS &&
+			    dpart.part->p_frag != 0 && dpart.part->p_fsize != 0)
+				bsize = dpart.part->p_frag *
+				    dpart.part->p_fsize;
+		}
+		blkmask = (bsize / DEV_BSIZE) - 1;
+		do {
+			bn = (uio->uio_offset / DEV_BSIZE) &~ blkmask;
+			on = uio->uio_offset % bsize;	/* offset inside block */
+			n = min((unsigned)(bsize - on), uio->uio_resid);
+			if (n == bsize)	/* whole block replaced: skip the read */
+				bp = getblk(vp, bn, bsize, 0, 0);
+			else
+				error = bread(vp, bn, bsize, NOCRED, &bp);
+			n = min(n, bsize - bp->b_resid);
+			if (error) {
+				brelse(bp);
+				return (error);
+			}
+			error = uiomove((char *)bp->b_data + on, n, uio);
+			if (n + on == bsize) {	/* filled through end of block */
+				bp->b_flags |= B_AGE;
+				bawrite(bp);
+			} else
+				bdwrite(bp);
+		} while (error == 0 && uio->uio_resid > 0 && n != 0);
+		return (error);
+
+	default:
+		panic("spec_write type");
+	}
+	/* NOTREACHED */
+}
+
+/*
+ * Device ioctl operation: pass the request to the character or
+ * block driver selected by the device's major number.
+ */
+/* ARGSUSED */
+spec_ioctl(ap)
+	struct vop_ioctl_args /* {
+		struct vnode *a_vp;
+		int  a_command;
+		caddr_t  a_data;
+		int  a_fflag;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct vnode *vp = ap->a_vp;
+	dev_t dev = vp->v_rdev;
+	int maj = major(dev);
+
+	switch (vp->v_type) {
+	case VCHR:
+		return ((*cdevsw[maj].d_ioctl)(dev, ap->a_command,
+		    ap->a_data, ap->a_fflag, ap->a_p));
+
+	case VBLK:
+		/* (0, B_TAPE) query: 0 if driver is flagged B_TAPE, else 1 */
+		if (ap->a_command == 0 && (int)ap->a_data == B_TAPE)
+			return ((bdevsw[maj].d_flags & B_TAPE) ? 0 : 1);
+		return ((*bdevsw[maj].d_ioctl)(dev, ap->a_command,
+		    ap->a_data, ap->a_fflag, ap->a_p));
+
+	default:
+		panic("spec_ioctl");
+		/* NOTREACHED */
+	}
+}
+
+/* ARGSUSED */
+spec_select(ap)
+	struct vop_select_args /* {
+		struct vnode *a_vp;
+		int  a_which;
+		int  a_fflags;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct vnode *vp = ap->a_vp;
+	dev_t dev;
+
+	/*
+	 * Only character devices are passed on to their driver's
+	 * d_select; every other vnode type simply gets 1 back.
+	 */
+	if (vp->v_type != VCHR)
+		return (1);		/* XXX */
+	dev = vp->v_rdev;
+	return (*cdevsw[major(dev)].d_select)(dev, ap->a_which, ap->a_p);
+}
+/*
+ * Synch buffers associated with a block device (no-op for VCHR)
+ */
+/* ARGSUSED */
+int
+spec_fsync(ap)
+	struct vop_fsync_args /* {
+		struct vnode *a_vp;
+		struct ucred *a_cred;
+		int  a_waitfor;
+		struct proc *a_p;
+	} */ *ap;
+{
+	register struct vnode *vp = ap->a_vp;
+	register struct buf *bp;
+	struct buf *nbp;
+	int s;
+
+	if (vp->v_type == VCHR)
+		return (0);
+	/* Flush all dirty buffers associated with a block device. */
+loop:
+	s = splbio();
+	for (bp = vp->v_dirtyblkhd.lh_first; bp; bp = nbp) {
+		nbp = bp->b_vnbufs.le_next;
+		if ((bp->b_flags & B_BUSY))
+			continue;
+		if ((bp->b_flags & B_DELWRI) == 0)
+			panic("spec_fsync: not dirty");
+		bremfree(bp);
+		bp->b_flags |= B_BUSY;
+		splx(s);
+		bawrite(bp);
+		goto loop;	/* rescan the list from its head */
+	}
+	if (ap->a_waitfor == MNT_WAIT) {
+		while (vp->v_numoutput) {	/* writes still in progress */
+			vp->v_flag |= VBWAIT;
+			sleep((caddr_t)&vp->v_numoutput, PRIBIO + 1);
+		}
+#ifdef DIAGNOSTIC
+		if (vp->v_dirtyblkhd.lh_first) {
+			vprint("spec_fsync: dirty", vp);
+			/* drop to the saved level before splbio() runs again */
+			splx(s);
+			goto loop;
+		}
+#endif
+	}
+	splx(s);
+	return (0);
+}
+
+/*
+ * Pass the buffer to the strategy routine of its block device driver.
+ */
+spec_strategy(ap)
+	struct vop_strategy_args /* {
+		struct buf *a_bp;
+	} */ *ap;
+{
+	struct buf *bp = ap->a_bp;	/* the request being handed over */
+	(*bdevsw[major(bp->b_dev)].d_strategy)(bp);
+	return (0);
+}
+
+/*
+ * Identity mapping: the vnode and block number given are handed back.
+ */
+spec_bmap(ap)
+	struct vop_bmap_args /* {
+		struct vnode *a_vp;
+		daddr_t  a_bn;
+		struct vnode **a_vpp;
+		daddr_t *a_bnp;
+	} */ *ap;
+{
+	/* each result is optional: store only through non-NULL pointers */
+	if (ap->a_vpp)
+		*ap->a_vpp = ap->a_vp;
+	if (ap->a_bnp)
+		*ap->a_bnp = ap->a_bn;
+	return (0);
+}
+
+/*
+ * Special files are not locked at present; always report success.
+ */
+/* ARGSUSED */
+int
+spec_lock(ap)
+	struct vop_lock_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+	return (0);
+}
+
+/* ARGSUSED */
+int
+spec_unlock(ap)
+	struct vop_unlock_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+	return (0);	/* no lock state is kept, so there is nothing to undo */
+}
+
+/*
+ * Device close routine: calls the driver's d_close on last close only.
+ */
+/* ARGSUSED */
+spec_close(ap)
+	struct vop_close_args /* {
+		struct vnode *a_vp;
+		int  a_fflag;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	register struct vnode *vp = ap->a_vp;
+	dev_t dev = vp->v_rdev;
+	int (*devclose) __P((dev_t, int, int, struct proc *));
+	int mode, error;	/* mode: S_IFCHR or S_IFBLK, passed to d_close */
+
+	switch (vp->v_type) {
+
+	case VCHR:
+		/*
+		 * Hack: a tty device that is a controlling terminal
+		 * has a reference from the session structure.
+		 * We cannot easily tell that a character device is
+		 * a controlling terminal, unless it is the closing
+		 * process' controlling terminal.  In that case,
+		 * if the reference count is 2 (this last descriptor
+		 * plus the session), release the reference from the session.
+		 */
+		if (vcount(vp) == 2 && ap->a_p &&
+		    vp == ap->a_p->p_session->s_ttyvp) {
+			vrele(vp);
+			ap->a_p->p_session->s_ttyvp = NULL;
+		}
+		/*
+		 * If the vnode is locked, then we are in the midst
+		 * of forcibly closing the device, otherwise we only
+		 * close on last reference.
+		 */
+		if (vcount(vp) > 1 && (vp->v_flag & VXLOCK) == 0)
+			return (0);
+		devclose = cdevsw[major(dev)].d_close;
+		mode = S_IFCHR;
+		break;
+
+	case VBLK:
+		/*
+		 * On last close of a block device (that isn't mounted)
+		 * we must invalidate any in core blocks, so that
+		 * we can, for instance, change floppy disks.
+		 */
+		if (error = vinvalbuf(vp, V_SAVE, ap->a_cred, ap->a_p, 0, 0))
+			return (error);
+		/*
+		 * We do not want to really close the device if it
+		 * is still in use unless we are trying to close it
+		 * forcibly. Since every use (buffer, vnode, swap, cmap)
+		 * holds a reference to the vnode, and because we mark
+		 * any other vnodes that alias this device, when the
+		 * sum of the reference counts on all the aliased
+		 * vnodes descends to one, we are on last close.
+		 */
+		if (vcount(vp) > 1 && (vp->v_flag & VXLOCK) == 0)
+			return (0);
+		devclose = bdevsw[major(dev)].d_close;
+		mode = S_IFBLK;
+		break;
+
+	default:
+		panic("spec_close: not special");
+	}
+	/* reached only via the two "break"s above, with devclose and mode set */
+	return ((*devclose)(dev, ap->a_fflag, mode, ap->a_p));
+}
+
+/*
+ * Print the major and minor numbers of a special device vnode; returns 0.
+ */
+spec_print(ap)
+	struct vop_print_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+	printf("tag VT_NON, dev %d, %d\n", major(ap->a_vp->v_rdev),
+		minor(ap->a_vp->v_rdev));
+	return (0);	/* declared int: give callers a defined value */
+}
+
+/*
+ * Answer pathconf queries for special devices; unknown names get EINVAL.
+ */
+spec_pathconf(ap)
+	struct vop_pathconf_args /* {
+		struct vnode *a_vp;
+		int a_name;
+		int *a_retval;
+	} */ *ap;
+{
+	int *valp = ap->a_retval;	/* where the answer is stored */
+
+	switch (ap->a_name) {
+	case _PC_LINK_MAX:
+		*valp = LINK_MAX;
+		return (0);
+	case _PC_MAX_CANON:
+		*valp = MAX_CANON;
+		return (0);
+	case _PC_MAX_INPUT:
+		*valp = MAX_INPUT;
+		return (0);
+	case _PC_PIPE_BUF:
+		*valp = PIPE_BUF;
+		return (0);
+	case _PC_CHOWN_RESTRICTED:
+		*valp = 1;
+		return (0);
+	case _PC_VDISABLE:
+		*valp = _POSIX_VDISABLE;
+		return (0);
+	default:
+		return (EINVAL);
+	}
+}
+
+/*
+ * Advisory byte-level locks on special devices: always EOPNOTSUPP.
+ */
+/* ARGSUSED */
+int
+spec_advlock(ap)
+	struct vop_advlock_args /* {
+		struct vnode *a_vp;
+		caddr_t  a_id;
+		int  a_op;
+		struct flock *a_fl;
+		int  a_flags;
+	} */ *ap;
+{
+	return (EOPNOTSUPP);
+}
+
+/*
+ * Stand-in for operations that fail on special devices: returns EBADF.
+ */
+int
+spec_ebadf()
+{
+	return (EBADF);
+}
+
+/*
+ * Stand-in for operations that must never reach a special device: panics.
+ */
+int
+spec_badop()
+{
+	panic("spec_badop called");
+	/* NOTREACHED */
+}
diff --git a/sys/miscfs/specfs/specdev.h b/sys/miscfs/specfs/specdev.h
new file mode 100644
index 00000000000..a13b66e5113
--- /dev/null
+++ b/sys/miscfs/specfs/specdev.h
@@ -0,0 +1,127 @@
+/*
+ * Copyright (c) 1990, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)specdev.h	8.2 (Berkeley) 2/2/94
+ */
+
+/*
+ * This structure defines the information maintained about
+ * special devices. It is allocated in checkalias and freed
+ * in vgone.
+ */
+struct specinfo {
+	struct	vnode **si_hashchain;	/* presumably a speclisth[] slot -- confirm */
+	struct	vnode *si_specnext;	/* presumably next vnode on it -- confirm */
+	long	si_flags;		/* SI_* flags, defined below */
+	dev_t	si_rdev;		/* device number; see major()/minor() uses */
+};
+/*
+ * Exported shorthand: reach specinfo fields through a vnode's v_specinfo
+ */
+#define v_rdev v_specinfo->si_rdev
+#define v_hashchain v_specinfo->si_hashchain
+#define v_specnext v_specinfo->si_specnext
+#define v_specflags v_specinfo->si_flags
+
+/*
+ * Flags for specinfo (si_flags)
+ */
+#define	SI_MOUNTEDON	0x0001	/* block special device is mounted on */
+
+/*
+ * Special device management: SPECHSZ hash slots, picked by SPECHASH(rdev)
+ */
+#define	SPECHSZ	64
+#if	((SPECHSZ&(SPECHSZ-1)) == 0)
+#define	SPECHASH(rdev)	((((rdev)>>5)+(rdev))&(SPECHSZ-1))
+#else
+#define	SPECHASH(rdev)	(((unsigned)(((rdev)>>5)+(rdev)))%SPECHSZ)
+#endif
+
+struct vnode *speclisth[SPECHSZ]; /* NOTE(review): defined in a header */
+
+/*
+ * Special file vnode ops: real functions, or casts of the stubs below.
+ */
+extern	int (**spec_vnodeop_p)();
+struct	nameidata;
+struct	componentname;
+struct	ucred;
+struct	flock;
+struct	buf;
+struct	uio;
+
+int	spec_badop(),	/* panics when called */
+	spec_ebadf();	/* returns EBADF */
+
+int	spec_lookup __P((struct vop_lookup_args *));
+#define spec_create ((int (*) __P((struct  vop_create_args *)))spec_badop)
+#define spec_mknod ((int (*) __P((struct  vop_mknod_args *)))spec_badop)
+int	spec_open __P((struct vop_open_args *));
+int	spec_close __P((struct vop_close_args *));
+#define spec_access ((int (*) __P((struct  vop_access_args *)))spec_ebadf)
+#define spec_getattr ((int (*) __P((struct  vop_getattr_args *)))spec_ebadf)
+#define spec_setattr ((int (*) __P((struct  vop_setattr_args *)))spec_ebadf)
+int	spec_read __P((struct vop_read_args *));
+int	spec_write __P((struct vop_write_args *));
+int	spec_ioctl __P((struct vop_ioctl_args *));
+int	spec_select __P((struct vop_select_args *));
+#define spec_mmap ((int (*) __P((struct  vop_mmap_args *)))spec_badop)
+int	spec_fsync __P((struct  vop_fsync_args *));
+#define spec_seek ((int (*) __P((struct  vop_seek_args *)))spec_badop)
+#define spec_remove ((int (*) __P((struct  vop_remove_args *)))spec_badop)
+#define spec_link ((int (*) __P((struct  vop_link_args *)))spec_badop)
+#define spec_rename ((int (*) __P((struct  vop_rename_args *)))spec_badop)
+#define spec_mkdir ((int (*) __P((struct  vop_mkdir_args *)))spec_badop)
+#define spec_rmdir ((int (*) __P((struct  vop_rmdir_args *)))spec_badop)
+#define spec_symlink ((int (*) __P((struct  vop_symlink_args *)))spec_badop)
+#define spec_readdir ((int (*) __P((struct  vop_readdir_args *)))spec_badop)
+#define spec_readlink ((int (*) __P((struct  vop_readlink_args *)))spec_badop)
+#define spec_abortop ((int (*) __P((struct  vop_abortop_args *)))spec_badop)
+#define spec_inactive ((int (*) __P((struct  vop_inactive_args *)))nullop)
+#define spec_reclaim ((int (*) __P((struct  vop_reclaim_args *)))nullop)
+int	spec_lock __P((struct vop_lock_args *));
+int	spec_unlock __P((struct vop_unlock_args *));
+int	spec_bmap __P((struct vop_bmap_args *));
+int	spec_strategy __P((struct vop_strategy_args *));
+int	spec_print __P((struct vop_print_args *));
+#define spec_islocked ((int (*) __P((struct  vop_islocked_args *)))nullop)
+int	spec_pathconf __P((struct vop_pathconf_args *));
+int	spec_advlock __P((struct vop_advlock_args *));
+#define spec_blkatoff ((int (*) __P((struct  vop_blkatoff_args *)))spec_badop)
+#define spec_valloc ((int (*) __P((struct  vop_valloc_args *)))spec_badop)
+#define spec_reallocblks \
+	((int (*) __P((struct  vop_reallocblks_args *)))spec_badop)
+#define spec_vfree ((int (*) __P((struct  vop_vfree_args *)))spec_badop)
+#define spec_truncate ((int (*) __P((struct  vop_truncate_args *)))nullop)
+#define spec_update ((int (*) __P((struct  vop_update_args *)))nullop)
+#define spec_bwrite ((int (*) __P((struct  vop_bwrite_args *)))nullop)
diff --git a/sys/miscfs/umapfs/umap.h b/sys/miscfs/umapfs/umap.h
new file mode 100644
index 00000000000..9f4d1e7ace5
--- /dev/null
+++ b/sys/miscfs/umapfs/umap.h
@@ -0,0 +1,92 @@
+/*
+ * Copyright (c) 1992, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software donated to Berkeley by
+ * the UCLA Ficus project.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)umap.h	8.3 (Berkeley) 1/21/94
+ *
+ * @(#)null_vnops.c       1.5 (Berkeley) 7/10/92
+ */
+
+#define MAPFILEENTRIES 64	/* rows in umap_mount.info_mapdata (uid map) */
+#define GMAPFILEENTRIES 16	/* rows in umap_mount.info_gmapdata (gid map) */
+#define NOBODY 32767		/* uid substituted by umap_mapids() when unmapped */
+#define NULLGROUP 65534		/* gid substituted by umap_mapids() when unmapped */
+
+struct umap_args {			/* mount arguments, copied in by umapfs_mount() */
+	char		*target;	/* Target of loopback (user-space path) */
+	int 		nentries;       /* # of entries in user map array */
+	int 		gnentries;	/* # of entries in group map array */
+	u_long 		(*mapdata)[2];	/* user-space array of uid pairs */
+	u_long 		(*gmapdata)[2];	/* user-space array of gid pairs */
+};
+
+struct umap_mount {			/* per-mount state, stored in mnt_data */
+	struct mount	*umapm_vfs;	/* mount of the lower (target) vnode */
+	struct vnode	*umapm_rootvp;	/* Reference to root umap_node */
+	int             info_nentries;  /* number of uid mappings */
+	int		info_gnentries;	/* number of gid mappings */
+	u_long		info_mapdata[MAPFILEENTRIES][2]; /* mapping data for
+	    user mapping in ficus; umap_findid() matches column 0 and
+	    yields column 1, umap_reverse_findid() the other way round */
+	u_long		info_gmapdata[GMAPFILEENTRIES][2]; /* mapping data for
+	    group mapping in ficus; same column layout as info_mapdata */
+};
+
+#ifdef KERNEL
+/*
+ * A cache of vnode references
+ *
+ * One umap_node is allocated per alias vnode (see umap_node_alloc())
+ * and hangs off that vnode's v_data.  The two link fields must stay
+ * first: the hash bucket heads in umap_subr.c are cast to
+ * struct umap_node * and used as list sentinels.
+ */
+struct umap_node {
+	struct umap_node	*umap_forw;	/* Hash chain */
+	struct umap_node	*umap_back;
+	struct vnode	*umap_lowervp;	/* Aliased vnode - VREFed once */
+	struct vnode	*umap_vnode;	/* Back pointer to vnode/umap_node */
+};
+
+/*
+ * Routines exported by umap_subr.c.
+ */
+extern int umap_node_create
+	__P((struct mount *mp, struct vnode *target, struct vnode **vpp));
+extern u_long umap_reverse_findid
+	__P((u_long id, u_long map[][2], int nentries));
+extern void umap_mapids
+	__P((struct mount *v_mount, struct ucred *credp));
+
+/*
+ * Conversions between the generic mount/vnode structures and the
+ * umap private data hanging off them.
+ */
+#define	MOUNTTOUMAPMOUNT(mp) ((struct umap_mount *)((mp)->mnt_data))
+#define	VTOUMAP(vp) ((struct umap_node *)(vp)->v_data)
+#define UMAPTOV(xp) ((xp)->umap_vnode)
+
+/*
+ * UMAPVPTOLOWERVP yields the lower vnode behind an alias vnode.  With
+ * UMAPFS_DIAGNOSTIC it goes through umap_checkvp(), which sanity checks
+ * the node first; otherwise it is a plain field access.
+ */
+#ifdef UMAPFS_DIAGNOSTIC
+extern struct vnode *umap_checkvp
+	__P((struct vnode *vp, char *fil, int lno));
+#define	UMAPVPTOLOWERVP(vp) umap_checkvp((vp), __FILE__, __LINE__)
+#else
+#define	UMAPVPTOLOWERVP(vp) (VTOUMAP(vp)->umap_lowervp)
+#endif
+
+/* Operation vectors of the umap layer. */
+extern int (**umap_vnodeop_p)();
+extern struct vfsops umap_vfsops;
+#endif /* KERNEL */
diff --git a/sys/miscfs/umapfs/umap_subr.c b/sys/miscfs/umapfs/umap_subr.c
new file mode 100644
index 00000000000..6f1f077a621
--- /dev/null
+++ b/sys/miscfs/umapfs/umap_subr.c
@@ -0,0 +1,397 @@
+/*
+ * Copyright (c) 1992, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software donated to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)umap_subr.c	8.6 (Berkeley) 1/26/94
+ *
+ * $Id: lofs_subr.c, v 1.11 1992/05/30 10:05:43 jsp Exp jsp $
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/vnode.h>
+#include <sys/mount.h>
+#include <sys/namei.h>
+#include <sys/malloc.h>
+#include <miscfs/umapfs/umap.h>
+
+#define LOG2_SIZEVNODE 7		/* log2(sizeof struct vnode) */
+#define	NUMAPNODECACHE 16		/* hash buckets; power of 2 for the mask below */
+#define	UMAP_NHASH(vp) ((((u_long) vp)>>LOG2_SIZEVNODE) & (NUMAPNODECACHE-1))
+
+/*
+ * Umap layer cache:
+ * Each cache entry holds a reference to the target vnode
+ * along with a pointer to the alias vnode.  When an
+ * entry is added the target vnode is VREF'd.  When the
+ * alias is removed the target vnode is vrele'd.
+ */
+
+/*
+ * Cache head.  Its two links mirror umap_forw/umap_back at the start
+ * of struct umap_node, so a head can be cast to struct umap_node *
+ * and act as the sentinel of its own circular list (see umapfs_init()
+ * and the loop termination test in umap_node_find()).
+ */
+struct umap_node_cache {
+	struct umap_node	*ac_forw;
+	struct umap_node	*ac_back;
+};
+
+static struct umap_node_cache umap_node_cache[NUMAPNODECACHE];
+
+/*
+ * Initialise cache headers
+ *
+ * Every bucket of umap_node_cache[] is turned into an empty circular
+ * list by pointing both of its links back at the bucket itself.
+ * Returns 0; the routine is prototyped as returning int where it is
+ * installed in umap_vfsops, so give it an explicit type and value
+ * instead of relying on implicit int and falling off the end.
+ */
+int
+umapfs_init()
+{
+	struct umap_node_cache *ac;
+
+#ifdef UMAPFS_DIAGNOSTIC
+	printf("umapfs_init\n");		/* printed during system boot */
+#endif
+
+	for (ac = umap_node_cache; ac < umap_node_cache + NUMAPNODECACHE; ac++)
+		ac->ac_forw = ac->ac_back = (struct umap_node *) ac;
+
+	return (0);
+}
+
+/*
+ * Map a target (lower) vnode to the hash bucket its alias lives on.
+ * The bucket index is derived from the vnode's address by UMAP_NHASH.
+ */
+static struct umap_node_cache *
+umap_node_hash(targetvp)
+	struct vnode *targetvp;
+{
+	u_long bucket;
+
+	bucket = UMAP_NHASH(targetvp);
+	return (umap_node_cache + bucket);
+}
+
+/*
+ * Forward lookup in an id map.
+ *
+ * Scans the first nentries rows of map for one whose column 0 equals
+ * id and returns column 1 of the first such row.  When no row matches
+ * the result is -1 converted to u_long.  Used by umap_mapids() for
+ * both user and group ids.
+ */
+static u_long
+umap_findid(id, map, nentries)
+	u_long id;
+	u_long map[][2];
+	int nentries;
+{
+	int row;
+
+	for (row = 0; row < nentries; row++) {
+		if (map[row][0] == id)
+			return (map[row][1]);
+	}
+
+	/* No mapping for this id */
+	return ((u_long) -1);
+}
+
+/*
+ * Reverse lookup in an id map; the mirror image of umap_findid().
+ *
+ * Scans the first nentries rows of map for one whose column 1 equals
+ * id and returns column 0 of the first such row.  When no row matches
+ * the result is -1 converted to u_long.
+ */
+u_long
+umap_reverse_findid(id, map, nentries)
+	u_long id;
+	u_long map[][2];
+	int nentries;
+{
+	int row;
+
+	for (row = 0; row < nentries; row++) {
+		if (map[row][1] == id)
+			return (map[row][0]);
+	}
+
+	/* No mapping for this id */
+	return ((u_long) -1);
+}
+
+/*
+ * Look up the alias vnode for targetvp on mount mp.
+ * Returns the alias after a successful vget(), or 0 if the cache
+ * holds no alias for that (mount, target) pair.
+ */
+static struct vnode *
+umap_node_find(mp, targetvp)
+	struct mount *mp;
+	struct vnode *targetvp;
+{
+	struct umap_node_cache *bucket;
+	struct umap_node *np;
+	struct vnode *aliasvp;
+
+#ifdef UMAPFS_DIAGNOSTIC
+	printf("umap_node_find(mp = %x, target = %x)\n", mp, targetvp);
+#endif
+
+	/*
+	 * Pick the hash chain for the target and walk the circular,
+	 * doubly linked list until we are back at the bucket head.
+	 * A hit is a umap_node that aliases targetvp on this mount;
+	 * the alias is then vget()'d (the target vnode's own
+	 * reference count is left alone).
+	 */
+	bucket = umap_node_hash(targetvp);
+
+ rescan:
+	np = bucket->ac_forw;
+	while (np != (struct umap_node *) bucket) {
+		if (np->umap_lowervp != targetvp ||
+		    np->umap_vnode->v_mount != mp) {
+			np = np->umap_forw;
+			continue;
+		}
+
+		aliasvp = UMAPTOV(np);
+		/*
+		 * vget is needed for the VXLOCK handling, but the
+		 * lower node must not be locked, hence the 0.
+		 * If it fails, start over from the head of the chain.
+		 */
+		if (vget(aliasvp, 0)) {
+#ifdef UMAPFS_DIAGNOSTIC
+			printf ("umap_node_find: vget failed.\n");
+#endif
+			goto rescan;
+		}
+		return (aliasvp);
+	}
+
+#ifdef UMAPFS_DIAGNOSTIC
+	printf("umap_node_find(%x, %x): NOT found\n", mp, targetvp);
+#endif
+
+	return (0);
+}
+
+/*
+ * Make a new umap_node node.
+ *
+ * A fresh vnode is obtained for mount mp and returned through vpp as
+ * the alias; lowervp is the target vnode it will stand for.  On
+ * success the new node holds its own VREF on lowervp and has been
+ * entered on the hash chain.  If another alias for (mp, lowervp)
+ * appeared while we were allocating, the new vnode is discarded and
+ * the existing alias is returned instead.
+ *
+ * Returns 0, or the error from getnewvnode().
+ */
+static int
+umap_node_alloc(mp, lowervp, vpp)
+	struct mount *mp;
+	struct vnode *lowervp;
+	struct vnode **vpp;
+{
+	struct umap_node_cache *hd;
+	struct umap_node *xp;
+	struct vnode *othervp, *vp;
+	int error;
+
+	if (error = getnewvnode(VT_UMAP, mp, umap_vnodeop_p, vpp))
+		return (error);
+	vp = *vpp;
+
+	MALLOC(xp, struct umap_node *, sizeof(struct umap_node),
+	    M_TEMP, M_WAITOK);
+	vp->v_type = lowervp->v_type;
+	xp->umap_vnode = vp;
+	vp->v_data = xp;
+	xp->umap_lowervp = lowervp;
+	/*
+	 * Before we insert our new node onto the hash chains,
+	 * check to see if someone else has beaten us to it.
+	 * (We could have slept in MALLOC.)
+	 *
+	 * umap_node_find() takes (mp, targetvp); passing only the
+	 * vnode would make it search with a garbage second argument
+	 * and compare every entry's mount against a vnode pointer.
+	 */
+	if (othervp = umap_node_find(mp, lowervp)) {
+		FREE(xp, M_TEMP);
+		vp->v_type = VBAD;	/* node is discarded */
+		vp->v_usecount = 0;	/* XXX */
+		*vpp = othervp;
+		return (0);
+	}
+	VREF(lowervp);   /* Extra VREF will be vrele'd in umap_node_create */
+	hd = umap_node_hash(lowervp);
+	insque(xp, hd);
+	return (0);
+}
+
+
+/*
+ * Return, through newvpp, the alias vnode for targetvp on mount mp.
+ *
+ * An existing alias is reused when umap_node_find() has one;
+ * otherwise a new alias is made with umap_node_alloc().  On success
+ * one reference on targetvp is released (the alias keeps its own)
+ * and 0 is returned.  On failure the error from umap_node_alloc() is
+ * returned and targetvp's reference count is left untouched.
+ */
+int
+umap_node_create(mp, targetvp, newvpp)
+	struct mount *mp;
+	struct vnode *targetvp;
+	struct vnode **newvpp;
+{
+	struct vnode *aliasvp;
+
+	if (aliasvp = umap_node_find(mp, targetvp)) {
+		/*
+		 * umap_node_find() already took a reference to the
+		 * alias vnode via vget(), so no extra VREF here.
+		 */
+#ifdef UMAPFS_DIAGNOSTIC
+		/* There is no "ap" in this routine; print the alias. */
+		vprint("umap_node_create: exists", aliasvp);
+#endif
+		/* VREF(aliasvp); */
+	} else {
+		int error;
+
+		/*
+		 * Get new vnode.
+		 */
+#ifdef UMAPFS_DIAGNOSTIC
+		printf("umap_node_create: create new alias vnode\n");
+#endif
+		/*
+		 * Make new vnode reference the umap_node.
+		 */
+		if (error = umap_node_alloc(mp, targetvp, &aliasvp))
+			return (error);
+
+		/*
+		 * aliasvp is already VREF'd by getnewvnode()
+		 */
+	}
+
+	/* Drop the caller's reference on the target. */
+	vrele(targetvp);
+
+#ifdef UMAPFS_DIAGNOSTIC
+	vprint("umap_node_create: alias", aliasvp);
+	vprint("umap_node_create: target", targetvp);
+#endif
+
+	*newvpp = aliasvp;
+	return (0);
+}
+
+#ifdef UMAPFS_DIAGNOSTIC
+int umap_checkvp_barrier = 1;
+
+/*
+ * Diagnostic helper for umap_checkvp(): print eight words starting at
+ * the umap_node, terminate the line, then spin for as long as
+ * umap_checkvp_barrier is non-zero so a debugger can be attached.
+ */
+static void
+umap_checkvp_dump(node)
+	struct umap_node *node;
+{
+	u_long *word = (u_long *) node;
+	int n;
+
+	for (n = 0; n < 8; n++)
+		printf(" %x", word[n]);
+	printf("\n");
+	/* wait for debugger */
+	while (umap_checkvp_barrier) /*WAIT*/ ;
+}
+
+/*
+ * Checked version of "alias vnode -> lower vnode", used by the
+ * UMAPVPTOLOWERVP macro when UMAPFS_DIAGNOSTIC is defined.
+ * Panics if the node has no lower vnode or if the lower vnode's
+ * use count has dropped below 1; otherwise returns the lower vnode.
+ * fil and lno identify the call site (only used by disabled tracing).
+ */
+struct vnode *
+umap_checkvp(vp, fil, lno)
+	struct vnode *vp;
+	char *fil;
+	int lno;
+{
+	struct umap_node *node = VTOUMAP(vp);
+
+#if 0
+	/*
+	 * Can't do this check because vop_reclaim runs
+	 * with funny vop vector.
+	 */
+	if (vp->v_op != umap_vnodeop_p) {
+		printf ("umap_checkvp: on non-umap-node\n");
+		while (umap_checkvp_barrier) /*WAIT*/ ;
+		panic("umap_checkvp");
+	}
+#endif
+	if (node->umap_lowervp == NULL) {
+		/* Should never happen */
+		printf("vp = %x, ZERO ptr\n", vp);
+		umap_checkvp_dump(node);
+		panic("umap_checkvp");
+	}
+	if (node->umap_lowervp->v_usecount < 1) {
+		printf("vp = %x, unref'ed lowervp\n", vp);
+		umap_checkvp_dump(node);
+		panic ("umap with unref'ed lowervp");
+	}
+#if 0
+	printf("umap %x/%d -> %x/%d [%s, %d]\n",
+	        node->umap_vnode, node->umap_vnode->v_usecount,
+		node->umap_lowervp, node->umap_lowervp->v_usecount,
+		fil, lno);
+#endif
+	return (node->umap_lowervp);
+}
+#endif
+
+/*
+ * umap_mapids maps all of the ids in a credential, both user and group.
+ *
+ * v_mount is the umap mount whose tables are consulted; credp is
+ * rewritten in place.  The uid is replaced by its mapping, or by
+ * NOBODY when the user map has no entry for it.  Every group in
+ * cr_groups[] is replaced by its mapping, or by NULLGROUP when the
+ * group map has no entry for it.
+ */
+
+void
+umap_mapids(v_mount, credp)
+	struct mount *v_mount;
+	struct ucred *credp;
+{
+	struct umap_mount *amp = MOUNTTOUMAPMOUNT(v_mount);
+	int i, unentries, gnentries;
+	u_long (*groupmap)[2], (*usermap)[2];
+	uid_t uid;
+	gid_t gid;
+
+	/*
+	 * Keep the tables typed as arrays of pairs, which is what
+	 * umap_findid() is declared to take.
+	 */
+	unentries = amp->info_nentries;
+	usermap = amp->info_mapdata;
+	gnentries = amp->info_gnentries;
+	groupmap = amp->info_gmapdata;
+
+	/* Find uid entry in map */
+
+	uid = (uid_t) umap_findid(credp->cr_uid, usermap, unentries);
+
+	if (uid != (uid_t) -1)
+		credp->cr_uid = uid;
+	else
+		credp->cr_uid = (uid_t) NOBODY;
+
+#ifdef notdef
+	/* cr_gid is the same as cr_groups[0] in 4BSD */
+
+	/* Find gid entry in map */
+
+	gid = (gid_t) umap_findid(credp->cr_gid, groupmap, gnentries);
+
+	if (gid != (gid_t) -1)
+		credp->cr_gid = gid;
+	else
+		credp->cr_gid = NULLGROUP;
+#endif
+
+	/*
+	 * Now we must map each of the set of groups in the cr_groups
+	 * structure.  The list is cr_ngroups long and is not
+	 * terminated by a zero entry, so bound the walk by the count:
+	 * scanning for a 0 gid could run off the end of the array and
+	 * would leave every group after a gid 0 unmapped.
+	 */
+
+	for (i = 0; i < credp->cr_ngroups; i++) {
+		gid = (gid_t) umap_findid(credp->cr_groups[i],
+					groupmap, gnentries);
+
+		if (gid != (gid_t) -1)
+			credp->cr_groups[i] = gid;
+		else
+			credp->cr_groups[i] = NULLGROUP;
+	}
+}
diff --git a/sys/miscfs/umapfs/umap_vfsops.c b/sys/miscfs/umapfs/umap_vfsops.c
new file mode 100644
index 00000000000..2480a85e440
--- /dev/null
+++ b/sys/miscfs/umapfs/umap_vfsops.c
@@ -0,0 +1,407 @@
+/*
+ * Copyright (c) 1992, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software donated to Berkeley by
+ * the UCLA Ficus project.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)umap_vfsops.c	8.3 (Berkeley) 1/21/94
+ *
+ * @(#)null_vfsops.c       1.5 (Berkeley) 7/10/92
+ */
+
+/*
+ * Umap Layer
+ * (See mount_umap(8) for a description of this layer.)
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/vnode.h>
+#include <sys/mount.h>
+#include <sys/namei.h>
+#include <sys/malloc.h>
+#include <miscfs/umapfs/umap.h>
+
+/*
+ * Mount umap layer
+ *
+ * data points at a user-space struct umap_args naming the lower
+ * directory and the uid/gid tables.  The lower directory is looked
+ * up, the tables are copied into a freshly allocated umap_mount, and
+ * an alias vnode for the lower root becomes the root of this mount.
+ *
+ * Returns 0 on success.  Errors: EOPNOTSUPP for MNT_UPDATE, EINVAL
+ * if a table size is out of range or the target is not a directory,
+ * otherwise whatever copyin(), namei() or umap_node_create() report.
+ * On every error path the lower vnode and the umap_mount are released.
+ */
+int
+umapfs_mount(mp, path, data, ndp, p)
+	struct mount *mp;
+	char *path;
+	caddr_t data;
+	struct nameidata *ndp;
+	struct proc *p;
+{
+	struct umap_args args;
+	struct vnode *lowerrootvp, *vp;
+	struct vnode *umapm_rootvp;
+	struct umap_mount *amp;
+	u_int size;
+	int error;
+#ifdef UMAP_DIAGNOSTIC
+	int i;
+#endif
+
+#ifdef UMAPFS_DIAGNOSTIC
+	printf("umapfs_mount(mp = %x)\n", mp);
+#endif
+
+	/*
+	 * Update is not supported
+	 */
+	if (mp->mnt_flag & MNT_UPDATE) {
+		return (EOPNOTSUPP);
+		/* return (VFS_MOUNT(MOUNTTOUMAPMOUNT(mp)->umapm_vfs, path, data, ndp, p));*/
+	}
+
+	/*
+	 * Get argument
+	 */
+	if (error = copyin(data, (caddr_t)&args, sizeof(struct umap_args)))
+		return (error);
+
+	/*
+	 * The table sizes come straight from user space and bound the
+	 * copyin()s into the fixed-size arrays of struct umap_mount;
+	 * refuse anything that would not fit (or is negative).
+	 */
+	if (args.nentries < 0 || args.nentries > MAPFILEENTRIES ||
+	    args.gnentries < 0 || args.gnentries > GMAPFILEENTRIES)
+		return (EINVAL);
+
+	/*
+	 * Find lower node
+	 */
+	NDINIT(ndp, LOOKUP, FOLLOW|WANTPARENT|LOCKLEAF,
+		UIO_USERSPACE, args.target, p);
+	if (error = namei(ndp))
+		return (error);
+
+	/*
+	 * Sanity check on lower vnode
+	 */
+	lowerrootvp = ndp->ni_vp;
+#ifdef UMAPFS_DIAGNOSTIC
+	printf("vp = %x, check for VDIR...\n", lowerrootvp);
+#endif
+	vrele(ndp->ni_dvp);
+	ndp->ni_dvp = 0;
+
+	if (lowerrootvp->v_type != VDIR) {
+		vput(lowerrootvp);
+		return (EINVAL);
+	}
+
+#ifdef UMAPFS_DIAGNOSTIC
+	printf("mp = %x\n", mp);
+#endif
+
+	amp = (struct umap_mount *) malloc(sizeof(struct umap_mount),
+				M_UFSMNT, M_WAITOK);	/* XXX */
+
+	/*
+	 * Save reference to underlying FS
+	 */
+	amp->umapm_vfs = lowerrootvp->v_mount;
+
+	/*
+	 * Now copy in the number of entries and maps for umap mapping.
+	 * A failed copyin must give back what has been acquired so
+	 * far: the umap_mount and the locked, referenced lower vnode.
+	 */
+	amp->info_nentries = args.nentries;
+	amp->info_gnentries = args.gnentries;
+	error = copyin(args.mapdata, (caddr_t)amp->info_mapdata,
+	    2*sizeof(u_long)*args.nentries);
+	if (error) {
+		free(amp, M_UFSMNT);	/* XXX */
+		vput(lowerrootvp);
+		return (error);
+	}
+
+#ifdef UMAP_DIAGNOSTIC
+	printf("umap_mount:nentries %d\n",args.nentries);
+	for (i = 0; i < args.nentries; i++)
+		printf("   %d maps to %d\n", amp->info_mapdata[i][0],
+	 	    amp->info_mapdata[i][1]);
+#endif
+
+	/* The group table has gnentries rows, not nentries. */
+	error = copyin(args.gmapdata, (caddr_t)amp->info_gmapdata,
+	    2*sizeof(u_long)*args.gnentries);
+	if (error) {
+		free(amp, M_UFSMNT);	/* XXX */
+		vput(lowerrootvp);
+		return (error);
+	}
+
+#ifdef UMAP_DIAGNOSTIC
+	printf("umap_mount:gnentries %d\n",args.gnentries);
+	for (i = 0; i < args.gnentries; i++)
+		printf("	group %d maps to %d\n", 
+		    amp->info_gmapdata[i][0],
+	 	    amp->info_gmapdata[i][1]);
+#endif
+
+
+	/*
+	 * Save reference.  Each mount also holds
+	 * a reference on the root vnode.
+	 */
+	error = umap_node_create(mp, lowerrootvp, &vp);
+	/*
+	 * Make sure the node alias worked.  On failure vp was never
+	 * set, so it must not be touched; unlock and release the
+	 * lower vnode directly instead.
+	 */
+	if (error) {
+		vput(lowerrootvp);
+		free(amp, M_UFSMNT);	/* XXX */
+		return (error);
+	}
+	/*
+	 * Unlock the node (either the lower or the alias)
+	 */
+	VOP_UNLOCK(vp);
+
+	/*
+	 * Keep a held reference to the root vnode.
+	 * It is vrele'd in umapfs_unmount.
+	 */
+	umapm_rootvp = vp;
+	umapm_rootvp->v_flag |= VROOT;
+	amp->umapm_rootvp = umapm_rootvp;
+	if (UMAPVPTOLOWERVP(umapm_rootvp)->v_mount->mnt_flag & MNT_LOCAL)
+		mp->mnt_flag |= MNT_LOCAL;
+	mp->mnt_data = (qaddr_t) amp;
+	/* NOTE(review): fsid is allocated under the lofs type - confirm */
+	getnewfsid(mp, MOUNT_LOFS);
+
+	(void) copyinstr(path, mp->mnt_stat.f_mntonname, MNAMELEN - 1, &size);
+	bzero(mp->mnt_stat.f_mntonname + size, MNAMELEN - size);
+	(void) copyinstr(args.target, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, 
+	    &size);
+	bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size);
+#ifdef UMAPFS_DIAGNOSTIC
+	printf("umapfs_mount: lower %s, alias at %s\n",
+		mp->mnt_stat.f_mntfromname, mp->mnt_stat.f_mntonname);
+#endif
+	return (0);
+}
+
+/*
+ * VFS start.  Nothing needed here - the start routine
+ * on the underlying filesystem will have been called
+ * when that filesystem was mounted.
+ *
+ * All three arguments are ignored and the result is always 0.
+ */
+int
+umapfs_start(mp, flags, p)
+	struct mount *mp;
+	int flags;
+	struct proc *p;
+{
+	return (0);
+	/* return (VFS_START(MOUNTTOUMAPMOUNT(mp)->umapm_vfs, flags, p)); */
+}
+
+/*
+ * Free reference to umap layer
+ *
+ * Returns EINVAL for a forced unmount while doforce is zero, EBUSY
+ * while the alias root vnode has more than one reference, or the
+ * error from vflush().  Otherwise the alias root is released and
+ * destroyed, the umap_mount is freed, mnt_data is cleared and 0 is
+ * returned.
+ */
+int
+umapfs_unmount(mp, mntflags, p)
+	struct mount *mp;
+	int mntflags;
+	struct proc *p;
+{
+	struct vnode *umapm_rootvp = MOUNTTOUMAPMOUNT(mp)->umapm_rootvp;
+	int error;
+	int flags = 0;
+	extern int doforce;
+
+#ifdef UMAPFS_DIAGNOSTIC
+	printf("umapfs_unmount(mp = %x)\n", mp);
+#endif
+
+	if (mntflags & MNT_FORCE) {
+		/* umapfs can never be rootfs so don't check for it */
+		if (!doforce)
+			return (EINVAL);
+		flags |= FORCECLOSE;
+	}
+
+	/*
+	 * Clear out buffer cache.  I don't think we
+	 * ever get anything cached at this level at the
+	 * moment, but who knows...
+	 * (The flush itself is compiled out under "notyet".)
+	 */
+#ifdef notyet
+	mntflushbuf(mp, 0); 
+	if (mntinvalbuf(mp, 1))
+		return (EBUSY);
+#endif
+	if (umapm_rootvp->v_usecount > 1)
+		return (EBUSY);
+	if (error = vflush(mp, umapm_rootvp, flags))
+		return (error);
+
+#ifdef UMAPFS_DIAGNOSTIC
+	vprint("alias root of lower", umapm_rootvp);
+#endif	 
+	/*
+	 * Release the reference on the alias root vnode that
+	 * umapfs_mount kept for the lifetime of the mount
+	 */
+	vrele(umapm_rootvp);
+	/*
+	 * And blow it away for future re-use
+	 */
+	vgone(umapm_rootvp);
+	/*
+	 * Finally, throw away the umap_mount structure
+	 */
+	free(mp->mnt_data, M_UFSMNT);	/* XXX */
+	mp->mnt_data = 0;
+	return (0);
+}
+
+/*
+ * Hand back the root vnode of the umap mount through vpp.
+ * The vnode is the alias root saved by umapfs_mount(); it is
+ * returned with an additional reference and locked.  Always returns 0.
+ */
+int
+umapfs_root(mp, vpp)
+	struct mount *mp;
+	struct vnode **vpp;
+{
+	struct vnode *rootvp;
+
+#ifdef UMAPFS_DIAGNOSTIC
+	printf("umapfs_root(mp = %x, vp = %x->%x)\n", mp,
+			MOUNTTOUMAPMOUNT(mp)->umapm_rootvp,
+			UMAPVPTOLOWERVP(MOUNTTOUMAPMOUNT(mp)->umapm_rootvp)
+			);
+#endif
+
+	rootvp = MOUNTTOUMAPMOUNT(mp)->umapm_rootvp;
+
+	/* Reference first, then lock, then publish. */
+	VREF(rootvp);
+	VOP_LOCK(rootvp);
+	*vpp = rootvp;
+
+	return (0);
+}
+
+/*
+ * Quota control: forwarded unchanged to the lower file system's mount.
+ */
+int
+umapfs_quotactl(mp, cmd, uid, arg, p)
+	struct mount *mp;
+	int cmd;
+	uid_t uid;
+	caddr_t arg;
+	struct proc *p;
+{
+	struct mount *lowermp;
+
+	lowermp = MOUNTTOUMAPMOUNT(mp)->umapm_vfs;
+	return (VFS_QUOTACTL(lowermp, cmd, uid, arg, p));
+}
+
+/*
+ * Report file system statistics for the umap mount.
+ * The numbers are obtained from the lower file system into a scratch
+ * statfs and copied into sbp; the fsid and the two mount names are
+ * taken from this mount's own mnt_stat (unless sbp already is
+ * mnt_stat).  Returns 0, or the error from the lower VFS_STATFS.
+ */
+int
+umapfs_statfs(mp, sbp, p)
+	struct mount *mp;
+	struct statfs *sbp;
+	struct proc *p;
+{
+	struct statfs lower;
+	int error;
+
+#ifdef UMAPFS_DIAGNOSTIC
+	printf("umapfs_statfs(mp = %x, vp = %x->%x)\n", mp,
+			MOUNTTOUMAPMOUNT(mp)->umapm_rootvp,
+			UMAPVPTOLOWERVP(MOUNTTOUMAPMOUNT(mp)->umapm_rootvp)
+			);
+#endif
+
+	bzero(&lower, sizeof(lower));
+
+	if (error = VFS_STATFS(MOUNTTOUMAPMOUNT(mp)->umapm_vfs, &lower, p))
+		return (error);
+
+	/* Identification and geometry of the lower file system */
+	sbp->f_type = lower.f_type;
+	sbp->f_flags = lower.f_flags;
+	sbp->f_bsize = lower.f_bsize;
+	sbp->f_iosize = lower.f_iosize;
+
+	/* Block and file counts of the lower file system */
+	sbp->f_blocks = lower.f_blocks;
+	sbp->f_bfree = lower.f_bfree;
+	sbp->f_bavail = lower.f_bavail;
+	sbp->f_files = lower.f_files;
+	sbp->f_ffree = lower.f_ffree;
+
+	/* Everything else describes this mount, not the lower one */
+	if (sbp == &mp->mnt_stat)
+		return (0);
+
+	bcopy(&mp->mnt_stat.f_fsid, &sbp->f_fsid, sizeof(sbp->f_fsid));
+	bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN);
+	bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN);
+	return (0);
+}
+
+int
+umapfs_sync(mp, waitfor, cred, p)
+	struct mount *mp;
+	int waitfor;
+	struct ucred *cred;
+	struct proc *p;
+{
+	/*
+	 * XXX - Assumes no data cached at umap layer.
+	 * Nothing is flushed here and the lower file system is not
+	 * called; every argument is ignored and 0 is returned.
+	 */
+	return (0);
+}
+
+/*
+ * Inode-number lookup: forwarded unchanged to the lower file system.
+ * NOTE(review): the vnode stored in *vpp comes straight from the lower
+ * layer and is not wrapped in an alias here - confirm callers expect that.
+ */
+int
+umapfs_vget(mp, ino, vpp)
+	struct mount *mp;
+	ino_t ino;
+	struct vnode **vpp;
+{
+	struct mount *lowermp;
+
+	lowermp = MOUNTTOUMAPMOUNT(mp)->umapm_vfs;
+	return (VFS_VGET(lowermp, ino, vpp));
+}
+
+/*
+ * File handle to vnode: forwarded unchanged to the lower file system.
+ */
+int
+umapfs_fhtovp(mp, fidp, nam, vpp, exflagsp, credanonp)
+	struct mount *mp;
+	struct fid *fidp;
+	struct mbuf *nam;
+	struct vnode **vpp;
+	int *exflagsp;
+	struct ucred**credanonp;
+{
+	struct mount *lowermp;
+
+	lowermp = MOUNTTOUMAPMOUNT(mp)->umapm_vfs;
+	return (VFS_FHTOVP(lowermp, fidp, nam, vpp, exflagsp, credanonp));
+}
+
+/*
+ * Vnode to file handle: performed on the lower vnode behind the alias.
+ */
+int
+umapfs_vptofh(vp, fhp)
+	struct vnode *vp;
+	struct fid *fhp;
+{
+	struct vnode *lowervp;
+
+	lowervp = UMAPVPTOLOWERVP(vp);
+	return (VFS_VPTOFH(lowervp, fhp));
+}
+
+int umapfs_init __P((void));	/* defined in umap_subr.c */
+
+struct vfsops umap_vfsops = {	/* declared extern in umap.h */
+	umapfs_mount,
+	umapfs_start,
+	umapfs_unmount,
+	umapfs_root,
+	umapfs_quotactl,
+	umapfs_statfs,
+	umapfs_sync,
+	umapfs_vget,
+	umapfs_fhtovp,
+	umapfs_vptofh,
+	umapfs_init,
+};
diff --git a/sys/miscfs/umapfs/umap_vnops.c b/sys/miscfs/umapfs/umap_vnops.c
new file mode 100644
index 00000000000..287804e1561
--- /dev/null
+++ b/sys/miscfs/umapfs/umap_vnops.c
@@ -0,0 +1,488 @@
+/*
+ * Copyright (c) 1992, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software donated to Berkeley by
+ * the UCLA Ficus project.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)umap_vnops.c	8.3 (Berkeley) 1/5/94
+ */
+
+/*
+ * Umap Layer
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/vnode.h>
+#include <sys/mount.h>
+#include <sys/namei.h>
+#include <sys/malloc.h>
+#include <sys/buf.h>
+#include <miscfs/umapfs/umap.h>
+
+
+int umap_bug_bypass = 0;   /* debugging knob: non-zero enables the printf tracing in this file */
+
+/*
+ * This is the 10-Apr-92 bypass routine.
+ * See null_vnops.c:null_bypass for more details.
+ *
+ * Generic pass-through for a vnode operation on the umap layer:
+ *  1. every umap vnode in the argument structure is replaced by its
+ *     lower vnode;
+ *  2. the credential argument and the credential inside the
+ *     componentname argument, when the operation has them, are
+ *     replaced by mapped duplicates (see umap_mapids());
+ *  3. the operation is invoked on the lower layer;
+ *  4. vnodes and credentials in the argument structure are restored,
+ *     and a vnode returned through vpp is wrapped in an alias.
+ *
+ * Returns the lower layer's error, or the error from
+ * umap_node_create() when wrapping the returned vnode fails.
+ */ 
+int
+umap_bypass(ap)
+	struct vop_generic_args /* {
+		struct vnodeop_desc *a_desc;
+		<other random data follows, presumably>
+	} */ *ap;
+{
+	extern int (**umap_vnodeop_p)();  /* not extern, really "forward" */
+	struct ucred **credpp = 0, *credp = 0;
+	struct ucred *savecredp, *savecompcredp = 0;
+	struct ucred *compcredp = 0;
+	struct vnode **this_vp_p;
+	int error;
+	struct vnode *old_vps[VDESC_MAX_VPS];
+	struct vnode *vp1 = 0;
+	struct vnode **vps_p[VDESC_MAX_VPS];
+	struct vnode ***vppp;
+	struct vnodeop_desc *descp = ap->a_desc;
+	int reles, i;
+	struct componentname **compnamepp = 0;
+
+	if (umap_bug_bypass)
+		printf ("umap_bypass: %s\n", descp->vdesc_name);
+
+#ifdef SAFETY
+	/*
+	 * We require at least one vp.
+	 */
+	if (descp->vdesc_vp_offsets == NULL ||
+	    descp->vdesc_vp_offsets[0] == VDESC_NO_OFFSET)
+		panic ("umap_bypass: no vp's in map.\n");
+#endif
+
+	/*
+	 * Map the vnodes going in.
+	 * Later, we'll invoke the operation based on
+	 * the first mapped vnode's operation vector.
+	 * Bit i of vdesc_flags says the operation releases vnode i.
+	 */
+	reles = descp->vdesc_flags;
+	for (i = 0; i < VDESC_MAX_VPS; reles >>= 1, i++) {
+		if (descp->vdesc_vp_offsets[i] == VDESC_NO_OFFSET)
+			break;   /* bail out at end of list */
+		vps_p[i] = this_vp_p = 
+			VOPARG_OFFSETTO(struct vnode**, descp->vdesc_vp_offsets[i], ap);
+
+		/* Remember the first umap vnode; its mount holds the maps. */
+		if (i == 0) {
+			vp1 = *vps_p[0];
+		}
+
+		/*
+		 * We're not guaranteed that any but the first vnode
+		 * are of our type.  Check for and don't map any
+		 * that aren't.  (Must map first vp or vclean fails.)
+		 */
+
+		if (i && (*this_vp_p)->v_op != umap_vnodeop_p) {
+			old_vps[i] = NULL;
+		} else {
+			old_vps[i] = *this_vp_p;
+			*(vps_p[i]) = UMAPVPTOLOWERVP(*this_vp_p);
+			/*
+			 * The lower layer will release its vnode; take
+			 * an extra reference so ours can be released
+			 * separately after the call.
+			 */
+			if (reles & 1)
+				VREF(*this_vp_p);
+		}
+			
+	}
+
+	/*
+	 * Fix the credentials.  (That's the purpose of this layer.)
+	 * NOTE(review): the credential is duplicated unconditionally;
+	 * confirm that no caller passes a sentinel such as NOCRED here.
+	 */
+
+	if (descp->vdesc_cred_offset != VDESC_NO_OFFSET) {
+
+		credpp = VOPARG_OFFSETTO(struct ucred**, 
+		    descp->vdesc_cred_offset, ap);
+
+		/* Save old values */
+
+		savecredp = (*credpp);
+		(*credpp) = crdup(savecredp);
+		credp = *credpp;
+
+		if (umap_bug_bypass && credp->cr_uid != 0)
+			printf("umap_bypass: user was %d, group %d\n", 
+			    credp->cr_uid, credp->cr_gid);
+
+		/* Map all ids in the credential structure. */
+
+		umap_mapids(vp1->v_mount, credp);
+
+		if (umap_bug_bypass && credp->cr_uid != 0)
+			printf("umap_bypass: user now %d, group %d\n", 
+			    credp->cr_uid, credp->cr_gid);
+	}
+
+	/* BSD often keeps a credential in the componentname structure
+	 * for speed.  If there is one, it better get mapped, too. 
+	 */
+
+	if (descp->vdesc_componentname_offset != VDESC_NO_OFFSET) {
+
+		compnamepp = VOPARG_OFFSETTO(struct componentname**, 
+		    descp->vdesc_componentname_offset, ap);
+
+		compcredp = (*compnamepp)->cn_cred;
+		savecompcredp = compcredp;
+		compcredp = (*compnamepp)->cn_cred = crdup(savecompcredp);
+
+		if (umap_bug_bypass && compcredp->cr_uid != 0)
+			printf("umap_bypass: component credit user was %d, group %d\n", 
+			    compcredp->cr_uid, compcredp->cr_gid);
+
+		/* Map all ids in the credential structure. */
+
+		umap_mapids(vp1->v_mount, compcredp);
+
+		if (umap_bug_bypass && compcredp->cr_uid != 0)
+			printf("umap_bypass: component credit user now %d, group %d\n", 
+			    compcredp->cr_uid, compcredp->cr_gid);
+	}
+
+	/*
+	 * Call the operation on the lower layer
+	 * with the modified argument structure.
+	 */
+	error = VCALL(*(vps_p[0]), descp->vdesc_offset, ap);
+
+	/*
+	 * Maintain the illusion of call-by-value
+	 * by restoring vnodes in the argument structure
+	 * to their original value.
+	 */
+	reles = descp->vdesc_flags;
+	for (i = 0; i < VDESC_MAX_VPS; reles >>= 1, i++) {
+		if (descp->vdesc_vp_offsets[i] == VDESC_NO_OFFSET)
+			break;   /* bail out at end of list */
+		if (old_vps[i]) {
+			*(vps_p[i]) = old_vps[i];
+			if (reles & 1)
+				vrele(*(vps_p[i]));
+		}
+	}
+
+	/*
+	 * Map the possible out-going vpp
+	 * (Assumes that the lower layer always returns
+	 * a VREF'ed vpp unless it gets an error.)
+	 */
+	if (descp->vdesc_vpp_offset != VDESC_NO_OFFSET &&
+	    !(descp->vdesc_flags & VDESC_NOMAP_VPP) &&
+	    !error) {
+		if (descp->vdesc_flags & VDESC_VPP_WILLRELE)
+			goto out;
+		vppp = VOPARG_OFFSETTO(struct vnode***,
+				 descp->vdesc_vpp_offset, ap);
+		error = umap_node_create(old_vps[0]->v_mount, **vppp, *vppp);
+	}
+
+ out:
+	/* 
+	 * Free duplicate cred structure and restore old one.
+	 */
+	if (descp->vdesc_cred_offset != VDESC_NO_OFFSET) {
+		if (umap_bug_bypass && credp && credp->cr_uid != 0)
+			printf("umap_bypass: returning-user was %d\n",
+					credp->cr_uid);
+
+		crfree(credp);
+		(*credpp) = savecredp;
+		if (umap_bug_bypass && credpp && (*credpp)->cr_uid != 0)
+		 	printf("umap_bypass: returning-user now %d\n\n", 
+			    (*credpp)->cr_uid);
+	}
+
+	if (descp->vdesc_componentname_offset != VDESC_NO_OFFSET) {
+		if (umap_bug_bypass && compcredp && compcredp->cr_uid != 0)
+			printf("umap_bypass: returning-component-user was %d\n", 
+			    compcredp->cr_uid);
+
+		crfree(compcredp);
+		(*compnamepp)->cn_cred = savecompcredp;
+		/*
+		 * compcredp has just been handed to crfree() and must
+		 * not be read again; report the restored credential,
+		 * and test it rather than the unrelated credpp.
+		 */
+		if (umap_bug_bypass && savecompcredp &&
+		    savecompcredp->cr_uid != 0)
+		 	printf("umap_bypass: returning-component-user now %d\n", 
+			    savecompcredp->cr_uid);
+	}
+
+	return (error);
+}
+
+
+/*
+ * umap_getattr: getattr for the umap layer.
+ *
+ * We handle getattr to change the fsid and to translate the owner ids.
+ * The request is first passed down through umap_bypass().  On success
+ * va_fsid is replaced with the first word of this mount's f_fsid, and
+ * va_uid / va_gid are reverse-mapped through the mount's uid and gid
+ * tables with umap_reverse_findid().  An id for which the lookup
+ * returns -1 is reported as NOBODY (uid) or NULLGROUP (gid).
+ *
+ * Returns the error from umap_bypass(), otherwise 0.
+ */
+int
+umap_getattr(ap)
+	struct vop_getattr_args /* {
+		struct vnode *a_vp;
+		struct vattr *a_vap;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	/*
+	 * NOTE(review): the ids are held in shorts before being looked up;
+	 * an id that does not fit in a short would be altered by the
+	 * assignment below -- confirm the intended id range.
+	 */
+	short uid, gid;
+	int error, tmpid, nentries, gnentries;
+	u_long (*mapdata)[2], (*gmapdata)[2];
+	struct vnode **vp1p;
+	struct vnodeop_desc *descp = ap->a_desc;
+
+	/* Let the lower layer fill in the attributes first. */
+	if (error = umap_bypass(ap))
+		return (error);
+	/* Requires that arguments be restored. */
+	ap->a_vap->va_fsid = ap->a_vp->v_mount->mnt_stat.f_fsid.val[0];
+
+	/*
+	 * Umap needs to map the uid and gid returned by a stat
+	 * into the proper values for this site.  This involves
+	 * finding the returned uid in the mapping information,
+	 * translating it into the uid on the other end,
+	 * and filling in the proper field in the vattr
+	 * structure pointed to by ap->a_vap.  The group is
+	 * handled the same way using the group map; a group
+	 * with no entry is translated to NULLGROUP.
+	 */
+
+	/* Find entry in map */
+
+	uid = ap->a_vap->va_uid;
+	gid = ap->a_vap->va_gid;
+	if (umap_bug_bypass)
+		printf("umap_getattr: mapped uid = %d, mapped gid = %d\n", uid, 
+		    gid);
+
+	/* Fetch the uid and gid tables from the mount of the first vnode argument. */
+	vp1p = VOPARG_OFFSETTO(struct vnode**, descp->vdesc_vp_offsets[0], ap);
+	nentries =  MOUNTTOUMAPMOUNT((*vp1p)->v_mount)->info_nentries;
+	mapdata =  (MOUNTTOUMAPMOUNT((*vp1p)->v_mount)->info_mapdata);
+	gnentries =  MOUNTTOUMAPMOUNT((*vp1p)->v_mount)->info_gnentries;
+	gmapdata =  (MOUNTTOUMAPMOUNT((*vp1p)->v_mount)->info_gmapdata);
+
+	/* Reverse map the uid for the vnode.  Since it's a reverse
+		map, we can't use umap_mapids() to do it. */
+
+	tmpid = umap_reverse_findid(uid, mapdata, nentries);
+
+	if (tmpid != -1) {
+
+		ap->a_vap->va_uid = (uid_t) tmpid;
+		if (umap_bug_bypass)
+			printf("umap_getattr: original uid = %d\n", uid);
+	} else 
+		ap->a_vap->va_uid = (uid_t) NOBODY;	/* no entry in the uid map */
+
+	/* Reverse map the gid for the vnode. */
+
+	tmpid = umap_reverse_findid(gid, gmapdata, gnentries);
+
+	if (tmpid != -1) {
+
+		ap->a_vap->va_gid = (gid_t) tmpid;
+		if (umap_bug_bypass)
+			printf("umap_getattr: original gid = %d\n", gid);
+	} else
+		ap->a_vap->va_gid = (gid_t) NULLGROUP;	/* no entry in the gid map */
+	
+	return (0);
+}
+
+/*
+ * umap_inactive: inactive entry point for the umap layer.
+ *
+ * Deliberately a no-op that is not passed to the lower layer;
+ * always returns 0.
+ */
+int
+umap_inactive(ap)
+	struct vop_inactive_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+	/*
+	 * Do nothing (and _don't_ bypass).
+	 * Wait to vrele lowervp until reclaim,
+	 * so that until then our umap_node is in the
+	 * cache and reusable.
+	 *
+	 */
+	return (0);
+}
+
+/*
+ * umap_reclaim: tear down the umap_node attached to a vnode.
+ *
+ * The node is unlinked with remque(), its storage is freed, the
+ * vnode's v_data is cleared and the reference held on the lower
+ * vnode is dropped.  Always returns 0.
+ */
+int
+umap_reclaim(ap)
+	struct vop_reclaim_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+	struct vnode *umapvp = ap->a_vp;
+	struct umap_node *node = VTOUMAP(umapvp);
+	struct vnode *lower = node->umap_lowervp;
+
+	/* Once the lower pointer is cleared this node will not be re-used. */
+	node->umap_lowervp = NULL;
+	remque(node);
+
+	/* Release the private data, then the lower vnode saved above. */
+	FREE(umapvp->v_data, M_TEMP);
+	umapvp->v_data = NULL;
+	vrele(lower);
+
+	return (0);
+}
+
+/*
+ * umap_strategy: hand a buffer to the lower layer's strategy routine.
+ *
+ * The buffer's b_vp is pointed at the lower vnode for the duration
+ * of the call and put back afterwards.  Returns the result of
+ * VOP_STRATEGY().
+ */
+int
+umap_strategy(ap)
+	struct vop_strategy_args /* {
+		struct buf *a_bp;
+	} */ *ap;
+{
+	struct buf *bufp = ap->a_bp;
+	struct vnode *umapvp = bufp->b_vp;	/* vnode to restore afterwards */
+	int rv;
+
+	bufp->b_vp = UMAPVPTOLOWERVP(umapvp);
+	rv = VOP_STRATEGY(bufp);
+	bufp->b_vp = umapvp;
+
+	return (rv);
+}
+
+/*
+ * umap_bwrite: hand a buffer to the lower layer's bwrite routine.
+ *
+ * The buffer's b_vp is pointed at the lower vnode for the duration
+ * of the call and put back afterwards.  Returns the result of
+ * VOP_BWRITE().
+ */
+int
+umap_bwrite(ap)
+	struct vop_bwrite_args /* {
+		struct buf *a_bp;
+	} */ *ap;
+{
+	struct buf *bufp = ap->a_bp;
+	struct vnode *umapvp = bufp->b_vp;	/* vnode to restore afterwards */
+	int rv;
+
+	bufp->b_vp = UMAPVPTOLOWERVP(umapvp);
+	rv = VOP_BWRITE(bufp);
+	bufp->b_vp = umapvp;
+
+	return (rv);
+}
+
+
+/*
+ * umap_print: print the umap vnode and the lower vnode it maps to.
+ * Always returns 0.
+ */
+int
+umap_print(ap)
+	struct vop_print_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+
+	printf("\ttag VT_UMAPFS, vp=%x, lowervp=%x\n", ap->a_vp,
+	    UMAPVPTOLOWERVP(ap->a_vp));
+	return (0);
+}
+
+/*
+ * umap_rename: rename for the umap layer.
+ *
+ * Rename is irregular, having two componentname structures.
+ * The credential in the second one (a_tcnp) is duplicated and
+ * mapped here; umap_bypass() then takes care of the rest.  The
+ * original credential is put back before returning the result
+ * of umap_bypass().
+ */
+int
+umap_rename(ap)
+	struct vop_rename_args  /* {
+		struct vnode *a_fdvp;
+		struct vnode *a_fvp;
+		struct componentname *a_fcnp;
+		struct vnode *a_tdvp;
+		struct vnode *a_tvp;
+		struct componentname *a_tcnp;
+	} */ *ap;
+{
+	struct componentname *tcnp = ap->a_tcnp;
+	struct ucred *origcred = tcnp->cn_cred;
+	struct ucred *mapped;
+	int rv;
+
+	/* Work on a private copy so the caller's credential is untouched. */
+	mapped = crdup(origcred);
+	tcnp->cn_cred = mapped;
+
+	if (umap_bug_bypass && mapped->cr_uid != 0)
+		printf("umap_rename: rename component credit user was %d, group %d\n", 
+		    mapped->cr_uid, mapped->cr_gid);
+
+	/* Map all ids in the copy, using the mount of the source directory. */
+	umap_mapids(ap->a_fdvp->v_mount, mapped);
+
+	if (umap_bug_bypass && mapped->cr_uid != 0)
+		printf("umap_rename: rename component credit user now %d, group %d\n", 
+		    mapped->cr_uid, mapped->cr_gid);
+
+	rv = umap_bypass(ap);
+
+	/* Discard the mapped copy and restore the original credential. */
+	crfree(mapped);
+	tcnp->cn_cred = origcred;
+
+	return rv;
+}
+
+/*
+ * Global vfs data structures
+ */
+/*
+ * XXX - strategy, bwrite are hand coded currently.  They should
+ * go away with a merged buffer/block cache.
+ *
+ */
+/*
+ * Operation table for umap vnodes.  umap_bypass is registered as the
+ * default entry; the operations listed after it have their own
+ * handlers in this file.  The table ends with a NULL/NULL pair.
+ */
+int (**umap_vnodeop_p)();
+struct vnodeopv_entry_desc umap_vnodeop_entries[] = {
+	{ &vop_default_desc, umap_bypass },
+
+	{ &vop_getattr_desc, umap_getattr },
+	{ &vop_inactive_desc, umap_inactive },
+	{ &vop_reclaim_desc, umap_reclaim },
+	{ &vop_print_desc, umap_print },
+	{ &vop_rename_desc, umap_rename },
+
+	{ &vop_strategy_desc, umap_strategy },
+	{ &vop_bwrite_desc, umap_bwrite },
+
+	{ (struct vnodeop_desc*) NULL, (int(*)()) NULL }
+};
+/* Ties the entry table above to the umap_vnodeop_p vector pointer. */
+struct vnodeopv_desc umap_vnodeop_opv_desc =
+	{ &umap_vnodeop_p, umap_vnodeop_entries };
diff --git a/sys/miscfs/union/README b/sys/miscfs/union/README
new file mode 100644
index 00000000000..14a476987c9
--- /dev/null
+++ b/sys/miscfs/union/README
@@ -0,0 +1,7 @@
+If you plan on using union mounts, then you should consider replacing
+"libc/gen/opendir.c" in the C library with the file "libc.opendir.c"
+in this directory.  The replacement version of opendir() automatically
+removes duplicate names when a union stack is encountered.  You will
+then need to rebuild the C library and all commands.
+
+@(#)README	8.1 (Berkeley) 2/15/94
diff --git a/sys/miscfs/union/libc.opendir.c b/sys/miscfs/union/libc.opendir.c
new file mode 100644
index 00000000000..99ed58b86fd
--- /dev/null
+++ b/sys/miscfs/union/libc.opendir.c
@@ -0,0 +1,225 @@
+/*
+ * Copyright (c) 1983, 1993, 1994
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static char orig_sccsid[] = "@(#)opendir.c	8.2 (Berkeley) 2/12/94";
+static char sccsid[] = "@(#)libc.opendir.c	8.1 (Berkeley) 2/15/94";
+#endif /* LIBC_SCCS and not lint */
+
+#include <sys/param.h>
+#include <sys/mount.h>
+
+#include <dirent.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+/*
+ * Open a directory.
+ *
+ * Opens (name), marks the descriptor close-on-exec and allocates a DIR
+ * for it.  When the descriptor lives on a MOUNT_UNION filesystem the
+ * whole directory is read into one buffer up front and every entry
+ * whose name duplicates another entry has its d_fileno set to zero.
+ * Otherwise only an empty buffer is allocated.
+ *
+ * Returns the new DIR, or NULL if the open, fcntl, fstatfs or an
+ * allocation of the DIR or its buffer fails; on failure nothing
+ * allocated here is left behind and the descriptor is closed.
+ */
+DIR *
+opendir(name)
+	const char *name;
+{
+	DIR *dirp;
+	int fd;
+	int incr;
+	struct statfs sfb;
+
+	if ((fd = open(name, 0)) == -1)
+		return (NULL);
+	if (fcntl(fd, F_SETFD, FD_CLOEXEC) == -1 ||
+	    (dirp = (DIR *)malloc(sizeof(DIR))) == NULL) {
+		close(fd);
+		return (NULL);
+	}
+
+	/*
+	 * If CLBYTES is an exact multiple of DIRBLKSIZ, use a CLBYTES
+	 * buffer that is cluster boundary aligned.
+	 * Hopefully this can be a big win someday by allowing page
+	 * trades to user space to be done by getdirentries()
+	 */
+	if ((CLBYTES % DIRBLKSIZ) == 0)
+		incr = CLBYTES;
+	else
+		incr = DIRBLKSIZ;
+
+#ifdef MOUNT_UNION
+	/*
+	 * Determine whether this directory is the top of a union stack.
+	 */
+	if (fstatfs(fd, &sfb) < 0) {
+		free(dirp);
+		close(fd);
+		return (NULL);
+	}
+
+	if (sfb.f_type == MOUNT_UNION) {
+		int len = 0;
+		int space = 0;
+		char *buf = 0;
+		char *ddptr = 0;
+		char *dend;
+		int n;
+		struct dirent **dpv;
+
+		/*
+		 * The strategy here is to read all the directory
+		 * entries into a buffer, sort the buffer, and
+		 * remove duplicate entries by setting the inode
+		 * number to zero.
+		 */
+
+		/*
+		 * Fixup dd_loc to be non-zero to fake out readdir
+		 */
+		dirp->dd_loc = sizeof(void *);
+
+		do {
+			/*
+			 * Always make at least DIRBLKSIZ bytes
+			 * available to getdirentries
+			 */
+			if (space < DIRBLKSIZ) {
+				char *nbuf;
+
+				space += incr;
+				len += incr;
+				/*
+				 * The entries are stored starting dd_loc
+				 * bytes into the buffer while (space) is
+				 * counted against (len), so the allocation
+				 * has to be dd_loc bytes larger than (len)
+				 * or a full read would run past its end.
+				 * Keep the old pointer until realloc has
+				 * succeeded so it can be freed on failure.
+				 */
+				nbuf = realloc(buf, len + dirp->dd_loc);
+				if (nbuf == NULL) {
+					free(buf);
+					free(dirp);
+					close(fd);
+					return (NULL);
+				}
+				buf = nbuf;
+				ddptr = buf + (len - space) + dirp->dd_loc;
+			}
+
+			n = getdirentries(fd, ddptr, space, &dirp->dd_seek);
+			if (n > 0) {
+				ddptr += n;
+				space -= n;
+			}
+		} while (n > 0);
+
+		/*
+		 * There is now a buffer full of (possibly) duplicate
+		 * names.  Remember where the data ends so that the
+		 * scans below never look at bytes that were not read.
+		 */
+		dend = ddptr;
+		dirp->dd_buf = buf;
+
+		/*
+		 * Go round this loop twice...
+		 *
+		 * Scan through the buffer, counting entries.
+		 * On the second pass, save pointers to each one.
+		 * Then sort the pointers and remove duplicate names.
+		 */
+		for (dpv = 0;;) {
+			n = 0;
+			ddptr = buf + dirp->dd_loc;
+			while (ddptr < dend) {
+				struct dirent *dp;
+
+				dp = (struct dirent *) ddptr;
+				/* stop at a misaligned or malformed record */
+				if ((unsigned long)dp & 03)
+					break;
+				if ((dp->d_reclen <= 0) ||
+				    (dp->d_reclen > (dend - ddptr)))
+					break;
+				ddptr += dp->d_reclen;
+				if (dp->d_fileno) {
+					if (dpv)
+						dpv[n] = dp;
+					n++;
+				}
+			}
+
+			if (dpv) {
+				struct dirent *xp;
+
+				/*
+				 * If and when whiteouts happen,
+				 * this sort would need to be stable.
+				 */
+				heapsort(dpv, n, sizeof(*dpv), alphasort);
+
+				dpv[n] = NULL;
+				xp = NULL;
+
+				/*
+				 * Scan through the buffer in sort order,
+				 * zapping the inode number of any
+				 * duplicate names.
+				 */
+				for (n = 0; dpv[n]; n++) {
+					struct dirent *dp = dpv[n];
+
+					if ((xp == NULL) ||
+					    strcmp(dp->d_name, xp->d_name))
+						xp = dp;
+					else
+						dp->d_fileno = 0;
+				}
+
+				free(dpv);
+				break;
+			} else {
+				/*
+				 * First pass done: (n) entries were
+				 * counted.  If the pointer array cannot
+				 * be allocated the duplicates are simply
+				 * left in place.
+				 */
+				dpv = malloc((n+1) * sizeof(struct dirent *));
+				if (dpv == NULL)
+					break;
+			}
+		}
+
+		dirp->dd_len = len;
+		dirp->dd_size = ddptr - dirp->dd_buf;
+	} else
+#endif /* MOUNT_UNION */
+	{
+		dirp->dd_len = incr;
+		dirp->dd_buf = malloc(dirp->dd_len);
+		if (dirp->dd_buf == NULL) {
+			free(dirp);
+			close (fd);
+			return (NULL);
+		}
+		dirp->dd_seek = 0;
+		dirp->dd_loc = 0;
+	}
+
+	dirp->dd_fd = fd;
+
+	/*
+	 * Set up seek point for rewinddir.
+	 */
+	dirp->dd_rewind = telldir(dirp);
+
+	return (dirp);
+}
diff --git a/sys/miscfs/union/union.h b/sys/miscfs/union/union.h
new file mode 100644
index 00000000000..463218ac3ed
--- /dev/null
+++ b/sys/miscfs/union/union.h
@@ -0,0 +1,117 @@
+/*
+ * Copyright (c) 1994 The Regents of the University of California.
+ * Copyright (c) 1994 Jan-Simon Pendry.
+ * All rights reserved.
+ *
+ * This code is derived from software donated to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)union.h	8.2 (Berkeley) 2/17/94
+ */
+
+/*
+ * Arguments describing a union mount.
+ * NOTE(review): presumably filled in by the mount program and handed
+ * to the kernel at mount time -- confirm against the mount code.
+ */
+struct union_args {
+	char		*target;	/* Target of loopback  */
+	int		mntflags;	/* Options on the mount */
+};
+
+/*
+ * Operation modes; UNMNT_OPMASK covers all of the mode values.
+ */
+#define UNMNT_ABOVE	0x0001		/* Target appears above mount point */
+#define UNMNT_BELOW	0x0002		/* Target appears below mount point */
+#define UNMNT_REPLACE	0x0003		/* Target replaces mount point */
+#define UNMNT_OPMASK	0x0003
+
+/*
+ * Per-mount state of a union mount; MOUNTTOUNIONMOUNT() obtains it
+ * from a mount's mnt_data.
+ */
+struct union_mount {
+	struct vnode	*um_uppervp;
+	struct vnode	*um_lowervp;
+	struct ucred	*um_cred;	/* Credentials of user calling mount */
+	int		um_cmode;	/* cmask from mount process */
+	int		um_op;		/* Operation mode (a UNMNT_* value) */
+};
+
+#ifdef KERNEL
+
+/*
+ * UN_DIRMODE is VREAD|VWRITE|VEXEC repeated at shifts of 0, 3 and 6
+ * bits; UN_FILEMODE is the same without VEXEC.
+ * NOTE(review): presumably the base mode bits for shadow directories
+ * and files created in the upper layer -- confirm at the use sites.
+ */
+#define VRWXMODE (VREAD|VWRITE|VEXEC)
+#define VRWMODE (VREAD|VWRITE)
+#define UN_DIRMODE ((VRWXMODE)|(VRWXMODE>>3)|(VRWXMODE>>6))
+#define UN_FILEMODE ((VRWMODE)|(VRWMODE>>3)|(VRWMODE>>6))
+
+/*
+ * A cache of vnode references
+ *
+ * One union_node is attached to each union vnode (see VTOUNION) and
+ * records the upper and/or lower layer vnodes that the union vnode
+ * stands for.
+ */
+struct union_node {
+	LIST_ENTRY(union_node)	un_cache;	/* Hash chain */
+	struct vnode		*un_vnode;	/* Back pointer */
+	struct vnode	        *un_uppervp;	/* overlaying object */
+	struct vnode	        *un_lowervp;	/* underlying object */
+	struct vnode		*un_dirvp;	/* Parent dir of uppervp */
+	char			*un_path;	/* saved component name */
+	int			un_hash;	/* saved un_path hash value */
+	int			un_openl;	/* # of opens on lowervp */
+	int			un_flags;	/* UN_* flag bits */
+#ifdef DIAGNOSTIC
+	pid_t			un_pid;		/* pid recorded when locked, or -1 */
+#endif
+};
+
+/*
+ * Flag bits kept in un_flags.
+ */
+#define UN_WANT		0x01		/* Someone is sleeping for the lock */
+#define UN_LOCKED	0x02		/* Lock is held */
+#define UN_ULOCK	0x04		/* Upper node is locked */
+#define UN_KLOCK	0x08		/* Keep upper node locked on vput */
+
+/*
+ * Prototypes for the union filesystem support routines.
+ */
+extern int union_allocvp __P((struct vnode **, struct mount *,
+				struct vnode *, struct vnode *,
+				struct componentname *, struct vnode *,
+				struct vnode *));
+extern int union_copyfile __P((struct proc *, struct ucred *,
+				struct vnode *, struct vnode *));
+extern int union_mkshadow __P((struct union_mount *, struct vnode *,
+				struct componentname *, struct vnode **));
+extern int union_vn_create __P((struct vnode **, struct union_node *,
+				struct proc *));
+extern int union_cn_close __P((struct vnode *, int, struct ucred *,
+				struct proc *));
+extern void union_removed_upper __P((struct union_node *un));
+extern struct vnode *union_lowervp __P((struct vnode *));
+extern void union_newlower __P((struct union_node *, struct vnode *));
+extern void union_newupper __P((struct union_node *, struct vnode *));
+
+/*
+ * Conversions between the generic mount/vnode structures and the
+ * union private data.  OTHERVP() yields the upper vnode when there is
+ * one and the lower vnode otherwise; note that it expands its argument
+ * more than once.
+ */
+#define	MOUNTTOUNIONMOUNT(mp) ((struct union_mount *)((mp)->mnt_data))
+#define	VTOUNION(vp) ((struct union_node *)(vp)->v_data)
+#define	UNIONTOV(un) ((un)->un_vnode)
+#define	LOWERVP(vp) (VTOUNION(vp)->un_lowervp)
+#define	UPPERVP(vp) (VTOUNION(vp)->un_uppervp)
+#define OTHERVP(vp) (UPPERVP(vp) ? UPPERVP(vp) : LOWERVP(vp))
+
+extern int (**union_vnodeop_p)();
+extern struct vfsops union_vfsops;
+#endif /* KERNEL */
diff --git a/sys/miscfs/union/union_subr.c b/sys/miscfs/union/union_subr.c
new file mode 100644
index 00000000000..77947d1dfbe
--- /dev/null
+++ b/sys/miscfs/union/union_subr.c
@@ -0,0 +1,744 @@
+/*
+ * Copyright (c) 1994 Jan-Simon Pendry
+ * Copyright (c) 1994
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)union_subr.c	8.4 (Berkeley) 2/17/94
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/time.h>
+#include <sys/kernel.h>
+#include <sys/vnode.h>
+#include <sys/namei.h>
+#include <sys/malloc.h>
+#include <sys/file.h>
+#include <sys/filedesc.h>
+#include <sys/queue.h>
+#include <miscfs/union/union.h>
+
+#ifdef DIAGNOSTIC
+#include <sys/proc.h>
+#endif
+
+/* must be power of two, otherwise change UNION_HASH() */
+#define NHASH 32
+
+/*
+ * Hash an (upper, lower) vnode pointer pair to a chain index in the
+ * range 0 .. NHASH-1: the two pointer values are added as unsigned
+ * longs, shifted right by 8 bits and masked.  Both arguments are
+ * parenthesized so that any expression may be passed.
+ */
+#define UNION_HASH(u, l) \
+	(((((unsigned long) (u)) + ((unsigned long) (l))) >> 8) & (NHASH-1))
+
+/* Hash chains of union_nodes, and one lock word (UN_* bits) per chain. */
+static LIST_HEAD(unhead, union_node) unhead[NHASH];
+static int unvplock[NHASH];
+
+/*
+ * union_init: initialise every hash chain to empty and clear all of
+ * the chain lock words.  Always returns 0.
+ */
+int
+union_init()
+{
+	int i;
+
+	for (i = 0; i < NHASH; i++)
+		LIST_INIT(&unhead[i]);
+	bzero((caddr_t) unvplock, sizeof(unvplock));
+
+	/* Declared int, so give callers a defined value. */
+	return (0);
+}
+
+/*
+ * union_list_lock: try to take the lock on hash chain (ix).
+ *
+ * Returns 0 with the lock held.  If the lock is already held,
+ * marks it wanted, sleeps on the lock word and returns 1 WITHOUT
+ * the lock; callers are expected to call again until 0 comes back.
+ */
+static int
+union_list_lock(ix)
+	int ix;
+{
+	int *lockp = &unvplock[ix];
+
+	if ((*lockp & UN_LOCKED) == 0) {
+		*lockp |= UN_LOCKED;
+		return (0);
+	}
+
+	*lockp |= UN_WANT;
+	sleep((caddr_t) lockp, PINOD);
+	return (1);
+}
+
+/*
+ * union_list_unlock: release the lock on hash chain (ix) and wake up
+ * anyone who marked it wanted.
+ */
+static void
+union_list_unlock(ix)
+	int ix;
+{
+	int *lockp = &unvplock[ix];
+	int wanted = *lockp & UN_WANT;
+
+	*lockp &= ~(UN_LOCKED | UN_WANT);
+
+	if (wanted)
+		wakeup((caddr_t) lockp);
+}
+
+/*
+ * union_updatevp: give (un) a new upper and/or lower vnode and keep
+ * it on the hash chain that matches the new pair.
+ *
+ * A layer vnode that is being replaced is vrele'd; when the lower
+ * vnode is replaced the saved path and directory vnode are released
+ * too.  The references on (uppervp) and (lowervp) passed in are taken
+ * over by (un).
+ *
+ * If the hash changes, both the old and the new chain are locked,
+ * always lowest index first.  (ohash) and (nhash) keep their meaning
+ * throughout -- separate variables hold the locking order -- so that
+ * the node is re-inserted on the chain of the NEW pair and each lock
+ * is released by index of the chain it protects.
+ */
+void
+union_updatevp(un, uppervp, lowervp)
+	struct union_node *un;
+	struct vnode *uppervp;
+	struct vnode *lowervp;
+{
+	int ohash = UNION_HASH(un->un_uppervp, un->un_lowervp);
+	int nhash = UNION_HASH(uppervp, lowervp);
+
+	if (ohash != nhash) {
+		int lhash, hhash;
+
+		/*
+		 * Ensure locking is ordered from lower to higher
+		 * to avoid deadlocks.
+		 */
+		if (nhash < ohash) {
+			lhash = nhash;
+			hhash = ohash;
+		} else {
+			lhash = ohash;
+			hhash = nhash;
+		}
+
+		while (union_list_lock(lhash))
+			continue;
+
+		while (union_list_lock(hhash))
+			continue;
+
+		/* Off the old chain; the old chain lock is no longer needed. */
+		LIST_REMOVE(un, un_cache);
+		union_list_unlock(ohash);
+	} else {	
+		while (union_list_lock(nhash))
+			continue;
+	}
+
+	if (un->un_lowervp != lowervp) {
+		if (un->un_lowervp) {
+			vrele(un->un_lowervp);
+			/* path and directory belonged to the old lower vnode */
+			if (un->un_path) {
+				free(un->un_path, M_TEMP);
+				un->un_path = 0;
+			}
+			if (un->un_dirvp) {
+				vrele(un->un_dirvp);
+				un->un_dirvp = NULLVP;
+			}
+		}
+		un->un_lowervp = lowervp;
+	}
+
+	if (un->un_uppervp != uppervp) {
+		if (un->un_uppervp)
+			vrele(un->un_uppervp);
+
+		un->un_uppervp = uppervp;
+	}
+
+	/* Back onto the chain selected by the new vnode pair. */
+	if (ohash != nhash)
+		LIST_INSERT_HEAD(&unhead[nhash], un, un_cache);
+
+	union_list_unlock(nhash);
+}
+
+/*
+ * union_newlower: replace the lower vnode of (un) with (lowervp),
+ * leaving the upper vnode as it is.  See union_updatevp().
+ */
+void
+union_newlower(un, lowervp)
+	struct union_node *un;
+	struct vnode *lowervp;
+{
+
+	union_updatevp(un, un->un_uppervp, lowervp);
+}
+
+/*
+ * union_newupper: replace the upper vnode of (un) with (uppervp),
+ * leaving the lower vnode as it is.  See union_updatevp().
+ */
+void
+union_newupper(un, uppervp)
+	struct union_node *un;
+	struct vnode *uppervp;
+{
+
+	union_updatevp(un, uppervp, un->un_lowervp);
+}
+
+/*
+ * allocate a union_node/vnode pair.  the vnode is
+ * referenced and locked.  the new vnode is returned
+ * via (vpp).  (mp) is the mountpoint of the union filesystem,
+ * (dvp) is the parent directory where the upper layer object
+ * should exist (but doesn't) and (cnp) is the componentname
+ * information which is partially copied to allow the upper
+ * layer object to be created at a later time.  (uppervp)
+ * and (lowervp) reference the upper and lower layer objects
+ * being mapped.  either, but not both, can be nil.
+ * if supplied, (uppervp) is locked.
+ * the reference is either maintained in the new union_node
+ * object which is allocated, or they are vrele'd.
+ *
+ * all union_nodes are maintained on hash chains, one
+ * list per value of UNION_HASH() of the upper and lower
+ * vnode pointers.  new nodes are only allocated when they
+ * cannot be found on these lists.  entries on the lists are
+ * removed when the vfs reclaim entry is called.
+ *
+ * a lock is kept for each hash chain.  this is
+ * needed because the getnewvnode() function can block
+ * waiting for a vnode to become free, in which case there
+ * may be more than one process trying to get the same
+ * vnode.  the chain lock is held while a chain is being
+ * searched and while getnewvnode is called; apart from
+ * such blocking points the kernel itself is single-threaded.
+ *
+ * if an entry is found on the list, then call vget() to
+ * take a reference.  this is done because there may be
+ * zero references to it and so it needs to be removed from
+ * the vnode free list.
+ */
+int
+union_allocvp(vpp, mp, undvp, dvp, cnp, uppervp, lowervp)
+	struct vnode **vpp;
+	struct mount *mp;
+	struct vnode *undvp;
+	struct vnode *dvp;		/* may be null */
+	struct componentname *cnp;	/* may be null */
+	struct vnode *uppervp;		/* may be null */
+	struct vnode *lowervp;		/* may be null */
+{
+	int error;
+	struct union_node *un;
+	struct union_node **pp;
+	struct vnode *xlowervp = NULLVP;
+	int hash;
+	int try;
+
+	if (uppervp == NULLVP && lowervp == NULLVP)
+		panic("union: unidentifiable allocation");
+
+	/*
+	 * If both layers are given but are of different types, the
+	 * lower vnode is not used for this node.  It is remembered
+	 * in xlowervp and released once a new node has been set up.
+	 * NOTE(review): xlowervp does not appear to be released on
+	 * the cache-hit return or on the getnewvnode error path
+	 * below -- confirm whether that reference is leaked.
+	 */
+	if (uppervp && lowervp && (uppervp->v_type != lowervp->v_type)) {
+		xlowervp = lowervp;
+		lowervp = NULLVP;
+	}
+
+	/*
+	 * Look for an existing node.  Up to three chains are searched:
+	 * the one for (upper, lower), then (upper, nil), then
+	 * (nil, lower).  A try is skipped when the vnode it depends
+	 * on is nil.
+	 */
+loop:
+	for (try = 0; try < 3; try++) {
+		switch (try) {
+		case 0:
+			if (lowervp == NULLVP)
+				continue;
+			hash = UNION_HASH(uppervp, lowervp);
+			break;
+
+		case 1:
+			if (uppervp == NULLVP)
+				continue;
+			hash = UNION_HASH(uppervp, NULLVP);
+			break;
+
+		case 2:
+			if (lowervp == NULLVP)
+				continue;
+			hash = UNION_HASH(NULLVP, lowervp);
+			break;
+		}
+
+		while (union_list_lock(hash))
+			continue;
+
+		/*
+		 * A node matches when each of its layers is either the
+		 * requested vnode or nil, and it belongs to this mount.
+		 * If vget() fails the whole search is restarted.
+		 */
+		for (un = unhead[hash].lh_first; un != 0;
+					un = un->un_cache.le_next) {
+			if ((un->un_lowervp == lowervp ||
+			     un->un_lowervp == NULLVP) &&
+			    (un->un_uppervp == uppervp ||
+			     un->un_uppervp == NULLVP) &&
+			    (UNIONTOV(un)->v_mount == mp)) {
+				if (vget(UNIONTOV(un), 0)) {
+					union_list_unlock(hash);
+					goto loop;
+				}
+				break;
+			}
+		}
+
+		union_list_unlock(hash);
+
+		if (un)
+			break;
+	}
+
+	if (un) {
+		/*
+		 * Obtain a lock on the union_node.
+		 * uppervp is locked, though un->un_uppervp
+		 * may not be.  this doesn't break the locking
+		 * hierarchy since in the case that un->un_uppervp
+		 * is not yet locked it will be vrele'd and replaced
+		 * with uppervp.
+		 */
+
+		if ((dvp != NULLVP) && (uppervp == dvp)) {
+			/*
+			 * Access ``.'', so (un) will already
+			 * be locked.  Since this process has
+			 * the lock on (uppervp) no other
+			 * process can hold the lock on (un).
+			 */
+#ifdef DIAGNOSTIC
+			if ((un->un_flags & UN_LOCKED) == 0)
+				panic("union: . not locked");
+			else if (curproc && un->un_pid != curproc->p_pid &&
+				    un->un_pid > -1 && curproc->p_pid > -1)
+				panic("union: allocvp not lock owner");
+#endif
+		} else {
+			/*
+			 * Node is locked by someone else: drop the
+			 * reference taken by vget(), wait for the
+			 * lock to be released and start over.
+			 */
+			if (un->un_flags & UN_LOCKED) {
+				vrele(UNIONTOV(un));
+				un->un_flags |= UN_WANT;
+				sleep((caddr_t) &un->un_flags, PINOD);
+				goto loop;
+			}
+			un->un_flags |= UN_LOCKED;
+
+#ifdef DIAGNOSTIC
+			if (curproc)
+				un->un_pid = curproc->p_pid;
+			else
+				un->un_pid = -1;
+#endif
+		}
+
+		/*
+		 * At this point, the union_node is locked,
+		 * un->un_uppervp may not be locked, and uppervp
+		 * is locked or nil.
+		 */
+
+		/*
+		 * Save information about the upper layer.
+		 * If the node already has this upper vnode the
+		 * extra reference passed in is dropped.
+		 */
+		if (uppervp != un->un_uppervp) {
+			union_newupper(un, uppervp);
+		} else if (uppervp) {
+			vrele(uppervp);
+		}
+
+		if (un->un_uppervp) {
+			un->un_flags |= UN_ULOCK;
+			un->un_flags &= ~UN_KLOCK;
+		}
+
+		/*
+		 * Save information about the lower layer.
+		 * This needs to keep track of pathname
+		 * and directory information which union_vn_create
+		 * might need.
+		 */
+		if (lowervp != un->un_lowervp) {
+			union_newlower(un, lowervp);
+			if (cnp && (lowervp != NULLVP) &&
+			    (lowervp->v_type == VREG)) {
+				un->un_hash = cnp->cn_hash;
+				un->un_path = malloc(cnp->cn_namelen+1,
+						M_TEMP, M_WAITOK);
+				bcopy(cnp->cn_nameptr, un->un_path,
+						cnp->cn_namelen);
+				un->un_path[cnp->cn_namelen] = '\0';
+				VREF(dvp);
+				un->un_dirvp = dvp;
+			}
+		} else if (lowervp) {
+			vrele(lowervp);
+		}
+		*vpp = UNIONTOV(un);
+		return (0);
+	}
+
+	/*
+	 * otherwise lock the vp list while we call getnewvnode
+	 * since that can block.
+	 */ 
+	hash = UNION_HASH(uppervp, lowervp);
+
+	/* Had to sleep for the chain lock: the chains may have changed, search again. */
+	if (union_list_lock(hash))
+		goto loop;
+
+	error = getnewvnode(VT_UNION, mp, union_vnodeop_p, vpp);
+	if (error) {
+		/*
+		 * No vnode available: give back the references (and
+		 * the lock on uppervp, unless it is also dvp) that
+		 * were passed in.
+		 */
+		if (uppervp) {
+			if (dvp == uppervp)
+				vrele(uppervp);
+			else
+				vput(uppervp);
+		}
+		if (lowervp)
+			vrele(lowervp);
+
+		goto out;
+	}
+
+	MALLOC((*vpp)->v_data, void *, sizeof(struct union_node),
+		M_TEMP, M_WAITOK);
+
+	/* The union vnode takes its type from the upper layer if there is one. */
+	if (uppervp)
+		(*vpp)->v_type = uppervp->v_type;
+	else
+		(*vpp)->v_type = lowervp->v_type;
+	un = VTOUNION(*vpp);
+	un->un_vnode = *vpp;
+	un->un_uppervp = uppervp;
+	un->un_lowervp = lowervp;
+	un->un_openl = 0;
+	un->un_flags = UN_LOCKED;
+	if (un->un_uppervp)
+		un->un_flags |= UN_ULOCK;
+#ifdef DIAGNOSTIC
+	if (curproc)
+		un->un_pid = curproc->p_pid;
+	else
+		un->un_pid = -1;
+#endif
+	/*
+	 * For a regular file in the lower layer, save the component
+	 * name and a reference to the directory vnode (dvp).
+	 */
+	if (cnp && (lowervp != NULLVP) && (lowervp->v_type == VREG)) {
+		un->un_hash = cnp->cn_hash;
+		un->un_path = malloc(cnp->cn_namelen+1, M_TEMP, M_WAITOK);
+		bcopy(cnp->cn_nameptr, un->un_path, cnp->cn_namelen);
+		un->un_path[cnp->cn_namelen] = '\0';
+		VREF(dvp);
+		un->un_dirvp = dvp;
+	} else {
+		un->un_hash = 0;
+		un->un_path = 0;
+		un->un_dirvp = 0;
+	}
+
+	LIST_INSERT_HEAD(&unhead[hash], un, un_cache);
+
+	/* Release the lower vnode that was set aside because of a type mismatch. */
+	if (xlowervp)
+		vrele(xlowervp);
+
+out:
+	union_list_unlock(hash);
+
+	return (error);
+}
+
+/*
+ * union_freevp: dispose of the union_node attached to (vp).
+ *
+ * The node is taken off its hash chain, the references it holds on
+ * the upper, lower and directory vnodes are dropped, the saved path
+ * is freed, and finally the node itself is freed and v_data cleared.
+ * Always returns 0.
+ */
+int
+union_freevp(vp)
+	struct vnode *vp;
+{
+	struct union_node *node = VTOUNION(vp);
+
+	LIST_REMOVE(node, un_cache);
+
+	/* Drop whichever vnode references the node still holds. */
+	if (node->un_uppervp != NULL)
+		vrele(node->un_uppervp);
+	if (node->un_lowervp != NULL)
+		vrele(node->un_lowervp);
+	if (node->un_dirvp != NULL)
+		vrele(node->un_dirvp);
+
+	if (node->un_path != NULL)
+		free(node->un_path, M_TEMP);
+
+	FREE(vp->v_data, M_TEMP);
+	vp->v_data = 0;
+
+	return (0);
+}
+
+/*
+ * copyfile.  copy the vnode (fvp) to the vnode (tvp)
+ * using a sequence of reads and writes.  both (fvp)
+ * and (tvp) are locked on entry and exit.
+ *
+ * (p) and (cred) are the process and credentials used for the
+ * lease checks and the reads and writes.  returns 0 once a read
+ * yields no data, or the first error from VOP_READ or VOP_WRITE.
+ */
+int
+union_copyfile(p, cred, fvp, tvp)
+	struct proc *p;
+	struct ucred *cred;
+	struct vnode *fvp;
+	struct vnode *tvp;
+{
+	char *buf;
+	struct uio uio;
+	struct iovec iov;
+	int error = 0;
+
+	/*
+	 * strategy:
+	 * allocate a buffer of size MAXBSIZE.
+	 * loop doing reads and writes, keeping track
+	 * of the current uio offset.
+	 * give up at the first sign of trouble.
+	 */
+
+	uio.uio_procp = p;
+	uio.uio_segflg = UIO_SYSSPACE;
+	uio.uio_offset = 0;
+
+	/* Each vnode is unlocked around its lease check and relocked after. */
+	VOP_UNLOCK(fvp);				/* XXX */
+	LEASE_CHECK(fvp, p, cred, LEASE_READ);
+	VOP_LOCK(fvp);					/* XXX */
+	VOP_UNLOCK(tvp);				/* XXX */
+	LEASE_CHECK(tvp, p, cred, LEASE_WRITE);
+	VOP_LOCK(tvp);					/* XXX */
+
+	buf = malloc(MAXBSIZE, M_TEMP, M_WAITOK);
+
+	/* ugly loop follows... */
+	do {
+		/* remember where this chunk starts, for the write below */
+		off_t offset = uio.uio_offset;
+
+		/* read up to MAXBSIZE bytes into buf */
+		uio.uio_iov = &iov;
+		uio.uio_iovcnt = 1;
+		iov.iov_base = buf;
+		iov.iov_len = MAXBSIZE;
+		uio.uio_resid = iov.iov_len;
+		uio.uio_rw = UIO_READ;
+		error = VOP_READ(fvp, &uio, 0, cred);
+
+		if (error == 0) {
+			/*
+			 * Re-aim the uio at the start of buf and at
+			 * the saved offset; the length is however
+			 * much the read transferred.
+			 */
+			uio.uio_iov = &iov;
+			uio.uio_iovcnt = 1;
+			iov.iov_base = buf;
+			iov.iov_len = MAXBSIZE - uio.uio_resid;
+			uio.uio_offset = offset;
+			uio.uio_rw = UIO_WRITE;
+			uio.uio_resid = iov.iov_len;
+
+			/* nothing was read: the copy is complete */
+			if (uio.uio_resid == 0)
+				break;
+
+			/* keep writing until the chunk is out or a write fails */
+			do {
+				error = VOP_WRITE(tvp, &uio, 0, cred);
+			} while ((uio.uio_resid > 0) && (error == 0));
+		}
+
+	} while (error == 0);
+
+	free(buf, M_TEMP);
+	return (error);
+}
+
+/*
+ * Create a shadow directory in the upper layer.
+ * The new vnode is returned locked.
+ *
+ * (um) points to the union mount structure for access to the
+ * the mounting process's credentials.
+ * (dvp) is the directory in which to create the shadow directory.
+ * it is unlocked on entry and exit.
+ * (cnp) is the componentname to be created.
+ * (vpp) is the returned newly created shadow directory, which
+ * is returned locked.
+ *
+ * Returns the error from relookup() or VOP_MKDIR(), or EEXIST
+ * (with *vpp set to NULLVP) if the name already exists.
+ */
+int
+union_mkshadow(um, dvp, cnp, vpp)
+	struct union_mount *um;
+	struct vnode *dvp;
+	struct componentname *cnp;
+	struct vnode **vpp;
+{
+	int error;
+	struct vattr va;
+	struct proc *p = cnp->cn_proc;
+	struct componentname cn;
+
+	/*
+	 * policy: when creating the shadow directory in the
+	 * upper layer, create it owned by the user who did
+	 * the mount, group from parent directory, and mode
+	 * 777 modified by umask (ie mostly identical to the
+	 * mkdir syscall).  (jsp, kb)
+	 */
+
+	/*
+	 * A new componentname structure must be faked up because
+	 * there is no way to know where the upper level cnp came
+	 * from or what it is being used for.  This must duplicate
+	 * some of the work done by NDINIT, some of the work done
+	 * by namei, some of the work done by lookup and some of
+	 * the work done by VOP_LOOKUP when given a CREATE flag.
+	 * Conclusion: Horrible.
+	 *
+	 * The pathname buffer will be FREEed by VOP_MKDIR.
+	 */
+	cn.cn_pnbuf = malloc(cnp->cn_namelen+1, M_NAMEI, M_WAITOK);
+	bcopy(cnp->cn_nameptr, cn.cn_pnbuf, cnp->cn_namelen);
+	cn.cn_pnbuf[cnp->cn_namelen] = '\0';
+
+	cn.cn_nameiop = CREATE;
+	cn.cn_flags = (LOCKPARENT|HASBUF|SAVENAME|SAVESTART|ISLASTCN);
+	cn.cn_proc = cnp->cn_proc;
+	/*
+	 * For an UNMNT_ABOVE mount the caller's credentials are used;
+	 * otherwise those saved in the mount structure.
+	 */
+	if (um->um_op == UNMNT_ABOVE)
+		cn.cn_cred = cnp->cn_cred;
+	else
+		cn.cn_cred = um->um_cred;
+	cn.cn_nameptr = cn.cn_pnbuf;
+	cn.cn_namelen = cnp->cn_namelen;
+	cn.cn_hash = cnp->cn_hash;
+	cn.cn_consume = cnp->cn_consume;
+
+	/*
+	 * NOTE(review): when relookup fails this returns without a
+	 * vrele(dvp) or a free of cn.cn_pnbuf here -- confirm that
+	 * relookup itself disposes of both on its error paths.
+	 */
+	VREF(dvp);
+	if (error = relookup(dvp, vpp, &cn))
+		return (error);
+	vrele(dvp);
+
+	/* The name already exists: back out of the create and report EEXIST. */
+	if (*vpp) {
+		VOP_ABORTOP(dvp, &cn);
+		VOP_UNLOCK(dvp);
+		vrele(*vpp);
+		*vpp = NULLVP;
+		return (EEXIST);
+	}
+
+	/* Only the type and the mode are specified for the new directory. */
+	VATTR_NULL(&va);
+	va.va_type = VDIR;
+	va.va_mode = um->um_cmode;
+
+	/* LEASE_CHECK: dvp is locked */
+	LEASE_CHECK(dvp, p, p->p_ucred, LEASE_WRITE);
+
+	error = VOP_MKDIR(dvp, vpp, &cn, &va);
+	return (error);
+}
+
+/*
+ * union_vn_create: creates and opens a new shadow file
+ * on the upper union layer.  this function is similar
+ * in spirit to calling vn_open but it avoids calling namei().
+ * the problem with calling namei is that a) it locks too many
+ * things, and b) it doesn't start at the "right" directory,
+ * whereas relookup is told where to start.
+ *
+ * (vpp) receives the new vnode, opened for writing with its
+ * v_writecount bumped; it is set to NULLVP on any failure.
+ * (un) supplies the name (un_path), its hash (un_hash) and the
+ * directory to create in (un_dirvp).
+ * (p) is the process whose credentials and umask are used.
+ *
+ * Returns 0 on success, EEXIST if the name already exists, or the
+ * error from relookup(), VOP_CREATE() or VOP_OPEN().
+ */
+int
+union_vn_create(vpp, un, p)
+	struct vnode **vpp;
+	struct union_node *un;
+	struct proc *p;
+{
+	struct vnode *vp;
+	struct ucred *cred = p->p_ucred;
+	struct vattr vat;
+	struct vattr *vap = &vat;
+	int fmode = FFLAGS(O_WRONLY|O_CREAT|O_TRUNC|O_EXCL);
+	int error;
+	int cmode = UN_FILEMODE & ~p->p_fd->fd_cmask;
+	struct componentname cn;
+
+	*vpp = NULLVP;
+
+	/*
+	 * Build a new componentname structure (for the same
+	 * reasons outlined in union_mkshadow).
+	 * The difference here is that the file is owned by
+	 * the current user, rather than by the person who
+	 * did the mount, since the current user needs to be
+	 * able to write the file (that's why it is being
+	 * copied in the first place).
+	 *
+	 * The buffer must hold the name plus its terminating
+	 * NUL, which the bcopy below copies as well.
+	 */
+	cn.cn_namelen = strlen(un->un_path);
+	cn.cn_pnbuf = (caddr_t) malloc(cn.cn_namelen+1, M_NAMEI, M_WAITOK);
+	bcopy(un->un_path, cn.cn_pnbuf, cn.cn_namelen+1);
+	cn.cn_nameiop = CREATE;
+	cn.cn_flags = (LOCKPARENT|HASBUF|SAVENAME|SAVESTART|ISLASTCN);
+	cn.cn_proc = p;
+	cn.cn_cred = p->p_ucred;
+	cn.cn_nameptr = cn.cn_pnbuf;
+	cn.cn_hash = un->un_hash;
+	cn.cn_consume = 0;
+
+	VREF(un->un_dirvp);
+	if (error = relookup(un->un_dirvp, &vp, &cn)) {
+		/* no later call will release the name buffer */
+		free(cn.cn_pnbuf, M_NAMEI);
+		return (error);
+	}
+	vrele(un->un_dirvp);
+
+	/* the name appeared since the original lookup: give up */
+	if (vp) {
+		VOP_ABORTOP(un->un_dirvp, &cn);
+		if (un->un_dirvp == vp)
+			vrele(un->un_dirvp);
+		else
+			vput(un->un_dirvp);
+		vrele(vp);
+		return (EEXIST);
+	}
+
+	/*
+	 * Good - there was no race to create the file
+	 * so go ahead and create it.  The permissions
+	 * on the file will be 0666 modified by the
+	 * current user's umask.  Access to the file, while
+	 * it is unioned, will require access to the top *and*
+	 * bottom files.  Access when not unioned will simply
+	 * require access to the top-level file.
+	 * TODO: confirm choice of access permissions.
+	 */
+	VATTR_NULL(vap);
+	vap->va_type = VREG;
+	vap->va_mode = cmode;
+	LEASE_CHECK(un->un_dirvp, p, cred, LEASE_WRITE);
+	if (error = VOP_CREATE(un->un_dirvp, &vp, &cn, vap))
+		return (error);
+
+	if (error = VOP_OPEN(vp, fmode, cred, p)) {
+		vput(vp);
+		return (error);
+	}
+
+	/* account for the write open; union_vn_close() undoes this */
+	vp->v_writecount++;
+	*vpp = vp;
+	return (0);
+}
+
+/*
+ * Close a vnode on behalf of the union layer.
+ *
+ * (vp) is the vnode to close, (fmode) the open flags being given
+ * up, (cred) and (p) the credentials and process doing the close.
+ * For a write open the vnode's v_writecount is decremented first,
+ * matching the increment made in union_vn_create().
+ *
+ * Returns whatever VOP_CLOSE() returns.
+ */
+int
+union_vn_close(vp, fmode, cred, p)
+	struct vnode *vp;
+	int fmode;
+	struct ucred *cred;
+	struct proc *p;
+{
+
+	if (fmode & FWRITE)
+		--vp->v_writecount;
+	/* hand on the credentials and process as well as the flags */
+	return (VOP_CLOSE(vp, fmode, cred, p));
+}
+
+/*
+ * Drop the upper vnode from union node (un).
+ *
+ * If the node records the upper vnode as locked (UN_ULOCK), the
+ * flag is cleared and the vnode unlocked; union_newupper() is then
+ * called with NULLVP as the replacement upper vnode.
+ */
+void
+union_removed_upper(un)
+	struct union_node *un;
+{
+	int held = un->un_flags & UN_ULOCK;
+
+	if (held != 0) {
+		un->un_flags &= ~UN_ULOCK;
+		VOP_UNLOCK(un->un_uppervp);
+	}
+
+	union_newupper(un, NULLVP);
+}
+
+/*
+ * Return the lower layer vnode behind union vnode (vp).
+ *
+ * A vnode is only returned when a lower vnode exists, it has the
+ * same v_type as (vp), and vget() on it succeeds; the caller then
+ * owns the reference taken by vget().  In every other case NULLVP
+ * is returned, so a non-NULL result always carries a reference.
+ */
+struct vnode *
+union_lowervp(vp)
+	struct vnode *vp;
+{
+	struct union_node *un = VTOUNION(vp);
+
+	if ((un->un_lowervp != NULLVP) &&
+	    (vp->v_type == un->un_lowervp->v_type)) {
+		if (vget(un->un_lowervp, 0) == 0)
+			return (un->un_lowervp);
+	}
+
+	return (NULLVP);
+}
diff --git a/sys/miscfs/union/union_vfsops.c b/sys/miscfs/union/union_vfsops.c
new file mode 100644
index 00000000000..9fa27460e3d
--- /dev/null
+++ b/sys/miscfs/union/union_vfsops.c
@@ -0,0 +1,550 @@
+/*
+ * Copyright (c) 1994 The Regents of the University of California.
+ * Copyright (c) 1994 Jan-Simon Pendry.
+ * All rights reserved.
+ *
+ * This code is derived from software donated to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)union_vfsops.c	8.7 (Berkeley) 3/5/94
+ */
+
+/*
+ * Union Layer
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/proc.h>
+#include <sys/vnode.h>
+#include <sys/mount.h>
+#include <sys/namei.h>
+#include <sys/malloc.h>
+#include <sys/filedesc.h>
+#include <sys/queue.h>
+#include <miscfs/union/union.h>
+
+/*
+ * Mount union filesystem.
+ *
+ * (mp) is the mount being set up; its covered vnode
+ * (mp->mnt_vnodecovered) becomes one of the two layers.
+ * (path) is the user-space name of the mount point.
+ * (data) is the user-space struct union_args.
+ * (ndp) is nameidata used to look up args.target.
+ * (p) is the process doing the mount.
+ *
+ * Returns 0 on success.  On failure an errno value is returned
+ * and the credential copy, the vnode references and the
+ * union_mount structure obtained here are all released.
+ */
+int
+union_mount(mp, path, data, ndp, p)
+	struct mount *mp;
+	char *path;
+	caddr_t data;
+	struct nameidata *ndp;
+	struct proc *p;
+{
+	int error = 0;
+	struct union_args args;
+	struct vnode *lowerrootvp = NULLVP;
+	struct vnode *upperrootvp = NULLVP;
+	struct union_mount *um = 0;
+	struct ucred *cred = 0;
+	struct ucred *scred;
+	struct vattr va;
+	char *cp;
+	int len;
+	u_int size;
+
+#ifdef UNION_DIAGNOSTIC
+	printf("union_mount(mp = %x)\n", mp);
+#endif
+
+	/*
+	 * Update is not supported
+	 */
+	if (mp->mnt_flag & MNT_UPDATE) {
+		/*
+		 * Need to provide.
+		 * 1. a way to convert between rdonly and rdwr mounts.
+		 * 2. support for nfs exports.
+		 */
+		error = EOPNOTSUPP;
+		goto bad;
+	}
+
+	/*
+	 * Take a copy of the process's credentials.  This isn't
+	 * quite right since the euid will always be zero and we
+	 * want to get the "real" users credentials.  So fix up
+	 * the uid field after taking the copy.
+	 */
+	cred = crdup(p->p_ucred);
+	cred->cr_uid = p->p_cred->p_ruid;
+
+	/*
+	 * Ensure the *real* user has write permission on the
+	 * mounted-on directory.  This allows the mount_union
+	 * command to be made setuid root so allowing anyone
+	 * to do union mounts onto any directory on which they
+	 * have write permission and which they also own.
+	 */
+	error = VOP_GETATTR(mp->mnt_vnodecovered, &va, cred, p);
+	if (error)
+		goto bad;
+	if ((va.va_uid != cred->cr_uid) &&
+	    (cred->cr_uid != 0)) {
+		error = EACCES;
+		goto bad;
+	}
+	error = VOP_ACCESS(mp->mnt_vnodecovered, VWRITE, cred, p);
+	if (error)
+		goto bad;
+
+	/*
+	 * Get argument
+	 */
+	if (error = copyin(data, (caddr_t)&args, sizeof(struct union_args)))
+		goto bad;
+
+	lowerrootvp = mp->mnt_vnodecovered;
+	VREF(lowerrootvp);
+
+	/*
+	 * Find upper node.  Use the real process credentials,
+	 * not the effective ones since this will have come
+	 * through a setuid process (mount_union).  All this
+	 * messing around with permissions is entirely bogus
+	 * and should be removed by allowing any user straight
+	 * past the mount system call.
+	 *
+	 * NOTE(review): p_ucred is restored before namei() runs;
+	 * whether NDINIT captures the credential while it is
+	 * swapped is not visible here - confirm the lookup really
+	 * uses (cred).
+	 */
+	scred = p->p_ucred;
+	p->p_ucred = cred;
+	NDINIT(ndp, LOOKUP, FOLLOW|WANTPARENT,
+	       UIO_USERSPACE, args.target, p);
+	p->p_ucred = scred;
+
+	if (error = namei(ndp))
+		goto bad;
+
+	upperrootvp = ndp->ni_vp;
+	vrele(ndp->ni_dvp);
+	ndp->ni_dvp = NULL;
+
+	if (upperrootvp->v_type != VDIR) {
+		error = EINVAL;
+		goto bad;
+	}
+
+	um = (struct union_mount *) malloc(sizeof(struct union_mount),
+				M_UFSMNT, M_WAITOK);	/* XXX */
+
+	/*
+	 * Keep a held reference to the target vnodes.
+	 * They are vrele'd in union_unmount.
+	 *
+	 * Depending on the _BELOW flag, the filesystems are
+	 * viewed in a different order.  In effect, this is the
+	 * same as providing a mount under option to the mount syscall.
+	 */
+
+	um->um_op = args.mntflags & UNMNT_OPMASK;
+	switch (um->um_op) {
+	case UNMNT_ABOVE:
+		um->um_lowervp = lowerrootvp;
+		um->um_uppervp = upperrootvp;
+		break;
+
+	case UNMNT_BELOW:
+		um->um_lowervp = upperrootvp;
+		um->um_uppervp = lowerrootvp;
+		break;
+
+	case UNMNT_REPLACE:
+		vrele(lowerrootvp);
+		lowerrootvp = NULLVP;
+		um->um_uppervp = upperrootvp;
+		um->um_lowervp = lowerrootvp;
+		break;
+
+	default:
+		/* um is released at "bad" */
+		error = EINVAL;
+		goto bad;
+	}
+
+	um->um_cred = cred;
+	um->um_cmode = UN_DIRMODE &~ p->p_fd->fd_cmask;
+
+	/*
+	 * Depending on what you think the MNT_LOCAL flag might mean,
+	 * you may want the && to be || on the conditional below.
+	 * At the moment it has been defined that the filesystem is
+	 * only local if it is all local, ie the MNT_LOCAL flag implies
+	 * that the entire namespace is local.  If you think the MNT_LOCAL
+	 * flag implies that some of the files might be stored locally
+	 * then you will want to change the conditional.
+	 */
+	if (um->um_op == UNMNT_ABOVE) {
+		if (((um->um_lowervp == NULLVP) ||
+		     (um->um_lowervp->v_mount->mnt_flag & MNT_LOCAL)) &&
+		    (um->um_uppervp->v_mount->mnt_flag & MNT_LOCAL))
+			mp->mnt_flag |= MNT_LOCAL;
+	}
+
+	/*
+	 * Copy in the upper layer's RDONLY flag.  This is for the benefit
+	 * of lookup() which explicitly checks the flag, rather than asking
+	 * the filesystem for its own opinion.  This means, that an update
+	 * mount of the underlying filesystem to go from rdonly to rdwr
+	 * will leave the unioned view as read-only.
+	 */
+	mp->mnt_flag |= (um->um_uppervp->v_mount->mnt_flag & MNT_RDONLY);
+
+	/*
+	 * This is a user mount.  Privilege check for unmount
+	 * will be done in union_unmount.
+	 */
+	mp->mnt_flag |= MNT_USER;
+
+	mp->mnt_data = (qaddr_t) um;
+	getnewfsid(mp, MOUNT_UNION);
+
+	(void) copyinstr(path, mp->mnt_stat.f_mntonname, MNAMELEN - 1, &size);
+	bzero(mp->mnt_stat.f_mntonname + size, MNAMELEN - size);
+
+	/*
+	 * f_mntfromname is the target path, prefixed with a tag
+	 * naming the kind of union mount.
+	 */
+	switch (um->um_op) {
+	case UNMNT_ABOVE:
+		cp = "<above>";
+		break;
+	case UNMNT_BELOW:
+		cp = "<below>";
+		break;
+	case UNMNT_REPLACE:
+		cp = "";
+		break;
+	default:
+		/* not reached: um_op was validated above */
+		cp = "";
+		break;
+	}
+	len = strlen(cp);
+	bcopy(cp, mp->mnt_stat.f_mntfromname, len);
+
+	cp = mp->mnt_stat.f_mntfromname + len;
+	len = MNAMELEN - len;
+
+	(void) copyinstr(args.target, cp, len - 1, &size);
+	bzero(cp + size, len - size);
+
+#ifdef UNION_DIAGNOSTIC
+	printf("union_mount: from %s, on %s\n",
+		mp->mnt_stat.f_mntfromname, mp->mnt_stat.f_mntonname);
+#endif
+	return (0);
+
+bad:
+	/* mp->mnt_data is only set on the success path, so um is ours */
+	if (um)
+		free(um, M_UFSMNT);	/* XXX */
+	if (cred)
+		crfree(cred);
+	if (upperrootvp)
+		vrele(upperrootvp);
+	if (lowerrootvp)
+		vrele(lowerrootvp);
+	return (error);
+}
+
+/*
+ * VFS start entry point.
+ *
+ * There is nothing to do for a union mount: the underlying
+ * filesystem(s) had their own start routine called when they
+ * were mounted.  Always succeeds.
+ */
+int
+union_start(mp, flags, p)
+	struct mount *mp;
+	int flags;
+	struct proc *p;
+{
+	/* nothing to start */
+	return (0);
+}
+
+/*
+ * Unmount a union filesystem and free the union layer's state.
+ *
+ * (mp) is the union mount, (mntflags) the unmount flags and (p)
+ * the process asking for the unmount.
+ *
+ * Returns 0 on success; the suser() error if the caller is neither
+ * the mounter nor the superuser; EINVAL for a forced unmount while
+ * doforce is clear; EBUSY if the root vnode is still in use; or the
+ * error from union_root() or vflush().
+ */
+int
+union_unmount(mp, mntflags, p)
+	struct mount *mp;
+	int mntflags;
+	struct proc *p;
+{
+	extern int doforce;
+	struct union_mount *um = MOUNTTOUNIONMOUNT(mp);
+	struct vnode *rootvp;
+	int vflags = 0;
+	int error;
+
+#ifdef UNION_DIAGNOSTIC
+	printf("union_unmount(mp = %x)\n", mp);
+#endif
+
+	/* only the mounter, or superuser can unmount */
+	if (p->p_cred->p_ruid != um->um_cred->cr_uid) {
+		error = suser(p->p_ucred, &p->p_acflag);
+		if (error)
+			return (error);
+	}
+
+	if (mntflags & MNT_FORCE) {
+		/* union can never be rootfs so don't check for it */
+		if (!doforce)
+			return (EINVAL);
+		vflags |= FORCECLOSE;
+	}
+
+	error = union_root(mp, &rootvp);
+	if (error)
+		return (error);
+
+	/*
+	 * The root must have no user other than this function,
+	 * and every other vnode of the mount must flush cleanly.
+	 */
+	if (rootvp->v_usecount > 1)
+		error = EBUSY;
+	else
+		error = vflush(mp, rootvp, vflags);
+	if (error) {
+		vput(rootvp);
+		return (error);
+	}
+
+#ifdef UNION_DIAGNOSTIC
+	vprint("alias root of lower", rootvp);
+#endif
+	/*
+	 * Give back the layer vnode references and the
+	 * credentials that union_mount stored in (um).
+	 */
+	if (um->um_lowervp)
+		vrele(um->um_lowervp);
+	vrele(um->um_uppervp);
+	crfree(um->um_cred);
+
+	/*
+	 * Release the root vnode obtained above, then
+	 * get rid of it so it can be re-used.
+	 */
+	vput(rootvp);
+	vgone(rootvp);
+
+	/*
+	 * Last of all, free the union_mount structure itself.
+	 */
+	free(mp->mnt_data, M_UFSMNT);	/* XXX */
+	mp->mnt_data = 0;
+	return (0);
+}
+
+/*
+ * Return the root vnode of a union mount in (*vpp).
+ *
+ * References are taken on the upper and (if present) lower root
+ * vnodes and handed to union_allocvp(); on failure they are given
+ * back here.  The upper vnode is locked first unless this is a
+ * "below" mount whose upper vnode is already locked, in which case
+ * UN_ULOCK is cleared on the resulting union node instead.
+ *
+ * Returns 0 on success, with VROOT set on the new vnode, or the
+ * error from union_allocvp().
+ */
+int
+union_root(mp, vpp)
+	struct mount *mp;
+	struct vnode **vpp;
+{
+	struct union_mount *um = MOUNTTOUNIONMOUNT(mp);
+	int already_locked;
+	int error;
+
+#ifdef UNION_DIAGNOSTIC
+	printf("union_root(mp = %x, lvp = %x, uvp = %x)\n", mp,
+			um->um_lowervp,
+			um->um_uppervp);
+#endif
+
+	/*
+	 * Return locked reference to root.
+	 */
+	VREF(um->um_uppervp);
+	already_locked = (um->um_op == UNMNT_BELOW) &&
+	    VOP_ISLOCKED(um->um_uppervp);
+	if (!already_locked)
+		VOP_LOCK(um->um_uppervp);
+
+	if (um->um_lowervp)
+		VREF(um->um_lowervp);
+
+	error = union_allocvp(vpp, mp, NULLVP, NULLVP,
+			      (struct componentname *) 0,
+			      um->um_uppervp, um->um_lowervp);
+
+	if (error == 0) {
+		(*vpp)->v_flag |= VROOT;
+		/* the lock found on the upper vnode is not ours */
+		if (already_locked)
+			VTOUNION(*vpp)->un_flags &= ~UN_ULOCK;
+		return (0);
+	}
+
+	/* undo the lock and references taken above */
+	if (!already_locked)
+		VOP_UNLOCK(um->um_uppervp);
+	vrele(um->um_uppervp);
+	if (um->um_lowervp)
+		vrele(um->um_lowervp);
+
+	return (error);
+}
+
+/*
+ * Quota control entry point.  Not implemented for union mounts:
+ * every request fails with EOPNOTSUPP.
+ */
+int
+union_quotactl(mp, cmd, uid, arg, p)
+	struct mount *mp;
+	int cmd;
+	uid_t uid;
+	caddr_t arg;
+	struct proc *p;
+{
+	/* all arguments are ignored */
+	return (EOPNOTSUPP);
+}
+
+/*
+ * Report filesystem statistics for a union mount.
+ *
+ * The block and file counts in (sbp) are the sum of the lower
+ * layer's figures (zero when there is no lower layer) and the
+ * upper layer's; f_flags, f_bsize and f_iosize come from the upper
+ * layer and f_type is MOUNT_UNION.  When (sbp) is not the mount's
+ * own mnt_stat, the fsid and mount names are copied in from it.
+ *
+ * Returns 0, or the error from VFS_STATFS() on either layer.
+ */
+int
+union_statfs(mp, sbp, p)
+	struct mount *mp;
+	struct statfs *sbp;
+	struct proc *p;
+{
+	struct union_mount *um = MOUNTTOUNIONMOUNT(mp);
+	struct statfs layer;
+	int lower_bsize;
+	int error;
+
+#ifdef UNION_DIAGNOSTIC
+	printf("union_statfs(mp = %x, lvp = %x, uvp = %x)\n", mp,
+			um->um_lowervp,
+	       		um->um_uppervp);
+#endif
+
+	/* lower layer first; the figures stay zero if there is none */
+	bzero(&layer, sizeof(layer));
+	if (um->um_lowervp) {
+		error = VFS_STATFS(um->um_lowervp->v_mount, &layer, p);
+		if (error)
+			return (error);
+	}
+
+	/* start the totals off with the lower layer's figures */
+	lower_bsize = layer.f_bsize;
+	sbp->f_blocks = layer.f_blocks;
+	sbp->f_bfree = layer.f_bfree;
+	sbp->f_bavail = layer.f_bavail;
+	sbp->f_files = layer.f_files;
+	sbp->f_ffree = layer.f_ffree;
+
+	/* now the upper layer, reusing the same buffer */
+	error = VFS_STATFS(um->um_uppervp->v_mount, &layer, p);
+	if (error)
+		return (error);
+
+	sbp->f_type = MOUNT_UNION;
+	sbp->f_flags = layer.f_flags;
+	sbp->f_bsize = layer.f_bsize;
+	sbp->f_iosize = layer.f_iosize;
+
+	/*
+	 * The reported block size is the upper layer's, so when the
+	 * two layers disagree the lower layer's block counts are
+	 * rescaled to it before adding, so that the sizes shown by
+	 * df make some kind of sense.
+	 */
+	if (layer.f_bsize != lower_bsize) {
+		sbp->f_blocks = sbp->f_blocks * lower_bsize / layer.f_bsize;
+		sbp->f_bfree = sbp->f_bfree * lower_bsize / layer.f_bsize;
+		sbp->f_bavail = sbp->f_bavail * lower_bsize / layer.f_bsize;
+	}
+	sbp->f_blocks += layer.f_blocks;
+	sbp->f_bfree += layer.f_bfree;
+	sbp->f_bavail += layer.f_bavail;
+	sbp->f_files += layer.f_files;
+	sbp->f_ffree += layer.f_ffree;
+
+	/* the mount's own statfs buffer already holds these fields */
+	if (sbp == &mp->mnt_stat)
+		return (0);
+
+	bcopy(&mp->mnt_stat.f_fsid, &sbp->f_fsid, sizeof(sbp->f_fsid));
+	bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN);
+	bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN);
+	return (0);
+}
+
+/*
+ * Sync entry point.  Does nothing and reports success.
+ */
+int
+union_sync(mp, waitfor, cred, p)
+	struct mount *mp;
+	int waitfor;
+	struct ucred *cred;
+	struct proc *p;
+{
+	/*
+	 * XXX - relies on the union layer caching no data itself.
+	 */
+	return (0);
+}
+
+/*
+ * Look up a vnode by inode number.  Not implemented for union
+ * mounts: always fails with EOPNOTSUPP and leaves (*vpp) untouched.
+ */
+int
+union_vget(mp, ino, vpp)
+	struct mount *mp;
+	ino_t ino;
+	struct vnode **vpp;
+{
+	/* all arguments are ignored */
+	return (EOPNOTSUPP);
+}
+
+/*
+ * Translate a file handle to a vnode.  Not implemented for union
+ * mounts: always fails with EOPNOTSUPP and writes none of its
+ * output arguments.
+ */
+int
+union_fhtovp(mp, fidp, nam, vpp, exflagsp, credanonp)
+	struct mount *mp;
+	struct fid *fidp;
+	struct mbuf *nam;
+	struct vnode **vpp;
+	int *exflagsp;
+	struct ucred **credanonp;
+{
+	/* all arguments are ignored */
+	return (EOPNOTSUPP);
+}
+
+/*
+ * Translate a vnode to a file handle.  Not implemented for union
+ * mounts: always fails with EOPNOTSUPP and leaves (*fhp) untouched.
+ */
+int
+union_vptofh(vp, fhp)
+	struct vnode *vp;
+	struct fid *fhp;
+{
+	/* all arguments are ignored */
+	return (EOPNOTSUPP);
+}
+
+/* Defined outside this file; only referenced in the table below. */
+int union_init __P((void));
+
+/*
+ * VFS operations vector for the union filesystem.
+ *
+ * The initializer is positional, so the entries must stay in the
+ * order in which struct vfsops declares its members (declaration
+ * not visible here - check it before reordering or adding entries).
+ */
+struct vfsops union_vfsops = {
+	union_mount,
+	union_start,
+	union_unmount,
+	union_root,
+	union_quotactl,
+	union_statfs,
+	union_sync,
+	union_vget,
+	union_fhtovp,
+	union_vptofh,
+	union_init,
+};
diff --git a/sys/miscfs/union/union_vnops.c b/sys/miscfs/union/union_vnops.c
new file mode 100644
index 00000000000..96327b0922d
--- /dev/null
+++ b/sys/miscfs/union/union_vnops.c
@@ -0,0 +1,1495 @@
+/*
+ * Copyright (c) 1992, 1993, 1994 The Regents of the University of California.
+ * Copyright (c) 1992, 1993, 1994 Jan-Simon Pendry.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)union_vnops.c	8.6 (Berkeley) 2/17/94
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/file.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/vnode.h>
+#include <sys/mount.h>
+#include <sys/namei.h>
+#include <sys/malloc.h>
+#include <sys/buf.h>
+#include <sys/queue.h>
+#include <miscfs/union/union.h>
+
+/*
+ * Lock the upper vnode of union node (un), via union_fixup(),
+ * unless the node already records it as locked (UN_ULOCK set).
+ * Expands to a brace-enclosed block, not to an expression.
+ */
+#define FIXUP(un) { \
+	if (!((un)->un_flags & UN_ULOCK)) \
+		union_fixup(un); \
+}
+
+/*
+ * Lock the upper vnode of (un) and record the fact in the node's
+ * flags.  Called through the FIXUP() macro, which first checks
+ * that UN_ULOCK is not already set.
+ */
+static void
+union_fixup(un)
+	struct union_node *un;
+{
+	struct vnode *uvp = un->un_uppervp;
+
+	VOP_LOCK(uvp);
+	/* only mark the node once the lock is actually held */
+	un->un_flags |= UN_ULOCK;
+}
+
+/*
+ * Look up (cnp) in directory (dvp) of a single layer, crossing
+ * mount points in both directions along the way.
+ *
+ * (udvp) is the root vnode of the layer being searched, as passed
+ * by union_lookup (um_uppervp or um_lowervp); the downward mount
+ * point traversal stops if it reaches this vnode.
+ * (dvp) is the directory to search.
+ * (vpp) receives the vnode that was found.
+ * (cnp) is the component name, handed unchanged to VOP_LOOKUP.
+ *
+ * Returns 0 on success, or the error from VOP_LOOKUP or VFS_ROOT;
+ * *vpp is only written on success.
+ */
+static int
+union_lookup1(udvp, dvp, vpp, cnp)
+	struct vnode *udvp;
+	struct vnode *dvp;
+	struct vnode **vpp;
+	struct componentname *cnp;
+{
+	int error;
+	struct vnode *tdvp;
+	struct mount *mp;
+
+	/*
+	 * If stepping up the directory tree, check for going
+	 * back across the mount point, in which case do what
+	 * lookup would do by stepping back down the mount
+	 * hierarchy.
+	 */
+	if (cnp->cn_flags & ISDOTDOT) {
+		for (;;) {
+			/*
+			 * Don't do the NOCROSSMOUNT check
+			 * at this level.  By definition,
+			 * union fs deals with namespaces, not
+			 * filesystems.
+			 */
+			if ((dvp->v_flag & VROOT) == 0)
+				break;
+
+			/*
+			 * dvp is the root of its filesystem: swap it
+			 * for the vnode that filesystem is mounted on,
+			 * moving the lock and reference across.
+			 */
+			tdvp = dvp;
+			dvp = dvp->v_mount->mnt_vnodecovered;
+			vput(tdvp);
+			VREF(dvp);
+			VOP_LOCK(dvp);
+		}
+	}
+
+        error = VOP_LOOKUP(dvp, &tdvp, cnp);
+	if (error)
+		return (error);
+
+	/*
+	 * The parent directory will have been unlocked, unless lookup
+	 * found the last component.  In which case, re-lock the node
+	 * here to allow it to be unlocked again (phew) in union_lookup.
+	 */
+	if (dvp != tdvp && !(cnp->cn_flags & ISLASTCN))
+		VOP_LOCK(dvp);
+
+	/* from here on dvp is the vnode that was found */
+	dvp = tdvp;
+
+	/*
+	 * Lastly check if the current node is a mount point in
+	 * which case walk up the mount hierarchy making sure not to
+	 * bump into the root of the mount tree (ie. dvp != udvp).
+	 */
+	while (dvp != udvp && (dvp->v_type == VDIR) &&
+	       (mp = dvp->v_mountedhere)) {
+
+		/* the mount is locked: sleep on it, then re-test */
+		if (mp->mnt_flag & MNT_MLOCK) {
+			mp->mnt_flag |= MNT_MWAIT;
+			sleep((caddr_t) mp, PVFS);
+			continue;
+		}
+
+		if (error = VFS_ROOT(mp, &tdvp)) {
+			vput(dvp);
+			return (error);
+		}
+
+		/* trade the covered vnode for the mounted root */
+		vput(dvp);
+		dvp = tdvp;
+	}
+
+	*vpp = dvp;
+	return (0);
+}
+
+/*
+ * Look up a name in a union directory.
+ *
+ * The component a_cnp is looked up, via union_lookup1, in the
+ * upper and lower directories behind a_dvp (whichever exist).
+ * If either layer consumes additional pathname (cn_consume != 0)
+ * that layer's vnode and error are returned directly.  Otherwise
+ * the two results are combined into a union vnode by
+ * union_allocvp, after creating a shadow directory in the upper
+ * layer when only the lower layer has the name and it is a
+ * directory.
+ *
+ * LOCKPARENT is forced on in cn_flags for the per-layer lookups
+ * and cleared again before returning if the caller had not set it.
+ *
+ * Returns 0 with the result in *a_vpp, or an error.
+ */
+int
+union_lookup(ap)
+	struct vop_lookup_args /* {
+		struct vnodeop_desc *a_desc;
+		struct vnode *a_dvp;
+		struct vnode **a_vpp;
+		struct componentname *a_cnp;
+	} */ *ap;
+{
+	int error;
+	int uerror, lerror;
+	struct vnode *uppervp, *lowervp;
+	struct vnode *upperdvp, *lowerdvp;
+	struct vnode *dvp = ap->a_dvp;
+	struct union_node *dun = VTOUNION(dvp);
+	struct componentname *cnp = ap->a_cnp;
+	int lockparent = cnp->cn_flags & LOCKPARENT;
+	int rdonly = cnp->cn_flags & RDONLY;
+	struct union_mount *um = MOUNTTOUNIONMOUNT(dvp->v_mount);
+	struct ucred *saved_cred;
+
+	/* remember the caller's setting above, then force it on */
+	cnp->cn_flags |= LOCKPARENT;
+
+	upperdvp = dun->un_uppervp;
+	lowerdvp = dun->un_lowervp;
+	uppervp = NULLVP;
+	lowervp = NULLVP;
+
+	/*
+	 * do the lookup in the upper level.
+	 * if that level consumes additional pathnames,
+	 * then assume that something special is going
+	 * on and just return that vnode.
+	 */
+	if (upperdvp) {
+		FIXUP(dun);
+		uerror = union_lookup1(um->um_uppervp, upperdvp,
+					&uppervp, cnp);
+		/*if (uppervp == upperdvp)
+			dun->un_flags |= UN_KLOCK;*/
+
+		if (cnp->cn_consume != 0) {
+			*ap->a_vpp = uppervp;
+			if (!lockparent)
+				cnp->cn_flags &= ~LOCKPARENT;
+			return (uerror);
+		}
+	} else {
+		uerror = ENOENT;
+	}
+
+	/*
+	 * in a similar way to the upper layer, do the lookup
+	 * in the lower layer.   this time, if there is some
+	 * component magic going on, then vput whatever we got
+	 * back from the upper layer and return the lower vnode
+	 * instead.
+	 */
+	if (lowerdvp) {
+		int nameiop;
+
+		VOP_LOCK(lowerdvp);
+
+		/*
+		 * Only do a LOOKUP on the bottom node, since
+		 * we won't be making changes to it anyway.
+		 */
+		nameiop = cnp->cn_nameiop;
+		cnp->cn_nameiop = LOOKUP;
+		/*
+		 * For a "below" mount the lower lookup is done with
+		 * the credentials saved at mount time; the caller's
+		 * are put back straight afterwards.
+		 */
+		if (um->um_op == UNMNT_BELOW) {
+			saved_cred = cnp->cn_cred;
+			cnp->cn_cred = um->um_cred;
+		}
+		lerror = union_lookup1(um->um_lowervp, lowerdvp,
+				&lowervp, cnp);
+		if (um->um_op == UNMNT_BELOW)
+			cnp->cn_cred = saved_cred;
+		cnp->cn_nameiop = nameiop;
+
+		/* keep the lock only if the lookup returned lowerdvp itself */
+		if (lowervp != lowerdvp)
+			VOP_UNLOCK(lowerdvp);
+
+		if (cnp->cn_consume != 0) {
+			if (uppervp) {
+				/* the two are the same vnode: only drop the reference */
+				if (uppervp == upperdvp)
+					vrele(uppervp);
+				else
+					vput(uppervp);
+				uppervp = NULLVP;
+			}
+			*ap->a_vpp = lowervp;
+			if (!lockparent)
+				cnp->cn_flags &= ~LOCKPARENT;
+			return (lerror);
+		}
+	} else {
+		lerror = ENOENT;
+	}
+
+	/* restore the caller's LOCKPARENT setting */
+	if (!lockparent)
+		cnp->cn_flags &= ~LOCKPARENT;
+
+	/*
+	 * at this point, we have uerror and lerror indicating
+	 * possible errors with the lookups in the upper and lower
+	 * layers.  additionally, uppervp and lowervp are (locked)
+	 * references to existing vnodes in the upper and lower layers.
+	 *
+	 * there are now three cases to consider.
+	 * 1. if both layers returned an error, then return whatever
+	 *    error the upper layer generated.
+	 *
+	 * 2. if the top layer failed and the bottom layer succeeded
+	 *    then two subcases occur.
+	 *    a.  the bottom vnode is not a directory, in which
+	 *	  case just return a new union vnode referencing
+	 *	  an empty top layer and the existing bottom layer.
+	 *    b.  the bottom vnode is a directory, in which case
+	 *	  create a new directory in the top-level and
+	 *	  continue as in case 3.
+	 *
+	 * 3. if the top layer succeeded then return a new union
+	 *    vnode referencing whatever the new top layer and
+	 *    whatever the bottom layer returned.
+	 */
+
+	*ap->a_vpp = NULLVP;
+
+	/* case 1. */
+	if ((uerror != 0) && (lerror != 0)) {
+		return (uerror);
+	}
+
+	/* case 2. */
+	if (uerror != 0 /* && (lerror == 0) */ ) {
+		if (lowervp->v_type == VDIR) { /* case 2b. */
+			/*
+			 * union_mkshadow wants the upper directory
+			 * unlocked; drop the lock (and the flag that
+			 * records it) around the call only.
+			 * NOTE(review): upperdvp is used here without a
+			 * NULL check - confirm it cannot be NULLVP when
+			 * the lower vnode is a directory.
+			 */
+			dun->un_flags &= ~UN_ULOCK;
+			VOP_UNLOCK(upperdvp);
+			uerror = union_mkshadow(um, upperdvp, cnp, &uppervp);
+			VOP_LOCK(upperdvp);
+			dun->un_flags |= UN_ULOCK;
+
+			if (uerror) {
+				if (lowervp) {
+					vput(lowervp);
+					lowervp = NULLVP;
+				}
+				return (uerror);
+			}
+		}
+	}
+
+	/* the lower vnode is handed to union_allocvp unlocked */
+	if (lowervp)
+		VOP_UNLOCK(lowervp);
+
+	error = union_allocvp(ap->a_vpp, dvp->v_mount, dvp, upperdvp, cnp,
+			      uppervp, lowervp);
+
+	if (error) {
+		/* uppervp is still locked, lowervp was unlocked above */
+		if (uppervp)
+			vput(uppervp);
+		if (lowervp)
+			vrele(lowervp);
+	} else {
+		/*
+		 * The parent stays locked only when the caller set
+		 * LOCKPARENT and this was the last component (or
+		 * when the result is the parent itself).
+		 */
+		if (*ap->a_vpp != dvp)
+			if (!lockparent || !(cnp->cn_flags & ISLASTCN))
+				VOP_UNLOCK(dvp);
+	}
+
+	return (error);
+}
+
+/*
+ * Create a file in a union directory.
+ *
+ * The file is created in the upper layer directory behind a_dvp
+ * with VOP_CREATE, and the result is wrapped in a new union vnode
+ * returned in *a_vpp.  a_dvp is vput() on every path.
+ *
+ * Returns 0 on success, EROFS when the directory has no upper
+ * layer, or the error from VOP_CREATE or union_allocvp.
+ */
+int
+union_create(ap)
+	struct vop_create_args /* {
+		struct vnode *a_dvp;
+		struct vnode **a_vpp;
+		struct componentname *a_cnp;
+		struct vattr *a_vap;
+	} */ *ap;
+{
+	struct union_node *un = VTOUNION(ap->a_dvp);
+	struct vnode *dvp = un->un_uppervp;
+
+	if (dvp) {
+		int error;
+		struct vnode *vp;
+		struct mount *mp;
+
+		FIXUP(un);
+
+		VREF(dvp);
+		/*
+		 * presumably UN_KLOCK keeps the upper vnode locked
+		 * across the vput below - confirm in union_unlock.
+		 */
+		un->un_flags |= UN_KLOCK;
+		/* fetch the mount while a_dvp is still referenced */
+		mp = ap->a_dvp->v_mount;
+		vput(ap->a_dvp);
+		error = VOP_CREATE(dvp, &vp, ap->a_cnp, ap->a_vap);
+		if (error)
+			return (error);
+
+		/*
+		 * NOTE(review): a_dvp is still passed on here although
+		 * the reference on it has been given up above.
+		 */
+		error = union_allocvp(
+				ap->a_vpp,
+				mp,
+				ap->a_dvp,
+				NULLVP,
+				ap->a_cnp,
+				vp,
+				NULLVP);
+		if (error)
+			vput(vp);
+		return (error);
+	}
+
+	/* nowhere to create the file without an upper layer */
+	vput(ap->a_dvp);
+	return (EROFS);
+}
+
+/*
+ * Create a special file in a union directory.
+ *
+ * The node is made in the upper layer directory behind a_dvp with
+ * VOP_MKNOD.  If that hands back a vnode it is wrapped in a new
+ * union vnode returned in *a_vpp; otherwise *a_vpp is not written.
+ * a_dvp is vput() on every path.
+ *
+ * Returns 0 on success, EROFS when the directory has no upper
+ * layer, or the error from VOP_MKNOD or union_allocvp.
+ */
+int
+union_mknod(ap)
+	struct vop_mknod_args /* {
+		struct vnode *a_dvp;
+		struct vnode **a_vpp;
+		struct componentname *a_cnp;
+		struct vattr *a_vap;
+	} */ *ap;
+{
+	struct union_node *un = VTOUNION(ap->a_dvp);
+	struct vnode *dvp = un->un_uppervp;
+
+	if (dvp) {
+		int error;
+		struct vnode *vp;
+		struct mount *mp;
+
+		FIXUP(un);
+
+		VREF(dvp);
+		/*
+		 * presumably UN_KLOCK keeps the upper vnode locked
+		 * across the vput below - confirm in union_unlock.
+		 */
+		un->un_flags |= UN_KLOCK;
+		/* fetch the mount while a_dvp is still referenced */
+		mp = ap->a_dvp->v_mount;
+		vput(ap->a_dvp);
+		error = VOP_MKNOD(dvp, &vp, ap->a_cnp, ap->a_vap);
+		if (error)
+			return (error);
+
+		if (vp) {
+			/*
+			 * NOTE(review): a_dvp is still passed on here
+			 * although the reference on it has been given
+			 * up above.
+			 */
+			error = union_allocvp(
+					ap->a_vpp,
+					mp,
+					ap->a_dvp,
+					NULLVP,
+					ap->a_cnp,
+					vp,
+					NULLVP);
+			if (error)
+				vput(vp);
+		}
+		return (error);
+	}
+
+	/* nowhere to make the node without an upper layer */
+	vput(ap->a_dvp);
+	return (EROFS);
+}
+
+/*
+ * Open a union vnode.
+ *
+ * If an upper vnode exists it is opened.  Otherwise the lower
+ * vnode is opened, unless a regular file is being opened for
+ * writing: then a new file is created in the upper layer with
+ * union_vn_create, the lower file's contents are copied into it
+ * (unless O_TRUNC was given), the opens counted in un_openl are
+ * moved from the lower to the upper vnode, and the upper vnode is
+ * opened with the requested mode.
+ *
+ * Returns 0 on success or the first error met along the way.
+ */
+int
+union_open(ap)
+	struct vop_open_args /* {
+		struct vnodeop_desc *a_desc;
+		struct vnode *a_vp;
+		int a_mode;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct union_node *un = VTOUNION(ap->a_vp);
+	struct vnode *tvp;
+	int mode = ap->a_mode;
+	struct ucred *cred = ap->a_cred;
+	struct proc *p = ap->a_p;
+	int error;
+
+	/*
+	 * If there is an existing upper vp then simply open that.
+	 */
+	tvp = un->un_uppervp;
+	if (tvp == NULLVP) {
+		/*
+		 * If the lower vnode is being opened for writing, then
+		 * copy the file contents to the upper vnode and open that,
+		 * otherwise can simply open the lower vnode.
+		 */
+		tvp = un->un_lowervp;
+		if ((ap->a_mode & FWRITE) && (tvp->v_type == VREG)) {
+			struct vnode *vp;
+			int i;
+
+			/*
+			 * Open the named file in the upper layer.  Note that
+			 * the file may have come into existence *since* the
+			 * lookup was done, since the upper layer may really
+			 * be a loopback mount of some other filesystem...
+			 * so open the file with exclusive create and barf if
+			 * it already exists.
+			 * XXX - perhaps should re-lookup the node (once more
+			 * with feeling) and simply open that.  Who knows.
+			 */
+			error = union_vn_create(&vp, un, p);
+			if (error)
+				return (error);
+
+			/* at this point, uppervp is locked */
+			union_newupper(un, vp);
+			un->un_flags |= UN_ULOCK;
+
+			/*
+			 * Now, if the file is being opened with truncation,
+			 * then the (new) upper vnode is ready to fly,
+			 * otherwise the data from the lower vnode must be
+			 * copied to the upper layer first.  This only works
+			 * for regular files (check is made above).
+			 */
+			if ((mode & O_TRUNC) == 0) {
+				/*
+				 * XXX - should not ignore errors
+				 * from VOP_CLOSE
+				 */
+				VOP_LOCK(tvp);
+				error = VOP_OPEN(tvp, FREAD, cred, p);
+				if (error == 0) {
+					error = union_copyfile(p, cred,
+						       tvp, un->un_uppervp);
+					VOP_UNLOCK(tvp);
+					(void) VOP_CLOSE(tvp, FREAD, cred, p);
+				} else {
+					VOP_UNLOCK(tvp);
+				}
+
+#ifdef UNION_DIAGNOSTIC
+				if (!error)
+					uprintf("union: copied up %s\n",
+								un->un_path);
+#endif
+			}
+
+			/*
+			 * Give up the write open taken by union_vn_create.
+			 * The upper vnode is unlocked for the duration of
+			 * the close and locked again afterwards.
+			 */
+			un->un_flags &= ~UN_ULOCK;
+			VOP_UNLOCK(un->un_uppervp);
+			union_vn_close(un->un_uppervp, FWRITE, cred, p);
+			VOP_LOCK(un->un_uppervp);
+			un->un_flags |= UN_ULOCK;
+
+			/*
+			 * Subsequent IOs will go to the top layer, so
+			 * call close on the lower vnode and open on the
+			 * upper vnode to ensure that the filesystem keeps
+			 * its references counts right.  This doesn't do
+			 * the right thing with (cred) and (FREAD) though.
+			 * Ignoring error returns is not right, either.
+			 */
+			for (i = 0; i < un->un_openl; i++) {
+				(void) VOP_CLOSE(tvp, FREAD, cred, p);
+				(void) VOP_OPEN(un->un_uppervp, FREAD, cred, p);
+			}
+			un->un_openl = 0;
+
+			/* a failed copy-up takes precedence over the open */
+			if (error == 0)
+				error = VOP_OPEN(un->un_uppervp, mode, cred, p);
+			return (error);
+		}
+
+		/*
+		 * Just open the lower vnode, counting the open so it
+		 * can be moved to the upper vnode after a copy-up.
+		 */
+		un->un_openl++;
+		VOP_LOCK(tvp);
+		error = VOP_OPEN(tvp, mode, cred, p);
+		VOP_UNLOCK(tvp);
+
+		return (error);
+	}
+
+	FIXUP(un);
+
+	error = VOP_OPEN(tvp, mode, cred, p);
+
+	return (error);
+}
+
+/*
+ * Close a union vnode.
+ *
+ * The close is passed to the upper vnode when there is one.
+ * Otherwise it goes to the lower vnode, and the count of opens
+ * outstanding on the lower vnode (un_openl) is decremented.
+ *
+ * Returns whatever VOP_CLOSE() on the chosen vnode returns.
+ */
+int
+union_close(ap)
+	struct vop_close_args /* {
+		struct vnode *a_vp;
+		int  a_fflag;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct union_node *un = VTOUNION(ap->a_vp);
+	struct vnode *vp = un->un_uppervp;
+
+	if (vp == NULLVP) {
+		/* no upper vnode: this open was made on the lower one */
+#ifdef UNION_DIAGNOSTIC
+		if (un->un_openl <= 0)
+			panic("union: un_openl cnt");
+#endif
+		--un->un_openl;
+		vp = un->un_lowervp;
+	}
+
+	return (VOP_CLOSE(vp, ap->a_fflag, ap->a_cred, ap->a_p));
+}
+
+/*
+ * Check access permission on the union vnode.
+ * The access check being enforced is to check
+ * against both the underlying vnode, and any
+ * copied vnode.  This ensures that no additional
+ * file permissions are given away simply because
+ * the user caused an implicit file copy.
+ *
+ * If an upper vnode exists, only it is checked.  Otherwise the lower
+ * vnode is checked with the caller's credentials and, for a mount
+ * whose um_op is UNMNT_BELOW, a second time with the credentials
+ * saved in the union mount (um_cred).  Returns EACCES when the node
+ * has neither an upper nor a lower vnode.
+ */
+int
+union_access(ap)
+	struct vop_access_args /* {
+		struct vnodeop_desc *a_desc;
+		struct vnode *a_vp;
+		int a_mode;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct union_node *un = VTOUNION(ap->a_vp);
+	int error = EACCES;
+	struct vnode *vp;
+
+	if ((vp = un->un_uppervp) != NULLVP) {
+		FIXUP(un);
+		return (VOP_ACCESS(vp, ap->a_mode, ap->a_cred, ap->a_p));
+	}
+
+	if ((vp = un->un_lowervp) != NULLVP) {
+		VOP_LOCK(vp);
+		error = VOP_ACCESS(vp, ap->a_mode, ap->a_cred, ap->a_p);
+		if (error == 0) {
+			/*
+			 * The union mount must be found through the union
+			 * vnode (ap->a_vp).  vp is the lower-layer vnode, so
+			 * vp->v_mount belongs to the lower file system and
+			 * its private data is not a struct union_mount.
+			 */
+			struct union_mount *um =
+			    MOUNTTOUNIONMOUNT(ap->a_vp->v_mount);
+
+			if (um->um_op == UNMNT_BELOW)
+				error = VOP_ACCESS(vp, ap->a_mode,
+						um->um_cred, ap->a_p);
+		}
+		VOP_UNLOCK(vp);
+	}
+
+	return (error);
+}
+
+/*
+ * Get attributes for a union vnode.
+ *
+ * The attributes come from the upper vnode when there is one and
+ * from the lower vnode otherwise.  For directories that exist in
+ * both layers the link counts of the two layers are added together
+ * (see below).  In every case the fsid reported to the caller is
+ * replaced with that of the union mount.
+ */
+int
+union_getattr(ap)
+	struct vop_getattr_args /* {
+		struct vnode *a_vp;
+		struct vattr *a_vap;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	int error;
+	struct union_node *un = VTOUNION(ap->a_vp);
+	struct vnode *vp = un->un_uppervp;
+	struct vattr *vap = ap->a_vap;
+	struct vattr va;		/* scratch for the lower directory */
+
+	/*
+	 * Some programs walk the filesystem hierarchy by counting
+	 * links to directories to avoid stat'ing all the time.
+	 * This means the link count on directories needs to be "correct".
+	 * The only way to do that is to call getattr on both layers
+	 * and fix up the link count.  The link count will not necessarily
+	 * be accurate but will be large enough to defeat the tree walkers.
+	 */
+
+	if (vp != NULLVP) {
+		FIXUP(un);
+		error = VOP_GETATTR(vp, vap, ap->a_cred, ap->a_p);
+		if (error)
+			return (error);
+	}
+
+	/*
+	 * Decide whether the lower vnode must be queried as well:
+	 *  - no upper vnode: the lower one supplies the caller's attributes;
+	 *  - upper is a directory: query the lower one into the scratch
+	 *    vattr so only its link count is folded in;
+	 *  - otherwise the upper attributes stand alone.
+	 */
+	if (vp == NULLVP) {
+		vp = un->un_lowervp;
+	} else if (vp->v_type == VDIR) {
+		vp = un->un_lowervp;
+		vap = &va;
+	} else {
+		vp = NULLVP;
+	}
+
+	if (vp != NULLVP) {
+		VOP_LOCK(vp);
+		error = VOP_GETATTR(vp, vap, ap->a_cred, ap->a_p);
+		VOP_UNLOCK(vp);
+		if (error)
+			return (error);
+
+		/*
+		 * Only look at the scratch vattr once it has actually been
+		 * filled in; with no lower vnode it would be uninitialised.
+		 */
+		if ((vap != ap->a_vap) && (vap->va_type == VDIR))
+			ap->a_vap->va_nlink += vap->va_nlink;
+	}
+
+	/*
+	 * Always patch the caller's vattr: vap may point at the local
+	 * scratch copy, in which case the fsid would be lost on return.
+	 */
+	ap->a_vap->va_fsid = ap->a_vp->v_mount->mnt_stat.f_fsid.val[0];
+	return (0);
+}
+
+/*
+ * Set attributes on a union vnode.  Attributes can only be changed
+ * in the upper layer; if the node has no upper vnode the call fails
+ * with EROFS, except that truncating a lower regular file to zero
+ * length first creates an empty upper file to operate on.
+ */
+int
+union_setattr(ap)
+	struct vop_setattr_args /* {
+		struct vnode *a_vp;
+		struct vattr *a_vap;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct union_node *un = VTOUNION(ap->a_vp);
+	int error;
+
+	/*
+	 * Truncation of a lower-only regular file to zero size is done
+	 * by creating a zero length upper object.  This covers open
+	 * with O_TRUNC and O_CREAT.
+	 * (un->un_lowervp is assumed to be non-nil when there is no
+	 * upper vnode.)
+	 */
+	if (un->un_uppervp == NULLVP &&
+	    un->un_lowervp->v_type == VREG &&
+	    ap->a_vap->va_size == 0) {
+		struct vnode *newvp;
+
+		error = union_vn_create(&newvp, un, ap->a_p);
+		if (error)
+			return (error);
+
+		/* the freshly created upper vnode is locked here */
+		union_newupper(un, newvp);
+
+		VOP_UNLOCK(newvp);
+		union_vn_close(un->un_uppervp, FWRITE, ap->a_cred, ap->a_p);
+		VOP_LOCK(newvp);
+		un->un_flags |= UN_ULOCK;
+	}
+
+	/*
+	 * Without an upper layer object the attributes cannot be
+	 * changed: report a read-only filesystem.
+	 */
+	if (un->un_uppervp == NULLVP)
+		return (EROFS);
+
+	FIXUP(un);
+	error = VOP_SETATTR(un->un_uppervp, ap->a_vap, ap->a_cred, ap->a_p);
+	return (error);
+}
+
+/*
+ * Read from the vnode selected by OTHERVP().  When that is the lower
+ * vnode it is locked only for the duration of the read; otherwise
+ * FIXUP() is applied to the union node before reading.
+ */
+int
+union_read(ap)
+	struct vop_read_args /* {
+		struct vnode *a_vp;
+		struct uio *a_uio;
+		int  a_ioflag;
+		struct ucred *a_cred;
+	} */ *ap;
+{
+	struct vnode *targetvp = OTHERVP(ap->a_vp);
+	int error;
+
+	if (targetvp == LOWERVP(ap->a_vp)) {
+		VOP_LOCK(targetvp);
+		error = VOP_READ(targetvp, ap->a_uio, ap->a_ioflag,
+				ap->a_cred);
+		VOP_UNLOCK(targetvp);
+	} else {
+		FIXUP(VTOUNION(ap->a_vp));
+		error = VOP_READ(targetvp, ap->a_uio, ap->a_ioflag,
+				ap->a_cred);
+	}
+
+	return (error);
+}
+
+/*
+ * Write to the vnode selected by OTHERVP().  When that is the lower
+ * vnode it is locked only for the duration of the write; otherwise
+ * FIXUP() is applied to the union node before writing.
+ *
+ * NOTE(review): nothing here prevents the write from going to the
+ * lower vnode when no upper vnode exists -- confirm that callers
+ * always copy the file up first.
+ */
+int
+union_write(ap)
+	struct vop_write_args /* {
+		struct vnode *a_vp;
+		struct uio *a_uio;
+		int  a_ioflag;
+		struct ucred *a_cred;
+	} */ *ap;
+{
+	int error;
+	struct vnode *vp = OTHERVP(ap->a_vp);
+	int dolock = (vp == LOWERVP(ap->a_vp));
+
+	if (dolock)
+		VOP_LOCK(vp);
+	else
+		FIXUP(VTOUNION(ap->a_vp));
+	error = VOP_WRITE(vp, ap->a_uio, ap->a_ioflag, ap->a_cred);
+	if (dolock)
+		VOP_UNLOCK(vp);
+
+	return (error);
+}
+
+/*
+ * Pass an ioctl through, unchanged, to the vnode chosen by OTHERVP().
+ */
+int
+union_ioctl(ap)
+	struct vop_ioctl_args /* {
+		struct vnode *a_vp;
+		int  a_command;
+		caddr_t  a_data;
+		int  a_fflag;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct vnode *targetvp = OTHERVP(ap->a_vp);
+
+	return (VOP_IOCTL(targetvp, ap->a_command, ap->a_data,
+	    ap->a_fflag, ap->a_cred, ap->a_p));
+}
+
+/*
+ * Pass a select request through to the vnode chosen by OTHERVP().
+ */
+int
+union_select(ap)
+	struct vop_select_args /* {
+		struct vnode *a_vp;
+		int  a_which;
+		int  a_fflags;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct vnode *targetvp = OTHERVP(ap->a_vp);
+
+	return (VOP_SELECT(targetvp, ap->a_which, ap->a_fflags,
+	    ap->a_cred, ap->a_p));
+}
+
+/*
+ * Pass an mmap request through to the vnode chosen by OTHERVP().
+ */
+int
+union_mmap(ap)
+	struct vop_mmap_args /* {
+		struct vnode *a_vp;
+		int  a_fflags;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct vnode *targetvp = OTHERVP(ap->a_vp);
+
+	return (VOP_MMAP(targetvp, ap->a_fflags, ap->a_cred, ap->a_p));
+}
+
+/*
+ * Sync the vnode chosen by OTHERVP().  Succeeds trivially when the
+ * union node has no underlying vnode at all.  A lower vnode is
+ * locked only around the VOP_FSYNC call.
+ */
+int
+union_fsync(ap)
+	struct vop_fsync_args /* {
+		struct vnode *a_vp;
+		struct ucred *a_cred;
+		int  a_waitfor;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct vnode *syncvp = OTHERVP(ap->a_vp);
+	int error;
+
+	if (syncvp == NULLVP)
+		return (0);
+
+	if (syncvp == LOWERVP(ap->a_vp)) {
+		VOP_LOCK(syncvp);
+		error = VOP_FSYNC(syncvp, ap->a_cred, ap->a_waitfor, ap->a_p);
+		VOP_UNLOCK(syncvp);
+	} else {
+		FIXUP(VTOUNION(ap->a_vp));
+		error = VOP_FSYNC(syncvp, ap->a_cred, ap->a_waitfor, ap->a_p);
+	}
+
+	return (error);
+}
+
+/*
+ * Pass a seek through to the vnode chosen by OTHERVP().
+ */
+int
+union_seek(ap)
+	struct vop_seek_args /* {
+		struct vnode *a_vp;
+		off_t  a_oldoff;
+		off_t  a_newoff;
+		struct ucred *a_cred;
+	} */ *ap;
+{
+	struct vnode *targetvp = OTHERVP(ap->a_vp);
+
+	return (VOP_SEEK(targetvp, ap->a_oldoff, ap->a_newoff, ap->a_cred));
+}
+
+/*
+ * Remove a file.  This is only possible when both the directory and
+ * the file have an upper layer vnode; the removal is then performed
+ * on the upper vnodes.  In every other case both union vnodes are
+ * released and EROFS is returned.
+ */
+int
+union_remove(ap)
+	struct vop_remove_args /* {
+		struct vnode *a_dvp;
+		struct vnode *a_vp;
+		struct componentname *a_cnp;
+	} */ *ap;
+{
+	struct union_node *dir_un = VTOUNION(ap->a_dvp);
+	struct union_node *file_un = VTOUNION(ap->a_vp);
+	struct vnode *upper_dvp = dir_un->un_uppervp;
+	struct vnode *upper_vp = file_un->un_uppervp;
+	int error;
+
+	if (upper_dvp == NULLVP || upper_vp == NULLVP) {
+		/*
+		 * XXX: should create a whiteout here
+		 */
+		vput(ap->a_dvp);
+		vput(ap->a_vp);
+		return (EROFS);
+	}
+
+	/*
+	 * Take a reference on each upper vnode, then drop the union
+	 * vnode with UN_KLOCK set so the upper vnode stays locked.
+	 */
+	FIXUP(dir_un);
+	VREF(upper_dvp);
+	dir_un->un_flags |= UN_KLOCK;
+	vput(ap->a_dvp);
+
+	FIXUP(file_un);
+	VREF(upper_vp);
+	file_un->un_flags |= UN_KLOCK;
+	vput(ap->a_vp);
+
+	error = VOP_REMOVE(upper_dvp, upper_vp, ap->a_cnp);
+	if (error == 0)
+		union_removed_upper(file_un);
+
+	/*
+	 * XXX: should create a whiteout here
+	 */
+	return (error);
+}
+
+/*
+ * Make a hard link.  Both vnodes must have an upper layer vnode, in
+ * which case the link is made between the upper vnodes; otherwise
+ * EROFS is returned.  a_vp is released with vput() and a_tdvp with
+ * vrele() on every path.
+ */
+int
+union_link(ap)
+	struct vop_link_args /* {
+		struct vnode *a_vp;
+		struct vnode *a_tdvp;
+		struct componentname *a_cnp;
+	} */ *ap;
+{
+	struct union_node *vp_un = VTOUNION(ap->a_vp);
+	struct union_node *tdvp_un = VTOUNION(ap->a_tdvp);
+	struct vnode *vp_upper = vp_un->un_uppervp;
+	struct vnode *tdvp_upper = tdvp_un->un_uppervp;
+
+	if (vp_upper == NULLVP || tdvp_upper == NULLVP) {
+		/*
+		 * XXX: need to copy to upper layer
+		 * and do the link there.
+		 */
+		vput(ap->a_vp);
+		vrele(ap->a_tdvp);
+		return (EROFS);
+	}
+
+	/* Hand the (still locked) upper vnode of a_vp on to VOP_LINK. */
+	FIXUP(vp_un);
+	VREF(vp_upper);
+	vp_un->un_flags |= UN_KLOCK;
+	vput(ap->a_vp);
+
+	FIXUP(tdvp_un);
+	VREF(tdvp_upper);
+	vrele(ap->a_tdvp);
+
+	return (VOP_LINK(vp_upper, tdvp_upper, ap->a_cnp));
+}
+
+/*
+ * Rename.  Every union vnode among the four arguments is replaced by
+ * its upper layer vnode and VOP_RENAME is then invoked on those.  If
+ * any of them has no upper vnode the rename cannot be done: all
+ * vnodes still held are released and EROFS is returned.
+ */
+int
+union_rename(ap)
+	struct vop_rename_args  /* {
+		struct vnode *a_fdvp;
+		struct vnode *a_fvp;
+		struct componentname *a_fcnp;
+		struct vnode *a_tdvp;
+		struct vnode *a_tvp;
+		struct componentname *a_tcnp;
+	} */ *ap;
+{
+	int error;
+
+	struct vnode *fdvp = ap->a_fdvp;
+	struct vnode *fvp = ap->a_fvp;
+	struct vnode *tdvp = ap->a_tdvp;
+	struct vnode *tvp = ap->a_tvp;
+
+	/*
+	 * Source directory and source file: swap the reference held on
+	 * the union vnode for one on the upper vnode (VREF then vrele).
+	 */
+	if (fdvp->v_op == union_vnodeop_p) {	/* always true */
+		struct union_node *un = VTOUNION(fdvp);
+		if (un->un_uppervp == NULLVP) {
+			error = EROFS;
+			goto bad;
+		}
+
+		FIXUP(un);
+		fdvp = un->un_uppervp;
+		VREF(fdvp);
+		vrele(ap->a_fdvp);
+	}
+
+	if (fvp->v_op == union_vnodeop_p) {	/* always true */
+		struct union_node *un = VTOUNION(fvp);
+		if (un->un_uppervp == NULLVP) {
+			error = EROFS;
+			goto bad;
+		}
+
+		FIXUP(un);
+		fvp = un->un_uppervp;
+		VREF(fvp);
+		vrele(ap->a_fvp);
+	}
+
+	/*
+	 * Target directory and (optional) target file: these are given
+	 * up with vput().  UN_KLOCK is set first; union_unlock() skips
+	 * unlocking the upper vnode while that flag is set, so the
+	 * upper vnode is passed on still locked.
+	 */
+	if (tdvp->v_op == union_vnodeop_p) {
+		struct union_node *un = VTOUNION(tdvp);
+		if (un->un_uppervp == NULLVP) {
+			error = EROFS;
+			goto bad;
+		}
+
+		tdvp = un->un_uppervp;
+		VREF(tdvp);
+		un->un_flags |= UN_KLOCK;
+		vput(ap->a_tdvp);
+	}
+
+	if (tvp && tvp->v_op == union_vnodeop_p) {
+		struct union_node *un = VTOUNION(tvp);
+		if (un->un_uppervp == NULLVP) {
+			error = EROFS;
+			goto bad;
+		}
+
+		tvp = un->un_uppervp;
+		VREF(tvp);
+		un->un_flags |= UN_KLOCK;
+		vput(ap->a_tvp);
+	}
+
+	return (VOP_RENAME(fdvp, fvp, ap->a_fcnp, tdvp, tvp, ap->a_tcnp));
+
+bad:
+	/*
+	 * Release whatever each local currently names: the upper vnode
+	 * if the substitution above already happened, otherwise the
+	 * original union vnode.
+	 */
+	vrele(fdvp);
+	vrele(fvp);
+	vput(tdvp);
+	if (tvp)
+		vput(tvp);
+
+	return (error);
+}
+
+/*
+ * Create a directory.  The directory is made in the upper layer and
+ * a union vnode for it is returned through a_vpp.  When the parent
+ * has no upper vnode the parent is released and EROFS is returned.
+ */
+int
+union_mkdir(ap)
+	struct vop_mkdir_args /* {
+		struct vnode *a_dvp;
+		struct vnode **a_vpp;
+		struct componentname *a_cnp;
+		struct vattr *a_vap;
+	} */ *ap;
+{
+	struct union_node *un = VTOUNION(ap->a_dvp);
+	struct vnode *dvp = un->un_uppervp;
+
+	if (dvp != NULLVP) {
+		int error;
+		struct vnode *vp;
+
+		FIXUP(un);
+		VREF(dvp);
+		un->un_flags |= UN_KLOCK;
+		/*
+		 * Only unlock the union directory here and keep our
+		 * reference: a_dvp is still needed for union_allocvp()
+		 * below, so it must not be released yet.  (Assumes
+		 * vput() is VOP_UNLOCK() followed by vrele(), so the
+		 * two halves are simply performed separately.)
+		 */
+		VOP_UNLOCK(ap->a_dvp);
+		error = VOP_MKDIR(dvp, &vp, ap->a_cnp, ap->a_vap);
+		if (error) {
+			vrele(ap->a_dvp);
+			return (error);
+		}
+
+		error = union_allocvp(
+				ap->a_vpp,
+				ap->a_dvp->v_mount,
+				ap->a_dvp,
+				NULLVP,
+				ap->a_cnp,
+				vp,
+				NULLVP);
+		vrele(ap->a_dvp);
+		if (error)
+			vput(vp);
+		return (error);
+	}
+
+	vput(ap->a_dvp);
+	return (EROFS);
+}
+
+/*
+ * Remove a directory.  This is only possible when both the parent
+ * and the directory have an upper layer vnode; the removal is then
+ * performed on the upper vnodes.  In every other case both union
+ * vnodes are released and EROFS is returned.
+ */
+int
+union_rmdir(ap)
+	struct vop_rmdir_args /* {
+		struct vnode *a_dvp;
+		struct vnode *a_vp;
+		struct componentname *a_cnp;
+	} */ *ap;
+{
+	struct union_node *parent_un = VTOUNION(ap->a_dvp);
+	struct union_node *dir_un = VTOUNION(ap->a_vp);
+	struct vnode *upper_dvp = parent_un->un_uppervp;
+	struct vnode *upper_vp = dir_un->un_uppervp;
+	int error;
+
+	if (upper_dvp == NULLVP || upper_vp == NULLVP) {
+		/*
+		 * XXX: should create a whiteout here
+		 */
+		vput(ap->a_dvp);
+		vput(ap->a_vp);
+		return (EROFS);
+	}
+
+	/*
+	 * Take a reference on each upper vnode, then drop the union
+	 * vnode with UN_KLOCK set so the upper vnode stays locked.
+	 */
+	FIXUP(parent_un);
+	VREF(upper_dvp);
+	parent_un->un_flags |= UN_KLOCK;
+	vput(ap->a_dvp);
+
+	FIXUP(dir_un);
+	VREF(upper_vp);
+	dir_un->un_flags |= UN_KLOCK;
+	vput(ap->a_vp);
+
+	error = VOP_RMDIR(upper_dvp, upper_vp, ap->a_cnp);
+	if (error == 0)
+		union_removed_upper(dir_un);
+
+	/*
+	 * XXX: should create a whiteout here
+	 */
+	return (error);
+}
+
+/*
+ * Create a symbolic link in the upper layer.  No vnode is handed
+ * back: *a_vpp is always set to NULLVP after the upper layer call.
+ * When the directory has no upper vnode it is released and EROFS
+ * is returned.
+ */
+int
+union_symlink(ap)
+	struct vop_symlink_args /* {
+		struct vnode *a_dvp;
+		struct vnode **a_vpp;
+		struct componentname *a_cnp;
+		struct vattr *a_vap;
+		char *a_target;
+	} */ *ap;
+{
+	struct union_node *dir_un = VTOUNION(ap->a_dvp);
+	struct vnode *upper_dvp = dir_un->un_uppervp;
+	struct vnode *newvp;
+	int error;
+
+	if (upper_dvp == NULLVP) {
+		vput(ap->a_dvp);
+		return (EROFS);
+	}
+
+	/* Keep the upper directory locked while the union vnode is dropped. */
+	FIXUP(dir_un);
+	VREF(upper_dvp);
+	dir_un->un_flags |= UN_KLOCK;
+	vput(ap->a_dvp);
+
+	error = VOP_SYMLINK(upper_dvp, &newvp, ap->a_cnp,
+	    ap->a_vap, ap->a_target);
+	*ap->a_vpp = NULLVP;
+	return (error);
+}
+
+/*
+ * union_readdir cooperates with getdirentries and readdir(3)
+ * to present the entries of the unioned directories.  Walking
+ * down the union stack is the job of getdirentries, and
+ * removing duplicate names from the returned data stream is
+ * the job of readdir(3).  This routine therefore only reads
+ * the upper layer directory, and returns success without
+ * reading anything when there is no upper vnode.
+ */
+int
+union_readdir(ap)
+	struct vop_readdir_args /* {
+		struct vnodeop_desc *a_desc;
+		struct vnode *a_vp;
+		struct uio *a_uio;
+		struct ucred *a_cred;
+	} */ *ap;
+{
+	struct union_node *un = VTOUNION(ap->a_vp);
+
+	if (un->un_uppervp == NULLVP)
+		return (0);
+
+	FIXUP(un);
+	return (VOP_READDIR(un->un_uppervp, ap->a_uio, ap->a_cred));
+}
+
+/*
+ * Read a symbolic link from the vnode chosen by OTHERVP().  A lower
+ * vnode is locked only around the VOP_READLINK call.
+ */
+int
+union_readlink(ap)
+	struct vop_readlink_args /* {
+		struct vnode *a_vp;
+		struct uio *a_uio;
+		struct ucred *a_cred;
+	} */ *ap;
+{
+	struct vnode *linkvp = OTHERVP(ap->a_vp);
+	int error;
+
+	if (linkvp == LOWERVP(ap->a_vp)) {
+		VOP_LOCK(linkvp);
+		error = VOP_READLINK(linkvp, ap->a_uio, ap->a_cred);
+		VOP_UNLOCK(linkvp);
+	} else {
+		FIXUP(VTOUNION(ap->a_vp));
+		error = VOP_READLINK(linkvp, ap->a_uio, ap->a_cred);
+	}
+
+	return (error);
+}
+
+/*
+ * Abort a pending name operation on the vnode chosen by OTHERVP().
+ * The underlying vnode is only locked (lower) or fixed up (upper)
+ * when the union node itself is currently marked UN_LOCKED.
+ */
+int
+union_abortop(ap)
+	struct vop_abortop_args /* {
+		struct vnode *a_dvp;
+		struct componentname *a_cnp;
+	} */ *ap;
+{
+	struct vnode *targetvp = OTHERVP(ap->a_dvp);
+	struct union_node *un = VTOUNION(ap->a_dvp);
+	int un_locked = un->un_flags & UN_LOCKED;
+	int is_lower = (targetvp == LOWERVP(ap->a_dvp));
+	int lock_lower = (un_locked && is_lower);
+	int error;
+
+	if (lock_lower) {
+		VOP_LOCK(targetvp);
+	} else if (un_locked) {
+		FIXUP(un);
+	}
+
+	error = VOP_ABORTOP(targetvp, ap->a_cnp);
+
+	if (lock_lower)
+		VOP_UNLOCK(targetvp);
+
+	return (error);
+}
+
+/*
+ * The last reference to a union vnode has gone away.
+ */
+int
+union_inactive(ap)
+	struct vop_inactive_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+
+	/*
+	 * Deliberately a no-op, and deliberately _not_ a bypass to
+	 * the lower layers.  The lowervp is not vrele'd until
+	 * reclaim, which keeps our union_node in the cache and
+	 * reusable until then.
+	 *
+	 * NEEDSWORK: Someday, consider inactive'ing
+	 * the lowervp and then trying to reactivate it
+	 * with capabilities (v_id)
+	 * like they do in the name lookup cache code.
+	 * That's too much work for now.
+	 */
+
+#ifdef UNION_DIAGNOSTIC
+	struct union_node *unode = VTOUNION(ap->a_vp);
+
+	/* A node going inactive must not still be locked. */
+	if ((unode->un_flags & UN_LOCKED) != 0)
+		panic("union: inactivating locked node");
+#endif
+
+	return (0);
+}
+
+/*
+ * Reclaim a union vnode: all of the work is done by union_freevp().
+ * Always succeeds.
+ */
+int
+union_reclaim(ap)
+	struct vop_reclaim_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+	struct vnode *vp = ap->a_vp;
+
+	union_freevp(vp);
+	return (0);
+}
+
+/*
+ * Lock a union vnode.
+ *
+ * Sleeps while the vnode is marked VXLOCK, makes sure the upper
+ * vnode (if any) is locked and flagged UN_ULOCK, and then takes the
+ * union node's own lock by setting UN_LOCKED.  If the node is
+ * already locked, the caller sleeps on &un->un_flags and starts
+ * over from the top once woken.  Always returns 0.
+ */
+int
+union_lock(ap)
+	struct vop_lock_args *ap;
+{
+	struct vnode *vp = ap->a_vp;
+	struct union_node *un;
+
+start:
+	/* Wait for VXLOCK to clear, asking to be woken via VXWANT. */
+	while (vp->v_flag & VXLOCK) {
+		vp->v_flag |= VXWANT;
+		sleep((caddr_t)vp, PINOD);
+	}
+
+	un = VTOUNION(vp);
+
+	if (un->un_uppervp) {
+		/* Lock the upper vnode unless it is already recorded as locked. */
+		if ((un->un_flags & UN_ULOCK) == 0) {
+			un->un_flags |= UN_ULOCK;
+			VOP_LOCK(un->un_uppervp);
+		}
+#ifdef DIAGNOSTIC
+		if (un->un_flags & UN_KLOCK)
+			panic("union: dangling upper lock");
+#endif
+	}
+
+	if (un->un_flags & UN_LOCKED) {
+#ifdef DIAGNOSTIC
+		if (curproc && un->un_pid == curproc->p_pid &&
+			    un->un_pid > -1 && curproc->p_pid > -1)
+			panic("union: locking against myself");
+#endif
+		/* Somebody else holds the lock: wait for union_unlock() and retry. */
+		un->un_flags |= UN_WANT;
+		sleep((caddr_t) &un->un_flags, PINOD);
+		goto start;
+	}
+
+#ifdef DIAGNOSTIC
+	/* Remember the owner (-1 when there is no current process). */
+	if (curproc)
+		un->un_pid = curproc->p_pid;
+	else
+		un->un_pid = -1;
+#endif
+
+	un->un_flags |= UN_LOCKED;
+	return (0);
+}
+
+/*
+ * Unlock a union vnode.
+ *
+ * Clears UN_LOCKED, unlocks the upper vnode unless the caller asked
+ * to keep it locked by setting UN_KLOCK, clears both upper-lock
+ * flags and wakes up anybody sleeping in union_lock().
+ * Always returns 0.
+ */
+int
+union_unlock(ap)
+	struct vop_lock_args *ap;
+{
+	struct union_node *un = VTOUNION(ap->a_vp);
+
+#ifdef DIAGNOSTIC
+	if ((un->un_flags & UN_LOCKED) == 0)
+		panic("union: unlock unlocked node");
+	if (curproc && un->un_pid != curproc->p_pid &&
+			curproc->p_pid > -1 && un->un_pid > -1)
+		panic("union: unlocking other process's union node");
+#endif
+
+	un->un_flags &= ~UN_LOCKED;
+
+	/* Upper vnode is unlocked only when UN_ULOCK is set and UN_KLOCK is not. */
+	if ((un->un_flags & (UN_ULOCK|UN_KLOCK)) == UN_ULOCK)
+		VOP_UNLOCK(un->un_uppervp);
+
+	/* UN_KLOCK is a one-shot request: it is cleared together with UN_ULOCK. */
+	un->un_flags &= ~(UN_ULOCK|UN_KLOCK);
+
+	if (un->un_flags & UN_WANT) {
+		un->un_flags &= ~UN_WANT;
+		wakeup((caddr_t) &un->un_flags);
+	}
+
+#ifdef DIAGNOSTIC
+	un->un_pid = 0;
+#endif
+
+	return (0);
+}
+
+/*
+ * Map a logical block number using the vnode chosen by OTHERVP().
+ * A lower vnode is locked only around the VOP_BMAP call.
+ */
+int
+union_bmap(ap)
+	struct vop_bmap_args /* {
+		struct vnode *a_vp;
+		daddr_t  a_bn;
+		struct vnode **a_vpp;
+		daddr_t *a_bnp;
+		int *a_runp;
+	} */ *ap;
+{
+	struct vnode *mapvp = OTHERVP(ap->a_vp);
+	int error;
+
+	if (mapvp == LOWERVP(ap->a_vp)) {
+		VOP_LOCK(mapvp);
+		error = VOP_BMAP(mapvp, ap->a_bn, ap->a_vpp, ap->a_bnp,
+				ap->a_runp);
+		VOP_UNLOCK(mapvp);
+	} else {
+		FIXUP(VTOUNION(ap->a_vp));
+		error = VOP_BMAP(mapvp, ap->a_bn, ap->a_vpp, ap->a_bnp,
+				ap->a_runp);
+	}
+
+	return (error);
+}
+
+/*
+ * Print the addresses of a union vnode and of its upper and lower
+ * vnodes.  Always returns 0.
+ */
+int
+union_print(ap)
+	struct vop_print_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+	struct vnode *unionvp = ap->a_vp;
+	struct vnode *upper = UPPERVP(unionvp);
+	struct vnode *lower = LOWERVP(unionvp);
+
+	printf("\ttag VT_UNION, vp=%x, uppervp=%x, lowervp=%x\n",
+	    unionvp, upper, lower);
+	return (0);
+}
+
+/*
+ * Report whether the union node is locked: 1 when UN_LOCKED is set
+ * in its flags, 0 otherwise.
+ */
+int
+union_islocked(ap)
+	struct vop_islocked_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+	struct union_node *unode = VTOUNION(ap->a_vp);
+
+	if (unode->un_flags & UN_LOCKED)
+		return (1);
+	return (0);
+}
+
+/*
+ * Answer a pathconf query from the vnode chosen by OTHERVP().  A
+ * lower vnode is locked only around the VOP_PATHCONF call.
+ */
+int
+union_pathconf(ap)
+	struct vop_pathconf_args /* {
+		struct vnode *a_vp;
+		int a_name;
+		int *a_retval;
+	} */ *ap;
+{
+	struct vnode *confvp = OTHERVP(ap->a_vp);
+	int error;
+
+	if (confvp == LOWERVP(ap->a_vp)) {
+		VOP_LOCK(confvp);
+		error = VOP_PATHCONF(confvp, ap->a_name, ap->a_retval);
+		VOP_UNLOCK(confvp);
+	} else {
+		FIXUP(VTOUNION(ap->a_vp));
+		error = VOP_PATHCONF(confvp, ap->a_name, ap->a_retval);
+	}
+
+	return (error);
+}
+
+/*
+ * Pass an advisory lock request through to the vnode chosen by
+ * OTHERVP().
+ */
+int
+union_advlock(ap)
+	struct vop_advlock_args /* {
+		struct vnode *a_vp;
+		caddr_t  a_id;
+		int  a_op;
+		struct flock *a_fl;
+		int  a_flags;
+	} */ *ap;
+{
+	struct vnode *targetvp = OTHERVP(ap->a_vp);
+
+	return (VOP_ADVLOCK(targetvp, ap->a_id, ap->a_op,
+	    ap->a_fl, ap->a_flags));
+}
+
+
+/*
+ * XXX - vop_strategy is hand coded because its arguments carry
+ * no vnode, only a buffer.
+ * This goes away with a merged VM/buffer cache.
+ *
+ * The buffer is temporarily pointed at the underlying vnode chosen
+ * by OTHERVP(), handed to VOP_STRATEGY, and then pointed back at the
+ * union vnode.
+ */
+int
+union_strategy(ap)
+	struct vop_strategy_args /* {
+		struct buf *a_bp;
+	} */ *ap;
+{
+	struct buf *bp = ap->a_bp;
+	struct vnode *unionvp = bp->b_vp;
+	int error;
+
+	bp->b_vp = OTHERVP(unionvp);
+
+#ifdef DIAGNOSTIC
+	if (bp->b_vp == NULLVP)
+		panic("union_strategy: nil vp");
+	/* Anything that is not a read must never target the lower layer. */
+	if ((bp->b_flags & B_READ) == 0 && bp->b_vp == LOWERVP(unionvp))
+		panic("union_strategy: writing to lowervp");
+#endif
+
+	error = VOP_STRATEGY(bp);
+	bp->b_vp = unionvp;
+
+	return (error);
+}
+
+/*
+ * Global vfs data structures
+ *
+ * union_vnodeop_entries pairs each vnode operation descriptor with
+ * the union_* routine that implements it.  The first entry sends any
+ * operation not listed here to vn_default_error, and the table is
+ * terminated by a NULL/NULL pair.  The entries inside "#ifdef notdef"
+ * are compiled out.  union_vnodeop_opv_desc ties the table to
+ * union_vnodeop_p, the operations vector pointer that union_rename()
+ * compares v_op against.
+ */
+int (**union_vnodeop_p)();
+struct vnodeopv_entry_desc union_vnodeop_entries[] = {
+	{ &vop_default_desc, vn_default_error },
+	{ &vop_lookup_desc, union_lookup },		/* lookup */
+	{ &vop_create_desc, union_create },		/* create */
+	{ &vop_mknod_desc, union_mknod },		/* mknod */
+	{ &vop_open_desc, union_open },			/* open */
+	{ &vop_close_desc, union_close },		/* close */
+	{ &vop_access_desc, union_access },		/* access */
+	{ &vop_getattr_desc, union_getattr },		/* getattr */
+	{ &vop_setattr_desc, union_setattr },		/* setattr */
+	{ &vop_read_desc, union_read },			/* read */
+	{ &vop_write_desc, union_write },		/* write */
+	{ &vop_ioctl_desc, union_ioctl },		/* ioctl */
+	{ &vop_select_desc, union_select },		/* select */
+	{ &vop_mmap_desc, union_mmap },			/* mmap */
+	{ &vop_fsync_desc, union_fsync },		/* fsync */
+	{ &vop_seek_desc, union_seek },			/* seek */
+	{ &vop_remove_desc, union_remove },		/* remove */
+	{ &vop_link_desc, union_link },			/* link */
+	{ &vop_rename_desc, union_rename },		/* rename */
+	{ &vop_mkdir_desc, union_mkdir },		/* mkdir */
+	{ &vop_rmdir_desc, union_rmdir },		/* rmdir */
+	{ &vop_symlink_desc, union_symlink },		/* symlink */
+	{ &vop_readdir_desc, union_readdir },		/* readdir */
+	{ &vop_readlink_desc, union_readlink },		/* readlink */
+	{ &vop_abortop_desc, union_abortop },		/* abortop */
+	{ &vop_inactive_desc, union_inactive },		/* inactive */
+	{ &vop_reclaim_desc, union_reclaim },		/* reclaim */
+	{ &vop_lock_desc, union_lock },			/* lock */
+	{ &vop_unlock_desc, union_unlock },		/* unlock */
+	{ &vop_bmap_desc, union_bmap },			/* bmap */
+	{ &vop_strategy_desc, union_strategy },		/* strategy */
+	{ &vop_print_desc, union_print },		/* print */
+	{ &vop_islocked_desc, union_islocked },		/* islocked */
+	{ &vop_pathconf_desc, union_pathconf },		/* pathconf */
+	{ &vop_advlock_desc, union_advlock },		/* advlock */
+#ifdef notdef
+	{ &vop_blkatoff_desc, union_blkatoff },		/* blkatoff */
+	{ &vop_valloc_desc, union_valloc },		/* valloc */
+	{ &vop_vfree_desc, union_vfree },		/* vfree */
+	{ &vop_truncate_desc, union_truncate },		/* truncate */
+	{ &vop_update_desc, union_update },		/* update */
+	{ &vop_bwrite_desc, union_bwrite },		/* bwrite */
+#endif
+	{ (struct vnodeop_desc*)NULL, (int(*)())NULL }
+};
+struct vnodeopv_desc union_vnodeop_opv_desc =
+	{ &union_vnodeop_p, union_vnodeop_entries };
diff --git a/sys/net/bpf.c b/sys/net/bpf.c
new file mode 100644
index 00000000000..e40b769b980
--- /dev/null
+++ b/sys/net/bpf.c
@@ -0,0 +1,1316 @@
+/*
+ * Copyright (c) 1990, 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from the Stanford/CMU enet packet filter,
+ * (net/enet.c) distributed as part of 4.3BSD, and code contributed
+ * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence 
+ * Berkeley Laboratory.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *      @(#)bpf.c	8.2 (Berkeley) 3/28/94
+ *
+ * static char rcsid[] =
+ * "$Header: bpf.c,v 1.33 91/10/27 21:21:58 mccanne Exp $";
+ */
+
+#include "bpfilter.h"
+
+#if NBPFILTER > 0
+
+#ifndef __GNUC__
+#define inline
+#else
+#define inline __inline
+#endif
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/mbuf.h>
+#include <sys/buf.h>
+#include <sys/time.h>
+#include <sys/proc.h>
+#include <sys/user.h>
+#include <sys/ioctl.h>
+#include <sys/map.h>
+
+#include <sys/file.h>
+#if defined(sparc) && BSD < 199103
+#include <sys/stream.h>
+#endif
+#include <sys/tty.h>
+#include <sys/uio.h>
+
+#include <sys/protosw.h>
+#include <sys/socket.h>
+#include <net/if.h>
+
+#include <net/bpf.h>
+#include <net/bpfdesc.h>
+
+#include <sys/errno.h>
+
+#include <netinet/in.h>
+#include <netinet/if_ether.h>
+#include <sys/kernel.h>
+
+/*
+ * Older BSDs don't have kernel malloc.
+ */
+#if BSD < 199103
+extern bcopy();
+static caddr_t bpf_alloc();
+#include <net/bpf_compat.h>
+#define BPF_BUFSIZE (MCLBYTES-8)
+#define UIOMOVE(cp, len, code, uio) uiomove(cp, len, code, uio)
+#else
+#define BPF_BUFSIZE 4096
+#define UIOMOVE(cp, len, code, uio) uiomove(cp, len, uio)
+#endif
+
+/*
+ * Sleep priority used by readers waiting for packets; it is passed
+ * to the sleep primitive or'ed with PCATCH.
+ */
+#define PRINET  26			/* interruptible */
+
+/*
+ * The default read buffer size is patchable.
+ * bpfopen() copies it into each descriptor's bd_bufsize.
+ */
+int bpf_bufsize = BPF_BUFSIZE;
+
+/*
+ *  bpf_iflist is the list of interfaces; each corresponds to an ifnet
+ *  bpf_dtab holds the descriptors, indexed by minor device #
+ */
+struct bpf_if	*bpf_iflist;
+struct bpf_d	bpf_dtab[NBPFILTER];
+
+#if BSD >= 199207
+/*
+ * bpfilterattach() is called at boot time in new systems.  We do
+ * nothing here since old systems will not call this.
+ * The argument n is accepted but not used.
+ */
+/* ARGSUSED */
+void
+bpfilterattach(n)
+	int n;
+{
+}
+#endif
+
+/*
+ * Forward declarations for the file-local helpers, one per function.
+ */
+static int	bpf_allocbufs __P((struct bpf_d *));
+static void	bpf_freed __P((struct bpf_d *));
+static void	bpf_ifname __P((struct ifnet *, struct ifreq *));
+static void	bpf_mcopy __P((const void *, void *, u_int));
+static int	bpf_movein __P((struct uio *, int,
+		    struct mbuf **, struct sockaddr *, int *));
+static int	bpf_setif __P((struct bpf_d *, struct ifreq *));
+static inline void
+		bpf_wakeup __P((struct bpf_d *));
+static void	catchpacket __P((struct bpf_d *, u_char *, u_int,
+		    u_int, void (*)(const void *, void *, u_int)));
+static void	reset_d __P((struct bpf_d *));
+
+/*
+ * Copy a packet written by the user into a freshly allocated mbuf.
+ *
+ * uio      - describes the user's data; uio_resid is the packet length
+ * linktype - DLT_* type of the interface the packet is destined for
+ * mp       - on success, receives the mbuf holding the packet payload
+ * sockp    - receives the address family and, for link types with a
+ *            header, the first hlen bytes of the user data in sa_data
+ * datlen   - on success, receives the payload length (total length
+ *            minus the link level header)
+ *
+ * Returns 0 on success, EIO for an unsupported link type or a packet
+ * that is shorter than the link header or larger than MCLBYTES,
+ * ENOBUFS if no mbuf/cluster is available, or the error from the
+ * copy-in.  On any failure no mbuf is returned and *mp is untouched.
+ */
+static int
+bpf_movein(uio, linktype, mp, sockp, datlen)
+	register struct uio *uio;
+	int linktype, *datlen;
+	register struct mbuf **mp;
+	register struct sockaddr *sockp;
+{
+	struct mbuf *m;
+	int error;
+	int len;
+	int hlen;
+
+	/*
+	 * Build a sockaddr based on the data link layer type.
+	 * We do this at this level because the ethernet header
+	 * is copied directly into the data field of the sockaddr.
+	 * In the case of SLIP, there is no header and the packet
+	 * is forwarded as is.
+	 * Also, we are careful to leave room at the front of the mbuf
+	 * for the link level header.
+	 */
+	switch (linktype) {
+
+	case DLT_SLIP:
+		sockp->sa_family = AF_INET;
+		hlen = 0;
+		break;
+
+	case DLT_EN10MB:
+		sockp->sa_family = AF_UNSPEC;
+		/* XXX Would MAXLINKHDR be better? */
+		hlen = sizeof(struct ether_header);
+		break;
+
+	case DLT_FDDI:
+		sockp->sa_family = AF_UNSPEC;
+		/* XXX 4(FORMAC)+6(dst)+6(src)+3(LLC)+5(SNAP) */
+		/*
+		 * NOTE(review): hlen bytes are copied into sockp->sa_data
+		 * below; confirm the caller's sockaddr has room for 24
+		 * bytes of data.
+		 */
+		hlen = 24;
+		break;
+
+	case DLT_NULL:
+		sockp->sa_family = AF_UNSPEC;
+		hlen = 0;
+		break;
+
+	default:
+		return (EIO);
+	}
+
+	len = uio->uio_resid;
+	/*
+	 * Reject packets too short to contain the link header (the
+	 * payload length would go negative) as well as packets that
+	 * cannot fit in a single cluster.
+	 */
+	if (len < hlen || (unsigned)len > MCLBYTES)
+		return (EIO);
+	*datlen = len - hlen;
+
+	MGET(m, M_WAIT, MT_DATA);
+	if (m == 0)
+		return (ENOBUFS);
+	if (len > MLEN) {
+#if BSD >= 199103
+		MCLGET(m, M_WAIT);
+		if ((m->m_flags & M_EXT) == 0) {
+#else
+		MCLGET(m);
+		if (m->m_len != MCLBYTES) {
+#endif
+			error = ENOBUFS;
+			goto bad;
+		}
+	}
+	m->m_len = len;
+	/*
+	 * Make room for link header.
+	 */
+	if (hlen != 0) {
+		m->m_len -= hlen;
+#if BSD >= 199103
+		m->m_data += hlen; /* XXX */
+#else
+		m->m_off += hlen;
+#endif
+		error = UIOMOVE((caddr_t)sockp->sa_data, hlen, UIO_WRITE, uio);
+		if (error)
+			goto bad;
+	}
+	error = UIOMOVE(mtod(m, caddr_t), len - hlen, UIO_WRITE, uio);
+	if (!error) {
+		/* Hand the mbuf to the caller only once it is fully built. */
+		*mp = m;
+		return (0);
+	}
+ bad:
+	m_freem(m);
+	return (error);
+}
+
+/*
+ * Make descriptor d listen on interface bp, i.e. attach the file to
+ * the bpf interface.
+ * Must be called at splimp.
+ */
+static void
+bpf_attachd(d, bp)
+	struct bpf_d *d;
+	struct bpf_if *bp;
+{
+	/* Push d onto the front of the interface's list of listeners. */
+	d->bd_next = bp->bif_dlist;
+	bp->bif_dlist = d;
+
+	/* Remember which interface d is attached to. */
+	d->bd_bif = bp;
+
+	/*
+	 * Set the driver's bpf cookie to the interface, so that the
+	 * driver starts diverting packets to bpf.
+	 */
+	*bp->bif_driverp = bp;
+}
+
+/*
+ * Detach descriptor d from the interface it is listening on.
+ */
+static void
+bpf_detachd(d)
+	struct bpf_d *d;
+{
+	struct bpf_if *bif = d->bd_bif;
+	struct bpf_d **linkp;
+
+	/*
+	 * If this descriptor put the interface into promiscuous mode,
+	 * take it out again.
+	 */
+	if (d->bd_promisc) {
+		d->bd_promisc = 0;
+		/*
+		 * Having been able to switch promiscuous mode on but not
+		 * off again means something is badly wrong.
+		 */
+		if (ifpromisc(bif->bif_ifp, 0) != 0)
+			panic("bpf: ifpromisc failed");
+	}
+
+	/* Find the link that points at d and unhook d from the list. */
+	for (linkp = &bif->bif_dlist; *linkp != d; ) {
+		linkp = &(*linkp)->bd_next;
+		if (*linkp == 0)
+			panic("bpf_detachd: descriptor not in list");
+	}
+	*linkp = d->bd_next;
+
+	/* Last listener gone: tell the driver by clearing its cookie. */
+	if (bif->bif_dlist == 0)
+		*bif->bif_driverp = 0;
+
+	d->bd_bif = 0;
+}
+
+
+/*
+ * Mark a descriptor free by making it point to itself.
+ * This is probably cheaper than marking with a constant since
+ * the address should be in a register anyway.
+ *
+ * D_ISFREE(d)   - true when d's bd_next points back at d
+ * D_MARKFREE(d) - make d's bd_next point at d
+ * D_MARKUSED(d) - clear d's bd_next, so D_ISFREE(d) becomes false
+ */
+#define D_ISFREE(d) ((d) == (d)->bd_next)
+#define D_MARKFREE(d) ((d)->bd_next = (d))
+#define D_MARKUSED(d) ((d)->bd_next = 0)
+
+/*
+ * Open a bpf device.  Fails with ENXIO when the minor device number
+ * is out of range and with EBUSY when the descriptor for that minor
+ * is not free, since a minor can be open only once at a time.
+ */
+/* ARGSUSED */
+int
+bpfopen(dev, flag)
+	dev_t dev;
+	int flag;
+{
+	register struct bpf_d *d;
+
+	if (minor(dev) >= NBPFILTER)
+		return (ENXIO);
+
+	d = &bpf_dtab[minor(dev)];
+	if (D_ISFREE(d)) {
+		/*
+		 * Zeroing the descriptor does most of the initialization
+		 * and also clears bd_next, so the descriptor no longer
+		 * tests as free.
+		 */
+		bzero((char *)d, sizeof(*d));
+		d->bd_bufsize = bpf_bufsize;
+		return (0);
+	}
+
+	/* The requested minor is already in use. */
+	return (EBUSY);
+}
+
+/*
+ * Close a descriptor: detach it from its interface (if it is
+ * attached to one) and hand it to bpf_freed().  Always returns 0.
+ */
+/* ARGSUSED */
+int
+bpfclose(dev, flag)
+	dev_t dev;
+	int flag;
+{
+	register struct bpf_d *d = &bpf_dtab[minor(dev)];
+	register int ipl;
+
+	/* The detach is done at splimp. */
+	ipl = splimp();
+	if (d->bd_bif != 0)
+		bpf_detachd(d);
+	splx(ipl);
+
+	bpf_freed(d);
+
+	return (0);
+}
+
+/*
+ * Support for SunOS, which does not have tsleep.
+ */
+#if BSD < 199103
+/*
+ * Timeout handler armed by bpf_sleep(): flag the descriptor as timed
+ * out and wake up whoever is sleeping on it.
+ */
+static
+bpf_timeout(arg)
+	caddr_t arg;
+{
+	struct bpf_d *bd = (struct bpf_d *)arg;
+
+	bd->bd_timedout = 1;
+	wakeup(arg);
+}
+
+/* Only the channel argument is used; it is the descriptor itself. */
+#define BPF_SLEEP(chan, pri, s, t) bpf_sleep((struct bpf_d *)chan)
+
+/*
+ * Sleep on descriptor d, bounded by its read timeout (bd_rtout) when
+ * that is non-zero.  Returns EINTR if sleep() reported a non-zero
+ * status, EWOULDBLOCK if the timeout fired and the sleep otherwise
+ * ended normally, and 0 in every other case.
+ */
+int
+bpf_sleep(d)
+	register struct bpf_d *d;
+{
+	register int tmo = d->bd_rtout;
+	register int status;
+
+	if (tmo == 0) {
+		/* No read timeout configured: plain interruptible sleep. */
+		status = sleep((caddr_t)d, PRINET|PCATCH);
+		return (status != 0) ? EINTR : 0;
+	}
+
+	d->bd_timedout = 0;
+	timeout(bpf_timeout, (caddr_t)d, tmo);
+	status = sleep((caddr_t)d, PRINET|PCATCH);
+
+	if (d->bd_timedout == 0) {
+		/* Woken before the timer fired: cancel it. */
+		untimeout(bpf_timeout, (caddr_t)d);
+	} else if (status == 0) {
+		return EWOULDBLOCK;
+	}
+	return (status != 0) ? EINTR : 0;
+}
+#else
+#define BPF_SLEEP tsleep
+#endif
+
+/*
+ * Rotate the packet buffers in descriptor d.  Move the store buffer
+ * into the hold slot, and the free buffer into the store slot.
+ * Zero the length of the new store buffer.
+ */
+#define ROTATE_BUFFERS(d) \
+	(d)->bd_hbuf = (d)->bd_sbuf; \
+	(d)->bd_hlen = (d)->bd_slen; \
+	(d)->bd_sbuf = (d)->bd_fbuf; \
+	(d)->bd_slen = 0; \
+	(d)->bd_fbuf = 0;
+/*
+ *  bpfread - read next chunk of packets from buffers
+ *
+ *  The caller's buffer must be exactly bd_bufsize bytes (EINVAL
+ *  otherwise).  Blocks until the hold buffer is available, until
+ *  immediate mode has data in the store buffer, or until the read
+ *  timeout expires; returns 0 with no data if the timeout expires
+ *  with nothing captured, or EINTR/ERESTART if the sleep is
+ *  interrupted.  Otherwise returns the result of copying the hold
+ *  buffer out to the user.
+ */
+int
+bpfread(dev, uio)
+	dev_t dev;
+	register struct uio *uio;
+{
+	register struct bpf_d *d = &bpf_dtab[minor(dev)];
+	int error;
+	int s;
+
+	/*
+	 * Restrict application to use a buffer the same size as
+	 * the kernel buffers.
+	 */
+	if (uio->uio_resid != d->bd_bufsize)
+		return (EINVAL);
+
+	/* The buffer pointers are examined and rotated at splimp. */
+	s = splimp();
+	/*
+	 * If the hold buffer is empty, then do a timed sleep, which
+	 * ends when the timeout expires or when enough packets
+	 * have arrived to fill the store buffer.
+	 */
+	while (d->bd_hbuf == 0) {
+		if (d->bd_immediate && d->bd_slen != 0) {
+			/*
+			 * A packet(s) either arrived since the previous
+			 * read or arrived while we were asleep.
+			 * Rotate the buffers and return what's here.
+			 */
+			ROTATE_BUFFERS(d);
+			break;
+		}
+		error = BPF_SLEEP((caddr_t)d, PRINET|PCATCH, "bpf",
+				  d->bd_rtout);
+		if (error == EINTR || error == ERESTART) {
+			splx(s);
+			return (error);
+		}
+		if (error == EWOULDBLOCK) {
+			/*
+			 * On a timeout, return what's in the buffer,
+			 * which may be nothing.  If there is something
+			 * in the store buffer, we can rotate the buffers.
+			 */
+			if (d->bd_hbuf)
+				/*
+				 * We filled up the buffer in between
+				 * getting the timeout and arriving
+				 * here, so we don't need to rotate.
+				 */
+				break;
+
+			if (d->bd_slen == 0) {
+				splx(s);
+				return (0);
+			}
+			ROTATE_BUFFERS(d);
+			break;
+		}
+	}
+	/*
+	 * At this point, we know we have something in the hold slot.
+	 */
+	splx(s);
+
+	/*
+	 * Move data from hold buffer into user space.
+	 * We know the entire buffer is transferred since
+	 * we checked above that the read buffer is bd_bufsize bytes.
+	 */
+	error = UIOMOVE(d->bd_hbuf, d->bd_hlen, UIO_READ, uio);
+
+	/* The hold buffer has been consumed: make it the free buffer. */
+	s = splimp();
+	d->bd_fbuf = d->bd_hbuf;
+	d->bd_hbuf = 0;
+	d->bd_hlen = 0;
+	splx(s);
+
+	return (error);
+}
+
+
+/*
+ * Wake up processes sleeping on this descriptor and notify any selectors.
+ */
+static inline void
+bpf_wakeup(d)
+	register struct bpf_d *d;
+{
+	wakeup((caddr_t)d);	/* readers sleep on the descriptor address */
+#if BSD >= 199103
+	selwakeup(&d->bd_sel);
+	/* XXX clear the recorded pid once the wakeup has been issued */
+	d->bd_sel.si_pid = 0;
+#else
+	if (d->bd_selproc) {
+		selwakeup(d->bd_selproc, (int)d->bd_selcoll);
+		d->bd_selcoll = 0;
+		d->bd_selproc = 0;
+	}
+#endif
+}
+
+/* Send the packet described by uio out the attached interface; 0 or errno. */
+int
+bpfwrite(dev, uio)
+	dev_t dev;
+	struct uio *uio;
+{
+	register struct bpf_d *d = &bpf_dtab[minor(dev)];
+	struct ifnet *ifp;
+	struct mbuf *m;
+	int error, s;
+	static struct sockaddr dst;
+	int datlen;
+
+	if (d->bd_bif == 0)
+		return (ENXIO);
+	ifp = d->bd_bif->bif_ifp;
+	if (uio->uio_resid == 0)
+		return (0);
+
+	error = bpf_movein(uio, (int)d->bd_bif->bif_dlt, &m, &dst, &datlen);
+	if (error)
+		return (error);
+
+	/* if_output is never reached on this path, so release m here. */
+	if (datlen > ifp->if_mtu) {
+		m_freem(m);
+		return (EMSGSIZE);
+	}
+
+	s = splnet();
+#if BSD >= 199103
+	error = (*ifp->if_output)(ifp, m, &dst, (struct rtentry *)0);
+#else
+	error = (*ifp->if_output)(ifp, m, &dst);
+#endif
+	splx(s);
+	/* The driver frees the mbuf. */
+	return (error);
+}
+
+/*
+ * Discard everything buffered on a descriptor and zero its receive and
+ * drop counters.  Callers are expected to be at splimp.
+ */
+static void
+reset_d(d)
+	struct bpf_d *d;
+{
+	/* Lengths and statistics all start over from zero. */
+	d->bd_rcount = d->bd_dcount = 0;
+	d->bd_slen = d->bd_hlen = 0;
+
+	if (d->bd_hbuf == 0)
+		return;
+	/* A hold buffer is pending: hand it back as the free buffer. */
+	d->bd_fbuf = d->bd_hbuf;
+	d->bd_hbuf = 0;
+}
+
+/* Control operations on a bpf descriptor.  Returns 0 or an errno.
+ *  FIONREAD		Check for read packet available.
+ *  SIOCGIFADDR		Get interface address - convenient hook to driver.
+ *  BIOCGBLEN		Get buffer len [for read()]; BIOCSBLEN sets it.
+ *  BIOCSETF		Set ethernet read filter.
+ *  BIOCFLUSH		Flush read packet buffer.
+ *  BIOCPROMISC		Put interface into promiscuous mode.
+ *  BIOCGDLT		Get link layer type.
+ *  BIOCGETIF		Get interface name.
+ *  BIOCSETIF		Set interface.
+ *  BIOCSRTIMEOUT	Set read timeout.
+ *  BIOCGRTIMEOUT	Get read timeout.
+ *  BIOCGSTATS		Get packet stats.
+ *  BIOCIMMEDIATE	Set immediate mode.
+ *  BIOCVERSION		Get filter language version.
+ */
+/* ARGSUSED */
+int
+bpfioctl(dev, cmd, addr, flag)
+	dev_t dev;
+	int cmd;
+	caddr_t addr;
+	int flag;
+{
+	register struct bpf_d *d = &bpf_dtab[minor(dev)];
+	int s, error = 0;
+
+	switch (cmd) {
+
+	default:
+		error = EINVAL;
+		break;
+
+	/*
+	 * Check for read packet available: bytes in the store buffer
+	 * plus bytes in the hold buffer, if there is one.
+	 */
+	case FIONREAD:
+		{
+			int n;
+
+			s = splimp();
+			n = d->bd_slen;
+			if (d->bd_hbuf)
+				n += d->bd_hlen;
+			splx(s);
+
+			*(int *)addr = n;
+			break;
+		}
+
+	case SIOCGIFADDR:
+		{
+			struct ifnet *ifp;
+
+			if (d->bd_bif == 0)
+				error = EINVAL;
+			else {
+				ifp = d->bd_bif->bif_ifp;
+				error = (*ifp->if_ioctl)(ifp, cmd, addr);
+			}
+			break;
+		}
+
+	/*
+	 * Get buffer len [for read()].
+	 */
+	case BIOCGBLEN:
+		*(u_int *)addr = d->bd_bufsize;
+		break;
+
+	/*
+	 * Set buffer length (only before an interface is attached).
+	 */
+	case BIOCSBLEN:
+#if BSD < 199103
+		error = EINVAL;
+#else
+		if (d->bd_bif != 0)
+			error = EINVAL;
+		else {
+			register u_int size = *(u_int *)addr;
+
+			if (size > BPF_MAXBUFSIZE)
+				*(u_int *)addr = size = BPF_MAXBUFSIZE;
+			else if (size < BPF_MINBUFSIZE)
+				*(u_int *)addr = size = BPF_MINBUFSIZE;
+			d->bd_bufsize = size;
+		}
+#endif
+		break;
+
+	/*
+	 * Set link layer read filter.
+	 */
+	case BIOCSETF:
+		error = bpf_setf(d, (struct bpf_program *)addr);
+		break;
+
+	/*
+	 * Flush read packet buffer.
+	 */
+	case BIOCFLUSH:
+		s = splimp();
+		reset_d(d);
+		splx(s);
+		break;
+
+	/*
+	 * Put interface into promiscuous mode.
+	 */
+	case BIOCPROMISC:
+		if (d->bd_bif == 0) {
+			/*
+			 * No interface attached yet.
+			 */
+			error = EINVAL;
+			break;
+		}
+		s = splimp();
+		if (d->bd_promisc == 0) {
+			error = ifpromisc(d->bd_bif->bif_ifp, 1);
+			if (error == 0)
+				d->bd_promisc = 1;
+		}
+		splx(s);
+		break;
+
+	/*
+	 * Get device parameters.
+	 */
+	case BIOCGDLT:
+		if (d->bd_bif == 0)
+			error = EINVAL;
+		else
+			*(u_int *)addr = d->bd_bif->bif_dlt;
+		break;
+
+	/*
+	 * Get interface name.
+	 */
+	case BIOCGETIF:
+		if (d->bd_bif == 0)
+			error = EINVAL;
+		else
+			bpf_ifname(d->bd_bif->bif_ifp, (struct ifreq *)addr);
+		break;
+
+	/*
+	 * Set interface.
+	 */
+	case BIOCSETIF:
+		error = bpf_setif(d, (struct ifreq *)addr);
+		break;
+
+	/*
+	 * Set read timeout.
+	 */
+	case BIOCSRTIMEOUT:
+		{
+			struct timeval *tv = (struct timeval *)addr;
+			u_long msec;
+
+			/* Compute number of milliseconds. */
+			msec = tv->tv_sec * 1000 + tv->tv_usec / 1000;
+			/* Scale to ticks; assumes tick >= 1000 usec (for a
+			   10ms hardclock, rtout<-msec/10).  0 = no timeout. */
+			d->bd_rtout = msec / (tick / 1000);
+			if (d->bd_rtout == 0 && (tv->tv_sec != 0 || tv->tv_usec != 0))
+				d->bd_rtout = 1;	/* round a tiny timeout up */
+			break;
+		}
+
+	/*
+	 * Get read timeout.
+	 */
+	case BIOCGRTIMEOUT:
+		{
+			struct timeval *tv = (struct timeval *)addr;
+			u_long msec = d->bd_rtout;
+
+			msec *= tick / 1000;	/* ticks -> milliseconds */
+			tv->tv_sec = msec / 1000;
+			tv->tv_usec = (msec % 1000) * 1000;	/* ms -> usec */
+			break;
+		}
+
+	/*
+	 * Get packet stats.
+	 */
+	case BIOCGSTATS:
+		{
+			struct bpf_stat *bs = (struct bpf_stat *)addr;
+
+			bs->bs_recv = d->bd_rcount;
+			bs->bs_drop = d->bd_dcount;
+			break;
+		}
+
+	/*
+	 * Set immediate mode.
+	 */
+	case BIOCIMMEDIATE:
+		d->bd_immediate = *(u_int *)addr;
+		break;
+
+	case BIOCVERSION:
+		{
+			struct bpf_version *bv = (struct bpf_version *)addr;
+
+			bv->bv_major = BPF_MAJOR_VERSION;
+			bv->bv_minor = BPF_MINOR_VERSION;
+			break;
+		}
+	}
+	return (error);
+}
+
+/*
+ * Install fp as d's packet filter program, releasing whatever filter the
+ * descriptor had before; a null bf_insns just removes it.  Bogus requests
+ * (including programs that fail to copy in or validate) return EINVAL.
+ */
+int
+bpf_setf(d, fp)
+	struct bpf_d *d;
+	struct bpf_program *fp;
+{
+	struct bpf_insn *newprog, *prev;
+	u_int ninsns, nbytes;
+	int ipl;
+
+	prev = d->bd_filter;
+	if (fp->bf_insns == 0) {
+		/* No instructions: only a zero length makes sense. */
+		if (fp->bf_len != 0)
+			return (EINVAL);
+		newprog = 0;
+	} else {
+		ninsns = fp->bf_len;
+		if (ninsns > BPF_MAXINSNS)
+			return (EINVAL);
+		/* Pull the program into kernel memory and vet it. */
+		nbytes = ninsns * sizeof(*fp->bf_insns);
+		newprog = (struct bpf_insn *)malloc(nbytes, M_DEVBUF, M_WAITOK);
+		if (copyin((caddr_t)fp->bf_insns, (caddr_t)newprog, nbytes) != 0
+		    || !bpf_validate(newprog, (int)ninsns)) {
+			free((caddr_t)newprog, M_DEVBUF);
+			return (EINVAL);
+		}
+	}
+
+	/* Swap in the new filter and flush the buffers at splimp. */
+	ipl = splimp();
+	d->bd_filter = newprog;
+	reset_d(d);
+	splx(ipl);
+
+	/* The previous program is released after it has been replaced. */
+	if (prev != 0)
+		free((caddr_t)prev, M_DEVBUF);
+	return (0);
+}
+
+/*
+ * Detach a file from its current interface (if attached at all) and attach
+ * to the interface indicated by the name stored in ifr.
+ * Return an errno or 0.  Note that ifr->ifr_name is modified in place.
+ */
+static int
+bpf_setif(d, ifr)
+	struct bpf_d *d;
+	struct ifreq *ifr;
+{
+	struct bpf_if *bp;
+	char *cp;
+	int unit, s, error;
+
+	/*
+	 * Separate string into name part and unit number.  Put a null
+	 * byte at the end of the name part, and compute the number.
+	 * If a unit number is unspecified, the default is 0,
+	 * as initialized just below.  XXX This should be common code.
+	 */
+	unit = 0;
+	cp = ifr->ifr_name;
+	cp[sizeof(ifr->ifr_name) - 1] = '\0';
+	while (*cp++) {	/* digit test starts at the second character */
+		if (*cp >= '0' && *cp <= '9') {
+			unit = *cp - '0';
+			*cp++ = '\0';
+			while (*cp)	/* remaining characters are not validated */
+				unit = 10 * unit + *cp++ - '0';
+			break;
+		}
+	}
+	/*
+	 * Look through attached interfaces for the named one.
+	 */
+	for (bp = bpf_iflist; bp != 0; bp = bp->bif_next) {
+		struct ifnet *ifp = bp->bif_ifp;
+
+		if (ifp == 0 || unit != ifp->if_unit
+		    || strcmp(ifp->if_name, ifr->ifr_name) != 0)
+			continue;
+		/*
+		 * We found the requested interface.
+		 * If it's not up, return an error.
+		 * Allocate the packet buffers if we need to.
+		 * If we're already attached to requested interface,
+		 * just flush the buffer.
+		 */
+		if ((ifp->if_flags & IFF_UP) == 0)
+			return (ENETDOWN);
+
+		if (d->bd_sbuf == 0) {
+			error = bpf_allocbufs(d);
+			if (error != 0)
+				return (error);
+		}
+		s = splimp();
+		if (bp != d->bd_bif) {
+			if (d->bd_bif)
+				/*
+				 * Detach if attached to something else.
+				 */
+				bpf_detachd(d);
+
+			bpf_attachd(d, bp);
+		}
+		reset_d(d);
+		splx(s);
+		return (0);
+	}
+	/* Not found. */
+	return (ENXIO);
+}
+
+/*
+ * Convert an interface name plus unit number of an ifp to a single
+ * NUL-terminated name (e.g. "le" and 0 give "le0") returned in the ifr.
+ */
+static void
+bpf_ifname(ifp, ifr)
+	struct ifnet *ifp;
+	struct ifreq *ifr;
+{
+	char *s = ifp->if_name;
+	char *d = ifr->ifr_name;
+
+	while ((*d = *s++) != '\0')	/* stop ON the NUL, not past it, */
+		d++;			/* so the unit digit overwrites it */
+	/* XXX Assume that unit number is less than 10. */
+	*d++ = ifp->if_unit + '0';
+	*d = '\0';
+}
+
+/*
+ * The new select interface passes down the proc pointer; the old select
+ * stubs had to grab it out of the user struct (u.u_procp).  Either way the
+ * real work is done by bpf_select below.
+ */
+#if BSD >= 199103
+#define bpf_select bpfselect
+#else
+int
+bpfselect(dev, rw)
+	register dev_t dev;
+	int rw;
+{
+	return (bpf_select(dev, rw, u.u_procp));
+}
+#endif
+
+/*
+ * Support for select() system call
+ *
+ * Return true iff the specific operation will not block indefinitely.
+ * Otherwise, return false but make a note that a selwakeup() must be done.
+ */
+int
+bpf_select(dev, rw, p)
+	register dev_t dev;
+	int rw;
+	struct proc *p;
+{
+	register struct bpf_d *d;
+	register int s;
+
+	if (rw != FREAD)
+		return (0);	/* only selecting for read is supported */
+	/*
+	 * An imitation of the FIONREAD ioctl code.
+	 */
+	d = &bpf_dtab[minor(dev)];
+
+	s = splimp();
+	if (d->bd_hlen != 0 || (d->bd_immediate && d->bd_slen != 0)) {
+		/*
+		 * There is data waiting.
+		 */
+		splx(s);
+		return (1);
+	}
+#if BSD >= 199103
+	selrecord(p, &d->bd_sel);
+#else
+	/*
+	 * No data ready.  If there's already a select() waiting on this
+	 * minor device then this is a collision.  This shouldn't happen
+	 * because minors really should not be shared, but if a process
+	 * forks while one of these is open, it is possible that both
+	 * processes could select on the same descriptor.
+	 */
+	if (d->bd_selproc && d->bd_selproc->p_wchan == (caddr_t)&selwait)
+		d->bd_selcoll = 1;
+	else
+		d->bd_selproc = p;
+#endif
+	splx(s);
+	return (0);
+}
+
+/*
+ * Driver entry point for a packet held in one contiguous buffer: pkt
+ * points at pktlen bytes.  Every descriptor listening on the interface
+ * runs its filter over the packet, and each one that accepts it gets a
+ * copy stashed in its buffer.
+ */
+void
+bpf_tap(arg, pkt, pktlen)
+	caddr_t arg;
+	register u_char *pkt;
+	register u_int pktlen;
+{
+	register struct bpf_d *listener;
+	register u_int snap;
+	/*
+	 * Note that the ipl does not have to be raised at this point.
+	 * The only problem that could arise here is that if two different
+	 * interfaces shared any data.  This is not the case.
+	 */
+	listener = ((struct bpf_if *)arg)->bif_dlist;
+	for (; listener != 0; listener = listener->bd_next) {
+		++listener->bd_rcount;
+		snap = bpf_filter(listener->bd_filter, pkt, pktlen, pktlen);
+		if (snap == 0)
+			continue;
+		catchpacket(listener, pkt, pktlen, snap, bcopy);
+	}
+}
+
+/*
+ * Flatten the first len bytes of an mbuf chain into a contiguous buffer.
+ * Panics if the chain is shorter than len.  This code is derived from
+ * m_copydata in sys/uipc_mbuf.c.
+ */
+static void
+bpf_mcopy(src_arg, dst_arg, len)
+	const void *src_arg;
+	void *dst_arg;
+	register u_int len;
+{
+	register const struct mbuf *mb;
+	register u_int chunk;
+	u_char *out;
+
+	out = dst_arg;
+	for (mb = src_arg; len > 0; mb = mb->m_next) {
+		/* Running off the chain with bytes still owed is fatal. */
+		if (mb == 0)
+			panic("bpf_mcopy");
+		chunk = min(mb->m_len, len);
+		bcopy(mtod(mb, caddr_t), (caddr_t)out, chunk);
+		out += chunk;
+		len -= chunk;
+	}
+}
+
+/*
+ * Driver entry point for a packet that arrives as an mbuf chain.
+ */
+void
+bpf_mtap(arg, m)
+	caddr_t arg;
+	struct mbuf *m;
+{
+	struct bpf_d *dp;
+	struct mbuf *seg;
+	u_int total = 0, snap;
+
+	/* The packet length is the sum of every mbuf in the chain. */
+	for (seg = m; seg != 0; seg = seg->m_next)
+		total += seg->m_len;
+
+	dp = ((struct bpf_if *)arg)->bif_dlist;
+	for (; dp != 0; dp = dp->bd_next) {
+		++dp->bd_rcount;
+		snap = bpf_filter(dp->bd_filter, (u_char *)m, total, 0);
+		if (snap != 0)
+			catchpacket(dp, (u_char *)m, total, snap, bpf_mcopy);
+	}
+}
+
+/*
+ * Move the packet data from interface memory (pkt) into the
+ * store buffer, waking up any listener when the buffers are rotated
+ * or when immediate mode is set.  "copy" (cpfn) is the routine called
+ * to do the actual data transfer.  bcopy is passed in to copy contiguous
+ * chunks, while bpf_mcopy is passed in to copy mbuf chains.  In the
+ * latter case, pkt is really an mbuf.
+ */
+static void
+catchpacket(d, pkt, pktlen, snaplen, cpfn)
+	register struct bpf_d *d;
+	register u_char *pkt;
+	register u_int pktlen, snaplen;
+	register void (*cpfn)(const void *, void *, u_int);
+{
+	register struct bpf_hdr *hp;
+	register int totlen, curlen;
+	register int hdrlen = d->bd_bif->bif_hdrlen;
+	/*
+	 * Figure out how many bytes to move.  If the packet is
+	 * greater or equal to the snapshot length, transfer that
+	 * much.  Otherwise, transfer the whole packet (unless
+	 * we hit the buffer size limit).
+	 */
+	totlen = hdrlen + min(snaplen, pktlen);
+	if (totlen > d->bd_bufsize)
+		totlen = d->bd_bufsize;
+
+	/*
+	 * Round up the end of the previous packet to the next longword.
+	 */
+	curlen = BPF_WORDALIGN(d->bd_slen);
+	if (curlen + totlen > d->bd_bufsize) {
+		/*
+		 * This packet will overflow the storage buffer.
+		 * Rotate the buffers if we can, then wakeup any
+		 * pending reads.
+		 */
+		if (d->bd_fbuf == 0) {
+			/*
+			 * We haven't completed the previous read yet,
+			 * so drop the packet and count the drop.
+			 */
+			++d->bd_dcount;
+			return;
+		}
+		ROTATE_BUFFERS(d);
+		bpf_wakeup(d);
+		curlen = 0;
+	}
+	else if (d->bd_immediate)
+		/*
+		 * Immediate mode is set.  A packet arrived so any
+		 * reads should be woken up.
+		 */
+		bpf_wakeup(d);
+
+	/*
+	 * Append the bpf header.
+	 */
+	hp = (struct bpf_hdr *)(d->bd_sbuf + curlen);
+#if BSD >= 199103
+	microtime(&hp->bh_tstamp);
+#elif defined(sun)
+	uniqtime(&hp->bh_tstamp);
+#else
+	hp->bh_tstamp = time;
+#endif
+	hp->bh_datalen = pktlen;
+	hp->bh_hdrlen = hdrlen;
+	/*
+	 * Copy the packet data into the store buffer and update its length.
+	 */
+	(*cpfn)(pkt, (u_char *)hp + hdrlen, (hp->bh_caplen = totlen - hdrlen));
+	d->bd_slen = curlen + totlen;
+}
+
+/*
+ * Allocate the free and store buffers (bd_bufsize bytes each) and zero
+ * the buffer lengths.  Returns 0, or ENOBUFS with no buffer left attached.
+ */
+static int
+bpf_allocbufs(d)
+	register struct bpf_d *d;
+{
+	d->bd_fbuf = (caddr_t)malloc(d->bd_bufsize, M_DEVBUF, M_WAITOK);
+	if (d->bd_fbuf == 0)
+		return (ENOBUFS);
+	d->bd_sbuf = (caddr_t)malloc(d->bd_bufsize, M_DEVBUF, M_WAITOK);
+	if (d->bd_sbuf == 0) {
+		free(d->bd_fbuf, M_DEVBUF);
+		d->bd_fbuf = 0;		/* don't leave a dangling pointer */
+		return (ENOBUFS);
+	}
+	d->bd_slen = d->bd_hlen = 0;
+	return (0);
+}
+
+/*
+ * Release the packet buffers and filter program owned by a descriptor,
+ * then mark the descriptor free.  Called on close.
+ */
+static void
+bpf_freed(d)
+	register struct bpf_d *d;
+{
+	int have_bufs = (d->bd_sbuf != 0);
+
+	/*
+	 * Interrupts need not be blocked: the descriptor has been detached
+	 * from its interface and has not yet been marked free.
+	 */
+	if (have_bufs)
+		free(d->bd_sbuf, M_DEVBUF);
+	if (have_bufs && d->bd_hbuf != 0)
+		free(d->bd_hbuf, M_DEVBUF);
+	if (have_bufs && d->bd_fbuf != 0)
+		free(d->bd_fbuf, M_DEVBUF);
+	if (d->bd_filter != 0)
+		free((caddr_t)d->bd_filter, M_DEVBUF);
+
+	D_MARKFREE(d);
+}
+
+/*
+ * Attach an interface to bpf.  driverp is a pointer to a (struct bpf_if *)
+ * in the driver's softc; dlt is the link layer type; hdrlen is the fixed
+ * size of the link header (variable length headers not yet supported).
+ */
+void
+bpfattach(driverp, ifp, dlt, hdrlen)
+	caddr_t *driverp;
+	struct ifnet *ifp;
+	u_int dlt, hdrlen;
+{
+	struct bpf_if *bp;
+	int i;
+#if BSD < 199103
+	static struct bpf_if bpf_ifs[NBPFILTER];
+	static int bpfifno;
+
+	bp = (bpfifno < NBPFILTER) ? &bpf_ifs[bpfifno++] : 0;
+#else
+	bp = (struct bpf_if *)malloc(sizeof(*bp), M_DEVBUF, M_DONTWAIT);
+#endif
+	if (bp == 0)
+		panic("bpfattach");	/* no bpf_if could be obtained */
+
+	bp->bif_dlist = 0;
+	bp->bif_driverp = (struct bpf_if **)driverp;
+	bp->bif_ifp = ifp;
+	bp->bif_dlt = dlt;
+
+	bp->bif_next = bpf_iflist;	/* push onto the head of bpf_iflist */
+	bpf_iflist = bp;
+
+	*bp->bif_driverp = 0;
+
+	/*
+	 * Compute the length of the bpf header.  This is not necessarily
+	 * equal to SIZEOF_BPF_HDR because we want to insert spacing such
+	 * that the network layer header begins on a longword boundary (for
+	 * performance reasons and to alleviate alignment restrictions).
+	 */
+	bp->bif_hdrlen = BPF_WORDALIGN(hdrlen + SIZEOF_BPF_HDR) - hdrlen;
+
+	/*
+	 * Mark all the descriptors free if this hasn't been done.
+	 * (Descriptor 0 not being free is taken to mean "not done yet".)
+	 */
+	if (!D_ISFREE(&bpf_dtab[0]))
+		for (i = 0; i < NBPFILTER; ++i)
+			D_MARKFREE(&bpf_dtab[i]);
+
+	printf("bpf: %s%d attached\n", ifp->if_name, ifp->if_unit);
+}
+
+#if BSD >= 199103
+/* XXX This routine belongs in net/if.c. */
+/*
+ * Turn promiscuous mode on (pswitch nonzero) or off (pswitch zero) for
+ * interface ifp.  Requests are reference counted: the driver ioctl is
+ * only invoked for the first "on" and for the final "off".  Unbalanced
+ * on/off requests give undefined results.
+ */
+int
+ifpromisc(ifp, pswitch)
+	struct ifnet *ifp;
+	int pswitch;
+{
+	struct ifreq req;
+
+	/* An interface that is not configured up cannot go promiscuous. */
+	if (!(ifp->if_flags & IFF_UP))
+		return (ENETDOWN);
+
+	if (!pswitch) {
+		/* Other users remain: leave the flag alone. */
+		if (--ifp->if_pcount > 0)
+			return (0);
+		ifp->if_flags &= ~IFF_PROMISC;
+	} else {
+		/* Somebody already switched it on: just count. */
+		if (ifp->if_pcount++ != 0)
+			return (0);
+		ifp->if_flags |= IFF_PROMISC;
+	}
+	req.ifr_flags = ifp->if_flags;
+	return ((*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&req));
+}
+#endif
+
+#if BSD < 199103
+/*
+ * Allocate some memory for bpf.  This is temporary SunOS support, and
+ * is admittedly a hack.  The memory lives inside an mbuf (or cluster).
+ * If resources are unavailable, or size is too large, return 0.
+ */
+static caddr_t
+bpf_alloc(size, canwait)
+	register int size;
+	register int canwait;
+{
+	register struct mbuf *m;
+
+	if ((unsigned)size > (MCLBYTES-8))	/* 8 bytes are reserved below */
+		return 0;
+
+	MGET(m, canwait, MT_DATA);
+	if (m == 0)
+		return 0;
+	if ((unsigned)size > (MLEN-8)) {	/* too big for a plain mbuf */
+		MCLGET(m);
+		if (m->m_len != MCLBYTES) {
+			m_freem(m);
+			return 0;
+		}
+	}
+	*mtod(m, struct mbuf **) = m;	/* back pointer used by free() */
+	return mtod(m, caddr_t) + 8;
+}
+#endif
+#endif
diff --git a/sys/net/bpf.h b/sys/net/bpf.h
new file mode 100644
index 00000000000..2e093ac5ce1
--- /dev/null
+++ b/sys/net/bpf.h
@@ -0,0 +1,252 @@
+/*
+ * Copyright (c) 1990, 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from the Stanford/CMU enet packet filter,
+ * (net/enet.c) distributed as part of 4.3BSD, and code contributed
+ * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence
+ * Berkeley Laboratory.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *      @(#)bpf.h	8.1 (Berkeley) 6/10/93
+ *
+ * @(#) $Header: bpf.h,v 1.24 91/10/27 21:22:32 mccanne Exp $ (LBL)
+ */
+
+/*
+ * Alignment macros.  BPF_WORDALIGN rounds x up to the nearest
+ * multiple of BPF_ALIGNMENT (x itself if already aligned).
+ */
+#define BPF_ALIGNMENT sizeof(long)
+#define BPF_WORDALIGN(x) (((x)+(BPF_ALIGNMENT-1))&~(BPF_ALIGNMENT-1))
+
+#define BPF_MAXINSNS 512	/* longest program BIOCSETF accepts */
+#define BPF_MAXBUFSIZE 0x8000	/* BIOCSBLEN clamps requests to this */
+#define BPF_MINBUFSIZE 32	/* ... and up to this */
+
+/*
+ *  Structure for BIOCSETF.
+ */
+struct bpf_program {
+	u_int bf_len;			/* number of instructions */
+	struct bpf_insn *bf_insns;	/* the program; null removes the filter */
+};
+ 
+/*
+ * Struct returned by BIOCGSTATS.
+ */
+struct bpf_stat {
+	u_int bs_recv;		/* number of packets received (pre-filter) */
+	u_int bs_drop;		/* number of packets dropped (no free buffer) */
+};
+
+/*
+ * Struct returned by BIOCVERSION.  This represents the version number of
+ * the filter language described by the instruction encodings below.
+ * bpf understands a program iff kernel_major == filter_major &&
+ * kernel_minor >= filter_minor, that is, if the value returned by the
+ * running kernel has the same major number and a minor number equal
+ * to or greater than that of the filter being downloaded.  Otherwise,
+ * the results are undefined, meaning an error may be returned or packets
+ * may be accepted haphazardly.
+ * It has nothing to do with the source code version.
+ */
+struct bpf_version {
+	u_short bv_major;
+	u_short bv_minor;
+};
+/* Current version number. */
+#define BPF_MAJOR_VERSION 1
+#define BPF_MINOR_VERSION 1
+
+/*
+ * BPF ioctls
+ *
+ * The first set is for compatibility with Sun's pcc style
+ * header files.  If you're using gcc, we assume that you
+ * have run fixincludes so the latter set should work.
+ */
+#if (defined(sun) || defined(ibm032)) && !defined(__GNUC__)
+#define	BIOCGBLEN	_IOR(B,102, u_int)
+#define	BIOCSBLEN	_IOWR(B,102, u_int)
+#define	BIOCSETF	_IOW(B,103, struct bpf_program)
+#define	BIOCFLUSH	_IO(B,104)
+#define BIOCPROMISC	_IO(B,105)
+#define	BIOCGDLT	_IOR(B,106, u_int)
+#define BIOCGETIF	_IOR(B,107, struct ifreq)
+#define BIOCSETIF	_IOW(B,108, struct ifreq)
+#define BIOCSRTIMEOUT	_IOW(B,109, struct timeval)
+#define BIOCGRTIMEOUT	_IOR(B,110, struct timeval)
+#define BIOCGSTATS	_IOR(B,111, struct bpf_stat)
+#define BIOCIMMEDIATE	_IOW(B,112, u_int)
+#define BIOCVERSION	_IOR(B,113, struct bpf_version)
+#else
+#define	BIOCGBLEN	_IOR('B',102, u_int)
+#define	BIOCSBLEN	_IOWR('B',102, u_int)
+#define	BIOCSETF	_IOW('B',103, struct bpf_program)
+#define	BIOCFLUSH	_IO('B',104)
+#define BIOCPROMISC	_IO('B',105)
+#define	BIOCGDLT	_IOR('B',106, u_int)
+#define BIOCGETIF	_IOR('B',107, struct ifreq)
+#define BIOCSETIF	_IOW('B',108, struct ifreq)
+#define BIOCSRTIMEOUT	_IOW('B',109, struct timeval)
+#define BIOCGRTIMEOUT	_IOR('B',110, struct timeval)
+#define BIOCGSTATS	_IOR('B',111, struct bpf_stat)
+#define BIOCIMMEDIATE	_IOW('B',112, u_int)
+#define BIOCVERSION	_IOR('B',113, struct bpf_version)
+#endif
+
+/*
+ * Structure prepended to each packet stored in the capture buffer.
+ */
+struct bpf_hdr {
+	struct timeval	bh_tstamp;	/* time stamp */
+	u_long		bh_caplen;	/* length of captured portion */
+	u_long		bh_datalen;	/* original length of packet */
+	u_short		bh_hdrlen;	/* length of bpf header (this struct
+					   plus alignment padding) */
+};
+/*
+ * Because the structure above is not a multiple of 4 bytes, some compilers
+ * will insist on inserting padding; hence, sizeof(struct bpf_hdr) won't work.
+ * Only the kernel needs to know about it; applications use bh_hdrlen.
+ */
+#ifdef KERNEL
+#define SIZEOF_BPF_HDR 18	/* presumably 8 + 4 + 4 + 2 -- TODO confirm */
+#endif
+
+/*
+ * Data-link level type codes (the dlt passed to bpfattach; see BIOCGDLT).
+ * Currently, only DLT_EN10MB and DLT_SLIP are supported.
+ */
+#define DLT_NULL	0	/* no link-layer encapsulation */
+#define DLT_EN10MB	1	/* Ethernet (10Mb) */
+#define DLT_EN3MB	2	/* Experimental Ethernet (3Mb) */
+#define DLT_AX25	3	/* Amateur Radio AX.25 */
+#define DLT_PRONET	4	/* Proteon ProNET Token Ring */
+#define DLT_CHAOS	5	/* Chaos */
+#define DLT_IEEE802	6	/* IEEE 802 Networks */
+#define DLT_ARCNET	7	/* ARCNET */
+#define DLT_SLIP	8	/* Serial Line IP */
+#define DLT_PPP		9	/* Point-to-point Protocol */
+#define DLT_FDDI	10	/* FDDI */
+
+/*
+ * The instruction encodings.
+ */
+/* instruction classes (low three bits of the code) */
+#define BPF_CLASS(code) ((code) & 0x07)
+#define		BPF_LD		0x00
+#define		BPF_LDX		0x01
+#define		BPF_ST		0x02
+#define		BPF_STX		0x03
+#define		BPF_ALU		0x04
+#define		BPF_JMP		0x05
+#define		BPF_RET		0x06
+#define		BPF_MISC	0x07
+
+/* ld/ldx fields: operand size and addressing mode */
+#define BPF_SIZE(code)	((code) & 0x18)
+#define		BPF_W		0x00
+#define		BPF_H		0x08
+#define		BPF_B		0x10
+#define BPF_MODE(code)	((code) & 0xe0)
+#define		BPF_IMM 	0x00
+#define		BPF_ABS		0x20
+#define		BPF_IND		0x40
+#define		BPF_MEM		0x60
+#define		BPF_LEN		0x80
+#define		BPF_MSH		0xa0
+
+/* alu/jmp fields: operation and operand source */
+#define BPF_OP(code)	((code) & 0xf0)
+#define		BPF_ADD		0x00
+#define		BPF_SUB		0x10
+#define		BPF_MUL		0x20
+#define		BPF_DIV		0x30
+#define		BPF_OR		0x40
+#define		BPF_AND		0x50
+#define		BPF_LSH		0x60
+#define		BPF_RSH		0x70
+#define		BPF_NEG		0x80
+#define		BPF_JA		0x00
+#define		BPF_JEQ		0x10
+#define		BPF_JGT		0x20
+#define		BPF_JGE		0x30
+#define		BPF_JSET	0x40
+#define BPF_SRC(code)	((code) & 0x08)
+#define		BPF_K		0x00
+#define		BPF_X		0x08
+
+/* ret - BPF_K and BPF_X also apply */
+#define BPF_RVAL(code)	((code) & 0x18)
+#define		BPF_A		0x10
+
+/* misc */
+#define BPF_MISCOP(code) ((code) & 0xf8)
+#define		BPF_TAX		0x00
+#define		BPF_TXA		0x80
+
+/*
+ * The instruction data structure.
+ */
+struct bpf_insn {
+	u_short	code;	/* opcode, built from the encodings above */
+	u_char 	jt;	/* presumably the jump-if-true offset -- confirm */
+	u_char 	jf;	/* presumably the jump-if-false offset -- confirm */
+	long	k;	/* generic operand field */
+};
+
+/*
+ * Macros for struct bpf_insn array initializers (jt/jf are 0 in BPF_STMT).
+ */
+#define BPF_STMT(code, k) { (u_short)(code), 0, 0, k }
+#define BPF_JUMP(code, k, jt, jf) { (u_short)(code), jt, jf, k }
+
+#ifdef KERNEL	/* kernel-only entry points */
+int	 bpf_validate __P((struct bpf_insn *, int));
+int	 bpfopen __P((dev_t, int));
+int	 bpfclose __P((dev_t, int));
+int	 bpfread __P((dev_t, struct uio *));
+int	 bpfwrite __P((dev_t, struct uio *));
+int	 bpfioctl __P((dev_t, int, caddr_t, int));
+int	 bpf_select __P((dev_t, int, struct proc *));
+void	 bpf_tap __P((caddr_t, u_char *, u_int));
+void	 bpf_mtap __P((caddr_t, struct mbuf *));
+void	 bpfattach __P((caddr_t *, struct ifnet *, u_int, u_int));
+void	 bpfilterattach __P((int));
+u_int	 bpf_filter __P((struct bpf_insn *, u_char *, u_int, u_int));
+#endif
+
+/*
+ * Number of scratch memory words (for BPF_LD|BPF_MEM and BPF_ST).
+ */
+#define BPF_MEMWORDS 16	/* visible to applications as well as the kernel */
+
diff --git a/sys/net/bpf_compat.h b/sys/net/bpf_compat.h
new file mode 100644
index 00000000000..132a6df6452
--- /dev/null
+++ b/sys/net/bpf_compat.h
@@ -0,0 +1,50 @@
+/*-
+ * Copyright (c) 1992, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)bpf_compat.h	8.1 (Berkeley) 6/10/93
+ */
+
+/* from: $Header: bpf_compat.h,v 1.1 92/05/22 15:33:20 mccanne Exp $ (LBL) */
+
+/*
+ * Some hacks for compatibility across SunOS and 4.4BSD.  We emulate malloc
+ * and free with mbuf clusters.  We store a pointer to the mbuf in the first
+ * word of the mbuf and return 8 bytes past the start of data (for double
+ * word alignment).  We cannot just use offsets because clusters are not at
+ * a fixed offset from the associated mbuf.  Sorry for this kludge.
+ * The type argument of both macros is ignored.
+ */
+#define malloc(size, type, canwait) bpf_alloc(size, canwait)
+#define free(cp, type) m_free(*(struct mbuf **)((cp) - 8))
+#define M_WAITOK M_WAIT
+/* This mapping works for our purposes. */
+#define ERESTART EINTR
diff --git a/sys/net/bpf_filter.c b/sys/net/bpf_filter.c
new file mode 100644
index 00000000000..6a30a665754
--- /dev/null
+++ b/sys/net/bpf_filter.c
@@ -0,0 +1,548 @@
+/*
+ * Copyright (c) 1990, 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from the Stanford/CMU enet packet filter,
+ * (net/enet.c) distributed as part of 4.3BSD, and code contributed
+ * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence
+ * Berkeley Laboratory.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *      @(#)bpf_filter.c	8.1 (Berkeley) 6/10/93
+ *
+ * static char rcsid[] =
+ * "$Header: bpf_filter.c,v 1.16 91/10/27 21:22:35 mccanne Exp $";
+ */
+
+#include <sys/param.h>
+#include <sys/types.h>
+#include <sys/time.h>
+
+#ifdef sun
+#include <netinet/in.h>
+#endif
+
+#if defined(sparc) || defined(mips) || defined(ibm032)
+#define BPF_ALIGN
+#endif
+/* Big-endian 16/32-bit fetch at p; done bytewise when BPF_ALIGN is set. */
+#ifndef BPF_ALIGN
+#define EXTRACT_SHORT(p)	((u_short)ntohs(*(u_short *)(p)))
+#define EXTRACT_LONG(p)		(ntohl(*(u_long *)(p)))
+#else
+#define EXTRACT_SHORT(p)\
+	((u_short)\
+		((u_short)*((u_char *)(p)+0)<<8|\
+		 (u_short)*((u_char *)(p)+1)<<0))
+#define EXTRACT_LONG(p)\
+		((u_long)*((u_char *)(p)+0)<<24|\
+		 (u_long)*((u_char *)(p)+1)<<16|\
+		 (u_long)*((u_char *)(p)+2)<<8|\
+		 (u_long)*((u_char *)(p)+3)<<0)
+#endif
+
+#ifdef KERNEL
+#include <sys/mbuf.h>
+#define MINDEX(m, k) \
+{ \
+	register int len = m->m_len; \
+ /* advance m,k to the mbuf holding byte k; caller returns 0 at chain end */ \
+	while (k >= len) { \
+		k -= len; \
+		m = m->m_next; \
+		if (m == 0) \
+			return 0; \
+		len = m->m_len; \
+	} \
+}
+
+/*
+ * Fetch the big-endian 32-bit word at byte offset k of mbuf chain m (it may
+ * straddle two mbufs).  *err is 0 on success; 1, returning 0, if too short.
+ */
+static int
+m_xword(m, k, err)
+	register struct mbuf *m;
+	register int k, *err;
+{
+	register int len;
+	register u_char *cp, *np;
+	register struct mbuf *m0;
+
+	len = m->m_len;
+	while (k >= len) {
+		k -= len;
+		m = m->m_next;
+		if (m == 0)
+			goto bad;
+		len = m->m_len;
+	}
+	cp = mtod(m, u_char *) + k;
+	if (len - k >= 4) {
+		*err = 0;
+		return EXTRACT_LONG(cp);
+	}
+	m0 = m->m_next;
+	if (m0 == 0 || m0->m_len + len - k < 4)
+		goto bad;
+	*err = 0;
+	np = mtod(m0, u_char *);
+	/* cp already points at byte k, so index it from 0, not from k. */
+	switch (len - k) {
+	case 1:
+		return (cp[0] << 24) | (np[0] << 16) | (np[1] << 8) | np[2];
+	case 2:
+		return (cp[0] << 24) | (cp[1] << 16) | (np[0] << 8) | np[1];
+	default:
+		return (cp[0] << 24) | (cp[1] << 16) | (cp[2] << 8) | np[0];
+	}
+    bad:
+	*err = 1;
+	return 0;
+}
+
+/*
+ * Fetch the big-endian 16-bit value at byte offset k of mbuf chain m (it may
+ * straddle two mbufs).  *err is 0 on success; 1, returning 0, if too short.
+ */
+static int
+m_xhalf(m, k, err)
+	register struct mbuf *m;
+	register int k, *err;
+{
+	register int len;
+	register u_char *cp;
+	register struct mbuf *m0;
+
+	for (len = m->m_len; k >= len; len = m->m_len) {
+		k -= len;
+		if ((m = m->m_next) == 0)
+			goto bad;
+	}
+	cp = mtod(m, u_char *) + k;
+	*err = 0;
+	if (len - k >= 2)
+		return EXTRACT_SHORT(cp);
+	m0 = m->m_next;
+	if (m0 == 0 || m0->m_len < 1)
+		goto bad;
+	/* cp already points at byte k, so the high byte is cp[0], not cp[k]. */
+	return (cp[0] << 8) | mtod(m0, u_char *)[0];
+ bad:
+	*err = 1;
+	return 0;
+}
+#endif
+
+#include <net/bpf.h>
+/*
+ * Execute the filter program starting at pc on the packet p.  wirelen is
+ * the length of the original packet; buflen is the amount of data present
+ * (in the kernel, buflen == 0 means p is really a struct mbuf chain).
+ * Returns the RET instruction's value; 0 on an out-of-range load, a zero X
+ * divisor or (kernel) an unknown opcode; (u_int)-1 if pc is null.
+ */
+u_int
+bpf_filter(pc, p, wirelen, buflen)
+	register struct bpf_insn *pc;
+	register u_char *p;
+	u_int wirelen;
+	register u_int buflen;
+{
+	register u_long A, X;
+	register int k;
+	long mem[BPF_MEMWORDS];
+
+	if (pc == 0)
+		return (u_int)-1;	/* No filter means accept all. */
+#ifdef lint
+	A = 0;
+	X = 0;
+#endif
+	--pc;
+	while (1) {
+		++pc;
+		switch (pc->code) {
+		default:
+#ifdef KERNEL
+			return 0;
+#else
+			abort();
+#endif
+		case BPF_RET|BPF_K:
+			return (u_int)pc->k;
+
+		case BPF_RET|BPF_A:
+			return (u_int)A;
+
+		case BPF_LD|BPF_W|BPF_ABS:
+			k = pc->k;
+			if (k < 0 || k + sizeof(long) > buflen) {
+#ifdef KERNEL
+				int merr;
+
+				if (buflen != 0 || k < 0)
+					return 0;
+				A = m_xword((struct mbuf *)p, k, &merr);
+				if (merr != 0)
+					return 0;
+				continue;
+#else
+				return 0;
+#endif
+			}
+#ifdef BPF_ALIGN
+			if (((int)(p + k) & 3) != 0)
+				A = EXTRACT_LONG(&p[k]);
+			else
+#endif
+				A = ntohl(*(long *)(p + k));
+			continue;
+
+		case BPF_LD|BPF_H|BPF_ABS:
+			k = pc->k;
+			if (k < 0 || k + sizeof(short) > buflen) {
+#ifdef KERNEL
+				int merr;
+
+				if (buflen != 0 || k < 0)
+					return 0;
+				A = m_xhalf((struct mbuf *)p, k, &merr);
+				if (merr != 0)
+					return 0;
+				continue;
+#else
+				return 0;
+#endif
+			}
+			A = EXTRACT_SHORT(&p[k]);
+			continue;
+
+		case BPF_LD|BPF_B|BPF_ABS:
+			k = pc->k;
+			if (k >= buflen) {
+#ifdef KERNEL
+				register struct mbuf *m;
+
+				if (buflen != 0 || k < 0)
+					return 0;
+				m = (struct mbuf *)p;
+				MINDEX(m, k);
+				A = mtod(m, u_char *)[k];
+				continue;
+#else
+				return 0;
+#endif
+			}
+			A = p[k];
+			continue;
+
+		case BPF_LD|BPF_W|BPF_LEN:
+			A = wirelen;
+			continue;
+
+		case BPF_LDX|BPF_W|BPF_LEN:
+			X = wirelen;
+			continue;
+
+		case BPF_LD|BPF_W|BPF_IND:
+			k = X + pc->k;
+			if (k < 0 || k + sizeof(long) > buflen) {
+#ifdef KERNEL
+				int merr;
+
+				if (buflen != 0 || k < 0)
+					return 0;
+				A = m_xword((struct mbuf *)p, k, &merr);
+				if (merr != 0)
+					return 0;
+				continue;
+#else
+				return 0;
+#endif
+			}
+#ifdef BPF_ALIGN
+			if (((int)(p + k) & 3) != 0)
+				A = EXTRACT_LONG(&p[k]);
+			else
+#endif
+				A = ntohl(*(long *)(p + k));
+			continue;
+
+		case BPF_LD|BPF_H|BPF_IND:
+			k = X + pc->k;
+			if (k < 0 || k + sizeof(short) > buflen) {
+#ifdef KERNEL
+				int merr;
+
+				if (buflen != 0 || k < 0)
+					return 0;
+				A = m_xhalf((struct mbuf *)p, k, &merr);
+				if (merr != 0)
+					return 0;
+				continue;
+#else
+				return 0;
+#endif
+			}
+			A = EXTRACT_SHORT(&p[k]);
+			continue;
+
+		case BPF_LD|BPF_B|BPF_IND:
+			k = X + pc->k;
+			if (k >= buflen) {
+#ifdef KERNEL
+				register struct mbuf *m;
+
+				if (buflen != 0 || k < 0)
+					return 0;
+				m = (struct mbuf *)p;
+				MINDEX(m, k);
+				A = mtod(m, u_char *)[k];	/* unsigned, as ABS */
+				continue;
+#else
+				return 0;
+#endif
+			}
+			A = p[k];
+			continue;
+
+		case BPF_LDX|BPF_MSH|BPF_B:
+			k = pc->k;
+			if (k >= buflen) {
+#ifdef KERNEL
+				register struct mbuf *m;
+
+				if (buflen != 0 || k < 0)
+					return 0;
+				m = (struct mbuf *)p;
+				MINDEX(m, k);
+				X = (mtod(m, char *)[k] & 0xf) << 2;
+				continue;
+#else
+				return 0;
+#endif
+			}
+			X = (p[pc->k] & 0xf) << 2;
+			continue;
+
+		case BPF_LD|BPF_IMM:
+			A = pc->k;
+			continue;
+
+		case BPF_LDX|BPF_IMM:
+			X = pc->k;
+			continue;
+
+		case BPF_LD|BPF_MEM:
+			A = mem[pc->k];
+			continue;
+
+		case BPF_LDX|BPF_MEM:
+			X = mem[pc->k];
+			continue;
+
+		case BPF_ST:
+			mem[pc->k] = A;
+			continue;
+
+		case BPF_STX:
+			mem[pc->k] = X;
+			continue;
+
+		case BPF_JMP|BPF_JA:
+			pc += pc->k;
+			continue;
+
+		case BPF_JMP|BPF_JGT|BPF_K:
+			pc += (A > pc->k) ? pc->jt : pc->jf;
+			continue;
+
+		case BPF_JMP|BPF_JGE|BPF_K:
+			pc += (A >= pc->k) ? pc->jt : pc->jf;
+			continue;
+
+		case BPF_JMP|BPF_JEQ|BPF_K:
+			pc += (A == pc->k) ? pc->jt : pc->jf;
+			continue;
+
+		case BPF_JMP|BPF_JSET|BPF_K:
+			pc += (A & pc->k) ? pc->jt : pc->jf;
+			continue;
+
+		case BPF_JMP|BPF_JGT|BPF_X:
+			pc += (A > X) ? pc->jt : pc->jf;
+			continue;
+
+		case BPF_JMP|BPF_JGE|BPF_X:
+			pc += (A >= X) ? pc->jt : pc->jf;
+			continue;
+
+		case BPF_JMP|BPF_JEQ|BPF_X:
+			pc += (A == X) ? pc->jt : pc->jf;
+			continue;
+
+		case BPF_JMP|BPF_JSET|BPF_X:
+			pc += (A & X) ? pc->jt : pc->jf;
+			continue;
+
+		case BPF_ALU|BPF_ADD|BPF_X:
+			A += X;
+			continue;
+
+		case BPF_ALU|BPF_SUB|BPF_X:
+			A -= X;
+			continue;
+
+		case BPF_ALU|BPF_MUL|BPF_X:
+			A *= X;
+			continue;
+
+		case BPF_ALU|BPF_DIV|BPF_X:
+			if (X == 0)
+				return 0;
+			A /= X;
+			continue;
+
+		case BPF_ALU|BPF_AND|BPF_X:
+			A &= X;
+			continue;
+
+		case BPF_ALU|BPF_OR|BPF_X:
+			A |= X;
+			continue;
+
+		case BPF_ALU|BPF_LSH|BPF_X:
+			A <<= X;
+			continue;
+
+		case BPF_ALU|BPF_RSH|BPF_X:
+			A >>= X;
+			continue;
+
+		case BPF_ALU|BPF_ADD|BPF_K:
+			A += pc->k;
+			continue;
+
+		case BPF_ALU|BPF_SUB|BPF_K:
+			A -= pc->k;
+			continue;
+
+		case BPF_ALU|BPF_MUL|BPF_K:
+			A *= pc->k;
+			continue;
+
+		case BPF_ALU|BPF_DIV|BPF_K:
+			A /= pc->k;
+			continue;
+
+		case BPF_ALU|BPF_AND|BPF_K:
+			A &= pc->k;
+			continue;
+
+		case BPF_ALU|BPF_OR|BPF_K:
+			A |= pc->k;
+			continue;
+
+		case BPF_ALU|BPF_LSH|BPF_K:
+			A <<= pc->k;
+			continue;
+
+		case BPF_ALU|BPF_RSH|BPF_K:
+			A >>= pc->k;
+			continue;
+
+		case BPF_ALU|BPF_NEG:
+			A = -A;
+			continue;
+
+		case BPF_MISC|BPF_TAX:
+			X = A;
+			continue;
+
+		case BPF_MISC|BPF_TXA:
+			A = X;
+			continue;
+		}
+	}
+}
+
+#ifdef KERNEL
+/*
+ * Return true if the program f of len instructions is a valid filter.
+ * The constraints are that each jump be forward and to a valid
+ * code, that scratch memory accesses use slots below BPF_MEMWORDS, and
+ * that no constant division is by zero.  The code must terminate with
+ * either an accept or reject; an empty program is invalid.
+ *
+ * The kernel needs to be able to verify an application's filter code.
+ * Otherwise, a bogus program could easily crash the system.
+ */
+int
+bpf_validate(f, len)
+	struct bpf_insn *f;
+	int len;
+{
+	register int i;
+	register struct bpf_insn *p;
+
+	if (len < 1)
+		return 0;	/* f[len - 1] below must exist */
+	for (i = 0; i < len; ++i) {
+		/*
+		 * Check that jumps are forward, and within
+		 * the code block.
+		 */
+		p = &f[i];
+		if (BPF_CLASS(p->code) == BPF_JMP) {
+			register int from = i + 1;
+
+			if (BPF_OP(p->code) == BPF_JA) {
+				if (p->k < 0 || p->k >= len - from)
+					return 0;
+			}
+			else if (from + p->jt >= len || from + p->jf >= len)
+				return 0;
+		}
+		/* Check that memory operations use valid addresses. */
+		if ((BPF_CLASS(p->code) == BPF_ST ||
+		     BPF_CLASS(p->code) == BPF_STX ||
+		     ((BPF_CLASS(p->code) == BPF_LD ||
+		       BPF_CLASS(p->code) == BPF_LDX) &&
+		      (p->code & 0xe0) == BPF_MEM)) &&
+		    (p->k >= BPF_MEMWORDS || p->k < 0))
+			return 0;
+		/* Check for constant division by 0. */
+		if (p->code == (BPF_ALU|BPF_DIV|BPF_K) && p->k == 0)
+			return 0;
+	}
+	return BPF_CLASS(f[len - 1].code) == BPF_RET;
+}
+#endif
diff --git a/sys/net/bpfdesc.h b/sys/net/bpfdesc.h
new file mode 100644
index 00000000000..a13320e86a6
--- /dev/null
+++ b/sys/net/bpfdesc.h
@@ -0,0 +1,98 @@
+/*
+ * Copyright (c) 1990, 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from the Stanford/CMU enet packet filter,
+ * (net/enet.c) distributed as part of 4.3BSD, and code contributed
+ * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence
+ * Berkeley Laboratory.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *      @(#)bpfdesc.h	8.1 (Berkeley) 6/10/93
+ *
+ * @(#) $Header: bpfdesc.h,v 1.9 91/10/27 21:22:38 mccanne Exp $ (LBL)
+ */
+
+/*
+ * Descriptor associated with each open bpf file.
+ */
+struct bpf_d {
+	struct bpf_d	*bd_next;	/* Linked list of descriptors */
+	/*
+	 * Buffer slots: two mbuf clusters buffer the incoming packets.
+	 *   The model has three slots.  Sbuf is always occupied.
+	 *   sbuf (store) - Receive interrupt puts packets here.
+	 *   hbuf (hold) - When sbuf is full, put cluster here and
+	 *                 wakeup read (replace sbuf with fbuf).
+	 *   fbuf (free) - When read is done, put cluster here.
+	 * On receiving, if sbuf is full and fbuf is 0, packet is dropped.
+	 */
+	caddr_t		bd_sbuf;	/* store slot */
+	caddr_t		bd_hbuf;	/* hold slot */
+	caddr_t		bd_fbuf;	/* free slot */
+	int 		bd_slen;	/* current length of store buffer */
+	int 		bd_hlen;	/* current length of hold buffer */
+
+	int		bd_bufsize;	/* absolute length of buffers */
+
+	struct bpf_if *	bd_bif;		/* interface descriptor */
+	u_long		bd_rtout;	/* Read timeout in 'ticks' */
+	struct bpf_insn *bd_filter; 	/* filter code */
+	u_long		bd_rcount;	/* number of packets received */
+	u_long		bd_dcount;	/* number of packets dropped */
+
+	u_char		bd_promisc;	/* true if listening promiscuously */
+	u_char		bd_state;	/* idle, waiting, or timed out */
+	u_char		bd_immediate;	/* true to return on packet arrival */
+#if BSD < 199103
+	u_char		bd_selcoll;	/* true if selects collide */
+	int		bd_timedout;	/* NOTE(review): read timed out? confirm */
+	struct proc *	bd_selproc;	/* process that last selected us */
+#else
+	u_char		bd_pad;		/* explicit alignment */
+	struct selinfo	bd_sel;		/* bsd select info */
+#endif
+};
+
+/*
+ * Descriptor associated with each attached hardware interface.
+ */
+struct bpf_if {
+	struct bpf_if *bif_next;	/* list of all interfaces */
+	struct bpf_d *bif_dlist;	/* descriptor list */
+	struct bpf_if **bif_driverp;	/* pointer into softc */
+	u_int bif_dlt;			/* link layer type */
+	u_int bif_hdrlen;		/* length of header (with padding) */
+	struct ifnet *bif_ifp;		/* corresponding interface */
+};
+
+#ifdef KERNEL
+int	 bpf_setf __P((struct bpf_d *, struct bpf_program *));
+#endif
diff --git a/sys/net/if.c b/sys/net/if.c
new file mode 100644
index 00000000000..36963885cc7
--- /dev/null
+++ b/sys/net/if.c
@@ -0,0 +1,670 @@
+/*
+ * Copyright (c) 1980, 1986, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)if.c	8.3 (Berkeley) 1/4/94
+ */
+
+#include <sys/param.h>
+#include <sys/mbuf.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/protosw.h>
+#include <sys/kernel.h>
+#include <sys/ioctl.h>
+
+#include <net/if.h>
+#include <net/if_dl.h>
+#include <net/if_types.h>
+
+int	ifqmaxlen = IFQ_MAXLEN;
+void	if_slowtimo __P((void *arg));
+
+/*
+ * Network interface utility routines.
+ *
+ * Routines with ifa_ifwith* names take sockaddr *'s as
+ * parameters.
+ */
+void
+ifinit()
+{
+	register struct ifnet *ifp = ifnet;
+
+	for (; ifp != 0; ifp = ifp->if_next)	/* default unset queue limits */
+		if (ifp->if_snd.ifq_maxlen == 0)
+			ifp->if_snd.ifq_maxlen = ifqmaxlen;
+	if_slowtimo(0);		/* first watchdog pass; it re-arms itself */
+}
+
+#ifdef vax
+/*
+ * On a Unibus reset, call the if_reset hook of each interface that has one.
+ */
+void
+ifubareset(uban)
+	int uban;
+{
+	register struct ifnet *ifp = ifnet;
+
+	for (; ifp != 0; ifp = ifp->if_next)
+		if (ifp->if_reset != 0)
+			(*ifp->if_reset)(ifp->if_unit, uban);
+}
+#endif
+
+int if_index = 0;
+struct ifaddr **ifnet_addrs;
+static char *sprint_d __P((u_int, char *, int));
+
+/*
+ * Attach an interface to the list of "active" interfaces: append ifp to
+ * ifnet, give it the next if_index, and build its link-level (AF_LINK)
+ * address and netmask from the interface name and unit.  That address is
+ * also recorded in ifnet_addrs[], which is grown by doubling.
+ */
+void
+if_attach(ifp)
+	struct ifnet *ifp;
+{
+	unsigned socksize, ifasize;
+	int namelen, unitlen, masklen, ether_output();
+	char workbuf[12], *unitname;
+	register struct ifnet **p = &ifnet;
+	register struct sockaddr_dl *sdl;
+	register struct ifaddr *ifa;
+	static int if_indexlim = 8;
+	extern void link_rtrequest();
+
+	while (*p)
+		p = &((*p)->if_next);
+	*p = ifp;
+	ifp->if_index = ++if_index;
+	if (ifnet_addrs == 0 || if_index >= if_indexlim) {
+		unsigned n = (if_indexlim <<= 1) * sizeof(ifa);
+		struct ifaddr **q = (struct ifaddr **)
+					malloc(n, M_IFADDR, M_WAITOK);
+		bzero((caddr_t)q, n);	/* no stale pointers in unused slots */
+		if (ifnet_addrs) {
+			bcopy((caddr_t)ifnet_addrs, (caddr_t)q, n/2);
+			free((caddr_t)ifnet_addrs, M_IFADDR);
+		}
+		ifnet_addrs = q;
+	}
+	/* create a Link Level name for this device */
+	unitname = sprint_d((u_int)ifp->if_unit, workbuf, sizeof(workbuf));
+	namelen = strlen(ifp->if_name);
+	unitlen = strlen(unitname);
+#define _offsetof(t, m) ((int)((caddr_t)&((t *)0)->m))
+	masklen = _offsetof(struct sockaddr_dl, sdl_data[0]) + unitlen + namelen;
+	socksize = masklen + ifp->if_addrlen;
+#define ROUNDUP(a) (1 + (((a) - 1) | (sizeof(long) - 1)))
+	socksize = ROUNDUP(socksize);
+	if (socksize < sizeof(*sdl))
+		socksize = sizeof(*sdl);
+	ifasize = sizeof(*ifa) + 2 * socksize;
+	if (ifa = (struct ifaddr *)malloc(ifasize, M_IFADDR, M_WAITOK)) {
+		bzero((caddr_t)ifa, ifasize);
+		sdl = (struct sockaddr_dl *)(ifa + 1);
+		sdl->sdl_len = socksize;
+		sdl->sdl_family = AF_LINK;
+		bcopy(ifp->if_name, sdl->sdl_data, namelen);
+		bcopy(unitname, namelen + (caddr_t)sdl->sdl_data, unitlen);
+		sdl->sdl_nlen = (namelen += unitlen);
+		sdl->sdl_index = ifp->if_index;
+		sdl->sdl_type = ifp->if_type;
+		ifnet_addrs[if_index - 1] = ifa;
+		ifa->ifa_ifp = ifp;
+		ifa->ifa_next = ifp->if_addrlist;
+		ifa->ifa_rtrequest = link_rtrequest;
+		ifp->if_addrlist = ifa;
+		ifa->ifa_addr = (struct sockaddr *)sdl;
+		sdl = (struct sockaddr_dl *)(socksize + (caddr_t)sdl);
+		ifa->ifa_netmask = (struct sockaddr *)sdl;
+		sdl->sdl_len = masklen;
+		while (namelen != 0)
+			sdl->sdl_data[--namelen] = 0xff;
+	}
+	/* XXX -- Temporary fix before changing 10 ethernet drivers */
+	if (ifp->if_output == ether_output)
+		ether_ifattach(ifp);
+}
+/*
+ * Find the interface address that is exactly addr, or whose broadcast
+ * address is addr on a broadcast-capable interface; 0 if there is none.
+ */
+/*ARGSUSED*/
+struct ifaddr *
+ifa_ifwithaddr(addr)
+	register struct sockaddr *addr;
+{
+	register struct ifnet *ifp;
+	register struct ifaddr *ifa;
+
+#define	equal(a1, a2) \
+  (bcmp((caddr_t)(a1), (caddr_t)(a2), ((struct sockaddr *)(a1))->sa_len) == 0)
+	for (ifp = ifnet; ifp != 0; ifp = ifp->if_next)
+	    for (ifa = ifp->if_addrlist; ifa != 0; ifa = ifa->ifa_next) {
+		if (ifa->ifa_addr->sa_family != addr->sa_family)
+			continue;
+		if (equal(addr, ifa->ifa_addr) ||
+		    ((ifp->if_flags & IFF_BROADCAST) && ifa->ifa_broadaddr &&
+		     equal(ifa->ifa_broadaddr, addr)))
+			return (ifa);
+	    }
+	return ((struct ifaddr *)0);
+}
+/*
+ * Locate the point to point interface with a given destination address.
+ * Addresses that have no destination set (a null ifa_dstaddr) are skipped
+ * rather than compared.  Returns 0 if nothing matches.
+ */
+/*ARGSUSED*/
+struct ifaddr *
+ifa_ifwithdstaddr(addr)
+	register struct sockaddr *addr;
+{
+	register struct ifnet *ifp;
+	register struct ifaddr *ifa;
+
+	for (ifp = ifnet; ifp; ifp = ifp->if_next)
+	    if (ifp->if_flags & IFF_POINTOPOINT)
+		for (ifa = ifp->if_addrlist; ifa; ifa = ifa->ifa_next)
+			if (ifa->ifa_addr->sa_family == addr->sa_family &&
+			    ifa->ifa_dstaddr && equal(addr, ifa->ifa_dstaddr))
+				return (ifa);
+	return ((struct ifaddr *)0);
+}
+
+/*
+ * Find an interface address on the same network as addr.  When several
+ * match, the one whose netmask refines the others (per rn_refines) wins.
+ */
+struct ifaddr *
+ifa_ifwithnet(addr)
+	struct sockaddr *addr;
+{
+	register struct ifnet *ifp;
+	register struct ifaddr *ifa;
+	struct ifaddr *best = (struct ifaddr *) 0;
+	u_int af = addr->sa_family;
+	char *addr_data = addr->sa_data, *cplim;
+
+	if (af == AF_LINK) {
+		register struct sockaddr_dl *sdl = (struct sockaddr_dl *)addr;
+		if (sdl->sdl_index != 0 && sdl->sdl_index <= if_index)
+			return (ifnet_addrs[sdl->sdl_index - 1]);
+	}
+	for (ifp = ifnet; ifp != 0; ifp = ifp->if_next)
+	    for (ifa = ifp->if_addrlist; ifa != 0; ifa = ifa->ifa_next) {
+		register char *cp, *cp2, *cp3;
+
+		if (ifa->ifa_addr->sa_family != af || ifa->ifa_netmask == 0)
+			continue;
+		cp = addr_data;
+		cp2 = ifa->ifa_addr->sa_data;
+		cplim = ifa->ifa_netmask->sa_len + (char *)ifa->ifa_netmask;
+		for (cp3 = ifa->ifa_netmask->sa_data; cp3 < cplim; cp3++)
+			if ((*cp++ ^ *cp2++) & *cp3)
+				break;
+		if (cp3 < cplim)
+			continue;	/* differs under the mask */
+		if (best == 0 || rn_refines((caddr_t)ifa->ifa_netmask,
+		    (caddr_t)best->ifa_netmask))
+			best = ifa;
+	    }
+	return (best);
+}
+
+/*
+ * Return the first address of family af on any interface, or 0 if none.
+ */
+struct ifaddr *
+ifa_ifwithaf(af)
+	register int af;
+{
+	register struct ifnet *ifp = ifnet;
+	register struct ifaddr *ifa;
+
+	for (; ifp != 0; ifp = ifp->if_next)
+		for (ifa = ifp->if_addrlist; ifa != 0; ifa = ifa->ifa_next)
+			if (af == ifa->ifa_addr->sa_family)
+				return (ifa);
+	return ((struct ifaddr *)0);
+}
+
+/*
+ * Among ifp's addresses of addr's family, return the one matching addr
+ * (exactly, or under its netmask); failing that, the last of that family.
+ */
+struct ifaddr *
+ifaof_ifpforaddr(addr, ifp)
+	struct sockaddr *addr;
+	register struct ifnet *ifp;
+{
+	register struct ifaddr *ifa;
+	register char *ap, *ip, *mp;
+	register char *mend;
+	struct ifaddr *fallback = 0;
+	u_int af = addr->sa_family;
+
+	if (af >= AF_MAX)
+		return (0);
+	for (ifa = ifp->if_addrlist; ifa != 0; ifa = ifa->ifa_next) {
+		if (ifa->ifa_addr->sa_family != af)
+			continue;
+		fallback = ifa;
+		if (ifa->ifa_netmask == 0) {
+			/* No mask: only an exact address or destination match. */
+			if (equal(addr, ifa->ifa_addr) ||
+			    (ifa->ifa_dstaddr && equal(addr, ifa->ifa_dstaddr)))
+				return (ifa);
+			continue;
+		}
+		ap = addr->sa_data;
+		ip = ifa->ifa_addr->sa_data;
+		mp = ifa->ifa_netmask->sa_data;
+		mend = ifa->ifa_netmask->sa_len + (char *)ifa->ifa_netmask;
+		while (mp < mend && ((*ap++ ^ *ip++) & *mp) == 0)
+			mp++;
+		if (mp == mend)
+			return (ifa);
+	}
+	return (fallback);
+}
+
+#include <net/route.h>
+
+/*
+ * Default action when installing a route with a Link Level gateway: on
+ * RTM_ADD, repoint rt at its interface's best-matching ifa for the route
+ * key, then chain to that ifa's own hook.  XXX belongs in /sys/net/link.c.
+ */
+void
+link_rtrequest(cmd, rt, sa)
+	int cmd;
+	register struct rtentry *rt;
+	struct sockaddr *sa;
+{
+	register struct ifaddr *ifa;
+	struct sockaddr *dst;
+	struct ifnet *ifp;
+
+	if (cmd != RTM_ADD || (ifa = rt->rt_ifa) == 0 ||
+	    (ifp = ifa->ifa_ifp) == 0 || (dst = rt_key(rt)) == 0)
+		return;
+	if ((ifa = ifaof_ifpforaddr(dst, ifp)) == 0)
+		return;
+	IFAFREE(rt->rt_ifa);
+	rt->rt_ifa = ifa;
+	ifa->ifa_refcnt++;
+	if (ifa->ifa_rtrequest && ifa->ifa_rtrequest != link_rtrequest)
+		ifa->ifa_rtrequest(cmd, rt, sa);
+}
+
+/*
+ * Clear IFF_UP on ifp, pass PRC_IFDOWN to pfctlinput() for each of its
+ * addresses, flush its send queue and call rt_ifmsg().
+ * NOTE: must be called at splnet or equivalent.
+ */
+void
+if_down(ifp)
+	register struct ifnet *ifp;
+{
+	register struct ifaddr *ifa = ifp->if_addrlist;
+
+	ifp->if_flags &= ~IFF_UP;
+	for (; ifa != 0; ifa = ifa->ifa_next)
+		pfctlinput(PRC_IFDOWN, ifa->ifa_addr);
+	if_qflush(&ifp->if_snd);
+	rt_ifmsg(ifp);
+}
+
+/*
+ * Mark an interface up and report the change with rt_ifmsg().  The
+ * protocol notification (PRC_IFUP) is compiled only under "notyet".
+ * NOTE: must be called at splnet or equivalent.
+ */
+void
+if_up(ifp)
+	register struct ifnet *ifp;
+{
+	register struct ifaddr *ifa;
+
+	ifp->if_flags |= IFF_UP;
+#ifdef notyet
+	/* this has no effect on IP, and will kill all iso connections XXX */
+	for (ifa = ifp->if_addrlist; ifa; ifa = ifa->ifa_next)
+		pfctlinput(PRC_IFUP, ifa->ifa_addr);
+#endif
+	rt_ifmsg(ifp);
+}
+
+/*
+ * Flush an interface queue: free every packet chained on it through
+ * m_act and reset the head, tail and length.
+ */
+void
+if_qflush(ifq)
+	register struct ifqueue *ifq;
+{
+	register struct mbuf *pkt, *next;
+
+	for (pkt = ifq->ifq_head; pkt != 0; pkt = next) {
+		next = pkt->m_act;	/* save the link before freeing pkt */
+		m_freem(pkt);
+	}
+	ifq->ifq_head = 0;
+	ifq->ifq_tail = 0;
+	ifq->ifq_len = 0;
+}
+
+/*
+ * Interface watchdog scan.  At splimp, count down every interface timer
+ * that is set and call the interface's if_watchdog routine when one
+ * expires; then reschedule this routine hz / IFNET_SLOWHZ ticks ahead.
+ */
+void
+if_slowtimo(arg)
+	void *arg;
+{
+	register struct ifnet *ifp;
+	int s = splimp();
+
+	/* A timer fires only on the tick that takes it from 1 to 0. */
+	for (ifp = ifnet; ifp != 0; ifp = ifp->if_next) {
+		if (ifp->if_timer != 0 && --ifp->if_timer == 0 &&
+		    ifp->if_watchdog != 0)
+			(*ifp->if_watchdog)(ifp->if_unit);
+	}
+	splx(s);
+	timeout(if_slowtimo, (void *)0, hz / IFNET_SLOWHZ);
+}
+
+/*
+ * Map an interface name (driver name followed by a decimal unit, within
+ * IFNAMSIZ bytes) to its ifnet, or 0 if there is no such interface.
+ */
+struct ifnet *
+ifunit(name)
+	register char *name;
+{
+	register char *cp;
+	register struct ifnet *ifp;
+	int unit;
+	unsigned len;
+	char *ep, c;
+
+	for (cp = name; cp < name + IFNAMSIZ && *cp; cp++)
+		if (*cp >= '0' && *cp <= '9')
+			break;
+	if (cp == name + IFNAMSIZ || *cp == '\0')	/* bound before *cp */
+		return ((struct ifnet *)0);
+	/*
+	 * Save first char of unit, and pointer to it,
+	 * so we can put a null there to avoid matching
+	 * initial substrings of interface names.
+	 */
+	len = cp - name + 1;
+	c = *cp;
+	ep = cp;
+	for (unit = 0; cp < name + IFNAMSIZ && *cp >= '0' && *cp <= '9'; )
+		unit = unit * 10 + *cp++ - '0';
+	*ep = 0;
+	for (ifp = ifnet; ifp; ifp = ifp->if_next) {
+		if (bcmp(ifp->if_name, name, len))
+			continue;
+		if (unit == ifp->if_unit)
+			break;
+	}
+	*ep = c;
+	return (ifp);
+}
+
+/*
+ * Interface ioctls; requests not handled here go to the socket's protocol.
+ */
+int
+ifioctl(so, cmd, data, p)
+	struct socket *so;
+	int cmd;
+	caddr_t data;
+	struct proc *p;
+{
+	register struct ifnet *ifp;
+	register struct ifreq *ifr;
+	int error;
+
+	switch (cmd) {
+
+	case SIOCGIFCONF:
+	case OSIOCGIFCONF:
+		return (ifconf(cmd, data));
+	}
+	ifr = (struct ifreq *)data;
+	ifp = ifunit(ifr->ifr_name);
+	if (ifp == 0)
+		return (ENXIO);
+	switch (cmd) {
+
+	case SIOCGIFFLAGS:
+		ifr->ifr_flags = ifp->if_flags;
+		break;
+
+	case SIOCGIFMETRIC:
+		ifr->ifr_metric = ifp->if_metric;
+		break;
+
+	case SIOCSIFFLAGS:
+		if (error = suser(p->p_ucred, &p->p_acflag))
+			return (error);
+		if (ifp->if_flags & IFF_UP && (ifr->ifr_flags & IFF_UP) == 0) {
+			int s = splimp();
+			if_down(ifp);
+			splx(s);
+		}
+		if (ifr->ifr_flags & IFF_UP && (ifp->if_flags & IFF_UP) == 0) {
+			int s = splimp();
+			if_up(ifp);
+			splx(s);
+		}
+		ifp->if_flags = (ifp->if_flags & IFF_CANTCHANGE) |
+			(ifr->ifr_flags &~ IFF_CANTCHANGE);
+		if (ifp->if_ioctl)
+			(void) (*ifp->if_ioctl)(ifp, cmd, data);
+		break;
+
+	case SIOCSIFMETRIC:
+		if (error = suser(p->p_ucred, &p->p_acflag))
+			return (error);
+		ifp->if_metric = ifr->ifr_metric;
+		break;
+
+	case SIOCADDMULTI:
+	case SIOCDELMULTI:
+		if (error = suser(p->p_ucred, &p->p_acflag))
+			return (error);
+		if (ifp->if_ioctl == NULL)
+			return (EOPNOTSUPP);
+		return ((*ifp->if_ioctl)(ifp, cmd, data));
+
+	default:
+		if (so->so_proto == 0)
+			return (EOPNOTSUPP);
+#ifndef COMPAT_43
+		return ((*so->so_proto->pr_usrreq)(so, PRU_CONTROL,
+			cmd, data, ifp));
+#else
+	    {
+		int ocmd = cmd;
+		/* Fix up sa_len on sets; map old O* gets to new requests. */
+		switch (cmd) {
+
+		case SIOCSIFDSTADDR:
+		case SIOCSIFADDR:
+		case SIOCSIFBRDADDR:
+		case SIOCSIFNETMASK:
+#if BYTE_ORDER != BIG_ENDIAN
+			if (ifr->ifr_addr.sa_family == 0 &&
+			    ifr->ifr_addr.sa_len < 16) {
+				ifr->ifr_addr.sa_family = ifr->ifr_addr.sa_len;
+				ifr->ifr_addr.sa_len = 16;
+			}
+#else
+			if (ifr->ifr_addr.sa_len == 0)
+				ifr->ifr_addr.sa_len = 16;
+#endif
+			break;
+
+		case OSIOCGIFADDR:
+			cmd = SIOCGIFADDR;
+			break;
+
+		case OSIOCGIFDSTADDR:
+			cmd = SIOCGIFDSTADDR;
+			break;
+
+		case OSIOCGIFBRDADDR:
+			cmd = SIOCGIFBRDADDR;
+			break;
+
+		case OSIOCGIFNETMASK:
+			cmd = SIOCGIFNETMASK;
+		}
+		error =  ((*so->so_proto->pr_usrreq)(so, PRU_CONTROL,
+							    cmd, data, ifp));
+		switch (ocmd) {
+		/* Store family as a u_short over the start of ifr_addr. */
+		case OSIOCGIFADDR:
+		case OSIOCGIFDSTADDR:
+		case OSIOCGIFBRDADDR:
+		case OSIOCGIFNETMASK:
+			*(u_short *)&ifr->ifr_addr = ifr->ifr_addr.sa_family;
+		}
+		return (error);
+
+	    }
+#endif
+	}
+	return (0);
+}
+
+/*
+ * Return interface configuration of system: an ifreq per interface address
+ * is copied out to ifc_req while room remains; ifc_len is reduced by the
+ * unused room.  List may be used in later ioctl's (above) to get other
+ * information.  NOTE(review): unit is one char ('0' + if_unit); units > 9?
+ */
+/*ARGSUSED*/
+int
+ifconf(cmd, data)
+	int cmd;
+	caddr_t data;
+{
+	register struct ifconf *ifc = (struct ifconf *)data;
+	register struct ifnet *ifp = ifnet;
+	register struct ifaddr *ifa;
+	register char *cp, *ep;
+	struct ifreq ifr, *ifrp;
+	int space = ifc->ifc_len, error = 0;
+
+	ifrp = ifc->ifc_req;
+	ep = ifr.ifr_name + sizeof (ifr.ifr_name) - 2;
+	for (; space > sizeof (ifr) && ifp; ifp = ifp->if_next) {
+		strncpy(ifr.ifr_name, ifp->if_name, sizeof (ifr.ifr_name) - 2);
+		for (cp = ifr.ifr_name; cp < ep && *cp; cp++)
+			continue;
+		*cp++ = '0' + ifp->if_unit; *cp = '\0';
+		if ((ifa = ifp->if_addrlist) == 0) {
+			bzero((caddr_t)&ifr.ifr_addr, sizeof(ifr.ifr_addr));
+			error = copyout((caddr_t)&ifr, (caddr_t)ifrp,
+			    sizeof (ifr));
+			if (error)
+				break;
+			space -= sizeof (ifr), ifrp++;
+		} else 
+		    for ( ; space > sizeof (ifr) && ifa; ifa = ifa->ifa_next) {
+			register struct sockaddr *sa = ifa->ifa_addr;
+#ifdef COMPAT_43
+			if (cmd == OSIOCGIFCONF) {
+				struct osockaddr *osa =
+					 (struct osockaddr *)&ifr.ifr_addr;
+				ifr.ifr_addr = *sa;
+				osa->sa_family = sa->sa_family;
+				error = copyout((caddr_t)&ifr, (caddr_t)ifrp,
+						sizeof (ifr));
+				ifrp++;
+			} else
+#endif
+			if (sa->sa_len <= sizeof(*sa)) {
+				ifr.ifr_addr = *sa;
+				error = copyout((caddr_t)&ifr, (caddr_t)ifrp,
+						sizeof (ifr));
+				ifrp++;
+			} else {
+				space -= sa->sa_len - sizeof(*sa);
+				if (space < sizeof (ifr))
+					break;
+				error = copyout((caddr_t)&ifr, (caddr_t)ifrp,
+						sizeof (ifr.ifr_name));
+				if (error == 0)
+				    error = copyout((caddr_t)sa,
+				      (caddr_t)&ifrp->ifr_addr, sa->sa_len);
+				ifrp = (struct ifreq *)
+					(sa->sa_len + (caddr_t)&ifrp->ifr_addr);
+			}
+			if (error)
+				break;
+			space -= sizeof (ifr);
+		}
+	}
+	ifc->ifc_len -= space;
+	return (error);
+}
+
+/* Format n in decimal at the tail of buf[buflen]; return the first digit. */
+static char *
+sprint_d(n, buf, buflen)
+	u_int n;
+	char *buf;
+	int buflen;
+{
+	register char *digit = &buf[buflen - 1];
+
+	*digit = 0;
+	do {
+		*--digit = "0123456789"[n % 10];
+		n /= 10;
+	} while (n != 0);
+	return (digit);
+}
diff --git a/sys/net/if.h b/sys/net/if.h
new file mode 100644
index 00000000000..c27c4f9cf63
--- /dev/null
+++ b/sys/net/if.h
@@ -0,0 +1,363 @@
+/*
+ * Copyright (c) 1982, 1986, 1989, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)if.h	8.1 (Berkeley) 6/10/93
+ */
+
+/*
+ * Structures defining a network interface, providing a packet
+ * transport mechanism (ala level 0 of the PUP protocols).
+ *
+ * Each interface accepts output datagrams of a specified maximum
+ * length, and provides higher level routines with input datagrams
+ * received from its medium.
+ *
+ * Output occurs when the routine if_output is called, with four parameters:
+ *	(*ifp->if_output)(ifp, m, dst, rt)
+ * Here m is the mbuf chain to be sent, dst is the destination address,
+ * and rt is the route entry (if any) being used to reach it.
+ * The output routine encapsulates the supplied datagram if necessary,
+ * and then transmits it on its medium.
+ *
+ * On input, each interface unwraps the data received by it, and either
+ * places it on the input queue of an internetwork datagram routine
+ * and posts the associated software interrupt, or passes the datagram to a raw
+ * packet input routine.
+ *
+ * Routines exist for locating interfaces by their addresses
+ * or for locating an interface on a certain network, as well as more general
+ * routing and gateway routines maintaining information used to locate
+ * interfaces.  These routines live in the files if.c and route.c.
+ */
+#ifndef _TIME_ /*  XXX fast fix for SNMP, going away soon */
+#include <sys/time.h>
+#endif
+
+#ifdef __STDC__
+/*
+ * Forward structure declarations for function prototypes [sic].
+ */
+struct	mbuf;
+struct	proc;
+struct	rtentry;	
+struct	socket;
+struct	ether_header;
+#endif
+/*
+ * Structure describing information about an interface
+ * which may be of interest to management entities.
+ * (This is ``struct if_data'', which is declared inside struct ifnet
+ * below rather than on its own.)
+ */
+/*
+ * Structure defining a queue for a network interface.
+ *
+ * (Would like to call this struct ``if'', but C isn't PL/1.)
+ */
+
+/*
+ * Per-interface descriptor.  The structures are chained through if_next
+ * and each carries the list of addresses configured on the interface
+ * (if_addrlist).  The embedded struct if_data groups the generic
+ * information and the statistics counters, the function pointers are the
+ * driver entry points, and if_snd is the interface's output queue.
+ */
+struct ifnet {
+	char	*if_name;		/* name, e.g. ``en'' or ``lo'' */
+	struct	ifnet *if_next;		/* all struct ifnets are chained */
+	struct	ifaddr *if_addrlist;	/* linked list of addresses per if */
+        int	if_pcount;		/* number of promiscuous listeners */
+	caddr_t	if_bpf;			/* packet filter structure */
+	u_short	if_index;		/* numeric abbreviation for this if  */
+	short	if_unit;		/* sub-unit for lower level driver */
+	short	if_timer;		/* time 'til if_watchdog called */
+	short	if_flags;		/* up/down, broadcast, etc. (IFF_* bits) */
+	/*
+	 * The members of if_data are normally reached through the
+	 * if_mtu, if_type, ... shorthand macros defined after this structure.
+	 */
+	struct	if_data {
+/* generic interface information */
+		u_char	ifi_type;	/* ethernet, tokenring, etc */
+		u_char	ifi_addrlen;	/* media address length */
+		u_char	ifi_hdrlen;	/* media header length */
+		u_long	ifi_mtu;	/* maximum transmission unit */
+		u_long	ifi_metric;	/* routing metric (external only) */
+		u_long	ifi_baudrate;	/* linespeed */
+/* volatile statistics */
+		u_long	ifi_ipackets;	/* packets received on interface */
+		u_long	ifi_ierrors;	/* input errors on interface */
+		u_long	ifi_opackets;	/* packets sent on interface */
+		u_long	ifi_oerrors;	/* output errors on interface */
+		u_long	ifi_collisions;	/* collisions on csma interfaces */
+		u_long	ifi_ibytes;	/* total number of octets received */
+		u_long	ifi_obytes;	/* total number of octets sent */
+		u_long	ifi_imcasts;	/* packets received via multicast */
+		u_long	ifi_omcasts;	/* packets sent via multicast */
+		u_long	ifi_iqdrops;	/* dropped on input, this interface */
+		u_long	ifi_noproto;	/* destined for unsupported protocol */
+		struct	timeval ifi_lastchange;/* last updated */
+	}	if_data;
+/* procedure handles */
+	int	(*if_init)		/* init routine */
+		__P((int));
+	int	(*if_output)		/* output routine (enqueue) */
+		__P((struct ifnet *, struct mbuf *, struct sockaddr *,
+		     struct rtentry *));
+	int	(*if_start)		/* initiate output routine */
+		__P((struct ifnet *));
+	int	(*if_done)		/* output complete routine */
+		__P((struct ifnet *));	/* (XXX not used; fake prototype) */
+	int	(*if_ioctl)		/* ioctl routine */
+		__P((struct ifnet *, int, caddr_t));
+	int	(*if_reset)	
+		__P((int));		/* new autoconfig will permit removal */
+	int	(*if_watchdog)		/* timer routine */
+		__P((int));
+	/*
+	 * Singly linked packet queue (packets are linked through m_nextpkt);
+	 * manipulated with the IF_ENQUEUE/IF_PREPEND/IF_DEQUEUE macros.
+	 */
+	struct	ifqueue {
+		struct	mbuf *ifq_head;
+		struct	mbuf *ifq_tail;
+		int	ifq_len;
+		int	ifq_maxlen;
+		int	ifq_drops;
+	} if_snd;			/* output queue */
+};
+/*
+ * Shorthand names so that the members of the embedded if_data structure
+ * can be written as if they were direct members of struct ifnet
+ * (e.g. ifp->if_mtu for ifp->if_data.ifi_mtu).
+ */
+#define	if_mtu		if_data.ifi_mtu
+#define	if_type		if_data.ifi_type
+#define	if_addrlen	if_data.ifi_addrlen
+#define	if_hdrlen	if_data.ifi_hdrlen
+#define	if_metric	if_data.ifi_metric
+#define	if_baudrate	if_data.ifi_baudrate
+#define	if_ipackets	if_data.ifi_ipackets
+#define	if_ierrors	if_data.ifi_ierrors
+#define	if_opackets	if_data.ifi_opackets
+#define	if_oerrors	if_data.ifi_oerrors
+#define	if_collisions	if_data.ifi_collisions
+#define	if_ibytes	if_data.ifi_ibytes
+#define	if_obytes	if_data.ifi_obytes
+#define	if_imcasts	if_data.ifi_imcasts
+#define	if_omcasts	if_data.ifi_omcasts
+#define	if_iqdrops	if_data.ifi_iqdrops
+#define	if_noproto	if_data.ifi_noproto
+#define	if_lastchange	if_data.ifi_lastchange
+
+/*
+ * Bit values for if_flags.
+ *
+ * NOTE(review): if_flags is declared ``short'' in struct ifnet while
+ * IFF_MULTICAST is 0x8000; where short is 16 bits that is the sign bit,
+ * so compare with masks (flags & IFF_x) rather than by magnitude.
+ */
+#define	IFF_UP		0x1		/* interface is up */
+#define	IFF_BROADCAST	0x2		/* broadcast address valid */
+#define	IFF_DEBUG	0x4		/* turn on debugging */
+#define	IFF_LOOPBACK	0x8		/* is a loopback net */
+#define	IFF_POINTOPOINT	0x10		/* interface is point-to-point link */
+#define	IFF_NOTRAILERS	0x20		/* avoid use of trailers */
+#define	IFF_RUNNING	0x40		/* resources allocated */
+#define	IFF_NOARP	0x80		/* no address resolution protocol */
+#define	IFF_PROMISC	0x100		/* receive all packets */
+#define	IFF_ALLMULTI	0x200		/* receive all multicast packets */
+#define	IFF_OACTIVE	0x400		/* transmission in progress */
+#define	IFF_SIMPLEX	0x800		/* can't hear own transmissions */
+#define	IFF_LINK0	0x1000		/* per link layer defined bit */
+#define	IFF_LINK1	0x2000		/* per link layer defined bit */
+#define	IFF_LINK2	0x4000		/* per link layer defined bit */
+#define	IFF_MULTICAST	0x8000		/* supports multicast */
+
+/* flags set internally only: */
+/* (mask of the bits above that are maintained by the kernel/driver) */
+#define	IFF_CANTCHANGE \
+	(IFF_BROADCAST|IFF_POINTOPOINT|IFF_RUNNING|IFF_OACTIVE|\
+	    IFF_SIMPLEX|IFF_MULTICAST|IFF_ALLMULTI)
+
+/*
+ * Output queues (ifp->if_snd) and internetwork datagram level (pup level 1)
+ * input routines have queues of messages stored on ifqueue structures
+ * (defined above).  Entries are added to and deleted from these structures
+ * by these macros, which should be called with ipl raised to splimp().
+ */
+/* True when the queue already holds ifq_maxlen (or more) packets. */
+#define	IF_QFULL(ifq)		((ifq)->ifq_len >= (ifq)->ifq_maxlen)
+/* Count a packet that had to be dropped for this queue. */
+#define	IF_DROP(ifq)		((ifq)->ifq_drops++)
+/*
+ * Append packet m to the tail of the queue.
+ * Note that ifq and m are evaluated more than once, so neither argument
+ * may have side effects.  The same holds for the two macros below.
+ */
+#define	IF_ENQUEUE(ifq, m) { \
+	(m)->m_nextpkt = 0; \
+	if ((ifq)->ifq_tail == 0) \
+		(ifq)->ifq_head = (m); \
+	else \
+		(ifq)->ifq_tail->m_nextpkt = (m); \
+	(ifq)->ifq_tail = (m); \
+	(ifq)->ifq_len++; \
+}
+/* Insert packet m at the head of the queue. */
+#define	IF_PREPEND(ifq, m) { \
+	(m)->m_nextpkt = (ifq)->ifq_head; \
+	if ((ifq)->ifq_tail == 0) \
+		(ifq)->ifq_tail = (m); \
+	(ifq)->ifq_head = (m); \
+	(ifq)->ifq_len++; \
+}
+/*
+ * Remove the packet at the head of the queue and store it in m;
+ * m is set to 0 when the queue is empty.
+ */
+#define	IF_DEQUEUE(ifq, m) { \
+	(m) = (ifq)->ifq_head; \
+	if (m) { \
+		if (((ifq)->ifq_head = (m)->m_nextpkt) == 0) \
+			(ifq)->ifq_tail = 0; \
+		(m)->m_nextpkt = 0; \
+		(ifq)->ifq_len--; \
+	} \
+}
+
+/* NOTE(review): presumably the default value for ifq_maxlen -- confirm in if.c */
+#define	IFQ_MAXLEN	50
+#define	IFNET_SLOWHZ	1		/* granularity is 1 second */
+
+/*
+ * The ifaddr structure contains information about one address
+ * of an interface.  They are maintained by the different address families,
+ * are allocated and attached when an address is set, and are linked
+ * together so all addresses for an interface can be located.
+ */
+/*
+ * One address of an interface.  Entries for the same interface are linked
+ * through ifa_next and point back at their interface through ifa_ifp.
+ */
+struct ifaddr {
+	struct	sockaddr *ifa_addr;	/* address of interface */
+	struct	sockaddr *ifa_dstaddr;	/* other end of p-to-p link */
+#define	ifa_broadaddr	ifa_dstaddr	/* broadcast address interface */
+	struct	sockaddr *ifa_netmask;	/* used to determine subnet */
+	struct	ifnet *ifa_ifp;		/* back-pointer to interface */
+	struct	ifaddr *ifa_next;	/* next address for interface */
+	void	(*ifa_rtrequest)();	/* check or clean routes (+ or -)'d */
+	u_short	ifa_flags;		/* mostly rt_flags for cloning */
+	short	ifa_refcnt;		/* count of extra references; see IFAFREE */
+	int	ifa_metric;		/* cost of going out this interface */
+#ifdef notdef
+	struct	rtentry *ifa_rt;	/* XXXX for ROUTETOIF ????? */
+#endif
+};
+/* ifa_flags bit; shares its value with the routing flag RTF_UP */
+#define	IFA_ROUTE	RTF_UP		/* route installed */
+
+/*
+ * Message format for use in obtaining information about interfaces
+ * from getkerninfo and the routing socket
+ */
+struct if_msghdr {
+	u_short	ifm_msglen;	/* to skip over non-understood messages */
+	u_char	ifm_version;	/* future binary compatibility */
+	u_char	ifm_type;	/* message type */
+	int	ifm_addrs;	/* like rtm_addrs */
+	int	ifm_flags;	/* value of if_flags */
+	u_short	ifm_index;	/* index for associated ifp */
+	struct	if_data ifm_data;/* statistics and other data about if */
+};
+
+/*
+ * Message format for use in obtaining information about interface addresses
+ * from getkerninfo and the routing socket
+ */
+struct ifa_msghdr {
+	u_short	ifam_msglen;	/* to skip over non-understood messages */
+	u_char	ifam_version;	/* future binary compatibility */
+	u_char	ifam_type;	/* message type */
+	int	ifam_addrs;	/* like rtm_addrs */
+	int	ifam_flags;	/* value of ifa_flags */
+	u_short	ifam_index;	/* index for associated ifp */
+	int	ifam_metric;	/* value of ifa_metric */
+};
+
+/*
+ * Interface request structure used for socket
+ * ioctl's.  All interface ioctl's must have parameter
+ * definitions which begin with ifr_name.  The
+ * remainder may be interface specific.
+ */
+struct	ifreq {
+#define	IFNAMSIZ	16
+	char	ifr_name[IFNAMSIZ];		/* if name, e.g. "en0" */
+	/* request-specific payload; which member is valid depends on the ioctl */
+	union {
+		struct	sockaddr ifru_addr;
+		struct	sockaddr ifru_dstaddr;
+		struct	sockaddr ifru_broadaddr;
+		short	ifru_flags;
+		int	ifru_metric;
+		caddr_t	ifru_data;
+	} ifr_ifru;
+/* shorthand accessors for the union members */
+#define	ifr_addr	ifr_ifru.ifru_addr	/* address */
+#define	ifr_dstaddr	ifr_ifru.ifru_dstaddr	/* other end of p-to-p link */
+#define	ifr_broadaddr	ifr_ifru.ifru_broadaddr	/* broadcast address */
+#define	ifr_flags	ifr_ifru.ifru_flags	/* flags */
+#define	ifr_metric	ifr_ifru.ifru_metric	/* metric */
+#define	ifr_data	ifr_ifru.ifru_data	/* for use by interface */
+};
+
+/*
+ * Request carrying an interface name together with an address, a
+ * broadcast address and a mask in a single structure.
+ * NOTE(review): presumably the argument of the add/delete-alias ioctls --
+ * confirm against the ioctl definitions.
+ */
+struct ifaliasreq {
+	char	ifra_name[IFNAMSIZ];		/* if name, e.g. "en0" */
+	struct	sockaddr ifra_addr;
+	struct	sockaddr ifra_broadaddr;
+	struct	sockaddr ifra_mask;
+};
+
+/*
+ * Structure used in SIOCGIFCONF request.
+ * Used to retrieve interface configuration
+ * for machine (useful for programs which
+ * must know all networks accessible).
+ */
+struct	ifconf {
+	/*
+	 * On entry the size of the caller's buffer; the ifconf() routine
+	 * subtracts the space it left unused before returning, so on exit
+	 * this is the number of bytes actually filled in.
+	 */
+	int	ifc_len;		/* size of associated buffer */
+	/* the same user buffer, viewed as raw bytes or as ifreq records */
+	union {
+		caddr_t	ifcu_buf;
+		struct	ifreq *ifcu_req;
+	} ifc_ifcu;
+#define	ifc_buf	ifc_ifcu.ifcu_buf	/* buffer address */
+#define	ifc_req	ifc_ifcu.ifcu_req	/* array of structures returned */
+};
+
+#include <net/if_arp.h>
+
+#ifdef KERNEL
+/*
+ * Release one reference to an ifaddr: when no extra references remain
+ * (ifa_refcnt <= 0) the structure is passed to ifafree(), otherwise the
+ * count is simply decremented.
+ *
+ * NOTE(review): the expansion is a bare if/else that already ends in a
+ * semicolon and evaluates ifa more than once.  ``IFAFREE(x);'' therefore
+ * cannot be the unbraced body of an if that has its own else, and the
+ * argument must be free of side effects.
+ */
+#define	IFAFREE(ifa) \
+	if ((ifa)->ifa_refcnt <= 0) \
+		ifafree(ifa); \
+	else \
+		(ifa)->ifa_refcnt--;
+
+/*
+ * NOTE(review): presumably the head of the chain of all interfaces.
+ * This is a definition, not an extern declaration, so every kernel file
+ * including this header defines it and the link relies on common-symbol
+ * merging -- confirm before building with -fno-common.
+ */
+struct	ifnet	*ifnet;
+
+void	ether_ifattach __P((struct ifnet *));
+void	ether_input __P((struct ifnet *, struct ether_header *, struct mbuf *));
+int	ether_output __P((struct ifnet *,
+	   struct mbuf *, struct sockaddr *, struct rtentry *));
+char	*ether_sprintf __P((u_char *));
+
+void	if_attach __P((struct ifnet *));
+void	if_down __P((struct ifnet *));
+void	if_qflush __P((struct ifqueue *));
+void	if_slowtimo __P((void *));
+void	if_up __P((struct ifnet *));
+#ifdef vax
+void	ifubareset __P((int));
+#endif
+int	ifconf __P((int, caddr_t));
+void	ifinit __P((void));
+int	ifioctl __P((struct socket *, int, caddr_t, struct proc *));
+int	ifpromisc __P((struct ifnet *, int));
+struct	ifnet *ifunit __P((char *));
+
+struct	ifaddr *ifa_ifwithaddr __P((struct sockaddr *));
+struct	ifaddr *ifa_ifwithaf __P((int));
+struct	ifaddr *ifa_ifwithdstaddr __P((struct sockaddr *));
+struct	ifaddr *ifa_ifwithnet __P((struct sockaddr *));
+struct	ifaddr *ifa_ifwithroute __P((int, struct sockaddr *,
+					struct sockaddr *));
+struct	ifaddr *ifaof_ifpforaddr __P((struct sockaddr *, struct ifnet *));
+void	ifafree __P((struct ifaddr *));
+void	link_rtrequest __P((int, struct rtentry *, struct sockaddr *));
+
+int	loioctl __P((struct ifnet *, int, caddr_t));
+void	loopattach __P((int));
+int	looutput __P((struct ifnet *,
+	   struct mbuf *, struct sockaddr *, struct rtentry *));
+void	lortrequest __P((int, struct rtentry *, struct sockaddr *));
+#endif
diff --git a/sys/net/if_arp.h b/sys/net/if_arp.h
new file mode 100644
index 00000000000..84581cbb98d
--- /dev/null
+++ b/sys/net/if_arp.h
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 1986, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)if_arp.h	8.1 (Berkeley) 6/10/93
+ */
+
+/*
+ * Address Resolution Protocol.
+ *
+ * See RFC 826 for protocol description.  ARP packets are variable
+ * in size; the arphdr structure defines the fixed-length portion.
+ * Protocol type values are the same as those for 10 Mb/s Ethernet.
+ * The fixed portion is followed by the variable-sized fields ar_sha,
+ * ar_spa, ar_tha and ar_tpa in that order, according to the lengths
+ * specified.  Field names used correspond to RFC 826.
+ */
+struct	arphdr {
+	u_short	ar_hrd;		/* format of hardware address */
+#define ARPHRD_ETHER 	1	/* ethernet hardware format */
+#define ARPHRD_FRELAY 	15	/* frame relay hardware format */
+	u_short	ar_pro;		/* format of protocol address */
+	u_char	ar_hln;		/* length of hardware address */
+	u_char	ar_pln;		/* length of protocol address */
+	u_short	ar_op;		/* one of: */
+#define	ARPOP_REQUEST	1	/* request to resolve address */
+#define	ARPOP_REPLY	2	/* response to previous request */
+#define	ARPOP_REVREQUEST 3	/* request protocol address given hardware */
+#define	ARPOP_REVREPLY	4	/* response giving protocol address */
+#define ARPOP_INVREQUEST 8 	/* request to identify peer */
+#define ARPOP_INVREPLY	9	/* response identifying peer */
+/*
+ * The remaining fields are variable in size,
+ * according to the sizes above.
+ */
+/*
+ * The declarations below only document the layout that follows the fixed
+ * header; they are guarded by COMMENT_ONLY so they are not meant to be
+ * compiled (ar_hln bytes per hardware address, ar_pln per protocol address).
+ */
+#ifdef COMMENT_ONLY
+	u_char	ar_sha[];	/* sender hardware address */
+	u_char	ar_spa[];	/* sender protocol address */
+	u_char	ar_tha[];	/* target hardware address */
+	u_char	ar_tpa[];	/* target protocol address */
+#endif
+};
+
+/*
+ * ARP ioctl request
+ */
+struct arpreq {
+	struct	sockaddr arp_pa;		/* protocol address */
+	struct	sockaddr arp_ha;		/* hardware address */
+	int	arp_flags;			/* flags (ATF_* bits below) */
+};
+/*  arp_flags and at_flags field values */
+#define	ATF_INUSE	0x01	/* entry in use */
+#define ATF_COM		0x02	/* completed entry (enaddr valid) */
+#define	ATF_PERM	0x04	/* permanent entry */
+#define	ATF_PUBL	0x08	/* publish entry (respond for other host) */
+#define	ATF_USETRAILERS	0x10	/* has requested trailers */
diff --git a/sys/net/if_dl.h b/sys/net/if_dl.h
new file mode 100644
index 00000000000..3e53449085a
--- /dev/null
+++ b/sys/net/if_dl.h
@@ -0,0 +1,80 @@
+/*
+ * Copyright (c) 1990, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)if_dl.h	8.1 (Berkeley) 6/10/93
+ */
+
+/* 
+ * A Link-Level Sockaddr may specify the interface in one of two
+ * ways: either by means of a system-provided index number (computed
+ * anew and possibly differently on every reboot), or by a human-readable
+ * string such as "il0" (for managerial convenience).
+ * 
+ * Census taking actions, such as something akin to SIOCGIFCONF, would return
+ * both the index and the human name.
+ * 
+ * High volume transactions (such as giving a link-level ``from'' address
+ * in a recvfrom or recvmsg call) may be likely only to provide the indexed
+ * form, (which requires fewer copy operations and less space).
+ * 
+ * The form and interpretation of the link-level address is purely a matter
+ * of convention between the device driver and its consumers; however, it is
+ * expected that all drivers for an interface of a given if_type will agree.
+ */
+
+/*
+ * Structure of a Link-Level sockaddr:
+ */
+struct sockaddr_dl {
+	u_char	sdl_len;	/* Total length of sockaddr */
+	u_char	sdl_family;	/* AF_LINK */
+	u_short	sdl_index;	/* if != 0, system given index for interface */
+	u_char	sdl_type;	/* interface type */
+	u_char	sdl_nlen;	/* interface name length, no trailing 0 reqd. */
+	u_char	sdl_alen;	/* link level address length */
+	u_char	sdl_slen;	/* link layer selector length */
+	/*
+	 * The interface name occupies the first sdl_nlen bytes of sdl_data
+	 * and the link-level address starts right after it (see LLADDR).
+	 */
+	char	sdl_data[12];	/* minimum work area, can be larger;
+				   contains both if name and ll address */
+};
+
+/*
+ * Pointer to the link-level address stored in a sockaddr_dl, i.e. the
+ * byte following the sdl_nlen-byte interface name in sdl_data.
+ * The argument s is evaluated twice.
+ */
+#define LLADDR(s) ((caddr_t)((s)->sdl_data + (s)->sdl_nlen))
+
+#ifndef KERNEL
+
+#include <sys/cdefs.h>
+
+__BEGIN_DECLS
+void	link_addr __P((const char *, struct sockaddr_dl *));
+char	*link_ntoa __P((const struct sockaddr_dl *));
+__END_DECLS
+
+#endif /* !KERNEL */
diff --git a/sys/net/if_ethersubr.c b/sys/net/if_ethersubr.c
new file mode 100644
index 00000000000..d4d6680fdb0
--- /dev/null
+++ b/sys/net/if_ethersubr.c
@@ -0,0 +1,675 @@
+/*
+ * Copyright (c) 1982, 1989, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)if_ethersubr.c	8.1 (Berkeley) 6/10/93
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/protosw.h>
+#include <sys/socket.h>
+#include <sys/ioctl.h>
+#include <sys/errno.h>
+#include <sys/syslog.h>
+
+#include <machine/cpu.h>
+
+#include <net/if.h>
+#include <net/netisr.h>
+#include <net/route.h>
+#include <net/if_llc.h>
+#include <net/if_dl.h>
+#include <net/if_types.h>
+
+#ifdef INET
+#include <netinet/in.h>
+#include <netinet/in_var.h>
+#endif
+#include <netinet/if_ether.h>
+
+#ifdef NS
+#include <netns/ns.h>
+#include <netns/ns_if.h>
+#endif
+
+#ifdef ISO
+#include <netiso/argo_debug.h>
+#include <netiso/iso.h>
+#include <netiso/iso_var.h>
+#include <netiso/iso_snpac.h>
+#endif
+
+#ifdef LLC
+#include <netccitt/dll.h>
+#include <netccitt/llc_var.h>
+#endif
+
+#if defined(LLC) && defined(CCITT)
+extern struct ifqueue pkintrq;
+#endif
+
+/* Ethernet broadcast address (all ones); ether_input() matches it to set M_BCAST. */
+u_char	etherbroadcastaddr[6] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
+/* NOTE(review): presumably the loopback interface, defined elsewhere -- confirm. */
+extern	struct ifnet loif;
+/*
+ * Store error code e in the enclosing function's local ``error'' and jump
+ * to its ``bad'' label; both must exist wherever this macro is used.
+ */
+#define senderr(e) { error = (e); goto bad;}
+
+/*
+ * Ethernet output routine.
+ * Encapsulate a packet of type family for the local net: work out the
+ * destination Ethernet address and type/length field for the address
+ * family of dst, prepend the Ethernet header, and queue the packet on
+ * the interface send queue, starting output if the interface is idle.
+ * (No trailer encapsulation is performed by this routine.)
+ * Assumes that ifp is actually pointer to arpcom structure.
+ *
+ *	ifp	interface to send on; must be both IFF_UP and IFF_RUNNING
+ *	m0	packet to send, an mbuf chain with a packet header
+ *	dst	destination; dst->sa_family selects the encapsulation
+ *	rt0	route being used to reach dst, or a null pointer
+ *
+ * Returns 0 or an errno value.  The caller must not touch the mbuf chain
+ * after the call: it is freed on the error paths and otherwise queued or
+ * handed on to the resolution/loopback routines.
+ */
+int
+ether_output(ifp, m0, dst, rt0)
+	register struct ifnet *ifp;
+	struct mbuf *m0;
+	struct sockaddr *dst;
+	struct rtentry *rt0;
+{
+	short type;
+	int s, error = 0;
+	u_char edst[6];
+	register struct mbuf *m = m0;
+	register struct rtentry *rt;
+	struct mbuf *mcopy = (struct mbuf *)0;
+	register struct ether_header *eh;
+	int mflags, len = m->m_pkthdr.len;
+	struct arpcom *ac = (struct arpcom *)ifp;
+
+	if ((ifp->if_flags & (IFF_UP|IFF_RUNNING)) != (IFF_UP|IFF_RUNNING))
+		senderr(ENETDOWN);
+	ifp->if_lastchange = time;
+	/*
+	 * Validate the route: re-look it up if it has gone down, follow a
+	 * gateway route to the route for the gateway itself, and honour
+	 * (unexpired) reject routes.
+	 */
+	if (rt = rt0) {
+		if ((rt->rt_flags & RTF_UP) == 0) {
+			if (rt0 = rt = rtalloc1(dst, 1))
+				rt->rt_refcnt--;
+			else 
+				senderr(EHOSTUNREACH);
+		}
+		if (rt->rt_flags & RTF_GATEWAY) {
+			if (rt->rt_gwroute == 0)
+				goto lookup;
+			if (((rt = rt->rt_gwroute)->rt_flags & RTF_UP) == 0) {
+				rtfree(rt); rt = rt0;
+			lookup: rt->rt_gwroute = rtalloc1(rt->rt_gateway, 1);
+				if ((rt = rt->rt_gwroute) == 0)
+					senderr(EHOSTUNREACH);
+			}
+		}
+		if (rt->rt_flags & RTF_REJECT)
+			if (rt->rt_rmx.rmx_expire == 0 ||
+			    time.tv_sec < rt->rt_rmx.rmx_expire)
+				senderr(rt == rt0 ? EHOSTDOWN : EHOSTUNREACH);
+	}
+	switch (dst->sa_family) {
+
+#ifdef INET
+	case AF_INET:
+		if (!arpresolve(ac, rt, m, dst, edst))
+			return (0);	/* if not yet resolved */
+		/* If broadcasting on a simplex interface, loopback a copy */
+		if ((m->m_flags & M_BCAST) && (ifp->if_flags & IFF_SIMPLEX))
+			mcopy = m_copy(m, 0, (int)M_COPYALL);
+		type = ETHERTYPE_IP;
+		break;
+#endif
+#ifdef NS
+	case AF_NS:
+		type = ETHERTYPE_NS;
+		bcopy((caddr_t)&(((struct sockaddr_ns *)dst)->sns_addr.x_host),
+		    (caddr_t)edst, sizeof (edst));
+		if (!bcmp((caddr_t)edst, (caddr_t)&ns_thishost, sizeof(edst)))
+			return (looutput(ifp, m, dst, rt));
+		/* If broadcasting on a simplex interface, loopback a copy */
+		if ((m->m_flags & M_BCAST) && (ifp->if_flags & IFF_SIMPLEX))
+			mcopy = m_copy(m, 0, (int)M_COPYALL);
+		break;
+#endif
+#ifdef	ISO
+	case AF_ISO: {
+		int	snpalen;
+		struct	llc *l;
+		register struct sockaddr_dl *sdl;
+
+		if (rt && (sdl = (struct sockaddr_dl *)rt->rt_gateway) &&
+		    sdl->sdl_family == AF_LINK && sdl->sdl_alen > 0) {
+			bcopy(LLADDR(sdl), (caddr_t)edst, sizeof(edst));
+		} else if (error =
+			    iso_snparesolve(ifp, (struct sockaddr_iso *)dst,
+					    (char *)edst, &snpalen))
+			goto bad; /* Not Resolved */
+		/* If broadcasting on a simplex interface, loopback a copy */
+		if (*edst & 1)
+			m->m_flags |= (M_BCAST|M_MCAST);
+		if ((m->m_flags & M_BCAST) && (ifp->if_flags & IFF_SIMPLEX) &&
+		    (mcopy = m_copy(m, 0, (int)M_COPYALL))) {
+			M_PREPEND(mcopy, sizeof (*eh), M_DONTWAIT);
+			if (mcopy) {
+				eh = mtod(mcopy, struct ether_header *);
+				bcopy((caddr_t)edst,
+				      (caddr_t)eh->ether_dhost, sizeof (edst));
+				bcopy((caddr_t)ac->ac_enaddr,
+				      (caddr_t)eh->ether_shost, sizeof (edst));
+			}
+		}
+		M_PREPEND(m, 3, M_DONTWAIT);
+		if (m == NULL) {
+			/* don't leak the loopback copy made above */
+			if (mcopy)
+				m_freem(mcopy);
+			/*
+			 * NOTE(review): success is reported although the
+			 * packet was lost; kept as in the original code.
+			 */
+			return (0);
+		}
+		/* 802.3 framing: the type field carries the frame length */
+		type = m->m_pkthdr.len;
+		l = mtod(m, struct llc *);
+		l->llc_dsap = l->llc_ssap = LLC_ISO_LSAP;
+		l->llc_control = LLC_UI;
+		len += 3;
+		IFDEBUG(D_ETHER)
+			int i;
+			printf("unoutput: sending pkt to: ");
+			for (i=0; i<6; i++)
+				printf("%x ", edst[i] & 0xff);
+			printf("\n");
+		ENDDEBUG
+		} break;
+#endif /* ISO */
+#ifdef	LLC
+/*	case AF_NSAP: */
+	case AF_CCITT: {
+		/* rt is null when the caller supplied no route */
+		register struct sockaddr_dl *sdl = rt ?
+			(struct sockaddr_dl *) rt -> rt_gateway : 0;
+
+		if (sdl && sdl->sdl_family == AF_LINK
+		    && sdl->sdl_alen > 0) {
+			bcopy(LLADDR(sdl), (char *)edst,
+				sizeof(edst));
+		} else goto bad; /* Not a link interface ? Funny ... */
+		if ((ifp->if_flags & IFF_SIMPLEX) && (*edst & 1) &&
+		    (mcopy = m_copy(m, 0, (int)M_COPYALL))) {
+			M_PREPEND(mcopy, sizeof (*eh), M_DONTWAIT);
+			if (mcopy) {
+				eh = mtod(mcopy, struct ether_header *);
+				bcopy((caddr_t)edst,
+				      (caddr_t)eh->ether_dhost, sizeof (edst));
+				bcopy((caddr_t)ac->ac_enaddr,
+				      (caddr_t)eh->ether_shost, sizeof (edst));
+			}
+		}
+		/* 802.3 framing: the type field carries the frame length */
+		type = m->m_pkthdr.len;
+#ifdef LLC_DEBUG
+		{
+			int i;
+			register struct llc *l = mtod(m, struct llc *);
+
+			printf("ether_output: sending LLC2 pkt to: ");
+			for (i=0; i<6; i++)
+				printf("%x ", edst[i] & 0xff);
+			printf(" len 0x%x dsap 0x%x ssap 0x%x control 0x%x\n", 
+			       type & 0xff, l->llc_dsap & 0xff, l->llc_ssap &0xff,
+			       l->llc_control & 0xff);
+
+		}
+#endif /* LLC_DEBUG */
+		} break;
+#endif /* LLC */	
+
+	case AF_UNSPEC:
+		/* caller supplies a ready-made Ethernet header in sa_data */
+		eh = (struct ether_header *)dst->sa_data;
+		bcopy((caddr_t)eh->ether_dhost, (caddr_t)edst, sizeof (edst));
+		type = eh->ether_type;
+		break;
+
+	default:
+		printf("%s%d: can't handle af%d\n", ifp->if_name, ifp->if_unit,
+			dst->sa_family);
+		senderr(EAFNOSUPPORT);
+	}
+
+
+	if (mcopy)
+		(void) looutput(ifp, mcopy, dst, rt);
+	/*
+	 * Add local net header.  If no space in first mbuf,
+	 * allocate another.
+	 */
+	M_PREPEND(m, sizeof (struct ether_header), M_DONTWAIT);
+	if (m == 0)
+		senderr(ENOBUFS);
+	eh = mtod(m, struct ether_header *);
+	type = htons((u_short)type);
+	bcopy((caddr_t)&type,(caddr_t)&eh->ether_type,
+		sizeof(eh->ether_type));
+	bcopy((caddr_t)edst, (caddr_t)eh->ether_dhost, sizeof (edst));
+	bcopy((caddr_t)ac->ac_enaddr, (caddr_t)eh->ether_shost,
+	    sizeof(eh->ether_shost));
+	/*
+	 * Remember the flags now: once the packet is on the send queue and
+	 * the start routine has run, the driver may already have dequeued
+	 * (and released) it, so m must not be examined afterwards.
+	 */
+	mflags = m->m_flags;
+	s = splimp();
+	/*
+	 * Queue message on interface, and start output if interface
+	 * not yet active.
+	 */
+	if (IF_QFULL(&ifp->if_snd)) {
+		IF_DROP(&ifp->if_snd);
+		splx(s);
+		senderr(ENOBUFS);
+	}
+	IF_ENQUEUE(&ifp->if_snd, m);
+	if ((ifp->if_flags & IFF_OACTIVE) == 0)
+		(*ifp->if_start)(ifp);
+	splx(s);
+	ifp->if_obytes += len + sizeof (struct ether_header);
+	if (mflags & M_MCAST)
+		ifp->if_omcasts++;
+	return (error);
+
+bad:
+	if (m)
+		m_freem(m);
+	return (error);
+}
+
+/*
+ * Process a received Ethernet packet; the packet is in the mbuf chain m
+ * without the ether header, which is provided separately in eh.  The
+ * chain is queued for a protocol, sent back (ISO XID/TEST), or freed here.
+ */
+void
+ether_input(ifp, eh, m)
+	struct ifnet *ifp;
+	register struct ether_header *eh;
+	struct mbuf *m;
+{
+	register struct ifqueue *inq;	/* input queue picked by the switch */
+	register struct llc *l;
+	struct arpcom *ac = (struct arpcom *)ifp;
+	int s;				/* level returned by splimp() */
+
+	if ((ifp->if_flags & IFF_UP) == 0) {	/* interface is not up */
+		m_freem(m);
+		return;
+	}
+	ifp->if_lastchange = time;
+	ifp->if_ibytes += m->m_pkthdr.len + sizeof (*eh);
+	if (bcmp((caddr_t)etherbroadcastaddr, (caddr_t)eh->ether_dhost,
+	    sizeof(etherbroadcastaddr)) == 0)
+		m->m_flags |= M_BCAST;
+	else if (eh->ether_dhost[0] & 1)	/* low bit of first octet */
+		m->m_flags |= M_MCAST;
+	if (m->m_flags & (M_BCAST|M_MCAST))
+		ifp->if_imcasts++;
+
+	switch (eh->ether_type) {	/* compared as-is, no byte swap here */
+#ifdef INET
+	case ETHERTYPE_IP:
+		schednetisr(NETISR_IP);
+		inq = &ipintrq;
+		break;
+
+	case ETHERTYPE_ARP:
+		schednetisr(NETISR_ARP);
+		inq = &arpintrq;
+		break;
+#endif
+#ifdef NS
+	case ETHERTYPE_NS:
+		schednetisr(NETISR_NS);
+		inq = &nsintrq;
+		break;
+
+#endif
+	default:
+#if defined (ISO) || defined (LLC)
+		if (eh->ether_type > ETHERMTU)	/* used as a length below */
+			goto dropanyway;
+		l = mtod(m, struct llc *);
+		switch (l->llc_dsap) {
+#ifdef	ISO
+		case LLC_ISO_LSAP: 
+			switch (l->llc_control) {
+			case LLC_UI:
+				/* LLC_UI_P forbidden in class 1 service */
+				if ((l->llc_dsap == LLC_ISO_LSAP) &&
+				    (l->llc_ssap == LLC_ISO_LSAP)) {
+					/* LSAP for ISO */
+					if (m->m_pkthdr.len > eh->ether_type)
+						m_adj(m, eh->ether_type - m->m_pkthdr.len);
+					m->m_data += 3;		/* XXX step over 3 bytes */
+					m->m_len -= 3;		/* XXX */
+					m->m_pkthdr.len -= 3;	/* XXX */
+					M_PREPEND(m, sizeof *eh, M_DONTWAIT);
+					if (m == 0)
+						return;
+					*mtod(m, struct ether_header *) = *eh;
+					IFDEBUG(D_ETHER)
+						printf("clnp packet");
+					ENDDEBUG
+					schednetisr(NETISR_ISO);
+					inq = &clnlintrq;
+					break;
+				}
+				goto dropanyway;
+				
+			case LLC_XID:
+			case LLC_XID_P:
+				if(m->m_len < 6)
+					goto dropanyway;
+				l->llc_window = 0;
+				l->llc_fid = 9;
+				l->llc_class = 1;
+				l->llc_dsap = l->llc_ssap = 0;
+				/* Fall through to */
+			case LLC_TEST:
+			case LLC_TEST_P:
+			{
+				struct sockaddr sa;
+				register struct ether_header *eh2;
+				int i;
+				u_char c = l->llc_dsap;
+
+				l->llc_dsap = l->llc_ssap;	/* swap the SAPs */
+				l->llc_ssap = c;
+				if (m->m_flags & (M_BCAST | M_MCAST))
+					bcopy((caddr_t)ac->ac_enaddr,
+					      (caddr_t)eh->ether_dhost, 6);
+				sa.sa_family = AF_UNSPEC;
+				sa.sa_len = sizeof(sa);
+				/* An ether header is built in sa_data. */
+				eh2 = (struct ether_header *)sa.sa_data;
+				for (i = 0; i < 6; i++) {	/* swap src and dst */
+					eh2->ether_shost[i] = c = eh->ether_dhost[i];
+					eh2->ether_dhost[i] = 
+						eh->ether_dhost[i] = eh->ether_shost[i];
+					eh->ether_shost[i] = c;
+				}
+				ifp->if_output(ifp, m, &sa, NULL);
+				return;
+			}
+			default:
+				m_freem(m);
+				return;
+			}
+			break;
+#endif /* ISO */
+#ifdef LLC
+		case LLC_X25_LSAP:
+		{
+			if (m->m_pkthdr.len > eh->ether_type)
+				m_adj(m, eh->ether_type - m->m_pkthdr.len);
+			M_PREPEND(m, sizeof(struct sdl_hdr) , M_DONTWAIT);
+			if (m == 0)
+				return;
+			if ( !sdl_sethdrif(ifp, eh->ether_shost, LLC_X25_LSAP,
+					    eh->ether_dhost, LLC_X25_LSAP, 6, 
+					    mtod(m, struct sdl_hdr *)))
+				panic("ETHER cons addr failure");
+			mtod(m, struct sdl_hdr *)->sdlhdr_len = eh->ether_type;
+#ifdef LLC_DEBUG
+				printf("llc packet\n");
+#endif /* LLC_DEBUG */
+			schednetisr(NETISR_CCITT);
+			inq = &llcintrq;
+			break;
+		}
+#endif /* LLC */
+		dropanyway:
+		default:
+			m_freem(m);
+			return;
+		}
+#else /* ISO || LLC */
+	    m_freem(m);
+	    return;
+#endif /* ISO || LLC */
+	}
+
+	s = splimp();
+	if (IF_QFULL(inq)) {		/* queue full: count the drop, free m */
+		IF_DROP(inq);
+		m_freem(m);
+	} else
+		IF_ENQUEUE(inq, m);
+	splx(s);
+}
+
+/*
+ * Format 6-byte Ethernet address ap as "xx:xx:xx:xx:xx:xx" (static buffer).
+ */
+static char digits[] = "0123456789abcdef";
+char *
+ether_sprintf(ap)
+	register u_char *ap;
+{
+	register int i;			/* explicit type; no implicit int */
+	static char etherbuf[18];	/* overwritten by every call */
+	register char *cp = etherbuf;
+
+	for (i = 0; i < 6; i++) {
+		*cp++ = digits[*ap >> 4];
+		*cp++ = digits[*ap++ & 0xf];
+		*cp++ = ':';
+	}
+	*--cp = 0;			/* replace the last ':' with NUL */
+	return (etherbuf);
+}
+
+/* Common attach-time setup for an Ethernet interface: fill in the link
+ * parameters and copy the station address into the first AF_LINK address. */
+void
+ether_ifattach(ifp)
+	register struct ifnet *ifp;
+{
+	register struct sockaddr_dl *dl;
+	register struct ifaddr *ia;
+
+	ifp->if_mtu = ETHERMTU;
+	ifp->if_hdrlen = 14;
+	ifp->if_addrlen = 6;
+	ifp->if_type = IFT_ETHER;
+	for (ia = ifp->if_addrlist; ia != 0; ia = ia->ifa_next) {
+		dl = (struct sockaddr_dl *)ia->ifa_addr;
+		if (dl == 0 || dl->sdl_family != AF_LINK)
+			continue;
+		dl->sdl_alen = ifp->if_addrlen;
+		dl->sdl_type = IFT_ETHER;
+		bcopy((caddr_t)((struct arpcom *)ifp)->ac_enaddr,
+		    LLADDR(dl), ifp->if_addrlen);
+		break;
+	}
+}
+
+u_char	ether_ipmulticast_min[6] = { 0x01, 0x00, 0x5e, 0x00, 0x00, 0x00 };
+u_char	ether_ipmulticast_max[6] = { 0x01, 0x00, 0x5e, 0x7f, 0xff, 0xff };
+/*
+ * Add an Ethernet multicast address or range of addresses to the list for
+ * the interface described by ac.  Runs at splimp() and leaves through the
+ * single "out" label so the saved priority level is always restored.
+ */
+int
+ether_addmulti(ifr, ac)
+	struct ifreq *ifr;
+	register struct arpcom *ac;
+{
+	register struct ether_multi *enm;
+	struct sockaddr_in *sin;
+	u_char addrlo[6];
+	u_char addrhi[6];
+	int error;
+	int s = splimp();
+
+	switch (ifr->ifr_addr.sa_family) {
+
+	case AF_UNSPEC:
+		bcopy(ifr->ifr_addr.sa_data, addrlo, 6);
+		bcopy(addrlo, addrhi, 6);
+		break;
+
+#ifdef INET
+	case AF_INET:
+		sin = (struct sockaddr_in *)&(ifr->ifr_addr);
+		if (sin->sin_addr.s_addr != INADDR_ANY) {
+			ETHER_MAP_IP_MULTICAST(&sin->sin_addr, addrlo);
+			bcopy(addrlo, addrhi, 6);
+			break;
+		}
+		/*
+		 * An IP address of INADDR_ANY means listen to all
+		 * of the Ethernet multicast addresses used for IP.
+		 * (This is for the sake of IP multicast routers.)
+		 */
+		bcopy(ether_ipmulticast_min, addrlo, 6);
+		bcopy(ether_ipmulticast_max, addrhi, 6);
+		break;
+#endif
+
+	default:
+		error = EAFNOSUPPORT;
+		goto out;
+	}
+
+	/*
+	 * Both ends of the range must have the multicast (low-order) bit
+	 * of the first octet set.
+	 */
+	if ((addrlo[0] & 0x01) == 0 || (addrhi[0] & 0x01) == 0) {
+		error = EINVAL;
+		goto out;
+	}
+	/* A range that is already listed only gains a reference. */
+	ETHER_LOOKUP_MULTI(addrlo, addrhi, ac, enm);
+	if (enm != NULL) {
+		enm->enm_refcount++;
+		error = 0;
+		goto out;
+	}
+
+	/*
+	 * New address or range; malloc a new multicast record
+	 * and link it at the head of the interface's multicast list.
+	 */
+	enm = (struct ether_multi *)malloc(sizeof(*enm), M_IFMADDR, M_NOWAIT);
+	if (enm == NULL) {
+		error = ENOBUFS;
+		goto out;
+	}
+	enm->enm_ac = ac;
+	enm->enm_refcount = 1;
+	bcopy(addrlo, enm->enm_addrlo, 6);
+	bcopy(addrhi, enm->enm_addrhi, 6);
+	enm->enm_next = ac->ac_multiaddrs;
+	ac->ac_multiaddrs = enm;
+	ac->ac_multicnt++;
+	/*
+	 * ENETRESET tells the driver that the list has changed
+	 * and its reception filter should be adjusted accordingly.
+	 */
+	error = ENETRESET;
+out:
+	splx(s);
+	return (error);
+}
+
+/*
+ * Drop one reference to a multicast address record, unlinking and
+ * freeing the record when the last reference goes away.
+ */
+int
+ether_delmulti(ifr, ac)
+	struct ifreq *ifr;
+	register struct arpcom *ac;
+{
+	register struct ether_multi *enm;
+	register struct ether_multi **linkp;
+	struct sockaddr_in *sin;
+	u_char addrlo[6];
+	u_char addrhi[6];
+	int error;
+	int s = splimp();
+
+	switch (ifr->ifr_addr.sa_family) {
+
+	case AF_UNSPEC:
+		bcopy(ifr->ifr_addr.sa_data, addrlo, 6);
+		bcopy(addrlo, addrhi, 6);
+		break;
+
+#ifdef INET
+	case AF_INET:
+		sin = (struct sockaddr_in *)&(ifr->ifr_addr);
+		if (sin->sin_addr.s_addr != INADDR_ANY) {
+			ETHER_MAP_IP_MULTICAST(&sin->sin_addr, addrlo);
+			bcopy(addrlo, addrhi, 6);
+			break;
+		}
+		/*
+		 * An IP address of INADDR_ANY means stop listening
+		 * to the range of Ethernet multicast addresses used
+		 * for IP.
+		 */
+		bcopy(ether_ipmulticast_min, addrlo, 6);
+		bcopy(ether_ipmulticast_max, addrhi, 6);
+		break;
+#endif
+
+	default:
+		error = EAFNOSUPPORT;
+		goto out;
+	}
+
+	/* Look up the address in our list. */
+	ETHER_LOOKUP_MULTI(addrlo, addrhi, ac, enm);
+	if (enm == NULL) {
+		error = ENXIO;
+		goto out;
+	}
+	enm->enm_refcount--;
+	if (enm->enm_refcount != 0) {
+		/* Still some claims to this record. */
+		error = 0;
+		goto out;
+	}
+	/*
+	 * No remaining claims to this record; walk the link pointers
+	 * until the one that refers to it is found, then unlink and free.
+	 */
+	linkp = &enm->enm_ac->ac_multiaddrs;
+	while (*linkp != enm)
+		linkp = &(*linkp)->enm_next;
+	*linkp = enm->enm_next;
+	ac->ac_multicnt--;
+	free(enm, M_IFMADDR);
+	/*
+	 * ENETRESET tells the driver that the list has changed
+	 * and its reception filter should be adjusted accordingly.
+	 */
+	error = ENETRESET;
+out:
+	splx(s);
+	return (error);
+}
diff --git a/sys/net/if_llc.h b/sys/net/if_llc.h
new file mode 100644
index 00000000000..90dcb07991d
--- /dev/null
+++ b/sys/net/if_llc.h
@@ -0,0 +1,145 @@
+/*
+ * Copyright (c) 1988, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *      @(#)if_llc.h	8.1 (Berkeley) 6/10/93
+ */
+
+/*
+ * IEEE 802.2 Link Level Control headers, for use in conjunction with
+ * 802.{3,4,5} media access control methods.
+ *
+ * Headers here do not use bit fields due to shortcomings in many
+ * compilers.
+ */
+
+struct llc {
+	u_char	llc_dsap;		/* destination SAP */
+	u_char	llc_ssap;		/* source SAP */
+	union {				/* format-specific part of the header */
+	    struct {
+		u_char control;
+		u_char format_id;
+		u_char class;
+		u_char window_x2;
+	    } type_u;
+	    struct {
+		u_char num_snd_x2;
+		u_char num_rcv_x2;
+	    } type_i;
+	    struct {
+		u_char control;
+		u_char num_rcv_x2;
+	    } type_s;
+	    struct {
+	        u_char control;
+		struct frmrinfo {
+			u_char rej_pdu_0;	/* llc_frmr_pdu0 */
+			u_char rej_pdu_1;	/* llc_frmr_pdu1 */
+			u_char frmr_control;
+			u_char frmr_control_ext;
+			u_char frmr_cause;	/* llc_frmr_cause */
+		} frmrinfo;
+	    } type_frmr;
+	    struct {
+		u_char control;
+		u_char org_code[3];
+		u_short ether_type;
+	    } type_snap;
+	    struct {
+		u_char control;
+		u_char control_ext;
+	    } type_raw;
+	} llc_un;			/* reached through the llc_* macros */
+};
+#define llc_control            llc_un.type_u.control
+#define	llc_control_ext        llc_un.type_raw.control_ext
+#define llc_fid                llc_un.type_u.format_id
+#define llc_class              llc_un.type_u.class
+#define llc_window             llc_un.type_u.window_x2
+#define llc_frmrinfo           llc_un.type_frmr.frmrinfo
+#define llc_frmr_pdu0          llc_un.type_frmr.frmrinfo.rej_pdu_0
+#define llc_frmr_pdu1          llc_un.type_frmr.frmrinfo.rej_pdu_1
+#define llc_frmr_control       llc_un.type_frmr.frmrinfo.frmr_control
+#define llc_frmr_control_ext   llc_un.type_frmr.frmrinfo.frmr_control_ext
+#define llc_frmr_cause         llc_un.type_frmr.frmrinfo.frmr_cause
+
+/*
+ * Don't use sizeof on struct llc or its llc_un union for LLC header sizes
+ */
+#define LLC_ISFRAMELEN 4	/* presumably dsap + ssap + 2 (type_i/type_s) */
+#define LLC_UFRAMELEN  3	/* presumably dsap + ssap + 1 control byte */
+#define LLC_FRMRLEN    7
+
+/*
+ * Unnumbered LLC format commands (_P variants: same value with 0x10 set)
+ */
+#define LLC_UI		0x3
+#define LLC_UI_P	0x13
+#define LLC_DISC	0x43
+#define	LLC_DISC_P	0x53
+#define LLC_UA		0x63
+#define LLC_UA_P	0x73
+#define LLC_TEST	0xe3
+#define LLC_TEST_P	0xf3
+#define LLC_FRMR	0x87
+#define	LLC_FRMR_P	0x97
+#define LLC_DM		0x0f
+#define	LLC_DM_P	0x1f
+#define LLC_XID		0xaf
+#define LLC_XID_P	0xbf
+#define LLC_SABME	0x6f
+#define LLC_SABME_P	0x7f
+
+/*
+ * Supervisory LLC commands
+ */
+#define	LLC_RR		0x01
+#define	LLC_RNR		0x05
+#define	LLC_REJ		0x09
+
+/*
+ * Info format - dummy only
+ */
+#define	LLC_INFO	0x00
+
+/*
+ * ISO PDTR 10178 contains among others
+ * (values matched against llc_dsap and llc_ssap)
+ */
+#define LLC_X25_LSAP	0x7e
+#define LLC_SNAP_LSAP	0xaa
+#define LLC_ISO_LSAP	0xfe
+
+
+
+
+
+
diff --git a/sys/net/if_loop.c b/sys/net/if_loop.c
new file mode 100644
index 00000000000..f09295e34be
--- /dev/null
+++ b/sys/net/if_loop.c
@@ -0,0 +1,247 @@
+/*
+ * Copyright (c) 1982, 1986, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)if_loop.c	8.1 (Berkeley) 6/10/93
+ */
+
+/*
+ * Loopback interface driver for protocol testing and timing.
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/errno.h>
+#include <sys/ioctl.h>
+#include <sys/time.h>
+#include <machine/cpu.h>
+
+#include <net/if.h>
+#include <net/if_types.h>
+#include <net/netisr.h>
+#include <net/route.h>
+#include <net/bpf.h>
+
+#ifdef	INET
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/in_var.h>
+#include <netinet/ip.h>
+#endif
+
+#ifdef NS
+#include <netns/ns.h>
+#include <netns/ns_if.h>
+#endif
+
+#ifdef ISO
+#include <netiso/iso.h>
+#include <netiso/iso_var.h>
+#endif
+
+#include "bpfilter.h"
+
+#define	LOMTU	(1024+512)	/* MTU given to the loopback interface */
+
+struct	ifnet loif;		/* the loopback interface itself */
+
+/* ARGSUSED */
+void
+loopattach(n)
+	int n;
+{
+
+#ifdef lint
+	n = n;			/* Highlander: there can only be one... */
+#endif
+	/* Describe the interface, then hand it to the generic layer. */
+	loif.if_name = "lo";
+	loif.if_type = IFT_LOOP;
+	loif.if_flags = IFF_LOOPBACK | IFF_MULTICAST;
+	loif.if_mtu = LOMTU;
+	loif.if_addrlen = 0;
+	loif.if_hdrlen = 0;
+	loif.if_output = looutput;
+	loif.if_ioctl = loioctl;
+	if_attach(&loif);
+#if NBPFILTER > 0
+	bpfattach(&loif.if_bpf, &loif, DLT_NULL, sizeof(u_int));
+#endif
+}
+
+int
+looutput(ifp, m, dst, rt)
+	struct ifnet *ifp;
+	register struct mbuf *m;
+	struct sockaddr *dst;
+	register struct rtentry *rt;
+{
+	int s, isr;			/* saved spl level; netisr to post */
+	register struct ifqueue *ifq = 0;	/* input queue for dst's family */
+
+	if ((m->m_flags & M_PKTHDR) == 0)
+		panic("looutput no HDR");
+	ifp->if_lastchange = time;
+#if NBPFILTER > 0
+	if (loif.if_bpf) {
+		/*
+		 * We need to prepend the address family as
+		 * a four byte field.  Cons up a dummy header
+		 * to pacify bpf.  This is safe because bpf
+		 * will only read from the mbuf (i.e., it won't
+		 * try to free it or keep a pointer to it).
+		 */
+		struct mbuf m0;
+		u_int af = dst->sa_family;
+
+		m0.m_next = m;
+		m0.m_len = 4;
+		m0.m_data = (char *)&af;
+		
+		bpf_mtap(loif.if_bpf, &m0);
+	}
+#endif
+	m->m_pkthdr.rcvif = ifp;	/* packet is received on this interface */
+
+	/* Reject and blackhole routes end here; blackhole reports no error. */
+	if (rt && rt->rt_flags & (RTF_REJECT|RTF_BLACKHOLE)) {
+		m_freem(m);
+		return (rt->rt_flags & RTF_BLACKHOLE ? 0 :
+		        rt->rt_flags & RTF_HOST ? EHOSTUNREACH : ENETUNREACH);
+	}
+	ifp->if_opackets++;
+	ifp->if_obytes += m->m_pkthdr.len;
+	switch (dst->sa_family) {	/* pick the input queue and netisr */
+
+#ifdef INET
+	case AF_INET:
+		ifq = &ipintrq;
+		isr = NETISR_IP;
+		break;
+#endif
+#ifdef NS
+	case AF_NS:
+		ifq = &nsintrq;
+		isr = NETISR_NS;
+		break;
+#endif
+#ifdef ISO
+	case AF_ISO:
+		ifq = &clnlintrq;
+		isr = NETISR_ISO;
+		break;
+#endif
+	default:
+		printf("lo%d: can't handle af%d\n", ifp->if_unit,
+			dst->sa_family);
+		m_freem(m);
+		return (EAFNOSUPPORT);
+	}
+	s = splimp();
+	if (IF_QFULL(ifq)) {		/* queue full: count the drop, free m */
+		IF_DROP(ifq);
+		m_freem(m);
+		splx(s);
+		return (ENOBUFS);
+	}
+	IF_ENQUEUE(ifq, m);
+	schednetisr(isr);
+	ifp->if_ipackets++;		/* also counted as input */
+	ifp->if_ibytes += m->m_pkthdr.len;
+	splx(s);
+	return (0);
+}
+
+/* ARGSUSED */
+void
+lortrequest(cmd, rt, sa)
+	int cmd;
+	struct rtentry *rt;
+	struct sockaddr *sa;
+{
+	/* Only the route's MTU metric is touched; cmd and sa are unused. */
+	if (rt != 0)
+		rt->rt_rmx.rmx_mtu = LOMTU;
+}
+
+/*
+ * Process an ioctl request.
+ * Each case returns directly: 0 on success, otherwise the errno value
+ * to hand back to the caller.
+ */
+/* ARGSUSED */
+int
+loioctl(ifp, cmd, data)
+	register struct ifnet *ifp;
+	int cmd;
+	caddr_t data;
+{
+	register struct ifaddr *ifa;
+	register struct ifreq *ifr;
+
+	switch (cmd) {
+
+	case SIOCSIFADDR:
+		/*
+		 * Mark the interface up and give ISO addresses our
+		 * route-request hook.  Everything else is done at a
+		 * higher level.
+		 */
+		ifp->if_flags |= IFF_UP;
+		ifa = (struct ifaddr *)data;
+		if (ifa != 0 && ifa->ifa_addr->sa_family == AF_ISO)
+			ifa->ifa_rtrequest = lortrequest;
+		return (0);
+
+	case SIOCADDMULTI:
+	case SIOCDELMULTI:
+		/*
+		 * Nothing is recorded for multicast requests; the address
+		 * family is only checked for being one we accept.
+		 */
+		ifr = (struct ifreq *)data;
+		if (ifr == 0)
+			return (EAFNOSUPPORT);		/* XXX */
+
+#ifdef INET
+		if (ifr->ifr_addr.sa_family == AF_INET)
+			return (0);
+#endif
+		return (EAFNOSUPPORT);
+
+	default:
+		/* No other request is handled here. */
+		return (EINVAL);
+	}
+	/* NOTREACHED */
+}
diff --git a/sys/net/if_sl.c b/sys/net/if_sl.c
new file mode 100644
index 00000000000..56ce96f4b9d
--- /dev/null
+++ b/sys/net/if_sl.c
@@ -0,0 +1,839 @@
+/*
+ * Copyright (c) 1987, 1989, 1992, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)if_sl.c	8.6 (Berkeley) 2/1/94
+ */
+
+/*
+ * Serial Line interface
+ *
+ * Rick Adams
+ * Center for Seismic Studies
+ * 1300 N 17th Street, Suite 1450
+ * Arlington, Virginia 22209
+ * (703)276-7900
+ * rick@seismo.ARPA
+ * seismo!rick
+ *
+ * Pounded on heavily by Chris Torek (chris@mimsy.umd.edu, umcp-cs!chris).
+ * N.B.: this belongs in netinet, not net, the way it stands now.
+ * Should have a link-layer type designation, but wouldn't be
+ * backwards-compatible.
+ *
+ * Converted to 4.3BSD Beta by Chris Torek.
+ * Other changes made at Berkeley, based in part on code by Kirk Smith.
+ * W. Jolitz added slip abort.
+ *
+ * Hacked almost beyond recognition by Van Jacobson (van@helios.ee.lbl.gov).
+ * Added priority queuing for "interactive" traffic; hooks for TCP
+ * header compression; ICMP filtering (at 2400 baud, some cretin
+ * pinging you can use up all your bandwidth).  Made low clist behavior
+ * more robust and slightly less likely to hang serial line.
+ * Sped up a bunch of things.
+ * 
+ * Note that splimp() is used throughout to block both (tty) input
+ * interrupts and network activity; thus, splimp must be >= spltty.
+ */
+
+#include "sl.h"
+#if NSL > 0
+
+#include "bpfilter.h"
+
+#include <sys/param.h>
+#include <sys/proc.h>
+#include <sys/mbuf.h>
+#include <sys/buf.h>
+#include <sys/dkstat.h>
+#include <sys/socket.h>
+#include <sys/ioctl.h>
+#include <sys/file.h>
+#include <sys/tty.h>
+#include <sys/kernel.h>
+#include <sys/conf.h>
+
+#include <machine/cpu.h>
+
+#include <net/if.h>
+#include <net/if_types.h>
+#include <net/netisr.h>
+#include <net/route.h>
+
+#if INET
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/in_var.h>
+#include <netinet/ip.h>
+#else
+Huh? Slip without inet?
+#endif
+
+#include <net/slcompress.h>
+#include <net/if_slvar.h>
+#include <net/slip.h>
+
+#if NBPFILTER > 0
+#include <sys/time.h>
+#include <net/bpf.h>
+#endif
+
+/*
+ * SLMAX is a hard limit on input packet size.  To simplify the code
+ * and improve performance, we require that packets fit in an mbuf
+ * cluster, and if we get a compressed packet, there's enough extra
+ * room to expand the header into a max length tcp/ip header (128
+ * bytes).  So, SLMAX can be at most
+ *	MCLBYTES - 128
+ *
+ * SLMTU is a hard limit on output packet size.  To insure good
+ * interactive response, SLMTU wants to be the smallest size that
+ * amortizes the header cost.  (Remember that even with
+ * type-of-service queuing, we have to wait for any in-progress
+ * packet to finish.  I.e., we wait, on the average, 1/2 * mtu /
+ * cps, where cps is the line speed in characters per second.
+ * E.g., 533ms wait for a 1024 byte MTU on a 9600 baud line.  The
+ * average compressed header size is 6-8 bytes so any MTU > 90
+ * bytes will give us 90% of the line bandwidth.  A 100ms wait is
+ * tolerable (500ms is not), so want an MTU around 296.  (Since TCP
+ * will send 256 byte segments (to allow for 40 byte headers), the
+ * typical packet size on the wire will be around 260 bytes).  In
+ * 4.3tahoe+ systems, we can set an MTU in a route so we do that &
+ * leave the interface MTU relatively high (so we don't IP fragment
+ * when acting as a gateway to someone using a stupid MTU).
+ *
+ * Similar considerations apply to SLIP_HIWAT:  It's the amount of
+ * data that will be queued 'downstream' of us (i.e., in clists
+ * waiting to be picked up by the tty output interrupt).  If we
+ * queue a lot of data downstream, it's immune to our t.o.s. queuing.
+ * E.g., if SLIP_HIWAT is 1024, the interactive traffic in mixed
+ * telnet/ftp will see a 1 sec wait, independent of the mtu (the
+ * wait is dependent on the ftp window size but that's typically
+ * 1k - 4k).  So, we want SLIP_HIWAT just big enough to amortize
+ * the cost (in idle time on the wire) of the tty driver running
+ * off the end of its clists & having to call back slstart for a
+ * new packet.  For a tty interface with any buffering at all, this
+ * cost will be zero.  Even with a totally brain dead interface (like
+ * the one on a typical workstation), the cost will be <= 1 character
+ * time.  So, setting SLIP_HIWAT to ~100 guarantees that we'll lose
+ * at most 1% while maintaining good interactive response.
+ */
+#if NBPFILTER > 0
+#define	BUFOFFSET	(128+sizeof(struct ifnet **)+SLIP_HDRLEN)
+#else
+#define	BUFOFFSET	(128+sizeof(struct ifnet **))
+#endif
+#define	SLMAX		(MCLBYTES - BUFOFFSET)	/* input packet size limit */
+#define	SLBUFSIZE	(SLMAX + BUFOFFSET)	/* works out to MCLBYTES */
+#define	SLMTU		296			/* if_mtu set by slattach() */
+#define	SLIP_HIWAT	roundup(50,CBSIZE)	/* t_outq.c_cc threshold */
+#define	CLISTRESERVE	1024	/* Can't let clists get too low */
+
+/*
+ * SLIP ABORT ESCAPE MECHANISM:
+ *	(inspired by HAYES modem escape arrangement)
+ *	1sec escape 1sec escape 1sec escape { 1sec escape 1sec escape }
+ *	within window time signals a "soft" exit from slip mode by remote end
+ *	if the IFF_DEBUG flag is on.
+ */
+#define	ABT_ESC		'\033'	/* can't be t_intr - distant host must know it*/
+#define	ABT_IDLE	1	/* in seconds - idle before an escape */
+#define	ABT_COUNT	3	/* count of escapes for abort */
+#define	ABT_WINDOW	(ABT_COUNT*2+2)	/* in seconds - time to count */
+
+struct sl_softc sl_softc[NSL];		/* one entry per sl unit */
+
+#define FRAME_END	 	0xc0		/* Frame End */
+#define FRAME_ESCAPE		0xdb		/* Frame Esc */
+#define TRANS_FRAME_END	 	0xdc		/* transposed frame end */
+#define TRANS_FRAME_ESCAPE 	0xdd		/* transposed frame esc */
+
+extern struct timeval time;
+
+static int slinit __P((struct sl_softc *));
+static struct mbuf *sl_btom __P((struct sl_softc *, int));
+
+/*
+ * Called from boot code: fill in and attach the ifnet of every sl unit.
+ */
+void
+slattach()
+{
+	register struct ifnet *ifp;
+	register int unit;
+
+	for (unit = 0; unit < NSL; unit++) {
+		ifp = &sl_softc[unit].sc_if;
+		ifp->if_next = NULL;
+		ifp->if_name = "sl";
+		ifp->if_unit = unit;
+		ifp->if_type = IFT_SLIP;
+		ifp->if_mtu = SLMTU;
+		ifp->if_flags = IFF_POINTOPOINT | SC_AUTOCOMP | IFF_MULTICAST;
+		ifp->if_output = sloutput;
+		ifp->if_ioctl = slioctl;
+		ifp->if_snd.ifq_maxlen = 50;
+		sl_softc[unit].sc_fastq.ifq_maxlen = 32;
+		if_attach(ifp);
+#if NBPFILTER > 0
+		bpfattach(&sl_softc[unit].sc_bpf, ifp, DLT_SLIP, SLIP_HDRLEN);
+#endif
+	}
+}
+
+static int		/* 1 when sc is ready for use, 0 if no buffer was had */
+slinit(sc)
+	register struct sl_softc *sc;
+{
+	register caddr_t p;
+
+	if (sc->sc_ep == (u_char *) 0) {	/* no buffer attached yet */
+		MCLALLOC(p, M_WAIT);
+		if (p == 0) {
+			printf("sl%d: can't allocate buffer\n",
+			    (int)(sc - sl_softc));	/* %d takes an int */
+			sc->sc_if.if_flags &= ~IFF_UP;
+			return (0);
+		}
+		sc->sc_ep = (u_char *)p + SLBUFSIZE;	/* SLBUFSIZE past p */
+	}
+	sc->sc_buf = sc->sc_ep - SLMAX;	/* the SLMAX bytes that end at sc_ep */
+	sc->sc_mp = sc->sc_buf;
+	sl_compress_init(&sc->sc_comp);
+	return (1);
+}
+
+/*
+ * Line specific open routine (checked with suser()).  Attach the given
+ * tty to the first sl unit that has no tty; ENXIO if every unit is taken.
+ */
+/* ARGSUSED */
+int
+slopen(dev, tp)
+	dev_t dev;
+	register struct tty *tp;
+{
+	struct proc *p = curproc;		/* XXX */
+	register struct sl_softc *sc;
+	register int unit;
+	int error;
+
+	error = suser(p->p_ucred, &p->p_acflag);
+	if (error != 0)
+		return (error);
+	if (tp->t_line == SLIPDISC)
+		return (0);
+	for (unit = 0, sc = sl_softc; unit < NSL; unit++, sc++) {
+		if (sc->sc_ttyp != NULL)
+			continue;
+		if (slinit(sc) == 0)
+			return (ENOBUFS);
+		tp->t_sc = (caddr_t)sc;
+		sc->sc_ttyp = tp;
+		sc->sc_if.if_baudrate = tp->t_ospeed;
+		ttyflush(tp, FREAD | FWRITE);
+		return (0);
+	}
+	return (ENXIO);
+}
+
+/*
+ * Line specific close routine.
+ * Detach the tty from the sl unit and release the unit's buffer.
+ */
+void
+slclose(tp)
+	struct tty *tp;
+{
+	register struct sl_softc *sc;
+	int s;
+
+	ttywflush(tp);
+	s = splimp();		/* actually, max(spltty, splnet) */
+	tp->t_line = 0;
+	if ((sc = (struct sl_softc *)tp->t_sc) != NULL) {
+		if_down(&sc->sc_if);
+		tp->t_sc = NULL;
+		sc->sc_ttyp = NULL;
+		/* sc_ep is SLBUFSIZE past the start of the buffer. */
+		MCLFREE((caddr_t)(sc->sc_ep - SLBUFSIZE));
+		sc->sc_buf = 0;
+		sc->sc_mp = 0;
+		sc->sc_ep = 0;
+	}
+	splx(s);
+}
+
+/*
+ * Line specific (tty) ioctl routine.
+ * Provide a way to get the sl unit number.
+ */
+/* ARGSUSED */
+int
+sltioctl(tp, cmd, data, flag)
+	struct tty *tp;
+	int cmd;
+	caddr_t data;
+	int flag;
+{
+	struct sl_softc *sc;
+
+	if (cmd != SLIOCGUNIT)
+		return (-1);
+	/*
+	 * SLIOCGUNIT: store the unit number of the softc that hangs
+	 * off the tty through data.
+	 */
+	sc = (struct sl_softc *)tp->t_sc;
+	*(int *)data = sc->sc_if.if_unit;
+	return (0);
+}
+
+/*
+ * Queue a packet.  Start transmission if not active.
+ * Compression happens in slstart; if we do it here, IP TOS
+ * will cause us to not compress "background" packets, because
+ * ordering gets trashed.  It can be done for all packets in slstart.
+ */
+int
+sloutput(ifp, m, dst, rtp)
+	struct ifnet *ifp;
+	register struct mbuf *m;
+	struct sockaddr *dst;
+	struct rtentry *rtp;
+{
+	register struct sl_softc *sc = &sl_softc[ifp->if_unit];
+	register struct ip *ip;
+	register struct ifqueue *ifq;	/* if_snd, or sc_fastq for low delay */
+	int s;				/* level returned by splimp() */
+
+	/*
+	 * `Cannot happen' (see slioctl).  Someday we will extend
+	 * the line protocol to support other address families.
+	 */
+	if (dst->sa_family != AF_INET) {
+		printf("sl%d: af%d not supported\n", sc->sc_if.if_unit,
+			dst->sa_family);
+		m_freem(m);
+		sc->sc_if.if_noproto++;
+		return (EAFNOSUPPORT);
+	}
+
+	if (sc->sc_ttyp == NULL) {	/* no tty attached to this unit */
+		m_freem(m);
+		return (ENETDOWN);	/* sort of */
+	}
+	if ((sc->sc_ttyp->t_state & TS_CARR_ON) == 0 &&
+	    (sc->sc_ttyp->t_cflag & CLOCAL) == 0) {
+		m_freem(m);		/* carrier off and line not CLOCAL */
+		return (EHOSTUNREACH);
+	}
+	ifq = &sc->sc_if.if_snd;
+	ip = mtod(m, struct ip *);
+	if (sc->sc_if.if_flags & SC_NOICMP && ip->ip_p == IPPROTO_ICMP) {
+		m_freem(m);
+		return (ENETRESET);		/* XXX ? */
+	}
+	if (ip->ip_tos & IPTOS_LOWDELAY)
+		ifq = &sc->sc_fastq;
+	s = splimp();
+	if (IF_QFULL(ifq)) {		/* queue full: count the drop, free m */
+		IF_DROP(ifq);
+		m_freem(m);
+		splx(s);
+		sc->sc_if.if_oerrors++;
+		return (ENOBUFS);
+	}
+	IF_ENQUEUE(ifq, m);
+	sc->sc_if.if_lastchange = time;
+	if (sc->sc_ttyp->t_outq.c_cc == 0)	/* tty output queue is empty */
+		slstart(sc->sc_ttyp);
+	splx(s);
+	return (0);
+}
+
+/*
+ * Start output on interface.  Get another datagram
+ * to send from the interface queue and map it to
+ * the interface before starting output.
+ *
+ * tp is the tty carrying the line; its t_sc field is used as the
+ * sl_softc.  Each pass of the loop takes one packet (sc_fastq is tried
+ * before if_snd), optionally runs TCP header compression and the BPF
+ * tap, then copies the packet into tp->t_outq, writing FRAME_ESCAPE
+ * followed by TRANS_FRAME_ESCAPE or TRANS_FRAME_END in place of any
+ * FRAME_ESCAPE or FRAME_END data byte, and finishes with FRAME_END.
+ * The function returns when both queues are empty, when sc is NULL, or
+ * when t_outq still holds more than SLIP_HIWAT characters after the
+ * tty's t_oproc has been called.
+ */
+void
+slstart(tp)
+	register struct tty *tp;
+{
+	register struct sl_softc *sc = (struct sl_softc *)tp->t_sc;
+	register struct mbuf *m;
+	register u_char *cp;
+	register struct ip *ip;
+	int s;
+	struct mbuf *m2;
+#if NBPFILTER > 0
+	u_char bpfbuf[SLMTU + SLIP_HDRLEN];
+	register int len;
+#endif
+	extern int cfreecount;
+
+	for (;;) {
+		/*
+		 * If there is more in the output queue, just send it now.
+		 * We are being called in lieu of ttstart and must do what
+		 * it would.
+		 */
+		if (tp->t_outq.c_cc != 0) {
+			(*tp->t_oproc)(tp);
+			if (tp->t_outq.c_cc > SLIP_HIWAT)
+				return;
+		}
+		/*
+		 * This happens briefly when the line shuts down.
+		 */
+		if (sc == NULL)
+			return;
+
+		/*
+		 * Get a packet and send it to the interface.
+		 * sc_fastq is tried first; a packet taken from it is
+		 * tallied in if_omcasts.  Only when it is empty is the
+		 * regular if_snd queue consulted.
+		 */
+		s = splimp();
+		IF_DEQUEUE(&sc->sc_fastq, m);
+		if (m)
+			sc->sc_if.if_omcasts++;		/* XXX */
+		else
+			IF_DEQUEUE(&sc->sc_if.if_snd, m);
+		splx(s);
+		if (m == NULL)
+			return;
+
+		/*
+		 * We do the header compression here rather than in sloutput
+		 * because the packets will be out of order if we are using TOS
+		 * queueing, and the connection id compression will get
+		 * munged when this happens.
+		 */
+#if NBPFILTER > 0
+		if (sc->sc_bpf) {
+			/*
+			 * We need to save the TCP/IP header before it's
+			 * compressed.  To avoid complicated code, we just
+			 * copy the entire packet into a stack buffer (since
+			 * this is a serial line, packets should be short
+			 * and/or the copy should be negligible cost compared
+			 * to the packet transmission time).
+			 *
+			 * NOTE(review): the copy is not bounded by the size
+			 * of bpfbuf; it relies on queued packets never
+			 * exceeding SLMTU bytes -- confirm against sloutput.
+			 * The cp declared here shadows the function-scope cp.
+			 */
+			register struct mbuf *m1 = m;
+			register u_char *cp = bpfbuf + SLIP_HDRLEN;
+
+			len = 0;
+			do {
+				register int mlen = m1->m_len;
+
+				bcopy(mtod(m1, caddr_t), cp, mlen);
+				cp += mlen;
+				len += mlen;
+			} while (m1 = m1->m_next);
+		}
+#endif
+		/*
+		 * For TCP packets on an interface with SC_COMPRESS set, the
+		 * value returned by sl_compress_tcp is or'ed into the first
+		 * byte of the (possibly rewritten) packet.
+		 */
+		if ((ip = mtod(m, struct ip *))->ip_p == IPPROTO_TCP) {
+			if (sc->sc_if.if_flags & SC_COMPRESS)
+				*mtod(m, u_char *) |= sl_compress_tcp(m, ip,
+				    &sc->sc_comp, 1);
+		}
+#if NBPFILTER > 0
+		if (sc->sc_bpf) {
+			/*
+			 * Put the SLIP pseudo-"link header" in place.  The
+			 * compressed header is now at the beginning of the
+			 * mbuf.
+			 */
+			bpfbuf[SLX_DIR] = SLIPDIR_OUT;
+			bcopy(mtod(m, caddr_t), &bpfbuf[SLX_CHDR], CHDR_LEN);
+			bpf_tap(sc->sc_bpf, bpfbuf, len + SLIP_HDRLEN);
+		}
+#endif
+		sc->sc_if.if_lastchange = time;
+
+		/*
+		 * If system is getting low on clists, just flush our
+		 * output queue (if the stuff was important, it'll get
+		 * retransmitted).  Each packet discarded this way is
+		 * counted in if_collisions.
+		 */
+		if (cfreecount < CLISTRESERVE + SLMTU) {
+			m_freem(m);
+			sc->sc_if.if_collisions++;
+			continue;
+		}
+		/*
+		 * The extra FRAME_END will start up a new packet, and thus
+		 * will flush any accumulated garbage.  We do this whenever
+		 * the line may have been idle for some time.
+		 */
+		if (tp->t_outq.c_cc == 0) {
+			++sc->sc_if.if_obytes;
+			(void) putc(FRAME_END, &tp->t_outq);
+		}
+
+		/*
+		 * Copy the chain into t_outq one mbuf at a time, freeing
+		 * each mbuf once it has been processed.  A nonzero return
+		 * from b_to_q or putc abandons the remainder of the current
+		 * mbuf only; the following mbufs are still attempted.
+		 */
+		while (m) {
+			register u_char *ep;
+
+			cp = mtod(m, u_char *); ep = cp + m->m_len;
+			while (cp < ep) {
+				/*
+				 * Find out how many bytes in the string we can
+				 * handle without doing something special.
+				 */
+				register u_char *bp = cp;
+
+				while (cp < ep) {
+					switch (*cp++) {
+					case FRAME_ESCAPE:
+					case FRAME_END:
+						--cp;
+						goto out;
+					}
+				}
+				out:
+				if (cp > bp) {
+					/*
+					 * Put n characters at once
+					 * into the tty output queue.
+					 */
+					if (b_to_q((char *)bp, cp - bp,
+					    &tp->t_outq))
+						break;
+					sc->sc_if.if_obytes += cp - bp;
+				}
+				/*
+				 * If there are characters left in the mbuf,
+				 * the first one must be special..
+				 * Put it out in a different form.
+				 * If the second byte of the pair cannot be
+				 * queued, the FRAME_ESCAPE just queued is
+				 * taken back with unputc.
+				 */
+				if (cp < ep) {
+					if (putc(FRAME_ESCAPE, &tp->t_outq))
+						break;
+					if (putc(*cp++ == FRAME_ESCAPE ?
+					   TRANS_FRAME_ESCAPE : TRANS_FRAME_END,
+					   &tp->t_outq)) {
+						(void) unputc(&tp->t_outq);
+						break;
+					}
+					sc->sc_if.if_obytes += 2;
+				}
+			}
+			MFREE(m, m2);
+			m = m2;
+		}
+
+		if (putc(FRAME_END, &tp->t_outq)) {
+			/*
+			 * Not enough room.  Remove a char to make room
+			 * and end the packet normally.
+			 * If you get many collisions (more than one or two
+			 * a day) you probably do not have enough clists
+			 * and you should increase "nclist" in param.c.
+			 */
+			(void) unputc(&tp->t_outq);
+			(void) putc(FRAME_END, &tp->t_outq);
+			sc->sc_if.if_collisions++;
+		} else {
+			++sc->sc_if.if_obytes;
+			sc->sc_if.if_opackets++;
+		}
+	}
+}
+
+/*
+ * Wrap the len bytes of received data at sc->sc_buf in a packet-header
+ * mbuf whose rcvif is this SLIP interface.  Returns NULL if an mbuf
+ * (or, for large packets, a cluster) cannot be obtained.
+ */
+static struct mbuf *
+sl_btom(sc, len)
+	register struct sl_softc *sc;
+	register int len;
+{
+	register struct mbuf *pkt;
+
+	MGETHDR(pkt, M_DONTWAIT, MT_DATA);
+	if (pkt == NULL)
+		return (NULL);
+
+	if (len < MHLEN) {
+		/* Small packet: copy the data into the header mbuf. */
+		bcopy((caddr_t)sc->sc_buf, mtod(pkt, caddr_t), len);
+	} else {
+		/*
+		 * Large packet: swapping buffers is cheaper than copying.
+		 * The freshly allocated cluster becomes the driver's next
+		 * input buffer (sc_ep is set SLBUFSIZE past its start) and
+		 * the cluster that was just filled is attached to the mbuf.
+		 * The input routine guarantees the packet fits a cluster.
+		 */
+		MCLGET(pkt, M_DONTWAIT);
+		if ((pkt->m_flags & M_EXT) == 0) {
+			/* No cluster to be had: drop the packet. */
+			(void) m_free(pkt);
+			return (NULL);
+		}
+		/* sc_ep must be taken before m_data is redirected. */
+		sc->sc_ep = mtod(pkt, u_char *) + SLBUFSIZE;
+		pkt->m_data = (caddr_t)sc->sc_buf;
+		pkt->m_ext.ext_buf = (caddr_t)((int)sc->sc_buf &~ MCLOFSET);
+	}
+
+	pkt->m_pkthdr.rcvif = &sc->sc_if;
+	pkt->m_pkthdr.len = len;
+	pkt->m_len = len;
+	return (pkt);
+}
+
+/*
+ * tty interface receiver interrupt.
+ *
+ * Called with one received character c for tty tp.  Ordinary bytes are
+ * appended at sc_mp.  FRAME_ESCAPE sets sc_escape so that a following
+ * TRANS_FRAME_ESCAPE / TRANS_FRAME_END is stored as a literal
+ * FRAME_ESCAPE / FRAME_END byte.  On FRAME_END the accumulated packet
+ * is uncompressed if necessary, handed to BPF if a tap is attached,
+ * wrapped in an mbuf by sl_btom and queued on ipintrq.  Every path
+ * through the "newpack" label resets sc_mp/sc_buf for the next frame;
+ * the "error" label additionally counts an input error.
+ */
+void
+slinput(c, tp)
+	register int c;
+	register struct tty *tp;
+{
+	register struct sl_softc *sc;
+	register struct mbuf *m;
+	register int len;
+	int s;
+#if NBPFILTER > 0
+	u_char chdr[CHDR_LEN];
+#endif
+
+	tk_nin++;
+	sc = (struct sl_softc *)tp->t_sc;
+	if (sc == NULL)
+		return;
+	/*
+	 * A character carrying error bits, or one arriving while carrier
+	 * is down on a line without CLOCAL, only marks the frame being
+	 * assembled as bad; the character itself is not stored.
+	 */
+	if (c & TTY_ERRORMASK || ((tp->t_state & TS_CARR_ON) == 0 &&
+	    (tp->t_cflag & CLOCAL) == 0)) {
+		sc->sc_flags |= SC_ERROR;
+		return;
+	}
+	c &= TTY_CHARMASK;
+
+	++sc->sc_if.if_ibytes;
+
+	/*
+	 * Abort-sequence detection, active only while IFF_DEBUG is set on
+	 * the interface: ABT_COUNT abort characters, each arriving after
+	 * at least ABT_IDLE seconds of silence and all within ABT_WINDOW
+	 * seconds of the first, close the line.
+	 */
+	if (sc->sc_if.if_flags & IFF_DEBUG) {
+		if (c == ABT_ESC) {
+			/*
+			 * If we have a previous abort, see whether
+			 * this one is within the time limit.
+			 */
+			if (sc->sc_abortcount &&
+			    time.tv_sec >= sc->sc_starttime + ABT_WINDOW)
+				sc->sc_abortcount = 0;
+			/*
+			 * If we see an abort after "idle" time, count it;
+			 * record when the first abort escape arrived.
+			 */
+			if (time.tv_sec >= sc->sc_lasttime + ABT_IDLE) {
+				if (++sc->sc_abortcount == 1)
+					sc->sc_starttime = time.tv_sec;
+				if (sc->sc_abortcount >= ABT_COUNT) {
+					slclose(tp);
+					return;
+				}
+			}
+		} else
+			sc->sc_abortcount = 0;
+		sc->sc_lasttime = time.tv_sec;
+	}
+
+	switch (c) {
+
+	case TRANS_FRAME_ESCAPE:
+		if (sc->sc_escape)
+			c = FRAME_ESCAPE;
+		break;
+
+	case TRANS_FRAME_END:
+		if (sc->sc_escape)
+			c = FRAME_END;
+		break;
+
+	case FRAME_ESCAPE:
+		sc->sc_escape = 1;
+		return;
+
+	case FRAME_END:
+		/*
+		 * A frame that was marked bad is discarded here without
+		 * being counted as an input error.
+		 */
+		if(sc->sc_flags & SC_ERROR) {
+			sc->sc_flags &= ~SC_ERROR;
+			goto newpack;
+		}
+		len = sc->sc_mp - sc->sc_buf;
+		if (len < 3)
+			/* less than min length packet - ignore */
+			goto newpack;
+
+#if NBPFILTER > 0
+		if (sc->sc_bpf) {
+			/*
+			 * Save the compressed header, so we
+			 * can tack it on later.  Note that we
+			 * will end up copying garbage in some
+			 * cases but this is okay.  We remember
+			 * where the buffer started so we can
+			 * compute the new header length.
+			 */
+			bcopy(sc->sc_buf, chdr, CHDR_LEN);
+		}
+#endif
+
+		/*
+		 * From here on c no longer holds the received character:
+		 * it is reused for the packet type derived from the high
+		 * nibble of the first byte of the frame.
+		 */
+		if ((c = (*sc->sc_buf & 0xf0)) != (IPVERSION << 4)) {
+			if (c & 0x80)
+				c = TYPE_COMPRESSED_TCP;
+			else if (c == TYPE_UNCOMPRESSED_TCP)
+				*sc->sc_buf &= 0x4f; /* XXX */
+			/*
+			 * We've got something that's not an IP packet.
+			 * If compression is enabled, try to decompress it.
+			 * Otherwise, if `auto-enable' compression is on and
+			 * it's a reasonable packet, decompress it and then
+			 * enable compression.  Otherwise, drop it.
+			 */
+			if (sc->sc_if.if_flags & SC_COMPRESS) {
+				len = sl_uncompress_tcp(&sc->sc_buf, len,
+							(u_int)c, &sc->sc_comp);
+				if (len <= 0)
+					goto error;
+			} else if ((sc->sc_if.if_flags & SC_AUTOCOMP) &&
+			    c == TYPE_UNCOMPRESSED_TCP && len >= 40) {
+				len = sl_uncompress_tcp(&sc->sc_buf, len,
+							(u_int)c, &sc->sc_comp);
+				if (len <= 0)
+					goto error;
+				sc->sc_if.if_flags |= SC_COMPRESS;
+			} else
+				goto error;
+		}
+#if NBPFILTER > 0
+		if (sc->sc_bpf) {
+			/*
+			 * Put the SLIP pseudo-"link header" in place.
+			 * We couldn't do this any earlier since
+			 * decompression probably moved the buffer
+			 * pointer.  Then, invoke BPF.
+			 * The header is written into the SLIP_HDRLEN bytes
+			 * immediately in front of sc_buf.
+			 */
+			register u_char *hp = sc->sc_buf - SLIP_HDRLEN;
+
+			hp[SLX_DIR] = SLIPDIR_IN;
+			bcopy(chdr, &hp[SLX_CHDR], CHDR_LEN);
+			bpf_tap(sc->sc_bpf, hp, len + SLIP_HDRLEN);
+		}
+#endif
+		m = sl_btom(sc, len);
+		if (m == NULL)
+			goto error;
+
+		sc->sc_if.if_ipackets++;
+		sc->sc_if.if_lastchange = time;
+		/*
+		 * Queue the packet for IP input; when ipintrq is full the
+		 * packet is freed and counted as both an input error and a
+		 * queue drop.
+		 */
+		s = splimp();
+		if (IF_QFULL(&ipintrq)) {
+			IF_DROP(&ipintrq);
+			sc->sc_if.if_ierrors++;
+			sc->sc_if.if_iqdrops++;
+			m_freem(m);
+		} else {
+			IF_ENQUEUE(&ipintrq, m);
+			schednetisr(NETISR_IP);
+		}
+		splx(s);
+		goto newpack;
+	}
+	/* Store the (possibly unescaped) data byte if there is room. */
+	if (sc->sc_mp < sc->sc_ep) {
+		*sc->sc_mp++ = c;
+		sc->sc_escape = 0;
+		return;
+	}
+
+	/*
+	 * No room left: mark the frame bad so that the FRAME_END which
+	 * eventually terminates it is discarded as well.
+	 */
+	/* can't put lower; would miss an extra frame */
+	sc->sc_flags |= SC_ERROR;
+
+error:
+	sc->sc_if.if_ierrors++;
+newpack:
+	sc->sc_mp = sc->sc_buf = sc->sc_ep - SLMAX;
+	sc->sc_escape = 0;
+}
+
+/*
+ * Handle an interface ioctl for a SLIP interface.
+ *
+ * SIOCSIFADDR marks the interface up for an AF_INET address;
+ * SIOCSIFDSTADDR only validates the address family; the multicast
+ * add/delete requests are accepted for AF_INET (when INET is
+ * configured) and nothing else.  Any other command yields EINVAL.
+ * The whole request runs at splimp.  Returns 0 or an errno value.
+ */
+int
+slioctl(ifp, cmd, data)
+	register struct ifnet *ifp;
+	int cmd;
+	caddr_t data;
+{
+	register struct ifaddr *ifa = (struct ifaddr *)data;
+	register struct ifreq *ifr;
+	register int s, error;
+
+	s = splimp();
+	error = 0;
+
+	switch (cmd) {
+
+	case SIOCSIFADDR:
+		if (ifa->ifa_addr->sa_family != AF_INET)
+			error = EAFNOSUPPORT;
+		else
+			ifp->if_flags |= IFF_UP;
+		break;
+
+	case SIOCSIFDSTADDR:
+		if (ifa->ifa_addr->sa_family != AF_INET)
+			error = EAFNOSUPPORT;
+		break;
+
+	case SIOCADDMULTI:
+	case SIOCDELMULTI:
+		ifr = (struct ifreq *)data;
+#ifdef INET
+		/* Only a non-null AF_INET request is acceptable. */
+		if (ifr != 0 && ifr->ifr_addr.sa_family == AF_INET)
+			break;
+#endif
+		error = EAFNOSUPPORT;		/* XXX */
+		break;
+
+	default:
+		error = EINVAL;
+		break;
+	}
+
+	splx(s);
+	return (error);
+}
+#endif
diff --git a/sys/net/if_slvar.h b/sys/net/if_slvar.h
new file mode 100644
index 00000000000..e7b27647284
--- /dev/null
+++ b/sys/net/if_slvar.h
@@ -0,0 +1,80 @@
+/*-
+ * Copyright (c) 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)if_slvar.h	8.3 (Berkeley) 2/1/94
+ *
+ * $Header: if_slvar.h,v 1.3 89/05/31 02:25:18 van Exp $
+ */
+
+/*
+ * Definitions for SLIP interface data structures
+ * 
+ * (This exists so programs like slstats can get at the definition
+ *  of sl_softc.)
+ */
+struct sl_softc {
+	struct	ifnet sc_if;		/* network-visible interface */
+	struct	ifqueue sc_fastq;	/* interactive output queue */
+	struct	tty *sc_ttyp;		/* pointer to tty structure */
+	u_char	*sc_mp;			/* pointer to next available buf char */
+	u_char	*sc_ep;			/* pointer to last available buf char */
+	u_char	*sc_buf;		/* input buffer */
+	u_int	sc_flags;		/* see below */
+	u_int	sc_escape;	/* =1 if last char input was FRAME_ESCAPE */
+	long	sc_lasttime;		/* last time a char arrived */
+	long	sc_abortcount;		/* number of abort escape chars */
+	long	sc_starttime;		/* time of first abort in window */
+#ifdef INET				/* XXX */
+	struct	slcompress sc_comp;	/* tcp compression data */
+#endif
+	caddr_t	sc_bpf;			/* BPF data */
+};
+
+/* internal flags (kept in sc_flags) */
+#define	SC_ERROR	0x0001		/* had an input error */
+
+/* visible flags (aliases for the interface's IFF_LINKn bits) */
+#define	SC_COMPRESS	IFF_LINK0	/* compress TCP traffic */
+#define	SC_NOICMP	IFF_LINK1	/* suppress ICMP traffic */
+#define	SC_AUTOCOMP	IFF_LINK2	/* auto-enable TCP compression */
+
+#ifdef KERNEL
+/* Kernel-only prototypes for the SLIP driver entry points. */
+void	slattach __P((void));
+void	slclose __P((struct tty *));
+void	slinput __P((int, struct tty *));
+int	slioctl __P((struct ifnet *, int, caddr_t));
+int	slopen __P((dev_t, struct tty *));
+int	sloutput __P((struct ifnet *,
+	    struct mbuf *, struct sockaddr *, struct rtentry *));
+void	slstart __P((struct tty *));
+int	sltioctl __P((struct tty *, int, caddr_t, int));
+#endif /* KERNEL */
diff --git a/sys/net/if_types.h b/sys/net/if_types.h
new file mode 100644
index 00000000000..030f234fbac
--- /dev/null
+++ b/sys/net/if_types.h
@@ -0,0 +1,95 @@
+/*
+ * Copyright (c) 1989, 1993, 1994
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)if_types.h	8.2 (Berkeley) 4/20/94
+ */
+
+/*
+ * Interface types for benefit of parsing media address headers.
+ * This list is derived from the SNMP list of ifTypes, currently
+ * documented in RFC1573.
+ */
+
+#define	IFT_OTHER	0x1		/* none of the following */
+#define	IFT_1822	0x2		/* old-style arpanet imp */
+#define	IFT_HDH1822	0x3		/* HDH arpanet imp */
+#define	IFT_X25DDN	0x4		/* x25 to imp */
+#define	IFT_X25		0x5		/* PDN X25 interface (RFC877) */
+#define	IFT_ETHER	0x6		/* Ethernet CSMACD */
+#define	IFT_ISO88023	0x7		/* CSMA CD */
+#define	IFT_ISO88024	0x8		/* Token Bus */
+#define	IFT_ISO88025	0x9		/* Token Ring */
+#define	IFT_ISO88026	0xa		/* MAN */
+#define	IFT_STARLAN	0xb
+#define	IFT_P10		0xc		/* Proteon 10MBit ring */
+#define	IFT_P80		0xd		/* Proteon 80MBit ring */
+#define	IFT_HY		0xe		/* Hyperchannel */
+#define	IFT_FDDI	0xf
+#define	IFT_LAPB	0x10
+#define	IFT_SDLC	0x11
+#define	IFT_T1		0x12
+#define	IFT_CEPT	0x13		/* E1 - european T1 */
+#define	IFT_ISDNBASIC	0x14
+#define	IFT_ISDNPRIMARY	0x15
+#define	IFT_PTPSERIAL	0x16		/* Proprietary PTP serial */
+#define	IFT_PPP		0x17		/* RFC 1331 */
+#define	IFT_LOOP	0x18		/* loopback */
+#define	IFT_EON		0x19		/* ISO over IP */
+#define	IFT_XETHER	0x1a		/* obsolete 3MB experimental ethernet */
+#define	IFT_NSIP	0x1b		/* XNS over IP */
+#define	IFT_SLIP	0x1c		/* IP over generic TTY */
+#define	IFT_ULTRA	0x1d		/* Ultra Technologies */
+#define	IFT_DS3		0x1e		/* Generic T3 */
+#define	IFT_SIP		0x1f		/* SMDS */
+#define	IFT_FRELAY	0x20		/* Frame Relay DTE only */
+#define	IFT_RS232	0x21
+#define	IFT_PARA	0x22		/* parallel-port */
+#define	IFT_ARCNET	0x23
+#define	IFT_ARCNETPLUS	0x24
+#define	IFT_ATM		0x25		/* ATM cells */
+#define	IFT_MIOX25	0x26
+#define	IFT_SONET	0x27		/* SONET or SDH */
+#define	IFT_X25PLE	0x28
+#define	IFT_ISO88022LLC	0x29
+#define	IFT_LOCALTALK	0x2a
+#define	IFT_SMDSDXI	0x2b
+#define	IFT_FRELAYDCE	0x2c		/* Frame Relay DCE */
+#define	IFT_V35		0x2d
+#define	IFT_HSSI	0x2e
+#define	IFT_HIPPI	0x2f
+#define	IFT_MODEM	0x30		/* Generic Modem */
+#define	IFT_AAL5	0x31		/* AAL5 over ATM */
+#define	IFT_SONETPATH	0x32
+#define	IFT_SONETVT	0x33
+#define	IFT_SMDSICIP	0x34		/* SMDS InterCarrier Interface */
+#define	IFT_PROPVIRTUAL	0x35		/* Proprietary Virtual/internal */
+#define	IFT_PROPMUX	0x36		/* Proprietary Multiplexing */
diff --git a/sys/net/netisr.h b/sys/net/netisr.h
new file mode 100644
index 00000000000..e2e465379d3
--- /dev/null
+++ b/sys/net/netisr.h
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 1980, 1986, 1989, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)netisr.h	8.1 (Berkeley) 6/10/93
+ */
+
+/*
+ * The networking code runs off software interrupts.
+ *
+ * You can switch into the network by doing splnet() and return by splx().
+ * The software interrupt level for the network is higher than the software
+ * level for the clock (so you can enter the network in routines called
+ * at timeout time).
+ */
+#if defined(vax) || defined(tahoe)
+/*
+ * On vax and tahoe, setsoftnet() is mtpr(SIRR, 12); presumably this
+ * requests the network software interrupt.  Other machines must supply
+ * their own setsoftnet() for the generic schednetisr() below.
+ */
+#define	setsoftnet()	mtpr(SIRR, 12)
+#endif
+
+/*
+ * Each ``pup-level-1'' input queue has a bit in a ``netisr'' status
+ * word which is used to de-multiplex a single software
+ * interrupt used for scheduling the network code to calls
+ * on the lowest level routine of each protocol.
+ */
+#define	NETISR_RAW	0		/* same as AF_UNSPEC */
+#define	NETISR_IP	2		/* same as AF_INET */
+#define	NETISR_IMP	3		/* same as AF_IMPLINK */
+#define	NETISR_NS	6		/* same as AF_NS */
+#define	NETISR_ISO	7		/* same as AF_ISO */
+#define	NETISR_CCITT	10		/* same as AF_CCITT */
+#define	NETISR_ARP	18		/* same as AF_LINK */
+
+/*
+ * Set bit `anisr' in netisr and call setsoftnet().
+ * NOTE(review): this expands to a brace-enclosed compound statement,
+ * not an expression, so "if (x) schednetisr(n); else ..." will not
+ * parse without braces around the call.
+ */
+#define	schednetisr(anisr)	{ netisr |= 1<<(anisr); setsoftnet(); }
+
+#ifdef i386
+/* XXX Temporary -- soon to vanish - wfj */
+#define	NETISR_SCLK	11		/* softclock */
+#define	NETISR_AST	12		/* ast -- resched */
+
+/*
+ * i386 replacement for schednetisr(): instead of calling setsoftnet(),
+ * it increments softem when netisr was previously zero, then sets the
+ * requested bit.  Like the generic version it is a compound statement.
+ */
+#undef	schednetisr
+#define	schednetisr(anisr)	{\
+	if(netisr == 0) { \
+		softem++; \
+	} \
+	netisr |= 1<<(anisr); \
+}
+#ifndef LOCORE
+#ifdef KERNEL
+/*
+ * NOTE(review): this is a definition, not an extern declaration, so
+ * every kernel file including this header carries a tentative
+ * definition of softem.
+ */
+int	softem;	
+#endif
+#endif
+#endif /* i386 */
+
+#ifndef LOCORE
+#ifdef KERNEL
+/*
+ * NOTE(review): as with softem above, this is a definition in a header
+ * rather than an extern declaration.
+ */
+int	netisr;				/* scheduling bits for network */
+#endif
+#endif
diff --git a/sys/net/radix.c b/sys/net/radix.c
new file mode 100644
index 00000000000..f182eb77abf
--- /dev/null
+++ b/sys/net/radix.c
@@ -0,0 +1,757 @@
+/*
+ * Copyright (c) 1988, 1989, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)radix.c	8.2 (Berkeley) 1/4/94
+ */
+
+/*
+ * Routines to build and maintain radix trees for routing lookups.
+ */
+#ifndef RNF_NORMAL
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/malloc.h>
+#define	M_DONTWAIT M_NOWAIT
+#ifdef	KERNEL
+#include <sys/domain.h>
+#endif
+#endif
+
+#include <net/radix.h>
+
+/* Sizes the key storage that rn_addmask allocates after each node pair. */
+int	max_keylen;
+struct radix_mask *rn_mkfreelist;
+/* Head of the tree in which rn_addmask stores netmasks. */
+struct radix_node_head *mask_rnhead;
+/* Set by rn_addmask once a mask with a partially set byte is entered. */
+static int gotOddMasks;
+/* Scratch buffer into which rn_match builds the masked lookup key. */
+static char *maskedKey;
+static char *rn_zeros, *rn_ones;
+
+#define rn_masktop (mask_rnhead->rnh_treetop)
+/*
+ * Local Bcmp: a zero length compares equal without calling bcmp.
+ * Note that the length argument l is evaluated twice.
+ */
+#undef Bcmp
+#define Bcmp(a, b, l) (l == 0 ? 0 : bcmp((caddr_t)(a), (caddr_t)(b), (u_long)l))
+/*
+ * The data structure for the keys is a radix tree with one way
+ * branching removed.  The index rn_b at an internal node n represents a bit
+ * position to be tested.  The tree is arranged so that all descendants
+ * of a node n have keys whose bits all agree up to position rn_b - 1.
+ * (We say the index of n is rn_b.)
+ *
+ * There is at least one descendant which has a one bit at position rn_b,
+ * and at least one with a zero there.
+ *
+ * A route is determined by a pair of key and mask.  We require the
+ * bit-wise logical and of the key and mask to be the key.
+ * We define the index of a route associated with the mask to be
+ * the first bit number in the mask where 0 occurs (with bit number 0
+ * representing the highest order bit).
+ * 
+ * We say a mask is normal if every bit is 0, past the index of the mask.
+ * If a node n has a descendant (k, m) with index(m) == index(n) == rn_b,
+ * and m is a normal mask, then the route applies to every descendant of n.
+ * If the index(m) < rn_b, this implies the trailing last few bits of k
+ * before bit b are all 0, (and hence consequently true of every descendant
+ * of n), so the route applies to all descendants of the node as well.
+ *
+ * The present version of the code makes no use of normal routes,
+ * but similar logic shows that a non-normal mask m such that
+ * index(m) <= index(n) could potentially apply to many children of n.
+ * Thus, for each non-host route, we attach its mask to a list at an internal
+ * node as high in the tree as we can go. 
+ */
+
+/*
+ * Descend from head, at each internal node (rn_b >= 0) going right when
+ * the key bit selected by rn_off/rn_bmask is set and left otherwise.
+ * Returns the first node reached whose rn_b is negative.  The key is
+ * not compared against the node that is returned.
+ */
+struct radix_node *
+rn_search(v_arg, head)
+	void *v_arg;
+	struct radix_node *head;
+{
+	register caddr_t key = v_arg;
+	register struct radix_node *node = head;
+
+	while (node->rn_b >= 0)
+		node = (node->rn_bmask & key[node->rn_off]) ?
+		    node->rn_r : node->rn_l;
+	return (node);
+}
+
+/*
+ * Like rn_search, but a bit only counts as set when it is set in both
+ * the key (v_arg) and the mask (m_arg); in every other case the walk
+ * goes left.  Returns the first node reached whose rn_b is negative.
+ */
+struct radix_node *
+rn_search_m(v_arg, head, m_arg)
+	struct radix_node *head;
+	void *v_arg, *m_arg;
+{
+	register caddr_t key = v_arg, mask = m_arg;
+	register struct radix_node *node = head;
+
+	while (node->rn_b >= 0) {
+		if ((node->rn_bmask & mask[node->rn_off]) &&
+		    (node->rn_bmask & key[node->rn_off]))
+			node = node->rn_r;
+		else
+			node = node->rn_l;
+	}
+	return node;
+}
+
+/*
+ * Compare two masks, each stored with its length in its first byte.
+ * Returns 1 when every bit set in n_arg is also set in m_arg and the
+ * two masks are not identical; returns 0 otherwise.
+ */
+int
+rn_refines(m_arg, n_arg)
+	void *m_arg, *n_arg;
+{
+	register caddr_t m = m_arg, n = n_arg;
+	int nlen = *(u_char *)n;
+	int mlen = *(u_char *)m;
+	int longer = nlen - mlen;
+	caddr_t n_end = n + nlen;		/* end of n */
+	caddr_t common_end = n_end;		/* end of the shared part */
+	int differ = 0;
+
+	/* Step over the length bytes. */
+	n++;
+	m++;
+	if (longer > 0)
+		common_end -= longer;
+
+	/* Over the shared length, n may not have a bit that m lacks. */
+	for (; n < common_end; n++, m++) {
+		if (*n & ~(*m))
+			return 0;
+		if (*n != *m)
+			differ = 1;
+	}
+	/* Whatever n has beyond the end of m must be zero. */
+	for (; n < n_end; n++)
+		if (*n)
+			return 0;
+	/* Equal so far, but m is longer: any nonzero tail byte decides. */
+	if (!differ && longer < 0) {
+		caddr_t m_end = m - longer;
+
+		while (m < m_end)
+			if (*m++)
+				return 1;
+	}
+	return (differ);
+}
+
+
+/*
+ * Look up the key v_arg in the tree whose top is head->rnh_treetop.
+ * The first byte of the key is taken as its length.
+ *
+ * The lookup proceeds in three stages:
+ *  1. Descend to a leaf and compare key bytes from the top node's
+ *     rn_off up to the key length.  On a full match that leaf is
+ *     returned (or the next entry on its rn_dupedkey chain when the
+ *     leaf is flagged RNF_ROOT and has one).
+ *  2. Otherwise each entry on the leaf's rn_dupedkey chain that has a
+ *     mask is compared under that mask, starting at the first byte
+ *     that failed to match.
+ *  3. Otherwise climb through rn_p; for every mask on a node's
+ *     rn_mklist the key is masked into maskedKey, the subtree is
+ *     searched again, and a leaf carrying that same mask pointer
+ *     whose key equals the masked key is returned.
+ * Returns 0 when the climb reaches the top without a match.
+ */
+struct radix_node *
+rn_match(v_arg, head)
+	void *v_arg;
+	struct radix_node_head *head;
+{
+	caddr_t v = v_arg;
+	register struct radix_node *t = head->rnh_treetop, *x;
+	register caddr_t cp = v, cp2, cp3;
+	caddr_t cplim, mstart;
+	struct radix_node *saved_t, *top = t;
+	int off = t->rn_off, vlen = *(u_char *)cp, matched_off;
+
+	/*
+	 * Open code rn_search(v, top) to avoid overhead of extra
+	 * subroutine call.
+	 */
+	for (; t->rn_b >= 0; ) {
+		if (t->rn_bmask & cp[t->rn_off])
+			t = t->rn_r;
+		else
+			t = t->rn_l;
+	}
+	/*
+	 * See if we match exactly as a host destination
+	 */
+	cp += off; cp2 = t->rn_key + off; cplim = v + vlen;
+	for (; cp < cplim; cp++, cp2++)
+		if (*cp != *cp2)
+			goto on1;
+	/*
+	 * This extra grot is in case we are explicitly asked
+	 * to look up the default.  Ugh!
+	 */
+	if ((t->rn_flags & RNF_ROOT) && t->rn_dupedkey)
+		t = t->rn_dupedkey;
+	return t;
+on1:
+	/* cp points at the first key byte that differed from the leaf. */
+	matched_off = cp - v;
+	saved_t = t;
+	do {
+	    if (t->rn_mask) {
+		/*
+		 * Even if we don't match exactly as a hosts;
+		 * we may match if the leaf we wound up at is
+		 * a route to a net.
+		 */
+		cp3 = matched_off + t->rn_mask;
+		cp2 = matched_off + t->rn_key;
+		for (; cp < cplim; cp++)
+			if ((*cp2++ ^ *cp) & *cp3++)
+				break;
+		if (cp == cplim)
+			return t;
+		/* rewind cp for the next entry on the chain */
+		cp = matched_off + v;
+	    }
+	} while (t = t->rn_dupedkey);
+	t = saved_t;
+	/* start searching up the tree */
+	do {
+		register struct radix_mask *m;
+		t = t->rn_p;
+		if (m = t->rn_mklist) {
+			/*
+			 * After doing measurements here, it may
+			 * turn out to be faster to open code
+			 * rn_search_m here instead of always
+			 * copying and masking.
+			 */
+			off = min(t->rn_off, matched_off);
+			mstart = maskedKey + off;
+			do {
+				/*
+				 * Build key & mask in maskedKey from
+				 * byte `off' onward, then search the
+				 * subtree below t with it.
+				 */
+				cp2 = mstart;
+				cp3 = m->rm_mask + off;
+				for (cp = v + off; cp < cplim;)
+					*cp2++ =  *cp++ & *cp3++;
+				x = rn_search(maskedKey, t);
+				/* only a leaf with this very mask qualifies */
+				while (x && x->rn_mask != m->rm_mask)
+					x = x->rn_dupedkey;
+				if (x &&
+				    (Bcmp(mstart, x->rn_key + off,
+					vlen - off) == 0))
+					    return x;
+			} while (m = m->rm_mklist);
+		}
+	} while (t != top);
+	return 0;
+};
+		
+#ifdef RN_DEBUG
+/*
+ * Debug-only bookkeeping: rn_nodenum supplies the rn_info number given
+ * to each node as it is created, rn_clist heads the list of created
+ * leaves (linked through rn_ybro), and rn_debug enables the tree dumps
+ * printed by rn_insert.
+ */
+int	rn_nodenum;
+struct	radix_node *rn_clist;
+int	rn_saveinfo;
+int	rn_debug =  1;
+#endif
+
+/*
+ * Initialise the two caller-supplied nodes as a leaf/internal pair:
+ * nodes[0] becomes a leaf (rn_b == -1) holding key v, and nodes[1]
+ * becomes an internal node testing bit b, with the leaf as its left
+ * child and as the leaf's parent.  The internal node's right child and
+ * parent are left for the caller to fill in.  Returns the internal node.
+ */
+struct radix_node *
+rn_newpair(v, b, nodes)
+	void *v;
+	int b;
+	struct radix_node nodes[2];
+{
+	register struct radix_node *leaf = &nodes[0];
+	register struct radix_node *inner = &nodes[1];
+
+	/* internal node: byte offset and bit mask derived from b */
+	inner->rn_b = b;
+	inner->rn_bmask = 0x80 >> (b & 7);
+	inner->rn_off = b >> 3;
+	inner->rn_l = leaf;
+	inner->rn_flags = RNF_ACTIVE;
+
+	/* leaf node */
+	leaf->rn_b = -1;
+	leaf->rn_key = (caddr_t)v;
+	leaf->rn_p = inner;
+	leaf->rn_flags = RNF_ACTIVE;
+#ifdef RN_DEBUG
+	leaf->rn_info = rn_nodenum++;
+	inner->rn_info = rn_nodenum++;
+	leaf->rn_twin = inner;
+	leaf->rn_ybro = rn_clist;
+	rn_clist = leaf;
+#endif
+	return inner;
+}
+
+/*
+ * Insert the key v_arg into the tree under head, using the two
+ * caller-supplied nodes.
+ *
+ * If the leaf found by rn_search already holds the same key (compared
+ * from the top node's rn_off up to the key length in the key's first
+ * byte), *dupentry is set to 1 and that existing leaf is returned;
+ * nodes[] is not touched.  Otherwise *dupentry is set to 0, nodes[] is
+ * set up by rn_newpair as a leaf plus an internal node testing the
+ * first differing bit, the pair is spliced into the tree, and the new
+ * leaf is returned.
+ */
+struct radix_node *
+rn_insert(v_arg, head, dupentry, nodes)
+	void *v_arg;
+	struct radix_node_head *head;
+	int *dupentry;
+	struct radix_node nodes[2];
+{
+	caddr_t v = v_arg;
+	struct radix_node *top = head->rnh_treetop;
+	int head_off = top->rn_off, vlen = (int)*((u_char *)v);
+	register struct radix_node *t = rn_search(v_arg, top);
+	register caddr_t cp = v + head_off;
+	register int b;
+	struct radix_node *tt;
+    	/*
+	 *find first bit at which v and t->rn_key differ
+	 */
+    {
+	register caddr_t cp2 = t->rn_key + head_off;
+	register int cmp_res;
+	caddr_t cplim = v + vlen;
+
+	while (cp < cplim)
+		if (*cp2++ != *cp++)
+			goto on1;
+	*dupentry = 1;
+	return t;
+on1:
+	*dupentry = 0;
+	/*
+	 * cp and cp2 are one past the differing byte.  b starts at the
+	 * bit index just beyond that byte and is stepped back once for
+	 * every shift needed to clear the XOR, which leaves it at the
+	 * most significant differing bit (bit 0 being the high-order bit
+	 * of byte 0).
+	 */
+	cmp_res = (cp[-1] ^ cp2[-1]) & 0xff;
+	for (b = (cp - v) << 3; cmp_res; b--)
+		cmp_res >>= 1;
+    }
+    {
+	register struct radix_node *p, *x = top;
+	cp = v;
+	/*
+	 * Walk down again until reaching either a leaf or an internal
+	 * node that tests a bit at or beyond b; p trails one step behind
+	 * as the node under which the new pair is attached.
+	 */
+	do {
+		p = x;
+		if (cp[x->rn_off] & x->rn_bmask) 
+			x = x->rn_r;
+		else x = x->rn_l;
+	} while (b > (unsigned) x->rn_b); /* x->rn_b < b && x->rn_b >= 0 */
+#ifdef RN_DEBUG
+	if (rn_debug)
+		printf("Going In:\n"), traverse(p);
+#endif
+	/* t is the new internal node, tt the new leaf */
+	t = rn_newpair(v_arg, b, nodes); tt = t->rn_l;
+	if ((cp[p->rn_off] & p->rn_bmask) == 0)
+		p->rn_l = t;
+	else
+		p->rn_r = t;
+	x->rn_p = t; t->rn_p = p; /* frees x, p as temp vars below */
+	/*
+	 * rn_newpair put the new leaf on the left; swap it to the right
+	 * when the key has bit b set, with the old subtree x taking the
+	 * other side.
+	 */
+	if ((cp[t->rn_off] & t->rn_bmask) == 0) {
+		t->rn_r = x;
+	} else {
+		t->rn_r = tt; t->rn_l = x;
+	}
+#ifdef RN_DEBUG
+	if (rn_debug)
+		printf("Coming out:\n"), traverse(p);
+#endif
+    }
+	return (tt);
+}
+
+/*
+ * Enter the netmask n_arg (length in its first byte) into the mask
+ * tree rooted at mask_rnhead and return its leaf.
+ *
+ *	search	when nonzero, first look the mask up and return the
+ *		existing leaf if its first mlen bytes are identical.
+ *	skip	number of leading mask bytes that are not examined when
+ *		the index of the mask is computed.
+ *
+ * A new leaf gets rn_b = -1 - index, where index is the number of the
+ * first zero bit of the mask at or after byte `skip'.  gotOddMasks is
+ * set when the first byte that is not 0xff is also not zero.
+ *
+ * Returns 0 if memory cannot be allocated.  If rn_insert reports that
+ * an identical key is already in the tree, the newly allocated storage
+ * is released and the existing leaf is returned unmodified; previously
+ * that storage was leaked and the existing leaf's rn_b was overwritten.
+ */
+struct radix_node *
+rn_addmask(n_arg, search, skip)
+	int search, skip;
+	void *n_arg;
+{
+	caddr_t netmask = (caddr_t)n_arg;
+	register struct radix_node *x;
+	register caddr_t cp, cplim;
+	register int b, mlen, j;
+	int maskduplicated;
+	struct radix_node *saved_x;
+
+	mlen = *(u_char *)netmask;
+	if (search) {
+		x = rn_search(netmask, rn_masktop);
+		if (Bcmp(netmask, x->rn_key, mlen) == 0)
+			return (x);
+	}
+	/* One allocation holds the node pair followed by the key copy. */
+	R_Malloc(x, struct radix_node *, max_keylen + 2 * sizeof (*x));
+	if (x == 0)
+		return (0);
+	saved_x = x;
+	Bzero(x, max_keylen + 2 * sizeof (*x));
+	cp = (caddr_t)(x + 2);
+	Bcopy(netmask, cp, mlen);
+	netmask = cp;
+	x = rn_insert(netmask, mask_rnhead, &maskduplicated, x);
+	if (maskduplicated) {
+		/*
+		 * rn_insert handed back a leaf that was already in the
+		 * tree and never linked in our nodes; nothing refers to
+		 * the new storage, so give it back.
+		 */
+		Free(saved_x);
+		return (x);
+	}
+	/*
+	 * Calculate index of mask.
+	 */
+	cplim = netmask + mlen;
+	for (cp = netmask + skip; cp < cplim; cp++)
+		if (*(u_char *)cp != 0xff)
+			break;
+	b = (cp - netmask) << 3;
+	if (cp != cplim && *cp != 0) {
+		/* partially set byte: count its leading one bits */
+		gotOddMasks = 1;
+		for (j = 0x80; j; b++, j >>= 1)
+			if ((j & *cp) == 0)
+				break;
+	}
+	x->rn_b = -1 - b;
+	return (x);
+}
+
+struct radix_node *
+rn_addroute(v_arg, n_arg, head, treenodes)
+	void *v_arg, *n_arg;		/* key, and netmask or 0 for none */
+	struct radix_node_head *head;	/* tree to insert into */
+	struct radix_node treenodes[2];	/* caller's storage for the new nodes */
+{
+	caddr_t v = (caddr_t)v_arg, netmask = (caddr_t)n_arg;
+	register struct radix_node *t, *x, *tt;
+	struct radix_node *saved_tt, *top = head->rnh_treetop;
+	short b = 0, b_leaf;
+	int mlen, keyduplicated;	/* mlen is set only when netmask != 0 */
+	caddr_t cplim;
+	struct radix_mask *m, **mp;
+
+	/*
+	 * Insert key v (with optional netmask) in head; return its leaf, or
+	 * 0 if the mask cannot be entered or this key/mask pair exists.
+	 * Masks are kept unique in the mask tree, so two routes share a
+	 * mask iff their mask pointers are equal, and the mask's leaf
+	 * supplies its precomputed index (b = -1 - rn_b).
+	 */
+	if (netmask)  {
+		x = rn_search(netmask, rn_masktop);
+		mlen = *(u_char *)netmask;
+		if (Bcmp(netmask, x->rn_key, mlen) != 0) {
+			x = rn_addmask(netmask, 0, top->rn_off);	/* not in mask tree yet */
+			if (x == 0)
+				return (0);
+		}
+		netmask = x->rn_key;	/* from here on: the mask tree's own copy */
+		b = -1 - x->rn_b;
+	}
+	/*
+	 * Deal with duplicated keys: attach node to previous instance
+	 */
+	saved_tt = tt = rn_insert(v, head, &keyduplicated, treenodes);
+	if (keyduplicated) {
+		do {
+			if (tt->rn_mask == netmask)
+				return (0);	/* same key and same mask: refuse */
+			t = tt;
+			if (netmask == 0 ||
+			    (tt->rn_mask && rn_refines(netmask, tt->rn_mask)))
+				break;
+		} while (tt = tt->rn_dupedkey);
+		/*
+		 * If the mask is not duplicated, we wouldn't find it among the
+		 * duplicate key entries anyway, so the above test doesn't hurt.
+		 *
+		 * Masks for a duplicated key are meant to be sorted as in a
+		 * masklist -- most specific to least specific -- which may
+		 * require relocating the head of the list.
+		 * NOTE(review): t == tt whenever the loop breaks, so a break past
+		 * the head links the new node after tt, not before -- confirm.
+		 */
+		if (tt && t == saved_tt) {
+			struct	radix_node *xx = x;	/* x is borrowed as a temporary below */
+			/* link in at head of list */
+			(tt = treenodes)->rn_dupedkey = t;
+			tt->rn_flags = t->rn_flags;
+			tt->rn_p = x = t->rn_p;
+			if (x->rn_l == t) x->rn_l = tt; else x->rn_r = tt;
+			saved_tt = tt; x = xx;
+		} else {
+			/* link in directly after t */
+			(tt = treenodes)->rn_dupedkey = t->rn_dupedkey;
+			t->rn_dupedkey = tt;
+		}
+#ifdef RN_DEBUG
+		t=tt+1; tt->rn_info = rn_nodenum++; t->rn_info = rn_nodenum++;
+		tt->rn_twin = t; tt->rn_ybro = rn_clist; rn_clist = tt;
+#endif
+		t = saved_tt;
+		tt->rn_key = (caddr_t) v;
+		tt->rn_b = -1;
+		tt->rn_flags = t->rn_flags & ~RNF_ROOT;
+	}
+	/*
+	 * Put mask in tree.
+	 */
+	if (netmask) {
+		tt->rn_mask = netmask;
+		tt->rn_b = x->rn_b;	/* x is still the mask's leaf in the mask tree */
+	}
+	t = saved_tt->rn_p;
+	b_leaf = -1 - t->rn_b;
+	if (t->rn_r == saved_tt) x = t->rn_l; else x = t->rn_r;	/* x = sibling */
+	/* Promote general routes from below */
+	if (x->rn_b < 0) {		/* sibling is a leaf */
+		if (x->rn_mask && (x->rn_b >= b_leaf) && x->rn_mklist == 0) {
+			MKGet(m);
+			if (m) {
+				Bzero(m, sizeof *m);
+				m->rm_b = x->rn_b;
+				m->rm_mask = x->rn_mask;
+				x->rn_mklist = t->rn_mklist = m;
+			}
+		}
+	} else if (x->rn_mklist) {
+		/*
+		 * Skip over masks whose index is > that of new node
+		 */
+		for (mp = &x->rn_mklist; m = *mp; mp = &m->rm_mklist)
+			if (m->rm_b >= b_leaf)
+				break;
+		t->rn_mklist = m; *mp = 0;	/* rest of the list moves up to t */
+	}
+	/* Add new route to highest possible ancestor's list */
+	if ((netmask == 0) || (b > t->rn_b ))
+		return tt; /* can't lift at all */
+	b_leaf = tt->rn_b;
+	do {
+		x = t;
+		t = t->rn_p;
+	} while (b <= t->rn_b && x != top);
+	/*
+	 * Search through routes associated with node to
+	 * insert new route according to index.
+	 * For nodes of equal index, place more specific
+	 * masks first.
+	 */
+	cplim = netmask + mlen;		/* NOTE(review): cplim is not read below */
+	for (mp = &x->rn_mklist; m = *mp; mp = &m->rm_mklist) {
+		if (m->rm_b < b_leaf)
+			continue;
+		if (m->rm_b > b_leaf)
+			break;
+		if (m->rm_mask == netmask) {
+			m->rm_refs++;	/* share the existing annotation */
+			tt->rn_mklist = m;
+			return tt;
+		}
+		if (rn_refines(netmask, m->rm_mask))
+			break;
+	}
+	MKGet(m);
+	if (m == 0) {
+		printf("Mask for route not entered\n");
+		return (tt);		/* route is in the tree, but unannotated */
+	}
+	Bzero(m, sizeof *m);
+	m->rm_b = b_leaf;
+	m->rm_mask = netmask;
+	m->rm_mklist = *mp;
+	*mp = m;
+	tt->rn_mklist = m;
+	return tt;
+}
+
+struct radix_node *
+rn_delete(v_arg, netmask_arg, head)
+	void *v_arg, *netmask_arg;	/* key and (optional) mask of the route to remove */
+	struct radix_node_head *head;	/* tree to remove it from */
+{
+	register struct radix_node *t, *p, *x, *tt;
+	struct radix_mask *m, *saved_m, **mp;
+	struct radix_node *dupedkey, *saved_tt, *top;
+	caddr_t v, netmask;
+	int b, head_off, vlen;
+
+	v = v_arg;
+	netmask = netmask_arg;
+	x = head->rnh_treetop;
+	tt = rn_search(v, x);
+	head_off = x->rn_off;
+	vlen =  *(u_char *)v;
+	saved_tt = tt;
+	top = x;
+	if (tt == 0 ||
+	    Bcmp(v + head_off, tt->rn_key + head_off, vlen - head_off))
+		return (0);		/* no leaf with exactly this key */
+	/*
+	 * Delete our route from mask lists.
+	 */
+	if (dupedkey = tt->rn_dupedkey) {
+		/* NOTE(review): the mask is only matched when the key has duplicates */
+		if (netmask) 
+			netmask = rn_search(netmask, rn_masktop)->rn_key;
+		while (tt->rn_mask != netmask)
+			if ((tt = tt->rn_dupedkey) == 0)
+				return (0);
+	}
+	if (tt->rn_mask == 0 || (saved_m = m = tt->rn_mklist) == 0)
+		goto on1;		/* nothing was annotated for this route */
+	if (m->rm_mask != tt->rn_mask) {
+		printf("rn_delete: inconsistent annotation\n");
+		goto on1;
+	}
+	if (--m->rm_refs >= 0)
+		goto on1;		/* annotation still shared by another route */
+	b = -1 - tt->rn_b;
+	t = saved_tt->rn_p;
+	if (b > t->rn_b)
+		goto on1; /* Wasn't lifted at all */
+	do {				/* same climb as in rn_addroute */
+		x = t;
+		t = t->rn_p;
+	} while (b <= t->rn_b && x != top);
+	for (mp = &x->rn_mklist; m = *mp; mp = &m->rm_mklist)
+		if (m == saved_m) {
+			*mp = m->rm_mklist;
+			MKFree(m);
+			break;
+		}
+	if (m == 0)
+		printf("rn_delete: couldn't find our annotation\n");
+on1:
+	/*
+	 * Eliminate us from tree
+	 */
+	if (tt->rn_flags & RNF_ROOT)
+		return (0);		/* root leaves are never removed */
+#ifdef RN_DEBUG
+	/* Get us out of the creation list */
+	for (t = rn_clist; t && t->rn_ybro != tt; t = t->rn_ybro) {}
+	if (t) t->rn_ybro = tt->rn_ybro;
+#endif
+	t = tt->rn_p;
+	if (dupedkey) {			/* key stays in the tree; only unchain tt */
+		if (tt == saved_tt) {
+			x = dupedkey; x->rn_p = t;	/* next duplicate becomes the head */
+			if (t->rn_l == tt) t->rn_l = x; else t->rn_r = x;
+		} else {
+			for (x = p = saved_tt; p && p->rn_dupedkey != tt;)
+				p = p->rn_dupedkey;
+			if (p) p->rn_dupedkey = tt->rn_dupedkey;
+			else printf("rn_delete: couldn't find us\n");
+		}
+		t = tt + 1;		/* the node that follows tt in its pair */
+		if  (t->rn_flags & RNF_ACTIVE) {
+			/* still in use: move it into the node following x */
+#ifndef RN_DEBUG
+			*++x = *t; p = t->rn_p;
+#else
+			b = t->rn_info; *++x = *t; t->rn_info = b; p = t->rn_p;
+#endif
+			if (p->rn_l == t) p->rn_l = x; else p->rn_r = x;
+			x->rn_l->rn_p = x; x->rn_r->rn_p = x;
+		}
+		goto out;
+	}
+	/* No duplicates: the sibling x takes the place of parent t under p. */
+	if (t->rn_l == tt) x = t->rn_r; else x = t->rn_l;
+	p = t->rn_p;
+	if (p->rn_r == t) p->rn_r = x; else p->rn_l = x;
+	x->rn_p = p;
+	/*
+	 * Demote routes attached to us.
+	 */
+	if (t->rn_mklist) {
+		if (x->rn_b >= 0) {	/* sibling is internal: append t's list to its own */
+			for (mp = &x->rn_mklist; m = *mp;)
+				mp = &m->rm_mklist;
+			*mp = t->rn_mklist;
+		} else {
+			for (m = t->rn_mklist; m;) {
+				struct radix_mask *mm = m->rm_mklist;
+				if (m == x->rn_mklist && (--(m->rm_refs) < 0)) {
+					x->rn_mklist = 0;
+					MKFree(m);
+				} else 
+					printf("%s %x at %x\n",
+					    "rn_delete: Orphaned Mask", m, x);
+				m = mm;
+			}
+		}
+	}
+	/*
+	 * We may be holding an active internal node in the tree.
+	 */
+	x = tt + 1;
+	if (t != x) {			/* tt's partner is in use elsewhere: move it into t */
+#ifndef RN_DEBUG
+		*t = *x;
+#else
+		b = t->rn_info; *t = *x; t->rn_info = b;
+#endif
+		t->rn_l->rn_p = t; t->rn_r->rn_p = t;
+		p = x->rn_p;
+		if (p->rn_l == x) p->rn_l = t; else p->rn_r = t;
+	}
+out:
+	tt->rn_flags &= ~RNF_ACTIVE;	/* both nodes of the pair are now free */
+	tt[1].rn_flags &= ~RNF_ACTIVE;
+	return (tt);
+}
+
+int
+rn_walktree(h, f, w)
+	struct radix_node_head *h;
+	register int (*f)();
+	void *w;
+{
+	register struct radix_node *cur, *leaf;
+	struct radix_node *succ, *more;
+	int rv;
+
+	/*
+	 * Apply f to every non-root leaf, stopping at (and returning)
+	 * its first non-zero result.  f may delete the leaf it is given,
+	 * so the successor leaf is located before f is ever called.
+	 */
+	for (leaf = h->rnh_treetop; leaf->rn_b >= 0; leaf = leaf->rn_l)
+		continue;		/* descend to the leftmost leaf */
+	do {
+		/* Climb while we are a right child (stopping at a root). */
+		for (cur = leaf; cur->rn_p->rn_r == cur; cur = cur->rn_p)
+			if (cur->rn_flags & RNF_ROOT)
+				break;
+		/* Step right, then descend left to the next leaf. */
+		succ = cur->rn_p->rn_r;
+		while (succ->rn_b >= 0)
+			succ = succ->rn_l;
+		/* Hand each non-root leaf on this key's dupedkey chain to f. */
+		for (; leaf != 0; leaf = more) {
+			more = leaf->rn_dupedkey;
+			if (leaf->rn_flags & RNF_ROOT)
+				continue;
+			if ((rv = (*f)(leaf, w)) != 0)
+				return (rv);
+		}
+		leaf = succ;
+	} while ((leaf->rn_flags & RNF_ROOT) == 0);
+	return (0);
+}
+
+int			/* 1 = ready (or already set up), 0 = out of memory */
+rn_inithead(head, off)
+	void **head;	/* in/out: receives the new head unless already non-zero */
+	int off;	/* handed to rn_newpair; root leaves get rn_b = -1 - off */
+{
+	register struct radix_node_head *rnh;
+	register struct radix_node *top, *lo, *hi;
+	if (*head != 0)
+		return (1);
+	R_Malloc(rnh, struct radix_node_head *, sizeof (*rnh));
+	if (rnh == 0)
+		return (0);
+	Bzero(rnh, sizeof (*rnh));
+	*head = rnh;
+	top = rn_newpair(rn_zeros, off, rnh->rnh_nodes);
+	lo = top->rn_l;
+	hi = &rnh->rnh_nodes[2];	/* third node serves as the right leaf */
+	top->rn_p = top;		/* the top node is its own parent */
+	top->rn_r = hi;
+	lo->rn_flags = top->rn_flags = RNF_ROOT | RNF_ACTIVE;
+	lo->rn_b = -1 - off;
+	*hi = *lo;			/* same as the left leaf ... */
+	hi->rn_key = rn_ones;		/* ... but keyed by rn_ones */
+	rnh->rnh_treetop = top;
+	rnh->rnh_walktree = rn_walktree;
+	rnh->rnh_matchaddr = rn_match;
+	rnh->rnh_deladdr = rn_delete;
+	rnh->rnh_addaddr = rn_addroute;
+	return (1);
+}
+
+void
+rn_init()
+{
+	char *p, *end;
+#ifdef KERNEL
+	struct domain *dp;
+
+	for (dp = domains; dp != 0; dp = dp->dom_next)	/* widest routing key */
+		if (max_keylen < dp->dom_maxrtkey)
+			max_keylen = dp->dom_maxrtkey;
+#endif
+	if (max_keylen == 0) {
+		printf("rn_init: radix functions require max_keylen be set\n");
+		return;
+	}
+	R_Malloc(rn_zeros, char *, 3 * max_keylen);	/* zeros, ones, maskedKey */
+	if (rn_zeros == NULL)
+		panic("rn_init");
+	Bzero(rn_zeros, 3 * max_keylen);
+	rn_ones = p = rn_zeros + max_keylen;
+	maskedKey = end = p + max_keylen;
+	for (; p < end; p++)		/* middle third becomes all one bits */
+		*p = -1;
+	if (rn_inithead((void **)&mask_rnhead, 0) == 0)
+		panic("rn_init 2");
+}
diff --git a/sys/net/radix.h b/sys/net/radix.h
new file mode 100644
index 00000000000..a11057f0439
--- /dev/null
+++ b/sys/net/radix.h
@@ -0,0 +1,153 @@
+/*
+ * Copyright (c) 1988, 1989, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)radix.h	8.1 (Berkeley) 6/10/93
+ */
+
+#ifndef _RADIX_H_
+#define	_RADIX_H_
+
+/*
+ * Radix search tree node layout.
+ */
+
+struct radix_node {
+	struct	radix_mask *rn_mklist;	/* list of masks contained in subtree */
+	struct	radix_node *rn_p;	/* parent */
+	short	rn_b;			/* >= 0: internal node; < 0: leaf, -1-index(netmask) */
+	char	rn_bmask;		/* node: mask for bit test in key byte rn_off */
+	u_char	rn_flags;		/* enumerated next */
+#define RNF_NORMAL	1		/* leaf contains normal route */
+#define RNF_ROOT	2		/* root leaf or top node (set by rn_inithead) */
+#define RNF_ACTIVE	4		/* This node is alive (for rtfree) */
+	union {
+		struct {			/* leaf only data: */
+			caddr_t	rn_Key;	/* object of search */
+			caddr_t	rn_Mask;	/* netmask, if present */
+			struct	radix_node *rn_Dupedkey;	/* next leaf with the same key */
+		} rn_leaf;
+		struct {			/* node only data: */
+			int	rn_Off;		/* byte offset in key of the bit tested */
+			struct	radix_node *rn_L;/* progeny: taken when the bit is 0 */
+			struct	radix_node *rn_R;/* progeny: taken when the bit is 1 */
+		}rn_node;
+	}		rn_u;
+#ifdef RN_DEBUG
+	int rn_info;			/* node number, from rn_nodenum */
+	struct radix_node *rn_twin;	/* node allocated together with this one */
+	struct radix_node *rn_ybro;	/* next on the rn_clist creation list */
+#endif
+};
+
+#define rn_dupedkey rn_u.rn_leaf.rn_Dupedkey	/* leaf members ... */
+#define rn_key rn_u.rn_leaf.rn_Key
+#define rn_mask rn_u.rn_leaf.rn_Mask
+#define rn_off rn_u.rn_node.rn_Off		/* internal-node members ... */
+#define rn_l rn_u.rn_node.rn_L
+#define rn_r rn_u.rn_node.rn_R
+
+/*
+ * Annotations to tree concerning potential routes applying to subtrees.
+ */
+
+extern struct radix_mask {
+	short	rm_b;			/* bit offset; -1-index(netmask) */
+	char	rm_unused;		/* cf. rn_bmask */
+	u_char	rm_flags;		/* cf. rn_flags */
+	struct	radix_mask *rm_mklist;	/* more masks to try; also the free-list link */
+	caddr_t	rm_mask;		/* the mask */
+	int	rm_refs;		/* # of extra references: 0 while one route uses it */
+} *rn_mkfreelist;			/* free list used by MKGet/MKFree (declared only) */
+
+#define MKGet(m) {\
+	if (rn_mkfreelist) {\
+		m = rn_mkfreelist; \
+		rn_mkfreelist = (m)->rm_mklist; \
+	} else \
+		R_Malloc(m, struct radix_mask *, sizeof (*(m))); }\
+
+#define MKFree(m) { (m)->rm_mklist = rn_mkfreelist; rn_mkfreelist = (m);}
+
+struct radix_node_head {
+	struct	radix_node *rnh_treetop;	/* top node of the tree */
+	int	rnh_addrsize;		/* permit, but not require fixed keys */
+	int	rnh_pktsize;		/* permit, but not require fixed keys */
+	struct	radix_node *(*rnh_addaddr)	/* add based on sockaddr (rn_addroute) */
+		__P((void *v, void *mask,
+		     struct radix_node_head *head, struct radix_node nodes[]));
+	struct	radix_node *(*rnh_addpkt)	/* add based on packet hdr (*) */
+		__P((void *v, void *mask,
+		     struct radix_node_head *head, struct radix_node nodes[]));
+	struct	radix_node *(*rnh_deladdr)	/* remove based on sockaddr (rn_delete) */
+		__P((void *v, void *mask, struct radix_node_head *head));
+	struct	radix_node *(*rnh_delpkt)	/* remove based on packet hdr (*) */
+		__P((void *v, void *mask, struct radix_node_head *head));
+	struct	radix_node *(*rnh_matchaddr)	/* locate based on sockaddr (rn_match) */
+		__P((void *v, struct radix_node_head *head));
+	struct	radix_node *(*rnh_matchpkt)	/* locate based on packet hdr (*) */
+		__P((void *v, struct radix_node_head *head));
+	int	(*rnh_walktree)			/* traverse tree (rn_walktree) */
+		__P((struct radix_node_head *head, int (*f)(), void *w));
+	struct	radix_node rnh_nodes[3];	/* empty tree for common case */
+};					/* (*) left zero by rn_inithead */
+
+
+#ifndef KERNEL		/* userland build: plain libc routines */
+#define Bcmp(a, b, n) bcmp(((char *)(a)), ((char *)(b)), (n))
+#define Bcopy(a, b, n) bcopy(((char *)(a)), ((char *)(b)), (unsigned)(n))
+#define Bzero(p, n) bzero((char *)(p), (int)(n));
+#define R_Malloc(p, t, n) (p = (t) malloc((unsigned int)(n)))
+#define Free(p) free((char *)p);
+#else
+#define Bcmp(a, b, n) bcmp(((caddr_t)(a)), ((caddr_t)(b)), (unsigned)(n))
+#define Bcopy(a, b, n) bcopy(((caddr_t)(a)), ((caddr_t)(b)), (unsigned)(n))
+#define Bzero(p, n) bzero((caddr_t)(p), (unsigned)(n));
+#define R_Malloc(p, t, n) (p = (t) malloc((unsigned long)(n), M_RTABLE, M_DONTWAIT))
+#define Free(p) free((caddr_t)p, M_RTABLE);
+/* NB: Bzero and Free expand with their own `;'.  Kernel-only prototypes: */
+void	 rn_init __P((void));
+int	 rn_inithead __P((void **, int));
+int	 rn_refines __P((void *, void *));
+int	 rn_walktree __P((struct radix_node_head *, int (*)(), void *));
+struct radix_node *rn_addmask __P((void *, int, int)),
+	 *rn_addroute __P((void *, void *, struct radix_node_head *,
+			struct radix_node [2])),
+	 *rn_delete __P((void *, void *, struct radix_node_head *)),
+	 *rn_insert __P((void *, struct radix_node_head *, int *,
+			struct radix_node [2])),
+	 *rn_match __P((void *, struct radix_node_head *)),
+	 *rn_newpair __P((void *, int, struct radix_node[2])),
+	 *rn_search __P((void *, struct radix_node *)),
+	 *rn_search_m __P((void *, struct radix_node *, void *));
+
+#endif /*KERNEL*/
+#endif /* _RADIX_H_ */
diff --git a/sys/net/raw_cb.c b/sys/net/raw_cb.c
new file mode 100644
index 00000000000..e44192d2e83
--- /dev/null
+++ b/sys/net/raw_cb.c
@@ -0,0 +1,144 @@
+/*
+ * Copyright (c) 1980, 1986, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)raw_cb.c	8.1 (Berkeley) 6/10/93
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/domain.h>
+#include <sys/protosw.h>
+#include <sys/errno.h>
+
+#include <net/if.h>
+#include <net/route.h>
+#include <net/raw_cb.h>
+#include <netinet/in.h>
+
+/*
+ * Routines to manage the raw protocol control blocks. 
+ *
+ * TODO:
+ *	hash lookups by protocol family/protocol + address family
+ *	take care of unique address problems per AF?
+ *	redo address binding to allow wildcards
+ */
+
+u_long	raw_sendspace = RAWSNDQ;	/* passed to soreserve() by raw_attach() */
+u_long	raw_recvspace = RAWRCVQ;	/* passed to soreserve() by raw_attach() */
+
+/*
+ * Tie the rawcb already hung off so->so_pcb to its socket, reserve
+ * nominal buffer space, and enter the rawcb on the global list.
+ */
+int
+raw_attach(so, proto)
+	register struct socket *so;
+	int proto;
+{
+	register struct rawcb *pcb;
+	int rv;
+
+	/*
+	 * The caller must already have allocated the rawcb (so_pcb).
+	 */
+	pcb = sotorawcb(so);
+	if (pcb == 0)
+		return (ENOBUFS);
+	rv = soreserve(so, raw_sendspace, raw_recvspace);
+	if (rv != 0)
+		return (rv);
+	pcb->rcb_socket = so;
+	pcb->rcb_proto.sp_family = so->so_proto->pr_domain->dom_family;
+	pcb->rcb_proto.sp_protocol = proto;
+	insque(pcb, &rawcb);
+	return (0);
+}
+
+/*
+ * Detach a rawcb: clear the socket's pcb pointer, sofree() the
+ * socket, then unlink the rawcb from its list and free it.
+ */
+void
+raw_detach(rp)
+	register struct rawcb *rp;
+{
+	struct socket *sock = rp->rcb_socket;
+
+	sock->so_pcb = 0;
+	sofree(sock);
+	remque(rp);
+#ifdef notdef
+	if (rp->rcb_laddr != 0)
+		m_freem(dtom(rp->rcb_laddr));
+	rp->rcb_laddr = 0;
+#endif
+	free((caddr_t)rp, M_PCB);
+}
+
+/*
+ * Disconnect; if the socket has SS_NOFDREF set, detach the rawcb as well.
+ */
+void
+raw_disconnect(rp)
+	struct rawcb *rp;
+{
+#ifdef notdef
+	if (rp->rcb_faddr != 0)
+		m_freem(dtom(rp->rcb_faddr));
+	rp->rcb_faddr = 0;
+#endif
+	if ((rp->rcb_socket->so_state & SS_NOFDREF) == 0)
+		return;
+	raw_detach(rp);
+}
+
+#ifdef notdef		/* compiled only if notdef is defined */
+int
+raw_bind(so, nam)
+	register struct socket *so;
+	struct mbuf *nam;	/* mbuf holding the sockaddr to bind to */
+{
+	struct sockaddr *addr = mtod(nam, struct sockaddr *);	/* not read below */
+	register struct rawcb *rp;
+
+	if (ifnet == 0)
+		return (EADDRNOTAVAIL);
+	rp = sotorawcb(so);
+	nam = m_copym(nam, 0, M_COPYALL, M_WAITOK);	/* copy that rcb_laddr will point into */
+	rp->rcb_laddr = mtod(nam, struct sockaddr *);
+	return (0);
+}
+#endif
diff --git a/sys/net/raw_cb.h b/sys/net/raw_cb.h
new file mode 100644
index 00000000000..6003e181edb
--- /dev/null
+++ b/sys/net/raw_cb.h
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 1980, 1986, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)raw_cb.h	8.1 (Berkeley) 6/10/93
+ */
+
+/*
+ * Raw protocol interface control block.  Used
+ * to tie a socket to the generic raw interface.
+ */
+struct rawcb {
+	struct	rawcb *rcb_next;	/* doubly linked list, headed by `rawcb' */
+	struct	rawcb *rcb_prev;
+	struct	socket *rcb_socket;	/* back pointer to socket */
+	struct	sockaddr *rcb_faddr;	/* destination address; if set, raw_input matches src */
+	struct	sockaddr *rcb_laddr;	/* socket's address; if set, raw_input matches dst */
+	struct	sockproto rcb_proto;	/* protocol family, protocol (0 = any protocol) */
+};
+
+#define	sotorawcb(so)		((struct rawcb *)(so)->so_pcb)	/* socket -> its rawcb */
+
+/*
+ * Nominal space allocated to a raw socket.
+ */
+#define	RAWSNDQ		8192	/* initial value of raw_sendspace */
+#define	RAWRCVQ		8192	/* initial value of raw_recvspace */
+
+#ifdef KERNEL
+struct rawcb rawcb;			/* head of list; NOTE(review): defined, not extern, in a header */
+
+int	 raw_attach __P((struct socket *, int));	/* socket, protocol number */
+void	 raw_ctlinput __P((int, struct sockaddr *));
+void	 raw_detach __P((struct rawcb *));
+void	 raw_disconnect __P((struct rawcb *));
+void	 raw_init __P((void));
+void	 raw_input __P((struct mbuf *,		/* packet, proto, src, dst */
+	    struct sockproto *, struct sockaddr *, struct sockaddr *));
+int	 raw_usrreq __P((struct socket *,	/* so, req, m, nam, control */
+	    int, struct mbuf *, struct mbuf *, struct mbuf *));
+#endif
diff --git a/sys/net/raw_usrreq.c b/sys/net/raw_usrreq.c
new file mode 100644
index 00000000000..560106ef95c
--- /dev/null
+++ b/sys/net/raw_usrreq.c
@@ -0,0 +1,306 @@
+/*
+ * Copyright (c) 1980, 1986, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)raw_usrreq.c	8.1 (Berkeley) 6/10/93
+ */
+
+#include <sys/param.h>
+#include <sys/mbuf.h>
+#include <sys/domain.h>
+#include <sys/protosw.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/errno.h>
+
+#include <net/if.h>
+#include <net/route.h>
+#include <net/netisr.h>
+#include <net/raw_cb.h>
+
+/*
+ * Make the rawcb list empty: the head points at itself both ways.
+ */
+void
+raw_init()
+{
+	rawcb.rcb_prev = &rawcb;
+	rawcb.rcb_next = &rawcb;
+}
+
+
+/*
+ * Raw protocol input routine.  Each rawcb matching the packet's
+ * family, protocol and (where set) addresses has the packet
+ * appended to its socket's receive buffer: every match but the
+ * last gets a copy, the last gets the original chain.  If
+ * nothing exists for this packet, drop it.
+ */
+void
+raw_input(m0, proto, src, dst)
+	struct mbuf *m0;
+	register struct sockproto *proto;
+	struct sockaddr *src, *dst;
+{
+	register struct rawcb *pcb;
+	register struct mbuf *m = m0;
+	register int delivered = 0;
+	struct socket *prev;
+	struct mbuf *copy;
+
+	/*
+	 * We assume the lower level routines have placed the
+	 * addresses in a canonical format suitable for a
+	 * structure comparison.  If the lengths are not the same
+	 * the comparison will fail at the first byte.
+	 */
+#define	equal(a1, a2) \
+  (bcmp((caddr_t)(a1), (caddr_t)(a2), a1->sa_len) == 0)
+
+	prev = 0;
+	for (pcb = rawcb.rcb_next; pcb != &rawcb; pcb = pcb->rcb_next) {
+		/* Family must match; protocol too unless the rawcb's is 0. */
+		if (pcb->rcb_proto.sp_family != proto->sp_family)
+			continue;
+		if (pcb->rcb_proto.sp_protocol != 0 &&
+		    pcb->rcb_proto.sp_protocol != proto->sp_protocol)
+			continue;
+		/* Addresses are compared only where the rawcb has one set. */
+		if (pcb->rcb_laddr != 0 && !equal(pcb->rcb_laddr, dst))
+			continue;
+		if (pcb->rcb_faddr != 0 && !equal(pcb->rcb_faddr, src))
+			continue;
+		/* Copy to the previous match, saving m itself for the last. */
+		if (prev != 0 && (copy = m_copy(m, 0, (int)M_COPYALL)) != 0) {
+			if (sbappendaddr(&prev->so_rcv, src,
+			    copy, (struct mbuf *)0) != 0) {
+				sorwakeup(prev);
+				delivered++;
+			} else {
+				/* should notify about lost packet */
+				m_freem(copy);
+			}
+		}
+		prev = pcb->rcb_socket;
+	}
+	if (prev == 0) {
+		m_freem(m);		/* no rawcb matched */
+		return;
+	}
+	/* The final match consumes the original chain. */
+	if (sbappendaddr(&prev->so_rcv, src, m, (struct mbuf *)0) != 0) {
+		sorwakeup(prev);
+		delivered++;
+	} else {
+		m_freem(m);
+	}
+}
+
+/*ARGSUSED*/
+void
+raw_ctlinput(cmd, arg)
+	int cmd;
+	struct sockaddr *arg;
+{
+	/* Only range-checks cmd so far; nothing is done with arg. */
+	if (cmd >= 0 && cmd <= PRC_NCMDS) {
+		/* INCOMPLETE */
+	}
+}
+
+/*
+ * Handle PRU_* request `req' for raw socket `so'; returns 0 or an errno.
+ * m is freed at `release' unless PRU_SEND handed it to pr_output or the
+ * request returns early (PRU_CONTROL, PRU_SENSE, PRU_RCVOOB, PRU_RCVD).
+ */
+/*ARGSUSED*/
+int
+raw_usrreq(so, req, m, nam, control)
+	struct socket *so;
+	int req;
+	struct mbuf *m, *nam, *control;
+{
+	register struct rawcb *rp = sotorawcb(so);
+	register int error = 0;
+	int len;
+
+	if (req == PRU_CONTROL)
+		return (EOPNOTSUPP);
+	if (control && control->m_len) {
+		error = EOPNOTSUPP;
+		goto release;
+	}
+	if (rp == 0) {			/* every request below needs the rawcb */
+		error = EINVAL;
+		goto release;
+	}
+	switch (req) {
+
+	/*
+	 * Allocate a raw control block and fill in the
+	 * necessary info to allow packets to be routed to
+	 * the appropriate raw interface routine.
+	 */
+	case PRU_ATTACH:
+		if ((so->so_state & SS_PRIV) == 0) {
+			error = EACCES;
+			break;
+		}
+		error = raw_attach(so, (int)nam);
+		break;
+
+	/*
+	 * Destroy state just before socket deallocation.
+	 * Flush data or not depending on the options.
+	 */
+	case PRU_DETACH:
+		raw_detach(rp);		/* rp != 0 was checked above */
+		break;
+
+#ifdef notdef
+	/*
+	 * If a socket isn't bound to a single address,
+	 * the raw input routine will hand it anything
+	 * within that protocol family (assuming there's
+	 * nothing else around it should go to). 
+	 */
+	case PRU_CONNECT:
+		if (rp->rcb_faddr) {
+			error = EISCONN;
+			break;
+		}
+		nam = m_copym(nam, 0, M_COPYALL, M_WAIT);
+		rp->rcb_faddr = mtod(nam, struct sockaddr *);
+		soisconnected(so);
+		break;
+
+	case PRU_BIND:
+		if (rp->rcb_laddr) {
+			error = EINVAL;			/* XXX */
+			break;
+		}
+		error = raw_bind(so, nam);
+		break;
+#endif
+
+	case PRU_CONNECT2:
+		error = EOPNOTSUPP;
+		goto release;
+
+	case PRU_DISCONNECT:
+		if (rp->rcb_faddr == 0) {
+			error = ENOTCONN;
+			break;
+		}
+		soisdisconnected(so);	/* before raw_disconnect() may detach */
+		raw_disconnect(rp);
+		break;
+
+	/* Mark the socket as unable to send any more data. */
+	case PRU_SHUTDOWN:
+		socantsendmore(so);
+		break;
+
+	/*
+	 * Ship a packet out.  The appropriate raw output
+	 * routine handles any massaging necessary.
+	 */
+	case PRU_SEND:
+		if (nam) {
+			if (rp->rcb_faddr) {
+				error = EISCONN;
+				break;
+			}
+			rp->rcb_faddr = mtod(nam, struct sockaddr *);
+		} else if (rp->rcb_faddr == 0) {
+			error = ENOTCONN;
+			break;
+		}
+		error = (*so->so_proto->pr_output)(m, so);
+		m = NULL;
+		if (nam)
+			rp->rcb_faddr = 0;
+		break;
+
+	case PRU_ABORT:
+		/*
+		 * Finish with so before anything that may release it:
+		 * raw_detach() sofree()s so and frees rp itself.
+		 */
+		soisdisconnected(so);
+		if (so->so_state & SS_NOFDREF)
+			raw_detach(rp);
+		else
+			sofree(so);
+		break;
+
+	case PRU_SENSE:
+		return (0);		/* stat: don't bother with a blocksize */
+
+	/* Not supported. */
+	case PRU_RCVOOB:
+	case PRU_RCVD:
+		return(EOPNOTSUPP);
+
+	case PRU_LISTEN:
+	case PRU_ACCEPT:
+	case PRU_SENDOOB:
+		error = EOPNOTSUPP;
+		break;
+
+	case PRU_SOCKADDR:
+		if (rp->rcb_laddr == 0) {
+			error = EINVAL;
+			break;
+		}
+		len = rp->rcb_laddr->sa_len;
+		bcopy((caddr_t)rp->rcb_laddr, mtod(nam, caddr_t), (unsigned)len);
+		nam->m_len = len;
+		break;
+
+	case PRU_PEERADDR:
+		if (rp->rcb_faddr == 0) {
+			error = ENOTCONN;
+			break;
+		}
+		len = rp->rcb_faddr->sa_len;
+		bcopy((caddr_t)rp->rcb_faddr, mtod(nam, caddr_t), (unsigned)len);
+		nam->m_len = len;
+		break;
+
+	default:
+		panic("raw_usrreq");
+	}
+release:
+	if (m != NULL)
+		m_freem(m);
+	return (error);
+}
diff --git a/sys/net/route.c b/sys/net/route.c
new file mode 100644
index 00000000000..96902dace19
--- /dev/null
+++ b/sys/net/route.c
@@ -0,0 +1,538 @@
+/*
+ * Copyright (c) 1980, 1986, 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)route.c	8.2 (Berkeley) 11/15/93
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/domain.h>
+#include <sys/protosw.h>
+#include <sys/ioctl.h>
+
+#include <net/if.h>
+#include <net/route.h>
+#include <net/raw_cb.h>
+
+#include <netinet/in.h>
+#include <netinet/in_var.h>
+
+#ifdef NS
+#include <netns/ns.h>
+#endif
+
+/* Cast an arbitrary pointer expression to a generic sockaddr pointer. */
+#define	SA(p) ((struct sockaddr *)(p))
+
+/* Incremented by rtrequest(RTM_DELETE), decremented again in rtfree(). */
+int	rttrash;		/* routes not in table but not freed */
+struct	sockaddr wildcard;	/* zero valued cookie for wildcard searches */
+
+/*
+ * Walk the list of domains and let each one that supplies a
+ * dom_rtattach hook initialize its own slot (indexed by dom_family)
+ * of the given table array.
+ */
+void
+rtable_init(table)
+	void **table;
+{
+	struct domain *dp = domains;
+
+	while (dp != 0) {
+		if (dp->dom_rtattach != 0)
+			(*dp->dom_rtattach)(&table[dp->dom_family],
+			    dp->dom_rtoffset);
+		dp = dp->dom_next;
+	}
+}
+
+/*
+ * Routing start-up: prepare the radix tree code first, then hand the
+ * global rt_tables array to rtable_init() so each domain can attach.
+ */
+void
+route_init()
+{
+	void **tables = (void **)rt_tables;
+
+	/* initialize all zeroes, all ones, mask table */
+	rn_init();
+	rtable_init(tables);
+}
+
+/*
+ * Packet routing routines.
+ */
+/*
+ * Fill in ro->ro_rt with a route to ro->ro_dst.  A cached route is
+ * kept as is when it has an interface and is still marked RTF_UP;
+ * otherwise a fresh (reporting) lookup replaces it.
+ */
+void
+rtalloc(ro)
+	register struct route *ro;
+{
+	register struct rtentry *cached = ro->ro_rt;
+
+	/* XXX: an unusable cached route is overwritten, not released */
+	if (cached == 0 || cached->rt_ifp == 0 ||
+	    (cached->rt_flags & RTF_UP) == 0)
+		ro->ro_rt = rtalloc1(&ro->ro_dst, 1);
+}
+
+/*
+ * Look up the route matching dst in the table for dst's address family.
+ * The whole lookup runs at splnet.
+ *
+ * On a hit the entry is returned with its reference count bumped.  When
+ * `report' is set and the hit is marked RTF_CLONING, a clone is requested
+ * through rtrequest(RTM_RESOLVE) and returned instead; if cloning fails
+ * the cloning route itself is returned (referenced).  Returns 0 when
+ * nothing matches or only a radix root node matches.
+ *
+ * When `report' is set, lookup misses, failed clones and clones marked
+ * RTF_XRESOLVE are announced with rt_missmsg().
+ */
+struct rtentry *
+rtalloc1(dst, report)
+	register struct sockaddr *dst;
+	int report;
+{
+	register struct radix_node_head *rnh = rt_tables[dst->sa_family];
+	register struct rtentry *rt;
+	register struct radix_node *rn;
+	struct rtentry *newrt = 0;
+	struct rt_addrinfo info;
+	int  s = splnet(), err = 0, msgtype = RTM_MISS;
+
+	if (rnh && (rn = rnh->rnh_matchaddr((caddr_t)dst, rnh)) &&
+	    ((rn->rn_flags & RNF_ROOT) == 0)) {
+		/* the radix nodes are the first member of an rtentry */
+		newrt = rt = (struct rtentry *)rn;
+		if (report && (rt->rt_flags & RTF_CLONING)) {
+			/* newrt is in/out: parent route in, clone out */
+			err = rtrequest(RTM_RESOLVE, dst, SA(0),
+					      SA(0), 0, &newrt);
+			if (err) {
+				/* fall back to the cloning route itself */
+				newrt = rt;
+				rt->rt_refcnt++;
+				goto miss;
+			}
+			/* rtrequest already took the reference on the clone */
+			if ((rt = newrt) && (rt->rt_flags & RTF_XRESOLVE)) {
+				msgtype = RTM_RESOLVE;
+				goto miss;
+			}
+		} else
+			rt->rt_refcnt++;
+	} else {
+		rtstat.rts_unreach++;
+		/* also entered by goto from the cloning paths above */
+	miss:	if (report) {
+			bzero((caddr_t)&info, sizeof(info));
+			info.rti_info[RTAX_DST] = dst;
+			rt_missmsg(msgtype, &info, 0, err);
+		}
+	}
+	splx(s);
+	return (newrt);
+}
+
+/*
+ * Drop one reference on rt.  Once no references remain and the route
+ * is no longer RTF_UP (i.e. it has been taken out of the table), give
+ * back its interface address reference and release the key buffer and
+ * the entry itself.
+ */
+void
+rtfree(rt)
+	register struct rtentry *rt;
+{
+	register struct ifaddr *ifa;
+
+	if (rt == 0)
+		panic("rtfree");
+	rt->rt_refcnt--;
+
+	/* still referenced, or still installed in the table: keep it */
+	if (rt->rt_refcnt > 0 || (rt->rt_flags & RTF_UP) != 0)
+		return;
+
+	if (rt->rt_nodes->rn_flags & (RNF_ACTIVE | RNF_ROOT))
+		panic ("rtfree 2");
+	rttrash--;
+	if (rt->rt_refcnt < 0) {
+		printf("rtfree: %x not freed (neg refs)\n", rt);
+		return;
+	}
+	ifa = rt->rt_ifa;
+	IFAFREE(ifa);
+	Free(rt_key(rt));
+	Free(rt);
+}
+
+/*
+ * Release one reference on an interface address.  A count of zero
+ * means this was the last holder, so the structure is freed;
+ * otherwise the count is simply decremented.
+ */
+void
+ifafree(ifa)
+	register struct ifaddr *ifa;
+{
+	if (ifa == NULL)
+		panic("ifafree");
+	if (ifa->ifa_refcnt != 0) {
+		ifa->ifa_refcnt--;
+		return;
+	}
+	free(ifa, M_IFADDR);
+}
+
+/*
+ * Force a routing table entry to the specified
+ * destination to go through the given gateway.
+ * Normally called as a result of a routing redirect
+ * message from the network layer.
+ *
+ * dst, gateway, netmask describe the redirected route, src is the
+ * router that sent the redirect, flags are RTF_* bits.  On success,
+ * when rtp is non-null and a route for dst was already known, that
+ * (still referenced) route is handed back through *rtp; in every
+ * other case the reference taken by the lookup is dropped here.
+ * An RTM_REDIRECT routing message is generated on every call.
+ *
+ * Returns 0 on success, otherwise an errno value (ENETUNREACH,
+ * EINVAL, EHOSTUNREACH, or whatever rtrequest() reported).
+ *
+ * N.B.: must be called at splnet
+ *
+ */
+int
+rtredirect(dst, gateway, netmask, flags, src, rtp)
+	struct sockaddr *dst, *gateway, *netmask, *src;
+	int flags;
+	struct rtentry **rtp;
+{
+	register struct rtentry *rt;
+	int error = 0;
+	short *stat = 0;
+	struct rt_addrinfo info;
+	struct ifaddr *ifa;
+
+	/* verify the gateway is directly reachable */
+	if ((ifa = ifa_ifwithnet(gateway)) == 0) {
+		error = ENETUNREACH;
+		goto out;	/* rt not looked up yet: nothing to release */
+	}
+	rt = rtalloc1(dst, 0);
+	/*
+	 * If the redirect isn't from our current router for this dst,
+	 * it's either old or wrong.  If it redirects us to ourselves,
+	 * we have a routing loop, perhaps as a result of an interface
+	 * going down recently.
+	 */
+#define	equal(a1, a2) (bcmp((caddr_t)(a1), (caddr_t)(a2), (a1)->sa_len) == 0)
+	if (!(flags & RTF_DONE) && rt &&
+	     (!equal(src, rt->rt_gateway) || rt->rt_ifa != ifa))
+		error = EINVAL;
+	else if (ifa_ifwithaddr(gateway))
+		error = EHOSTUNREACH;
+	if (error)
+		goto done;
+	/*
+	 * Create a new entry if we just got back a wildcard entry
+	 * or the lookup failed.  This is necessary for hosts
+	 * which use routing redirects generated by smart gateways
+	 * to dynamically build the routing tables.
+	 */
+	if ((rt == 0) || (rt_mask(rt) && rt_mask(rt)->sa_len < 2))
+		goto create;
+	/*
+	 * Don't listen to the redirect if it's
+	 * for a route to an interface.
+	 */
+	if (rt->rt_flags & RTF_GATEWAY) {
+		if (((rt->rt_flags & RTF_HOST) == 0) && (flags & RTF_HOST)) {
+			/*
+			 * Changing from route to net => route to host.
+			 * Create new route, rather than smashing route to net.
+			 */
+		create:
+			flags |=  RTF_GATEWAY | RTF_DYNAMIC;
+			error = rtrequest((int)RTM_ADD, dst, gateway,
+				    netmask, flags,
+				    (struct rtentry **)0);
+			stat = &rtstat.rts_dynamic;
+		} else {
+			/*
+			 * Smash the current notion of the gateway to
+			 * this destination.  Should check about netmask!!!
+			 */
+			rt->rt_flags |= RTF_MODIFIED;
+			flags |= RTF_MODIFIED;
+			stat = &rtstat.rts_newgateway;
+			rt_setgate(rt, rt_key(rt), gateway);
+		}
+	} else
+		error = EHOSTUNREACH;
+done:
+	/* hand the looked-up route to the caller, or drop our reference */
+	if (rt) {
+		if (rtp && !error)
+			*rtp = rt;
+		else
+			rtfree(rt);
+	}
+out:
+	if (error)
+		rtstat.rts_badredirect++;
+	else if (stat != NULL)
+		(*stat)++;
+	bzero((caddr_t)&info, sizeof(info));
+	info.rti_info[RTAX_DST] = dst;
+	info.rti_info[RTAX_GATEWAY] = gateway;
+	info.rti_info[RTAX_NETMASK] = netmask;
+	info.rti_info[RTAX_AUTHOR] = src;
+	rt_missmsg(RTM_REDIRECT, &info, flags, error);
+	/*
+	 * The function is declared int; previously control fell off the
+	 * end and a caller examining the result got an undefined value.
+	 */
+	return (error);
+}
+
+/*
+ * Routing table ioctl interface.
+ * No request is implemented here: every call is answered with
+ * EOPNOTSUPP and the arguments are not examined.
+ */
+int
+rtioctl(req, data, p)
+	int req;
+	caddr_t data;
+	struct proc *p;
+{
+	int error = EOPNOTSUPP;
+
+	return (error);
+}
+
+/*
+ * Pick the interface address a route to dst via gateway should use.
+ * Candidates are tried in order: point-to-point destination match,
+ * local address match, directly attached network of the gateway, and
+ * finally whatever an existing route to dst already uses.  If the
+ * chosen address is of a different family than dst, an address of the
+ * right family on the same interface is preferred when one exists.
+ * Returns 0 when no candidate is found.
+ */
+struct ifaddr *
+ifa_ifwithroute(flags, dst, gateway)
+	int flags;
+	struct sockaddr	*dst, *gateway;
+{
+	register struct ifaddr *ifa;
+	struct ifaddr *samefamily;
+	struct rtentry *rt;
+
+	if (flags & RTF_GATEWAY) {
+		/*
+		 * Route to a remote net or host: the gateway may
+		 * still sit at the far end of a pt to pt link.
+		 */
+		ifa = ifa_ifwithdstaddr(gateway);
+	} else {
+		/*
+		 * Route to an interface: on a pt to pt link the
+		 * destination identifies the interface; otherwise
+		 * the local address does.
+		 */
+		ifa = (flags & RTF_HOST) ? ifa_ifwithdstaddr(dst) : 0;
+		if (ifa == 0)
+			ifa = ifa_ifwithaddr(gateway);
+	}
+	if (ifa == 0)
+		ifa = ifa_ifwithnet(gateway);
+	if (ifa == 0) {
+		/* last resort: borrow the ifa of an existing route */
+		rt = rtalloc1(dst, 0);
+		if (rt == 0)
+			return (0);
+		rt->rt_refcnt--;
+		ifa = rt->rt_ifa;
+		if (ifa == 0)
+			return (0);
+	}
+	if (ifa->ifa_addr->sa_family != dst->sa_family) {
+		samefamily = ifaof_ifpforaddr(dst, ifa->ifa_ifp);
+		if (samefamily != 0)
+			ifa = samefamily;
+	}
+	return (ifa);
+}
+
+/*
+ * Round a length up to the next multiple of sizeof(long); a length of
+ * zero (or less) still yields one long.  The argument is fully
+ * parenthesized so that expressions such as `c ? x : y' expand
+ * correctly; note that it is evaluated more than once.
+ */
+#define ROUNDUP(a) \
+	((a) > 0 ? (1 + (((a) - 1) | (sizeof(long) - 1))) : sizeof(long))
+
+/*
+ * Carry out a routing table request; the whole function runs at splnet.
+ *
+ * RTM_DELETE:	remove the entry for dst/netmask from the table, clear
+ *		RTF_UP, release its rt_gwroute, notify the interface
+ *		address and count the entry in rttrash.  The entry is
+ *		passed back through *ret_nrt when ret_nrt is non-null,
+ *		otherwise it is freed here if nobody references it.
+ * RTM_RESOLVE:	*ret_nrt must point at the route being cloned; flags
+ *		(minus RTF_CLONING), gateway and genmask (used as netmask)
+ *		are taken from it, and a null genmask produces a host route.
+ * RTM_ADD:	the interface address is chosen by ifa_ifwithroute().
+ * For RESOLVE and ADD a new rtentry is allocated, filled in and inserted;
+ * when ret_nrt is non-null the new entry is returned through it with an
+ * extra reference.
+ *
+ * Any other req value falls through the switch and returns 0.
+ * Errors: ESRCH, EINVAL, ENETUNREACH, ENOBUFS, EEXIST.
+ */
+int
+rtrequest(req, dst, gateway, netmask, flags, ret_nrt)
+	int req, flags;
+	struct sockaddr *dst, *gateway, *netmask;
+	struct rtentry **ret_nrt;
+{
+	int s = splnet(); int error = 0;
+	register struct rtentry *rt;
+	register struct radix_node *rn;
+	register struct radix_node_head *rnh;
+	struct ifaddr *ifa;
+	struct sockaddr *ndst;
+/* record the error and leave through the common splx() exit */
+#define senderr(x) { error = x ; goto bad; }
+
+	if ((rnh = rt_tables[dst->sa_family]) == 0)
+		senderr(ESRCH);
+	/* host routes never carry a netmask */
+	if (flags & RTF_HOST)
+		netmask = 0;
+	switch (req) {
+	case RTM_DELETE:
+		if ((rn = rnh->rnh_deladdr(dst, netmask, rnh)) == 0)
+			senderr(ESRCH);
+		if (rn->rn_flags & (RNF_ACTIVE | RNF_ROOT))
+			panic ("rtrequest delete");
+		rt = (struct rtentry *)rn;
+		rt->rt_flags &= ~RTF_UP;
+		if (rt->rt_gwroute) {
+			/* rt is borrowed for RTFREE, then restored from rn */
+			rt = rt->rt_gwroute; RTFREE(rt);
+			(rt = (struct rtentry *)rn)->rt_gwroute = 0;
+		}
+		if ((ifa = rt->rt_ifa) && ifa->ifa_rtrequest)
+			ifa->ifa_rtrequest(RTM_DELETE, rt, SA(0));
+		rttrash++;
+		if (ret_nrt)
+			*ret_nrt = rt;
+		else if (rt->rt_refcnt <= 0) {
+			/* rtfree() decrements first, so pre-increment */
+			rt->rt_refcnt++;
+			rtfree(rt);
+		}
+		break;
+
+	case RTM_RESOLVE:
+		if (ret_nrt == 0 || (rt = *ret_nrt) == 0)
+			senderr(EINVAL);
+		ifa = rt->rt_ifa;
+		flags = rt->rt_flags & ~RTF_CLONING;
+		gateway = rt->rt_gateway;
+		if ((netmask = rt->rt_genmask) == 0)
+			flags |= RTF_HOST;
+		goto makeroute;
+
+	case RTM_ADD:
+		if ((ifa = ifa_ifwithroute(flags, dst, gateway)) == 0)
+			senderr(ENETUNREACH);
+	makeroute:
+		/* from here on rt is the new entry, not the cloned parent */
+		R_Malloc(rt, struct rtentry *, sizeof(*rt));
+		if (rt == 0)
+			senderr(ENOBUFS);
+		Bzero(rt, sizeof(*rt));
+		rt->rt_flags = RTF_UP | flags;
+		/* allocates the shared key+gateway buffer */
+		if (rt_setgate(rt, dst, gateway)) {
+			Free(rt);
+			senderr(ENOBUFS);
+		}
+		/* fill in the key part of that buffer */
+		ndst = rt_key(rt);
+		if (netmask) {
+			rt_maskedcopy(dst, ndst, netmask);
+		} else
+			Bcopy(dst, ndst, dst->sa_len);
+		rn = rnh->rnh_addaddr((caddr_t)ndst, (caddr_t)netmask,
+					rnh, rt->rt_nodes);
+		if (rn == 0) {
+			/* insertion refused: undo everything built so far */
+			if (rt->rt_gwroute)
+				rtfree(rt->rt_gwroute);
+			Free(rt_key(rt));
+			Free(rt);
+			senderr(EEXIST);
+		}
+		ifa->ifa_refcnt++;
+		rt->rt_ifa = ifa;
+		rt->rt_ifp = ifa->ifa_ifp;
+		if (req == RTM_RESOLVE)
+			rt->rt_rmx = (*ret_nrt)->rt_rmx; /* copy metrics */
+		if (ifa->ifa_rtrequest)
+			ifa->ifa_rtrequest(req, rt, SA(ret_nrt ? *ret_nrt : 0));
+		if (ret_nrt) {
+			*ret_nrt = rt;
+			rt->rt_refcnt++;
+		}
+		break;
+	}
+bad:
+	splx(s);
+	return (error);
+}
+
+/*
+ * Install gate as the gateway of route rt0.
+ *
+ * Key and gateway live in one allocation: the gateway is stored at
+ * key + ROUNDUP(dst->sa_len).  If the route has no gateway yet, or the
+ * new one needs more room than the current one, a new buffer is
+ * allocated; when an old key buffer existed, dst is copied into the new
+ * buffer and the old one is freed.  (With no old buffer the key area is
+ * left for the caller to fill in.)  Any cached rt_gwroute is released,
+ * and for RTF_GATEWAY routes a route to the gateway is looked up again.
+ *
+ * Returns 0 on success, 1 if the buffer could not be allocated (the
+ * route is then left untouched).
+ */
+int
+rt_setgate(rt0, dst, gate)
+	struct rtentry *rt0;
+	struct sockaddr *dst, *gate;
+{
+	caddr_t new, old;
+	int dlen = ROUNDUP(dst->sa_len), glen = ROUNDUP(gate->sa_len);
+	register struct rtentry *rt = rt0;
+
+	if (rt->rt_gateway == 0 || glen > ROUNDUP(rt->rt_gateway->sa_len)) {
+		/* need a (bigger) buffer; old may be 0 for a fresh entry */
+		old = (caddr_t)rt_key(rt);
+		R_Malloc(new, caddr_t, dlen + glen);
+		if (new == 0)
+			return 1;
+		rt->rt_nodes->rn_key = new;
+	} else {
+		/* the new gateway fits in the existing buffer */
+		new = rt->rt_nodes->rn_key;
+		old = 0;
+	}
+	/* copies the rounded-up length glen, not just gate->sa_len */
+	Bcopy(gate, (rt->rt_gateway = (struct sockaddr *)(new + dlen)), glen);
+	if (old) {
+		Bcopy(dst, new, dlen);
+		Free(old);
+	}
+	if (rt->rt_gwroute) {
+		/* rt is borrowed for RTFREE, then restored from rt0 */
+		rt = rt->rt_gwroute; RTFREE(rt);
+		rt = rt0; rt->rt_gwroute = 0;
+	}
+	if (rt->rt_flags & RTF_GATEWAY) {
+		rt->rt_gwroute = rtalloc1(gate, 1);
+	}
+	return 0;
+}
+
+/*
+ * Store (src & netmask) in dst.  The first two bytes (sa_len and
+ * sa_family) are copied from src unmasked; the following bytes are
+ * ANDed with the mask for as far as both src and the mask extend, and
+ * whatever remains of src's length in dst is zero filled.
+ */
+void
+rt_maskedcopy(src, dst, netmask)
+	struct sockaddr *src, *dst, *netmask;
+{
+	register u_char *from = (u_char *)src;
+	register u_char *to = (u_char *)dst;
+	register u_char *mask = (u_char *)netmask;
+	int srclen = from[0];	/* both lengths are sampled up front */
+	int limit = mask[0];
+	register int i;
+
+	/* copies sa_len & sa_family */
+	to[0] = from[0];
+	to[1] = from[1];
+	if (limit > srclen)
+		limit = srclen;
+	for (i = 2; i < limit; i++)
+		to[i] = from[i] & mask[i];
+	if (i < srclen)
+		bzero((caddr_t)(to + i), (unsigned)(srclen - i));
+}
+
+/*
+ * Set up a routing table entry, normally
+ * for an interface.
+ *
+ * cmd is RTM_ADD or RTM_DELETE; flags are RTF_* bits that are ORed with
+ * ifa->ifa_flags.  The destination is ifa_dstaddr for host routes and
+ * ifa_addr otherwise, and ifa_addr is passed as the gateway.  A routing
+ * message is sent with rt_newaddrmsg() after a successful request.
+ * Returns 0 or an errno value; a delete is refused with EHOSTUNREACH or
+ * ENETUNREACH when the matching route belongs to a different ifa.
+ */
+int
+rtinit(ifa, cmd, flags)
+	register struct ifaddr *ifa;
+	int cmd, flags;
+{
+	register struct rtentry *rt;
+	register struct sockaddr *dst;
+	register struct sockaddr *deldst;
+	struct mbuf *m = 0;
+	struct rtentry *nrt = 0;
+	int error;
+
+	dst = flags & RTF_HOST ? ifa->ifa_dstaddr : ifa->ifa_addr;
+	if (cmd == RTM_DELETE) {
+		if ((flags & RTF_HOST) == 0 && ifa->ifa_netmask) {
+			/* use an mbuf as scratch space for the masked address */
+			m = m_get(M_WAIT, MT_SONAME);
+			deldst = mtod(m, struct sockaddr *);
+			rt_maskedcopy(dst, deldst, ifa->ifa_netmask);
+			dst = deldst;
+		}
+		if (rt = rtalloc1(dst, 0)) {
+			/* only peeking: give the reference straight back */
+			rt->rt_refcnt--;
+			if (rt->rt_ifa != ifa) {
+				if (m)
+					(void) m_free(m);
+				return (flags & RTF_HOST ? EHOSTUNREACH
+							: ENETUNREACH);
+			}
+		}
+	}
+	error = rtrequest(cmd, dst, ifa->ifa_addr, ifa->ifa_netmask,
+			flags | ifa->ifa_flags, &nrt);
+	if (m)
+		(void) m_free(m);
+	if (cmd == RTM_DELETE && error == 0 && (rt = nrt)) {
+		rt_newaddrmsg(cmd, ifa, error, nrt);
+		/* rtrequest handed the deleted entry to us: free if unused */
+		if (rt->rt_refcnt <= 0) {
+			rt->rt_refcnt++;
+			rtfree(rt);
+		}
+	}
+	if (cmd == RTM_ADD && error == 0 && (rt = nrt)) {
+		/* drop the extra reference rtrequest took for ret_nrt */
+		rt->rt_refcnt--;
+		if (rt->rt_ifa != ifa) {
+			/* rtrequest chose another ifa: rebind the route to ours */
+			printf("rtinit: wrong ifa (%x) was (%x)\n", ifa,
+				rt->rt_ifa);
+			if (rt->rt_ifa->ifa_rtrequest)
+			    rt->rt_ifa->ifa_rtrequest(RTM_DELETE, rt, SA(0));
+			IFAFREE(rt->rt_ifa);
+			rt->rt_ifa = ifa;
+			rt->rt_ifp = ifa->ifa_ifp;
+			ifa->ifa_refcnt++;
+			if (ifa->ifa_rtrequest)
+			    ifa->ifa_rtrequest(RTM_ADD, rt, SA(0));
+		}
+		rt_newaddrmsg(cmd, ifa, error, nrt);
+	}
+	return (error);
+}
diff --git a/sys/net/route.h b/sys/net/route.h
new file mode 100644
index 00000000000..2fbed9ea0a1
--- /dev/null
+++ b/sys/net/route.h
@@ -0,0 +1,261 @@
+/*
+ * Copyright (c) 1980, 1986, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)route.h	8.3 (Berkeley) 4/19/94
+ */
+
+/*
+ * Kernel resident routing tables.
+ * 
+ * The routing tables are initialized when interface addresses
+ * are set by making entries for all directly connected interfaces.
+ */
+
+/*
+ * A route consists of a destination address and a reference
+ * to a routing entry.  These are often held by protocols
+ * in their control blocks, e.g. inpcb.
+ */
+struct route {
+	struct	rtentry *ro_rt;		/* cached entry, filled in by rtalloc() */
+	struct	sockaddr ro_dst;	/* destination used as the lookup key */
+};
+
+/*
+ * These numbers are used by reliable protocols for determining
+ * retransmission behavior and are included in the routing structure.
+ */
+struct rt_metrics {
+	u_long	rmx_locks;	/* Kernel must leave these values alone */
+	u_long	rmx_mtu;	/* MTU for this path */
+	u_long	rmx_hopcount;	/* max hops expected */
+	u_long	rmx_expire;	/* lifetime for route, e.g. redirect */
+	u_long	rmx_recvpipe;	/* inbound delay-bandwidth product */
+	u_long	rmx_sendpipe;	/* outbound delay-bandwidth product */
+	u_long	rmx_ssthresh;	/* outbound gateway buffer limit */
+	u_long	rmx_rtt;	/* estimated round trip time */
+	u_long	rmx_rttvar;	/* estimated rtt variance */
+	u_long	rmx_pksent;	/* packets sent using this route */
+};
+
+/*
+ * rmx_rtt and rmx_rttvar are stored as microseconds;
+ * RTTTOPRHZ(rtt) converts to a value suitable for use
+ * by a protocol slowtimo counter.
+ */
+#define	RTM_RTTUNIT	1000000	/* units for rtt, rttvar, as units per sec */
+/* PR_SLOWHZ is not defined in this header; the includer must supply it. */
+#define	RTTTOPRHZ(r)	((r) / (RTM_RTTUNIT / PR_SLOWHZ))
+
+/*
+ * We distinguish between routes to hosts and routes to networks,
+ * preferring the former if available.  For each route we infer
+ * the interface to use from the gateway address supplied when
+ * the route was entered.  Routes that forward packets through
+ * gateways are marked so that the output routines know to address the
+ * gateway rather than the ultimate destination.
+ */
+#ifndef RNF_NORMAL
+#include <net/radix.h>
+#endif
+/*
+ * A routing table entry.  rt_nodes must stay the first member: the
+ * routing code casts a struct radix_node pointer returned by the radix
+ * routines directly to a struct rtentry pointer.
+ */
+struct rtentry {
+	struct	radix_node rt_nodes[2];	/* tree glue, and other values */
+/* the key (destination) and mask are kept in the first radix node */
+#define	rt_key(r)	((struct sockaddr *)((r)->rt_nodes->rn_key))
+#define	rt_mask(r)	((struct sockaddr *)((r)->rt_nodes->rn_mask))
+	struct	sockaddr *rt_gateway;	/* value */
+	short	rt_flags;		/* up/down?, host/net */
+	short	rt_refcnt;		/* # held references */
+	u_long	rt_use;			/* raw # packets forwarded */
+	struct	ifnet *rt_ifp;		/* the answer: interface to use */
+	struct	ifaddr *rt_ifa;		/* the answer: interface address to use */
+	struct	sockaddr *rt_genmask;	/* for generation of cloned routes */
+	caddr_t	rt_llinfo;		/* pointer to link level info cache */
+	struct	rt_metrics rt_rmx;	/* metrics used by rx'ing protocols */
+	struct	rtentry *rt_gwroute;	/* implied entry for gatewayed routes */
+};
+
+/*
+ * Following structure necessary for 4.3 compatibility;
+ * We should eventually move it to a compat file.
+ */
+/* Old-style entry: unlike struct rtentry, key and gateway are embedded. */
+struct ortentry {
+	u_long	rt_hash;		/* to speed lookups */
+	struct	sockaddr rt_dst;	/* key */
+	struct	sockaddr rt_gateway;	/* value */
+	short	rt_flags;		/* up/down?, host/net */
+	short	rt_refcnt;		/* # held references */
+	u_long	rt_use;			/* raw # packets forwarded */
+	struct	ifnet *rt_ifp;		/* the answer: interface to use */
+};
+
+/*
+ * Route flag bits, kept in rt_flags and passed as the `flags' argument
+ * of the routing requests.
+ * NOTE(review): rt_flags is declared short while RTF_PROTO1 is 0x8000;
+ * where short is 16 bits that is the sign bit -- confirm this is intended.
+ */
+#define	RTF_UP		0x1		/* route usable */
+#define	RTF_GATEWAY	0x2		/* destination is a gateway */
+#define	RTF_HOST	0x4		/* host entry (net otherwise) */
+#define	RTF_REJECT	0x8		/* host or net unreachable */
+#define	RTF_DYNAMIC	0x10		/* created dynamically (by redirect) */
+#define	RTF_MODIFIED	0x20		/* modified dynamically (by redirect) */
+#define RTF_DONE	0x40		/* message confirmed */
+#define RTF_MASK	0x80		/* subnet mask present */
+#define RTF_CLONING	0x100		/* generate new routes on use */
+#define RTF_XRESOLVE	0x200		/* external daemon resolves name */
+#define RTF_LLINFO	0x400		/* generated by ARP or ESIS */
+#define RTF_STATIC	0x800		/* manually added */
+#define RTF_BLACKHOLE	0x1000		/* just discard pkts (during updates) */
+/* 0x2000 is not assigned in this list */
+#define RTF_PROTO2	0x4000		/* protocol specific routing flag */
+#define RTF_PROTO1	0x8000		/* protocol specific routing flag */
+
+
+/*
+ * Routing statistics.
+ */
+/* All counters are plain shorts; nothing here guards against wrap-around. */
+struct	rtstat {
+	short	rts_badredirect;	/* bogus redirect calls */
+	short	rts_dynamic;		/* routes created by redirects */
+	short	rts_newgateway;		/* routes modified by redirects */
+	short	rts_unreach;		/* lookups which failed */
+	short	rts_wildcard;		/* lookups satisfied by a wildcard */
+};
+/*
+ * Structures for routing messages.
+ */
+/*
+ * Header of every routing message; the sockaddrs selected by rtm_addrs
+ * follow it directly.
+ */
+struct rt_msghdr {
+	u_short	rtm_msglen;	/* to skip over non-understood messages */
+	u_char	rtm_version;	/* future binary compatibility */
+	u_char	rtm_type;	/* message type */
+	u_short	rtm_index;	/* index for associated ifp */
+	int	rtm_flags;	/* flags, incl. kern & message, e.g. DONE */
+	int	rtm_addrs;	/* bitmask identifying sockaddrs in msg */
+	pid_t	rtm_pid;	/* identify sender */
+	int	rtm_seq;	/* for sender to identify action */
+	int	rtm_errno;	/* why failed */
+	int	rtm_use;	/* from rtentry */
+	u_long	rtm_inits;	/* which metrics we are initializing */
+	struct	rt_metrics rtm_rmx; /* metrics themselves */
+};
+
+#define RTM_VERSION	3	/* Up the ante and ignore older versions */
+
+/*
+ * Message types (rtm_type).  Several of these values are also used as
+ * the request/command argument of rtrequest() and rtinit().
+ */
+#define RTM_ADD		0x1	/* Add Route */
+#define RTM_DELETE	0x2	/* Delete Route */
+#define RTM_CHANGE	0x3	/* Change Metrics or flags */
+#define RTM_GET		0x4	/* Report Metrics */
+#define RTM_LOSING	0x5	/* Kernel Suspects Partitioning */
+#define RTM_REDIRECT	0x6	/* Told to use different route */
+#define RTM_MISS	0x7	/* Lookup failed on this address */
+#define RTM_LOCK	0x8	/* fix specified metrics */
+#define RTM_OLDADD	0x9	/* caused by SIOCADDRT */
+#define RTM_OLDDEL	0xa	/* caused by SIOCDELRT */
+#define RTM_RESOLVE	0xb	/* req to resolve dst to LL addr */
+#define RTM_NEWADDR	0xc	/* address being added to iface */
+#define RTM_DELADDR	0xd	/* address being removed from iface */
+#define RTM_IFINFO	0xe	/* iface going up/down etc. */
+
+/* Bits naming individual rt_metrics fields (see rtm_inits, rmx_locks). */
+#define RTV_MTU		0x1	/* init or lock _mtu */
+#define RTV_HOPCOUNT	0x2	/* init or lock _hopcount */
+#define RTV_EXPIRE	0x4	/* init or lock _expire */
+#define RTV_RPIPE	0x8	/* init or lock _recvpipe */
+#define RTV_SPIPE	0x10	/* init or lock _sendpipe */
+#define RTV_SSTHRESH	0x20	/* init or lock _ssthresh */
+#define RTV_RTT		0x40	/* init or lock _rtt */
+#define RTV_RTTVAR	0x80	/* init or lock _rttvar */
+
+/*
+ * Bitmask values for rtm_addrs.
+ * Each RTA_x bit equals (1 << RTAX_x).
+ */
+#define RTA_DST		0x1	/* destination sockaddr present */
+#define RTA_GATEWAY	0x2	/* gateway sockaddr present */
+#define RTA_NETMASK	0x4	/* netmask sockaddr present */
+#define RTA_GENMASK	0x8	/* cloning mask sockaddr present */
+#define RTA_IFP		0x10	/* interface name sockaddr present */
+#define RTA_IFA		0x20	/* interface addr sockaddr present */
+#define RTA_AUTHOR	0x40	/* sockaddr for author of redirect */
+#define RTA_BRD		0x80	/* for NEWADDR, broadcast or p-p dest addr */
+
+/*
+ * Index offsets for sockaddr array for alternate internal encoding
+ * (the rti_info[] array of struct rt_addrinfo).
+ */
+#define RTAX_DST	0	/* slot of destination sockaddr */
+#define RTAX_GATEWAY	1	/* slot of gateway sockaddr */
+#define RTAX_NETMASK	2	/* slot of netmask sockaddr */
+#define RTAX_GENMASK	3	/* slot of cloning mask sockaddr */
+#define RTAX_IFP	4	/* slot of interface name sockaddr */
+#define RTAX_IFA	5	/* slot of interface addr sockaddr */
+#define RTAX_AUTHOR	6	/* slot of sockaddr for author of redirect */
+#define RTAX_BRD	7	/* slot of NEWADDR broadcast or p-p dest addr */
+#define RTAX_MAX	8	/* size of array to allocate */
+
+/* Internal, pointer-based form of the sockaddrs carried by a message. */
+struct rt_addrinfo {
+	int	rti_addrs;			/* RTA_* bitmask */
+	struct	sockaddr *rti_info[RTAX_MAX];	/* indexed by RTAX_*; 0 if absent */
+};
+
+/*
+ * Counts of open routing sockets, adjusted by route_usrreq() on
+ * attach and detach according to the socket's protocol number.
+ */
+struct route_cb {
+	int	ip_count;	/* sockets with protocol AF_INET */
+	int	ns_count;	/* sockets with protocol AF_NS */
+	int	iso_count;	/* sockets with protocol AF_ISO */
+	int	any_count;	/* all routing sockets */
+};
+
+#ifdef KERNEL
+/*
+ * Drop a reference on rt: the last reference goes through rtfree(),
+ * otherwise only the count is decremented.
+ * NOTE(review): this expands to a bare if/else that already ends in
+ * `;' and evaluates its argument several times, so it is only safe as
+ * a complete statement with a side-effect free argument -- never as
+ * the unbraced body of another if/else.
+ */
+#define	RTFREE(rt) \
+	if ((rt)->rt_refcnt <= 1) \
+		rtfree(rt); \
+	else \
+		(rt)->rt_refcnt--;
+
+/* Global routing state; note these are definitions living in a header. */
+struct	route_cb route_cb;
+struct	rtstat	rtstat;
+struct	radix_node_head *rt_tables[AF_MAX+1];	/* indexed by sa_family */
+
+void	 route_init __P((void));
+int	 route_output __P((struct mbuf *, struct socket *));
+int	 route_usrreq __P((struct socket *,
+	    int, struct mbuf *, struct mbuf *, struct mbuf *));
+void	 rt_ifmsg __P((struct ifnet *));
+void	 rt_maskedcopy __P((struct sockaddr *,
+	    struct sockaddr *, struct sockaddr *));
+void	 rt_missmsg __P((int, struct rt_addrinfo *, int, int));
+void	 rt_newaddrmsg __P((int, struct ifaddr *, int, struct rtentry *));
+int	 rt_setgate __P((struct rtentry *,
+	    struct sockaddr *, struct sockaddr *));
+void	 rt_setmetrics __P((u_long, struct rt_metrics *, struct rt_metrics *));
+void	 rtable_init __P((void **));
+void	 rtalloc __P((struct route *));
+struct rtentry *
+	 rtalloc1 __P((struct sockaddr *, int));
+void	 rtfree __P((struct rtentry *));
+int	 rtinit __P((struct ifaddr *, int, int));
+int	 rtioctl __P((int, caddr_t, struct proc *));
+int	 rtredirect __P((struct sockaddr *, struct sockaddr *,
+	    struct sockaddr *, int, struct sockaddr *, struct rtentry **));
+int	 rtrequest __P((int, struct sockaddr *,
+	    struct sockaddr *, struct sockaddr *, int, struct rtentry **));
+#endif
diff --git a/sys/net/rtsock.c b/sys/net/rtsock.c
new file mode 100644
index 00000000000..d128121708d
--- /dev/null
+++ b/sys/net/rtsock.c
@@ -0,0 +1,833 @@
+/*
+ * Copyright (c) 1988, 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)rtsock.c	8.3 (Berkeley) 1/4/94
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/domain.h>
+#include <sys/protosw.h>
+
+#include <net/if.h>
+#include <net/route.h>
+#include <net/raw_cb.h>
+
+/*
+ * Fixed addresses for the routing socket family; the initializers set
+ * the first two members (length 2, family PF_ROUTE).  route_src is
+ * installed as the foreign address of every routing socket on attach.
+ * NOTE(review): the uses of route_dst and route_proto are not visible
+ * in this part of the file.
+ */
+struct	sockaddr route_dst = { 2, PF_ROUTE, };
+struct	sockaddr route_src = { 2, PF_ROUTE, };
+struct	sockproto route_proto = { PF_ROUTE, };
+
+/*
+ * Argument block handed to rt_msg2().
+ * NOTE(review): the meaning of the individual fields is not visible in
+ * this part of the file; see where rt_msg2() and its callers use them.
+ */
+struct walkarg {
+	int	w_op, w_arg, w_given, w_needed, w_tmemsize;
+	caddr_t	w_where, w_tmem;
+};
+
+/* File-local helpers, defined further down in this file. */
+static struct mbuf *
+		rt_msg1 __P((int, struct rt_addrinfo *));
+/* called with a null buffer by route_output() to obtain a length */
+static int	rt_msg2 __P((int,
+		    struct rt_addrinfo *, caddr_t, struct walkarg *));
+/* called by route_output() on the bytes following the message header */
+static void	rt_xaddrs __P((caddr_t, caddr_t, struct rt_addrinfo *));
+
+/*
+ * Sleazy use of local variables throughout file, warning!!!!
+ * Each name below expands to a slot of a variable called `info', so
+ * any function using them must have a `struct rt_addrinfo info' in
+ * scope, and none of these words can be used as an ordinary
+ * identifier from here to the end of the file.
+ */
+#define dst	info.rti_info[RTAX_DST]
+#define gate	info.rti_info[RTAX_GATEWAY]
+#define netmask	info.rti_info[RTAX_NETMASK]
+#define genmask	info.rti_info[RTAX_GENMASK]
+#define ifpaddr	info.rti_info[RTAX_IFP]
+#define ifaaddr	info.rti_info[RTAX_IFA]
+#define brdaddr	info.rti_info[RTAX_BRD]
+
+/*
+ * User request entry point for routing sockets.  The real work is
+ * done by raw_usrreq(); this wrapper allocates the raw control block
+ * before an attach, keeps the per-protocol socket counts in route_cb
+ * up to date around attach and detach, and marks a freshly attached
+ * socket as connected to route_src with SO_USELOOPBACK set.
+ */
+/*ARGSUSED*/
+int
+route_usrreq(so, req, m, nam, control)
+	register struct socket *so;
+	int req;
+	struct mbuf *m, *nam, *control;
+{
+	register struct rawcb *rp = sotorawcb(so);
+	register int error = 0;
+	int family;
+	int s;
+
+	if (req == PRU_ATTACH) {
+		MALLOC(rp, struct rawcb *, sizeof(*rp), M_PCB, M_WAITOK);
+		so->so_pcb = (caddr_t)rp;
+		if (so->so_pcb != 0)
+			bzero(so->so_pcb, sizeof(*rp));
+	}
+	if (req == PRU_DETACH && rp != 0) {
+		/* uncount the socket before raw_usrreq() tears it down */
+		family = rp->rcb_proto.sp_protocol;
+		switch (family) {
+		case AF_INET:
+			route_cb.ip_count--;
+			break;
+		case AF_NS:
+			route_cb.ns_count--;
+			break;
+		case AF_ISO:
+			route_cb.iso_count--;
+			break;
+		}
+		route_cb.any_count--;
+	}
+
+	s = splnet();
+	error = raw_usrreq(so, req, m, nam, control);
+	rp = sotorawcb(so);
+	if (req != PRU_ATTACH || rp == 0) {
+		splx(s);
+		return (error);
+	}
+
+	/* attach only from here on */
+	if (error) {
+		free((caddr_t)rp, M_PCB);
+		splx(s);
+		return (error);
+	}
+	family = rp->rcb_proto.sp_protocol;
+	switch (family) {
+	case AF_INET:
+		route_cb.ip_count++;
+		break;
+	case AF_NS:
+		route_cb.ns_count++;
+		break;
+	case AF_ISO:
+		route_cb.iso_count++;
+		break;
+	}
+	rp->rcb_faddr = &route_src;
+	route_cb.any_count++;
+	soisconnected(so);
+	so->so_options |= SO_USELOOPBACK;
+	splx(s);
+	return (error);
+}
+
+/*ARGSUSED*/
+int
+route_output(m, so)
+	register struct mbuf *m;
+	struct socket *so;
+{
+	register struct rt_msghdr *rtm = 0;
+	register struct rtentry *rt = 0;
+	struct rtentry *saved_nrt = 0;
+	struct rt_addrinfo info;
+	int len, error = 0;
+	struct ifnet *ifp = 0;
+	struct ifaddr *ifa = 0;
+
+#define senderr(e) { error = e; goto flush;}
+	if (m == 0 || ((m->m_len < sizeof(long)) &&
+		       (m = m_pullup(m, sizeof(long))) == 0))
+		return (ENOBUFS);
+	if ((m->m_flags & M_PKTHDR) == 0)
+		panic("route_output");
+	len = m->m_pkthdr.len;
+	if (len < sizeof(*rtm) ||
+	    len != mtod(m, struct rt_msghdr *)->rtm_msglen) {
+		dst = 0;
+		senderr(EINVAL);
+	}
+	R_Malloc(rtm, struct rt_msghdr *, len);
+	if (rtm == 0) {
+		dst = 0;
+		senderr(ENOBUFS);
+	}
+	m_copydata(m, 0, len, (caddr_t)rtm);
+	if (rtm->rtm_version != RTM_VERSION) {
+		dst = 0;
+		senderr(EPROTONOSUPPORT);
+	}
+	rtm->rtm_pid = curproc->p_pid;
+	info.rti_addrs = rtm->rtm_addrs;
+	rt_xaddrs((caddr_t)(rtm + 1), len + (caddr_t)rtm, &info);
+	if (dst == 0)
+		senderr(EINVAL);
+	if (genmask) {
+		struct radix_node *t;
+		t = rn_addmask((caddr_t)genmask, 1, 2);
+		if (t && Bcmp(genmask, t->rn_key, *(u_char *)genmask) == 0)
+			genmask = (struct sockaddr *)(t->rn_key);
+		else
+			senderr(ENOBUFS);
+	}
+	switch (rtm->rtm_type) {
+
+	case RTM_ADD:
+		if (gate == 0)
+			senderr(EINVAL);
+		error = rtrequest(RTM_ADD, dst, gate, netmask,
+					rtm->rtm_flags, &saved_nrt);
+		if (error == 0 && saved_nrt) {
+			rt_setmetrics(rtm->rtm_inits,
+				&rtm->rtm_rmx, &saved_nrt->rt_rmx);
+			saved_nrt->rt_refcnt--;
+			saved_nrt->rt_genmask = genmask;
+		}
+		break;
+
+	case RTM_DELETE:
+		error = rtrequest(RTM_DELETE, dst, gate, netmask,
+				rtm->rtm_flags, (struct rtentry **)0);
+		break;
+
+	case RTM_GET:
+	case RTM_CHANGE:
+	case RTM_LOCK:
+		rt = rtalloc1(dst, 0);
+		if (rt == 0)
+			senderr(ESRCH);
+		if (rtm->rtm_type != RTM_GET) {/* XXX: too grotty */
+			struct radix_node *rn;
+			extern struct radix_node_head *mask_rnhead;
+
+			if (Bcmp(dst, rt_key(rt), dst->sa_len) != 0)
+				senderr(ESRCH);
+			if (netmask && (rn = rn_search(netmask,
+					    mask_rnhead->rnh_treetop)))
+				netmask = (struct sockaddr *)rn->rn_key;
+			for (rn = rt->rt_nodes; rn; rn = rn->rn_dupedkey)
+				if (netmask == (struct sockaddr *)rn->rn_mask)
+					break;
+			if (rn == 0)
+				senderr(ETOOMANYREFS);
+			rt = (struct rtentry *)rn;
+		}
+		switch(rtm->rtm_type) {
+
+		case RTM_GET:
+			dst = rt_key(rt);
+			gate = rt->rt_gateway;
+			netmask = rt_mask(rt);
+			genmask = rt->rt_genmask;
+			if (rtm->rtm_addrs & (RTA_IFP | RTA_IFA)) {
+				if (ifp = rt->rt_ifp) {
+					ifpaddr = ifp->if_addrlist->ifa_addr;
+					ifaaddr = rt->rt_ifa->ifa_addr;
+					rtm->rtm_index = ifp->if_index;
+				} else {
+					ifpaddr = 0;
+					ifaaddr = 0;
+			    }
+			}
+			len = rt_msg2(RTM_GET, &info, (caddr_t)0,
+				(struct walkarg *)0);
+			if (len > rtm->rtm_msglen) {
+				struct rt_msghdr *new_rtm;
+				R_Malloc(new_rtm, struct rt_msghdr *, len);
+				if (new_rtm == 0)
+					senderr(ENOBUFS);
+				Bcopy(rtm, new_rtm, rtm->rtm_msglen);
+				Free(rtm); rtm = new_rtm;
+			}
+			(void)rt_msg2(RTM_GET, &info, (caddr_t)rtm,
+				(struct walkarg *)0);
+			rtm->rtm_flags = rt->rt_flags;
+			rtm->rtm_rmx = rt->rt_rmx;
+			rtm->rtm_addrs = info.rti_addrs;
+			break;
+
+		case RTM_CHANGE:
+			if (gate && rt_setgate(rt, rt_key(rt), gate))
+				senderr(EDQUOT);
+			/* new gateway could require new ifaddr, ifp;
+			   flags may also be different; ifp may be specified
+			   by ll sockaddr when protocol address is ambiguous */
+			if (ifpaddr && (ifa = ifa_ifwithnet(ifpaddr)) &&
+			    (ifp = ifa->ifa_ifp))
+				ifa = ifaof_ifpforaddr(ifaaddr ? ifaaddr : gate,
+							ifp);
+			else if ((ifaaddr && (ifa = ifa_ifwithaddr(ifaaddr))) ||
+				 (ifa = ifa_ifwithroute(rt->rt_flags,
+							rt_key(rt), gate)))
+				ifp = ifa->ifa_ifp;
+			if (ifa) {
+				register struct ifaddr *oifa = rt->rt_ifa;
+				if (oifa != ifa) {
+				    if (oifa && oifa->ifa_rtrequest)
+					oifa->ifa_rtrequest(RTM_DELETE,
+								rt, gate);
+				    IFAFREE(rt->rt_ifa);
+				    rt->rt_ifa = ifa;
+				    ifa->ifa_refcnt++;
+				    rt->rt_ifp = ifp;
+				}
+			}
+			rt_setmetrics(rtm->rtm_inits, &rtm->rtm_rmx,
+					&rt->rt_rmx);
+			if (rt->rt_ifa && rt->rt_ifa->ifa_rtrequest)
+			       rt->rt_ifa->ifa_rtrequest(RTM_ADD, rt, gate);
+			if (genmask)
+				rt->rt_genmask = genmask;
+			/*
+			 * Fall into
+			 */
+		case RTM_LOCK:
+			rt->rt_rmx.rmx_locks &= ~(rtm->rtm_inits);
+			rt->rt_rmx.rmx_locks |=
+				(rtm->rtm_inits & rtm->rtm_rmx.rmx_locks);
+			break;
+		}
+		break;
+
+	default:
+		senderr(EOPNOTSUPP);
+	}
+
+flush:
+	if (rtm) {
+		if (error)
+			rtm->rtm_errno = error;
+		else 
+			rtm->rtm_flags |= RTF_DONE;
+	}
+	if (rt)
+		rtfree(rt);
+    {
+	register struct rawcb *rp = 0;
+	/*
+	 * Check to see if we don't want our own messages.
+	 */
+	if ((so->so_options & SO_USELOOPBACK) == 0) {
+		if (route_cb.any_count <= 1) {
+			if (rtm)
+				Free(rtm);
+			m_freem(m);
+			return (error);
+		}
+		/* There is another listener, so construct message */
+		rp = sotorawcb(so);
+	}
+	if (rtm) {
+		m_copyback(m, 0, rtm->rtm_msglen, (caddr_t)rtm);
+		Free(rtm);
+	}
+	if (rp)
+		rp->rcb_proto.sp_family = 0; /* Avoid us */
+	if (dst)
+		route_proto.sp_protocol = dst->sa_family;
+	raw_input(m, &route_proto, &route_src, &route_dst);
+	if (rp)
+		rp->rcb_proto.sp_family = PF_ROUTE;
+    }
+	return (error);
+}
+
+void
+rt_setmetrics(which, in, out)	/* copy selected route metrics from *in to *out */
+	u_long which;		/* mask of RTV_* bits naming the fields to copy */
+	register struct rt_metrics *in, *out;
+{	/* fields whose bit is clear in "which" are left untouched in *out */
+#define metric(f, e) if (which & (f)) out->e = in->e;
+	metric(RTV_RPIPE, rmx_recvpipe);
+	metric(RTV_SPIPE, rmx_sendpipe);
+	metric(RTV_SSTHRESH, rmx_ssthresh);
+	metric(RTV_RTT, rmx_rtt);
+	metric(RTV_RTTVAR, rmx_rttvar);
+	metric(RTV_HOPCOUNT, rmx_hopcount);
+	metric(RTV_MTU, rmx_mtu);
+	metric(RTV_EXPIRE, rmx_expire);
+#undef metric
+}
+
+#define ROUNDUP(a) \
+	((a) > 0 ? (1 + (((a) - 1) | (sizeof(long) - 1))) : sizeof(long))	/* a rounded up to a multiple of sizeof(long); a <= 0 gives sizeof(long) */
+#define ADVANCE(x, n) (x += ROUNDUP((n)->sa_len))	/* step x past sockaddr n, padding included */
+
+static void
+rt_xaddrs(cp, cplim, rtinfo)	/* index the sockaddrs packed in [cp, cplim) */
+	register caddr_t cp, cplim;
+	register struct rt_addrinfo *rtinfo;	/* rti_addrs is read, rti_info is filled in */
+{
+	register struct sockaddr *sa;
+	register int i;
+
+	bzero(rtinfo->rti_info, sizeof(rtinfo->rti_info));	/* absent entries stay null */
+	for (i = 0; (i < RTAX_MAX) && (cp < cplim); i++) {
+		if ((rtinfo->rti_addrs & (1 << i)) == 0)
+			continue;	/* sockaddr i was not supplied */
+		rtinfo->rti_info[i] = sa = (struct sockaddr *)cp;	/* NOTE(review): sa_len is not checked against cplim */
+		ADVANCE(cp, sa);	/* next sockaddr starts after the padded length */
+	}
+}
+
+/*
+ * Copy "len" bytes from buffer "cp" back into the mbuf chain "m0",
+ * starting "off" bytes from the beginning; mbufs are appended if the
+ * chain is too short and a packet header length is grown to match.
+ */
+void
+m_copyback(m0, off, len, cp)
+	struct	mbuf *m0;
+	register int off;
+	register int len;
+	caddr_t cp;
+{
+	register int mlen;
+	register struct mbuf *m = m0, *n;
+	int totlen = 0;		/* chain bytes covered so far */
+
+	if (m0 == 0)
+		return;
+	while (off > (mlen = m->m_len)) {	/* walk to the mbuf holding offset "off" */
+		off -= mlen;
+		totlen += mlen;
+		if (m->m_next == 0) {	/* chain ends before "off": extend it */
+			n = m_getclr(M_DONTWAIT, m->m_type);
+			if (n == 0)
+				goto out;	/* no mbuf: give up, nothing is copied */
+			n->m_len = min(MLEN, len + off);
+			m->m_next = n;
+		}
+		m = m->m_next;
+	}
+	while (len > 0) {
+		mlen = min (m->m_len - off, len);	/* bytes that fit in this mbuf */
+		bcopy(cp, off + mtod(m, caddr_t), (unsigned)mlen);
+		cp += mlen;
+		len -= mlen;
+		mlen += off;	/* count the skipped prefix of this mbuf as well */
+		off = 0;
+		totlen += mlen;
+		if (len == 0)
+			break;
+		if (m->m_next == 0) {
+			n = m_get(M_DONTWAIT, m->m_type);
+			if (n == 0)
+				break;	/* no mbuf: the rest of the data is dropped */
+			n->m_len = min(MLEN, len);
+			m->m_next = n;
+		}
+		m = m->m_next;
+	}
+out:	if (((m = m0)->m_flags & M_PKTHDR) && (m->m_pkthdr.len < totlen))
+		m->m_pkthdr.len = totlen;	/* packet length only ever grows here */
+}
+
+static struct mbuf *
+rt_msg1(type, rtinfo)	/* build a "type" message in a new mbuf chain; NULL on failure */
+	int type;
+	register struct rt_addrinfo *rtinfo;	/* sockaddrs to append; rti_addrs gains their bits */
+{
+	register struct rt_msghdr *rtm;
+	register struct mbuf *m;
+	register int i;
+	register struct sockaddr *sa;
+	int len, dlen;
+
+	m = m_gethdr(M_DONTWAIT, MT_DATA);
+	if (m == 0)
+		return (m);
+	switch (type) {	/* fixed header size depends on the message type */
+
+	case RTM_DELADDR:
+	case RTM_NEWADDR:
+		len = sizeof(struct ifa_msghdr);
+		break;
+
+	case RTM_IFINFO:
+		len = sizeof(struct if_msghdr);
+		break;
+
+	default:
+		len = sizeof(struct rt_msghdr);
+	}
+	if (len > MHLEN)	/* the header must fit in the first mbuf */
+		panic("rt_msg1");
+	m->m_pkthdr.len = m->m_len = len;
+	m->m_pkthdr.rcvif = 0;
+	rtm = mtod(m, struct rt_msghdr *);
+	bzero((caddr_t)rtm, len);
+	for (i = 0; i < RTAX_MAX; i++) {
+		if ((sa = rtinfo->rti_info[i]) == NULL)
+			continue;
+		rtinfo->rti_addrs |= (1 << i);
+		dlen = ROUNDUP(sa->sa_len);
+		m_copyback(m, len, dlen, (caddr_t)sa);	/* append, growing the chain */
+		len += dlen;
+	}
+	if (m->m_pkthdr.len != len) {	/* m_copyback fell short of the full length */
+		m_freem(m);
+		return (NULL);
+	}
+	rtm->rtm_msglen = len;
+	rtm->rtm_version = RTM_VERSION;
+	rtm->rtm_type = type;
+	return (m);
+}
+
+static int
+rt_msg2(type, rtinfo, cp, w)	/* size, and optionally build, a message; returns its length */
+	int type;
+	register struct rt_addrinfo *rtinfo;	/* sockaddrs to include; rti_addrs is recomputed */
+	caddr_t cp;		/* buffer to build the message in, or 0 to only size it */
+	struct walkarg *w;	/* sysctl walk state, or NULL */
+{
+	register int i;
+	int len, dlen, second_time = 0;
+	caddr_t cp0;
+
+	rtinfo->rti_addrs = 0;
+again:	/* re-entered at most once, to fill w->w_tmem */
+	switch (type) {
+
+	case RTM_DELADDR:
+	case RTM_NEWADDR:
+		len = sizeof(struct ifa_msghdr);
+		break;
+
+	case RTM_IFINFO:
+		len = sizeof(struct if_msghdr);
+		break;
+
+	default:
+		len = sizeof(struct rt_msghdr);
+	}
+	if (cp0 = cp)	/* remember the message start, then skip the header */
+		cp += len;
+	for (i = 0; i < RTAX_MAX; i++) {
+		register struct sockaddr *sa;
+
+		if ((sa = rtinfo->rti_info[i]) == 0)
+			continue;
+		rtinfo->rti_addrs |= (1 << i);
+		dlen = ROUNDUP(sa->sa_len);
+		if (cp) {	/* copy only when a buffer was supplied */
+			bcopy((caddr_t)sa, cp, (unsigned)dlen);
+			cp += dlen;
+		}
+		len += dlen;
+	}
+	if (cp == 0 && w != NULL && !second_time) {	/* sizing pass for a sysctl walk */
+		register struct walkarg *rw = w;
+
+		rw->w_needed += len;
+		if (rw->w_needed <= 0 && rw->w_where) {	/* still copying out and not over budget */
+			if (rw->w_tmemsize < len) {	/* scratch buffer too small: replace it */
+				if (rw->w_tmem)
+					free(rw->w_tmem, M_RTABLE);
+				if (rw->w_tmem = (caddr_t)
+						malloc(len, M_RTABLE, M_NOWAIT))
+					rw->w_tmemsize = len;
+			}
+			if (rw->w_tmem) {
+				cp = rw->w_tmem;
+				second_time = 1;
+				goto again;	/* build the message in the scratch buffer */
+			} else
+				rw->w_where = 0;	/* no memory: stop copying out */
+		}
+	}
+	if (cp) {	/* a message was built: fill in the common header fields */
+		register struct rt_msghdr *rtm = (struct rt_msghdr *)cp0;
+
+		rtm->rtm_version = RTM_VERSION;
+		rtm->rtm_type = type;
+		rtm->rtm_msglen = len;
+	}
+	return (len);
+}
+
+/*
+ * This routine is called to generate a message from the routing
+ * socket indicating that a redirect has occurred, a routing lookup
+ * has failed, or that a protocol has detected timeouts to a particular
+ * destination.
+ */
+void
+rt_missmsg(type, rtinfo, flags, error)
+	int type, flags, error;
+	register struct rt_addrinfo *rtinfo;
+{
+	register struct rt_msghdr *rtm;
+	register struct mbuf *m;
+	struct sockaddr *sa = rtinfo->rti_info[RTAX_DST];
+
+	if (route_cb.any_count == 0)	/* nothing is built when this count is zero */
+		return;
+	m = rt_msg1(type, rtinfo);
+	if (m == 0)
+		return;
+	rtm = mtod(m, struct rt_msghdr *);
+	rtm->rtm_flags = RTF_DONE | flags;
+	rtm->rtm_errno = error;
+	rtm->rtm_addrs = rtinfo->rti_addrs;
+	route_proto.sp_protocol = sa ? sa->sa_family : 0;	/* destination's family, 0 if none */
+	raw_input(m, &route_proto, &route_src, &route_dst);
+}
+
+/*
+ * This routine is called to generate a message from the routing
+ * socket indicating that the status of a network interface has changed.
+ */
+void
+rt_ifmsg(ifp)
+	register struct ifnet *ifp;	/* interface whose index, flags and data are reported */
+{
+	register struct if_msghdr *ifm;
+	struct mbuf *m;
+	struct rt_addrinfo info;
+
+	if (route_cb.any_count == 0)	/* nothing is built when this count is zero */
+		return;
+	bzero((caddr_t)&info, sizeof(info));	/* the message carries no sockaddrs */
+	m = rt_msg1(RTM_IFINFO, &info);
+	if (m == 0)
+		return;
+	ifm = mtod(m, struct if_msghdr *);
+	ifm->ifm_index = ifp->if_index;
+	ifm->ifm_flags = ifp->if_flags;
+	ifm->ifm_data = ifp->if_data;
+	ifm->ifm_addrs = 0;
+	route_proto.sp_protocol = 0;
+	raw_input(m, &route_proto, &route_src, &route_dst);
+}
+
+/*
+ * This is called to generate messages from the routing socket
+ * indicating a network interface has had addresses associated with it.
+ * If we ever reverse the logic and let messages sent TO the routing
+ * socket indicate a request to configure interfaces, then it will
+ * be unnecessary as the routing socket will automatically generate
+ * copies of it.
+ */
+void
+rt_newaddrmsg(cmd, ifa, error, rt)
+	int cmd, error;		/* cmd is tested against RTM_ADD and RTM_DELETE only */
+	register struct ifaddr *ifa;
+	register struct rtentry *rt;	/* may be null; the route message is then skipped */
+{
+	struct rt_addrinfo info;	/* NOTE(review): ifaaddr, dst, ... look like macros over info.rti_info[] -- confirm */
+	struct sockaddr *sa;
+	int pass;
+	struct mbuf *m;
+	struct ifnet *ifp = ifa->ifa_ifp;
+
+	if (route_cb.any_count == 0)
+		return;
+	for (pass = 1; pass < 3; pass++) {	/* two messages; their order depends on cmd */
+		bzero((caddr_t)&info, sizeof(info));
+		if ((cmd == RTM_ADD && pass == 1) ||
+		    (cmd == RTM_DELETE && pass == 2)) {	/* address message: first on add, last on delete */
+			register struct ifa_msghdr *ifam;
+			int ncmd = cmd == RTM_ADD ? RTM_NEWADDR : RTM_DELADDR;
+
+			ifaaddr = sa = ifa->ifa_addr;
+			ifpaddr = ifp->if_addrlist->ifa_addr;
+			netmask = ifa->ifa_netmask;
+			brdaddr = ifa->ifa_dstaddr;
+			if ((m = rt_msg1(ncmd, &info)) == NULL)
+				continue;
+			ifam = mtod(m, struct ifa_msghdr *);
+			ifam->ifam_index = ifp->if_index;
+			ifam->ifam_metric = ifa->ifa_metric;
+			ifam->ifam_flags = ifa->ifa_flags;
+			ifam->ifam_addrs = info.rti_addrs;
+		}
+		if ((cmd == RTM_ADD && pass == 2) ||
+		    (cmd == RTM_DELETE && pass == 1)) {	/* route message: last on add, first on delete */
+			register struct rt_msghdr *rtm;
+			
+			if (rt == 0)
+				continue;
+			netmask = rt_mask(rt);
+			dst = sa = rt_key(rt);
+			gate = rt->rt_gateway;
+			if ((m = rt_msg1(cmd, &info)) == NULL)
+				continue;
+			rtm = mtod(m, struct rt_msghdr *);
+			rtm->rtm_index = ifp->if_index;
+			rtm->rtm_flags |= rt->rt_flags;
+			rtm->rtm_errno = error;
+			rtm->rtm_addrs = info.rti_addrs;
+		}
+		route_proto.sp_protocol = sa ? sa->sa_family : 0;
+		raw_input(m, &route_proto, &route_src, &route_dst);
+	}
+}
+
+/*
+ * Tree-walk callback used in dumping the kernel table via sysctl().
+ */
+int
+sysctl_dumpentry(rn, w)
+	struct radix_node *rn;		/* treated as the rtentry it begins */
+	register struct walkarg *w;	/* walk state: filter, output pointer, scratch */
+{
+	register struct rtentry *rt = (struct rtentry *)rn;
+	int error = 0, size;
+	struct rt_addrinfo info;
+
+	if (w->w_op == NET_RT_FLAGS && !(rt->rt_flags & w->w_arg))
+		return 0;	/* flag filter: skip routes with none of the w_arg bits */
+	bzero((caddr_t)&info, sizeof(info));
+	dst = rt_key(rt);
+	gate = rt->rt_gateway;
+	netmask = rt_mask(rt);
+	genmask = rt->rt_genmask;
+	size = rt_msg2(RTM_GET, &info, 0, w);	/* sizes the entry; may build it in w->w_tmem */
+	if (w->w_where && w->w_tmem) {
+		register struct rt_msghdr *rtm = (struct rt_msghdr *)w->w_tmem;
+
+		rtm->rtm_flags = rt->rt_flags;
+		rtm->rtm_use = rt->rt_use;
+		rtm->rtm_rmx = rt->rt_rmx;
+		rtm->rtm_index = rt->rt_ifp->if_index;
+		rtm->rtm_errno = rtm->rtm_pid = rtm->rtm_seq = 0;
+		rtm->rtm_addrs = info.rti_addrs;
+		if (error = copyout((caddr_t)rtm, w->w_where, size))
+			w->w_where = NULL;	/* copyout failed: stop copying further entries */
+		else
+			w->w_where += size;
+	}
+	return (error);
+}
+
+int
+sysctl_iflist(af, w)	/* emit an RTM_IFINFO per interface and an RTM_NEWADDR per address */
+	int	af;		/* address family to report, 0 for all */
+	register struct	walkarg *w;	/* w_arg, when non-zero, selects one interface index */
+{
+	register struct ifnet *ifp;
+	register struct ifaddr *ifa;
+	struct	rt_addrinfo info;
+	int	len, error = 0;
+
+	bzero((caddr_t)&info, sizeof(info));
+	for (ifp = ifnet; ifp; ifp = ifp->if_next) {
+		if (w->w_arg && w->w_arg != ifp->if_index)
+			continue;
+		ifa = ifp->if_addrlist;
+		ifpaddr = ifa->ifa_addr;	/* first address on the list goes in the interface message */
+		len = rt_msg2(RTM_IFINFO, &info, (caddr_t)0, w);
+		ifpaddr = 0;	/* not repeated in the per-address messages */
+		if (w->w_where && w->w_tmem) {
+			register struct if_msghdr *ifm;
+
+			ifm = (struct if_msghdr *)w->w_tmem;
+			ifm->ifm_index = ifp->if_index;
+			ifm->ifm_flags = ifp->if_flags;
+			ifm->ifm_data = ifp->if_data;
+			ifm->ifm_addrs = info.rti_addrs;
+			if (error = copyout((caddr_t)ifm, w->w_where, len))
+				return (error);
+			w->w_where += len;
+		}
+		while (ifa = ifa->ifa_next) {	/* the remaining addresses of this interface */
+			if (af && af != ifa->ifa_addr->sa_family)
+				continue;
+			ifaaddr = ifa->ifa_addr;
+			netmask = ifa->ifa_netmask;
+			brdaddr = ifa->ifa_dstaddr;
+			len = rt_msg2(RTM_NEWADDR, &info, 0, w);
+			if (w->w_where && w->w_tmem) {
+				register struct ifa_msghdr *ifam;
+
+				ifam = (struct ifa_msghdr *)w->w_tmem;
+				ifam->ifam_index = ifa->ifa_ifp->if_index;
+				ifam->ifam_flags = ifa->ifa_flags;
+				ifam->ifam_metric = ifa->ifa_metric;
+				ifam->ifam_addrs = info.rti_addrs;
+				if (error = copyout(w->w_tmem, w->w_where, len))
+					return (error);
+				w->w_where += len;
+			}
+		}
+		ifaaddr = netmask = brdaddr = 0;	/* clear before the next interface */
+	}
+	return (0);
+}
+
+int
+sysctl_rtable(name, namelen, where, given, new, newlen)
+	int	*name;		/* name[0] = address family, name[1] = operation, name[2] = argument */
+	int	namelen;	/* must be 3 */
+	caddr_t	where;		/* output buffer, or null to only estimate the size */
+	size_t	*given;		/* in: buffer size; out: bytes copied or size estimate */
+	caddr_t	*new;		/* must be null: setting a value is refused with EPERM */
+	size_t	newlen;
+{
+	register struct radix_node_head *rnh;
+	int	i, s, error = EINVAL;
+	u_char  af;
+	struct	walkarg w;
+
+	if (new)
+		return (EPERM);
+	if (namelen != 3)
+		return (EINVAL);
+	af = name[0];
+	Bzero(&w, sizeof(w));
+	w.w_where = where;
+	w.w_given = *given;
+	w.w_needed = 0 - w.w_given;	/* turns positive once output exceeds the buffer */
+	w.w_op = name[1];
+	w.w_arg = name[2];
+
+	s = splnet();
+	switch (w.w_op) {
+
+	case NET_RT_DUMP:
+	case NET_RT_FLAGS:
+		for (i = 1; i <= AF_MAX; i++)	/* walk every table matching af; stop on first error */
+			if ((rnh = rt_tables[i]) && (af == 0 || af == i) &&
+			    (error = rnh->rnh_walktree(rnh,
+							sysctl_dumpentry, &w)))
+				break;
+		break;
+
+	case NET_RT_IFLIST:
+		error = sysctl_iflist(af, &w);
+	}
+	splx(s);
+	if (w.w_tmem)
+		free(w.w_tmem, M_RTABLE);
+	w.w_needed += w.w_given;	/* back to the total number of bytes needed */
+	if (where) {
+		*given = w.w_where - where;	/* NOTE(review): w_where may have been cleared on error */
+		if (*given < w.w_needed)
+			return (ENOMEM);
+	} else {
+		*given = (11 * w.w_needed) / 10;	/* size estimate with 10% slack */
+	}
+	return (error);
+}
+
+/*
+ * Definitions of protocols supported in the ROUTE domain.
+ */
+
+extern	struct domain routedomain;		/* or at least forward */
+
+struct protosw routesw[] = {	/* a single raw-socket entry */
+{ SOCK_RAW,	&routedomain,	0,		PR_ATOMIC|PR_ADDR,
+  raw_input,	route_output,	raw_ctlinput,	0,
+  route_usrreq,
+  raw_init,	0,		0,		0,
+  sysctl_rtable,
+}
+};
+
+struct domain routedomain =
+    { PF_ROUTE, "route", route_init, 0, 0,
+      routesw, &routesw[sizeof(routesw)/sizeof(routesw[0])] };	/* second pointer is one past the last entry */
diff --git a/sys/net/slcompress.c b/sys/net/slcompress.c
new file mode 100644
index 00000000000..70af9358e37
--- /dev/null
+++ b/sys/net/slcompress.c
@@ -0,0 +1,535 @@
+/*-
+ * Copyright (c) 1989, 1993, 1994
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)slcompress.c	8.2 (Berkeley) 4/16/94
+ */
+
+/*
+ * Routines to compress and uncompress tcp packets (for transmission
+ * over low speed serial lines).
+ *
+ * Van Jacobson (van@helios.ee.lbl.gov), Dec 31, 1989:
+ *	- Initial distribution.
+ *
+ * static char rcsid[] =
+ * "$Header: slcompress.c,v 1.19 89/12/31 08:52:59 van Exp $";
+ */
+
+#include <sys/param.h>
+#include <sys/mbuf.h>
+
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/ip.h>
+#include <netinet/tcp.h>
+
+#include <net/slcompress.h>
+
+#ifndef SL_NO_STATS
+#define INCR(counter) ++comp->counter;	/* bump a statistics field of the local "comp" */
+#else
+#define INCR(counter)	/* statistics compiled out */
+#endif
+
+#define BCMP(p1, p2, n) bcmp((char *)(p1), (char *)(p2), (int)(n))	/* bcmp with the casts applied */
+#define BCOPY(p1, p2, n) bcopy((char *)(p1), (char *)(p2), (int)(n))	/* bcopy with the casts applied */
+#ifndef KERNEL
+#define ovbcopy bcopy	/* outside the kernel, bcopy stands in for ovbcopy */
+#endif
+
+void
+sl_compress_init(comp)	/* reset all compression state for one line */
+	struct slcompress *comp;
+{
+	register u_int i;
+	register struct cstate *tstate = comp->tstate;
+
+	bzero((char *)comp, sizeof(*comp));
+	for (i = MAX_STATES - 1; i > 0; --i) {	/* link each transmit state to the one before it */
+		tstate[i].cs_id = i;
+		tstate[i].cs_next = &tstate[i - 1];
+	}
+	tstate[0].cs_next = &tstate[MAX_STATES - 1];	/* close the ring */
+	tstate[0].cs_id = 0;
+	comp->last_cs = &tstate[0];
+	comp->last_recv = 255;	/* presumably an id no state uses -- confirm against MAX_STATES */
+	comp->last_xmit = 255;
+	comp->flags = SLF_TOSS;	/* toss compressed input until an explicit connection id arrives */
+}
+
+
+/* ENCODE writes a number known to be non-zero at cp: one byte when it
+ * is below 256, else a zero byte followed by its 16 bits, high byte first.
+ * ENCODEZ also sends zero in the 3 byte form (a lone 0 marks that form).
+ */
+#define ENCODE(n) { \
+	if ((u_short)(n) >= 256) { \
+		*cp++ = 0; \
+		cp[1] = (n); \
+		cp[0] = (n) >> 8; \
+		cp += 2; \
+	} else { \
+		*cp++ = (n); \
+	} \
+}
+#define ENCODEZ(n) { \
+	if ((u_short)(n) >= 256 || (u_short)(n) == 0) { \
+		*cp++ = 0; \
+		cp[1] = (n); \
+		cp[0] = (n) >> 8; \
+		cp += 2; \
+	} else { \
+		*cp++ = (n); \
+	} \
+}
+
+#define DECODEL(f) { \
+	if (*cp == 0) {\
+		(f) = htonl(ntohl(f) + ((cp[1] << 8) | cp[2])); \
+		cp += 3; \
+	} else { \
+		(f) = htonl(ntohl(f) + (u_long)*cp++); \
+	} \
+}	/* DECODEL: add the value encoded at cp to the 32-bit network-order field f */
+
+#define DECODES(f) { \
+	if (*cp == 0) {\
+		(f) = htons(ntohs(f) + ((cp[1] << 8) | cp[2])); \
+		cp += 3; \
+	} else { \
+		(f) = htons(ntohs(f) + (u_long)*cp++); \
+	} \
+}	/* DECODES: add the value encoded at cp to the 16-bit network-order field f */
+
+#define DECODEU(f) { \
+	if (*cp == 0) {\
+		(f) = htons((cp[1] << 8) | cp[2]); \
+		cp += 3; \
+	} else { \
+		(f) = htons((u_long)*cp++); \
+	} \
+}	/* DECODEU: replace f with the value encoded at cp, rather than adding to it */
+
+u_int
+sl_compress_tcp(m, ip, comp, compress_cid)	/* returns TYPE_IP, TYPE_UNCOMPRESSED_TCP or TYPE_COMPRESSED_TCP */
+	struct mbuf *m;			/* packet; m_len and m_data are adjusted when compressed */
+	register struct ip *ip;		/* the packet's IP header; overwritten in place */
+	struct slcompress *comp;	/* per-line compression state */
+	int compress_cid;		/* non-zero: leave out the connection id when unchanged */
+{
+	register struct cstate *cs = comp->last_cs->cs_next;	/* head of the lru ring */
+	register u_int hlen = ip->ip_hl;	/* header length, in 32-bit words for now */
+	register struct tcphdr *oth;	/* TCP header saved from the previous packet */
+	register struct tcphdr *th;
+	register u_int deltaS, deltaA;
+	register u_int changes = 0;
+	u_char new_seq[16];		/* encoded deltas are staged here */
+	register u_char *cp = new_seq;
+
+	/*
+	 * Bail if this is an IP fragment or if the TCP packet isn't
+	 * `compressible' (i.e., ACK isn't set or some other control bit is
+	 * set).  (We assume that the caller has already made sure the
+	 * packet is IP proto TCP).
+	 */
+	if ((ip->ip_off & htons(0x3fff)) || m->m_len < 40)
+		return (TYPE_IP);
+
+	th = (struct tcphdr *)&((int *)ip)[hlen];	/* NOTE(review): beyond the 40-byte test, header length is not checked against m_len */
+	if ((th->th_flags & (TH_SYN|TH_FIN|TH_RST|TH_ACK)) != TH_ACK)
+		return (TYPE_IP);
+	/*
+	 * Packet is compressible -- we're going to send either a
+	 * COMPRESSED_TCP or UNCOMPRESSED_TCP packet.  Either way we need
+	 * to locate (or create) the connection state.  Special case the
+	 * most recently used connection since it's most likely to be used
+	 * again & we don't have to do any reordering if it's used.
+	 */
+	INCR(sls_packets)
+	if (ip->ip_src.s_addr != cs->cs_ip.ip_src.s_addr ||
+	    ip->ip_dst.s_addr != cs->cs_ip.ip_dst.s_addr ||
+	    *(int *)th != ((int *)&cs->cs_ip)[cs->cs_ip.ip_hl]) {	/* first word of the TCP header, presumably the port pair */
+		/*
+		 * Wasn't the first -- search for it.
+		 *
+		 * States are kept in a circularly linked list with
+		 * last_cs pointing to the end of the list.  The
+		 * list is kept in lru order by moving a state to the
+		 * head of the list whenever it is referenced.  Since
+		 * the list is short and, empirically, the connection
+		 * we want is almost always near the front, we locate
+		 * states via linear search.  If we don't find a state
+		 * for the datagram, the oldest state is (re-)used.
+		 */
+		register struct cstate *lcs;
+		register struct cstate *lastcs = comp->last_cs;
+
+		do {
+			lcs = cs; cs = cs->cs_next;
+			INCR(sls_searches)
+			if (ip->ip_src.s_addr == cs->cs_ip.ip_src.s_addr
+			    && ip->ip_dst.s_addr == cs->cs_ip.ip_dst.s_addr
+			    && *(int *)th == ((int *)&cs->cs_ip)[cs->cs_ip.ip_hl])
+				goto found;
+		} while (cs != lastcs);
+
+		/*
+		 * Didn't find it -- re-use oldest cstate.  Send an
+		 * uncompressed packet that tells the other side what
+		 * connection number we're using for this conversation.
+		 * Note that since the state list is circular, the oldest
+		 * state points to the newest and we only need to set
+		 * last_cs to update the lru linkage.
+		 */
+		INCR(sls_misses)
+		comp->last_cs = lcs;
+		hlen += th->th_off;
+		hlen <<= 2;	/* IP + TCP header length, now in bytes */
+		goto uncompressed;
+
+	found:
+		/*
+		 * Found it -- move to the front on the connection list.
+		 */
+		if (cs == lastcs)
+			comp->last_cs = lcs;
+		else {
+			lcs->cs_next = cs->cs_next;
+			cs->cs_next = lastcs->cs_next;
+			lastcs->cs_next = cs;
+		}
+	}
+
+	/*
+	 * Make sure that only what we expect to change changed. The first
+	 * line of the `if' checks the IP protocol version, header length &
+	 * type of service.  The 2nd line checks the "Don't fragment" bit.
+	 * The 3rd line checks the time-to-live and protocol (the protocol
+	 * check is unnecessary but costless).  The 4th line checks the TCP
+	 * header length.  The 5th line checks IP options, if any.  The 6th
+	 * line checks TCP options, if any.  If any of these things are
+	 * different between the previous & current datagram, we send the
+	 * current datagram `uncompressed'.
+	 */
+	oth = (struct tcphdr *)&((int *)&cs->cs_ip)[hlen];
+	deltaS = hlen;	/* deltaS borrowed to keep the IP header length in words */
+	hlen += th->th_off;
+	hlen <<= 2;	/* IP + TCP header length, now in bytes */
+
+	if (((u_short *)ip)[0] != ((u_short *)&cs->cs_ip)[0] ||
+	    ((u_short *)ip)[3] != ((u_short *)&cs->cs_ip)[3] ||
+	    ((u_short *)ip)[4] != ((u_short *)&cs->cs_ip)[4] ||
+	    th->th_off != oth->th_off ||
+	    (deltaS > 5 &&
+	     BCMP(ip + 1, &cs->cs_ip + 1, (deltaS - 5) << 2)) ||
+	    (th->th_off > 5 &&
+	     BCMP(th + 1, oth + 1, (th->th_off - 5) << 2)))
+		goto uncompressed;
+
+	/*
+	 * Figure out which of the changing fields changed.  The
+	 * receiver expects changes in the order: urgent, window,
+	 * ack, seq (the order minimizes the number of temporaries
+	 * needed in this section of code).
+	 */
+	if (th->th_flags & TH_URG) {
+		deltaS = ntohs(th->th_urp);
+		ENCODEZ(deltaS);
+		changes |= NEW_U;
+	} else if (th->th_urp != oth->th_urp)
+		/* argh! URG not set but urp changed -- a sensible
+		 * implementation should never do this but RFC793
+		 * doesn't prohibit the change so we have to deal
+		 * with it. */
+		 goto uncompressed;
+
+	if (deltaS = (u_short)(ntohs(th->th_win) - ntohs(oth->th_win))) {
+		ENCODE(deltaS);
+		changes |= NEW_W;
+	}
+
+	if (deltaA = ntohl(th->th_ack) - ntohl(oth->th_ack)) {
+		if (deltaA > 0xffff)	/* too large to encode (or the field went backwards) */
+			goto uncompressed;
+		ENCODE(deltaA);
+		changes |= NEW_A;
+	}
+
+	if (deltaS = ntohl(th->th_seq) - ntohl(oth->th_seq)) {
+		if (deltaS > 0xffff)	/* too large to encode (or the field went backwards) */
+			goto uncompressed;
+		ENCODE(deltaS);
+		changes |= NEW_S;
+	}
+
+	switch(changes) {
+
+	case 0:
+		/*
+		 * Nothing changed. If this packet contains data and the
+		 * last one didn't, this is probably a data packet following
+		 * an ack (normal on an interactive connection) and we send
+		 * it compressed.  Otherwise it's probably a retransmit,
+		 * retransmitted ack or window probe.  Send it uncompressed
+		 * in case the other side missed the compressed version.
+		 */
+		if (ip->ip_len != cs->cs_ip.ip_len &&
+		    ntohs(cs->cs_ip.ip_len) == hlen)
+			break;
+
+		/* (fall through) */
+
+	case SPECIAL_I:
+	case SPECIAL_D:
+		/*
+		 * actual changes match one of our special case encodings --
+		 * send packet uncompressed.
+		 */
+		goto uncompressed;
+
+	case NEW_S|NEW_A:
+		if (deltaS == deltaA &&
+		    deltaS == ntohs(cs->cs_ip.ip_len) - hlen) {
+			/* special case for echoed terminal traffic */
+			changes = SPECIAL_I;
+			cp = new_seq;	/* discard the deltas encoded so far */
+		}
+		break;
+
+	case NEW_S:
+		if (deltaS == ntohs(cs->cs_ip.ip_len) - hlen) {
+			/* special case for data xfer */
+			changes = SPECIAL_D;
+			cp = new_seq;	/* discard the deltas encoded so far */
+		}
+		break;
+	}
+
+	deltaS = ntohs(ip->ip_id) - ntohs(cs->cs_ip.ip_id);
+	if (deltaS != 1) {	/* an increment of exactly one is implied and not sent */
+		ENCODEZ(deltaS);
+		changes |= NEW_I;
+	}
+	if (th->th_flags & TH_PUSH)
+		changes |= TCP_PUSH_BIT;
+	/*
+	 * Grab the cksum before we overwrite it below.  Then update our
+	 * state with this packet's header.
+	 */
+	deltaA = ntohs(th->th_sum);
+	BCOPY(ip, &cs->cs_ip, hlen);
+
+	/*
+	 * We want to use the original packet as our compressed packet.
+	 * (cp - new_seq) is the number of bytes we need for compressed
+	 * sequence numbers.  In addition we need one byte for the change
+	 * mask, one for the connection id and two for the tcp checksum.
+	 * So, (cp - new_seq) + 4 bytes of header are needed.  hlen is how
+	 * many bytes of the original packet to toss so subtract the two to
+	 * get the new packet size.
+	 */
+	deltaS = cp - new_seq;
+	cp = (u_char *)ip;
+	if (compress_cid == 0 || comp->last_xmit != cs->cs_id) {	/* send the connection id explicitly */
+		comp->last_xmit = cs->cs_id;
+		hlen -= deltaS + 4;
+		cp += hlen;
+		*cp++ = changes | NEW_C;
+		*cp++ = cs->cs_id;
+	} else {	/* same connection as last time: id is implied */
+		hlen -= deltaS + 3;
+		cp += hlen;
+		*cp++ = changes;
+	}
+	m->m_len -= hlen;	/* drop the replaced header bytes from the front */
+	m->m_data += hlen;
+	*cp++ = deltaA >> 8;	/* TCP checksum, high byte first */
+	*cp++ = deltaA;
+	BCOPY(new_seq, cp, deltaS);
+	INCR(sls_compressed)
+	return (TYPE_COMPRESSED_TCP);
+
+	/*
+	 * Update connection state cs & send uncompressed packet ('uncompressed'
+	 * means a regular ip/tcp packet but with the 'conversation id' we hope
+	 * to use on future compressed packets in the protocol field).
+	 */
+uncompressed:
+	BCOPY(ip, &cs->cs_ip, hlen);
+	ip->ip_p = cs->cs_id;
+	comp->last_xmit = cs->cs_id;
+	return (TYPE_UNCOMPRESSED_TCP);
+}
+
+
+int
+sl_uncompress_tcp(bufp, len, type, comp)	/* returns the packet length, or 0 if the packet is dropped */
+	u_char **bufp;		/* in: start of received packet; out: start of rebuilt packet */
+	int len;		/* length of the received packet */
+	u_int type;		/* TYPE_UNCOMPRESSED_TCP or TYPE_COMPRESSED_TCP; others are errors */
+	struct slcompress *comp;	/* per-line compression state */
+{
+	register u_char *cp;
+	register u_int hlen, changes;
+	register struct tcphdr *th;
+	register struct cstate *cs;
+	register struct ip *ip;
+
+	switch (type) {
+
+	case TYPE_UNCOMPRESSED_TCP:
+		ip = (struct ip *) *bufp;
+		if (ip->ip_p >= MAX_STATES)	/* the protocol field carries the connection id */
+			goto bad;
+		cs = &comp->rstate[comp->last_recv = ip->ip_p];
+		comp->flags &=~ SLF_TOSS;
+		ip->ip_p = IPPROTO_TCP;	/* restore the real protocol number */
+		hlen = ip->ip_hl;
+		hlen += ((struct tcphdr *)&((int *)ip)[hlen])->th_off;
+		hlen <<= 2;
+		BCOPY(ip, &cs->cs_ip, hlen);	/* NOTE(review): hlen is not checked against len */
+		cs->cs_ip.ip_sum = 0;
+		cs->cs_hlen = hlen;
+		INCR(sls_uncompressedin)
+		return (len);
+
+	default:
+		goto bad;
+
+	case TYPE_COMPRESSED_TCP:
+		break;
+	}
+	/* We've got a compressed packet. */
+	INCR(sls_compressedin)
+	cp = *bufp;
+	changes = *cp++;	/* first byte is the change mask */
+	if (changes & NEW_C) {
+		/* Make sure the state index is in range, then grab the state.
+		 * If we have a good state index, clear the 'discard' flag. */
+		if (*cp >= MAX_STATES)
+			goto bad;
+
+		comp->flags &=~ SLF_TOSS;
+		comp->last_recv = *cp++;
+	} else {
+		/* this packet has an implicit state index.  If we've
+		 * had a line error since the last time we got an
+		 * explicit state index, we have to toss the packet. */
+		if (comp->flags & SLF_TOSS) {
+			INCR(sls_tossed)
+			return (0);
+		}
+	}
+	cs = &comp->rstate[comp->last_recv];
+	hlen = cs->cs_ip.ip_hl << 2;	/* saved IP header length in bytes */
+	th = (struct tcphdr *)&((u_char *)&cs->cs_ip)[hlen];
+	th->th_sum = htons((*cp << 8) | cp[1]);	/* TCP checksum is taken from the packet, high byte first */
+	cp += 2;
+	if (changes & TCP_PUSH_BIT)
+		th->th_flags |= TH_PUSH;
+	else
+		th->th_flags &=~ TH_PUSH;
+
+	switch (changes & SPECIALS_MASK) {
+	case SPECIAL_I:
+		{
+		register u_int i = ntohs(cs->cs_ip.ip_len) - cs->cs_hlen;	/* data length of the previous packet */
+		th->th_ack = htonl(ntohl(th->th_ack) + i);
+		th->th_seq = htonl(ntohl(th->th_seq) + i);
+		}
+		break;
+
+	case SPECIAL_D:
+		th->th_seq = htonl(ntohl(th->th_seq) + ntohs(cs->cs_ip.ip_len)
+				   - cs->cs_hlen);
+		break;
+
+	default:	/* fields are decoded in the order urgent, window, ack, seq */
+		if (changes & NEW_U) {
+			th->th_flags |= TH_URG;
+			DECODEU(th->th_urp)
+		} else
+			th->th_flags &=~ TH_URG;
+		if (changes & NEW_W)
+			DECODES(th->th_win)
+		if (changes & NEW_A)
+			DECODEL(th->th_ack)
+		if (changes & NEW_S)
+			DECODEL(th->th_seq)
+		break;
+	}
+	if (changes & NEW_I) {
+		DECODES(cs->cs_ip.ip_id)
+	} else	/* no explicit delta: the id advances by one */
+		cs->cs_ip.ip_id = htons(ntohs(cs->cs_ip.ip_id) + 1);
+
+	/*
+	 * At this point, cp points to the first byte of data in the
+	 * packet.  If we're not aligned on a 4-byte boundary, copy the
+	 * data down so the ip & tcp headers will be aligned.  Then back up
+	 * cp by the tcp/ip header length to make room for the reconstructed
+	 * header (we assume the packet we were handed has enough space to
+	 * prepend 128 bytes of header).  Adjust the length to account for
+	 * the new header & fill in the IP total length.
+	 */
+	len -= (cp - *bufp);
+	if (len < 0)
+		/* we must have dropped some characters (crc should detect
+		 * this but the old slip framing won't) */
+		goto bad;
+
+	if ((int)cp & 3) {	/* NOTE(review): the (int) casts assume a pointer fits in an int */
+		if (len > 0)
+			(void) ovbcopy(cp, (caddr_t)((int)cp &~ 3), len);
+		cp = (u_char *)((int)cp &~ 3);
+	}
+	cp -= cs->cs_hlen;
+	len += cs->cs_hlen;
+	cs->cs_ip.ip_len = htons(len);
+	BCOPY(&cs->cs_ip, cp, cs->cs_hlen);
+	*bufp = cp;
+
+	/* recompute the ip header checksum */
+	{
+		register u_short *bp = (u_short *)cp;
+		for (changes = 0; hlen > 0; hlen -= 2)	/* "changes" is reused as the running sum */
+			changes += *bp++;
+		changes = (changes & 0xffff) + (changes >> 16);
+		changes = (changes & 0xffff) + (changes >> 16);
+		((struct ip *)cp)->ip_sum = ~ changes;
+	}
+	return (len);
+bad:	/* discard compressed packets until the next explicit connection id */
+	comp->flags |= SLF_TOSS;
+	INCR(sls_errorin)
+	return (0);
+}
diff --git a/sys/net/slcompress.h b/sys/net/slcompress.h
new file mode 100644
index 00000000000..cefe940f198
--- /dev/null
+++ b/sys/net/slcompress.h
@@ -0,0 +1,157 @@
+/*	slcompress.h	8.1	93/06/10	*/
+/*
+ * Definitions for tcp compression routines.
+ *
+ * $Header: slcompress.h,v 1.10 89/12/31 08:53:02 van Exp $
+ *
+ * Copyright (c) 1989, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	Van Jacobson (van@helios.ee.lbl.gov), Dec 31, 1989:
+ *	- Initial distribution.
+ */
+
+#define MAX_STATES 16		/* must be > 2 and < 256 */
+#define MAX_HDR MLEN		/* XXX 4bsd-ism: should really be 128 */
+
+/*
+ * Compressed packet format:
+ *
+ * The first octet contains the packet type (top 3 bits), TCP
+ * 'push' bit, and flags that indicate which of the 4 TCP sequence
+ * numbers have changed (bottom 5 bits).  The next octet is a
+ * conversation number that associates a saved IP/TCP header with
+ * the compressed packet.  The next two octets are the TCP checksum
+ * from the original datagram.  The next 0 to 15 octets are
+ * sequence number changes, one change per bit set in the header
+ * (there may be no changes and there are two special cases where
+ * the receiver implicitly knows what changed -- see below).
+ * 
+ * There are 5 numbers which can change (they are always inserted
+ * in the following order): TCP urgent pointer, window,
+ * acknowledgement, sequence number and IP ID.  (The urgent pointer
+ * is different from the others in that its value is sent, not the
+ * change in value.)  Since typical use of SLIP links is biased
+ * toward small packets (see comments on MTU/MSS below), changes
+ * use a variable length coding with one octet for numbers in the
+ * range 1 - 255 and 3 octets (0, MSB, LSB) for numbers in the
+ * range 256 - 65535 or 0.  (If the change in sequence number or
+ * ack is more than 65535, an uncompressed packet is sent.)
+ */
+
+/*
+ * Packet types (must not conflict with IP protocol version)
+ *
+ * The top nibble of the first octet is the packet type.  There are
+ * three possible types: IP (not proto TCP or tcp with one of the
+ * control flags set); uncompressed TCP (a normal IP/TCP packet but
+ * with the 8-bit protocol field replaced by an 8-bit connection id --
+ * this type of packet syncs the sender & receiver); and compressed
+ * TCP (described above).
+ *
+ * LSB of 4-bit field is TCP "PUSH" bit (a worthless anachronism) and
+ * is logically part of the 4-bit "changes" field that follows.  Top
+ * three bits are actual packet type.  For backward compatibility
+ * and in the interest of conserving bits, numbers are chosen so the
+ * IP protocol version number (4) which normally appears in this nibble
+ * means "IP packet".
+ */
+
+/* packet types */
+#define TYPE_IP 0x40
+#define TYPE_UNCOMPRESSED_TCP 0x70
+#define TYPE_COMPRESSED_TCP 0x80
+#define TYPE_ERROR 0x00
+
+/* Bits in first octet of compressed packet */
+#define NEW_C	0x40	/* flag bits for what changed in a packet */
+#define NEW_I	0x20	/* IP ID change follows; if clear, receiver adds 1 to the ID */
+#define NEW_S	0x08	/* TCP sequence number change follows */
+#define NEW_A	0x04	/* TCP ack number change follows */
+#define NEW_W	0x02	/* TCP window change follows */
+#define NEW_U	0x01	/* urgent pointer value follows; receiver sets TH_URG, else clears it */
+
+/* reserved, special-case values of above */
+#define SPECIAL_I (NEW_S|NEW_W|NEW_U)		/* echoed interactive traffic */
+#define SPECIAL_D (NEW_S|NEW_A|NEW_W|NEW_U)	/* unidirectional data */
+#define SPECIALS_MASK (NEW_S|NEW_A|NEW_W|NEW_U)	/* union of the S, A, W and U bits */
+
+#define TCP_PUSH_BIT 0x10	/* LSB of the top nibble: carries the TCP "PUSH" flag */
+
+
+/*
+ * "state" data for each active tcp conversation on the wire.  This is
+ * basically a copy of the entire IP/TCP header from the last packet
+ * we saw from the conversation together with a small identifier
+ * the transmit & receive ends of the line use to locate saved header.
+ */
+struct cstate {
+	struct cstate *cs_next;	/* next most recently used cstate (xmit only) */
+	u_short cs_hlen;	/* size of hdr (receive only) */
+	u_char cs_id;		/* connection # associated with this state */
+	u_char cs_filler;	/* not otherwise described; presumably padding -- confirm */
+	union {
+		char csu_hdr[MAX_HDR];	/* raw-byte view of the saved header, MAX_HDR bytes */
+		struct ip csu_ip;	/* ip/tcp hdr from most recent packet */
+	} slcs_u;
+};
+#define cs_ip slcs_u.csu_ip	/* shorthand: saved header as a struct ip */
+#define cs_hdr slcs_u.csu_hdr	/* shorthand: saved header as a byte array */
+
+/*
+ * all the state data for one serial line (we need one of these
+ * per line).
+ */
+struct slcompress {
+	struct cstate *last_cs;	/* most recently used tstate */
+	u_char last_recv;	/* last rcvd conn. id */
+	u_char last_xmit;	/* last sent conn. id */
+	u_short flags;		/* SLF_* bits (see flag values below) */
+#ifndef SL_NO_STATS	/* counters below are compiled out when SL_NO_STATS is defined */
+	int sls_packets;	/* outbound packets */
+	int sls_compressed;	/* outbound compressed packets */
+	int sls_searches;	/* searches for connection state */
+	int sls_misses;		/* times couldn't find conn. state */
+	int sls_uncompressedin;	/* inbound uncompressed packets */
+	int sls_compressedin;	/* inbound compressed packets */
+	int sls_errorin;	/* inbound unknown type packets */
+	int sls_tossed;		/* inbound packets tossed because of error */
+#endif
+	struct cstate tstate[MAX_STATES];	/* xmit connection states */
+	struct cstate rstate[MAX_STATES];	/* receive connection states */
+};
+/* flag values */
+#define SLF_TOSS 1		/* tossing rcvd frames because of input err */
+
+void	 sl_compress_init __P((struct slcompress *));
+u_int	 sl_compress_tcp __P((struct mbuf *,
+	    struct ip *, struct slcompress *, int));
+int	 sl_uncompress_tcp __P((u_char **, int, u_int, struct slcompress *));
diff --git a/sys/net/slip.h b/sys/net/slip.h
new file mode 100644
index 00000000000..4caeb464df3
--- /dev/null
+++ b/sys/net/slip.h
@@ -0,0 +1,51 @@
+/*-
+ * Copyright (c) 1994
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)slip.h	8.1 (Berkeley) 2/12/94
+ */
+
+/* Ioctls operating on SLIP ttys. */
+#define	SLIOCGUNIT	_IOR('t', 88, int)	/* get slip unit number */
+
+/*
+ * Definitions of the pseudo-link-level header attached to slip
+ * packets grabbed by the packet filter (bpf) traffic monitor.
+ */
+#define	SLIP_HDRLEN	16		/* BPF SLIP header length */
+
+/* Offsets into BPF SLIP header. */
+#define	SLX_DIR		0		/* direction; see below */
+#define	SLX_CHDR	1		/* compressed header data */
+#define	CHDR_LEN	15		/* length of compressed header data */
+
+#define	SLIPDIR_IN	0		/* incoming */
+#define	SLIPDIR_OUT	1		/* outgoing */
diff --git a/sys/netccitt/README.hdlc b/sys/netccitt/README.hdlc
new file mode 100644
index 00000000000..24b5fef96df
--- /dev/null
+++ b/sys/netccitt/README.hdlc
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) University of British Columbia, 1984
+ *                                                                          
+ * @(#)README.hdlc	8.1 (Berkeley) 6/10/93
+ *
+ *  X.25 HDLC DATA LINK LEVEL:                                           
+ *
+ *                                                                            
+ *  This module implements the Link  Level of the Open Systems Interconnect
+ *  Model.  The implementation  is based  on the ISO  High-Level  Data Link 
+ *  Control (HDLC).  These procedures  subscribe to the  principles  of the 
+ *  ISO-Class of Procedures for  point-to-point. These procedures implement
+ *  two-way  asynchronous balanced mode (LAPB) as recommended by the CCITT.
+ *
+ *  The HDLC protocol layer interface consists of the following procedures:
+ *    Hd_init       (pr_init) 
+ *    Hd_output     (pr_output)
+ *    Hd_input      (pr_input)
+ *    Hd_timer      (pr_slowtimo)                       
+ *             
+ *  Note: Supervisory commands RR, RNR and REJ are  not transmitted by this
+ *        station.
+ *     
+ *        This station never enters a busy (RNR) condition.
+ *		
+ *	  The "Generate_rr" variable can  be set to FALSE.  This means that
+ *	  we NEVER  send an RR.  This works just fine if  the network level
+ *	  is X.25 packet protocol -- which it is.
+ *
+ *        Currently, this is only a DTE implementation.
+ *
+ *  Think about:
+ *        If the remote is busy, no iframes are sent. The remote sends a RR
+ *	  to clear this condition. However, this RR may be damaged, causing
+ *	  a possible deadlock. A solution is to poll with iframe (P(S)==P(R)
+ *	  of RNR) indefinitely.
+ *
+ *
+ *  Date:             February 1984
+ *
+ *  Author:           Gerald W. Neufeld
+ *                     
+ *  Installation:     Department of Computer Science
+ *                    University of British Columbia
+ *                    Vancouver, BC, CANADA.
+ *
+ *  History:
+ *  
+ *                                                                      
+ */
diff --git a/sys/netccitt/README.packet b/sys/netccitt/README.packet
new file mode 100644
index 00000000000..858d75cccdc
--- /dev/null
+++ b/sys/netccitt/README.packet
@@ -0,0 +1,36 @@
+/*
+ * @(#)README.packet	8.1 (Berkeley) 6/10/93
+ *
+ *   X.25 NETWORK PACKET LEVEL:
+ *
+ *   This implementation is based on Recommendation X.25 as agreed at the 
+ *   March 1976 and the February 1980  meetings of CCITT Study Group VII. 
+ *   However, not all aspects are implemented. The following is a list of 
+ *   features which are not yet or may never be implemented:
+ *  
+ *   1. D bit
+ *   2. PVC
+ *   3. fast select
+ *  
+ *
+ *   Note: This implementation is for DTEs only.
+ *
+ *         Currently, only the 1976 version is implemented.
+ *
+ *
+ *   Date:          February, 1984
+ *
+ *   Author:        Gerald W. Neufeld
+ *
+ *   Installation:  Department of Computer Science
+ *                  University of British Columbia
+ *                  Vancouver, BC, CANADA
+ *
+ *   To Do:   	    Find some reasonable heuristic for piggybacking packet
+ *		    level acks.
+ *
+ *   Bugs:	    Clear might be sent before data is all out.
+ * 
+ *   History:
+ *
+ */
diff --git a/sys/netccitt/ccitt_proto.c b/sys/netccitt/ccitt_proto.c
new file mode 100644
index 00000000000..d832fd38f92
--- /dev/null
+++ b/sys/netccitt/ccitt_proto.c
@@ -0,0 +1,93 @@
+/*
+ * Copyright (c) University of British Columbia, 1984
+ * Copyright (c) 1990, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * the Laboratory for Computation Vision and the Computer Science Department
+ * of the University of British Columbia.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)ccitt_proto.c	8.1 (Berkeley) 6/10/93
+ */
+
+#include <sys/param.h>
+#include <sys/socket.h>
+#include <sys/protosw.h>
+#include <sys/domain.h>
+
+#include <netccitt/x25.h>
+
+#include <net/radix.h>
+
+/*
+ * Definitions of protocols supported in the CCITT domain.
+ */
+
+extern	struct domain ccittdomain;
+#define DOMAIN &ccittdomain
+
+#ifdef LLC
+int	llc_output();
+void	llc_ctlinput(), llc_init(), llc_timer();
+#endif
+#ifdef HDLC
+int	hd_output();
+void	hd_ctlinput(), hd_init(), hd_timer();
+#endif
+int	pk_usrreq(), pk_ctloutput();
+void	pk_timer(), pk_init(), pk_input(), pk_ctlinput();
+
+struct protosw ccittsw[] = {	/* protocols supported in the CCITT domain */
+#ifdef LLC	/* entry present only when LLC is configured */
+ {	0,		DOMAIN,		IEEEPROTO_802LLC,0,
+	0,		llc_output,	llc_ctlinput,	0,
+	0,
+	llc_init,	0,	 	llc_timer,	0,
+ },
+#endif
+#ifdef HDLC	/* entry present only when HDLC is configured */
+ {	0,		DOMAIN,		CCITTPROTO_HDLC,0,
+	0,		hd_output,	hd_ctlinput,	0,
+	0,
+	hd_init,	0,	 	hd_timer,	0,
+ },
+#endif
+ {	SOCK_STREAM,	DOMAIN,		CCITTPROTO_X25,	PR_CONNREQUIRED|PR_ATOMIC|PR_WANTRCVD,
+	pk_input,	0,		pk_ctlinput,	pk_ctloutput,
+	pk_usrreq,	/* X.25 is the only entry with a socket type and a usrreq handler */
+	pk_init,	0,		pk_timer,	0,
+ }
+};
+
+struct domain ccittdomain =	/* domain record for AF_CCITT, built on ccittsw[] */
+	{ AF_CCITT, "ccitt", 0, 0, 0, ccittsw,	/* ccittsw: first table entry */
+		&ccittsw[sizeof(ccittsw)/sizeof(ccittsw[0])], 0, /* one past last entry */
+		rn_inithead, 32, sizeof (struct sockaddr_x25) };
diff --git a/sys/netccitt/dll.h b/sys/netccitt/dll.h
new file mode 100644
index 00000000000..46ded88eda8
--- /dev/null
+++ b/sys/netccitt/dll.h
@@ -0,0 +1,83 @@
+/* 
+ * Copyright (C) Dirk Husemann, Computer Science Department IV, 
+ * 		 University of Erlangen-Nuremberg, Germany, 1990, 1991, 1992
+ * Copyright (c) 1992, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)dll.h	8.1 (Berkeley) 6/10/93
+ */
+
+/* 
+ * We define the additional PRC_* codes in here
+ */
+#ifdef KERNEL
+#ifndef PRC_IFUP
+#define PRC_IFUP		   3
+#endif
+#define PRC_CONNECT_INDICATION     8
+#define PRC_CONNECT_REQUEST        9
+#define PRC_DISCONNECT_REQUEST     10
+#define PRC_DISCONNECT_INDICATION  11
+#define PRC_RESET_REQUEST          12
+#endif
+
+/*
+ * Data link layer configuration --- basically a copy of the relevant parts
+ * of x25config, implemented to become a little bit more network
+ * layer independent. (Probably only used for casting et al.)
+ */
+struct dllconfig {
+       u_short dllcfg_unused0:4,   /* placeholder; presumably mirrors an x25config field -- confirm */
+               dllcfg_unused1:4,   /* placeholder; presumably mirrors an x25config field -- confirm */
+               dllcfg_trace:1,     /* link level tracing flag */
+               dllcfg_window:7;    /* link level window size */
+       u_short dllcfg_xchxid:1,    /* exchange XID (not yet) */
+               dllcfg_unused2:7;   /* here be dragons */
+};
+
+struct dll_ctlinfo {	/* control information; holds one of two overlapping layouts */
+	union {
+		struct {	/* layout 1: config pointer plus an LSAP byte */
+			struct	dllconfig *dctli_up_cfg;
+			u_char	dctli_up_lsap;
+		} CTLI_UP;	/* NOTE(review): presumably used for upward-bound requests -- confirm */
+		struct {	/* layout 2: pcb, route entry and link config pointers */
+			caddr_t dctli_down_pcb;
+			struct rtentry *dctli_down_rt;
+			struct dllconfig *dctli_down_llconf;
+		} CTLI_DOWN;	/* NOTE(review): presumably used for downward-bound requests -- confirm */
+	} CTLIun;
+};
+#define dlcti_cfg  CTLIun.CTLI_UP.dctli_up_cfg	/* accessors; note prefix is dlcti_, members use dctli_ */
+#define dlcti_lsap CTLIun.CTLI_UP.dctli_up_lsap
+#define dlcti_pcb  CTLIun.CTLI_DOWN.dctli_down_pcb
+#define dlcti_rt   CTLIun.CTLI_DOWN.dctli_down_rt
+#define dlcti_conf CTLIun.CTLI_DOWN.dctli_down_llconf
diff --git a/sys/netccitt/hd_debug.c b/sys/netccitt/hd_debug.c
new file mode 100644
index 00000000000..b8a45a3f59a
--- /dev/null
+++ b/sys/netccitt/hd_debug.c
@@ -0,0 +1,212 @@
+/*
+ * Copyright (c) University of British Columbia, 1984
+ * Copyright (c) 1990, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * the Laboratory for Computation Vision and the Computer Science Department
+ * of the University of British Columbia.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)hd_debug.c	8.1 (Berkeley) 6/10/93
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/mbuf.h>
+#include <sys/domain.h>
+#include <sys/socket.h>
+#include <sys/protosw.h>
+#include <sys/errno.h>
+#include <sys/time.h>
+#include <sys/kernel.h>
+
+#include <net/if.h>
+
+#include <netccitt/hdlc.h>
+#include <netccitt/hd_var.h>
+#include <netccitt/x25.h>
+
+#ifdef HDLCDEBUG
+#define NTRACE		32	/* number of slots in the hdtrace ring */
+
+struct	hdlctrace {		/* one saved frame in the trace ring */
+	struct	hdcb *ht_hdp;	/* link the frame was traced on */
+	short	ht_dir;		/* direction value passed to hd_trace */
+	struct	mbuf *ht_frame;	/* copy of the frame's first mbuf; 0 when the slot is empty */
+	struct	timeval ht_time;	/* value of `time' when the frame was saved */
+} hdtrace[NTRACE];
+
+int	lasttracelogged, freezetrace;	/* next slot to overwrite; nonzero stops saving */
+#endif
+
+hd_trace (hdp, direction, frame)	/* print a decoded HDLC frame if tracing is on */
+struct hdcb *hdp;			/* link; hd_xcp->xc_ltrace gates the printing */
+register struct Hdlc_frame *frame;	/* frame to decode; direction is undeclared, so int */
+{
+	register char *s;
+	register int nr, pf, ns, i;
+	struct Hdlc_iframe *iframe = (struct Hdlc_iframe *) frame; /* I-frame view for nr/pf/ns */
+
+#ifdef HDLCDEBUG
+	hd_savetrace (hdp, direction, frame);	/* record in the ring even if printing is off */
+#endif
+	if (hdp -> hd_xcp -> xc_ltrace) {
+		if (direction == RX)
+			printf ("F-In:  ");
+		else if (direction == 2)	/* NOTE(review): magic 2; no named constant visible here */
+				printf ("F-Xmt: ");
+			else
+				printf ("F-Out:   ");
+
+		nr = iframe -> nr;	/* read up front; nr is printed only for RR/RNR/REJ/IFRAME */
+		pf = iframe -> pf;
+		ns = iframe -> ns;	/* printed only for IFRAME */
+
+		switch (hd_decode (hdp, frame)) {
+		case SABM: 
+			printf ("SABM   : PF=%d\n", pf);
+			break;
+
+		case DISC: 
+			printf ("DISC   : PF=%d\n", pf);
+			break;
+
+		case DM: 
+			printf ("DM     : PF=%d\n", pf);
+			break;
+
+		case FRMR: 
+			{
+			register struct Frmr_frame *f = (struct Frmr_frame *)frame;
+
+			printf ("FRMR   : PF=%d, TEXT=", pf);
+			for (s = (char *) frame, i = 0; i < 5; ++i, ++s)	/* first 5 bytes in hex */
+				printf ("%x ", (int) * s & 0xff);
+			printf ("\n");
+			printf ("control=%x v(s)=%d v(r)=%d w%d x%d y%d z%d\n",
+				f->frmr_control, f->frmr_ns, f->frmr_nr,
+				f->frmr_w, f->frmr_x, f->frmr_y, f->frmr_z);
+			break;
+			}
+
+		case UA: 
+			printf ("UA     : PF=%d\n", pf);
+			break;
+
+		case RR: 
+			printf ("RR     : N(R)=%d, PF=%d\n", nr, pf);
+			break;
+
+		case RNR: 
+			printf ("RNR    : N(R)=%d, PF=%d\n", nr, pf);
+			break;
+
+		case REJ: 
+			printf ("REJ    : N(R)=%d, PF=%d\n", nr, pf);
+			break;
+
+		case IFRAME: 
+			{
+			register struct mbuf *m;
+			register int len = 0;
+
+			for(m = dtom (frame); m; m = m -> m_next)	/* sum m_len over the chain */
+				len += m -> m_len;
+			len -= HDHEADERLN;	/* presumably the HDLC header size, leaving data bytes */
+			printf ("IFRAME : N(R)=%d, PF=%d, N(S)=%d, DATA(%d)=",
+				nr, pf, ns, len);
+			for (s = (char *)iframe->i_field, i = 0; i < 3; ++i, ++s) /* NOTE(review): reads 3 bytes even if len < 3 */
+				printf ("%x ", (int) *s & 0xff);
+			printf ("\n");
+			break;
+			}
+
+		default: 
+			printf ("ILLEGAL: ");
+			for (s = (char *) frame, i = 0; i < 5; ++i, ++s)	/* first 5 bytes in hex */
+				printf ("%x ", (int) *s & 0xff);
+			printf ("\n");
+		}
+
+	}
+}
+
+#ifdef HDLCDEBUG
+static	/* NOTE(review): hd_trace calls this above with no prior declaration */
+hd_savetrace (hdp, dir, frame)	/* store a copy of frame in the hdtrace ring */
+struct hdcb *hdp;
+struct Hdlc_frame *frame;	/* dir is undeclared, so it defaults to int */
+{
+	register struct hdlctrace *htp;
+	register struct mbuf *m;
+
+	if (freezetrace)	/* ring is frozen (hd_dumptrace is walking it) */
+		return;
+	htp = &hdtrace[lasttracelogged];	/* slot to (re)use */
+	lasttracelogged = (lasttracelogged + 1) % NTRACE;	/* advance, wrapping at NTRACE */
+	if (m = htp->ht_frame)	/* release the copy previously held in this slot */
+		m_freem (m);
+	m = dtom (frame);
+	htp->ht_frame = m_copy (m, 0, m->m_len);	/* copies m_len bytes of the first mbuf only */
+	htp->ht_hdp = hdp;
+	htp->ht_dir = dir;
+	htp->ht_time = time;
+}
+
+hd_dumptrace (hdp)	/* print link status, retransmit queue and saved frames for hdp */
+struct hdcb *hdp;
+{
+	register int i, ltrace;
+	register struct hdlctrace *htp;
+
+	freezetrace = 1;	/* stop hd_savetrace so the hd_trace calls below don't alter the ring */
+	hd_status (hdp);
+	printf ("retransmit queue:");
+	for (i = 0; i < 8; i++)	/* prints 8 hd_retxq entries with %x */
+		printf (" %x", hdp -> hd_retxq[i]);
+	printf ("\n");
+	ltrace = hdp -> hd_xcp -> xc_ltrace;	/* remember the current trace setting */
+	hdp -> hd_xcp -> xc_ltrace = 1;	/* force hd_trace to print */
+	for (i = 0; i < NTRACE; i++) {	/* walk all slots, starting at the next one to be overwritten */
+		htp = &hdtrace[(lasttracelogged + i) % NTRACE];
+		if (htp->ht_hdp != hdp || htp->ht_frame == 0)	/* other link, or empty slot */
+			continue;
+		printf ("%d/%d	", htp->ht_time.tv_sec & 0xff,	/* low 8 bits of seconds */
+			htp->ht_time.tv_usec / 10000);	/* hundredths of a second */
+		hd_trace (htp->ht_hdp, htp->ht_dir,
+			mtod (htp->ht_frame, struct Hdlc_frame *));
+		m_freem (htp->ht_frame);	/* each saved frame is consumed once printed */
+		htp->ht_frame = 0;
+	}
+	hdp -> hd_xcp -> xc_ltrace = ltrace;	/* restore the caller's trace setting */
+	freezetrace = 0;
+}
+#endif
diff --git a/sys/netccitt/hd_input.c b/sys/netccitt/hd_input.c
new file mode 100644
index 00000000000..eb939d03199
--- /dev/null
+++ b/sys/netccitt/hd_input.c
@@ -0,0 +1,669 @@
+/*
+ * Copyright (c) University of British Columbia, 1984
+ * Copyright (c) 1990, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * the Laboratory for Computation Vision and the Computer Science Department
+ * of the University of British Columbia.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)hd_input.c	8.1 (Berkeley) 6/10/93
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/mbuf.h>
+#include <sys/domain.h>
+#include <sys/socket.h>
+#include <sys/protosw.h>
+#include <sys/errno.h>
+#include <sys/time.h>
+#include <sys/kernel.h>
+
+#include <net/if.h>
+
+#include <netccitt/hdlc.h>
+#include <netccitt/hd_var.h>
+#include <netccitt/x25.h>
+
+static frame_reject();
+static rej_routine();
+static free_iframes();
+/*
+ *      HDLC INPUT INTERFACE
+ *
+ *      This routine is called when the HDLC physical device has
+ *      completed reading a frame.
+ */
+
+hdintr ()
+{
+	register struct mbuf *m;
+	register struct hdcb *hdp;
+	register struct ifnet *ifp;
+	register int s;
+	/* One-entry cache: interface of the previous frame and its hdcb. */
+	static struct ifnet *lastifp;
+	static struct hdcb *lasthdp;
+
+	while (1) {
+		/* Take the next frame off hdintrq at splimp. */
+		s = splimp ();
+		IF_DEQUEUE (&hdintrq, m);
+		splx (s);
+		if (m == 0)
+			return;
+
+		/* The first mbuf must hold at least the HDLC header. */
+		if (m->m_len < HDHEADERLN) {
+			printf ("hdintr: packet too short (len=%d)\n",
+				m->m_len);
+			m_freem (m);
+			continue;
+		}
+		if ((m->m_flags & M_PKTHDR) == 0)
+			panic("hdintr");
+		ifp = m->m_pkthdr.rcvif;
+
+		/*
+		 * Find the hdlc control block for the receiving interface,
+		 * searching the hdcbhead list only on a cache miss.
+		 */
+		if (ifp != lastifp) {
+			hdp = hdcbhead;
+			while (hdp != 0 && hdp->hd_ifp != ifp)
+				hdp = hdp->hd_next;
+			if (hdp == 0) {
+				printf ("hdintr: unknown interface %x\n", ifp);
+				m_freem (m);
+				continue;
+			}
+			lastifp = ifp;
+			lasthdp = hdp;
+		} else
+			hdp = lasthdp;
+
+		/*
+		 * process_rxframe() returns FALSE when the frame was not
+		 * queued for the next higher layer; it is then ours to free.
+		 */
+		if (process_rxframe (hdp, m) == FALSE)
+			m_freem (m);
+	}
+}
+
+process_rxframe (hdp, fbuf)	/* Run one received frame through the link
+				 * state machine.  Returns TRUE only when
+				 * process_iframe() reports the frame as
+				 * queued; on FALSE the caller (hdintr)
+				 * frees fbuf. */
+register struct hdcb *hdp;
+register struct mbuf *fbuf;
+{
+	register int queued = FALSE, frametype, pf;
+	register struct Hdlc_frame *frame;
+
+	frame = mtod (fbuf, struct Hdlc_frame *);
+	pf = ((struct Hdlc_iframe *) frame) -> pf;	/* P/F bit, read via the iframe layout */
+
+	hd_trace (hdp, RX, frame);
+	if (frame -> address != ADDRESS_A && frame -> address != ADDRESS_B)
+		return (queued);	/* neither address A nor B: ignore the frame */
+
+	/* Dispatch on the sum of the decoded frame type and the link state;
+	   sums with no case label below are silently ignored. */
+	switch ((frametype = hd_decode (hdp, frame)) + hdp->hd_state) {
+	case DM + DISC_SENT:
+	case UA + DISC_SENT:
+		/*
+		 * Link now closed.  Leave timer running
+		 * so hd_timer() can periodically check the
+		 * status of interface driver flag bit IFF_UP.
+		 */
+		hdp->hd_state = DISCONNECTED;
+		break;
+
+	case DM + INIT:
+	case UA + INIT:
+		/*
+		 * This is a non-standard state change needed for DCEs
+		 * that do dynamic link selection.  We can't go into the
+		 * usual "SEND DM" state because a DM is a SARM in LAP.
+		 */
+		hd_writeinternal (hdp, SABM, POLLOFF);
+		hdp->hd_state = SABM_SENT;
+		SET_TIMER (hdp);
+		break;
+
+	case SABM + DM_SENT: 
+	case SABM + WAIT_SABM: 
+		hd_writeinternal (hdp, UA, pf);	/* acknowledge, then FALLTHROUGH */
+	case UA + SABM_SENT: 
+	case UA + WAIT_UA: 
+		KILL_TIMER (hdp);
+		hd_initvars (hdp);
+		hdp->hd_state = ABM;
+		hd_message (hdp, "Link level operational");
+		/* Notify the packet level - to send RESTART. */
+		(void) pk_ctlinput (PRC_LINKUP, hdp->hd_pkp);
+		break;
+
+	case SABM + SABM_SENT: 
+		/* Got a SABM collision. Acknowledge the remote's SABM
+		   via UA but still wait for UA. */
+		hd_writeinternal (hdp, UA, pf);
+		break;
+
+	case SABM + ABM: 
+		/* Request to reset the link from the remote. */
+		KILL_TIMER (hdp);
+		hd_message (hdp, "Link reset");
+#ifdef HDLCDEBUG
+		hd_dumptrace (hdp);
+#endif
+		hd_flush (hdp->hd_ifp);
+		hd_writeinternal (hdp, UA, pf);
+		hd_initvars (hdp);
+		(void) pk_ctlinput (PRC_LINKRESET, hdp->hd_pkp);
+		hdp->hd_resets++;
+		break;
+
+	case SABM + WAIT_UA: 
+		hd_writeinternal (hdp, UA, pf);
+		break;
+
+	case DM + ABM: 
+		hd_message (hdp, "DM received: link down");
+#ifdef HDLCDEBUG
+		hd_dumptrace (hdp);
+#endif
+		(void) pk_ctlinput (PRC_LINKDOWN, hdp->hd_pkp);
+		hd_flush (hdp->hd_ifp);	/* FALLTHROUGH: try to bring the link back up */
+	case DM + DM_SENT: 
+	case DM + WAIT_SABM: 
+	case DM + WAIT_UA: 
+		hd_writeinternal (hdp, SABM, pf);
+		hdp->hd_state = SABM_SENT;
+		SET_TIMER (hdp);
+		break;
+
+	case DISC + INIT:
+	case DISC + DM_SENT: 
+	case DISC + SABM_SENT: 
+		/* Note: This is a non-standard state change. */
+		hd_writeinternal (hdp, UA, pf);
+		hd_writeinternal (hdp, SABM, POLLOFF);
+		hdp->hd_state = SABM_SENT;
+		SET_TIMER (hdp);
+		break;
+
+	case DISC + WAIT_UA: 
+		hd_writeinternal (hdp, DM, pf);
+		SET_TIMER (hdp);
+		hdp->hd_state = DM_SENT;
+		break;
+
+	case DISC + ABM: 
+		hd_message (hdp, "DISC received: link down");
+		(void) pk_ctlinput (PRC_LINKDOWN, hdp->hd_pkp);	/* FALLTHROUGH */
+	case DISC + WAIT_SABM: 
+		hd_writeinternal (hdp, UA, pf);
+		hdp->hd_state = DM_SENT;
+		SET_TIMER (hdp);
+		break;
+
+	case UA + ABM: 
+		hd_message (hdp, "UA received: link down");
+		(void) pk_ctlinput (PRC_LINKDOWN, hdp->hd_pkp);	/* FALLTHROUGH */
+	case UA + WAIT_SABM: 
+		hd_writeinternal (hdp, DM, pf);
+		hdp->hd_state = DM_SENT;
+		SET_TIMER (hdp);
+		break;
+
+	case FRMR + DM_SENT: 
+		hd_writeinternal (hdp, SABM, pf);
+		hdp->hd_state = SABM_SENT;
+		SET_TIMER (hdp);
+		break;
+
+	case FRMR + WAIT_SABM: 
+		hd_writeinternal (hdp, DM, pf);
+		hdp->hd_state = DM_SENT;
+		SET_TIMER (hdp);
+		break;
+
+	case FRMR + ABM: 
+		hd_message (hdp, "FRMR received: link down");
+		(void) pk_ctlinput (PRC_LINKDOWN, hdp->hd_pkp);
+#ifdef HDLCDEBUG
+		hd_dumptrace (hdp);
+#endif
+		hd_flush (hdp->hd_ifp);
+		hd_writeinternal (hdp, SABM, pf);
+		hdp->hd_state = WAIT_UA;
+		SET_TIMER (hdp);
+		break;
+
+	case RR + ABM: 
+	case RNR + ABM: 
+	case REJ + ABM: 
+		process_sframe (hdp, (struct Hdlc_sframe *)frame, frametype);
+		break;
+
+	case IFRAME + ABM: 
+		queued = process_iframe (hdp, fbuf, (struct Hdlc_iframe *)frame);	/* the only path that can set queued */
+		break;
+
+	case IFRAME + SABM_SENT: 
+	case RR + SABM_SENT: 
+	case RNR + SABM_SENT: 
+	case REJ + SABM_SENT: 
+		hd_writeinternal (hdp, DM, POLLON);
+		hdp->hd_state = DM_SENT;
+		SET_TIMER (hdp);
+		break;
+
+	case IFRAME + WAIT_SABM: 
+	case RR + WAIT_SABM: 
+	case RNR + WAIT_SABM: 
+	case REJ + WAIT_SABM: 
+		hd_writeinternal (hdp, FRMR, POLLOFF);	/* state stays WAIT_SABM */
+		SET_TIMER (hdp);
+		break;
+
+	case ILLEGAL + SABM_SENT: 
+		hdp->hd_unknown++;
+		hd_writeinternal (hdp, DM, POLLOFF);
+		hdp->hd_state = DM_SENT;
+		SET_TIMER (hdp);
+		break;
+
+	case ILLEGAL + ABM: 
+		hd_message (hdp, "Unknown frame received: link down");
+		(void) pk_ctlinput (PRC_LINKDOWN, hdp->hd_pkp);	/* FALLTHROUGH */
+	case ILLEGAL + WAIT_SABM:
+		hdp->hd_unknown++;
+#ifdef HDLCDEBUG
+		hd_dumptrace (hdp);
+#endif
+		hd_writeinternal (hdp, FRMR, POLLOFF);
+		hdp->hd_state = WAIT_SABM;
+		SET_TIMER (hdp);
+		break;
+	}
+
+	return (queued);
+}
+
+process_iframe (hdp, fbuf, frame)	/* Handle an I-frame received in state
+					 * ABM.  Returns TRUE when fbuf was
+					 * handed to pk_input(), FALSE when
+					 * the frame was rejected or dropped. */
+register struct hdcb *hdp;
+struct mbuf *fbuf;
+register struct Hdlc_iframe *frame;
+{
+	register int    nr = frame -> nr,
+	                ns = frame -> ns,
+	                pf = frame -> pf;
+	register int    queued = FALSE;
+
+	/* 
+	 *  Validate the iframe's N(R) value. Its N(R) value must be in
+	 *   sync with our V(S) value and our "last received nr".
+	 *   A bad N(R) is answered with a FRMR carrying reject code Z.
+	 */
+
+	if (valid_nr (hdp, nr, FALSE) == FALSE) {
+		frame_reject (hdp, Z, frame);
+		return (queued);
+	}
+
+
+	/* 
+	 *  This section tests the IFRAME for proper sequence. That is, its
+	 *  sequence number N(S) MUST be equal to our receive state
+	 *  variable V(R) (hd_vr).
+	 */
+
+	if (ns != hdp->hd_vr) {
+		hdp->hd_invalid_ns++;
+		/* Send a REJ only once per reject condition, unless polled. */
+		if (pf || (hdp->hd_condition & REJ_CONDITION) == 0) {
+			hdp->hd_condition |= REJ_CONDITION;
+			/*
+			 * Flush the transmit queue. This is ugly but we
+			 * have no choice.  A reject response must be
+			 * immediately sent to the DCE.  Failure to do so
+			 * may result in another out of sequence iframe
+			 * arriving (and thus sending another reject)
+			 * before the first reject is transmitted. This
+			 * will cause the DCE to receive two or more
+			 * rejects back to back, which must never happen.
+			 */
+			hd_flush (hdp->hd_ifp);
+			hd_writeinternal (hdp, REJ, pf);
+		}
+		return (queued);
+	}
+	hdp->hd_condition &= ~REJ_CONDITION;	/* in sequence again */
+
+	/* 
+	 *  This section finally tests the IFRAME's sequence number against
+	 *  the window size (K)  and the sequence number of the  last frame
+	 *  we have acknowledged.  If the IFRAME is completely correct then 
+	 *  it is queued for the packet level.
+	 */
+
+	if (ns != (hdp -> hd_lasttxnr + hdp -> hd_xcp -> xc_lwsize) % MODULUS) {
+		hdp -> hd_vr = (hdp -> hd_vr + 1) % MODULUS;
+		if (pf == 1) {
+			/* Must generate a RR or RNR with final bit on. */
+			hd_writeinternal (hdp, RR, POLLON);
+		} else
+			/*    
+			 *  Hopefully we can piggyback the RR, if not we will generate
+			 *  a RR when T3 timer expires.
+			 */
+			if (hdp -> hd_rrtimer == 0)
+				hdp->hd_rrtimer = hd_t3;
+
+		/* Forward iframe to packet level of X.25. */
+		fbuf -> m_data += HDHEADERLN;	/* strip the HDLC header in place */
+		fbuf -> m_len -= HDHEADERLN;
+		fbuf -> m_pkthdr.len -= HDHEADERLN;
+		fbuf -> m_pkthdr.rcvif = (struct ifnet *)hdp -> hd_pkp;	/* rcvif is reused to carry hd_pkp */
+#ifdef BSD4_3
+		fbuf->m_act = 0;	/* probably not necessary */
+#else
+		{
+			register struct mbuf *m;
+			
+			for (m = fbuf; m -> m_next; m = m -> m_next)
+				m -> m_act = (struct mbuf *) 0;
+			m -> m_act = (struct mbuf *) 1;	/* NOTE(review): last mbuf gets m_act = 1, presumably an end-of-record mark - confirm */
+		}
+#endif
+		pk_input (fbuf);
+		queued = TRUE;
+		hd_start (hdp);
+	} else {
+		/* 
+		 *  Here if the remote station has transmitted more iframes than
+		 *  the number which have been acknowledged plus K. 
+		 */
+		hdp->hd_invalid_ns++;
+		frame_reject (hdp, W, frame);
+	}
+	return (queued);
+}
+
+/* 
+ *  This routine is used to determine if a value (the middle parameter)
+ *  is between two other values. The low value is  the first  parameter
+ *  the high value is the last parameter. The routine checks the middle
+ *  value to see if it is within the range of the first and last values.
+ *  The reason we need this routine is the values are modulo some  base
+ *  hence a simple test for greater or less than is not sufficient.
+ */
+
+bool
+range_check (rear, value, front)
+int     rear,
+        value,
+        front;
+{
+	/*
+	 * Returns TRUE when value lies in the modular interval that runs
+	 * from rear up to front, both ends included, FALSE otherwise.
+	 *
+	 * Non-wrapping interval, including the degenerate one where
+	 * rear == front: value must lie between the two ends.  Treating
+	 * rear == front as a wrap-around would accept every value, which
+	 * let valid_nr() pass an arbitrary N(R) whenever no iframes were
+	 * outstanding.
+	 */
+	if (front >= rear)
+		return ((rear <= value) && (value <= front));
+
+	/* The interval wraps past the modulus: value is on either side. */
+	return ((rear <= value) || (value <= front));
+}
+
+/* 
+ *  This routine handles all the frame reject conditions which can
+ *  arise as a result  of secondary  processing.  The frame reject
+ *  condition Y (frame length error) is handled elsewhere.
+ */
+
+static
+frame_reject (hdp, rejectcode, frame)
+struct hdcb *hdp;
+struct Hdlc_iframe *frame;
+{
+	register struct Frmr_frame *fr = &hd_frmr;
+
+	/* Clear the fixed-zero fields and all four reason bits first. */
+	fr -> frmr_f1_0 = 0;
+	fr -> frmr_f2_0 = 0;
+	fr -> frmr_0000 = 0;
+	fr -> frmr_w = 0;
+	fr -> frmr_x = 0;
+	fr -> frmr_y = 0;
+	fr -> frmr_z = 0;
+
+	/* Echo the rejected frame's control field and sequence numbers. */
+	fr -> frmr_control = ((struct Hdlc_frame *) frame) -> control;
+	fr -> frmr_ns = frame -> ns;
+	fr -> frmr_nr = frame -> nr;
+
+	/* Set the reason bit(s); an unrecognised code leaves all four clear. */
+	if (rejectcode == Z)
+		fr -> frmr_z = 1;	/* invalid N(R). */
+	else if (rejectcode == Y)
+		fr -> frmr_y = 1;	/* iframe length error. */
+	else if (rejectcode == X) {
+		fr -> frmr_x = 1;	/* invalid information field. */
+		fr -> frmr_w = 1;
+	} else if (rejectcode == W)
+		fr -> frmr_w = 1;	/* invalid N(S). */
+
+	/* Send the FRMR built in hd_frmr and wait for the remote's SABM. */
+	hd_writeinternal (hdp, FRMR, POLLOFF);
+
+	hdp->hd_state = WAIT_SABM;
+	SET_TIMER (hdp);
+}
+
+/* 
+ *  This procedure is invoked whenever we receive a supervisory
+ *  frame such as RR, RNR or REJ. All processing for these
+ *  frames is done here.
+ */
+
+process_sframe (hdp, frame, frametype)
+register struct hdcb *hdp;
+register struct Hdlc_sframe *frame;
+int frametype;
+{
+	register int nr = frame -> nr, pf = frame -> pf, pollbit = 0;
+
+	/* An invalid N(R) ends processing here with a FRMR (code Z). */
+	if (valid_nr (hdp, nr, pf) != TRUE) {
+		frame_reject (hdp, Z, (struct Hdlc_iframe *)frame);
+		return;
+	}
+
+	/* Track the remote's busy condition; a REJ also retransmits. */
+	if (frametype == RR)
+		hdp->hd_condition &= ~REMOTE_RNR_CONDITION;
+	else if (frametype == RNR) {
+		hdp->hd_condition |= REMOTE_RNR_CONDITION;
+		hdp->hd_retxcnt = 0;
+	} else if (frametype == REJ) {
+		hdp->hd_condition &= ~REMOTE_RNR_CONDITION;
+		rej_routine (hdp, nr);
+	}
+
+	if (pf == 1) {
+		/* P/F bit set: leave the timer recovery condition. */
+		hdp->hd_retxcnt = 0;
+		hdp->hd_condition &= ~TIMER_RECOVERY_CONDITION;
+
+		if (frametype == RR && hdp->hd_lastrxnr == hdp->hd_vs
+		    && hdp->hd_timer == 0 && hdp->hd_txq.head == 0) {
+			/*
+			 * Nothing outstanding, timer idle and transmit
+			 * queue empty: answer with an RR carrying pf.
+			 */
+			hd_writeinternal(hdp, RR, pf);
+		} else if (hdp->hd_condition & REMOTE_RNR_CONDITION) {
+			/* Remote is busy, so send at most one iframe. */
+			if (hdp->hd_vs != hdp->hd_retxqi)
+				hd_send_iframe (hdp, hdp->hd_retxq[hdp->hd_vs], pollbit);
+		} else {
+			/* Resend everything held in the retransmit list. */
+			while (hdp->hd_vs != hdp->hd_retxqi)
+				hd_send_iframe (hdp, hdp->hd_retxq[hdp->hd_vs], POLLOFF);
+		}
+	}
+
+	hd_start (hdp);
+}
+
+/* 
+ *  This routine tests the validity of the N(R) which we have received.
+ *  If it is ok,  then all the  iframes which it acknowledges  (if any)
+ *  will be freed.
+ */
+
+bool
+valid_nr (hdp, nr, finalbit)
+register struct hdcb *hdp;
+register int finalbit;
+{
+	/* Same N(R) as last time: valid, but nothing new is acknowledged. */
+	if (nr == hdp->hd_lastrxnr)
+		return (TRUE);
+
+	/*
+	 *  N(R) has to lie between the last N(R) we received and our
+	 *  V(S).  While in timer recovery it may instead lie between
+	 *  V(S) and hd_xx (the V(S) saved when recovery began), in
+	 *  which case V(S) is moved up to it.  Anything else is counted
+	 *  and reported as invalid.
+	 */
+	if (range_check (hdp->hd_lastrxnr, nr, hdp->hd_vs) == FALSE) {
+		if ((hdp->hd_condition & TIMER_RECOVERY_CONDITION) == 0 ||
+		    range_check (hdp->hd_vs, nr, hdp->hd_xx) != TRUE) {
+			hdp->hd_invalid_nr++;
+			return (FALSE);
+		}
+		hdp->hd_vs = nr;
+	}
+
+	/*
+	 *  The N(R) is acceptable: release the iframes it acknowledges.
+	 *  free_iframes() may step nr back by one, so the timer test
+	 *  below has to use the value it leaves behind.
+	 */
+	KILL_TIMER (hdp);
+	free_iframes (hdp, &nr, finalbit);
+	if (nr != hdp->hd_vs)
+		SET_TIMER (hdp);	/* iframes are still unacknowledged */
+
+	return (TRUE);
+}
+
+/* 
+ *  This routine determines how many iframes need to be retransmitted.
+ *  It then resets the Send State Variable V(S) to accomplish this.
+ */
+
+static
+rej_routine (hdp, rejnr)
+register struct hdcb *hdp;
+register int rejnr;
+{
+	register int pending;
+
+	/*
+	 * Anything already queued on the interface is now out of
+	 * sequence, so throw it away.
+	 */
+	hd_flush (hdp->hd_ifp);
+
+	/*
+	 *  Count the frames between the rejected N(R) and the send
+	 *  state variable; in timer recovery the V(S) saved in hd_xx
+	 *  is used instead of the current one.  The count can be 0
+	 *  for a timer recovery REJ.
+	 */
+	pending = (hdp->hd_condition & TIMER_RECOVERY_CONDITION) ?
+		hdp->hd_xx : hdp->hd_vs;
+	pending = (pending - rejnr + 8) % MODULUS;
+
+	if (pending > 0) {
+		/* Rewind V(S) and resend up to the end of the retx queue. */
+		KILL_TIMER (hdp);
+		hdp->hd_vs = rejnr;
+
+		while (hdp->hd_vs != hdp->hd_retxqi)
+			hd_send_iframe (hdp, hdp->hd_retxq[hdp->hd_vs], POLLOFF);
+	}
+
+	hd_start (hdp);
+}
+
+/* 
+ *  This routine frees iframes from the retransmit queue. It is called
+ *  when a previously written iframe is acknowledged.
+ */
+
+static
+free_iframes (hdp, nr, finalbit)
+register struct hdcb *hdp;
+int *nr;
+register int finalbit;
+
+{
+	register int    remaining;
+
+	/*
+	 *  Protocol quirk: in timer recovery, an N(R) that acknowledges
+	 *  every outstanding iframe but arrives with the final bit off
+	 *  must not release the last one, since it may still have to be
+	 *  retransmitted.  Step the caller's N(R) back by one so that
+	 *  frame is kept.
+	 */
+	if (finalbit == 0 && *nr == hdp->hd_xx &&
+	    (hdp->hd_condition & TIMER_RECOVERY_CONDITION))
+		*nr = (*nr - 1 + 8) % MODULUS;
+
+	/* Number of frames between the previous N(R) and this one. */
+	remaining = (*nr - hdp->hd_lastrxnr + 8) % MODULUS;
+
+	/* Free each acknowledged iframe and clear its retransmit slot. */
+	while (remaining-- > 0) {
+		m_freem (hdp->hd_retxq[hdp->hd_lastrxnr]);
+		hdp->hd_retxq[hdp->hd_lastrxnr] = 0;
+		hdp->hd_lastrxnr = (hdp->hd_lastrxnr + 1) % MODULUS;
+	}
+
+}
diff --git a/sys/netccitt/hd_output.c b/sys/netccitt/hd_output.c
new file mode 100644
index 00000000000..05992e1deb1
--- /dev/null
+++ b/sys/netccitt/hd_output.c
@@ -0,0 +1,247 @@
+/*
+ * Copyright (c) University of British Columbia, 1984
+ * Copyright (c) 1990, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * the Laboratory for Computation Vision and the Computer Science Department
+ * of the University of British Columbia.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)hd_output.c	8.1 (Berkeley) 6/10/93
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/mbuf.h>
+#include <sys/domain.h>
+#include <sys/socket.h>
+#include <sys/syslog.h>
+#include <sys/protosw.h>
+#include <sys/errno.h>
+#include <sys/time.h>
+#include <sys/kernel.h>
+
+#include <net/if.h>
+
+#include <netccitt/hdlc.h>
+#include <netccitt/hd_var.h>
+#include <netccitt/x25.h>
+
+/*
+ *      HDLC OUTPUT INTERFACE
+ *
+ *      This routine is called when the X.25 packet layer output routine
+ *      has an information frame (iframe) to write.  It is also called
+ *      by the input and control routines of the HDLC layer.
+ */
+
+hd_output (hdp, m0)
+register struct hdcb *hdp;
+struct mbuf *m0;
+{
+	register struct mbuf *m = m0;
+	int len;
+
+	/* The packet layer must hand us a chain with a packet header. */
+	if (m == NULL)
+		panic ("hd_output");
+	if ((m->m_flags & M_PKTHDR) == 0)
+		panic ("hd_output 2");
+
+	/* Data can only be sent while the link is up; otherwise drop it. */
+	if (hdp->hd_state != ABM) {
+		m_freem (m);
+		return;
+	}
+
+	/*
+	 * Make room for the hdlc header either by prepending
+	 * another mbuf, or by adjusting the offset and length
+	 * of the first mbuf in the mbuf chain.
+	 */
+
+	M_PREPEND(m, HDHEADERLN, M_DONTWAIT);
+	if (m == NULL)
+		return;
+
+	/*
+	 * M_PREPEND may have put a new mbuf in front of the chain, so
+	 * the head is whatever m is now, not the mbuf we were called
+	 * with.  Going back to the old head here would queue the frame
+	 * without its header mbuf and lose that mbuf.
+	 */
+	m0 = m;
+
+	/* Record the total chain length in the head's packet header. */
+	for (len = 0; m; m = m->m_next)
+		len += m->m_len;
+	m0->m_pkthdr.len = len;
+
+	hd_append (&hdp->hd_txq, m0);
+	hd_start (hdp);
+}
+
+hd_start (hdp)
+register struct hdcb *hdp;
+{
+	register struct mbuf *next;
+
+	for (;;) {
+		/*
+		 * Nothing is sent while in timer recovery, while the
+		 * remote is busy, or while a reject is pending.  The
+		 * iframes stay on hdp->hd_txq until the condition clears.
+		 */
+		if (hdp->hd_condition & (TIMER_RECOVERY_CONDITION | REMOTE_RNR_CONDITION | REJ_CONDITION))
+			return;
+
+		/*
+		 * Window full: the maximum number of unacknowledged
+		 * iframes is outstanding, so wait for an acknowledgement.
+		 */
+		if (hdp->hd_vs == (hdp->hd_lastrxnr + hdp->hd_xcp->xc_lwsize) % MODULUS) {
+			hdp->hd_window_condition++;
+			return;
+		}
+
+		/* Take the oldest queued iframe, if any, and send it. */
+		next = hd_remove (&hdp->hd_txq);
+		if (next == NULL)
+			return;
+
+		hd_send_iframe (hdp, next, POLLOFF);
+	}
+}
+
+/* 
+ *  This procedure is passed a buffer descriptor for an iframe. It builds
+ *  the rest of the control part of the frame and then writes it out.  It
+ *  also  starts the  acknowledgement  timer and keeps  the iframe in the
+ *  Retransmit queue (Retxq) just in case  we have to do this again.
+ *
+ *  Note: This routine is also called from hd_input.c when retransmission
+ *       of old frames is required.
+ */
+
+hd_send_iframe (hdp, buf, poll_bit)
+register struct hdcb *hdp;
+register struct mbuf *buf;
+int poll_bit;
+{
+	register struct Hdlc_iframe *iframe;
+	struct mbuf *m;
+
+	KILL_TIMER (hdp);
+
+	if (buf == 0) {
+		/*
+		 * The caller picked an empty retransmit slot.  Report it
+		 * and move V(S) back by 7 modulo MODULUS.
+		 */
+		printf ("hd_send_iframe: zero arg\n");
+#ifdef HDLCDEBUG
+		hd_status (hdp);
+		hd_dumptrace (hdp);
+#endif
+		hdp->hd_vs = (hdp->hd_vs + 7) % MODULUS;
+		return;
+	}
+	iframe = mtod (buf, struct Hdlc_iframe *);
+
+	/* Fill in the control part: N(R), P bit, N(S) and the address. */
+	iframe -> hdlc_0 = 0;
+	iframe -> nr = hdp->hd_vr;
+	iframe -> pf = poll_bit;
+	iframe -> ns = hdp->hd_vs;
+	iframe -> address = ADDRESS_B;
+	/* The N(R) we carry acknowledges the peer, so no separate RR is due. */
+	hdp->hd_lasttxnr = hdp->hd_vr;
+	hdp->hd_rrtimer = 0;
+
+	if (hdp->hd_vs == hdp->hd_retxqi) {
+		/* Check for retransmissions. */
+		/* Put iframe only once in the Retransmission queue. */
+		hdp->hd_retxq[hdp->hd_retxqi] = buf;
+		hdp->hd_retxqi = (hdp->hd_retxqi + 1) % MODULUS;
+		hdp->hd_iframes_out++;
+	}
+
+	hdp->hd_vs = (hdp->hd_vs + 1) % MODULUS;
+
+	hd_trace (hdp, TX, (struct Hdlc_frame *)iframe);
+
+	/*
+	 * Write buffer on device.  Unless hd_dontcopy is set, the
+	 * device gets a copy and the original stays in hd_retxq.
+	 */
+	m = hdp->hd_dontcopy ? buf : m_copy(buf, 0, (int)M_COPYALL);
+	if (m == 0) {
+		printf("hdlc: out of mbufs\n");
+		/*
+		 * The iframe is already in the retransmit queue and V(S)
+		 * has advanced, but nothing was sent.  Restart the timer
+		 * killed above so that its expiry can retransmit the
+		 * frame; otherwise nothing here would ever resend it.
+		 */
+		SET_TIMER (hdp);
+		return;
+	}
+	(*hdp->hd_output)(hdp, m);
+	SET_TIMER (hdp);
+}
+
+hd_ifoutput(hdp, m)
+register struct mbuf *m;
+register struct hdcb *hdp;
+{
+	register struct ifnet *ifp;
+	int s;
+
+	/*
+	 * Hand the frame to the interface's send queue at splimp and
+	 * start the interface if it is not already transmitting.
+	 */
+	ifp = hdp->hd_ifp;
+	s = splimp();
+
+	if (IF_QFULL(&ifp->if_snd)) {
+		/* Send queue is full: count the drop and discard the frame. */
+		IF_DROP(&ifp->if_snd);
+		m_freem(m);
+	} else {
+		IF_ENQUEUE(&ifp->if_snd, m);
+		if (!(ifp->if_flags & IFF_OACTIVE))
+			(*ifp->if_start)(ifp);
+	}
+
+	splx(s);
+}
+
+
+/* 
+ *  This routine gets control when the timer expires because we have not
+ *  received an acknowledgement for an iframe.
+ */
+
+hd_resend_iframe (hdp)
+register struct hdcb *hdp;
+{
+
+	if (hdp->hd_retxcnt++ >= hd_n2) {
+		/*
+		 * Still no acknowledgement after N2 retries: give up on
+		 * this connection and attempt to reset the link.
+		 */
+		hd_initvars (hdp);
+		hd_writeinternal (hdp, SABM, POLLOFF);
+		hdp->hd_state = WAIT_UA;
+		SET_TIMER (hdp);
+		hd_message (hdp, "Timer recovery failed: link down");
+		(void) pk_ctlinput (PRC_LINKDOWN, hdp->hd_pkp);
+		return;
+	}
+
+	/* On the first expiry remember V(S) and enter timer recovery. */
+	if ((hdp->hd_condition & TIMER_RECOVERY_CONDITION) == 0) {
+		hdp->hd_xx = hdp->hd_vs;
+		hdp->hd_condition |= TIMER_RECOVERY_CONDITION;
+	}
+
+	/* Resend the oldest unacknowledged iframe with the poll bit on. */
+	hdp->hd_vs = hdp->hd_lastrxnr;
+	hd_send_iframe (hdp, hdp->hd_retxq[hdp->hd_vs], POLLON);
+}
diff --git a/sys/netccitt/hd_subr.c b/sys/netccitt/hd_subr.c
new file mode 100644
index 00000000000..c75ab07568e
--- /dev/null
+++ b/sys/netccitt/hd_subr.c
@@ -0,0 +1,391 @@
+/*
+ * Copyright (c) University of British Columbia, 1984
+ * Copyright (c) 1990, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * the Laboratory for Computation Vision and the Computer Science Department
+ * of the University of British Columbia.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)hd_subr.c	8.1 (Berkeley) 6/10/93
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/mbuf.h>
+#include <sys/domain.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/protosw.h>
+#include <sys/errno.h>
+#include <sys/time.h>
+#include <sys/kernel.h>
+
+#include <net/if.h>
+
+#include <netccitt/hdlc.h>
+#include <netccitt/hd_var.h>
+#include <netccitt/x25.h>
+#include <netccitt/pk_var.h>
+
+hd_init ()	/* Initialise the HDLC input queue. */
+{
+
+	hdintrq.ifq_maxlen = IFQ_MAXLEN;	/* length limit of the queue that hdintr() drains */
+}
+
+hd_ctlinput (prc, addr)
+struct sockaddr *addr;
+{
+	/*
+	 * Control input for the HDLC layer.  addr is the x25config of
+	 * the interface concerned; prc is PRC_IFUP, PRC_IFDOWN or
+	 * PRC_DISCONNECT_REQUEST (other codes only do the lookup or
+	 * set-up below).  Returns 0 or an errno value.
+	 */
+	register struct x25config *xcp = (struct x25config *)addr;
+	register struct hdcb *hdp;
+	register struct ifaddr *ifa;
+	struct ifnet *ifp;
+	caddr_t pk_newlink();
+
+	if (addr->sa_family != AF_CCITT)
+		return (EAFNOSUPPORT);
+	if (xcp->xc_lptype != HDLCPROTO_LAPB)
+		return (EPROTONOSUPPORT);
+	ifa = ifa_ifwithaddr(addr);
+	if (ifa == 0 || ifa->ifa_addr->sa_family != AF_CCITT ||
+	    (ifp = ifa->ifa_ifp) == 0)
+		panic ("hd_ctlinput");
+
+	/* Look for an existing control block for this interface. */
+	for (hdp = hdcbhead; hdp; hdp = hdp->hd_next)
+		if (hdp->hd_ifp == ifp)
+			break;
+
+	if (hdp == 0) {		/* new interface */
+		int hd_ifoutput();
+
+		/* an hdcb is now too big to fit in an mbuf */
+		MALLOC(hdp, struct hdcb *, sizeof (*hdp), M_PCB, M_DONTWAIT);
+		if (hdp == 0)
+			return (ENOBUFS);
+		bzero((caddr_t)hdp, sizeof(*hdp));
+		hdp->hd_pkp =
+			(caddr_t) pk_newlink ((struct x25_ifaddr *) ifa, 
+					      (caddr_t) hdp);
+		((struct x25_ifaddr *)ifa)->ia_pkcb = 
+			(struct pkcb *) hdp->hd_pkp;
+		if (hdp -> hd_pkp == 0) {
+			/* Not on the hdcbhead list yet, so safe to free. */
+			free(hdp, M_PCB);
+			return (ENOBUFS);
+		}
+		hdp->hd_ifp = ifp;
+		hdp->hd_ifa = ifa;
+		hdp->hd_xcp = xcp;
+		hdp->hd_state = INIT;
+		hdp->hd_output = hd_ifoutput;
+		hdp->hd_next = hdcbhead;
+		hdcbhead = hdp;
+	} else if (hdp->hd_pkp == 0) { /* interface got reconfigured */
+		hdp->hd_pkp =
+			(caddr_t) pk_newlink ((struct x25_ifaddr *) ifa, 
+					      (caddr_t) hdp);
+		((struct x25_ifaddr *)ifa)->ia_pkcb = 
+			(struct pkcb *) hdp->hd_pkp;
+		/*
+		 * This hdcb is still linked on the hdcbhead list (and may
+		 * be cached by hdintr), so it must not be freed here.
+		 * Leave it in place with hd_pkp == 0; a later call will
+		 * try pk_newlink() again.
+		 */
+		if (hdp -> hd_pkp == 0)
+			return (ENOBUFS);
+	}
+
+	switch (prc) {
+	case PRC_IFUP:
+		/* Force the window size into the range 1..MAX_WINDOW_SIZE. */
+		if (xcp->xc_lwsize == 0 ||
+			xcp->xc_lwsize > MAX_WINDOW_SIZE)
+				xcp->xc_lwsize = MAX_WINDOW_SIZE;
+		if (hdp->hd_state == INIT)
+			SET_TIMER (hdp);
+		break;
+
+	case PRC_IFDOWN:
+		if (hdp->hd_state == ABM)
+			hd_message (hdp, "Operator shutdown: link closed");
+		(void) pk_ctlinput (PRC_LINKDOWN, hdp->hd_pkp);
+
+		/* fall thru to ... */
+
+	case PRC_DISCONNECT_REQUEST:
+		/* drop reference to pkcb --- it's dead meat */
+		hdp->hd_pkp = (caddr_t) 0;
+		((struct x25_ifaddr *)ifa)->ia_pkcb = (struct pkcb *) 0;
+
+		hd_writeinternal (hdp, DISC, POLLON);
+		hdp->hd_state = DISC_SENT;
+		SET_TIMER (hdp);
+	}
+	return (0);
+}
+
+hd_initvars (hdp)
+register struct hdcb *hdp;
+{
+	/*
+	 * Reset the link variables of hdp: drop every queued and
+	 * unacknowledged iframe and zero the sequence numbers, timers,
+	 * retry count and condition flags.
+	 */
+	register struct mbuf *m;
+	register int i;
+
+	/* Clear Transmit queue. */
+	while ((m = hd_remove (&hdp->hd_txq)) != NULL)
+		m_freem (m);
+
+	/*
+	 * Clear Retransmit queue.  Each slot is zeroed as it is freed
+	 * so that no pointer to a released mbuf is left in hd_retxq[];
+	 * a stale pointer would defeat the zero-argument check in
+	 * hd_send_iframe() and could be freed a second time.
+	 */
+	i = hdp->hd_lastrxnr;
+	while (i != hdp->hd_retxqi) {
+		m_freem (hdp->hd_retxq[i]);
+		hdp->hd_retxq[i] = 0;
+		i = (i + 1) % MODULUS;
+	}
+	hdp->hd_retxqi = 0;
+
+	hdp->hd_vs = hdp->hd_vr = 0;
+	hdp->hd_lasttxnr = hdp->hd_lastrxnr = 0;
+	hdp->hd_rrtimer = 0;
+	KILL_TIMER(hdp);
+	hdp->hd_retxcnt = 0;
+	hdp->hd_condition = 0;
+}
+
+hd_decode (hdp, frame)
+register struct hdcb *hdp;
+struct Hdlc_frame *frame;
+{
+	/* The same frame viewed through each of the three layouts. */
+	register struct Hdlc_iframe *iframe = (struct Hdlc_iframe *) frame;
+	register struct Hdlc_sframe *sframe = (struct Hdlc_sframe *) frame;
+	register struct Hdlc_uframe *uframe = (struct Hdlc_uframe *) frame;
+
+	/* Information format. */
+	if (iframe -> hdlc_0 == 0) {
+		hdp->hd_iframes_in++;
+		return (IFRAME);
+	}
+
+	/* Supervisory format. */
+	if (sframe -> hdlc_01 == 1) {
+		if (sframe -> s2 == 0) {
+			hdp->hd_rrs_in++;
+			return (RR);
+		}
+		if (sframe -> s2 == 1) {
+			hdp->hd_rnrs_in++;
+			return (RNR);
+		}
+		if (sframe -> s2 == 2) {
+			hdp->hd_rejs_in++;
+			return (REJ);
+		}
+		return (ILLEGAL);
+	}
+
+	/* Unnumbered format. */
+	if (uframe -> hdlc_11 == 3) {
+		switch (uframe -> m3) {
+		case 0: 
+			return (DM);
+		case 1: 
+			return (SABM);
+		case 2: 
+			return (DISC);
+		case 3: 
+			return (UA);
+		case 4: 
+			hdp->hd_frmrs_in++;
+			return (FRMR);
+		}
+	}
+
+	/* Anything not recognised above. */
+	return (ILLEGAL);
+}
+
+/* 
+ *  This routine is called when the HDLC layer internally  generates a
+ *  command or  response  for  the remote machine ( eg. RR, UA etc. ). 
+ *  Only supervisory or unnumbered frames are processed.
+ *
+ *  hdp       - control block of the link the frame is sent on.
+ *  frametype - one of RR, RNR, REJ, SABM, DISC, DM, UA or FRMR.
+ *  pf        - value stored in the frame's poll/final bit.
+ *
+ *  The frame is built in a newly allocated header mbuf and passed to
+ *  (*hdp->hd_output)().  If no mbuf is available the frame is silently
+ *  not sent.
+ */
+
+hd_writeinternal (hdp, frametype, pf)
+register struct hdcb *hdp;
+register int frametype, pf;
+{
+	register struct mbuf *buf;
+	struct Hdlc_frame *frame;
+	register struct Hdlc_sframe *sframe;
+	register struct Hdlc_uframe *uframe;
+
+	MGETHDR (buf, M_DONTWAIT, MT_HEADER);
+	if (buf == 0)
+		return;
+	frame = mtod (buf, struct Hdlc_frame *);	/* three views of the same mbuf data */
+	sframe = mtod (buf, struct Hdlc_sframe *);
+	uframe = mtod (buf, struct Hdlc_uframe *);
+
+	/* Assume a response - address structure for DTE */
+	frame -> address = ADDRESS_A;
+	buf -> m_len = 2;	/* default length; FRMR raises it to 5 below */
+	buf -> m_act = buf -> m_next = NULL;
+
+	switch (frametype) {
+	case RR: 
+		frame -> control = RR_CONTROL;
+		hdp->hd_rrs_out++;
+		break;
+
+	case RNR: 
+		frame -> control = RNR_CONTROL;
+		hdp->hd_rnrs_out++;
+		break;
+
+	case REJ: 
+		frame -> control = REJ_CONTROL;
+		hdp->hd_rejs_out++;
+		break;
+
+	case SABM: 
+		frame -> control = SABM_CONTROL;
+		frame -> address = ADDRESS_B;	/* sent with address B instead of the default A */
+		break;
+
+	case DISC: 
+		if ((hdp->hd_ifp->if_flags & IFF_UP) == 0) {	/* interface is down: send nothing, just close the link */
+			hdp->hd_state = DISCONNECTED;
+			(void) m_freem (buf);
+			hd_flush (hdp->hd_ifp);
+			return;
+		}
+		frame -> control = DISC_CONTROL;
+		frame -> address = ADDRESS_B;
+		break;
+
+	case DM: 
+		frame -> control = DM_CONTROL;
+		break;
+
+	case UA: 
+		frame -> control = UA_CONTROL;
+		break;
+
+	case FRMR: 
+		frame -> control = FRMR_CONTROL;
+		bcopy ((caddr_t)&hd_frmr, (caddr_t)frame -> info, 3);	/* first 3 bytes of the global hd_frmr */
+		buf -> m_len = 5;
+		hdp->hd_frmrs_out++;
+
+	}
+
+	if (sframe -> hdlc_01 == 1) {	/* tests the control field stored just above */
+		/* Supervisory format - RR, REJ, or RNR. */
+		sframe -> nr = hdp->hd_vr;
+		sframe -> pf = pf;
+		hdp->hd_lasttxnr = hdp->hd_vr;	/* this frame carries our current V(R) */
+		hdp->hd_rrtimer = 0;
+	}
+	else
+		uframe -> pf = pf;
+
+	hd_trace (hdp, TX, frame);
+	buf -> m_pkthdr.len = buf -> m_len;
+	(*hdp->hd_output) (hdp, buf);
+}
+
+struct mbuf *
+hd_remove (q)
+struct hdtxq *q;
+{
+	register struct mbuf *first = q -> head;
+
+	/* Empty queue: there is nothing to unlink. */
+	if (first == NULL)
+		return (first);
+
+	/* Advance the head; removing the only entry also clears the tail. */
+	q -> head = first -> m_act;
+	if (q -> head == NULL)
+		q -> tail = NULL;
+
+	/* Detach the removed packet from the rest of the queue. */
+	first -> m_act = 0;
+	return (first);
+}
+
+hd_append (q, m)
+register struct hdtxq *q;
+register struct mbuf *m;
+{
+
+	/* The new packet becomes the last one, so it has no successor. */
+	m -> m_act = NULL;
+
+	/* Link it after the current tail, or make it the head if empty. */
+	if (q -> tail != NULL)
+		q -> tail -> m_act = m;
+	else
+		q -> head = m;
+
+	q -> tail = m;
+}
+
+hd_flush (ifp)
+struct ifnet *ifp;
+{
+	register struct mbuf *m;
+	register int s;
+
+	/*
+	 * Empty the interface send queue one packet at a time; only
+	 * the dequeue itself runs at splimp.
+	 */
+	for (;;) {
+		s = splimp ();
+		IF_DEQUEUE (&ifp->if_snd, m);
+		splx (s);
+		if (m == 0)
+			return;
+		m_freem (m);
+	}
+}
+
+hd_message (hdp, msg)
+struct hdcb *hdp;
+char *msg;
+{
+	char *format_ntn ();
+
+	/* Only one hdcb on the list: no need to say which link it is. */
+	if (hdcbhead -> hd_next == 0) {
+		printf ("HDLC: %s\n", msg);
+		return;
+	}
+
+	/* Several links: prefix the message with format_ntn()'s string. */
+	printf ("HDLC(%s): %s\n", format_ntn (hdp->hd_xcp), msg);
+}
+
+#ifdef HDLCDEBUG
+hd_status (hdp)
+struct hdcb *hdp;
+{
+	/* Print the sequence variables and condition flags of the link. */
+	printf ("HDLC STATUS:\n V(S)=%d, V(R)=%d, retxqi=%d,\n",
+		hdp->hd_vs,
+		hdp->hd_vr,
+		hdp->hd_retxqi);
+
+	printf ("Last_rx_nr=%d, Last_tx_nr=%d,\n Condition=%d, Xx=%d\n",
+		hdp->hd_lastrxnr,
+		hdp->hd_lasttxnr,
+		hdp->hd_condition,
+		hdp->hd_xx);
+}
+#endif
diff --git a/sys/netccitt/hd_timer.c b/sys/netccitt/hd_timer.c
new file mode 100644
index 00000000000..a3bf12addf0
--- /dev/null
+++ b/sys/netccitt/hd_timer.c
@@ -0,0 +1,147 @@
+/*
+ * Copyright (c) University of British Columbia, 1984
+ * Copyright (c) 1990, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * the Laboratory for Computation Vision and the Computer Science Department
+ * of the University of British Columbia.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)hd_timer.c	8.1 (Berkeley) 6/10/93
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/mbuf.h>
+#include <sys/domain.h>
+#include <sys/socket.h>
+#include <sys/protosw.h>
+#include <sys/errno.h>
+#include <sys/time.h>
+#include <sys/kernel.h>
+
+#include <net/if.h>
+
+#include <netccitt/hdlc.h>
+#include <netccitt/hd_var.h>
+#include <netccitt/x25.h>
+
+/*
+ * these can be patched with adb if the
+ * default values are inappropriate
+ */
+
+int	hd_t1 = T1;	/* timer T1 value; SET_TIMER loads it into hd_timer */
+int	hd_t3 = T3;	/* RR send timer */
+int	hd_n2 = N2;	/* frame retransmission limit */
+
+/*
+ *  HDLC TIMER 
+ *
+ *  This routine is called every 500ms by the kernel. Decrement timer by this
+ *  amount - if expired then process the event.
+ */
+
+hd_timer ()
+{
+	register struct hdcb *hdp;
+	register int ipl = splimp ();
+
+	for (hdp = hdcbhead; hdp; hdp = hdp->hd_next) {
+		/* RR timer ran out while the last N(R) we sent lags behind V(R). */
+		if (hdp->hd_rrtimer != 0 && --hdp->hd_rrtimer == 0 &&
+		    hdp->hd_lasttxnr != hdp->hd_vr)
+			hd_writeinternal (hdp, RR, POLLOFF);
+
+		/* Nothing more to do unless the main timer expires on this tick. */
+		if (hdp->hd_timer == 0 || --hdp->hd_timer != 0)
+			continue;
+
+		switch (hdp->hd_state) {
+		case INIT:
+		case DISC_SENT:
+			hd_writeinternal (hdp, DISC, POLLON);
+			break;
+
+		case ABM:
+			/* XXX: resend only if the peer has not acknowledged up to V(S). */
+			if (hdp->hd_lastrxnr != hdp->hd_vs) {
+				hdp->hd_timeouts++;
+				hd_resend_iframe (hdp);
+			}
+			break;
+
+		case WAIT_SABM:
+			hd_writeinternal (hdp, FRMR, POLLOFF);
+			if (++hdp->hd_retxcnt != hd_n2)
+				break;
+			/* FRMR has now been repeated hd_n2 times: send SABM ourselves. */
+			hdp->hd_retxcnt = 0;
+			hd_writeinternal (hdp, SABM, POLLOFF);
+			hdp->hd_state = WAIT_UA;
+			break;
+
+		case DM_SENT:
+			if (++hdp->hd_retxcnt != hd_n2) {
+				hd_writeinternal (hdp, DM, POLLOFF);
+				break;
+			}
+			/* Retry limit reached: notify the packet level, then send SABM. */
+			(void) pk_ctlinput (PRC_LINKDOWN, hdp->hd_pkp);
+			hdp->hd_retxcnt = 0;
+			hdp->hd_state = SABM_SENT;
+			hd_writeinternal (hdp, SABM, POLLOFF);
+			break;
+
+		case WAIT_UA:
+			if (++hdp->hd_retxcnt != hd_n2) {
+				hd_writeinternal (hdp, SABM, POLLOFF);
+				break;
+			}
+			hdp->hd_retxcnt = 0;
+			hd_writeinternal (hdp, DM, POLLOFF);
+			hdp->hd_state = DM_SENT;
+			break;
+
+		case SABM_SENT:
+			/* Keep sending SABM (with poll) indefinitely. */
+			hd_writeinternal (hdp, SABM, POLLON);
+			break;
+
+		case DISCONNECTED:
+			/* Poll the interface driver flags until IFF_UP comes on. */
+			if (hdp->hd_ifp->if_flags & IFF_UP)
+				hdp->hd_state = INIT;
+		}
+		SET_TIMER (hdp);
+	}
+	splx (ipl);
+}
diff --git a/sys/netccitt/hd_var.h b/sys/netccitt/hd_var.h
new file mode 100644
index 00000000000..5fefe0869a4
--- /dev/null
+++ b/sys/netccitt/hd_var.h
@@ -0,0 +1,107 @@
+/*
+ * Copyright (c) University of British Columbia, 1984
+ * Copyright (c) 1990, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * the Laboratory for Computation Vision and the Computer Science Department
+ * of the University of British Columbia.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)hd_var.h	8.1 (Berkeley) 6/10/93
+ */
+
+/*
+ *
+ * hdlc control block
+ *
+ */
+
+struct	hdtxq {
+	struct	mbuf *head;	/* first frame queued (chained via m_act), or NULL */
+	struct	mbuf *tail;	/* last frame queued, or NULL when the queue is empty */
+};
+
+struct	hdcb {
+	struct	hdcb *hd_next;	/* pointer to next hdlc control block */
+	char	hd_state;	/* link state */
+	char	hd_vs;		/* send state variable */
+	char	hd_vr;		/* receive state variable */
+	char	hd_lastrxnr;	/* last received N(R) */
+	char	hd_lasttxnr;	/* last transmitted N(R) */
+	char	hd_condition;	/* presumably a mask of the three flags below */
+#define TIMER_RECOVERY_CONDITION        0x01
+#define REJ_CONDITION                   0x02
+#define REMOTE_RNR_CONDITION            0X04
+	char	hd_retxcnt;	/* retry counter; hd_timer() checks it against hd_n2 */
+	char	hd_xx;
+	struct	hdtxq hd_txq;	/* queue of frames (see struct hdtxq) */
+	struct	mbuf *hd_retxq[MODULUS];	/* presumably frames kept for resending */
+	char	hd_retxqi;
+	char	hd_rrtimer;	/* countdown run by hd_timer(); RR sent at 0 if needed */
+	char	hd_timer;	/* countdown run by hd_timer(); 0 means not running */
+#define SET_TIMER(hdp)		hdp->hd_timer = hd_t1
+#define KILL_TIMER(hdp)		hdp->hd_timer = 0
+	char	hd_dontcopy;	/* if-driver doesn't free I-frames */
+	struct	ifnet *hd_ifp;	/* device's network visible interface */
+	struct	ifaddr *hd_ifa;	/* device's X.25 network address */
+	struct	x25config *hd_xcp;
+	caddr_t	hd_pkp;		/* Level III junk */
+	int	(*hd_output)();	/* separate entry for HDLC direct output */
+
+	/* link statistics */
+
+	long	hd_iframes_in;
+	long	hd_iframes_out;
+	long	hd_rrs_in;
+	long	hd_rrs_out;
+	short	hd_rejs_in;
+	short	hd_rejs_out;
+	long	hd_window_condition;
+	short	hd_invalid_ns;
+	short	hd_invalid_nr;
+	short	hd_timeouts;	/* T1 expiries in ABM that caused a resend */
+	short	hd_resets;
+	short	hd_unknown;
+	short	hd_frmrs_in;
+	short	hd_frmrs_out;	/* FRMR frames sent */
+	short	hd_rnrs_in;
+	short	hd_rnrs_out;
+};
+
+#ifdef KERNEL
+struct	hdcb *hdcbhead;		/* head of linked list of hdcb's */
+struct	Frmr_frame hd_frmr;	/* rejected frame diagnostic info */
+struct	ifqueue hdintrq;	/* hdlc packet input queue */
+
+int	hd_t1;			/* timer T1 value */
+int	hd_t3;			/* RR send timer */
+int	hd_n2;			/* frame retransmission limit */
+#endif
diff --git a/sys/netccitt/hdlc.h b/sys/netccitt/hdlc.h
new file mode 100644
index 00000000000..60cf7adf072
--- /dev/null
+++ b/sys/netccitt/hdlc.h
@@ -0,0 +1,156 @@
+/*-
+ * Copyright (c) University of British Columbia, 1984
+ * Copyright (c) 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by the
+ * Laboratory for Computation Vision and the Computer Science Department
+ * of the University of British Columbia.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)hdlc.h	8.1 (Berkeley) 6/10/93
+ */
+
+#ifndef ORDER4
+#define FALSE   0
+#define TRUE    1
+typedef u_char octet;
+typedef char    bool;
+
+/*
+ *  HDLC Packet format definitions
+ *  This will eventually have to be rewritten without reference
+ *  to bit fields, to be compliant with ANSI C and alignment safe.
+ */
+
+#if BYTE_ORDER == BIG_ENDIAN	/* bit fields are declared in the order given */
+#define ORDER4(a, b, c, d) a , b , c , d
+#define ORDER5(a, b, c, d, e) a , b , c , d , e
+#endif
+/* On little-endian hosts the same declarators are emitted in reverse order. */
+#if BYTE_ORDER == LITTLE_ENDIAN
+#define ORDER4(a, b, c, d) d , c , b , a
+#define ORDER5(a, b, c, d, e) e , d , c , b , a
+#endif
+#endif
+
+#define MAX_INFO_LEN    (4096+3+4)	/* parenthesized: safe inside larger expressions */
+#define ADDRESS_A       3	/* B'00000011' */
+#define ADDRESS_B       1	/* B'00000001' */
+
+struct Hdlc_iframe {	/* layout of an I-frame (IFRAME) */
+	octet	address;
+	octet	ORDER4(nr:3, pf:1, ns:3, hdlc_0:1);	/* the control octet, as bit fields */
+	octet    i_field[MAX_INFO_LEN];	/* information field */
+};
+
+struct Hdlc_sframe {	/* supervisory format: RR, REJ or RNR */
+	octet	address;
+	octet	ORDER4(nr:3, pf:1, s2:2, hdlc_01:2);	/* hdlc_01 == 1 marks this format */
+};
+
+struct	Hdlc_uframe {	/* control octet view used for non-supervisory internal frames */
+	octet	address;
+	octet	ORDER4(m3:3, pf:1, m2:2, hdlc_11:2);	/* pf is set from POLLON/POLLOFF */
+};
+
+struct	Frmr_frame {	/* rejected frame diagnostic info, as kept in hd_frmr */
+	octet	address;
+	octet	control;
+	octet	frmr_control;
+	octet	ORDER4(frmr_nr:3, frmr_f1_0:1, frmr_ns:3, frmr_f2_0:1);
+	octet	ORDER5(frmr_0000:4, frmr_z:1, frmr_y:1, frmr_x:1, frmr_w:1);
+};
+
+#define HDHEADERLN	2
+#define MINFRLN		2		/* Minimum frame length. */
+
+struct	Hdlc_frame {	/* generic view: header octets plus a short info area */
+	octet	address;	/* e.g. ADDRESS_B */
+	octet	control;	/* one of the *_CONTROL values below */
+	octet	info[3];	/* min for FRMR */
+};
+
+#define SABM_CONTROL 057	/* B'00101111' */
+#define UA_CONTROL   0143	/* B'01100011' */
+#define DISC_CONTROL 0103	/* B'01000011' */
+#define DM_CONTROL   017	/* B'00001111' */
+#define FRMR_CONTROL 0207	/* B'10000111' */
+#define RR_CONTROL   01		/* B'00000001' */
+#define RNR_CONTROL  05		/* B'00000101' */
+#define REJ_CONTROL  011	/* B'00001001' */
+
+#define POLLOFF  0
+#define POLLON   1
+
+/* Define Link State constants. */
+
+#define INIT		0
+#define DM_SENT		1
+#define SABM_SENT	2
+#define ABM		3
+#define WAIT_SABM	4
+#define WAIT_UA		5
+#define DISC_SENT	6
+#define DISCONNECTED	7
+#define MAXSTATE	8
+
+/* The following constants are used in a switch statement to process
+   frames read from the communications line. */
+/* Parenthesized so each expands as a single value in any expression. */
+#define SABM     (0 * MAXSTATE)
+#define DM       (1 * MAXSTATE)
+#define DISC     (2 * MAXSTATE)
+#define UA       (3 * MAXSTATE)
+#define FRMR     (4 * MAXSTATE)
+#define RR       (5 * MAXSTATE)
+#define RNR      (6 * MAXSTATE)
+#define REJ      (7 * MAXSTATE)
+#define IFRAME   (8 * MAXSTATE)
+#define ILLEGAL  (9 * MAXSTATE)
+
+#define T1	(3 * PR_SLOWHZ)		/*  IFRAME TIMEOUT - 3 seconds */
+#define T3	(T1 / 2)		/*  RR generate timeout - 1.5 seconds */
+#define N2	10
+#define MODULUS 8
+#define MAX_WINDOW_SIZE 7
+
+#define Z  0
+#define Y  1
+#define X  2
+#define W  3
+#define A  4
+
+#define TX 0
+#define RX 1
+
+bool	range_check ();
+bool	valid_nr ();
+struct	mbuf *hd_remove ();
diff --git a/sys/netccitt/if_x25subr.c b/sys/netccitt/if_x25subr.c
new file mode 100644
index 00000000000..6f00496a18b
--- /dev/null
+++ b/sys/netccitt/if_x25subr.c
@@ -0,0 +1,801 @@
+/*
+ * Copyright (c) 1990, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)if_x25subr.c	8.1 (Berkeley) 6/10/93
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/protosw.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/ioctl.h>
+#include <sys/errno.h>
+#include <sys/syslog.h>
+
+#include <machine/mtpr.h>
+
+#include <net/if.h>
+#include <net/if_types.h>
+#include <net/netisr.h>
+#include <net/route.h>
+
+#include <netccitt/x25.h>
+#include <netccitt/x25err.h>
+#include <netccitt/pk.h>
+#include <netccitt/pk_var.h>
+
+#ifdef INET
+#include <netinet/in.h>
+#include <netinet/in_var.h>
+#endif
+
+#ifdef NS
+#include <netns/ns.h>
+#include <netns/ns_if.h>
+#endif
+
+#ifdef ISO
+int tp_incoming();
+#include <netiso/argo_debug.h>
+#include <netiso/iso.h>
+#include <netiso/iso_var.h>
+#endif
+
+extern	struct ifnet loif;
+struct llinfo_x25 llinfo_x25 = {&llinfo_x25, &llinfo_x25};
+#ifndef _offsetof
+#define _offsetof(t, m) ((int)((caddr_t)&((t *)0)->m))
+#endif
+struct sockaddr *x25_dgram_sockmask;
+struct sockaddr_x25 x25_dgmask = {
+ _offsetof(struct sockaddr_x25, x25_udata[1]),			/* _len */
+ 0,								/* _family */
+ 0,								/* _net */
+ { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, /* _addr */
+ {0},								/* opts */
+ -1,								/* _udlen */
+ {-1}								/* _udata */
+};
+ 
+struct if_x25stats {
+	int	ifx_wrongplen;	/* output packets whose m_pkthdr.len had to be corrected */
+	int	ifx_nophdr;	/* output packets that arrived without M_PKTHDR */
+} if_x25stats;
+int x25_autoconnect = 0;	/* nonzero: x25_ifoutput() calls pk_connect() on a READY circuit */
+
+#define senderr(x) {error = x; goto bad;}
+/*
+ * Ancillary routines
+ */
+static struct llinfo_x25 *
+x25_lxalloc(rt)
+register struct rtentry *rt;
+{
+	register struct llinfo_x25 *info;
+	register struct ifaddr *addr;
+
+	/* Allocate a zeroed llinfo_x25 for rt; returns 0 if no memory. */
+	MALLOC(info, struct llinfo_x25 *, sizeof (*info), M_PCB, M_NOWAIT);
+	if (info == 0)
+		return info;
+	Bzero(info, sizeof(*info));
+	info->lx_rt = rt;
+	info->lx_family = rt_key(rt)->sa_family;
+	rt->rt_refcnt++;		/* the new entry holds a reference on rt */
+	if (rt->rt_llinfo == 0) {
+		/* First entry for rt: it becomes rt_llinfo, queued after the head. */
+		rt->rt_llinfo = (caddr_t)info;
+		insque(info, &llinfo_x25);
+	} else
+		insque(info, (struct llinfo_x25 *)rt->rt_llinfo);
+	/* Remember the interface's AF_CCITT address (the last one, if several). */
+	for (addr = rt->rt_ifp->if_addrlist; addr; addr = addr->ifa_next)
+		if (addr->ifa_addr->sa_family == AF_CCITT)
+			info->lx_ia = (struct x25_ifaddr *)addr;
+	return info;
+}
+x25_lxfree(lx)
+register struct llinfo_x25 *lx;
+{
+	register struct rtentry *rt = lx->lx_rt;
+	register struct pklcd *lcp = lx->lx_lcd;
+	/* Release lx: drop its circuit, unhook it from rt and the queue, free it. */
+	if (lcp) {
+		lcp->lcd_upper = 0;
+		pk_disconnect(lcp);
+	}
+	/* Repoint rt_llinfo only when lx is the entry it names; clearing it */
+	/* while freeing any other entry would orphan rt's remaining entries. */
+	if (rt->rt_llinfo == (caddr_t)lx)
+		rt->rt_llinfo = (lx->lx_next->lx_rt == rt) ? (caddr_t)lx->lx_next : 0;
+	RTFREE(rt);
+	remque(lx);
+	FREE(lx, M_PCB);
+}
+/*
+ * Process an X.25 packet as a datagram.
+ */
+x25_ifinput(lcp, m)
+struct pklcd *lcp;
+register struct mbuf *m;
+{
+	struct llinfo_x25 *lx = (struct llinfo_x25 *)lcp->lcd_upnext;
+	register struct ifnet *rcvif;
+	struct ifqueue *queue;
+	extern struct timeval time;
+	int ipl, isr;
+
+	/* No data, or circuit not in DATA_TRANSFER: treat as a connect event. */
+	if (m == 0 || lcp->lcd_state != DATA_TRANSFER) {
+		x25_connect_callback(lcp, 0);
+		return;
+	}
+	pk_flowcontrol(lcp, 0, 1); /* Generate RR */
+	rcvif = m->m_pkthdr.rcvif;
+	rcvif->if_lastchange = time;
+	/* Only MT_DATA mbufs are passed up; anything else is discarded. */
+	if (m->m_type != MT_DATA) {
+		m_freem(m);
+		return;
+	}
+
+	/* Choose the input queue and netisr number by address family. */
+	switch (lx->lx_family) {
+#ifdef INET
+	case AF_INET:
+		queue = &ipintrq;
+		isr = NETISR_IP;
+		break;
+#endif
+#ifdef NS
+	case AF_NS:
+		queue = &nsintrq;
+		isr = NETISR_NS;
+		break;
+#endif
+#ifdef	ISO
+	case AF_ISO:
+		queue = &clnlintrq;
+		isr = NETISR_ISO;
+		break;
+#endif
+	default:
+		/* No protocol compiled in for this family: count and drop. */
+		m_freem(m);
+		rcvif->if_noproto++;
+		return;
+	}
+
+	/* Hand the packet to the protocol's queue, or drop it if that is full. */
+	ipl = splimp();
+	schednetisr(isr);
+	if (!IF_QFULL(queue)) {
+		IF_ENQUEUE(queue, m);
+		rcvif->if_ibytes += m->m_pkthdr.len;
+	} else {
+		IF_DROP(queue);
+		m_freem(m);
+	}
+	splx(ipl);
+}
+x25_connect_callback(lcp, m)
+register struct pklcd *lcp;
+register struct mbuf *m;
+{
+	register struct llinfo_x25 *lx = (struct llinfo_x25 *)lcp->lcd_upnext;
+	int must_clear = 1;	/* call pk_disconnect() when detaching */
+
+	/*
+	 * Upper-level hook for a circuit being set up: on CALL_ACCEPTED switch
+	 * the circuit to x25_ifinput; otherwise detach it from its llinfo_x25.
+	 */
+	if (m != 0) {
+		if (m->m_type != MT_CONTROL)
+			printf("x25_connect_callback: should panic\n");
+		else if (pk_decode(mtod(m, struct x25_packet *)) != CALL_ACCEPTED)
+			must_clear = 0;	/* any other packet type: detach only */
+		else {
+			lcp->lcd_upper = x25_ifinput;
+			if (lcp->lcd_sb.sb_mb)
+				lcp->lcd_send(lcp); /* XXX start queued packets */
+			return;
+		}
+	}
+	lcp->lcd_upper = 0;
+	lx->lx_lcd = 0;
+	if (must_clear)
+		pk_disconnect(lcp);
+}
+#define SA(p) ((struct sockaddr *)(p))
+#define RT(p) ((struct rtentry *)(p))
+
+x25_dgram_incoming(lcp, m0)
+register struct pklcd *lcp;
+struct mbuf *m0;
+{
+	register struct rtentry *rt, *nrt;
+	register struct mbuf *m = m0->m_next; /* m0 has calling sockaddr_x25 */
+	void x25_rtrequest();
+	/* Find the inverse route for lcd_faddr; its rt_llinfo is the real route. */
+	if ((rt = rtalloc1(SA(&lcp->lcd_faddr), 0)) == 0)
+		goto refuse;
+	rt->rt_refcnt--;
+	if ((nrt = RT(rt->rt_llinfo)) == 0 || rt_mask(rt) != x25_dgram_sockmask)
+		goto refuse;
+	if (!(nrt->rt_flags & RTF_UP)) {
+		rt->rt_llinfo = (caddr_t)rtalloc1(rt->rt_gateway, 0);
+		rtfree(nrt);
+		if ((nrt = RT(rt->rt_llinfo)) == 0)
+			goto refuse;
+		nrt->rt_refcnt--;
+	}
+	if (nrt->rt_ifa == 0 || nrt->rt_ifa->ifa_rtrequest != x25_rtrequest)
+		goto refuse;
+	lcp->lcd_send(lcp); /* confirm call */
+	x25_rtattach(lcp, nrt);
+	m_freem(m);
+	return;
+refuse:
+	lcp->lcd_upper = 0;
+	pk_close(lcp);
+}
+
+/*
+ * X.25 output routine: queue the packet chain m0 for dst on a virtual
+ * circuit of route rt (rt is looked up from dst when it is null or a
+ * gateway route).  Returns 0 or an errno; on error the chain is freed here.
+ */
+x25_ifoutput(ifp, m0, dst, rt)
+struct	ifnet *ifp;
+struct	mbuf *m0;
+struct	sockaddr *dst;
+register struct	rtentry *rt;
+{
+	register struct	mbuf *m;
+	register struct	llinfo_x25 *lx;
+	struct pklcd *lcp;
+	int error = 0;
+	int plen = 0;
+
+	/* Add up the data length so the packet header can be checked below. */
+	for (m = m0; m; m = m->m_next)
+		plen += m->m_len;
+	m = m0;
+
+	if ((ifp->if_flags & IFF_UP) == 0)
+		senderr(ENETDOWN);
+	while (rt == 0 || (rt->rt_flags & RTF_GATEWAY)) {
+		if (rt) {
+			if (rt->rt_llinfo) {
+				rt = (struct rtentry *)rt->rt_llinfo;
+				continue;
+			}
+			dst = rt->rt_gateway;
+		}
+		if ((rt = rtalloc1(dst, 1)) == 0)
+			senderr(EHOSTUNREACH);
+		rt->rt_refcnt--;
+	}
+	/* Sanity checks. */
+	if ((rt->rt_ifp != ifp) ||
+	    (rt->rt_flags & (RTF_CLONING | RTF_GATEWAY)) ||
+	    ((lx = (struct llinfo_x25 *)rt->rt_llinfo) == 0)) {
+		senderr(ENETUNREACH);
+	}
+	if ((m->m_flags & M_PKTHDR) == 0) {
+		/* No packet header on the chain: prepend an empty header mbuf. */
+		if_x25stats.ifx_nophdr++;
+		m = m_gethdr(M_NOWAIT, MT_HEADER);
+		if (m == 0) {
+			m = m0;		/* so that "bad" frees the caller's chain */
+			senderr(ENOBUFS);
+		}
+		m->m_len = 0;		/* the new mbuf carries no data */
+		m->m_pkthdr.len = plen;
+		m->m_next = m0;
+	}
+	if (plen != m->m_pkthdr.len) {
+		if_x25stats.ifx_wrongplen++;
+		m->m_pkthdr.len = plen;
+	}
+next_circuit:
+	lcp = lx->lx_lcd;
+	if (lcp == 0) {
+		lx->lx_lcd = lcp = pk_attach((struct socket *)0);
+		if (lcp == 0)
+			senderr(ENOBUFS);
+		lcp->lcd_upper = x25_connect_callback;
+		lcp->lcd_upnext = (caddr_t)lx;
+		if (lx->lx_ia)	/* may be null: x25_lxalloc found no AF_CCITT addr */
+			lcp->lcd_packetsize = lx->lx_ia->ia_xc.xc_psize;
+		lcp->lcd_flags = X25_MBS_HOLD;
+	}
+	switch (lcp->lcd_state) {
+	case READY:
+		if (dst->sa_family == AF_INET &&
+		    ifp->if_type == IFT_X25DDN &&
+		    rt->rt_gateway->sa_family != AF_CCITT)
+			x25_ddnip_to_ccitt(dst, rt);
+		if (rt->rt_gateway->sa_family != AF_CCITT) {
+			if ((rt->rt_flags & RTF_XRESOLVE) == 0)
+				senderr(EHOSTUNREACH);
+		} else if (x25_autoconnect)
+			error = pk_connect(lcp,
+					(struct sockaddr_x25 *)rt->rt_gateway);
+		if (error)
+			senderr(error);
+		/* FALLTHROUGH */
+	case SENT_CALL:
+	case DATA_TRANSFER:
+		if (sbspace(&lcp->lcd_sb) < 0) {
+			lx = lx->lx_next;
+			if (lx->lx_rt != rt)
+				senderr(ENOSPC);
+			goto next_circuit;
+		}
+		if (lx->lx_ia)
+			lcp->lcd_dg_timer =
+				       lx->lx_ia->ia_xc.xc_dg_idletimo;
+		pk_send(lcp, m);
+		break;
+	default:
+		/*
+		 * We count on the timer routine to close idle connections if
+		 * there are not enough circuits to go around, so throw away
+		 * data for now; actively closing connections when circuits get
+		 * tight is still to be written.  In the DDN case, the imp
+		 * itself closes connections under heavy load.
+		 */
+		error = ENOBUFS;
+	bad:
+		if (m)
+			m_freem(m);
+	}
+	return (error);
+}
+
+/*
+ * Simpleminded timer routine.
+ */
+x25_iftimeout(ifp)
+struct ifnet *ifp;
+{
+	register struct pkcb *pkcb = 0;
+	register struct pklcd **slot, *lcp;
+	int ipl = splimp();
+	FOR_ALL_PKCBS(pkcb)
+	    if (pkcb->pk_ia->ia_ifp == ifp) {
+		for (slot = pkcb->pk_chan + pkcb->pk_maxlcn; --slot > pkcb->pk_chan;) {
+			lcp = *slot;
+			if (lcp == 0 || lcp->lcd_state != DATA_TRANSFER ||
+			    (lcp->lcd_flags & X25_DG_CIRCUIT) == 0)
+				continue;
+			if (lcp->lcd_dg_timer && --lcp->lcd_dg_timer == 0)
+				lcp->lcd_upper(lcp, 0);	/* idle timer ran out */
+		}
+	    }
+	splx(ipl);
+}
+/*
+ * This routine gets called when validating additions of new routes
+ * or deletions of old ones.
+ */
+x25_rtrequest(cmd, rt, dst)
+register struct rtentry *rt;
+struct sockaddr *dst;
+{
+	register struct llinfo_x25 *lx = (struct llinfo_x25 *)rt->rt_llinfo;
+	register struct pklcd *lcp;
+
+	/* would put this in pk_init, except routing table doesn't
+	   exist yet. */
+	if (x25_dgram_sockmask == 0) {
+		struct radix_node *rn_addmask();
+		x25_dgram_sockmask =
+			SA(rn_addmask((caddr_t)&x25_dgmask, 0, 4)->rn_key);
+	}
+	if (rt->rt_flags & RTF_GATEWAY) {
+		/* For gateway routes rt_llinfo holds the route to the gateway. */
+		if (rt->rt_llinfo)
+			RTFREE((struct rtentry *)rt->rt_llinfo);
+		rt->rt_llinfo = (cmd == RTM_ADD) ?
+			(caddr_t)rtalloc1(rt->rt_gateway, 1) : 0;
+		return;
+	}
+	if ((rt->rt_flags & RTF_HOST) == 0)
+		return;
+	if (cmd == RTM_DELETE) {
+		/* Free every llinfo_x25 hung off rt, then the inverse route. */
+		while (rt->rt_llinfo)
+			x25_lxfree((struct llinfo_x25 *)rt->rt_llinfo);
+		x25_rtinvert(RTM_DELETE, rt->rt_gateway, rt);
+		return;
+	}
+	if (lx == 0 && (lx = x25_lxalloc(rt)) == 0)
+		return;
+	if ((lcp = lx->lx_lcd) && lcp->lcd_state != READY) {
+		/*
+		 * This can only happen on a RTM_CHANGE operation
+		 * though cmd will be RTM_ADD.
+		 */
+		if (lcp->lcd_ceaddr &&
+		    Bcmp(rt->rt_gateway, lcp->lcd_ceaddr,
+					 lcp->lcd_ceaddr->x25_len) != 0) {
+			x25_rtinvert(RTM_DELETE, lcp->lcd_ceaddr, rt);
+			lcp->lcd_upper = 0;
+			pk_disconnect(lcp);
+		}
+	}
+	x25_rtinvert(RTM_ADD, rt->rt_gateway, rt);
+}
+
+int x25_dont_rtinvert = 0;
+
+x25_rtinvert(cmd, sa, rt)
+register struct sockaddr *sa;
+register struct rtentry *rt;
+{
+	struct rtentry *inv = 0;	/* the inverse route, keyed by sa */
+	/*
+	 * rt_gateway contains PID indicating which proto
+	 * family on the other end, so will be different
+	 * from general host route via X.25.
+	 */
+	if (rt->rt_ifp->if_type == IFT_X25DDN || x25_dont_rtinvert ||
+	    sa->sa_family != AF_CCITT)
+		return;
+	if (cmd == RTM_DELETE) {
+		/* The inverse route must exist and point back at rt. */
+		inv = rtalloc1(sa, 0);
+		if (inv == 0 || (inv->rt_flags & RTF_PROTO2) == 0 ||
+		    inv->rt_llinfo != (caddr_t)rt) {
+			printf("x25_rtchange: inverse route screwup\n");
+			return;
+		}
+		rt->rt_refcnt--;
+		rtrequest(RTM_DELETE, sa, rt_key(rt), x25_dgram_sockmask,
+				0, (struct rtentry **) 0);
+		return;
+	}
+	/* Add a route keyed by sa whose rt_llinfo refers to (and holds) rt. */
+	rtrequest(RTM_ADD, sa, rt_key(rt), x25_dgram_sockmask,
+			RTF_PROTO2, &inv);
+	if (inv) {
+		inv->rt_llinfo = (caddr_t) rt;
+		rt->rt_refcnt++;
+	}
+}
+
+static struct sockaddr_x25 blank_x25 = {sizeof blank_x25, AF_CCITT};
+/*
+ * IP to X25 address routine copyright ACC, used by permission.
+ */
+union imp_addr {
+	struct in_addr  ip;	/* the address as a whole */
+	struct imp {		/* the same storage viewed as four single bytes */
+		u_char		s_net;	/* first byte; its top bits select the class */
+		u_char		s_host;	/* imp port for class A addresses */
+		u_char		s_lh;	/* imp port for class B addresses */
+		u_char		s_impno;	/* imp number (class C: number * 32 + port) */
+	}		    imp;
+};
+
+/*
+ * The following is totally bogus and here only to preserve
+ * the IP to X.25 translation.
+ */
+x25_ddnip_to_ccitt(src, rt)
+struct sockaddr_in *src;
+register struct rtentry *rt;
+{
+	register struct sockaddr_x25 *dst = (struct sockaddr_x25 *)rt->rt_gateway;
+	union imp_addr ia;
+	int imp_no, imp_port, logical;
+	char *digit;
+
+	/* Overwrite rt's gateway with the DDN X.25 address derived from src. */
+	ia.ip = src->sin_addr;
+	*dst = blank_x25;
+	digit = dst->x25_addr;
+	imp_no = ia.imp.s_impno;
+	if ((ia.imp.s_net & 0x80) == 0x00)		/* class A */
+		imp_port = ia.imp.s_host;
+	else if ((ia.imp.s_net & 0xc0) == 0x80)	/* class B */
+		imp_port = ia.imp.s_lh;
+	else {					/* class C */
+		imp_port = ia.imp.s_impno % 32;
+		imp_no = ia.imp.s_impno / 32;
+	}
+
+	digit[0] = 12; /* length */
+	/* DNIC is cleared by struct copy above */
+
+	if (imp_port < 64) {
+		/* Physical:  0000 0 IIIHH00 [SS]; s_impno -> III, s_host -> HH */
+		digit[5] = 0;	/* set flag bit */
+		digit[6] = imp_no / 100;
+		digit[7] = (imp_no % 100) / 10;
+		digit[8] = imp_no % 10;
+		digit[9] = imp_port / 10;
+		digit[10] = imp_port % 10;
+	} else {
+		/* Logical:   0000 1 RRRRR00 [SS]; s_host * 256 + s_impno -> RRRRR */
+		logical = (imp_port << 8) + imp_no;
+		digit[5] = 1;
+		digit[6] = logical / 10000;
+		digit[7] = (logical % 10000) / 1000;
+		digit[8] = (logical % 1000) / 100;
+		digit[9] = (logical % 100) / 10;
+		digit[10] = logical % 10;
+	}
+}
+
+/*
+ * This routine is a sketch and is not to be believed!!!!!
+ *
+ * This is a utility routine to be called by x25 devices when a
+ * call request is honored with the intent of starting datagram forwarding.
+ */
+x25_dg_rtinit(dst, ia, af)
+struct sockaddr_x25 *dst;
+register struct x25_ifaddr *ia;
+{
+	struct sockaddr *sa = 0;
+	struct rtentry *rt;
+	struct in_addr my_addr;
+	static struct sockaddr_in sin = {sizeof(sin), AF_INET};
+
+	/*
+	 * Work out the upper-level address sa that corresponds to the X.25
+	 * address dst, then look sa up with cloning enabled.  Always returns 0.
+	 */
+	if (ia->ia_ifp->if_type == IFT_X25DDN && af == AF_INET) {
+		/* Inverse X25 to IP mapping copyright and courtesy ACC. */
+		int             imp_no, imp_port, temp;
+		union imp_addr imp_addr;
+	    {
+		/* First determine our IP addr for network */
+		register struct in_ifaddr *ina;
+		extern struct in_ifaddr *in_ifaddr;
+		for (ina = in_ifaddr; ina; ina = ina->ia_next)
+			if (ina->ia_ifp == ia->ia_ifp) {
+				my_addr = ina->ia_addr.sin_addr;
+				break;
+			}
+		if (ina == 0)	/* no IP address here: my_addr was never set */
+			return (0);
+	    }
+	    {
+		register char *x25addr = dst->x25_addr;
+
+		switch (x25addr[5] & 0x0f) {
+		  case 0:	/* Physical:  0000 0 IIIHH00 [SS]	 */
+		    imp_no =
+			((int) (x25addr[6] & 0x0f) * 100) +
+			((int) (x25addr[7] & 0x0f) * 10) +
+			((int) (x25addr[8] & 0x0f));
+
+		    imp_port =
+			((int) (x25addr[9] & 0x0f) * 10) +
+			((int) (x25addr[10] & 0x0f));
+		    break;
+		  case 1:	/* Logical:   0000 1 RRRRR00 [SS]	 */
+		    temp = ((int) (x25addr[6] & 0x0f) * 10000)
+			+ ((int) (x25addr[7] & 0x0f) * 1000)
+			+ ((int) (x25addr[8] & 0x0f) * 100)
+			+ ((int) (x25addr[9] & 0x0f) * 10)
+			+ ((int) (x25addr[10] & 0x0f));
+
+		    imp_port = temp >> 8;
+		    imp_no = temp & 0xff;
+		    break;
+		  default:
+		    return (0L);
+		}
+		imp_addr.ip = my_addr;
+		if ((imp_addr.imp.s_net & 0x80) == 0x00) {
+		/* class A */
+		    imp_addr.imp.s_host = imp_port;
+		    imp_addr.imp.s_impno = imp_no;
+		    imp_addr.imp.s_lh = 0;
+		} else if ((imp_addr.imp.s_net & 0xc0) == 0x80) {
+		/* class B */
+		    imp_addr.imp.s_lh = imp_port;
+		    imp_addr.imp.s_impno = imp_no;
+		} else {
+		/* class C */
+		    imp_addr.imp.s_impno = (imp_no << 5) + imp_port;
+		}
+	    }
+		sin.sin_addr = imp_addr.ip;
+		sa = (struct sockaddr *)&sin;
+	} else {
+		/*
+		 * This uses the X25 routing table to do inverse
+		 * lookup of x25 address to sockaddr.
+		 */
+		if (rt = rtalloc1(SA(dst), 0)) {
+			sa = rt->rt_gateway;
+			rt->rt_refcnt--;
+		}
+	}
+	/* 
+	 * Call to rtalloc1 will create rtentry for reverse path
+	 * to callee by virtue of cloning magic and will allocate
+	 * space for local control block.
+	 */
+	if (sa && (rt = rtalloc1(sa, 1)))
+		rt->rt_refcnt--;
+	return (0);
+}
+int x25_startproto = 1;
+
+pk_init()
+{
+	/* Register the datagram listeners only when x25_startproto is nonzero. */
+	if (x25_startproto == 0)
+		return;
+	/* warning, sizeof (struct sockaddr_x25) > 32, */
+	/* but contains no data of interest beyond 32 */
+	pk_protolisten(0xcc, 1, x25_dgram_incoming);
+	pk_protolisten(0x81, 1, x25_dgram_incoming);
+}
+
+
+struct x25_dgproto {
+	u_char spi;		/* protocol id byte; pk_user_protolisten matches info[0] */
+	u_char spilen;		/* length handed to pk_protolisten() along with spi */
+	int (*f)();		/* handler handed to pk_protolisten() */
+} x25_dgprototab[] = {
+#if defined(ISO) && defined(TPCONS)
+{ 0x0, 0, tp_incoming},
+#endif
+{ 0xcc, 1, x25_dgram_incoming},
+{ 0xcd, 1, x25_dgram_incoming},
+{ 0x81, 1, x25_dgram_incoming},
+};
+
+pk_user_protolisten(info)
+register u_char *info;
+{
+	register struct x25_dgproto *dp;
+	register struct pklcd *lcp;
+	/* Find the table entry for protocol id info[0], searching from the end. */
+	for (dp = x25_dgprototab + (sizeof x25_dgprototab) / (sizeof *dp);;) {
+		if (dp == x25_dgprototab)
+			return ESRCH;
+		if ((--dp)->spi == info[0])
+			break;
+	}
+	if (info[1])	/* nonzero: listen; zero: drop the matching listener */
+		return pk_protolisten(dp->spi, dp->spilen, dp->f);
+	for (lcp = pk_listenhead; lcp; lcp = lcp->lcd_listen)
+		if (lcp->lcd_laddr.x25_udlen == dp->spilen &&
+		    Bcmp(&dp->spi, lcp->lcd_laddr.x25_udata, dp->spilen) == 0) {
+			pk_disconnect(lcp);
+			return 0;
+		}
+	return ESRCH;
+}
+
+/*
+ * This routine transfers an X.25 circuit to or from a routing entry.
+ * If the supplied circuit is in DATA_TRANSFER state, it is added to the
+ * routing entry.  If freshly allocated, it glues back the vc from
+ * the rtentry to the socket.
+ */
+pk_rtattach(so, m0)
+register struct socket *so;
+struct mbuf *m0;	/* holds a sockaddr, optionally followed by a mask */
+{
+	register struct pklcd *lcp = (struct pklcd *)so->so_pcb;
+	register struct mbuf *m = m0;
+	struct sockaddr *dst = mtod(m, struct sockaddr *);
+	register struct rtentry *rt = rtalloc1(dst, 0);
+	register struct llinfo_x25 *lx;
+	caddr_t cp;	/* mask that follows dst in m0, or 0 if there is none */
+#define ROUNDUP(a) \
+	((a) > 0 ? (1 + (((a) - 1) | (sizeof(long) - 1))) : sizeof(long))
+#define transfer_sockbuf(s, f, l) \
+	while (m = (s)->sb_mb)\
+		{(s)->sb_mb = m->m_act; m->m_act = 0; sbfree((s), m); f(l, m);}
+	/* transfer_sockbuf: pass each record queued on sockbuf s to f(l, record); uses local m. */
+	if (rt)
+		rt->rt_refcnt--;
+	cp = (dst->sa_len < m->m_len) ? ROUNDUP(dst->sa_len) + (caddr_t)dst : 0;
+	while (rt &&	/* walk the duplicate-key chain until the mask agrees too */
+	       ((cp == 0 && rt_mask(rt) != 0) ||
+		(cp != 0 && (rt_mask(rt) == 0 ||
+			     Bcmp(cp, rt_mask(rt), rt_mask(rt)->sa_len)) != 0)))
+			rt = (struct rtentry *)rt->rt_nodes->rn_dupedkey;
+	if (rt == 0 || (rt->rt_flags & RTF_GATEWAY) ||
+	    (lx = (struct llinfo_x25 *)rt->rt_llinfo) == 0)
+		return ESRCH;	/* no non-gateway route carrying X.25 link info */
+	if (lcp == 0)
+		return ENOTCONN;
+	switch (lcp->lcd_state) {
+	default:
+		return ENOTCONN;
+
+	case READY:
+		/* Detach VC from rtentry */
+		if (lx->lx_lcd == 0)
+			return ENOTCONN;
+		lcp->lcd_so = 0;
+		pk_close(lcp);	/* give up the socket's own lcd ... */
+		lcp = lx->lx_lcd;	/* ... and take over the route's circuit */
+		if (lx->lx_next->lx_rt == rt)	/* NOTE(review): x25_lxfree() also disconnects lx->lx_lcd - confirm intent */
+			x25_lxfree(lx);
+		lcp->lcd_so = so;
+		lcp->lcd_upper = 0;
+		lcp->lcd_upnext = 0;
+		transfer_sockbuf(&lcp->lcd_sb, sbappendrecord, &so->so_snd);
+		soisconnected(so);
+		return 0;
+
+	case DATA_TRANSFER:
+		/* Add VC to rtentry */
+		lcp->lcd_so = 0;
+		lcp->lcd_sb = so->so_snd; /* structure copy */
+		bzero((caddr_t)&so->so_snd, sizeof(so->so_snd)); /* XXXXXX */
+		so->so_pcb = 0;
+		x25_rtattach(lcp, rt);
+		transfer_sockbuf(&so->so_rcv, x25_ifinput, lcp);	/* replay received records as input */
+		soisdisconnected(so);
+	}
+	return 0;
+}
+x25_rtattach(lcp0, rt)
+register struct pklcd *lcp0;
+struct rtentry *rt;
+{
+	register struct llinfo_x25 *lx = (struct llinfo_x25 *)rt->rt_llinfo;
+	register struct pklcd *lcp;
+	register struct mbuf *m;	/* scratch used by transfer_sockbuf() */
+	/* Make lcp0 a circuit of rt.  Returns 0, or ENOBUFS if no llinfo_x25. */
+	if (lcp = lx->lx_lcd) { /* adding an additional VC */
+		if (lcp->lcd_state == READY) {
+			/* Existing circuit is still READY: pk_output its queue on lcp0. */
+			transfer_sockbuf(&lcp->lcd_sb, pk_output, lcp0);
+			lcp->lcd_upper = 0;
+			pk_close(lcp);
+		} else if ((lx = x25_lxalloc(rt)) == 0)
+			return ENOBUFS;
+	}
+	lx->lx_lcd = lcp = lcp0;
+	lcp->lcd_upper = x25_ifinput;
+	lcp->lcd_upnext = (caddr_t)lx;
+	return 0;	/* previously fell off the end with no value */
+}
diff --git a/sys/netccitt/llc_input.c b/sys/netccitt/llc_input.c
new file mode 100644
index 00000000000..7a01973d979
--- /dev/null
+++ b/sys/netccitt/llc_input.c
@@ -0,0 +1,468 @@
+/* 
+ * Copyright (C) Dirk Husemann, Computer Science Department IV, 
+ * 		 University of Erlangen-Nuremberg, Germany, 1990, 1991, 1992
+ * Copyright (c) 1992, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ * 
+ * This code is derived from software contributed to Berkeley by
+ * Dirk Husemann and the Computer Science Department (IV) of
+ * the University of Erlangen-Nuremberg, Germany.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)llc_input.c	8.1 (Berkeley) 6/10/93
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/mbuf.h>
+#include <sys/domain.h>
+#include <sys/socket.h>
+#include <sys/protosw.h>
+#include <sys/errno.h>
+#include <sys/time.h>
+#include <sys/kernel.h>
+
+#include <net/if.h>
+#include <net/if_dl.h>
+#include <net/if_llc.h>
+#include <net/route.h>
+
+#include <netccitt/dll.h>
+#include <netccitt/llc_var.h>
+
+/*
+ * This module implements LLC as specified by ISO 8802-2.
+ */
+
+
+/*
+ * llcintr() handles all LLC frames (except ISO CLNS ones for the time being)
+ *           and tries to pass them on to the appropriate network layer entity.
+ */
+void
+llcintr()
+{
+	register struct mbuf *m;
+	register int i;
+	register int frame_kind;
+	register u_char cmdrsp;
+	struct llc_linkcb *linkp;
+	struct npaidbentry *sapinfo;
+	struct sdl_hdr *sdlhdr;
+	struct llc *frame;
+	long expected_len;
+
+	struct ifnet   *ifp;
+	struct rtentry *llrt;
+	struct rtentry *nlrt;
+
+	for (;;) {
+		i = splimp();
+		IF_DEQUEUE(&llcintrq, m);
+		splx(i);
+		if (m == 0)
+			break;
+#ifdef		DIAGNOSTIC
+		if ((m->m_flags & M_PKTHDR) == 0)
+			panic("llcintr no HDR");
+#endif
+		/*
+		 * Get ifp this packet was received on
+		 */
+		ifp = m->m_pkthdr.rcvif;
+
+		sdlhdr = mtod(m, struct sdl_hdr *);
+
+		/*
+		 * [Copied from net/ip_input.c]
+		 *
+		 * Check that the amount of data in the buffers is
+		 * at least as much as the LLC header tells us.
+		 * Trim mbufs if longer than expected.
+		 * Drop packets if shorter than we think they are.
+		 *
+		 * Layout of mbuf chain at this point:
+		 *
+		 *  +-------------------------------+----+	-\
+                 *  |  sockaddr_dl src - sdlhdr_src | 20 |	  \
+                 *  +-------------------------------+----+	   |
+		 *  |  sockaddr_dl dst - sdlhdr_dst | 20 |	    > sizeof(struct sdl_hdr) == 44
+		 *  +-------------------------------+----+	   |
+                 *  |  LLC frame len - sdlhdr_len   | 04 |	  /
+		 *  +-------------------------------+----+	-/
+		 * /
+		 * | m_next
+		 * \
+                 *  +----------------------------+----+	 -\
+                 *  |  llc DSAP 		 | 01 |	   \
+		 *  +----------------------------+----+	    |
+                 *  |  llc SSAP 		 | 01 |	    |
+		 *  +----------------------------+----+	     > sdlhdr_len
+                 *  |  llc control      	 | 01 |	    |
+		 *  +----------------------------+----+	    |
+		 *  |  ...                       |    |	   /
+		 *      			      	 -/
+		 *
+		 * Thus we expect to have exactly
+		 * (sdlhdr->sdlhdr_len+sizeof(struct sdl_hdr)) in the mbuf chain
+		 */
+		expected_len = sdlhdr->sdlhdr_len + sizeof(struct sdl_hdr);
+
+		if (m->m_pkthdr.len < expected_len) {
+			m_freem(m);
+			continue;
+		}
+		if (m->m_pkthdr.len > expected_len) {
+			if (m->m_len == m->m_pkthdr.len) {
+				m->m_len = expected_len;
+				m->m_pkthdr.len = expected_len;
+			} else
+				m_adj(m, expected_len - m->m_pkthdr.len);
+		}
+
+		/*
+		 * Get llc header: it either follows the sdl_hdr in the
+		 * first mbuf or starts the next mbuf of the chain.
+		 */
+		if (m->m_len > sizeof(struct sdl_hdr))
+			frame = (struct llc *)(sdlhdr + 1);
+		else frame = m->m_next ? mtod(m->m_next, struct llc *) :
+					 (struct llc *) NULL;
+		if (frame == (struct llc *) NULL)
+			panic("llcintr no llc header");
+
+		/*
+		 * Now check for bogus I/S frame, i.e. those with a control
+		 * field telling us they're an I/S frame yet their length
+		 * is less than the established I/S frame length (DSAP + SSAP +
+		 * control + N(R)&P/F = 4) --- we drop those suckers
+		 */
+		if (((frame->llc_control & 0x03) != 0x03) 
+		    && ((expected_len - sizeof(struct sdl_hdr)) < LLC_ISFRAMELEN)) {
+			m_freem(m);
+			printf("llc: hurz error\n");
+			continue;
+		}
+
+		/*
+		 * Get link control block for the addressed link connection.
+		 * If there is none we take care of it later on.
+		 */
+		cmdrsp = (frame->llc_ssap & 0x01);
+		frame->llc_ssap &= ~0x01;
+		if (llrt = rtalloc1((struct sockaddr *)&sdlhdr->sdlhdr_src, 0))
+			llrt->rt_refcnt--;
+#ifdef notyet
+		else llrt = npaidb_enter(&sdlhdr->sdlhdr_src, 0, 0, 0);
+#endif /* notyet */
+		else {
+			/* 
+			 * We cannot do anything currently here as we
+			 * don't `know' this link --- drop it 
+			 */
+			m_freem(m);
+			continue;
+		}
+		linkp = ((struct npaidbentry *)(llrt->rt_llinfo))->np_link;
+		nlrt = ((struct npaidbentry *)(llrt->rt_llinfo))->np_rt;
+
+		/*
+		 * Use the SAP info block of the existing link; if there is
+		 * no link right now, try to look it up.  sapinfo is set on
+		 * every pass so no stale or uninitialized value is used.
+		 */
+		sapinfo = linkp ? linkp->llcl_sapinfo : (frame->llc_ssap ?
+		    llc_getsapinfo(frame->llc_dsap, ifp) : (struct npaidbentry *) 0);
+
+		/*
+		 * Handle XID and TEST frames
+		 * XID:		if DLSAP == 0, return 	type-of-services
+		 *					window-0
+		 *					DLSAP-0
+		 *					format-identifier-?
+		 * 		if DLSAP != 0, locate sapcb and return
+		 *					type-of-services
+		 *					SAP-window
+		 *					format-identifier-?
+		 * TEST:	swap (snpah_dst, snpah_src) and return frame
+		 *
+		 * Also toggle the CMD/RESP bit
+		 *
+		 * Is this behaviour correct? Check ISO 8802-2 (90)!
+		 */
+		frame_kind = llc_decode(frame, (struct llc_linkcb *)0);
+		switch(frame_kind) {
+		case LLCFT_XID:
+			if (sapinfo) {
+				if (linkp)
+			   		frame->llc_window = linkp->llcl_window;
+			   	else frame->llc_window = sapinfo->si_window;
+			 	frame->llc_fid = 9;			/* XXX */
+			  	frame->llc_class = sapinfo->si_class;
+			 	frame->llc_ssap = frame->llc_dsap;
+			} else {
+			 	frame->llc_window = 0;
+			     	frame->llc_fid = 9;
+				frame->llc_class = 1;
+				frame->llc_dsap = frame->llc_ssap = 0;
+			}
+
+			/* fall thru to */
+		case LLCFT_TEST:
+			sdl_swapaddr(&(mtod(m, struct sdl_hdr *)->sdlhdr_dst),
+				     &(mtod(m, struct sdl_hdr *)->sdlhdr_src));
+
+			/* Now set the CMD/RESP bit */
+			frame->llc_ssap |= (cmdrsp == 0x0 ? 0x1 : 0x0);
+
+			/* Ship it out again */
+			(*ifp->if_output)(ifp, m,
+					  (struct sockaddr *) &(mtod(m, struct sdl_hdr *)->sdlhdr_dst),
+					  (struct rtentry *) 0);
+			continue;
+		}
+
+		/*
+		 * Create link control block in case it is not existing
+		 */
+		if (linkp == 0 && sapinfo) {
+			if ((linkp = llc_newlink(&sdlhdr->sdlhdr_src, ifp, nlrt,
+						     (nlrt == 0) ? 0 : nlrt->rt_llinfo,
+						     llrt)) == 0) {
+				printf("llcintr: couldn't create new link\n");
+				m_freem(m);
+				continue;
+			}
+			((struct npaidbentry *)llrt->rt_llinfo)->np_link = linkp;
+		} else if (linkp == 0) {
+			/* The link is not known to us, drop the frame and continue */
+			m_freem(m);
+			continue;
+		}
+
+		/*
+		 * Drop SNPA header and get rid of empty mbuf at the
+		 * front of the mbuf chain (I don't like 'em)
+		 */
+		m_adj(m, sizeof(struct sdl_hdr));
+		/* 
+		 * LLC_UFRAMELEN is sufficient, m_pullup() will pull up
+		 * the min(m->m_len, maxprotohdr_len [=40]) thus doing
+		 * the trick ...
+		 */
+		if ((m = m_pullup(m, LLC_UFRAMELEN)))
+			/*
+			 * Pass it on thru the elements of procedure
+			 */
+			llc_input(linkp, m, cmdrsp);
+	}
+	return;
+}
+
+/*
+ * llc_input() --- Run one received frame through the elements of
+ *                 procedure of its link.
+ * 
+ *                 The poll/final bit and the frame kind (llc_decode())
+ *                 are handed to llc_statehandler(), which returns an
+ *                 action code.  The only action code acted upon here
+ *                 is LLC_DATA_INDICATION:
+ *                         o LLC_ISFRAMELEN bytes are trimmed off the front
+ *                         o llcl_nlnext is stored in m_pkthdr.rcvif
+ *                         o the mbuf goes to the SAP's si_input routine
+ *                 For every other action code the mbuf is released.
+ *                 Finally llc_start() is called and 0 is returned.
+ */
+llc_input(struct llc_linkcb *linkp, struct mbuf *m, u_char cmdrsp)
+{
+	struct ifnet *ifp = linkp->llcl_if;
+	struct llc *hdr = mtod(m, struct llc *);
+	int kind;
+	int pf;
+	int verdict;
+
+	/* nothing to look at --- drop it */
+	if (hdr == (struct llc *) 0) {
+		m_freem(m);
+		return 0;
+	}
+
+	/* P/F bit: u_pf of llc_control or s_pf of llc_control_ext */
+	if ((hdr->llc_control & 0x03) == 0x03)
+		pf = LLCGBITS(hdr->llc_control, u_pf);
+	else
+		pf = LLCGBITS(hdr->llc_control_ext, s_pf);
+
+	/* classify the frame, then let the state handler decide */
+	kind = llc_decode(hdr, linkp);
+	verdict = llc_statehandler(linkp, hdr, kind, cmdrsp, pf);
+
+	if (verdict == LLC_DATA_INDICATION) {
+		/* info frame: trim the LLC header, hand the rest upwards */
+		m_adj(m, LLC_ISFRAMELEN);
+		if (m = m_pullup(m, NLHDRSIZEGUESS)) {
+			m->m_pkthdr.rcvif = (struct ifnet *)linkp->llcl_nlnext;
+			(*linkp->llcl_sapinfo->si_input)(m);
+		}
+	} else if (m) {
+		/* release mbuf if not an info frame */
+		m_freem(m);
+	}
+
+	/* try to get frames out ... */
+	llc_start(linkp);
+
+	return 0;
+}
+
+/*
+ * LLC control input: PRC_IFUP records SAP info for an interface, PRC_IFDOWN
+ * disconnects its links; connect/disconnect/reset requests act on one link.
+ */
+caddr_t
+llc_ctlinput(int prc, struct sockaddr *addr, caddr_t info)
+{
+	struct ifnet *ifp;
+	struct ifaddr *ifa;
+	struct dll_ctlinfo *ctlinfo = (struct dll_ctlinfo *)info;
+	u_char sap;
+	struct dllconfig *config;
+	caddr_t pcb;
+	struct rtentry *nlrt;
+	struct rtentry *llrt = (struct rtentry *) 0;
+	struct llc_linkcb *linkp = (struct llc_linkcb *) 0;
+	register int i;
+
+	/* info must point to something valid at all times */
+	if (info == 0)
+		return 0;
+
+	if (prc == PRC_IFUP || prc == PRC_IFDOWN) {
+		/* we use either this set ... */
+		ifa = ifa_ifwithaddr(addr);
+		ifp = ifa ? ifa->ifa_ifp : 0;
+		if (ifp == 0)
+			return 0;
+
+		sap = ctlinfo->dlcti_lsap;
+		config = ctlinfo->dlcti_cfg;
+		pcb = (caddr_t) 0;
+		nlrt = (struct rtentry *) 0;
+	} else {
+		/* or this one */
+		sap = 0; 
+		config = (struct dllconfig *) 0;
+		pcb = ctlinfo->dlcti_pcb;
+		nlrt = ctlinfo->dlcti_rt;
+
+		if (nlrt == 0 || (llrt = rtalloc1(nlrt->rt_gateway, 0)) == 0)
+			return 0;
+		llrt->rt_refcnt--;
+
+		linkp = ((struct npaidbentry *)llrt->rt_llinfo)->np_link;
+	}
+	
+	switch (prc) {
+	case PRC_IFUP:
+		(void) llc_setsapinfo(ifp, addr->sa_family, sap, config);
+		return 0;
+
+	case PRC_IFDOWN: {
+		register struct llc_linkcb *nlinkp;
+
+		/*
+		 * All links are accessible over the doubly linked list llccb_q
+		 */
+		if (!LQEMPTY) {
+			/*
+			 * A for-loop is not that great an idea as the linkp
+			 * will get deleted by llc_timer()
+			 */
+			linkp = LQFIRST;
+			while (LQVALID(linkp)) {
+				nlinkp = LQNEXT(linkp);
+				/* compare, don't assign: only links on ifp */
+				if (linkp->llcl_if == ifp) {
+					i = splimp();
+					(void)llc_statehandler(linkp, (struct llc *)0,
+							       NL_DISCONNECT_REQUEST,
+							       0, 1);
+					splx(i);
+				}
+				linkp = nlinkp;
+			}
+		}
+		return 0;	/* must not fall into the connect request code */
+	}
+	
+	case PRC_CONNECT_REQUEST: 
+		if (linkp == 0) {
+			if ((linkp = llc_newlink((struct sockaddr_dl *) nlrt->rt_gateway, 
+						 nlrt->rt_ifp, nlrt, 
+						 pcb, llrt)) == 0)
+				return (0);
+			((struct npaidbentry *)llrt->rt_llinfo)->np_link = linkp;
+			i = splimp();
+			(void)llc_statehandler(linkp, (struct llc *) 0,
+						NL_CONNECT_REQUEST, 0, 1);
+			splx(i);
+		}
+		return ((caddr_t)linkp);
+	
+	case PRC_DISCONNECT_REQUEST:
+		if (linkp == 0) 
+			panic("no link control block!");
+
+		i = splimp();
+		(void)llc_statehandler(linkp, (struct llc *) 0,
+				       NL_DISCONNECT_REQUEST, 0, 1);
+		splx(i);
+
+		/*
+		 * The actual removal of the link control block is done by the
+		 * cleaning neutrum (i.e. llc_timer()).
+		 */
+		break;
+	
+	case PRC_RESET_REQUEST:
+		if (linkp == 0) 
+			panic("no link control block!");
+
+		i = splimp();
+		(void)llc_statehandler(linkp, (struct llc *) 0,
+				       NL_RESET_REQUEST, 0, 1);
+		splx(i);
+
+		break;
+
+	}
+	
+	return 0;
+}
diff --git a/sys/netccitt/llc_output.c b/sys/netccitt/llc_output.c
new file mode 100644
index 00000000000..98d0328a5f5
--- /dev/null
+++ b/sys/netccitt/llc_output.c
@@ -0,0 +1,304 @@
+/* 
+ * Copyright (C) Dirk Husemann, Computer Science Department IV, 
+ * 		 University of Erlangen-Nuremberg, Germany, 1990, 1991, 1992
+ * Copyright (c) 1992, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ * 
+ * This code is derived from software contributed to Berkeley by
+ * Dirk Husemann and the Computer Science Department (IV) of
+ * the University of Erlangen-Nuremberg, Germany.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)llc_output.c	8.1 (Berkeley) 6/10/93
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/mbuf.h>
+#include <sys/domain.h>
+#include <sys/socket.h>
+#include <sys/protosw.h>
+#include <sys/errno.h>
+#include <sys/time.h>
+#include <sys/kernel.h>
+
+#include <net/if.h>
+#include <net/if_dl.h>
+#include <net/if_llc.h>
+#include <net/route.h>
+
+#include <netccitt/dll.h>
+#include <netccitt/llc_var.h>
+
+/*
+ * llc_output() --- called by an upper layer (network layer) entity whenever
+ *                  there is an INFO frame to be transmitted. We enqueue the
+ *                  frame, call llc_start() to send it, and return 0.
+ */
+
+llc_output(struct llc_linkcb *linkp, struct mbuf *m)
+{
+	register int i;
+
+	i = splimp();		/* queue and start under raised spl */
+	LLC_ENQUEUE(linkp, m);
+	llc_start(linkp);
+	splx(i);
+	return 0;		/* used to fall off the end without a value */
+}
+
+
+/*
+ * llc_start() --- Dequeue and send frames while the link state, the free
+ *                 window slots and the remote side permit it.
+ */
+void
+llc_start(struct llc_linkcb *linkp)
+{
+	register int i;
+	register struct mbuf *m;
+	int action;
+	for (;;) {
+		int sendable = LLC_STATEEQ(linkp, NORMAL) ||
+			LLC_STATEEQ(linkp, BUSY) || LLC_STATEEQ(linkp, REJECT);
+		if (!sendable || linkp->llcl_slotsfree <= 0 ||
+		    LLC_GETFLAG(linkp, REMOTE_BUSY) != 0)
+			break;
+		LLC_DEQUEUE(linkp, m);
+		if (m == NULL)
+			break;		/* write queue is empty */
+		LLC_SETFRAME(linkp, m);
+		(void)llc_statehandler(linkp, (struct llc *) 0, NL_DATA_REQUEST, 0, 0);
+	}
+}
+
+
+/*
+ * llc_send() --- Handles single frames.  An INFO frame is taken from the
+ *                output buffer slot of V(S), and V(S) is advanced afterwards;
+ *                other kinds start with m == 0.  llc_rawsend() sends it.
+ */
+llc_send(struct llc_linkcb *linkp, int frame_kind, int cmdrsp, int pollfinal)
+{
+	register struct llc *frame;
+	register struct mbuf *m;
+	int is_info = (frame_kind == LLCFT_INFO);
+
+	/* only INFO frames have a buffer waiting in the output slots */
+	m = (struct mbuf *)0;
+	if (is_info) {
+		int slot = llc_seq2slot(linkp, linkp->llcl_vs);
+		m = linkp->llcl_output_buffers[slot];
+	}
+	LLC_GETHDR(frame, m);
+	llc_rawsend(linkp, m, frame, frame_kind, linkp->llcl_vs, cmdrsp, pollfinal);
+	if (is_info)
+		LLC_INC(linkp->llcl_vs);
+	return 0;
+}
+
+/* 
+ * llc_resend() --- retransmits all unacknowledged INFO frames; returns 0.
+ */
+llc_resend(struct llc_linkcb *linkp, int cmdrsp, int pollfinal)
+{
+	register struct llc *frame;
+	register struct mbuf *m;
+	register int slot;
+
+	if (linkp->llcl_slotsfree < linkp->llcl_window) {
+		/* assert lock between nr_received & V(S) */
+		if (linkp->llcl_nr_received != linkp->llcl_vs)
+			panic("llc: V(S) != N(R) received\n");
+		/* walk V(S) up to the free slot; only the 1st frame gets P/F */
+		for (slot = llc_seq2slot(linkp, linkp->llcl_vs);
+		     slot != linkp->llcl_freeslot; 
+		     LLC_INC(linkp->llcl_vs), 
+		     slot = llc_seq2slot(linkp, linkp->llcl_vs)) {
+			m = linkp->llcl_output_buffers[slot];
+			LLC_GETHDR(frame, m);
+			llc_rawsend(linkp, m, frame, LLCFT_INFO, linkp->llcl_vs, 
+				    cmdrsp, pollfinal);
+			pollfinal = 0;
+		}
+	}
+	return 0;
+}
+
+/*
+ * llc_rawsend() --- constructs an LLC frame and sends it out via the
+ *                   associated interface of the link control block.
+ *
+ * We need to make sure that outgoing frames have the correct length,
+ * in particular the 4 byte ones (RR, RNR, REJ) as LLC_GETHDR() will
+ * set the mbuf len to 3 as default len for non INFO frames ...
+ *
+ * Frame kind             Length (w/o MAC header, {D,S}SAP incl.)
+ * --------------------------------------------------------------
+ * DISC, SABME, UA, DM    3 bytes  ({D,S}SAP + CONTROL)
+ * RR, RNR, REJ           4 bytes  ({D,S}SAP + CONTROL0 + CONTROL1)
+ * XID                    6 bytes  ({D,S}SAP + CONTROL0 + FI,CLASS,WINDOW)
+ * FRMR                   7 bytes  ({D,S}SAP + CONTROL0 + REJ CONTROL,V(S),V(R),CAUSE)
+ * INFO                   4 -- MTU
+ * UI, TEST               3 -- MTU
+ * For INFO frames a copy is sent; m itself gets its LLC header trimmed again.
+ */
+#define LLC_SETLEN(m, l) (m)->m_pkthdr.len = (m)->m_len = (l)
+
+llc_rawsend(struct llc_linkcb *linkp, struct mbuf *m, struct llc *frame,
+	    int frame_kind, int vs, int cmdrsp, int pollfinal)
+{
+	register short adjust = LLC_UFRAMELEN;
+	struct ifnet *ifp;
+
+	switch (frame_kind) {
+	/* supervisory and information frames */
+	case LLCFT_INFO:
+		frame->llc_control = LLC_INFO;
+		LLCSBITS(frame->llc_control, i_ns, vs);
+		LLCSBITS(frame->llc_control_ext, i_nr, linkp->llcl_vr);
+		adjust = LLC_ISFRAMELEN;
+		break;
+	case LLCFT_RR:
+		frame->llc_control = LLC_RR;
+		LLC_SETLEN(m, LLC_ISFRAMELEN);
+		LLCSBITS(frame->llc_control_ext, s_nr, linkp->llcl_vr);
+		adjust = LLC_ISFRAMELEN;
+		break;
+	case LLCFT_RNR:
+		frame->llc_control = LLC_RNR;
+		LLC_SETLEN(m, LLC_ISFRAMELEN);
+		LLCSBITS(frame->llc_control_ext, s_nr, linkp->llcl_vr);
+		adjust = LLC_ISFRAMELEN;
+		break;
+	case LLCFT_REJ:
+		frame->llc_control = LLC_REJ;
+		LLC_SETLEN(m, LLC_ISFRAMELEN);
+		LLCSBITS(frame->llc_control_ext, s_nr, linkp->llcl_vr);
+		adjust = LLC_ISFRAMELEN;
+		break;
+	/* unnumbered frames */
+	case LLCFT_DM:
+		frame->llc_control = LLC_DM;
+		break;
+	case LLCFT_SABME:
+		frame->llc_control = LLC_SABME;
+		break;
+	case LLCFT_DISC:
+		frame->llc_control = LLC_DISC;
+		break;
+	case LLCFT_UA:
+		frame->llc_control = LLC_UA;
+		break;
+	case LLCFT_UI:
+		frame->llc_control = LLC_UI;
+		break;
+	case LLCFT_FRMR:
+		frame->llc_control = LLC_FRMR;
+		/* get more space --- FRMR frame are longer then usual */
+		LLC_SETLEN(m, LLC_FRMRLEN);
+		bcopy((caddr_t) &linkp->llcl_frmrinfo, 
+		      (caddr_t) &frame->llc_frmrinfo,
+		      sizeof(struct frmrinfo));
+		break;
+	default:
+		/*
+		 * We don't send {XID, TEST} frames
+		 */
+		if (m)
+			m_freem(m);
+		return;
+	}
+ 
+	/* 
+	 * Fill in DSAP/SSAP
+	 */
+	frame->llc_dsap = frame->llc_ssap = LLSAPADDR(&linkp->llcl_addr);
+	frame->llc_ssap |= cmdrsp;
+
+	/*
+	 * Check for delayed action pending. ISO 8802-2, 7.9.2 (5)
+	 * and ISO 8802-2, 7.9.2.3 (32), (34), (36) pertain to this
+	 * piece of code --- hopefully we got it right here (i.e.
+	 * in the spirit of (32), (34), and (36) ...
+	 */
+	switch (frame_kind) {
+	case LLCFT_RR:
+	case LLCFT_RNR:
+	case LLCFT_REJ:
+	case LLCFT_INFO:
+		switch (LLC_GETFLAG(linkp, DACTION)) {
+		case LLC_DACKCMD:
+		case LLC_DACKRSP:
+			LLC_STOPTIMER(linkp, DACTION);
+			break;
+		case LLC_DACKCMDPOLL:
+			if (cmdrsp == LLC_CMD) {
+				pollfinal = 1;
+				LLC_STOPTIMER(linkp, DACTION);
+			}
+			break;
+		case LLC_DACKRSPFINAL:
+			if (cmdrsp == LLC_RSP) {
+				pollfinal = 1;
+				LLC_STOPTIMER(linkp, DACTION);
+			}
+			break;
+		}
+		break;
+	}
+	 
+	if (adjust == LLC_UFRAMELEN)
+		LLCSBITS(frame->llc_control, u_pf, pollfinal);
+	else LLCSBITS(frame->llc_control_ext, s_pf, pollfinal);
+
+	/*
+	 * Get interface to send frame onto
+	 */
+	ifp = linkp->llcl_if;
+	if (frame_kind == LLCFT_INFO) {
+		/* 
+		 * send out a copy of the frame, retain the original
+		 */
+		struct mbuf *n = m_copy(m, 0, (int)M_COPYALL);
+		if (n)	/* no send when the copy could not be allocated */
+			(*ifp->if_output)(ifp, n, rt_key(linkp->llcl_nlrt),
+					  linkp->llcl_nlrt);
+		/*
+		 * Account for the LLC header and let it ``disappear''
+		 * as the raw info frame payload is what we hold in
+		 * the output_buffers of the link.
+		 */
+		m_adj(m, LLC_ISFRAMELEN);
+	} else (*ifp->if_output)(ifp, m, 
+				 rt_key(linkp->llcl_nlrt),
+				 linkp->llcl_nlrt);
+}
+
diff --git a/sys/netccitt/llc_subr.c b/sys/netccitt/llc_subr.c
new file mode 100644
index 00000000000..46848fdf5bc
--- /dev/null
+++ b/sys/netccitt/llc_subr.c
@@ -0,0 +1,2356 @@
+/* 
+ * Copyright (C) Dirk Husemann, Computer Science Department IV, 
+ * 		 University of Erlangen-Nuremberg, Germany, 1990, 1991, 1992
+ * Copyright (c) 1992, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ * 
+ * This code is derived from software contributed to Berkeley by
+ * Dirk Husemann and the Computer Science Department (IV) of
+ * the University of Erlangen-Nuremberg, Germany.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)llc_subr.c	8.1 (Berkeley) 6/10/93
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/mbuf.h>
+#include <sys/domain.h>
+#include <sys/socket.h>
+#include <sys/protosw.h>
+#include <sys/socketvar.h>
+#include <sys/errno.h>
+#include <sys/time.h>
+#include <sys/kernel.h>
+
+#include <net/if.h>
+#include <net/if_dl.h>
+#include <net/if_llc.h>
+#include <net/route.h>
+
+#include <netccitt/dll.h>
+#include <netccitt/llc_var.h>
+
+/*
+ * Frame names for diagnostic messages (presumably indexed by frame kind)
+ */
+char *frame_names[] = { "INFO", "RR", "RNR", "REJ", "DM", "SABME", "DISC",
+	"UA", "FRMR", "UI", "XID", "TEST", "ILLEGAL", "TIMER", "N2xT1"};
+
+
+/*
+ * Trace level, initially LLCTR_URGENT; llc_setsapinfo() adjusts it
+ */
+int llc_tracelevel = LLCTR_URGENT;
+
+/*
+ * Mask/shift pairs for accessing various bitfields
+ */
+struct bitslice llc_bitslice[] = {
+/*	  mask, shift value */
+	{ 0x1,  0x0 },
+	{ 0xfe, 0x1 },
+	{ 0x3,  0x0 },
+	{ 0xc,  0x2 },
+	{ 0x10, 0x4 },
+	{ 0xe0, 0x5 },
+	{ 0x1f, 0x0 }
+};
+
+/*
+ * We keep the link control blocks on a doubly linked list, primarily for
+ * checking in llc_time() [NOTE(review): presumably llc_timer()]; starts empty
+ */
+
+struct llccb_q llccb_q = { &llccb_q, &llccb_q };
+
+/*
+ * Flag for signalling whether route tree for AF_LINK has been
+ * initialized yet.
+ */
+
+int af_link_rts_init_done = 0; 
+
+
+/*
+ * Functions dealing with struct sockaddr_dl */
+
+/* Compare sdl_a w/ sdl_b: 1 if LLADDRLEN differs, else bcmp() of sdl_data */
+
+sdl_cmp(struct sockaddr_dl *sdl_a, struct sockaddr_dl *sdl_b)
+{
+	int same_len = (LLADDRLEN(sdl_a) == LLADDRLEN(sdl_b));
+
+	return(same_len ? bcmp((caddr_t) sdl_a->sdl_data,
+	    (caddr_t) sdl_b->sdl_data, LLADDRLEN(sdl_a)) : 1);
+}
+
+/* Copy sdl_f->sdl_len bytes from sdl_f to sdl_t */
+sdl_copy(struct sockaddr_dl *sdl_f, struct sockaddr_dl *sdl_t)
+{
+	caddr_t from = (caddr_t) sdl_f, to = (caddr_t) sdl_t;
+	bcopy(from, to, sdl_f->sdl_len);
+}
+
+/* Exchange the contents of sdl_a and sdl_b via a temporary copy */
+
+sdl_swapaddr(struct sockaddr_dl *sdl_a, struct sockaddr_dl *sdl_b)
+{
+	struct sockaddr_dl saved;
+
+	bcopy((caddr_t) sdl_a, (caddr_t) &saved, sdl_a->sdl_len);
+	bcopy((caddr_t) sdl_b, (caddr_t) sdl_a, sdl_b->sdl_len);
+	bcopy((caddr_t) &saved, (caddr_t) sdl_b, saved.sdl_len);
+}
+
+/* Return the first AF_LINK address on ifp's address list, or 0 if none */
+
+struct sockaddr_dl *
+sdl_getaddrif(struct ifnet *ifp)
+{
+	register struct ifaddr *cur = ifp->if_addrlist;
+
+	while (cur && cur->ifa_addr->sa_family != AF_LINK)
+		cur = cur->ifa_next;
+	if (cur == 0)
+		return((struct sockaddr_dl *)0);
+	return((struct sockaddr_dl *)(cur->ifa_addr));
+}
+
+/* Return 1 if an AF_LINK address of ifp compares equal to sdl_c, else 0 */
+
+sdl_checkaddrif(struct ifnet *ifp, struct sockaddr_dl *sdl_c)
+{
+	register struct ifaddr *cur = ifp->if_addrlist;
+
+	for (; cur; cur = cur->ifa_next)
+		if (cur->ifa_addr->sa_family == AF_LINK &&
+		    sdl_cmp((struct sockaddr_dl *)(cur->ifa_addr), sdl_c) == 0)
+			return(1);
+
+	return(0);
+}
+
+/* Build an sdl in sdl_to from ifp's sdl, a MAC addr and a DLSAP addr */
+
+sdl_setaddrif(struct ifnet *ifp, u_char *mac_addr, u_char dlsap_addr, 
+	      u_char mac_len, struct sockaddr_dl *sdl_to)
+{
+	register struct sockaddr_dl *ifsdl = sdl_getaddrif(ifp);
+
+	if (ifsdl == 0)		/* interface has no AF_LINK address */
+		return(0);
+	sdl_copy(ifsdl, sdl_to);	/* start from the interface's sdl */
+	bcopy((caddr_t) mac_addr, (caddr_t) LLADDR(sdl_to), mac_len);
+	*(LLADDR(sdl_to)+mac_len) = dlsap_addr;	/* DLSAP follows the MAC addr */
+	sdl_to->sdl_alen = mac_len+1;
+	return(1);
+}
+
+/* Fill in the src and dst sdl of sdlhdr_to; 1 on success, 0 on failure */
+
+sdl_sethdrif(struct ifnet *ifp, u_char *mac_src, u_char dlsap_src, u_char *mac_dst,
+	     u_char dlsap_dst, u_char mac_len, struct sdl_hdr *sdlhdr_to)
+{
+	if (sdl_setaddrif(ifp, mac_src, dlsap_src, mac_len,
+			  &sdlhdr_to->sdlhdr_src) &&
+	    sdl_setaddrif(ifp, mac_dst, dlsap_dst, mac_len,
+			  &sdlhdr_to->sdlhdr_dst))
+		return(1);
+	return(0);
+}
+
+static struct sockaddr_dl sap_saddr; 
+static struct sockaddr_dl sap_sgate = {
+	sizeof(struct sockaddr_dl), /* _len */ 
+	AF_LINK                     /* _af */
+};
+
+/* Set sapinfo for SAP address, llcconfig, af, and interface: a route for
+ * the SAP is entered and a zeroed npaidbentry carrying the configuration is
+ * hung off its rt_llinfo.  Returns the entry, or 0 on failure. */
+struct npaidbentry *
+llc_setsapinfo(struct ifnet *ifp, u_char af, u_char sap, struct dllconfig *llconf)
+{
+	struct protosw *pp; 
+	struct sockaddr_dl *ifdl_addr; 
+	struct rtentry *sirt = (struct rtentry *)0; 
+	struct npaidbentry *sapinfo; 
+	u_char saploc; 
+	int size = sizeof(struct npaidbentry);
+
+	USES_AF_LINK_RTS;
+
+	/* 
+	 * We rely/assume that only STREAM protocols will make use of 
+	 * connection oriented LLC2. If this will one day not be the 
+	 * case this will obviously fail. 
+	 */ 
+	pp = pffindtype (af, SOCK_STREAM); 
+	if (pp == 0 || pp->pr_input == 0 || pp->pr_ctlinput == 0) { 	
+		printf("network	level protosw error"); 	
+		return 0; 
+	}
+
+	/*
+	 * We need a way to jot down the LLC2 configuration for
+	 * a certain LSAP address. To do this we enter 
+	 * a "route" for the SAP.
+	 */
+	ifdl_addr = sdl_getaddrif(ifp);
+	if (ifdl_addr == 0 || llconf == 0)	/* no AF_LINK addr or no config */
+		return 0;
+	sdl_copy(ifdl_addr, &sap_saddr); 
+	sdl_copy(ifdl_addr, &sap_sgate);
+	saploc = LLSAPLOC(&sap_saddr, ifp); 
+	sap_saddr.sdl_data[saploc] = sap;
+	sap_saddr.sdl_alen++;
+
+	/* now enter it */ 
+	rtrequest(RTM_ADD, (struct sockaddr *)&sap_saddr,
+			(struct sockaddr *)&sap_sgate, 0, 0, &sirt); 
+	if (sirt == 0) 	
+		return 0;
+
+	/* Plug in config information in rt->rt_llinfo */
+	sirt->rt_llinfo = malloc(size , M_PCB, M_WAITOK); 
+	sapinfo = (struct npaidbentry *) sirt->rt_llinfo; 
+	if (sapinfo) { 	
+		bzero ((caddr_t)sapinfo, size); 	
+		/*
+		 * For the time being we support LLC CLASS II here only
+		 */
+		sapinfo->si_class = LLC_CLASS_II; 	
+		sapinfo->si_window = llconf->dllcfg_window;
+		sapinfo->si_trace = llconf->dllcfg_trace; 	
+		if (sapinfo->si_trace)
+			llc_tracelevel--;
+		else llc_tracelevel++;
+		sapinfo->si_input = pp->pr_input; 	
+		sapinfo->si_ctlinput = (caddr_t (*)())pp->pr_ctlinput;
+
+		return (sapinfo);
+	}
+
+	return 0;
+}
+
+/*
+ * Get sapinfo for SAP address and interface; 0 if no route is found
+ */
+struct npaidbentry *
+llc_getsapinfo(u_char sap, struct ifnet *ifp)
+{
+	struct sockaddr_dl *ifdl_addr; 
+	struct sockaddr_dl si_addr; 
+	struct rtentry *sirt; 
+	u_char saploc;
+
+	USES_AF_LINK_RTS;
+
+	if ((ifdl_addr = sdl_getaddrif(ifp)) == 0)
+		return(0);	/* interface has no AF_LINK address */
+	sdl_copy(ifdl_addr, &si_addr); 
+	saploc = LLSAPLOC(&si_addr, ifp); 
+	si_addr.sdl_data[saploc] = sap;
+	si_addr.sdl_alen++;
+
+	if ((sirt = rtalloc1((struct sockaddr *)&si_addr, 0))) 	
+		sirt->rt_refcnt--; 
+	else return(0);
+	return((struct npaidbentry *)sirt->rt_llinfo);
+}
+
+/*
+ * llc_seq2slot() --- We only allocate enough memory to hold the window. This
+ * introduces the necessity to keep track of two ``pointers''
+ *
+ *        o llcl_freeslot     the next free slot to be used
+ *                            this one advances modulo llcl_window
+ *        o llcl_projvs       the V(S) associated with the next frame
+ *                            to be set via llcl_freeslot
+ *                            this one advances modulo LLC_MAX_SEQUENCE
+ *
+ * A new frame is inserted at llcl_output_buffers[llcl_freeslot], after
+ * which both llcl_freeslot and llcl_projvs are incremented.
+ *
+ * The slot sl(sn) for any given sequence number sn is computed as
+ *
+ *        sl(sn) = (llcl_freeslot + llcl_window - (llcl_projvs +
+ *                  LLC_MAX_SEQUENCE - sn) % LLC_MAX_SEQUENCE) % 
+ *                  llcl_window 
+ *
+ * i.e. we first calculate how many frames sn lies behind llcl_projvs
+ * (modulo the sequence space) and step back that many slots from
+ * llcl_freeslot (modulo the window).  Thus sn == llcl_projvs yields
+ * llcl_freeslot itself and sn == llcl_projvs - 1 the slot before it.
+ */
+
+short
+llc_seq2slot(struct llc_linkcb *linkp, short seqn)
+{
+	/* number of frames seqn lies behind llcl_projvs */
+	register int behind = (linkp->llcl_projvs + LLC_MAX_SEQUENCE - seqn) %
+		LLC_MAX_SEQUENCE;
+	register int slot;
+
+	slot = linkp->llcl_freeslot + linkp->llcl_window - behind;
+	return slot % linkp->llcl_window;
+}
+
+/*
+ * LLC2 link state handler
+ *
+ * There is in most cases one function per LLC2 state. The LLC2 standard
+ * ISO 8802-2 allows in some cases for ambiguities, i.e. we have the choice
+ * to do one thing or the other. Right now I have just chosen one but have also 
+ * indicated the spot by "multiple possibilities". One could make the behavior 
+ * in those cases configurable, allowing the superuser to enter a profile word
+ * (32/64 bits, whatever is needed) that would suit her needs [I quite like 
+ * that idea, perhaps I'll get around to it].
+ *
+ * [Preceding each state handler function is the description as taken from
+ * ISO 8802-2, section 7.9.2.1]
+ */
+
+/*
+ * ADM --- The connection component is in the asynchronous disconnected mode.
+ *         It can accept an SABME PDU from a remote LLC SSAP or, at the request
+ *         of the service access point user, can initiate an SABME PDU
+ *         transmission to a remote LLC DSAP, to establish a data link
+ *         connection. It also responds to a DISC command PDU and to any
+ *         command PDU with the P bit set to ``1''.
+ *
+ * The event handled is selected by frame_kind + cmdrsp; pollfinal is passed
+ * on to llc_send() and stored in the P/F flags. The frame argument is not
+ * examined in this state. Returns LLC_CONNECT_INDICATION when an SABME
+ * command was received and 0 for every other event.
+ */
+int
+llc_state_ADM(struct llc_linkcb *linkp, struct llc *frame, int frame_kind,
+	      int cmdrsp, int pollfinal)
+{
+	switch (frame_kind + cmdrsp) {
+	case NL_CONNECT_REQUEST:
+		/* local user asks for a connection: send SABME, wait in SETUP */
+		llc_send(linkp, LLCFT_SABME, LLC_CMD, pollfinal);
+		LLC_SETFLAG(linkp, P, pollfinal);
+		LLC_SETFLAG(linkp, S, 0);
+		linkp->llcl_retry = 0;
+		LLC_NEWSTATE(linkp, SETUP);
+		return 0;
+
+	case LLCFT_SABME + LLC_CMD:
+		/*
+		 * ISO 8802-2, table 7-1, ADM state says to set
+		 * the P flag, yet this will cause an SABME [P] to be
+		 * answered with an UA only, not an UA [F], all
+		 * other `disconnected' states set the F flag, so ...
+		 */
+		LLC_SETFLAG(linkp, F, pollfinal);
+		LLC_NEWSTATE(linkp, CONN);
+		return LLC_CONNECT_INDICATION;
+
+	case LLCFT_DISC + LLC_CMD:
+		llc_send(linkp, LLCFT_DM, LLC_RSP, pollfinal);
+		return 0;
+
+	default:
+		/* remain in ADM state, answering a polling command with DM */
+		if (cmdrsp == LLC_CMD && pollfinal == 1)
+			llc_send(linkp, LLCFT_DM, LLC_RSP, 1);
+		return 0;
+	}
+}
+
+/*
+ * CONN --- The local connection component has received an SABME PDU from a
+ *          remote LLC SSAP, and it is waiting for the local user to accept or
+ *          refuse the connection.
+ *
+ * The event handled is selected by frame_kind + cmdrsp. The frame argument
+ * is not examined in this state. Returns LLC_DISCONNECT_INDICATION when a
+ * DM response arrives and 0 for every other event.
+ */
+int
+llc_state_CONN(struct llc_linkcb *linkp, struct llc *frame, int frame_kind,
+	       int cmdrsp, int pollfinal)
+{
+	switch (frame_kind + cmdrsp) {
+	case NL_CONNECT_RESPONSE:
+		/* local user accepted: answer with UA using the saved F flag */
+		llc_send(linkp, LLCFT_UA, LLC_RSP, LLC_GETFLAG(linkp, F));
+		LLC_RESETCOUNTER(linkp);
+		LLC_SETFLAG(linkp, P, 0);
+		LLC_SETFLAG(linkp, REMOTE_BUSY, 0);
+		LLC_NEWSTATE(linkp, NORMAL);
+		return 0;
+
+	case NL_DISCONNECT_REQUEST:
+		/* local user refused: answer with DM and drop back to ADM */
+		llc_send(linkp, LLCFT_DM, LLC_RSP, LLC_GETFLAG(linkp, F));
+		LLC_NEWSTATE(linkp, ADM);
+		return 0;
+
+	case LLCFT_SABME + LLC_CMD:
+		/* repeated SABME: just remember its poll bit */
+		LLC_SETFLAG(linkp, F, pollfinal);
+		return 0;
+
+	case LLCFT_DM + LLC_RSP:
+		LLC_NEWSTATE(linkp, ADM);
+		return LLC_DISCONNECT_INDICATION;
+
+	default:
+		/* all other frames effect nothing here */
+		return 0;
+	}
+}
+
+/*
+ * RESET_WAIT --- The local connection component is waiting for the local user
+ *                 to indicate a RESET_REQUEST or a DISCONNECT_REQUEST.
+ *
+ * The event handled is selected by frame_kind + cmdrsp. The S flag decides
+ * how the user's request is carried out: when it is 0 a command (SABME or
+ * DISC) is sent and the ack timer started, otherwise a response (UA or DM)
+ * is sent using the saved F flag. The frame argument is not examined in
+ * this state. Returns LLC_RESET_CONFIRM, LLC_DISCONNECT_INDICATION or 0.
+ */
+int
+llc_state_RESET_WAIT(struct llc_linkcb *linkp, struct llc *frame, int frame_kind,
+		     int cmdrsp, int pollfinal)
+{
+	switch (frame_kind + cmdrsp) {
+	case NL_RESET_REQUEST:
+		if (LLC_GETFLAG(linkp, S) != 0) {
+			/* an SABME was seen meanwhile: confirm it with UA */
+			llc_send(linkp, LLCFT_UA, LLC_RSP,
+				 LLC_GETFLAG(linkp, F));
+			LLC_RESETCOUNTER(linkp);
+			LLC_SETFLAG(linkp, P, 0);
+			LLC_SETFLAG(linkp, REMOTE_BUSY, 0);
+			LLC_NEWSTATE(linkp, NORMAL);
+			return LLC_RESET_CONFIRM;
+		}
+		/* no SABME seen: start the reset ourselves */
+		llc_send(linkp, LLCFT_SABME, LLC_CMD, pollfinal);
+		LLC_SETFLAG(linkp, P, pollfinal);
+		LLC_START_ACK_TIMER(linkp);
+		linkp->llcl_retry = 0;
+		LLC_NEWSTATE(linkp, RESET);
+		return 0;
+
+	case NL_DISCONNECT_REQUEST:
+		if (LLC_GETFLAG(linkp, S) != 0) {
+			/* an SABME was seen meanwhile: refuse it with DM */
+			llc_send(linkp, LLCFT_DM, LLC_RSP,
+				 LLC_GETFLAG(linkp, F));
+			LLC_NEWSTATE(linkp, ADM);
+			return 0;
+		}
+		/* no SABME seen: start the disconnect ourselves */
+		llc_send(linkp, LLCFT_DISC, LLC_CMD, pollfinal);
+		LLC_SETFLAG(linkp, P, pollfinal);
+		LLC_START_ACK_TIMER(linkp);
+		linkp->llcl_retry = 0;
+		LLC_NEWSTATE(linkp, D_CONN);
+		return 0;
+
+	case LLCFT_DM + LLC_RSP:
+		LLC_NEWSTATE(linkp, ADM);
+		return LLC_DISCONNECT_INDICATION;
+
+	case LLCFT_SABME + LLC_CMD:
+		/* remember that the SABME arrived and its poll bit */
+		LLC_SETFLAG(linkp, S, 1);
+		LLC_SETFLAG(linkp, F, pollfinal);
+		return 0;
+
+	case LLCFT_DISC + LLC_CMD:
+		llc_send(linkp, LLCFT_DM, LLC_RSP, pollfinal);
+		LLC_NEWSTATE(linkp, ADM);
+		return LLC_DISCONNECT_INDICATION;
+
+	default:
+		return 0;
+	}
+}
+
+/*
+ * RESET_CHECK --- The local connection component is waiting for the local user
+ *                 to accept or refuse a remote reset request.
+ *
+ * The event handled is selected by frame_kind + cmdrsp. The frame argument
+ * is not examined in this state. Returns LLC_DISCONNECT_INDICATION when
+ * the peer sends DM or DISC, and 0 for every other event.
+ */
+int
+llc_state_RESET_CHECK(struct llc_linkcb *linkp, struct llc *frame, int frame_kind,
+		      int cmdrsp, int pollfinal)
+{
+	int action = 0;
+
+	switch(frame_kind + cmdrsp) {
+	case NL_RESET_RESPONSE:
+		/* local user accepted the reset: confirm with UA */
+		llc_send(linkp, LLCFT_UA, LLC_RSP, LLC_GETFLAG(linkp, F));
+		LLC_RESETCOUNTER(linkp);
+		LLC_SETFLAG(linkp, P, 0);
+		LLC_SETFLAG(linkp, REMOTE_BUSY, 0);
+		LLC_NEWSTATE(linkp, NORMAL);
+		break;
+	case NL_DISCONNECT_REQUEST:
+		/* local user refused the reset: answer with DM */
+		llc_send(linkp, LLCFT_DM, LLC_RSP, LLC_GETFLAG(linkp, F));
+		LLC_NEWSTATE(linkp, ADM);
+		break;
+	case LLCFT_DM + LLC_RSP:
+		/*
+		 * The peer is disconnected; move to ADM as the CONN,
+		 * RESET_WAIT, SETUP and RESET handlers do for a DM response.
+		 * Previously the disconnect was reported but the link stayed
+		 * in RESET_CHECK.
+		 */
+		LLC_NEWSTATE(linkp, ADM);
+		action = LLC_DISCONNECT_INDICATION;
+		break;
+	case LLCFT_SABME + LLC_CMD:
+		/* repeated SABME: just remember its poll bit */
+		LLC_SETFLAG(linkp, F, pollfinal);
+		break;
+	case LLCFT_DISC + LLC_CMD:
+		llc_send(linkp, LLCFT_DM, LLC_RSP, pollfinal);
+		LLC_NEWSTATE(linkp, ADM);
+		action = LLC_DISCONNECT_INDICATION;
+		break;
+	}
+
+	return action;
+}
+
+/*
+ * SETUP --- The connection component has transmitted an SABME command PDU to a
+ *           remote LLC DSAP and is waiting for a reply.
+ *
+ * The event handled is selected by frame_kind + cmdrsp. The frame argument
+ * is not examined in this state. Returns LLC_CONNECT_CONFIRM once the
+ * link enters NORMAL, LLC_DISCONNECT_INDICATION when it falls back to ADM,
+ * and 0 otherwise.
+ */
+int
+llc_state_SETUP(struct llc_linkcb *linkp, struct llc *frame, int frame_kind,
+		int cmdrsp, int pollfinal)
+{
+	int action = 0;
+
+	switch(frame_kind + cmdrsp) {
+	case LLCFT_SABME + LLC_CMD:
+		/* both sides sent SABME: acknowledge and note it in the S flag */
+		LLC_RESETCOUNTER(linkp);
+		llc_send(linkp, LLCFT_UA, LLC_RSP, pollfinal);
+		LLC_SETFLAG(linkp, S, 1);
+		break;
+	case LLCFT_UA + LLC_RSP:
+		/* only a UA whose F bit matches our P flag is accepted */
+		if (LLC_GETFLAG(linkp, P) == pollfinal) {
+			LLC_STOP_ACK_TIMER(linkp);
+			LLC_RESETCOUNTER(linkp);
+			LLC_UPDATE_P_FLAG(linkp, cmdrsp, pollfinal);
+			LLC_SETFLAG(linkp, REMOTE_BUSY, 0);
+			LLC_NEWSTATE(linkp, NORMAL);
+			action = LLC_CONNECT_CONFIRM;
+		}
+		break;
+	case LLC_ACK_TIMER_EXPIRED:
+		if (LLC_GETFLAG(linkp, S) == 1) {
+			LLC_SETFLAG(linkp, P, 0);
+			/* this statement used to end in ',' rather than ';' */
+			LLC_SETFLAG(linkp, REMOTE_BUSY, 0);
+			LLC_NEWSTATE(linkp, NORMAL);
+			action = LLC_CONNECT_CONFIRM;
+		} else if (linkp->llcl_retry < llc_n2) {
+			/* retry the SABME until llc_n2 attempts were made */
+			llc_send(linkp, LLCFT_SABME, LLC_CMD, pollfinal);
+			LLC_SETFLAG(linkp, P, pollfinal);
+			LLC_START_ACK_TIMER(linkp);
+			linkp->llcl_retry++;
+		} else {
+			LLC_NEWSTATE(linkp, ADM);
+			action = LLC_DISCONNECT_INDICATION;
+		}
+		break;
+	case LLCFT_DISC + LLC_CMD:
+		llc_send(linkp, LLCFT_DM, LLC_RSP, pollfinal);
+		LLC_STOP_ACK_TIMER(linkp);
+		LLC_NEWSTATE(linkp, ADM);
+		action = LLC_DISCONNECT_INDICATION;
+		break;
+	case LLCFT_DM + LLC_RSP:
+		LLC_STOP_ACK_TIMER(linkp);
+		LLC_NEWSTATE(linkp, ADM);
+		action = LLC_DISCONNECT_INDICATION;
+		break;
+	}
+
+	return action;
+}
+
+/*
+ * RESET --- As a result of a service access point user request or the receipt
+ *           of a FRMR response PDU, the local connection component has sent an
+ *           SABME command PDU to the remote LLC DSAP to reset the data link
+ *           connection and is waiting for a reply.
+ *
+ * The event handled is selected by frame_kind + cmdrsp. The frame argument
+ * is not examined in this state. Returns LLC_RESET_CONFIRM once the link
+ * enters NORMAL, LLC_DISCONNECT_INDICATION when it falls back to ADM, and
+ * 0 otherwise.
+ */
+int
+llc_state_RESET(struct llc_linkcb *linkp, struct llc *frame, int frame_kind,
+		int cmdrsp, int pollfinal)
+{
+	switch (frame_kind + cmdrsp) {
+	case LLCFT_SABME + LLC_CMD:
+		/* both sides sent SABME: note it in the S flag and acknowledge */
+		LLC_RESETCOUNTER(linkp);
+		LLC_SETFLAG(linkp, S, 1);
+		llc_send(linkp, LLCFT_UA, LLC_RSP, pollfinal);
+		return 0;
+
+	case LLCFT_UA + LLC_RSP:
+		/* a UA whose F bit does not match our P flag is ignored */
+		if (LLC_GETFLAG(linkp, P) != pollfinal)
+			return 0;
+		LLC_STOP_ACK_TIMER(linkp);
+		LLC_RESETCOUNTER(linkp);
+		LLC_UPDATE_P_FLAG(linkp, cmdrsp, pollfinal);
+		LLC_SETFLAG(linkp, REMOTE_BUSY, 0);
+		LLC_NEWSTATE(linkp, NORMAL);
+		return LLC_RESET_CONFIRM;
+
+	case LLC_ACK_TIMER_EXPIRED:
+		if (LLC_GETFLAG(linkp, S) == 1) {
+			LLC_SETFLAG(linkp, P, 0);
+			LLC_SETFLAG(linkp, REMOTE_BUSY, 0);
+			LLC_NEWSTATE(linkp, NORMAL);
+			return LLC_RESET_CONFIRM;
+		}
+		if (linkp->llcl_retry < llc_n2) {
+			/* retry the SABME until llc_n2 attempts were made */
+			llc_send(linkp, LLCFT_SABME, LLC_CMD, pollfinal);
+			LLC_SETFLAG(linkp, P, pollfinal);
+			LLC_START_ACK_TIMER(linkp);
+			linkp->llcl_retry++;
+			return 0;
+		}
+		LLC_NEWSTATE(linkp, ADM);
+		return LLC_DISCONNECT_INDICATION;
+
+	case LLCFT_DISC + LLC_CMD:
+		llc_send(linkp, LLCFT_DM, LLC_RSP, pollfinal);
+		/* FALLTHROUGH --- the remainder is shared with the DM response */
+	case LLCFT_DM + LLC_RSP:
+		LLC_STOP_ACK_TIMER(linkp);
+		LLC_NEWSTATE(linkp, ADM);
+		return LLC_DISCONNECT_INDICATION;
+
+	default:
+		return 0;
+	}
+}
+
+/*
+ * D_CONN --- At the request of the service access point user, the local LLC
+ *            has sent a DISC command PDU to the remote LLC DSAP and is waiting
+ *            for a reply.
+ *
+ * The event handled is selected by frame_kind + cmdrsp. The frame argument
+ * is not examined in this state. No event produces an indication here, so
+ * the function always returns 0.
+ */
+int
+llc_state_D_CONN(struct llc_linkcb *linkp, struct llc *frame, int frame_kind,
+		 int cmdrsp, int pollfinal)
+{
+	switch (frame_kind + cmdrsp) {
+	case LLCFT_SABME + LLC_CMD:
+		llc_send(linkp, LLCFT_DM, LLC_RSP, pollfinal);
+		LLC_STOP_ACK_TIMER(linkp);
+		LLC_NEWSTATE(linkp, ADM);
+		break;
+
+	case LLCFT_DISC + LLC_CMD:
+		/* both sides are disconnecting: acknowledge the peer's DISC */
+		llc_send(linkp, LLCFT_UA, LLC_RSP, pollfinal);
+		break;
+
+	case LLCFT_UA + LLC_RSP:
+		/* a UA whose F bit does not match our P flag is ignored */
+		if (LLC_GETFLAG(linkp, P) != pollfinal)
+			break;
+		/* FALLTHROUGH --- a matching UA is treated like a DM response */
+	case LLCFT_DM + LLC_RSP:
+		LLC_STOP_ACK_TIMER(linkp);
+		LLC_NEWSTATE(linkp, ADM);
+		break;
+
+	case LLC_ACK_TIMER_EXPIRED:
+		if (linkp->llcl_retry >= llc_n2) {
+			/* retries exhausted: give up */
+			LLC_NEWSTATE(linkp, ADM);
+			break;
+		}
+		llc_send(linkp, LLCFT_DISC, LLC_CMD, pollfinal);
+		LLC_SETFLAG(linkp, P, pollfinal);
+		LLC_START_ACK_TIMER(linkp);
+		linkp->llcl_retry++;
+		break;
+	}
+
+	return 0;
+}
+
+/*
+ * ERROR --- The local connection component has detected an error in a received
+ *           PDU and has sent a FRMR response PDU. It is waiting for a reply from
+ *           the remote connection component.
+ *
+ * The event handled is selected by frame_kind + cmdrsp. The frame argument
+ * is not examined in this state. Returns one of
+ * LLC_RESET_INDICATION_REMOTE, LLC_RESET_INDICATION_LOCAL,
+ * LLC_DISCONNECT_INDICATION or LLC_FRMR_RECEIVED when the state changes,
+ * and 0 otherwise.
+ */
+int
+llc_state_ERROR(struct llc_linkcb *linkp, struct llc *frame, int frame_kind,
+		int cmdrsp, int pollfinal)
+{
+	switch (frame_kind + cmdrsp) {
+	case LLCFT_SABME + LLC_CMD:
+		LLC_STOP_ACK_TIMER(linkp);
+		LLC_NEWSTATE(linkp, RESET_CHECK);
+		return LLC_RESET_INDICATION_REMOTE;
+
+	case LLCFT_DISC + LLC_CMD:
+		llc_send(linkp, LLCFT_UA, LLC_RSP, pollfinal);
+		/* FALLTHROUGH --- the remainder is shared with the DM response */
+	case LLCFT_DM + LLC_RSP:
+		LLC_STOP_ACK_TIMER(linkp);
+		LLC_NEWSTATE(linkp, ADM);
+		return LLC_DISCONNECT_INDICATION;
+
+	case LLCFT_FRMR + LLC_RSP:
+		LLC_STOP_ACK_TIMER(linkp);
+		LLC_SETFLAG(linkp, S, 0);
+		LLC_NEWSTATE(linkp, RESET_WAIT);
+		return LLC_FRMR_RECEIVED;
+
+	case LLC_ACK_TIMER_EXPIRED:
+		if (linkp->llcl_retry >= llc_n2) {
+			/* retries exhausted: leave the decision to the local user */
+			LLC_SETFLAG(linkp, S, 0);
+			LLC_NEWSTATE(linkp, RESET_WAIT);
+			return LLC_RESET_INDICATION_LOCAL;
+		}
+		/* repeat the FRMR */
+		llc_send(linkp, LLCFT_FRMR, LLC_RSP, 0);
+		LLC_START_ACK_TIMER(linkp);
+		linkp->llcl_retry++;
+		return 0;
+
+	default:
+		/* any other command is answered with the FRMR again */
+		if (cmdrsp == LLC_CMD) {
+			llc_send(linkp, LLCFT_FRMR, LLC_RSP, pollfinal);
+			LLC_START_ACK_TIMER(linkp);
+		}
+		return 0;
+	}
+}
+
+/*
+ * NORMAL, BUSY, REJECT, AWAIT, AWAIT_BUSY, and AWAIT_REJECT all share
+ * a common core state handler.
+ *
+ * The NORMAL and BUSY handlers call this function for every event they
+ * did not consume themselves (their action is still LLC_PASSITON). The
+ * event handled is selected by frame_kind + cmdrsp. Returns the action
+ * code for the caller: one of the LLC_*_INDICATION* codes,
+ * LLC_FRMR_RECEIVED, LLC_FRMR_SENT, or 0 when nothing is to be reported.
+ */
+int
+llc_state_NBRAcore(struct llc_linkcb *linkp, struct llc *frame, int frame_kind,
+		   int cmdrsp, int pollfinal)
+{
+	int action = 0;
+
+	switch(frame_kind + cmdrsp) {
+	case NL_DISCONNECT_REQUEST:
+		llc_send(linkp, LLCFT_DISC, LLC_CMD, pollfinal);
+		LLC_SETFLAG(linkp, P, pollfinal);
+		LLC_STOP_ALL_TIMERS(linkp);
+		LLC_START_ACK_TIMER(linkp);
+		linkp->llcl_retry = 0;
+		LLC_NEWSTATE(linkp, D_CONN);
+		break;
+	case NL_RESET_REQUEST:
+		llc_send(linkp, LLCFT_SABME, LLC_CMD, pollfinal);
+		LLC_SETFLAG(linkp, P, pollfinal);
+		LLC_STOP_ALL_TIMERS(linkp);
+		LLC_START_ACK_TIMER(linkp);
+		linkp->llcl_retry = 0;
+		LLC_SETFLAG(linkp, S, 0);
+		LLC_NEWSTATE(linkp, RESET);
+		break;
+	case LLCFT_SABME + LLC_CMD:
+		/* remote reset request: let the local user decide */
+		LLC_SETFLAG(linkp, F, pollfinal);
+		LLC_STOP_ALL_TIMERS(linkp);
+		LLC_NEWSTATE(linkp, RESET_CHECK);
+		action = LLC_RESET_INDICATION_REMOTE;
+		break;
+	case LLCFT_DISC + LLC_CMD:
+		llc_send(linkp, LLCFT_UA, LLC_RSP, pollfinal);
+		LLC_STOP_ALL_TIMERS(linkp);
+		LLC_NEWSTATE(linkp, ADM);
+		action = LLC_DISCONNECT_INDICATION;
+		break;
+	case LLCFT_FRMR + LLC_RSP:
+		LLC_STOP_ALL_TIMERS(linkp);
+		LLC_SETFLAG(linkp, S, 0);
+		LLC_NEWSTATE(linkp, RESET_WAIT);
+		action =  LLC_FRMR_RECEIVED;
+		break;
+	case LLCFT_DM + LLC_RSP:
+		LLC_STOP_ALL_TIMERS(linkp);
+		LLC_NEWSTATE(linkp, ADM);
+		action = LLC_DISCONNECT_INDICATION;
+		break;
+	case LLC_INVALID_NR + LLC_CMD:
+	case LLC_INVALID_NS + LLC_CMD:
+		/* bad sequence number in a command: reject it with FRMR */
+		LLC_SETFRMR(linkp, frame, cmdrsp, 
+			 (frame_kind == LLC_INVALID_NR ? LLC_FRMR_Z :
+			  (LLC_FRMR_V | LLC_FRMR_W)));
+		llc_send(linkp, LLCFT_FRMR, LLC_RSP, pollfinal);
+		LLC_STOP_ALL_TIMERS(linkp);
+		LLC_START_ACK_TIMER(linkp);
+		linkp->llcl_retry = 0;
+		LLC_NEWSTATE(linkp, ERROR);
+		action = LLC_FRMR_SENT;
+		break;
+	case LLC_INVALID_NR + LLC_RSP:
+	case LLC_INVALID_NS + LLC_RSP:
+	case LLCFT_UA + LLC_RSP:
+	case LLC_BAD_PDU: {
+		char frmrcause = 0;
+
+		switch (frame_kind) {
+		case LLC_INVALID_NR: frmrcause = LLC_FRMR_Z; break;
+		case LLC_INVALID_NS: frmrcause = LLC_FRMR_V | LLC_FRMR_W; break;
+		default: frmrcause = LLC_FRMR_W;
+		}
+		LLC_SETFRMR(linkp, frame, cmdrsp, frmrcause);
+		llc_send(linkp, LLCFT_FRMR, LLC_RSP, 0);
+		LLC_STOP_ALL_TIMERS(linkp);
+		LLC_START_ACK_TIMER(linkp);
+		linkp->llcl_retry = 0;
+		LLC_NEWSTATE(linkp, ERROR);
+		action = LLC_FRMR_SENT;
+		break;
+	}
+	default:
+		/* a response with the F bit set although no poll is outstanding */
+		if (cmdrsp == LLC_RSP && pollfinal == 1 && 
+		    LLC_GETFLAG(linkp, P) == 0) {
+			LLC_SETFRMR(linkp, frame, cmdrsp, LLC_FRMR_W);
+			/*
+			 * Transmit the FRMR, as the other branches that
+			 * report LLC_FRMR_SENT do. Previously the frame
+			 * data was prepared here but never sent.
+			 */
+			llc_send(linkp, LLCFT_FRMR, LLC_RSP, 0);
+			LLC_STOP_ALL_TIMERS(linkp);
+			LLC_START_ACK_TIMER(linkp);
+			linkp->llcl_retry = 0;
+			LLC_NEWSTATE(linkp, ERROR);
+			action = LLC_FRMR_SENT;
+		}
+		break;
+	case LLC_P_TIMER_EXPIRED:
+	case LLC_ACK_TIMER_EXPIRED:
+	case LLC_REJ_TIMER_EXPIRED:
+	case LLC_BUSY_TIMER_EXPIRED:
+		/* a timer ran out with the retry budget used up */
+		if (linkp->llcl_retry >= llc_n2) {
+			LLC_STOP_ALL_TIMERS(linkp);
+			LLC_SETFLAG(linkp, S, 0);
+			LLC_NEWSTATE(linkp, RESET_WAIT);
+			action = LLC_RESET_INDICATION_LOCAL;
+		}
+		break;
+	}
+
+	return action;
+}
+
+/*
+ * NORMAL --- A data link connection exists between the local LLC service access
+ *            point and the remote LLC service access point. Sending and
+ *            reception of information and supervisory PDUs can be performed.
+ *
+ * The event handled is selected by frame_kind + cmdrsp. action starts out
+ * as LLC_PASSITON; every branch that consumes the event replaces it with 0
+ * or with an indication code. If action is still LLC_PASSITON after the
+ * switch, the event is handed to llc_state_NBRAcore() and that handler's
+ * result is returned instead.
+ *
+ * The branches for received I, RR, RNR and REJ PDUs read N(R) from
+ * frame->llc_control_ext and the current P flag from linkp.
+ *
+ * NOTE(review): LLC_CLEAR_REMOTE_BUSY() and LLC_SET_REMOTE_BUSY() are
+ * passed action; presumably they may assign to it --- confirm against
+ * the macro definitions.
+ */
+int
+llc_state_NORMAL(struct llc_linkcb *linkp, struct llc *frame, int frame_kind,
+		 int cmdrsp, int pollfinal)
+{
+	int action = LLC_PASSITON;
+
+	switch(frame_kind + cmdrsp) {
+	case NL_DATA_REQUEST:	/* send an I PDU unless the remote side is busy */
+		if (LLC_GETFLAG(linkp, REMOTE_BUSY) == 0) {
+#ifdef not_now
+			if (LLC_GETFLAG(linkp, P) == 0) {
+				/* multiple possibilities */
+				llc_send(linkp, LLCFT_INFO, LLC_CMD, 1);
+				LLC_START_P_TIMER(linkp);
+				if (LLC_TIMERXPIRED(linkp, ACK) != LLC_TIMER_RUNNING)
+					LLC_START_ACK_TIMER(linkp);
+			} else {
+#endif 
+				/* multiple possibilities */
+				llc_send(linkp, LLCFT_INFO, LLC_CMD, 0);
+				if (LLC_TIMERXPIRED(linkp, ACK) != LLC_TIMER_RUNNING)
+					LLC_START_ACK_TIMER(linkp);
+#ifdef not_now
+			}
+#endif
+			action = 0;
+		}
+		break;
+	case LLC_LOCAL_BUSY_DETECTED:	/* announce RNR and switch to BUSY */
+		if (LLC_GETFLAG(linkp, P) == 0) {
+			/* multiple possibilities --- action-wise */
+			/* multiple possibilities --- CMD/RSP-wise */
+			llc_send(linkp, LLCFT_RNR, LLC_CMD, 0);
+			LLC_START_P_TIMER(linkp);
+			LLC_SETFLAG(linkp, DATA, 0);
+			LLC_NEWSTATE(linkp, BUSY);
+			action = 0;
+		} else { 
+			/* multiple possibilities --- CMD/RSP-wise */
+			llc_send(linkp, LLCFT_RNR, LLC_CMD, 0);
+			LLC_SETFLAG(linkp, DATA, 0);
+			LLC_NEWSTATE(linkp, BUSY);
+			action = 0;			
+		}
+		break;
+	case LLC_INVALID_NS + LLC_CMD:	/* unexpected N(S): send REJ, enter REJECT */
+	case LLC_INVALID_NS + LLC_RSP: {
+		register int p = LLC_GETFLAG(linkp, P);
+		register int nr = LLCGBITS(frame->llc_control_ext, s_nr);
+
+		if (cmdrsp == LLC_CMD && pollfinal == 1) {
+			llc_send(linkp, LLCFT_REJ, LLC_RSP, 1);
+			LLC_UPDATE_NR_RECEIVED(linkp, nr);
+			LLC_START_REJ_TIMER(linkp);
+			LLC_NEWSTATE(linkp, REJECT);
+			action = 0;
+		} else if (pollfinal == 0 && p == 1) {
+			llc_send(linkp, LLCFT_REJ, LLC_CMD, 0);
+			LLC_UPDATE_NR_RECEIVED(linkp, nr);
+			LLC_START_REJ_TIMER(linkp);
+			LLC_NEWSTATE(linkp, REJECT);
+			action = 0;
+		} else if ((pollfinal == 0 && p == 0) || 
+			   (pollfinal == 1 && p == 1 && cmdrsp == LLC_RSP)) {
+			llc_send(linkp, LLCFT_REJ, LLC_CMD, 1);
+			LLC_UPDATE_NR_RECEIVED(linkp, nr);
+			LLC_START_P_TIMER(linkp);
+			LLC_START_REJ_TIMER(linkp);
+			if (cmdrsp == LLC_RSP && pollfinal == 1) {
+				LLC_CLEAR_REMOTE_BUSY(linkp, action);
+			} else action = 0;
+			LLC_NEWSTATE(linkp, REJECT);
+		}
+		break;
+	} 
+	case LLCFT_INFO + LLC_CMD:	/* in-sequence I PDU: bump V(R), acknowledge */
+	case LLCFT_INFO + LLC_RSP: {
+		register int p = LLC_GETFLAG(linkp, P);
+		register int nr = LLCGBITS(frame->llc_control_ext, s_nr);
+		
+		if (cmdrsp == LLC_CMD && pollfinal == 1) {
+			LLC_INC(linkp->llcl_vr);
+			LLC_SENDACKNOWLEDGE(linkp, LLC_RSP, 1);
+			LLC_UPDATE_NR_RECEIVED(linkp, nr);
+			action = LLC_DATA_INDICATION;
+		} else if (pollfinal == 0 && p == 1) {
+			LLC_INC(linkp->llcl_vr);
+			LLC_SENDACKNOWLEDGE(linkp, LLC_CMD, 0);
+			LLC_UPDATE_NR_RECEIVED(linkp, nr);
+			action = LLC_DATA_INDICATION;
+		} else if ((pollfinal == 0 && p == 0 && cmdrsp == LLC_CMD) ||
+			   (pollfinal == p && cmdrsp == LLC_RSP)) {
+			LLC_INC(linkp->llcl_vr);
+			LLC_UPDATE_P_FLAG(linkp, cmdrsp, pollfinal);
+			LLC_SENDACKNOWLEDGE(linkp, LLC_CMD, 0);
+			LLC_UPDATE_NR_RECEIVED(linkp, nr);
+			if (cmdrsp == LLC_RSP && pollfinal == 1) 
+				LLC_CLEAR_REMOTE_BUSY(linkp, action);
+			action = LLC_DATA_INDICATION;	/* assigned after the macro call above */
+		}
+		break;
+	}
+	case LLCFT_RR + LLC_CMD:	/* peer is ready: take N(R), clear remote busy */
+	case LLCFT_RR + LLC_RSP: {
+		register int p = LLC_GETFLAG(linkp, P);
+		register int nr = LLCGBITS(frame->llc_control_ext, s_nr);
+
+		if (cmdrsp == LLC_CMD && pollfinal == 1) {
+			LLC_SENDACKNOWLEDGE(linkp, LLC_RSP, 1);
+			LLC_UPDATE_NR_RECEIVED(linkp, nr);
+			LLC_CLEAR_REMOTE_BUSY(linkp, action);
+		} else if ((pollfinal == 0) || 
+			   (cmdrsp == LLC_RSP && pollfinal == 1 && p == 1)) {
+			LLC_UPDATE_P_FLAG(linkp, cmdrsp, pollfinal);
+			LLC_UPDATE_NR_RECEIVED(linkp, nr);
+			LLC_CLEAR_REMOTE_BUSY(linkp, action);
+		} 
+		break;
+	}
+	case LLCFT_RNR + LLC_CMD:	/* peer is not ready: take N(R), set remote busy */
+	case LLCFT_RNR + LLC_RSP: {
+		register int p = LLC_GETFLAG(linkp, P);
+		register int nr = LLCGBITS(frame->llc_control_ext, s_nr);
+		
+		if (cmdrsp == LLC_CMD && pollfinal == 1) {
+			llc_send(linkp, LLCFT_RR, LLC_RSP, 1);
+			LLC_UPDATE_NR_RECEIVED(linkp, nr);
+			LLC_SET_REMOTE_BUSY(linkp, action);
+		} else if ((pollfinal == 0) || 
+			   (cmdrsp == LLC_RSP && pollfinal == 1 && p == 1)) {
+			LLC_UPDATE_P_FLAG(linkp, cmdrsp, pollfinal);
+			LLC_UPDATE_NR_RECEIVED(linkp, nr);
+			LLC_SET_REMOTE_BUSY(linkp, action);
+		}
+		break;
+	}
+	case LLCFT_REJ + LLC_CMD:	/* peer rejected: set V(S) to N(R) and resend */
+	case LLCFT_REJ + LLC_RSP: {
+		register int p = LLC_GETFLAG(linkp, P);
+		register int nr = LLCGBITS(frame->llc_control_ext, s_nr);
+
+		if (cmdrsp == LLC_CMD && pollfinal == 1) {
+			linkp->llcl_vs = nr;
+			LLC_UPDATE_NR_RECEIVED(linkp, nr);
+			llc_resend(linkp, LLC_RSP, 1);
+			LLC_CLEAR_REMOTE_BUSY(linkp, action);
+		} else if (pollfinal == 0 && p == 1) {
+			linkp->llcl_vs = nr;
+			LLC_UPDATE_NR_RECEIVED(linkp, nr);
+			llc_resend(linkp, LLC_CMD, 0);
+			LLC_CLEAR_REMOTE_BUSY(linkp, action);
+		} else if ((pollfinal == 0 && p == 0 && cmdrsp == LLC_CMD) ||
+			   (pollfinal == p && cmdrsp == LLC_RSP)) {
+			linkp->llcl_vs = nr;
+			LLC_UPDATE_NR_RECEIVED(linkp, nr);
+			LLC_START_P_TIMER(linkp);
+			llc_resend(linkp, LLC_CMD, 1);
+			LLC_CLEAR_REMOTE_BUSY(linkp, action);
+		}
+		break;
+	}
+	case NL_INITIATE_PF_CYCLE:	/* poll the peer with RR unless a poll is pending */
+		if (LLC_GETFLAG(linkp, P) == 0) {
+			llc_send(linkp, LLCFT_RR, LLC_CMD, 1);
+			LLC_START_P_TIMER(linkp);
+			action = 0;
+		}
+		break;
+	case LLC_P_TIMER_EXPIRED:	/* poll again and wait in AWAIT while retries remain */
+		if (linkp->llcl_retry < llc_n2) {
+			llc_send(linkp, LLCFT_RR, LLC_CMD, 1);
+			LLC_START_P_TIMER(linkp);
+			linkp->llcl_retry++;
+			LLC_NEWSTATE(linkp, AWAIT);
+			action = 0;
+		}
+		break;
+	case LLC_ACK_TIMER_EXPIRED:	/* as above, but only if no poll is outstanding */
+	case LLC_BUSY_TIMER_EXPIRED:
+		if ((LLC_GETFLAG(linkp, P) == 0) 
+		    && (linkp->llcl_retry < llc_n2)) {
+			llc_send(linkp, LLCFT_RR, LLC_CMD, 1);
+			LLC_START_P_TIMER(linkp);
+			linkp->llcl_retry++;
+			LLC_NEWSTATE(linkp, AWAIT);
+			action = 0;
+		}
+		break;
+	}
+	if (action == LLC_PASSITON)	/* not consumed above: defer to the shared core handler */
+		action = llc_state_NBRAcore(linkp, frame, frame_kind, 
+					    cmdrsp, pollfinal);
+
+	return action;
+}
+
+/*
+ * BUSY --- A data link connection exists between the local LLC service access
+ *          point and the remote LLC service access point. I PDUs may be sent.
+ *          Local conditions make it likely that the information field of
+ *          received I PDUs will be ignored. Supervisory PDUs may be both sent
+ *          and received.
+ *
+ * The event handled is selected by frame_kind + cmdrsp. action starts out
+ * as LLC_PASSITON; every branch that consumes the event replaces it with 0
+ * or with an indication code. If action is still LLC_PASSITON after the
+ * switch, the event is handed to llc_state_NBRAcore() and that handler's
+ * result is returned instead.
+ *
+ * The DATA flag takes the values 0, 1 and 2 here; it selects the state
+ * entered and the PDU sent when the local busy condition clears.
+ */
+int
+llc_state_BUSY(struct llc_linkcb *linkp, struct llc *frame, int frame_kind,
+	       int cmdrsp, int pollfinal)
+{
+	int action = LLC_PASSITON;
+
+	switch(frame_kind + cmdrsp) {
+	case NL_DATA_REQUEST:
+		/* braces make explicit that the else belongs to the P flag test */
+		if (LLC_GETFLAG(linkp, REMOTE_BUSY) == 0) {
+			if (LLC_GETFLAG(linkp, P) == 0) {
+				llc_send(linkp, LLCFT_INFO, LLC_CMD, 1);
+				LLC_START_P_TIMER(linkp);
+				if (LLC_TIMERXPIRED(linkp, ACK) != LLC_TIMER_RUNNING)
+					LLC_START_ACK_TIMER(linkp);
+				action = 0;
+			} else {
+				llc_send(linkp, LLCFT_INFO, LLC_CMD, 0);
+				if (LLC_TIMERXPIRED(linkp, ACK) != LLC_TIMER_RUNNING)
+					LLC_START_ACK_TIMER(linkp);
+				action = 0;
+			}
+		}
+		break;
+	case LLC_LOCAL_BUSY_CLEARED: {
+		register int p = LLC_GETFLAG(linkp, P);
+		register int df = LLC_GETFLAG(linkp, DATA);
+
+		switch (df) {
+		case 1: 
+			if (p == 0) {
+				/* multiple possibilities */
+				llc_send(linkp, LLCFT_REJ, LLC_CMD, 1);
+				LLC_START_REJ_TIMER(linkp);
+				LLC_START_P_TIMER(linkp);
+				LLC_NEWSTATE(linkp, REJECT);
+				action = 0;
+			} else {
+				llc_send(linkp, LLCFT_REJ, LLC_CMD, 0);
+				LLC_START_REJ_TIMER(linkp);
+				LLC_NEWSTATE(linkp, REJECT);
+				action = 0;
+			}
+			break;
+		case 0:
+			if (p == 0) {
+				/* multiple possibilities */
+				llc_send(linkp, LLCFT_RR, LLC_CMD, 1);
+				LLC_START_P_TIMER(linkp);
+				LLC_NEWSTATE(linkp, NORMAL);
+				action = 0;
+			} else {
+				llc_send(linkp, LLCFT_RR, LLC_CMD, 0);
+				LLC_NEWSTATE(linkp, NORMAL);
+				action = 0;
+			}
+			break;
+		case 2:
+			if (p == 0) {
+				/* multiple possibilities */
+				llc_send(linkp, LLCFT_RR, LLC_CMD, 1);
+				LLC_START_P_TIMER(linkp);
+				LLC_NEWSTATE(linkp, REJECT);
+				action = 0;
+			} else {
+				llc_send(linkp, LLCFT_RR, LLC_CMD, 0);
+				LLC_NEWSTATE(linkp, REJECT);
+				action = 0;
+			}
+			break;
+		}
+		break;
+	}
+	case LLC_INVALID_NS + LLC_CMD:
+	case LLC_INVALID_NS + LLC_RSP: {
+		register int p = LLC_GETFLAG(linkp, P);
+		register int nr = LLCGBITS(frame->llc_control_ext, s_nr);
+
+		if (cmdrsp == LLC_CMD && pollfinal == 1) {
+			llc_send(linkp, LLCFT_RNR, LLC_RSP, 1);
+			LLC_UPDATE_NR_RECEIVED(linkp, nr);
+			if (LLC_GETFLAG(linkp, DATA) == 0)
+				LLC_SETFLAG(linkp, DATA, 1);
+			action = 0;
+		} else if ((cmdrsp == LLC_CMD && pollfinal == 0 && p == 0) ||
+			   (cmdrsp == LLC_RSP && pollfinal == p)) {
+			llc_send(linkp, LLCFT_RNR, LLC_CMD, 0);
+			LLC_UPDATE_P_FLAG(linkp, cmdrsp, pollfinal);
+			LLC_UPDATE_NR_RECEIVED(linkp, nr);
+			if (LLC_GETFLAG(linkp, DATA) == 0) 
+				LLC_SETFLAG(linkp, DATA, 1);
+			if (cmdrsp == LLC_RSP && pollfinal == 1) {
+				LLC_CLEAR_REMOTE_BUSY(linkp, action);
+			} else action = 0;
+		} else if (pollfinal == 0 && p == 1) {
+			llc_send(linkp, LLCFT_RNR, LLC_RSP, 1);
+			LLC_UPDATE_NR_RECEIVED(linkp, nr);
+			if (LLC_GETFLAG(linkp, DATA) == 0)
+				LLC_SETFLAG(linkp, DATA, 1);
+			action = 0;
+		}
+		break;
+	}
+	case LLCFT_INFO + LLC_CMD:
+	case LLCFT_INFO + LLC_RSP: {
+		register int p = LLC_GETFLAG(linkp, P);
+		register int nr = LLCGBITS(frame->llc_control_ext, s_nr);
+		
+		if (cmdrsp == LLC_CMD && pollfinal == 1) {
+			LLC_INC(linkp->llcl_vr);
+			llc_send(linkp, LLCFT_RNR, LLC_RSP, 1);
+			LLC_UPDATE_NR_RECEIVED(linkp, nr);
+			if (LLC_GETFLAG(linkp, DATA) == 2)
+				LLC_STOP_REJ_TIMER(linkp);
+			LLC_SETFLAG(linkp, DATA, 0);
+			action = LLC_DATA_INDICATION;
+		} else if ((cmdrsp == LLC_CMD && pollfinal == 0 && p == 0) ||
+			   (cmdrsp == LLC_RSP && pollfinal == p)) {
+			LLC_INC(linkp->llcl_vr);
+			llc_send(linkp, LLCFT_RNR, LLC_CMD, 1);
+			LLC_START_P_TIMER(linkp);
+			LLC_UPDATE_NR_RECEIVED(linkp, nr);
+			if (LLC_GETFLAG(linkp, DATA) == 2)
+				LLC_STOP_REJ_TIMER(linkp);
+			if (cmdrsp == LLC_RSP && pollfinal == 1)
+				LLC_CLEAR_REMOTE_BUSY(linkp, action);
+			action = LLC_DATA_INDICATION;
+		} else if (pollfinal == 0 && p == 1) {
+			LLC_INC(linkp->llcl_vr);
+			llc_send(linkp, LLCFT_RNR, LLC_CMD, 0);
+			LLC_UPDATE_NR_RECEIVED(linkp, nr);
+			if (LLC_GETFLAG(linkp, DATA) == 2)
+				LLC_STOP_REJ_TIMER(linkp);
+			LLC_SETFLAG(linkp, DATA, 0);
+			action = LLC_DATA_INDICATION;
+		}
+		break;
+	}
+	case LLCFT_RR + LLC_CMD:
+	case LLCFT_RR + LLC_RSP: 
+	case LLCFT_RNR + LLC_CMD:
+	case LLCFT_RNR + LLC_RSP: { 
+		/*
+		 * NOTE(review): p is read but not used in this branch, whereas
+		 * llc_state_NORMAL() additionally requires p == 1 for a final
+		 * response --- confirm against ISO 8802-2 which is intended.
+		 */
+		register int p = LLC_GETFLAG(linkp, P);
+		register int nr = LLCGBITS(frame->llc_control_ext, s_nr);
+		
+		if (cmdrsp == LLC_CMD && pollfinal == 1) {
+			llc_send(linkp, LLCFT_RNR, LLC_RSP, 1);
+			LLC_UPDATE_NR_RECEIVED(linkp, nr);
+			if (frame_kind == LLCFT_RR) {
+				LLC_CLEAR_REMOTE_BUSY(linkp, action);
+			} else {
+				LLC_SET_REMOTE_BUSY(linkp, action);
+			}
+		} else if (pollfinal == 0 ||
+			   (cmdrsp == LLC_RSP && pollfinal == 1)) {
+			/*
+			 * The test above used to read "pollfinal = 0 || ...",
+			 * an assignment that overwrote pollfinal and skipped
+			 * this branch for every frame with the P/F bit clear.
+			 */
+			LLC_UPDATE_P_FLAG(linkp, cmdrsp, pollfinal);
+			LLC_UPDATE_NR_RECEIVED(linkp, nr);
+			if (frame_kind == LLCFT_RR) {
+				LLC_CLEAR_REMOTE_BUSY(linkp, action);
+			} else  {
+				LLC_SET_REMOTE_BUSY(linkp, action);
+			}
+		}
+		break;
+	}
+	case LLCFT_REJ + LLC_CMD:
+	case LLCFT_REJ + LLC_RSP: {
+		register int p = LLC_GETFLAG(linkp, P);
+		register int nr = LLCGBITS(frame->llc_control_ext, s_nr);
+
+		if (cmdrsp == LLC_CMD && pollfinal == 1) {
+			linkp->llcl_vs = nr;
+			LLC_UPDATE_NR_RECEIVED(linkp, nr);
+			llc_send(linkp, LLCFT_RNR, LLC_RSP, 1);
+			llc_resend(linkp, LLC_CMD, 0);
+			LLC_CLEAR_REMOTE_BUSY(linkp, action);
+		} else if ((cmdrsp == LLC_CMD && pollfinal == 0 && p == 0) ||
+			   (cmdrsp == LLC_RSP && pollfinal == p)) {
+			linkp->llcl_vs = nr;
+			LLC_UPDATE_NR_RECEIVED(linkp, nr);
+			LLC_UPDATE_P_FLAG(linkp, cmdrsp, pollfinal);
+			llc_resend(linkp, LLC_CMD, 0);
+			LLC_CLEAR_REMOTE_BUSY(linkp, action);
+		} else if (pollfinal == 0 && p == 1) {
+			linkp->llcl_vs = nr;
+			LLC_UPDATE_NR_RECEIVED(linkp, nr);
+			llc_resend(linkp, LLC_CMD, 0);
+			LLC_CLEAR_REMOTE_BUSY(linkp, action);
+		}
+		break;
+	}
+	case NL_INITIATE_PF_CYCLE:
+		if (LLC_GETFLAG(linkp, P) == 0) {
+			llc_send(linkp, LLCFT_RNR, LLC_CMD, 1);
+			LLC_START_P_TIMER(linkp);
+			action = 0;
+		}
+		break;
+	case LLC_P_TIMER_EXPIRED:
+		/* multiple possibilities */
+		if (linkp->llcl_retry < llc_n2) {
+			llc_send(linkp, LLCFT_RNR, LLC_CMD, 1);
+			LLC_START_P_TIMER(linkp);
+			linkp->llcl_retry++;
+			LLC_NEWSTATE(linkp, AWAIT_BUSY);
+			action = 0;
+		}
+		break;
+	case LLC_ACK_TIMER_EXPIRED:
+	case LLC_BUSY_TIMER_EXPIRED:
+		if (LLC_GETFLAG(linkp, P) == 0 && linkp->llcl_retry < llc_n2) {
+			llc_send(linkp, LLCFT_RNR, LLC_CMD, 1);
+			LLC_START_P_TIMER(linkp);
+			linkp->llcl_retry++;
+			LLC_NEWSTATE(linkp, AWAIT_BUSY);
+			action = 0;
+		}
+		break;
+	case LLC_REJ_TIMER_EXPIRED:
+		/* braces make explicit that the else belongs to the P flag test */
+		if (linkp->llcl_retry < llc_n2) {
+			if (LLC_GETFLAG(linkp, P) == 0) {
+				/* multiple possibilities */
+				llc_send(linkp, LLCFT_RNR, LLC_CMD, 1);
+				LLC_START_P_TIMER(linkp);
+				linkp->llcl_retry++;
+				LLC_SETFLAG(linkp, DATA, 1);
+				LLC_NEWSTATE(linkp, AWAIT_BUSY);
+				action = 0;
+			} else {
+				LLC_SETFLAG(linkp, DATA, 1);
+				LLC_NEWSTATE(linkp, BUSY);
+				action = 0;
+			}
+		}
+		break;
+	}
+	/* not consumed above: defer to the shared core handler */
+	if (action == LLC_PASSITON)
+		action = llc_state_NBRAcore(linkp, frame, frame_kind, 
+					    cmdrsp, pollfinal);
+
+	return action;
+}
+
+/*
+ * REJECT --- A data link connection exists between the local LLC service
+ *            access point and the remote LLC service access point. The local
+ *            connection component has requested that the remote connection
+ *            component resend a specific I PDU that the local connection
+ *            component has detected as being out of sequence. Both I PDUs and
+ *            supervisory PDUs may be sent and received.
+ */ 
+int
+llc_state_REJECT(struct llc_linkcb *linkp, struct llc *frame, int frame_kind,
+		 int cmdrsp, int pollfinal)
+{
+	int action = LLC_PASSITON;	/* stays so unless handled below */
+	/* Handle one event in state REJECT; returns the resulting action code. */
+	switch(frame_kind + cmdrsp) {
+	case NL_DATA_REQUEST:
+		if (LLC_GETFLAG(linkp, P) == 0) {
+			llc_send(linkp, LLCFT_INFO, LLC_CMD, 1);
+			LLC_START_P_TIMER(linkp);
+			if (LLC_TIMERXPIRED(linkp, ACK) != LLC_TIMER_RUNNING)
+				LLC_START_ACK_TIMER(linkp);
+			LLC_NEWSTATE(linkp, REJECT);
+			action = 0;
+		} else {
+			llc_send(linkp, LLCFT_INFO, LLC_CMD, 0);
+			if (LLC_TIMERXPIRED(linkp, ACK) != LLC_TIMER_RUNNING)
+				LLC_START_ACK_TIMER(linkp);
+			LLC_NEWSTATE(linkp, REJECT);
+			action = 0;
+		}
+		break;
+	case NL_LOCAL_BUSY_DETECTED:
+		if (LLC_GETFLAG(linkp, P) == 0) {
+			llc_send(linkp, LLCFT_RNR, LLC_CMD, 1);
+			LLC_START_P_TIMER(linkp);
+			LLC_SETFLAG(linkp, DATA, 2);
+			LLC_NEWSTATE(linkp, BUSY);
+			action = 0;
+		} else {
+			llc_send(linkp, LLCFT_RNR, LLC_CMD, 0);
+			LLC_SETFLAG(linkp, DATA, 2);
+			LLC_NEWSTATE(linkp, BUSY);
+			action = 0;
+		}
+		break;
+	case LLC_INVALID_NS + LLC_CMD:
+	case LLC_INVALID_NS + LLC_RSP: {
+		register int p = LLC_GETFLAG(linkp, P);
+		register int nr = LLCGBITS(frame->llc_control_ext, s_nr);
+
+		if (cmdrsp == LLC_CMD && pollfinal == 1) {
+			llc_send(linkp, LLCFT_RR, LLC_RSP, 1);
+			LLC_UPDATE_NR_RECEIVED(linkp, nr);
+			action = 0;
+		} else if (pollfinal == 0 ||
+			   (cmdrsp == LLC_RSP && pollfinal == 1 && p == 1)) {
+			LLC_UPDATE_NR_RECEIVED(linkp, nr);
+			LLC_UPDATE_P_FLAG(linkp, cmdrsp, pollfinal);
+			if (cmdrsp == LLC_RSP && pollfinal == 1) {
+				LLC_CLEAR_REMOTE_BUSY(linkp, action);
+			} else action = 0;
+		}
+		break;
+	}
+	case LLCFT_INFO + LLC_CMD:
+	case LLCFT_INFO + LLC_RSP: {
+		register int p = LLC_GETFLAG(linkp, P);
+		register int nr = LLCGBITS(frame->llc_control_ext, s_nr);
+
+		if (cmdrsp == LLC_CMD && pollfinal == 1) {
+			LLC_INC(linkp->llcl_vr);
+			LLC_SENDACKNOWLEDGE(linkp, LLC_RSP, 1);
+			LLC_UPDATE_NR_RECEIVED(linkp, nr);
+			LLC_STOP_REJ_TIMER(linkp);
+			LLC_NEWSTATE(linkp, NORMAL);
+			action = LLC_DATA_INDICATION;
+		} else if ((cmdrsp == LLC_RSP && pollfinal == p) ||
+			   (cmdrsp == LLC_CMD && pollfinal == 0 && p == 0)) {
+			LLC_INC(linkp->llcl_vr);
+			LLC_SENDACKNOWLEDGE(linkp, LLC_CMD, 1);
+			LLC_START_P_TIMER(linkp);
+			LLC_UPDATE_NR_RECEIVED(linkp, nr);
+			if (cmdrsp == LLC_RSP && pollfinal == 1)
+				LLC_CLEAR_REMOTE_BUSY(linkp, action);
+			LLC_STOP_REJ_TIMER(linkp);
+			LLC_NEWSTATE(linkp, NORMAL);
+			action = LLC_DATA_INDICATION;
+		} else if (pollfinal == 0 && p == 1) {
+			LLC_INC(linkp->llcl_vr);
+			LLC_SENDACKNOWLEDGE(linkp, LLC_CMD, 0);
+			LLC_STOP_REJ_TIMER(linkp);
+			LLC_NEWSTATE(linkp, NORMAL);
+			action = LLC_DATA_INDICATION;
+		}
+		break;
+	}
+	case LLCFT_RR + LLC_CMD:
+	case LLCFT_RR + LLC_RSP: {
+		register int p = LLC_GETFLAG(linkp, P);
+		register int nr = LLCGBITS(frame->llc_control_ext, s_nr);
+
+		if (cmdrsp == LLC_CMD && pollfinal == 1) {
+			LLC_SENDACKNOWLEDGE(linkp, LLC_RSP, 1);
+			LLC_UPDATE_NR_RECEIVED(linkp, nr);
+			LLC_CLEAR_REMOTE_BUSY(linkp, action);
+		} else if (pollfinal == 0 ||
+			   (cmdrsp == LLC_RSP && pollfinal == 1 && p == 1)) {
+			LLC_UPDATE_P_FLAG(linkp, cmdrsp, pollfinal);
+			LLC_UPDATE_NR_RECEIVED(linkp, nr);
+			LLC_CLEAR_REMOTE_BUSY(linkp, action);
+		}
+		break;
+	}
+	case LLCFT_RNR + LLC_CMD:
+	case LLCFT_RNR + LLC_RSP: {
+		register int p = LLC_GETFLAG(linkp, P);
+		register int nr = LLCGBITS(frame->llc_control_ext, s_nr);
+
+		if (cmdrsp == LLC_CMD && pollfinal == 1) {
+			llc_send(linkp, LLCFT_RR, LLC_RSP, 1);
+			LLC_UPDATE_NR_RECEIVED(linkp, nr);
+			LLC_SET_REMOTE_BUSY(linkp, action);
+		} else if (pollfinal == 0 ||
+			   (cmdrsp == LLC_RSP && pollfinal == 1 && p == 1)) {
+			LLC_UPDATE_P_FLAG(linkp, cmdrsp, pollfinal);
+			LLC_UPDATE_NR_RECEIVED(linkp, nr);
+			action = 0;
+		}
+		break;
+	}
+	case LLCFT_REJ + LLC_CMD:
+	case LLCFT_REJ + LLC_RSP: {
+		register int p = LLC_GETFLAG(linkp, P);
+		register int nr = LLCGBITS(frame->llc_control_ext, s_nr);
+
+		if (cmdrsp == LLC_CMD && pollfinal == 1) {
+			linkp->llcl_vs = nr;
+			LLC_UPDATE_NR_RECEIVED(linkp, nr);
+			llc_resend(linkp, LLC_RSP, 1);
+			LLC_CLEAR_REMOTE_BUSY(linkp, action);
+		} else if ((cmdrsp == LLC_CMD && pollfinal == 0 && p == 0) ||
+			   (cmdrsp == LLC_RSP && pollfinal == p)) {
+			linkp->llcl_vs = nr;
+			LLC_UPDATE_NR_RECEIVED(linkp, nr);
+			LLC_UPDATE_P_FLAG(linkp, cmdrsp, pollfinal);
+			llc_resend(linkp, LLC_CMD, 0);
+			LLC_CLEAR_REMOTE_BUSY(linkp, action);
+		} else if (pollfinal == 0 && p == 1) {
+			linkp->llcl_vs = nr;
+			LLC_UPDATE_NR_RECEIVED(linkp, nr);
+			llc_resend(linkp, LLC_CMD, 0);
+			LLC_CLEAR_REMOTE_BUSY(linkp, action);
+		}
+		break;
+	}
+	case NL_INITIATE_PF_CYCLE:
+		if (LLC_GETFLAG(linkp, P) == 0) {
+			llc_send(linkp, LLCFT_RR, LLC_CMD, 1);
+			LLC_START_P_TIMER(linkp);
+			action = 0;
+		}
+		break;
+	case LLC_REJ_TIMER_EXPIRED:
+		if (LLC_GETFLAG(linkp, P) == 0 && linkp->llcl_retry < llc_n2) {
+			llc_send(linkp, LLCFT_REJ, LLC_CMD, 1);
+			LLC_START_P_TIMER(linkp);
+			LLC_START_REJ_TIMER(linkp);
+			linkp->llcl_retry++;
+			action = 0;
+		}
+		break;	/* must not run into the P timer recovery below */
+	case LLC_P_TIMER_EXPIRED:
+		if (linkp->llcl_retry < llc_n2) {
+			llc_send(linkp, LLCFT_RR, LLC_CMD, 1);
+			LLC_START_P_TIMER(linkp);
+			LLC_START_REJ_TIMER(linkp);
+			linkp->llcl_retry++;
+			LLC_NEWSTATE(linkp, AWAIT_REJECT);
+			action = 0;
+		}
+		break;
+	case LLC_ACK_TIMER_EXPIRED:
+	case LLC_BUSY_TIMER_EXPIRED:
+		if (LLC_GETFLAG(linkp, P) == 0 && linkp->llcl_retry < llc_n2) {
+			llc_send(linkp, LLCFT_RR, LLC_CMD, 1);
+			LLC_START_P_TIMER(linkp);
+			LLC_START_REJ_TIMER(linkp);
+			linkp->llcl_retry++;
+			/*
+			 * I cannot locate the description of RESET_V(S)
+			 * in ISO 8802-2, table 7-1, state REJECT, last event,
+			 * and  assume they meant to set V(S) to 0 ...
+			 */
+			linkp->llcl_vs = 0; /* XXX */
+			LLC_NEWSTATE(linkp, AWAIT_REJECT);
+			action = 0;
+		}
+		break;
+	}
+	if (action == LLC_PASSITON)	/* not handled here: try the common code */
+		action = llc_state_NBRAcore(linkp, frame, frame_kind,
+					    cmdrsp, pollfinal);
+
+	return action;
+}
+
+/*
+ * AWAIT --- A data link connection exists between the local LLC service access
+ *           point and the remote LLC service access point. The local LLC is
+ *           performing a timer recovery operation and has sent a command PDU
+ *           with the P bit set to ``1'', and is awaiting an acknowledgement
+ *           from the remote LLC. I PDUs may be received but not sent.
+ *           Supervisory PDUs may be both sent and received.
+ */
+int
+llc_state_AWAIT(struct llc_linkcb *linkp, struct llc *frame, int frame_kind,
+		int cmdrsp, int pollfinal)
+{
+	int action = LLC_PASSITON;	/* kept if no case below handles the event */
+	/* Handle one event in state AWAIT; returns the resulting action code. */
+	switch(frame_kind + cmdrsp) {
+	case LLC_LOCAL_BUSY_DETECTED:
+		llc_send(linkp, LLCFT_RNR, LLC_CMD, 0);
+		LLC_SETFLAG(linkp, DATA, 0);
+		LLC_NEWSTATE(linkp, AWAIT_BUSY);
+		action = 0;
+		break;
+	case LLC_INVALID_NS + LLC_CMD:
+	case LLC_INVALID_NS + LLC_RSP: {
+		register int p = LLC_GETFLAG(linkp, P);	/* unused in this case */
+		register int nr = LLCGBITS(frame->llc_control_ext, s_nr);
+		
+		if (cmdrsp == LLC_CMD && pollfinal == 1) {
+			llc_send(linkp, LLCFT_REJ, LLC_RSP, 1);
+			LLC_UPDATE_NR_RECEIVED(linkp, nr);
+			LLC_START_REJ_TIMER(linkp);
+			LLC_NEWSTATE(linkp, AWAIT_REJECT);
+			action = 0;
+		} else if (cmdrsp == LLC_RSP && pollfinal == 1) {
+			llc_send(linkp, LLCFT_REJ, LLC_CMD, 0);
+			LLC_UPDATE_NR_RECEIVED(linkp, nr);
+			linkp->llcl_vs = nr;
+			LLC_STOP_P_TIMER(linkp);
+			llc_resend(linkp, LLC_CMD, 0);
+			LLC_START_REJ_TIMER(linkp);
+			LLC_CLEAR_REMOTE_BUSY(linkp, action); /* presumably sets action */
+			LLC_NEWSTATE(linkp, REJECT);
+		} else if (pollfinal == 0) {
+			llc_send(linkp, LLCFT_REJ, LLC_CMD, 0);
+			LLC_UPDATE_NR_RECEIVED(linkp, nr);
+			LLC_START_REJ_TIMER(linkp);
+			LLC_NEWSTATE(linkp, AWAIT_REJECT);
+			action = 0;
+		}
+		break;
+	}
+	case LLCFT_INFO + LLC_RSP:
+	case LLCFT_INFO + LLC_CMD: {
+		register int p = LLC_GETFLAG(linkp, P);	/* unused in this case */
+		register int nr = LLCGBITS(frame->llc_control_ext, s_nr);
+		
+		LLC_INC(linkp->llcl_vr);	/* done before any branch is chosen */
+		if (cmdrsp == LLC_CMD && pollfinal == 1) {
+			llc_send(linkp, LLCFT_RR, LLC_RSP, 1);
+			LLC_UPDATE_NR_RECEIVED(linkp, nr);
+			action = LLC_DATA_INDICATION;
+		} else if (cmdrsp == LLC_RSP && pollfinal == 1) {
+			LLC_UPDATE_NR_RECEIVED(linkp, nr);
+			linkp->llcl_vs = nr;
+			llc_resend(linkp, LLC_CMD, 1);
+			LLC_START_P_TIMER(linkp);
+			LLC_CLEAR_REMOTE_BUSY(linkp, action);
+			LLC_NEWSTATE(linkp, NORMAL);
+			action = LLC_DATA_INDICATION;	/* replaces any earlier value */
+		} else if (pollfinal == 0) {
+			llc_send(linkp, LLCFT_RR, LLC_CMD, 0);
+			LLC_UPDATE_NR_RECEIVED(linkp, nr);
+			action = LLC_DATA_INDICATION;
+		}
+		break;
+	}
+	case LLCFT_RR + LLC_CMD:
+	case LLCFT_RR + LLC_RSP:
+	case LLCFT_REJ + LLC_CMD:
+	case LLCFT_REJ + LLC_RSP: {
+		register int p = LLC_GETFLAG(linkp, P);	/* unused in this case */
+		register int nr = LLCGBITS(frame->llc_control_ext, s_nr);
+		
+		if (cmdrsp == LLC_CMD && pollfinal == 1) {
+			llc_send(linkp, LLCFT_RR, LLC_RSP, 1);
+			LLC_UPDATE_NR_RECEIVED(linkp, nr);
+			LLC_CLEAR_REMOTE_BUSY(linkp, action);
+		} else if (cmdrsp == LLC_RSP && pollfinal == 1) {
+			LLC_UPDATE_NR_RECEIVED(linkp, nr);
+			linkp->llcl_vs = nr;
+			LLC_STOP_P_TIMER(linkp);
+			llc_resend(linkp, LLC_CMD, 0);
+			LLC_CLEAR_REMOTE_BUSY(linkp, action);
+			LLC_NEWSTATE(linkp, NORMAL);
+		} else if (pollfinal == 0) {
+			LLC_UPDATE_NR_RECEIVED(linkp, nr);
+			LLC_CLEAR_REMOTE_BUSY(linkp, action);
+		}	
+		break;
+	}
+	case LLCFT_RNR + LLC_CMD:
+	case LLCFT_RNR + LLC_RSP: {
+		register int p = LLC_GETFLAG(linkp, P);	/* unused in this case */
+		register int nr = LLCGBITS(frame->llc_control_ext, s_nr);
+
+		if (pollfinal == 1 && cmdrsp == LLC_CMD) {
+			llc_send(linkp, LLCFT_RR, LLC_RSP, 1);
+			LLC_UPDATE_NR_RECEIVED(linkp, nr);
+			LLC_SET_REMOTE_BUSY(linkp, action); /* presumably sets action */
+		} else if (pollfinal == 1 && cmdrsp == LLC_RSP) {
+			LLC_UPDATE_NR_RECEIVED(linkp, nr);
+			linkp->llcl_vs = nr;
+			LLC_STOP_P_TIMER(linkp);
+			LLC_SET_REMOTE_BUSY(linkp, action);
+			LLC_NEWSTATE(linkp, NORMAL);
+		} else if (pollfinal == 0) {
+			LLC_UPDATE_NR_RECEIVED(linkp, nr);
+			LLC_SET_REMOTE_BUSY(linkp, action);
+		}
+		break;
+	}
+	case LLC_P_TIMER_EXPIRED:
+		if (linkp->llcl_retry < llc_n2) {	/* retries left: poll again */
+			llc_send(linkp, LLCFT_RR, LLC_CMD, 1);
+			LLC_START_P_TIMER(linkp);
+			linkp->llcl_retry++;
+			action = 0;
+		}
+		break;
+	}
+	if (action == LLC_PASSITON)	/* not handled here: try the common code */
+		action = llc_state_NBRAcore(linkp, frame, frame_kind, 
+					    cmdrsp, pollfinal);
+
+	return action;
+}
+
+/*
+ * AWAIT_BUSY --- A data link connection exists between the local LLC service
+ *                access point and the remote LLC service access point. The
+ *                local LLC is performing a timer recovery operation and has
+ *                sent a command PDU with the P bit set to ``1'', and is
+ *                awaiting an acknowledgement from the remote LLC. I PDUs may
+ *                not be sent. Local conditions make it likely that the
+ *                information field of received I PDUs will be ignored.
+ *                Supervisory PDUs may be both sent and received.
+ */
+int
+llc_state_AWAIT_BUSY(struct llc_linkcb *linkp, struct llc *frame, int frame_kind,
+		     int cmdrsp, int pollfinal)
+{
+	int action = LLC_PASSITON;	/* stays so unless handled below */
+
+	/* Handle one event in state AWAIT_BUSY; returns the action code. */
+	switch(frame_kind + cmdrsp) {
+	case LLC_LOCAL_BUSY_CLEARED:
+		switch (LLC_GETFLAG(linkp, DATA)) {
+		case 1:
+			llc_send(linkp, LLCFT_REJ, LLC_CMD, 0);
+			LLC_START_REJ_TIMER(linkp);
+			LLC_NEWSTATE(linkp, AWAIT_REJECT);
+			action = 0;
+			break;
+		case 0:
+			llc_send(linkp, LLCFT_RR, LLC_CMD, 0);
+			LLC_NEWSTATE(linkp, AWAIT);
+			action = 0;
+			break;
+		case 2:
+			llc_send(linkp, LLCFT_RR, LLC_CMD, 0);
+			LLC_NEWSTATE(linkp, AWAIT_REJECT);
+			action = 0;
+			break;
+		}
+		break;
+	case LLC_INVALID_NS + LLC_CMD:
+	case LLC_INVALID_NS + LLC_RSP: {
+		register int p = LLC_GETFLAG(linkp, P);
+		register int nr = LLCGBITS(frame->llc_control_ext, s_nr);
+
+		if (cmdrsp == LLC_CMD && pollfinal == 1) {
+			llc_send(linkp, LLCFT_RNR, LLC_RSP, 1);
+			LLC_UPDATE_NR_RECEIVED(linkp, nr);
+			LLC_SETFLAG(linkp, DATA, 1);
+			action = 0;
+		} else if (cmdrsp == LLC_RSP && pollfinal == 1) {
+			llc_send(linkp, LLCFT_RNR, LLC_CMD, 0);	/* optionally */
+			LLC_UPDATE_NR_RECEIVED(linkp, nr);
+			linkp->llcl_vs = nr;
+			LLC_STOP_P_TIMER(linkp);
+			LLC_SETFLAG(linkp, DATA, 1);
+			LLC_CLEAR_REMOTE_BUSY(linkp, action);
+			llc_resend(linkp, LLC_CMD, 0);
+			LLC_NEWSTATE(linkp, BUSY);
+		} else if (pollfinal == 0) {
+			llc_send(linkp, LLCFT_RNR, LLC_CMD, 0);	/* optionally */
+			LLC_UPDATE_NR_RECEIVED(linkp, nr);
+			LLC_SETFLAG(linkp, DATA, 1);
+			action = 0;
+		}
+		break;	/* an out-of-sequence frame is not an accepted I frame */
+	}
+	case LLCFT_INFO + LLC_CMD:
+	case LLCFT_INFO + LLC_RSP: {
+		register int p = LLC_GETFLAG(linkp, P);
+		register int nr = LLCGBITS(frame->llc_control_ext, s_nr);
+
+		if (cmdrsp == LLC_CMD && pollfinal == 1) {
+			llc_send(linkp, LLCFT_RNR, LLC_RSP, 1);
+			LLC_INC(linkp->llcl_vr);
+			LLC_UPDATE_NR_RECEIVED(linkp, nr);
+			LLC_SETFLAG(linkp, DATA, 0);
+			action = LLC_DATA_INDICATION;
+		} else if (cmdrsp == LLC_RSP && pollfinal == 1) {
+			llc_send(linkp, LLCFT_RNR, LLC_CMD, 1);
+			LLC_INC(linkp->llcl_vr);
+			LLC_START_P_TIMER(linkp);
+			LLC_UPDATE_NR_RECEIVED(linkp, nr);
+			linkp->llcl_vs = nr;
+			LLC_SETFLAG(linkp, DATA, 0);
+			LLC_CLEAR_REMOTE_BUSY(linkp, action);
+			llc_resend(linkp, LLC_CMD, 0);
+			LLC_NEWSTATE(linkp, BUSY);
+			action = LLC_DATA_INDICATION;
+		} else if (pollfinal == 0) {
+			llc_send(linkp, LLCFT_RNR, LLC_CMD, 0);
+			LLC_INC(linkp->llcl_vr);
+			LLC_UPDATE_NR_RECEIVED(linkp, nr);
+			LLC_SETFLAG(linkp, DATA, 0);
+			action = LLC_DATA_INDICATION;
+		}
+		break;
+	}
+	case LLCFT_RR + LLC_CMD:
+	case LLCFT_REJ + LLC_CMD:
+	case LLCFT_RR + LLC_RSP:
+	case LLCFT_REJ + LLC_RSP: {
+		register int p = LLC_GETFLAG(linkp, P);
+		register int nr = LLCGBITS(frame->llc_control_ext, s_nr);
+
+		if (cmdrsp == LLC_CMD && pollfinal == 1) {
+			llc_send(linkp, LLCFT_RNR, LLC_RSP, 1);
+			LLC_UPDATE_NR_RECEIVED(linkp, nr);
+			LLC_CLEAR_REMOTE_BUSY(linkp, action);
+		} else if (cmdrsp == LLC_RSP && pollfinal == 1) {
+			LLC_UPDATE_NR_RECEIVED(linkp, nr);
+			linkp->llcl_vs = nr;
+			LLC_STOP_P_TIMER(linkp);
+			llc_resend(linkp, LLC_CMD, 0);
+			LLC_CLEAR_REMOTE_BUSY(linkp, action);
+			LLC_NEWSTATE(linkp, BUSY);
+		} else if (pollfinal == 0) {
+			LLC_UPDATE_NR_RECEIVED(linkp, nr);
+			linkp->llcl_vs = nr;
+			LLC_STOP_P_TIMER(linkp);
+			llc_resend(linkp, LLC_CMD, 0);
+			LLC_CLEAR_REMOTE_BUSY(linkp, action);
+		}
+		break;
+	}
+	case LLCFT_RNR + LLC_CMD:
+	case LLCFT_RNR + LLC_RSP: {
+		register int p = LLC_GETFLAG(linkp, P);
+		register int nr = LLCGBITS(frame->llc_control_ext, s_nr);
+
+		if (cmdrsp == LLC_CMD && pollfinal == 1) {
+			llc_send(linkp, LLCFT_RNR, LLC_RSP, 1);
+			LLC_UPDATE_NR_RECEIVED(linkp, nr);
+			LLC_SET_REMOTE_BUSY(linkp, action);
+		} else if (cmdrsp == LLC_RSP && pollfinal == 1) {
+			LLC_UPDATE_NR_RECEIVED(linkp, nr);
+			linkp->llcl_vs = nr;
+			LLC_STOP_P_TIMER(linkp);
+			LLC_SET_REMOTE_BUSY(linkp, action);
+			LLC_NEWSTATE(linkp, BUSY);
+		} else if (pollfinal == 0) {
+			LLC_UPDATE_NR_RECEIVED(linkp, nr);
+			LLC_SET_REMOTE_BUSY(linkp, action);
+		}
+		break;
+	}
+	case LLC_P_TIMER_EXPIRED:
+		if (linkp->llcl_retry < llc_n2) {
+			llc_send(linkp, LLCFT_RNR, LLC_CMD, 1);
+			LLC_START_P_TIMER(linkp);
+			linkp->llcl_retry++;
+			action = 0;
+		}
+		break;
+	}
+	if (action == LLC_PASSITON)	/* not handled here: try the common code */
+		action = llc_state_NBRAcore(linkp, frame, frame_kind,
+					    cmdrsp, pollfinal);
+
+	return action;
+}
+
+/*
+ * AWAIT_REJECT --- A data link connection exists between the local LLC service
+ *                  access point and the remote LLC service access point. The
+ *                  local connection component has requested that the remote
+ *                  connection component re-transmit a specific I PDU that the
+ *                  local connection component has detected as being out of
+ *                  sequence. Before the local LLC entered this state it was
+ *                  performing a timer recovery operation and had sent a
+ *                  command PDU with the P bit set to ``1'', and is still
+ *                  awaiting an acknowledgment from the remote LLC. I PDUs may
+ *                  be received but not transmitted. Supervisory PDUs may be
+ *                  both transmitted and received.
+ */
+int
+llc_state_AWAIT_REJECT(struct llc_linkcb *linkp, struct llc *frame, int frame_kind,
+		       int cmdrsp, int pollfinal)
+{
+	int action = LLC_PASSITON;	/* kept if no case below handles the event */
+	/* Handle one event in state AWAIT_REJECT; returns the action code. */
+	switch(frame_kind + cmdrsp) {
+	case LLC_LOCAL_BUSY_DETECTED:
+		llc_send(linkp, LLCFT_RNR, LLC_CMD, 0);
+		LLC_SETFLAG(linkp, DATA, 2);
+		LLC_NEWSTATE(linkp, AWAIT_BUSY);
+		action = 0;
+		break;
+	case LLC_INVALID_NS + LLC_CMD:
+	case LLC_INVALID_NS + LLC_RSP: {
+		register int nr = LLCGBITS(frame->llc_control_ext, s_nr);
+		
+		if (cmdrsp == LLC_CMD && pollfinal == 1) {
+			llc_send(linkp, LLCFT_RR, LLC_RSP, 1);
+			LLC_UPDATE_NR_RECEIVED(linkp, nr);
+			action = 0;
+		} else if (cmdrsp == LLC_RSP && pollfinal == 1) {
+			LLC_UPDATE_NR_RECEIVED(linkp, nr);
+			linkp->llcl_vs = nr;
+			llc_resend(linkp, LLC_CMD, 1);
+			LLC_START_P_TIMER(linkp);
+			LLC_CLEAR_REMOTE_BUSY(linkp, action); /* presumably sets action */
+			LLC_NEWSTATE(linkp, REJECT);
+		} else if (pollfinal == 0) {
+			LLC_UPDATE_NR_RECEIVED(linkp, nr);
+			action = 0;	
+		}
+		break;
+	}
+	case LLCFT_INFO + LLC_CMD:
+	case LLCFT_INFO + LLC_RSP: {
+		register int nr = LLCGBITS(frame->llc_control_ext, s_nr);
+
+		if (cmdrsp == LLC_CMD && pollfinal == 1) {
+			LLC_INC(linkp->llcl_vr);
+			llc_send(linkp, LLCFT_RR, LLC_RSP, 1);
+			LLC_STOP_REJ_TIMER(linkp);
+			LLC_UPDATE_NR_RECEIVED(linkp, nr);
+			LLC_NEWSTATE(linkp, AWAIT);
+			action = LLC_DATA_INDICATION;
+		} else if (cmdrsp == LLC_RSP && pollfinal == 1) {
+			LLC_INC(linkp->llcl_vr);
+			LLC_STOP_P_TIMER(linkp);
+			LLC_STOP_REJ_TIMER(linkp);
+			LLC_UPDATE_NR_RECEIVED(linkp, nr);
+			linkp->llcl_vs = nr;
+			llc_resend(linkp, LLC_CMD, 0);
+			LLC_CLEAR_REMOTE_BUSY(linkp, action);
+			LLC_NEWSTATE(linkp, NORMAL);
+			action = LLC_DATA_INDICATION;	/* replaces any earlier value */
+		} else if (pollfinal == 0) {
+			LLC_INC(linkp->llcl_vr);
+			llc_send(linkp, LLCFT_RR, LLC_CMD, 0);
+			LLC_STOP_REJ_TIMER(linkp);
+			LLC_UPDATE_NR_RECEIVED(linkp, nr);
+			LLC_NEWSTATE(linkp, AWAIT);
+			action = LLC_DATA_INDICATION;
+		}
+		break;
+	}
+	case LLCFT_RR + LLC_CMD:
+	case LLCFT_REJ + LLC_CMD:
+	case LLCFT_RR + LLC_RSP:
+	case LLCFT_REJ + LLC_RSP: {
+		register int nr = LLCGBITS(frame->llc_control_ext, s_nr);
+		
+		if (cmdrsp == LLC_CMD && pollfinal ==  1) {
+			llc_send(linkp, LLCFT_RR, LLC_RSP, 1);
+			LLC_UPDATE_NR_RECEIVED(linkp, nr);
+			LLC_CLEAR_REMOTE_BUSY(linkp, action);
+		} else if (cmdrsp == LLC_RSP && pollfinal == 1) {
+			LLC_UPDATE_NR_RECEIVED(linkp, nr);
+			linkp->llcl_vs = nr;
+			llc_resend(linkp, LLC_CMD, 1);
+			LLC_START_P_TIMER(linkp);
+			LLC_CLEAR_REMOTE_BUSY(linkp, action);
+			LLC_NEWSTATE(linkp, REJECT);
+		} else if (pollfinal == 0) {
+			LLC_UPDATE_NR_RECEIVED(linkp, nr);
+			LLC_CLEAR_REMOTE_BUSY(linkp, action);
+		}
+		break;
+	}
+	case LLCFT_RNR + LLC_CMD:
+	case LLCFT_RNR + LLC_RSP: {
+		register int nr = LLCGBITS(frame->llc_control_ext, s_nr);
+
+		if (cmdrsp == LLC_CMD && pollfinal == 1) {
+			llc_send(linkp, LLCFT_RR, LLC_RSP, 1);
+			LLC_UPDATE_NR_RECEIVED(linkp, nr);
+			LLC_SET_REMOTE_BUSY(linkp, action); /* presumably sets action */
+		} else if (cmdrsp == LLC_RSP && pollfinal == 1) {
+			LLC_UPDATE_NR_RECEIVED(linkp, nr);
+			linkp->llcl_vs = nr;
+			LLC_STOP_P_TIMER(linkp);
+			LLC_SET_REMOTE_BUSY(linkp, action);
+			LLC_NEWSTATE(linkp, REJECT);
+		} else if (pollfinal == 0) {
+			LLC_UPDATE_NR_RECEIVED(linkp, nr);
+			LLC_SET_REMOTE_BUSY(linkp, action);
+		}
+		break;
+	}
+	case LLC_P_TIMER_EXPIRED:
+		if (linkp->llcl_retry < llc_n2) {	/* retries left: send REJ again */
+			llc_send(linkp, LLCFT_REJ, LLC_CMD, 1);
+			LLC_START_P_TIMER(linkp);
+			linkp->llcl_retry++;
+			action = 0;
+		}
+		break;
+	}
+	if (action == LLC_PASSITON)	/* not handled here: try the common code */
+		action = llc_state_NBRAcore(linkp, frame, frame_kind, 
+					    cmdrsp, pollfinal);
+
+	return action;
+}
+
+
+/*
+ * llc_statehandler() --- Wrapper for llc_state_*() functions: restarts the
+ *                         AGE timer, runs the link's current state handler
+ *                         and post-processes the action code it returns,
+ *                         feeding follow-up events back in where required.
+ */
+int
+llc_statehandler(struct llc_linkcb *linkp, struct llc *frame, int frame_kind,
+		 int cmdrsp, int pollfinal)
+{
+	register int action = 0;
+
+	/*
+	 * To check for ``zombie'' links each time llc_statehandler() gets called
+	 * the AGE timer of linkp is reset. If it expires llc_timer() will
+	 * take care of the link --- i.e. kill it 8=)
+	 */
+	LLC_STARTTIMER(linkp, AGE);
+
+	/*
+	 * Now call the current statehandler function.
+	 */
+	action = (*linkp->llcl_statehandler)(linkp, frame, frame_kind,
+					     cmdrsp, pollfinal);
+once_more_and_again:
+	switch (action) {
+	case LLC_CONNECT_INDICATION: {
+		int naction;
+
+		LLC_TRACE(linkp, LLCTR_INTERESTING, "CONNECT INDICATION");
+		linkp->llcl_nlnext =
+		     (*linkp->llcl_sapinfo->si_ctlinput)
+		      (PRC_CONNECT_INDICATION,
+		       (struct sockaddr *) &linkp->llcl_addr, (caddr_t) linkp);
+		if (linkp->llcl_nlnext == 0)	/* upper layer gave us nothing */
+			naction = NL_DISCONNECT_REQUEST;
+		else naction = NL_CONNECT_RESPONSE;
+		action = (*linkp->llcl_statehandler)(linkp, frame, naction, 0, 0);
+		goto once_more_and_again;
+	}
+	case LLC_CONNECT_CONFIRM:
+		/* llc_resend(linkp, LLC_CMD, 0); */
+		llc_start(linkp);
+		break;
+	case LLC_DISCONNECT_INDICATION:
+		LLC_TRACE(linkp, LLCTR_INTERESTING, "DISCONNECT INDICATION");
+		(*linkp->llcl_sapinfo->si_ctlinput)
+		  (PRC_DISCONNECT_INDICATION,
+		   (struct sockaddr *) &linkp->llcl_addr, linkp->llcl_nlnext);
+		break;
+	/* internally visible only */
+	case LLC_RESET_CONFIRM:
+	case LLC_RESET_INDICATION_LOCAL:
+		/*
+		 * not much we can do here, the state machine either makes it or
+		 * breaks it ...
+		 */
+		break;
+	case LLC_RESET_INDICATION_REMOTE:
+		LLC_TRACE(linkp, LLCTR_SHOULDKNOW, "RESET INDICATION (REMOTE)");
+		action = (*linkp->llcl_statehandler)(linkp, frame,
+						     NL_RESET_RESPONSE, 0, 0);
+		goto once_more_and_again;
+	case LLC_FRMR_SENT:
+		LLC_TRACE(linkp, LLCTR_URGENT, "FRMR SENT");
+		break;
+	case LLC_FRMR_RECEIVED:
+		LLC_TRACE(linkp, LLCTR_URGENT, "FRMR RECEIVED");
+		action = (*linkp->llcl_statehandler)(linkp, frame,
+						     NL_RESET_REQUEST, 0, 0);
+
+		goto once_more_and_again;
+	case LLC_REMOTE_BUSY:
+		LLC_TRACE(linkp, LLCTR_SHOULDKNOW, "REMOTE BUSY");
+		break;
+	case LLC_REMOTE_NOT_BUSY:
+		LLC_TRACE(linkp, LLCTR_SHOULDKNOW, "REMOTE BUSY CLEARED");
+		/*
+		 * try to get queued frames out
+		 */
+		llc_start(linkp);
+		break;
+	}
+
+	/*
+	 * Only LLC_DATA_INDICATION is for the time being
+	 * passed up to the network layer entity.
+	 * The remaining action codes are for the time
+	 * being visible internally only.
+	 * However, this can/may be changed if necessary.
+	 */
+
+	return action;
+}
+
+
+/*
+ * Core LLC2 routines
+ */ 
+
+/*
+ * llc_init() --- The INIT call, called once after the system is booted.
+ *                All it does is set llcintrq.ifq_maxlen to IFQ_MAXLEN.
+ */
+llc_init()
+{
+	llcintrq.ifq_maxlen = IFQ_MAXLEN;
+}
+
+
+/*
+ * llc_resetwindow() --- In case of a link reset, repack the frames queued in
+ *                       the LLC2 window so that they start at slot 0 again.
+ */
+
+void
+llc_resetwindow(struct llc_linkcb *linkp)
+{
+	register struct mbuf *head = NULL, *tail = NULL;
+	register struct mbuf *next;
+	register short slot;
+
+	/* Chain the queued frames, from the slot for llcl_nr_received onwards */
+	if (linkp->llcl_slotsfree != linkp->llcl_window) {
+		slot = llc_seq2slot(linkp, linkp->llcl_nr_received);
+		head = tail = linkp->llcl_output_buffers[slot];
+		while (slot != linkp->llcl_freeslot) {
+			if (linkp->llcl_output_buffers[slot] == NULL)
+				panic("LLC2 window broken");
+			else {
+				tail->m_nextpkt = linkp->llcl_output_buffers[slot];
+				tail = tail->m_nextpkt;
+			}
+			slot = llc_seq2slot(linkp, slot+1);
+		}
+	}
+	/* terminate the chain */
+	if (tail != NULL)
+		tail->m_nextpkt = NULL;
+
+	/* Refill the window from slot 0; an empty chain leaves slot at 0 */
+	slot = 0;
+	while (head != NULL) {
+		next = head->m_nextpkt;
+		linkp->llcl_output_buffers[slot] = head;
+		head->m_nextpkt = NULL;
+		head = next;
+		slot++;
+	}
+	linkp->llcl_freeslot = slot;
+
+	/* We're resetting the link, the next frame to be acknowledged is 0 */
+	linkp->llcl_nr_received = 0;
+	/* and the distance from the LLC2 sequence number to the window top is 0 */
+	linkp->llcl_projvs = linkp->llcl_freeslot;
+}
+			
+/*
+ * llc_newlink() --- Allocate a zeroed link control block for dst, fill in
+ *                   its bookkeeping fields and its window buffer, and enter
+ *                   it into llccb_q. Returns the new block, or NULL when
+ *                   either allocation fails. We don't initiate the actual
+ *                   setup of the LLC2 link here.
+ */
+struct llc_linkcb *
+llc_newlink(struct sockaddr_dl *dst, struct ifnet *ifp, struct rtentry *nlrt, 
+	    caddr_t nlnext, struct rtentry *llrt)
+{
+	u_char lsap = LLSAPADDR(dst);
+	struct llc_linkcb *lcb;
+	short nslots;
+
+	/* get a link control block with all fields cleared */
+	MALLOC(lcb, struct llc_linkcb *, sizeof(*lcb), M_PCB, M_DONTWAIT);
+	if (lcb == NULL)
+		return (NULL);
+	bzero((caddr_t)lcb, sizeof(*lcb));
+
+	/* remember the link address */
+	sdl_copy(dst, &lcb->llcl_addr);
+
+	/* network layer route and control block, link layer route, interface */
+	lcb->llcl_nlrt = nlrt;
+	lcb->llcl_nlnext = nlnext;
+	lcb->llcl_llrt = llrt;
+	lcb->llcl_if = ifp;
+
+	/* the write queue starts out empty */
+	lcb->llcl_writeqt = NULL;
+	lcb->llcl_writeqh = lcb->llcl_writeqt;
+
+	/* the initial state handler is the one for ADM */
+	lcb->llcl_statehandler = llc_state_ADM;
+
+	/*
+	 * Look up the service access point information.
+	 * NOTE(review): the result is dereferenced below without a NULL check.
+	 */
+	lcb->llcl_sapinfo = llc_getsapinfo(lsap, ifp);
+
+	/* window size comes from the SAP info block; 0 selects LLC_MAX_WINDOW */
+	nslots = lcb->llcl_sapinfo->si_window;
+	if (nslots == 0)
+		nslots = LLC_MAX_WINDOW;
+
+	/* get the cleared array of window slots, or give the block back */
+	MALLOC(lcb->llcl_output_buffers, struct mbuf **, 
+	       nslots*sizeof(struct mbuf *), M_PCB, M_DONTWAIT);
+	if (lcb->llcl_output_buffers == NULL) {
+		FREE(lcb, M_PCB);
+		return (NULL);
+	}
+	bzero((caddr_t)lcb->llcl_output_buffers, 
+	      nslots*sizeof(struct mbuf *));
+
+	/* every slot of the window is free to begin with */
+	lcb->llcl_window = nslots;
+	lcb->llcl_slotsfree = lcb->llcl_window;
+
+	/* enter into the linked list of link control blocks */
+	insque(lcb, &llccb_q);
+
+	return (lcb);
+}
+
+/*
+ * llc_dellink() --- Farewell to a link control block: tell the upper layer,
+ *                   unhook the block and free everything it still holds.
+ */
+llc_dellink(struct llc_linkcb *linkp)
+{
+	register struct npaidbentry *sap = linkp->llcl_sapinfo;
+	register struct mbuf *pkt;
+	register struct mbuf *following;
+	register int slot;
+
+	/* a disconnect indication goes up if there is someone to receive it */
+	if (linkp->llcl_nlnext && sap->si_ctlinput)
+		(*sap->si_ctlinput)(PRC_DISCONNECT_INDICATION, 
+		    (struct sockaddr *)&linkp->llcl_addr, linkp->llcl_nlnext);
+
+	/* the link layer route entry must no longer point at this block */
+	if (linkp->llcl_llrt)
+		((struct npaidbentry *)(linkp->llcl_llrt->rt_llinfo))->np_link 
+			= (struct llc_linkcb *) 0;
+
+	/* leave the link control block queue */
+	remque(linkp);
+
+	/* free whatever is still waiting on the write queue */
+	pkt = linkp->llcl_writeqh;
+	while (pkt) {
+		following = pkt->m_act;
+		m_freem(pkt);
+		pkt = following;
+	}
+
+	/* free the packets still held in the window slots */
+	for (slot = 0; slot < linkp->llcl_window; slot++) {
+		if (linkp->llcl_output_buffers[slot])
+			m_freem(linkp->llcl_output_buffers[slot]);
+	}
+
+	/* finally return the window space and the control block itself */
+	FREE((caddr_t)linkp->llcl_output_buffers, M_PCB);
+	FREE((caddr_t)linkp, M_PCB);
+}
+
+llc_decode(struct llc* frame, struct llc_linkcb * linkp)
+{
+	register int ft = LLC_BAD_PDU;	/* result if no control value matches */
+	/* Classify frame by its control field; linkp may be 0 (checks skipped). */
+	if ((frame->llc_control & 01) == 0) {	/* low bit clear: I frame */
+		ft = LLCFT_INFO;
+	/* S or U frame ? */
+	} else switch (frame->llc_control) {
+
+	/* U frames */
+	case LLC_UI:
+	case LLC_UI_P:     ft = LLC_UI; break; /* NOTE(review): not LLCFT_*? */
+	case LLC_DM:
+	case LLC_DM_P:     ft =LLCFT_DM; break;
+	case LLC_DISC:
+	case LLC_DISC_P:   ft = LLCFT_DISC; break;
+	case LLC_UA:
+	case LLC_UA_P:     ft = LLCFT_UA; break;
+	case LLC_SABME:
+	case LLC_SABME_P:  ft = LLCFT_SABME; break;
+	case LLC_FRMR:
+	case LLC_FRMR_P:   ft = LLCFT_FRMR; break;
+	case LLC_XID:
+	case LLC_XID_P:    ft = LLCFT_XID; break;
+	case LLC_TEST:
+	case LLC_TEST_P:   ft = LLCFT_TEST; break;
+
+	/* S frames */
+	case LLC_RR:       ft = LLCFT_RR; break;
+	case LLC_RNR:      ft = LLCFT_RNR; break;
+	case LLC_REJ:      ft = LLCFT_REJ; break;
+	} /* switch */
+	/* With a link, I and S frames also get their sequence numbers checked. */
+	if (linkp) {
+		switch (ft) {
+		case LLCFT_INFO:
+			/* the frame's i_ns field must equal the link's llcl_vr */
+			if (LLCGBITS(frame->llc_control, i_ns) != linkp->llcl_vr) {
+				ft = LLC_INVALID_NS;
+				break;
+			}
+			/* fall thru --- yeeeeeee */
+		case LLCFT_RR:
+		case LLCFT_RNR:
+		case LLCFT_REJ:
+			/* splash! */
+			if (LLC_NR_VALID(linkp, LLCGBITS(frame->llc_control_ext, 
+							 s_nr)) == 0)
+				ft = LLC_INVALID_NR;
+			break;
+		}
+	}
+
+	return ft;
+}
+
+/*
+ * llc_anytimersup() --- Checks if at least one timer is still up and running:
+ *                       returns 1 if so, 0 otherwise.
+ */
+int
+llc_anytimersup(struct llc_linkcb * linkp)
+{
+	register int t;
+
+	FOR_ALL_LLC_TIMERS(t)
+		if (linkp->llcl_timers[t] > 0)
+			break;
+	/* ending at LLC_AGE_SHIFT is taken to mean that no timer was positive */
+	return (t != LLC_AGE_SHIFT);
+}
+
+/*
+ * llc_link_dump() - dump link info: helper macros and the timer name table
+ */
+/* SAL: a link's llcl_addr as sockaddr_dl; CHECK: return the state's name */
+#define SAL(s) ((struct sockaddr_dl *)&(s)->llcl_addr)
+#define CHECK(l, s) if (LLC_STATEEQ(l, s)) return #s
+/* printed by llc_link_dump(), indexed like the link's llcl_timers array */
+char *timer_names[] = {"ACK", "P", "BUSY", "REJ", "AGE"};
+
+char *
+llc_getstatename(struct llc_linkcb *linkp)
+{
+	CHECK(linkp, ADM);	/* each CHECK returns its state name on a match */
+	CHECK(linkp, CONN);
+	CHECK(linkp, RESET_WAIT);
+	CHECK(linkp, RESET_CHECK);
+	CHECK(linkp, SETUP);
+	CHECK(linkp, RESET);
+	CHECK(linkp, D_CONN);
+	CHECK(linkp, ERROR);
+	CHECK(linkp, NORMAL);
+	CHECK(linkp, BUSY);
+	CHECK(linkp, REJECT);
+	CHECK(linkp, AWAIT);
+	CHECK(linkp, AWAIT_BUSY);
+	CHECK(linkp, AWAIT_REJECT);
+	/* none of the states listed above matched */
+	return "UNKNOWN - eh?";
+}
+
+void
+llc_link_dump(struct llc_linkcb* linkp, const char *message)
+{
+	/* Print a snapshot of linkp, tagged with message, using printf(). */
+	register int i;
+
+	/* print interface */
+	printf("if %s%d\n", linkp->llcl_if->if_name, linkp->llcl_if->if_unit);
+
+	/* print message */
+	printf(">> %s <<\n", message);
+
+	/* print MAC and LSAP */
+	printf("llc addr ");
+	for (i = 0; i < (SAL(linkp)->sdl_alen)-2; i++)
+		printf("%x:", (char)*(LLADDR(SAL(linkp))+i) & 0xff);
+	printf("%x,", (char)*(LLADDR(SAL(linkp))+i) & 0xff);
+	printf("%x\n", (char)*(LLADDR(SAL(linkp))+i+1) & 0xff);
+
+	/* print state we're in and timers */
+	printf("state %s, ", llc_getstatename(linkp));
+	for (i = LLC_ACK_SHIFT; i < LLC_AGE_SHIFT; i++)
+		printf("%s-%c %d/", timer_names[i],
+		       (linkp->llcl_timerflags & (1<<i) ? 'R' : 'S'),
+		       linkp->llcl_timers[i]);
+	printf("%s-%c %d\n", timer_names[i], (linkp->llcl_timerflags & (1<<i) ?
+					     'R' : 'S'), linkp->llcl_timers[i]);
+
+	/* print flag values; NOTE(review): an "F %d" field had no argument */
+	printf("flags P %d/S %d/DATA %d/REMOTE_BUSY %d\n",
+	       LLC_GETFLAG(linkp, P), LLC_GETFLAG(linkp, S),
+	       LLC_GETFLAG(linkp, DATA), LLC_GETFLAG(linkp, REMOTE_BUSY));
+
+	/* print receive and send state variables, ack, and window */
+	printf("V(R) %d/V(S) %d/N(R) received %d/window %d/freeslot %d\n",
+	       linkp->llcl_vr, linkp->llcl_vs, linkp->llcl_nr_received,
+	       linkp->llcl_window, linkp->llcl_freeslot);
+
+	/* further expansions can follow here */
+
+}
+
+void
+llc_trace(struct llc_linkcb *linkp, int level, const char *message)
+{
+	/* dump only for SAPs with tracing on and levels above llc_tracelevel */
+	if (!linkp->llcl_sapinfo->si_trace || level <= llc_tracelevel)
+		return;
+	llc_link_dump(linkp, message);
+}
diff --git a/sys/netccitt/llc_timer.c b/sys/netccitt/llc_timer.c
new file mode 100644
index 00000000000..0aecd08b68d
--- /dev/null
+++ b/sys/netccitt/llc_timer.c
@@ -0,0 +1,180 @@
+/* 
+ * Copyright (C) Dirk Husemann, Computer Science Department IV, 
+ * 		 University of Erlangen-Nuremberg, Germany, 1990, 1991, 1992
+ * Copyright (c) 1992, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ * 
+ * This code is derived from software contributed to Berkeley by
+ * Dirk Husemann and the Computer Science Department (IV) of
+ * the University of Erlangen-Nuremberg, Germany.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)llc_timer.c	8.1 (Berkeley) 6/10/93
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/mbuf.h>
+#include <sys/domain.h>
+#include <sys/socket.h>
+#include <sys/protosw.h>
+#include <sys/errno.h>
+#include <sys/time.h>
+#include <sys/kernel.h>
+
+#include <net/if.h>
+#include <net/if_dl.h>
+#include <net/if_llc.h>
+
+#include <netccitt/dll.h>
+#include <netccitt/llc_var.h>
+
+
+/*
+ * Various timer values.  They can be adjusted
+ * by patching the binary with adb if necessary.
+ *
+ * Each timer value is a count of llc_timer() invocations:
+ * LLC_STARTTIMER() loads llc_<NAME>_timer into the link's counter and
+ * llc_timer() decrements that counter once per call.  The variables
+ * must keep the llc_<NAME>_timer spelling because LLC_STARTTIMER()
+ * builds the name by token pasting.  llc_n2 is not a timer; its
+ * default LLC_N2_VALUE is described as a retry limit in llc_var.h.
+ */
+/* ISO 8802-2 timers */
+int 	llc_n2 			= LLC_N2_VALUE;
+int 	llc_ACK_timer 		= LLC_ACK_TIMER;
+int     llc_P_timer             = LLC_P_TIMER;
+int     llc_BUSY_timer          = LLC_BUSY_TIMER;
+int     llc_REJ_timer           = LLC_REJ_TIMER;
+/* Implementation specific timers */
+int 	llc_AGE_timer           = LLC_AGE_TIMER;
+int     llc_DACTION_timer       = LLC_DACTION_TIMER;
+
+/*
+ * The timer routine. We are called every 500ms by the kernel.
+ * Handle the various virtual timers.
+ *
+ * For every link on llccb_q, in this order:
+ *   1. DACTION (delayed action) timer: age it, or on expiry send the
+ *      RR frame selected by the link's DACTION flag and stop the timer.
+ *   2. AGE (link idle) timer: age it, or on expiry delete the link when
+ *      llc_anytimersup() reports none, otherwise restart the timer.
+ *   3. ISO 8802-2 timers (ACK, P, BUSY, REJ): a running timer whose
+ *      counter reached zero has its running bit cleared and the
+ *      matching *_TIMER_EXPIRED event is handed to llc_statehandler();
+ *      otherwise a positive counter is decremented.
+ *
+ * The whole walk runs between splimp() and splx().
+ */
+
+void
+llc_timer()
+{
+	register struct llc_linkcb *linkp;
+	register struct llc_linkcb *nlinkp;
+	register int timer;
+	register int action;
+	register int s = splimp();
+
+	/*
+	 * All links are accessible over the doubly linked list llccb_q
+	 */
+	if (!LQEMPTY) {
+		/*
+		 * A for-loop is not that great an idea as the linkp
+		 * might get deleted if the age timer has expired ...
+		 * so the successor is fetched before linkp is worked on.
+		 */
+		linkp = LQFIRST;
+		while (LQVALID(linkp)) {
+			nlinkp = LQNEXT(linkp);
+			/*
+			 * Check implementation specific timers first
+			 */
+			/* The delayed action/acknowledge idle timer */
+			switch (LLC_TIMERXPIRED(linkp, DACTION)) {
+			case LLC_TIMER_RUNNING:
+				LLC_AGETIMER(linkp, DACTION);
+				break;
+			case LLC_TIMER_EXPIRED: {
+				/*
+				 * Start from a plain command without poll
+				 * so that a DACTION flag matching none of
+				 * the LLC_DACK* codes (e.g. never set) does
+				 * not hand indeterminate values to
+				 * llc_send().
+				 */
+				register int cmdrsp = LLC_CMD;
+				register int pollfinal = 0;
+
+				switch (LLC_GETFLAG(linkp, DACTION)) {
+				case LLC_DACKCMD:
+					cmdrsp = LLC_CMD, pollfinal = 0;
+					break;
+				case LLC_DACKCMDPOLL:
+					cmdrsp = LLC_CMD, pollfinal = 1;
+					break;
+				case LLC_DACKRSP:
+					cmdrsp = LLC_RSP, pollfinal = 0;
+					break;
+				case LLC_DACKRSPFINAL:
+					cmdrsp = LLC_RSP, pollfinal = 1;
+					break;
+				default:
+					break;
+				}
+				llc_send(linkp, LLCFT_RR, cmdrsp, pollfinal);
+				LLC_STOPTIMER(linkp, DACTION);
+				break;
+			}
+			}
+			/* The link idle timer */
+			switch (LLC_TIMERXPIRED(linkp, AGE)) {
+			case LLC_TIMER_RUNNING:
+				LLC_AGETIMER(linkp, AGE);
+				break;
+			case LLC_TIMER_EXPIRED:
+				/*
+				 * Only crunch the link when really no
+				 * timers are running any more.
+				 */
+				if (llc_anytimersup(linkp) == 0) {
+					/*
+					 * Stop the timer first: linkp must
+					 * not be written to once it has been
+					 * handed to llc_dellink(), as the
+					 * control block may be gone by then.
+					 */
+					LLC_STOPTIMER(linkp, AGE);
+					llc_dellink(linkp);
+					goto gone;
+				} else {
+					LLC_STARTTIMER(linkp, AGE);
+				}
+				break;
+			}
+			/* 
+			 * Now, check all the ISO 8802-2 timers 
+			 */
+			FOR_ALL_LLC_TIMERS(timer) {
+				action = 0;
+				if ((linkp->llcl_timerflags & (1<<timer)) &&
+				    (linkp->llcl_timers[timer] == 0)) {
+					/* map the timer slot to its event */
+					switch (timer) {
+					case LLC_ACK_SHIFT:
+						action = LLC_ACK_TIMER_EXPIRED;
+						break;
+					case LLC_P_SHIFT:
+						action = LLC_P_TIMER_EXPIRED;
+						break;
+					case LLC_BUSY_SHIFT:
+						action = LLC_BUSY_TIMER_EXPIRED;
+						break;
+					case LLC_REJ_SHIFT:
+						action = LLC_REJ_TIMER_EXPIRED;
+						break;
+					}
+					/* mark as stopped before the handler runs */
+					linkp->llcl_timerflags &= ~(1<<timer);
+					(void)llc_statehandler(linkp, (struct llc *)0, action, 0, 1);
+				} else if (linkp->llcl_timers[timer] > 0)
+					linkp->llcl_timers[timer]--;
+			}
+			
+gone:			linkp = nlinkp;
+		}
+	}
+	splx (s);
+}
diff --git a/sys/netccitt/llc_var.h b/sys/netccitt/llc_var.h
new file mode 100644
index 00000000000..a27db52d37a
--- /dev/null
+++ b/sys/netccitt/llc_var.h
@@ -0,0 +1,659 @@
+/* 
+ * Copyright (C) Dirk Husemann, Computer Science Department IV, 
+ * 		 University of Erlangen-Nuremberg, Germany, 1990, 1991, 1992
+ * Copyright (c) 1992, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ * 
+ * This code is derived from software contributed to Berkeley by
+ * Dirk Husemann and the Computer Science Department (IV) of
+ * the University of Erlangen-Nuremberg, Germany.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)llc_var.h	8.1 (Berkeley) 6/10/93
+ */
+
+#ifdef __STDC__
+/*
+ * Forward structure declarations for function prototypes [sic].
+ */
+struct llc;
+#endif
+
+#define	NPAIDB_LINK	0
+
+/*
+ * One entry of the NPAI database.  The same storage is viewed either as
+ * a link entry (NE: link control block plus route) or as SAP
+ * information (SI: class, window, trace switch and upcall pointers);
+ * the np_* and si_* macros below select the respective view.
+ */
+struct npaidbentry {
+	union {
+		/* MAC,DLSAP -> CONS */
+		struct {
+			struct llc_linkcb *NE_link;
+			struct rtentry *NE_rt;
+		} NE;
+		/* SAP info for unconfigured incoming calls */
+		struct {
+			u_short SI_class;
+#define LLC_CLASS_I	0x1
+#define	LLC_CLASS_II	0x3
+#define LLC_CLASS_III	0x4				/* Future */
+#define LLC_CLASS_IV	0x7				/* Future */
+			u_short SI_window;
+			/* non-zero enables llc_trace() output for this SAP */
+			u_short SI_trace;
+			u_short SI_xchxid;
+			void (*SI_input) 
+				__P((struct mbuf *));
+			caddr_t (*SI_ctlinput) 
+				__P((int, struct sockaddr *, caddr_t));
+		} SI;
+	} NESIun;
+};
+/* Shorthand accessors for the two union views of struct npaidbentry */
+#define np_link                 NESIun.NE.NE_link
+#define np_rt                   NESIun.NE.NE_rt
+#define si_class                NESIun.SI.SI_class
+#define si_window               NESIun.SI.SI_window
+#define si_trace                NESIun.SI.SI_trace
+#define si_xchxid               NESIun.SI.SI_xchxid
+#define si_input                NESIun.SI.SI_input
+#define si_ctlinput             NESIun.SI.SI_ctlinput
+
+#define NPDL_SAPNETMASK 0x7e
+
+/*
+ * Definitions for accessing bitfields/bitslices inside
+ * LLC2 headers
+ *
+ * A slice is described by the mask that selects its bits and by the
+ * shift that moves those bits down to (or up from) bit 0; see
+ * LLCGBITS/LLCSBITS/LLCCSBITS.
+ */
+struct bitslice {
+	unsigned int bs_mask;
+	unsigned int bs_shift;
+};
+
+
+/*
+ * Indices into llc_bitslice[] for use with LLCGBITS/LLCSBITS/LLCCSBITS.
+ * Several names share one index, i.e. they select the same mask/shift
+ * pair (for instance i_z, i_pf, s_pf and f_cr are all slice 0).
+ */
+#define	i_z	        0
+#define	i_ns	        1
+#define	i_pf	        0
+#define	i_nr	        1
+#define	s_oz            2
+#define	s_selector	3
+#define	s_pf            0
+#define	s_nr            1
+#define	u_bb            2
+#define	u_select_other	3
+#define	u_pf            4
+#define	u_select	5
+#define	f_vs            1
+#define	f_cr            0
+#define	f_vr            1
+#define	f_wxyzv         6
+
+/*
+ * Bit slice accessors; Index selects an entry of llc_bitslice[].
+ *
+ *	LLCGBITS	extract the slice from Arg, shifted down to bit 0
+ *	LLCSBITS	OR Val into the slice of Arg, other bits are kept
+ *	LLCCSBITS	assign Arg so that only the slice carries Val and
+ *			all other bits are cleared
+ *
+ * Arg must be an lvalue for the two setters.  Every expansion is fully
+ * parenthesised so that it can be embedded in a larger expression
+ * without precedence surprises.
+ */
+#define	LLCGBITS(Arg, Index)	(((Arg) & llc_bitslice[(Index)].bs_mask) >> llc_bitslice[(Index)].bs_shift)
+#define	LLCSBITS(Arg, Index, Val)	((Arg) |= (((Val) << llc_bitslice[(Index)].bs_shift) & llc_bitslice[(Index)].bs_mask))
+#define	LLCCSBITS(Arg, Index, Val)	((Arg) = (((Val) << llc_bitslice[(Index)].bs_shift) & llc_bitslice[(Index)].bs_mask))
+
+extern struct bitslice llc_bitslice[];
+
+/* Command/response selector; LLC_MAXCMDRSP is the number of selectors */
+#define LLC_CMD         0
+#define LLC_RSP         1
+#define LLC_MAXCMDRSP   2
+
+/*
+ * LLC events --- These events may either be frames received from the
+ *                remote LLC DSAP, request from the network layer user, 
+ *                timer events from llc_timer(), or diagnostic events from
+ *                llc_input().  
+ *
+ * Every event code is a multiple of LLC_MAXCMDRSP.  The products are
+ * parenthesised so that a code keeps its value next to any operator
+ * (e.g. `x / LLCFT_RR' or `x % LLCFT_RR').
+ */
+
+/* LLC frame types */
+#define LLCFT_INFO                      (0 * LLC_MAXCMDRSP)
+#define LLCFT_RR                        (1 * LLC_MAXCMDRSP)
+#define LLCFT_RNR                       (2 * LLC_MAXCMDRSP)
+#define LLCFT_REJ                       (3 * LLC_MAXCMDRSP)
+#define LLCFT_DM                        (4 * LLC_MAXCMDRSP)
+#define LLCFT_SABME                     (5 * LLC_MAXCMDRSP)
+#define LLCFT_DISC                      (6 * LLC_MAXCMDRSP)
+#define LLCFT_UA                        (7 * LLC_MAXCMDRSP)
+#define LLCFT_FRMR                      (8 * LLC_MAXCMDRSP)
+#define LLCFT_UI                        (9 * LLC_MAXCMDRSP)
+#define LLCFT_XID                       (10 * LLC_MAXCMDRSP)
+#define LLCFT_TEST                      (11 * LLC_MAXCMDRSP)
+
+/*
+ * LLC2 timer events, generated by llc_timer() when the corresponding
+ * timer runs out.  Parenthesised so the value survives any operator
+ * context.
+ */
+#define LLC_ACK_TIMER_EXPIRED           (12 * LLC_MAXCMDRSP)
+#define LLC_P_TIMER_EXPIRED             (13 * LLC_MAXCMDRSP)
+#define LLC_REJ_TIMER_EXPIRED           (14 * LLC_MAXCMDRSP)
+#define LLC_BUSY_TIMER_EXPIRED          (15 * LLC_MAXCMDRSP)
+
+/* LLC2 diagnostic events */
+#define LLC_INVALID_NR                  (16 * LLC_MAXCMDRSP)
+#define LLC_INVALID_NS                  (17 * LLC_MAXCMDRSP)
+#define LLC_BAD_PDU                     (18 * LLC_MAXCMDRSP)
+#define LLC_LOCAL_BUSY_DETECTED         (19 * LLC_MAXCMDRSP)
+#define LLC_LOCAL_BUSY_CLEARED          (20 * LLC_MAXCMDRSP)
+
+/*
+ * Network layer user requests.  As with the other event codes each
+ * value is a parenthesised multiple of LLC_MAXCMDRSP.
+ */
+/* 
+ * NL_CONNECT_REQUEST --- The user has requested that a data link connection
+ *                        be established with a remote LLC DSAP.
+ */
+#define NL_CONNECT_REQUEST              (21 * LLC_MAXCMDRSP)
+/* 
+ * NL_CONNECT_RESPONSE --- The user has accepted the data link connection.
+ */
+#define NL_CONNECT_RESPONSE             (22 * LLC_MAXCMDRSP)
+/* 
+ * NL_RESET_REQUEST --- The user has requested that the data link with the
+ *                      remote LLC DSAP be reset.
+ */
+#define NL_RESET_REQUEST                (23 * LLC_MAXCMDRSP)
+/* 
+ * NL_RESET_RESPONSE --- The user has accepted the reset of the data link
+ *                       connection.
+ */
+#define NL_RESET_RESPONSE               (24 * LLC_MAXCMDRSP)
+/* 
+ * NL_DISCONNECT_REQUEST --- The user has requested that the data link
+ *                           connection with remote LLC DSAP be terminated.
+ */
+#define NL_DISCONNECT_REQUEST           (25 * LLC_MAXCMDRSP)
+/*
+ * NL_DATA_REQUEST --- The user has requested that a data unit be sent to the
+ *                     remote LLC DSAP.
+ */
+#define NL_DATA_REQUEST                 (26 * LLC_MAXCMDRSP)
+/*
+ * NL_INITIATE_PF_CYCLE --- The local LLC wants to initiate a P/F cycle.
+ */
+#define NL_INITIATE_PF_CYCLE            (27 * LLC_MAXCMDRSP)
+/*
+ * NL_LOCAL_BUSY_DETECTED --- The local entity has encountered a busy condition
+ */
+#define NL_LOCAL_BUSY_DETECTED          (28 * LLC_MAXCMDRSP)
+
+/* Value outside the multiples of LLC_MAXCMDRSP used for the codes above */
+#define LLCFT_NONE                      255
+
+/*
+ * Return messages from the state handlers.  These are plain small
+ * integers (1..11), unlike the event codes which are multiples of
+ * LLC_MAXCMDRSP.
+ */
+
+/*
+ * LLC_CONNECT_INDICATION --- Inform the user that a connection has been
+ *                            requested by a remote LLC SSAP.
+ */
+#define LLC_CONNECT_INDICATION      1
+/*
+ * LLC_CONNECT_CONFIRM --- The connection service component indicates that the
+ *                         remote network entity has accepted the connection.
+ */
+#define LLC_CONNECT_CONFIRM         2
+/*
+ * LLC_DISCONNECT_INDICATION --- Inform the user that the remote network
+ *                               entity has initiated disconnection of the data
+ *                               link connection.
+ */
+#define LLC_DISCONNECT_INDICATION   3
+/*
+ * LLC_RESET_CONFIRM --- The connection service component indicates that the
+ *                       remote network entity has accepted the reset.
+ */
+#define LLC_RESET_CONFIRM           4
+/*
+ * LLC_RESET_INDICATION_REMOTE --- The remote network entity or remote peer
+ *                                 has initiated a reset of the data link
+ *                                 connection.
+ */
+#define LLC_RESET_INDICATION_REMOTE 5
+/*
+ * LLC_RESET_INDICATION_LOCAL --- The local LLC has determined that the data
+ *                                link connection is in need of
+ *                                reinitialization.
+ */
+#define LLC_RESET_INDICATION_LOCAL  6
+/*
+ * LLC_FRMR_RECEIVED --- The local connection service component has received a
+ *                       FRMR response PDU.
+ */
+#define LLC_FRMR_RECEIVED           7
+/*
+ * LLC_FRMR_SENT --- The local connection component has received an invalid
+ *                   PDU, and has sent a FRMR response PDU.
+ */
+#define LLC_FRMR_SENT               8
+/*
+ * LLC_DATA_INDICATION --- The connection service component passes the data
+ *                         unit from the received I PDU to the user.
+ */
+#define LLC_DATA_INDICATION         9
+/*
+ * LLC_REMOTE_NOT_BUSY --- The remote LLC DSAP is no longer busy. The local
+ *                         connection service component will now accept a
+ *                         DATA_REQUEST.
+ */
+#define LLC_REMOTE_NOT_BUSY         10
+/*
+ * LLC_REMOTE_BUSY --- The remote LLC DSAP is busy. The local connection
+ *                     service component will not accept a DATA_REQUEST.
+ */
+#define LLC_REMOTE_BUSY             11
+
+/* Internal return code */
+#define LLC_PASSITON                255
+
+/*
+ * Control field format selectors.  NOTE(review): the spelling
+ * UNUMBERED_CONTROL (sic) is part of the interface and is kept.
+ */
+#define INFORMATION_CONTROL	0x00
+#define SUPERVISORY_CONTROL	0x02
+#define UNUMBERED_CONTROL 	0x03 
+ 
+/*
+ * Other necessary definitions
+ *
+ * LLC_MAX_SEQUENCE is the modulus applied by LLC_INC() to sequence
+ * numbers.
+ */
+ 
+#define LLC_MAX_SEQUENCE    128
+#define LLC_MAX_WINDOW	    127
+#define LLC_WINDOW_SIZE	    7
+
+/*
+ * Don't we love this one? CCITT likes to suck on bits 8=)
+ */
+#define NLHDRSIZEGUESS      3
+
+/*
+ * LLC control block
+ *
+ * One block exists per link.  Blocks are chained on the global llccb_q
+ * through llcl_q, which is the first member so that a queue pointer can
+ * be cast to a link pointer and back (see LQNEXT/LQFIRST/LQVALID).
+ */
+
+struct llc_linkcb {
+	struct llccb_q {
+		struct llccb_q *q_forw;			/* admin chain */
+		struct llccb_q *q_backw;
+	} llcl_q;
+	struct npaidbentry  	*llcl_sapinfo;		/* SAP information */
+	struct sockaddr_dl 	llcl_addr;		/* link snpa address */
+	struct rtentry 		*llcl_nlrt;		/* layer 3 -> LLC */
+	struct rtentry		*llcl_llrt;		/* LLC -> layer 3 */
+	struct ifnet            *llcl_if;           	/* our interface */
+	caddr_t			llcl_nlnext;		/* cb for network layer */
+	struct mbuf   	 	*llcl_writeqh;		/* Write queue head */
+	struct mbuf    		*llcl_writeqt;		/* Write queue tail */
+	/* window slots, indexed by llcl_freeslot (see LLC_SETFRAME) */
+	struct mbuf    		**llcl_output_buffers;
+	/* one counter per LLC_*_SHIFT index (ACK .. DACTION, 0..5) */
+	short                   llcl_timers[6];         /* timer array */
+	/* bit (1 << LLC_*_SHIFT) is set while that timer is running */
+	long                    llcl_timerflags;        /* flags signalling running timers */
+	/* handler of the current state, switched by LLC_NEWSTATE() */
+	int                     (*llcl_statehandler)
+		__P((struct llc_linkcb *, struct llc *, int, int, int));
+	/* llcl_<NAME>_flag members are accessed via LLC_SETFLAG/LLC_GETFLAG */
+	int                     llcl_P_flag;
+	int                     llcl_F_flag;
+	int                     llcl_S_flag;
+	int                     llcl_DATA_flag;
+	int                     llcl_REMOTE_BUSY_flag;
+	int                     llcl_DACTION_flag;      /* delayed action */
+	int                     llcl_retry;
+	/*
+	 * The following components deal --- in one way or the other ---
+	 * with the LLC2 window. Indicated by either [L] or [W] is the
+	 * domain of the specific component:
+	 *
+	 *        [L]    The domain is 0--LLC_MAX_WINDOW
+	 *        [W]    The domain is 0--llcl_window
+	 */
+	short           	llcl_vr;                /* next to receive [L] */
+	short           	llcl_vs;                /* next to send [L] */
+	short           	llcl_nr_received;       /* next frame to b ack'd [L] */
+	short                   llcl_freeslot;          /* next free slot [W] */
+	short                   llcl_projvs;            /* V(S) associated with freeslot */
+	short                   llcl_slotsfree;         /* free slots [W] */
+	short           	llcl_window;            /* window size */
+	/*
+	 * In llcl_frmrinfo we jot down the last frmr info field, which we
+	 * need to do as we need to be able to resend it in the ERROR state.
+	 */
+	struct frmrinfo         llcl_frmrinfo;          /* last FRMR info field */
+};
+/* Shorthand accessors for the members of llcl_frmrinfo */
+#define llcl_frmr_pdu0          llcl_frmrinfo.rej_pdu_0
+#define llcl_frmr_pdu1          llcl_frmrinfo.rej_pdu_1
+#define llcl_frmr_control       llcl_frmrinfo.frmr_control
+#define llcl_frmr_control_ext   llcl_frmrinfo.frmr_control_ext
+#define llcl_frmr_cause         llcl_frmrinfo.frmr_cause
+
+/*
+ * Link queue traversal.  llccb_q is the list head; the list is circular,
+ * so it is empty when the head points at itself and a walk has finished
+ * when it arrives at the head again.
+ *
+ *	LQNEXT(l)	successor of link l
+ *	LQEMPTY		true when no link is queued
+ *	LQFIRST		first queued link
+ *	LQVALID(l)	true while l is a link and not the list head
+ *
+ * The cast expressions are wrapped in parentheses so that a postfix
+ * operator applied to the result (e.g. LQNEXT(l)->llcl_if) binds to
+ * the converted pointer rather than to the operand of the cast.
+ */
+#define	LQNEXT(l)	((struct llc_linkcb *)((l)->llcl_q.q_forw))
+#define	LQEMPTY		(llccb_q.q_forw == &llccb_q)
+#define	LQFIRST		((struct llc_linkcb *)(llccb_q.q_forw))
+#define LQVALID(l)	((struct llccb_q *)(l) != &llccb_q)
+
+/*
+ * LLC_ENQUEUE --- append mbuf chain m to the write queue of link l.  An
+ * empty queue (head NULL) gets m as head and tail, otherwise m is hooked
+ * to the current tail through m_nextpkt and becomes the new tail.
+ * NOTE(review): m->m_nextpkt itself is not cleared here and the macro
+ * expands to a bare if/else statement --- callers presumably take care
+ * of both; confirm.
+ */
+#define LLC_ENQUEUE(l, m) if ((l)->llcl_writeqh == NULL) { \
+				(l)->llcl_writeqh = (m); \
+				(l)->llcl_writeqt = (m); \
+			} else { \
+				(l)->llcl_writeqt->m_nextpkt = (m); \
+				(l)->llcl_writeqt = (m); \
+			}
+
+/*
+ * LLC_DEQUEUE --- take the head of the write queue of link l into m, or
+ * set m to NULL when the queue is empty.  Only the head pointer is
+ * advanced; llcl_writeqt is left as is (LLC_ENQUEUE decides on the head
+ * alone whether the queue is empty).
+ */
+#define LLC_DEQUEUE(l, m) if ((l)->llcl_writeqh == NULL) \
+                                (m) = NULL; \
+                          else { \
+				(m) = (l)->llcl_writeqh; \
+				(l)->llcl_writeqh = (l)->llcl_writeqh->m_nextpkt; \
+			}
+
+/*
+ * LLC_SETFRAME --- store m in the next free window slot of link l.  When
+ * a slot is free: the free count drops by one, m goes into
+ * llcl_output_buffers[llcl_freeslot], llcl_freeslot advances modulo
+ * llcl_window and llcl_projvs advances modulo LLC_MAX_SEQUENCE.  With
+ * no free slot the macro does nothing.
+ */
+#define LLC_SETFRAME(l, m) { \
+			        if ((l)->llcl_slotsfree > 0) { \
+				        (l)->llcl_slotsfree--; \
+					(l)->llcl_output_buffers[(l)->llcl_freeslot] = (m); \
+					(l)->llcl_freeslot = ((l)->llcl_freeslot+1) % (l)->llcl_window; \
+					LLC_INC((l)->llcl_projvs); \
+				} \
+		           }
+
+/*
+ * handling of sockaddr_dl's
+ *
+ *	LLADDRLEN(s)	 name length plus address length of s
+ *	LLSAPADDR(s)	 last byte of that name+address area in sdl_data
+ *	LLSAPLOC(s, if)	 name length of s plus the interface's if_addrlen
+ *
+ * NOTE(review): the second parameter of LLSAPLOC is spelled `if'; this
+ * is only a macro parameter name, but it reads like the keyword.
+ */
+
+#define LLADDRLEN(s) 	((s)->sdl_alen + (s)->sdl_nlen)
+#define	LLSAPADDR(s) 	((s)->sdl_data[LLADDRLEN(s)-1] & 0xff)
+#define LLSAPLOC(s, if) ((s)->sdl_nlen + (if)->if_addrlen)
+
+/* Destination and source link addresses plus a length field */
+struct sdl_hdr {
+	struct sockaddr_dl sdlhdr_dst;
+	struct sockaddr_dl sdlhdr_src;
+	long sdlhdr_len;
+};
+
+/*
+ * LLC_GETHDR --- make room for an LLC header and point f at it.
+ *
+ * With a non-NULL mbuf m, LLC_ISFRAMELEN bytes are prepended to it and
+ * zeroed.  With m == NULL a fresh packet header mbuf of LLC_UFRAMELEN
+ * zeroed bytes is allocated.  On success m is updated to the resulting
+ * mbuf and f is set to its data area.
+ *
+ * Both allocations use M_DONTWAIT and may fail.  On failure the macro
+ * executes `return;' in the calling function, so it can only be used
+ * where a plain return is acceptable.  M_PREPEND leaves its mbuf
+ * argument NULL when it fails, hence the result is checked before it is
+ * dereferenced, and m is set to NULL so that the caller is not left
+ * with a stale pointer.
+ */
+#define LLC_GETHDR(f,m) { \
+				struct mbuf *_m = (struct mbuf *) (m); \
+				if (_m) { \
+					M_PREPEND(_m, LLC_ISFRAMELEN, M_DONTWAIT); \
+					if (_m == NULL) { \
+						(m) = NULL; \
+						return; \
+					} \
+					bzero(mtod(_m, caddr_t), LLC_ISFRAMELEN); \
+				} else { \
+					MGETHDR (_m, M_DONTWAIT, MT_HEADER); \
+					if (_m != NULL) { \
+						_m->m_pkthdr.len = _m->m_len = LLC_UFRAMELEN; \
+						_m->m_next = _m->m_act = NULL; \
+						bzero(mtod(_m, caddr_t), LLC_UFRAMELEN); \
+					} else return; \
+				} \
+				(m) = _m; \
+				(f) = mtod(m, struct llc *); \
+		      }
+
+/*
+ * State switching.  The state of a link is the handler function stored
+ * in llcl_statehandler; St is the bare state name (NORMAL, BUSY, ...)
+ * and is pasted onto llc_state_ to name the handler.
+ *
+ *	LLC_NEWSTATE(lp, St)	make llc_state_St the current handler
+ *	LLC_STATEEQ(lp, St)	1 if llc_state_St is the current handler,
+ *				0 otherwise
+ */
+#define LLC_NEWSTATE(lp, St) (lp)->llcl_statehandler = llc_state_##St
+#define LLC_STATEEQ(lp, St) ((lp)->llcl_statehandler == llc_state_##St)
+
+/*
+ * Timer slots.  LLC_<NAME>_SHIFT is both the index into llcl_timers[]
+ * and the bit position in llcl_timerflags of timer <NAME>.
+ */
+#define LLC_ACK_SHIFT      0
+#define LLC_P_SHIFT        1
+#define LLC_BUSY_SHIFT     2
+#define LLC_REJ_SHIFT      3
+#define LLC_AGE_SHIFT      4
+#define LLC_DACTION_SHIFT  5
+
+/* Results of LLC_TIMERXPIRED() */
+#define LLC_TIMER_NOTRUNNING    0
+#define LLC_TIMER_RUNNING       1
+#define LLC_TIMER_EXPIRED       2
+
+/*
+ * LLC_STARTTIMER --- load the counter of timer LLCtimer from the global
+ * llc_<LLCtimer>_timer and set its running bit.
+ */
+#define LLC_STARTTIMER(l, LLCtimer) { \
+				 (l)->llcl_timers[LLC_##LLCtimer##_SHIFT] = llc_##LLCtimer##_timer; \
+				 (l)->llcl_timerflags |= (1<<LLC_##LLCtimer##_SHIFT); \
+				 }
+/* LLC_STOPTIMER --- zero the counter and clear the running bit */
+#define LLC_STOPTIMER(l, LLCtimer) { \
+				 (l)->llcl_timers[LLC_##LLCtimer##_SHIFT] = 0; \
+				 (l)->llcl_timerflags &= ~(1<<LLC_##LLCtimer##_SHIFT); \
+				 }
+/*
+ * LLC_AGETIMER --- decrement the counter unless it is already zero.
+ * NOTE(review): expands to an unbraced `if' that already ends in a
+ * semicolon, so it must not be used as the body of an if that has an
+ * else branch.
+ */
+#define LLC_AGETIMER(l, LLCtimer) if ((l)->llcl_timers[LLC_##LLCtimer##_SHIFT] > 0) \
+	                                  (l)->llcl_timers[LLC_##LLCtimer##_SHIFT]--;
+
+/*
+ * LLC_TIMERXPIRED --- LLC_TIMER_NOTRUNNING when the running bit is
+ * clear; otherwise LLC_TIMER_EXPIRED when the counter is zero and
+ * LLC_TIMER_RUNNING when it is not.
+ */
+#define LLC_TIMERXPIRED(l, LLCtimer) \
+	(((l)->llcl_timerflags & (1<<LLC_##LLCtimer##_SHIFT)) ? \
+	 (((l)->llcl_timers[LLC_##LLCtimer##_SHIFT] == 0 ) ? \
+	  LLC_TIMER_EXPIRED : LLC_TIMER_RUNNING) : LLC_TIMER_NOTRUNNING)
+
+/*
+ * Loop header over the slots ACK, P, BUSY and REJ; the AGE and DACTION
+ * slots are not visited.
+ */
+#define FOR_ALL_LLC_TIMERS(t) for ((t) = LLC_ACK_SHIFT; (t) < LLC_AGE_SHIFT; (t)++)
+
+/*
+ * Flag access.  LLCflag is the bare flag name (P, F, S, DATA,
+ * REMOTE_BUSY, DACTION); it is pasted into llcl_<LLCflag>_flag.
+ */
+#define LLC_SETFLAG(l, LLCflag, v) (l)->llcl_##LLCflag##_flag = (v)
+#define LLC_GETFLAG(l, LLCflag) (l)->llcl_##LLCflag##_flag
+
+/*
+ * LLC_RESETCOUNTER --- zero V(S), V(R) and the retry counter of link l
+ * and call llc_resetwindow() on it.
+ */
+#define LLC_RESETCOUNTER(l) { \
+				      (l)->llcl_vs = (l)->llcl_vr = (l)->llcl_retry = 0; \
+				      llc_resetwindow((l)); \
+			      }
+
+/*
+ * LLC2 macro definitions
+ *
+ * Thin wrappers around LLC_STARTTIMER/LLC_STOPTIMER.  The P timer
+ * wrappers additionally maintain the P flag: starting the timer sets
+ * the flag (zeroing the retry counter first if the flag was clear),
+ * stopping it clears the flag.
+ */
+				    
+
+#define LLC_START_ACK_TIMER(l) LLC_STARTTIMER((l), ACK)
+#define LLC_STOP_ACK_TIMER(l) LLC_STOPTIMER((l), ACK)
+#define LLC_START_REJ_TIMER(l) LLC_STARTTIMER((l), REJ)
+#define LLC_STOP_REJ_TIMER(l) LLC_STOPTIMER((l), REJ)
+#define LLC_START_P_TIMER(l) { \
+				      LLC_STARTTIMER((l), P); \
+				      if (LLC_GETFLAG((l), P) == 0) \
+					      (l)->llcl_retry = 0; \
+				      LLC_SETFLAG((l), P, 1); \
+			     }
+#define LLC_STOP_P_TIMER(l) { \
+				      LLC_STOPTIMER((l), P); \
+				      LLC_SETFLAG((l), P, 0); \
+			    }
+/*
+ * Stops the ACK, REJ, BUSY and P timers.  The AGE and DACTION timers
+ * are not touched, and --- unlike LLC_STOP_P_TIMER --- the P flag is
+ * left unchanged.
+ */
+#define LLC_STOP_ALL_TIMERS(l) { \
+				      LLC_STOPTIMER((l), ACK); \
+				      LLC_STOPTIMER((l), REJ); \
+				      LLC_STOPTIMER((l), BUSY); \
+				      LLC_STOPTIMER((l), P); \
+			    }
+
+
+/* Advance sequence number i by one, wrapping at LLC_MAX_SEQUENCE */
+#define LLC_INC(i) (i) = ((i)+1) % LLC_MAX_SEQUENCE
+
+/*
+ * LLC_NR_VALID --- 1 if nr lies in the range from llcl_nr_received up
+ * to llcl_vs (both inclusive), 0 otherwise.  When llcl_vs is below
+ * llcl_nr_received the range is taken to wrap around, so nr is valid
+ * if it is at or above llcl_nr_received or at or below llcl_vs.
+ */
+#define LLC_NR_VALID(l, nr)     ((l)->llcl_vs < (l)->llcl_nr_received ? \
+	                             (((nr) >= (l)->llcl_nr_received) || \
+	                              ((nr) <= (l)->llcl_vs) ? 1 : 0) : \
+	                             (((nr) <= (l)->llcl_vs) && \
+	                              ((nr) >= (l)->llcl_nr_received) ? 1 : 0))
+
+/*
+ * LLC_UPDATE_P_FLAG --- for a response (cr == LLC_RSP) with pf == 1,
+ * clear the P flag and stop the P timer; otherwise do nothing.
+ */
+#define LLC_UPDATE_P_FLAG(l, cr, pf) { \
+			   if ((cr) == LLC_RSP && (pf) == 1) { \
+			           LLC_SETFLAG((l), P, 0); \
+				   LLC_STOPTIMER((l), P); \
+			    } \
+			    }
+
+/*
+ * LLC_UPDATE_NR_RECEIVED --- advance llcl_nr_received up to nr.  For
+ * every sequence number passed, the mbuf in the matching output buffer
+ * slot (llc_seq2slot()) is freed if present, the slot is cleared and
+ * the free slot count grows by one.  Afterwards the retry counter is
+ * zeroed, the ACK timer is started while fewer than llcl_window slots
+ * are free and stopped otherwise, and the DACTION timer is (re)started
+ * in either case.
+ */
+#define LLC_UPDATE_NR_RECEIVED(l, nr) { \
+			    while ((l)->llcl_nr_received != (nr)) { \
+				    struct mbuf *_m; \
+				    register short seq; \
+				    if (_m = (l)->llcl_output_buffers[seq = llc_seq2slot((l), (l)->llcl_nr_received)]) \
+					    m_freem(_m); \
+				    (l)->llcl_output_buffers[seq] = NULL; \
+				    LLC_INC((l)->llcl_nr_received); \
+				    (l)->llcl_slotsfree++; \
+			    } \
+			    (l)->llcl_retry = 0; \
+			    if ((l)->llcl_slotsfree < (l)->llcl_window) { \
+				    LLC_START_ACK_TIMER(l); \
+			    } else LLC_STOP_ACK_TIMER(l); \
+			    LLC_STARTTIMER((l), DACTION); \
+			    }
+
+/*
+ * LLC_SET_REMOTE_BUSY --- enter the remote busy condition.  If the
+ * REMOTE_BUSY flag is clear it is set, the BUSY timer is started and a
+ * is set to LLC_REMOTE_BUSY; if the flag is already set nothing changes
+ * and a is set to 0.
+ */
+#define LLC_SET_REMOTE_BUSY(l,a) { \
+			    if (LLC_GETFLAG((l), REMOTE_BUSY) == 0) { \
+				    LLC_SETFLAG((l), REMOTE_BUSY, 1); \
+				    LLC_STARTTIMER((l), BUSY); \
+				    (a) = LLC_REMOTE_BUSY; \
+			    } else { \
+				    (a) = 0; \
+			    } \
+			    }
+/*
+ * LLC_CLEAR_REMOTE_BUSY --- leave the remote busy condition.  If the
+ * REMOTE_BUSY flag is set it is cleared (reset to 0, the counterpart of
+ * LLC_SET_REMOTE_BUSY), the BUSY timer is stopped, llc_resend() is
+ * called when the link is in state NORMAL, REJECT or BUSY, and a is set
+ * to LLC_REMOTE_NOT_BUSY.  If the flag is not set nothing changes and a
+ * is set to 0.
+ */
+#define LLC_CLEAR_REMOTE_BUSY(l,a) { \
+			    if (LLC_GETFLAG((l), REMOTE_BUSY) == 1) { \
+				    LLC_SETFLAG((l), REMOTE_BUSY, 0); \
+				    LLC_STOPTIMER((l), BUSY); \
+				    if (LLC_STATEEQ((l), NORMAL) || \
+					LLC_STATEEQ((l), REJECT) || \
+					LLC_STATEEQ((l), BUSY)) \
+						llc_resend((l), LLC_CMD, 0); \
+				    (a) = LLC_REMOTE_NOT_BUSY; \
+			    } else { \
+				    (a) = 0; \
+			    } \
+			    }
+
+/*
+ * Values of the DACTION flag.  llc_timer() maps them to the
+ * command/response and poll/final arguments of the RR frame it sends
+ * when the DACTION timer expires.
+ */
+#define LLC_DACKCMD      0x1
+#define LLC_DACKCMDPOLL  0x2
+#define LLC_DACKRSP      0x3
+#define LLC_DACKRSPFINAL 0x4
+
+/*
+ * LLC_SENDACKNOWLEDGE --- record in the DACTION flag which kind of
+ * acknowledgement is wanted: command or response (cmd), without or
+ * with poll/final (pf).  The macro only sets the flag; it neither
+ * sends a frame nor starts a timer itself.
+ */
+#define LLC_SENDACKNOWLEDGE(l, cmd, pf) { \
+			   if ((cmd) == LLC_CMD) { \
+				   LLC_SETFLAG((l), DACTION, ((pf) == 0 ? LLC_DACKCMD : LLC_DACKCMDPOLL)); \
+			   } else { \
+				   LLC_SETFLAG((l), DACTION, ((pf) == 0 ? LLC_DACKRSP : LLC_DACKRSPFINAL)); \
+			   } \
+		   }
+
+/* Cause bits for the c argument of LLC_SETFRMR() */
+#define LLC_FRMR_W     (1<<0)
+#define LLC_FRMR_X     (1<<1)
+#define LLC_FRMR_Y     (1<<2)
+#define LLC_FRMR_Z     (1<<3)
+#define LLC_FRMR_V     (1<<4)
+
+/*
+ * LLC_SETFRMR --- fill in llcl_frmrinfo of link l from frame f.
+ *
+ * The first control byte of f is always copied to rej_pdu_0.  The
+ * second one (llc_control_ext) is copied to rej_pdu_1 only when the
+ * low two bits of llc_control are both zero, otherwise rej_pdu_1 is
+ * set to 0.  Then V(S), cr, V(R) and the cause c are stored via the
+ * bit slice macros; frmr_control_ext is first assigned (f_cr) and then
+ * ORed into (f_vr), so both values end up in it.
+ *
+ * NOTE(review): the test `llc_control & 0x3' is also true for low bits
+ * 01 and 10, not only for 11 (UNUMBERED_CONTROL is 0x03) --- confirm
+ * whether a comparison against 0x3 was intended.
+ */
+#define LLC_SETFRMR(l, f, cr, c) { \
+			   if ((f)->llc_control & 0x3) { \
+				   (l)->llcl_frmr_pdu0 = (f)->llc_control; \
+				   (l)->llcl_frmr_pdu1 = 0; \
+			   } else { \
+				   (l)->llcl_frmr_pdu0 = (f)->llc_control; \
+				   (l)->llcl_frmr_pdu1 = (f)->llc_control_ext; \
+			   } \
+			   LLCCSBITS((l)->llcl_frmr_control, f_vs, (l)->llcl_vs); \
+			   LLCCSBITS((l)->llcl_frmr_control_ext, f_cr, (cr)); \
+			   LLCSBITS((l)->llcl_frmr_control_ext, f_vr, (l)->llcl_vr); \
+			   LLCCSBITS((l)->llcl_frmr_cause, f_wxyzv, (c)); \
+			}
+
+/*
+ * LLC tracing levels:
+ *     LLCTR_INTERESTING        interesting event, we might care to know about
+ *                              it, but then again, we might not ...
+ *     LLCTR_SHOULDKNOW         we probably should know about this event
+ *     LLCTR_URGENT             something has gone utterly wrong ...
+ *
+ * llc_trace() dumps the link only when tracing is enabled for the SAP
+ * and the level passed is greater than llc_tracelevel.
+ */
+#define LLCTR_INTERESTING       1
+#define LLCTR_SHOULDKNOW        2
+#define LLCTR_URGENT            3
+
+/*
+ * LLC_TRACE expands to a call of llc_trace() when LLCDEBUG is defined
+ * and to nothing at all otherwise, in which case its arguments are not
+ * evaluated.
+ */
+#ifdef LLCDEBUG
+#define LLC_TRACE(lp, l, msg) llc_trace((lp), (l), (msg))
+#else /* LLCDEBUG */
+#define LLC_TRACE(lp, l, msg) /* NOOP */
+#endif /* LLCDEBUG */
+				      
+/*
+ * Compile time defaults for the tunables defined in llc_timer.c.  The
+ * timer values count invocations of llc_timer(), which is documented
+ * as running every 500ms --- hence 10 ticks for 5 seconds.
+ */
+#define LLC_N2_VALUE	  15              /* up to 15 retries */
+#define LLC_ACK_TIMER     10              /*  5 secs */
+#define LLC_P_TIMER        4              /*  2 secs */
+#define LLC_BUSY_TIMER    12              /*  6 secs */
+#define LLC_REJ_TIMER     12              /*  6 secs */
+#define LLC_AGE_TIMER     40              /* 20 secs */
+#define LLC_DACTION_TIMER  2              /*  1 secs */
+
+/* Everything below is visible only to kernel code built with LLC */
+#if defined (KERNEL) && defined(LLC)
+/* Run time tunables, defined in llc_timer.c */
+extern int llc_n2;
+extern int llc_ACK_timer;
+extern int llc_P_timer;
+extern int llc_REJ_timer;
+extern int llc_BUSY_timer;
+extern int llc_AGE_timer;
+extern int llc_DACTION_timer;
+
+extern int af_link_rts_init_done;
+
+/*
+ * USES_AF_LINK_RTS --- initialise rt_tables[AF_LINK] through
+ * rn_inithead() the first time it is executed; af_link_rts_init_done
+ * guards against doing so again.
+ */
+#define USES_AF_LINK_RTS { \
+	if (!af_link_rts_init_done) { \
+	       rn_inithead((void **)&rt_tables[AF_LINK], 32); \
+	       af_link_rts_init_done++; \
+	       } \
+	 }
+
+/*
+ * NOTE(review): this is a definition without `extern', so each file
+ * that includes this header with KERNEL and LLC defined carries its own
+ * tentative definition of llcintrq --- confirm that this is intended.
+ */
+struct ifqueue llcintrq;
+
+extern struct llccb_q llccb_q;
+extern char *frame_names[];
+
+/* 
+ * Function prototypes
+ *
+ * __P(()) hides the parameter lists from compilers without prototype
+ * support.  The grouping comments below go by the function names only.
+ */
+/* sockaddr_dl helpers */
+int sdl_cmp __P((struct sockaddr_dl *, struct sockaddr_dl *));
+int sdl_copy __P((struct sockaddr_dl *, struct sockaddr_dl *));
+int sdl_swapaddr __P((struct sockaddr_dl *, struct sockaddr_dl *));
+int sdl_checkaddrif __P((struct ifnet *, struct sockaddr_dl *));
+int sdl_setaddrif __P((struct ifnet *, u_char *, u_char, u_char, 
+		      struct sockaddr_dl *));
+int sdl_sethdrif __P((struct ifnet *, u_char *, u_char, u_char *, u_char, u_char, 
+		      struct sdl_hdr *));
+/* SAP information and NPAI database */
+struct npaidbentry *llc_setsapinfo __P((struct ifnet *, u_char, u_char,
+					struct dllconfig *));
+struct npaidbentry *llc_getsapinfo __P((u_char, struct ifnet *));
+struct rtentry *npaidb_enrich __P((short, caddr_t, struct sockaddr_dl *));
+int npaidb_destroy __P((struct rtentry *));
+short llc_seq2slot __P((struct llc_linkcb *, short));
+/*
+ * State handlers.  All share the signature of llcl_statehandler so that
+ * LLC_NEWSTATE() can install any llc_state_<NAME> function.
+ */
+int llc_state_ADM __P((struct llc_linkcb *, struct llc *, int, int, int));
+int llc_state_CONN __P((struct llc_linkcb *, struct llc *, int, int, int));
+int llc_state_RESET_WAIT __P((struct llc_linkcb *, struct llc *, 
+			      int, int, int));
+int llc_state_RESET_CHECK __P((struct llc_linkcb *, struct llc *, 
+			       int, int, int));
+int llc_state_SETUP __P((struct llc_linkcb *, struct llc *, int, int, int));
+int llc_state_RESET __P((struct llc_linkcb *, struct llc *, int, int, int));
+int llc_state_D_CONN __P((struct llc_linkcb *, struct llc *, int, int, int));
+int llc_state_ERROR __P((struct llc_linkcb *, struct llc *, int, int, int));
+int llc_state_NBRAcore __P((struct llc_linkcb *, struct llc *, int, int, int));
+int llc_state_NORMAL __P((struct llc_linkcb *, struct llc *, int, int, int));
+int llc_state_BUSY __P((struct llc_linkcb *, struct llc *, int, int, int));
+int llc_state_REJECT __P((struct llc_linkcb *, struct llc *, int, int, int));
+int llc_state_AWAIT __P((struct llc_linkcb *, struct llc *, int, int, int));
+int llc_state_AWAIT_BUSY __P((struct llc_linkcb *, struct llc *, int, int, int));
+int llc_state_AWAIT_REJECT __P((struct llc_linkcb *, struct llc *, int, int, int));
+int llc_statehandler __P((struct llc_linkcb *, struct llc *, int, int, int));
+/* Link management, diagnostics and timers */
+int llc_init __P((void));
+struct llc_linkcb *llc_newlink __P((struct sockaddr_dl *, struct ifnet *, 
+				    struct rtentry *, caddr_t, struct rtentry *));
+int llc_dellink __P((struct llc_linkcb *));
+int llc_anytimersup __P((struct llc_linkcb *));
+char * llc_getstatename __P((struct llc_linkcb *));
+void llc_link_dump __P((struct llc_linkcb *, const char *));
+void llc_trace __P((struct llc_linkcb *, int, const char *));
+void llc_resetwindow __P((struct llc_linkcb *));
+int llc_decode __P((struct llc *, struct llc_linkcb *));
+void llc_timer __P((void));
+/* Input and output paths */
+void llcintr __P((void));
+int llc_input __P((struct llc_linkcb *, struct mbuf *, u_char));
+caddr_t llc_ctlinput __P((int, struct sockaddr *, caddr_t));
+int llc_output __P((struct llc_linkcb *, struct mbuf *));
+void llc_start __P((struct llc_linkcb *));
+int llc_send __P((struct llc_linkcb *, int, int, int));
+int llc_resend __P((struct llc_linkcb *, int, int));
+int llc_rawsend __P((struct llc_linkcb *, struct mbuf *, struct llc *, int, int,
+		    int, int));
+/* Glue to other layers */
+int cons_rtrequest __P((int, struct rtentry *, struct sockaddr *));
+int x25_llcglue __P((int, struct sockaddr *));
+
+#endif
+
+
diff --git a/sys/netccitt/pk.h b/sys/netccitt/pk.h
new file mode 100644
index 00000000000..528e0a68080
--- /dev/null
+++ b/sys/netccitt/pk.h
@@ -0,0 +1,207 @@
+/*
+ * Copyright (c) University of British Columbia, 1984
+ * Copyright (c) 1990, 1992, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * the Laboratory for Computation Vision and the Computer Science Department
+ * of the University of British Columbia.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)pk.h	8.1 (Berkeley) 6/10/93
+ */
+
+/*
+ *
+ *  X.25 Packet Level Definitions:
+ *
+ */
+
+/* Packet type identifier field definitions. */
+
+#define X25_CALL                         11
+#define X25_CALL_ACCEPTED                15   
+#define X25_CLEAR                        19
+#define X25_CLEAR_CONFIRM                23  
+#define X25_DATA                          0   
+#define X25_INTERRUPT                    35   
+#define X25_INTERRUPT_CONFIRM            39   
+
+#define X25_RR                            1   
+#define X25_RNR                           5   
+#define X25_REJECT			  9
+#define X25_RESET                        27 
+#define X25_RESET_CONFIRM                31   
+#define X25_DIAGNOSTIC			241
+
+#define X25_RESTART                     251     
+#define X25_RESTART_CONFIRM		255 
+
+/* Restart cause field definitions. */
+
+#define X25_RESTART_DTE_ORIGINATED	  0
+#define X25_RESTART_LOCAL_PROCEDURE_ERROR 1
+#define X25_RESTART_NETWORK_CONGESTION	  3
+#define X25_RESTART_NETWORK_OPERATIONAL	  7
+#define X25_RESTART_DTE_ORIGINATED2	  128
+
+
+/* Miscellaneous definitions. */
+
+#define DATA_PACKET_DESIGNATOR		0x01
+#define RR_OR_RNR_PACKET_DESIGNATOR	0x02
+#define RR_PACKET_DESIGNATOR		0x04
+
+#define DEFAULT_WINDOW_SIZE		2
+#define MODULUS				8
+
+#define ADDRLN				1	/* address length field */
+#define MAXADDRLN			15
+#define FACILITIESLN			1	/* facilities length field */
+#define MAXFACILITIESLN			10
+#define MAXUSERDATA			16
+#define MAXCALLINFOLN			(1+15+1+10+16)	/* sum of the five above */
+
+#define PACKET_OK			0
+#define IGNORE_PACKET			1
+#define ERROR_PACKET			2
+
+typedef char    bool;
+#define FALSE	0
+#define TRUE	1
+
+/*
+ *  X.25 Packet format definitions
+ *  This will eventually have to be rewritten without reference
+ *  to bit fields, to be ANSI C compliant and alignment safe.
+ */
+
+typedef u_char octet;
+
+struct x25_calladdr {
+	octet addrlens;		/* presumably the packed address lengths -- confirm */
+	octet address_field[MAXADDRLN];
+};
+
+struct x25_packet {
+	octet bits;			/* read with X25GBITS (q_bit, lc_group_number) */
+	octet logical_channel_number;	/* low part of the lcn, see LCN() */
+	octet packet_type;		/* DP() views this octet as a struct data_packet */
+	octet packet_data;
+};
+#define packet_cause packet_data	/* alias used when testing restart causes */
+
+struct data_packet {
+	octet bits;		/* p_s, p_r and m_bit, see PS(), PR() and MBIT() */
+};
+
+#define FACILITIES_REVERSE_CHARGE	0x1
+#define FACILITIES_THROUGHPUT		0x2
+#define FACILITIES_PACKETSIZE		0x42
+#define FACILITIES_WINDOWSIZE		0x43
+
+#define PKHEADERLN	3
+
+#define DP(xp)          (((struct data_packet *)&(xp) -> packet_type) -> bits)
+#define PS(xp)           X25GBITS(DP(xp), p_s)
+#define PR(xp)           X25GBITS(DP(xp), p_r)
+#define MBIT(xp)         X25GBITS(DP(xp), m_bit)
+#define SPR(xp, v)       X25SBITS(DP(xp), p_r, (v))
+#define SPS(xp, v)       X25SBITS(DP(xp), p_s, (v))
+#define SMBIT(xp, v)     X25SBITS(DP(xp), m_bit, (v))
+/* lcn = channel number + (group number << 8).  Both evaluate their arguments more than once. */
+#define LCN(xp)		((xp) -> logical_channel_number + \
+	(X25GBITS((xp) -> bits, lc_group_number) ? (X25GBITS((xp) -> bits, lc_group_number) << 8) : 0))
+#define SET_LCN(xp, lcn) (((xp) -> logical_channel_number = (lcn)), \
+	(X25SBITS((xp) -> bits, lc_group_number, (lcn) > 255 ? (lcn) >> 8 : 0)))
+
+struct mbuf *pk_template ();
+
+/* Define X.25 packet level states. */
+
+/* Call setup and clearing substates.  */
+
+#define LISTEN           0
+#define READY            1
+#define RECEIVED_CALL    2
+#define SENT_CALL        3
+#define DATA_TRANSFER    4
+#define RECEIVED_CLEAR   5
+#define SENT_CLEAR       6
+
+/* DTE states. */
+
+#define DTE_WAITING		7
+#define DTE_RECEIVED_RESTART	8
+#define DTE_SENT_RESTART	9
+#define DTE_READY		0
+
+/* Cleaning out ... */
+
+#define LCN_ZOMBIE 		10
+
+#define MAXSTATES		11
+
+/*
+ *  The following definitions are used in a switch statement after
+ *  determining the packet type.  These values are returned by the
+ *  pk_decode procedure.  Each is a multiple of MAXSTATES, so adding a
+ *  state gives a distinct case label; parenthesized to expand safely.
+ */
+#define CALL            (0 * MAXSTATES)
+#define CALL_ACCEPTED   (1 * MAXSTATES)
+#define CLEAR           (2 * MAXSTATES)
+#define CLEAR_CONF      (3 * MAXSTATES)
+#define DATA            (4 * MAXSTATES)
+#define INTERRUPT       (5 * MAXSTATES)
+#define INTERRUPT_CONF  (6 * MAXSTATES)
+#define RR              (7 * MAXSTATES)
+#define RNR             (8 * MAXSTATES)
+#define RESET           (9 * MAXSTATES)
+#define RESET_CONF     (10 * MAXSTATES)
+#define RESTART        (11 * MAXSTATES)
+#define RESTART_CONF   (12 * MAXSTATES)
+#define REJECT         (13 * MAXSTATES)
+#define DIAG_TYPE      (14 * MAXSTATES)
+#define INVALID_PACKET (15 * MAXSTATES)
+#define DELETE_PACKET	INVALID_PACKET
+
+/*
+ * The following definitions are used by the restart procedures
+ * for noting whether the PLE is supposed to behave as DTE or DCE
+ * (essentially necessary for operation over LLC2)
+ */
+#define	DTE_DXERESOLVING	0x0001
+#define	DTE_PLAYDTE		0x0002
+#define	DTE_PLAYDCE		0x0004
+#define DTE_CONNECTPENDING	0x0010
+#define	DTE_PRETENDDTE		0x0020
+
+#define MAXRESTARTCOLLISIONS	10
diff --git a/sys/netccitt/pk_acct.c b/sys/netccitt/pk_acct.c
new file mode 100644
index 00000000000..fccd875285e
--- /dev/null
+++ b/sys/netccitt/pk_acct.c
@@ -0,0 +1,145 @@
+/*
+ * Copyright (c) University of British Columbia, 1984
+ * Copyright (c) 1990, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * the Laboratory for Computation Vision and the Computer Science Department
+ * of the University of British Columbia.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)pk_acct.c	8.1 (Berkeley) 6/10/93
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/namei.h>
+#include <sys/proc.h>
+#include <sys/vnode.h>
+#include <sys/kernel.h>
+#include <sys/file.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+
+#include <net/if.h>
+
+#include <netccitt/x25.h>
+#include <netccitt/pk.h>
+#include <netccitt/pk_var.h>
+#include <netccitt/x25acct.h>
+
+
+struct	vnode *pkacctp;
+/*
+ *  Turn packet accounting on (path names the accounting file) or off
+ *  (path == 0).  Returns 0 or an errno; any previous file is closed.
+ */
+pk_accton (path)
+	char *path;
+{
+	register struct vnode *vp = NULL;
+	struct nameidata nd;
+	struct vnode *oacctp = pkacctp;
+	struct proc *p = curproc;
+	int error = 0;
+
+	if (path != 0) {
+		NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, path, p);
+		if (error = vn_open (&nd, FWRITE, 0644))
+			return (error);
+		vp = nd.ni_vp;
+		VOP_UNLOCK(vp);
+		if (vp -> v_type != VREG) {
+			vrele (vp);
+			return (EACCES);
+		}
+	}
+	/* vp is still NULL when turning accounting off, so pk_acct stops. */
+	pkacctp = vp;
+	/* Only close a file that was actually open. */
+	if (oacctp)
+		error = vn_close (oacctp, FWRITE, p -> p_ucred, p);
+	return (error);
+}
+
+/* 
+ *  Write a record describing circuit lcp on the accounting file (address
+ *  in BCD, start/elapsed time, packet size, packet counts).  Does nothing
+ *  if accounting is off or lcp has neither lcd_ceaddr nor lcd_craddr.
+ */
+pk_acct (lcp)
+register struct pklcd *lcp;
+{
+	register struct vnode *vp;
+	register struct sockaddr_x25 *sa;
+	register char *src, *dst;
+	register int len;
+	static struct x25acct acbuf;
+
+	if ((vp = pkacctp) == 0)
+		return;
+	bzero ((caddr_t)&acbuf, sizeof (acbuf));
+	if (lcp -> lcd_ceaddr != 0)
+		sa = lcp -> lcd_ceaddr;
+	else if (lcp -> lcd_craddr != 0) {
+		sa = lcp -> lcd_craddr;
+		acbuf.x25acct_callin = 1;	/* only lcd_craddr is set */
+	} else
+		return;
+
+	if (sa -> x25_opts.op_flags & X25_REVERSE_CHARGE)
+		acbuf.x25acct_revcharge = 1;
+	acbuf.x25acct_stime = lcp -> lcd_stime;
+	acbuf.x25acct_etime = time.tv_sec - acbuf.x25acct_stime;	/* elapsed */
+	acbuf.x25acct_uid = curproc -> p_cred -> p_ruid;
+	acbuf.x25acct_psize = sa -> x25_opts.op_psize;
+	acbuf.x25acct_net = sa -> x25_net;
+	/*
+	 * Convert address to bcd: even digits fill the high nibble of a
+	 * byte, odd digits the low nibble; len counts digits, not bytes.
+	 */
+	src = sa -> x25_addr;
+	dst = acbuf.x25acct_addr;
+	for (len = 0; *src; len++)
+		if (len & 01)
+			*dst++ |= *src++ & 0xf;
+		else
+			*dst = *src++ << 4;
+	acbuf.x25acct_addrlen = len;
+	bcopy (sa -> x25_udata, acbuf.x25acct_udata,
+		sizeof (acbuf.x25acct_udata));
+	acbuf.x25acct_txcnt = lcp -> lcd_txcnt;
+	acbuf.x25acct_rxcnt = lcp -> lcd_rxcnt;
+
+	(void) vn_rdwr(UIO_WRITE, vp, (caddr_t)&acbuf, sizeof (acbuf),
+		(off_t)0, UIO_SYSSPACE, IO_UNIT|IO_APPEND,
+		curproc -> p_ucred, (int *)0,
+		(struct proc *)0);
+}
diff --git a/sys/netccitt/pk_debug.c b/sys/netccitt/pk_debug.c
new file mode 100644
index 00000000000..b5103557c56
--- /dev/null
+++ b/sys/netccitt/pk_debug.c
@@ -0,0 +1,140 @@
+/*
+ * Copyright (c) University of British Columbia, 1984
+ * Copyright (c) 1990, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * the Laboratory for Computation Vision and the Computer Science Department
+ * of the University of British Columbia.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)pk_debug.c	8.1 (Berkeley) 6/10/93
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/protosw.h>
+#include <sys/socketvar.h>
+#include <sys/errno.h>
+
+#include <net/if.h>
+
+#include <netccitt/x25.h>
+#include <netccitt/pk.h>
+#include <netccitt/pk_var.h>
+
+char	*pk_state[] = {		/* names in the order of pk.h states LISTEN (0) .. SENT_CLEAR (6) */
+	"Listen",	"Ready",	"Received-Call",
+	"Sent-Call",	"Data-Transfer","Received-Clear",
+	"Sent-Clear",
+};
+
+char   *pk_name[] = {		/* packet type names, indexed by pk_decode () / MAXSTATES */
+	"Call",		"Call-Conf",	"Clear",
+	"Clear-Conf",	"Data",		"Intr",		"Intr-Conf",
+	"Rr",		"Rnr",		"Reset",	"Reset-Conf",
+	"Restart",	"Restart-Conf",	"Reject",	"Diagnostic",
+	"Invalid"
+};
+
+/*
+ *  Trace helper: when xcp -> xc_ptrace is set, print the lcn, direction
+ *  string, packet type name, mbuf count, total length and first 5 octets.
+ */
+pk_trace (xcp, m, dir)
+struct x25config *xcp;
+register struct mbuf *m;
+char *dir;
+{
+	struct x25_packet *xp = mtod(m, struct x25_packet *);
+	register char *s = (char *) xp;
+	register int i, len = 0, cnt = 0;
+
+	if (xcp -> xc_ptrace == 0)
+		return;
+	for (i = pk_decode (xp) / MAXSTATES; m; m = m -> m_next, ++cnt)
+		len += m -> m_len;
+	printf ("LCN=%d %s:	%s	#=%d, len=%d ",
+		LCN(xp), dir, pk_name[i], cnt, len);
+	for (i = 0; i < 5; ++i)
+		printf ("%x ", (int) s[i] & 0xff);
+	printf ("\n");
+}
+
+mbuf_cache(c, m)	/* keep a copy of m in circular cache c, resizing c first */
+register struct mbuf_cache *c;
+struct mbuf *m;
+{
+	register struct mbuf **mp;
+
+	if (c->mbc_size != c->mbc_oldsize) {	/* requested size changed */
+		unsigned zero_size, copy_size;
+		unsigned new_size = c->mbc_size * sizeof(m);	/* in bytes */
+		caddr_t cache = (caddr_t)c->mbc_cache;	/* old array */
+
+		if (new_size) {
+			c->mbc_cache = (struct mbuf **)
+				malloc(new_size, M_MBUF, M_NOWAIT);
+			if (c->mbc_cache == 0) {
+				c->mbc_cache = (struct mbuf **)cache;	/* keep old array */
+				return;		/* m is not cached this time */
+			}
+			c->mbc_num %= c->mbc_size;
+		} else
+			c->mbc_cache = 0;
+		if (c->mbc_size < c->mbc_oldsize) {	/* shrinking */
+			register struct mbuf **mplim;
+			mp = c->mbc_size + (struct mbuf **)cache;
+			mplim = c->mbc_oldsize + (struct mbuf **)cache;
+			while (mp < mplim)	/* free entries that no longer fit */
+				m_freem(*mp++);
+			zero_size = 0;
+		} else	/* growing: the added slots are zeroed below */
+			zero_size = (c->mbc_size - c->mbc_oldsize) * sizeof(m);
+		copy_size = new_size - zero_size;
+		c->mbc_oldsize = c->mbc_size;
+		if (copy_size)
+			bcopy(cache, (caddr_t)c->mbc_cache, copy_size);
+		if (cache)
+			free(cache, M_MBUF);
+		if (zero_size)
+			bzero(copy_size + (caddr_t)c->mbc_cache, zero_size);
+	}
+	if (c->mbc_size == 0)	/* nothing to store into */
+		return;
+	mp = c->mbc_cache + c->mbc_num;
+	c->mbc_num = (1 + c->mbc_num) % c->mbc_size;	/* advance, wrapping around */
+	if (*mp)
+		m_freem(*mp);	/* drop the copy previously held in this slot */
+	if (*mp = m_copym(m, 0, M_COPYALL, M_DONTWAIT))
+		(*mp)->m_flags |= m->m_flags & 0x08;	/* NOTE(review): meaning of flag 0x08 not visible here */
+}
diff --git a/sys/netccitt/pk_input.c b/sys/netccitt/pk_input.c
new file mode 100644
index 00000000000..1f8f0bc7127
--- /dev/null
+++ b/sys/netccitt/pk_input.c
@@ -0,0 +1,1119 @@
+/*
+ * Copyright (c) University of British Columbia, 1984
+ * Copyright (C) Computer Science Department IV, 
+ * 		 University of Erlangen-Nuremberg, Germany, 1992
+ * Copyright (c) 1991, 1992, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by the
+ * Laboratory for Computation Vision and the Computer Science Department
+ * of the the University of British Columbia and the Computer Science
+ * Department (IV) of the University of Erlangen-Nuremberg, Germany.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)pk_input.c	8.1 (Berkeley) 6/10/93
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/protosw.h>
+#include <sys/socketvar.h>
+#include <sys/errno.h>
+
+#include <net/if.h>
+#include <net/if_dl.h>
+#include <net/if_llc.h>
+#include <net/route.h>
+
+#include <netccitt/dll.h>
+#include <netccitt/x25.h>
+#include <netccitt/pk.h>
+#include <netccitt/pk_var.h>
+#include <netccitt/llc_var.h>
+
+struct pkcb_q pkcb_q = {&pkcb_q, &pkcb_q};
+
+/*
+ * ccittintr() is the generic interrupt handler for HDLC, LLC2, and X.25. This
+ * allows a kernel to run X.25 without HDLC, LLC2, or both (in case we employ
+ * boards that do all the stuff themselves, e.g. ADAX X.25 or TPS ISDN.)
+ */
+void
+ccittintr ()
+{
+	extern struct ifqueue pkintrq;
+	extern struct ifqueue hdintrq;
+	extern struct ifqueue llcintrq;
+
+#ifdef HDLC
+	if (hdintrq.ifq_len)	/* HDLC input queue is not empty */
+		hdintr ();
+#endif
+#ifdef LLC
+	if (llcintrq.ifq_len)	/* LLC input queue is not empty */
+		llcintr ();
+#endif
+	if (pkintrq.ifq_len)	/* packet level input queue is not empty */
+		pkintr ();
+}
+
+/*
+ *  Create a pkcb for interface address ia bound to the link level protocol
+ *  of its x25config, queue it on pkcb_q and return it; 0 on failure.
+ */
+struct pkcb *
+pk_newlink (ia, llnext)
+struct x25_ifaddr *ia;
+caddr_t llnext;
+{
+	register struct x25config *xcp = &ia -> ia_xc;
+	register struct pkcb *pkp;
+	register struct protosw *pp;
+	unsigned size = sizeof (struct pkcb);
+
+	pp = pffindproto (AF_CCITT, (int) xcp -> xc_lproto, 0);
+	if (pp == 0 || pp -> pr_output == 0) {
+		pk_message (0, xcp, "link level protosw error");
+		return ((struct pkcb *)0);
+	}
+	/* Allocate and clear a network control block structure */
+	pkp = (struct pkcb *) malloc (size, M_PCB, M_WAITOK);
+	if (pkp == 0)
+		return ((struct pkcb *)0);
+	bzero ((caddr_t) pkp, size);
+	pkp -> pk_lloutput = pp -> pr_output;
+	pkp -> pk_llctlinput = (caddr_t (*)()) pp -> pr_ctlinput;
+	pkp -> pk_xcp = xcp;
+	pkp -> pk_ia = ia;
+	pkp -> pk_state = DTE_WAITING;
+	pkp -> pk_llnext = llnext;
+	insque (pkp, &pkcb_q);
+
+	/* set defaults */
+	if (xcp -> xc_pwsize == 0)
+		xcp -> xc_pwsize = DEFAULT_WINDOW_SIZE;
+	if (xcp -> xc_psize == 0)
+		xcp -> xc_psize = X25_PS128;
+	/* pk_input needs the channel vector, so undo everything on failure. */
+	if (pk_resize (pkp)) {
+		remque (pkp);
+		if (pkp -> pk_chan)
+			free ((caddr_t) pkp -> pk_chan, M_IFADDR);
+		free ((caddr_t) pkp, M_PCB);
+		return ((struct pkcb *)0);
+	}
+	return (pkp);
+}
+
+
+/*
+ *  Disconnect all channels of pkp and drop one reference; on the last
+ *  one unlink pkp, notify the link level and free it.  Returns 0/errno.
+ */
+pk_dellink (pkp)
+register struct pkcb *pkp;
+{
+	register int i;
+	register struct protosw *pp;
+
+	/*
+	 * Essentially we have the choice to
+	 * (a) go ahead and let the route be deleted and
+	 *     leave the pkcb associated with that route
+	 *     as it is, i.e. the connections stay open
+	 * (b) do a pk_disconnect() on all channels associated
+	 *     with the route via the pkcb and then proceed.
+	 *
+	 * For the time being we stick with (b).  pk_chan[] holds
+	 * pk_maxlcn + 1 entries, so lcn pk_maxlcn is included.
+	 */
+	for (i = 1; i <= pkp -> pk_maxlcn; ++i)
+		if (pkp -> pk_chan[i])
+			pk_disconnect (pkp -> pk_chan[i]);
+
+	/*
+	 * First find the protoswitch to get hold of the link level
+	 * protocol to be notified that the packet level entity is
+	 * dissolving ...
+	 */
+	pp = pffindproto (AF_CCITT, (int) pkp -> pk_xcp -> xc_lproto, 0);
+	if (pp == 0 || pp -> pr_output == 0) {
+		pk_message (0, pkp -> pk_xcp, "link level protosw error");
+		return (EPROTONOSUPPORT);
+	}
+
+	pkp -> pk_refcount--;
+	if (!pkp -> pk_refcount) {
+		struct dll_ctlinfo ctlinfo;
+
+		remque (pkp);
+		if (pkp -> pk_rt -> rt_llinfo == (caddr_t) pkp)
+			pkp -> pk_rt -> rt_llinfo = (caddr_t) NULL;
+
+		/*
+		 * Tell the link level that the pkcb is dissolving
+		 */
+		if (pp -> pr_ctlinput && pkp -> pk_llnext) {
+			ctlinfo.dlcti_pcb = pkp -> pk_llnext;
+			ctlinfo.dlcti_rt = pkp -> pk_rt;
+			(pp -> pr_ctlinput)(PRC_DISCONNECT_REQUEST,
+					    pkp -> pk_xcp, &ctlinfo);
+		}
+		free ((caddr_t) pkp -> pk_chan, M_IFADDR);
+		free ((caddr_t) pkp, M_PCB);
+	}
+
+	return (0);
+}
+
+
+/*
+ *  (Re)allocate pkp's logical channel vector to match xc_maxlcn, keeping
+ *  the lcn 0 descriptor across a resize.  Returns 0 or ENOBUFS.
+ */
+pk_resize (pkp)
+register struct pkcb *pkp;
+{
+	struct x25config *xcp = pkp -> pk_xcp;
+	struct pklcd *lcn0 = 0;		/* descriptor for lcn 0 */
+	unsigned nbytes;
+
+	if (pkp -> pk_chan && pkp -> pk_maxlcn != xcp -> xc_maxlcn) {
+		/* Size changed: restart, then drop the old vector. */
+		pk_restart (pkp, X25_RESTART_NETWORK_CONGESTION);
+		lcn0 = pkp -> pk_chan[0];
+		free ((caddr_t) pkp -> pk_chan, M_IFADDR);
+		pkp -> pk_chan = 0;
+	}
+	if (pkp -> pk_chan)
+		return 0;		/* vector already has the right size */
+	pkp -> pk_maxlcn = xcp -> xc_maxlcn;
+	nbytes = (pkp -> pk_maxlcn + 1) * sizeof (struct pklcd *);
+	pkp -> pk_chan = (struct pklcd **) malloc (nbytes, M_IFADDR, M_WAITOK);
+	if (pkp -> pk_chan == 0) {
+		if (lcn0)
+			pk_close (lcn0);
+		return (ENOBUFS);
+	}
+	bzero ((caddr_t) pkp -> pk_chan, nbytes);
+	/* Allocate a logical channel descriptor for lcn 0 if none survived */
+	if (lcn0 == 0 && (lcn0 = pk_attach ((struct socket *)0)) == 0)
+		return (ENOBUFS);
+	lcn0 -> lcd_state = READY;
+	lcn0 -> lcd_pkp = pkp;
+	pkp -> pk_chan[0] = lcn0;
+	return 0;
+}
+
+/* 
+ *  Called by the link level when the link comes up, is reset or goes down.
+ *  Returns the pkcb for PRC_CONNECT_INDICATION (0 if none found), else 0.
+ */
+/*VARARGS*/
+caddr_t
+pk_ctlinput (code, src, addr)
+	struct sockaddr *src;
+	caddr_t addr;
+{
+	register struct pkcb *pkp = (struct pkcb *) addr;
+
+	switch (code) {
+	case PRC_LINKUP: 
+		if (pkp -> pk_state == DTE_WAITING)
+			pk_restart (pkp, X25_RESTART_NETWORK_CONGESTION);
+		break;
+
+	case PRC_LINKDOWN: 
+		pk_restart (pkp, -1);	/* Clear all active circuits */
+		pkp -> pk_state = DTE_WAITING;
+		break;
+
+	case PRC_LINKRESET: 
+		pk_restart (pkp, X25_RESTART_NETWORK_CONGESTION);
+		break;
+		
+	case PRC_CONNECT_INDICATION: {	/* pkcb is found via the route to src */
+		struct rtentry *llrt;
+
+		if ((llrt = rtalloc1(src, 0)) == 0)
+			return 0;
+		else llrt -> rt_refcnt--;	/* route is used below without keeping the reference */
+		
+		pkp = (((struct npaidbentry *) llrt -> rt_llinfo) -> np_rt) ?
+			(struct pkcb *)(((struct npaidbentry *) llrt -> rt_llinfo) -> np_rt -> rt_llinfo) : (struct pkcb *) 0;
+		if (pkp == (struct pkcb *) 0)
+			return 0;
+		pkp -> pk_llnext = addr;	/* for this code addr is saved, not used as a pkcb */
+
+		return ((caddr_t) pkp);
+	}
+	case PRC_DISCONNECT_INDICATION:
+		pk_restart (pkp, -1) ;  /* Clear all active circuits */
+		pkp -> pk_state = DTE_WAITING;
+		pkp -> pk_llnext = (caddr_t) 0;
+	}
+	return (0);
+}
+struct ifqueue pkintrq;
+/*
+ * This routine is called if there are semi-smart devices that do HDLC
+ * in hardware and want to queue the packet and call level 3 directly
+ */
+pkintr ()
+{
+	register struct mbuf *pkt;
+	register int ipl;
+
+	for (;;) {
+		/* Take the next packet off pkintrq under splimp. */
+		ipl = splimp ();
+		IF_DEQUEUE (&pkintrq, pkt);
+		splx (ipl);
+		if (pkt == 0)
+			return;
+		if (pkt -> m_len >= PKHEADERLN) {
+			pk_input (pkt);
+			continue;
+		}
+		/* First mbuf cannot hold a packet header: log and drop. */
+		printf ("pkintr: packet too short (len=%d)\n",
+			pkt -> m_len);
+		m_freem (pkt);
+	}
+}
+struct mbuf *pk_bad_packet;
+struct mbuf_cache pk_input_cache = {0 };
+/* 
+ *  X.25 PACKET INPUT
+ *
+ *  This procedure is called by a link level procedure whenever
+ *  an information frame is received. It decodes the packet and
+ *  demultiplexes based on the logical channel number.
+ *
+ *  We change the original conventions of the UBC code here --
+ *  since there may be multiple pkcb's for a given interface
+ *  of type 802.2 class 2, we retrieve which one it is from
+ *  m_pkthdr.rcvif (which has been overwritten by lower layers);
+ *  That field is then restored for the benefit of upper layers which
+ *  may make use of it, such as CLNP.
+ *
+ */
+
+#define RESTART_DTE_ORIGINATED(xp) (((xp) -> packet_cause == X25_RESTART_DTE_ORIGINATED) || \
+			    ((xp) -> packet_cause >= X25_RESTART_DTE_ORIGINATED2))
+
+pk_input (m)
+register struct mbuf *m;
+{
+	register struct x25_packet *xp;
+	register struct pklcd *lcp;
+	register struct socket *so = 0;
+	register struct pkcb *pkp;
+	int  ptype, lcn, lcdstate = LISTEN;
+
+	if (pk_input_cache.mbc_size || pk_input_cache.mbc_oldsize)
+		mbuf_cache (&pk_input_cache, m);
+	if ((m -> m_flags & M_PKTHDR) == 0)
+		panic ("pkintr");
+
+	if ((pkp = (struct pkcb *) m -> m_pkthdr.rcvif) == 0)
+		return;
+	xp = mtod (m, struct x25_packet *);
+	ptype = pk_decode (xp);
+	lcn = LCN(xp);
+	lcp = pkp -> pk_chan[lcn];
+
+	/* 
+	 *  If the DTE is in Restart  state, then it will ignore data, 
+	 *  interrupt, call setup and clearing, flow control and reset 
+	 *  packets.
+	 */
+	if (lcn < 0 || lcn > pkp -> pk_maxlcn) {
+		pk_message (lcn, pkp -> pk_xcp, "illegal lcn");
+		m_freem (m);
+		return;
+	}
+
+	pk_trace (pkp -> pk_xcp, m, "P-In");
+
+	if (pkp -> pk_state != DTE_READY && ptype != RESTART && ptype != RESTART_CONF) {
+		m_freem (m);
+		return;
+	}
+	if (lcp) {
+		so = lcp -> lcd_so;
+		lcdstate = lcp -> lcd_state;
+	} else {
+		if (ptype == CLEAR) {	/* idle line probe (Datapac specific) */
+			/* send response on lcd 0's output queue */
+			lcp = pkp -> pk_chan[0];
+			lcp -> lcd_template = pk_template (lcn, X25_CLEAR_CONFIRM);
+			pk_output (lcp);
+			m_freem (m);
+			return;
+		}
+		if (ptype != CALL)
+			ptype = INVALID_PACKET;
+	}
+
+	if (lcn == 0 && ptype != RESTART && ptype != RESTART_CONF) {
+		pk_message (0, pkp -> pk_xcp, "illegal ptype (%d, %s) on lcn 0",
+			ptype, pk_name[ptype / MAXSTATES]);
+		if (pk_bad_packet)
+			m_freem (pk_bad_packet);
+		pk_bad_packet = m;
+		return;
+	}
+
+	m -> m_pkthdr.rcvif = pkp -> pk_ia -> ia_ifp;
+
+	switch (ptype + lcdstate) {
+	/* 
+	 *  Incoming Call packet received. 
+	 */
+	case CALL + LISTEN: 
+		pk_incoming_call (pkp, m);
+		break;
+
+	/* 	
+	 *  Call collision: Just throw this "incoming call" away since 
+	 *  the DCE will ignore it anyway. 
+	 */
+	case CALL + SENT_CALL: 
+		pk_message ((int) lcn, pkp -> pk_xcp, 
+			"incoming call collision");
+		break;
+
+	/* 
+	 *  Call confirmation packet received. This usually means our
+	 *  previous connect request is now complete.
+	 */
+	case CALL_ACCEPTED + SENT_CALL: 
+		MCHTYPE(m, MT_CONTROL);
+		pk_call_accepted (lcp, m);
+		break;
+
+	/* 
+	 *  This condition can only happen if the previous state was
+	 *  SENT_CALL. Just ignore the packet, eventually a clear 
+	 *  confirmation should arrive.
+	 */
+	case CALL_ACCEPTED + SENT_CLEAR: 
+		break;
+
+	/* 
+	 *  Clear packet received. This requires a complete tear down
+	 *  of the virtual circuit.  Free buffers and control blocks.
+	 *  and send a clear confirmation.
+	 */
+	case CLEAR + READY:
+	case CLEAR + RECEIVED_CALL: 
+	case CLEAR + SENT_CALL: 
+	case CLEAR + DATA_TRANSFER: 
+		lcp -> lcd_state = RECEIVED_CLEAR;
+		lcp -> lcd_template = pk_template (lcp -> lcd_lcn, X25_CLEAR_CONFIRM);
+		pk_output (lcp);
+		pk_clearcause (pkp, xp);
+		if (lcp -> lcd_upper) {
+			MCHTYPE(m, MT_CONTROL);
+			lcp -> lcd_upper (lcp, m);
+		}
+		pk_close (lcp);
+		lcp = 0;
+		break;
+
+	/* 
+	 *  Clear collision: Treat this clear packet as a confirmation.
+	 */
+	case CLEAR + SENT_CLEAR: 
+		pk_close (lcp);
+		break;
+
+	/* 
+	 *  Clear confirmation received. This usually means the virtual
+	 *  circuit is now completely removed.
+	 */
+	case CLEAR_CONF + SENT_CLEAR: 
+		pk_close (lcp);
+		break;
+
+	/* 
+	 *  A clear confirmation on an unassigned logical channel - just
+	 *  ignore it. Note: All other packets on an unassigned channel
+	 *  results in a clear.
+	 */
+	case CLEAR_CONF + READY:
+	case CLEAR_CONF + LISTEN:
+		break;
+
+	/* 
+	 *  Data packet received. Pass on to next level. Move the Q and M
+	 *  bits into the data portion for the next level.
+	 */
+	case DATA + DATA_TRANSFER: 
+		if (lcp -> lcd_reset_condition) {
+			ptype = DELETE_PACKET;
+			break;
+		}
+
+		/* 
+		 *  Process the P(S) flow control information in this Data packet. 
+		 *  Check that the packets arrive in the correct sequence and that 
+		 *  they are within the "lcd_input_window". Input window rotation is 
+		 *  initiated by the receive interface.
+		 */
+
+		if (PS(xp) != ((lcp -> lcd_rsn + 1) % MODULUS) ||
+			PS(xp) == ((lcp -> lcd_input_window + lcp -> lcd_windowsize) % MODULUS)) {
+			m_freem (m);
+			pk_procerror (RESET, lcp, "p(s) flow control error", 1);
+			break;
+		}
+		lcp -> lcd_rsn = PS(xp);
+
+		if (pk_ack (lcp, PR(xp)) != PACKET_OK) {
+			m_freem (m);
+			break;
+		}
+		m -> m_data += PKHEADERLN;
+		m -> m_len -= PKHEADERLN;
+		m -> m_pkthdr.len -= PKHEADERLN;
+
+		lcp -> lcd_rxcnt++;
+		if (lcp -> lcd_flags & X25_MBS_HOLD) {
+			register struct mbuf *n = lcp -> lcd_cps;
+			int mbit = MBIT(xp);
+			octet q_and_d_bits;
+
+			if (n) {
+				n -> m_pkthdr.len += m -> m_pkthdr.len;
+				while (n -> m_next)
+					n = n -> m_next;
+				n -> m_next = m;
+				m = lcp -> lcd_cps;
+
+				if (lcp -> lcd_cpsmax &&
+				    n -> m_pkthdr.len > lcp -> lcd_cpsmax) {
+					pk_procerror (RESET, lcp,
+						"C.P.S. overflow", 128);
+					return;
+				}
+				q_and_d_bits = 0xc0 & *(octet *) xp;
+				xp = (struct x25_packet *)
+					(mtod (m, octet *) - PKHEADERLN);
+				*(octet *) xp |= q_and_d_bits;
+			}
+			if (mbit) {
+				lcp -> lcd_cps = m;
+				pk_flowcontrol (lcp, 0, 1);
+				return;
+			}
+			lcp -> lcd_cps = 0;
+		}
+		if (so == 0)
+			break;
+		if (lcp -> lcd_flags & X25_MQBIT) {
+			octet t = (X25GBITS(xp -> bits, q_bit)) ? t = 0x80 : 0;
+
+			if (MBIT(xp))
+				t |= 0x40;
+			m -> m_data -= 1;
+			m -> m_len += 1;
+			m -> m_pkthdr.len += 1;
+			*mtod (m, octet *) = t;
+		}
+
+		/*
+		 * Discard Q-BIT packets if the application
+		 * doesn't want to be informed of M and Q bit status
+		 */
+		if (X25GBITS(xp -> bits, q_bit) 
+		    && (lcp -> lcd_flags & X25_MQBIT) == 0) {
+			m_freem (m);
+			/*
+			 * NB.  This is dangerous: sending a RR here can
+			 * cause sequence number errors if a previous data
+			 * packet has not yet been passed up to the application
+			 * (RR's are normally generated via PRU_RCVD).
+			 */
+			pk_flowcontrol (lcp, 0, 1);
+		} else {
+			sbappendrecord (&so -> so_rcv, m);
+			sorwakeup (so);
+		}
+		break;
+
+	/* 
+	 *  Interrupt packet received.
+	 */
+	case INTERRUPT + DATA_TRANSFER: 
+		if (lcp -> lcd_reset_condition)
+			break;
+		lcp -> lcd_intrdata = xp -> packet_data;
+		lcp -> lcd_template = pk_template (lcp -> lcd_lcn, X25_INTERRUPT_CONFIRM);
+		pk_output (lcp);
+		m -> m_data += PKHEADERLN;
+		m -> m_len -= PKHEADERLN;
+		m -> m_pkthdr.len -= PKHEADERLN;
+		MCHTYPE(m, MT_OOBDATA);
+		if (so) {
+			if (so -> so_options & SO_OOBINLINE)
+				sbinsertoob (&so -> so_rcv, m);
+			else
+				m_freem (m);
+			sohasoutofband (so);
+		}
+		break;
+
+	/* 
+	 *  Interrupt confirmation packet received.
+	 */
+	case INTERRUPT_CONF + DATA_TRANSFER: 
+		if (lcp -> lcd_reset_condition)
+			break;
+		if (lcp -> lcd_intrconf_pending == TRUE)
+			lcp -> lcd_intrconf_pending = FALSE;
+		else
+			pk_procerror (RESET, lcp, "unexpected packet", 43);
+		break;
+
+	/* 
+	 *  Receiver ready received. Rotate the output window and output
+	 *  any data packets waiting transmission.
+	 */
+	case RR + DATA_TRANSFER: 
+		if (lcp -> lcd_reset_condition ||
+		    pk_ack (lcp, PR(xp)) != PACKET_OK) {
+			ptype = DELETE_PACKET;
+			break;
+		}
+		if (lcp -> lcd_rnr_condition == TRUE)
+			lcp -> lcd_rnr_condition = FALSE;
+		pk_output (lcp);
+		break;
+
+	/* 
+	 *  Receiver Not Ready received. Packets up to the P(R) can be
+	 *  sent. Condition is cleared with a RR.
+	 */
+	case RNR + DATA_TRANSFER: 
+		if (lcp -> lcd_reset_condition ||
+		    pk_ack (lcp, PR(xp)) != PACKET_OK) {
+			ptype = DELETE_PACKET;
+			break;
+		}
+		lcp -> lcd_rnr_condition = TRUE;
+		break;
+
+	/* 
+	 *  Reset packet received. Set state to FLOW_OPEN.  The Input and
+	 *  Output window edges are set to zero. Both the send and receive
+	 *  numbers are reset. A confirmation is returned.
+	 */
+	case RESET + DATA_TRANSFER: 
+		if (lcp -> lcd_reset_condition)
+			/* Reset collision. Just ignore packet. */
+			break;
+
+		pk_resetcause (pkp, xp);
+		lcp -> lcd_window_condition = lcp -> lcd_rnr_condition =
+			lcp -> lcd_intrconf_pending = FALSE;
+		lcp -> lcd_output_window = lcp -> lcd_input_window =
+			lcp -> lcd_last_transmitted_pr = 0;
+		lcp -> lcd_ssn = 0;
+		lcp -> lcd_rsn = MODULUS - 1;
+
+		lcp -> lcd_template = pk_template (lcp -> lcd_lcn, X25_RESET_CONFIRM);
+		pk_output (lcp);
+
+		pk_flush (lcp);
+		if (so == 0)
+			break;
+		wakeup ((caddr_t) & so -> so_timeo);
+		sorwakeup (so);
+		sowwakeup (so);
+		break;
+
+	/* 
+	 *  Reset confirmation received.
+	 */
+	case RESET_CONF + DATA_TRANSFER: 
+		if (lcp -> lcd_reset_condition) {
+			lcp -> lcd_reset_condition = FALSE;
+			pk_output (lcp);
+		}
+		else
+			pk_procerror (RESET, lcp, "unexpected packet", 32);
+		break;
+
+	case DATA + SENT_CLEAR: 
+		ptype = DELETE_PACKET;
+	case RR + SENT_CLEAR: 
+	case RNR + SENT_CLEAR: 
+	case INTERRUPT + SENT_CLEAR: 
+	case INTERRUPT_CONF + SENT_CLEAR: 
+	case RESET + SENT_CLEAR: 
+	case RESET_CONF + SENT_CLEAR: 
+		/* Just ignore p if we have sent a CLEAR already.
+		   */
+		break;
+
+	/* 
+	 *  Restart sets all the permanent virtual circuits to the "Data
+	 *  Transfer" state and all the switched virtual circuits to the
+	 *  "Ready" state.
+	 */
+	case RESTART + READY: 
+		switch (pkp -> pk_state) {
+		case DTE_SENT_RESTART: 
+			/* 
+			 * Restart collision.
+			 * In case the restart cause is "DTE originated" we
+			 * have a DTE-DTE situation and are trying to resolve
+			 * who is going to play DTE/DCE [ISO 8208:4.2-4.5]
+			 */
+			if (RESTART_DTE_ORIGINATED(xp)) {
+				pk_restart (pkp, X25_RESTART_DTE_ORIGINATED);
+				pk_message (0, pkp -> pk_xcp,
+					    "RESTART collision");
+				if ((pkp -> pk_restartcolls++) > MAXRESTARTCOLLISIONS) {
+					pk_message (0, pkp -> pk_xcp,
+						    "excessive RESTART collisions");
+					pkp -> pk_restartcolls = 0;
+				}
+				break;
+			}
+			pkp -> pk_state = DTE_READY;
+			pkp -> pk_dxerole |= DTE_PLAYDTE;
+			pkp -> pk_dxerole &= ~DTE_PLAYDCE;
+			pk_message (0, pkp -> pk_xcp,
+				"Packet level operational");
+			pk_message (0, pkp -> pk_xcp, 
+				    "Assuming DTE role");
+			if (pkp -> pk_dxerole & DTE_CONNECTPENDING)
+				pk_callcomplete (pkp);
+			break;
+
+		default: 
+			pk_restart (pkp, -1);
+			pk_restartcause (pkp, xp);
+			pkp -> pk_chan[0] -> lcd_template = pk_template (0,
+				X25_RESTART_CONFIRM);
+			pk_output (pkp -> pk_chan[0]);
+			pkp -> pk_state = DTE_READY;
+			pkp -> pk_dxerole |= RESTART_DTE_ORIGINATED(xp) ? DTE_PLAYDCE :
+				DTE_PLAYDTE;
+			if (pkp -> pk_dxerole & DTE_PLAYDTE) {
+				pkp -> pk_dxerole &= ~DTE_PLAYDCE;
+				pk_message (0, pkp -> pk_xcp, 
+					    "Assuming DTE role");
+			} else {
+				pkp -> pk_dxerole &= ~DTE_PLAYDTE;
+				pk_message (0, pkp -> pk_xcp, 
+					 "Assuming DCE role");
+			}
+			if (pkp -> pk_dxerole & DTE_CONNECTPENDING)
+				pk_callcomplete (pkp);
+		}
+		break;
+
+	/* 
+	 *  Restart confirmation received. All logical channels are set
+	 *  to READY. 
+	 */
+	case RESTART_CONF + READY: 
+		switch (pkp -> pk_state) {
+		case DTE_SENT_RESTART: 
+			pkp -> pk_state = DTE_READY;
+			pkp -> pk_dxerole |= DTE_PLAYDTE;
+			pkp -> pk_dxerole &= ~DTE_PLAYDCE;
+			pk_message (0, pkp -> pk_xcp,
+				    "Packet level operational");
+			pk_message (0, pkp -> pk_xcp,
+				    "Assuming DTE role");
+			if (pkp -> pk_dxerole & DTE_CONNECTPENDING)
+				pk_callcomplete (pkp);
+			break;
+
+		default: 
+			/* Restart local procedure error. */
+			pk_restart (pkp, X25_RESTART_LOCAL_PROCEDURE_ERROR);
+			pkp -> pk_state = DTE_SENT_RESTART;
+			pkp -> pk_dxerole &= ~(DTE_PLAYDTE | DTE_PLAYDCE);
+		}
+		break;
+
+	default: 
+		if (lcp) {
+			pk_procerror (CLEAR, lcp, "unknown packet error", 33);
+			pk_message (lcn, pkp -> pk_xcp,
+				"\"%s\" unexpected in \"%s\" state",
+				pk_name[ptype/MAXSTATES], pk_state[lcdstate]);
+		} else
+			pk_message (lcn, pkp -> pk_xcp,
+				"packet arrived on unassigned lcn");
+		break;
+	}
+	if (so == 0 && lcp && lcp -> lcd_upper && lcdstate == DATA_TRANSFER) {
+		if (ptype != DATA && ptype != INTERRUPT)
+			MCHTYPE(m, MT_CONTROL);
+		lcp -> lcd_upper (lcp, m);
+	} else if (ptype != DATA && ptype != INTERRUPT)
+		m_freem (m);
+}
+
+/*
+ * Copy a decoded address string from "from" to "to", adjusting its
+ * front according to the link configuration.
+ *
+ * from     - NUL-terminated digit string decoded from the packet
+ * to       - destination for the adjusted, NUL-terminated string
+ * dnicname - NUL-terminated decimal string placed in front of the
+ *            address when xc_nodnic applies
+ * xcp      - configuration supplying xc_prepnd0 and xc_nodnic
+ *
+ * If xc_prepnd0 is set and the address begins with '0', that leading
+ * '0' is dropped.  Otherwise, if xc_nodnic is set, dnicname is written
+ * first.  In all cases the remaining digits of "from" follow.
+ *
+ * No length is passed in, so nothing is bounds-checked here; the caller
+ * must make sure "to" can hold dnicname plus the address plus the NUL.
+ */
+static
+prune_dnic (from, to, dnicname, xcp)
+char *from, *to, *dnicname;
+register struct x25config *xcp;
+{
+	register char *cp1 = from, *cp2 = to;
+
+	if (xcp -> xc_prepnd0 && *cp1 == '0') {
+		/* step over the prefix digit and copy what follows it */
+		cp1++;
+		goto copyrest;
+	}
+	if (xcp -> xc_nodnic) {
+		register char *dp;
+
+		/* cp2 is left on the NUL so the address overwrites it */
+		for (dp = dnicname; *cp2 = *dp++;)
+			cp2++;
+	}
+copyrest:
+	while (*cp2 = *cp1++)
+		cp2++;
+}
+/*
+ * Expand packed 4-bit digits into a NUL-terminated ASCII digit string.
+ *
+ * from  - packed source, two digits per octet
+ * to    - output buffer; receives len characters followed by a NUL
+ * lower - nibble phase: while it is odd the digit comes from the low
+ *         nibble of *from (and from then advances), while it is even
+ *         the digit comes from the high nibble; it is bumped per digit
+ * len   - number of digits to produce
+ */
+/* static */
+pk_simple_bsd (from, to, lower, len)
+register octet *from, *to;
+register len, lower;
+{
+	register int digit;
+
+	for (; len > 0; len--, lower++) {
+		if (lower & 0x01)
+			digit = *from++;	/* low nibble; octet is used up */
+		else
+			digit = *from >> 4;	/* high nibble; stay on this octet */
+		*to++ = (digit & 0x0f) | 0x30;
+	}
+	*to = 0;
+}
+
+/*
+ * Fill in a sockaddr_x25 from the address block of a call packet.
+ *
+ * a         - address block (length nibbles plus packed digits)
+ * iscalling - non-zero to decode the calling address, which starts
+ *             right after the called address digits; zero to decode
+ *             the called address at the start of address_field
+ * sa        - result; zeroed, then x25_len, x25_family and x25_addr
+ *             are set
+ * xcp       - link configuration; when x25_net is non-zero and either
+ *             xc_nodnic or xc_prepnd0 is set, the digit string goes
+ *             through prune_dnic() instead of being copied verbatim
+ */
+/*static octet * */
+pk_from_bcd (a, iscalling, sa, xcp)
+register struct x25_calladdr *a;
+register struct sockaddr_x25 *sa;
+register struct x25config *xcp;
+{
+	octet digits[MAXADDRLN+1];
+	octet *src;
+	unsigned count;
+	int skip;
+
+	bzero ((caddr_t) sa, sizeof (*sa));
+	sa -> x25_family = AF_CCITT;
+	sa -> x25_len = sizeof (*sa);
+
+	if (iscalling) {
+		/* the calling digits follow the called ones, nibble-packed */
+		skip = X25GBITS(a -> addrlens, called_addrlen);
+		src = a -> address_field + (skip / 2);
+		count = X25GBITS(a -> addrlens, calling_addrlen);
+	} else {
+		skip = 0;
+		src = a -> address_field;
+		count = X25GBITS(a -> addrlens, called_addrlen);
+	}
+	pk_simple_bsd (src, digits, skip, count);
+
+	if (xcp -> xc_addr.x25_net == 0 ||
+	    !(xcp -> xc_nodnic || xcp -> xc_prepnd0))
+		bcopy ((caddr_t) digits, (caddr_t) sa -> x25_addr, count + 1);
+	else {
+		octet dnicname[sizeof (long) * NBBY/3 + 2];
+
+		sprintf ((char *) dnicname, "%d", xcp -> xc_addr.x25_net);
+		prune_dnic ((char *) digits, sa -> x25_addr, dnicname, xcp);
+	}
+}
+
+/*
+ * Queue the tail of a packet on a socket's receive buffer as a
+ * control record.
+ *
+ * m0 - the packet; it is copied, never modified
+ * fp - pointer into the first mbuf's data; everything before it is
+ *      trimmed from the copy
+ * so - socket whose so_rcv receives the record
+ *
+ * The record is a struct cmsghdr (level AF_CCITT, type PK_FACILITIES,
+ * cmsg_len covering header plus data) followed by the packet bytes from
+ * fp onward.  Nothing is queued if the copy or the prepend fails.
+ */
+static
+save_extra (m0, fp, so)
+struct mbuf *m0;
+octet *fp;
+struct socket *so;
+{
+	register struct mbuf *m;
+	struct cmsghdr cmsghdr;
+
+	/* copy the caller's chain (m0); m itself is not yet initialized */
+	if (m = m_copy (m0, 0, (int)M_COPYALL)) {
+		int off = fp - mtod (m0, octet *);
+		int len = m -> m_pkthdr.len - off + sizeof (cmsghdr);
+		cmsghdr.cmsg_len = len;
+		cmsghdr.cmsg_level = AF_CCITT;
+		cmsghdr.cmsg_type = PK_FACILITIES;
+		m_adj (m, off);
+		M_PREPEND (m, sizeof (cmsghdr), M_DONTWAIT);
+		if (m == 0)
+			return;
+		bcopy ((caddr_t)&cmsghdr, mtod (m, caddr_t), sizeof (cmsghdr));
+		MCHTYPE(m, MT_CONTROL);
+		sbappendrecord (&so -> so_rcv, m);
+	}
+}
+
+/*
+ * Handle an incoming call packet.
+ *
+ * pkp - packet level control block of the link the call arrived on
+ * m0  - mbuf chain holding the call packet
+ *
+ * The calling address, the facilities and up to 16 octets of call user
+ * data are decoded into a temporary sockaddr_x25.  The listen list
+ * (pk_listenhead) is then searched for an entry whose user data prefix
+ * and network match; the caller's D-bit and reverse charging requests
+ * must also be acceptable to that listener.  On a match a new logical
+ * channel descriptor is set up with a call-accepted template; for a
+ * socket listener the template is sent at once, otherwise the packet is
+ * handed to the listener's lcd_upper routine.  If the call is not taken,
+ * a message is logged and a skeleton channel is attached so that the
+ * call can be cleared.
+ */
+
+pk_incoming_call (pkp, m0)
+struct mbuf *m0;
+struct pkcb *pkp;
+{
+	register struct pklcd *lcp = 0, *l;
+	register struct sockaddr_x25 *sa;
+	register struct x25_calladdr *a;
+	register struct socket *so = 0;
+	struct	x25_packet *xp = mtod (m0, struct x25_packet *);
+	struct	mbuf *m;
+	struct	x25config *xcp = pkp -> pk_xcp;
+	int len = m0 -> m_pkthdr.len;
+	int udleft;
+	unsigned udlen;
+	char *errstr = "server unavailable";
+	octet *u, *facp;
+	int lcn = LCN(xp);
+
+	/* First, copy the data from the incoming call packet to a X25 address
+	   descriptor. It is to be regretted that you have
+	   to parse the facilities into a sockaddr to determine
+	   if reverse charging is being requested */
+	if ((m = m_get (M_DONTWAIT, MT_SONAME)) == 0)
+		return;
+	sa = mtod (m, struct sockaddr_x25 *);
+	a = (struct x25_calladdr *) &xp -> packet_data;
+	/* the facilities start after both (nibble-packed) addresses */
+	facp = u = (octet *) (a -> address_field +
+		((X25GBITS(a -> addrlens, called_addrlen) + X25GBITS(a -> addrlens, calling_addrlen) + 1) / 2));
+	/* step over the facilities length octet and the facilities */
+	u += *u + 1;
+	/*
+	 * Do the arithmetic in a signed variable: udlen is unsigned, so a
+	 * negative remainder (packet ends before the user data field)
+	 * could not be detected by testing udlen itself.
+	 */
+	udleft = ((octet *) xp) + len - u;
+	if (udleft < 0)
+		udleft = 0;
+	udlen = min (16, udleft);
+	pk_from_bcd (a, 1, sa, pkp -> pk_xcp); /* get calling address */
+	pk_parse_facilities (facp, sa);
+	bcopy ((caddr_t) u, sa -> x25_udata, udlen);
+	sa -> x25_udlen = udlen;
+
+	/*
+	 * Now, loop through the listen sockets looking for a match on the
+	 * PID. That is the first few octets of the user data field.
+	 * This is the closest thing to a port number for X.25 packets.
+	 * It does provide a way of multiplexing services at the user level. 
+	 */
+
+	for (l = pk_listenhead; l; l = l -> lcd_listen) {
+		struct sockaddr_x25 *sxp = l -> lcd_ceaddr;
+
+		if (bcmp (sxp -> x25_udata, u, sxp -> x25_udlen))
+			continue;
+		if (sxp -> x25_net &&
+		    sxp -> x25_net != xcp -> xc_addr.x25_net)
+			continue;
+		/*
+		 * don't accept incoming calls with the D-Bit on
+		 * unless the server agrees
+		 */
+		if (X25GBITS(xp -> bits, d_bit) && !(sxp -> x25_opts.op_flags & X25_DBIT)) {
+			errstr = "incoming D-Bit mismatch";
+			break;
+		}
+		/*
+		 * don't accept incoming collect calls unless
+		 * the server sets the reverse charging option.
+		 */
+		if ((sxp -> x25_opts.op_flags & (X25_OLDSOCKADDR|X25_REVERSE_CHARGE)) == 0 &&
+			sa -> x25_opts.op_flags & X25_REVERSE_CHARGE) {
+			errstr = "incoming collect call refused";
+			break;
+		}
+		if (l -> lcd_so) {
+			if (so = sonewconn (l -> lcd_so, SS_ISCONNECTED))
+				    lcp = (struct pklcd *) so -> so_pcb;
+		} else 
+			lcp = pk_attach ((struct socket *) 0);
+		if (lcp == 0) {
+			/*
+			 * Insufficient space or too many unaccepted
+			 * connections.  Just throw the call away.
+			 */
+			errstr = "server malfunction";
+			break;
+		}
+		/* the new channel inherits the listener's upper-layer hooks */
+		lcp -> lcd_upper = l -> lcd_upper;
+		lcp -> lcd_upnext = l -> lcd_upnext;
+		lcp -> lcd_lcn = lcn;
+		lcp -> lcd_state = RECEIVED_CALL;
+		sa -> x25_opts.op_flags |= (sxp -> x25_opts.op_flags &
+			~X25_REVERSE_CHARGE) | l -> lcd_flags;
+		pk_assoc (pkp, lcp, sa);
+		lcp -> lcd_faddr = *sa;
+		lcp -> lcd_laddr.x25_udlen = sxp -> x25_udlen;
+		lcp -> lcd_craddr = &lcp -> lcd_faddr;
+		lcp -> lcd_template = pk_template (lcp -> lcd_lcn, X25_CALL_ACCEPTED);
+		if (lcp -> lcd_flags & X25_DBIT) {
+			/* echo the D-bit if the caller set it, else forget it */
+			if (X25GBITS(xp -> bits, d_bit))
+				X25SBITS(mtod (lcp -> lcd_template,
+					struct x25_packet *) -> bits, d_bit, 1);
+			else
+				lcp -> lcd_flags &= ~X25_DBIT;
+		}
+		if (so) {
+			pk_output (lcp);
+			soisconnected (so);
+			if (so -> so_options & SO_OOBINLINE)
+				save_extra (m0, facp, so);
+		} else if (lcp -> lcd_upper) {
+			(*lcp -> lcd_upper) (lcp, m0);
+		}
+		(void) m_free (m);
+		return;
+	}
+
+	/*
+	 * If the call fails for whatever reason, we still need to build a
+	 * skeleton LCD in order to be able to properly  receive the CLEAR
+	 * CONFIRMATION.
+	 */
+#ifdef WATERLOO		/* be explicit */
+	if (l == 0 && bcmp (sa -> x25_udata, "ean", 3) == 0)
+		pk_message (lcn, pkp -> pk_xcp, "host=%s ean%c: %s",
+			sa -> x25_addr, sa -> x25_udata[3] & 0xff, errstr);
+	else if (l == 0 && bcmp (sa -> x25_udata, "\1\0\0\0", 4) == 0)
+		pk_message (lcn, pkp -> pk_xcp, "host=%s x29d: %s",
+			sa -> x25_addr, errstr);
+	else
+#endif
+	pk_message (lcn, pkp -> pk_xcp, "host=%s pid=%x %x %x %x: %s",
+		sa -> x25_addr, sa -> x25_udata[0] & 0xff,
+		sa -> x25_udata[1] & 0xff, sa -> x25_udata[2] & 0xff,
+		sa -> x25_udata[3] & 0xff, errstr);
+	if ((lcp = pk_attach ((struct socket *)0)) == 0) {
+		(void) m_free (m);
+		return;
+	}
+	lcp -> lcd_lcn = lcn;
+	lcp -> lcd_state = RECEIVED_CALL;
+	pk_assoc (pkp, lcp, sa);
+	(void) m_free (m);
+	pk_clear (lcp, 0, 1);
+}
+
+/*
+ * Process a call-accepted packet for a channel we placed a call on.
+ *
+ * lcp - logical channel descriptor of the call
+ * m   - mbuf holding the packet
+ *
+ * The channel enters DATA_TRANSFER and its socket, if any, is marked
+ * connected.  X25_DBIT is dropped from lcd_flags when the packet's D-bit
+ * is clear.  If the packet is longer than three octets and the
+ * facilities field fits inside it, the facilities are parsed into
+ * lcd_ceaddr.  Finally the channel is (re)associated with its pkcb and,
+ * when there is no socket, the packet is passed to lcd_upper.
+ */
+pk_call_accepted (lcp, m)
+struct pklcd *lcp;
+struct mbuf *m;
+{
+	struct x25_packet *pkt = mtod (m, struct x25_packet *);
+	int pktlen = m -> m_len;
+
+	lcp -> lcd_state = DATA_TRANSFER;
+	if (lcp -> lcd_so)
+		soisconnected (lcp -> lcd_so);
+
+	if (lcp -> lcd_flags & X25_DBIT) {
+		if (X25GBITS(pkt -> bits, d_bit) == 0)
+			lcp -> lcd_flags &= ~X25_DBIT;
+	}
+
+	if (pktlen > 3) {
+		register struct x25_calladdr *addr;
+		register octet *facp;
+		int ndigits;
+
+		addr = (struct x25_calladdr *) &pkt -> packet_data;
+		ndigits = X25GBITS(addr -> addrlens, calling_addrlen) +
+			X25GBITS(addr -> addrlens, called_addrlen);
+		/* two digits per octet, rounded up */
+		facp = (octet *) addr -> address_field + (ndigits + 1) / 2;
+		if (facp + *facp <= ((octet *) pkt) + pktlen)
+			pk_parse_facilities (facp, lcp -> lcd_ceaddr);
+	}
+
+	pk_assoc (lcp -> lcd_pkp, lcp, lcp -> lcd_ceaddr);
+
+	if (lcp -> lcd_so == 0 && lcp -> lcd_upper)
+		lcp -> lcd_upper (lcp, m);
+}
+
+/*
+ * Scan a facilities field and record the facilities we understand.
+ *
+ * fcp - points at the facilities length octet; the facility codes and
+ *       their parameters follow it
+ * sa  - x25_opts receives the window size, packet size, throughput
+ *       class and reverse charging flag found in the field
+ *
+ * maxfcp is the address of the last octet of the field, so while
+ * fcp < maxfcp the octet after a facility code (fcp[1]) is still inside
+ * the field.  Scanning stops at the end of the field or at the first
+ * 0x00 or 0xff code.  Facilities that are not recognized are skipped
+ * using the parameter length implied by the top two bits of the code.
+ */
+pk_parse_facilities (fcp, sa)
+register octet *fcp;
+register struct sockaddr_x25 *sa;
+{
+	register octet *maxfcp;
+
+	maxfcp = fcp + *fcp;
+	fcp++;
+	while (fcp < maxfcp) {
+		/*
+		 * Ignore national DCE or DTE facilities
+		 */
+		if (*fcp == 0 || *fcp == 0xff)
+			break;
+		switch (*fcp) {
+		case FACILITIES_WINDOWSIZE:
+			sa -> x25_opts.op_wsize = fcp[1];
+			fcp += 3;
+			break;
+
+		case FACILITIES_PACKETSIZE:
+			sa -> x25_opts.op_psize = fcp[1];
+			fcp += 3;
+			break;
+
+		case FACILITIES_THROUGHPUT:
+			sa -> x25_opts.op_speed = fcp[1];
+			fcp += 2;
+			break;
+
+		case FACILITIES_REVERSE_CHARGE:
+			if (fcp[1] & 01)
+				sa -> x25_opts.op_flags |= X25_REVERSE_CHARGE;
+			/*
+			 * Datapac specific: for a X.25(1976) DTE, bit 2
+			 * indicates a "hi priority" (eg. international) call.
+			 */
+			if (fcp[1] & 02 && sa -> x25_opts.op_psize == 0)
+				sa -> x25_opts.op_psize = X25_PS128;
+			fcp += 2;
+			break;
+
+		default:
+			switch ((*fcp & 0xc0) >> 6) {
+			case 0:			/* class A: one parameter octet */
+				fcp += 2;
+				break;
+
+			case 1:			/* class B: two parameter octets */
+				fcp += 3;
+				break;
+
+			case 2:			/* class C: three parameter octets */
+				fcp += 4;
+				break;
+
+			case 3:			/* class D: length octet + parameters */
+				/*
+				 * Skip the code, the length octet and the
+				 * parameters; stopping one octet short would
+				 * make the last parameter octet look like the
+				 * next facility code.
+				 */
+				fcp += 2 + fcp[1];
+				break;
+			}
+		}
+	}
+}
diff --git a/sys/netccitt/pk_llcsubr.c b/sys/netccitt/pk_llcsubr.c
new file mode 100644
index 00000000000..d8cc5016a28
--- /dev/null
+++ b/sys/netccitt/pk_llcsubr.c
@@ -0,0 +1,369 @@
+/* 
+ * Copyright (C) Dirk Husemann, Computer Science Department IV, 
+ * 		 University of Erlangen-Nuremberg, Germany, 1990, 1991, 1992
+ * Copyright (c) 1992, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ * 
+ * This code is derived from software contributed to Berkeley by
+ * Dirk Husemann and the Computer Science Department (IV) of
+ * the University of Erlangen-Nuremberg, Germany.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)pk_llcsubr.c	8.1 (Berkeley) 6/10/93
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/mbuf.h>
+#include <sys/domain.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/protosw.h>
+#include <sys/errno.h>
+#include <sys/time.h>
+#include <sys/kernel.h>
+
+#include <net/if.h>
+#include <net/if_dl.h>
+#include <net/if_llc.h>
+#include <net/if_types.h>
+#include <net/route.h>
+
+#include <netccitt/dll.h>
+#include <netccitt/x25.h>
+#include <netccitt/pk.h>
+#include <netccitt/pk_var.h>
+#include <netccitt/llc_var.h>
+
+
+/*
+ * Routing support for X.25
+ *
+ * We distinguish between two cases:
+ * RTF_HOST:
+ * 	rt_key(rt)	X.25 address of host
+ *	rt_gateway	SNPA (MAC+DLSAP) address of host
+ *	rt_llinfo	pkcb for rt_key(rt)
+ *
+ * RTF_GATEWAY
+ *	rt_key(rt)	X.25 address of host or suitably masked network
+ *	rt_gateway	X.25 address of next X.25 gateway (switch)
+ *	rt_llinfo	rtentry for rt_gateway address
+ *			ought to be of type RTF_HOST
+ *
+ *
+ * Mapping of X.121 to pkcbs:
+ *
+ * HDLC uses the DTE-DCE model of X.25, therefore we need a many-to-one
+ * relationship, i.e.:
+ *	
+ * 	{X.121_a, X.121_b, X.121_c, ..., X.121_i} -> pkcb_0
+ *
+ * LLC2 utilizes the DTE-DTE model of X.25, resulting effectively in a
+ * one-to-one relationship, i.e.:
+ *
+ *	{X.121_j} 	->	pkcb_1a
+ *	{X.121_k}	->	pkcb_1b
+ *	...
+ *	{X.121_q}	->	pkcb_1q
+ * 
+ * It might make sense to allow a many-to-one relation for LLC2 also,
+ * 
+ *	{X.121_r, X.121_s, X.121_t, X.121_u} -> pkcb_2a
+ *
+ * This would make addresses X.121_[r-u] essentially aliases of one
+ * address ({X.121_[r-u]} would constitute a representative set).
+ *
+ * Each one-to-one relation must obviously be entered individually with
+ * a route add command, whereas a many-to-one relationship can be 
+ * either entered individually or generated by using a netmask.
+ * 
+ * To facilitate dealings the many-to-one case for LLC2 can only be
+ * established via a netmask.
+ *
+ */
+
+/*
+ * Get the pkcb for a route.  For an RTF_GATEWAY route rt_llinfo is
+ * treated as the rtentry of the gateway and the pkcb is that entry's
+ * rt_llinfo (NULL if rt_llinfo is not set); otherwise rt_llinfo is the
+ * pkcb itself.
+ */
+#define XTRACTPKP(rt)	((rt)->rt_flags & RTF_GATEWAY ? \
+			 ((rt)->rt_llinfo ? \
+			  (struct pkcb *) ((struct rtentry *)((rt)->rt_llinfo))->rt_llinfo : \
+			  (struct pkcb *) NULL) : \
+			 (struct pkcb *)((rt)->rt_llinfo))
+
+/* Compare two sockaddrs over the first one's sa_len bytes. */
+#define equal(a1, a2) (bcmp((caddr_t)(a1), \
+			       (caddr_t)(a2), \
+			       (a1)->sa_len) == 0)
+/* View a route's interface address as an x25_ifaddr. */
+#define XIFA(rt) ((struct x25_ifaddr *)((rt)->rt_ifa))
+/* Cast to a generic sockaddr pointer; the argument is parenthesized so
+   that any expression may be passed. */
+#define SA(s) ((struct sockaddr *)(s))
+
+/*
+ * Route request hook for X.25 routes: keeps rt_llinfo in step with
+ * route additions and deletions.
+ *
+ * cmd - RTM_ADD, RTM_RESOLVE or RTM_DELETE; anything else is ignored
+ * rt  - the route entry being added or deleted
+ * dst - not used
+ *
+ * Returns 0 on success and for ignored requests, EEXIST when the route
+ * already has a pkcb on add, or ENETDOWN when the route has no
+ * interface address.
+ */
+int
+cons_rtrequest(int cmd, struct rtentry *rt, struct sockaddr *dst)
+{
+	register struct pkcb *pkp;
+	register char one_to_one;
+	struct pkcb *pk_newlink();
+	struct rtentry *npaidb_enter();
+
+	pkp = XTRACTPKP(rt);
+
+	switch(cmd) {
+	case RTM_RESOLVE:
+	case RTM_ADD:
+		if (pkp) 
+			return(EEXIST);
+
+		if (rt->rt_flags & RTF_GATEWAY) {
+			/* rt_llinfo of a gateway route is the gateway's rtentry */
+			if (rt->rt_llinfo)
+				RTFREE((struct rtentry *)rt->rt_llinfo);
+			rt->rt_llinfo = (caddr_t) rtalloc1(rt->rt_gateway, 1);
+			return(0);
+		}
+		/*
+		 * Assumptions:	(1) ifnet structure is filled in
+		 *		(2) at least the pkcb created via 
+		 *		    x25config (ifconfig?) has been 
+		 *		    set up already.
+		 *		(3) HDLC interfaces have an if_type of 
+		 *		    IFT_X25{,DDN}, LLC2 interfaces 
+		 *		    anything else (any better way to 
+		 *		    do this?)
+		 *
+		 */
+		if (!rt->rt_ifa)
+			return (ENETDOWN);
+	
+		/*	
+		 * We differentiate between dealing with a many-to-one
+		 * (HDLC: DTE-DCE) and a one-to-one (LLC2: DTE-DTE) 
+		 * relationship (by looking at the if type).
+		 *
+		 * Only in case of the many-to-one relationship (HDLC)
+		 * we set the ia->ia_pkcb pointer to the pkcb allocated
+		 * via pk_newlink() as we will use just that one pkcb for
+		 * future route additions (the rtentry->rt_llinfo pointer
+		 * points to the pkcb allocated for that route).
+		 *
+		 * In case of the one-to-one relationship (LLC2) we 
+		 * create a new pkcb (via pk_newlink()) for each new rtentry.
+		 * 
+		 * NOTE: Only in case of HDLC does ia->ia_pkcb point
+		 * to a pkcb, in the LLC2 case it doesn't (as we don't 
+		 * need it here)!
+		 */
+		one_to_one = ISISO8802(rt->rt_ifp);
+
+		if (!(pkp = XIFA(rt)->ia_pkcb) && !one_to_one) 
+			XIFA(rt)->ia_pkcb = pkp = 
+				pk_newlink(XIFA(rt), (caddr_t) 0);
+		else if (one_to_one && 
+			 !equal(rt->rt_gateway, rt->rt_ifa->ifa_addr)) {
+			pkp = pk_newlink(XIFA(rt), (caddr_t) 0);
+			/*
+			 * We also need another route entry for mapping
+			 * MAC+LSAP->X.25 address
+			 */
+			pkp->pk_llrt = npaidb_enter(rt->rt_gateway, rt_key(rt), rt, 0);
+		}
+		if (pkp) {
+			if (!pkp->pk_rt)
+				pkp->pk_rt = rt;
+			pkp->pk_refcount++;
+		}
+		rt->rt_llinfo = (caddr_t) pkp;
+
+		return(0);
+
+	case RTM_DELETE:
+	{
+		/*
+		 * The pkp might be empty if we are dealing
+		 * with an interface route entry for LLC2, in this 
+		 * case we don't need to do anything ...
+		 */
+		if (pkp) {
+			if ( rt->rt_flags & RTF_GATEWAY ) {
+				if (rt->rt_llinfo)
+					RTFREE((struct rtentry *)rt->rt_llinfo);
+				return(0);
+			}
+			
+			if (pkp->pk_llrt)
+				npaidb_destroy(pkp->pk_llrt);
+
+			pk_dellink (pkp);
+			
+			return(0);
+		}
+	}
+	}
+
+	/*
+	 * Nothing to do (a delete without a pkcb, or a request we do not
+	 * handle): report success rather than falling off the end of a
+	 * function declared to return int.
+	 */
+	return(0);
+}
+
+/*
+ * Network Protocol Addressing Information DataBase (npaidb) 
+ * 
+ * To speed up locating the entity dealing with an LLC packet use is made 
+ * of a routing tree. This npaidb routing tree is handled 
+ * by the normal rn_*() routines just like (almost) any other routing tree. 
+ * 
+ * The mapping being done by the npaidb_*() routines is as follows: 
+ * 
+ *     Key:       MAC,LSAP (enhancing struct sockaddr_dl) 
+ *     Gateway:   sockaddr_x25 (i.e. X.25 address - X.121 or NSAP) 
+ *     Llinfo:    npaidbentry {
+ *                         struct llc_linkcb *np_link;
+ *                         struct rtentry *np_rt;
+ *                }
+ *
+ * Using the npaidbentry provided by llinfo we can then access
+ *
+ *       o the pkcb by using (struct pkcb *) (np_rt->rt_llinfo)
+ *       o the linkcb via np_link
+ * 
+ * The following functions are provided 
+ * 
+ *       o npaidb_enter(struct sockaddr_dl *key, struct sockaddr *value,
+ *                      struct rtentry *rt, struct llc_linkcb *link)
+ *
+ *       o npaidb_enrich(short type, caddr_t info, struct sockaddr_dl *sdl)
+ *
+ *       o npaidb_destroy(struct rtentry *rt)
+ * 
+ */
+
+/*
+ * Netmask handed to rtrequest() by npaidb_enter().  It starts out with
+ * the length fields and all of sdl_data[] set to -1; npaidb_enter()
+ * patches sdl_data[] from the SAP position onward for the duration of
+ * the request and then restores the -1 pattern.
+ */
+struct sockaddr_dl npdl_netmask = {
+ sizeof(struct sockaddr_dl),					/* _len */
+ 0,								/* _family */
+ 0,								/* _index */
+ 0,								/* _type */
+ -1,								/* _nlen */
+ -1,								/* _alen */
+ -1,								/* _slen */
+ { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},		/* _data */
+}; 
+/* Placeholder used by npaidb_enter() when it is called with no value. */
+struct sockaddr npdl_dummy;
+
+/*
+ * Size of struct sockaddr_dl minus the offset of sdl_data[0], i.e. the
+ * number of bytes from the start of sdl_data[] to the end of the struct.
+ */
+int npdl_datasize = sizeof(struct sockaddr_dl)-
+		((int)((caddr_t)&((struct sockaddr_dl *)0)->sdl_data[0]));
+
+/*
+ * Look up, or create, the npaidb route entry for a link-level address.
+ *
+ * key   - link-level address used as the route key
+ * value - address stored as the gateway of a new entry; npdl_dummy is
+ *         substituted when this is 0
+ * rt    - route recorded in np_rt of a new entry; also supplies the
+ *         interface used to locate the SAP octet in key
+ * link  - not used here
+ *
+ * If rtalloc1() already finds an entry, the reference it took is
+ * dropped again and that entry is returned unchanged.  Otherwise a new
+ * route is added with a netmask that ignores the lowest bit of the SAP
+ * octet, and a zeroed npaidbentry is hung off its rt_llinfo.
+ *
+ * Returns the route entry, or 0 if the route could not be added.
+ */
+struct rtentry *
+npaidb_enter(struct sockaddr_dl *key, struct sockaddr *value,
+	     struct rtentry *rt, struct llc_linkcb *link)
+{
+	struct rtentry *nprt; register int i;
+	int error;
+
+	USES_AF_LINK_RTS;
+
+	if ((nprt = rtalloc1(SA(key), 0)) == 0) {
+		register u_int size = sizeof(struct npaidbentry);
+		register u_char saploc = LLSAPLOC(key, rt->rt_ifp);
+
+		/* 
+		 * set up netmask: LLC2 packets have the lowest bit set in
+		 * response packets (e.g. 0x7e for command packets, 0x7f for
+		 * response packets), to facilitate the lookup we use a netmask
+		 * of 11111110 for the SAP position. The remaining positions 
+		 * are zeroed out.
+		 */
+		npdl_netmask.sdl_data[saploc] = NPDL_SAPNETMASK;
+		bzero((caddr_t)&npdl_netmask.sdl_data[saploc+1], 
+		      npdl_datasize-saploc-1);
+
+		if (value == 0)
+			value = &npdl_dummy;
+
+		/* now enter it */
+		error = rtrequest(RTM_ADD, SA(key), SA(value),
+			SA(&npdl_netmask), 0, &nprt);
+
+		/* and reset npdl_netmask */
+		for (i = saploc; i < npdl_datasize; i++)
+			npdl_netmask.sdl_data[i] = -1;
+
+		/*
+		 * nprt was 0 on entry to this branch and only rtrequest()
+		 * could have filled it in; do not touch it if that failed.
+		 */
+		if (error || nprt == 0)
+			return ((struct rtentry *) 0);
+
+		nprt->rt_llinfo = malloc(size , M_PCB, M_WAITOK);
+		if (nprt->rt_llinfo) {
+			bzero (nprt->rt_llinfo, size);
+			((struct npaidbentry *) (nprt->rt_llinfo))->np_rt = rt;
+		}
+	} else nprt->rt_refcnt--;
+	return nprt;
+}
+
+/*
+ * Attach additional information to an existing npaidb entry.
+ *
+ * type - kind of information; only NPAIDB_LINK is acted upon
+ * info - for NPAIDB_LINK, the llc_linkcb stored in np_link
+ * sdl  - link-level address identifying the entry
+ *
+ * Returns the route entry found for sdl (with the reference taken by
+ * the lookup dropped again), or 0 if there is none.
+ */
+struct rtentry *
+npaidb_enrich(short type, caddr_t info, struct sockaddr_dl *sdl)
+{
+	struct rtentry *entry;
+
+	USES_AF_LINK_RTS;
+
+	entry = rtalloc1((struct sockaddr *)sdl, 0);
+	if (entry == 0)
+		return ((struct rtentry *) 0);
+
+	entry->rt_refcnt--;
+	if (type == NPAIDB_LINK)
+		((struct npaidbentry *)(entry->rt_llinfo))->np_link =
+			(struct llc_linkcb *) info;
+
+	return entry;
+}
+
+/*
+ * Remove an npaidb entry: free the data hanging off rt_llinfo, if any,
+ * and delete the route itself.
+ *
+ * rt - the npaidb route entry to remove
+ *
+ * Returns whatever rtrequest(RTM_DELETE, ...) returns.
+ */
+npaidb_destroy(struct rtentry *rt)
+{
+	int error;
+
+	USES_AF_LINK_RTS;
+
+	if (rt->rt_llinfo != 0)
+		free((caddr_t) rt->rt_llinfo, M_PCB);
+
+	error = rtrequest(RTM_DELETE, rt_key(rt), rt->rt_gateway,
+			  rt_mask(rt), 0, 0);
+	return(error);
+}
+
+
+#ifdef LLC
+/*
+ * Glue between X.25 and LLC2
+ *
+ * Forwards a control request to llc_ctlinput() for the interface that
+ * owns the given address.
+ *
+ * prc  - request code, passed through unchanged
+ * addr - address used to find the interface (ifa_ifwithaddr) and also
+ *        passed through to llc_ctlinput()
+ *
+ * Returns 0 if no interface has that address, otherwise the result of
+ * llc_ctlinput() cast to int.
+ */
+int
+x25_llcglue(int prc, struct sockaddr *addr)
+{
+	struct x25_ifaddr *ifa;
+	struct dll_ctlinfo info;
+
+	ifa = (struct x25_ifaddr *)ifa_ifwithaddr(addr);
+	if (ifa == 0)
+		return 0;
+
+	info.dlcti_lsap = LLC_X25_LSAP;
+	/* the dllconfig is taken from just past one sockaddr_x25 at ia_xc */
+	info.dlcti_cfg =
+	    (struct dllconfig *)(((struct sockaddr_x25 *)(&ifa->ia_xc))+1);
+
+	return ((int)llc_ctlinput(prc, addr, (caddr_t)&info));
+}
+#endif /* LLC */
diff --git a/sys/netccitt/pk_output.c b/sys/netccitt/pk_output.c
new file mode 100644
index 00000000000..ccc02a4c327
--- /dev/null
+++ b/sys/netccitt/pk_output.c
@@ -0,0 +1,216 @@
+/*
+ * Copyright (c) University of British Columbia, 1984
+ * Copyright (C) Computer Science Department IV, 
+ * 		 University of Erlangen-Nuremberg, Germany, 1992
+ * Copyright (c) 1991, 1992, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by the
+ * Laboratory for Computation Vision and the Computer Science Department
+ * of the the University of British Columbia and the Computer Science
+ * Department (IV) of the University of Erlangen-Nuremberg, Germany.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)pk_output.c	8.1 (Berkeley) 6/10/93
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/protosw.h>
+#include <sys/errno.h>
+
+#include <net/if.h>
+
+#include <netccitt/x25.h>
+#include <netccitt/pk.h>
+#include <netccitt/pk_var.h>
+
+/*
+ * mbuf caches for packet tracing.  pk_output() hands outgoing packets
+ * to mbuf_cache(&pk_input_cache, ...) whenever that cache's mbc_size or
+ * mbc_oldsize is non-zero.
+ * NOTE(review): pk_output_cache is not referenced by pk_output() or
+ * nextpk() in this file -- confirm where (or whether) it is used.
+ */
+struct mbuf_cache pk_output_cache = {0 }, pk_input_cache;
+/* Forward declaration: nextpk() is defined after its first use. */
+struct	mbuf *nextpk ();
+
+/*
+ * Send everything that is ready to go on a logical channel.
+ *
+ * lcp - logical channel descriptor to service
+ *
+ * Packets are taken one at a time from nextpk().  For each one the
+ * channel state, timers and sequence numbers are updated according to
+ * the packet type (pk_decode) combined with the current lcd_state; the
+ * packet is then traced and passed to the link level output routine.
+ * A packet that is not expected in the current state is freed and ends
+ * the loop.  A null lcp, or an lcp without a pkcb, is reported with a
+ * printf and otherwise ignored.
+ */
+pk_output (lcp)
+register struct pklcd *lcp;
+{
+	register struct x25_packet *xp;
+	register struct mbuf *m;
+	register struct pkcb *pkp;
+
+	/* look at lcp -> lcd_pkp only once lcp is known to be non-null */
+	if (lcp == 0 || (pkp = lcp -> lcd_pkp) == 0) {
+		printf ("pk_output: zero arg\n");
+		return;
+	}
+
+	while ((m = nextpk (lcp)) != NULL) {
+		xp = mtod (m, struct x25_packet *);
+
+		switch (pk_decode (xp) + lcp -> lcd_state) {
+		/* 
+		 *  All the work is already done - just set the state and
+		 *  pass to peer.
+		 */
+		case CALL + READY: 
+			lcp -> lcd_state = SENT_CALL;
+			lcp -> lcd_timer = pk_t21;
+			break;
+
+		/*
+		 *  Just set the state to allow packet to flow and send the
+		 *  confirmation.
+		 */
+		case CALL_ACCEPTED + RECEIVED_CALL: 
+			lcp -> lcd_state = DATA_TRANSFER;
+			break;
+
+		/* 
+		 *  Just set the state. Keep the LCD around till the clear
+		 *  confirmation is returned.
+		 */
+		case CLEAR + RECEIVED_CALL: 
+		case CLEAR + SENT_CALL: 
+		case CLEAR + DATA_TRANSFER: 
+			lcp -> lcd_state = SENT_CLEAR;
+			lcp -> lcd_retry = 0;
+			/* fall through */
+
+		case CLEAR + SENT_CLEAR:
+			lcp -> lcd_timer = pk_t23;
+			lcp -> lcd_retry++;
+			break;
+
+		case CLEAR_CONF + RECEIVED_CLEAR: 
+		case CLEAR_CONF + SENT_CLEAR: 
+		case CLEAR_CONF + READY: 
+			lcp -> lcd_state = READY;
+			break;
+
+		case DATA + DATA_TRANSFER: 
+			/* stamp P(S) and P(R), then advance the send number */
+			SPS(xp, lcp -> lcd_ssn);
+			lcp -> lcd_input_window =
+				(lcp -> lcd_rsn + 1) % MODULUS;
+			SPR(xp, lcp -> lcd_input_window);
+			lcp -> lcd_last_transmitted_pr = lcp -> lcd_input_window;
+			lcp -> lcd_ssn = (lcp -> lcd_ssn + 1) % MODULUS;
+			/* output window used up: hold further data packets */
+			if (lcp -> lcd_ssn == ((lcp -> lcd_output_window + lcp -> lcd_windowsize) % MODULUS))
+				lcp -> lcd_window_condition = TRUE;
+			break;
+
+		case INTERRUPT + DATA_TRANSFER: 
+#ifdef ancient_history
+			xp -> packet_data = 0;
+#endif
+			lcp -> lcd_intrconf_pending = TRUE;
+			break;
+
+		case INTERRUPT_CONF + DATA_TRANSFER: 
+			break;
+
+		case RR + DATA_TRANSFER: 
+		case RNR + DATA_TRANSFER: 
+			lcp -> lcd_input_window =
+				(lcp -> lcd_rsn + 1) % MODULUS;
+			SPR(xp, lcp -> lcd_input_window);
+			lcp -> lcd_last_transmitted_pr = lcp -> lcd_input_window;
+			break;
+
+		case RESET + DATA_TRANSFER: 
+			lcp -> lcd_reset_condition = TRUE;
+			break;
+
+		case RESET_CONF + DATA_TRANSFER: 
+			lcp -> lcd_reset_condition = FALSE;
+			break;
+
+		/* 
+		 *  A restart should be only generated internally. Therefore
+		 *  all logic for restart is in the pk_restart routine.
+		 */
+		case RESTART + READY: 
+			lcp -> lcd_timer = pk_t20;
+			break;
+
+		/* 
+		 *  Restarts are all  handled internally.  Therefore all the
+		 *  logic for the incoming restart packet is handled in  the
+		 *  pk_input routine.
+		 */
+		case RESTART_CONF + READY: 
+			break;
+
+		default: 
+			m_freem (m);
+			return;
+		}
+
+		/* Trace the packet. */
+		pk_trace (pkp -> pk_xcp, m, "P-Out");
+
+		/* Pass the packet on down to the link layer */
+		if (pk_input_cache.mbc_size || pk_input_cache.mbc_oldsize) {
+			/* NOTE(review): 0x08 is a raw m_flags bit; its
+			   meaning is not defined in this file -- confirm */
+			m->m_flags |= 0x08;
+			mbuf_cache(&pk_input_cache, m);
+		}
+		(*pkp -> pk_lloutput) (pkp -> pk_llnext, m, pkp -> pk_rt);
+	}
+}
+
+/* 
+ *  This procedure returns the next packet to send or null. A
+ *  packet is composed of one or more mbufs.
+ *
+ *  lcp - logical channel descriptor to take the packet from
+ *
+ *  A pending lcd_template is always returned first (and the template
+ *  slot cleared).  Otherwise, unless an RNR, window or reset condition
+ *  is holding output back, the first record is unlinked from the send
+ *  buffer -- the socket's so_snd if the channel has a socket, else
+ *  lcd_sb -- and the buffer accounting is reduced for each of its mbufs.
+ */
+
+struct mbuf *
+nextpk (lcp)
+struct pklcd *lcp;
+{
+	register struct mbuf *m, *n;
+	struct socket *so = lcp -> lcd_so;
+	/*
+	 * Select the buffer's address in each arm: a conditional
+	 * expression is not an lvalue, so "&" cannot be applied to it.
+	 */
+	register struct sockbuf *sb = so ? & so -> so_snd : & lcp -> lcd_sb;
+
+	if (lcp -> lcd_template) {
+		m = lcp -> lcd_template;
+		lcp -> lcd_template = NULL;
+	} else {
+		if (lcp -> lcd_rnr_condition || lcp -> lcd_window_condition ||
+				lcp -> lcd_reset_condition)
+			return (NULL);
+
+		if ((m = sb -> sb_mb) == 0)
+			return (NULL);
+
+		/* unlink the first record from the buffer */
+		sb -> sb_mb = m -> m_nextpkt;
+		m->m_act = 0;
+		for (n = m; n; n = n -> m_next)
+			sbfree (sb, n);
+	}
+	return (m);
+}
diff --git a/sys/netccitt/pk_subr.c b/sys/netccitt/pk_subr.c
new file mode 100644
index 00000000000..44c43b6f3f6
--- /dev/null
+++ b/sys/netccitt/pk_subr.c
@@ -0,0 +1,1192 @@
+/*
+ * Copyright (c) University of British Columbia, 1984
+ * Copyright (C) Computer Science Department IV, 
+ * 		 University of Erlangen-Nuremberg, Germany, 1992
+ * Copyright (c) 1991, 1992, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by the
+ * Laboratory for Computation Vision and the Computer Science Department
+ * of the the University of British Columbia and the Computer Science
+ * Department (IV) of the University of Erlangen-Nuremberg, Germany.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)pk_subr.c	8.1 (Berkeley) 6/10/93
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/protosw.h>
+#include <sys/socketvar.h>
+#include <sys/errno.h>
+#include <sys/time.h>
+#include <sys/kernel.h>
+
+#include <net/if.h>
+#include <net/route.h>
+
+#include <netccitt/dll.h>
+#include <netccitt/x25.h>
+#include <netccitt/x25err.h>
+#include <netccitt/pk.h>
+#include <netccitt/pk_var.h>
+
+/* Buffer space requested by pk_attach () for the send and receive queues. */
+int     pk_sendspace = 1024 * 2 + 8;
+int     pk_recvspace = 1024 * 2 + 8;
+
+/*
+ * Head of the queue onto which pk_attach () links every logical channel
+ * descriptor; both links initially point back at the head itself.
+ */
+struct pklcd_q pklcd_q = {&pklcd_q, &pklcd_q};
+
+/*
+ * Mask/shift pairs for sub-byte header fields.
+ * NOTE(review): presumably indexed by the field selectors given to
+ * X25GBITS/X25SBITS -- confirm against the definitions in x25.h.
+ */
+struct x25bitslice x25_bitslice[] = {
+/*	  mask, shift value */
+	{ 0xf0, 0x4 },
+	{ 0xf,  0x0 },
+	{ 0x80, 0x7 },
+	{ 0x40, 0x6 },
+	{ 0x30, 0x4 },
+	{ 0xe0, 0x5 },
+	{ 0x10, 0x4 },
+	{ 0xe,  0x1 },
+	{ 0x1,  0x0 }
+};
+
+
+/* 
+ *  Attach X.25 protocol to socket, allocate logical channel descriptor
+ *  and buffer space, and enter LISTEN state if we are to accept
+ *  INCOMING CALL packets.
+ *
+ */
+
+struct pklcd *
+pk_attach (so)
+struct socket *so;
+{
+	register struct pklcd *lcp;
+	register int error;
+	int pk_output ();
+
+	/* Allocate without sleeping; failure is reported as ENOBUFS. */
+	MALLOC(lcp, struct pklcd *, sizeof (*lcp), M_PCB, M_NOWAIT);
+	if (lcp == 0) {
+		error = ENOBUFS;
+	} else {
+		/* Start from a zeroed descriptor linked on the global queue. */
+		bzero ((caddr_t)lcp, sizeof (*lcp));
+		insque (&lcp -> lcd_q, &pklcd_q);
+		lcp -> lcd_send = pk_output;
+		lcp -> lcd_state = READY;
+
+		/* No socket: only the private send buffer needs reserving. */
+		if (so == 0) {
+			sbreserve (&lcp -> lcd_sb, pk_sendspace);
+			return (lcp);
+		}
+
+		error = soreserve (so, pk_sendspace, pk_recvspace);
+		lcp -> lcd_so = so;
+		if (so -> so_options & SO_ACCEPTCONN)
+			lcp -> lcd_state = LISTEN;
+	}
+
+	/* Hand the descriptor (possibly null) and the status to the socket. */
+	if (so != 0) {
+		so -> so_pcb = (caddr_t) lcp;
+		so -> so_error = error;
+	}
+	return (lcp);
+}
+
+/* 
+ *  Disconnect X.25 protocol from socket.
+ */
+
+pk_disconnect (lcp)
+register struct pklcd *lcp;
+{
+	register struct socket *so = lcp -> lcd_so;
+	register struct pklcd **pp;
+
+	switch (lcp -> lcd_state) {
+	case SENT_CLEAR:
+	case RECEIVED_CLEAR:
+		/* Nothing is done for a channel in either CLEAR state. */
+		break;
+
+	case LISTEN:
+		/*
+		 * Walk the listener chain by link address so that the head
+		 * and interior cases unlink the same way.  A descriptor that
+		 * is not on the chain is simply not unlinked.
+		 */
+		for (pp = &pk_listenhead; *pp != 0 && *pp != lcp; )
+			pp = &((*pp) -> lcd_listen);
+		if (*pp != 0)
+			*pp = (*pp) -> lcd_listen;
+		pk_close (lcp);
+		break;
+
+	case READY:
+		pk_acct (lcp);
+		pk_close (lcp);
+		break;
+
+	default:
+		/* Any other state: account, quiesce the socket, send a CLEAR. */
+		pk_acct (lcp);
+		if (so != 0) {
+			soisdisconnecting (so);
+			sbflush (&so -> so_rcv);
+		}
+		pk_clear (lcp, 241, 0); /* Normal Disconnect */
+	}
+}
+
+/* 
+ *  Close an X.25 Logical Channel. Discard all space held by the
+ *  connection and internal descriptors. Wake up any sleepers.
+ */
+
+pk_close (lcp)
+struct pklcd *lcp;
+{
+	register struct socket *so = lcp -> lcd_so;
+
+	/* Without a socket the descriptor can always be freed right away. */
+	if (so == NULL) {
+		pk_freelcd (lcp);
+		return;
+	}
+
+	/*
+	 * The connection may be torn down by a link level failure
+	 * (e.g. LLC2 FRMR) while the user level is still filling the
+	 * socket send buffer, in which case that buffer is locked and
+	 * an sbflush () on it would panic.  So when the send buffer is
+	 * locked the descriptor is only marked as a zombie, leaving the
+	 * cleanup to pk_timer (); otherwise it is freed here.
+	 */
+	if (so -> so_snd.sb_flags & SB_LOCK)
+		lcp -> lcd_state = LCN_ZOMBIE;
+	else
+		pk_freelcd (lcp);
+
+	/* Detach from the socket and mark it disconnected. */
+	so -> so_pcb = 0;
+	soisdisconnected (so);
+	/* The socket itself must not be released with sofree () here. */
+}
+
+/* 
+ *  Create a template to be used to send X.25 packets on a logical
+ *  channel. It allocates an mbuf and fills in a skeletal packet
+ *  depending on its type. This packet is passed to pk_output where
+ *  the remainder of the packet is filled in.
+*/
+
+struct mbuf *
+pk_template (lcn, type)
+int lcn, type;
+{
+	register struct mbuf *m;
+	register struct x25_packet *xp;
+
+	/* Non-blocking allocation; running out of mbufs here is fatal. */
+	MGETHDR (m, M_DONTWAIT, MT_HEADER);
+	if (m == 0)
+		panic ("pk_template");
+	m -> m_act = 0;
+
+	/*
+	 * Efficiency hack: leave a gap of max_linkhdr bytes at the
+	 * beginning of the packet level header with the hope that this
+	 * will be enough room for the link level to insert its header.
+	 */
+	m -> m_data += max_linkhdr;
+	m -> m_pkthdr.len = m -> m_len = PKHEADERLN;
+
+	xp = mtod (m, struct x25_packet *);
+	/*
+	 * Clear the header with a single store.
+	 * NOTE(review): this writes sizeof (long) bytes, which may be more
+	 * than PKHEADERLN -- confirm the mbuf always has that much room
+	 * and suitable alignment at this offset.
+	 */
+	*(long *)xp = 0;		/* ugly, but fast */
+/*	xp -> q_bit = 0;*/
+	X25SBITS(xp -> bits, fmt_identifier, 1);
+/*	xp -> lc_group_number = 0;*/
+
+	/* Fill in the caller-supplied channel number and packet type. */
+	SET_LCN(xp, lcn);
+	xp -> packet_type = type;
+
+	return (m);
+}
+
+/* 
+ *  This routine restarts all the virtual circuits. Actually,
+ *  the virtual circuits are not "restarted" as such. Instead,
+ *  any active switched circuit is simply returned to READY
+ *  state.
+ */
+
+pk_restart (pkp, restart_cause)
+register struct pkcb *pkp;
+int restart_cause;
+{
+	register struct mbuf *m;
+	register struct pklcd *lcp;
+	register int i;
+
+	/* Without a channel table there is nothing to restart. */
+	if (pkp -> pk_chan == 0)
+		return;
+
+	/*
+	 * The per-channel teardown is skipped when the restart was
+	 * issued from inside pk_connect (), which happens only when the
+	 * X.25 link is down and a RESTART is needed to bring it up.
+	 */
+	if ((pkp -> pk_dxerole & DTE_CONNECTPENDING) == 0) {
+		for (i = 1; i <= pkp -> pk_maxlcn; ++i) {
+			lcp = pkp -> pk_chan[i];
+			if (lcp == NULL)
+				continue;
+
+			/* Socket-backed channels are closed with ENETRESET. */
+			if (lcp -> lcd_so) {
+				lcp -> lcd_so -> so_error = ENETRESET;
+				pk_close (lcp);
+				continue;
+			}
+
+			/* Others are flushed, returned to READY, and notified. */
+			pk_flush (lcp);
+			lcp -> lcd_state = READY;
+			if (lcp -> lcd_upper)
+				lcp -> lcd_upper (lcp, 0);
+		}
+	}
+
+	/* With a negative cause no RESTART packet is sent. */
+	if (restart_cause < 0)
+		return;
+
+	pkp -> pk_state = DTE_SENT_RESTART;
+	pkp -> pk_dxerole &= ~(DTE_PLAYDCE | DTE_PLAYDTE);
+
+	/* Build the RESTART on channel 0 with two extra bytes after the header. */
+	lcp = pkp -> pk_chan[0];
+	m = pk_template (lcp -> lcd_lcn, X25_RESTART);
+	lcp -> lcd_template = m;
+	m -> m_len += 2;
+	m -> m_pkthdr.len = m -> m_len;
+	mtod (m, struct x25_packet *) -> packet_data = 0;	/* DTE only */
+	mtod (m, octet *)[4]  = restart_cause;
+	pk_output (lcp);
+}
+
+
+/* 
+ *  This procedure frees up the Logical Channel Descriptor.
+ */
+
+pk_freelcd (lcp)
+register struct pklcd *lcp;
+{
+	/* A null descriptor is accepted and ignored. */
+	if (lcp != NULL) {
+		/* Give the channel slot back when a channel number was assigned. */
+		if (lcp -> lcd_lcn > 0)
+			lcp -> lcd_pkp -> pk_chan[lcp -> lcd_lcn] = NULL;
+
+		/* Drop queued data, unlink from the descriptor queue, release. */
+		pk_flush (lcp);
+		remque (&lcp -> lcd_q);
+		free ((caddr_t)lcp, M_PCB);
+	}
+}
+
+static struct x25_ifaddr *
+pk_ifwithaddr (sx)
+	struct sockaddr_x25 *sx;
+{
+	register struct ifnet *ifp;
+	register struct ifaddr *ifa;
+	struct x25_ifaddr *ia;
+
+	/*
+	 * Scan every address of every interface and return the first
+	 * AF_CCITT one whose first 16 address bytes equal those of sx.
+	 */
+	for (ifp = ifnet; ifp != 0; ifp = ifp -> if_next) {
+		for (ifa = ifp -> if_addrlist; ifa != 0; ifa = ifa -> ifa_next) {
+			if (ifa -> ifa_addr -> sa_family != AF_CCITT)
+				continue;
+			ia = (struct x25_ifaddr *)ifa;
+			if (bcmp (sx -> x25_addr,
+				  ia -> ia_xc.xc_addr.x25_addr, 16) == 0)
+				return (ia);
+		}
+	}
+	return ((struct x25_ifaddr *)0);
+}
+
+
+/* 
+ *  Bind an address and protocol value to a socket.  The important
+ *  part is the protocol value - the first four characters of the 
+ *  Call User Data field.
+ */
+
+/*
+ * Fetch the packet-level control block reachable from a route entry.
+ * For a gateway route (RTF_GATEWAY) rt_llinfo is treated as another
+ * route entry whose own rt_llinfo is returned, or NULL when rt_llinfo
+ * is unset; for any other route rt_llinfo itself is returned.
+ */
+#define XTRACTPKP(rt)	((rt) -> rt_flags & RTF_GATEWAY ? \
+			 ((rt) -> rt_llinfo ? \
+			  (struct pkcb *) ((struct rtentry *)((rt) -> rt_llinfo)) -> rt_llinfo : \
+			  (struct pkcb *) NULL) : \
+			 (struct pkcb *)((rt) -> rt_llinfo))
+
+pk_bind (lcp, nam)
+struct pklcd *lcp;
+struct mbuf *nam;
+{
+	register struct pklcd *pp;
+	register struct sockaddr_x25 *sa;
+
+	/* Returns 0 on success, otherwise one of the errno values below. */
+	if (nam == NULL)
+		return (EADDRNOTAVAIL);
+	/* A descriptor that already has an address cannot be bound again. */
+	if (lcp -> lcd_ceaddr)				/* XXX */
+		return (EADDRINUSE);
+	if (pk_checksockaddr (nam))
+		return (EINVAL);
+	sa = mtod (nam, struct sockaddr_x25 *);
+
+	/*
+	 * If the user wishes to accept calls only from a particular
+	 * net (net != 0), make sure the net is known
+	 */
+
+	/* An explicit address must match an interface; else check the net. */
+	if (sa -> x25_addr[0]) {
+		if (!pk_ifwithaddr (sa))
+			return (ENETUNREACH);
+	} else if (sa -> x25_net) {
+		if (!ifa_ifwithnet ((struct sockaddr *)sa))
+			return (ENETUNREACH);
+	}
+
+	/*
+	 * For ISO's sake permit default listeners, but only one such . . .
+	 */
+	/*
+	 * Refuse the bind when an existing listener has user data of the
+	 * same length and either that length is zero (a second default
+	 * listener) or the user data bytes are identical.
+	 */
+	for (pp = pk_listenhead; pp; pp = pp -> lcd_listen) {
+		register struct sockaddr_x25 *sa2 = pp -> lcd_ceaddr;
+		if ((sa2 -> x25_udlen == sa -> x25_udlen) &&
+		    (sa2 -> x25_udlen == 0 ||
+		     (bcmp (sa2 -> x25_udata, sa -> x25_udata,
+			    min (sa2 -> x25_udlen, sa -> x25_udlen)) == 0)))
+				return (EADDRINUSE);
+	}
+	/* Keep a private copy of the address and mark the descriptor bound. */
+	lcp -> lcd_laddr = *sa;
+	lcp -> lcd_ceaddr = &lcp -> lcd_laddr;
+	return (0);
+}
+
+/*
+ * Include a bound control block in the list of listeners.
+ */
+pk_listen (lcp)
+register struct pklcd *lcp;
+{
+	register struct pklcd **tail;
+
+	/* Only a descriptor that already has an address may listen. */
+	if (lcp -> lcd_ceaddr == 0)
+		return (EDESTADDRREQ);
+
+	lcp -> lcd_state = LISTEN;
+
+	/* A listener with user data goes to the front of the chain ... */
+	if (lcp -> lcd_ceaddr -> x25_udlen != 0) {
+		lcp -> lcd_listen = pk_listenhead;
+		pk_listenhead = lcp;
+		return (0);
+	}
+
+	/* ... while a default listener (no user data) goes to the end. */
+	tail = &pk_listenhead;
+	while (*tail)
+		tail = &((*tail) -> lcd_listen);
+	*tail = lcp;
+	return (0);
+}
+/*
+ * Include a listening control block for the benefit of other protocols.
+ */
+pk_protolisten (spi, spilen, callee)
+int (*callee) ();
+{
+	register struct pklcd *lcp;
+	register struct mbuf *nam;
+	register struct sockaddr_x25 *sa;
+	int error;
+
+	/* Socket-less descriptor for the in-kernel listener. */
+	lcp = pk_attach ((struct socket *)0);
+	if (lcp == 0)
+		return ENOBUFS;
+
+	/* Scratch mbuf holding the address to bind to. */
+	nam = m_getclr (MT_SONAME, M_DONTWAIT);
+	if (nam == 0) {
+		pk_freelcd (lcp);
+		return ENOBUFS;
+	}
+
+	sa = mtod (nam, struct sockaddr_x25 *);
+	sa -> x25_family = AF_CCITT;
+	sa -> x25_len = nam -> m_len = sizeof (*sa);
+	sa -> x25_udlen = spilen;
+	sa -> x25_udata[0] = spi;
+	lcp -> lcd_upper = callee;
+	lcp -> lcd_flags = X25_MBS_HOLD;
+
+	/* Bind, then listen; the scratch mbuf is released either way. */
+	error = pk_bind (lcp, nam);
+	if (error == 0)
+		error = pk_listen (lcp);
+	(void) m_free (nam);
+
+	if (error)
+		pk_freelcd (lcp);
+	return error; /* Hopefully Zero !*/
+}
+
+/*
+ * Associate a logical channel descriptor with a network.
+ * Fill in the default network specific parameters and then
+ * set any parameters explicitly specified by the user or
+ * by the remote DTE.
+ */
+
+pk_assoc (pkp, lcp, sa)
+register struct pkcb *pkp;
+register struct pklcd *lcp;
+register struct sockaddr_x25 *sa;
+{
+	register struct x25config *xcp = pkp -> pk_xcp;
+
+	/* Tie the descriptor and its channel slot to the network. */
+	lcp -> lcd_pkp = pkp;
+	pkp -> pk_chan[lcp -> lcd_lcn] = lcp;
+	lcp -> lcd_rsn = MODULUS - 1;
+	lcp -> lcd_stime = time.tv_sec;
+
+	/*
+	 * Packet size: start from the network default; an explicit value
+	 * in the address overrides it, otherwise the default is written
+	 * back into the address.
+	 */
+	lcp -> lcd_packetsize = xcp -> xc_psize;
+	if (sa -> x25_opts.op_psize != 0)
+		lcp -> lcd_packetsize = sa -> x25_opts.op_psize;
+	else
+		sa -> x25_opts.op_psize = lcp -> lcd_packetsize;
+
+	/* Window size: same rule as for the packet size. */
+	lcp -> lcd_windowsize = xcp -> xc_pwsize;
+	if (sa -> x25_opts.op_wsize != 0)
+		lcp -> lcd_windowsize = sa -> x25_opts.op_wsize;
+	else
+		sa -> x25_opts.op_wsize = lcp -> lcd_windowsize;
+
+	sa -> x25_net = xcp -> xc_addr.x25_net;
+	lcp -> lcd_flags |= sa -> x25_opts.op_flags;
+}
+
+/*
+ * Start an outgoing call on lcp to the address in sa.  Returns 0 or an
+ * errno value; when the link is up, the value is that of the
+ * interface's ia_start routine.
+ */
+pk_connect (lcp, sa)
+register struct pklcd *lcp;
+register struct sockaddr_x25 *sa;
+{
+	register struct pkcb *pkp;
+	register struct rtentry *rt;
+	register struct rtentry *nrt;
+
+	struct rtentry *npaidb_enter ();
+	struct pkcb *pk_newlink ();
+
+	if (sa -> x25_addr[0] == '\0')
+		return (EDESTADDRREQ);
+
+	/*
+	 * Is the destination address known?
+	 * NOTE(review): the route obtained here is not released on any
+	 * path in this routine -- confirm who owns that reference.
+	 */
+	if (!(rt = rtalloc1 ((struct sockaddr *)sa, 1)))
+		return (ENETUNREACH);
+
+	/*
+	 * Use the packet-level control block hanging off the route, or
+	 * create one.  pk_newlink () is not visible here; guard against
+	 * a null result so it is not dereferenced below.
+	 */
+	if (!(pkp = XTRACTPKP(rt)))
+		pkp = pk_newlink ((struct x25_ifaddr *) (rt -> rt_ifa),
+				 (caddr_t) 0);
+	if (pkp == (struct pkcb *)0)
+		return (ENOBUFS);
+
+	/*
+	 * Have we entered the LLC address?
+	 */
+	if (nrt = npaidb_enter (rt -> rt_gateway, rt_key (rt), rt, 0))
+		pkp -> pk_llrt = nrt;
+
+	/*
+	 * Have we allocated an LLC2 link yet?
+	 */
+	if (pkp -> pk_llnext == (caddr_t)0 && pkp -> pk_llctlinput) {
+		struct dll_ctlinfo ctlinfo;
+
+		ctlinfo.dlcti_rt = rt;
+		ctlinfo.dlcti_pcb = (caddr_t) pkp;
+		ctlinfo.dlcti_conf =
+			(struct dllconfig *) (&((struct x25_ifaddr *)(rt -> rt_ifa)) -> ia_xc);
+		pkp -> pk_llnext =
+			(pkp -> pk_llctlinput) (PRC_CONNECT_REQUEST, 0, &ctlinfo);
+	}
+
+	/* Calls can only be placed on a ready link or one awaiting restart. */
+	if (pkp -> pk_state != DTE_READY && pkp -> pk_state != DTE_WAITING)
+			return (ENETDOWN);
+	if ((lcp -> lcd_lcn = pk_getlcn (pkp)) == 0)
+		return (EMFILE);
+
+	/* Remember the peer address and bind the channel to the network. */
+	lcp -> lcd_faddr = *sa;
+	lcp -> lcd_ceaddr = & lcp -> lcd_faddr;
+	pk_assoc (pkp, lcp, lcp -> lcd_ceaddr);
+
+	/*
+	 * If the link is not up yet, initiate an X.25 RESTART; the call
+	 * request itself is issued later by pk_callcomplete ().
+	 */
+	if (pkp -> pk_state == DTE_WAITING) {
+		pkp -> pk_dxerole |= DTE_CONNECTPENDING;
+		pk_ctlinput (PRC_LINKUP, (struct sockaddr *)0, pkp);
+		if (lcp -> lcd_so)
+			soisconnecting (lcp -> lcd_so);
+		return 0;
+	}
+
+	/* Link is up: build the CALL REQUEST and start transmission. */
+	if (lcp -> lcd_so)
+		soisconnecting (lcp -> lcd_so);
+	lcp -> lcd_template = pk_template (lcp -> lcd_lcn, X25_CALL);
+	pk_callrequest (lcp, lcp -> lcd_ceaddr, pkp -> pk_xcp);
+	return (*pkp -> pk_ia -> ia_start) (lcp);
+}
+
+/*
+ * Complete all pending X.25 call requests --- this gets called after
+ * the X.25 link has been restarted.
+ */
+/* Mirror image of a channel number within 1 .. maxlcn. */
+#define RESHUFFLELCN(maxlcn, lcn) ((maxlcn) - (lcn) + 1)
+
+pk_callcomplete (pkp)
+	register struct pkcb *pkp;
+{
+	register struct pklcd *lcp;
+	register int i;
+	register int ni;
+
+	/* Act only when a connect was pending, and clear that flag. */
+	if ((pkp -> pk_dxerole & DTE_CONNECTPENDING) == 0)
+		return;
+	pkp -> pk_dxerole &= ~DTE_CONNECTPENDING;
+
+	if (pkp -> pk_chan == 0)
+		return;
+
+	/*
+	 * We pretended to be a DTE for allocating lcns; if it turns out
+	 * that we are in reality performing as a DCE we need to mirror
+	 * the channel table so that slot i and slot (maxlcn - i + 1)
+	 * trade places (slot 0 is never moved):
+	 *
+	 *	maxlcn     <-->  1
+	 *	maxlcn - 1 <-->  2
+	 *	maxlcn - 2 <-->  3
+	 *	    ...
+	 *
+	 * Each pair is swapped exactly once.  Walking the whole table
+	 * and moving every occupied slot to its mirror would move an
+	 * entry back again when the walk reaches the mirror slot,
+	 * overwrite an occupied mirror slot, and lose the middle entry
+	 * of an odd-sized table.
+	 */
+	if (pkp -> pk_dxerole & DTE_PLAYDCE) {
+		for (i = pkp -> pk_maxlcn;
+		     i > (ni = RESHUFFLELCN(pkp -> pk_maxlcn, i)); --i) {
+			lcp = pkp -> pk_chan[i];
+			pkp -> pk_chan[i] = pkp -> pk_chan[ni];
+			pkp -> pk_chan[ni] = lcp;
+			/* Keep each moved descriptor's own number in step. */
+			if (pkp -> pk_chan[i])
+				pkp -> pk_chan[i] -> lcd_lcn = i;
+			if (pkp -> pk_chan[ni])
+				pkp -> pk_chan[ni] -> lcd_lcn = ni;
+		}
+	}
+
+	/* Issue a CALL REQUEST on every channel that is in use. */
+	for (i = 1; i <= pkp -> pk_maxlcn; ++i)
+		if ((lcp = pkp -> pk_chan[i]) != NULL) {
+			/* if (lcp -> lcd_so)
+				soisconnecting (lcp -> lcd_so); */
+			lcp -> lcd_template = pk_template (lcp -> lcd_lcn, X25_CALL);
+			pk_callrequest (lcp, lcp -> lcd_ceaddr, pkp -> pk_xcp);
+			(*pkp -> pk_ia -> ia_start) (lcp);
+		}
+}
+
+/* Write cursor shared by successive to_bcd () calls while packing digits. */
+struct bcdinfo {
+	octet *cp;		/* byte currently being filled */
+	unsigned posn;		/* digits emitted so far; odd means half a byte is used */
+};
+/* 
+ *  Build the rest of the CALL REQUEST packet. Fill in calling
+ *  address, facilities fields and the user data field.
+ */
+
+pk_callrequest (lcp, sa, xcp)
+struct pklcd *lcp;
+register struct sockaddr_x25 *sa;
+register struct x25config *xcp;
+{
+	register struct x25_calladdr *a;
+	register struct mbuf *m = lcp -> lcd_template;
+	register struct x25_packet *xp = mtod (m, struct x25_packet *);
+	struct bcdinfo b;
+
+	if (lcp -> lcd_flags & X25_DBIT)
+		X25SBITS(xp -> bits, d_bit, 1);
+	/* The address block starts where the packet data would begin. */
+	a = (struct x25_calladdr *) &xp -> packet_data;
+	b.cp = (octet *) a -> address_field;
+	b.posn = 0;
+	/*
+	 * Pack the called address (sa) and then the calling address
+	 * (xcp -> xc_addr) through the same cursor; each to_bcd () call
+	 * returns the digit count stored as the matching length.
+	 */
+	X25SBITS(a -> addrlens, called_addrlen, to_bcd (&b, sa, xcp));
+	X25SBITS(a -> addrlens, calling_addrlen, to_bcd (&b, &xcp -> xc_addr, xcp));
+	/* An odd digit total leaves half a byte: zero its low nibble. */
+	if (b.posn & 0x01)
+		*b.cp++ &= 0xf0;
+	m -> m_pkthdr.len = m -> m_len += b.cp - (octet *) a;
+
+	/* Chain pre-built facilities when present, else build them in place. */
+	if (lcp -> lcd_facilities) {
+		m -> m_pkthdr.len += 
+			(m -> m_next = lcp -> lcd_facilities) -> m_pkthdr.len;
+		lcp -> lcd_facilities = 0;
+	} else
+		pk_build_facilities (m, sa, (int)xcp -> xc_type);
+
+	/* The call user data goes at the current end of the packet. */
+	m_copyback (m, m -> m_pkthdr.len, sa -> x25_udlen, sa -> x25_udata);
+}
+
+/*
+ * Append a facilities field to the first mbuf of m: a length byte
+ * followed by the facility codes selected by sa's options and by type.
+ * NOTE(review): bytes are written past m_len with no visible bound
+ * check -- presumably the caller guarantees the room; confirm.
+ */
+pk_build_facilities (m, sa, type)
+register struct mbuf *m;
+struct sockaddr_x25 *sa;
+{
+	register octet *cp;
+	register octet *fcp;
+	register int revcharge;
+
+	/* cp marks the length byte; fcp walks the facility bytes after it. */
+	cp = mtod (m, octet *) + m -> m_len;
+	fcp = cp + 1;
+	revcharge = sa -> x25_opts.op_flags & X25_REVERSE_CHARGE ? 1 : 0;
+	/*
+	 * This is specific to Datapac X.25(1976) DTEs.  International
+	 * calls must have the "hi priority" bit on.
+	 */
+	if (type == X25_1976 && sa -> x25_opts.op_psize == X25_PS128)
+		revcharge |= 02;
+	if (revcharge) {
+		*fcp++ = FACILITIES_REVERSE_CHARGE;
+		*fcp++ = revcharge;
+	}
+	/* Only the 1980 and 1984 types carry packet and window sizes. */
+	switch (type) {
+	case X25_1980:
+	case X25_1984:
+		*fcp++ = FACILITIES_PACKETSIZE;
+		*fcp++ = sa -> x25_opts.op_psize;
+		*fcp++ = sa -> x25_opts.op_psize;
+
+		*fcp++ = FACILITIES_WINDOWSIZE;
+		*fcp++ = sa -> x25_opts.op_wsize;
+		*fcp++ = sa -> x25_opts.op_wsize;
+	}
+	/* Store the facility byte count, then grow the mbuf by count + 1. */
+	*cp = fcp - cp - 1;
+	m -> m_pkthdr.len = (m -> m_len += *cp + 1);
+}
+
+/*
+ * Pack the digit string sa -> x25_addr into half bytes at the cursor b,
+ * high nibble first, and return the number of digits emitted.
+ */
+to_bcd (b, sa, xcp)
+register struct bcdinfo *b;
+struct sockaddr_x25 *sa;
+register struct x25config *xcp;
+{
+	register char *x = sa -> x25_addr;
+	unsigned start = b -> posn;
+	/*
+	 * The nodnic and prepnd0 stuff looks tedious,
+	 * but it does allow full X.121 addresses to be used,
+	 * which is handy for routing info (& OSI type 37 addresses).
+	 */
+	if (xcp -> xc_addr.x25_net && (xcp -> xc_nodnic || xcp -> xc_prepnd0)) {
+		char dnicname[sizeof (long) * NBBY/3 + 2];
+		register char *p = dnicname;
+
+		/* Compare the address prefix with the local net number in decimal. */
+		sprintf (p, "%d", xcp -> xc_addr.x25_net & 0x7fff);
+		for (; *p; p++) /* *p == 0 means dnic matched */
+			if ((*p ^ *x++) & 0x0f)
+				break;
+		/* Keep the full address unless it matched and nodnic is set. */
+		if (*p || xcp -> xc_nodnic == 0)
+			x = sa -> x25_addr;
+		/* No match with prepnd0 set: emit one leading zero digit. */
+		if (*p && xcp -> xc_prepnd0) {
+			if ((b -> posn)++ & 0x01)
+				*(b -> cp)++;
+			else
+				*(b -> cp) = 0;
+		}
+	}
+	/* Even positions start a byte (high nibble); odd ones finish it. */
+	while (*x)
+		if ((b -> posn)++ & 0x01)
+			*(b -> cp)++ |= *x++ & 0x0F;
+		else
+			*(b -> cp) = *x++ << 4;
+	return ((b -> posn) - start);
+}
+
+/* 
+ *  This routine gets the  first available logical channel number.  The
+ *  search is 
+ *  		- from the highest number to lowest number if playing DTE, and
+ *		- from lowest to highest number if playing DCE.
+ */
+
+pk_getlcn (pkp)
+register struct pkcb *pkp;
+{
+	register int lcn;
+
+	/* No channel table: no channel can be handed out. */
+	if (pkp -> pk_chan == 0)
+		return (0);
+
+	if (pkp -> pk_dxerole & DTE_PLAYDCE) {
+		/* Playing DCE: take the lowest free channel. */
+		lcn = 1;
+		while (lcn <= pkp -> pk_maxlcn && pkp -> pk_chan[lcn] != NULL)
+			++lcn;
+	} else {
+		/* Otherwise: take the highest free channel. */
+		lcn = pkp -> pk_maxlcn;
+		while (lcn > 0 && pkp -> pk_chan[lcn] != NULL)
+			--lcn;
+	}
+
+	/* Running off the top of the table means nothing was free. */
+	if (lcn > pkp -> pk_maxlcn)
+		return (0);
+	return (lcn);
+}
+
+/* 
+ *  This procedure sends a CLEAR request packet. The lc state is
+ *  set to "SENT_CLEAR". 
+ */
+
+pk_clear (lcp, diagnostic, abortive)
+register struct pklcd *lcp;
+{
+	register struct mbuf *m = pk_template (lcp -> lcd_lcn, X25_CLEAR);
+
+	/* Two bytes follow the header: packet_data (0) and the diagnostic. */
+	m -> m_len += 2;
+	m -> m_pkthdr.len += 2;
+	mtod (m, struct x25_packet *) -> packet_data = 0;
+	mtod (m, octet *)[4] = diagnostic;
+	/*
+	 * Chain any pending facilities behind the packet.
+	 * NOTE(review): only m_len of the first facilities mbuf is added
+	 * here, whereas pk_callrequest () adds its m_pkthdr.len -- confirm
+	 * the facilities are always a single mbuf.
+	 */
+	if (lcp -> lcd_facilities) {
+		m -> m_next = lcp -> lcd_facilities;
+		m -> m_pkthdr.len += m -> m_next -> m_len;
+		lcp -> lcd_facilities = 0;
+	}
+	/*
+	 * An abortive clear becomes the template; otherwise the packet is
+	 * appended as a record to the send buffer.
+	 */
+	if (abortive)
+		lcp -> lcd_template = m;
+	else {
+		struct socket *so = lcp -> lcd_so;
+		struct sockbuf *sb = so ? & so -> so_snd : & lcp -> lcd_sb;
+		sbappendrecord (sb, m);
+	}
+	pk_output (lcp);
+
+}
+
+/*
+ * This procedure generates RNR's or RR's to inhibit or enable
+ * inward data flow, if the current state changes (blocked ==> open or
+ * vice versa), or if forced to generate one.  One forces RNR's to ack data.  
+ */
+pk_flowcontrol (lcp, inhibit, forced)
+register struct pklcd *lcp;
+{
+	int type;
+
+	/* Reduce inhibit to 0 or 1 so it compares cleanly with the flag. */
+	inhibit = inhibit ? 1 : 0;
+
+	if (lcp == 0)
+		return;
+	if (lcp -> lcd_state != DATA_TRANSFER)
+		return;
+	/* Unless forced, nothing is sent when the state would not change. */
+	if (!forced && lcp -> lcd_rxrnr_condition == inhibit)
+		return;
+
+	lcp -> lcd_rxrnr_condition = inhibit;
+	type = inhibit ? X25_RNR : X25_RR;
+	lcp -> lcd_template = pk_template (lcp -> lcd_lcn, type);
+	pk_output (lcp);
+}
+
+/* 
+ *  This procedure sends a RESET request packet. It re-initializes
+ *  the virtual circuit.
+ */
+
+static
+pk_reset (lcp, diagnostic)
+register struct pklcd *lcp;
+{
+	register struct mbuf *m;
+	register struct socket *so;
+
+	/* Nothing is done outside the DATA_TRANSFER state. */
+	if (lcp -> lcd_state != DATA_TRANSFER)
+		return;
+
+	so = lcp -> lcd_so;
+	if (so != 0)
+		so -> so_error = ECONNRESET;
+	lcp -> lcd_reset_condition = TRUE;
+
+	/* Discard queued data and restore the channel control variables. */
+	pk_flush (lcp);
+	lcp -> lcd_intrconf_pending = FALSE;
+	lcp -> lcd_rnr_condition = FALSE;
+	lcp -> lcd_window_condition = FALSE;
+	lcp -> lcd_rsn = MODULUS - 1;
+	lcp -> lcd_ssn = 0;
+	lcp -> lcd_last_transmitted_pr = 0;
+	lcp -> lcd_input_window = 0;
+	lcp -> lcd_output_window = 0;
+
+	/* Build the RESET with two extra bytes after the header and send it. */
+	m = pk_template (lcp -> lcd_lcn, X25_RESET);
+	lcp -> lcd_template = m;
+	m -> m_len += 2;
+	m -> m_pkthdr.len = m -> m_len;
+	mtod (m, struct x25_packet *) -> packet_data = 0;
+	mtod (m, octet *)[4] = diagnostic;
+	pk_output (lcp);
+}
+
+/*
+ * This procedure frees all data queued for output or delivery on a
+ *  virtual circuit.
+ */
+
+pk_flush (lcp)
+register struct pklcd *lcp;
+{
+	register struct socket *so;
+
+	/*
+	 * Each held mbuf chain is freed and its pointer cleared.  The
+	 * template pointer must be cleared as well: nextpk () hands out
+	 * whatever lcd_template points at, so leaving a stale pointer
+	 * behind would let freed memory be transmitted and freed again.
+	 */
+	if (lcp -> lcd_template) {
+		m_freem (lcp -> lcd_template);
+		lcp -> lcd_template = 0;
+	}
+	if (lcp -> lcd_cps) {
+		m_freem (lcp -> lcd_cps);
+		lcp -> lcd_cps = 0;
+	}
+	if (lcp -> lcd_facilities) {
+		m_freem (lcp -> lcd_facilities);
+		lcp -> lcd_facilities = 0;
+	}
+
+	/* Empty the send buffer: the socket's if attached, else our own. */
+	if (so = lcp -> lcd_so)
+		sbflush (&so -> so_snd);
+	else
+		sbflush (&lcp -> lcd_sb);
+}
+
+/* 
+ *  This procedure handles all local protocol procedure errors.
+ */
+
+pk_procerror (error, lcp, errstr, diagnostic)
+register struct pklcd *lcp;
+char *errstr;
+{
+	/* The error text is always logged before any action is taken. */
+	pk_message (lcp -> lcd_lcn, lcp -> lcd_pkp -> pk_xcp, errstr);
+
+	if (error == CLEAR) {
+		/* Report the abort to the socket, then clear abortively. */
+		if (lcp -> lcd_so) {
+			lcp -> lcd_so -> so_error = ECONNABORTED;
+			soisdisconnecting (lcp -> lcd_so);
+		}
+		pk_clear (lcp, diagnostic, 1);
+	} else if (error == RESET) {
+		pk_reset (lcp, diagnostic);
+	}
+	/* Any other error value is logged only. */
+}
+
+/* 
+ *  This procedure is called during the DATA TRANSFER state to check 
+ *  and  process  the P(R) values  received  in the DATA,  RR OR RNR
+ *  packets.
+ */
+
+pk_ack (lcp, pr)
+struct pklcd *lcp;
+unsigned pr;
+{
+	register struct socket *so = lcp -> lcd_so;
+
+	/* A P(R) equal to the current window edge changes nothing. */
+	if (lcp -> lcd_output_window == pr)
+		return (PACKET_OK);
+
+	if (lcp -> lcd_output_window < lcp -> lcd_ssn) {
+		/* Window edge below ssn: pr must lie between the two. */
+		if (pr < lcp -> lcd_output_window || pr > lcp -> lcd_ssn) {
+			pk_procerror (RESET, lcp,
+				"p(r) flow control error", 2);
+			return (ERROR_PACKET);
+		}
+	} else if (pr < lcp -> lcd_output_window && pr > lcp -> lcd_ssn) {
+		/* Window edge at or above ssn: pr may not fall in the gap. */
+		pk_procerror (RESET, lcp,
+			"p(r) flow control error #2", 2);
+		return (ERROR_PACKET);
+	}
+
+	/* Rotate the window and lift a closed-window condition. */
+	lcp -> lcd_output_window = pr;
+	if (lcp -> lcd_window_condition == TRUE)
+		lcp -> lcd_window_condition = FALSE;
+
+	/* Wake any writer waiting on the socket's send buffer. */
+	if (so && (so -> so_snd.sb_flags & (SB_WAIT | SB_NOTIFY)))
+		sowwakeup (so);
+
+	return (PACKET_OK);
+}
+
+/* 
+ *  This procedure decodes the X.25 level 3 packet, returning a
+ *  code to be used in switches or arrays.
+ */
+
+pk_decode (xp)
+register struct x25_packet *xp;
+{
+	/* Only packets with format identifier 1 are accepted. */
+	if (X25GBITS(xp -> bits, fmt_identifier) != 1)
+		return (INVALID_PACKET);
+#ifdef ancient_history
+	/*
+	 *  Make sure that the logical channel group number is 0.
+	 *  This restriction may be removed at some later date.
+	 */
+	if (xp -> lc_group_number != 0)
+		return (INVALID_PACKET);
+#endif
+	/* Data packets are tested for first. */
+	if ((xp -> packet_type & DATA_PACKET_DESIGNATOR) == 0)
+		return (DATA);
+
+	/*
+	 *  Flow control packets (RR, RNR, REJECT) are matched on the low
+	 *  five bits; anything else falls through to the exact match.
+	 */
+	if ((xp -> packet_type & RR_OR_RNR_PACKET_DESIGNATOR) == 0) {
+		switch (xp -> packet_type & 0x1f) {
+		case X25_RR:
+			return (RR);
+		case X25_RNR:
+			return (RNR);
+		case X25_REJECT:
+			return (REJECT);
+		}
+	}
+
+	/* Every remaining type is matched on the whole type byte. */
+	switch (xp -> packet_type) {
+	case X25_CALL:
+		return (CALL);
+	case X25_CALL_ACCEPTED:
+		return (CALL_ACCEPTED);
+	case X25_CLEAR:
+		return (CLEAR);
+	case X25_CLEAR_CONFIRM:
+		return (CLEAR_CONF);
+	case X25_INTERRUPT:
+		return (INTERRUPT);
+	case X25_INTERRUPT_CONFIRM:
+		return (INTERRUPT_CONF);
+	case X25_RESET:
+		return (RESET);
+	case X25_RESET_CONFIRM:
+		return (RESET_CONF);
+	case X25_RESTART:
+		return (RESTART);
+	case X25_RESTART_CONFIRM:
+		return (RESTART_CONF);
+	case X25_DIAGNOSTIC:
+		return (DIAG_TYPE);
+	}
+	return (INVALID_PACKET);
+}
+
+/* 
+ *  A restart packet has been received. Print out the reason
+ *  for the restart.
+ */
+
+pk_restartcause (pkp, xp)
+struct pkcb *pkp;
+register struct x25_packet *xp;
+{
+	register struct x25config *xcp = pkp -> pk_xcp;
+	register int lcn = LCN(xp);
+	char *reason;
+
+	/* Translate the cause byte into its message text, then log it once. */
+	switch (xp -> packet_data) {
+	case X25_RESTART_LOCAL_PROCEDURE_ERROR:
+		reason = "restart: local procedure error";
+		break;
+	case X25_RESTART_NETWORK_CONGESTION:
+		reason = "restart: network congestion";
+		break;
+	case X25_RESTART_NETWORK_OPERATIONAL:
+		reason = "restart: network operational";
+		break;
+	default:
+		reason = "restart: unknown cause";
+		break;
+	}
+	pk_message (lcn, xcp, reason);
+}
+
+/* Largest index into Reset_cause[]; pk_resetcause () clamps larger codes. */
+#define MAXRESETCAUSE	7
+
+/* Socket error value for each reset cause code, indexed 0 .. MAXRESETCAUSE. */
+int     Reset_cause[] = {
+	EXRESET, EXROUT, 0, EXRRPE, 0, EXRLPE, 0, EXRNCG
+};
+
+/* 
+ *  A reset packet has arrived. Return the cause to the user.
+ */
+
+pk_resetcause (pkp, xp)
+struct pkcb *pkp;
+register struct x25_packet *xp;
+{
+	register struct pklcd *lcp =
+				pkp -> pk_chan[LCN(xp)];
+	register int code = xp -> packet_data;
+
+	/* Out-of-range cause codes are reported as the last table entry. */
+	if (code > MAXRESETCAUSE)
+		code = MAXRESETCAUSE;	/* EXRNCG */
+
+	/*
+	 * pk_message () takes the x25config of the network, not the
+	 * packet-level control block, so pass pk_xcp.  The log line
+	 * carries the raw cause byte and the diagnostic byte after it.
+	 */
+	pk_message (LCN(xp), pkp -> pk_xcp, "reset code 0x%x, diagnostic 0x%x",
+			xp -> packet_data, 4[(u_char *)xp]);
+
+	/* Hand the translated cause to the socket, when there is one. */
+	if (lcp && lcp -> lcd_so)
+		lcp -> lcd_so -> so_error = Reset_cause[code];
+}
+
+/* Largest index into Clear_cause[]; pk_clearcause () remaps larger codes. */
+#define MAXCLEARCAUSE	25
+
+/* Socket error value for each clear cause code, indexed 0 .. MAXCLEARCAUSE. */
+int     Clear_cause[] = {
+	EXCLEAR, EXCBUSY, 0, EXCINV, 0, EXCNCG, 0,
+	0, 0, EXCOUT, 0, EXCAB, 0, EXCNOB, 0, 0, 0, EXCRPE,
+	0, EXCLPE, 0, 0, 0, 0, 0, EXCRRC
+};
+
+/* 
+ *  A clear packet has arrived. Return the cause to the user.
+ */
+
+pk_clearcause (pkp, xp)
+struct pkcb *pkp;
+register struct x25_packet *xp;
+{
+	/* NOTE(review): lcp is used unchecked -- presumably the caller has validated the channel; confirm. */
+	register struct pklcd *lcp =
+		pkp -> pk_chan[LCN(xp)];
+	register int code = xp -> packet_data;
+
+	/* Codes beyond the table are reported as entry 5. */
+	if (code > MAXCLEARCAUSE)
+		code = 5;	/* EXCNCG */
+	/* Hand the translated cause to the socket, when there is one. */
+	if (lcp -> lcd_so)
+		lcp -> lcd_so -> so_error = Clear_cause[code];
+}
+
+char *
+format_ntn (xcp)
+register struct x25config *xcp;
+{
+	/* The printable form is simply the configured address string. */
+	return (&xcp -> xc_addr.x25_addr[0]);
+}
+
+/* VARARGS1 */
+pk_message (lcn, xcp, fmt, a1, a2, a3, a4, a5, a6)
+struct x25config *xcp;
+char *fmt;
+{
+	/*
+	 * Prefix: the network address is included unless PQEMPTY holds,
+	 * and the channel number is included when it is non-zero.
+	 */
+	if (!PQEMPTY) {
+		if (lcn) {
+			printf ("X.25(%s): lcn %d: ", format_ntn (xcp), lcn);
+		} else {
+			printf ("X.25(%s): ", format_ntn (xcp));
+		}
+	} else {
+		if (lcn) {
+			printf ("X.25: lcn %d: ", lcn);
+		} else {
+			printf ("X.25: ");
+		}
+	}
+
+	/* Message body with up to six caller arguments, then a newline. */
+	printf (fmt, a1, a2, a3, a4, a5, a6);
+	printf ("\n");
+}
+
+/*
+ * Split the packet chain m0 into pieces of at most 2^lcd_packetsize
+ * bytes, put an X.25 DATA header on each, and append them all to the
+ * channel's send buffer.  Returns 0 on success (or for a null m0) and
+ * ENOBUFS when an mbuf could not be obtained; nothing is queued then.
+ */
+pk_fragment (lcp, m0, qbit, mbit, wait)
+struct mbuf *m0;
+register struct pklcd *lcp;
+{
+	register struct mbuf *m = m0;
+	register struct x25_packet *xp;
+	register struct sockbuf *sb;
+	struct mbuf *head = 0, *next, **mp = &head, *m_split ();
+	int totlen, psize = 1 << (lcp -> lcd_packetsize);
+
+	if (m == 0)
+		return 0;
+	/*
+	 * The chain must start with a packet header mbuf.  The mask test
+	 * needs its own parentheses: == binds tighter than &, so without
+	 * them the expression is always zero and the check never fires.
+	 */
+	if ((m -> m_flags & M_PKTHDR) == 0)
+		panic ("pk_fragment");
+	totlen = m -> m_pkthdr.len;
+	m -> m_act = 0;
+	sb = lcp -> lcd_so ? &lcp -> lcd_so -> so_snd : & lcp -> lcd_sb;
+	do {
+		/* Cut off one packet's worth; next holds the remainder. */
+		if (totlen > psize) {
+			if ((next = m_split (m, psize, wait)) == 0)
+				goto abort;
+			totlen -= psize;
+		} else
+			next = 0;
+		M_PREPEND(m, PKHEADERLN, wait);
+		if (m == 0)
+			goto abort;
+		/* Collect finished fragments on a private list via m_act. */
+		*mp = m;
+		mp = & m -> m_act;
+		*mp = 0;
+		xp = mtod (m, struct x25_packet *);
+		0[(char *)xp] = 0;
+		if (qbit)
+			X25SBITS(xp -> bits, q_bit, 1);
+		if (lcp -> lcd_flags & X25_DBIT)
+			X25SBITS(xp -> bits, d_bit, 1);
+		X25SBITS(xp -> bits, fmt_identifier, 1);
+		xp -> packet_type = X25_DATA;
+		SET_LCN(xp, lcp -> lcd_lcn);
+		/*
+		 * The M bit is set on every fragment but the last, and on
+		 * the last one when the caller asked for it and either the
+		 * fragment is full or the D bit is in use.
+		 */
+		if (next || (mbit && (totlen == psize ||
+				      (lcp -> lcd_flags & X25_DBIT))))
+			SMBIT(xp, 1);
+	} while (m = next);
+	/* Everything was built: queue each fragment as its own record. */
+	for (m = head; m; m = next) {
+		next = m -> m_act;
+		m -> m_act = 0;
+		sbappendrecord (sb, m);
+	}
+	return 0;
+abort:
+	/*
+	 * NOTE(review): when m_split () fails the piece still held in m
+	 * is neither queued nor freed here -- confirm whether the caller
+	 * is expected to release it.
+	 */
+	if (wait)
+		panic ("pk_fragment null mbuf after wait");
+	if (next)
+		m_freem (next);
+	for (m = head; m; m = next) {
+		next = m -> m_act;
+		m_freem (m);
+	}
+	return ENOBUFS;
+}
diff --git a/sys/netccitt/pk_timer.c b/sys/netccitt/pk_timer.c
new file mode 100644
index 00000000000..52c1860b4b4
--- /dev/null
+++ b/sys/netccitt/pk_timer.c
@@ -0,0 +1,126 @@
+/* 
+ * Copyright (c) Computing Centre, University of British Columbia, 1984
+ * Copyright (C) Computer Science Department IV, 
+ * 		 University of Erlangen-Nuremberg, Germany, 1990, 1992
+ * Copyright (c) 1990, 1992, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ * 
+ * This code is derived from software contributed to Berkeley by the
+ * Laboratory for Computation Vision and the Computer Science Department
+ * of the the University of British Columbia and the Computer Science
+ * Department (IV) of the University of Erlangen-Nuremberg, Germany.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)pk_timer.c	8.1 (Berkeley) 6/10/93
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/protosw.h>
+#include <sys/socketvar.h>
+#include <sys/errno.h>
+
+#include <net/if.h>
+
+#include <netccitt/x25.h>
+#include <netccitt/pk.h>
+#include <netccitt/pk_var.h>
+
+/*
+ * Packet-level timer values, in multiples of PR_SLOWHZ (presumably slow
+ * timeout ticks).  Adjustable by patching the binary with adb if necessary.
+ */
+int	pk_t20 = 18 * PR_SLOWHZ;	/* restart timer */
+int	pk_t21 = 20 * PR_SLOWHZ;	/* call timer */
+/* XXX pk_t22 is never used */
+int	pk_t22 = 18 * PR_SLOWHZ;	/* reset timer */
+int	pk_t23 = 18 * PR_SLOWHZ;	/* clear timer */
+
+pk_timer ()
+{
+	register struct pkcb *pkp;
+	register struct pklcd *lcp, **slot;
+	register int jammed, active;
+
+	/* Count down the timers of every pkcb that is restarting or ready. */
+	FOR_ALL_PKCBS(pkp) {
+		if (pkp -> pk_state == DTE_SENT_RESTART) {
+			/*
+			 * If restart failures are common, a link level
+			 * reset should be initiated here.
+			 */
+			lcp = pkp -> pk_chan[0];
+			if (lcp -> lcd_timer && --lcp -> lcd_timer == 0) {
+				pk_message (0, pkp -> pk_xcp,
+					"packet level restart failed");
+				pkp -> pk_state = DTE_WAITING;
+			}
+			continue;
+		}
+		if (pkp -> pk_state != DTE_READY)
+			continue;
+
+		/* ready: scan channels 1 .. pk_maxlcn */
+		jammed = active = 0;
+		for (slot = &pkp -> pk_chan[1];
+		     slot <= &pkp -> pk_chan[pkp -> pk_maxlcn]; slot++) {
+			if ((lcp = *slot) == 0)
+				continue;
+			switch (lcp -> lcd_state) {
+			case LCN_ZOMBIE:	/* zombie state */
+				pk_freelcd (lcp);
+				break;
+			case DATA_TRANSFER:	/* lcn active */
+				active++;
+				break;
+			case SENT_CALL:	/* on expiry: ETIMEDOUT, then clear */
+				if (--lcp -> lcd_timer != 0)
+					break;
+				if (lcp -> lcd_so)
+					lcp -> lcd_so -> so_error = ETIMEDOUT;
+				pk_clear (lcp, 49, 1);
+				break;
+			case SENT_CLEAR:	/* 3 or more retries counts as jammed */
+				if (lcp -> lcd_retry >= 3)
+					jammed++;
+				else if (--lcp -> lcd_timer == 0)
+					pk_clear (lcp, 50, 1);
+				break;
+			}
+		}
+		if (active == 0 && jammed > pkp -> pk_maxlcn / 2) {
+			pk_message (0, pkp -> pk_xcp,
+				"%d lcns jammed: attempting restart", jammed);
+			pk_restart (pkp, 0);
+		}
+	}
+}
diff --git a/sys/netccitt/pk_usrreq.c b/sys/netccitt/pk_usrreq.c
new file mode 100644
index 00000000000..d0dc42c0d40
--- /dev/null
+++ b/sys/netccitt/pk_usrreq.c
@@ -0,0 +1,604 @@
+/*
+ * Copyright (c) University of British Columbia, 1984
+ * Copyright (C) Computer Science Department IV, 
+ * 		 University of Erlangen-Nuremberg, Germany, 1992
+ * Copyright (c) 1991, 1992, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by the
+ * Laboratory for Computation Vision and the Computer Science Department
+ * of the the University of British Columbia and the Computer Science
+ * Department (IV) of the University of Erlangen-Nuremberg, Germany.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)pk_usrreq.c	8.1 (Berkeley) 6/10/93
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/protosw.h>
+#include <sys/errno.h>
+#include <sys/ioctl.h>
+#include <sys/stat.h>
+
+#include <net/if.h>
+#include <net/if_types.h>
+#include <net/route.h>
+
+#include <netccitt/x25.h>
+#include <netccitt/pk.h>
+#include <netccitt/pk_var.h>
+
+static old_to_new();	/* x25_sockaddr -> sockaddr_x25, in place in an mbuf */
+static new_to_old();	/* sockaddr_x25 -> x25_sockaddr, in place in an mbuf */
+/*
+ *  X.25 Packet level protocol interface to socket abstraction.
+ *
+ *  Process user request req on the logical channel attached to so.
+ *  m carries the data to send (or receives out-of-band data), nam
+ *  carries a socket address and control carries ancillary data.
+ *  For PRU_CONTROL the three mbuf arguments are really the arguments
+ *  of pk_control.  Every other handled request leaves through the
+ *  release label, which frees control.  Returns 0 or an errno value.
+ */
+
+pk_usrreq (so, req, m, nam, control)
+struct socket *so;
+int req;
+register struct mbuf *m, *nam;
+struct mbuf *control;
+{
+	register struct pklcd *lcp = (struct pklcd *) so -> so_pcb;
+	register int error = 0;
+
+	if (req == PRU_CONTROL)
+		return (pk_control (so, (int)m, (caddr_t)nam,
+			(struct ifnet *)control));
+	if (control && control -> m_len) {
+		error = EINVAL;
+		goto release;
+	}
+	if (lcp == NULL && req != PRU_ATTACH) {
+		error = EINVAL;
+		goto release;
+	}
+
+	switch (req) {
+	/*
+	 *  X.25 attaches to socket via PRU_ATTACH and allocates a logical
+	 *  channel descriptor.  If the socket is to  receive connections,
+	 *  then the LISTEN state is entered.
+	 */
+	case PRU_ATTACH:
+		if (lcp) {
+			error = EISCONN;
+			/* Socket already connected. */
+			break;
+		}
+		lcp = pk_attach (so);
+		if (lcp == 0)
+			error = ENOBUFS;
+		break;
+
+	/*
+	 *  Detach a logical channel from the socket. If the state of the
+	 *  channel is embryonic, simply discard it. Otherwise we have to
+	 *  initiate a PRU_DISCONNECT which will finish later.
+	 */
+	case PRU_DETACH:
+		pk_disconnect (lcp);
+		break;
+
+	/*
+	 *  Give the socket an address.
+	 */
+	case PRU_BIND:
+		if (nam -> m_len == sizeof (struct x25_sockaddr))
+			old_to_new (nam);
+		error = pk_bind (lcp, nam);
+		break;
+
+	/*
+	 *  Prepare to accept connections.
+	 */
+	case PRU_LISTEN:
+		error = pk_listen (lcp);
+		break;
+
+	/*
+	 *  Initiate a CALL REQUEST to peer entity. Enter state SENT_CALL
+	 *  and mark the socket as connecting. Set timer waiting for
+	 *  CALL ACCEPT or CLEAR.
+	 */
+	case PRU_CONNECT:
+		if (nam -> m_len == sizeof (struct x25_sockaddr))
+			old_to_new (nam);
+		error = pk_checksockaddr (nam) ? EINVAL :
+			pk_connect (lcp, mtod (nam, struct sockaddr_x25 *));
+		break;
+
+	/*
+	 *  Initiate a disconnect to peer entity via a CLEAR REQUEST packet.
+	 *  The socket will be disconnected when we receive a confirmation
+	 *  or a clear collision.
+	 */
+	case PRU_DISCONNECT:
+		pk_disconnect (lcp);
+		break;
+
+	/*
+	 *  Accept an INCOMING CALL. Most of the work has already been done
+	 *  by pk_input. Just return the callers address to the user.
+	 */
+	case PRU_ACCEPT:
+		if (lcp -> lcd_craddr == NULL)
+			break;
+		bcopy ((caddr_t)lcp -> lcd_craddr, mtod (nam, caddr_t),
+			sizeof (struct sockaddr_x25));
+		nam -> m_len = sizeof (struct sockaddr_x25);
+		if (lcp -> lcd_flags & X25_OLDSOCKADDR)
+			new_to_old (nam);
+		break;
+
+	/*
+	 *  After a receive, we should send a RR.
+	 */
+	case PRU_RCVD:
+		pk_flowcontrol (lcp, /*sbspace (&so -> so_rcv) <= */ 0, 1);
+		break;
+
+	/*
+	 *  Send INTERRUPT packet.
+	 */
+	case PRU_SENDOOB:
+		if (m == 0) {
+			MGETHDR(m, M_WAITOK, MT_OOBDATA);
+			m -> m_pkthdr.len = m -> m_len = 1;
+			*mtod (m, octet *) = 0;
+		}
+		if (m -> m_pkthdr.len > 32) {
+			m_freem (m);
+			error = EMSGSIZE;
+			break;
+		}
+		MCHTYPE(m, MT_OOBDATA);
+		/* FALLTHROUGH */
+
+	/*
+	 *  Do send by placing data on the socket output queue.
+	 */
+	case PRU_SEND:
+		if (control) {
+			/* The option header is in control, not in the data m.
+			 * NOTE(review): non-empty control was rejected above. */
+			struct cmsghdr *ch = mtod (control, struct cmsghdr *);
+			control -> m_len -= sizeof (*ch);
+			control -> m_data += sizeof (*ch);
+			error = pk_ctloutput (PRCO_SETOPT, so, ch -> cmsg_level,
+					ch -> cmsg_type, &control);
+		}
+		if (error == 0 && m)
+			error = pk_send (lcp, m);
+		break;
+
+	/*
+	 *  Abort a virtual circuit. For example all completed calls
+	 *  waiting acceptance.
+	 */
+	case PRU_ABORT:
+		pk_disconnect (lcp);
+		break;
+
+	/* Begin unimplemented hooks. */
+
+	case PRU_SHUTDOWN:
+		error = EOPNOTSUPP;
+		break;
+
+	case PRU_CONTROL:	/* not reached: handled before the switch */
+		error = EOPNOTSUPP;
+		break;
+
+	case PRU_SENSE:
+#ifdef BSD4_3
+		((struct stat *)m) -> st_blksize = so -> so_snd.sb_hiwat;
+#else
+		error = EOPNOTSUPP;
+#endif
+		break;
+
+	/* End unimplemented hooks. */
+
+	case PRU_SOCKADDR:
+		if (lcp -> lcd_ceaddr == 0) {
+			error = EADDRNOTAVAIL;
+			break;
+		}
+		nam -> m_len = sizeof (struct sockaddr_x25);
+		bcopy ((caddr_t)lcp -> lcd_ceaddr, mtod (nam, caddr_t),
+			sizeof (struct sockaddr_x25));
+		if (lcp -> lcd_flags & X25_OLDSOCKADDR)
+			new_to_old (nam);
+		break;
+
+	case PRU_PEERADDR:
+		if (lcp -> lcd_state != DATA_TRANSFER) {
+			error = ENOTCONN;
+			break;
+		}
+		nam -> m_len = sizeof (struct sockaddr_x25);
+		bcopy (lcp -> lcd_craddr ? (caddr_t)lcp -> lcd_craddr :
+			(caddr_t)lcp -> lcd_ceaddr,
+			mtod (nam, caddr_t), sizeof (struct sockaddr_x25));
+		if (lcp -> lcd_flags & X25_OLDSOCKADDR)
+			new_to_old (nam);
+		break;
+
+	/*
+	 *  Receive INTERRUPT packet.
+	 */
+	case PRU_RCVOOB:
+		if (so -> so_options & SO_OOBINLINE) {
+			register struct mbuf *n  = so -> so_rcv.sb_mb;
+			if (n && n -> m_type == MT_OOBDATA) {
+				unsigned len =  n -> m_pkthdr.len;
+				so -> so_rcv.sb_mb = n -> m_nextpkt;
+				if (len !=  n -> m_len &&
+				    (n = m_pullup (n, len)) == 0)
+					break;
+				m -> m_len = len;	/* queued n -> caller's m */
+				bcopy (mtod (n, caddr_t), mtod (m, caddr_t), len);
+				m_freem (n);
+			}
+			break;
+		}
+		m -> m_len = 1;
+		*mtod (m, char *) = lcp -> lcd_intrdata;
+		break;
+
+	default:
+		panic ("pk_usrreq");
+	}
+release:
+	if (control != NULL)
+		m_freem (control);
+	return (error);
+}
+
+/*
+ * Start output on lcp via pk_output; always returns 0.  To use UBC X.25
+ * level 3 with some other X.25 level 2 driver, have the ifp -> if_ioctl
+ * routine assign pk_start to ia -> ia_start on SIOCSIFCONF_X25.
+ */
+/* ARGSUSED */
+pk_start (lcp)
+register struct pklcd *lcp;
+{
+	pk_output (lcp);
+	return (0); /* XXX pk_output should return a value */
+}
+
+#ifndef _offsetof
+#define _offsetof(t, m) ((int)((caddr_t)&((t *)0)->m))	/* byte offset of m in t */
+#endif
+struct sockaddr_x25 pk_sockmask = {	/* used as ifa_netmask by pk_control */
+	_offsetof(struct sockaddr_x25, x25_addr[0]),      /* x25_len */
+	0,                                                /* x25_family */
+	-1,                                               /* x25_net id */
+};
+
+/*
+ * Control requests: SIOCGIFCONF_X25 copies out the interface's x25config,
+ * SIOCSIFCONF_X25 (privileged) installs one, anything else goes to if_ioctl.
+ */
+/*ARGSUSED*/
+pk_control (so, cmd, data, ifp)
+struct socket *so;
+int cmd;
+caddr_t data;
+register struct ifnet *ifp;
+{
+	register struct ifreq_x25 *ifr = (struct ifreq_x25 *)data;
+	register struct ifaddr *ifa = 0;
+	register struct x25_ifaddr *ia = 0;
+	int error = 0, s, old_maxlcn;	/* error stays 0 without an if_ioctl */
+
+	/*
+	 * Find address for this interface, if it exists.
+	 */
+	if (ifp)
+		for (ifa = ifp -> if_addrlist; ifa; ifa = ifa -> ifa_next)
+			if (ifa -> ifa_addr -> sa_family == AF_CCITT)
+				break;
+
+	ia = (struct x25_ifaddr *)ifa;
+	switch (cmd) {
+	case SIOCGIFCONF_X25:
+		if (ifa == 0)
+			return (EADDRNOTAVAIL);
+		ifr -> ifr_xc = ia -> ia_xc;
+		return (0);
+
+	case SIOCSIFCONF_X25:
+		if ((so->so_state & SS_PRIV) == 0)
+			return (EPERM);
+		if (ifp == 0)
+			panic ("pk_control");
+		if (ifa == (struct ifaddr *)0) {
+			MALLOC(ia, struct x25_ifaddr *, sizeof (*ia),
+				M_IFADDR, M_WAITOK);
+			if (ia == 0)
+				return (ENOBUFS);
+			bzero ((caddr_t)ia, sizeof (*ia));
+			if (ifa = ifp -> if_addrlist) {
+				for ( ; ifa -> ifa_next; ifa = ifa -> ifa_next)
+					;
+				ifa -> ifa_next = &ia -> ia_ifa;
+			} else
+				ifp -> if_addrlist = &ia -> ia_ifa;
+			ifa = &ia -> ia_ifa;
+			ifa -> ifa_netmask = (struct sockaddr *)&pk_sockmask;
+			ifa -> ifa_addr = (struct sockaddr *)&ia -> ia_xc.xc_addr;
+			ifa -> ifa_dstaddr = (struct sockaddr *)&ia -> ia_dstaddr; /* XXX */
+			ia -> ia_ifp = ifp;
+			ia -> ia_dstaddr.x25_family = AF_CCITT;
+			ia -> ia_dstaddr.x25_len = pk_sockmask.x25_len;
+		} else if (ISISO8802(ifp) == 0) {
+			rtinit (ifa, (int)RTM_DELETE, 0);
+		}
+		old_maxlcn = ia -> ia_maxlcn;
+		ia -> ia_xc = ifr -> ifr_xc;
+		ia -> ia_dstaddr.x25_net = ia -> ia_xc.xc_addr.x25_net;
+		if (ia -> ia_maxlcn != old_maxlcn && old_maxlcn != 0) {
+			/* VERY messy XXX */
+			register struct pkcb *pkp;
+			FOR_ALL_PKCBS(pkp)
+				if (pkp -> pk_ia == ia)
+					pk_resize (pkp);
+		}
+		/*
+		 * Give the interface a chance to initialize if this
+		 * is its first address, and to validate the address.
+		 */
+		ia -> ia_start = pk_start;
+		s = splimp();
+		if (ifp -> if_ioctl)
+			error = (*ifp -> if_ioctl)(ifp, SIOCSIFCONF_X25,
+						   (caddr_t) ifa);
+		if (error)
+			ifp -> if_flags &= ~IFF_UP;
+		else if (ISISO8802(ifp) == 0)
+			error = rtinit (ifa, (int)RTM_ADD, RTF_UP);
+		splx (s);
+		return (error);
+
+	default:
+		if (ifp == 0 || ifp -> if_ioctl == 0)
+			return (EOPNOTSUPP);
+		return ((*ifp -> if_ioctl)(ifp, cmd, data));
+	}
+}
+
+pk_ctloutput (cmd, so, level, optname, mp)
+struct socket *so;
+struct mbuf **mp;
+int cmd, level, optname;
+{
+	register struct pklcd *lcp = (struct pklcd *) so -> so_pcb;
+	register struct mbuf *opt = *mp;
+	int result = EOPNOTSUPP;
+
+	/*
+	 * Apply the option value held in the mbuf at *mp.  Only PRCO_SETOPT
+	 * with one of the option names below is handled; anything else
+	 * yields EOPNOTSUPP.  The mbuf is freed before returning unless
+	 * PK_FACILITIES stored it in the channel descriptor.
+	 */
+	if (opt == 0)
+		return (EINVAL);
+
+	if (cmd == PRCO_SETOPT) {
+		if (optname == PK_FACILITIES) {
+			lcp -> lcd_facilities = opt;
+			*mp = 0;
+			return (0);
+		} else if (optname == PK_ACCTFILE) {
+			if ((so->so_state & SS_PRIV) == 0)
+				result = EPERM;
+			else if (opt -> m_len)
+				result = pk_accton (mtod (opt, char *));
+			else
+				result = pk_accton ((char *)0);
+		} else if (optname == PK_RTATTACH)
+			result = pk_rtattach (so, opt);
+		else if (optname == PK_PRLISTEN)
+			result = pk_user_protolisten (mtod (opt, u_char *));
+	}
+
+	if (*mp) {
+		(void) m_freem (*mp);
+		*mp = 0;
+	}
+	return (result);
+}
+
+
+/*
+ * Do an in-place conversion of an "old style" socket address
+ * (struct x25_sockaddr) in mbuf m to the new style (struct sockaddr_x25).
+ */
+
+static
+old_to_new (m)
+register struct mbuf *m;
+{
+	register struct x25_sockaddr *oldp;
+	register struct sockaddr_x25 *newp;
+	register char *ocp, *ncp;
+	struct sockaddr_x25 new;
+
+	oldp = mtod (m, struct x25_sockaddr *);
+	newp = &new;		/* build the result aside, copy back at the end */
+	bzero ((caddr_t)newp, sizeof (*newp));
+	newp -> x25_family = AF_CCITT;
+	newp -> x25_len = sizeof(*newp);
+	newp -> x25_opts.op_flags = (oldp -> xaddr_facilities & X25_REVERSE_CHARGE)
+		| X25_MQBIT | X25_OLDSOCKADDR;
+	if (oldp -> xaddr_facilities & XS_HIPRIO)	/* Datapac specific */
+		newp -> x25_opts.op_psize = X25_PS128;
+	bcopy ((caddr_t)oldp -> xaddr_addr, newp -> x25_addr,
+	       (unsigned)min (oldp -> xaddr_len, sizeof (newp -> x25_addr) - 1));
+	/* new is zeroed, so this asks whether the protocol id is non-zero */
+	if (bcmp ((caddr_t)oldp -> xaddr_proto, newp -> x25_udata, 4) != 0) {
+		bcopy ((caddr_t)oldp -> xaddr_proto, newp -> x25_udata, 4);
+		newp -> x25_udlen = 4;
+	}
+	ocp = (caddr_t)oldp -> xaddr_userdata;
+	ncp = newp -> x25_udata + 4;	/* check the bound before reading *ocp */
+	while (ocp < (caddr_t)oldp -> xaddr_userdata + 12 && *ocp) {
+		if (newp -> x25_udlen == 0)
+			newp -> x25_udlen = 4;
+		*ncp++ = *ocp++;
+		newp -> x25_udlen++;
+	}
+	bcopy ((caddr_t)newp, mtod (m, char *), sizeof (*newp));
+	m -> m_len = sizeof (*newp);
+}
+
+/*
+ * Do an in-place conversion of a new style socket address
+ * (struct sockaddr_x25) in mbuf m to the old style (struct x25_sockaddr).
+ */
+
+static
+new_to_old (m)
+register struct mbuf *m;
+{
+	register struct x25_sockaddr *oldp;
+	register struct sockaddr_x25 *newp;
+	register char *ocp, *ncp;
+	struct x25_sockaddr old;
+
+	oldp = &old;
+	newp = mtod (m, struct sockaddr_x25 *);
+	bzero ((caddr_t)oldp, sizeof (*oldp));
+	oldp -> xaddr_facilities = newp -> x25_opts.op_flags & X25_REVERSE_CHARGE;
+	if (newp -> x25_opts.op_psize == X25_PS128)
+		oldp -> xaddr_facilities |= XS_HIPRIO;	/* Datapac specific */
+	ocp = (char *)oldp -> xaddr_addr;
+	ncp = newp -> x25_addr;
+	/* Never run past xaddr_addr, even if x25_addr is unterminated. */
+	while (ocp < (char *)oldp -> xaddr_addr + sizeof (oldp -> xaddr_addr)
+	       && *ncp) {
+		*ocp++ = *ncp++;
+		oldp -> xaddr_len++;
+	}
+	bcopy (newp -> x25_udata, (caddr_t)oldp -> xaddr_proto, 4);
+	if (newp -> x25_udlen > 4)	/* clamp to the old user-data field */
+		bcopy (newp -> x25_udata + 4, (caddr_t)oldp -> xaddr_userdata,
+			(unsigned)min (newp -> x25_udlen - 4,
+				       sizeof (oldp -> xaddr_userdata)));
+	bcopy ((caddr_t)oldp, mtod (m, char *), sizeof (*oldp));
+	m -> m_len = sizeof (*oldp);
+}
+
+
+pk_checksockaddr (m)
+struct mbuf *m;
+{
+	register struct sockaddr_x25 *sa = mtod (m, struct sockaddr_x25 *);
+	register char *digit;
+	register char *last = &sa -> x25_addr[sizeof (sa -> x25_addr) - 1];
+
+	/* Returns 0 if m holds an acceptable sockaddr_x25, 1 otherwise. */
+	if (m -> m_len != sizeof (struct sockaddr_x25) ||
+	    sa -> x25_family != AF_CCITT ||
+	    sa -> x25_udlen > sizeof (sa -> x25_udata))
+		return (1);
+	/* x25_addr must be decimal digits with a NUL inside the array. */
+	for (digit = sa -> x25_addr; *digit != '\0'; digit++)
+		if (digit >= last || *digit < '0' || *digit > '9')
+			return (1);
+	return (0);
+}
+
+pk_send (lcp, m)
+struct pklcd *lcp;
+register struct mbuf *m;
+{
+	int mqbit = 0, error = 0;
+	register struct x25_packet *xp;
+
+	/* Queue m for transmission on lcp; returns 0 or an errno value. */
+	if (m -> m_type == MT_OOBDATA) {	/* sent as an INTERRUPT packet */
+		if (lcp -> lcd_intrconf_pending)
+			error = ETOOMANYREFS;
+		if (m -> m_pkthdr.len > 32)
+			error = EMSGSIZE;
+		M_PREPEND(m, PKHEADERLN, M_WAITOK);
+		if (m == 0 && error == 0)
+			error = ENOBUFS;	/* do not report a lost packet as sent */
+		if (m == 0 || error)
+			goto bad;
+		*(mtod (m, octet *)) = 0;
+		xp = mtod (m, struct x25_packet *);
+		X25SBITS(xp -> bits, fmt_identifier, 1);
+		xp -> packet_type = X25_INTERRUPT;
+		SET_LCN(xp, lcp -> lcd_lcn);
+		sbinsertoob (lcp -> lcd_so ?
+			&lcp -> lcd_so -> so_snd : &lcp -> lcd_sb, m);
+		goto send;
+	}
+	/* Application has elected (at call setup time) to prepend a control
+	 * byte to each packet written indicating m-bit and q-bit status.
+	 * Examine and then discard this byte. */
+	if (lcp -> lcd_flags & X25_MQBIT) {
+		if (m -> m_len < 1) {
+			m_freem (m);
+			return (EMSGSIZE);
+		}
+		mqbit = *(mtod (m, u_char *));
+		m -> m_len--;
+		m -> m_data++;
+		m -> m_pkthdr.len--;
+	}
+	error = pk_fragment (lcp, m, mqbit & 0x80, mqbit & 0x40, 1);
+send:
+	if (error == 0 && lcp -> lcd_state == DATA_TRANSFER)
+		lcp -> lcd_send (lcp); /* XXXXXXXXX fix pk_output!!! */
+	return (error);
+bad:
+	if (m)
+		m_freem (m);
+	return (error);
+}
diff --git a/sys/netccitt/pk_var.h b/sys/netccitt/pk_var.h
new file mode 100644
index 00000000000..beda05dc375
--- /dev/null
+++ b/sys/netccitt/pk_var.h
@@ -0,0 +1,231 @@
+/* 
+ * Copyright (c) Computing Centre, University of British Columbia, 1985 
+ * Copyright (C) Computer Science Department IV, 
+ * 		 University of Erlangen-Nuremberg, Germany, 1990, 1991, 1992
+ * Copyright (c) 1992, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ * 
+ * This code is derived from software contributed to Berkeley by the
+ * Laboratory for Computation Vision and the Computer Science Department
+ * of the the University of British Columbia and the Computer Science
+ * Department (IV) of the University of Erlangen-Nuremberg, Germany.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)pk_var.h	8.1 (Berkeley) 6/10/93
+ */
+
+/*
+ *  X.25 Logical Channel Descriptor: the per-channel state (timers,
+ *  sequence numbers, flow-control windows, addresses) together with
+ *  the socket and pkcb that the channel belongs to.
+ */
+
+struct pklcd {
+	struct 	pklcd_q {
+		struct	pklcd_q *q_forw;	/* debugging chain */
+		struct	pklcd_q *q_back;	/* debugging chain */
+	} lcd_q;
+	int	(*lcd_upper)();		/* switch to socket vs datagram vs ...*/
+	caddr_t	lcd_upnext;		/* reference for lcd_upper() */
+	int	(*lcd_send)();		/* if X.25 front end, direct connect */
+	caddr_t lcd_downnext;		/* reference for lcd_send() */
+	short   lcd_lcn;		/* Logical channel number */
+	short   lcd_state;		/* Logical Channel state */
+	short   lcd_timer;		/* Various timer values */
+	short   lcd_dg_timer;		/* to reclaim idle datagram circuits */
+        bool	lcd_intrconf_pending;	/* Interrupt confirmation pending */
+	octet	lcd_intrdata;		/* Octet of incoming intr data */
+	char	lcd_retry;		/* Timer retry count */
+	char	lcd_rsn;		/* Seq no of last received packet */
+	char	lcd_ssn;		/* Seq no of next packet to send */
+	char	lcd_output_window;	/* Output flow control window */
+	char	lcd_input_window;	/* Input flow control window */
+	char	lcd_last_transmitted_pr;/* Last Pr value transmitted */
+        bool	lcd_rnr_condition;	/* Remote in busy condition */
+        bool	lcd_window_condition;	/* Output window size exceeded */
+        bool	lcd_reset_condition;	/* True, if waiting reset confirm */
+	bool	lcd_rxrnr_condition;	/* True, if we have sent rnr */
+	char	lcd_packetsize;		/* log2 of maximum packet size */
+	char	lcd_windowsize;		/* Window size - both directions */
+        octet	lcd_closed_user_group;	/* Closed user group specification */
+	char	lcd_flags;		/* copy of sockaddr_x25 op_flags */
+	struct	mbuf *lcd_facilities;	/* user supplied facilities for cr */
+	struct	mbuf *lcd_template;	/* Address of response packet */
+	struct	socket *lcd_so;		/* Socket addr for connection */
+	struct	sockaddr_x25 *lcd_craddr;/* Calling address pointer */
+	struct	sockaddr_x25 *lcd_ceaddr;/* Called address pointer */
+	time_t	lcd_stime;		/* time circuit established */
+	long    lcd_txcnt;		/* Data packet transmit count */
+	long    lcd_rxcnt;		/* Data packet receive count */
+	short   lcd_intrcnt;		/* Interrupt packet transmit count */
+	struct	pklcd *lcd_listen;	/* Next lcd on listen queue */
+	struct	pkcb *lcd_pkp;		/* Network this lcd is attached to */
+	struct	mbuf *lcd_cps;		/* Complete Packet Sequence reassembly*/
+	long	lcd_cpsmax;		/* Max length for CPS */
+	struct	sockaddr_x25 lcd_faddr;	/* Remote Address (Calling) */
+	struct	sockaddr_x25 lcd_laddr;	/* Local Address (Called) */
+	struct	sockbuf lcd_sb;		/* alternate for datagram service */
+};
+
+/*
+ * Per network information, allocated dynamically when a new network is
+ * configured; linked through pk_q on the list headed by pkcb_q below.
+ */
+
+struct	pkcb {
+	struct pkcb_q {
+		struct pkcb_q *q_forw;
+		struct pkcb_q *q_backw;
+	} pk_q;
+	short	pk_state;		/* packet level status */
+	short	pk_maxlcn;		/* local copy of xc_maxlcn */
+	int	(*pk_lloutput) ();	/* link level output procedure */
+	caddr_t (*pk_llctlinput) ();    /* link level ctloutput procedure */
+	caddr_t pk_llnext;		/* handle for next level down */
+	struct	x25config *pk_xcp;	/* network specific configuration */
+	struct	x25_ifaddr *pk_ia;	/* backpointer to ifaddr */
+	struct	pklcd **pk_chan;	/* actual size == xc_maxlcn+1 */
+	short	pk_dxerole;		/* DXE role of PLE over LLC2 */
+	short	pk_restartcolls;	/* counting RESTART collisions til resolved */
+	struct	rtentry *pk_rt;		/* back pointer to route */
+	struct  rtentry *pk_llrt;       /* pointer to reverse mapping */
+	u_short pk_refcount;  		/* ref count */
+};
+
+#define FOR_ALL_PKCBS(p) for((p) = (struct pkcb *)(pkcb_q.q_forw); \
+			     (pkcb_q.q_forw != &pkcb_q) && ((struct pkcb_q *)(p) != &pkcb_q); \
+			     (p) = (struct pkcb *)((p) -> pk_q.q_forw))
+
+#define	PQEMPTY		(pkcb_q.q_forw == &pkcb_q)	/* no pkcb on the list */
+
+/*
+ *	Interface address, x25 version. Exactly one of these structures is
+ *	allocated for each interface with an x25 address.
+ *
+ *	The ifaddr structure contains the protocol-independent part
+ *	of the structure, and is assumed to be first.
+ */
+struct x25_ifaddr {
+	struct	ifaddr ia_ifa;		/* protocol-independent info */
+#define ia_ifp	ia_ifa.ifa_ifp
+#define	ia_flags ia_ifa.ifa_flags
+	struct	x25config ia_xc;	/* network specific configuration */
+	struct  pkcb *ia_pkcb;
+#define ia_maxlcn ia_xc.xc_maxlcn
+	int	(*ia_start) ();		/* connect, confirm method */
+	struct	sockaddr_x25 ia_dstaddr; /* reserve space for route dst */
+};
+
+/*
+ * ``Link-Level'' extension to Routing Entry for upper level
+ * packet switching via X.25 virtual circuits.
+ */
+struct llinfo_x25 {
+	struct	llinfo_x25 *lx_next;	/* chain together in linked list */
+	struct	llinfo_x25 *lx_prev;	/* chain together in linked list */
+	struct	rtentry *lx_rt;		/* back pointer to route */
+	struct	pklcd *lx_lcd;		/* local connection block */
+	struct	x25_ifaddr *lx_ia;	/* may not be same as rt_ifa */
+	int	lx_state;		/* can't trust lcd->lcd_state */
+	int	lx_flags;
+	int	lx_timer;		/* for idle timeout */
+	int	lx_family;		/* for dispatch */
+};
+
+/* States for lx_state */
+#define LXS_NEWBORN		0
+#define LXS_RESOLVING		1
+#define LXS_FREE		2
+#define LXS_CONNECTING		3
+#define LXS_CONNECTED		4
+#define LXS_DISCONNECTING 	5
+#define LXS_LISTENING 		6
+
+/* flags -- presumably bit values for lx_flags (TODO confirm) */
+#define LXF_VALID	0x1		/* Circuit is live, etc. */
+#define LXF_RTHELD	0x2		/* this lcb references rtentry */
+#define LXF_LISTEN	0x4		/* accepting incoming calls */
+
+/*
+ * Definitions for accessing bitfields/bitslices inside X.25 structs: the
+ * names index x25_bitslice[]; X25GBITS reads, X25SBITS ORs in, X25CSBITS assigns.
+ */
+
+struct x25bitslice {
+	unsigned int bs_mask;
+	unsigned int bs_shift;
+};
+
+#define	calling_addrlen	0
+#define	called_addrlen	1
+#define	q_bit	        2
+#define	d_bit           3
+#define	fmt_identifier	4
+#define	lc_group_number	1
+#define	p_r             5
+#define	m_bit           6
+#define	p_s             7
+#define	zilch           8
+
+#define	X25GBITS(Arg, Index)	(((Arg) & x25_bitslice[(Index)].bs_mask) >> x25_bitslice[(Index)].bs_shift)
+#define	X25SBITS(Arg, Index, Val)	(Arg) |= (((Val) << x25_bitslice[(Index)].bs_shift) & x25_bitslice[(Index)].bs_mask)
+#define	X25CSBITS(Arg, Index, Val)	(Arg) = (((Val) << x25_bitslice[(Index)].bs_shift) & x25_bitslice[(Index)].bs_mask)
+
+extern struct x25bitslice x25_bitslice[];
+
+/* ISISO8802(i): true if interface i has one of the if_types listed below. */
+#define ISOFIFTTYPE(i,t) ((i)->if_type == (t))
+#define ISISO8802(i) ((ISOFIFTTYPE(i, IFT_ETHER) || \
+		       ISOFIFTTYPE(i, IFT_ISO88023) || \
+		       ISOFIFTTYPE(i, IFT_ISO88024) || \
+		       ISOFIFTTYPE(i, IFT_ISO88025) || \
+		       ISOFIFTTYPE(i, IFT_ISO88026) || \
+		       ISOFIFTTYPE(i, IFT_P10) || \
+		       ISOFIFTTYPE(i, IFT_P80) || \
+		       ISOFIFTTYPE(i, IFT_FDDI)))
+
+/*
+ * miscellaneous debugging info
+ */
+struct mbuf_cache {
+	int	mbc_size;
+	int	mbc_num;
+	int	mbc_oldsize;
+	struct	mbuf **mbc_cache;
+};
+
+#if defined(KERNEL) && defined(CCITT)
+extern struct pkcb_q pkcb_q;
+struct	pklcd *pk_listenhead;	/* NOTE(review): tentative definition in a header */
+struct	pklcd *pk_attach();
+
+extern char	*pk_name[], *pk_state[];
+extern int	pk_t20, pk_t21, pk_t22, pk_t23;	/* defined in pk_timer.c */
+#endif
diff --git a/sys/netccitt/x25.h b/sys/netccitt/x25.h
new file mode 100644
index 00000000000..e86af39a1a6
--- /dev/null
+++ b/sys/netccitt/x25.h
@@ -0,0 +1,157 @@
+/*
+ * Copyright (c) University of British Columbia, 1984
+ * Copyright (c) 1990, 1992, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ * 		 University of Erlangen-Nuremberg, Germany, 1992
+ * 
+ * This code is derived from software contributed to Berkeley by the
+ * Laboratory for Computation Vision and the Computer Science Department
+ * of the the University of British Columbia and the Computer Science
+ * Department (IV) of the University of Erlangen-Nuremberg, Germany.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)x25.h	8.1 (Berkeley) 6/10/93
+ */
+
+#ifdef KERNEL
+#define PRC_IFUP	3
+#define PRC_LINKUP	4
+#define PRC_LINKDOWN	5
+#define PRC_LINKRESET	6
+#define PRC_LINKDONTCOPY	7
+#ifndef PRC_DISCONNECT_REQUEST  
+#define PRC_DISCONNECT_REQUEST 10
+#endif
+#endif
+
+#define CCITTPROTO_HDLC		1
+#define CCITTPROTO_X25		2	/* packet level protocol */
+#define IEEEPROTO_802LLC	3	/* doesn't belong here */
+
+#define HDLCPROTO_LAP		1
+#define HDLCPROTO_LAPB		2
+#define HDLCPROTO_UNSET		3
+#define HDLCPROTO_LAPD		4
+
+/* socket options */
+#define PK_ACCTFILE		1	/* use level = CCITTPROTO_X25 */
+#define PK_FACILITIES		2	/* use level = CCITTPROTO_X25 */
+#define PK_RTATTACH		3	/* use level = CCITTPROTO_X25 */
+#define PK_PRLISTEN		4	/* use level = CCITTPROTO_X25 */
+
+#define MAX_FACILITIES		109     /* maximum size for facilities */
+
+/*
+ *  X.25 Socket address structure.  It contains the  X.121 or variation of
+ *  X.121, facilities information, higher level protocol value (first four
+ *  bytes of the User Data field), and the last  12 characters of the User
+ *  Data field.
+ */
+
+struct x25_sockaddr {		/* obsolete - use sockaddr_x25 */
+    short  xaddr_len;		/* Length of xaddr_addr.		*/
+    u_char xaddr_addr[15];	/* Network dependent or X.121 address.	*/
+    u_char xaddr_facilities;	/* Facilities information.		*/
+#define XS_REVERSE_CHARGE	0x01
+#define XS_HIPRIO		0x02
+    u_char xaddr_proto[4];	/* Protocol ID (4 bytes of user data).	*/
+    u_char xaddr_userdata[12];	/* Remaining User data field.		*/
+};
+
+/*
+ *  X.25 Socket address structure.  It contains the network id, X.121
+ *  address, facilities information, higher level protocol value (first four
+ *  bytes of the User Data field), and up to 12 characters of User Data.
+ */
+
+struct	sockaddr_x25 {
+	u_char	x25_len;
+	u_char	x25_family;	/* must be AF_CCITT */
+	short	x25_net;	/* network id code (usually a dnic) */
+	char	x25_addr[16];	/* X.121 address (null terminated) */
+	struct	x25opts {
+		char	op_flags;	/* miscellaneous options */
+					/* pk_var.h defines other lcd_flags */
+#define X25_REVERSE_CHARGE	0x01	/* remote DTE pays for call */
+#define X25_DBIT		0x02	/* not yet supported */
+#define X25_MQBIT		0x04	/* prepend M&Q bit status byte to packet data */
+#define X25_OLDSOCKADDR		0x08	/* uses old sockaddr structure */
+#define X25_DG_CIRCUIT		0x10	/* lcd_flag: used for datagrams */
+#define X25_DG_ROUTING		0x20	/* lcd_flag: peer addr not yet known */
+#define X25_MBS_HOLD		0x40	/* lcd_flag: collect m-bit sequences */
+		char	op_psize;	/* requested packet size */
+#define X25_PS128		7
+#define X25_PS256		8
+#define X25_PS512		9
+		char	op_wsize;	/* window size (1 .. 7) */
+		char	op_speed;	/* throughput class */
+	} x25_opts;
+	short	x25_udlen;	/* user data field length */
+	char	x25_udata[16];	/* user data field */
+};
+
+/*
+ * network configuration info
+ * this structure must be 16 bytes long
+ * XXX: looks stale -- xc_addr alone holds two 16-byte arrays; confirm intended size
+ */
+struct	x25config {
+	struct	sockaddr_x25 xc_addr;
+	/* link level parameters */
+	u_short	xc_lproto:4,	/* link level protocol eg. CCITTPROTO_HDLC */
+		xc_lptype:4,	/* protocol type eg. HDLCPROTO_LAPB */
+		xc_ltrace:1,	/* link level tracing flag */
+		xc_lwsize:7;	/* link level window size */
+	u_short	xc_lxidxchg:1,  /* link level XID exchange flag - NOT YET */
+	/* packet level parameters */
+	        xc_rsvd1:2,
+                xc_pwsize:3,	/* default window size */
+		xc_psize:4,	/* default packet size 7=128, 8=256, ... */
+		xc_type:3,	/* network type */
+#define X25_1976	0
+#define X25_1980	1
+#define X25_1984	2
+#define X25_DDN		3
+#define X25_BASIC	4
+		xc_ptrace:1,	/* packet level tracing flag */
+		xc_nodnic:1,	/* remove our dnic when calling on net */
+		xc_prepnd0:1;	/* prepend 0 when making offnet calls */
+	u_short	xc_maxlcn;	/* max logical channels */
+	u_short	xc_dg_idletimo;	/* timeout for idle datagram circuits. */
+};
+
+#ifdef IFNAMSIZ
+struct ifreq_x25 {
+	char	ifr_name[IFNAMSIZ];		/* if name, e.g. "en0" */
+	struct	x25config ifr_xc;
+};
+#define	SIOCSIFCONF_X25	_IOW('i', 12, struct ifreq_x25)	/* set ifnet config */
+#define	SIOCGIFCONF_X25	_IOWR('i',13, struct ifreq_x25)	/* get ifnet config */
+#endif
diff --git a/sys/netccitt/x25acct.h b/sys/netccitt/x25acct.h
new file mode 100644
index 00000000000..71f3fd89603
--- /dev/null
+++ b/sys/netccitt/x25acct.h
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) University of British Columbia, 1984
+ * Copyright (c) 1990, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * the Laboratory for Computation Vision and the Computer Science Department
+ * of the University of British Columbia.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)x25acct.h	8.1 (Berkeley) 6/10/93
+ */
+
+/*
+ * Format of X.25 accounting record written
+ * to X25ACCTF whenever a circuit is closed.
+ */
+
+#ifdef waterloo
+#define X25ACCTF	"/usr/adm/logs/x25acct"
+#else
+#define X25ACCTF	"/usr/adm/x25acct"
+#endif
+
+struct	x25acct {
+	time_t	x25acct_stime;		/* start time */
+#ifdef waterloo
+	u_long	x25acct_etime;		/* elapsed time (seconds) */
+#else
+	u_short	x25acct_etime;		/* elapsed time (seconds) */
+#endif
+	short	x25acct_uid;		/* user id */
+	short	x25acct_net;		/* network id */
+	u_short	x25acct_psize:4,	/* packet size */
+		x25acct_addrlen:4,	/* x25acct_addr length */
+		x25acct_revcharge:1,	/* reverse charging */
+		x25acct_callin:1,	/* incoming call */
+		x25acct_unused:6;	/* filler: the five bit-fields total 16 bits */
+	char	x25acct_addr[8];	/* remote DTE address (in bcd) */
+	char	x25acct_udata[4];	/* protocol id */
+	long	x25acct_txcnt;		/* packets transmitted */
+	long	x25acct_rxcnt;		/* packets received */
+};
diff --git a/sys/netccitt/x25err.h b/sys/netccitt/x25err.h
new file mode 100644
index 00000000000..44d5490b422
--- /dev/null
+++ b/sys/netccitt/x25err.h
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) University of British Columbia, 1984
+ * Copyright (c) 1990, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * the Laboratory for Computation Vision and the Computer Science Department
+ * of the University of British Columbia.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)x25err.h	8.1 (Berkeley) 6/10/93
+ */
+
+/* 
+ *  
+ *  X.25 Reset and Clear errors and diagnostics.  These values are 
+ *  returned in the u_error field of the u structure.
+ *
+ */
+
+#define EXRESET		100	/* Reset: call reset			*/
+#define EXROUT		101	/* Reset: out of order			*/
+#define EXRRPE		102	/* Reset: remote procedure error	*/
+#define EXRLPE		103	/* Reset: local procedure error		*/
+#define EXRNCG		104	/* Reset: network congestion		*/
+
+#define EXCLEAR		110	/* Clear: call cleared			*/
+#define EXCBUSY 	111	/* Clear: number busy			*/
+#define EXCOUT		112	/* Clear: out of order			*/
+#define EXCRPE		113	/* Clear: remote procedure error	*/
+#define EXCRRC		114	/* Clear: collect call refused		*/
+#define EXCINV		115	/* Clear: invalid call			*/
+#define EXCAB		116	/* Clear: access barred			*/
+#define EXCLPE		117	/* Clear: local procedure error		*/
+#define EXCNCG		118	/* Clear: network congestion		*/
+#define EXCNOB		119	/* Clear: not obtainable		*/
+
diff --git a/sys/netinet/icmp_var.h b/sys/netinet/icmp_var.h
new file mode 100644
index 00000000000..beef16e1836
--- /dev/null
+++ b/sys/netinet/icmp_var.h
@@ -0,0 +1,68 @@
+/*
+ * Copyright (c) 1982, 1986, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)icmp_var.h	8.1 (Berkeley) 6/10/93
+ */
+
+/*
+ * Variables related to this implementation
+ * of the internet control message protocol.
+ */
+struct	icmpstat {
+/* statistics related to icmp packets generated */
+	u_long	icps_error;		/* # of calls to icmp_error */
+	u_long	icps_oldshort;		/* no error 'cuz old ip too short */
+	u_long	icps_oldicmp;		/* no error 'cuz old was icmp */
+	u_long	icps_outhist[ICMP_MAXTYPE + 1];	/* presumably indexed by ICMP type, 0..ICMP_MAXTYPE */
+/* statistics related to input messages processed */
+ 	u_long	icps_badcode;		/* icmp_code out of range */
+	u_long	icps_tooshort;		/* packet < ICMP_MINLEN */
+	u_long	icps_checksum;		/* bad checksum */
+	u_long	icps_badlen;		/* calculated bound mismatch */
+	u_long	icps_reflect;		/* number of responses */
+	u_long	icps_inhist[ICMP_MAXTYPE + 1];	/* presumably indexed by ICMP type, 0..ICMP_MAXTYPE */
+};
+
+/*
+ * Names for ICMP sysctl objects
+ */
+#define	ICMPCTL_MASKREPL	1	/* allow replies to netmask requests */
+#define ICMPCTL_MAXID		2
+
+#define ICMPCTL_NAMES { \
+	{ 0, 0 }, \
+	{ "maskrepl", CTLTYPE_INT }, \
+}
+
+#ifdef KERNEL
+struct	icmpstat icmpstat;
+#endif
diff --git a/sys/netinet/if_ether.c b/sys/netinet/if_ether.c
new file mode 100644
index 00000000000..41f07c017b8
--- /dev/null
+++ b/sys/netinet/if_ether.c
@@ -0,0 +1,554 @@
+/*
+ * Copyright (c) 1982, 1986, 1988, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)if_ether.c	8.1 (Berkeley) 6/10/93
+ */
+
+/*
+ * Ethernet address resolution protocol.
+ * TODO:
+ *	add "inuse/lock" bit (or ref. count) along with valid bit
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/time.h>
+#include <sys/kernel.h>
+#include <sys/errno.h>
+#include <sys/ioctl.h>
+#include <sys/syslog.h>
+
+#include <net/if.h>
+#include <net/if_dl.h>
+#include <net/route.h>
+
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/in_var.h>
+#include <netinet/ip.h>
+#include <netinet/if_ether.h>
+
+#define SIN(s) ((struct sockaddr_in *)(s))	/* sockaddr cast helpers; argument is parenthesized */
+#define SDL(s) ((struct sockaddr_dl *)(s))
+#define SRP(s) ((struct sockaddr_inarp *)(s))
+
+/*
+ * ARP trailer negotiation.  Trailer protocol is not IP specific,
+ * but ARP request/response use IP addresses.
+ */
+#define ETHERTYPE_IPTRAILERS ETHERTYPE_TRAIL
+
+
+/* timer values */
+int	arpt_prune = (5*60*1);	/* walk list every 5 minutes */
+int	arpt_keep = (20*60);	/* once resolved, good for 20 more minutes */
+int	arpt_down = 20;		/* once declared down, don't send for 20 secs */
+#define	rt_expire rt_rmx.rmx_expire
+
+static	void arprequest __P((struct arpcom *, u_long *, u_long *, u_char *));
+static	void arptfree __P((struct llinfo_arp *));
+static	void arptimer __P((void *));
+static	struct llinfo_arp *arplookup __P((u_long, int, int));
+static	void in_arpinput __P((struct mbuf *));
+
+extern	struct ifnet loif;
+extern	struct timeval time;
+struct	llinfo_arp llinfo_arp = {&llinfo_arp, &llinfo_arp};	/* list head; arptimer walks until it is back here */
+struct	ifqueue arpintrq = {0, 0, 0, 50};	/* input queue drained by arpintr */
+int	arp_inuse, arp_allocated, arp_intimer;
+int	arp_maxtries = 5;	/* arpresolve sends this many requests, then sets RTF_REJECT */
+int	useloopback = 1;	/* use loopback interface for local traffic */
+int	arpinit_done = 0;	/* set by arp_rtrequest once arptimer has been scheduled */
+
+/*
+ * Timeout routine.  Reschedules itself, then hands every expired list entry to arptfree.
+ */
+/* ARGSUSED */
+static void
+arptimer(ignored_arg)
+	void *ignored_arg;
+{
+	int s = splnet();
+	register struct llinfo_arp *la = llinfo_arp.la_next;
+
+	timeout(arptimer, (caddr_t)0, arpt_prune * hz);	/* run again in arpt_prune * hz ticks */
+	while (la != &llinfo_arp) {
+		register struct rtentry *rt = la->la_rt;
+		la = la->la_next;	/* step first: arptfree may delete the current entry */
+		if (rt->rt_expire && rt->rt_expire <= time.tv_sec)	/* rt_expire == 0 never expires */
+			arptfree(la->la_prev); /* timer has expired, clear */
+	}
+	splx(s);
+}
+
+/*
+ * Route-request hook for ARP (parallel to llc_rtrequest); maintains rt's llinfo_arp.
+ */
+void
+arp_rtrequest(req, rt, sa)
+	int req;
+	register struct rtentry *rt;
+	struct sockaddr *sa;
+{
+	register struct sockaddr *gate = rt->rt_gateway;
+	register struct llinfo_arp *la = (struct llinfo_arp *)rt->rt_llinfo;
+	static struct sockaddr_dl null_sdl = {sizeof(null_sdl), AF_LINK};
+
+	if (!arpinit_done) {	/* first call: start the aging timer */
+		arpinit_done = 1;
+		timeout(arptimer, (caddr_t)0, hz);
+	}
+	if (rt->rt_flags & RTF_GATEWAY)	/* nothing is done for gateway routes */
+		return;
+	switch (req) {
+
+	case RTM_ADD:
+		/*
+		 * XXX: If this is a manually added route to interface
+		 * such as older version of routed or gated might provide,
+		 * restore cloning bit.
+		 */
+		if ((rt->rt_flags & RTF_HOST) == 0 &&
+		    SIN(rt_mask(rt))->sin_addr.s_addr != 0xffffffff)
+			rt->rt_flags |= RTF_CLONING;
+		if (rt->rt_flags & RTF_CLONING) {
+			/*
+			 * Case 1: This route should come from a route to iface.
+			 */
+			rt_setgate(rt, rt_key(rt),
+					(struct sockaddr *)&null_sdl);
+			gate = rt->rt_gateway;	/* re-read: rt_setgate replaced it */
+			SDL(gate)->sdl_type = rt->rt_ifp->if_type;
+			SDL(gate)->sdl_index = rt->rt_ifp->if_index;
+			rt->rt_expire = time.tv_sec;
+			break;
+		}
+		/* Announce a new entry if requested. */
+		if (rt->rt_flags & RTF_ANNOUNCE)
+			arprequest((struct arpcom *)rt->rt_ifp,
+			    &SIN(rt_key(rt))->sin_addr.s_addr,
+			    &SIN(rt_key(rt))->sin_addr.s_addr,
+			    (u_char *)LLADDR(SDL(gate)));
+		/*FALLTHROUGH*/
+	case RTM_RESOLVE:
+		if (gate->sa_family != AF_LINK ||
+		    gate->sa_len < sizeof(null_sdl)) {
+			log(LOG_DEBUG, "arp_rtrequest: bad gateway value\n");
+			break;
+		}
+		SDL(gate)->sdl_type = rt->rt_ifp->if_type;
+		SDL(gate)->sdl_index = rt->rt_ifp->if_index;
+		if (la != 0)
+			break; /* This happens on a route change */
+		/*
+		 * Case 2:  This route may come from cloning, or a manual route
+		 * add with a LL address.
+		 */
+		R_Malloc(la, struct llinfo_arp *, sizeof(*la));
+		rt->rt_llinfo = (caddr_t)la;	/* stored even when the allocation failed (0) */
+		if (la == 0) {
+			log(LOG_DEBUG, "arp_rtrequest: malloc failed\n");
+			break;
+		}
+		arp_inuse++, arp_allocated++;
+		Bzero(la, sizeof(*la));
+		la->la_rt = rt;
+		rt->rt_flags |= RTF_LLINFO;
+		insque(la, &llinfo_arp);
+		if (SIN(rt_key(rt))->sin_addr.s_addr ==
+		    (IA_SIN(rt->rt_ifa))->sin_addr.s_addr) {
+		    /*
+		     * This test used to be
+		     *	if (loif.if_flags & IFF_UP)
+		     * It allowed local traffic to be forced
+		     * through the hardware by configuring the loopback down.
+		     * However, it causes problems during network configuration
+		     * for boards that can't receive packets they send.
+		     * It is now necessary to clear "useloopback" and remove
+		     * the route to force traffic out to the hardware.
+		     */
+			rt->rt_expire = 0;	/* our own address: entry never expires */
+			Bcopy(((struct arpcom *)rt->rt_ifp)->ac_enaddr,
+				LLADDR(SDL(gate)), SDL(gate)->sdl_alen = 6);
+			if (useloopback)
+				rt->rt_ifp = &loif;
+
+		}
+		break;
+
+	case RTM_DELETE:
+		if (la == 0)
+			break;
+		arp_inuse--;
+		remque(la);
+		rt->rt_llinfo = 0;
+		rt->rt_flags &= ~RTF_LLINFO;
+		if (la->la_hold)	/* drop any packet still waiting for resolution */
+			m_freem(la->la_hold);
+		Free((caddr_t)la);
+	}
+}
+
+/*
+ * Ask, by broadcast on interface ac, which host owns the IP address *addr.
+ */
+void
+arpwhohas(ac, addr)
+	struct in_addr *addr;
+	struct arpcom *ac;
+{
+	arprequest(ac, &(ac->ac_ipaddr.s_addr), &(addr->s_addr), ac->ac_enaddr);
+}
+
+/*
+ * Broadcast an ARP request. Caller specifies:
+ *	- arp header source ip address
+ *	- arp header target ip address
+ *	- arp header source ethernet address
+ */
+static void
+arprequest(ac, sip, tip, enaddr)
+	register struct arpcom *ac;
+	register u_long *sip, *tip;
+	register u_char *enaddr;
+{
+	register struct mbuf *m;
+	register struct ether_header *eh;
+	register struct ether_arp *ea;
+	struct sockaddr sa;	/* sa_data is filled in as an ether_header for if_output */
+
+	if ((m = m_gethdr(M_DONTWAIT, MT_DATA)) == NULL)
+		return;	/* no mbuf: the request is silently not sent */
+	m->m_len = sizeof(*ea);
+	m->m_pkthdr.len = sizeof(*ea);
+	MH_ALIGN(m, sizeof(*ea));
+	ea = mtod(m, struct ether_arp *);
+	eh = (struct ether_header *)sa.sa_data;
+	bzero((caddr_t)ea, sizeof (*ea));	/* leaves the target hardware address zeroed */
+	bcopy((caddr_t)etherbroadcastaddr, (caddr_t)eh->ether_dhost,
+	    sizeof(eh->ether_dhost));
+	eh->ether_type = ETHERTYPE_ARP;		/* if_output will swap */
+	ea->arp_hrd = htons(ARPHRD_ETHER);
+	ea->arp_pro = htons(ETHERTYPE_IP);
+	ea->arp_hln = sizeof(ea->arp_sha);	/* hardware address length */
+	ea->arp_pln = sizeof(ea->arp_spa);	/* protocol address length */
+	ea->arp_op = htons(ARPOP_REQUEST);
+	bcopy((caddr_t)enaddr, (caddr_t)ea->arp_sha, sizeof(ea->arp_sha));
+	bcopy((caddr_t)sip, (caddr_t)ea->arp_spa, sizeof(ea->arp_spa));
+	bcopy((caddr_t)tip, (caddr_t)ea->arp_tpa, sizeof(ea->arp_tpa));
+	sa.sa_family = AF_UNSPEC;	/* presumably tells if_output the header is in sa_data -- confirm */
+	sa.sa_len = sizeof(sa);
+	(*ac->ac_if.if_output)(&ac->ac_if, m, &sa, (struct rtentry *)0);
+}
+
+/*
+ * Resolve an IP address into an ethernet address.  If success,
+ * desten is filled in.  If there is no entry in arptab,
+ * set one up and broadcast a request for the IP address.
+ * Hold onto this mbuf and resend it once the address
+ * is finally resolved.  A return value of 1 indicates
+ * that desten has been filled in and the packet should be sent
+ * normally; a 0 return indicates that the packet has been
+ * taken over here, either now or for later transmission.
+ */
+int
+arpresolve(ac, rt, m, dst, desten)
+	register struct arpcom *ac;
+	register struct rtentry *rt;
+	struct mbuf *m;
+	register struct sockaddr *dst;
+	register u_char *desten;
+{
+	register struct llinfo_arp *la;
+	struct sockaddr_dl *sdl;
+
+	if (m->m_flags & M_BCAST) {	/* broadcast */
+		bcopy((caddr_t)etherbroadcastaddr, (caddr_t)desten,
+		    sizeof(etherbroadcastaddr));
+		return (1);
+	}
+	if (m->m_flags & M_MCAST) {	/* multicast */
+		ETHER_MAP_IP_MULTICAST(&SIN(dst)->sin_addr, desten);
+		return(1);
+	}
+	if (rt)
+		la = (struct llinfo_arp *)rt->rt_llinfo;
+	else {	/* no route supplied: look one up, creating it if needed */
+		if ((la = arplookup(SIN(dst)->sin_addr.s_addr, 1, 0)) != 0)
+			rt = la->la_rt;
+	}
+	if (la == 0 || rt == 0) {
+		log(LOG_DEBUG, "arpresolve: can't allocate llinfo\n");
+		m_freem(m);	/* packet is dropped, not queued */
+		return (0);
+	}
+	sdl = SDL(rt->rt_gateway);
+	/*
+	 * Check the address family and length is valid, the address
+	 * is resolved; otherwise, try to resolve.
+	 */
+	if ((rt->rt_expire == 0 || rt->rt_expire > time.tv_sec) &&
+	    sdl->sdl_family == AF_LINK && sdl->sdl_alen != 0) {
+		bcopy(LLADDR(sdl), desten, sdl->sdl_alen);
+		return 1;
+	}
+	/*
+	 * There is an arptab entry, but no ethernet address
+	 * response yet.  Replace the held mbuf with this
+	 * latest one.
+	 */
+	if (la->la_hold)
+		m_freem(la->la_hold);
+	la->la_hold = m;
+	if (rt->rt_expire) {
+		rt->rt_flags &= ~RTF_REJECT;
+		if (la->la_asked == 0 || rt->rt_expire != time.tv_sec) {	/* at most one request per second */
+			rt->rt_expire = time.tv_sec;
+			if (la->la_asked++ < arp_maxtries)
+				arpwhohas(ac, &(SIN(dst)->sin_addr));
+			else {	/* out of tries: reject for arpt_down seconds */
+				rt->rt_flags |= RTF_REJECT;
+				rt->rt_expire += arpt_down;
+				la->la_asked = 0;
+			}
+
+		}
+	}
+	return (0);
+}
+
+/*
+ * Common length and type checks are done here,
+ * then the protocol-specific routine is called.
+ */
+void
+arpintr()
+{
+	register struct mbuf *m;
+	register struct arphdr *ar;
+	int s;
+
+	while (arpintrq.ifq_head) {
+		s = splimp();	/* block interrupts only around the dequeue */
+		IF_DEQUEUE(&arpintrq, m);
+		splx(s);
+		if (m == 0 || (m->m_flags & M_PKTHDR) == 0)
+			panic("arpintr");
+		if (m->m_len >= sizeof(struct arphdr) &&
+		    (ar = mtod(m, struct arphdr *)) &&
+		    ntohs(ar->ar_hrd) == ARPHRD_ETHER &&
+		    m->m_len >=
+		      sizeof(struct arphdr) + 2 * ar->ar_hln + 2 * ar->ar_pln)
+			/* NOTE(review): lengths come from the packet; in_arpinput reads a full ether_arp */
+			    switch (ntohs(ar->ar_pro)) {
+
+			    case ETHERTYPE_IP:
+			    case ETHERTYPE_IPTRAILERS:
+				    in_arpinput(m);
+				    continue;	/* in_arpinput frees or forwards m */
+			    }
+		m_freem(m);	/* failed a check, or unhandled protocol */
+	}
+}
+
+/*
+ * ARP for Internet protocols on 10 Mb/s Ethernet.
+ * Algorithm is that given in RFC 826.
+ * In addition, a sanity check is performed on the sender
+ * protocol address, to catch impersonators.
+ * We no longer handle negotiations for use of trailer protocol:
+ * Formerly, ARP replied for protocol type ETHERTYPE_TRAIL sent
+ * along with IP replies if we wanted trailers sent to us,
+ * and also sent them in response to IP replies.
+ * This allowed either end to announce the desire to receive
+ * trailer packets.
+ * We no longer reply to requests for ETHERTYPE_TRAIL protocol either,
+ * but formerly didn't normally send requests.
+ */
+static void
+in_arpinput(m)
+	struct mbuf *m;
+{
+	register struct ether_arp *ea;
+	register struct arpcom *ac = (struct arpcom *)m->m_pkthdr.rcvif;
+	struct ether_header *eh;
+	register struct llinfo_arp *la = 0;
+	register struct rtentry *rt;
+	struct in_ifaddr *ia, *maybe_ia = 0;
+	struct sockaddr_dl *sdl;
+	struct sockaddr sa;
+	struct in_addr isaddr, itaddr, myaddr;
+	int op;
+
+	ea = mtod(m, struct ether_arp *);
+	op = ntohs(ea->arp_op);
+	bcopy((caddr_t)ea->arp_spa, (caddr_t)&isaddr, sizeof (isaddr));
+	bcopy((caddr_t)ea->arp_tpa, (caddr_t)&itaddr, sizeof (itaddr));
+	for (ia = in_ifaddr; ia; ia = ia->ia_next)	/* find our address on the receiving interface */
+		if (ia->ia_ifp == &ac->ac_if) {
+			maybe_ia = ia;
+			if ((itaddr.s_addr == ia->ia_addr.sin_addr.s_addr) ||
+			     (isaddr.s_addr == ia->ia_addr.sin_addr.s_addr))
+				break;
+		}
+	if (maybe_ia == 0)	/* interface has no internet address */
+		goto out;
+	myaddr = ia ? ia->ia_addr.sin_addr : maybe_ia->ia_addr.sin_addr;	/* exact match, else last one seen */
+	if (!bcmp((caddr_t)ea->arp_sha, (caddr_t)ac->ac_enaddr,
+	    sizeof (ea->arp_sha)))
+		goto out;	/* it's from me, ignore it. */
+	if (!bcmp((caddr_t)ea->arp_sha, (caddr_t)etherbroadcastaddr,
+	    sizeof (ea->arp_sha))) {
+		log(LOG_ERR,
+		    "arp: ether address is broadcast for IP address %x!\n",
+		    ntohl(isaddr.s_addr));
+		goto out;
+	}
+	if (isaddr.s_addr == myaddr.s_addr) {
+		log(LOG_ERR,
+		   "duplicate IP address %x!! sent from ethernet address: %s\n",
+		   ntohl(isaddr.s_addr), ether_sprintf(ea->arp_sha));
+		itaddr = myaddr;
+		goto reply;
+	}
+	la = arplookup(isaddr.s_addr, itaddr.s_addr == myaddr.s_addr, 0);	/* create only if we are the target */
+	if (la && (rt = la->la_rt) && (sdl = SDL(rt->rt_gateway))) {
+		if (sdl->sdl_alen &&
+		    bcmp((caddr_t)ea->arp_sha, LLADDR(sdl), sdl->sdl_alen))
+			log(LOG_INFO, "arp info overwritten for %x by %s\n",
+			    ntohl(isaddr.s_addr), ether_sprintf(ea->arp_sha));	/* host order, as in the other messages */
+		bcopy((caddr_t)ea->arp_sha, LLADDR(sdl),
+			    sdl->sdl_alen = sizeof(ea->arp_sha));
+		if (rt->rt_expire)
+			rt->rt_expire = time.tv_sec + arpt_keep;
+		rt->rt_flags &= ~RTF_REJECT;
+		la->la_asked = 0;
+		if (la->la_hold) {	/* send the packet that was waiting on this address */
+			(*ac->ac_if.if_output)(&ac->ac_if, la->la_hold,
+				rt_key(rt), rt);
+			la->la_hold = 0;
+		}
+	}
+reply:
+	if (op != ARPOP_REQUEST) {
+	out:
+		m_freem(m);
+		return;
+	}
+	if (itaddr.s_addr == myaddr.s_addr) {
+		/* I am the target */
+		bcopy((caddr_t)ea->arp_sha, (caddr_t)ea->arp_tha,
+		    sizeof(ea->arp_sha));
+		bcopy((caddr_t)ac->ac_enaddr, (caddr_t)ea->arp_sha,
+		    sizeof(ea->arp_sha));
+	} else {	/* not for us: answer only if a SIN_PROXY entry exists */
+		la = arplookup(itaddr.s_addr, 0, SIN_PROXY);
+		if (la == NULL)
+			goto out;
+		rt = la->la_rt;
+		bcopy((caddr_t)ea->arp_sha, (caddr_t)ea->arp_tha,
+		    sizeof(ea->arp_sha));
+		sdl = SDL(rt->rt_gateway);
+		bcopy(LLADDR(sdl), (caddr_t)ea->arp_sha, sizeof(ea->arp_sha));
+	}
+
+	bcopy((caddr_t)ea->arp_spa, (caddr_t)ea->arp_tpa, sizeof(ea->arp_spa));
+	bcopy((caddr_t)&itaddr, (caddr_t)ea->arp_spa, sizeof(ea->arp_spa));
+	ea->arp_op = htons(ARPOP_REPLY);
+	ea->arp_pro = htons(ETHERTYPE_IP); /* let's be sure! */
+	eh = (struct ether_header *)sa.sa_data;
+	bcopy((caddr_t)ea->arp_tha, (caddr_t)eh->ether_dhost,
+	    sizeof(eh->ether_dhost));
+	eh->ether_type = ETHERTYPE_ARP;
+	sa.sa_family = AF_UNSPEC;
+	sa.sa_len = sizeof(sa);
+	(*ac->ac_if.if_output)(&ac->ac_if, m, &sa, (struct rtentry *)0);	/* the request mbuf is reused as the reply */
+	return;
+}
+
+/*
+ * Free an arp entry: invalidate it if its route is still referenced, else delete the route.
+ */
+static void
+arptfree(la)
+	register struct llinfo_arp *la;
+{
+	register struct rtentry *rt = la->la_rt;
+	register struct sockaddr_dl *sdl;
+	if (rt == 0)
+		panic("arptfree");
+	if (rt->rt_refcnt > 0 && (sdl = SDL(rt->rt_gateway)) &&
+	    sdl->sdl_family == AF_LINK) {
+		sdl->sdl_alen = 0;	/* zero length marks the address unresolved (see arpresolve) */
+		la->la_asked = 0;
+		rt->rt_flags &= ~RTF_REJECT;
+		return;
+	}
+	rtrequest(RTM_DELETE, rt_key(rt), (struct sockaddr *)0, rt_mask(rt),
+			0, (struct rtentry **)0);
+}
+/*
+ * Look up addr with rtalloc1 (create is passed through); return its llinfo_arp, or 0.
+ */
+static struct llinfo_arp *
+arplookup(addr, create, proxy)
+	u_long addr;
+	int create, proxy;
+{
+	register struct rtentry *rt;
+	static struct sockaddr_inarp sin = {sizeof(sin), AF_INET };	/* static: reused on every call */
+
+	sin.sin_addr.s_addr = addr;
+	sin.sin_other = proxy ? SIN_PROXY : 0;
+	rt = rtalloc1((struct sockaddr *)&sin, create);
+	if (rt == 0)
+		return (0);
+	rt->rt_refcnt--;	/* presumably undoes the reference taken by rtalloc1 -- confirm */
+	if ((rt->rt_flags & RTF_GATEWAY) || (rt->rt_flags & RTF_LLINFO) == 0 ||
+	    rt->rt_gateway->sa_family != AF_LINK) {
+		if (create)	/* only log when an entry was supposed to be created */
+			log(LOG_DEBUG, "arptnew failed on %x\n", ntohl(addr));
+		return (0);
+	}
+	return ((struct llinfo_arp *)rt->rt_llinfo);
+}
+
+int
+arpioctl(cmd, data)
+	caddr_t data;
+	int cmd;
+{
+	return EOPNOTSUPP;	/* both arguments are ignored; always reports "not supported" */
+}
diff --git a/sys/netinet/if_ether.h b/sys/netinet/if_ether.h
new file mode 100644
index 00000000000..6b4def054f7
--- /dev/null
+++ b/sys/netinet/if_ether.h
@@ -0,0 +1,224 @@
+/*
+ * Copyright (c) 1982, 1986, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)if_ether.h	8.1 (Berkeley) 6/10/93
+ */
+
+/*
+ * Structure of a 10Mb/s Ethernet header.
+ */
+struct	ether_header {
+	u_char	ether_dhost[6];	/* destination hardware address */
+	u_char	ether_shost[6];	/* presumably source hardware address */
+	u_short	ether_type;	/* ETHERTYPE_* code for the payload */
+};
+
+#define	ETHERTYPE_PUP		0x0200	/* PUP protocol */
+#define	ETHERTYPE_IP		0x0800	/* IP protocol */
+#define ETHERTYPE_ARP		0x0806	/* Addr. resolution protocol */
+#define ETHERTYPE_REVARP	0x8035	/* reverse Addr. resolution protocol */
+
+/*
+ * The ETHERTYPE_NTRAILER packet types starting at ETHERTYPE_TRAIL have
+ * (type-ETHERTYPE_TRAIL)*512 bytes of data followed
+ * by an ETHER type (as given above) and then the (variable-length) header.
+ */
+#define	ETHERTYPE_TRAIL		0x1000		/* Trailer packet */
+#define	ETHERTYPE_NTRAILER	16
+
+#define	ETHERMTU	1500
+#define	ETHERMIN	(60-14)
+
+#ifdef KERNEL
+/*
+ * Map an IP multicast address to an Ethernet multicast address.  The
+ * high-order 25 bits of the Ethernet address are statically assigned and
+ * the low-order 23 bits are taken from bytes 1-3 of *ipaddr.  Both
+ * arguments may be arbitrary expressions; each is parenthesized.
+ */
+#define ETHER_MAP_IP_MULTICAST(ipaddr, enaddr) \
+	/* struct in_addr *ipaddr; */ \
+	/* u_char enaddr[6];	   */ \
+{ \
+	(enaddr)[0] = 0x01; \
+	(enaddr)[1] = 0x00; \
+	(enaddr)[2] = 0x5e; \
+	(enaddr)[3] = ((u_char *)(ipaddr))[1] & 0x7f; \
+	(enaddr)[4] = ((u_char *)(ipaddr))[2]; \
+	(enaddr)[5] = ((u_char *)(ipaddr))[3]; \
+}
+#endif
+
+/*
+ * Ethernet Address Resolution Protocol.
+ *
+ * See RFC 826 for protocol description.  Structure below is adapted
+ * to resolving internet addresses.  Field names used correspond to 
+ * RFC 826.
+ */
+struct	ether_arp {
+	struct	arphdr ea_hdr;	/* fixed-size header */
+	u_char	arp_sha[6];	/* sender hardware address */
+	u_char	arp_spa[4];	/* sender protocol address */
+	u_char	arp_tha[6];	/* target hardware address */
+	u_char	arp_tpa[4];	/* target protocol address */
+};
+#define	arp_hrd	ea_hdr.ar_hrd	/* shorthands for the ea_hdr fields */
+#define	arp_pro	ea_hdr.ar_pro	/* protocol type, e.g. ETHERTYPE_IP */
+#define	arp_hln	ea_hdr.ar_hln
+#define	arp_pln	ea_hdr.ar_pln
+#define	arp_op	ea_hdr.ar_op	/* operation, e.g. ARPOP_REPLY */
+
+
+/*
+ * Structure shared between the ethernet driver modules and
+ * the address resolution code.  For example, each ec_softc or il_softc
+ * begins with this structure.
+ */
+struct	arpcom {
+	struct 	ifnet ac_if;		/* network-visible interface */
+	u_char	ac_enaddr[6];		/* ethernet hardware address */
+	struct	in_addr ac_ipaddr;	/* copy of ip address- XXX */
+	struct	ether_multi *ac_multiaddrs; /* list of ether multicast addrs */
+	int	ac_multicnt;		/* length of ac_multiaddrs list */	
+};
+
+struct llinfo_arp {			/* ARP state hung off a route */
+	struct	llinfo_arp *la_next;	/* links in the llinfo queue */
+	struct	llinfo_arp *la_prev;
+	struct	rtentry *la_rt;		/* route this entry belongs to */
+	struct	mbuf *la_hold;		/* last packet until resolved/timeout */
+	long	la_asked;		/* last time we QUERIED for this addr */
+#define la_timer la_rt->rt_rmx.rmx_expire /* deletion time in seconds */
+};
+
+struct sockaddr_inarp {			/* key handed to rtalloc1() for ARP */
+	u_char	sin_len;
+	u_char	sin_family;
+	u_short sin_port;
+	struct	in_addr sin_addr;	/* address being looked up */
+	struct	in_addr sin_srcaddr;
+	u_short	sin_tos;
+	u_short	sin_other;		/* SIN_PROXY or 0 */
+#define SIN_PROXY 1			/* lookup is for a proxy entry */
+};
+/*
+ * IP and ethernet specific routing flags
+ */
+#define	RTF_USETRAILERS	RTF_PROTO1	/* use trailers */
+#define RTF_ANNOUNCE	RTF_PROTO2	/* announce new arp entry */
+
+#ifdef	KERNEL
+u_char	etherbroadcastaddr[6];
+u_char	ether_ipmulticast_min[6];
+u_char	ether_ipmulticast_max[6];
+struct	ifqueue arpintrq;		/* presumably the ARP input queue */
+
+struct	llinfo_arp *arptnew __P((struct in_addr *));
+struct	llinfo_arp llinfo_arp;		/* head of the llinfo queue */
+
+void	arpwhohas __P((struct arpcom *, struct in_addr *));
+void	arpintr __P((void));
+int	arpresolve __P((struct arpcom *,
+	   struct rtentry *, struct mbuf *, struct sockaddr *, u_char *));
+void	arp_rtrequest __P((int, struct rtentry *, struct sockaddr *));
+void	arpwhohas __P((struct arpcom *, struct in_addr *)); /* (repeated) */
+
+int	ether_addmulti __P((struct ifreq *, struct arpcom *));
+int	ether_delmulti __P((struct ifreq *, struct arpcom *));
+
+/*
+ * Ethernet multicast address structure.  There is one of these for each
+ * multicast address or range of multicast addresses that we are supposed
+ * to listen to on a particular interface.  They are kept in a linked list,
+ * rooted in the interface's arpcom structure.  (This really has nothing to
+ * do with ARP, or with the Internet address family, but this appears to be
+ * the minimally-disrupting place to put it.)
+ */
+struct ether_multi {
+	u_char	enm_addrlo[6];		/* low  or only address of range */
+	u_char	enm_addrhi[6];		/* high or only address of range */
+	struct	arpcom *enm_ac;		/* back pointer to arpcom */
+	u_int	enm_refcount;		/* no. claims to this addr/range */
+	struct	ether_multi *enm_next;	/* ptr to next ether_multi */
+};
+
+/*
+ * Structure used by macros below to remember position when stepping through
+ * all of the ether_multi records.
+ */
+struct ether_multistep {
+	struct ether_multi  *e_enm;	/* record the next step will return */
+};
+
+/*
+ * Look up the ether_multi record for a given range of Ethernet multicast
+ * addresses on a given arpcom structure.  "enm" ends up pointing at the
+ * record whose low and high addresses both match, or NULL if none does.
+ */
+#define ETHER_LOOKUP_MULTI(addrlo, addrhi, ac, enm) \
+	/* u_char addrlo[6]; */ \
+	/* u_char addrhi[6]; */ \
+	/* struct arpcom *ac; */ \
+	/* struct ether_multi *enm; */ \
+{ \
+	(enm) = (ac)->ac_multiaddrs; \
+	while ((enm) != NULL && \
+	    (bcmp((enm)->enm_addrlo, (addrlo), 6) != 0 || \
+	     bcmp((enm)->enm_addrhi, (addrhi), 6) != 0)) \
+		(enm) = (enm)->enm_next; \
+}
+
+/*
+ * Iterate over every ether_multi record on an arpcom's list, one per
+ * use.  The caller owns "step", which holds the position between uses;
+ * start a walk with ETHER_FIRST_MULTI() (which also yields the first
+ * record) and continue with ETHER_NEXT_MULTI().  Either macro leaves
+ * "enm" NULL once the records are exhausted.
+ */
+#define ETHER_NEXT_MULTI(step, enm) \
+	/* struct ether_multistep step; struct ether_multi *enm; */ \
+{ \
+	(enm) = (step).e_enm; \
+	if ((enm) != NULL) \
+		(step).e_enm = (enm)->enm_next; \
+}
+
+#define ETHER_FIRST_MULTI(step, ac, enm) \
+	/* struct ether_multistep step; */ \
+	/* struct arpcom *ac; */ \
+	/* struct ether_multi *enm; */ \
+{ \
+	(step).e_enm = (ac)->ac_multiaddrs; \
+	ETHER_NEXT_MULTI((step), (enm)); \
+}
+
+#endif
diff --git a/sys/netinet/igmp.c b/sys/netinet/igmp.c
new file mode 100644
index 00000000000..78b426c49ea
--- /dev/null
+++ b/sys/netinet/igmp.c
@@ -0,0 +1,313 @@
+/*
+ * Copyright (c) 1988 Stephen Deering.
+ * Copyright (c) 1992, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Stephen Deering of Stanford University.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)igmp.c	8.1 (Berkeley) 7/19/93
+ */
+
+/* Internet Group Management Protocol (IGMP) routines. */
+
+
+#include <sys/param.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/protosw.h>
+
+#include <net/if.h>
+#include <net/route.h>
+
+#include <netinet/in.h>
+#include <netinet/in_var.h>
+#include <netinet/in_systm.h>
+#include <netinet/ip.h>
+#include <netinet/ip_var.h>
+#include <netinet/igmp.h>
+#include <netinet/igmp_var.h>
+
+extern struct ifnet loif;
+
+static int igmp_timers_are_running = 0;
+static u_long igmp_all_hosts_group;
+
+static void igmp_sendreport __P((struct in_multi *));
+
+void
+igmp_init()
+{
+	/*
+	 * Cache the all-hosts group in network order so it is swapped once.
+	 */
+	igmp_all_hosts_group = htonl(INADDR_ALLHOSTS_GROUP);
+}
+
+void
+igmp_input(m, iphlen)		/* handle one received IGMP message */
+	register struct mbuf *m;	/* packet, IP header first */
+	register int iphlen;		/* length of that IP header */
+{
+	register struct igmp *igmp;
+	register struct ip *ip;
+	register int igmplen;
+	register struct ifnet *ifp = m->m_pkthdr.rcvif;
+	register int minlen;
+	register struct in_multi *inm;
+	register struct in_ifaddr *ia;
+	struct in_multistep step;
+
+	++igmpstat.igps_rcv_total;
+
+	ip = mtod(m, struct ip *);
+	igmplen = ip->ip_len;	/* used below as the IGMP message length */
+
+	/*
+	 * Validate lengths
+	 */
+	if (igmplen < IGMP_MINLEN) {
+		++igmpstat.igps_rcv_tooshort;
+		m_freem(m);
+		return;
+	}
+	minlen = iphlen + IGMP_MINLEN;
+	if ((m->m_flags & M_EXT || m->m_len < minlen) &&
+	    (m = m_pullup(m, minlen)) == 0) {
+		++igmpstat.igps_rcv_tooshort;
+		return;		/* m is 0 here: nothing left to free */
+	}
+
+	/*
+	 * Validate checksum
+	 */
+	m->m_data += iphlen;	/* hide the IP header from in_cksum() */
+	m->m_len -= iphlen;
+	igmp = mtod(m, struct igmp *);
+	if (in_cksum(m, igmplen)) {
+		++igmpstat.igps_rcv_badsum;
+		m_freem(m);
+		return;
+	}
+	m->m_data -= iphlen;	/* expose the IP header again */
+	m->m_len += iphlen;
+	ip = mtod(m, struct ip *);	/* m may have changed in m_pullup() */
+
+	switch (igmp->igmp_type) {
+
+	case IGMP_HOST_MEMBERSHIP_QUERY:
+		++igmpstat.igps_rcv_queries;
+
+		if (ifp == &loif)
+			break;	/* still handed to rip_input() below */
+
+		if (ip->ip_dst.s_addr != igmp_all_hosts_group) {
+			++igmpstat.igps_rcv_badqueries;
+			m_freem(m);
+			return;
+		}
+
+		/*
+		 * Start the timers in all of our membership records for
+		 * the interface on which the query arrived, except those
+		 * that are already running and those that belong to the
+		 * "all-hosts" group.
+		 */
+		IN_FIRST_MULTI(step, inm);
+		while (inm != NULL) {
+			if (inm->inm_ifp == ifp && inm->inm_timer == 0 &&
+			    inm->inm_addr.s_addr != igmp_all_hosts_group) {
+				inm->inm_timer =
+				    IGMP_RANDOM_DELAY(inm->inm_addr);
+				igmp_timers_are_running = 1;
+			}
+			IN_NEXT_MULTI(step, inm);
+		}
+
+		break;
+
+	case IGMP_HOST_MEMBERSHIP_REPORT:
+		++igmpstat.igps_rcv_reports;
+
+		if (ifp == &loif)
+			break;	/* still handed to rip_input() below */
+
+		if (!IN_MULTICAST(ntohl(igmp->igmp_group.s_addr)) ||
+		    igmp->igmp_group.s_addr != ip->ip_dst.s_addr) {
+			++igmpstat.igps_rcv_badreports;
+			m_freem(m);
+			return;
+		}
+
+		/*
+		 * KLUDGE: if the IP source address of the report has an
+		 * unspecified (i.e., zero) subnet number, as is allowed for
+		 * a booting host, replace it with the correct subnet number
+		 * so that a process-level multicast routing demon can
+		 * determine which subnet it arrived from.  This is necessary
+		 * to compensate for the lack of any way for a process to
+		 * determine the arrival interface of an incoming packet.
+		 */
+		if ((ntohl(ip->ip_src.s_addr) & IN_CLASSA_NET) == 0) {
+			IFP_TO_IA(ifp, ia);
+			if (ia) ip->ip_src.s_addr = htonl(ia->ia_subnet);
+		}
+
+		/*
+		 * If we belong to the group being reported, stop
+		 * our timer for that group.
+		 */
+		IN_LOOKUP_MULTI(igmp->igmp_group, ifp, inm);
+		if (inm != NULL) {
+			inm->inm_timer = 0;
+			++igmpstat.igps_rcv_ourreports;
+		}
+
+		break;
+	}
+
+	/*
+	 * Pass all valid IGMP packets up to any process(es) listening
+	 * on a raw IGMP socket.
+	 */
+	rip_input(m);
+}
+
+void
+igmp_joingroup(inm)
+	struct in_multi *inm;
+{
+	register int ospl = splnet();
+
+	/* Report now and time a repeat; skip all-hosts and loopback. */
+	if (inm->inm_addr.s_addr != igmp_all_hosts_group &&
+	    inm->inm_ifp != &loif) {
+		igmp_sendreport(inm);
+		inm->inm_timer = IGMP_RANDOM_DELAY(inm->inm_addr);
+		igmp_timers_are_running = 1;
+	} else
+		inm->inm_timer = 0;
+	splx(ospl);
+}
+
+void
+igmp_leavegroup(inm)
+	struct in_multi *inm;		/* unused */
+{
+	/*
+	 * No action required on leaving a group.
+	 */
+}
+
+void
+igmp_fasttimo()
+{
+	register struct in_multi *inm;
+	register int ospl;
+	struct in_multistep step;
+
+	/*
+	 * Tick every running report timer and send the report for each
+	 * one that reaches zero.  Return at once if none is running.
+	 */
+	if (igmp_timers_are_running == 0)
+		return;
+
+	ospl = splnet();
+	igmp_timers_are_running = 0;
+	IN_FIRST_MULTI(step, inm);
+	while (inm != NULL) {
+		/* A zero timer is not running and is left alone. */
+		if (inm->inm_timer != 0) {
+			if (--inm->inm_timer == 0)
+				igmp_sendreport(inm);
+			else
+				igmp_timers_are_running = 1;
+		}
+		IN_NEXT_MULTI(step, inm);
+	}
+	splx(ospl);
+}
+
+static void
+igmp_sendreport(inm)		/* send one membership report */
+	register struct in_multi *inm;	/* group and interface to report */
+{
+	register struct mbuf *m;
+	register struct igmp *igmp;
+	register struct ip *ip;
+	struct ip_moptions simo;
+
+	MGETHDR(m, M_DONTWAIT, MT_HEADER);
+	if (m == NULL)
+		return;
+	/*
+	 * Assume max_linkhdr + sizeof(struct ip) + IGMP_MINLEN
+	 * is smaller than mbuf size returned by MGETHDR.
+	 */
+	m->m_data += max_linkhdr;
+	m->m_len = sizeof(struct ip) + IGMP_MINLEN;
+	m->m_pkthdr.len = sizeof(struct ip) + IGMP_MINLEN;
+	ip = mtod(m, struct ip *);
+	ip->ip_tos = 0;
+	ip->ip_len = sizeof(struct ip) + IGMP_MINLEN;
+	ip->ip_off = 0;
+	ip->ip_p = IPPROTO_IGMP;
+	ip->ip_src.s_addr = INADDR_ANY;
+	ip->ip_dst = inm->inm_addr;
+
+	igmp = (struct igmp *)(ip + 1);
+	igmp->igmp_type = IGMP_HOST_MEMBERSHIP_REPORT;
+	igmp->igmp_code = 0;
+	igmp->igmp_group = inm->inm_addr;
+	igmp->igmp_cksum = 0;
+	m->m_data += sizeof(struct ip);	/* checksum the IGMP part only, */
+	m->m_len -= sizeof(struct ip);	/* as igmp_input() verifies it */
+	igmp->igmp_cksum = in_cksum(m, IGMP_MINLEN);
+	m->m_data -= sizeof(struct ip);
+	m->m_len += sizeof(struct ip);
+
+	bzero((caddr_t)&simo, sizeof(simo));
+	simo.imo_multicast_ifp = inm->inm_ifp;
+	simo.imo_multicast_ttl = 1;
+	/*
+	 * Request loopback of the report if we are acting as a multicast
+	 * router, so that the process-level routing demon can hear it.
+	 */
+#ifdef MROUTING
+    {
+	extern struct socket *ip_mrouter;
+	simo.imo_multicast_loop = (ip_mrouter != NULL);
+    }
+#endif
+	ip_output(m, NULL, NULL, 0, &simo);
+	++igmpstat.igps_snd_reports;
+}
diff --git a/sys/netinet/igmp.h b/sys/netinet/igmp.h
new file mode 100644
index 00000000000..29ce21dee6f
--- /dev/null
+++ b/sys/netinet/igmp.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 1988 Stephen Deering.
+ * Copyright (c) 1992, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Stephen Deering of Stanford University.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)igmp.h	8.1 (Berkeley) 6/10/93
+ */
+
+/* Internet Group Management Protocol (IGMP) definitions. */
+
+/*
+ * IGMP packet format.
+ */
+struct igmp {
+	u_char		igmp_type;	/* version & type of IGMP message  */
+	u_char		igmp_code;	/* unused, should be zero          */
+	u_short		igmp_cksum;	/* IP-style checksum               */
+	struct in_addr	igmp_group;	/* group address being reported    */
+};					/*  (zero for queries)             */
+
+#define IGMP_MINLEN		     8
+
+#define IGMP_HOST_MEMBERSHIP_QUERY   0x11  /* message types, incl. version */
+#define IGMP_HOST_MEMBERSHIP_REPORT  0x12
+#define IGMP_DVMRP		     0x13  /* for experimental multicast   */
+					   /*  routing protocol            */
+
+#define IGMP_MAX_HOST_REPORT_DELAY   10    /* max delay for a host report  */
diff --git a/sys/netinet/igmp_var.h b/sys/netinet/igmp_var.h
new file mode 100644
index 00000000000..ff70f70e2b3
--- /dev/null
+++ b/sys/netinet/igmp_var.h
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 1988 Stephen Deering.
+ * Copyright (c) 1992, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Stephen Deering of Stanford University.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)igmp_var.h	8.1 (Berkeley) 7/19/93
+ */
+
+/*
+ * Internet Group Management Protocol (IGMP),
+ * implementation-specific definitions.
+ *
+ * Written by Steve Deering, Stanford, May 1988.
+ *
+ * MULTICAST 1.1
+ */
+
+struct igmpstat {
+	u_long	igps_rcv_total;		/* total IGMP messages received */
+	u_long	igps_rcv_tooshort;	/* received with too few bytes */
+	u_long	igps_rcv_badsum;	/* received with bad checksum */
+	u_long	igps_rcv_queries;	/* received membership queries */
+	u_long	igps_rcv_badqueries;	/* received invalid queries */
+	u_long	igps_rcv_reports;	/* received membership reports */
+	u_long	igps_rcv_badreports;	/* received invalid reports */
+	u_long	igps_rcv_ourreports;	/* received reports for our groups */
+	u_long	igps_snd_reports;	/* sent membership reports */
+};
+
+#ifdef KERNEL
+struct igmpstat igmpstat;
+
+/*
+ * Compute a "random" timer value between 1 and (IGMP_MAX_HOST_REPORT_DELAY
+ * * PR_FASTHZ) by adding the total number of IP packets received, our
+ * primary IP address, and the multicast address being timed-out.  The 4.3
+ * random() routine really ought to be available in the kernel!
+ * NOTE(review): dereferences in_ifaddr unchecked -- confirm it cannot be 0.
+ */
+#define IGMP_RANDOM_DELAY(multiaddr) \
+	/* struct in_addr multiaddr; */ \
+	( (ipstat.ips_total + \
+	   ntohl(IA_SIN(in_ifaddr)->sin_addr.s_addr) + \
+	   ntohl((multiaddr).s_addr) \
+	  ) \
+	  % (IGMP_MAX_HOST_REPORT_DELAY * PR_FASTHZ) + 1 \
+	)
+
+void	igmp_init __P(());
+void	igmp_input __P((struct mbuf *, int));
+void	igmp_joingroup __P((struct in_multi *));
+void	igmp_leavegroup __P((struct in_multi *));
+void	igmp_fasttimo __P(());
+#endif
diff --git a/sys/netinet/in.c b/sys/netinet/in.c
new file mode 100644
index 00000000000..e8b481b4005
--- /dev/null
+++ b/sys/netinet/in.c
@@ -0,0 +1,622 @@
+/*
+ * Copyright (c) 1982, 1986, 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)in.c	8.2 (Berkeley) 11/15/93
+ */
+
+#include <sys/param.h>
+#include <sys/ioctl.h>
+#include <sys/errno.h>
+#include <sys/malloc.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+
+#include <net/if.h>
+#include <net/route.h>
+
+#include <netinet/in_systm.h>
+#include <netinet/in.h>
+#include <netinet/in_var.h>
+#include <netinet/if_ether.h>
+
+#ifdef INET
+/*
+ * Return the network number (or local subnet number) of an address.
+ */
+u_long
+in_netof(in)
+	struct in_addr in;
+{
+	register u_long host = ntohl(in.s_addr);
+	register u_long mask, net;
+	register struct in_ifaddr *ia;
+
+	if (IN_CLASSA(host))
+		mask = IN_CLASSA_NET;
+	else if (IN_CLASSB(host))
+		mask = IN_CLASSB_NET;
+	else if (IN_CLASSC(host))
+		mask = IN_CLASSC_NET;
+	else if (IN_CLASSD(host))
+		mask = IN_CLASSD_NET;
+	else
+		return (0);		/* not class A through D */
+	net = host & mask;
+
+	/*
+	 * A network we have an interface on may be subnetted: if so,
+	 * return the subnet number under that interface's mask.
+	 */
+	for (ia = in_ifaddr; ia != 0; ia = ia->ia_next)
+		if (ia->ia_net == net)
+			return (host & ia->ia_subnetmask);
+	return (net);
+}
+
+#ifndef SUBNETSARELOCAL
+#define	SUBNETSARELOCAL	1
+#endif
+int subnetsarelocal = SUBNETSARELOCAL;
+/*
+ * Return 1 if an internet address is for a ``local'' host
+ * (one to which we have a connection), otherwise 0.  If
+ * subnetsarelocal is true, this includes other subnets of the
+ * local net.  Otherwise, it includes only the directly-connected
+ * (sub)nets.
+ */
+int
+in_localaddr(in)
+	struct in_addr in;
+{
+	register u_long i = ntohl(in.s_addr);
+	register struct in_ifaddr *ia;
+
+	for (ia = in_ifaddr; ia; ia = ia->ia_next) {
+		if (subnetsarelocal) {
+			if ((i & ia->ia_netmask) == ia->ia_net)
+				return (1);
+		} else if ((i & ia->ia_subnetmask) == ia->ia_subnet)
+			return (1);
+	}
+	return (0);
+}
+
+/*
+ * Determine whether an IP address is in a reserved set of addresses
+ * that may not be forwarded, or whether datagrams to that destination
+ * may be forwarded.  Returns 0 for the former, 1 for the latter.
+ */
+int
+in_canforward(in)
+	struct in_addr in;
+{
+	register u_long i = ntohl(in.s_addr);
+	register u_long net = i & IN_CLASSA_NET;
+
+	if (IN_EXPERIMENTAL(i) || IN_MULTICAST(i))
+		return (0);
+	/* Within class A, net 0 and the loopback net are not forwarded. */
+	if (IN_CLASSA(i) &&
+	    (net == 0 || net == (IN_LOOPBACKNET << IN_CLASSA_NSHIFT)))
+		return (0);
+	return (1);
+}
+
+/*
+ * Trim a mask in a sockaddr: sin_len is set to end at the last non-zero
+ * byte of sin_addr, or to 0 when sin_addr holds no non-zero byte.
+ */
+void
+in_socktrim(ap)
+struct sockaddr_in *ap;
+{
+    register char *cplim = (char *) &ap->sin_addr;
+    register char *cp = (char *) (&ap->sin_addr + 1);
+
+    for (ap->sin_len = 0; --cp >= cplim; )	/* first byte included */
+        if (*cp) {
+	    (ap)->sin_len = cp - (char *) (ap) + 1;
+	    break;
+	}
+}
+
+int	in_interfaces;		/* number of external internet interfaces */
+extern	struct ifnet loif;
+
+/*
+ * Generic internet control operations (ioctl's).
+ * Ifp is 0 if not an interface-specific ioctl.
+ */
+/* ARGSUSED */
+in_control(so, cmd, data, ifp)
+	struct socket *so;
+	int cmd;
+	caddr_t data;
+	register struct ifnet *ifp;
+{
+	register struct ifreq *ifr = (struct ifreq *)data;
+	register struct in_ifaddr *ia = 0;
+	register struct ifaddr *ifa;
+	struct in_ifaddr *oia;
+	struct in_aliasreq *ifra = (struct in_aliasreq *)data;
+	struct sockaddr_in oldaddr;
+	int error, hostIsNew, maskIsNew;
+	u_long i;
+
+	/*
+	 * Find address for this interface, if it exists.
+	 */
+	if (ifp)
+		for (ia = in_ifaddr; ia; ia = ia->ia_next)
+			if (ia->ia_ifp == ifp)
+				break;
+
+	switch (cmd) {
+
+	case SIOCAIFADDR:
+	case SIOCDIFADDR:
+		if (ifra->ifra_addr.sin_family == AF_INET)
+		    for (oia = ia; ia; ia = ia->ia_next) {
+			if (ia->ia_ifp == ifp  &&
+			    ia->ia_addr.sin_addr.s_addr ==
+				ifra->ifra_addr.sin_addr.s_addr)
+			    break;
+		}
+		if (cmd == SIOCDIFADDR && ia == 0)
+			return (EADDRNOTAVAIL);
+		/* FALLTHROUGH */
+	case SIOCSIFADDR:
+	case SIOCSIFNETMASK:
+	case SIOCSIFDSTADDR:
+		if ((so->so_state & SS_PRIV) == 0)
+			return (EPERM);
+
+		if (ifp == 0)
+			panic("in_control");
+		if (ia == (struct in_ifaddr *)0) {
+			oia = (struct in_ifaddr *)
+				malloc(sizeof *oia, M_IFADDR, M_WAITOK);
+			if (oia == (struct in_ifaddr *)NULL)
+				return (ENOBUFS);
+			bzero((caddr_t)oia, sizeof *oia);
+			if (ia = in_ifaddr) {
+				for ( ; ia->ia_next; ia = ia->ia_next)
+					continue;
+				ia->ia_next = oia;
+			} else
+				in_ifaddr = oia;
+			ia = oia;
+			if (ifa = ifp->if_addrlist) {
+				for ( ; ifa->ifa_next; ifa = ifa->ifa_next)
+					continue;
+				ifa->ifa_next = (struct ifaddr *) ia;
+			} else
+				ifp->if_addrlist = (struct ifaddr *) ia;
+			ia->ia_ifa.ifa_addr = (struct sockaddr *)&ia->ia_addr;
+			ia->ia_ifa.ifa_dstaddr
+					= (struct sockaddr *)&ia->ia_dstaddr;
+			ia->ia_ifa.ifa_netmask
+					= (struct sockaddr *)&ia->ia_sockmask;
+			ia->ia_sockmask.sin_len = 8;
+			if (ifp->if_flags & IFF_BROADCAST) {
+				ia->ia_broadaddr.sin_len = sizeof(ia->ia_addr);
+				ia->ia_broadaddr.sin_family = AF_INET;
+			}
+			ia->ia_ifp = ifp;
+			if (ifp != &loif)
+				in_interfaces++;
+		}
+		break;
+
+	case SIOCSIFBRDADDR:
+		if ((so->so_state & SS_PRIV) == 0)
+			return (EPERM);
+		/* FALLTHROUGH */
+
+	case SIOCGIFADDR:
+	case SIOCGIFNETMASK:
+	case SIOCGIFDSTADDR:
+	case SIOCGIFBRDADDR:
+		if (ia == (struct in_ifaddr *)0)
+			return (EADDRNOTAVAIL);
+		break;
+	}
+	switch (cmd) {
+
+	case SIOCGIFADDR:
+		*((struct sockaddr_in *)&ifr->ifr_addr) = ia->ia_addr;
+		break;
+
+	case SIOCGIFBRDADDR:
+		if ((ifp->if_flags & IFF_BROADCAST) == 0)
+			return (EINVAL);
+		*((struct sockaddr_in *)&ifr->ifr_dstaddr) = ia->ia_broadaddr;
+		break;
+
+	case SIOCGIFDSTADDR:
+		if ((ifp->if_flags & IFF_POINTOPOINT) == 0)
+			return (EINVAL);
+		*((struct sockaddr_in *)&ifr->ifr_dstaddr) = ia->ia_dstaddr;
+		break;
+
+	case SIOCGIFNETMASK:
+		*((struct sockaddr_in *)&ifr->ifr_addr) = ia->ia_sockmask;
+		break;
+
+	case SIOCSIFDSTADDR:
+		if ((ifp->if_flags & IFF_POINTOPOINT) == 0)
+			return (EINVAL);
+		oldaddr = ia->ia_dstaddr;
+		ia->ia_dstaddr = *(struct sockaddr_in *)&ifr->ifr_dstaddr;
+		if (ifp->if_ioctl && (error = (*ifp->if_ioctl)
+					(ifp, SIOCSIFDSTADDR, (caddr_t)ia))) {
+			ia->ia_dstaddr = oldaddr;
+			return (error);
+		}
+		if (ia->ia_flags & IFA_ROUTE) {
+			ia->ia_ifa.ifa_dstaddr = (struct sockaddr *)&oldaddr;
+			rtinit(&(ia->ia_ifa), (int)RTM_DELETE, RTF_HOST);
+			ia->ia_ifa.ifa_dstaddr =
+					(struct sockaddr *)&ia->ia_dstaddr;
+			rtinit(&(ia->ia_ifa), (int)RTM_ADD, RTF_HOST|RTF_UP);
+		}
+		break;
+
+	case SIOCSIFBRDADDR:
+		if ((ifp->if_flags & IFF_BROADCAST) == 0)
+			return (EINVAL);
+		ia->ia_broadaddr = *(struct sockaddr_in *)&ifr->ifr_broadaddr;
+		break;
+
+	case SIOCSIFADDR:
+		return (in_ifinit(ifp, ia,
+		    (struct sockaddr_in *) &ifr->ifr_addr, 1));
+
+	case SIOCSIFNETMASK:
+		i = ifra->ifra_addr.sin_addr.s_addr;
+		ia->ia_subnetmask = ntohl(ia->ia_sockmask.sin_addr.s_addr = i);
+		break;
+
+	case SIOCAIFADDR:
+		maskIsNew = 0;
+		hostIsNew = 1;
+		error = 0;
+		if (ia->ia_addr.sin_family == AF_INET) {
+			if (ifra->ifra_addr.sin_len == 0) {
+				ifra->ifra_addr = ia->ia_addr;
+				hostIsNew = 0;
+			} else if (ifra->ifra_addr.sin_addr.s_addr ==
+					       ia->ia_addr.sin_addr.s_addr)
+				hostIsNew = 0;
+		}
+		if (ifra->ifra_mask.sin_len) {
+			in_ifscrub(ifp, ia);
+			ia->ia_sockmask = ifra->ifra_mask;
+			ia->ia_subnetmask =
+			     ntohl(ia->ia_sockmask.sin_addr.s_addr);
+			maskIsNew = 1;
+		}
+		if ((ifp->if_flags & IFF_POINTOPOINT) &&
+		    (ifra->ifra_dstaddr.sin_family == AF_INET)) {
+			in_ifscrub(ifp, ia);
+			ia->ia_dstaddr = ifra->ifra_dstaddr;
+			maskIsNew  = 1; /* We lie; but the effect's the same */
+		}
+		if (ifra->ifra_addr.sin_family == AF_INET &&
+		    (hostIsNew || maskIsNew))
+			error = in_ifinit(ifp, ia, &ifra->ifra_addr, 0);
+		if ((ifp->if_flags & IFF_BROADCAST) &&
+		    (ifra->ifra_broadaddr.sin_family == AF_INET))
+			ia->ia_broadaddr = ifra->ifra_broadaddr;
+		return (error);
+
+	case SIOCDIFADDR:
+		in_ifscrub(ifp, ia);
+		if ((ifa = ifp->if_addrlist) == (struct ifaddr *)ia)
+			ifp->if_addrlist = ifa->ifa_next;
+		else {
+			while (ifa->ifa_next &&
+			       (ifa->ifa_next != (struct ifaddr *)ia))
+				    ifa = ifa->ifa_next;
+			if (ifa->ifa_next)
+				ifa->ifa_next = ((struct ifaddr *)ia)->ifa_next;
+			else
+				printf("Couldn't unlink inifaddr from ifp\n");
+		}
+		oia = ia;
+		if (oia == (ia = in_ifaddr))
+			in_ifaddr = ia->ia_next;
+		else {
+			while (ia->ia_next && (ia->ia_next != oia))
+				ia = ia->ia_next;
+			if (ia->ia_next)
+				ia->ia_next = oia->ia_next;
+			else
+				printf("Didn't unlink inifadr from list\n");
+		}
+		IFAFREE((&oia->ia_ifa));
+		break;
+
+	default:
+		if (ifp == 0 || ifp->if_ioctl == 0)
+			return (EOPNOTSUPP);
+		return ((*ifp->if_ioctl)(ifp, cmd, data));
+	}
+	return (0);
+}
+
+/*
+ * Delete the route installed for an interface address, if IFA_ROUTE is set.
+ */
+void
+in_ifscrub(ifp, ia)
+	register struct ifnet *ifp;
+	register struct in_ifaddr *ia;
+{
+
+	if (ia->ia_flags & IFA_ROUTE) {
+		/* Loopback and point-to-point entries go away as host routes. */
+		rtinit(&(ia->ia_ifa), (int)RTM_DELETE,
+		    (ifp->if_flags & (IFF_LOOPBACK|IFF_POINTOPOINT)) ?
+		    RTF_HOST : 0);
+		ia->ia_flags &= ~IFA_ROUTE;
+	}
+}
+
+/*
+ * Set ia's address to *sin and install its route (scrub: drop the old
+ * route first).  Returns 0, or the error from the driver ioctl or rtinit().
+ */
+in_ifinit(ifp, ia, sin, scrub)
+	register struct ifnet *ifp;
+	register struct in_ifaddr *ia;
+	struct sockaddr_in *sin;
+	int scrub;
+{
+	register u_long i = ntohl(sin->sin_addr.s_addr);
+	struct sockaddr_in oldaddr;
+	int s = splimp(), flags = RTF_UP, error, ether_output();
+
+	oldaddr = ia->ia_addr;
+	ia->ia_addr = *sin;
+	/*
+	 * Give the interface a chance to initialize
+	 * if this is its first address,
+	 * and to validate the address if necessary.
+	 */
+	if (ifp->if_ioctl &&
+	    (error = (*ifp->if_ioctl)(ifp, SIOCSIFADDR, (caddr_t)ia))) {
+		splx(s);
+		ia->ia_addr = oldaddr;
+		return (error);
+	}
+	if (ifp->if_output == ether_output) { /* XXX: Another Kludge */
+		ia->ia_ifa.ifa_rtrequest = arp_rtrequest;
+		ia->ia_ifa.ifa_flags |= RTF_CLONING;
+	}
+	splx(s);
+	if (scrub) {	/* delete the route keyed on the old address */
+		ia->ia_ifa.ifa_addr = (struct sockaddr *)&oldaddr;
+		in_ifscrub(ifp, ia);
+		ia->ia_ifa.ifa_addr = (struct sockaddr *)&ia->ia_addr;
+	}
+	if (IN_CLASSA(i))
+		ia->ia_netmask = IN_CLASSA_NET;
+	else if (IN_CLASSB(i))
+		ia->ia_netmask = IN_CLASSB_NET;
+	else
+		ia->ia_netmask = IN_CLASSC_NET;
+	/*
+	 * The subnet mask usually includes at least the standard network part,
+	 * but may be smaller in the case of supernetting.
+	 * If it is set, we believe it.
+	 */
+	if (ia->ia_subnetmask == 0) {
+		ia->ia_subnetmask = ia->ia_netmask;
+		ia->ia_sockmask.sin_addr.s_addr = htonl(ia->ia_subnetmask);
+	} else
+		ia->ia_netmask &= ia->ia_subnetmask;
+	ia->ia_net = i & ia->ia_netmask;
+	ia->ia_subnet = i & ia->ia_subnetmask;
+	in_socktrim(&ia->ia_sockmask);
+	/*
+	 * Add route for the network.
+	 */
+	ia->ia_ifa.ifa_metric = ifp->if_metric;
+	if (ifp->if_flags & IFF_BROADCAST) {
+		ia->ia_broadaddr.sin_addr.s_addr =
+			htonl(ia->ia_subnet | ~ia->ia_subnetmask);
+		ia->ia_netbroadcast.s_addr =
+			htonl(ia->ia_net | ~ ia->ia_netmask);
+	} else if (ifp->if_flags & IFF_LOOPBACK) {
+		ia->ia_ifa.ifa_dstaddr = ia->ia_ifa.ifa_addr;
+		flags |= RTF_HOST;
+	} else if (ifp->if_flags & IFF_POINTOPOINT) {
+		if (ia->ia_dstaddr.sin_family != AF_INET)
+			return (0);	/* no AF_INET peer set: no route */
+		flags |= RTF_HOST;
+	}
+	if ((error = rtinit(&(ia->ia_ifa), (int)RTM_ADD, flags)) == 0)
+		ia->ia_flags |= IFA_ROUTE;
+	/*
+	 * If the interface supports multicast, join the "all hosts"
+	 * multicast group on that interface.
+	 */
+	if (ifp->if_flags & IFF_MULTICAST) {
+		struct in_addr addr;
+
+		addr.s_addr = htonl(INADDR_ALLHOSTS_GROUP);
+		in_addmulti(&addr, ifp);
+	}
+	return (error);
+}
+
+
+/*
+ * Return 1 if the address might be a local broadcast address.
+ */
+in_broadcast(in, ifp)
+	struct in_addr in;
+	struct ifnet *ifp;
+{
+	register struct ifaddr *ifa;
+	register struct in_ifaddr *ia;
+	u_long t;
+
+	/* All-ones and all-zeros are taken as broadcast on any interface. */
+	if (in.s_addr == INADDR_BROADCAST || in.s_addr == INADDR_ANY)
+		return (1);
+	if ((ifp->if_flags & IFF_BROADCAST) == 0)
+		return (0);
+	t = ntohl(in.s_addr);
+	/*
+	 * Walk the interface's addresses.  An AF_INET entry matches when
+	 * "in" equals its subnet or net broadcast address, or (old-style
+	 * host 0 broadcast) its subnet or net number.
+	 */
+	for (ifa = ifp->if_addrlist; ifa; ifa = ifa->ifa_next) {
+		if (ifa->ifa_addr->sa_family != AF_INET)
+			continue;
+		ia = (struct in_ifaddr *)ifa;
+		if (in.s_addr == ia->ia_broadaddr.sin_addr.s_addr ||
+		    in.s_addr == ia->ia_netbroadcast.s_addr ||
+		    t == ia->ia_subnet || t == ia->ia_net)
+			return (1);
+	}
+	return (0);
+}
+/*
+ * Add an address to the list of IP multicast addresses for a given interface.
+ * Returns the (possibly shared) membership record, or NULL on failure.
+ */
+struct in_multi *
+in_addmulti(ap, ifp)
+	register struct in_addr *ap;
+	register struct ifnet *ifp;
+{
+	register struct in_multi *inm;
+	struct ifreq ifr;
+	struct in_ifaddr *ia;
+	int s = splnet();
+
+	IN_LOOKUP_MULTI(*ap, ifp, inm);
+	if (inm != NULL) {
+		/*
+		 * Already on the list: another reference is all it takes.
+		 */
+		++inm->inm_refcount;
+		goto out;
+	}
+
+	/*
+	 * New address; allocate a multicast record.  A failed allocation
+	 * is reported to the caller as NULL.
+	 */
+	inm = (struct in_multi *)malloc(sizeof(*inm), M_IPMADDR, M_NOWAIT);
+	if (inm == NULL)
+		goto out;
+	inm->inm_addr = *ap;
+	inm->inm_ifp = ifp;
+	inm->inm_refcount = 1;
+
+	/*
+	 * Link the record onto the multicast list of the in_ifaddr that
+	 * IFP_TO_IA yields for this interface; give up if there is none.
+	 */
+	IFP_TO_IA(ifp, ia);
+	if (ia == NULL)
+		goto bad;
+	inm->inm_ia = ia;
+	inm->inm_next = ia->ia_multiaddrs;
+	ia->ia_multiaddrs = inm;
+
+	/*
+	 * Ask the network driver to update its multicast reception
+	 * filter for the new address.  If it has no ioctl routine or
+	 * refuses, take the record back off the list.
+	 */
+	((struct sockaddr_in *)&ifr.ifr_addr)->sin_family = AF_INET;
+	((struct sockaddr_in *)&ifr.ifr_addr)->sin_addr = *ap;
+	if (ifp->if_ioctl == NULL ||
+	    (*ifp->if_ioctl)(ifp, SIOCADDMULTI, (caddr_t)&ifr) != 0) {
+		ia->ia_multiaddrs = inm->inm_next;
+		goto bad;
+	}
+	/* Let IGMP know that we have joined a new IP multicast group. */
+	igmp_joingroup(inm);
+out:
+	splx(s);
+	return (inm);
+bad:
+	free(inm, M_IPMADDR);
+	splx(s);
+	return (NULL);
+}
+
+/*
+ * Drop one reference to a multicast address record; when the last one
+ * goes, leave the group, unlink and free the record.  Always returns 0.
+ */
+int
+in_delmulti(inm)
+	register struct in_multi *inm;
+{
+	register struct in_multi **p;
+	struct ifreq ifr;
+	int s = splnet();
+
+	if (--inm->inm_refcount == 0) {
+		/*
+		 * No remaining claims to this record; let IGMP know that
+		 * we are leaving the multicast group.
+		 */
+		igmp_leavegroup(inm);
+		/* Unlink from the list hanging off the record's in_ifaddr. */
+		for (p = &inm->inm_ia->ia_multiaddrs;
+		     *p != inm;
+		     p = &(*p)->inm_next)
+			 continue;
+		*p = (*p)->inm_next;
+		/*
+		 * Notify the network driver to update its multicast reception
+		 * filter.
+		 */
+		((struct sockaddr_in *)&(ifr.ifr_addr))->sin_family = AF_INET;
+		((struct sockaddr_in *)&(ifr.ifr_addr))->sin_addr =
+								inm->inm_addr;
+		(*inm->inm_ifp->if_ioctl)(inm->inm_ifp, SIOCDELMULTI,
+							     (caddr_t)&ifr);
+		free(inm, M_IPMADDR);
+	}
+	splx(s);
+	return (0);
+}
+#endif
diff --git a/sys/netinet/in.h b/sys/netinet/in.h
new file mode 100644
index 00000000000..1ce9948f6e3
--- /dev/null
+++ b/sys/netinet/in.h
@@ -0,0 +1,238 @@
+/*
+ * Copyright (c) 1982, 1986, 1990, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)in.h	8.3 (Berkeley) 1/3/94
+ */
+
+/*
+ * Constants and structures defined by the internet system,
+ * Per RFC 790, September 1981, and numerous additions.
+ */
+
+/*
+ * Protocols
+ */
+#define	IPPROTO_IP		0		/* dummy for IP */
+#define	IPPROTO_ICMP		1		/* control message protocol */
+#define	IPPROTO_IGMP		2		/* group mgmt protocol */
+#define	IPPROTO_GGP		3		/* gateway^2 (deprecated) */
+#define	IPPROTO_TCP		6		/* tcp */
+#define	IPPROTO_EGP		8		/* exterior gateway protocol */
+#define	IPPROTO_PUP		12		/* pup */
+#define	IPPROTO_UDP		17		/* user datagram protocol */
+#define	IPPROTO_IDP		22		/* xns idp */
+#define	IPPROTO_TP		29 		/* tp-4 w/ class negotiation */
+#define	IPPROTO_EON		80		/* ISO clnp */
+#define	IPPROTO_ENCAP		98		/* encapsulation header */
+
+#define	IPPROTO_RAW		255		/* raw IP packet */
+#define	IPPROTO_MAX		256
+
+
+/*
+ * Local port number conventions:
+ * Ports < IPPORT_RESERVED are reserved for
+ * privileged processes (e.g. root).
+ * Ports > IPPORT_USERRESERVED are reserved
+ * for servers, not necessarily privileged.
+ */
+#define	IPPORT_RESERVED		1024
+#define	IPPORT_USERRESERVED	5000
+
+/*
+ * Internet address (a structure for historical reasons)
+ */
+struct in_addr {
+	u_long s_addr;		/* kept in network byte order */
+};
+
+/*
+ * Definitions of bits in internet address integers.
+ * On subnets, the decomposition of addresses to host and net parts
+ * is done according to subnet mask, not the masks here.
+ */
+#define	IN_CLASSA(i)		(((long)(i) & 0x80000000) == 0)
+#define	IN_CLASSA_NET		0xff000000
+#define	IN_CLASSA_NSHIFT	24
+#define	IN_CLASSA_HOST		0x00ffffff
+#define	IN_CLASSA_MAX		128
+
+#define	IN_CLASSB(i)		(((long)(i) & 0xc0000000) == 0x80000000)
+#define	IN_CLASSB_NET		0xffff0000
+#define	IN_CLASSB_NSHIFT	16
+#define	IN_CLASSB_HOST		0x0000ffff
+#define	IN_CLASSB_MAX		65536
+
+#define	IN_CLASSC(i)		(((long)(i) & 0xe0000000) == 0xc0000000)
+#define	IN_CLASSC_NET		0xffffff00
+#define	IN_CLASSC_NSHIFT	8
+#define	IN_CLASSC_HOST		0x000000ff
+
+#define	IN_CLASSD(i)		(((long)(i) & 0xf0000000) == 0xe0000000)
+#define	IN_CLASSD_NET		0xf0000000	/* These ones aren't really */
+#define	IN_CLASSD_NSHIFT	28		/* net and host fields, but */
+#define	IN_CLASSD_HOST		0x0fffffff	/* routing needn't know.    */
+#define	IN_MULTICAST(i)		IN_CLASSD(i)
+
+#define	IN_EXPERIMENTAL(i)	(((long)(i) & 0xf0000000) == 0xf0000000)
+#define	IN_BADCLASS(i)		(((long)(i) & 0xf0000000) == 0xf0000000)
+
+#define	INADDR_ANY		(u_long)0x00000000
+#define	INADDR_BROADCAST	(u_long)0xffffffff	/* must be masked */
+#ifndef KERNEL
+#define	INADDR_NONE		0xffffffff		/* -1 return */
+#endif
+
+#define	INADDR_UNSPEC_GROUP	(u_long)0xe0000000	/* 224.0.0.0 */
+#define	INADDR_ALLHOSTS_GROUP	(u_long)0xe0000001	/* 224.0.0.1 */
+#define	INADDR_MAX_LOCAL_GROUP	(u_long)0xe00000ff	/* 224.0.0.255 */
+
+#define	IN_LOOPBACKNET		127			/* official! */
+
+/*
+ * Socket address, internet style.
+ */
+struct sockaddr_in {
+	u_char	sin_len;		/* bytes in use, e.g. sizeof(*sin) */
+	u_char	sin_family;		/* AF_INET */
+	u_short	sin_port;		/* port, network byte order */
+	struct	in_addr sin_addr;	/* IP address */
+	char	sin_zero[8];		/* NOTE(review): presumably padding */
+};
+
+/*
+ * Structure used to describe IP options.
+ * Used to store options internally, to pass them to a process,
+ * or to restore options retrieved earlier.
+ * The ip_dst is used for the first-hop gateway when using a source route
+ * (this gets put into the header proper).
+ */
+struct ip_opts {
+	struct	in_addr ip_dst;		/* first hop, 0 w/o src rt */
+	char	ip_opts[40];		/* actually variable in size */
+};
+
+/*
+ * Options for use with [gs]etsockopt at the IP level.
+ * First word of comment is data type; bool is stored in int.
+ */
+#define	IP_OPTIONS		1    /* buf/ip_opts; set/get IP options */
+#define	IP_HDRINCL		2    /* int; header is included with data */
+#define	IP_TOS			3    /* int; IP type of service and preced. */
+#define	IP_TTL			4    /* int; IP time to live */
+#define	IP_RECVOPTS		5    /* bool; receive all IP opts w/dgram */
+#define	IP_RECVRETOPTS		6    /* bool; receive IP opts for response */
+#define	IP_RECVDSTADDR		7    /* bool; receive IP dst addr w/dgram */
+#define	IP_RETOPTS		8    /* ip_opts; set/get IP options */
+#define	IP_MULTICAST_IF		9    /* u_char; set/get IP multicast i/f  */
+#define	IP_MULTICAST_TTL	10   /* u_char; set/get IP multicast ttl */
+#define	IP_MULTICAST_LOOP	11   /* u_char; set/get IP multicast loopback */
+#define	IP_ADD_MEMBERSHIP	12   /* ip_mreq; add an IP group membership */
+#define	IP_DROP_MEMBERSHIP	13   /* ip_mreq; drop an IP group membership */
+
+/*
+ * Defaults and limits for options
+ */
+#define	IP_DEFAULT_MULTICAST_TTL  1	/* normally limit m'casts to 1 hop  */
+#define	IP_DEFAULT_MULTICAST_LOOP 1	/* normally hear sends if a member  */
+#define	IP_MAX_MEMBERSHIPS	20	/* per socket; must fit in one mbuf */
+
+/*
+ * Argument structure for IP_ADD_MEMBERSHIP and IP_DROP_MEMBERSHIP.
+ */
+struct ip_mreq {
+	struct	in_addr imr_multiaddr;	/* IP multicast address of group */
+	struct	in_addr imr_interface;	/* local IP address of interface */
+};
+
+/*
+ * Definitions for inet sysctl operations.
+ *
+ * Third level is protocol number.
+ * Fourth level is desired variable within that protocol.
+ */
+#define	IPPROTO_MAXID	(IPPROTO_IDP + 1)	/* don't list to IPPROTO_MAX */
+
+#define	CTL_IPPROTO_NAMES { \
+	{ "ip", CTLTYPE_NODE }, \
+	{ "icmp", CTLTYPE_NODE }, \
+	{ "igmp", CTLTYPE_NODE }, \
+	{ "ggp", CTLTYPE_NODE }, \
+	{ 0, 0 }, \
+	{ 0, 0 }, \
+	{ "tcp", CTLTYPE_NODE }, \
+	{ 0, 0 }, \
+	{ "egp", CTLTYPE_NODE }, \
+	{ 0, 0 }, \
+	{ 0, 0 }, \
+	{ 0, 0 }, \
+	{ "pup", CTLTYPE_NODE }, \
+	{ 0, 0 }, \
+	{ 0, 0 }, \
+	{ 0, 0 }, \
+	{ 0, 0 }, \
+	{ "udp", CTLTYPE_NODE }, \
+	{ 0, 0 }, \
+	{ 0, 0 }, \
+	{ 0, 0 }, \
+	{ 0, 0 }, \
+	{ "idp", CTLTYPE_NODE }, \
+}
+
+/*
+ * Names for IP sysctl objects; IPCTL_NAMES lists them in the same order.
+ */
+#define	IPCTL_FORWARDING	1	/* act as router */
+#define	IPCTL_SENDREDIRECTS	2	/* may send redirects when forwarding */
+#define	IPCTL_DEFTTL		3	/* default TTL */
+#ifdef notyet
+#define	IPCTL_DEFMTU		4	/* default MTU */
+#endif
+#define	IPCTL_MAXID		5	/* counts slot 4 even while notyet */
+
+#define	IPCTL_NAMES { \
+	{ 0, 0 }, \
+	{ "forwarding", CTLTYPE_INT }, \
+	{ "redirect", CTLTYPE_INT }, \
+	{ "ttl", CTLTYPE_INT }, \
+	{ "mtu", CTLTYPE_INT }, \
+}
+
+
+#ifdef KERNEL
+int	 in_broadcast __P((struct in_addr, struct ifnet *));
+int	 in_canforward __P((struct in_addr));
+int	 in_cksum __P((struct mbuf *, int));
+int	 in_localaddr __P((struct in_addr));
+u_long	 in_netof __P((struct in_addr));
+void	 in_socktrim __P((struct sockaddr_in *));
+#endif
diff --git a/sys/netinet/in_cksum.c b/sys/netinet/in_cksum.c
new file mode 100644
index 00000000000..c19a9200836
--- /dev/null
+++ b/sys/netinet/in_cksum.c
@@ -0,0 +1,149 @@
+/*
+ * Copyright (c) 1988, 1992, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)in_cksum.c	8.1 (Berkeley) 6/10/93
+ */
+
+#include <sys/param.h>
+#include <sys/mbuf.h>
+
+/*
+ * Checksum routine for Internet Protocol family headers (Portable Version).
+ * Adds up the first len bytes of an mbuf chain as 16-bit words, folds the
+ * sum with REDUCE/ADDCARRY and returns the complement of its low 16 bits.
+ * Heavily used in the network code; tune for each CPU to be fast.
+ */
+
+#define ADDCARRY(x)  (x > 65535 ? x -= 65535 : x)
+#define REDUCE {l_util.l = sum; sum = l_util.s[0] + l_util.s[1]; ADDCARRY(sum);}
+
+int
+in_cksum(m, len)
+	register struct mbuf *m;
+	register int len;
+{
+	register u_short *w;
+	register int sum = 0;
+	register int mlen = 0;	/* -1: odd byte is parked in s_util.c[0] */
+	int byte_swapped = 0;	/* sum was shifted left 8 at an odd start */
+
+	union {
+		char	c[2];
+		u_short	s;
+	} s_util;
+	union {
+		u_short s[2];
+		long	l;	/* NOTE(review): assumes 32-bit long? */
+	} l_util;
+
+	for (;m && len; m = m->m_next) {
+		if (m->m_len == 0)
+			continue;
+		w = mtod(m, u_short *);
+		if (mlen == -1) {
+			/*
+			 * The first byte of this mbuf is the continuation
+			 * of a word spanning between this mbuf and the
+			 * last mbuf.
+			 *
+			 * s_util.c[0] is already saved when scanning previous
+			 * mbuf.
+			 */
+			s_util.c[1] = *(char *)w;
+			sum += s_util.s;
+			w = (u_short *)((char *)w + 1);
+			mlen = m->m_len - 1;
+			len--;
+		} else
+			mlen = m->m_len;
+		if (len < mlen)
+			mlen = len;
+		len -= mlen;
+		/*
+		 * Force to even boundary.
+		 */
+		if ((1 & (int) w) && (mlen > 0)) {
+			REDUCE;
+			sum <<= 8;
+			s_util.c[0] = *(u_char *)w;
+			w = (u_short *)((char *)w + 1);
+			mlen--;
+			byte_swapped = 1;
+		}
+		/*
+		 * Unroll the loop to make overhead from
+		 * branches &c small.
+		 */
+		while ((mlen -= 32) >= 0) {
+			sum += w[0]; sum += w[1]; sum += w[2]; sum += w[3];
+			sum += w[4]; sum += w[5]; sum += w[6]; sum += w[7];
+			sum += w[8]; sum += w[9]; sum += w[10]; sum += w[11];
+			sum += w[12]; sum += w[13]; sum += w[14]; sum += w[15];
+			w += 16;
+		}
+		mlen += 32;
+		while ((mlen -= 8) >= 0) {
+			sum += w[0]; sum += w[1]; sum += w[2]; sum += w[3];
+			w += 4;
+		}
+		mlen += 8;
+		if (mlen == 0 && byte_swapped == 0)
+			continue;
+		REDUCE;
+		while ((mlen -= 2) >= 0) {
+			sum += *w++;
+		}
+		if (byte_swapped) {
+			REDUCE;
+			sum <<= 8;
+			byte_swapped = 0;
+			if (mlen == -1) {
+				s_util.c[1] = *(char *)w;
+				sum += s_util.s;
+				mlen = 0;
+			} else
+				mlen = -1;
+		} else if (mlen == -1)
+			s_util.c[0] = *(char *)w;
+	}
+	if (len)
+		printf("cksum: out of data\n");
+	if (mlen == -1) {
+		/* The last mbuf has odd # of bytes. Follow the
+		   standard (the odd byte may be shifted left by 8 bits
+		   or not as determined by endian-ness of the machine) */
+		s_util.c[1] = 0;
+		sum += s_util.s;
+	}
+	REDUCE;
+	return (~sum & 0xffff);
+}
diff --git a/sys/netinet/in_pcb.c b/sys/netinet/in_pcb.c
new file mode 100644
index 00000000000..01b6b17961c
--- /dev/null
+++ b/sys/netinet/in_pcb.c
@@ -0,0 +1,497 @@
+/*
+ * Copyright (c) 1982, 1986, 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)in_pcb.c	8.2 (Berkeley) 1/4/94
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/protosw.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/ioctl.h>
+#include <sys/errno.h>
+#include <sys/time.h>
+#include <sys/proc.h>
+
+#include <net/if.h>
+#include <net/route.h>
+
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/ip.h>
+#include <netinet/in_pcb.h>
+#include <netinet/in_var.h>
+#include <netinet/ip_var.h>
+
+struct	in_addr zeroin_addr;
+/* Allocate a zeroed PCB for so and insque it after head; 0 or ENOBUFS. */
+int
+in_pcballoc(so, head)
+	struct socket *so;
+	struct inpcb *head;
+{
+	register struct inpcb *pcb;
+
+	MALLOC(pcb, struct inpcb *, sizeof(*pcb), M_PCB, M_WAITOK);
+	if (pcb == NULL)
+		return (ENOBUFS);
+	bzero((caddr_t)pcb, sizeof(*pcb));
+	pcb->inp_socket = so;
+	pcb->inp_head = head;
+	insque(pcb, head);
+	so->so_pcb = (caddr_t)pcb;
+	return (0);
+}
+/* Bind a local address and/or port to inp; port 0 means pick a free one. */
+int
+in_pcbbind(inp, nam)
+	register struct inpcb *inp;
+	struct mbuf *nam;
+{
+	register struct socket *so = inp->inp_socket;
+	register struct inpcb *head = inp->inp_head;
+	register struct sockaddr_in *sin;
+	struct proc *p = curproc;		/* XXX */
+	u_short lport = 0;
+	int wild = 0, reuseport = (so->so_options & SO_REUSEPORT);
+	int error;
+
+	if (in_ifaddr == 0)
+		return (EADDRNOTAVAIL);
+	if (inp->inp_lport || inp->inp_laddr.s_addr != INADDR_ANY)
+		return (EINVAL);
+	if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) == 0 &&
+	    ((so->so_proto->pr_flags & PR_CONNREQUIRED) == 0 ||
+	     (so->so_options & SO_ACCEPTCONN) == 0))
+		wild = INPLOOKUP_WILDCARD;
+	if (nam) {
+		sin = mtod(nam, struct sockaddr_in *);
+		if (nam->m_len != sizeof (*sin))
+			return (EINVAL);
+#ifdef notdef
+		/*
+		 * We should check the family, but old programs
+		 * incorrectly fail to initialize it.
+		 */
+		if (sin->sin_family != AF_INET)
+			return (EAFNOSUPPORT);
+#endif
+		lport = sin->sin_port;
+		if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) {
+			/*
+			 * Treat SO_REUSEADDR as SO_REUSEPORT for multicast;
+			 * allow complete duplication of binding if
+			 * SO_REUSEPORT is set, or if SO_REUSEADDR is set
+			 * and a multicast address is bound on both
+			 * new and duplicated sockets.
+			 */
+			if (so->so_options & SO_REUSEADDR)
+				reuseport = SO_REUSEADDR|SO_REUSEPORT;
+		} else if (sin->sin_addr.s_addr != INADDR_ANY) {
+			sin->sin_port = 0;		/* yech... */
+			if (ifa_ifwithaddr((struct sockaddr *)sin) == 0)
+				return (EADDRNOTAVAIL);
+		}
+		if (lport) {
+			struct inpcb *t;
+
+			/* GROSS: ports below IPPORT_RESERVED need suser() */
+			if (ntohs(lport) < IPPORT_RESERVED &&
+			    (error = suser(p->p_ucred, &p->p_acflag)))
+				return (error);
+			t = in_pcblookup(head, zeroin_addr, 0,
+			    sin->sin_addr, lport, wild);
+			if (t && (reuseport & t->inp_socket->so_options) == 0)
+				return (EADDRINUSE);
+		}
+		inp->inp_laddr = sin->sin_addr;
+	}
+	if (lport == 0)		/* none given: pick a free one */
+		do {
+			if (head->inp_lport++ < IPPORT_RESERVED ||
+			    head->inp_lport > IPPORT_USERRESERVED)
+				head->inp_lport = IPPORT_RESERVED;
+			lport = htons(head->inp_lport);
+		} while (in_pcblookup(head,
+			    zeroin_addr, 0, inp->inp_laddr, lport, wild));
+	inp->inp_lport = lport;
+	return (0);
+}
+
+/*
+ * Connect from a socket to the address in nam (a struct sockaddr_in);
+ * both address and port must be specified there.
+ * If we don't have a local address for this socket yet,
+ * then pick one.  Returns 0 or an errno value.
+ */
+int
+in_pcbconnect(inp, nam)
+	register struct inpcb *inp;
+	struct mbuf *nam;
+{
+	struct in_ifaddr *ia;
+	struct sockaddr_in *ifaddr;
+	register struct sockaddr_in *sin = mtod(nam, struct sockaddr_in *);
+
+	if (nam->m_len != sizeof (*sin))
+		return (EINVAL);
+	if (sin->sin_family != AF_INET)
+		return (EAFNOSUPPORT);
+	if (sin->sin_port == 0)
+		return (EADDRNOTAVAIL);
+	if (in_ifaddr) {
+		/*
+		 * If the destination address is INADDR_ANY,
+		 * use the primary local address.
+		 * If the supplied address is INADDR_BROADCAST,
+		 * and the primary interface supports broadcast,
+		 * choose the broadcast address for that interface.
+		 */
+#define	satosin(sa)	((struct sockaddr_in *)(sa))
+#define sintosa(sin)	((struct sockaddr *)(sin))
+#define ifatoia(ifa)	((struct in_ifaddr *)(ifa))
+		if (sin->sin_addr.s_addr == INADDR_ANY)
+		    sin->sin_addr = IA_SIN(in_ifaddr)->sin_addr;
+		else if (sin->sin_addr.s_addr == (u_long)INADDR_BROADCAST &&
+		  (in_ifaddr->ia_ifp->if_flags & IFF_BROADCAST))
+		    sin->sin_addr = satosin(&in_ifaddr->ia_broadaddr)->sin_addr;
+	}
+	if (inp->inp_laddr.s_addr == INADDR_ANY) {
+		register struct route *ro;
+
+		ia = (struct in_ifaddr *)0;
+		/*
+		 * If route is known or can be allocated now,
+		 * our src addr is taken from the i/f, else punt.
+		 */
+		ro = &inp->inp_route;
+		if (ro->ro_rt &&
+		    (satosin(&ro->ro_dst)->sin_addr.s_addr !=
+			sin->sin_addr.s_addr || 
+		    inp->inp_socket->so_options & SO_DONTROUTE)) {
+			RTFREE(ro->ro_rt);
+			ro->ro_rt = (struct rtentry *)0;
+		}
+		if ((inp->inp_socket->so_options & SO_DONTROUTE) == 0 && /*XXX*/
+		    (ro->ro_rt == (struct rtentry *)0 ||
+		    ro->ro_rt->rt_ifp == (struct ifnet *)0)) {
+			/* No route yet, so try to acquire one */
+			ro->ro_dst.sa_family = AF_INET;
+			ro->ro_dst.sa_len = sizeof(struct sockaddr_in);
+			((struct sockaddr_in *) &ro->ro_dst)->sin_addr =
+				sin->sin_addr;
+			rtalloc(ro);
+		}
+		/*
+		 * If we found a route, use the address
+		 * corresponding to the outgoing interface
+		 * unless it is the loopback (in case a route
+		 * to our address on another net goes to loopback).
+		 */
+		if (ro->ro_rt && !(ro->ro_rt->rt_ifp->if_flags & IFF_LOOPBACK))
+			ia = ifatoia(ro->ro_rt->rt_ifa);
+		if (ia == 0) {
+			u_short fport = sin->sin_port;
+
+			sin->sin_port = 0;
+			ia = ifatoia(ifa_ifwithdstaddr(sintosa(sin)));
+			if (ia == 0)
+				ia = ifatoia(ifa_ifwithnet(sintosa(sin)));
+			sin->sin_port = fport;
+			if (ia == 0)
+				ia = in_ifaddr;
+			if (ia == 0)
+				return (EADDRNOTAVAIL);
+		}
+		/*
+		 * If the destination address is multicast and an outgoing
+		 * interface has been set as a multicast option, use the
+		 * address of that interface as our source address.
+		 */
+		if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr)) &&
+		    inp->inp_moptions != NULL) {
+			struct ip_moptions *imo;
+			struct ifnet *ifp;
+
+			imo = inp->inp_moptions;
+			if (imo->imo_multicast_ifp != NULL) {
+				ifp = imo->imo_multicast_ifp;
+				for (ia = in_ifaddr; ia; ia = ia->ia_next)
+					if (ia->ia_ifp == ifp)
+						break;
+				if (ia == 0)
+					return (EADDRNOTAVAIL);
+			}
+		}
+		ifaddr = (struct sockaddr_in *)&ia->ia_addr;
+	}
+	if (in_pcblookup(inp->inp_head,
+	    sin->sin_addr,
+	    sin->sin_port,
+	    inp->inp_laddr.s_addr ? inp->inp_laddr : ifaddr->sin_addr,
+	    inp->inp_lport,
+	    0))
+		return (EADDRINUSE);
+	if (inp->inp_laddr.s_addr == INADDR_ANY) {
+		if (inp->inp_lport == 0)
+			(void)in_pcbbind(inp, (struct mbuf *)0);
+		inp->inp_laddr = ifaddr->sin_addr;
+	}
+	inp->inp_faddr = sin->sin_addr;
+	inp->inp_fport = sin->sin_port;
+	return (0);
+}
+/* Clear the foreign address/port; detach inp if SS_NOFDREF.  Returns 0. */
+int
+in_pcbdisconnect(inp)
+	struct inpcb *inp;
+{
+	inp->inp_faddr.s_addr = INADDR_ANY;
+	inp->inp_fport = 0;
+	if (inp->inp_socket->so_state & SS_NOFDREF)
+		in_pcbdetach(inp);
+	return (0);
+}
+/* Unhook inp from its socket and queue, free what it holds.  Returns 0. */
+int
+in_pcbdetach(inp)
+	struct inpcb *inp;
+{
+
+	inp->inp_socket->so_pcb = 0;
+	sofree(inp->inp_socket);
+	if (inp->inp_options)
+		(void)m_free(inp->inp_options);
+	if (inp->inp_route.ro_rt)
+		rtfree(inp->inp_route.ro_rt);
+	ip_freemoptions(inp->inp_moptions);
+	remque(inp);
+	FREE(inp, M_PCB);
+	return (0);
+}
+/* Write inp's local address and port into nam as a sockaddr_in; returns 0. */
+int
+in_setsockaddr(inp, nam)
+	register struct inpcb *inp;
+	struct mbuf *nam;
+{
+	register struct sockaddr_in *sin = mtod(nam, struct sockaddr_in *);
+
+	nam->m_len = sizeof (*sin);
+	bzero((caddr_t)sin, sizeof (*sin));
+	sin->sin_family = AF_INET;
+	sin->sin_len = sizeof(*sin);
+	sin->sin_port = inp->inp_lport;
+	sin->sin_addr = inp->inp_laddr;
+	return (0);
+}
+/* Write inp's foreign address and port into nam as a sockaddr_in; returns 0. */
+int
+in_setpeeraddr(inp, nam)
+	struct inpcb *inp;
+	struct mbuf *nam;
+{
+	register struct sockaddr_in *sin = mtod(nam, struct sockaddr_in *);
+
+	nam->m_len = sizeof (*sin);
+	bzero((caddr_t)sin, sizeof (*sin));
+	sin->sin_family = AF_INET;
+	sin->sin_len = sizeof(*sin);
+	sin->sin_port = inp->inp_fport;
+	sin->sin_addr = inp->inp_faddr;
+	return (0);
+}
+
+/*
+ * Pass some notification to all connections of a protocol
+ * associated with address dst.  The local address and/or port numbers
+ * may be specified to limit the search.  The "usual action" will be
+ * taken, depending on the ctlinput cmd.  The caller must filter any
+ * cmds that are uninteresting (e.g., no error in the map).
+ * Call the protocol specific routine (if any) to report
+ * any errors for each matching socket.  Always returns 0.
+ *
+ * Must be called at splnet.
+ */
+int
+in_pcbnotify(head, dst, fport_arg, laddr, lport_arg, cmd, notify)
+	struct inpcb *head;
+	struct sockaddr *dst;
+	u_int fport_arg, lport_arg;
+	struct in_addr laddr;
+	int cmd;
+	void (*notify) __P((struct inpcb *, int));
+{
+	extern u_char inetctlerrmap[];
+	register struct inpcb *inp, *next;
+	struct in_addr faddr;
+	u_short fport = fport_arg, lport = lport_arg;
+	int errno;
+
+	/* cmd indexes inetctlerrmap[] below; PRC_NCMDS itself is out of range. */
+	if ((unsigned)cmd >= PRC_NCMDS || dst->sa_family != AF_INET)
+		return (0);
+	faddr = ((struct sockaddr_in *)dst)->sin_addr;
+	if (faddr.s_addr == INADDR_ANY)
+		return (0);
+
+	/*
+	 * Redirects go to all references to the destination,
+	 * and use in_rtchange to invalidate the route cache.
+	 * Dead host indications: notify all references to the destination.
+	 * Otherwise, if we have knowledge of the local port and address,
+	 * deliver only to that socket.
+	 */
+	if (PRC_IS_REDIRECT(cmd) || cmd == PRC_HOSTDEAD) {
+		fport = 0;
+		lport = 0;
+		laddr.s_addr = 0;
+		if (cmd != PRC_HOSTDEAD)
+			notify = in_rtchange;
+	}
+	errno = inetctlerrmap[cmd];
+	for (inp = head->inp_next; inp != head; inp = next) {
+		/* Pick up the successor before notify gets to see this PCB. */
+		next = inp->inp_next;
+		if (inp->inp_faddr.s_addr != faddr.s_addr ||
+		    inp->inp_socket == 0 ||
+		    (lport && inp->inp_lport != lport) ||
+		    (laddr.s_addr && inp->inp_laddr.s_addr != laddr.s_addr) ||
+		    (fport && inp->inp_fport != fport))
+			continue;
+		if (notify)
+			(*notify)(inp, errno);
+	}
+	return (0);
+}
+
+/*
+ * Check for alternatives when higher level complains
+ * about service problems.  For now, invalidate cached
+ * routing information.  If the route was created dynamically
+ * (by a redirect), time to try a default gateway again.
+ *
+ * Does nothing when the pcb has no cached route.
+ * Always returns 0.
+ */
+int
+in_losing(inp)
+	struct inpcb *inp;
+{
+	register struct rtentry *rt;
+	struct rt_addrinfo info;
+
+	if ((rt = inp->inp_route.ro_rt)) {
+		inp->inp_route.ro_rt = 0;
+		/* Describe the route in an RTM_LOSING message. */
+		bzero((caddr_t)&info, sizeof(info));
+		info.rti_info[RTAX_DST] =
+			(struct sockaddr *)&inp->inp_route.ro_dst;
+		info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
+		info.rti_info[RTAX_NETMASK] = rt_mask(rt);
+		rt_missmsg(RTM_LOSING, &info, rt->rt_flags, 0);
+		if (rt->rt_flags & RTF_DYNAMIC)
+			(void) rtrequest(RTM_DELETE, rt_key(rt),
+				rt->rt_gateway, rt_mask(rt), rt->rt_flags,
+				(struct rtentry **)0);
+		else
+		/*
+		 * A new route can be allocated
+		 * the next time output is attempted.
+		 */
+			rtfree(rt);
+	}
+	/* Declared int: give callers a defined value instead of garbage. */
+	return (0);
+}
+
+/*
+ * Routing has changed: release the route cached in the pcb, if any,
+ * so that a (hopefully) better one is allocated later.
+ * The errno argument is not examined.
+ */
+void
+in_rtchange(inp, errno)
+	register struct inpcb *inp;
+	int errno;
+{
+	struct rtentry *cached = inp->inp_route.ro_rt;
+
+	if (cached == 0)
+		return;
+
+	rtfree(cached);
+	/* A new route can be allocated the next time output is attempted. */
+	inp->inp_route.ro_rt = 0;
+}
+
+/*
+ * Walk the pcb list headed by "head" and return the pcb with local
+ * port lport_arg that best matches the given foreign and local
+ * addresses, or 0 if there is none.
+ *
+ * Each address that is wildcarded (INADDR_ANY) on exactly one side of
+ * the comparison counts as one wildcard; the candidate with the fewest
+ * wildcards wins and an exact match ends the search.  Candidates that
+ * need any wildcard are only accepted when INPLOOKUP_WILDCARD is set
+ * in flags.
+ */
+struct inpcb *
+in_pcblookup(head, faddr, fport_arg, laddr, lport_arg, flags)
+	struct inpcb *head;
+	struct in_addr faddr, laddr;
+	u_int fport_arg, lport_arg;
+	int flags;
+{
+	register struct inpcb *cur, *best = 0;
+	int bestwild = 3, nwild;
+	u_short fport = fport_arg, lport = lport_arg;
+
+	for (cur = head->inp_next; cur != head; cur = cur->inp_next) {
+		if (cur->inp_lport != lport)
+			continue;
+		nwild = 0;
+
+		/* Local address. */
+		if (cur->inp_laddr.s_addr == INADDR_ANY) {
+			if (laddr.s_addr != INADDR_ANY)
+				nwild++;
+		} else if (laddr.s_addr == INADDR_ANY)
+			nwild++;
+		else if (cur->inp_laddr.s_addr != laddr.s_addr)
+			continue;
+
+		/* Foreign address (and port, when both sides are specific). */
+		if (cur->inp_faddr.s_addr == INADDR_ANY) {
+			if (faddr.s_addr != INADDR_ANY)
+				nwild++;
+		} else if (faddr.s_addr == INADDR_ANY)
+			nwild++;
+		else if (cur->inp_faddr.s_addr != faddr.s_addr ||
+		    cur->inp_fport != fport)
+			continue;
+
+		if (nwild != 0 && (flags & INPLOOKUP_WILDCARD) == 0)
+			continue;
+		if (nwild >= bestwild)
+			continue;
+
+		/* Best candidate so far; an exact match cannot be beaten. */
+		best = cur;
+		bestwild = nwild;
+		if (bestwild == 0)
+			break;
+	}
+	return (best);
+}
diff --git a/sys/netinet/in_pcb.h b/sys/netinet/in_pcb.h
new file mode 100644
index 00000000000..c85324702a7
--- /dev/null
+++ b/sys/netinet/in_pcb.h
@@ -0,0 +1,88 @@
+/*
+ * Copyright (c) 1982, 1986, 1990, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)in_pcb.h	8.1 (Berkeley) 6/10/93
+ */
+
+/*
+ * Internet protocol control block, common to the internet protocols.
+ * It holds the local and foreign address/port of an endpoint and
+ * points up to the owning socket and down to the protocol-specific
+ * control block.
+ */
+struct inpcb {
+	struct	inpcb *inp_next;	/* next pcb on the list */
+	struct	inpcb *inp_prev;	/* previous pcb on the list */
+	struct	inpcb *inp_head;	/* pointer back to chain of inpcb's
+					   for this protocol */
+	struct	in_addr inp_faddr;	/* foreign address */
+	u_short	inp_fport;		/* foreign port */
+	struct	in_addr inp_laddr;	/* local address */
+	u_short	inp_lport;		/* local port */
+	struct	socket *inp_socket;	/* back pointer to socket */
+	caddr_t	inp_ppcb;		/* pointer to per-protocol pcb */
+	struct	route inp_route;	/* placeholder for routing entry */
+	int	inp_flags;		/* generic IP/datagram flags */
+	struct	ip inp_ip;		/* header prototype; should have more */
+	struct	mbuf *inp_options;	/* IP options */
+	struct	ip_moptions *inp_moptions; /* IP multicast options */
+};
+
+/* flags in inp_flags: */
+#define	INP_RECVOPTS		0x01	/* receive incoming IP options */
+#define	INP_RECVRETOPTS		0x02	/* receive IP options for reply */
+#define	INP_RECVDSTADDR		0x04	/* receive IP dst address */
+/* mask covering all of the INP_RECV* flags above */
+#define	INP_CONTROLOPTS		(INP_RECVOPTS|INP_RECVRETOPTS|INP_RECVDSTADDR)
+#define	INP_HDRINCL		0x08	/* user supplies entire IP header */
+
+/*
+ * Bits for the "flags" argument of in_pcblookup().
+ * INPLOOKUP_WILDCARD allows matches that required wildcarding.
+ * NOTE(review): in_pcblookup() itself does not test INPLOOKUP_SETLOCAL;
+ * confirm its meaning against the callers that pass it.
+ */
+#define	INPLOOKUP_WILDCARD	1
+#define	INPLOOKUP_SETLOCAL	2
+
+/* Fetch the inpcb stored in a socket's so_pcb field. */
+#define	sotoinpcb(so)	((struct inpcb *)(so)->so_pcb)
+
+#ifdef KERNEL
+/*
+ * Kernel-only prototypes for the pcb routines.  in_pcblookup() returns
+ * the matching pcb or a null pointer; in_rtchange() matches the
+ * notify-callback signature taken by in_pcbnotify().
+ */
+int	 in_losing __P((struct inpcb *));
+int	 in_pcballoc __P((struct socket *, struct inpcb *));
+int	 in_pcbbind __P((struct inpcb *, struct mbuf *));
+int	 in_pcbconnect __P((struct inpcb *, struct mbuf *));
+int	 in_pcbdetach __P((struct inpcb *));
+int	 in_pcbdisconnect __P((struct inpcb *));
+struct inpcb *
+	 in_pcblookup __P((struct inpcb *,
+	    struct in_addr, u_int, struct in_addr, u_int, int));
+int	 in_pcbnotify __P((struct inpcb *, struct sockaddr *,
+	    u_int, struct in_addr, u_int, int, void (*)(struct inpcb *, int)));
+void	 in_rtchange __P((struct inpcb *, int));
+int	 in_setpeeraddr __P((struct inpcb *, struct mbuf *));
+int	 in_setsockaddr __P((struct inpcb *, struct mbuf *));
+#endif
diff --git a/sys/netinet/in_proto.c b/sys/netinet/in_proto.c
new file mode 100644
index 00000000000..00916b4ce1a
--- /dev/null
+++ b/sys/netinet/in_proto.c
@@ -0,0 +1,183 @@
+/*
+ * Copyright (c) 1982, 1986, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)in_proto.c	8.1 (Berkeley) 6/10/93
+ */
+
+#include <sys/param.h>
+#include <sys/socket.h>
+#include <sys/protosw.h>
+#include <sys/domain.h>
+#include <sys/mbuf.h>
+
+#include <net/if.h>
+#include <net/radix.h>
+#include <net/route.h>
+
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/ip.h>
+#include <netinet/ip_var.h>
+#include <netinet/ip_icmp.h>
+#include <netinet/in_pcb.h>
+#include <netinet/igmp_var.h>
+#include <netinet/tcp.h>
+#include <netinet/tcp_fsm.h>
+#include <netinet/tcp_seq.h>
+#include <netinet/tcp_timer.h>
+#include <netinet/tcp_var.h>
+#include <netinet/tcpip.h>
+#include <netinet/tcp_debug.h>
+#include <netinet/udp.h>
+#include <netinet/udp_var.h>
+/*
+ * TCP/IP protocol family: IP, ICMP, UDP, TCP.
+ *
+ * The declarations below cover the entry points of the optional
+ * protocols (NSIP, TPIP, EON) that inetsw[] references when the
+ * corresponding option is defined.
+ */
+
+#ifdef NSIP
+void	idpip_input(), nsip_ctlinput();
+#endif
+
+#ifdef TPIP
+void	tpip_input(), tpip_ctlinput(), tp_ctloutput();
+int	tp_init(), tp_slowtimo(), tp_drain(), tp_usrreq();
+#endif
+
+#ifdef EON
+void	eoninput(), eonctlinput(), eonprotoinit();
+#endif /* EON */
+
+/* Defined after inetsw[], which needs its address in every entry. */
+extern	struct domain inetdomain;
+
+/*
+ * Protocol switch table for the internet domain, one entry per
+ * protocol.  The initializers are positional.
+ * NOTE(review): the layout appears to be
+ *	type, domain, protocol, flags,
+ *	input, output, ctlinput, ctloutput,
+ *	usrreq,
+ *	init, fasttimo, slowtimo, drain, sysctl
+ * -- confirm against struct protosw in <sys/protosw.h>.
+ * Entries that stop short leave the remaining members zero.
+ * The first entry (type 0, protocol 0) carries the IP-level hooks and
+ * the last is the raw wildcard entry.
+ */
+struct protosw inetsw[] = {
+{ 0,		&inetdomain,	0,		0,
+  0,		ip_output,	0,		0,
+  0,
+  ip_init,	0,		ip_slowtimo,	ip_drain,	ip_sysctl
+},
+{ SOCK_DGRAM,	&inetdomain,	IPPROTO_UDP,	PR_ATOMIC|PR_ADDR,
+  udp_input,	0,		udp_ctlinput,	ip_ctloutput,
+  udp_usrreq,
+  udp_init,	0,		0,		0,		udp_sysctl
+},
+{ SOCK_STREAM,	&inetdomain,	IPPROTO_TCP,	PR_CONNREQUIRED|PR_WANTRCVD,
+  tcp_input,	0,		tcp_ctlinput,	tcp_ctloutput,
+  tcp_usrreq,
+  tcp_init,	tcp_fasttimo,	tcp_slowtimo,	tcp_drain,
+},
+{ SOCK_RAW,	&inetdomain,	IPPROTO_RAW,	PR_ATOMIC|PR_ADDR,
+  rip_input,	rip_output,	0,		rip_ctloutput,
+  rip_usrreq,
+  0,		0,		0,		0,
+},
+{ SOCK_RAW,	&inetdomain,	IPPROTO_ICMP,	PR_ATOMIC|PR_ADDR,
+  icmp_input,	rip_output,	0,		rip_ctloutput,
+  rip_usrreq,
+  0,		0,		0,		0,		icmp_sysctl
+},
+{ SOCK_RAW,	&inetdomain,	IPPROTO_IGMP,	PR_ATOMIC|PR_ADDR,
+  igmp_input,	rip_output,	0,		rip_ctloutput,
+  rip_usrreq,
+  igmp_init,	igmp_fasttimo,	0,		0,
+},
+#ifdef TPIP
+{ SOCK_SEQPACKET,&inetdomain,	IPPROTO_TP,	PR_CONNREQUIRED|PR_WANTRCVD,
+  tpip_input,	0,		tpip_ctlinput,	tp_ctloutput,
+  tp_usrreq,
+  tp_init,	0,		tp_slowtimo,	tp_drain,
+},
+#endif
+/* EON (ISO CLNL over IP) */
+#ifdef EON
+{ SOCK_RAW,	&inetdomain,	IPPROTO_EON,	0,
+  eoninput,	0,		eonctlinput,		0,
+  0,
+  eonprotoinit,	0,		0,		0,
+},
+#endif
+#ifdef NSIP
+{ SOCK_RAW,	&inetdomain,	IPPROTO_IDP,	PR_ATOMIC|PR_ADDR,
+  idpip_input,	rip_output,	nsip_ctlinput,	0,
+  rip_usrreq,
+  0,		0,		0,		0,
+},
+#endif
+	/* raw wildcard */
+{ SOCK_RAW,	&inetdomain,	0,		PR_ATOMIC|PR_ADDR,
+  rip_input,	rip_output,	0,		rip_ctloutput,
+  rip_usrreq,
+  rip_init,	0,		0,		0,
+},
+};
+
+/*
+ * The internet domain.  It spans inetsw[] from its first entry to the
+ * address one past its last entry, and supplies rn_inithead together
+ * with the values 32 and sizeof(struct sockaddr_in).
+ * NOTE(review): those last three presumably configure the routing
+ * table for this family -- confirm against struct domain.
+ */
+struct domain inetdomain =
+    { AF_INET, "internet", 0, 0, 0, 
+      inetsw, &inetsw[sizeof(inetsw)/sizeof(inetsw[0])], 0,
+      rn_inithead, 32, sizeof(struct sockaddr_in) };
+
+/*
+ * IMP link protocol family: a single raw entry, compiled in only when
+ * NIMP > 0 (NIMP presumably comes from the generated "imp.h").
+ */
+#include "imp.h"
+#if NIMP > 0
+extern	struct domain impdomain;
+int	rimp_output(), hostslowtimo();
+
+struct protosw impsw[] = {
+{ SOCK_RAW,	&impdomain,	0,		PR_ATOMIC|PR_ADDR,
+  0,		rimp_output,	0,		0,
+  rip_usrreq,
+  0,		0,		hostslowtimo,	0,
+},
+};
+
+/* Spans impsw[] from its first entry to one past its last. */
+struct domain impdomain =
+    { AF_IMPLINK, "imp", 0, 0, 0,
+      impsw, &impsw[sizeof (impsw)/sizeof(impsw[0])] };
+#endif
+
+#include "hy.h"
+#if NHY > 0
+/*
+ * HYPERchannel protocol family: raw interface.
+ * Compiled in only when NHY > 0 (NHY presumably comes from the
+ * generated "hy.h").
+ */
+int	rhy_output();
+extern	struct domain hydomain;
+
+struct protosw hysw[] = {
+{ SOCK_RAW,	&hydomain,	0,		PR_ATOMIC|PR_ADDR,
+  0,		rhy_output,	0,		0,
+  rip_usrreq,
+  0,		0,		0,		0,
+},
+};
+
+/* Spans hysw[] from its first entry to one past its last. */
+struct domain hydomain =
+    { AF_HYLINK, "hy", 0, 0, 0, hysw, &hysw[sizeof (hysw)/sizeof(hysw[0])] };
+#endif
diff --git a/sys/netinet/in_systm.h b/sys/netinet/in_systm.h
new file mode 100644
index 00000000000..cbd8e539a1e
--- /dev/null
+++ b/sys/netinet/in_systm.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 1982, 1986, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)in_systm.h	8.1 (Berkeley) 6/10/93
+ */
+
+/*
+ * Miscellaneous internetwork
+ * definitions for kernel.
+ */
+
+/*
+ * Network types.
+ *
+ * Internally the system keeps counters in the headers with the bytes
+ * swapped so that VAX instructions will work on them.  It reverses
+ * the bytes before transmission at each protocol level.  The n_ types
+ * represent the types with the bytes in ``high-ender'' order.
+ *
+ * The n_ types are plain typedefs of the unsigned host types, so the
+ * compiler does not enforce the byte-order distinction; they only
+ * document it.
+ */
+typedef u_short n_short;		/* short as received from the net */
+typedef u_long	n_long;			/* long as received from the net */
+
+typedef	u_long	n_time;			/* ms since 00:00 GMT, byte rev */
+
+#ifdef KERNEL
+/* Returns an n_time; presumably the current time of day -- confirm. */
+n_time	 iptime __P((void));
+#endif
diff --git a/sys/netinet/in_var.h b/sys/netinet/in_var.h
new file mode 100644
index 00000000000..8218f0b74a3
--- /dev/null
+++ b/sys/netinet/in_var.h
@@ -0,0 +1,200 @@
+/*
+ * Copyright (c) 1985, 1986, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)in_var.h	8.1 (Berkeley) 6/10/93
+ */
+
+/*
+ * Interface address, Internet version.  One of these structures
+ * is allocated for each interface with an Internet address.
+ * The ifaddr structure contains the protocol-independent part
+ * of the structure and is assumed to be first.
+ * The structures are chained through ia_next, starting at the
+ * global in_ifaddr.
+ */
+struct in_ifaddr {
+	struct	ifaddr ia_ifa;		/* protocol-independent info */
+#define	ia_ifp		ia_ifa.ifa_ifp
+#define ia_flags	ia_ifa.ifa_flags
+					/* ia_{,sub}net{,mask} in host order */
+	u_long	ia_net;			/* network number of interface */
+	u_long	ia_netmask;		/* mask of net part */
+	u_long	ia_subnet;		/* subnet number, including net */
+	u_long	ia_subnetmask;		/* mask of subnet part */
+	struct	in_addr ia_netbroadcast; /* to recognize net broadcasts */
+	struct	in_ifaddr *ia_next;	/* next in list of internet addresses */
+	struct	sockaddr_in ia_addr;	/* the interface's address (see IA_SIN) */
+	struct	sockaddr_in ia_dstaddr; /* destination or broadcast address */
+#define	ia_broadaddr	ia_dstaddr
+	struct	sockaddr_in ia_sockmask; /* reserve space for general netmask */
+	struct	in_multi *ia_multiaddrs; /* list of multicast addresses */
+};
+
+/*
+ * Request naming an interface together with an address, a
+ * broadcast (or destination) address and a mask.
+ * NOTE(review): presumably the argument of the "add alias" address
+ * ioctl handled by in_control() -- confirm.
+ */
+struct	in_aliasreq {
+	char	ifra_name[IFNAMSIZ];		/* if name, e.g. "en0" */
+	struct	sockaddr_in ifra_addr;
+	struct	sockaddr_in ifra_broadaddr;
+#define ifra_dstaddr ifra_broadaddr
+	struct	sockaddr_in ifra_mask;
+};
+/*
+ * Given a pointer to an in_ifaddr (ifaddr),
+ * return a pointer to the addr as a sockaddr_in.
+ */
+#define	IA_SIN(ia) (&(((struct in_ifaddr *)(ia))->ia_addr))
+
+/*
+ * Given an in_addr (network byte order) and a pointer to an in_ifaddr
+ * (ifaddr), return the bits of the address that lie outside the
+ * interface's subnet mask, in host byte order.
+ *
+ * The cast is applied to ifa itself before the member access; the
+ * earlier form cast the result of (ifa)->ia_subnetmask and had
+ * unbalanced parentheses, so it could not be expanded.
+ */
+#define IN_LNAOF(in, ifa) \
+	(ntohl((in).s_addr) & ~((struct in_ifaddr *)(ifa))->ia_subnetmask)
+
+#ifdef	KERNEL
+/* Head of the list of in_ifaddr structures, linked through ia_next. */
+extern	struct	in_ifaddr *in_ifaddr;
+extern	struct	ifqueue	ipintrq;		/* ip packet input queue */
+void	in_socktrim __P((struct sockaddr_in *));
+
+
+/*
+ * Macro for finding the interface (ifnet structure) corresponding to one
+ * of our IP addresses.
+ *
+ * Scans the in_ifaddr list for an entry whose address equals addr and
+ * stores its ia_ifp in ifp, or NULL when no entry matches.
+ * The expansion is a brace-enclosed block that declares a local named
+ * "ia", so the arguments must not themselves refer to a variable of
+ * that name; addr is evaluated once per list entry examined.
+ */
+#define INADDR_TO_IFP(addr, ifp) \
+	/* struct in_addr addr; */ \
+	/* struct ifnet *ifp; */ \
+{ \
+	register struct in_ifaddr *ia; \
+\
+	for (ia = in_ifaddr; \
+	    ia != NULL && IA_SIN(ia)->sin_addr.s_addr != (addr).s_addr; \
+	    ia = ia->ia_next) \
+		 continue; \
+	(ifp) = (ia == NULL) ? NULL : ia->ia_ifp; \
+}
+
+/*
+ * Macro for finding the internet address structure (in_ifaddr) corresponding
+ * to a given interface (ifnet structure).
+ *
+ * Leaves ia pointing at the first entry on the in_ifaddr list whose
+ * ia_ifp equals ifp, or NULL when there is none.  ia must be an lvalue;
+ * both arguments are evaluated several times.
+ */
+#define IFP_TO_IA(ifp, ia) \
+	/* struct ifnet *ifp; */ \
+	/* struct in_ifaddr *ia; */ \
+{ \
+	for ((ia) = in_ifaddr; \
+	    (ia) != NULL && (ia)->ia_ifp != (ifp); \
+	    (ia) = (ia)->ia_next) \
+		continue; \
+}
+#endif
+
+/*
+ * Internet multicast address structure.  There is one of these for each IP
+ * multicast group to which this host belongs on a given network interface.
+ * They are kept in a linked list, rooted in the interface's in_ifaddr
+ * structure (ia_multiaddrs) and chained through inm_next.
+ */
+struct in_multi {
+	struct	in_addr inm_addr;	/* IP multicast address */
+	struct	ifnet *inm_ifp;		/* back pointer to ifnet */
+	struct	in_ifaddr *inm_ia;	/* back pointer to in_ifaddr */
+	u_int	inm_refcount;		/* no. membership claims by sockets */
+	u_int	inm_timer;		/* IGMP membership report timer */
+	struct	in_multi *inm_next;	/* ptr to next multicast address */
+};
+
+#ifdef KERNEL
+/*
+ * Structure used by macros below to remember position when stepping through
+ * all of the in_multi records.
+ */
+struct in_multistep {
+	struct in_ifaddr *i_ia;		/* next in_ifaddr whose list to visit */
+	struct in_multi *i_inm;		/* next record on the current list */
+};
+
+/*
+ * Macro for looking up the in_multi record for a given IP multicast address
+ * on a given interface.  If no matching record is found, "inm" returns NULL.
+ *
+ * The in_ifaddr for ifp is located with IFP_TO_IA and its ia_multiaddrs
+ * list is searched; inm is also NULL when the interface has no
+ * in_ifaddr.  The expansion is a brace-enclosed block that declares a
+ * local named "ia", so the arguments must not refer to a variable of
+ * that name.  Every use of the inm argument is parenthesized so that
+ * any lvalue expression can be passed.
+ */
+#define IN_LOOKUP_MULTI(addr, ifp, inm) \
+	/* struct in_addr addr; */ \
+	/* struct ifnet *ifp; */ \
+	/* struct in_multi *inm; */ \
+{ \
+	register struct in_ifaddr *ia; \
+\
+	IFP_TO_IA((ifp), ia); \
+	if (ia == NULL) \
+		(inm) = NULL; \
+	else \
+		for ((inm) = ia->ia_multiaddrs; \
+		    (inm) != NULL && (inm)->inm_addr.s_addr != (addr).s_addr; \
+		     (inm) = (inm)->inm_next) \
+			 continue; \
+}
+
+/*
+ * Macro to step through all of the in_multi records, one at a time.
+ * The current position is remembered in "step", which the caller must
+ * provide.  IN_FIRST_MULTI(), below, must be called to initialize "step"
+ * and get the first record.  Both macros return a NULL "inm" when there
+ * are no remaining records.
+ *
+ * If step holds a pending record it is returned and step advances
+ * along that list.  Otherwise the remaining in_ifaddr entries are
+ * visited in turn until one with a non-empty ia_multiaddrs list is
+ * found; its first record is returned and step remembers the rest.
+ */
+#define IN_NEXT_MULTI(step, inm) \
+	/* struct in_multistep  step; */ \
+	/* struct in_multi *inm; */ \
+{ \
+	if (((inm) = (step).i_inm) != NULL) \
+		(step).i_inm = (inm)->inm_next; \
+	else \
+		while ((step).i_ia != NULL) { \
+			(inm) = (step).i_ia->ia_multiaddrs; \
+			(step).i_ia = (step).i_ia->ia_next; \
+			if ((inm) != NULL) { \
+				(step).i_inm = (inm)->inm_next; \
+				break; \
+			} \
+		} \
+}
+
+/*
+ * Start a walk over all in_multi records: point "step" at the head of
+ * the in_ifaddr list with no pending record, then fetch the first
+ * record (or NULL) into "inm" with IN_NEXT_MULTI().
+ */
+#define IN_FIRST_MULTI(step, inm) \
+	/* struct in_multistep step; */ \
+	/* struct in_multi *inm; */ \
+{ \
+	(step).i_ia = in_ifaddr; \
+	(step).i_inm = NULL; \
+	IN_NEXT_MULTI((step), (inm)); \
+}
+
+/* Kernel-only prototypes for the interface address and multicast routines. */
+int	in_ifinit __P((struct ifnet *,
+	    struct in_ifaddr *, struct sockaddr_in *, int));
+struct	in_multi *in_addmulti __P((struct in_addr *, struct ifnet *));
+int	in_delmulti __P((struct in_multi *));
+void	in_ifscrub __P((struct ifnet *, struct in_ifaddr *));
+int	in_control __P((struct socket *, int, caddr_t, struct ifnet *));
+#endif
diff --git a/sys/netinet/ip.h b/sys/netinet/ip.h
new file mode 100644
index 00000000000..8a31dfaf13d
--- /dev/null
+++ b/sys/netinet/ip.h
@@ -0,0 +1,168 @@
+/*
+ * Copyright (c) 1982, 1986, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)ip.h	8.1 (Berkeley) 6/10/93
+ */
+
+/*
+ * Definitions for internet protocol version 4.
+ * Per RFC 791, September 1981.
+ */
+#define	IPVERSION	4
+
+/*
+ * Structure of an internet header, naked of options.
+ *
+ * We declare ip_len and ip_off to be short, rather than u_short
+ * pragmatically since otherwise unsigned comparisons can result
+ * against negative integers quite easily, and fail in subtle ways.
+ *
+ * The version and header-length fields share one octet; the order in
+ * which the two bit-fields are declared depends on BYTE_ORDER, so
+ * BYTE_ORDER must be defined before this header is included.
+ */
+struct ip {
+#if BYTE_ORDER == LITTLE_ENDIAN 
+	u_char	ip_hl:4,		/* header length */
+		ip_v:4;			/* version */
+#endif
+#if BYTE_ORDER == BIG_ENDIAN 
+	u_char	ip_v:4,			/* version */
+		ip_hl:4;		/* header length */
+#endif
+	u_char	ip_tos;			/* type of service */
+	short	ip_len;			/* total length */
+	u_short	ip_id;			/* identification */
+	short	ip_off;			/* fragment offset field */
+#define	IP_DF 0x4000			/* dont fragment flag */
+#define	IP_MF 0x2000			/* more fragments flag */
+#define	IP_OFFMASK 0x1fff		/* mask for fragmenting bits */
+	u_char	ip_ttl;			/* time to live */
+	u_char	ip_p;			/* protocol */
+	u_short	ip_sum;			/* checksum */
+	struct	in_addr ip_src,ip_dst;	/* source and dest address */
+};
+
+#define	IP_MAXPACKET	65535		/* maximum packet size */
+
+/*
+ * Definitions for IP type of service (ip_tos)
+ * Each value is a single bit that may be or'ed into ip_tos.
+ */
+#define	IPTOS_LOWDELAY		0x10
+#define	IPTOS_THROUGHPUT	0x08
+#define	IPTOS_RELIABILITY	0x04
+
+/*
+ * Definitions for IP precedence (also in ip_tos) (hopefully unused)
+ *
+ * Precedence occupies the top three bits of ip_tos (mask 0xe0), so the
+ * values step by 0x20 from routine (0x00) up to network control.
+ * Routine was formerly defined as 0x10, which is not a precedence
+ * value at all: it is the IPTOS_LOWDELAY bit.
+ */
+#define	IPTOS_PREC_NETCONTROL		0xe0
+#define	IPTOS_PREC_INTERNETCONTROL	0xc0
+#define	IPTOS_PREC_CRITIC_ECP		0xa0
+#define	IPTOS_PREC_FLASHOVERRIDE	0x80
+#define	IPTOS_PREC_FLASH		0x60
+#define	IPTOS_PREC_IMMEDIATE		0x40
+#define	IPTOS_PREC_PRIORITY		0x20
+#define	IPTOS_PREC_ROUTINE		0x00
+
+/*
+ * Definitions for options.
+ *
+ * IPOPT_COPIED, IPOPT_CLASS and IPOPT_NUMBER mask the three sub-fields
+ * of an option type octet; the class field takes one of the four
+ * IPOPT_CONTROL .. IPOPT_RESERVED2 values.
+ */
+#define	IPOPT_COPIED(o)		((o)&0x80)
+#define	IPOPT_CLASS(o)		((o)&0x60)
+#define	IPOPT_NUMBER(o)		((o)&0x1f)
+
+#define	IPOPT_CONTROL		0x00
+#define	IPOPT_RESERVED1		0x20
+#define	IPOPT_DEBMEAS		0x40
+#define	IPOPT_RESERVED2		0x60
+
+/* Complete option type octets. */
+#define	IPOPT_EOL		0		/* end of option list */
+#define	IPOPT_NOP		1		/* no operation */
+
+#define	IPOPT_RR		7		/* record packet route */
+#define	IPOPT_TS		68		/* timestamp */
+#define	IPOPT_SECURITY		130		/* provide s,c,h,tcc */
+#define	IPOPT_LSRR		131		/* loose source route */
+#define	IPOPT_SATID		136		/* satnet id */
+#define	IPOPT_SSRR		137		/* strict source route */
+
+/*
+ * Offsets to fields in options other than EOL and NOP.
+ */
+#define	IPOPT_OPTVAL		0		/* option ID */
+#define	IPOPT_OLEN		1		/* option length */
+#define IPOPT_OFFSET		2		/* offset within option */
+#define	IPOPT_MINOFF		4		/* min value of above */
+
+/*
+ * Time stamp option structure.
+ *
+ * The flag and overflow fields share one octet; the order in which the
+ * two bit-fields are declared depends on BYTE_ORDER.  The union holds
+ * either bare timestamps or (address, timestamp) pairs; both arrays
+ * are declared with a single element.
+ * NOTE(review): presumably the real entry count follows from ipt_len
+ * -- confirm against the option-processing code.
+ */
+struct	ip_timestamp {
+	u_char	ipt_code;		/* IPOPT_TS */
+	u_char	ipt_len;		/* size of structure (variable) */
+	u_char	ipt_ptr;		/* index of current entry */
+#if BYTE_ORDER == LITTLE_ENDIAN 
+	u_char	ipt_flg:4,		/* flags, see below */
+		ipt_oflw:4;		/* overflow counter */
+#endif
+#if BYTE_ORDER == BIG_ENDIAN 
+	u_char	ipt_oflw:4,		/* overflow counter */
+		ipt_flg:4;		/* flags, see below */
+#endif
+	union ipt_timestamp {
+		n_long	ipt_time[1];
+		struct	ipt_ta {
+			struct in_addr ipt_addr;
+			n_long ipt_time;
+		} ipt_ta[1];
+	} ipt_timestamp;
+};
+
+/* flag bits for ipt_flg (values of the ipt_flg field of ip_timestamp) */
+#define	IPOPT_TS_TSONLY		0		/* timestamps only */
+#define	IPOPT_TS_TSANDADDR	1		/* timestamps and addresses */
+#define	IPOPT_TS_PRESPEC	3		/* specified modules only */
+
+/* bits for security (not byte swapped) */
+/* 16-bit classification codes; presumably for the IPOPT_SECURITY option. */
+#define	IPOPT_SECUR_UNCLASS	0x0000
+#define	IPOPT_SECUR_CONFID	0xf135
+#define	IPOPT_SECUR_EFTO	0x789a
+#define	IPOPT_SECUR_MMMM	0xbc4d
+#define	IPOPT_SECUR_RESTR	0xaf13
+#define	IPOPT_SECUR_SECRET	0xd788
+#define	IPOPT_SECUR_TOPSECRET	0x6bc5
+
+/*
+ * Internet implementation parameters.
+ * MAXTTL is the largest value the one-octet ip_ttl field can hold.
+ */
+#define	MAXTTL		255		/* maximum time to live (seconds) */
+#define	IPDEFTTL	64		/* default ttl, from RFC 1340 */
+#define	IPFRAGTTL	60		/* time to live for frags, slowhz */
+#define	IPTTLDEC	1		/* subtracted when forwarding */
+
+#define	IP_MSS		576		/* default maximum segment size */
diff --git a/sys/netinet/ip_icmp.c b/sys/netinet/ip_icmp.c
new file mode 100644
index 00000000000..c9b82bca908
--- /dev/null
+++ b/sys/netinet/ip_icmp.c
@@ -0,0 +1,591 @@
+/*
+ * Copyright (c) 1982, 1986, 1988, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)ip_icmp.c	8.2 (Berkeley) 1/4/94
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/protosw.h>
+#include <sys/socket.h>
+#include <sys/time.h>
+#include <sys/kernel.h>
+
+#include <net/if.h>
+#include <net/route.h>
+
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/in_var.h>
+#include <netinet/ip.h>
+#include <netinet/ip_icmp.h>
+#include <netinet/icmp_var.h>
+
+/*
+ * ICMP routines: error generation, receive packet processing, and
+ * routines to turnaround packets back to the originator, and
+ * host table maintenance routines.
+ */
+
+int	icmpmaskrepl = 0;	/* nonzero: answer ICMP_MASKREQ (icmp_sysctl) */
+#ifdef ICMPPRINTFS
+int	icmpprintfs = 0;	/* nonzero: print debugging traces */
+#endif
+
+extern	struct protosw inetsw[];	/* indexed through ip_protox[] */
+
+/*
+ * Build an ICMP error of the given type and code about packet n and pass it
+ * to icmp_reflect().  The offending packet n is freed on every path.
+ */
+void
+icmp_error(n, type, code, dest, destifp)
+	struct mbuf *n;
+	int type, code;
+	n_long dest;			/* gateway, used for ICMP_REDIRECT */
+	struct ifnet *destifp;		/* MTU source for NEEDFRAG; may be 0 */
+{
+	register struct ip *oip = mtod(n, struct ip *), *nip;
+	register unsigned oiplen = oip->ip_hl << 2;	/* old header bytes */
+	register struct icmp *icp;
+	register struct mbuf *m;
+	unsigned icmplen;		/* bytes of n quoted in the error */
+
+#ifdef ICMPPRINTFS
+	if (icmpprintfs)
+		printf("icmp_error(%x, %d, %d)\n", oip, type, code);
+#endif
+	if (type != ICMP_REDIRECT)
+		icmpstat.icps_error++;
+	/*
+	 * Don't send error if not the first fragment of message.
+	 * Don't send an error about an ICMP message unless that message
+	 * is a known informational type (redirects skip this test).
+	 */
+	if (oip->ip_off &~ (IP_MF|IP_DF))
+		goto freeit;
+	if (oip->ip_p == IPPROTO_ICMP && type != ICMP_REDIRECT &&
+	  n->m_len >= oiplen + ICMP_MINLEN &&
+	  !ICMP_INFOTYPE(((struct icmp *)((caddr_t)oip + oiplen))->icmp_type)) {
+		icmpstat.icps_oldicmp++;
+		goto freeit;
+	}
+	/* Don't send error in response to a multicast or broadcast packet */
+	if (n->m_flags & (M_BCAST|M_MCAST))
+		goto freeit;
+	/*
+	 * First, formulate icmp message
+	 */
+	m = m_gethdr(M_DONTWAIT, MT_HEADER);
+	if (m == NULL)
+		goto freeit;
+	icmplen = oiplen + min(8, oip->ip_len);	/* hdr + at most 8 more bytes */
+	m->m_len = icmplen + ICMP_MINLEN;
+	MH_ALIGN(m, m->m_len);
+	icp = mtod(m, struct icmp *);
+	if ((u_int)type > ICMP_MAXTYPE)
+		panic("icmp_error");
+	icmpstat.icps_outhist[type]++;
+	icp->icmp_type = type;
+	if (type == ICMP_REDIRECT)
+		icp->icmp_gwaddr.s_addr = dest;
+	else {
+		icp->icmp_void = 0;
+		/*
+		 * The following assignments assume an overlay with the
+		 * zeroed icmp_void field.
+		 */
+		if (type == ICMP_PARAMPROB) {
+			icp->icmp_pptr = code;	/* code carries the pointer */
+			code = 0;
+		} else if (type == ICMP_UNREACH &&
+			code == ICMP_UNREACH_NEEDFRAG && destifp) {
+			icp->icmp_nextmtu = htons(destifp->if_mtu);
+		}
+	}
+
+	icp->icmp_code = code;
+	bcopy((caddr_t)oip, (caddr_t)&icp->icmp_ip, icmplen);
+	nip = &icp->icmp_ip;
+	nip->ip_len = htons((u_short)(nip->ip_len + oiplen));
+
+	/*
+	 * The quoted header's ip_len now includes oiplen, in network order.
+	 * Now, copy old ip header (without options) in front of icmp message.
+	 */
+	if (m->m_data - sizeof(struct ip) < m->m_pktdat)
+		panic("icmp len");	/* no room to prepend the header */
+	m->m_data -= sizeof(struct ip);
+	m->m_len += sizeof(struct ip);
+	m->m_pkthdr.len = m->m_len;
+	m->m_pkthdr.rcvif = n->m_pkthdr.rcvif;
+	nip = mtod(m, struct ip *);
+	bcopy((caddr_t)oip, (caddr_t)nip, sizeof(struct ip));
+	nip->ip_len = m->m_len;
+	nip->ip_hl = sizeof(struct ip) >> 2;	/* options were not copied */
+	nip->ip_p = IPPROTO_ICMP;
+	nip->ip_tos = 0;
+	icmp_reflect(m);
+
+freeit:
+	m_freem(n);		/* reached on success as well as on drop */
+}
+/* Scratch addresses filled in per packet by icmp_input()/icmp_reflect(). */
+static struct sockaddr_in icmpsrc = { sizeof (struct sockaddr_in), AF_INET };
+static struct sockaddr_in icmpdst = { sizeof (struct sockaddr_in), AF_INET };
+static struct sockaddr_in icmpgw = { sizeof (struct sockaddr_in), AF_INET };
+struct sockaddr_in icmpmask = { 8, 0 };	/* not referenced in this file */
+
+/*
+ * Process a received ICMP message.  m begins with the IP header (hlen bytes).
+ */
+void
+icmp_input(m, hlen)
+	register struct mbuf *m;
+	int hlen;
+{
+	register struct icmp *icp;
+	register struct ip *ip = mtod(m, struct ip *);
+	int icmplen = ip->ip_len;	/* hlen is added back at reflect: */
+	register int i;
+	struct in_ifaddr *ia;
+	void (*ctlfunc) __P((int, struct sockaddr *, struct ip *));
+	int code;
+	extern u_char ip_protox[];
+
+	/*
+	 * Locate icmp structure in mbuf, and check
+	 * that not corrupted and of at least minimum length.
+	 */
+#ifdef ICMPPRINTFS
+	if (icmpprintfs)
+		printf("icmp_input from %x to %x, len %d\n",
+			ntohl(ip->ip_src.s_addr), ntohl(ip->ip_dst.s_addr),
+			icmplen);
+#endif
+	if (icmplen < ICMP_MINLEN) {
+		icmpstat.icps_tooshort++;
+		goto freeit;
+	}
+	i = hlen + min(icmplen, ICMP_ADVLENMIN);	/* wanted in first mbuf */
+	if (m->m_len < i && (m = m_pullup(m, i)) == 0)  {
+		icmpstat.icps_tooshort++;
+		return;			/* m is 0 here: nothing to free */
+	}
+	ip = mtod(m, struct ip *);
+	m->m_len -= hlen;		/* hide IP header while checksumming */
+	m->m_data += hlen;
+	icp = mtod(m, struct icmp *);
+	if (in_cksum(m, icmplen)) {
+		icmpstat.icps_checksum++;
+		goto freeit;
+	}
+	m->m_len += hlen;		/* expose IP header again */
+	m->m_data -= hlen;
+
+#ifdef ICMPPRINTFS
+	/*
+	 * Debug trace of the received type and code.
+	 */
+	if (icmpprintfs)
+		printf("icmp_input, type %d code %d\n", icp->icmp_type,
+		    icp->icmp_code);
+#endif
+	if (icp->icmp_type > ICMP_MAXTYPE)
+		goto raw;		/* unknown type: raw listeners only */
+	icmpstat.icps_inhist[icp->icmp_type]++;
+	code = icp->icmp_code;
+	switch (icp->icmp_type) {	/* message type specific processing */
+
+	case ICMP_UNREACH:
+		switch (code) {
+			case ICMP_UNREACH_NET:
+			case ICMP_UNREACH_HOST:
+			case ICMP_UNREACH_PROTOCOL:
+			case ICMP_UNREACH_PORT:
+			case ICMP_UNREACH_SRCFAIL:
+				code += PRC_UNREACH_NET;
+				break;	/* PRC_UNREACH_* assumed parallel */
+
+			case ICMP_UNREACH_NEEDFRAG:
+				code = PRC_MSGSIZE;
+				break;
+				
+			case ICMP_UNREACH_NET_UNKNOWN:
+			case ICMP_UNREACH_NET_PROHIB:
+			case ICMP_UNREACH_TOSNET:
+				code = PRC_UNREACH_NET;
+				break;
+
+			case ICMP_UNREACH_HOST_UNKNOWN:
+			case ICMP_UNREACH_ISOLATED:
+			case ICMP_UNREACH_HOST_PROHIB:
+			case ICMP_UNREACH_TOSHOST:
+				code = PRC_UNREACH_HOST;
+				break;
+
+			default:
+				goto badcode;
+		}
+		goto deliver;
+
+	case ICMP_TIMXCEED:
+		if (code > 1)
+			goto badcode;
+		code += PRC_TIMXCEED_INTRANS;
+		goto deliver;
+
+	case ICMP_PARAMPROB:
+		if (code > 1)
+			goto badcode;
+		code = PRC_PARAMPROB;
+		goto deliver;
+
+	case ICMP_SOURCEQUENCH:
+		if (code)
+			goto badcode;
+		code = PRC_QUENCH;
+	deliver:
+		/*
+		 * Problem with datagram; tell the protocol of the quoted header.
+		 */
+		if (icmplen < ICMP_ADVLENMIN || icmplen < ICMP_ADVLEN(icp) ||
+		    icp->icmp_ip.ip_hl < (sizeof(struct ip) >> 2)) {
+			icmpstat.icps_badlen++;
+			goto freeit;
+		}
+		NTOHS(icp->icmp_ip.ip_len);
+#ifdef ICMPPRINTFS
+		if (icmpprintfs)
+			printf("deliver to protocol %d\n", icp->icmp_ip.ip_p);
+#endif
+		icmpsrc.sin_addr = icp->icmp_ip.ip_dst;
+		if (ctlfunc = inetsw[ip_protox[icp->icmp_ip.ip_p]].pr_ctlinput)
+			(*ctlfunc)(code, (struct sockaddr *)&icmpsrc,
+			    &icp->icmp_ip);
+		break;			/* then on to the raw listeners */
+
+	badcode:
+		icmpstat.icps_badcode++;
+		break;
+
+	case ICMP_ECHO:
+		icp->icmp_type = ICMP_ECHOREPLY;
+		goto reflect;
+
+	case ICMP_TSTAMP:
+		if (icmplen < ICMP_TSLEN) {
+			icmpstat.icps_badlen++;
+			break;
+		}
+		icp->icmp_type = ICMP_TSTAMPREPLY;
+		icp->icmp_rtime = iptime();
+		icp->icmp_ttime = icp->icmp_rtime;	/* bogus, do later! */
+		goto reflect;
+		
+	case ICMP_MASKREQ:
+#define	satosin(sa)	((struct sockaddr_in *)(sa))
+		if (icmpmaskrepl == 0)
+			break;		/* mask replies are switched off */
+		/*
+		 * We are not able to respond with all ones broadcast
+		 * unless we receive it over a point-to-point interface.
+		 */
+		if (icmplen < ICMP_MASKLEN)
+			break;
+		switch (ip->ip_dst.s_addr) {
+
+		case INADDR_BROADCAST:
+		case INADDR_ANY:
+			icmpdst.sin_addr = ip->ip_src;
+			break;
+
+		default:
+			icmpdst.sin_addr = ip->ip_dst;
+		}
+		ia = (struct in_ifaddr *)ifaof_ifpforaddr(
+			    (struct sockaddr *)&icmpdst, m->m_pkthdr.rcvif);
+		if (ia == 0)
+			break;
+		icp->icmp_type = ICMP_MASKREPLY;
+		icp->icmp_mask = ia->ia_sockmask.sin_addr.s_addr;
+		if (ip->ip_src.s_addr == 0) {	/* requester has no address */
+			if (ia->ia_ifp->if_flags & IFF_BROADCAST)
+			    ip->ip_src = satosin(&ia->ia_broadaddr)->sin_addr;
+			else if (ia->ia_ifp->if_flags & IFF_POINTOPOINT)
+			    ip->ip_src = satosin(&ia->ia_dstaddr)->sin_addr;
+		}
+reflect:
+		ip->ip_len += hlen;	/* since ip_input deducts this */
+		icmpstat.icps_reflect++;
+		icmpstat.icps_outhist[icp->icmp_type]++;
+		icmp_reflect(m);
+		return;			/* m now belongs to icmp_reflect() */
+
+	case ICMP_REDIRECT:
+		if (code > 3)
+			goto badcode;
+		if (icmplen < ICMP_ADVLENMIN || icmplen < ICMP_ADVLEN(icp) ||
+		    icp->icmp_ip.ip_hl < (sizeof(struct ip) >> 2)) {
+			icmpstat.icps_badlen++;
+			break;
+		}
+		/*
+		 * Short circuit routing redirects to force
+		 * immediate change in the kernel's routing
+		 * tables.  The message is also handed to anyone
+		 * listening on a raw socket (e.g. the routing
+		 * daemon for use in updating its tables).
+		 */
+		icmpgw.sin_addr = ip->ip_src;
+		icmpdst.sin_addr = icp->icmp_gwaddr;
+#ifdef	ICMPPRINTFS
+		if (icmpprintfs)
+			printf("redirect dst %x to %x\n", icp->icmp_ip.ip_dst,
+				icp->icmp_gwaddr);
+#endif
+		icmpsrc.sin_addr = icp->icmp_ip.ip_dst;
+		rtredirect((struct sockaddr *)&icmpsrc,
+		  (struct sockaddr *)&icmpdst,
+		  (struct sockaddr *)0, RTF_GATEWAY | RTF_HOST,
+		  (struct sockaddr *)&icmpgw, (struct rtentry **)0);
+		pfctlinput(PRC_REDIRECT_HOST, (struct sockaddr *)&icmpsrc);
+		break;
+
+	/*
+	 * No kernel processing for the following;
+	 * just fall through to send to raw listener.
+	 */
+	case ICMP_ECHOREPLY:
+	case ICMP_ROUTERADVERT:
+	case ICMP_ROUTERSOLICIT:
+	case ICMP_TSTAMPREPLY:
+	case ICMP_IREQREPLY:
+	case ICMP_MASKREPLY:
+	default:
+		break;
+	}
+
+raw:
+	rip_input(m);		/* every path that leaves the switch ends here */
+	return;
+
+freeit:
+	m_freem(m);
+}
+
+/*
+ * Turn the packet in m around to its sender and hand it to icmp_send().
+ */
+void
+icmp_reflect(m)
+	struct mbuf *m;
+{
+	register struct ip *ip = mtod(m, struct ip *);
+	register struct in_ifaddr *ia;
+	struct in_addr t;		/* old dst, then the reply's src */
+	struct mbuf *opts = 0, *ip_srcroute();
+	int optlen = (ip->ip_hl << 2) - sizeof(struct ip);
+
+	if (!in_canforward(ip->ip_src) &&
+	    ((ntohl(ip->ip_src.s_addr) & IN_CLASSA_NET) !=
+	     (IN_LOOPBACKNET << IN_CLASSA_NSHIFT))) {
+		m_freem(m);	/* Bad return address */
+		goto done;	/* Ip_output() will check for broadcast */
+	}
+	t = ip->ip_dst;
+	ip->ip_dst = ip->ip_src;
+	/*
+	 * If the incoming packet was addressed directly to us,
+	 * use dst as the src for the reply.  Otherwise (broadcast
+	 * or anonymous), use the address which corresponds
+	 * to the incoming interface.
+	 */
+	for (ia = in_ifaddr; ia; ia = ia->ia_next) {
+		if (t.s_addr == IA_SIN(ia)->sin_addr.s_addr)
+			break;
+		if ((ia->ia_ifp->if_flags & IFF_BROADCAST) &&
+		    t.s_addr == satosin(&ia->ia_broadaddr)->sin_addr.s_addr)
+			break;
+	}
+	icmpdst.sin_addr = t;
+	if (ia == (struct in_ifaddr *)0)
+		ia = (struct in_ifaddr *)ifaof_ifpforaddr(
+			(struct sockaddr *)&icmpdst, m->m_pkthdr.rcvif);
+	/*
+	 * The following happens if the packet was not addressed to us,
+	 * and was received on an interface with no IP address.
+	 */
+	if (ia == (struct in_ifaddr *)0)
+		ia = in_ifaddr;
+	t = IA_SIN(ia)->sin_addr;
+	ip->ip_src = t;
+	ip->ip_ttl = MAXTTL;
+
+	if (optlen > 0) {
+		register u_char *cp;
+		int opt, cnt;
+		u_int len;
+
+		/*
+		 * Retrieve any source routing from the incoming packet;
+		 * add on any record-route, timestamp or security options.
+		 */
+		cp = (u_char *) (ip + 1);
+		if ((opts = ip_srcroute()) == 0 &&
+		    (opts = m_gethdr(M_DONTWAIT, MT_HEADER))) {
+			opts->m_len = sizeof(struct in_addr); /* zero addr */
+			mtod(opts, struct in_addr *)->s_addr = 0;
+		}
+		if (opts) {
+#ifdef ICMPPRINTFS
+		    if (icmpprintfs)
+			    printf("icmp_reflect optlen %d rt %d => ",
+				optlen, opts->m_len);
+#endif
+		    for (cnt = optlen; cnt > 0; cnt -= len, cp += len) {
+			    opt = cp[IPOPT_OPTVAL];
+			    if (opt == IPOPT_EOL)
+				    break;
+			    if (opt == IPOPT_NOP)
+				    len = 1;
+			    else if (cnt <= IPOPT_OLEN)
+				    break;	/* no length octet left */
+			    else if ((len = cp[IPOPT_OLEN]) <= IPOPT_OLEN ||
+				len > cnt)
+				    break;	/* length < 2 or past the end */
+			    /*
+			     * Should check for overflow, but it "can't happen"
+			     */
+			    if (opt == IPOPT_RR || opt == IPOPT_TS || 
+				opt == IPOPT_SECURITY) {
+				    bcopy((caddr_t)cp,
+					mtod(opts, caddr_t) + opts->m_len, len);
+				    opts->m_len += len;
+			    }
+		    }
+		    /* Terminate & pad, if necessary */
+		    if (cnt = opts->m_len % 4) {
+			    for (; cnt < 4; cnt++) {
+				    *(mtod(opts, caddr_t) + opts->m_len) =
+					IPOPT_EOL;
+				    opts->m_len++;
+			    }
+		    }
+#ifdef ICMPPRINTFS
+		    if (icmpprintfs)
+			    printf("%d\n", opts->m_len);
+#endif
+		}
+		/*
+		 * Now strip out original options by copying rest of first
+		 * mbuf's data back, and adjust the IP length.
+		 */
+		ip->ip_len -= optlen;
+		ip->ip_hl = sizeof(struct ip) >> 2;
+		m->m_len -= optlen;
+		if (m->m_flags & M_PKTHDR)
+			m->m_pkthdr.len -= optlen;
+		optlen += sizeof(struct ip);	/* now offset of the payload */
+		bcopy((caddr_t)ip + optlen, (caddr_t)(ip + 1),
+			 (unsigned)(m->m_len - sizeof(struct ip)));
+	}
+	m->m_flags &= ~(M_BCAST|M_MCAST);
+	icmp_send(m, opts);
+done:
+	if (opts)
+		(void)m_free(opts);	/* the option mbuf is released here */
+}
+
+/*
+ * Fill in the ICMP checksum of the packet in m (IP header first) and
+ * pass it, together with any reply options in opts, to ip_output().
+ */
+void
+icmp_send(m, opts)
+	register struct mbuf *m;
+	struct mbuf *opts;
+{
+	register struct ip *iph = mtod(m, struct ip *);
+	register int iphlen = iph->ip_hl << 2;
+	register struct icmp *icmph;
+
+	/* Hide the IP header so the checksum covers the ICMP part only. */
+	m->m_len -= iphlen;
+	m->m_data += iphlen;
+	icmph = mtod(m, struct icmp *);
+	icmph->icmp_cksum = 0;
+	icmph->icmp_cksum = in_cksum(m, iph->ip_len - iphlen);
+	m->m_len += iphlen;
+	m->m_data -= iphlen;
+#ifdef ICMPPRINTFS
+	if (icmpprintfs)
+		printf("icmp_send dst %x src %x\n", iph->ip_dst, iph->ip_src);
+#endif
+	(void) ip_output(m, opts, NULL, 0, NULL);
+}
+
+n_time
+iptime()
+{
+	struct timeval now;
+	u_long msec;	/* ms into the current 24h period, from microtime() */
+
+	microtime(&now);
+	msec = now.tv_usec / 1000 + (now.tv_sec % (24*60*60)) * 1000;
+	return (htonl(msec));
+}
+
+int
+icmp_sysctl(name, namelen, oldp, oldlenp, newp, newlen)
+	int *name;		/* remaining name components */
+	u_int namelen;		/* how many components name holds */
+	void *oldp;		/* old/new buffers go to sysctl_int() as is */
+	size_t *oldlenp;
+	void *newp;
+	size_t newlen;
+{
+
+	/*
+	 * All names at this level are leaves: exactly one component is valid.
+	 */
+	if (namelen != 1)
+		return (ENOTDIR);
+
+	/* icmpmaskrepl is the only variable handled here. */
+	if (name[0] == ICMPCTL_MASKREPL)
+		return (sysctl_int(oldp, oldlenp, newp, newlen, &icmpmaskrepl));
+
+	return (ENOPROTOOPT);
+}
diff --git a/sys/netinet/ip_icmp.h b/sys/netinet/ip_icmp.h
new file mode 100644
index 00000000000..3c3462d3266
--- /dev/null
+++ b/sys/netinet/ip_icmp.h
@@ -0,0 +1,160 @@
+/*
+ * Copyright (c) 1982, 1986, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)ip_icmp.h	8.1 (Berkeley) 6/10/93
+ */
+
+/*
+ * Internet Control Message Protocol Definitions.
+ * Per RFC 792, September 1981.
+ */
+
+/*
+ * Structure of an icmp header: fixed fields, then two type-dependent unions.
+ */
+struct icmp {
+	u_char	icmp_type;		/* type of message, see below */
+	u_char	icmp_code;		/* type sub code */
+	u_short	icmp_cksum;		/* ones complement cksum of struct */
+	union {
+		u_char ih_pptr;			/* ICMP_PARAMPROB */
+		struct in_addr ih_gwaddr;	/* ICMP_REDIRECT */
+		struct ih_idseq {	/* presumably for query types; confirm */
+			n_short	icd_id;
+			n_short	icd_seq;
+		} ih_idseq;
+		int ih_void;			/* cleared by icmp_error() */
+
+		/* ICMP_UNREACH_NEEDFRAG -- Path MTU Discovery (RFC1191) */
+		struct ih_pmtu {
+			n_short ipm_void;    
+			n_short ipm_nextmtu;	/* icmp_error: htons(if_mtu) */
+		} ih_pmtu;
+	} icmp_hun;
+#define	icmp_pptr	icmp_hun.ih_pptr
+#define	icmp_gwaddr	icmp_hun.ih_gwaddr
+#define	icmp_id		icmp_hun.ih_idseq.icd_id
+#define	icmp_seq	icmp_hun.ih_idseq.icd_seq
+#define	icmp_void	icmp_hun.ih_void
+#define	icmp_pmvoid	icmp_hun.ih_pmtu.ipm_void
+#define	icmp_nextmtu	icmp_hun.ih_pmtu.ipm_nextmtu
+	union {
+		struct id_ts {			/* ICMP_TSTAMP and its reply */
+			n_time its_otime;
+			n_time its_rtime;
+			n_time its_ttime;
+		} id_ts;
+		struct id_ip  {			/* quoted header in errors */
+			struct ip idi_ip;
+			/* options and then 64 bits of data */
+		} id_ip;
+		u_long	id_mask;		/* ICMP_MASKREPLY */
+		char	id_data[1];
+	} icmp_dun;
+#define	icmp_otime	icmp_dun.id_ts.its_otime
+#define	icmp_rtime	icmp_dun.id_ts.its_rtime
+#define	icmp_ttime	icmp_dun.id_ts.its_ttime
+#define	icmp_ip		icmp_dun.id_ip.idi_ip
+#define	icmp_mask	icmp_dun.id_mask
+#define	icmp_data	icmp_dun.id_data
+};
+
+/*
+ * Lower bounds on packet lengths for various types.
+ * For the error advice packets we must first ensure that the
+ * packet is large enough to contain the returned ip header.
+ * Only then can we do the check to see if 64 bits of packet
+ * data have been returned, since we need to check the returned
+ * ip header length.
+ */
+#define	ICMP_MINLEN	8				/* abs minimum */
+#define	ICMP_TSLEN	(8 + 3 * sizeof (n_time))	/* timestamp */
+#define	ICMP_MASKLEN	12				/* address mask */
+#define	ICMP_ADVLENMIN	(8 + sizeof (struct ip) + 8)	/* min, no options */
+#define	ICMP_ADVLEN(p)	(8 + ((p)->icmp_ip.ip_hl << 2) + 8)
+	/* N.B.: must separately check that ip_hl >= 5 */
+
+/*
+ * Definition of type and code field values.
+ */
+#define	ICMP_ECHOREPLY		0		/* echo reply */
+#define	ICMP_UNREACH		3		/* dest unreachable, codes: */
+#define		ICMP_UNREACH_NET	0		/* bad net */
+#define		ICMP_UNREACH_HOST	1		/* bad host */
+#define		ICMP_UNREACH_PROTOCOL	2		/* bad protocol */
+#define		ICMP_UNREACH_PORT	3		/* bad port */
+#define		ICMP_UNREACH_NEEDFRAG	4		/* IP_DF caused drop */
+#define		ICMP_UNREACH_SRCFAIL	5		/* src route failed */
+#define		ICMP_UNREACH_NET_UNKNOWN 6		/* unknown net */
+#define		ICMP_UNREACH_HOST_UNKNOWN 7		/* unknown host */
+#define		ICMP_UNREACH_ISOLATED	8		/* src host isolated */
+#define		ICMP_UNREACH_NET_PROHIB	9		/* prohibited access */
+#define		ICMP_UNREACH_HOST_PROHIB 10		/* ditto */
+#define		ICMP_UNREACH_TOSNET	11		/* bad tos for net */
+#define		ICMP_UNREACH_TOSHOST	12		/* bad tos for host */
+#define	ICMP_SOURCEQUENCH	4		/* packet lost, slow down */
+#define	ICMP_REDIRECT		5		/* shorter route, codes: */
+#define		ICMP_REDIRECT_NET	0		/* for network */
+#define		ICMP_REDIRECT_HOST	1		/* for host */
+#define		ICMP_REDIRECT_TOSNET	2		/* for tos and net */
+#define		ICMP_REDIRECT_TOSHOST	3		/* for tos and host */
+#define	ICMP_ECHO		8		/* echo service */
+#define	ICMP_ROUTERADVERT	9		/* router advertisement */
+#define	ICMP_ROUTERSOLICIT	10		/* router solicitation */
+#define	ICMP_TIMXCEED		11		/* time exceeded, code: */
+#define		ICMP_TIMXCEED_INTRANS	0		/* ttl==0 in transit */
+#define		ICMP_TIMXCEED_REASS	1		/* ttl==0 in reass */
+#define	ICMP_PARAMPROB		12		/* ip header bad */
+#define		ICMP_PARAMPROB_OPTABSENT 1		/* req. opt. absent */
+#define	ICMP_TSTAMP		13		/* timestamp request */
+#define	ICMP_TSTAMPREPLY	14		/* timestamp reply */
+#define	ICMP_IREQ		15		/* information request */
+#define	ICMP_IREQREPLY		16		/* information reply */
+#define	ICMP_MASKREQ		17		/* address mask request */
+#define	ICMP_MASKREPLY		18		/* address mask reply */
+
+#define	ICMP_MAXTYPE		18
+/* Nonzero when type is one of the informational (query or reply) types. */
+#define	ICMP_INFOTYPE(type) \
+	((type) == ICMP_ECHOREPLY || (type) == ICMP_ECHO || \
+	(type) == ICMP_ROUTERADVERT || (type) == ICMP_ROUTERSOLICIT || \
+	(type) == ICMP_TSTAMP || (type) == ICMP_TSTAMPREPLY || \
+	(type) == ICMP_IREQ || (type) == ICMP_IREQREPLY || \
+	(type) == ICMP_MASKREQ || (type) == ICMP_MASKREPLY)
+
+#ifdef KERNEL
+void	icmp_error __P((struct mbuf *, int, int, n_long, struct ifnet *));
+void	icmp_input __P((struct mbuf *, int));
+void	icmp_reflect __P((struct mbuf *));
+void	icmp_send __P((struct mbuf *, struct mbuf *));
+int	icmp_sysctl __P((int *, u_int, void *, size_t *, void *, size_t));
+#endif
diff --git a/sys/netinet/ip_input.c b/sys/netinet/ip_input.c
new file mode 100644
index 00000000000..d3bfeac4b19
--- /dev/null
+++ b/sys/netinet/ip_input.c
@@ -0,0 +1,1166 @@
+/*
+ * Copyright (c) 1982, 1986, 1988, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)ip_input.c	8.2 (Berkeley) 1/4/94
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/domain.h>
+#include <sys/protosw.h>
+#include <sys/socket.h>
+#include <sys/errno.h>
+#include <sys/time.h>
+#include <sys/kernel.h>
+
+#include <net/if.h>
+#include <net/route.h>
+
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/ip.h>
+#include <netinet/in_pcb.h>
+#include <netinet/in_var.h>
+#include <netinet/ip_var.h>
+#include <netinet/ip_icmp.h>
+
+#ifndef	IPFORWARDING
+#ifdef GATEWAY
+#define	IPFORWARDING	1	/* forward IP packets not for us */
+#else /* GATEWAY */
+#define	IPFORWARDING	0	/* don't forward IP packets not for us */
+#endif /* GATEWAY */
+#endif /* IPFORWARDING */
+#ifndef	IPSENDREDIRECTS
+#define	IPSENDREDIRECTS	1
+#endif
+int	ipforwarding = IPFORWARDING;	/* 0: ipintr drops packets not for us */
+int	ipsendredirects = IPSENDREDIRECTS;
+int	ip_defttl = IPDEFTTL;		/* default time to live */
+#ifdef DIAGNOSTIC
+int	ipprintfs = 0;
+#endif
+
+extern	struct domain inetdomain;
+extern	struct protosw inetsw[];
+u_char	ip_protox[IPPROTO_MAX];		/* protocol -> inetsw[] index (ip_init) */
+int	ipqmaxlen = IFQ_MAXLEN;		/* ip_init copies this into ipintrq */
+struct	in_ifaddr *in_ifaddr;			/* first inet address */
+struct	ifqueue ipintrq;		/* input queue drained by ipintr() */
+
+/*
+ * We need to save the IP options in case a protocol wants to respond
+ * to an incoming packet over the same route if the packet got here
+ * using IP source routing.  This allows connection establishment and
+ * maintenance when the remote end is on a network that is not known
+ * to us.
+ */
+int	ip_nhops = 0;		/* ipintr zeroes this for each packet */
+static	struct ip_srcrt {
+	struct	in_addr dst;			/* final destination */
+	char	nop;				/* one NOP to align */
+	char	srcopt[IPOPT_OFFSET + 1];	/* OPTVAL, OLEN and OFFSET */
+	struct	in_addr route[MAX_IPOPTLEN/sizeof(struct in_addr)];
+} ip_srcrt;
+
+#ifdef GATEWAY
+extern	int if_index;
+u_long	*ip_ifmatrix;
+#endif
+
+static void save_rte __P((u_char *, struct in_addr));
+/*
+ * IP initialization: fill ip_protox[]; protocols without a handler get raw IP.
+ */
+void
+ip_init()
+{
+	register struct protosw *psw;
+	register int n;
+
+	if ((psw = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW)) == 0)
+		panic("ip_init");
+	for (n = IPPROTO_MAX - 1; n >= 0; n--)
+		ip_protox[n] = psw - inetsw;	/* default: raw IP entry */
+	for (psw = inetdomain.dom_protosw;
+	    psw < inetdomain.dom_protoswNPROTOSW; psw++) {
+		if (psw->pr_domain->dom_family != PF_INET ||
+		    psw->pr_protocol == 0 || psw->pr_protocol == IPPROTO_RAW)
+			continue;
+		ip_protox[psw->pr_protocol] = psw - inetsw;
+	}
+	ipq.next = ipq.prev = &ipq;		/* queue head points at itself */
+	ip_id = time.tv_sec & 0xffff;
+	ipintrq.ifq_maxlen = ipqmaxlen;
+#ifdef GATEWAY
+	n = (if_index + 1) * (if_index + 1) * sizeof (u_long);
+	ip_ifmatrix = (u_long *) malloc(n, M_RTABLE, M_WAITOK);
+	bzero((char *)ip_ifmatrix, n);
+#endif
+}
+
+struct	sockaddr_in ipaddr = { sizeof(ipaddr), AF_INET };
+struct	route ipforward_rt;
+
+/*
+ * Ip input routine.  Checksum and byte swap header.  If fragmented
+ * try to reassemble.  Process options.  Pass to next level.
+ */
+void
+ipintr()
+{
+	register struct ip *ip;
+	register struct mbuf *m;
+	register struct ipq *fp;
+	register struct in_ifaddr *ia;
+	int hlen, s;
+
+next:
+	/*
+	 * Get next datagram off input queue and get IP header
+	 * in first mbuf.
+	 */
+	s = splimp();
+	IF_DEQUEUE(&ipintrq, m);
+	splx(s);
+	if (m == 0)
+		return;
+#ifdef	DIAGNOSTIC
+	if ((m->m_flags & M_PKTHDR) == 0)
+		panic("ipintr no HDR");
+#endif
+	/*
+	 * If no IP addresses have been set yet but the interfaces
+	 * are receiving, can't do anything with incoming packets yet.
+	 */
+	if (in_ifaddr == NULL)
+		goto bad;
+	ipstat.ips_total++;
+	if (m->m_len < sizeof (struct ip) &&
+	    (m = m_pullup(m, sizeof (struct ip))) == 0) {
+		ipstat.ips_toosmall++;
+		goto next;
+	}
+	ip = mtod(m, struct ip *);
+	if (ip->ip_v != IPVERSION) {
+		ipstat.ips_badvers++;
+		goto bad;
+	}
+	hlen = ip->ip_hl << 2;
+	if (hlen < sizeof(struct ip)) {	/* minimum header length */
+		ipstat.ips_badhlen++;
+		goto bad;
+	}
+	if (hlen > m->m_len) {
+		if ((m = m_pullup(m, hlen)) == 0) {
+			ipstat.ips_badhlen++;
+			goto next;
+		}
+		ip = mtod(m, struct ip *);
+	}
+	if (ip->ip_sum = in_cksum(m, hlen)) {
+		ipstat.ips_badsum++;
+		goto bad;
+	}
+
+	/*
+	 * Convert fields to host representation.
+	 */
+	NTOHS(ip->ip_len);
+	if (ip->ip_len < hlen) {
+		ipstat.ips_badlen++;
+		goto bad;
+	}
+	NTOHS(ip->ip_id);
+	NTOHS(ip->ip_off);
+
+	/*
+	 * Check that the amount of data in the buffers
+	 * is as at least much as the IP header would have us expect.
+	 * Trim mbufs if longer than we expect.
+	 * Drop packet if shorter than we expect.
+	 */
+	if (m->m_pkthdr.len < ip->ip_len) {
+		ipstat.ips_tooshort++;
+		goto bad;
+	}
+	if (m->m_pkthdr.len > ip->ip_len) {
+		if (m->m_len == m->m_pkthdr.len) {
+			m->m_len = ip->ip_len;
+			m->m_pkthdr.len = ip->ip_len;
+		} else
+			m_adj(m, ip->ip_len - m->m_pkthdr.len);
+	}
+
+	/*
+	 * Process options and, if not destined for us,
+	 * ship it on.  ip_dooptions returns 1 when an
+	 * error was detected (causing an icmp message
+	 * to be sent and the original packet to be freed).
+	 */
+	ip_nhops = 0;		/* for source routed packets */
+	if (hlen > sizeof (struct ip) && ip_dooptions(m))
+		goto next;
+
+	/*
+	 * Check our list of addresses, to see if the packet is for us.
+	 */
+	for (ia = in_ifaddr; ia; ia = ia->ia_next) {
+#define	satosin(sa)	((struct sockaddr_in *)(sa))
+
+		if (IA_SIN(ia)->sin_addr.s_addr == ip->ip_dst.s_addr)
+			goto ours;
+		if (
+#ifdef	DIRECTED_BROADCAST
+		    ia->ia_ifp == m->m_pkthdr.rcvif &&
+#endif
+		    (ia->ia_ifp->if_flags & IFF_BROADCAST)) {
+			u_long t;
+
+			if (satosin(&ia->ia_broadaddr)->sin_addr.s_addr ==
+			    ip->ip_dst.s_addr)
+				goto ours;
+			if (ip->ip_dst.s_addr == ia->ia_netbroadcast.s_addr)
+				goto ours;
+			/*
+			 * Look for all-0's host part (old broadcast addr),
+			 * either for subnet or net.
+			 */
+			t = ntohl(ip->ip_dst.s_addr);
+			if (t == ia->ia_subnet)
+				goto ours;
+			if (t == ia->ia_net)
+				goto ours;
+		}
+	}
+	if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) {
+		struct in_multi *inm;
+#ifdef MROUTING
+		extern struct socket *ip_mrouter;
+
+		if (ip_mrouter) {
+			/*
+			 * If we are acting as a multicast router, all
+			 * incoming multicast packets are passed to the
+			 * kernel-level multicast forwarding function.
+			 * The packet is returned (relatively) intact; if
+			 * ip_mforward() returns a non-zero value, the packet
+			 * must be discarded, else it may be accepted below.
+			 *
+			 * (The IP ident field is put in the same byte order
+			 * as expected when ip_mforward() is called from
+			 * ip_output().)
+			 */
+			ip->ip_id = htons(ip->ip_id);
+			if (ip_mforward(m, m->m_pkthdr.rcvif) != 0) {
+				ipstat.ips_cantforward++;
+				m_freem(m);
+				goto next;
+			}
+			ip->ip_id = ntohs(ip->ip_id);
+
+			/*
+			 * The process-level routing demon needs to receive
+			 * all multicast IGMP packets, whether or not this
+			 * host belongs to their destination groups.
+			 */
+			if (ip->ip_p == IPPROTO_IGMP)
+				goto ours;
+			ipstat.ips_forward++;
+		}
+#endif
+		/*
+		 * See if we belong to the destination multicast group on the
+		 * arrival interface.
+		 */
+		IN_LOOKUP_MULTI(ip->ip_dst, m->m_pkthdr.rcvif, inm);
+		if (inm == NULL) {
+			ipstat.ips_cantforward++;
+			m_freem(m);
+			goto next;
+		}
+		goto ours;
+	}
+	if (ip->ip_dst.s_addr == (u_long)INADDR_BROADCAST)
+		goto ours;
+	if (ip->ip_dst.s_addr == INADDR_ANY)
+		goto ours;
+
+	/*
+	 * Not for us; forward if possible and desirable.
+	 */
+	if (ipforwarding == 0) {
+		ipstat.ips_cantforward++;
+		m_freem(m);
+	} else
+		ip_forward(m, 0);
+	goto next;
+
+ours:
+	/*
+	 * If offset or IP_MF are set, must reassemble.
+	 * Otherwise, nothing need be done.
+	 * (We could look in the reassembly queue to see
+	 * if the packet was previously fragmented,
+	 * but it's not worth the time; just let them time out.)
+	 */
+	if (ip->ip_off &~ IP_DF) {
+		if (m->m_flags & M_EXT) {		/* XXX */
+			if ((m = m_pullup(m, sizeof (struct ip))) == 0) {
+				ipstat.ips_toosmall++;
+				goto next;
+			}
+			ip = mtod(m, struct ip *);
+		}
+		/*
+		 * Look for queue of fragments
+		 * of this datagram.
+		 */
+		for (fp = ipq.next; fp != &ipq; fp = fp->next)
+			if (ip->ip_id == fp->ipq_id &&
+			    ip->ip_src.s_addr == fp->ipq_src.s_addr &&
+			    ip->ip_dst.s_addr == fp->ipq_dst.s_addr &&
+			    ip->ip_p == fp->ipq_p)
+				goto found;
+		fp = 0;
+found:
+
+		/*
+		 * Adjust ip_len to not reflect header,
+		 * set ip_mff if more fragments are expected,
+		 * convert offset of this to bytes.
+		 */
+		ip->ip_len -= hlen;
+		((struct ipasfrag *)ip)->ipf_mff &= ~1;
+		if (ip->ip_off & IP_MF)
+			((struct ipasfrag *)ip)->ipf_mff |= 1;
+		ip->ip_off <<= 3;
+
+		/*
+		 * If datagram marked as having more fragments
+		 * or if this is not the first fragment,
+		 * attempt reassembly; if it succeeds, proceed.
+		 */
+		if (((struct ipasfrag *)ip)->ipf_mff & 1 || ip->ip_off) {
+			ipstat.ips_fragments++;
+			ip = ip_reass((struct ipasfrag *)ip, fp);
+			if (ip == 0)
+				goto next;
+			ipstat.ips_reassembled++;
+			m = dtom(ip);
+		} else
+			if (fp)
+				ip_freef(fp);
+	} else
+		ip->ip_len -= hlen;
+
+	/*
+	 * Switch out to protocol's input routine.
+	 */
+	ipstat.ips_delivered++;
+	(*inetsw[ip_protox[ip->ip_p]].pr_input)(m, hlen);
+	goto next;
+bad:
+	m_freem(m);
+	goto next;
+}
+
+/*
+ * Take incoming datagram fragment and try to
+ * reassemble it into whole datagram.  If a chain for
+ * reassembly of this datagram already exists, then it
+ * is given as fp; otherwise have to make a chain.
+ *
+ * ip is the arriving fragment's header, viewed as a struct ipasfrag so
+ * that it can be linked onto the queue; fp is the reassembly queue for
+ * this datagram, or 0 if none exists yet.
+ *
+ * Returns the header of the completed datagram once the queued
+ * fragments run from offset 0 without gaps and the last one has its
+ * more-fragments bit clear.  Returns 0 if the datagram is still
+ * incomplete, or if the fragment was dropped (counted in
+ * ipstat.ips_fragdropped and its mbuf chain freed).
+ */
+struct ip *
+ip_reass(ip, fp)
+	register struct ipasfrag *ip;
+	register struct ipq *fp;
+{
+	register struct mbuf *m = dtom(ip);
+	register struct ipasfrag *q;
+	register struct ipasfrag *covered;
+	struct mbuf *t;
+	int hlen = ip->ip_hl << 2;
+	int i, next;
+
+	/*
+	 * Presence of header sizes in mbufs
+	 * would confuse code below.
+	 */
+	m->m_data += hlen;
+	m->m_len -= hlen;
+
+	/*
+	 * If first fragment to arrive, create a reassembly queue.
+	 */
+	if (fp == 0) {
+		/* The queue header occupies an mbuf of its own. */
+		if ((t = m_get(M_DONTWAIT, MT_FTABLE)) == NULL)
+			goto dropfrag;
+		fp = mtod(t, struct ipq *);
+		insque(fp, &ipq);
+		fp->ipq_ttl = IPFRAGTTL;
+		fp->ipq_p = ip->ip_p;
+		fp->ipq_id = ip->ip_id;
+		/* An empty fragment list points back at the queue header. */
+		fp->ipq_next = fp->ipq_prev = (struct ipasfrag *)fp;
+		fp->ipq_src = ((struct ip *)ip)->ip_src;
+		fp->ipq_dst = ((struct ip *)ip)->ip_dst;
+		/* Nothing is queued yet, so there is nothing to trim. */
+		q = (struct ipasfrag *)fp;
+		goto insert;
+	}
+
+	/*
+	 * Find a segment which begins after this one does.
+	 */
+	for (q = fp->ipq_next; q != (struct ipasfrag *)fp; q = q->ipf_next)
+		if (q->ip_off > ip->ip_off)
+			break;
+
+	/*
+	 * If there is a preceding segment, it may provide some of
+	 * our data already.  If so, drop the data from the incoming
+	 * segment.  If it provides all of our data, drop us.
+	 */
+	if (q->ipf_prev != (struct ipasfrag *)fp) {
+		/* i is the number of leading bytes the predecessor already holds. */
+		i = q->ipf_prev->ip_off + q->ipf_prev->ip_len - ip->ip_off;
+		if (i > 0) {
+			if (i >= ip->ip_len)
+				goto dropfrag;
+			m_adj(dtom(ip), i);
+			ip->ip_off += i;
+			ip->ip_len -= i;
+		}
+	}
+
+	/*
+	 * While we overlap succeeding segments trim them or,
+	 * if they are completely covered, dequeue them.
+	 */
+	while (q != (struct ipasfrag *)fp && ip->ip_off + ip->ip_len > q->ip_off) {
+		/* i is the number of leading bytes of q that we duplicate. */
+		i = (ip->ip_off + ip->ip_len) - q->ip_off;
+		if (i < q->ip_len) {
+			q->ip_len -= i;
+			q->ip_off += i;
+			m_adj(dtom(q), i);
+			break;
+		}
+		/*
+		 * q is wholly covered.  Step past it, unlink it while its
+		 * links are still valid, and only then release its mbufs
+		 * (the same order ip_freef uses).
+		 */
+		covered = q;
+		q = q->ipf_next;
+		ip_deq(covered);
+		m_freem(dtom(covered));
+	}
+
+insert:
+	/*
+	 * Stick new segment in its place;
+	 * check for complete reassembly.
+	 */
+	ip_enq(ip, q->ipf_prev);
+	/* next tracks the offset at which the following fragment must start. */
+	next = 0;
+	for (q = fp->ipq_next; q != (struct ipasfrag *)fp; q = q->ipf_next) {
+		if (q->ip_off != next)
+			return (0);
+		next += q->ip_len;
+	}
+	/* q is back at the head, so q->ipf_prev is the last fragment. */
+	if (q->ipf_prev->ipf_mff & 1)
+		return (0);
+
+	/*
+	 * Reassembly is complete; concatenate fragments.
+	 */
+	q = fp->ipq_next;
+	m = dtom(q);
+	/* Detach the rest of the first fragment's chain and re-append it. */
+	t = m->m_next;
+	m->m_next = 0;
+	m_cat(m, t);
+	q = q->ipf_next;
+	while (q != (struct ipasfrag *)fp) {
+		t = dtom(q);
+		/* Advance before m_cat: q itself lives inside mbuf t. */
+		q = q->ipf_next;
+		m_cat(m, t);
+	}
+
+	/*
+	 * Create header for new ip packet by
+	 * modifying header of first packet;
+	 * dequeue and discard fragment reassembly header.
+	 * Make header visible.
+	 */
+	ip = fp->ipq_next;
+	ip->ip_len = next;
+	ip->ipf_mff &= ~1;
+	((struct ip *)ip)->ip_src = fp->ipq_src;
+	((struct ip *)ip)->ip_dst = fp->ipq_dst;
+	remque(fp);
+	(void) m_free(dtom(fp));
+	m = dtom(ip);
+	/* Undo the header hiding done on entry for the first fragment. */
+	m->m_len += (ip->ip_hl << 2);
+	m->m_data -= (ip->ip_hl << 2);
+	/* some debugging cruft by sklower, below, will go away soon */
+	if (m->m_flags & M_PKTHDR) { /* XXX this should be done elsewhere */
+		register int plen = 0;
+		/* Sum the chain; t remembers the head because m ends up null. */
+		for (t = m; m; m = m->m_next)
+			plen += m->m_len;
+		t->m_pkthdr.len = plen;
+	}
+	return ((struct ip *)ip);
+
+dropfrag:
+	/* Only reached before m is reassigned, so m is the new fragment. */
+	ipstat.ips_fragdropped++;
+	m_freem(m);
+	return (0);
+}
+
+/*
+ * Tear down one reassembly queue: release every fragment still
+ * linked on it, then take the queue header off the global list
+ * and release the mbuf that holds it.
+ */
+void
+ip_freef(fp)
+	struct ipq *fp;
+{
+	register struct ipasfrag *frag, *after;
+
+	frag = fp->ipq_next;
+	while (frag != (struct ipasfrag *)fp) {
+		/* Remember the successor before the fragment goes away. */
+		after = frag->ipf_next;
+		ip_deq(frag);
+		m_freem(dtom(frag));
+		frag = after;
+	}
+	remque(fp);
+	(void) m_free(dtom(fp));
+}
+
+/*
+ * Link fragment p into a reassembly chain immediately after prev.
+ * Same idea as insque, except that the link fields sit in the
+ * middle of the structure rather than at its start.
+ */
+void
+ip_enq(p, prev)
+	register struct ipasfrag *p, *prev;
+{
+	register struct ipasfrag *succ = prev->ipf_next;
+
+	p->ipf_next = succ;
+	p->ipf_prev = prev;
+	succ->ipf_prev = p;
+	prev->ipf_next = p;
+}
+
+/*
+ * Unlink fragment p from its reassembly chain; the inverse of
+ * ip_enq, as remque is the inverse of insque.  The links stored
+ * in p itself are left untouched.
+ */
+void
+ip_deq(p)
+	register struct ipasfrag *p;
+{
+	register struct ipasfrag *before = p->ipf_prev;
+	register struct ipasfrag *after = p->ipf_next;
+
+	before->ipf_next = after;
+	after->ipf_prev = before;
+}
+
+/*
+ * IP slow timer: age every reassembly queue by one tick and
+ * discard those whose time-to-live has reached zero.
+ * Runs at splnet for the whole walk.
+ */
+void
+ip_slowtimo()
+{
+	register struct ipq *fp, *following;
+	int s = splnet();
+
+	/* A null list head means there is nothing to walk. */
+	if (ipq.next != 0) {
+		for (fp = ipq.next; fp != &ipq; fp = following) {
+			/* Fetch the successor first; fp may be freed below. */
+			following = fp->next;
+			if (--fp->ipq_ttl == 0) {
+				ipstat.ips_fragtimeout++;
+				ip_freef(fp);
+			}
+		}
+	}
+	splx(s);
+}
+
+/*
+ * Throw away every partially reassembled datagram, counting
+ * each discarded queue as a dropped fragment.
+ */
+void
+ip_drain()
+{
+	register struct ipq *fp;
+
+	/* ip_freef unlinks the queue, so always restart from the head. */
+	for (fp = ipq.next; fp != &ipq; fp = ipq.next) {
+		ipstat.ips_fragdropped++;
+		ip_freef(fp);
+	}
+}
+
+/*
+ * Do option processing on a datagram,
+ * possibly discarding it if bad options are encountered,
+ * or forwarding it if source-routed.
+ * Returns 1 if packet has been forwarded/freed,
+ * 0 if the packet should be processed further.
+ *
+ * Option lengths and pointers come straight off the wire, so each is
+ * range-checked before it is used to index the header.  Comparisons
+ * against sizeof are made in signed arithmetic: an option shorter than
+ * an address must not wrap an unsigned subtraction and slip past the
+ * bounds test.
+ *
+ * On a bad option an ICMP error of the given type/code is generated
+ * from the packet and ipstat.ips_badoptions is incremented.
+ */
+int
+ip_dooptions(m)
+	struct mbuf *m;
+{
+	register struct ip *ip = mtod(m, struct ip *);
+	register u_char *cp;
+	register struct ip_timestamp *ipt;
+	register struct in_ifaddr *ia;
+	int opt, optlen, cnt, off, code, type = ICMP_PARAMPROB, forward = 0;
+	struct in_addr *sin, dst;
+	n_time ntime;
+
+	dst = ip->ip_dst;
+	cp = (u_char *)(ip + 1);
+	/* cnt is the number of option bytes not yet consumed. */
+	cnt = (ip->ip_hl << 2) - sizeof (struct ip);
+	for (; cnt > 0; cnt -= optlen, cp += optlen) {
+		opt = cp[IPOPT_OPTVAL];
+		if (opt == IPOPT_EOL)
+			break;
+		if (opt == IPOPT_NOP)
+			optlen = 1;
+		else {
+			/* The length octet itself must lie inside the header. */
+			if (cnt < IPOPT_OLEN + 1) {
+				code = &cp[IPOPT_OLEN] - (u_char *)ip;
+				goto bad;
+			}
+			optlen = cp[IPOPT_OLEN];
+			if (optlen <= 0 || optlen > cnt) {
+				code = &cp[IPOPT_OLEN] - (u_char *)ip;
+				goto bad;
+			}
+		}
+		switch (opt) {
+
+		default:
+			break;
+
+		/*
+		 * Source routing with record.
+		 * Find interface with current destination address.
+		 * If none on this machine then drop if strictly routed,
+		 * or do nothing if loosely routed.
+		 * Record interface address and bring up next address
+		 * component.  If strictly routed make sure next
+		 * address is on directly accessible net.
+		 */
+		case IPOPT_LSRR:
+		case IPOPT_SSRR:
+			/* The option must be long enough to hold its pointer octet. */
+			if (optlen < IPOPT_OFFSET + 1) {
+				code = &cp[IPOPT_OLEN] - (u_char *)ip;
+				goto bad;
+			}
+			if ((off = cp[IPOPT_OFFSET]) < IPOPT_MINOFF) {
+				code = &cp[IPOPT_OFFSET] - (u_char *)ip;
+				goto bad;
+			}
+			ipaddr.sin_addr = ip->ip_dst;
+			ia = (struct in_ifaddr *)
+				ifa_ifwithaddr((struct sockaddr *)&ipaddr);
+			if (ia == 0) {
+				if (opt == IPOPT_SSRR) {
+					type = ICMP_UNREACH;
+					code = ICMP_UNREACH_SRCFAIL;
+					goto bad;
+				}
+				/*
+				 * Loose routing, and not at next destination
+				 * yet; nothing to do except forward.
+				 */
+				break;
+			}
+			off--;			/* 0 origin */
+			/* Signed compare: optlen may be smaller than an address. */
+			if (off > optlen - (int)sizeof(struct in_addr)) {
+				/*
+				 * End of source route.  Should be for us.
+				 */
+				save_rte(cp, ip->ip_src);
+				break;
+			}
+			/*
+			 * locate outgoing interface
+			 */
+			bcopy((caddr_t)(cp + off), (caddr_t)&ipaddr.sin_addr,
+			    sizeof(ipaddr.sin_addr));
+			if (opt == IPOPT_SSRR) {
+#define	INA	struct in_ifaddr *
+#define	SA	struct sockaddr *
+			    if ((ia = (INA)ifa_ifwithdstaddr((SA)&ipaddr)) == 0)
+				ia = (INA)ifa_ifwithnet((SA)&ipaddr);
+			} else
+				ia = ip_rtaddr(ipaddr.sin_addr);
+			if (ia == 0) {
+				type = ICMP_UNREACH;
+				code = ICMP_UNREACH_SRCFAIL;
+				goto bad;
+			}
+			/* Next hop becomes the destination; our address is recorded. */
+			ip->ip_dst = ipaddr.sin_addr;
+			bcopy((caddr_t)&(IA_SIN(ia)->sin_addr),
+			    (caddr_t)(cp + off), sizeof(struct in_addr));
+			cp[IPOPT_OFFSET] += sizeof(struct in_addr);
+			/*
+			 * Let ip_intr's mcast routing check handle mcast pkts
+			 */
+			forward = !IN_MULTICAST(ntohl(ip->ip_dst.s_addr));
+			break;
+
+		case IPOPT_RR:
+			/* The option must be long enough to hold its pointer octet. */
+			if (optlen < IPOPT_OFFSET + 1) {
+				code = &cp[IPOPT_OLEN] - (u_char *)ip;
+				goto bad;
+			}
+			if ((off = cp[IPOPT_OFFSET]) < IPOPT_MINOFF) {
+				code = &cp[IPOPT_OFFSET] - (u_char *)ip;
+				goto bad;
+			}
+			/*
+			 * If no space remains, ignore.
+			 */
+			off--;			/* 0 origin */
+			/* Signed compare: optlen may be smaller than an address. */
+			if (off > optlen - (int)sizeof(struct in_addr))
+				break;
+			bcopy((caddr_t)(&ip->ip_dst), (caddr_t)&ipaddr.sin_addr,
+			    sizeof(ipaddr.sin_addr));
+			/*
+			 * locate outgoing interface; if we're the destination,
+			 * use the incoming interface (should be same).
+			 */
+			if ((ia = (INA)ifa_ifwithaddr((SA)&ipaddr)) == 0 &&
+			    (ia = ip_rtaddr(ipaddr.sin_addr)) == 0) {
+				type = ICMP_UNREACH;
+				code = ICMP_UNREACH_HOST;
+				goto bad;
+			}
+			bcopy((caddr_t)&(IA_SIN(ia)->sin_addr),
+			    (caddr_t)(cp + off), sizeof(struct in_addr));
+			cp[IPOPT_OFFSET] += sizeof(struct in_addr);
+			break;
+
+		case IPOPT_TS:
+			code = cp - (u_char *)ip;
+			ipt = (struct ip_timestamp *)cp;
+			if (ipt->ipt_len < 5)
+				goto bad;
+			/*
+			 * The pointer is 1-origin and the first data octet is
+			 * the fifth; anything smaller would aim the stores
+			 * below at the option header or before it.
+			 */
+			if (ipt->ipt_ptr < 5) {
+				code = &cp[IPOPT_OFFSET] - (u_char *)ip;
+				goto bad;
+			}
+			if (ipt->ipt_ptr > ipt->ipt_len - sizeof (long)) {
+				/* No room left: bump the overflow count instead. */
+				if (++ipt->ipt_oflw == 0)
+					goto bad;
+				break;
+			}
+			sin = (struct in_addr *)(cp + ipt->ipt_ptr - 1);
+			switch (ipt->ipt_flg) {
+
+			case IPOPT_TS_TSONLY:
+				break;
+
+			case IPOPT_TS_TSANDADDR:
+				if (ipt->ipt_ptr + sizeof(n_time) +
+				    sizeof(struct in_addr) > ipt->ipt_len)
+					goto bad;
+				ipaddr.sin_addr = dst;
+				ia = (INA)ifaof_ifpforaddr((SA)&ipaddr,
+							    m->m_pkthdr.rcvif);
+				/* continue resumes the option loop, skipping the stamp. */
+				if (ia == 0)
+					continue;
+				bcopy((caddr_t)&IA_SIN(ia)->sin_addr,
+				    (caddr_t)sin, sizeof(struct in_addr));
+				ipt->ipt_ptr += sizeof(struct in_addr);
+				break;
+
+			case IPOPT_TS_PRESPEC:
+				if (ipt->ipt_ptr + sizeof(n_time) +
+				    sizeof(struct in_addr) > ipt->ipt_len)
+					goto bad;
+				bcopy((caddr_t)sin, (caddr_t)&ipaddr.sin_addr,
+				    sizeof(struct in_addr));
+				/* Stamp only when the prespecified address is ours. */
+				if (ifa_ifwithaddr((SA)&ipaddr) == 0)
+					continue;
+				ipt->ipt_ptr += sizeof(struct in_addr);
+				break;
+
+			default:
+				goto bad;
+			}
+			ntime = iptime();
+			bcopy((caddr_t)&ntime, (caddr_t)cp + ipt->ipt_ptr - 1,
+			    sizeof(n_time));
+			ipt->ipt_ptr += sizeof(n_time);
+		}
+	}
+	if (forward) {
+		ip_forward(m, 1);
+		return (1);
+	}
+	return (0);
+bad:
+	ip->ip_len -= ip->ip_hl << 2;   /* XXX icmp_error adds in hdr length */
+	icmp_error(m, type, code, 0, 0);
+	ipstat.ips_badoptions++;
+	return (1);
+}
+
+/*
+ * Look up the interface address that would be used to reach dst
+ * (a final destination or a next hop).  The lookup goes through
+ * the cached forwarding route ipforward_rt, which is replaced
+ * whenever it is empty or refers to a different destination.
+ * Returns a null pointer when no route can be found.
+ */
+struct in_ifaddr *
+ip_rtaddr(dst)
+	 struct in_addr dst;
+{
+	register struct sockaddr_in *cached;
+
+	cached = (struct sockaddr_in *) &ipforward_rt.ro_dst;
+
+	/* A cached route for some other destination is of no use. */
+	if (ipforward_rt.ro_rt != 0 &&
+	    dst.s_addr != cached->sin_addr.s_addr) {
+		RTFREE(ipforward_rt.ro_rt);
+		ipforward_rt.ro_rt = 0;
+	}
+
+	/* Nothing cached (or just discarded): look the route up afresh. */
+	if (ipforward_rt.ro_rt == 0) {
+		cached->sin_family = AF_INET;
+		cached->sin_len = sizeof(*cached);
+		cached->sin_addr = dst;
+
+		rtalloc(&ipforward_rt);
+	}
+
+	return (ipforward_rt.ro_rt ?
+	    (struct in_ifaddr *) ipforward_rt.ro_rt->rt_ifa :
+	    (struct in_ifaddr *)0);
+}
+
+/*
+ * Stash a received source-route option, together with the
+ * datagram's source address, in ip_srcrt so that ip_srcroute
+ * can later build a reply route from it.  An option too large
+ * for ip_srcrt is silently ignored.
+ */
+void
+save_rte(option, dst)
+	u_char *option;
+	struct in_addr dst;
+{
+	unsigned len = option[IPOPT_OLEN];
+
+#ifdef DIAGNOSTIC
+	if (ipprintfs)
+		printf("save_rte: olen %d\n", len);
+#endif
+	if (len <= sizeof(ip_srcrt) - (1 + sizeof(dst))) {
+		bcopy((caddr_t)option, (caddr_t)ip_srcrt.srcopt, len);
+		/* Hop count: what follows the type/length/pointer octets. */
+		ip_nhops = (len - IPOPT_OFFSET - 1) / sizeof(struct in_addr);
+		ip_srcrt.dst = dst;
+	}
+}
+
+/*
+ * Retrieve incoming source route for use in replies,
+ * in the same form used by setsockopt.
+ * The first hop is placed before the options, will be removed later.
+ *
+ * Returns a newly allocated MT_SOOPTS mbuf, or a null pointer when no
+ * source route has been saved (ip_nhops == 0) or no mbuf is available.
+ * The mbuf data is laid out as:
+ *	first-hop address | nop + option header | remaining hops, reversed |
+ *	ip_srcrt.dst
+ */
+struct mbuf *
+ip_srcroute()
+{
+	register struct in_addr *p, *q;
+	register struct mbuf *m;
+
+	if (ip_nhops == 0)
+		return ((struct mbuf *)0);
+	m = m_get(M_DONTWAIT, MT_SOOPTS);
+	if (m == 0)
+		return ((struct mbuf *)0);
+
+#define OPTSIZ	(sizeof(ip_srcrt.nop) + sizeof(ip_srcrt.srcopt))
+
+	/* length is (nhops+1)*sizeof(addr) + sizeof(nop + srcrt header) */
+	m->m_len = ip_nhops * sizeof(struct in_addr) + sizeof(struct in_addr) +
+	    OPTSIZ;
+#ifdef DIAGNOSTIC
+	if (ipprintfs)
+		printf("ip_srcroute: nhops %d mlen %d", ip_nhops, m->m_len);
+#endif
+
+	/*
+	 * First save first hop for return route
+	 */
+	/* The last recorded hop is the first hop going back; p then steps down. */
+	p = &ip_srcrt.route[ip_nhops - 1];
+	*(mtod(m, struct in_addr *)) = *p--;
+#ifdef DIAGNOSTIC
+	if (ipprintfs)
+		printf(" hops %lx", ntohl(mtod(m, struct in_addr *)->s_addr));
+#endif
+
+	/*
+	 * Copy option fields and padding (nop) to mbuf.
+	 */
+	ip_srcrt.nop = IPOPT_NOP;
+	/* Reset the option's pointer octet to its minimum value. */
+	ip_srcrt.srcopt[IPOPT_OFFSET] = IPOPT_MINOFF;
+	/* Copies OPTSIZ bytes starting at ip_srcrt.nop, just past the first hop. */
+	bcopy((caddr_t)&ip_srcrt.nop,
+	    mtod(m, caddr_t) + sizeof(struct in_addr), OPTSIZ);
+	/* q addresses the first route slot following the copied header. */
+	q = (struct in_addr *)(mtod(m, caddr_t) +
+	    sizeof(struct in_addr) + OPTSIZ);
+#undef OPTSIZ
+	/*
+	 * Record return path as an IP source route,
+	 * reversing the path (pointers are now aligned).
+	 */
+	while (p >= ip_srcrt.route) {
+#ifdef DIAGNOSTIC
+		/*
+		 * NOTE(review): this prints *q before the assignment below
+		 * stores into it, so the value shown is whatever the mbuf
+		 * held beforehand rather than the hop being copied.
+		 */
+		if (ipprintfs)
+			printf(" %lx", ntohl(q->s_addr));
+#endif
+		*q++ = *p--;
+	}
+	/*
+	 * Last hop goes to final destination.
+	 */
+	*q = ip_srcrt.dst;
+#ifdef DIAGNOSTIC
+	if (ipprintfs)
+		printf(" %lx\n", ntohl(q->s_addr));
+#endif
+	return (m);
+}
+
+/*
+ * Remove the IP options from the header at the front of mbuf m,
+ * for the benefit of higher level protocols in the kernel.
+ * The data following the options within this mbuf is slid down
+ * over them, the mbuf (and packet header) lengths are reduced,
+ * and ip_hl is reset to describe an option-less header.
+ * Nothing is returned and mopt is currently ignored.
+ * XXX should be deleted.
+ */
+void
+ip_stripoptions(m, mopt)
+	register struct mbuf *m;
+	struct mbuf *mopt;
+{
+	struct ip *ip = mtod(m, struct ip *);
+	register caddr_t optstart = (caddr_t)(ip + 1);
+	int optbytes = (ip->ip_hl<<2) - sizeof (struct ip);
+	/* Bytes in this mbuf that follow the header and its options. */
+	register int tail = m->m_len - (sizeof (struct ip) + optbytes);
+
+	bcopy(optstart + optbytes, optstart, (unsigned)tail);
+	ip->ip_hl = sizeof(struct ip) >> 2;
+	m->m_len -= optbytes;
+	if (m->m_flags & M_PKTHDR)
+		m->m_pkthdr.len -= optbytes;
+}
+
+/*
+ * Table of errno values with PRC_NCMDS slots; a zero entry carries
+ * no error.  NOTE(review): presumably indexed by PRC_* control
+ * command code -- the command numbering is defined outside this
+ * file, so confirm which slot belongs to which command there.
+ */
+u_char inetctlerrmap[PRC_NCMDS] = {
+	0,		0,		0,		0,
+	0,		EMSGSIZE,	EHOSTDOWN,	EHOSTUNREACH,
+	EHOSTUNREACH,	EHOSTUNREACH,	ECONNREFUSED,	ECONNREFUSED,
+	EMSGSIZE,	EHOSTUNREACH,	0,		0,
+	0,		0,		0,		0,
+	ENOPROTOOPT
+};
+
+/*
+ * Forward a packet.  If some error occurs return the sender
+ * an icmp packet.  Note we can't always generate a meaningful
+ * icmp message because icmp doesn't have a large enough repertoire
+ * of codes and types.
+ *
+ * If not forwarding, just drop the packet.  This could be confusing
+ * if ipforwarding was zero but some routing protocol was advancing
+ * us as a gateway to somewhere.  However, we must let the routing
+ * protocol deal with that.
+ *
+ * The srcrt parameter indicates whether the packet is being forwarded
+ * via a source route.
+ *
+ * m is handed to m_freem, icmp_error or ip_output on every path.
+ * A copy of the leading bytes is kept across ip_output so that an
+ * ICMP error or redirect can still be generated afterwards.
+ */
+void
+ip_forward(m, srcrt)
+	struct mbuf *m;
+	int srcrt;
+{
+	register struct ip *ip = mtod(m, struct ip *);
+	register struct sockaddr_in *sin;
+	register struct rtentry *rt;
+	int error, type = 0, code;
+	struct mbuf *mcopy;
+	n_long dest;
+	struct ifnet *destifp;
+
+	/* dest is passed to icmp_error; it is only made nonzero for redirects. */
+	dest = 0;
+#ifdef DIAGNOSTIC
+	if (ipprintfs)
+		printf("forward: src %x dst %x ttl %x\n", ip->ip_src,
+			ip->ip_dst, ip->ip_ttl);
+#endif
+	/* Link-level broadcasts and unforwardable destinations are dropped. */
+	if (m->m_flags & M_BCAST || in_canforward(ip->ip_dst) == 0) {
+		ipstat.ips_cantforward++;
+		m_freem(m);
+		return;
+	}
+	HTONS(ip->ip_id);
+	/* Packet would expire here: report time exceeded instead of forwarding. */
+	if (ip->ip_ttl <= IPTTLDEC) {
+		icmp_error(m, ICMP_TIMXCEED, ICMP_TIMXCEED_INTRANS, dest, 0);
+		return;
+	}
+	ip->ip_ttl -= IPTTLDEC;
+
+	/*
+	 * Reuse the cached forwarding route when it is for this
+	 * destination; otherwise release it and look up a new one.
+	 */
+	sin = (struct sockaddr_in *)&ipforward_rt.ro_dst;
+	if ((rt = ipforward_rt.ro_rt) == 0 ||
+	    ip->ip_dst.s_addr != sin->sin_addr.s_addr) {
+		if (ipforward_rt.ro_rt) {
+			RTFREE(ipforward_rt.ro_rt);
+			ipforward_rt.ro_rt = 0;
+		}
+		sin->sin_family = AF_INET;
+		sin->sin_len = sizeof(*sin);
+		sin->sin_addr = ip->ip_dst;
+
+		rtalloc(&ipforward_rt);
+		if (ipforward_rt.ro_rt == 0) {
+			icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_HOST, dest, 0);
+			return;
+		}
+		rt = ipforward_rt.ro_rt;
+	}
+
+	/*
+	 * Save at most 64 bytes of the packet in case
+	 * we need to generate an ICMP message to the src.
+	 */
+	mcopy = m_copy(m, 0, imin((int)ip->ip_len, 64));
+
+#ifdef GATEWAY
+	/* Count traffic per (receiving interface, sending interface) pair. */
+	ip_ifmatrix[rt->rt_ifp->if_index +
+	     if_index * m->m_pkthdr.rcvif->if_index]++;
+#endif
+	/*
+	 * If forwarding packet using same interface that it came in on,
+	 * perhaps should send a redirect to sender to shortcut a hop.
+	 * Only send redirect if source is sending directly to us,
+	 * and if packet was not source routed (or has any options).
+	 * Also, don't send redirect if forwarding using a default route
+	 * or a route modified by a redirect.
+	 */
+#define	satosin(sa)	((struct sockaddr_in *)(sa))
+	if (rt->rt_ifp == m->m_pkthdr.rcvif &&
+	    (rt->rt_flags & (RTF_DYNAMIC|RTF_MODIFIED)) == 0 &&
+	    satosin(rt_key(rt))->sin_addr.s_addr != 0 &&
+	    ipsendredirects && !srcrt) {
+#define	RTA(rt)	((struct in_ifaddr *)(rt->rt_ifa))
+		u_long src = ntohl(ip->ip_src.s_addr);
+
+		/* Redirect only when the source is on the outgoing subnet. */
+		if (RTA(rt) &&
+		    (src & RTA(rt)->ia_subnetmask) == RTA(rt)->ia_subnet) {
+		    /* Point the sender at the gateway, or at the host itself. */
+		    if (rt->rt_flags & RTF_GATEWAY)
+			dest = satosin(rt->rt_gateway)->sin_addr.s_addr;
+		    else
+			dest = ip->ip_dst.s_addr;
+		    /* Router requirements says to only send host redirects */
+		    type = ICMP_REDIRECT;
+		    code = ICMP_REDIRECT_HOST;
+#ifdef DIAGNOSTIC
+		    if (ipprintfs)
+		        printf("redirect (%d) to %lx\n", code, (u_long)dest);
+#endif
+		}
+	}
+
+	error = ip_output(m, (struct mbuf *)0, &ipforward_rt, IP_FORWARDING
+#ifdef DIRECTED_BROADCAST
+			    | IP_ALLOWBROADCAST
+#endif
+						, 0);
+	if (error)
+		ipstat.ips_cantforward++;
+	else {
+		ipstat.ips_forward++;
+		if (type)
+			ipstat.ips_redirectsent++;
+		else {
+			/* Forwarded with nothing to report: the copy is not needed. */
+			if (mcopy)
+				m_freem(mcopy);
+			return;
+		}
+	}
+	/* Without the saved copy there is nothing to build an ICMP message from. */
+	if (mcopy == NULL)
+		return;
+	destifp = NULL;
+
+	/* Translate the ip_output result into an ICMP type and code. */
+	switch (error) {
+
+	case 0:				/* forwarded, but need redirect */
+		/* type, code set above */
+		break;
+
+	case ENETUNREACH:		/* shouldn't happen, checked above */
+	case EHOSTUNREACH:
+	case ENETDOWN:
+	case EHOSTDOWN:
+	default:
+		type = ICMP_UNREACH;
+		code = ICMP_UNREACH_HOST;
+		break;
+
+	case EMSGSIZE:
+		type = ICMP_UNREACH;
+		code = ICMP_UNREACH_NEEDFRAG;
+		/* Hand icmp_error the outgoing interface for this case only. */
+		if (ipforward_rt.ro_rt)
+			destifp = ipforward_rt.ro_rt->rt_ifp;
+		ipstat.ips_cantfrag++;
+		break;
+
+	case ENOBUFS:
+		type = ICMP_SOURCEQUENCH;
+		code = 0;
+		break;
+	}
+	icmp_error(mcopy, type, code, dest, destifp);
+}
+
+/*
+ * sysctl handler for the IP-level integer variables.
+ * name/namelen identify the variable; the remaining arguments
+ * are passed through to sysctl_int unchanged.  Returns ENOTDIR
+ * if the name has more than one component, EOPNOTSUPP for an
+ * unrecognised name, otherwise whatever sysctl_int returns.
+ */
+int
+ip_sysctl(name, namelen, oldp, oldlenp, newp, newlen)
+	int *name;
+	u_int namelen;
+	void *oldp;
+	size_t *oldlenp;
+	void *newp;
+	size_t newlen;
+{
+	int *valp;
+
+	/* All sysctl names at this level are terminal. */
+	if (namelen != 1)
+		return (ENOTDIR);
+
+	/* Pick the variable first; one sysctl_int call then serves them all. */
+	switch (name[0]) {
+	case IPCTL_FORWARDING:
+		valp = &ipforwarding;
+		break;
+	case IPCTL_SENDREDIRECTS:
+		valp = &ipsendredirects;
+		break;
+	case IPCTL_DEFTTL:
+		valp = &ip_defttl;
+		break;
+#ifdef notyet
+	case IPCTL_DEFMTU:
+		valp = &ip_mtu;
+		break;
+#endif
+	default:
+		return (EOPNOTSUPP);
+	}
+	return (sysctl_int(oldp, oldlenp, newp, newlen, valp));
+}
diff --git a/sys/netinet/ip_mroute.c b/sys/netinet/ip_mroute.c
new file mode 100644
index 00000000000..1744ec17fb6
--- /dev/null
+++ b/sys/netinet/ip_mroute.c
@@ -0,0 +1,834 @@
+/*
+ * Copyright (c) 1989 Stephen Deering
+ * Copyright (c) 1992, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Stephen Deering of Stanford University.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)ip_mroute.c	8.2 (Berkeley) 11/15/93
+ */
+
+/*
+ * Procedures for the kernel part of DVMRP,
+ * a Distance-Vector Multicast Routing Protocol.
+ * (See RFC-1075.)
+ *
+ * Written by David Waitzman, BBN Labs, August 1988.
+ * Modified by Steve Deering, Stanford, February 1989.
+ *
+ * MROUTING 1.1
+ */
+
+#ifndef MROUTING
+int	ip_mrtproto;				/* for netstat only */
+#else
+
+#include <sys/param.h>
+#include <sys/errno.h>
+#include <sys/ioctl.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/protosw.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/time.h>
+
+#include <net/if.h>
+#include <net/route.h>
+#include <net/raw_cb.h>
+
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/ip.h>
+#include <netinet/in_pcb.h>
+#include <netinet/in_var.h>
+#include <netinet/ip_var.h>
+
+#include <netinet/igmp.h>
+#include <netinet/igmp_var.h>
+#include <netinet/ip_mroute.h>
+
+/* Static forwards */
+static	int ip_mrouter_init __P((struct socket *));
+static	int add_vif __P((struct vifctl *));
+static	int del_vif __P((vifi_t *vifip));
+static	int add_lgrp __P((struct lgrplctl *));
+static	int del_lgrp __P((struct lgrplctl *));
+static	int grplst_member __P((struct vif *, struct in_addr));
+static	u_long nethash __P((struct in_addr in));
+static	int add_mrt __P((struct mrtctl *));
+static	int del_mrt __P((struct in_addr *));
+static	struct mrt *mrtfind __P((struct in_addr));
+static	void phyint_send __P((struct mbuf *, struct vif *));
+static	void tunnel_send __P((struct mbuf *, struct vif *));
+
+/* Size in bytes of an Internet address. */
+#define INSIZ sizeof(struct in_addr)
+/* True when the two Internet addresses pointed to are byte-for-byte equal. */
+#define	same(a1, a2) (bcmp((caddr_t)(a1), (caddr_t)(a2), INSIZ) == 0)
+/* View a generic socket address pointer as an Internet socket address. */
+#define	satosin(sa)	((struct sockaddr_in *)(sa))
+
+/*
+ * Globals.  All but ip_mrouter and ip_mrtproto could be static,
+ * except for netstat or debugging purposes.
+ */
+struct	socket *ip_mrouter = NULL;	/* socket that did DVMRP_INIT, or NULL */
+int	ip_mrtproto = IGMP_DVMRP;		/* for netstat only */
+
+struct	mrt *mrttable[MRTHASHSIZ];	/* multicast routes, by hash slot */
+struct	vif viftable[MAXVIFS];		/* virtual interfaces, by vif index */
+struct	mrtstat	mrtstat;		/* multicast routing counters */
+
+/*
+ * Private variables.
+ */
+/* One more than the highest vif index in use (kept by add_vif/del_vif). */
+static	vifi_t numvifs = 0;
+/*
+ * NOTE(review): the cached_* variables look like a one-entry route
+ * lookup cache; their users are outside this part of the file -- confirm.
+ */
+static	struct mrt *cached_mrt = NULL;
+static	u_long cached_origin;
+static	u_long cached_originmask;
+
+/*
+ * Dispatch a DVMRP setsockopt command that modifies the multicast
+ * routing tables.  cmd is the command, so the socket issuing it and
+ * m the mbuf holding the command's argument (may be NULL).
+ * Every command but DVMRP_INIT must come from the multicast router
+ * socket, else EACCES.  An argument that is missing or too short
+ * yields EINVAL; an unknown command yields EOPNOTSUPP.
+ */
+int
+ip_mrouter_cmd(cmd, so, m)
+	register int cmd;
+	register struct socket *so;
+	register struct mbuf *m;
+{
+
+	if (cmd != DVMRP_INIT && so != ip_mrouter)
+		return (EACCES);
+
+	switch (cmd) {
+
+	case DVMRP_INIT:
+		return (ip_mrouter_init(so));
+
+	case DVMRP_DONE:
+		return (ip_mrouter_done());
+
+	case DVMRP_ADD_VIF:
+		if (m == NULL || m->m_len < sizeof(struct vifctl))
+			return (EINVAL);
+		return (add_vif(mtod(m, struct vifctl *)));
+
+	case DVMRP_DEL_VIF:
+		if (m == NULL || m->m_len < sizeof(short))
+			return (EINVAL);
+		return (del_vif(mtod(m, vifi_t *)));
+
+	case DVMRP_ADD_LGRP:
+		if (m == NULL || m->m_len < sizeof(struct lgrplctl))
+			return (EINVAL);
+		return (add_lgrp(mtod(m, struct lgrplctl *)));
+
+	case DVMRP_DEL_LGRP:
+		if (m == NULL || m->m_len < sizeof(struct lgrplctl))
+			return (EINVAL);
+		return (del_lgrp(mtod(m, struct lgrplctl *)));
+
+	case DVMRP_ADD_MRT:
+		if (m == NULL || m->m_len < sizeof(struct mrtctl))
+			return (EINVAL);
+		return (add_mrt(mtod(m, struct mrtctl *)));
+
+	case DVMRP_DEL_MRT:
+		if (m == NULL || m->m_len < sizeof(struct in_addr))
+			return (EINVAL);
+		return (del_mrt(mtod(m, struct in_addr *)));
+
+	default:
+		return (EOPNOTSUPP);
+	}
+	/* NOTREACHED */
+}
+
+/*
+ * Enable multicast routing by recording so as the router socket.
+ * Only a raw IGMP socket qualifies (EOPNOTSUPP otherwise), and
+ * only one router socket may exist at a time (EADDRINUSE).
+ */
+static int
+ip_mrouter_init(so)
+	register struct socket *so;
+{
+	if (so->so_type == SOCK_RAW &&
+	    so->so_proto->pr_protocol == IPPROTO_IGMP) {
+		if (ip_mrouter == NULL) {
+			ip_mrouter = so;
+			return (0);
+		}
+		return (EADDRINUSE);
+	}
+	return (EOPNOTSUPP);
+}
+
+/*
+ * Disable multicast routing: release every vif and every entry
+ * held in the multicast route table, then forget the router
+ * socket.  Runs at splnet and always returns 0.
+ */
+int
+ip_mrouter_done()
+{
+	register vifi_t vifi;
+	register int i;
+	register struct ifnet *ifp;
+	register struct vif *vifp;
+	register int s;
+	struct ifreq ifr;
+
+	s = splnet();
+
+	/*
+	 * Unused slots and tunnels need no per-interface teardown.
+	 * Each remaining phyint gives back its local group list and
+	 * has promiscuous reception of all IP multicasts disabled.
+	 */
+	for (vifi = 0; vifi < numvifs; vifi++) {
+		vifp = &viftable[vifi];
+		if (vifp->v_lcl_addr.s_addr == 0 ||
+		    (vifp->v_flags & VIFF_TUNNEL))
+			continue;
+		if (vifp->v_lcl_grps)
+			free(vifp->v_lcl_grps, M_MRTABLE);
+		satosin(&ifr.ifr_addr)->sin_family = AF_INET;
+		satosin(&ifr.ifr_addr)->sin_addr.s_addr = INADDR_ANY;
+		ifp = vifp->v_ifp;
+		(*ifp->if_ioctl)(ifp, SIOCDELMULTI, (caddr_t)&ifr);
+	}
+	bzero((caddr_t)viftable, sizeof(viftable));
+	numvifs = 0;
+
+	/*
+	 * Release the multicast route entries.
+	 * NOTE(review): only the entry stored directly in each slot is
+	 * freed here; if entries can be chained off a slot, the rest of
+	 * the chain would need freeing too -- confirm against add_mrt.
+	 */
+	for (i = 0; i < MRTHASHSIZ; i++) {
+		if (mrttable[i])
+			free(mrttable[i], M_MRTABLE);
+	}
+	bzero((caddr_t)mrttable, sizeof(mrttable));
+	cached_mrt = NULL;
+
+	ip_mrouter = NULL;
+
+	splx(s);
+	return (0);
+}
+
+/*
+ * Add a vif to the vif table.
+ *
+ * vifcp gives the slot index (vifc_vifi), the local address that
+ * selects the interface, the flags and threshold and, for a tunnel,
+ * the remote endpoint address.
+ *
+ * Returns 0 on success; EINVAL if the index is outside the table,
+ * EADDRINUSE if the slot is already taken, EADDRNOTAVAIL if no
+ * interface has the local address, EOPNOTSUPP if a physical
+ * interface does not support multicast, or the error returned by
+ * the interface's SIOCADDMULTI ioctl.
+ */
+static int
+add_vif(vifcp)
+	register struct vifctl *vifcp;
+{
+	register struct vif *vifp;
+	register struct ifaddr *ifa;
+	register struct ifnet *ifp;
+	struct ifreq ifr;
+	register int error, s;
+	static struct sockaddr_in sin = { sizeof(sin), AF_INET };
+
+	/*
+	 * Validate the caller-supplied index before it is used to form
+	 * a pointer into viftable.
+	 */
+	if (vifcp->vifc_vifi >= MAXVIFS)
+		return (EINVAL);
+	vifp = viftable + vifcp->vifc_vifi;
+	/* A nonzero local address marks a slot as in use. */
+	if (vifp->v_lcl_addr.s_addr != 0)
+		return (EADDRINUSE);
+
+	/* Find the interface with an address in AF_INET family */
+	sin.sin_addr = vifcp->vifc_lcl_addr;
+	ifa = ifa_ifwithaddr((struct sockaddr *)&sin);
+	if (ifa == 0)
+		return (EADDRNOTAVAIL);
+
+	s = splnet();
+
+	if (vifcp->vifc_flags & VIFF_TUNNEL)
+		vifp->v_rmt_addr = vifcp->vifc_rmt_addr;
+	else {
+		/* Make sure the interface supports multicast */
+		ifp = ifa->ifa_ifp;
+		if ((ifp->if_flags & IFF_MULTICAST) == 0) {
+			splx(s);
+			return (EOPNOTSUPP);
+		}
+		/*
+		 * Enable promiscuous reception of all IP multicasts
+		 * from the interface.
+		 */
+		satosin(&ifr.ifr_addr)->sin_family = AF_INET;
+		satosin(&ifr.ifr_addr)->sin_addr.s_addr = INADDR_ANY;
+		error = (*ifp->if_ioctl)(ifp, SIOCADDMULTI, (caddr_t)&ifr);
+		if (error) {
+			splx(s);
+			return (error);
+		}
+	}
+
+	vifp->v_flags = vifcp->vifc_flags;
+	vifp->v_threshold = vifcp->vifc_threshold;
+	vifp->v_lcl_addr = vifcp->vifc_lcl_addr;
+	vifp->v_ifp = ifa->ifa_ifp;
+
+	/* Adjust numvifs up if the vifi is higher than numvifs */
+	if (numvifs <= vifcp->vifc_vifi)
+		numvifs = vifcp->vifc_vifi + 1;
+
+	splx(s);
+	return (0);
+}
+
+/*
+ * Delete a vif from the vif table.
+ *
+ * vifip points at the index of the vif to remove.  Returns 0 on
+ * success, EINVAL if the index is not below numvifs, or
+ * EADDRNOTAVAIL if the slot is not in use.
+ */
+static int
+del_vif(vifip)
+	register vifi_t *vifip;
+{
+	register struct vif *vifp;
+	register struct ifnet *ifp;
+	register int i, s;
+	struct ifreq ifr;
+
+	/*
+	 * Validate the caller-supplied index before it is used to form
+	 * a pointer into viftable.
+	 */
+	if (*vifip >= numvifs)
+		return (EINVAL);
+	vifp = viftable + *vifip;
+	/* A zero local address marks a slot as unused. */
+	if (vifp->v_lcl_addr.s_addr == 0)
+		return (EADDRNOTAVAIL);
+
+	s = splnet();
+
+	/*
+	 * A phyint gives back its local group list and has promiscuous
+	 * reception of all IP multicasts disabled on its interface.
+	 */
+	if (!(vifp->v_flags & VIFF_TUNNEL)) {
+		if (vifp->v_lcl_grps)
+			free(vifp->v_lcl_grps, M_MRTABLE);
+		satosin(&ifr.ifr_addr)->sin_family = AF_INET;
+		satosin(&ifr.ifr_addr)->sin_addr.s_addr = INADDR_ANY;
+		ifp = vifp->v_ifp;
+		(*ifp->if_ioctl)(ifp, SIOCDELMULTI, (caddr_t)&ifr);
+	}
+
+	bzero((caddr_t)vifp, sizeof (*vifp));
+
+	/* Adjust numvifs down */
+	for (i = numvifs - 1; i >= 0; i--)
+		if (viftable[i].v_lcl_addr.s_addr != 0)
+			break;
+	numvifs = i + 1;
+
+	splx(s);
+	return (0);
+}
+
+/*
+ * Add the multicast group in the lgrplctl to the list of local multicast
+ * group memberships associated with the vif indexed by gcp->lgc_vifi.
+ *
+ * Returns 0 on success, EINVAL if the vif index is not below numvifs,
+ * EADDRNOTAVAIL if the vif is unused or is a tunnel, or ENOBUFS if
+ * the group list cannot be enlarged.
+ *
+ * The whole update, including any reallocation of the list, is done
+ * at splnet; the level is restored exactly once on each return path.
+ */
+static int
+add_lgrp(gcp)
+	register struct lgrplctl *gcp;
+{
+	register struct vif *vifp;
+	register int s;
+
+	if (gcp->lgc_vifi >= numvifs)
+		return (EINVAL);
+
+	vifp = viftable + gcp->lgc_vifi;
+	if (vifp->v_lcl_addr.s_addr == 0 || (vifp->v_flags & VIFF_TUNNEL))
+		return (EADDRNOTAVAIL);
+
+	/* If not enough space in existing list, allocate a larger one */
+	s = splnet();
+	if (vifp->v_lcl_grps_n + 1 >= vifp->v_lcl_grps_max) {
+		register int num;
+		register struct in_addr *ip;
+
+		num = vifp->v_lcl_grps_max;
+		if (num <= 0)
+			num = 32;	/* initial number */
+		else
+			num += num;	/* double last number */
+		ip = (struct in_addr *)malloc(num * sizeof(*ip),
+		    M_MRTABLE, M_NOWAIT);
+		if (ip == NULL) {
+			splx(s);
+			return (ENOBUFS);
+		}
+
+		bzero((caddr_t)ip, num * sizeof(*ip));	/* XXX paranoid */
+		bcopy((caddr_t)vifp->v_lcl_grps, (caddr_t)ip,
+		    vifp->v_lcl_grps_n * sizeof(*ip));
+
+		vifp->v_lcl_grps_max = num;
+		if (vifp->v_lcl_grps)
+			free(vifp->v_lcl_grps, M_MRTABLE);
+		vifp->v_lcl_grps = ip;
+
+		/*
+		 * Stay at splnet: the list and the membership cache are
+		 * still to be updated below.
+		 */
+	}
+
+	vifp->v_lcl_grps[vifp->v_lcl_grps_n++] = gcp->lgc_gaddr;
+
+	/* Keep the one-entry membership cache consistent with the list. */
+	if (gcp->lgc_gaddr.s_addr == vifp->v_cached_group)
+		vifp->v_cached_result = 1;
+
+	splx(s);
+	return (0);
+}
+
+/*
+ * Delete the local multicast group gcp->lgc_gaddr from the vif indexed
+ * by gcp->lgc_vifi.  Returns 0, EINVAL (vif index >= numvifs) or
+ * EADDRNOTAVAIL (unused or tunnel vif, or group not in the vif's list).
+ */
+static int
+del_lgrp(gcp)
+	register struct lgrplctl *gcp;
+{
+	register struct vif *vifp;
+	register int i, error, s;
+
+	if (gcp->lgc_vifi >= numvifs)
+		return (EINVAL);
+	vifp = viftable + gcp->lgc_vifi;
+	if (vifp->v_lcl_addr.s_addr == 0 || (vifp->v_flags & VIFF_TUNNEL))
+		return (EADDRNOTAVAIL);
+
+	s = splnet();
+
+	if (gcp->lgc_gaddr.s_addr == vifp->v_cached_group)
+		vifp->v_cached_result = 0;
+
+	error = EADDRNOTAVAIL;
+	for (i = 0; i < vifp->v_lcl_grps_n; ++i)
+		if (same(&gcp->lgc_gaddr, &vifp->v_lcl_grps[i])) {
+			/* close the gap; the regions overlap, so ovbcopy */
+			vifp->v_lcl_grps_n--;
+			ovbcopy((caddr_t)&vifp->v_lcl_grps[i + 1],
+			    (caddr_t)&vifp->v_lcl_grps[i],
+			    (vifp->v_lcl_grps_n - i) * sizeof(struct in_addr));
+			error = 0;
+			break;
+		}
+
+	splx(s);
+	return (error);
+}
+
+/*
+ * Report whether gaddr is on vifp's local group list: 1 if so, else 0.
+ * The most recent lookup is remembered in the vif, so asking again for
+ * the same group is answered without walking the list.  Every call is
+ * counted in mrts_grp_lookups and every cache miss in mrts_grp_misses.
+ */
+static int
+grplst_member(vifp, gaddr)
+	register struct vif *vifp;
+	struct in_addr gaddr;
+{
+	register u_long group = gaddr.s_addr;
+	register int idx, found, s;
+
+	mrtstat.mrts_grp_lookups++;
+
+	if (group == vifp->v_cached_group)
+		return (vifp->v_cached_result);
+
+	mrtstat.mrts_grp_misses++;
+
+	/* cache miss: scan the list for the group */
+	found = 0;
+	for (idx = 0; !found && idx < vifp->v_lcl_grps_n; idx++)
+		found = (vifp->v_lcl_grps[idx].s_addr == group);
+
+	/* remember the answer for the next lookup */
+	s = splnet();
+	vifp->v_cached_group = group;
+	vifp->v_cached_result = found;
+	splx(s);
+	return (found);
+}
+
+/*
+ * A simple hash function: returns MRTHASHMOD of the argument's network
+ * or subnet number with its low-order zero octets shifted out.  A zero
+ * network number is hashed as is; shifting it would never terminate.
+ */
+static u_long
+nethash(in)
+	struct in_addr in;
+{
+	register u_long n;
+
+	n = in_netof(in);
+	while (n != 0 && (n & 0xff) == 0)
+		n >>= 8;
+	return (MRTHASHMOD(n));
+}
+
+/*
+ * Add an mrt entry.  If mrtfind() already yields a route for the origin,
+ * only that route's parent, children and leaves are refreshed; otherwise
+ * a new entry is allocated and put at the head of its hash chain.
+ * Returns 0, or ENOBUFS when the allocation fails.
+ */
+static int
+add_mrt(mrtcp)
+	register struct mrtctl *mrtcp;
+{
+	struct mrt *rt;
+	u_long bucket;
+	int s, fresh;
+
+	/* look the origin up before raising the priority level */
+	rt = mrtfind(mrtcp->mrtc_origin);
+	fresh = (rt == NULL);
+
+	s = splnet();
+	if (fresh) {
+		/* no route yet: allocate one and record what identifies it */
+		rt = (struct mrt *)malloc(sizeof(*rt), M_MRTABLE, M_NOWAIT);
+		if (rt == NULL) {
+			splx(s);
+			return (ENOBUFS);
+		}
+		rt->mrt_origin = mrtcp->mrtc_origin;
+		rt->mrt_originmask = mrtcp->mrtc_originmask;
+	}
+
+	/* new and existing entries alike take the caller's forwarding state */
+	rt->mrt_parent = mrtcp->mrtc_parent;
+	VIFM_COPY(mrtcp->mrtc_children, rt->mrt_children);
+	VIFM_COPY(mrtcp->mrtc_leaves, rt->mrt_leaves);
+
+	if (fresh) {
+		/* insert new entry at head of hash chain */
+		bucket = nethash(mrtcp->mrtc_origin);
+		rt->mrt_next = mrttable[bucket];
+		mrttable[bucket] = rt;
+	}
+
+	splx(s);
+	return (0);
+}
+
+/*
+ * Delete the mrt entry whose origin is exactly *origin.
+ * Returns 0 on success or ESRCH if no such entry exists.
+ */
+static int
+del_mrt(origin)
+	register struct in_addr *origin;
+{
+	register struct mrt **linkp, *victim;
+	register int s;
+
+	/* walk the hash chain through the link that points at each entry */
+	linkp = &mrttable[nethash(*origin)];
+	while ((victim = *linkp) != NULL) {
+		if (victim->mrt_origin.s_addr == origin->s_addr)
+			break;
+		linkp = &victim->mrt_next;
+	}
+	if (victim == NULL)
+		return (ESRCH);
+
+	s = splnet();
+	/* never leave the lookup cache pointing at the freed entry */
+	if (cached_mrt == victim)
+		cached_mrt = NULL;
+
+	*linkp = victim->mrt_next;	/* unlink, whether head or interior */
+	free(victim, M_MRTABLE);
+	splx(s);
+	return (0);
+}
+
+/*
+ * Find a route for a given origin IP address.  The most recent hit is
+ * cached and tried first; returns NULL if no entry's masked origin matches.
+ */
+static struct mrt *
+mrtfind(origin)
+	struct in_addr origin;
+{
+	register struct mrt *rt;
+	register u_long addr = origin.s_addr;
+	register int s;
+
+	mrtstat.mrts_mrt_lookups++;
+	if (cached_mrt != NULL && (addr & cached_originmask) == cached_origin)
+		return (cached_mrt);
+	mrtstat.mrts_mrt_misses++;
+
+	rt = mrttable[nethash(origin)];
+	while (rt != NULL &&
+	    (addr & rt->mrt_originmask.s_addr) != rt->mrt_origin.s_addr)
+		rt = rt->mrt_next;
+	if (rt == NULL)
+		return (NULL);
+
+	/* remember the hit so the next lookup can skip the chain walk */
+	s = splnet();
+	cached_mrt = rt;
+	cached_origin = rt->mrt_origin.s_addr;
+	cached_originmask = rt->mrt_originmask.s_addr;
+	splx(s);
+	return (rt);
+}
+
+/*
+ * IP multicast forwarding function. This function assumes that the packet
+ * held in mbuf "m" has arrived on (or is about to be sent to) the interface
+ * pointed to by "ifp", and the packet is to be relayed to other networks
+ * that have members of the packet's destination IP multicast group.
+ *
+ * The packet is returned unscathed to the caller, unless it is tunneled
+ * or erroneous, in which case a non-zero return value (1 for malformed
+ * tunnel options, otherwise the tunnel source) says to discard it.
+ */
+
+#define IP_HDR_LEN  20	/* # bytes of fixed IP header (excluding options) */
+#define TUNNEL_LEN  12  /* # bytes of IP option for tunnel encapsulation  */
+
+int
+ip_mforward(m, ifp)
+	register struct mbuf *m;
+	register struct ifnet *ifp;
+{
+	register struct ip *ip = mtod(m, struct ip *);
+	register struct mrt *rt;
+	register struct vif *vifp;
+	register int vifi;
+	register u_char *ipoptions;
+	u_long tunnel_src;
+
+	if (ip->ip_hl < (IP_HDR_LEN + TUNNEL_LEN) >> 2 ||
+	    (ipoptions = (u_char *)(ip + 1))[1] != IPOPT_LSRR ) {
+		/*
+		 * Packet arrived via a physical interface.
+		 */
+		tunnel_src = 0;		/* 0 == not tunneled */
+	} else {
+		/*
+		 * Packet arrived through a tunnel.
+		 *
+		 * A tunneled packet has a single NOP option and a
+		 * two-element loose-source-and-record-route (LSRR)
+		 * option immediately following the fixed-size part of
+		 * the IP header.  At this point in processing, the IP
+		 * header should contain the following IP addresses:
+		 *
+		 * original source          - in the source address field
+		 * destination group        - in the destination address field
+		 * remote tunnel end-point  - in the first  element of LSRR
+		 * one of this host's addrs - in the second element of LSRR
+		 *
+		 * NOTE: RFC-1075 would have the original source and
+		 * remote tunnel end-point addresses swapped.  However,
+		 * that could cause delivery of ICMP error messages to
+		 * innocent applications on intermediate routing
+		 * hosts!  Therefore, we hereby change the spec.
+		 */
+
+		/*
+		 * Verify that the tunnel options are well-formed.
+		 */
+		if (ipoptions[0] != IPOPT_NOP ||
+		    ipoptions[2] != 11 ||	/* LSRR option length   */
+		    ipoptions[3] != 12 ||	/* LSRR address pointer */
+		    (tunnel_src = *(u_long *)(&ipoptions[4])) == 0) {
+			mrtstat.mrts_bad_tunnel++;
+			return (1);
+		}
+
+		/*
+		 * Delete the tunnel options: slide the rest down, fix lengths.
+		 */
+		ovbcopy((caddr_t)(ipoptions + TUNNEL_LEN), (caddr_t)ipoptions,
+		    (unsigned)(m->m_len - (IP_HDR_LEN + TUNNEL_LEN)));
+		m->m_len -= TUNNEL_LEN;
+		ip->ip_len -= TUNNEL_LEN;
+		ip->ip_hl -= TUNNEL_LEN >> 2;
+	}
+
+	/*
+	 * Don't forward a packet with time-to-live of zero or one,
+	 * or a packet destined to a local-only group.
+	 */
+	if (ip->ip_ttl <= 1 ||
+	    ntohl(ip->ip_dst.s_addr) <= INADDR_MAX_LOCAL_GROUP)
+		return ((int)tunnel_src);
+
+	/*
+	 * Don't forward if we don't have a route for the packet's origin.
+	 */
+	if (!(rt = mrtfind(ip->ip_src))) {
+		mrtstat.mrts_no_route++;
+		return ((int)tunnel_src);
+	}
+
+	/*
+	 * Don't forward if it didn't arrive from the parent vif for its origin.
+	 */
+	vifi = rt->mrt_parent;
+	if (tunnel_src == 0 ) {
+		if ((viftable[vifi].v_flags & VIFF_TUNNEL) ||
+		    viftable[vifi].v_ifp != ifp )
+			return ((int)tunnel_src);
+	} else {
+		if (!(viftable[vifi].v_flags & VIFF_TUNNEL) ||
+		    viftable[vifi].v_rmt_addr.s_addr != tunnel_src )
+			return ((int)tunnel_src);
+	}
+
+	/*
+	 * For each vif, decide if a copy of the packet should be forwarded.
+	 * Forward if:
+	 *		- the ttl exceeds the vif's threshold AND
+	 *		- the vif is a child in the origin's route AND
+	 *		- ( the vif is not a leaf in the origin's route OR
+	 *		    the destination group has members on the vif )
+	 *
+	 * (This might be speeded up with some sort of cache -- someday.)
+	 */
+	for (vifp = viftable, vifi = 0; vifi < numvifs; vifp++, vifi++) {
+		if (ip->ip_ttl > vifp->v_threshold &&
+		    VIFM_ISSET(vifi, rt->mrt_children) &&
+		    (!VIFM_ISSET(vifi, rt->mrt_leaves) ||
+		    grplst_member(vifp, ip->ip_dst))) {
+			if (vifp->v_flags & VIFF_TUNNEL)
+				tunnel_send(m, vifp);
+			else
+				phyint_send(m, vifp);
+		}
+	}
+
+	return ((int)tunnel_src);	/* non-zero iff tunneled */
+}
+
+/*
+ * Send a copy of packet m out the physical interface of vifp, with the
+ * ttl reduced by one and loopback to local group members requested.
+ */
+static void
+phyint_send(m, vifp)
+	register struct mbuf *m;
+	register struct vif *vifp;
+{
+	register struct mbuf *copy;
+	struct ip_moptions mopts;
+
+	copy = m_copy(m, 0, M_COPYALL);
+	if (copy == NULL)
+		return;
+
+	/* one-shot multicast options that steer ip_output() to this vif */
+	mopts.imo_multicast_ifp = vifp->v_ifp;
+	mopts.imo_multicast_ttl = mtod(m, struct ip *)->ip_ttl - 1;
+	mopts.imo_multicast_loop = 1;
+	(void) ip_output(copy, NULL, NULL, IP_FORWARDING, &mopts);
+}
+
+/*
+ * Send a copy of packet m through the tunnel vifp.  A new head mbuf gets
+ * the fixed IP header plus a NOP and a two-element LSRR option and is
+ * addressed to the remote tunnel end-point; m itself is not modified.
+ */
+static void
+tunnel_send(m, vifp)
+	register struct mbuf *m;
+	register struct vif *vifp;
+{
+	register struct ip *ip = mtod(m, struct ip *);
+	register struct mbuf *mb_copy, *mb_opts;
+	register struct ip *ip_copy;
+	register u_char *cp;
+
+	/* The tunnel options must still fit in the 60-byte maximum header. */
+	if (ip->ip_hl > (60 - TUNNEL_LEN) >> 2) {
+		mrtstat.mrts_cant_tunnel++;
+		return;
+	}
+
+	/*
+	 * Copy everything after the fixed IP header; the header itself is
+	 * rebuilt in a fresh mbuf so that the original, which is examined
+	 * later in ip_input.c, is not damaged.
+	 */
+	mb_copy = m_copy(m, IP_HDR_LEN, M_COPYALL);
+	if (mb_copy == NULL)
+		return;
+	MGETHDR(mb_opts, M_DONTWAIT, MT_HEADER);
+	if (mb_opts == NULL) {
+		m_freem(mb_copy);
+		return;
+	}
+	/*
+	 * Make mb_opts be the new head of the packet chain.  MGETHDR leaves
+	 * m_data at m_pktdat, so MH_ALIGN puts the header at the end of the
+	 * data area; adding MSIZE - m_len would point past the mbuf.
+	 */
+	mb_opts->m_next = mb_copy;
+	mb_opts->m_len = IP_HDR_LEN + TUNNEL_LEN;
+	MH_ALIGN(mb_opts, mb_opts->m_len);
+
+	/* Copy the base ip header to the new head mbuf and retarget it. */
+	ip_copy = mtod(mb_opts, struct ip *);
+	*ip_copy = *ip;
+	ip_copy->ip_ttl--;
+	ip_copy->ip_dst = vifp->v_rmt_addr;	/* remote tunnel end-point */
+	/* Adjust the ip header length to account for the tunnel options. */
+	ip_copy->ip_hl += TUNNEL_LEN >> 2;
+	ip_copy->ip_len += TUNNEL_LEN;
+	/* MGETHDR does not fill in the packet header, so do it here. */
+	mb_opts->m_pkthdr.len = ip_copy->ip_len;
+	mb_opts->m_pkthdr.rcvif = (struct ifnet *)0;
+
+	/* Add the NOP and LSRR after the base ip header */
+	cp = (u_char *)(ip_copy + 1);
+	*cp++ = IPOPT_NOP;
+	*cp++ = IPOPT_LSRR;
+	*cp++ = 11;		/* LSRR option length */
+	*cp++ = 8;		/* LSRR pointer to second element */
+	*(u_long*)cp = vifp->v_lcl_addr.s_addr;	/* local tunnel end-point */
+	cp += 4;
+	*(u_long*)cp = ip->ip_dst.s_addr;		/* destination group */
+
+	(void) ip_output(mb_opts, NULL, NULL, IP_FORWARDING, NULL);
+}
+#endif
diff --git a/sys/netinet/ip_mroute.h b/sys/netinet/ip_mroute.h
new file mode 100644
index 00000000000..adb40be9552
--- /dev/null
+++ b/sys/netinet/ip_mroute.h
@@ -0,0 +1,173 @@
+/*
+ * Copyright (c) 1989 Stephen Deering.
+ * Copyright (c) 1992, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Stephen Deering of Stanford University.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)ip_mroute.h	8.1 (Berkeley) 6/10/93
+ */
+
+/*
+ * Definitions for the kernel part of DVMRP,
+ * a Distance-Vector Multicast Routing Protocol.
+ * (See RFC-1075.)
+ *
+ * Written by David Waitzman, BBN Labs, August 1988.
+ * Modified by Steve Deering, Stanford, February 1989.
+ *
+ * MROUTING 1.0
+ */
+
+
+/*
+ * DVMRP-specific setsockopt commands.
+ */
+#define	DVMRP_INIT	100
+#define	DVMRP_DONE	101
+#define	DVMRP_ADD_VIF	102	/* arg: struct vifctl */
+#define	DVMRP_DEL_VIF	103	/* arg: vifi_t */
+#define	DVMRP_ADD_LGRP	104	/* arg: struct lgrplctl */
+#define	DVMRP_DEL_LGRP	105	/* arg: struct lgrplctl */
+#define	DVMRP_ADD_MRT	106	/* arg: struct mrtctl */
+#define	DVMRP_DEL_MRT	107	/* arg: struct in_addr (origin) */
+
+/*
+ * Types and macros for handling bitmaps with one bit per virtual interface.
+ * Bits are built as vifbitmap_t so bit 31 is not a signed int overflow.
+ */
+#define	MAXVIFS 32
+typedef u_long vifbitmap_t;
+typedef u_short vifi_t;		/* type of a vif index */
+
+#define	VIFM_SET(n, m)		((m) |= ((vifbitmap_t)1 << (n)))
+#define	VIFM_CLR(n, m)		((m) &= ~((vifbitmap_t)1 << (n)))
+#define	VIFM_ISSET(n, m)	((m) & ((vifbitmap_t)1 << (n)))
+#define	VIFM_CLRALL(m)		((m) = 0x00000000)
+#define	VIFM_COPY(mfrom, mto)	((mto) = (mfrom))
+#define	VIFM_SAME(m1, m2)	((m1) == (m2))
+
+
+/*
+ * Argument structure for DVMRP_ADD_VIF.
+ * (DVMRP_DEL_VIF takes a single vifi_t argument.)
+ */
+struct vifctl {
+	vifi_t	    vifc_vifi;	    	/* the index of the vif to be added */
+	u_char	    vifc_flags;     	/* VIFF_ flags defined below */
+	u_char	    vifc_threshold; 	/* min ttl required to forward on vif */
+	struct	in_addr vifc_lcl_addr;	/* local interface address */
+	struct	in_addr vifc_rmt_addr;	/* remote address (tunnels only) */
+};
+
+#define	VIFF_TUNNEL	0x1		/* vif represents a tunnel end-point */
+
+
+/*
+ * Argument structure for DVMRP_ADD_LGRP and DVMRP_DEL_LGRP.
+ */
+struct lgrplctl {
+	vifi_t	lgc_vifi;		/* index of the vif the group is on */
+	struct	in_addr lgc_gaddr;	/* the local multicast group */
+};
+
+
+/*
+ * Argument structure for DVMRP_ADD_MRT.
+ * (DVMRP_DEL_MRT takes a single struct in_addr argument, containing origin.)
+ */
+struct mrtctl {
+	struct	in_addr mrtc_origin;	/* subnet origin of multicasts */
+	struct	in_addr mrtc_originmask; /* subnet mask for origin */
+	vifi_t	mrtc_parent;    	/* index of the incoming vif */
+	vifbitmap_t mrtc_children;	/* bitmap of outgoing children vifs */
+	vifbitmap_t mrtc_leaves;	/* subset of outgoing children vifs */
+};
+
+
+#ifdef KERNEL
+
+/*
+ * The kernel's virtual-interface structure; a vifi_t indexes viftable[].
+ */
+struct vif {
+	u_char	v_flags;		/* VIFF_ flags defined above */
+	u_char	v_threshold;		/* packet ttl must exceed this to go out */
+	struct	in_addr v_lcl_addr;	/* local interface address; 0 = unused */
+	struct	in_addr v_rmt_addr;	/* remote address (tunnels only) */
+	struct	ifnet  *v_ifp;		/* pointer to interface */
+	struct	in_addr *v_lcl_grps;	/* list of local grps (phyints only) */
+	int	v_lcl_grps_max;		/* malloc'ed number of v_lcl_grps */
+	int	v_lcl_grps_n;		/* used number of v_lcl_grps */
+	u_long	v_cached_group;		/* last grp looked-up (phyints only) */
+	int	v_cached_result;	/* last look-up result: 1 = member */
+};
+
+/*
+ * The kernel's multicast route structure; entries hang off mrttable[].
+ */
+struct mrt {
+	struct	in_addr mrt_origin;	/* subnet origin of multicasts */
+	struct	in_addr mrt_originmask;	/* subnet mask for origin */
+	vifi_t	mrt_parent;    		/* index of the incoming vif */
+	vifbitmap_t mrt_children;	/* bitmap of outgoing children vifs */
+	vifbitmap_t mrt_leaves;		/* subset of outgoing children vifs */
+	struct	mrt *mrt_next;		/* next entry on the same hash chain */
+};
+
+/* Hash values fall in [0, MRTHASHSIZ); a power of 2 allows masking. */
+#define	MRTHASHSIZ	64
+#if (MRTHASHSIZ & (MRTHASHSIZ - 1)) == 0	  /* from sys:route.h */
+#define	MRTHASHMOD(h)	((h) & (MRTHASHSIZ - 1))
+#else
+#define	MRTHASHMOD(h)	((h) % MRTHASHSIZ)
+#endif
+
+/*
+ * The kernel's multicast routing statistics (event counters).
+ */
+struct mrtstat {
+	u_long	mrts_mrt_lookups;	/* # multicast route lookups */
+	u_long	mrts_mrt_misses;	/* # multicast route cache misses */
+	u_long	mrts_grp_lookups;	/* # group address lookups */
+	u_long	mrts_grp_misses;	/* # group address cache misses */
+	u_long	mrts_no_route;		/* # pkts with no route for origin */
+	u_long	mrts_bad_tunnel;	/* # pkts with malformed tunnel opts */
+	u_long	mrts_cant_tunnel;	/* # pkts with no room for tunnel opts */
+};
+
+
+int	ip_mrouter_cmd __P((int, struct socket *, struct mbuf *));
+int	ip_mrouter_done __P((void));
+
+#endif /* KERNEL */
+
diff --git a/sys/netinet/ip_output.c b/sys/netinet/ip_output.c
new file mode 100644
index 00000000000..4c22a5e53ec
--- /dev/null
+++ b/sys/netinet/ip_output.c
@@ -0,0 +1,1064 @@
+/*
+ * Copyright (c) 1982, 1986, 1988, 1990, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)ip_output.c	8.3 (Berkeley) 1/21/94
+ */
+
+#include <sys/param.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/errno.h>
+#include <sys/protosw.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+
+#include <net/if.h>
+#include <net/route.h>
+
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/ip.h>
+#include <netinet/in_pcb.h>
+#include <netinet/in_var.h>
+#include <netinet/ip_var.h>
+
+#ifdef vax
+#include <machine/mtpr.h>
+#endif
+
+static struct mbuf *ip_insertoptions __P((struct mbuf *, struct mbuf *, int *));
+static void ip_mloopback
+	__P((struct ifnet *, struct mbuf *, struct sockaddr_in *));
+
+/*
+ * IP output.  The packet in mbuf chain m0 contains a skeletal IP
+ * header (with len, off, ttl, proto, tos, src, dst).  The chain is
+ * always consumed (sent or freed); the mbuf opt, if present, is not
+ * freed.  Returns 0 or an errno value.
+ */
+int
+ip_output(m0, opt, ro, flags, imo)
+	struct mbuf *m0;
+	struct mbuf *opt;
+	struct route *ro;
+	int flags;
+	struct ip_moptions *imo;
+{
+	register struct ip *ip, *mhip;
+	register struct ifnet *ifp;
+	register struct mbuf *m = m0;
+	register int hlen = sizeof (struct ip);
+	int len, off, error = 0;
+	struct route iproute;
+	struct sockaddr_in *dst;
+	struct in_ifaddr *ia;
+
+#ifdef	DIAGNOSTIC
+	if ((m->m_flags & M_PKTHDR) == 0)
+		panic("ip_output no HDR");
+#endif
+	if (opt) {
+		m = ip_insertoptions(m, opt, &len);
+		hlen = len;
+	}
+	ip = mtod(m, struct ip *);
+	/*
+	 * Fill in IP header.
+	 */
+	if ((flags & (IP_FORWARDING|IP_RAWOUTPUT)) == 0) {
+		ip->ip_v = IPVERSION;
+		ip->ip_off &= IP_DF;
+		ip->ip_id = htons(ip_id++);
+		ip->ip_hl = hlen >> 2;
+		ipstat.ips_localout++;
+	} else {
+		hlen = ip->ip_hl << 2;
+	}
+	/*
+	 * Route packet.
+	 */
+	if (ro == 0) {
+		ro = &iproute;
+		bzero((caddr_t)ro, sizeof (*ro));
+	}
+	dst = (struct sockaddr_in *)&ro->ro_dst;
+	/*
+	 * If there is a cached route,
+	 * check that it is to the same destination
+	 * and is still up.  If not, free it and try again.
+	 */
+	if (ro->ro_rt && ((ro->ro_rt->rt_flags & RTF_UP) == 0 ||
+	   dst->sin_addr.s_addr != ip->ip_dst.s_addr)) {
+		RTFREE(ro->ro_rt);
+		ro->ro_rt = (struct rtentry *)0;
+	}
+	if (ro->ro_rt == 0) {
+		dst->sin_family = AF_INET;
+		dst->sin_len = sizeof(*dst);
+		dst->sin_addr = ip->ip_dst;
+	}
+	/*
+	 * If routing to interface only,
+	 * short circuit routing lookup.
+	 */
+#define ifatoia(ifa)	((struct in_ifaddr *)(ifa))
+#define sintosa(sin)	((struct sockaddr *)(sin))
+	if (flags & IP_ROUTETOIF) {
+		if ((ia = ifatoia(ifa_ifwithdstaddr(sintosa(dst)))) == 0 &&
+		    (ia = ifatoia(ifa_ifwithnet(sintosa(dst)))) == 0) {
+			ipstat.ips_noroute++;
+			error = ENETUNREACH;
+			goto bad;
+		}
+		ifp = ia->ia_ifp;
+		ip->ip_ttl = 1;
+	} else {
+		if (ro->ro_rt == 0)
+			rtalloc(ro);
+		if (ro->ro_rt == 0) {
+			ipstat.ips_noroute++;
+			error = EHOSTUNREACH;
+			goto bad;
+		}
+		ia = ifatoia(ro->ro_rt->rt_ifa);
+		ifp = ro->ro_rt->rt_ifp;
+		ro->ro_rt->rt_use++;
+		if (ro->ro_rt->rt_flags & RTF_GATEWAY)
+			dst = (struct sockaddr_in *)ro->ro_rt->rt_gateway;
+	}
+	if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) {
+		struct in_multi *inm;
+		extern struct ifnet loif;
+
+		m->m_flags |= M_MCAST;
+		/*
+		 * IP destination address is multicast.  Make sure "dst"
+		 * still points to the address in "ro".  (It may have been
+		 * changed to point to a gateway address, above.)
+		 */
+		dst = (struct sockaddr_in *)&ro->ro_dst;
+		/*
+		 * See if the caller provided any multicast options
+		 */
+		if (imo != NULL) {
+			ip->ip_ttl = imo->imo_multicast_ttl;
+			if (imo->imo_multicast_ifp != NULL)
+				ifp = imo->imo_multicast_ifp;
+		} else
+			ip->ip_ttl = IP_DEFAULT_MULTICAST_TTL;
+		/*
+		 * Confirm that the outgoing interface supports multicast.
+		 */
+		if ((ifp->if_flags & IFF_MULTICAST) == 0) {
+			ipstat.ips_noroute++;
+			error = ENETUNREACH;
+			goto bad;
+		}
+		/*
+		 * If source address not specified yet, use address
+		 * of outgoing interface.
+		 */
+		if (ip->ip_src.s_addr == INADDR_ANY) {
+			register struct in_ifaddr *ia;
+
+			for (ia = in_ifaddr; ia; ia = ia->ia_next)
+				if (ia->ia_ifp == ifp) {
+					ip->ip_src = IA_SIN(ia)->sin_addr;
+					break;
+				}
+		}
+
+		IN_LOOKUP_MULTI(ip->ip_dst, ifp, inm);
+		if (inm != NULL &&
+		   (imo == NULL || imo->imo_multicast_loop)) {
+			/*
+			 * If we belong to the destination multicast group
+			 * on the outgoing interface, and the caller did not
+			 * forbid loopback, loop back a copy.
+			 */
+			ip_mloopback(ifp, m, dst);
+		}
+#ifdef MROUTING
+		else {
+			/*
+			 * If we are acting as a multicast router, perform
+			 * multicast forwarding as if the packet had just
+			 * arrived on the interface to which we are about
+			 * to send.  The multicast forwarding function
+			 * recursively calls this function, using the
+			 * IP_FORWARDING flag to prevent infinite recursion.
+			 *
+			 * Multicasts that are looped back by ip_mloopback(),
+			 * above, will be forwarded by the ip_input() routine,
+			 * if necessary.
+			 */
+			extern struct socket *ip_mrouter;
+			if (ip_mrouter && (flags & IP_FORWARDING) == 0) {
+				if (ip_mforward(m, ifp) != 0) {
+					m_freem(m);
+					goto done;
+				}
+			}
+		}
+#endif
+		/*
+		 * Multicasts with a time-to-live of zero may be looped-
+		 * back, above, but must not be transmitted on a network.
+		 * Also, multicasts addressed to the loopback interface
+		 * are not sent -- the above call to ip_mloopback() will
+		 * loop back a copy if this host actually belongs to the
+		 * destination group on the loopback interface.
+		 */
+		if (ip->ip_ttl == 0 || ifp == &loif) {
+			m_freem(m);
+			goto done;
+		}
+
+		goto sendit;
+	}
+#ifndef notdef
+	/*
+	 * If source address not specified yet, use address
+	 * of outgoing interface.
+	 */
+	if (ip->ip_src.s_addr == INADDR_ANY)
+		ip->ip_src = IA_SIN(ia)->sin_addr;
+#endif
+	/*
+	 * Look for broadcast address and
+	 * and verify user is allowed to send
+	 * such a packet.
+	 */
+	if (in_broadcast(dst->sin_addr, ifp)) {
+		if ((ifp->if_flags & IFF_BROADCAST) == 0) {
+			error = EADDRNOTAVAIL;
+			goto bad;
+		}
+		if ((flags & IP_ALLOWBROADCAST) == 0) {
+			error = EACCES;
+			goto bad;
+		}
+		/* don't allow broadcast messages to be fragmented */
+		if ((u_short)ip->ip_len > ifp->if_mtu) {
+			error = EMSGSIZE;
+			goto bad;
+		}
+		m->m_flags |= M_BCAST;
+	} else
+		m->m_flags &= ~M_BCAST;
+
+sendit:
+	/*
+	 * If small enough for interface, can just send directly.
+	 */
+	if ((u_short)ip->ip_len <= ifp->if_mtu) {
+		ip->ip_len = htons((u_short)ip->ip_len);
+		ip->ip_off = htons((u_short)ip->ip_off);
+		ip->ip_sum = 0;
+		ip->ip_sum = in_cksum(m, hlen);
+		error = (*ifp->if_output)(ifp, m,
+				(struct sockaddr *)dst, ro->ro_rt);
+		goto done;
+	}
+	/*
+	 * Too large for interface; fragment if possible.
+	 * Must be able to put at least 8 bytes per fragment.
+	 */
+	if (ip->ip_off & IP_DF) {
+		error = EMSGSIZE;
+		ipstat.ips_cantfrag++;
+		goto bad;
+	}
+	len = (ifp->if_mtu - hlen) &~ 7;
+	if (len < 8) {
+		error = EMSGSIZE;
+		goto bad;
+	}
+
+    {
+	int mhlen, firstlen = len;
+	struct mbuf **mnext = &m->m_nextpkt;
+
+	/*
+	 * Loop through length of segment after first fragment,
+	 * make new header and copy data of each part and link onto chain.
+	 */
+	m0 = m;
+	mhlen = sizeof (struct ip);
+	for (off = hlen + len; off < (u_short)ip->ip_len; off += len) {
+		MGETHDR(m, M_DONTWAIT, MT_HEADER);
+		if (m == 0) {
+			error = ENOBUFS;
+			ipstat.ips_odropped++;
+			goto sendorfree;
+		}
+		m->m_data += max_linkhdr;
+		mhip = mtod(m, struct ip *);
+		*mhip = *ip;
+		if (hlen > sizeof (struct ip)) {
+			mhlen = ip_optcopy(ip, mhip) + sizeof (struct ip);
+			mhip->ip_hl = mhlen >> 2;
+		}
+		m->m_len = mhlen;
+		mhip->ip_off = ((off - hlen) >> 3) + (ip->ip_off & ~IP_MF);
+		if (ip->ip_off & IP_MF)
+			mhip->ip_off |= IP_MF;
+		if (off + len >= (u_short)ip->ip_len)
+			len = (u_short)ip->ip_len - off;
+		else
+			mhip->ip_off |= IP_MF;
+		mhip->ip_len = htons((u_short)(len + mhlen));
+		m->m_next = m_copy(m0, off, len);
+		if (m->m_next == 0) {
+			(void) m_free(m);
+			error = ENOBUFS;	/* ??? */
+			ipstat.ips_odropped++;
+			goto sendorfree;
+		}
+		m->m_pkthdr.len = mhlen + len;
+		m->m_pkthdr.rcvif = (struct ifnet *)0;
+		mhip->ip_off = htons((u_short)mhip->ip_off);
+		mhip->ip_sum = 0;
+		mhip->ip_sum = in_cksum(m, mhlen);
+		*mnext = m;
+		mnext = &m->m_nextpkt;
+		ipstat.ips_ofragments++;
+	}
+	/*
+	 * Update first fragment by trimming what's been copied out
+	 * and updating header, then send each fragment (in order).
+	 */
+	m = m0;
+	m_adj(m, hlen + firstlen - (u_short)ip->ip_len);
+	m->m_pkthdr.len = hlen + firstlen;
+	ip->ip_len = htons((u_short)m->m_pkthdr.len);
+	ip->ip_off = htons((u_short)(ip->ip_off | IP_MF));
+	ip->ip_sum = 0;
+	ip->ip_sum = in_cksum(m, hlen);
+sendorfree:
+	for (m = m0; m; m = m0) {
+		m0 = m->m_nextpkt;
+		m->m_nextpkt = 0;
+		if (error == 0)
+			error = (*ifp->if_output)(ifp, m,
+			    (struct sockaddr *)dst, ro->ro_rt);
+		else
+			m_freem(m);
+	}
+
+	if (error == 0)
+		ipstat.ips_fragmented++;
+    }
+done:
+	if (ro == &iproute && (flags & IP_ROUTETOIF) == 0 && ro->ro_rt)
+		RTFREE(ro->ro_rt);
+	return (error);
+bad:
+	m_freem(m);	/* not m0: options may have added a new head */
+	goto done;
+}
+
+/*
+ * Insert IP options into preformed packet.  Adjust IP destination for
+ * IP source routing when the options start with a non-zero in_addr.
+ * *phlen gets the resulting header length, also on the failure returns.
+ */
+static struct mbuf *
+ip_insertoptions(m, opt, phlen)
+	register struct mbuf *m;
+	struct mbuf *opt;
+	int *phlen;
+{
+	register struct ipoption *p = mtod(opt, struct ipoption *);
+	struct mbuf *n;
+	register struct ip *ip = mtod(m, struct ip *);
+	unsigned optlen;
+	*phlen = sizeof(struct ip);	/* header size if we bail out below */
+	optlen = opt->m_len - sizeof(p->ipopt_dst);
+	if (optlen + (u_short)ip->ip_len > IP_MAXPACKET)
+		return (m);		/* XXX should fail */
+	if (p->ipopt_dst.s_addr)
+		ip->ip_dst = p->ipopt_dst;
+	if (m->m_flags & M_EXT || m->m_data - optlen < m->m_pktdat) {
+		MGETHDR(n, M_DONTWAIT, MT_HEADER);
+		if (n == 0)
+			return (m);
+		n->m_pkthdr.len = m->m_pkthdr.len + optlen;
+		m->m_len -= sizeof(struct ip);
+		m->m_data += sizeof(struct ip);
+		n->m_next = m;
+		m = n;
+		m->m_len = optlen + sizeof(struct ip);
+		m->m_data += max_linkhdr;
+		bcopy((caddr_t)ip, mtod(m, caddr_t), sizeof(struct ip));
+	} else {
+		m->m_data -= optlen;
+		m->m_len += optlen;
+		m->m_pkthdr.len += optlen;
+		ovbcopy((caddr_t)ip, mtod(m, caddr_t), sizeof(struct ip));
+	}
+	ip = mtod(m, struct ip *);
+	bcopy((caddr_t)p->ipopt_list, (caddr_t)(ip + 1), (unsigned)optlen);
+	*phlen = sizeof(struct ip) + optlen;
+	ip->ip_len += optlen;
+	return (m);
+}
+
+/*
+ * Copy options from ip to jp, omitting those not copied during
+ * fragmentation.  Returns jp's option length, EOL-padded to 4 bytes.
+ */
+int
+ip_optcopy(ip, jp)
+	struct ip *ip, *jp;
+{
+	register u_char *from = (u_char *)(ip + 1);
+	register u_char *to = (u_char *)(jp + 1);
+	int left, len, type;
+
+	left = (ip->ip_hl << 2) - sizeof (struct ip);
+	while (left > 0 && (type = from[0]) != IPOPT_EOL) {
+		if (type == IPOPT_NOP) {
+			/* Preserve for IP mcast tunnel's LSRR alignment. */
+			*to++ = IPOPT_NOP;
+			len = 1;
+		} else {
+			len = from[IPOPT_OLEN];
+			/* ip_dooptions should have caught bogus lengths */
+			if (len > left)
+				len = left;
+			if (IPOPT_COPIED(type)) {
+				bcopy((caddr_t)from, (caddr_t)to,
+				    (unsigned)len);
+				to += len;
+			}
+		}
+		left -= len;
+		from += len;
+	}
+	/* pad the copied options with EOL up to a 32-bit boundary */
+	for (len = to - (u_char *)(jp + 1); len & 0x3; len++)
+		*to++ = IPOPT_EOL;
+	return (len);
+}
+
+/*
+ * IP socket option processing.  A set takes its value from *mp (freed
+ * here or passed on), a get returns it through *mp; 0 or errno result.
+ */
+int
+ip_ctloutput(op, so, level, optname, mp)
+	int op;
+	struct socket *so;
+	int level, optname;
+	struct mbuf **mp;
+{
+	register struct inpcb *inp = sotoinpcb(so);
+	register struct mbuf *m = *mp;
+	register int optval;
+	int error = 0;
+
+	if (level != IPPROTO_IP) {
+		error = EINVAL;
+		if (op == PRCO_SETOPT && *mp)
+			(void) m_free(*mp);
+	} else switch (op) {
+	case PRCO_SETOPT:
+		switch (optname) {
+		case IP_OPTIONS:
+#ifdef notyet
+		case IP_RETOPTS:
+			return (ip_pcbopts(optname, &inp->inp_options, m));
+#else
+			return (ip_pcbopts(&inp->inp_options, m));
+#endif
+
+		case IP_TOS:
+		case IP_TTL:
+		case IP_RECVOPTS:
+		case IP_RECVRETOPTS:
+		case IP_RECVDSTADDR:
+			/* a set request may come without a value mbuf */
+			if (m == 0 || m->m_len != sizeof(int))
+				error = EINVAL;
+			else {
+				optval = *mtod(m, int *);
+				switch (optname) {
+				case IP_TOS:
+					inp->inp_ip.ip_tos = optval;
+					break;
+
+				case IP_TTL:
+					inp->inp_ip.ip_ttl = optval;
+					break;
+#define	OPTSET(bit) \
+	if (optval) \
+		inp->inp_flags |= bit; \
+	else \
+		inp->inp_flags &= ~bit;
+
+				case IP_RECVOPTS:
+					OPTSET(INP_RECVOPTS);
+					break;
+
+				case IP_RECVRETOPTS:
+					OPTSET(INP_RECVRETOPTS);
+					break;
+
+				case IP_RECVDSTADDR:
+					OPTSET(INP_RECVDSTADDR);
+					break;
+				}
+			}
+			break;
+#undef OPTSET
+
+		case IP_MULTICAST_IF:
+		case IP_MULTICAST_TTL:
+		case IP_MULTICAST_LOOP:
+		case IP_ADD_MEMBERSHIP:
+		case IP_DROP_MEMBERSHIP:
+			error = ip_setmoptions(optname, &inp->inp_moptions, m);
+			break;
+
+		default:
+			error = ENOPROTOOPT;
+			break;
+		}
+		if (m)
+			(void)m_free(m);
+		break;
+
+	case PRCO_GETOPT:
+		switch (optname) {
+		case IP_OPTIONS:
+		case IP_RETOPTS:
+			*mp = m = m_get(M_WAIT, MT_SOOPTS);
+			if (inp->inp_options) {
+				m->m_len = inp->inp_options->m_len;
+				bcopy(mtod(inp->inp_options, caddr_t),
+				    mtod(m, caddr_t), (unsigned)m->m_len);
+			} else
+				m->m_len = 0;
+			break;
+
+		case IP_TOS:
+		case IP_TTL:
+		case IP_RECVOPTS:
+		case IP_RECVRETOPTS:
+		case IP_RECVDSTADDR:
+			*mp = m = m_get(M_WAIT, MT_SOOPTS);
+			m->m_len = sizeof(int);
+			switch (optname) {
+
+			case IP_TOS:
+				optval = inp->inp_ip.ip_tos;
+				break;
+
+			case IP_TTL:
+				optval = inp->inp_ip.ip_ttl;
+				break;
+
+#define	OPTBIT(bit)	(inp->inp_flags & bit ? 1 : 0)
+
+			case IP_RECVOPTS:
+				optval = OPTBIT(INP_RECVOPTS);
+				break;
+
+			case IP_RECVRETOPTS:
+				optval = OPTBIT(INP_RECVRETOPTS);
+				break;
+
+			case IP_RECVDSTADDR:
+				optval = OPTBIT(INP_RECVDSTADDR);
+				break;
+			}
+			*mtod(m, int *) = optval;
+			break;
+
+		case IP_MULTICAST_IF:
+		case IP_MULTICAST_TTL:
+		case IP_MULTICAST_LOOP:
+		case IP_ADD_MEMBERSHIP:
+		case IP_DROP_MEMBERSHIP:
+			error = ip_getmoptions(optname, inp->inp_moptions, mp);
+			break;
+
+		default:
+			error = ENOPROTOOPT;
+			break;
+		}
+		break;
+	}
+	return (error);
+}
+
+/*
+ * Set up IP options in pcb for insertion in output packets.
+ * Store in mbuf with pointer in pcbopt, adding pseudo-option
+ * with destination address if source routed.  m is kept or freed here.
+ */
+int
+#ifdef notyet
+ip_pcbopts(optname, pcbopt, m)
+	int optname;
+#else
+ip_pcbopts(pcbopt, m)
+#endif
+	struct mbuf **pcbopt;
+	register struct mbuf *m;
+{
+	register cnt, optlen;
+	register u_char *cp;
+	u_char opt;
+
+	/* turn off any old options */
+	if (*pcbopt)
+		(void)m_free(*pcbopt);
+	*pcbopt = 0;
+	if (m == (struct mbuf *)0 || m->m_len == 0) {
+		/*
+		 * Only turning off any previous options.
+		 */
+		if (m)
+			(void)m_free(m);
+		return (0);
+	}
+
+#ifndef	vax
+	if (m->m_len % sizeof(long))
+		goto bad;
+#endif
+	/*
+	 * IP first-hop destination address will be stored before
+	 * actual options; move other options back
+	 * and clear it when none present.
+	 */
+	if (m->m_data + m->m_len + sizeof(struct in_addr) >= &m->m_dat[MLEN])
+		goto bad;	/* no room in the mbuf for the extra address */
+	cnt = m->m_len;		/* bytes of options supplied by the caller */
+	m->m_len += sizeof(struct in_addr);
+	cp = mtod(m, u_char *) + sizeof(struct in_addr);
+	ovbcopy(mtod(m, caddr_t), (caddr_t)cp, (unsigned)cnt);
+	bzero(mtod(m, caddr_t), sizeof(struct in_addr));
+
+	for (; cnt > 0; cnt -= optlen, cp += optlen) {
+		opt = cp[IPOPT_OPTVAL];
+		if (opt == IPOPT_EOL)
+			break;
+		if (opt == IPOPT_NOP)
+			optlen = 1;
+		else {
+			optlen = cp[IPOPT_OLEN];
+			if (optlen <= IPOPT_OLEN || optlen > cnt)
+				goto bad;	/* length too small or past the end */
+		}
+		switch (opt) {
+
+		default:
+			break;
+
+		case IPOPT_LSRR:
+		case IPOPT_SSRR:
+			/*
+			 * user process specifies route as:
+			 *	->A->B->C->D
+			 * D must be our final destination (but we can't
+			 * check that since we may not have connected yet).
+			 * A is first hop destination, which doesn't appear in
+			 * actual IP option, but is stored before the options.
+			 */
+			if (optlen < IPOPT_MINOFF - 1 + sizeof(struct in_addr))
+				goto bad;
+			m->m_len -= sizeof(struct in_addr);
+			cnt -= sizeof(struct in_addr);
+			optlen -= sizeof(struct in_addr);
+			cp[IPOPT_OLEN] = optlen;
+			/*
+			 * Move first hop before start of options.
+			 */
+			bcopy((caddr_t)&cp[IPOPT_OFFSET+1], mtod(m, caddr_t),
+			    sizeof(struct in_addr));
+			/*
+			 * Then copy rest of options back
+			 * to close up the deleted entry.
+			 */
+			ovbcopy((caddr_t)(&cp[IPOPT_OFFSET+1] +
+			    sizeof(struct in_addr)),
+			    (caddr_t)&cp[IPOPT_OFFSET+1],
+			    (unsigned)cnt + sizeof(struct in_addr));
+			break;
+		}
+	}
+	if (m->m_len > MAX_IPOPTLEN + sizeof(struct in_addr))
+		goto bad;
+	*pcbopt = m;		/* success: the pcb now holds m */
+	return (0);
+
+bad:
+	(void)m_free(m);
+	return (EINVAL);
+}
+
+/*
+ * Set one IP multicast option in response to user setsockopt().
+ */
+int
+ip_setmoptions(optname, imop, m)
+	int optname;
+	struct ip_moptions **imop;
+	struct mbuf *m;
+{
+	register int error = 0;
+	u_char loop;
+	register int i;
+	struct in_addr addr;
+	register struct ip_mreq *mreq;
+	register struct ifnet *ifp;
+	register struct ip_moptions *imo = *imop;
+	struct route ro;
+	register struct sockaddr_in *dst;
+
+	if (imo == NULL) {
+		/*
+		 * No multicast option buffer attached to the pcb;
+		 * allocate one and initialize to default values.
+		 */
+		imo = (struct ip_moptions*)malloc(sizeof(*imo), M_IPMOPTS,
+		    M_WAITOK);
+
+		if (imo == NULL)
+			return (ENOBUFS);
+		*imop = imo;
+		imo->imo_multicast_ifp = NULL;
+		imo->imo_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
+		imo->imo_multicast_loop = IP_DEFAULT_MULTICAST_LOOP;
+		imo->imo_num_memberships = 0;
+	}
+
+	switch (optname) {
+
+	case IP_MULTICAST_IF:
+		/*
+		 * Select the interface for outgoing multicast packets.
+		 */
+		if (m == NULL || m->m_len != sizeof(struct in_addr)) {
+			error = EINVAL;
+			break;
+		}
+		addr = *(mtod(m, struct in_addr *));
+		/*
+		 * INADDR_ANY is used to remove a previous selection.
+		 * When no interface is selected, a default one is
+		 * chosen every time a multicast packet is sent.
+		 */
+		if (addr.s_addr == INADDR_ANY) {
+			imo->imo_multicast_ifp = NULL;
+			break;
+		}
+		/*
+		 * The selected interface is identified by its local
+		 * IP address.  Find the interface and confirm that
+		 * it supports multicasting.
+		 */
+		INADDR_TO_IFP(addr, ifp);
+		if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
+			error = EADDRNOTAVAIL;
+			break;
+		}
+		imo->imo_multicast_ifp = ifp;
+		break;
+
+	case IP_MULTICAST_TTL:
+		/*
+		 * Set the IP time-to-live for outgoing multicast packets.
+		 */
+		if (m == NULL || m->m_len != 1) {
+			error = EINVAL;
+			break;
+		}
+		imo->imo_multicast_ttl = *(mtod(m, u_char *));
+		break;
+
+	case IP_MULTICAST_LOOP:
+		/*
+		 * Set the loopback flag for outgoing multicast packets.
+		 * Must be zero or one.
+		 */
+		if (m == NULL || m->m_len != 1 ||
+		   (loop = *(mtod(m, u_char *))) > 1) {
+			error = EINVAL;
+			break;
+		}
+		imo->imo_multicast_loop = loop;
+		break;
+
+	case IP_ADD_MEMBERSHIP:
+		/*
+		 * Add a multicast group membership.
+		 * Group must be a valid IP multicast address.
+		 */
+		if (m == NULL || m->m_len != sizeof(struct ip_mreq)) {
+			error = EINVAL;
+			break;
+		}
+		mreq = mtod(m, struct ip_mreq *);
+		if (!IN_MULTICAST(ntohl(mreq->imr_multiaddr.s_addr))) {
+			error = EINVAL;
+			break;
+		}
+		/*
+		 * If no interface address was provided, use the interface of
+		 * the route to the given multicast address.
+		 */
+		if (mreq->imr_interface.s_addr == INADDR_ANY) {
+			ro.ro_rt = NULL;
+			dst = (struct sockaddr_in *)&ro.ro_dst;
+			dst->sin_len = sizeof(*dst);
+			dst->sin_family = AF_INET;
+			dst->sin_addr = mreq->imr_multiaddr;
+			rtalloc(&ro);
+			if (ro.ro_rt == NULL) {
+				error = EADDRNOTAVAIL;
+				break;
+			}
+			ifp = ro.ro_rt->rt_ifp;
+			rtfree(ro.ro_rt);	/* only the ifp was needed */
+		}
+		else {
+			INADDR_TO_IFP(mreq->imr_interface, ifp);
+		}
+		/*
+		 * See if we found an interface, and confirm that it
+		 * supports multicast.
+		 */
+		if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
+			error = EADDRNOTAVAIL;
+			break;
+		}
+		/*
+		 * See if the membership already exists or if all the
+		 * membership slots are full.
+		 */
+		for (i = 0; i < imo->imo_num_memberships; ++i) {
+			if (imo->imo_membership[i]->inm_ifp == ifp &&
+			    imo->imo_membership[i]->inm_addr.s_addr
+						== mreq->imr_multiaddr.s_addr)
+				break;
+		}
+		if (i < imo->imo_num_memberships) {
+			error = EADDRINUSE;
+			break;
+		}
+		if (i == IP_MAX_MEMBERSHIPS) {
+			error = ETOOMANYREFS;
+			break;
+		}
+		/*
+		 * Everything looks good; add a new record to the multicast
+		 * address list for the given interface.
+		 */
+		if ((imo->imo_membership[i] =
+		    in_addmulti(&mreq->imr_multiaddr, ifp)) == NULL) {
+			error = ENOBUFS;
+			break;
+		}
+		++imo->imo_num_memberships;
+		break;
+
+	case IP_DROP_MEMBERSHIP:
+		/*
+		 * Drop a multicast group membership.
+		 * Group must be a valid IP multicast address.
+		 */
+		if (m == NULL || m->m_len != sizeof(struct ip_mreq)) {
+			error = EINVAL;
+			break;
+		}
+		mreq = mtod(m, struct ip_mreq *);
+		if (!IN_MULTICAST(ntohl(mreq->imr_multiaddr.s_addr))) {
+			error = EINVAL;
+			break;
+		}
+		/*
+		 * If an interface address was specified, get a pointer
+		 * to its ifnet structure.
+		 */
+		if (mreq->imr_interface.s_addr == INADDR_ANY)
+			ifp = NULL;	/* NULL matches any interface below */
+		else {
+			INADDR_TO_IFP(mreq->imr_interface, ifp);
+			if (ifp == NULL) {
+				error = EADDRNOTAVAIL;
+				break;
+			}
+		}
+		/*
+		 * Find the membership in the membership array.
+		 */
+		for (i = 0; i < imo->imo_num_memberships; ++i) {
+			if ((ifp == NULL ||
+			     imo->imo_membership[i]->inm_ifp == ifp) &&
+			     imo->imo_membership[i]->inm_addr.s_addr ==
+			     mreq->imr_multiaddr.s_addr)
+				break;
+		}
+		if (i == imo->imo_num_memberships) {
+			error = EADDRNOTAVAIL;
+			break;
+		}
+		/*
+		 * Give up the multicast address record to which the
+		 * membership points.
+		 */
+		in_delmulti(imo->imo_membership[i]);
+		/*
+		 * Remove the gap in the membership array.
+		 */
+		for (++i; i < imo->imo_num_memberships; ++i)
+			imo->imo_membership[i-1] = imo->imo_membership[i];
+		--imo->imo_num_memberships;
+		break;
+
+	default:
+		error = EOPNOTSUPP;
+		break;
+	}
+
+	/*
+	 * If all options have default values, no need to keep the structure.
+	 */
+	if (imo->imo_multicast_ifp == NULL &&
+	    imo->imo_multicast_ttl == IP_DEFAULT_MULTICAST_TTL &&
+	    imo->imo_multicast_loop == IP_DEFAULT_MULTICAST_LOOP &&
+	    imo->imo_num_memberships == 0) {
+		free(*imop, M_IPMOPTS);
+		*imop = NULL;
+	}
+
+	return (error);
+}
+
+/*
+ * Return one IP multicast option in a new mbuf for user getsockopt().
+ */
+int
+ip_getmoptions(optname, imo, mp)
+	int optname;
+	register struct ip_moptions *imo;
+	register struct mbuf **mp;
+{
+	u_char *ttl;
+	u_char *loop;
+	struct in_addr *addr;
+	struct in_ifaddr *ia;
+
+	*mp = m_get(M_WAIT, MT_SOOPTS);	/* stays in *mp even on EOPNOTSUPP */
+
+	switch (optname) {
+
+	case IP_MULTICAST_IF:
+		addr = mtod(*mp, struct in_addr *);
+		(*mp)->m_len = sizeof(struct in_addr);
+		if (imo == NULL || imo->imo_multicast_ifp == NULL)
+			addr->s_addr = INADDR_ANY;	/* no interface selected */
+		else {
+			IFP_TO_IA(imo->imo_multicast_ifp, ia);
+			addr->s_addr = (ia == NULL) ? INADDR_ANY
+					: IA_SIN(ia)->sin_addr.s_addr;
+		}
+		return (0);
+
+	case IP_MULTICAST_TTL:
+		ttl = mtod(*mp, u_char *);
+		(*mp)->m_len = 1;
+		*ttl = (imo == NULL) ? IP_DEFAULT_MULTICAST_TTL
+				     : imo->imo_multicast_ttl;
+		return (0);
+
+	case IP_MULTICAST_LOOP:
+		loop = mtod(*mp, u_char *);
+		(*mp)->m_len = 1;
+		*loop = (imo == NULL) ? IP_DEFAULT_MULTICAST_LOOP
+				      : imo->imo_multicast_loop;
+		return (0);
+
+	default:
+		return (EOPNOTSUPP);
+	}
+}
+
+/*
+ * Discard the IP multicast options: drop each membership, then free imo.
+ */
+void
+ip_freemoptions(imo)
+	register struct ip_moptions *imo;
+{
+	register int i;
+
+	if (imo != NULL) {	/* a NULL imo is a no-op */
+		for (i = 0; i < imo->imo_num_memberships; ++i)
+			in_delmulti(imo->imo_membership[i]);
+		free(imo, M_IPMOPTS);
+	}
+}
+
+/*
+ * Routine called from ip_output() to loop back a copy of an IP multicast
+ * packet to the input queue of a specified interface.  Note that this
+ * calls the output routine of the loopback "driver", but with an interface
+ * pointer that might NOT be &loif -- easier than replicating that code here.
+ */
+static void
+ip_mloopback(ifp, m, dst)
+	struct ifnet *ifp;
+	register struct mbuf *m;
+	register struct sockaddr_in *dst;
+{
+	register struct ip *ip;
+	struct mbuf *copym;
+
+	copym = m_copy(m, 0, M_COPYALL);
+	if (copym != NULL) {	/* if the copy fails, nothing is looped back */
+		/*
+		 * We don't bother to fragment if the IP length is greater
+		 * than the interface's MTU.  Can this possibly matter?
+		 */
+		ip = mtod(copym, struct ip *);
+		ip->ip_len = htons((u_short)ip->ip_len);
+		ip->ip_off = htons((u_short)ip->ip_off);
+		ip->ip_sum = 0;		/* zero the field before summing */
+		ip->ip_sum = in_cksum(copym, ip->ip_hl << 2);
+		(void) looutput(ifp, copym, (struct sockaddr *)dst, NULL);
+	}
+}
diff --git a/sys/netinet/ip_var.h b/sys/netinet/ip_var.h
new file mode 100644
index 00000000000..27eda5e67cd
--- /dev/null
+++ b/sys/netinet/ip_var.h
@@ -0,0 +1,188 @@
+/*
+ * Copyright (c) 1982, 1986, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)ip_var.h	8.1 (Berkeley) 6/10/93
+ */
+
+/*
+ * Overlay for the IP header, used by other protocols (tcp, udp).
+ */
+struct ipovly {
+	caddr_t	ih_next, ih_prev;	/* for protocol sequence q's */
+	u_char	ih_x1;			/* (unused) */
+	u_char	ih_pr;			/* protocol */
+	short	ih_len;			/* protocol length */
+	struct	in_addr ih_src;		/* source internet address */
+	struct	in_addr ih_dst;		/* destination internet address */
+};
+
+/*
+ * IP reassembly queue structure.  Each fragment
+ * being reassembled is attached to one of these structures.
+ * They are timed out after ipq_ttl drops to 0, and may also
+ * be reclaimed if memory becomes tight.
+ */
+struct ipq {
+	struct	ipq *next,*prev;	/* to other reass headers */
+	u_char	ipq_ttl;		/* time for reass q to live */
+	u_char	ipq_p;			/* protocol of this fragment */
+	u_short	ipq_id;			/* sequence id for reassembly */
+	struct	ipasfrag *ipq_next,*ipq_prev;
+					/* to ip headers of fragments */
+	struct	in_addr ipq_src,ipq_dst;
+};
+
+/*
+ * IP header, when holding a fragment.
+ *
+ * Note: ipf_next must be at the same offset as ipq_next in struct ipq.
+ */
+struct	ipasfrag {
+#if BYTE_ORDER == LITTLE_ENDIAN 
+	u_char	ip_hl:4,
+		ip_v:4;
+#endif
+#if BYTE_ORDER == BIG_ENDIAN 
+	u_char	ip_v:4,
+		ip_hl:4;
+#endif
+	u_char	ipf_mff;		/* XXX overlays ip_tos: use low bit
+					 * to avoid destroying tos;
+					 * copied from (ip_off&IP_MF) */
+	short	ip_len;
+	u_short	ip_id;
+	short	ip_off;
+	u_char	ip_ttl;
+	u_char	ip_p;
+	u_short	ip_sum;
+	struct	ipasfrag *ipf_next;	/* next fragment */
+	struct	ipasfrag *ipf_prev;	/* previous fragment */
+};
+
+/*
+ * Structure stored in mbuf in inpcb.inp_options
+ * and passed to ip_output when ip options are in use.
+ * The actual length of the options (including ipopt_dst)
+ * is in m_len.
+ */
+#define MAX_IPOPTLEN	40
+
+struct ipoption {
+	struct	in_addr ipopt_dst;	/* first-hop dst if source routed */
+	char	ipopt_list[MAX_IPOPTLEN];	/* options proper */
+};
+
+/*
+ * Structure attached to inpcb.inp_moptions and
+ * passed to ip_output when IP multicast options are in use.
+ */
+struct ip_moptions {
+	struct	ifnet *imo_multicast_ifp; /* ifp for outgoing multicasts */
+	u_char	imo_multicast_ttl;	/* TTL for outgoing multicasts */
+	u_char	imo_multicast_loop;	/* 1 => hear sends if a member */
+	u_short	imo_num_memberships;	/* no. memberships this socket */
+	struct	in_multi *imo_membership[IP_MAX_MEMBERSHIPS];
+};
+
+struct	ipstat {
+	u_long	ips_total;		/* total packets received */
+	u_long	ips_badsum;		/* checksum bad */
+	u_long	ips_tooshort;		/* packet too short */
+	u_long	ips_toosmall;		/* not enough data */
+	u_long	ips_badhlen;		/* ip header length < data size */
+	u_long	ips_badlen;		/* ip length < ip header length */
+	u_long	ips_fragments;		/* fragments received */
+	u_long	ips_fragdropped;	/* frags dropped (dups, out of space) */
+	u_long	ips_fragtimeout;	/* fragments timed out */
+	u_long	ips_forward;		/* packets forwarded */
+	u_long	ips_cantforward;	/* packets rcvd for unreachable dest */
+	u_long	ips_redirectsent;	/* packets forwarded on same net */
+	u_long	ips_noproto;		/* unknown or unsupported protocol */
+	u_long	ips_delivered;		/* datagrams delivered to upper level*/
+	u_long	ips_localout;		/* total ip packets generated here */
+	u_long	ips_odropped;		/* lost packets due to nobufs, etc. */
+	u_long	ips_reassembled;	/* total packets reassembled ok */
+	u_long	ips_fragmented;		/* datagrams successfully fragmented */
+	u_long	ips_ofragments;		/* output fragments created */
+	u_long	ips_cantfrag;		/* don't fragment flag was set, etc. */
+	u_long	ips_badoptions;		/* error in option processing */
+	u_long	ips_noroute;		/* packets discarded due to no route */
+	u_long	ips_badvers;		/* ip version != 4 */
+	u_long	ips_rawout;		/* total raw ip packets generated */
+};
+
+#ifdef KERNEL
+/* flags passed to ip_output as last parameter */
+#define	IP_FORWARDING		0x1		/* most of ip header exists */
+#define	IP_RAWOUTPUT		0x2		/* raw ip header exists */
+#define	IP_ROUTETOIF		SO_DONTROUTE	/* bypass routing tables */
+#define	IP_ALLOWBROADCAST	SO_BROADCAST	/* can send broadcast packets */
+
+struct	ipstat	ipstat;
+struct	ipq	ipq;			/* ip reass. queue */
+u_short	ip_id;				/* ip packet ctr, for ids */
+int	ip_defttl;			/* default IP ttl */
+
+int	 in_control __P((struct socket *, int, caddr_t, struct ifnet *));
+int	 ip_ctloutput __P((int, struct socket *, int, int, struct mbuf **));
+void	 ip_deq __P((struct ipasfrag *));
+int	 ip_dooptions __P((struct mbuf *));
+void	 ip_drain __P((void));
+void	 ip_enq __P((struct ipasfrag *, struct ipasfrag *));
+void	 ip_forward __P((struct mbuf *, int));
+void	 ip_freef __P((struct ipq *));
+void	 ip_freemoptions __P((struct ip_moptions *));
+int	 ip_getmoptions __P((int, struct ip_moptions *, struct mbuf **));
+void	 ip_init __P((void));
+int	 ip_mforward __P((struct mbuf *, struct ifnet *));
+int	 ip_optcopy __P((struct ip *, struct ip *));
+int	 ip_output __P((struct mbuf *,
+	    struct mbuf *, struct route *, int, struct ip_moptions *));
+int	 ip_pcbopts __P((struct mbuf **, struct mbuf *));
+struct ip *
+	 ip_reass __P((struct ipasfrag *, struct ipq *));
+struct in_ifaddr *
+	 ip_rtaddr __P((struct in_addr));
+int	 ip_setmoptions __P((int, struct ip_moptions **, struct mbuf *));
+void	 ip_slowtimo __P((void));
+struct mbuf *
+	 ip_srcroute __P((void));
+void	 ip_stripoptions __P((struct mbuf *, struct mbuf *));
+int	 ip_sysctl __P((int *, u_int, void *, size_t *, void *, size_t));
+void	 ipintr __P((void));
+int	 rip_ctloutput __P((int, struct socket *, int, int, struct mbuf **));
+void	 rip_init __P((void));
+void	 rip_input __P((struct mbuf *));
+int	 rip_output __P((struct mbuf *, struct socket *, u_long));
+int	 rip_usrreq __P((struct socket *,
+	    int, struct mbuf *, struct mbuf *, struct mbuf *));
+#endif /* KERNEL */
diff --git a/sys/netinet/raw_ip.c b/sys/netinet/raw_ip.c
new file mode 100644
index 00000000000..c8092ee9ec7
--- /dev/null
+++ b/sys/netinet/raw_ip.c
@@ -0,0 +1,389 @@
+/*
+ * Copyright (c) 1982, 1986, 1988, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)raw_ip.c	8.2 (Berkeley) 1/4/94
+ */
+
+#include <sys/param.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/protosw.h>
+#include <sys/socketvar.h>
+#include <sys/errno.h>
+#include <sys/systm.h>
+
+#include <net/if.h>
+#include <net/route.h>
+
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/ip.h>
+#include <netinet/ip_var.h>
+#include <netinet/ip_mroute.h>
+#include <netinet/in_pcb.h>
+
+struct inpcb rawinpcb;	/* head of the circular list of raw IP pcbs */
+
+/*
+ * Nominal space allocated to a raw ip socket.
+ */
+#define	RIPSNDQ		8192	/* initial value of rip_sendspace */
+#define	RIPRCVQ		8192	/* initial value of rip_recvspace */
+
+/*
+ * Raw interface to IP protocol.
+ */
+
+/*
+ * Initialize raw connection block q: the empty list head points to itself.
+ */
+void
+rip_init()
+{
+
+	rawinpcb.inp_next = rawinpcb.inp_prev = &rawinpcb;
+}
+
+struct	sockaddr_in ripsrc = { sizeof(ripsrc), AF_INET }; /* sin_addr set by rip_input */
+/*
+ * Deliver a received IP packet to every raw socket whose protocol,
+ * bound local address and connected foreign address (each a wildcard
+ * when zero) match it; the last match gets m, the others get copies.
+ */
+void
+rip_input(m)
+	struct mbuf *m;
+{
+	register struct ip *ip = mtod(m, struct ip *);
+	register struct inpcb *inp;
+	struct socket *last = 0;
+
+	ripsrc.sin_addr = ip->ip_src;	/* source address reported to receivers */
+	for (inp = rawinpcb.inp_next; inp != &rawinpcb; inp = inp->inp_next) {
+		if (inp->inp_ip.ip_p && inp->inp_ip.ip_p != ip->ip_p)
+			continue;
+		if (inp->inp_laddr.s_addr &&
+		    inp->inp_laddr.s_addr != ip->ip_dst.s_addr)
+			continue;	/* bound to a different local address */
+		if (inp->inp_faddr.s_addr &&
+		    inp->inp_faddr.s_addr != ip->ip_src.s_addr)
+			continue;	/* connected to a different peer */
+		if (last) {
+			struct mbuf *n;
+			if (n = m_copy(m, 0, (int)M_COPYALL)) {
+				if (sbappendaddr(&last->so_rcv, &ripsrc,
+				    n, (struct mbuf *)0) == 0)
+					/* should notify about lost packet */
+					m_freem(n);
+				else
+					sorwakeup(last);
+			}
+		}
+		last = inp->inp_socket;	/* delivery is deferred by one match */
+	}
+	if (last) {
+		if (sbappendaddr(&last->so_rcv, &ripsrc,
+		    m, (struct mbuf *)0) == 0)
+			m_freem(m);
+		else
+			sorwakeup(last);
+	} else {
+		m_freem(m);		/* no socket wanted this packet */
+		ipstat.ips_noproto++;
+		ipstat.ips_delivered--;
+	}
+}
+
+/*
+ * Generate IP header and pass packet to ip_output.
+ * Tack on options user may have setup with control call.
+ */
+int
+rip_output(m, so, dst)
+	register struct mbuf *m;
+	struct socket *so;
+	u_long dst;
+{
+	register struct ip *ip;
+	register struct inpcb *inp = sotoinpcb(so);
+	struct mbuf *opts;
+	int flags = (so->so_options & SO_DONTROUTE) | IP_ALLOWBROADCAST;
+
+	/*
+	 * If the user handed us a complete IP packet, use it.
+	 * Otherwise, allocate an mbuf for a header and fill it in.
+	 */
+	if ((inp->inp_flags & INP_HDRINCL) == 0) {
+		M_PREPEND(m, sizeof(struct ip), M_WAIT);
+		ip = mtod(m, struct ip *);
+		ip->ip_tos = 0;
+		ip->ip_off = 0;
+		ip->ip_p = inp->inp_ip.ip_p;
+		ip->ip_len = m->m_pkthdr.len;
+		ip->ip_src = inp->inp_laddr;
+		ip->ip_dst.s_addr = dst;
+		ip->ip_ttl = MAXTTL;
+		opts = inp->inp_options;	/* options stored on the pcb */
+	} else {
+		ip = mtod(m, struct ip *);
+		if (ip->ip_id == 0)	/* caller left the id unset */
+			ip->ip_id = htons(ip_id++);
+		opts = NULL;		/* no pcb options with a user header */
+		/* XXX prevent ip_output from overwriting header fields */
+		flags |= IP_RAWOUTPUT;
+		ipstat.ips_rawout++;
+	}
+	return (ip_output(m, opts, &inp->inp_route, flags, inp->inp_moptions));
+}
+
+/*
+ * Raw IP socket option processing; others are passed to ip_ctloutput.
+ */
+int
+rip_ctloutput(op, so, level, optname, m)
+	int op;
+	struct socket *so;
+	int level, optname;
+	struct mbuf **m;
+{
+	register struct inpcb *inp = sotoinpcb(so);
+	register int error;
+
+	if (level != IPPROTO_IP)	/* ip_ctloutput frees *m, gives EINVAL */
+		return (ip_ctloutput(op, so, level, optname, m));
+
+	switch (optname) {
+
+	case IP_HDRINCL:
+		if (op == PRCO_SETOPT || op == PRCO_GETOPT) {
+			if (m == 0 || *m == 0 || (*m)->m_len < sizeof (int))
+				return (EINVAL);
+			if (op == PRCO_SETOPT) {
+				if (*mtod(*m, int *))
+					inp->inp_flags |= INP_HDRINCL;
+				else
+					inp->inp_flags &= ~INP_HDRINCL;
+				(void)m_free(*m);
+			} else {
+				(*m)->m_len = sizeof (int);
+				*mtod(*m, int *) = inp->inp_flags & INP_HDRINCL;
+			}
+			return (0);
+		}
+		break;
+
+	case DVMRP_INIT:
+	case DVMRP_DONE:
+	case DVMRP_ADD_VIF:
+	case DVMRP_DEL_VIF:
+	case DVMRP_ADD_LGRP:
+	case DVMRP_DEL_LGRP:
+	case DVMRP_ADD_MRT:
+	case DVMRP_DEL_MRT:
+#ifdef MROUTING
+		if (op == PRCO_SETOPT) {
+			error = ip_mrouter_cmd(optname, so, *m);
+			if (*m)
+				(void)m_free(*m);
+		} else
+			error = EINVAL;
+		return (error);
+#else
+		if (op == PRCO_SETOPT && *m)
+			(void)m_free(*m);
+		return (EOPNOTSUPP);	/* kernel built without MROUTING */
+#endif
+	}
+	return (ip_ctloutput(op, so, level, optname, m));
+}
+
+u_long	rip_sendspace = RIPSNDQ;	/* passed to soreserve() at PRU_ATTACH */
+u_long	rip_recvspace = RIPRCVQ;	/* passed to soreserve() at PRU_ATTACH */
+
+/*ARGSUSED*/
+int
+rip_usrreq(so, req, m, nam, control)
+	register struct socket *so;
+	int req;
+	struct mbuf *m, *nam, *control;
+{
+	register int error = 0;
+	register struct inpcb *inp = sotoinpcb(so);
+#ifdef MROUTING
+	extern struct socket *ip_mrouter;
+#endif
+	switch (req) {
+
+	case PRU_ATTACH:
+		if (inp)
+			panic("rip_attach");
+		if ((so->so_state & SS_PRIV) == 0) {
+			error = EACCES;
+			break;
+		}
+		if ((error = soreserve(so, rip_sendspace, rip_recvspace)) ||
+		    (error = in_pcballoc(so, &rawinpcb)))
+			break;
+		inp = (struct inpcb *)so->so_pcb;
+		inp->inp_ip.ip_p = (int)nam;	/* nam is used as the protocol number */
+		break;
+
+	case PRU_DISCONNECT:
+		if ((so->so_state & SS_ISCONNECTED) == 0) {
+			error = ENOTCONN;
+			break;
+		}
+		/* FALLTHROUGH */
+	case PRU_ABORT:
+		soisdisconnected(so);
+		/* FALLTHROUGH */
+	case PRU_DETACH:
+		if (inp == 0)
+			panic("rip_detach");
+#ifdef MROUTING
+		if (so == ip_mrouter)
+			ip_mrouter_done();
+#endif
+		in_pcbdetach(inp);
+		break;
+
+	case PRU_BIND:
+	    {
+		struct sockaddr_in *addr = mtod(nam, struct sockaddr_in *);
+
+		if (nam->m_len != sizeof(*addr)) {
+			error = EINVAL;
+			break;
+		}
+		if ((ifnet == 0) ||
+		    ((addr->sin_family != AF_INET) &&
+		     (addr->sin_family != AF_IMPLINK)) ||
+		    (addr->sin_addr.s_addr &&
+		     ifa_ifwithaddr((struct sockaddr *)addr) == 0)) {
+			error = EADDRNOTAVAIL;
+			break;
+		}
+		inp->inp_laddr = addr->sin_addr;
+		break;
+	    }
+	case PRU_CONNECT:
+	    {
+		struct sockaddr_in *addr = mtod(nam, struct sockaddr_in *);
+
+		if (nam->m_len != sizeof(*addr)) {
+			error = EINVAL;
+			break;
+		}
+		if (ifnet == 0) {
+			error = EADDRNOTAVAIL;
+			break;
+		}
+		if ((addr->sin_family != AF_INET) &&
+		     (addr->sin_family != AF_IMPLINK)) {
+			error = EAFNOSUPPORT;
+			break;
+		}
+		inp->inp_faddr = addr->sin_addr;
+		soisconnected(so);
+		break;
+	    }
+
+	case PRU_CONNECT2:
+		error = EOPNOTSUPP;
+		break;
+
+	/*
+	 * Shut down the sending side (the call below is socantsendmore).
+	 */
+	case PRU_SHUTDOWN:
+		socantsendmore(so);
+		break;
+
+	/*
+	 * Ship a packet out.  The appropriate raw output
+	 * routine handles any massaging necessary.
+	 */
+	case PRU_SEND:
+	    {
+		register u_long dst;
+
+		if (so->so_state & SS_ISCONNECTED) {
+			if (nam) {
+				error = EISCONN;
+				break;
+			}
+			dst = inp->inp_faddr.s_addr;
+		} else {
+			if (nam == NULL) {
+				error = ENOTCONN;
+				break;
+			}
+			dst = mtod(nam, struct sockaddr_in *)->sin_addr.s_addr;
+		}
+		error = rip_output(m, so, dst);
+		m = NULL;	/* handed to rip_output; skip the m_freem below */
+		break;
+	    }
+
+	case PRU_SENSE:
+		/*
+		 * stat: don't bother with a blocksize.
+		 */
+		return (0);
+
+	/*
+	 * Not supported.
+	 */
+	case PRU_RCVOOB:
+	case PRU_RCVD:
+	case PRU_LISTEN:
+	case PRU_ACCEPT:
+	case PRU_SENDOOB:
+		error = EOPNOTSUPP;
+		break;
+
+	case PRU_SOCKADDR:
+		in_setsockaddr(inp, nam);
+		break;
+
+	case PRU_PEERADDR:
+		in_setpeeraddr(inp, nam);
+		break;
+
+	default:
+		panic("rip_usrreq");
+	}
+	if (m != NULL)
+		m_freem(m);
+	return (error);
+}
diff --git a/sys/netinet/tcp.h b/sys/netinet/tcp.h
new file mode 100644
index 00000000000..6b77ff663a4
--- /dev/null
+++ b/sys/netinet/tcp.h
@@ -0,0 +1,98 @@
+/*
+ * Copyright (c) 1982, 1986, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)tcp.h	8.1 (Berkeley) 6/10/93
+ */
+
+typedef	u_long	tcp_seq;		/* TCP sequence/acknowledgement number */
+/*
+ * TCP header, per RFC 793, September, 1981.
+ * The th_x2/th_off declaration order is selected by BYTE_ORDER.
+ */
+struct tcphdr {
+	u_short	th_sport;		/* source port */
+	u_short	th_dport;		/* destination port */
+	tcp_seq	th_seq;			/* sequence number */
+	tcp_seq	th_ack;			/* acknowledgement number */
+#if BYTE_ORDER == LITTLE_ENDIAN 
+	u_char	th_x2:4,		/* (unused) */
+		th_off:4;		/* data offset, in 32-bit words */
+#endif
+#if BYTE_ORDER == BIG_ENDIAN 
+	u_char	th_off:4,		/* data offset, in 32-bit words */
+		th_x2:4;		/* (unused) */
+#endif
+	u_char	th_flags;		/* control bits, TH_* below */
+#define	TH_FIN	0x01
+#define	TH_SYN	0x02
+#define	TH_RST	0x04
+#define	TH_PUSH	0x08
+#define	TH_ACK	0x10
+#define	TH_URG	0x20
+	u_short	th_win;			/* window */
+	u_short	th_sum;			/* checksum */
+	u_short	th_urp;			/* urgent pointer */
+};
+
+#define	TCPOPT_EOL		0	/* TCPOPT_*: option kind codes */
+#define	TCPOPT_NOP		1
+#define	TCPOPT_MAXSEG		2
+#define    TCPOLEN_MAXSEG		4	/* TCPOLEN_*: option lengths in bytes */
+#define TCPOPT_WINDOW		3
+#define    TCPOLEN_WINDOW		3
+#define TCPOPT_SACK_PERMITTED	4		/* Experimental */
+#define    TCPOLEN_SACK_PERMITTED	2
+#define TCPOPT_SACK		5		/* Experimental */
+#define TCPOPT_TIMESTAMP	8
+#define    TCPOLEN_TIMESTAMP		10
+#define    TCPOLEN_TSTAMP_APPA		(TCPOLEN_TIMESTAMP+2) /* RFC 1323 appendix A */
+/* First 32-bit word of the appendix A form: NOP, NOP, kind, length. */
+#define TCPOPT_TSTAMP_HDR	\
+    (TCPOPT_NOP<<24|TCPOPT_NOP<<16|TCPOPT_TIMESTAMP<<8|TCPOLEN_TIMESTAMP)
+
+/*
+ * Default maximum segment size for TCP.
+ * With an IP MSS of 576, this is 536,
+ * but 512 is probably more convenient.
+ * This should be defined as MIN(512, IP_MSS - sizeof (struct tcpiphdr)).
+ */
+#define	TCP_MSS	512
+
+#define	TCP_MAXWIN	65535	/* largest value for (unscaled) window */
+
+#define TCP_MAX_WINSHIFT	14	/* maximum window shift (caps request_r_scale) */
+
+/*
+ * User-settable options (used with setsockopt).
+ */
+#define	TCP_NODELAY	0x01	/* don't delay send to coalesce packets */
+#define	TCP_MAXSEG	0x02	/* set maximum segment size */
diff --git a/sys/netinet/tcp_debug.c b/sys/netinet/tcp_debug.c
new file mode 100644
index 00000000000..ddb30927b4a
--- /dev/null
+++ b/sys/netinet/tcp_debug.c
@@ -0,0 +1,159 @@
+/*
+ * Copyright (c) 1982, 1986, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)tcp_debug.c	8.1 (Berkeley) 6/10/93
+ */
+
+#ifdef TCPDEBUG
+/* load symbolic names: ask the headers below to define their name tables */
+#define PRUREQUESTS	/* prurequests[], indexed in tcp_trace() */
+#define TCPSTATES	/* tcpstates[] in <netinet/tcp_fsm.h> */
+#define	TCPTIMERS	/* tcptimers[], indexed in tcp_trace() */
+#define	TANAMES		/* tanames[] in <netinet/tcp_debug.h> */
+#endif
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/protosw.h>
+#include <sys/errno.h>
+
+#include <net/route.h>
+#include <net/if.h>
+
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/ip.h>
+#include <netinet/in_pcb.h>
+#include <netinet/ip_var.h>
+#include <netinet/tcp.h>
+#include <netinet/tcp_fsm.h>
+#include <netinet/tcp_seq.h>
+#include <netinet/tcp_timer.h>
+#include <netinet/tcp_var.h>
+#include <netinet/tcpip.h>
+#include <netinet/tcp_debug.h>
+
+#ifdef TCPDEBUG
+int	tcpconsdebug = 0;	/* nonzero: tcp_trace() also prints each event */
+#endif
+/*
+ * Tcp debug routines: log one event in tcp_debug[] and optionally print it.
+ */
+void
+tcp_trace(act, ostate, tp, ti, req)
+	short act, ostate;	/* TA_* action; TCP state before the event */
+	struct tcpcb *tp;	/* control block to snapshot, may be null */
+	struct tcpiphdr *ti;	/* segment header to snapshot, may be null */
+	int req;		/* PRU_* request in low byte, timer index above */
+{
+	tcp_seq seq, ack;
+	int len, flags;
+	struct tcp_debug *td = &tcp_debug[tcp_debx++];	/* next ring slot */
+
+	if (tcp_debx == TCP_NDEBUG)
+		tcp_debx = 0;		/* wrap around */
+	td->td_time = iptime();
+	td->td_act = act;
+	td->td_ostate = ostate;
+	td->td_tcb = (caddr_t)tp;
+	if (tp)
+		td->td_cb = *tp;
+	else
+		bzero((caddr_t)&td->td_cb, sizeof (*tp));
+	if (ti)
+		td->td_ti = *ti;
+	else
+		bzero((caddr_t)&td->td_ti, sizeof (*ti));
+	td->td_req = req;
+#ifdef TCPDEBUG
+	if (tcpconsdebug == 0)
+		return;
+	if (tp)
+		printf("%x %s:", tp, tcpstates[ostate]);
+	else
+		printf("???????? ");
+	printf("%s ", tanames[act]);
+	switch (act) {
+
+	case TA_INPUT:
+	case TA_OUTPUT:
+	case TA_DROP:
+		if (ti == 0)
+			break;
+		seq = ti->ti_seq;
+		ack = ti->ti_ack;
+		len = ti->ti_len;
+		if (act == TA_OUTPUT) {	/* convert from network byte order */
+			seq = ntohl(seq);
+			ack = ntohl(ack);
+			len = ntohs((u_short)len);
+			len -= sizeof (struct tcphdr);	/* drop TCP header length */
+		}
+		if (len)
+			printf("[%x..%x)", seq, seq+len);
+		else
+			printf("%x", seq);
+		printf("@%x, urp=%x", ack, ti->ti_urp);
+		flags = ti->ti_flags;
+		if (flags) {
+#ifndef lint
+			char *cp = "<";	/* separator: "<" first, then "," */
+#define pf(f, s) { if (ti->ti_flags&(f)) { printf("%s%s", cp, s); cp = ","; } }
+			pf(TH_SYN, "SYN"); pf(TH_ACK, "ACK"); pf(TH_FIN, "FIN");
+			pf(TH_RST, "RST"); pf(TH_PUSH, "PUSH"); pf(TH_URG, "URG");
+#endif
+			printf(">");
+		}
+		break;
+
+	case TA_USER:
+		printf("%s", prurequests[req&0xff]);
+		if ((req & 0xff) == PRU_SLOWTIMO)
+			printf("<%s>", tcptimers[req>>8]);
+		break;
+	}
+	if (tp)
+		printf(" -> %s", tcpstates[tp->t_state]);
+	/* print out internal state of tp !?! */
+	printf("\n");
+	if (tp == 0)
+		return;
+	printf("\trcv_(nxt,wnd,up) (%x,%x,%x) snd_(una,nxt,max) (%x,%x,%x)\n",
+	    tp->rcv_nxt, tp->rcv_wnd, tp->rcv_up, tp->snd_una, tp->snd_nxt,
+	    tp->snd_max);
+	printf("\tsnd_(wl1,wl2,wnd) (%x,%x,%x)\n",
+	    tp->snd_wl1, tp->snd_wl2, tp->snd_wnd);
+#endif /* TCPDEBUG */
+}
diff --git a/sys/netinet/tcp_debug.h b/sys/netinet/tcp_debug.h
new file mode 100644
index 00000000000..c02c0cd521d
--- /dev/null
+++ b/sys/netinet/tcp_debug.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 1982, 1986, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)tcp_debug.h	8.1 (Berkeley) 6/10/93
+ */
+
+struct	tcp_debug {			/* one tcp_trace() record */
+	n_time	td_time;		/* iptime() when recorded */
+	short	td_act;			/* TA_* action */
+	short	td_ostate;		/* TCP state passed in as ostate */
+	caddr_t	td_tcb;			/* address of the traced tcpcb */
+	struct	tcpiphdr td_ti;		/* copy of the header, or zeros */
+	short	td_req;			/* req argument of tcp_trace() */
+	struct	tcpcb td_cb;		/* copy of the tcpcb, or zeros */
+};
+
+#define	TA_INPUT 	0		/* TA_*: values index tanames[] */
+#define	TA_OUTPUT	1
+#define	TA_USER		2
+#define	TA_RESPOND	3
+#define	TA_DROP		4
+
+#ifdef TANAMES
+char	*tanames[] =
+    { "input", "output", "user", "respond", "drop" };
+#endif
+
+#define	TCP_NDEBUG 100			/* slots in the trace ring */
+struct	tcp_debug tcp_debug[TCP_NDEBUG];
+int	tcp_debx;			/* next slot tcp_trace() will fill */
diff --git a/sys/netinet/tcp_fsm.h b/sys/netinet/tcp_fsm.h
new file mode 100644
index 00000000000..c5da7fc32d9
--- /dev/null
+++ b/sys/netinet/tcp_fsm.h
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 1982, 1986, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)tcp_fsm.h	8.1 (Berkeley) 6/10/93
+ */
+
+/*
+ * TCP FSM state definitions.
+ * Per RFC793, September, 1981.
+ */
+
+#define	TCP_NSTATES	11		/* number of TCPS_* states below */
+
+#define	TCPS_CLOSED		0	/* closed */
+#define	TCPS_LISTEN		1	/* listening for connection */
+#define	TCPS_SYN_SENT		2	/* active, have sent syn */
+#define	TCPS_SYN_RECEIVED	3	/* have sent and received syn */
+/* states < TCPS_ESTABLISHED are those where connections not established */
+#define	TCPS_ESTABLISHED	4	/* established */
+#define	TCPS_CLOSE_WAIT		5	/* rcvd fin, waiting for close */
+/* states > TCPS_CLOSE_WAIT are those where user has closed */
+#define	TCPS_FIN_WAIT_1		6	/* have closed, sent fin */
+#define	TCPS_CLOSING		7	/* closed xchd FIN; await FIN ACK */
+#define	TCPS_LAST_ACK		8	/* had fin and close; await FIN ACK */
+/* states > TCPS_CLOSE_WAIT && < TCPS_FIN_WAIT_2 await ACK of FIN */
+#define	TCPS_FIN_WAIT_2		9	/* have closed, fin is acked */
+#define	TCPS_TIME_WAIT		10	/* in 2*msl quiet wait after close */
+/* NOTE(review): HAVERCVDFIN is true for TIME_WAIT only, not CLOSE_WAIT/CLOSING/LAST_ACK; confirm. */
+#define	TCPS_HAVERCVDSYN(s)	((s) >= TCPS_SYN_RECEIVED)
+#define	TCPS_HAVERCVDFIN(s)	((s) >= TCPS_TIME_WAIT)
+
+#ifdef	TCPOUTFLAGS
+/*
+ * Flags used when sending segments in tcp_output.
+ * Basic flags (TH_RST,TH_ACK,TH_SYN,TH_FIN) are totally
+ * determined by state, with the proviso that TH_FIN is sent only
+ * if all data queued for output is included in the segment.
+ */
+u_char	tcp_outflags[TCP_NSTATES] = {	/* one entry per TCPS_* value, in order */
+    TH_RST|TH_ACK, 0, TH_SYN, TH_SYN|TH_ACK,	/* CLOSED, LISTEN, SYN_SENT, SYN_RECEIVED */
+    TH_ACK, TH_ACK,				/* ESTABLISHED, CLOSE_WAIT */
+    TH_FIN|TH_ACK, TH_FIN|TH_ACK, TH_FIN|TH_ACK, TH_ACK, TH_ACK,	/* FIN_WAIT_1, CLOSING, LAST_ACK, FIN_WAIT_2, TIME_WAIT */
+};
+#endif
+
+#ifdef KPROF
+int	tcp_acounts[TCP_NSTATES][PRU_NREQ];	/* one cell per (state, request) pair; presumably event counts -- TODO confirm */
+#endif
+
+#ifdef	TCPSTATES
+char *tcpstates[] = {		/* printable state names, in TCPS_* order */
+	"CLOSED",	"LISTEN",	"SYN_SENT",	"SYN_RCVD",
+	"ESTABLISHED",	"CLOSE_WAIT",	"FIN_WAIT_1",	"CLOSING",
+	"LAST_ACK",	"FIN_WAIT_2",	"TIME_WAIT",
+};
+#endif
diff --git a/sys/netinet/tcp_input.c b/sys/netinet/tcp_input.c
new file mode 100644
index 00000000000..2dd1d749c40
--- /dev/null
+++ b/sys/netinet/tcp_input.c
@@ -0,0 +1,1647 @@
+/*
+ * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1994
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)tcp_input.c	8.5 (Berkeley) 4/10/94
+ */
+
+#ifndef TUBA_INCLUDE
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/protosw.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/errno.h>
+
+#include <net/if.h>
+#include <net/route.h>
+
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/ip.h>
+#include <netinet/in_pcb.h>
+#include <netinet/ip_var.h>
+#include <netinet/tcp.h>
+#include <netinet/tcp_fsm.h>
+#include <netinet/tcp_seq.h>
+#include <netinet/tcp_timer.h>
+#include <netinet/tcp_var.h>
+#include <netinet/tcpip.h>
+#include <netinet/tcp_debug.h>
+
+int	tcprexmtthresh = 3;	/* presumably the duplicate-ACK count for fast retransmit -- TODO confirm */
+struct	tcpiphdr tcp_saveti;	/* header copy taken in tcp_input() when SO_DEBUG is set */
+struct	inpcb *tcp_last_inpcb = &tcb;	/* last pcb found; tcp_input() tries it before in_pcblookup() */
+
+extern u_long sb_max;
+
+#endif /* TUBA_INCLUDE */
+#define TCP_PAWS_IDLE	(24 * 24 * 60 * 60 * PR_SLOWHZ)	/* 24 days of seconds, times PR_SLOWHZ */
+
+/* for modulo comparisons of timestamps: the sign of the difference decides */
+#define TSTMP_LT(a,b)	((int)((a)-(b)) < 0)
+#define TSTMP_GEQ(a,b)	((int)((a)-(b)) >= 0)
+
+
+/*
+ * Insert segment ti into the reassembly queue of the tcp with control
+ * block tp.  Return TH_FIN if reassembly now includes a segment with
+ * FIN.  The macro form does the common case inline (segment is the next
+ * to be received on an established connection, and the queue is empty),
+ * avoiding linkage into and removal from the queue and repetition of
+ * various conversions.  Set DELACK for segments received in order, but
+ * ack immediately when segments are out of order (so fast retransmit can
+ * work).  The macro expands its arguments several times and assigns flags.
+ */
+#define	TCP_REASS(tp, ti, m, so, flags) { \
+	if ((ti)->ti_seq == (tp)->rcv_nxt && \
+	    (tp)->seg_next == (struct tcpiphdr *)(tp) && \
+	    (tp)->t_state == TCPS_ESTABLISHED) { \
+		(tp)->t_flags |= TF_DELACK; \
+		(tp)->rcv_nxt += (ti)->ti_len; \
+		(flags) = (ti)->ti_flags & TH_FIN; \
+		tcpstat.tcps_rcvpack++;\
+		tcpstat.tcps_rcvbyte += (ti)->ti_len;\
+		sbappend(&(so)->so_rcv, (m)); \
+		sorwakeup(so); \
+	} else { \
+		(flags) = tcp_reass((tp), (ti), (m)); \
+		(tp)->t_flags |= TF_ACKNOW; \
+	} \
+}
+#ifndef TUBA_INCLUDE
+
+int
+tcp_reass(tp, ti, m)
+	register struct tcpcb *tp;	/* connection owning the queue */
+	register struct tcpiphdr *ti;	/* new segment, or 0 to only deliver */
+	struct mbuf *m;			/* mbuf chain stored with ti on the queue */
+{
+	register struct tcpiphdr *q;	/* queue cursor */
+	struct socket *so = tp->t_inpcb->inp_socket;
+	int flags;			/* TH_FIN bit of the last segment delivered */
+
+	/*
+	 * Call with ti==0 after becoming established to
+	 * force pre-ESTABLISHED data up to user socket.
+	 */
+	if (ti == 0)
+		goto present;
+
+	/*
+	 * Find a segment which begins after this one does.
+	 */
+	for (q = tp->seg_next; q != (struct tcpiphdr *)tp;
+	    q = (struct tcpiphdr *)q->ti_next)
+		if (SEQ_GT(q->ti_seq, ti->ti_seq))
+			break;
+
+	/*
+	 * If there is a preceding segment, it may provide some of
+	 * our data already.  If so, drop the data from the incoming
+	 * segment.  If it provides all of our data, drop us.
+	 */
+	if ((struct tcpiphdr *)q->ti_prev != (struct tcpiphdr *)tp) {
+		register int i;
+		q = (struct tcpiphdr *)q->ti_prev;
+		/* conversion to int (in i) handles seq wraparound */
+		i = q->ti_seq + q->ti_len - ti->ti_seq;
+		if (i > 0) {
+			if (i >= ti->ti_len) {	/* entirely duplicate: count and free */
+				tcpstat.tcps_rcvduppack++;
+				tcpstat.tcps_rcvdupbyte += ti->ti_len;
+				m_freem(m);
+				return (0);
+			}
+			m_adj(m, i);		/* shed the i bytes q already holds */
+			ti->ti_len -= i;
+			ti->ti_seq += i;
+		}
+		q = (struct tcpiphdr *)(q->ti_next);	/* back to the segment after ti */
+	}
+	tcpstat.tcps_rcvoopack++;
+	tcpstat.tcps_rcvoobyte += ti->ti_len;
+	REASS_MBUF(ti) = m;		/* XXX */
+
+	/*
+	 * While we overlap succeeding segments trim them or,
+	 * if they are completely covered, dequeue them.
+	 */
+	while (q != (struct tcpiphdr *)tp) {
+		register int i = (ti->ti_seq + ti->ti_len) - q->ti_seq;
+		if (i <= 0)
+			break;
+		if (i < q->ti_len) {	/* partial overlap: trim q and stop */
+			q->ti_seq += i;
+			q->ti_len -= i;
+			m_adj(REASS_MBUF(q), i);
+			break;
+		}
+		q = (struct tcpiphdr *)q->ti_next;	/* advance before unlinking the covered segment */
+		m = REASS_MBUF((struct tcpiphdr *)q->ti_prev);
+		remque(q->ti_prev);
+		m_freem(m);
+	}
+
+	/*
+	 * Stick new segment in its place.
+	 */
+	insque(ti, q->ti_prev);
+
+present:
+	/*
+	 * Present data to user, advancing rcv_nxt through
+	 * completed sequence space.
+	 */
+	if (TCPS_HAVERCVDSYN(tp->t_state) == 0)
+		return (0);
+	ti = tp->seg_next;
+	if (ti == (struct tcpiphdr *)tp || ti->ti_seq != tp->rcv_nxt)
+		return (0);		/* queue empty, or next expected bytes missing */
+	if (tp->t_state == TCPS_SYN_RECEIVED && ti->ti_len)
+		return (0);		/* data is held back while in SYN_RECEIVED */
+	do {
+		tp->rcv_nxt += ti->ti_len;
+		flags = ti->ti_flags & TH_FIN;
+		remque(ti);
+		m = REASS_MBUF(ti);
+		ti = (struct tcpiphdr *)ti->ti_next;
+		if (so->so_state & SS_CANTRCVMORE)
+			m_freem(m);	/* socket takes no more input: discard */
+		else
+			sbappend(&so->so_rcv, m);
+	} while (ti != (struct tcpiphdr *)tp && ti->ti_seq == tp->rcv_nxt);
+	sorwakeup(so);
+	return (flags);
+}
+
+/*
+ * TCP input routine, follows pages 65-76 of the
+ * protocol specification dated September, 1981 very closely.
+ */
+void
+tcp_input(m, iphlen)
+	register struct mbuf *m;
+	int iphlen;
+{
+	register struct tcpiphdr *ti;
+	register struct inpcb *inp;
+	caddr_t optp = NULL;
+	int optlen;
+	int len, tlen, off;
+	register struct tcpcb *tp = 0;
+	register int tiflags;
+	struct socket *so;
+	int todrop, acked, ourfinisacked, needoutput = 0;
+	short ostate;
+	struct in_addr laddr;
+	int dropsocket = 0;
+	int iss = 0;
+	u_long tiwin, ts_val, ts_ecr;
+	int ts_present = 0;
+
+	tcpstat.tcps_rcvtotal++;
+	/*
+	 * Get IP and TCP header together in first mbuf.
+	 * Note: IP leaves IP header in first mbuf.
+	 */
+	ti = mtod(m, struct tcpiphdr *);
+	if (iphlen > sizeof (struct ip))
+		ip_stripoptions(m, (struct mbuf *)0);
+	if (m->m_len < sizeof (struct tcpiphdr)) {
+		if ((m = m_pullup(m, sizeof (struct tcpiphdr))) == 0) {
+			tcpstat.tcps_rcvshort++;
+			return;
+		}
+		ti = mtod(m, struct tcpiphdr *);
+	}
+
+	/*
+	 * Checksum extended TCP header and data.
+	 */
+	tlen = ((struct ip *)ti)->ip_len;
+	len = sizeof (struct ip) + tlen;
+	ti->ti_next = ti->ti_prev = 0;
+	ti->ti_x1 = 0;
+	ti->ti_len = (u_short)tlen;
+	HTONS(ti->ti_len);
+	if (ti->ti_sum = in_cksum(m, len)) {
+		tcpstat.tcps_rcvbadsum++;
+		goto drop;
+	}
+#endif /* TUBA_INCLUDE */
+
+	/*
+	 * Check that TCP offset makes sense,
+	 * pull out TCP options and adjust length.		XXX
+	 */
+	off = ti->ti_off << 2;
+	if (off < sizeof (struct tcphdr) || off > tlen) {
+		tcpstat.tcps_rcvbadoff++;
+		goto drop;
+	}
+	tlen -= off;
+	ti->ti_len = tlen;
+	if (off > sizeof (struct tcphdr)) {
+		if (m->m_len < sizeof(struct ip) + off) {
+			if ((m = m_pullup(m, sizeof (struct ip) + off)) == 0) {
+				tcpstat.tcps_rcvshort++;
+				return;
+			}
+			ti = mtod(m, struct tcpiphdr *);
+		}
+		optlen = off - sizeof (struct tcphdr);
+		optp = mtod(m, caddr_t) + sizeof (struct tcpiphdr);
+		/* 
+		 * Do quick retrieval of timestamp options ("options
+		 * prediction?").  If timestamp is the only option and it's
+		 * formatted as recommended in RFC 1323 appendix A, we
+		 * quickly get the values now and not bother calling
+		 * tcp_dooptions(), etc.
+		 */
+		if ((optlen == TCPOLEN_TSTAMP_APPA ||
+		     (optlen > TCPOLEN_TSTAMP_APPA &&
+			optp[TCPOLEN_TSTAMP_APPA] == TCPOPT_EOL)) &&
+		     *(u_long *)optp == htonl(TCPOPT_TSTAMP_HDR) &&
+		     (ti->ti_flags & TH_SYN) == 0) {
+			ts_present = 1;
+			ts_val = ntohl(*(u_long *)(optp + 4));
+			ts_ecr = ntohl(*(u_long *)(optp + 8));
+			optp = NULL;	/* we've parsed the options */
+		}
+	}
+	tiflags = ti->ti_flags;
+
+	/*
+	 * Convert TCP protocol specific fields to host format.
+	 */
+	NTOHL(ti->ti_seq);
+	NTOHL(ti->ti_ack);
+	NTOHS(ti->ti_win);
+	NTOHS(ti->ti_urp);
+
+	/*
+	 * Locate pcb for segment.
+	 */
+findpcb:
+	inp = tcp_last_inpcb;
+	if (inp->inp_lport != ti->ti_dport ||
+	    inp->inp_fport != ti->ti_sport ||
+	    inp->inp_faddr.s_addr != ti->ti_src.s_addr ||
+	    inp->inp_laddr.s_addr != ti->ti_dst.s_addr) {
+		inp = in_pcblookup(&tcb, ti->ti_src, ti->ti_sport,
+		    ti->ti_dst, ti->ti_dport, INPLOOKUP_WILDCARD);
+		if (inp)
+			tcp_last_inpcb = inp;
+		++tcpstat.tcps_pcbcachemiss;
+	}
+
+	/*
+	 * If the state is CLOSED (i.e., TCB does not exist) then
+	 * all data in the incoming segment is discarded.
+	 * If the TCB exists but is in CLOSED state, it is embryonic,
+	 * but should either do a listen or a connect soon.
+	 */
+	if (inp == 0)
+		goto dropwithreset;
+	tp = intotcpcb(inp);
+	if (tp == 0)
+		goto dropwithreset;
+	if (tp->t_state == TCPS_CLOSED)
+		goto drop;
+	
+	/* Unscale the window into a 32-bit value. */
+	if ((tiflags & TH_SYN) == 0)
+		tiwin = ti->ti_win << tp->snd_scale;
+	else
+		tiwin = ti->ti_win;
+
+	so = inp->inp_socket;
+	if (so->so_options & (SO_DEBUG|SO_ACCEPTCONN)) {
+		if (so->so_options & SO_DEBUG) {
+			ostate = tp->t_state;
+			tcp_saveti = *ti;
+		}
+		if (so->so_options & SO_ACCEPTCONN) {
+			so = sonewconn(so, 0);
+			if (so == 0)
+				goto drop;
+			/*
+			 * This is ugly, but ....
+			 *
+			 * Mark socket as temporary until we're
+			 * committed to keeping it.  The code at
+			 * ``drop'' and ``dropwithreset'' check the
+			 * flag dropsocket to see if the temporary
+			 * socket created here should be discarded.
+			 * We mark the socket as discardable until
+			 * we're committed to it below in TCPS_LISTEN.
+			 */
+			dropsocket++;
+			inp = (struct inpcb *)so->so_pcb;
+			inp->inp_laddr = ti->ti_dst;
+			inp->inp_lport = ti->ti_dport;
+#if BSD>=43
+			inp->inp_options = ip_srcroute();
+#endif
+			tp = intotcpcb(inp);
+			tp->t_state = TCPS_LISTEN;
+
+			/* Compute proper scaling value from buffer space
+			 */
+			while (tp->request_r_scale < TCP_MAX_WINSHIFT &&
+			   TCP_MAXWIN << tp->request_r_scale < so->so_rcv.sb_hiwat)
+				tp->request_r_scale++;
+		}
+	}
+
+	/*
+	 * Segment received on connection.
+	 * Reset idle time and keep-alive timer.
+	 */
+	tp->t_idle = 0;
+	tp->t_timer[TCPT_KEEP] = tcp_keepidle;
+
+	/*
+	 * Process options if not in LISTEN state,
+	 * else do it below (after getting remote address).
+	 */
+	if (optp && tp->t_state != TCPS_LISTEN)
+		tcp_dooptions(tp, optp, optlen, ti,
+			&ts_present, &ts_val, &ts_ecr);
+
+	/* 
+	 * Header prediction: check for the two common cases
+	 * of a uni-directional data xfer.  If the packet has
+	 * no control flags, is in-sequence, the window didn't
+	 * change and we're not retransmitting, it's a
+	 * candidate.  If the length is zero and the ack moved
+	 * forward, we're the sender side of the xfer.  Just
+	 * free the data acked & wake any higher level process
+	 * that was blocked waiting for space.  If the length
+	 * is non-zero and the ack didn't move, we're the
+	 * receiver side.  If we're getting packets in-order
+	 * (the reassembly queue is empty), add the data to
+	 * the socket buffer and note that we need a delayed ack.
+	 */
+	if (tp->t_state == TCPS_ESTABLISHED &&
+	    (tiflags & (TH_SYN|TH_FIN|TH_RST|TH_URG|TH_ACK)) == TH_ACK &&
+	    (!ts_present || TSTMP_GEQ(ts_val, tp->ts_recent)) &&
+	    ti->ti_seq == tp->rcv_nxt &&
+	    tiwin && tiwin == tp->snd_wnd &&
+	    tp->snd_nxt == tp->snd_max) {
+
+		/* 
+		 * If last ACK falls within this segment's sequence numbers,
+		 *  record the timestamp.
+		 */
+		if (ts_present && SEQ_LEQ(ti->ti_seq, tp->last_ack_sent) &&
+		   SEQ_LT(tp->last_ack_sent, ti->ti_seq + ti->ti_len)) {
+			tp->ts_recent_age = tcp_now;
+			tp->ts_recent = ts_val;
+		}
+
+		if (ti->ti_len == 0) {
+			if (SEQ_GT(ti->ti_ack, tp->snd_una) &&
+			    SEQ_LEQ(ti->ti_ack, tp->snd_max) &&
+			    tp->snd_cwnd >= tp->snd_wnd) {
+				/*
+				 * this is a pure ack for outstanding data.
+				 */
+				++tcpstat.tcps_predack;
+				if (ts_present)
+					tcp_xmit_timer(tp, tcp_now-ts_ecr+1);
+				else if (tp->t_rtt &&
+					    SEQ_GT(ti->ti_ack, tp->t_rtseq))
+					tcp_xmit_timer(tp, tp->t_rtt);
+				acked = ti->ti_ack - tp->snd_una;
+				tcpstat.tcps_rcvackpack++;
+				tcpstat.tcps_rcvackbyte += acked;
+				sbdrop(&so->so_snd, acked);
+				tp->snd_una = ti->ti_ack;
+				m_freem(m);
+
+				/*
+				 * If all outstanding data are acked, stop
+				 * retransmit timer, otherwise restart timer
+				 * using current (possibly backed-off) value.
+				 * If process is waiting for space,
+				 * wakeup/selwakeup/signal.  If data
+				 * are ready to send, let tcp_output
+				 * decide between more output or persist.
+				 */
+				if (tp->snd_una == tp->snd_max)
+					tp->t_timer[TCPT_REXMT] = 0;
+				else if (tp->t_timer[TCPT_PERSIST] == 0)
+					tp->t_timer[TCPT_REXMT] = tp->t_rxtcur;
+
+				if (so->so_snd.sb_flags & SB_NOTIFY)
+					sowwakeup(so);
+				if (so->so_snd.sb_cc)
+					(void) tcp_output(tp);
+				return;
+			}
+		} else if (ti->ti_ack == tp->snd_una &&
+		    tp->seg_next == (struct tcpiphdr *)tp &&
+		    ti->ti_len <= sbspace(&so->so_rcv)) {
+			/*
+			 * this is a pure, in-sequence data packet
+			 * with nothing on the reassembly queue and
+			 * we have enough buffer space to take it.
+			 */
+			++tcpstat.tcps_preddat;
+			tp->rcv_nxt += ti->ti_len;
+			tcpstat.tcps_rcvpack++;
+			tcpstat.tcps_rcvbyte += ti->ti_len;
+			/*
+			 * Drop TCP, IP headers and TCP options then add data
+			 * to socket buffer.
+			 */
+			m->m_data += sizeof(struct tcpiphdr)+off-sizeof(struct tcphdr);
+			m->m_len -= sizeof(struct tcpiphdr)+off-sizeof(struct tcphdr);
+			sbappend(&so->so_rcv, m);
+			sorwakeup(so);
+			tp->t_flags |= TF_DELACK;
+			return;
+		}
+	}
+
+	/*
+	 * Drop TCP, IP headers and TCP options.
+	 */
+	m->m_data += sizeof(struct tcpiphdr)+off-sizeof(struct tcphdr);
+	m->m_len  -= sizeof(struct tcpiphdr)+off-sizeof(struct tcphdr);
+
+	/*
+	 * Calculate amount of space in receive window,
+	 * and then do TCP input processing.
+	 * Receive window is amount of space in rcv queue,
+	 * but not less than advertised window.
+	 */
+	{ int win;
+
+	win = sbspace(&so->so_rcv);
+	if (win < 0)
+		win = 0;
+	tp->rcv_wnd = max(win, (int)(tp->rcv_adv - tp->rcv_nxt));
+	}
+
+	switch (tp->t_state) {
+
+	/*
+	 * If the state is LISTEN then ignore segment if it contains an RST.
+	 * If the segment contains an ACK then it is bad and send a RST.
+	 * If it does not contain a SYN then it is not interesting; drop it.
+	 * Don't bother responding if the destination was a broadcast.
+	 * Otherwise initialize tp->rcv_nxt, and tp->irs, select an initial
+	 * tp->iss, and send a segment:
+	 *     <SEQ=ISS><ACK=RCV_NXT><CTL=SYN,ACK>
+	 * Also initialize tp->snd_nxt to tp->iss+1 and tp->snd_una to tp->iss.
+	 * Fill in remote peer address fields if not previously specified.
+	 * Enter SYN_RECEIVED state, and process any other fields of this
+	 * segment in this state.
+	 */
+	case TCPS_LISTEN: {
+		struct mbuf *am;
+		register struct sockaddr_in *sin;
+
+		if (tiflags & TH_RST)
+			goto drop;
+		if (tiflags & TH_ACK)
+			goto dropwithreset;
+		if ((tiflags & TH_SYN) == 0)
+			goto drop;
+		/*
+		 * RFC1122 4.2.3.10, p. 104: discard bcast/mcast SYN
+		 * in_broadcast() should never return true on a received
+		 * packet with M_BCAST not set.
+		 */
+		if (m->m_flags & (M_BCAST|M_MCAST) ||
+		    IN_MULTICAST(ti->ti_dst.s_addr))
+			goto drop;
+		am = m_get(M_DONTWAIT, MT_SONAME);	/* XXX */
+		if (am == NULL)
+			goto drop;
+		am->m_len = sizeof (struct sockaddr_in);
+		sin = mtod(am, struct sockaddr_in *);
+		sin->sin_family = AF_INET;
+		sin->sin_len = sizeof(*sin);
+		sin->sin_addr = ti->ti_src;
+		sin->sin_port = ti->ti_sport;
+		bzero((caddr_t)sin->sin_zero, sizeof(sin->sin_zero));
+		laddr = inp->inp_laddr;
+		if (inp->inp_laddr.s_addr == INADDR_ANY)
+			inp->inp_laddr = ti->ti_dst;
+		if (in_pcbconnect(inp, am)) {
+			inp->inp_laddr = laddr;
+			(void) m_free(am);
+			goto drop;
+		}
+		(void) m_free(am);
+		tp->t_template = tcp_template(tp);
+		if (tp->t_template == 0) {
+			tp = tcp_drop(tp, ENOBUFS);
+			dropsocket = 0;		/* socket is already gone */
+			goto drop;
+		}
+		if (optp)
+			tcp_dooptions(tp, optp, optlen, ti,
+				&ts_present, &ts_val, &ts_ecr);
+		if (iss)
+			tp->iss = iss;
+		else
+			tp->iss = tcp_iss;
+		tcp_iss += TCP_ISSINCR/2;
+		tp->irs = ti->ti_seq;
+		tcp_sendseqinit(tp);
+		tcp_rcvseqinit(tp);
+		tp->t_flags |= TF_ACKNOW;
+		tp->t_state = TCPS_SYN_RECEIVED;
+		tp->t_timer[TCPT_KEEP] = TCPTV_KEEP_INIT;
+		dropsocket = 0;		/* committed to socket */
+		tcpstat.tcps_accepts++;
+		goto trimthenstep6;
+		}
+
+	/*
+	 * If the state is SYN_SENT:
+	 *	if seg contains an ACK, but not for our SYN, drop the input.
+	 *	if seg contains a RST, then drop the connection.
+	 *	if seg does not contain SYN, then drop it.
+	 * Otherwise this is an acceptable SYN segment
+	 *	initialize tp->rcv_nxt and tp->irs
+	 *	if seg contains ack then advance tp->snd_una
+	 *	if SYN has been acked change to ESTABLISHED else SYN_RCVD state
+	 *	arrange for segment to be acked (eventually)
+	 *	continue processing rest of data/controls, beginning with URG
+	 */
+	case TCPS_SYN_SENT:
+		if ((tiflags & TH_ACK) &&
+		    (SEQ_LEQ(ti->ti_ack, tp->iss) ||
+		     SEQ_GT(ti->ti_ack, tp->snd_max)))
+			goto dropwithreset;
+		if (tiflags & TH_RST) {
+			if (tiflags & TH_ACK)
+				tp = tcp_drop(tp, ECONNREFUSED);
+			goto drop;
+		}
+		if ((tiflags & TH_SYN) == 0)
+			goto drop;
+		if (tiflags & TH_ACK) {
+			tp->snd_una = ti->ti_ack;
+			if (SEQ_LT(tp->snd_nxt, tp->snd_una))
+				tp->snd_nxt = tp->snd_una;
+		}
+		tp->t_timer[TCPT_REXMT] = 0;
+		tp->irs = ti->ti_seq;
+		tcp_rcvseqinit(tp);
+		tp->t_flags |= TF_ACKNOW;
+		if (tiflags & TH_ACK && SEQ_GT(tp->snd_una, tp->iss)) {
+			tcpstat.tcps_connects++;
+			soisconnected(so);
+			tp->t_state = TCPS_ESTABLISHED;
+			/* Do window scaling on this connection? */
+			if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) ==
+				(TF_RCVD_SCALE|TF_REQ_SCALE)) {
+				tp->snd_scale = tp->requested_s_scale;
+				tp->rcv_scale = tp->request_r_scale;
+			}
+			(void) tcp_reass(tp, (struct tcpiphdr *)0,
+				(struct mbuf *)0);
+			/*
+			 * if we didn't have to retransmit the SYN,
+			 * use its rtt as our initial srtt & rtt var.
+			 */
+			if (tp->t_rtt)
+				tcp_xmit_timer(tp, tp->t_rtt);
+		} else
+			tp->t_state = TCPS_SYN_RECEIVED;
+
+trimthenstep6:
+		/*
+		 * Advance ti->ti_seq to correspond to first data byte.
+		 * If data, trim to stay within window,
+		 * dropping FIN if necessary.
+		 */
+		ti->ti_seq++;
+		if (ti->ti_len > tp->rcv_wnd) {
+			todrop = ti->ti_len - tp->rcv_wnd;
+			m_adj(m, -todrop);
+			ti->ti_len = tp->rcv_wnd;
+			tiflags &= ~TH_FIN;
+			tcpstat.tcps_rcvpackafterwin++;
+			tcpstat.tcps_rcvbyteafterwin += todrop;
+		}
+		tp->snd_wl1 = ti->ti_seq - 1;
+		tp->rcv_up = ti->ti_seq;
+		goto step6;
+	}
+
+	/*
+	 * States other than LISTEN or SYN_SENT.
+	 * First check timestamp, if present.
+	 * Then check that at least some bytes of segment are within 
+	 * receive window.  If segment begins before rcv_nxt,
+	 * drop leading data (and SYN); if nothing left, just ack.
+	 * 
+	 * RFC 1323 PAWS: If we have a timestamp reply on this segment
+	 * and it's less than ts_recent, drop it.
+	 */
+	if (ts_present && (tiflags & TH_RST) == 0 && tp->ts_recent &&
+	    TSTMP_LT(ts_val, tp->ts_recent)) {
+
+		/* Check to see if ts_recent is over 24 days old.  */
+		if ((int)(tcp_now - tp->ts_recent_age) > TCP_PAWS_IDLE) {
+			/*
+			 * Invalidate ts_recent.  If this segment updates
+			 * ts_recent, the age will be reset later and ts_recent
+			 * will get a valid value.  If it does not, setting
+			 * ts_recent to zero will at least satisfy the
+			 * requirement that zero be placed in the timestamp
+			 * echo reply when ts_recent isn't valid.  The
+			 * age isn't reset until we get a valid ts_recent
+			 * because we don't want out-of-order segments to be
+			 * dropped when ts_recent is old.
+			 */
+			tp->ts_recent = 0;
+		} else {
+			tcpstat.tcps_rcvduppack++;
+			tcpstat.tcps_rcvdupbyte += ti->ti_len;
+			tcpstat.tcps_pawsdrop++;
+			goto dropafterack;
+		}
+	}
+
+	todrop = tp->rcv_nxt - ti->ti_seq;
+	if (todrop > 0) {
+		if (tiflags & TH_SYN) {
+			tiflags &= ~TH_SYN;
+			ti->ti_seq++;
+			if (ti->ti_urp > 1) 
+				ti->ti_urp--;
+			else
+				tiflags &= ~TH_URG;
+			todrop--;
+		}
+		if (todrop >= ti->ti_len) {
+			tcpstat.tcps_rcvduppack++;
+			tcpstat.tcps_rcvdupbyte += ti->ti_len;
+			/*
+			 * If segment is just one to the left of the window,
+			 * check two special cases:
+			 * 1. Don't toss RST in response to 4.2-style keepalive.
+			 * 2. If the only thing to drop is a FIN, we can drop
+			 *    it, but check the ACK or we will get into FIN
+			 *    wars if our FINs crossed (both CLOSING).
+			 * In either case, send ACK to resynchronize,
+			 * but keep on processing for RST or ACK.
+			 */
+			if ((tiflags & TH_FIN && todrop == ti->ti_len + 1)
+#ifdef TCP_COMPAT_42
+			  || (tiflags & TH_RST && ti->ti_seq == tp->rcv_nxt - 1)
+#endif
+			   ) {
+				todrop = ti->ti_len;
+				tiflags &= ~TH_FIN;
+				tp->t_flags |= TF_ACKNOW;
+			} else {
+				/*
+				 * Handle the case when a bound socket connects
+				 * to itself. Allow packets with a SYN and
+				 * an ACK to continue with the processing.
+				 */
+				if (todrop != 0 || (tiflags & TH_ACK) == 0)
+					goto dropafterack;
+			}
+		} else {
+			tcpstat.tcps_rcvpartduppack++;
+			tcpstat.tcps_rcvpartdupbyte += todrop;
+		}
+		m_adj(m, todrop);
+		ti->ti_seq += todrop;
+		ti->ti_len -= todrop;
+		if (ti->ti_urp > todrop)
+			ti->ti_urp -= todrop;
+		else {
+			tiflags &= ~TH_URG;
+			ti->ti_urp = 0;
+		}
+	}
+
+	/*
+	 * If new data are received on a connection after the
+	 * user processes are gone, then RST the other end.
+	 */
+	if ((so->so_state & SS_NOFDREF) &&
+	    tp->t_state > TCPS_CLOSE_WAIT && ti->ti_len) {
+		tp = tcp_close(tp);
+		tcpstat.tcps_rcvafterclose++;
+		goto dropwithreset;
+	}
+
+	/*
+	 * If segment ends after window, drop trailing data
+	 * (and PUSH and FIN); if nothing left, just ACK.
+	 */
+	todrop = (ti->ti_seq+ti->ti_len) - (tp->rcv_nxt+tp->rcv_wnd);
+	if (todrop > 0) {
+		tcpstat.tcps_rcvpackafterwin++;
+		if (todrop >= ti->ti_len) {
+			tcpstat.tcps_rcvbyteafterwin += ti->ti_len;
+			/*
+			 * If a new connection request is received
+			 * while in TIME_WAIT, drop the old connection
+			 * and start over if the sequence numbers
+			 * are above the previous ones.
+			 */
+			if (tiflags & TH_SYN &&
+			    tp->t_state == TCPS_TIME_WAIT &&
+			    SEQ_GT(ti->ti_seq, tp->rcv_nxt)) {
+				iss = tp->rcv_nxt + TCP_ISSINCR;
+				tp = tcp_close(tp);
+				goto findpcb;
+			}
+			/*
+			 * If window is closed can only take segments at
+			 * window edge, and have to drop data and PUSH from
+			 * incoming segments.  Continue processing, but
+			 * remember to ack.  Otherwise, drop segment
+			 * and ack.
+			 */
+			if (tp->rcv_wnd == 0 && ti->ti_seq == tp->rcv_nxt) {
+				tp->t_flags |= TF_ACKNOW;
+				tcpstat.tcps_rcvwinprobe++;
+			} else
+				goto dropafterack;
+		} else
+			tcpstat.tcps_rcvbyteafterwin += todrop;
+		m_adj(m, -todrop);
+		ti->ti_len -= todrop;
+		tiflags &= ~(TH_PUSH|TH_FIN);
+	}
+
+	/*
+	 * If last ACK falls within this segment's sequence numbers,
+	 * record its timestamp.
+	 */
+	if (ts_present && SEQ_LEQ(ti->ti_seq, tp->last_ack_sent) &&
+	    SEQ_LT(tp->last_ack_sent, ti->ti_seq + ti->ti_len +
+		   ((tiflags & (TH_SYN|TH_FIN)) != 0))) {
+		tp->ts_recent_age = tcp_now;
+		tp->ts_recent = ts_val;
+	}
+
+	/*
+	 * If the RST bit is set examine the state:
+	 *    SYN_RECEIVED STATE:
+	 *	If passive open, return to LISTEN state.
+	 *	If active open, inform user that connection was refused.
+	 *    ESTABLISHED, FIN_WAIT_1, FIN_WAIT2, CLOSE_WAIT STATES:
+	 *	Inform user that connection was reset, and close tcb.
+	 *    CLOSING, LAST_ACK, TIME_WAIT STATES
+	 *	Close the tcb.
+	 */
+	if (tiflags&TH_RST) switch (tp->t_state) {
+
+	case TCPS_SYN_RECEIVED:
+		so->so_error = ECONNREFUSED;
+		goto close;
+
+	case TCPS_ESTABLISHED:
+	case TCPS_FIN_WAIT_1:
+	case TCPS_FIN_WAIT_2:
+	case TCPS_CLOSE_WAIT:
+		so->so_error = ECONNRESET;
+	close:
+		tp->t_state = TCPS_CLOSED;
+		tcpstat.tcps_drops++;
+		tp = tcp_close(tp);
+		goto drop;
+
+	case TCPS_CLOSING:
+	case TCPS_LAST_ACK:
+	case TCPS_TIME_WAIT:
+		tp = tcp_close(tp);
+		goto drop;
+	}
+
+	/*
+	 * If a SYN is in the window, then this is an
+	 * error and we send an RST and drop the connection.
+	 */
+	if (tiflags & TH_SYN) {
+		tp = tcp_drop(tp, ECONNRESET);
+		goto dropwithreset;
+	}
+
+	/*
+	 * If the ACK bit is off we drop the segment and return.
+	 */
+	if ((tiflags & TH_ACK) == 0)
+		goto drop;
+	
+	/*
+	 * Ack processing.
+	 */
+	switch (tp->t_state) {
+
+	/*
+	 * In SYN_RECEIVED state if the ack ACKs our SYN then enter
+	 * ESTABLISHED state and continue processing, otherwise
+	 * send an RST.
+	 */
+	case TCPS_SYN_RECEIVED:
+		if (SEQ_GT(tp->snd_una, ti->ti_ack) ||
+		    SEQ_GT(ti->ti_ack, tp->snd_max))
+			goto dropwithreset;
+		tcpstat.tcps_connects++;
+		soisconnected(so);
+		tp->t_state = TCPS_ESTABLISHED;
+		/* Do window scaling? */
+		if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) ==
+			(TF_RCVD_SCALE|TF_REQ_SCALE)) {
+			tp->snd_scale = tp->requested_s_scale;
+			tp->rcv_scale = tp->request_r_scale;
+		}
+		(void) tcp_reass(tp, (struct tcpiphdr *)0, (struct mbuf *)0);
+		tp->snd_wl1 = ti->ti_seq - 1;
+		/* fall into ... */
+
+	/*
+	 * In ESTABLISHED state: drop duplicate ACKs; ACK out of range
+	 * ACKs.  If the ack is in the range
+	 *	tp->snd_una < ti->ti_ack <= tp->snd_max
+	 * then advance tp->snd_una to ti->ti_ack and drop
+	 * data from the retransmission queue.  If this ACK reflects
+	 * more up to date window information we update our window information.
+	 */
+	case TCPS_ESTABLISHED:
+	case TCPS_FIN_WAIT_1:
+	case TCPS_FIN_WAIT_2:
+	case TCPS_CLOSE_WAIT:
+	case TCPS_CLOSING:
+	case TCPS_LAST_ACK:
+	case TCPS_TIME_WAIT:
+
+		if (SEQ_LEQ(ti->ti_ack, tp->snd_una)) {
+			if (ti->ti_len == 0 && tiwin == tp->snd_wnd) {
+				tcpstat.tcps_rcvdupack++;
+				/*
+				 * If we have outstanding data (other than
+				 * a window probe), this is a completely
+				 * duplicate ack (ie, window info didn't
+				 * change), the ack is the biggest we've
+				 * seen and we've seen exactly our rexmt
+				 * threshhold of them, assume a packet
+				 * has been dropped and retransmit it.
+				 * Kludge snd_nxt & the congestion
+				 * window so we send only this one
+				 * packet.
+				 *
+				 * We know we're losing at the current
+				 * window size so do congestion avoidance
+				 * (set ssthresh to half the current window
+				 * and pull our congestion window back to
+				 * the new ssthresh).
+				 *
+				 * Dup acks mean that packets have left the
+				 * network (they're now cached at the receiver) 
+				 * so bump cwnd by the amount in the receiver
+				 * to keep a constant cwnd packets in the
+				 * network.
+				 */
+				if (tp->t_timer[TCPT_REXMT] == 0 ||
+				    ti->ti_ack != tp->snd_una)
+					tp->t_dupacks = 0;
+				else if (++tp->t_dupacks == tcprexmtthresh) {
+					tcp_seq onxt = tp->snd_nxt;
+					u_int win =
+					    min(tp->snd_wnd, tp->snd_cwnd) / 2 /
+						tp->t_maxseg;
+
+					if (win < 2)
+						win = 2;
+					tp->snd_ssthresh = win * tp->t_maxseg;
+					tp->t_timer[TCPT_REXMT] = 0;
+					tp->t_rtt = 0;
+					tp->snd_nxt = ti->ti_ack;
+					tp->snd_cwnd = tp->t_maxseg;
+					(void) tcp_output(tp);
+					tp->snd_cwnd = tp->snd_ssthresh +
+					       tp->t_maxseg * tp->t_dupacks;
+					if (SEQ_GT(onxt, tp->snd_nxt))
+						tp->snd_nxt = onxt;
+					goto drop;
+				} else if (tp->t_dupacks > tcprexmtthresh) {
+					tp->snd_cwnd += tp->t_maxseg;
+					(void) tcp_output(tp);
+					goto drop;
+				}
+			} else
+				tp->t_dupacks = 0;
+			break;
+		}
+		/*
+		 * If the congestion window was inflated to account
+		 * for the other side's cached packets, retract it.
+		 */
+		if (tp->t_dupacks > tcprexmtthresh &&
+		    tp->snd_cwnd > tp->snd_ssthresh)
+			tp->snd_cwnd = tp->snd_ssthresh;
+		tp->t_dupacks = 0;
+		if (SEQ_GT(ti->ti_ack, tp->snd_max)) {
+			tcpstat.tcps_rcvacktoomuch++;
+			goto dropafterack;
+		}
+		acked = ti->ti_ack - tp->snd_una;
+		tcpstat.tcps_rcvackpack++;
+		tcpstat.tcps_rcvackbyte += acked;
+
+		/*
+		 * If we have a timestamp reply, update smoothed
+		 * round trip time.  If no timestamp is present but
+		 * transmit timer is running and timed sequence
+		 * number was acked, update smoothed round trip time.
+		 * Since we now have an rtt measurement, cancel the
+		 * timer backoff (cf., Phil Karn's retransmit alg.).
+		 * Recompute the initial retransmit timer.
+		 */
+		if (ts_present)
+			tcp_xmit_timer(tp, tcp_now-ts_ecr+1);
+		else if (tp->t_rtt && SEQ_GT(ti->ti_ack, tp->t_rtseq))
+			tcp_xmit_timer(tp,tp->t_rtt);
+
+		/*
+		 * If all outstanding data is acked, stop retransmit
+		 * timer and remember to restart (more output or persist).
+		 * If there is more data to be acked, restart retransmit
+		 * timer, using current (possibly backed-off) value.
+		 */
+		if (ti->ti_ack == tp->snd_max) {
+			tp->t_timer[TCPT_REXMT] = 0;
+			needoutput = 1;
+		} else if (tp->t_timer[TCPT_PERSIST] == 0)
+			tp->t_timer[TCPT_REXMT] = tp->t_rxtcur;
+		/*
+		 * When new data is acked, open the congestion window.
+		 * If the window gives us less than ssthresh packets
+		 * in flight, open exponentially (maxseg per packet).
+		 * Otherwise open linearly: maxseg per window
+		 * (maxseg^2 / cwnd per packet), plus a constant
+		 * fraction of a packet (maxseg/8) to help larger windows
+		 * open quickly enough.
+		 */
+		{
+		register u_int cw = tp->snd_cwnd;
+		register u_int incr = tp->t_maxseg;
+
+		if (cw > tp->snd_ssthresh)
+			incr = incr * incr / cw + incr / 8;
+		tp->snd_cwnd = min(cw + incr, TCP_MAXWIN<<tp->snd_scale);
+		}
+		if (acked > so->so_snd.sb_cc) {
+			tp->snd_wnd -= so->so_snd.sb_cc;
+			sbdrop(&so->so_snd, (int)so->so_snd.sb_cc);
+			ourfinisacked = 1;
+		} else {
+			sbdrop(&so->so_snd, acked);
+			tp->snd_wnd -= acked;
+			ourfinisacked = 0;
+		}
+		if (so->so_snd.sb_flags & SB_NOTIFY)
+			sowwakeup(so);
+		tp->snd_una = ti->ti_ack;
+		if (SEQ_LT(tp->snd_nxt, tp->snd_una))
+			tp->snd_nxt = tp->snd_una;
+
+		switch (tp->t_state) {
+
+		/*
+		 * In FIN_WAIT_1 STATE in addition to the processing
+		 * for the ESTABLISHED state if our FIN is now acknowledged
+		 * then enter FIN_WAIT_2.
+		 */
+		case TCPS_FIN_WAIT_1:
+			if (ourfinisacked) {
+				/*
+				 * If we can't receive any more
+				 * data, then closing user can proceed.
+				 * Starting the timer is contrary to the
+				 * specification, but if we don't get a FIN
+				 * we'll hang forever.
+				 */
+				if (so->so_state & SS_CANTRCVMORE) {
+					soisdisconnected(so);
+					tp->t_timer[TCPT_2MSL] = tcp_maxidle;
+				}
+				tp->t_state = TCPS_FIN_WAIT_2;
+			}
+			break;
+
+	 	/*
+		 * In CLOSING STATE in addition to the processing for
+		 * the ESTABLISHED state if the ACK acknowledges our FIN
+		 * then enter the TIME-WAIT state, otherwise ignore
+		 * the segment.
+		 */
+		case TCPS_CLOSING:
+			if (ourfinisacked) {
+				tp->t_state = TCPS_TIME_WAIT;
+				tcp_canceltimers(tp);
+				tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL;
+				soisdisconnected(so);
+			}
+			break;
+
+		/*
+		 * In LAST_ACK, we may still be waiting for data to drain
+		 * and/or to be acked, as well as for the ack of our FIN.
+		 * If our FIN is now acknowledged, delete the TCB,
+		 * enter the closed state and return.
+		 */
+		case TCPS_LAST_ACK:
+			if (ourfinisacked) {
+				tp = tcp_close(tp);
+				goto drop;
+			}
+			break;
+
+		/*
+		 * In TIME_WAIT state the only thing that should arrive
+		 * is a retransmission of the remote FIN.  Acknowledge
+		 * it and restart the finack timer.
+		 */
+		case TCPS_TIME_WAIT:
+			tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL;
+			goto dropafterack;
+		}
+	}
+
+step6:
+	/*
+	 * Update window information.
+	 * Don't look at window if no ACK: TAC's send garbage on first SYN.
+	 */
+	if ((tiflags & TH_ACK) &&
+	    (SEQ_LT(tp->snd_wl1, ti->ti_seq) || tp->snd_wl1 == ti->ti_seq &&
+	    (SEQ_LT(tp->snd_wl2, ti->ti_ack) ||
+	     tp->snd_wl2 == ti->ti_ack && tiwin > tp->snd_wnd))) {
+		/* keep track of pure window updates */
+		if (ti->ti_len == 0 &&
+		    tp->snd_wl2 == ti->ti_ack && tiwin > tp->snd_wnd)
+			tcpstat.tcps_rcvwinupd++;
+		tp->snd_wnd = tiwin;
+		tp->snd_wl1 = ti->ti_seq;
+		tp->snd_wl2 = ti->ti_ack;
+		if (tp->snd_wnd > tp->max_sndwnd)
+			tp->max_sndwnd = tp->snd_wnd;
+		needoutput = 1;
+	}
+
+	/*
+	 * Process segments with URG.
+	 */
+	if ((tiflags & TH_URG) && ti->ti_urp &&
+	    TCPS_HAVERCVDFIN(tp->t_state) == 0) {
+		/*
+		 * This is a kludge, but if we receive and accept
+		 * random urgent pointers, we'll crash in
+		 * soreceive.  It's hard to imagine someone
+		 * actually wanting to send this much urgent data.
+		 */
+		if (ti->ti_urp + so->so_rcv.sb_cc > sb_max) {
+			ti->ti_urp = 0;			/* XXX */
+			tiflags &= ~TH_URG;		/* XXX */
+			goto dodata;			/* XXX */
+		}
+		/*
+		 * If this segment advances the known urgent pointer,
+		 * then mark the data stream.  This should not happen
+		 * in CLOSE_WAIT, CLOSING, LAST_ACK or TIME_WAIT STATES since
+		 * a FIN has been received from the remote side. 
+		 * In these states we ignore the URG.
+		 *
+		 * According to RFC961 (Assigned Protocols),
+		 * the urgent pointer points to the last octet
+		 * of urgent data.  We continue, however,
+		 * to consider it to indicate the first octet
+		 * of data past the urgent section as the original 
+		 * spec states (in one of two places).
+		 */
+		if (SEQ_GT(ti->ti_seq+ti->ti_urp, tp->rcv_up)) {
+			tp->rcv_up = ti->ti_seq + ti->ti_urp;
+			so->so_oobmark = so->so_rcv.sb_cc +
+			    (tp->rcv_up - tp->rcv_nxt) - 1;
+			if (so->so_oobmark == 0)
+				so->so_state |= SS_RCVATMARK;
+			sohasoutofband(so);
+			tp->t_oobflags &= ~(TCPOOB_HAVEDATA | TCPOOB_HADDATA);
+		}
+		/*
+		 * Remove out of band data so doesn't get presented to user.
+		 * This can happen independent of advancing the URG pointer,
+		 * but if two URG's are pending at once, some out-of-band
+		 * data may creep in... ick.
+		 */
+		if (ti->ti_urp <= ti->ti_len
+#ifdef SO_OOBINLINE
+		     && (so->so_options & SO_OOBINLINE) == 0
+#endif
+		     )
+			tcp_pulloutofband(so, ti, m);
+	} else
+		/*
+		 * If no out of band data is expected,
+		 * pull receive urgent pointer along
+		 * with the receive window.
+		 */
+		if (SEQ_GT(tp->rcv_nxt, tp->rcv_up))
+			tp->rcv_up = tp->rcv_nxt;
+dodata:							/* XXX */
+
+	/*
+	 * Process the segment text, merging it into the TCP sequencing queue,
+	 * and arranging for acknowledgment of receipt if necessary.
+	 * This process logically involves adjusting tp->rcv_wnd as data
+	 * is presented to the user (this happens in tcp_usrreq.c,
+	 * case PRU_RCVD).  If a FIN has already been received on this
+	 * connection then we just ignore the text.
+	 */
+	if ((ti->ti_len || (tiflags&TH_FIN)) &&
+	    TCPS_HAVERCVDFIN(tp->t_state) == 0) {
+		TCP_REASS(tp, ti, m, so, tiflags);
+		/*
+		 * Note the amount of data that peer has sent into
+		 * our window, in order to estimate the sender's
+		 * buffer size.
+		 */
+		len = so->so_rcv.sb_hiwat - (tp->rcv_adv - tp->rcv_nxt);
+	} else {
+		m_freem(m);
+		tiflags &= ~TH_FIN;
+	}
+
+	/*
+	 * If FIN is received ACK the FIN and let the user know
+	 * that the connection is closing.
+	 */
+	if (tiflags & TH_FIN) {
+		if (TCPS_HAVERCVDFIN(tp->t_state) == 0) {
+			socantrcvmore(so);
+			tp->t_flags |= TF_ACKNOW;
+			tp->rcv_nxt++;
+		}
+		switch (tp->t_state) {
+
+	 	/*
+		 * In SYN_RECEIVED and ESTABLISHED STATES
+		 * enter the CLOSE_WAIT state.
+		 */
+		case TCPS_SYN_RECEIVED:
+		case TCPS_ESTABLISHED:
+			tp->t_state = TCPS_CLOSE_WAIT;
+			break;
+
+	 	/*
+		 * If still in FIN_WAIT_1 STATE FIN has not been acked so
+		 * enter the CLOSING state.
+		 */
+		case TCPS_FIN_WAIT_1:
+			tp->t_state = TCPS_CLOSING;
+			break;
+
+	 	/*
+		 * In FIN_WAIT_2 state enter the TIME_WAIT state,
+		 * starting the time-wait timer, turning off the other 
+		 * standard timers.
+		 */
+		case TCPS_FIN_WAIT_2:
+			tp->t_state = TCPS_TIME_WAIT;
+			tcp_canceltimers(tp);
+			tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL;
+			soisdisconnected(so);
+			break;
+
+		/*
+		 * In TIME_WAIT state restart the 2 MSL time_wait timer.
+		 */
+		case TCPS_TIME_WAIT:
+			tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL;
+			break;
+		}
+	}
+	if (so->so_options & SO_DEBUG)
+		tcp_trace(TA_INPUT, ostate, tp, &tcp_saveti, 0);
+
+	/*
+	 * Return any desired output.
+	 */
+	if (needoutput || (tp->t_flags & TF_ACKNOW))
+		(void) tcp_output(tp);
+	return;
+
+dropafterack:
+	/*
+	 * Generate an ACK dropping incoming segment if it occupies
+	 * sequence space, where the ACK reflects our state.
+	 */
+	if (tiflags & TH_RST)
+		goto drop;
+	m_freem(m);
+	tp->t_flags |= TF_ACKNOW;
+	(void) tcp_output(tp);
+	return;
+
+dropwithreset:
+	/*
+	 * Generate a RST, dropping incoming segment.
+	 * Make ACK acceptable to originator of segment.
+	 * Don't bother to respond if destination was broadcast/multicast.
+	 */
+	if ((tiflags & TH_RST) || m->m_flags & (M_BCAST|M_MCAST) ||
+	    IN_MULTICAST(ti->ti_dst.s_addr))
+		goto drop;
+	if (tiflags & TH_ACK)
+		tcp_respond(tp, ti, m, (tcp_seq)0, ti->ti_ack, TH_RST);
+	else {
+		if (tiflags & TH_SYN)
+			ti->ti_len++;
+		tcp_respond(tp, ti, m, ti->ti_seq+ti->ti_len, (tcp_seq)0,
+		    TH_RST|TH_ACK);
+	}
+	/* destroy temporarily created socket */
+	if (dropsocket)
+		(void) soabort(so);
+	return;
+
+drop:
+	/*
+	 * Drop space held by incoming segment and return.
+	 */
+	if (tp && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG))
+		tcp_trace(TA_DROP, ostate, tp, &tcp_saveti, 0);
+	m_freem(m);
+	/* destroy temporarily created socket */
+	if (dropsocket)
+		(void) soabort(so);
+	return;
+#ifndef TUBA_INCLUDE
+}
+
+/*
+ * Parse the TCP options carried by a received segment.
+ *
+ *	tp		connection the segment belongs to
+ *	cp, cnt		start and byte count of the option area
+ *	ti		header of the segment (only ti_flags is examined)
+ *	ts_present	set to 1 when a well-formed timestamp option is seen
+ *	ts_val, ts_ecr	receive the timestamp value and echo reply after
+ *			NTOHL conversion
+ *
+ * Maximum-segment-size and window-scale options are honoured only on
+ * segments carrying SYN.  Options of unknown kind, or of a known kind
+ * with the wrong length, are skipped.  Parsing stops at end-of-list or
+ * at the first option whose length octet is missing, is smaller than
+ * the two octets every non-NOP option occupies, or runs past the end
+ * of the option area.
+ */
+void
+tcp_dooptions(tp, cp, cnt, ti, ts_present, ts_val, ts_ecr)
+	struct tcpcb *tp;
+	u_char *cp;
+	int cnt;
+	struct tcpiphdr *ti;
+	int *ts_present;
+	u_long *ts_val, *ts_ecr;
+{
+	u_short mss;
+	int opt, optlen;
+
+	for (; cnt > 0; cnt -= optlen, cp += optlen) {
+		opt = cp[0];
+		if (opt == TCPOPT_EOL)
+			break;
+		if (opt == TCPOPT_NOP)
+			optlen = 1;
+		else {
+			/*
+			 * The length octet must itself lie inside the
+			 * option area before it may be read.
+			 */
+			if (cnt < 2)
+				break;
+			optlen = cp[1];
+			/*
+			 * The length counts the kind and length octets,
+			 * so anything below 2 is malformed; anything
+			 * above cnt would make the copies below read
+			 * beyond the options.
+			 */
+			if (optlen < 2 || optlen > cnt)
+				break;
+		}
+		switch (opt) {
+
+		default:
+			continue;
+
+		case TCPOPT_MAXSEG:
+			if (optlen != TCPOLEN_MAXSEG)
+				continue;
+			if (!(ti->ti_flags & TH_SYN))
+				continue;
+			/* cp + 2 may be unaligned, hence the byte copy */
+			bcopy((char *) cp + 2, (char *) &mss, sizeof(mss));
+			NTOHS(mss);
+			(void) tcp_mss(tp, mss);	/* sets t_maxseg */
+			break;
+
+		case TCPOPT_WINDOW:
+			if (optlen != TCPOLEN_WINDOW)
+				continue;
+			if (!(ti->ti_flags & TH_SYN))
+				continue;
+			tp->t_flags |= TF_RCVD_SCALE;
+			/* clamp the peer's shift count to TCP_MAX_WINSHIFT */
+			tp->requested_s_scale = min(cp[2], TCP_MAX_WINSHIFT);
+			break;
+
+		case TCPOPT_TIMESTAMP:
+			if (optlen != TCPOLEN_TIMESTAMP)
+				continue;
+			*ts_present = 1;
+			bcopy((char *)cp + 2, (char *) ts_val, sizeof(*ts_val));
+			NTOHL(*ts_val);
+			bcopy((char *)cp + 6, (char *) ts_ecr, sizeof(*ts_ecr));
+			NTOHL(*ts_ecr);
+
+			/*
+			 * A timestamp received in a SYN makes
+			 * it ok to send timestamp requests and replies.
+			 */
+			if (ti->ti_flags & TH_SYN) {
+				tp->t_flags |= TF_RCVD_TSTMP;
+				tp->ts_recent = *ts_val;
+				tp->ts_recent_age = tcp_now;
+			}
+			break;
+		}
+	}
+}
+
+/*
+ * Remove the urgent (out-of-band) byte from a segment's mbuf chain
+ * so that it is not delivered with the normal data.  The byte is
+ * saved in the connection's t_iobc and TCPOOB_HAVEDATA is raised.
+ * Only the mbuf length is reduced; the segment length used for
+ * sequencing is left alone.
+ *
+ * Panics if the urgent pointer is zero or lies beyond the chain.
+ */
+void
+tcp_pulloutofband(so, ti, m)
+	struct socket *so;
+	struct tcpiphdr *ti;
+	register struct mbuf *m;
+{
+	int off = ti->ti_urp - 1;	/* offset of the byte within the chain */
+
+	if (off >= 0) {
+		do {
+			if (off < m->m_len) {
+				/* the byte lives in this mbuf */
+				char *oob = mtod(m, caddr_t) + off;
+				struct tcpcb *tp = sototcpcb(so);
+				unsigned tail = (unsigned)(m->m_len - off - 1);
+
+				tp->t_iobc = *oob;
+				tp->t_oobflags |= TCPOOB_HAVEDATA;
+				/* slide the remaining data down over it */
+				bcopy(oob + 1, oob, tail);
+				m->m_len -= 1;
+				return;
+			}
+			/* not here: step over this mbuf */
+			off -= m->m_len;
+			m = m->m_next;
+		} while (m != 0 && off >= 0);
+	}
+	panic("tcp_pulloutofband");
+}
+
+/*
+ * Collect new round-trip time estimate
+ * and update averages and current timeout.
+ *
+ *	tp	connection whose t_srtt, t_rttvar and t_rxtcur are
+ *		recomputed; t_rtt, t_rxtshift and t_softerror are cleared.
+ *	rtt	the new round-trip sample.  The smoothing branch subtracts
+ *		1 from it ("origin 0"); the first-sample branch uses it
+ *		as given.
+ *
+ * NOTE(review): rtt is declared short, while the timestamp-based
+ * caller in tcp_input builds its argument from u_long values; a
+ * sample that does not fit in a short would be truncated -- confirm.
+ */
+void
+tcp_xmit_timer(tp, rtt)
+	register struct tcpcb *tp;
+	short rtt;
+{
+	register short delta;
+
+	tcpstat.tcps_rttupdated++;
+	if (tp->t_srtt != 0) {
+		/*
+		 * srtt is stored as fixed point with 3 bits after the
+		 * binary point (i.e., scaled by 8).  The following magic
+		 * is equivalent to the smoothing algorithm in rfc793 with
+		 * an alpha of .875 (srtt = rtt/8 + srtt*7/8 in fixed
+		 * point).  Adjust rtt to origin 0.
+		 */
+		delta = rtt - 1 - (tp->t_srtt >> TCP_RTT_SHIFT);
+		if ((tp->t_srtt += delta) <= 0)
+			tp->t_srtt = 1;
+		/*
+		 * We accumulate a smoothed rtt variance (actually, a
+		 * smoothed mean difference), then set the retransmit
+		 * timer to smoothed rtt + 4 times the smoothed variance.
+		 * rttvar is stored as fixed point with 2 bits after the
+		 * binary point (scaled by 4).  The following is
+		 * equivalent to rfc793 smoothing with an alpha of .75
+		 * (rttvar = rttvar*3/4 + |delta| / 4).  This replaces
+		 * rfc793's wired-in beta.
+		 */
+		if (delta < 0)
+			delta = -delta;
+		delta -= (tp->t_rttvar >> TCP_RTTVAR_SHIFT);
+		if ((tp->t_rttvar += delta) <= 0)
+			tp->t_rttvar = 1;
+	} else {
+		/* 
+		 * No rtt measurement yet - use the unsmoothed rtt.
+		 * Set the variance to half the rtt (so our first
+		 * retransmit happens at 3*rtt).
+		 */
+		tp->t_srtt = rtt << TCP_RTT_SHIFT;
+		tp->t_rttvar = rtt << (TCP_RTTVAR_SHIFT - 1);
+	}
+	tp->t_rtt = 0;		/* this sample has been consumed */
+	tp->t_rxtshift = 0;	/* presumably cancels retransmit backoff */
+
+	/*
+	 * the retransmit should happen at rtt + 4 * rttvar.
+	 * Because of the way we do the smoothing, srtt and rttvar
+	 * will each average +1/2 tick of bias.  When we compute
+	 * the retransmit timer, we want 1/2 tick of rounding and
+	 * 1 extra tick because of +-1/2 tick uncertainty in the
+	 * firing of the timer.  The bias will give us exactly the
+	 * 1.5 tick we need.  But, because the bias is
+	 * statistical, we have to test that we don't drop below
+	 * the minimum feasible timer (which is 2 ticks).
+	 */
+	TCPT_RANGESET(tp->t_rxtcur, TCP_REXMTVAL(tp),
+	    tp->t_rttmin, TCPTV_REXMTMAX);
+	
+	/*
+	 * We received an ack for a packet that wasn't retransmitted;
+	 * it is probably safe to discard any error indications we've
+	 * received recently.  This isn't quite right, but close enough
+	 * for now (a route might have failed after we sent a segment,
+	 * and the return path might not be symmetrical).
+	 */
+	tp->t_softerror = 0;
+}
+
+/*
+ * Determine a reasonable value for maxseg size.
+ * If the route is known, check route for mtu.
+ * If none, use an mss that can be handled on the outgoing
+ * interface without forcing IP to fragment; if bigger than
+ * an mbuf cluster (MCLBYTES), round down to nearest multiple of MCLBYTES
+ * to utilize large mbufs.  If no route is found, route has no mtu,
+ * or the destination isn't local, use a default, hopefully conservative
+ * size (usually 512 or the default IP max size, but no more than the mtu
+ * of the interface), as we can't discover anything about intervening
+ * gateways or networks.  We also initialize the congestion/slow start
+ * window to be a single segment if the destination isn't local.
+ * While looking at the routing entry, we also initialize other path-dependent
+ * parameters from pre-set or cached values in the routing entry.
+ *
+ *	tp	connection to size; its inpcb's cached route is used and,
+ *		if empty, filled in with rtalloc().
+ *	offer	MSS offered by the peer, or 0 if there is none.  A non-zero
+ *		offer caps the result and forces t_maxseg to be stored.
+ *
+ * Returns the computed mss.  If no route can be obtained, returns
+ * tcp_mssdflt and leaves tp unmodified.  Otherwise it may update
+ * t_maxseg, snd_cwnd, snd_ssthresh, the rtt state (t_srtt, t_rttvar,
+ * t_rttmin, t_rxtcur) and the socket buffer reservations.
+ *
+ * NOTE(review): as written, snd_cwnd is set to mss on every call that
+ * finds a route, not only when the destination isn't local.
+ */
+int
+tcp_mss(tp, offer)
+	register struct tcpcb *tp;
+	u_int offer;
+{
+	struct route *ro;
+	register struct rtentry *rt;
+	struct ifnet *ifp;
+	register int rtt, mss;
+	u_long bufsize;
+	struct inpcb *inp;
+	struct socket *so;
+	extern int tcp_mssdflt;
+
+	inp = tp->t_inpcb;
+	ro = &inp->inp_route;
+
+	if ((rt = ro->ro_rt) == (struct rtentry *)0) {
+		/* No route yet, so try to acquire one */
+		if (inp->inp_faddr.s_addr != INADDR_ANY) {
+			ro->ro_dst.sa_family = AF_INET;
+			ro->ro_dst.sa_len = sizeof(ro->ro_dst);
+			((struct sockaddr_in *) &ro->ro_dst)->sin_addr =
+				inp->inp_faddr;
+			rtalloc(ro);
+		}
+		if ((rt = ro->ro_rt) == (struct rtentry *)0)
+			return (tcp_mssdflt);
+	}
+	ifp = rt->rt_ifp;
+	so = inp->inp_socket;
+
+#ifdef RTV_MTU	/* if route characteristics exist ... */
+	/*
+	 * While we're here, check if there's an initial rtt
+	 * or rttvar.  Convert from the route-table units
+	 * to scaled multiples of the slow timeout timer.
+	 */
+	if (tp->t_srtt == 0 && (rtt = rt->rt_rmx.rmx_rtt)) {
+		/*
+		 * XXX the lock bit for RTT indicates that the value
+		 * is also a minimum value; this is subject to time.
+		 */
+		if (rt->rt_rmx.rmx_locks & RTV_RTT)
+			tp->t_rttmin = rtt / (RTM_RTTUNIT / PR_SLOWHZ);
+		tp->t_srtt = rtt / (RTM_RTTUNIT / (PR_SLOWHZ * TCP_RTT_SCALE));
+		if (rt->rt_rmx.rmx_rttvar)
+			tp->t_rttvar = rt->rt_rmx.rmx_rttvar /
+			    (RTM_RTTUNIT / (PR_SLOWHZ * TCP_RTTVAR_SCALE));
+		else
+			/* default variation is +- 1 rtt */
+			tp->t_rttvar =
+			    tp->t_srtt * TCP_RTTVAR_SCALE / TCP_RTT_SCALE;
+		TCPT_RANGESET(tp->t_rxtcur,
+		    ((tp->t_srtt >> 2) + tp->t_rttvar) >> 1,
+		    tp->t_rttmin, TCPTV_REXMTMAX);
+	}
+	/*
+	 * if there's an mtu associated with the route, use it
+	 */
+	if (rt->rt_rmx.rmx_mtu)
+		mss = rt->rt_rmx.rmx_mtu - sizeof(struct tcpiphdr);
+	else
+#endif /* RTV_MTU */
+	{
+		mss = ifp->if_mtu - sizeof(struct tcpiphdr);
+#if	(MCLBYTES & (MCLBYTES - 1)) == 0
+		if (mss > MCLBYTES)
+			mss &= ~(MCLBYTES-1);
+#else
+		if (mss > MCLBYTES)
+			mss = mss / MCLBYTES * MCLBYTES;
+#endif
+		if (!in_localaddr(inp->inp_faddr))
+			mss = min(mss, tcp_mssdflt);
+	}
+	/*
+	 * The current mss, t_maxseg, is initialized to the default value.
+	 * If we compute a smaller value, reduce the current mss.
+	 * If we compute a larger value, return it for use in sending
+	 * a max seg size option, but don't store it for use
+	 * unless we received an offer at least that large from peer.
+	 * However, do not accept offers under 32 bytes.
+	 */
+	if (offer)
+		mss = min(mss, offer);
+	mss = max(mss, 32);		/* sanity */
+	if (mss < tp->t_maxseg || offer != 0) {
+		/*
+		 * If there's a pipesize, change the socket buffer
+		 * to that size.  Make the socket buffers an integral
+		 * number of mss units; if the mss is larger than
+		 * the socket buffer, decrease the mss.
+		 */
+#ifdef RTV_SPIPE
+		if ((bufsize = rt->rt_rmx.rmx_sendpipe) == 0)
+#endif
+			bufsize = so->so_snd.sb_hiwat;
+		if (bufsize < mss)
+			mss = bufsize;
+		else {
+			bufsize = roundup(bufsize, mss);
+			if (bufsize > sb_max)
+				bufsize = sb_max;
+			(void)sbreserve(&so->so_snd, bufsize);
+		}
+		tp->t_maxseg = mss;
+
+#ifdef RTV_RPIPE
+		if ((bufsize = rt->rt_rmx.rmx_recvpipe) == 0)
+#endif
+			bufsize = so->so_rcv.sb_hiwat;
+		if (bufsize > mss) {
+			bufsize = roundup(bufsize, mss);
+			if (bufsize > sb_max)
+				bufsize = sb_max;
+			(void)sbreserve(&so->so_rcv, bufsize);
+		}
+	}
+	tp->snd_cwnd = mss;	/* congestion window starts at one segment */
+
+#ifdef RTV_SSTHRESH
+	if (rt->rt_rmx.rmx_ssthresh) {
+		/*
+		 * There's some sort of gateway or interface
+		 * buffer limit on the path.  Use this to set
+		 * the slow start threshold, but set the
+		 * threshold to no less than 2*mss.
+		 */
+		tp->snd_ssthresh = max(2 * mss, rt->rt_rmx.rmx_ssthresh);
+	}
+#endif /* RTV_SSTHRESH */
+	return (mss);
+}
+#endif /* TUBA_INCLUDE */
diff --git a/sys/netinet/tcp_output.c b/sys/netinet/tcp_output.c
new file mode 100644
index 00000000000..667579fc0ed
--- /dev/null
+++ b/sys/netinet/tcp_output.c
@@ -0,0 +1,599 @@
+/*
+ * Copyright (c) 1982, 1986, 1988, 1990, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)tcp_output.c	8.3 (Berkeley) 12/30/93
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/protosw.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/errno.h>
+
+#include <net/route.h>
+
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/ip.h>
+#include <netinet/in_pcb.h>
+#include <netinet/ip_var.h>
+#include <netinet/tcp.h>
+#define	TCPOUTFLAGS
+#include <netinet/tcp_fsm.h>
+#include <netinet/tcp_seq.h>
+#include <netinet/tcp_timer.h>
+#include <netinet/tcp_var.h>
+#include <netinet/tcpip.h>
+#include <netinet/tcp_debug.h>
+
+#ifdef notyet
+extern struct mbuf *m_copypack();	/* used only by the notyet path in tcp_output */
+#endif
+
+
+#define MAX_TCPOPTLEN	32	/* max # bytes that go in options */
+
+/*
+ * Tcp output routine: figure out what should be sent and send it.
+ * Goes around again while the data had to be clipped to one segment.
+ * Returns 0 or the error from the output call; ENOBUFS, and EHOSTUNREACH
+ * or ENETDOWN once a SYN has been received, are absorbed (0 is returned).
+ */
+int
+tcp_output(tp)
+	register struct tcpcb *tp;
+{
+	register struct socket *so = tp->t_inpcb->inp_socket;
+	register long len, win;		/* data length; send, later receive, window */
+	int off, flags, error;
+	register struct mbuf *m;
+	register struct tcpiphdr *ti;
+	u_char opt[MAX_TCPOPTLEN];	/* options are built here, then copied after the header */
+	unsigned optlen, hdrlen;
+	int idle, sendalot;		/* nothing outstanding; go around again after sending */
+
+	/*
+	 * Determine length of data that should be transmitted,
+	 * and flags that will be used.
+	 * If there is some data or critical controls (SYN, RST)
+	 * to send, then transmit; otherwise, investigate further.
+	 */
+	idle = (tp->snd_max == tp->snd_una);
+	if (idle && tp->t_idle >= tp->t_rxtcur)
+		/*
+		 * We have been idle for "a while" and no acks are
+		 * expected to clock out any data we send --
+		 * slow start to get ack "clock" running again.
+		 */
+		tp->snd_cwnd = tp->t_maxseg;
+again:
+	sendalot = 0;
+	off = tp->snd_nxt - tp->snd_una;	/* offset of snd_nxt within the send buffer */
+	win = min(tp->snd_wnd, tp->snd_cwnd);
+
+	flags = tcp_outflags[tp->t_state];
+	/*
+	 * If in persist timeout with window of 0, send 1 byte.
+	 * Otherwise, if window is small but nonzero
+	 * and timer expired, we will send what we can
+	 * and go to transmit state.
+	 */
+	if (tp->t_force) {
+		if (win == 0) {
+			/*
+			 * If we still have some data to send, then
+			 * clear the FIN bit.  Usually this would
+			 * happen below when it realizes that we
+			 * aren't sending all the data.  However,
+			 * if we have exactly 1 byte of unsent data,
+			 * then it won't clear the FIN bit below,
+			 * and if we are in persist state, we wind
+			 * up sending the packet without recording
+			 * that we sent the FIN bit.
+			 *
+			 * We can't just blindly clear the FIN bit,
+			 * because if we don't have any more data
+			 * to send then the probe will be the FIN
+			 * itself.
+			 */
+			if (off < so->so_snd.sb_cc)
+				flags &= ~TH_FIN;
+			win = 1;
+		} else {
+			tp->t_timer[TCPT_PERSIST] = 0;
+			tp->t_rxtshift = 0;
+		}
+	}
+
+	len = min(so->so_snd.sb_cc, win) - off;
+
+	if (len < 0) {
+		/*
+		 * If FIN has been sent but not acked,
+		 * but we haven't been called to retransmit,
+		 * len will be -1.  Otherwise, window shrank
+		 * after we sent into it.  If window shrank to 0,
+		 * cancel pending retransmit and pull snd_nxt
+		 * back to (closed) window.  We will enter persist
+		 * state below.  If the window didn't close completely,
+		 * just wait for an ACK.
+		 */
+		len = 0;
+		if (win == 0) {
+			tp->t_timer[TCPT_REXMT] = 0;
+			tp->snd_nxt = tp->snd_una;
+		}
+	}
+	if (len > tp->t_maxseg) {
+		len = tp->t_maxseg;
+		sendalot = 1;
+	}
+	if (SEQ_LT(tp->snd_nxt + len, tp->snd_una + so->so_snd.sb_cc))
+		flags &= ~TH_FIN;
+
+	win = sbspace(&so->so_rcv);	/* from here on win is receive buffer space */
+
+	/*
+	 * Sender silly window avoidance.  Send if we have a
+	 * maximum-size segment, if the connection is idle (or
+	 * TF_NODELAY is set) and this empties the send buffer,
+	 * or if we are forced; otherwise don't bother.
+	 * If peer's buffer is tiny, then send
+	 * when window is at least half open.
+	 * If retransmitting (possibly after persist timer forced us
+	 * to send into a small window), then must resend.
+	 */
+	if (len) {
+		if (len == tp->t_maxseg)
+			goto send;
+		if ((idle || tp->t_flags & TF_NODELAY) &&
+		    len + off >= so->so_snd.sb_cc)
+			goto send;
+		if (tp->t_force)
+			goto send;
+		if (len >= tp->max_sndwnd / 2)
+			goto send;
+		if (SEQ_LT(tp->snd_nxt, tp->snd_max))
+			goto send;
+	}
+
+	/*
+	 * Compare available window to amount of window
+	 * known to peer (as advertised window less
+	 * next expected input).  If the difference is at least two
+	 * max size segments, or at least 50% of the maximum possible
+	 * window, then want to send a window update to peer.
+	 */
+	if (win > 0) {
+		/* 
+		 * "adv" is the amount we can increase the window,
+		 * taking into account that we are limited by
+		 * TCP_MAXWIN << tp->rcv_scale.
+		 */
+		long adv = min(win, (long)TCP_MAXWIN << tp->rcv_scale) -
+			(tp->rcv_adv - tp->rcv_nxt);
+
+		if (adv >= (long) (2 * tp->t_maxseg))
+			goto send;
+		if (2 * adv >= (long) so->so_rcv.sb_hiwat)
+			goto send;
+	}
+
+	/*
+	 * Send if we owe peer an ACK, if the state calls for a SYN
+	 * or RST, or if the urgent pointer is ahead of snd_una.
+	 */
+	if (tp->t_flags & TF_ACKNOW)
+		goto send;
+	if (flags & (TH_SYN|TH_RST))
+		goto send;
+	if (SEQ_GT(tp->snd_up, tp->snd_una))
+		goto send;
+	/*
+	 * If our state indicates that FIN should be sent
+	 * and we have not yet done so, or we're retransmitting the FIN,
+	 * then we need to send.
+	 */
+	if (flags & TH_FIN &&
+	    ((tp->t_flags & TF_SENTFIN) == 0 || tp->snd_nxt == tp->snd_una))
+		goto send;
+
+	/*
+	 * TCP window updates are not reliable, rather a polling protocol
+	 * using ``persist'' packets is used to ensure receipt of window
+	 * updates.  The three ``states'' for the output side are:
+	 *	idle			not doing retransmits or persists
+	 *	persisting		to move a small or zero window
+	 *	(re)transmitting	and thereby not persisting
+	 *
+	 * tp->t_timer[TCPT_PERSIST]
+	 *	is set when we are in persist state.
+	 * tp->t_force
+	 *	is set when we are called to send a persist packet.
+	 * tp->t_timer[TCPT_REXMT]
+	 *	is set when we are retransmitting
+	 * The output side is idle when both timers are zero.
+	 *
+	 * If send window is too small, there is data to transmit, and no
+	 * retransmit or persist is pending, then go to persist state.
+	 * If nothing happens soon, send when timer expires:
+	 * if window is nonzero, transmit what we can,
+	 * otherwise force out a byte.
+	 */
+	if (so->so_snd.sb_cc && tp->t_timer[TCPT_REXMT] == 0 &&
+	    tp->t_timer[TCPT_PERSIST] == 0) {
+		tp->t_rxtshift = 0;
+		tcp_setpersist(tp);
+	}
+
+	/* No reason to send a segment, just return. */
+	return (0);
+
+send:
+	/*
+	 * Before ESTABLISHED, force sending of initial options
+	 * unless TCP set not to do any options.
+	 * NOTE: we assume that the IP/TCP header plus TCP options
+	 * always fit in a single mbuf, leaving room for a maximum
+	 * link header, i.e.
+	 *	max_linkhdr + sizeof (struct tcpiphdr) + optlen <= MHLEN
+	 */
+	optlen = 0;
+	hdrlen = sizeof (struct tcpiphdr);
+	if (flags & TH_SYN) {
+		tp->snd_nxt = tp->iss;
+		if ((tp->t_flags & TF_NOOPT) == 0) {
+			u_short mss;
+
+			opt[0] = TCPOPT_MAXSEG;
+			opt[1] = 4;
+			mss = htons((u_short) tcp_mss(tp, 0));
+			bcopy((caddr_t)&mss, (caddr_t)(opt + 2), sizeof(mss));
+			optlen = 4;
+	 
+			if ((tp->t_flags & TF_REQ_SCALE) &&
+			    ((flags & TH_ACK) == 0 ||
+			    (tp->t_flags & TF_RCVD_SCALE))) {
+				*((u_long *) (opt + optlen)) = htonl(
+					TCPOPT_NOP << 24 |
+					TCPOPT_WINDOW << 16 |
+					TCPOLEN_WINDOW << 8 |
+					tp->request_r_scale);
+				optlen += 4;
+			}
+		}
+ 	}
+ 
+ 	/*
+	 * Send a timestamp and echo-reply if this is a SYN and our side 
+	 * wants to use timestamps (TF_REQ_TSTMP is set) or both our side
+	 * and our peer have sent timestamps in our SYN's.
+ 	 */
+ 	if ((tp->t_flags & (TF_REQ_TSTMP|TF_NOOPT)) == TF_REQ_TSTMP &&
+ 	     (flags & TH_RST) == 0 &&
+ 	    ((flags & (TH_SYN|TH_ACK)) == TH_SYN ||
+	     (tp->t_flags & TF_RCVD_TSTMP))) {
+		u_long *lp = (u_long *)(opt + optlen);
+ 
+ 		/* Form timestamp option as shown in appendix A of RFC 1323. */
+ 		*lp++ = htonl(TCPOPT_TSTAMP_HDR);
+ 		*lp++ = htonl(tcp_now);
+ 		*lp   = htonl(tp->ts_recent);
+ 		optlen += TCPOLEN_TSTAMP_APPA;
+ 	}
+
+ 	hdrlen += optlen;
+ 
+	/*
+	 * Adjust data length if insertion of options will
+	 * bump the packet length beyond the t_maxseg length.
+	 */
+	 if (len > tp->t_maxseg - optlen) {
+		len = tp->t_maxseg - optlen;
+		sendalot = 1;
+	 }
+
+
+#ifdef DIAGNOSTIC
+ 	if (max_linkhdr + hdrlen > MHLEN)
+		panic("tcphdr too big");
+#endif
+
+	/*
+	 * Grab a header mbuf, attaching a copy of data to
+	 * be transmitted, and initialize the header from
+	 * the template for sends on this connection.
+	 */
+	if (len) {
+		if (tp->t_force && len == 1)
+			tcpstat.tcps_sndprobe++;
+		else if (SEQ_LT(tp->snd_nxt, tp->snd_max)) {
+			tcpstat.tcps_sndrexmitpack++;
+			tcpstat.tcps_sndrexmitbyte += len;
+		} else {
+			tcpstat.tcps_sndpack++;
+			tcpstat.tcps_sndbyte += len;
+		}
+#ifdef notyet
+		if ((m = m_copypack(so->so_snd.sb_mb, off,
+		    (int)len, max_linkhdr + hdrlen)) == 0) {
+			error = ENOBUFS;
+			goto out;
+		}
+		/*
+		 * m_copypack left space for our hdr; use it.
+		 */
+		m->m_len += hdrlen;
+		m->m_data -= hdrlen;
+#else
+		MGETHDR(m, M_DONTWAIT, MT_HEADER);
+		if (m == NULL) {
+			error = ENOBUFS;
+			goto out;
+		}
+		m->m_data += max_linkhdr;
+		m->m_len = hdrlen;
+		if (len <= MHLEN - hdrlen - max_linkhdr) {
+			m_copydata(so->so_snd.sb_mb, off, (int) len,
+			    mtod(m, caddr_t) + hdrlen);
+			m->m_len += len;
+		} else {
+			m->m_next = m_copy(so->so_snd.sb_mb, off, (int) len);
+			if (m->m_next == 0)
+				len = 0;	/* NOTE(review): copy failed, yet we send on with no data */
+		}
+#endif
+		/*
+		 * If we're sending everything we've got, set PUSH.
+		 * (This will keep happy those implementations which only
+		 * give data to the user when a buffer fills or
+		 * a PUSH comes in.)
+		 */
+		if (off + len == so->so_snd.sb_cc)
+			flags |= TH_PUSH;
+	} else {
+		if (tp->t_flags & TF_ACKNOW)
+			tcpstat.tcps_sndacks++;
+		else if (flags & (TH_SYN|TH_FIN|TH_RST))
+			tcpstat.tcps_sndctrl++;
+		else if (SEQ_GT(tp->snd_up, tp->snd_una))
+			tcpstat.tcps_sndurg++;
+		else
+			tcpstat.tcps_sndwinup++;
+
+		MGETHDR(m, M_DONTWAIT, MT_HEADER);
+		if (m == NULL) {
+			error = ENOBUFS;
+			goto out;
+		}
+		m->m_data += max_linkhdr;
+		m->m_len = hdrlen;
+	}
+	m->m_pkthdr.rcvif = (struct ifnet *)0;
+	ti = mtod(m, struct tcpiphdr *);
+	if (tp->t_template == 0)
+		panic("tcp_output");
+	bcopy((caddr_t)tp->t_template, (caddr_t)ti, sizeof (struct tcpiphdr));
+
+	/*
+	 * Fill in fields, remembering maximum advertised
+	 * window for use in delaying messages about window sizes.
+	 * If resending a FIN, be sure not to use a new sequence number.
+	 */
+	if (flags & TH_FIN && tp->t_flags & TF_SENTFIN && 
+	    tp->snd_nxt == tp->snd_max)
+		tp->snd_nxt--;
+	/*
+	 * If we are doing retransmissions, then snd_nxt will
+	 * not reflect the first unsent octet.  For ACK only
+	 * packets, we do not want the sequence number of the
+	 * retransmitted packet, we want the sequence number
+	 * of the next unsent octet.  So, if there is no data
+	 * (and no SYN or FIN), use snd_max instead of snd_nxt
+	 * when filling in ti_seq.  But if we are in persist
+	 * state, snd_max might reflect one byte beyond the
+	 * right edge of the window, so use snd_nxt in that
+	 * case, since we know we aren't doing a retransmission.
+	 * (retransmit and persist are mutually exclusive...)
+	 */
+	if (len || (flags & (TH_SYN|TH_FIN)) || tp->t_timer[TCPT_PERSIST])
+		ti->ti_seq = htonl(tp->snd_nxt);
+	else
+		ti->ti_seq = htonl(tp->snd_max);
+	ti->ti_ack = htonl(tp->rcv_nxt);
+	if (optlen) {
+		bcopy((caddr_t)opt, (caddr_t)(ti + 1), optlen);
+		ti->ti_off = (sizeof (struct tcphdr) + optlen) >> 2;
+	}
+	ti->ti_flags = flags;
+	/*
+	 * Calculate receive window.  Don't shrink window,
+	 * but avoid silly window syndrome.
+	 */
+	if (win < (long)(so->so_rcv.sb_hiwat / 4) && win < (long)tp->t_maxseg)
+		win = 0;
+	if (win > (long)TCP_MAXWIN << tp->rcv_scale)
+		win = (long)TCP_MAXWIN << tp->rcv_scale;
+	if (win < (long)(tp->rcv_adv - tp->rcv_nxt))
+		win = (long)(tp->rcv_adv - tp->rcv_nxt);
+	ti->ti_win = htons((u_short) (win>>tp->rcv_scale));
+	if (SEQ_GT(tp->snd_up, tp->snd_nxt)) {
+		ti->ti_urp = htons((u_short)(tp->snd_up - tp->snd_nxt));
+		ti->ti_flags |= TH_URG;
+	} else
+		/*
+		 * If no urgent pointer to send, then we pull
+		 * the urgent pointer to the left edge of the send window
+		 * so that it doesn't drift into the send window on sequence
+		 * number wraparound.
+		 */
+		tp->snd_up = tp->snd_una;		/* drag it along */
+
+	/*
+	 * Put TCP length in extended header, and then
+	 * checksum extended header and data.
+	 */
+	if (len + optlen)
+		ti->ti_len = htons((u_short)(sizeof (struct tcphdr) +
+		    optlen + len));
+	ti->ti_sum = in_cksum(m, (int)(hdrlen + len));
+
+	/*
+	 * In transmit state, time the transmission and arrange for
+	 * the retransmit.  In persist state, just set snd_max.
+	 */
+	if (tp->t_force == 0 || tp->t_timer[TCPT_PERSIST] == 0) {
+		tcp_seq startseq = tp->snd_nxt;
+
+		/*
+		 * Advance snd_nxt over sequence space of this segment.
+		 */
+		if (flags & (TH_SYN|TH_FIN)) {
+			if (flags & TH_SYN)
+				tp->snd_nxt++;
+			if (flags & TH_FIN) {
+				tp->snd_nxt++;
+				tp->t_flags |= TF_SENTFIN;
+			}
+		}
+		tp->snd_nxt += len;
+		if (SEQ_GT(tp->snd_nxt, tp->snd_max)) {
+			tp->snd_max = tp->snd_nxt;
+			/*
+			 * Time this transmission if not a retransmission and
+			 * not currently timing anything.
+			 */
+			if (tp->t_rtt == 0) {
+				tp->t_rtt = 1;
+				tp->t_rtseq = startseq;
+				tcpstat.tcps_segstimed++;
+			}
+		}
+
+		/*
+		 * Set retransmit timer if not currently set,
+		 * and not doing an ack or a keep-alive probe.
+		 * Initial value for retransmit timer is smoothed
+		 * round-trip time + 2 * round-trip time variance.
+		 * Initialize shift counter which is used for backoff
+		 * of retransmit time.
+		 */
+		if (tp->t_timer[TCPT_REXMT] == 0 &&
+		    tp->snd_nxt != tp->snd_una) {
+			tp->t_timer[TCPT_REXMT] = tp->t_rxtcur;
+			if (tp->t_timer[TCPT_PERSIST]) {
+				tp->t_timer[TCPT_PERSIST] = 0;
+				tp->t_rxtshift = 0;
+			}
+		}
+	} else
+		if (SEQ_GT(tp->snd_nxt + len, tp->snd_max))
+			tp->snd_max = tp->snd_nxt + len;
+
+	/* Trace. */
+	if (so->so_options & SO_DEBUG)
+		tcp_trace(TA_OUTPUT, tp->t_state, tp, ti, 0);
+
+	/*
+	 * Fill in IP length and desired time to live and
+	 * send to IP level.  There should be a better way
+	 * to handle ttl and tos; we could keep them in
+	 * the template, but need a way to checksum without them.
+	 */
+	m->m_pkthdr.len = hdrlen + len;
+#ifdef TUBA
+	if (tp->t_tuba_pcb)
+		error = tuba_output(m, tp);
+	else
+#endif
+    {	/* IP path; without TUBA this is simply an unconditional block */
+	((struct ip *)ti)->ip_len = m->m_pkthdr.len;
+	((struct ip *)ti)->ip_ttl = tp->t_inpcb->inp_ip.ip_ttl;	/* XXX */
+	((struct ip *)ti)->ip_tos = tp->t_inpcb->inp_ip.ip_tos;	/* XXX */
+#if BSD >= 43
+	error = ip_output(m, tp->t_inpcb->inp_options, &tp->t_inpcb->inp_route,
+	    so->so_options & SO_DONTROUTE, 0);
+#else
+	error = ip_output(m, (struct mbuf *)0, &tp->t_inpcb->inp_route, 
+	    so->so_options & SO_DONTROUTE);
+#endif
+    }
+	if (error) {
+out:		/* also reached by goto, with ENOBUFS, when no mbuf could be had */
+		if (error == ENOBUFS) {
+			tcp_quench(tp->t_inpcb, 0);
+			return (0);
+		}
+		if ((error == EHOSTUNREACH || error == ENETDOWN)
+		    && TCPS_HAVERCVDSYN(tp->t_state)) {
+			tp->t_softerror = error;	/* remembered, not returned */
+			return (0);
+		}
+		return (error);
+	}
+	tcpstat.tcps_sndtotal++;
+
+	/*
+	 * Data sent (as far as we can tell).
+	 * If this advertises a larger window than any other segment,
+	 * then remember the size of the advertised window.
+	 * Any pending ACK has now been sent.
+	 */
+	if (win > 0 && SEQ_GT(tp->rcv_nxt+win, tp->rcv_adv))
+		tp->rcv_adv = tp->rcv_nxt + win;
+	tp->last_ack_sent = tp->rcv_nxt;
+	tp->t_flags &= ~(TF_ACKNOW|TF_DELACK);
+	if (sendalot)
+		goto again;
+	return (0);
+}
+
+/* Start or restart the persist timer for tp and advance its backoff shift. */
+void
+tcp_setpersist(tp)
+	register struct tcpcb *tp;
+{
+	register int t = ((tp->t_srtt >> 2) + tp->t_rttvar) >> 1;
+
+	if (tp->t_timer[TCPT_REXMT])	/* must not be armed alongside persist */
+		panic("tcp_output REXMT");
+	/* Start/restart persistence timer: t times the backoff for the current
+	 * shift, with TCPTV_PERSMIN and TCPTV_PERSMAX passed as the bounds. */
+	TCPT_RANGESET(tp->t_timer[TCPT_PERSIST],
+	    t * tcp_backoff[tp->t_rxtshift],
+	    TCPTV_PERSMIN, TCPTV_PERSMAX);
+	if (tp->t_rxtshift < TCP_MAXRXTSHIFT)
+		tp->t_rxtshift++;
+}
diff --git a/sys/netinet/tcp_reass.c b/sys/netinet/tcp_reass.c
new file mode 100644
index 00000000000..2dd1d749c40
--- /dev/null
+++ b/sys/netinet/tcp_reass.c
@@ -0,0 +1,1647 @@
+/*
+ * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1994
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)tcp_input.c	8.5 (Berkeley) 4/10/94
+ */
+
+#ifndef TUBA_INCLUDE
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/protosw.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/errno.h>
+
+#include <net/if.h>
+#include <net/route.h>
+
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/ip.h>
+#include <netinet/in_pcb.h>
+#include <netinet/ip_var.h>
+#include <netinet/tcp.h>
+#include <netinet/tcp_fsm.h>
+#include <netinet/tcp_seq.h>
+#include <netinet/tcp_timer.h>
+#include <netinet/tcp_var.h>
+#include <netinet/tcpip.h>
+#include <netinet/tcp_debug.h>
+
+int	tcprexmtthresh = 3;
+struct	tcpiphdr tcp_saveti;		/* header copy tcp_input keeps for SO_DEBUG sockets */
+struct	inpcb *tcp_last_inpcb = &tcb;	/* pcb last found by tcp_input, tried first next time */
+
+extern u_long sb_max;
+
+#endif /* TUBA_INCLUDE */
+#define TCP_PAWS_IDLE	(24 * 24 * 60 * 60 * PR_SLOWHZ)	/* 24 days of seconds, times PR_SLOWHZ */
+
+/* for modulo comparisons of timestamps: the difference is taken as a signed int */
+#define TSTMP_LT(a,b)	((int)((a)-(b)) < 0)
+#define TSTMP_GEQ(a,b)	((int)((a)-(b)) >= 0)
+
+
+/*
+ * Insert segment ti (data in mbuf m) into the reassembly queue of
+ * control block tp.  Leaves TH_FIN in flags if reassembly now includes
+ * a segment with FIN.  The macro form does the common case inline
+ * (segment is the next to be received on an established connection,
+ * and the queue is empty), appending m to so's receive buffer without
+ * queue linkage; otherwise it calls tcp_reass().  Sets TF_DELACK on the
+ * inline path and TF_ACKNOW otherwise (so fast retransmit can work).
+ * Arguments are evaluated more than once; flags must be an lvalue.
+ */
+#define	TCP_REASS(tp, ti, m, so, flags) { \
+	if ((ti)->ti_seq == (tp)->rcv_nxt && \
+	    (tp)->seg_next == (struct tcpiphdr *)(tp) && \
+	    (tp)->t_state == TCPS_ESTABLISHED) { \
+		(tp)->t_flags |= TF_DELACK; \
+		(tp)->rcv_nxt += (ti)->ti_len; \
+		(flags) = (ti)->ti_flags & TH_FIN; \
+		tcpstat.tcps_rcvpack++;\
+		tcpstat.tcps_rcvbyte += (ti)->ti_len;\
+		sbappend(&(so)->so_rcv, (m)); \
+		sorwakeup(so); \
+	} else { \
+		(flags) = tcp_reass((tp), (ti), (m)); \
+		(tp)->t_flags |= TF_ACKNOW; \
+	} \
+}
+#ifndef TUBA_INCLUDE
+
+int
+tcp_reass(tp, ti, m)
+	register struct tcpcb *tp;
+	register struct tcpiphdr *ti;
+	struct mbuf *m;
+{
+	register struct tcpiphdr *q;
+	struct socket *so = tp->t_inpcb->inp_socket;
+	int flags;
+
+	/*
+	 * Queue segment ti (data in m) in sequence order on tp's
+	 * reassembly list, trimming overlap, then pass data that is
+	 * contiguous at rcv_nxt up to the socket.  Returns the TH_FIN
+	 * bit of the last segment passed up, else 0.
+	 * Call with ti==0 after becoming established to
+	 * force pre-ESTABLISHED data up to user socket.
+	 */
+	if (ti == 0)
+		goto present;
+
+	/* Find a segment which begins after this one does. */
+	for (q = tp->seg_next; q != (struct tcpiphdr *)tp;
+	    q = (struct tcpiphdr *)q->ti_next)
+		if (SEQ_GT(q->ti_seq, ti->ti_seq))
+			break;
+
+	/*
+	 * If there is a preceding segment, it may provide some of
+	 * our data already.  If so, drop the data from the incoming
+	 * segment.  If it provides all of our data, drop us.
+	 */
+	if ((struct tcpiphdr *)q->ti_prev != (struct tcpiphdr *)tp) {
+		register int i;
+		q = (struct tcpiphdr *)q->ti_prev;
+		/* conversion to int (in i) handles seq wraparound */
+		i = q->ti_seq + q->ti_len - ti->ti_seq;
+		if (i > 0) {
+			if (i >= ti->ti_len) {
+				tcpstat.tcps_rcvduppack++;
+				tcpstat.tcps_rcvdupbyte += ti->ti_len;
+				m_freem(m);
+				return (0);
+			}
+			m_adj(m, i);
+			ti->ti_len -= i;
+			ti->ti_seq += i;
+		}
+		q = (struct tcpiphdr *)(q->ti_next);	/* back to the first segment after ours */
+	}
+	tcpstat.tcps_rcvoopack++;
+	tcpstat.tcps_rcvoobyte += ti->ti_len;
+	REASS_MBUF(ti) = m;		/* XXX */
+
+	/*
+	 * While we overlap succeeding segments trim them or,
+	 * if they are completely covered, dequeue them.
+	 */
+	while (q != (struct tcpiphdr *)tp) {
+		register int i = (ti->ti_seq + ti->ti_len) - q->ti_seq;
+		if (i <= 0)
+			break;
+		if (i < q->ti_len) {
+			q->ti_seq += i;
+			q->ti_len -= i;
+			m_adj(REASS_MBUF(q), i);
+			break;
+		}
+		q = (struct tcpiphdr *)q->ti_next;	/* step past it before unlinking it */
+		m = REASS_MBUF((struct tcpiphdr *)q->ti_prev);
+		remque(q->ti_prev);
+		m_freem(m);
+	}
+
+	/* Stick new segment in its place. */
+	insque(ti, q->ti_prev);
+
+present:
+	/*
+	 * Present data to user, advancing rcv_nxt through
+	 * completed sequence space.
+	 */
+	if (TCPS_HAVERCVDSYN(tp->t_state) == 0)
+		return (0);
+	ti = tp->seg_next;
+	if (ti == (struct tcpiphdr *)tp || ti->ti_seq != tp->rcv_nxt)
+		return (0);
+	if (tp->t_state == TCPS_SYN_RECEIVED && ti->ti_len)	/* data waits for the ti==0 call */
+		return (0);
+	do {
+		tp->rcv_nxt += ti->ti_len;
+		flags = ti->ti_flags & TH_FIN;
+		remque(ti);
+		m = REASS_MBUF(ti);
+		ti = (struct tcpiphdr *)ti->ti_next;
+		if (so->so_state & SS_CANTRCVMORE)	/* socket can't receive more: discard */
+			m_freem(m);
+		else
+			sbappend(&so->so_rcv, m);
+	} while (ti != (struct tcpiphdr *)tp && ti->ti_seq == tp->rcv_nxt);
+	sorwakeup(so);
+	return (flags);
+}
+
+/*
+ * TCP input routine, follows pages 65-76 of the
+ * protocol specification dated September, 1981 very closely.
+ */
+void
+tcp_input(m, iphlen)
+	register struct mbuf *m;
+	int iphlen;
+{
+	register struct tcpiphdr *ti;
+	register struct inpcb *inp;
+	caddr_t optp = NULL;
+	int optlen;
+	int len, tlen, off;
+	register struct tcpcb *tp = 0;
+	register int tiflags;
+	struct socket *so;
+	int todrop, acked, ourfinisacked, needoutput = 0;
+	short ostate;
+	struct in_addr laddr;
+	int dropsocket = 0;
+	int iss = 0;
+	u_long tiwin, ts_val, ts_ecr;
+	int ts_present = 0;
+
+	tcpstat.tcps_rcvtotal++;
+	/*
+	 * Get IP and TCP header together in first mbuf.
+	 * Note: IP leaves IP header in first mbuf.
+	 */
+	ti = mtod(m, struct tcpiphdr *);
+	if (iphlen > sizeof (struct ip))
+		ip_stripoptions(m, (struct mbuf *)0);
+	if (m->m_len < sizeof (struct tcpiphdr)) {
+		if ((m = m_pullup(m, sizeof (struct tcpiphdr))) == 0) {
+			tcpstat.tcps_rcvshort++;
+			return;
+		}
+		ti = mtod(m, struct tcpiphdr *);
+	}
+
+	/*
+	 * Checksum extended TCP header and data.
+	 */
+	tlen = ((struct ip *)ti)->ip_len;
+	len = sizeof (struct ip) + tlen;
+	ti->ti_next = ti->ti_prev = 0;
+	ti->ti_x1 = 0;
+	ti->ti_len = (u_short)tlen;
+	HTONS(ti->ti_len);
+	if (ti->ti_sum = in_cksum(m, len)) {
+		tcpstat.tcps_rcvbadsum++;
+		goto drop;
+	}
+#endif /* TUBA_INCLUDE */
+
+	/*
+	 * Check that TCP offset makes sense,
+	 * pull out TCP options and adjust length.		XXX
+	 */
+	off = ti->ti_off << 2;
+	if (off < sizeof (struct tcphdr) || off > tlen) {
+		tcpstat.tcps_rcvbadoff++;
+		goto drop;
+	}
+	tlen -= off;
+	ti->ti_len = tlen;
+	if (off > sizeof (struct tcphdr)) {
+		if (m->m_len < sizeof(struct ip) + off) {
+			if ((m = m_pullup(m, sizeof (struct ip) + off)) == 0) {
+				tcpstat.tcps_rcvshort++;
+				return;
+			}
+			ti = mtod(m, struct tcpiphdr *);
+		}
+		optlen = off - sizeof (struct tcphdr);
+		optp = mtod(m, caddr_t) + sizeof (struct tcpiphdr);
+		/* 
+		 * Do quick retrieval of timestamp options ("options
+		 * prediction?").  If timestamp is the only option and it's
+		 * formatted as recommended in RFC 1323 appendix A, we
+		 * quickly get the values now and not bother calling
+		 * tcp_dooptions(), etc.
+		 */
+		if ((optlen == TCPOLEN_TSTAMP_APPA ||
+		     (optlen > TCPOLEN_TSTAMP_APPA &&
+			optp[TCPOLEN_TSTAMP_APPA] == TCPOPT_EOL)) &&
+		     *(u_long *)optp == htonl(TCPOPT_TSTAMP_HDR) &&
+		     (ti->ti_flags & TH_SYN) == 0) {
+			ts_present = 1;
+			ts_val = ntohl(*(u_long *)(optp + 4));
+			ts_ecr = ntohl(*(u_long *)(optp + 8));
+			optp = NULL;	/* we've parsed the options */
+		}
+	}
+	tiflags = ti->ti_flags;
+
+	/*
+	 * Convert TCP protocol specific fields to host format.
+	 */
+	NTOHL(ti->ti_seq);
+	NTOHL(ti->ti_ack);
+	NTOHS(ti->ti_win);
+	NTOHS(ti->ti_urp);
+
+	/*
+	 * Locate pcb for segment.
+	 */
+findpcb:
+	inp = tcp_last_inpcb;
+	if (inp->inp_lport != ti->ti_dport ||
+	    inp->inp_fport != ti->ti_sport ||
+	    inp->inp_faddr.s_addr != ti->ti_src.s_addr ||
+	    inp->inp_laddr.s_addr != ti->ti_dst.s_addr) {
+		inp = in_pcblookup(&tcb, ti->ti_src, ti->ti_sport,
+		    ti->ti_dst, ti->ti_dport, INPLOOKUP_WILDCARD);
+		if (inp)
+			tcp_last_inpcb = inp;
+		++tcpstat.tcps_pcbcachemiss;
+	}
+
+	/*
+	 * If the state is CLOSED (i.e., TCB does not exist) then
+	 * all data in the incoming segment is discarded.
+	 * If the TCB exists but is in CLOSED state, it is embryonic,
+	 * but should either do a listen or a connect soon.
+	 */
+	if (inp == 0)
+		goto dropwithreset;
+	tp = intotcpcb(inp);
+	if (tp == 0)
+		goto dropwithreset;
+	if (tp->t_state == TCPS_CLOSED)
+		goto drop;
+	
+	/* Unscale the window into a 32-bit value. */
+	if ((tiflags & TH_SYN) == 0)
+		tiwin = ti->ti_win << tp->snd_scale;
+	else
+		tiwin = ti->ti_win;
+
+	so = inp->inp_socket;
+	if (so->so_options & (SO_DEBUG|SO_ACCEPTCONN)) {
+		if (so->so_options & SO_DEBUG) {
+			ostate = tp->t_state;
+			tcp_saveti = *ti;
+		}
+		if (so->so_options & SO_ACCEPTCONN) {
+			so = sonewconn(so, 0);
+			if (so == 0)
+				goto drop;
+			/*
+			 * This is ugly, but ....
+			 *
+			 * Mark socket as temporary until we're
+			 * committed to keeping it.  The code at
+			 * ``drop'' and ``dropwithreset'' check the
+			 * flag dropsocket to see if the temporary
+			 * socket created here should be discarded.
+			 * We mark the socket as discardable until
+			 * we're committed to it below in TCPS_LISTEN.
+			 */
+			dropsocket++;
+			inp = (struct inpcb *)so->so_pcb;
+			inp->inp_laddr = ti->ti_dst;
+			inp->inp_lport = ti->ti_dport;
+#if BSD>=43
+			inp->inp_options = ip_srcroute();
+#endif
+			tp = intotcpcb(inp);
+			tp->t_state = TCPS_LISTEN;
+
+			/* Compute proper scaling value from buffer space
+			 */
+			while (tp->request_r_scale < TCP_MAX_WINSHIFT &&
+			   TCP_MAXWIN << tp->request_r_scale < so->so_rcv.sb_hiwat)
+				tp->request_r_scale++;
+		}
+	}
+
+	/*
+	 * Segment received on connection.
+	 * Reset idle time and keep-alive timer.
+	 */
+	tp->t_idle = 0;
+	tp->t_timer[TCPT_KEEP] = tcp_keepidle;
+
+	/*
+	 * Process options if not in LISTEN state,
+	 * else do it below (after getting remote address).
+	 */
+	if (optp && tp->t_state != TCPS_LISTEN)
+		tcp_dooptions(tp, optp, optlen, ti,
+			&ts_present, &ts_val, &ts_ecr);
+
+	/* 
+	 * Header prediction: check for the two common cases
+	 * of a uni-directional data xfer.  If the packet has
+	 * no control flags, is in-sequence, the window didn't
+	 * change and we're not retransmitting, it's a
+	 * candidate.  If the length is zero and the ack moved
+	 * forward, we're the sender side of the xfer.  Just
+	 * free the data acked & wake any higher level process
+	 * that was blocked waiting for space.  If the length
+	 * is non-zero and the ack didn't move, we're the
+	 * receiver side.  If we're getting packets in-order
+	 * (the reassembly queue is empty), add the data to
+	 * the socket buffer and note that we need a delayed ack.
+	 */
+	if (tp->t_state == TCPS_ESTABLISHED &&
+	    (tiflags & (TH_SYN|TH_FIN|TH_RST|TH_URG|TH_ACK)) == TH_ACK &&
+	    (!ts_present || TSTMP_GEQ(ts_val, tp->ts_recent)) &&
+	    ti->ti_seq == tp->rcv_nxt &&
+	    tiwin && tiwin == tp->snd_wnd &&
+	    tp->snd_nxt == tp->snd_max) {
+
+		/* 
+		 * If last ACK falls within this segment's sequence numbers,
+		 *  record the timestamp.
+		 */
+		if (ts_present && SEQ_LEQ(ti->ti_seq, tp->last_ack_sent) &&
+		   SEQ_LT(tp->last_ack_sent, ti->ti_seq + ti->ti_len)) {
+			tp->ts_recent_age = tcp_now;
+			tp->ts_recent = ts_val;
+		}
+
+		if (ti->ti_len == 0) {
+			if (SEQ_GT(ti->ti_ack, tp->snd_una) &&
+			    SEQ_LEQ(ti->ti_ack, tp->snd_max) &&
+			    tp->snd_cwnd >= tp->snd_wnd) {
+				/*
+				 * this is a pure ack for outstanding data.
+				 */
+				++tcpstat.tcps_predack;
+				if (ts_present)
+					tcp_xmit_timer(tp, tcp_now-ts_ecr+1);
+				else if (tp->t_rtt &&
+					    SEQ_GT(ti->ti_ack, tp->t_rtseq))
+					tcp_xmit_timer(tp, tp->t_rtt);
+				acked = ti->ti_ack - tp->snd_una;
+				tcpstat.tcps_rcvackpack++;
+				tcpstat.tcps_rcvackbyte += acked;
+				sbdrop(&so->so_snd, acked);
+				tp->snd_una = ti->ti_ack;
+				m_freem(m);
+
+				/*
+				 * If all outstanding data are acked, stop
+				 * retransmit timer, otherwise restart timer
+				 * using current (possibly backed-off) value.
+				 * If process is waiting for space,
+				 * wakeup/selwakeup/signal.  If data
+				 * are ready to send, let tcp_output
+				 * decide between more output or persist.
+				 */
+				if (tp->snd_una == tp->snd_max)
+					tp->t_timer[TCPT_REXMT] = 0;
+				else if (tp->t_timer[TCPT_PERSIST] == 0)
+					tp->t_timer[TCPT_REXMT] = tp->t_rxtcur;
+
+				if (so->so_snd.sb_flags & SB_NOTIFY)
+					sowwakeup(so);
+				if (so->so_snd.sb_cc)
+					(void) tcp_output(tp);
+				return;
+			}
+		} else if (ti->ti_ack == tp->snd_una &&
+		    tp->seg_next == (struct tcpiphdr *)tp &&
+		    ti->ti_len <= sbspace(&so->so_rcv)) {
+			/*
+			 * this is a pure, in-sequence data packet
+			 * with nothing on the reassembly queue and
+			 * we have enough buffer space to take it.
+			 */
+			++tcpstat.tcps_preddat;
+			tp->rcv_nxt += ti->ti_len;
+			tcpstat.tcps_rcvpack++;
+			tcpstat.tcps_rcvbyte += ti->ti_len;
+			/*
+			 * Drop TCP, IP headers and TCP options then add data
+			 * to socket buffer.
+			 */
+			m->m_data += sizeof(struct tcpiphdr)+off-sizeof(struct tcphdr);
+			m->m_len -= sizeof(struct tcpiphdr)+off-sizeof(struct tcphdr);
+			sbappend(&so->so_rcv, m);
+			sorwakeup(so);
+			tp->t_flags |= TF_DELACK;
+			return;
+		}
+	}
+
+	/*
+	 * Drop TCP, IP headers and TCP options.
+	 */
+	m->m_data += sizeof(struct tcpiphdr)+off-sizeof(struct tcphdr);
+	m->m_len  -= sizeof(struct tcpiphdr)+off-sizeof(struct tcphdr);
+
+	/*
+	 * Calculate amount of space in receive window,
+	 * and then do TCP input processing.
+	 * Receive window is amount of space in rcv queue,
+	 * but not less than advertised window.
+	 */
+	{ int win;
+
+	win = sbspace(&so->so_rcv);
+	if (win < 0)
+		win = 0;
+	tp->rcv_wnd = max(win, (int)(tp->rcv_adv - tp->rcv_nxt));
+	}
+
+	switch (tp->t_state) {
+
+	/*
+	 * If the state is LISTEN then ignore segment if it contains an RST.
+	 * If the segment contains an ACK then it is bad and send a RST.
+	 * If it does not contain a SYN then it is not interesting; drop it.
+	 * Don't bother responding if the destination was a broadcast.
+	 * Otherwise initialize tp->rcv_nxt, and tp->irs, select an initial
+	 * tp->iss, and send a segment:
+	 *     <SEQ=ISS><ACK=RCV_NXT><CTL=SYN,ACK>
+	 * Also initialize tp->snd_nxt to tp->iss+1 and tp->snd_una to tp->iss.
+	 * Fill in remote peer address fields if not previously specified.
+	 * Enter SYN_RECEIVED state, and process any other fields of this
+	 * segment in this state.
+	 */
+	case TCPS_LISTEN: {
+		struct mbuf *am;
+		register struct sockaddr_in *sin;
+
+		if (tiflags & TH_RST)
+			goto drop;
+		if (tiflags & TH_ACK)
+			goto dropwithreset;
+		if ((tiflags & TH_SYN) == 0)
+			goto drop;
+		/*
+		 * RFC1122 4.2.3.10, p. 104: discard bcast/mcast SYN
+		 * in_broadcast() should never return true on a received
+		 * packet with M_BCAST not set.
+		 */
+		if (m->m_flags & (M_BCAST|M_MCAST) ||
+		    IN_MULTICAST(ti->ti_dst.s_addr))
+			goto drop;
+		am = m_get(M_DONTWAIT, MT_SONAME);	/* XXX */
+		if (am == NULL)
+			goto drop;
+		am->m_len = sizeof (struct sockaddr_in);
+		sin = mtod(am, struct sockaddr_in *);
+		sin->sin_family = AF_INET;
+		sin->sin_len = sizeof(*sin);
+		sin->sin_addr = ti->ti_src;
+		sin->sin_port = ti->ti_sport;
+		bzero((caddr_t)sin->sin_zero, sizeof(sin->sin_zero));
+		laddr = inp->inp_laddr;
+		if (inp->inp_laddr.s_addr == INADDR_ANY)
+			inp->inp_laddr = ti->ti_dst;
+		if (in_pcbconnect(inp, am)) {
+			inp->inp_laddr = laddr;
+			(void) m_free(am);
+			goto drop;
+		}
+		(void) m_free(am);
+		tp->t_template = tcp_template(tp);
+		if (tp->t_template == 0) {
+			tp = tcp_drop(tp, ENOBUFS);
+			dropsocket = 0;		/* socket is already gone */
+			goto drop;
+		}
+		if (optp)
+			tcp_dooptions(tp, optp, optlen, ti,
+				&ts_present, &ts_val, &ts_ecr);
+		if (iss)
+			tp->iss = iss;
+		else
+			tp->iss = tcp_iss;
+		tcp_iss += TCP_ISSINCR/2;
+		tp->irs = ti->ti_seq;
+		tcp_sendseqinit(tp);
+		tcp_rcvseqinit(tp);
+		tp->t_flags |= TF_ACKNOW;
+		tp->t_state = TCPS_SYN_RECEIVED;
+		tp->t_timer[TCPT_KEEP] = TCPTV_KEEP_INIT;
+		dropsocket = 0;		/* committed to socket */
+		tcpstat.tcps_accepts++;
+		goto trimthenstep6;
+		}
+
+	/*
+	 * If the state is SYN_SENT:
+	 *	if seg contains an ACK, but not for our SYN, drop the input.
+	 *	if seg contains a RST, then drop the connection.
+	 *	if seg does not contain SYN, then drop it.
+	 * Otherwise this is an acceptable SYN segment
+	 *	initialize tp->rcv_nxt and tp->irs
+	 *	if seg contains ack then advance tp->snd_una
+	 *	if SYN has been acked change to ESTABLISHED else SYN_RCVD state
+	 *	arrange for segment to be acked (eventually)
+	 *	continue processing rest of data/controls, beginning with URG
+	 */
+	case TCPS_SYN_SENT:
+		if ((tiflags & TH_ACK) &&
+		    (SEQ_LEQ(ti->ti_ack, tp->iss) ||
+		     SEQ_GT(ti->ti_ack, tp->snd_max)))
+			goto dropwithreset;
+		if (tiflags & TH_RST) {
+			if (tiflags & TH_ACK)
+				tp = tcp_drop(tp, ECONNREFUSED);
+			goto drop;
+		}
+		if ((tiflags & TH_SYN) == 0)
+			goto drop;
+		if (tiflags & TH_ACK) {
+			tp->snd_una = ti->ti_ack;
+			if (SEQ_LT(tp->snd_nxt, tp->snd_una))
+				tp->snd_nxt = tp->snd_una;
+		}
+		tp->t_timer[TCPT_REXMT] = 0;
+		tp->irs = ti->ti_seq;
+		tcp_rcvseqinit(tp);
+		tp->t_flags |= TF_ACKNOW;
+		if (tiflags & TH_ACK && SEQ_GT(tp->snd_una, tp->iss)) {
+			tcpstat.tcps_connects++;
+			soisconnected(so);
+			tp->t_state = TCPS_ESTABLISHED;
+			/* Do window scaling on this connection? */
+			if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) ==
+				(TF_RCVD_SCALE|TF_REQ_SCALE)) {
+				tp->snd_scale = tp->requested_s_scale;
+				tp->rcv_scale = tp->request_r_scale;
+			}
+			(void) tcp_reass(tp, (struct tcpiphdr *)0,
+				(struct mbuf *)0);
+			/*
+			 * if we didn't have to retransmit the SYN,
+			 * use its rtt as our initial srtt & rtt var.
+			 */
+			if (tp->t_rtt)
+				tcp_xmit_timer(tp, tp->t_rtt);
+		} else
+			tp->t_state = TCPS_SYN_RECEIVED;
+
+trimthenstep6:
+		/*
+		 * Advance ti->ti_seq to correspond to first data byte.
+		 * If data, trim to stay within window,
+		 * dropping FIN if necessary.
+		 */
+		ti->ti_seq++;
+		if (ti->ti_len > tp->rcv_wnd) {
+			todrop = ti->ti_len - tp->rcv_wnd;
+			m_adj(m, -todrop);
+			ti->ti_len = tp->rcv_wnd;
+			tiflags &= ~TH_FIN;
+			tcpstat.tcps_rcvpackafterwin++;
+			tcpstat.tcps_rcvbyteafterwin += todrop;
+		}
+		tp->snd_wl1 = ti->ti_seq - 1;
+		tp->rcv_up = ti->ti_seq;
+		goto step6;
+	}
+
+	/*
+	 * States other than LISTEN or SYN_SENT.
+	 * First check timestamp, if present.
+	 * Then check that at least some bytes of segment are within 
+	 * receive window.  If segment begins before rcv_nxt,
+	 * drop leading data (and SYN); if nothing left, just ack.
+	 * 
+	 * RFC 1323 PAWS: If we have a timestamp reply on this segment
+	 * and it's less than ts_recent, drop it.
+	 */
+	if (ts_present && (tiflags & TH_RST) == 0 && tp->ts_recent &&
+	    TSTMP_LT(ts_val, tp->ts_recent)) {
+
+		/* Check to see if ts_recent is over 24 days old.  */
+		if ((int)(tcp_now - tp->ts_recent_age) > TCP_PAWS_IDLE) {
+			/*
+			 * Invalidate ts_recent.  If this segment updates
+			 * ts_recent, the age will be reset later and ts_recent
+			 * will get a valid value.  If it does not, setting
+			 * ts_recent to zero will at least satisfy the
+			 * requirement that zero be placed in the timestamp
+			 * echo reply when ts_recent isn't valid.  The
+			 * age isn't reset until we get a valid ts_recent
+			 * because we don't want out-of-order segments to be
+			 * dropped when ts_recent is old.
+			 */
+			tp->ts_recent = 0;
+		} else {
+			tcpstat.tcps_rcvduppack++;
+			tcpstat.tcps_rcvdupbyte += ti->ti_len;
+			tcpstat.tcps_pawsdrop++;
+			goto dropafterack;
+		}
+	}
+
+	todrop = tp->rcv_nxt - ti->ti_seq;
+	if (todrop > 0) {
+		if (tiflags & TH_SYN) {
+			tiflags &= ~TH_SYN;
+			ti->ti_seq++;
+			if (ti->ti_urp > 1) 
+				ti->ti_urp--;
+			else
+				tiflags &= ~TH_URG;
+			todrop--;
+		}
+		if (todrop >= ti->ti_len) {
+			tcpstat.tcps_rcvduppack++;
+			tcpstat.tcps_rcvdupbyte += ti->ti_len;
+			/*
+			 * If segment is just one to the left of the window,
+			 * check two special cases:
+			 * 1. Don't toss RST in response to 4.2-style keepalive.
+			 * 2. If the only thing to drop is a FIN, we can drop
+			 *    it, but check the ACK or we will get into FIN
+			 *    wars if our FINs crossed (both CLOSING).
+			 * In either case, send ACK to resynchronize,
+			 * but keep on processing for RST or ACK.
+			 */
+			if ((tiflags & TH_FIN && todrop == ti->ti_len + 1)
+#ifdef TCP_COMPAT_42
+			  || (tiflags & TH_RST && ti->ti_seq == tp->rcv_nxt - 1)
+#endif
+			   ) {
+				todrop = ti->ti_len;
+				tiflags &= ~TH_FIN;
+				tp->t_flags |= TF_ACKNOW;
+			} else {
+				/*
+				 * Handle the case when a bound socket connects
+				 * to itself. Allow packets with a SYN and
+				 * an ACK to continue with the processing.
+				 */
+				if (todrop != 0 || (tiflags & TH_ACK) == 0)
+					goto dropafterack;
+			}
+		} else {
+			tcpstat.tcps_rcvpartduppack++;
+			tcpstat.tcps_rcvpartdupbyte += todrop;
+		}
+		m_adj(m, todrop);
+		ti->ti_seq += todrop;
+		ti->ti_len -= todrop;
+		if (ti->ti_urp > todrop)
+			ti->ti_urp -= todrop;
+		else {
+			tiflags &= ~TH_URG;
+			ti->ti_urp = 0;
+		}
+	}
+
+	/*
+	 * If new data are received on a connection after the
+	 * user processes are gone, then RST the other end.
+	 */
+	if ((so->so_state & SS_NOFDREF) &&
+	    tp->t_state > TCPS_CLOSE_WAIT && ti->ti_len) {
+		tp = tcp_close(tp);
+		tcpstat.tcps_rcvafterclose++;
+		goto dropwithreset;
+	}
+
+	/*
+	 * If segment ends after window, drop trailing data
+	 * (and PUSH and FIN); if nothing left, just ACK.
+	 */
+	todrop = (ti->ti_seq+ti->ti_len) - (tp->rcv_nxt+tp->rcv_wnd);
+	if (todrop > 0) {
+		tcpstat.tcps_rcvpackafterwin++;
+		if (todrop >= ti->ti_len) {
+			tcpstat.tcps_rcvbyteafterwin += ti->ti_len;
+			/*
+			 * If a new connection request is received
+			 * while in TIME_WAIT, drop the old connection
+			 * and start over if the sequence numbers
+			 * are above the previous ones.
+			 */
+			if (tiflags & TH_SYN &&
+			    tp->t_state == TCPS_TIME_WAIT &&
+			    SEQ_GT(ti->ti_seq, tp->rcv_nxt)) {
+				iss = tp->rcv_nxt + TCP_ISSINCR;
+				tp = tcp_close(tp);
+				goto findpcb;
+			}
+			/*
+			 * If window is closed can only take segments at
+			 * window edge, and have to drop data and PUSH from
+			 * incoming segments.  Continue processing, but
+			 * remember to ack.  Otherwise, drop segment
+			 * and ack.
+			 */
+			if (tp->rcv_wnd == 0 && ti->ti_seq == tp->rcv_nxt) {
+				tp->t_flags |= TF_ACKNOW;
+				tcpstat.tcps_rcvwinprobe++;
+			} else
+				goto dropafterack;
+		} else
+			tcpstat.tcps_rcvbyteafterwin += todrop;
+		m_adj(m, -todrop);
+		ti->ti_len -= todrop;
+		tiflags &= ~(TH_PUSH|TH_FIN);
+	}
+
+	/*
+	 * If last ACK falls within this segment's sequence numbers,
+	 * record its timestamp.
+	 */
+	if (ts_present && SEQ_LEQ(ti->ti_seq, tp->last_ack_sent) &&
+	    SEQ_LT(tp->last_ack_sent, ti->ti_seq + ti->ti_len +
+		   ((tiflags & (TH_SYN|TH_FIN)) != 0))) {
+		tp->ts_recent_age = tcp_now;
+		tp->ts_recent = ts_val;
+	}
+
+	/*
+	 * If the RST bit is set examine the state:
+	 *    SYN_RECEIVED STATE:
+	 *	If passive open, return to LISTEN state.
+	 *	If active open, inform user that connection was refused.
+	 *    ESTABLISHED, FIN_WAIT_1, FIN_WAIT2, CLOSE_WAIT STATES:
+	 *	Inform user that connection was reset, and close tcb.
+	 *    CLOSING, LAST_ACK, TIME_WAIT STATES
+	 *	Close the tcb.
+	 */
+	if (tiflags&TH_RST) switch (tp->t_state) {
+
+	case TCPS_SYN_RECEIVED:
+		so->so_error = ECONNREFUSED;
+		goto close;
+
+	case TCPS_ESTABLISHED:
+	case TCPS_FIN_WAIT_1:
+	case TCPS_FIN_WAIT_2:
+	case TCPS_CLOSE_WAIT:
+		so->so_error = ECONNRESET;
+	close:
+		tp->t_state = TCPS_CLOSED;
+		tcpstat.tcps_drops++;
+		tp = tcp_close(tp);
+		goto drop;
+
+	case TCPS_CLOSING:
+	case TCPS_LAST_ACK:
+	case TCPS_TIME_WAIT:
+		tp = tcp_close(tp);
+		goto drop;
+	}
+
+	/*
+	 * If a SYN is in the window, then this is an
+	 * error and we send an RST and drop the connection.
+	 */
+	if (tiflags & TH_SYN) {
+		tp = tcp_drop(tp, ECONNRESET);
+		goto dropwithreset;
+	}
+
+	/*
+	 * If the ACK bit is off we drop the segment and return.
+	 */
+	if ((tiflags & TH_ACK) == 0)
+		goto drop;
+	
+	/*
+	 * Ack processing.
+	 */
+	switch (tp->t_state) {
+
+	/*
+	 * In SYN_RECEIVED state if the ack ACKs our SYN then enter
+	 * ESTABLISHED state and continue processing, otherwise
+	 * send an RST.
+	 */
+	case TCPS_SYN_RECEIVED:
+		if (SEQ_GT(tp->snd_una, ti->ti_ack) ||
+		    SEQ_GT(ti->ti_ack, tp->snd_max))
+			goto dropwithreset;
+		tcpstat.tcps_connects++;
+		soisconnected(so);
+		tp->t_state = TCPS_ESTABLISHED;
+		/* Do window scaling? */
+		if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) ==
+			(TF_RCVD_SCALE|TF_REQ_SCALE)) {
+			tp->snd_scale = tp->requested_s_scale;
+			tp->rcv_scale = tp->request_r_scale;
+		}
+		(void) tcp_reass(tp, (struct tcpiphdr *)0, (struct mbuf *)0);
+		tp->snd_wl1 = ti->ti_seq - 1;
+		/* fall into ... */
+
+	/*
+	 * In ESTABLISHED state: drop duplicate ACKs; ACK out of range
+	 * ACKs.  If the ack is in the range
+	 *	tp->snd_una < ti->ti_ack <= tp->snd_max
+	 * then advance tp->snd_una to ti->ti_ack and drop
+	 * data from the retransmission queue.  If this ACK reflects
+	 * more up to date window information we update our window information.
+	 */
+	case TCPS_ESTABLISHED:
+	case TCPS_FIN_WAIT_1:
+	case TCPS_FIN_WAIT_2:
+	case TCPS_CLOSE_WAIT:
+	case TCPS_CLOSING:
+	case TCPS_LAST_ACK:
+	case TCPS_TIME_WAIT:
+
+		if (SEQ_LEQ(ti->ti_ack, tp->snd_una)) {
+			if (ti->ti_len == 0 && tiwin == tp->snd_wnd) {
+				tcpstat.tcps_rcvdupack++;
+				/*
+				 * If we have outstanding data (other than
+				 * a window probe), this is a completely
+				 * duplicate ack (ie, window info didn't
+				 * change), the ack is the biggest we've
+				 * seen and we've seen exactly our rexmt
+				 * threshold of them, assume a packet
+				 * has been dropped and retransmit it.
+				 * Kludge snd_nxt & the congestion
+				 * window so we send only this one
+				 * packet.
+				 *
+				 * We know we're losing at the current
+				 * window size so do congestion avoidance
+				 * (set ssthresh to half the current window
+				 * and pull our congestion window back to
+				 * the new ssthresh).
+				 *
+				 * Dup acks mean that packets have left the
+				 * network (they're now cached at the receiver) 
+				 * so bump cwnd by the amount in the receiver
+				 * to keep a constant cwnd packets in the
+				 * network.
+				 */
+				if (tp->t_timer[TCPT_REXMT] == 0 ||
+				    ti->ti_ack != tp->snd_una)
+					tp->t_dupacks = 0;
+				else if (++tp->t_dupacks == tcprexmtthresh) {
+					tcp_seq onxt = tp->snd_nxt;
+					u_int win =
+					    min(tp->snd_wnd, tp->snd_cwnd) / 2 /
+						tp->t_maxseg;
+
+					if (win < 2)
+						win = 2;
+					tp->snd_ssthresh = win * tp->t_maxseg;
+					tp->t_timer[TCPT_REXMT] = 0;
+					tp->t_rtt = 0;
+					tp->snd_nxt = ti->ti_ack;
+					tp->snd_cwnd = tp->t_maxseg;
+					(void) tcp_output(tp);
+					tp->snd_cwnd = tp->snd_ssthresh +
+					       tp->t_maxseg * tp->t_dupacks;
+					if (SEQ_GT(onxt, tp->snd_nxt))
+						tp->snd_nxt = onxt;
+					goto drop;
+				} else if (tp->t_dupacks > tcprexmtthresh) {
+					tp->snd_cwnd += tp->t_maxseg;
+					(void) tcp_output(tp);
+					goto drop;
+				}
+			} else
+				tp->t_dupacks = 0;
+			break;
+		}
+		/*
+		 * If the congestion window was inflated to account
+		 * for the other side's cached packets, retract it.
+		 */
+		if (tp->t_dupacks > tcprexmtthresh &&
+		    tp->snd_cwnd > tp->snd_ssthresh)
+			tp->snd_cwnd = tp->snd_ssthresh;
+		tp->t_dupacks = 0;
+		if (SEQ_GT(ti->ti_ack, tp->snd_max)) {
+			tcpstat.tcps_rcvacktoomuch++;
+			goto dropafterack;
+		}
+		acked = ti->ti_ack - tp->snd_una;
+		tcpstat.tcps_rcvackpack++;
+		tcpstat.tcps_rcvackbyte += acked;
+
+		/*
+		 * If we have a timestamp reply, update smoothed
+		 * round trip time.  If no timestamp is present but
+		 * transmit timer is running and timed sequence
+		 * number was acked, update smoothed round trip time.
+		 * Since we now have an rtt measurement, cancel the
+		 * timer backoff (cf., Phil Karn's retransmit alg.).
+		 * Recompute the initial retransmit timer.
+		 */
+		if (ts_present)
+			tcp_xmit_timer(tp, tcp_now-ts_ecr+1);
+		else if (tp->t_rtt && SEQ_GT(ti->ti_ack, tp->t_rtseq))
+			tcp_xmit_timer(tp,tp->t_rtt);
+
+		/*
+		 * If all outstanding data is acked, stop retransmit
+		 * timer and remember to restart (more output or persist).
+		 * If there is more data to be acked, restart retransmit
+		 * timer, using current (possibly backed-off) value.
+		 */
+		if (ti->ti_ack == tp->snd_max) {
+			tp->t_timer[TCPT_REXMT] = 0;
+			needoutput = 1;
+		} else if (tp->t_timer[TCPT_PERSIST] == 0)
+			tp->t_timer[TCPT_REXMT] = tp->t_rxtcur;
+		/*
+		 * When new data is acked, open the congestion window.
+		 * If the window gives us less than ssthresh packets
+		 * in flight, open exponentially (maxseg per packet).
+		 * Otherwise open linearly: maxseg per window
+		 * (maxseg^2 / cwnd per packet), plus a constant
+		 * fraction of a packet (maxseg/8) to help larger windows
+		 * open quickly enough.
+		 */
+		{
+		register u_int cw = tp->snd_cwnd;
+		register u_int incr = tp->t_maxseg;
+
+		if (cw > tp->snd_ssthresh)
+			incr = incr * incr / cw + incr / 8;
+		tp->snd_cwnd = min(cw + incr, TCP_MAXWIN<<tp->snd_scale);
+		}
+		if (acked > so->so_snd.sb_cc) {
+			tp->snd_wnd -= so->so_snd.sb_cc;
+			sbdrop(&so->so_snd, (int)so->so_snd.sb_cc);
+			ourfinisacked = 1;
+		} else {
+			sbdrop(&so->so_snd, acked);
+			tp->snd_wnd -= acked;
+			ourfinisacked = 0;
+		}
+		if (so->so_snd.sb_flags & SB_NOTIFY)
+			sowwakeup(so);
+		tp->snd_una = ti->ti_ack;
+		if (SEQ_LT(tp->snd_nxt, tp->snd_una))
+			tp->snd_nxt = tp->snd_una;
+
+		switch (tp->t_state) {
+
+		/*
+		 * In FIN_WAIT_1 STATE in addition to the processing
+		 * for the ESTABLISHED state if our FIN is now acknowledged
+		 * then enter FIN_WAIT_2.
+		 */
+		case TCPS_FIN_WAIT_1:
+			if (ourfinisacked) {
+				/*
+				 * If we can't receive any more
+				 * data, then closing user can proceed.
+				 * Starting the timer is contrary to the
+				 * specification, but if we don't get a FIN
+				 * we'll hang forever.
+				 */
+				if (so->so_state & SS_CANTRCVMORE) {
+					soisdisconnected(so);
+					tp->t_timer[TCPT_2MSL] = tcp_maxidle;
+				}
+				tp->t_state = TCPS_FIN_WAIT_2;
+			}
+			break;
+
+	 	/*
+		 * In CLOSING STATE in addition to the processing for
+		 * the ESTABLISHED state if the ACK acknowledges our FIN
+		 * then enter the TIME-WAIT state, otherwise ignore
+		 * the segment.
+		 */
+		case TCPS_CLOSING:
+			if (ourfinisacked) {
+				tp->t_state = TCPS_TIME_WAIT;
+				tcp_canceltimers(tp);
+				tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL;
+				soisdisconnected(so);
+			}
+			break;
+
+		/*
+		 * In LAST_ACK, we may still be waiting for data to drain
+		 * and/or to be acked, as well as for the ack of our FIN.
+		 * If our FIN is now acknowledged, delete the TCB,
+		 * enter the closed state and return.
+		 */
+		case TCPS_LAST_ACK:
+			if (ourfinisacked) {
+				tp = tcp_close(tp);
+				goto drop;
+			}
+			break;
+
+		/*
+		 * In TIME_WAIT state the only thing that should arrive
+		 * is a retransmission of the remote FIN.  Acknowledge
+		 * it and restart the finack timer.
+		 */
+		case TCPS_TIME_WAIT:
+			tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL;
+			goto dropafterack;
+		}
+	}
+
+step6:
+	/*
+	 * Update window information.
+	 * Don't look at window if no ACK: TAC's send garbage on first SYN.
+	 */
+	if ((tiflags & TH_ACK) &&
+	    (SEQ_LT(tp->snd_wl1, ti->ti_seq) || tp->snd_wl1 == ti->ti_seq &&
+	    (SEQ_LT(tp->snd_wl2, ti->ti_ack) ||
+	     tp->snd_wl2 == ti->ti_ack && tiwin > tp->snd_wnd))) {
+		/* keep track of pure window updates */
+		if (ti->ti_len == 0 &&
+		    tp->snd_wl2 == ti->ti_ack && tiwin > tp->snd_wnd)
+			tcpstat.tcps_rcvwinupd++;
+		tp->snd_wnd = tiwin;
+		tp->snd_wl1 = ti->ti_seq;
+		tp->snd_wl2 = ti->ti_ack;
+		if (tp->snd_wnd > tp->max_sndwnd)
+			tp->max_sndwnd = tp->snd_wnd;
+		needoutput = 1;
+	}
+
+	/*
+	 * Process segments with URG.
+	 */
+	if ((tiflags & TH_URG) && ti->ti_urp &&
+	    TCPS_HAVERCVDFIN(tp->t_state) == 0) {
+		/*
+		 * This is a kludge, but if we receive and accept
+		 * random urgent pointers, we'll crash in
+		 * soreceive.  It's hard to imagine someone
+		 * actually wanting to send this much urgent data.
+		 */
+		if (ti->ti_urp + so->so_rcv.sb_cc > sb_max) {
+			ti->ti_urp = 0;			/* XXX */
+			tiflags &= ~TH_URG;		/* XXX */
+			goto dodata;			/* XXX */
+		}
+		/*
+		 * If this segment advances the known urgent pointer,
+		 * then mark the data stream.  This should not happen
+		 * in CLOSE_WAIT, CLOSING, LAST_ACK or TIME_WAIT STATES since
+		 * a FIN has been received from the remote side. 
+		 * In these states we ignore the URG.
+		 *
+		 * According to RFC961 (Assigned Protocols),
+		 * the urgent pointer points to the last octet
+		 * of urgent data.  We continue, however,
+		 * to consider it to indicate the first octet
+		 * of data past the urgent section as the original 
+		 * spec states (in one of two places).
+		 */
+		if (SEQ_GT(ti->ti_seq+ti->ti_urp, tp->rcv_up)) {
+			tp->rcv_up = ti->ti_seq + ti->ti_urp;
+			so->so_oobmark = so->so_rcv.sb_cc +
+			    (tp->rcv_up - tp->rcv_nxt) - 1;
+			if (so->so_oobmark == 0)
+				so->so_state |= SS_RCVATMARK;
+			sohasoutofband(so);
+			tp->t_oobflags &= ~(TCPOOB_HAVEDATA | TCPOOB_HADDATA);
+		}
+		/*
+		 * Remove out of band data so doesn't get presented to user.
+		 * This can happen independent of advancing the URG pointer,
+		 * but if two URG's are pending at once, some out-of-band
+		 * data may creep in... ick.
+		 */
+		if (ti->ti_urp <= ti->ti_len
+#ifdef SO_OOBINLINE
+		     && (so->so_options & SO_OOBINLINE) == 0
+#endif
+		     )
+			tcp_pulloutofband(so, ti, m);
+	} else
+		/*
+		 * If no out of band data is expected,
+		 * pull receive urgent pointer along
+		 * with the receive window.
+		 */
+		if (SEQ_GT(tp->rcv_nxt, tp->rcv_up))
+			tp->rcv_up = tp->rcv_nxt;
+dodata:							/* XXX */
+
+	/*
+	 * Process the segment text, merging it into the TCP sequencing queue,
+	 * and arranging for acknowledgment of receipt if necessary.
+	 * This process logically involves adjusting tp->rcv_wnd as data
+	 * is presented to the user (this happens in tcp_usrreq.c,
+	 * case PRU_RCVD).  If a FIN has already been received on this
+	 * connection then we just ignore the text.
+	 */
+	if ((ti->ti_len || (tiflags&TH_FIN)) &&
+	    TCPS_HAVERCVDFIN(tp->t_state) == 0) {
+		TCP_REASS(tp, ti, m, so, tiflags);
+		/*
+		 * Note the amount of data that peer has sent into
+		 * our window, in order to estimate the sender's
+		 * buffer size.
+		 */
+		len = so->so_rcv.sb_hiwat - (tp->rcv_adv - tp->rcv_nxt);
+	} else {
+		m_freem(m);
+		tiflags &= ~TH_FIN;
+	}
+
+	/*
+	 * If FIN is received ACK the FIN and let the user know
+	 * that the connection is closing.
+	 */
+	if (tiflags & TH_FIN) {
+		if (TCPS_HAVERCVDFIN(tp->t_state) == 0) {
+			socantrcvmore(so);
+			tp->t_flags |= TF_ACKNOW;
+			tp->rcv_nxt++;
+		}
+		switch (tp->t_state) {
+
+	 	/*
+		 * In SYN_RECEIVED and ESTABLISHED STATES
+		 * enter the CLOSE_WAIT state.
+		 */
+		case TCPS_SYN_RECEIVED:
+		case TCPS_ESTABLISHED:
+			tp->t_state = TCPS_CLOSE_WAIT;
+			break;
+
+	 	/*
+		 * If still in FIN_WAIT_1 STATE FIN has not been acked so
+		 * enter the CLOSING state.
+		 */
+		case TCPS_FIN_WAIT_1:
+			tp->t_state = TCPS_CLOSING;
+			break;
+
+	 	/*
+		 * In FIN_WAIT_2 state enter the TIME_WAIT state,
+		 * starting the time-wait timer, turning off the other 
+		 * standard timers.
+		 */
+		case TCPS_FIN_WAIT_2:
+			tp->t_state = TCPS_TIME_WAIT;
+			tcp_canceltimers(tp);
+			tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL;
+			soisdisconnected(so);
+			break;
+
+		/*
+		 * In TIME_WAIT state restart the 2 MSL time_wait timer.
+		 */
+		case TCPS_TIME_WAIT:
+			tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL;
+			break;
+		}
+	}
+	if (so->so_options & SO_DEBUG)
+		tcp_trace(TA_INPUT, ostate, tp, &tcp_saveti, 0);
+
+	/*
+	 * Return any desired output.
+	 */
+	if (needoutput || (tp->t_flags & TF_ACKNOW))
+		(void) tcp_output(tp);
+	return;
+
+dropafterack:
+	/*
+	 * Generate an ACK dropping incoming segment if it occupies
+	 * sequence space, where the ACK reflects our state.
+	 */
+	if (tiflags & TH_RST)
+		goto drop;
+	m_freem(m);
+	tp->t_flags |= TF_ACKNOW;
+	(void) tcp_output(tp);
+	return;
+
+dropwithreset:
+	/*
+	 * Generate a RST, dropping incoming segment.
+	 * Make ACK acceptable to originator of segment.
+	 * Don't bother to respond if destination was broadcast/multicast.
+	 */
+	if ((tiflags & TH_RST) || m->m_flags & (M_BCAST|M_MCAST) ||
+	    IN_MULTICAST(ti->ti_dst.s_addr))
+		goto drop;
+	if (tiflags & TH_ACK)
+		tcp_respond(tp, ti, m, (tcp_seq)0, ti->ti_ack, TH_RST);
+	else {
+		if (tiflags & TH_SYN)
+			ti->ti_len++;
+		tcp_respond(tp, ti, m, ti->ti_seq+ti->ti_len, (tcp_seq)0,
+		    TH_RST|TH_ACK);
+	}
+	/* destroy temporarily created socket */
+	if (dropsocket)
+		(void) soabort(so);
+	return;
+
+drop:
+	/*
+	 * Drop space held by incoming segment and return.
+	 */
+	if (tp && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG))
+		tcp_trace(TA_DROP, ostate, tp, &tcp_saveti, 0);
+	m_freem(m);
+	/* destroy temporarily created socket */
+	if (dropsocket)
+		(void) soabort(so);
+	return;
+#ifndef TUBA_INCLUDE
+}
+
+void
+tcp_dooptions(tp, cp, cnt, ti, ts_present, ts_val, ts_ecr)
+	struct tcpcb *tp;		/* connection the options apply to */
+	u_char *cp;			/* first byte of the option list */
+	int cnt;			/* number of option bytes at cp */
+	struct tcpiphdr *ti;		/* segment header (for TH_SYN test) */
+	int *ts_present;		/* out: set to 1 on a timestamp */
+	u_long *ts_val, *ts_ecr;	/* out: timestamp value, echo reply */
+{
+	u_short mss;
+	int opt, optlen;
+
+	/* Walk the option list, applying each option we understand to tp. */
+	for (; cnt > 0; cnt -= optlen, cp += optlen) {
+		opt = cp[0];
+		if (opt == TCPOPT_EOL)
+			break;
+		if (opt == TCPOPT_NOP)
+			optlen = 1;
+		else {
+			if (cnt < 2 || (optlen = cp[1]) < 2 || optlen > cnt)
+				break;	/* truncated or bad length: stop */
+		}
+		switch (opt) {
+
+		default:
+			continue;	/* unknown option: skip over it */
+
+		case TCPOPT_MAXSEG:
+			if (optlen != TCPOLEN_MAXSEG)
+				continue;
+			if (!(ti->ti_flags & TH_SYN))
+				continue;	/* only honored on a SYN */
+			bcopy((char *) cp + 2, (char *) &mss, sizeof(mss));
+			NTOHS(mss);
+			(void) tcp_mss(tp, mss);	/* sets t_maxseg */
+			break;
+
+		case TCPOPT_WINDOW:
+			if (optlen != TCPOLEN_WINDOW)
+				continue;
+			if (!(ti->ti_flags & TH_SYN))
+				continue;	/* only honored on a SYN */
+			tp->t_flags |= TF_RCVD_SCALE;
+			tp->requested_s_scale = min(cp[2], TCP_MAX_WINSHIFT);
+			break;
+
+		case TCPOPT_TIMESTAMP:
+			if (optlen != TCPOLEN_TIMESTAMP)
+				continue;
+			*ts_present = 1;
+			bcopy((char *)cp + 2, (char *) ts_val, sizeof(*ts_val));
+			NTOHL(*ts_val);
+			bcopy((char *)cp + 6, (char *) ts_ecr, sizeof(*ts_ecr));
+			NTOHL(*ts_ecr);
+
+			/*
+			 * A timestamp received in a SYN makes
+			 * it ok to send timestamp requests and replies.
+			 */
+			if (ti->ti_flags & TH_SYN) {
+				tp->t_flags |= TF_RCVD_TSTMP;
+				tp->ts_recent = *ts_val;
+				tp->ts_recent_age = tcp_now;
+			}
+			break;
+		}
+	}
+}
+
+/*
+ * Remove the urgent (out of band) byte from the mbuf chain m so that
+ * it is not handed to the user with the normal data.  The byte is
+ * saved in the tcpcb (t_iobc) and TCPOOB_HAVEDATA is set.  ti_len is
+ * left alone, so the byte still counts for sequencing purposes.
+ */
+void
+tcp_pulloutofband(so, ti, m)
+	struct socket *so;
+	struct tcpiphdr *ti;
+	register struct mbuf *m;
+{
+	int off = ti->ti_urp - 1;	/* offset of the urgent byte */
+
+	while (off >= 0) {
+		if (off < m->m_len) {
+			/* Found it: save the byte, then close the gap. */
+			struct tcpcb *tp = sototcpcb(so);
+			char *oob = mtod(m, caddr_t) + off;
+
+			tp->t_oobflags |= TCPOOB_HAVEDATA;
+			tp->t_iobc = *oob;
+			bcopy(oob + 1, oob, (unsigned)(m->m_len - off - 1));
+			m->m_len--;
+			return;
+		}
+		off -= m->m_len;	/* not here; try the next mbuf */
+		if ((m = m->m_next) == 0)
+			break;
+	}
+	panic("tcp_pulloutofband");
+}
+
+/*
+ * Fold round-trip sample rtt (origin 1) into tp's smoothed rtt/variance and
+ * recompute t_rxtcur.  NOTE(review): rtt is short; check callers' ranges.
+ */
+void
+tcp_xmit_timer(tp, rtt)
+	register struct tcpcb *tp;
+	short rtt;
+{
+	register short delta;	/* sample error, then change in mean error */
+
+	tcpstat.tcps_rttupdated++;
+	if (tp->t_srtt != 0) {
+		/*
+		 * srtt is stored as fixed point with 3 bits after the
+		 * binary point (i.e., scaled by 8).  The following magic
+		 * is equivalent to the smoothing algorithm in rfc793 with
+		 * an alpha of .875 (srtt = rtt/8 + srtt*7/8 in fixed
+		 * point).  Adjust rtt to origin 0.
+		 */
+		delta = rtt - 1 - (tp->t_srtt >> TCP_RTT_SHIFT);
+		if ((tp->t_srtt += delta) <= 0)
+			tp->t_srtt = 1;	/* stay nonzero: 0 means no estimate */
+		/*
+		 * We accumulate a smoothed rtt variance (actually, a
+		 * smoothed mean difference), then set the retransmit
+		 * timer to smoothed rtt + 4 times the smoothed variance.
+		 * rttvar is stored as fixed point with 2 bits after the
+		 * binary point (scaled by 4).  The following is
+		 * equivalent to rfc793 smoothing with an alpha of .75
+		 * (rttvar = rttvar*3/4 + |delta| / 4).  This replaces
+		 * rfc793's wired-in beta.
+		 */
+		if (delta < 0)
+			delta = -delta;
+		delta -= (tp->t_rttvar >> TCP_RTTVAR_SHIFT);
+		if ((tp->t_rttvar += delta) <= 0)
+			tp->t_rttvar = 1;
+	} else {
+		/*
+		 * No rtt measurement yet - use the unsmoothed rtt.
+		 * Set the variance to half the rtt (so our first
+		 * retransmit happens at 3*rtt).
+		 */
+		tp->t_srtt = rtt << TCP_RTT_SHIFT;
+		tp->t_rttvar = rtt << (TCP_RTTVAR_SHIFT - 1);
+	}
+	tp->t_rtt = 0;		/* sample consumed; callers test t_rtt != 0 */
+	tp->t_rxtshift = 0;	/* presumably the backoff shift; reset it */
+
+	/*
+	 * the retransmit should happen at rtt + 4 * rttvar.
+	 * Because of the way we do the smoothing, srtt and rttvar
+	 * will each average +1/2 tick of bias.  When we compute
+	 * the retransmit timer, we want 1/2 tick of rounding and
+	 * 1 extra tick because of +-1/2 tick uncertainty in the
+	 * firing of the timer.  The bias will give us exactly the
+	 * 1.5 tick we need.  But, because the bias is
+	 * statistical, we have to test that we don't drop below
+	 * the minimum feasible timer (which is 2 ticks).
+	 */
+	TCPT_RANGESET(tp->t_rxtcur, TCP_REXMTVAL(tp),
+	    tp->t_rttmin, TCPTV_REXMTMAX);
+	
+	/*
+	 * We received an ack for a packet that wasn't retransmitted;
+	 * it is probably safe to discard any error indications we've
+	 * received recently.  This isn't quite right, but close enough
+	 * for now (a route might have failed after we sent a segment,
+	 * and the return path might not be symmetrical).
+	 */
+	tp->t_softerror = 0;
+}
+
+/*
+ * Determine a reasonable value for maxseg size and return it.
+ * If the route is known, check route for mtu.
+ * If none, use an mss that can be handled on the outgoing
+ * interface without forcing IP to fragment; if bigger than
+ * an mbuf cluster (MCLBYTES), round down to nearest multiple of MCLBYTES
+ * to utilize large mbufs.  If no route is found, route has no mtu,
+ * or the destination isn't local, use a default, hopefully conservative
+ * size (usually 512 or the default IP max size, but no more than the mtu
+ * of the interface), as we can't discover anything about intervening
+ * gateways or networks.  We also initialize the congestion/slow start
+ * window to a single segment (done unconditionally in the code below).
+ * While looking at the routing entry, we also initialize other path-dependent
+ * parameters from pre-set or cached values in the routing entry.
+ */
+int
+tcp_mss(tp, offer)
+	register struct tcpcb *tp;
+	u_int offer;		/* mss announced by the peer, 0 if none */
+{
+	struct route *ro;
+	register struct rtentry *rt;
+	struct ifnet *ifp;
+	register int rtt, mss;
+	u_long bufsize;
+	struct inpcb *inp;
+	struct socket *so;
+	extern int tcp_mssdflt;
+
+	inp = tp->t_inpcb;
+	ro = &inp->inp_route;
+
+	if ((rt = ro->ro_rt) == (struct rtentry *)0) {
+		/* No route yet, so try to acquire one */
+		if (inp->inp_faddr.s_addr != INADDR_ANY) {
+			ro->ro_dst.sa_family = AF_INET;
+			ro->ro_dst.sa_len = sizeof(ro->ro_dst);
+			((struct sockaddr_in *) &ro->ro_dst)->sin_addr =
+				inp->inp_faddr;
+			rtalloc(ro);
+		}
+		if ((rt = ro->ro_rt) == (struct rtentry *)0)
+			return (tcp_mssdflt);	/* tp is left untouched */
+	}
+	ifp = rt->rt_ifp;
+	so = inp->inp_socket;
+
+#ifdef RTV_MTU	/* if route characteristics exist ... */
+	/*
+	 * While we're here, check if there's an initial rtt
+	 * or rttvar.  Convert from the route-table units
+	 * to scaled multiples of the slow timeout timer.
+	 */
+	if (tp->t_srtt == 0 && (rtt = rt->rt_rmx.rmx_rtt)) {
+		/*
+		 * XXX the lock bit for RTT indicates that the value
+		 * is also a minimum value; this is subject to time.
+		 */
+		if (rt->rt_rmx.rmx_locks & RTV_RTT)
+			tp->t_rttmin = rtt / (RTM_RTTUNIT / PR_SLOWHZ);
+		tp->t_srtt = rtt / (RTM_RTTUNIT / (PR_SLOWHZ * TCP_RTT_SCALE));
+		if (rt->rt_rmx.rmx_rttvar)
+			tp->t_rttvar = rt->rt_rmx.rmx_rttvar /
+			    (RTM_RTTUNIT / (PR_SLOWHZ * TCP_RTTVAR_SCALE));
+		else
+			/* default variation is +- 1 rtt */
+			tp->t_rttvar =
+			    tp->t_srtt * TCP_RTTVAR_SCALE / TCP_RTT_SCALE;
+		TCPT_RANGESET(tp->t_rxtcur,
+		    ((tp->t_srtt >> 2) + tp->t_rttvar) >> 1,
+		    tp->t_rttmin, TCPTV_REXMTMAX);
+	}
+	/*
+	 * if there's an mtu associated with the route, use it
+	 */
+	if (rt->rt_rmx.rmx_mtu)
+		mss = rt->rt_rmx.rmx_mtu - sizeof(struct tcpiphdr);
+	else
+#endif /* RTV_MTU */
+	{
+		mss = ifp->if_mtu - sizeof(struct tcpiphdr);
+#if	(MCLBYTES & (MCLBYTES - 1)) == 0
+		if (mss > MCLBYTES)
+			mss &= ~(MCLBYTES-1);
+#else
+		if (mss > MCLBYTES)
+			mss = mss / MCLBYTES * MCLBYTES;
+#endif
+		if (!in_localaddr(inp->inp_faddr))
+			mss = min(mss, tcp_mssdflt);
+	}
+	/*
+	 * The current mss, t_maxseg, is initialized to the default value.
+	 * If we compute a smaller value, reduce the current mss.
+	 * If we compute a larger value, return it for use in sending
+	 * a max seg size option, but don't store it for use
+	 * unless we received an offer at least that large from peer.
+	 * However, do not accept offers under 32 bytes.
+	 */
+	if (offer)
+		mss = min(mss, offer);
+	mss = max(mss, 32);		/* sanity */
+	if (mss < tp->t_maxseg || offer != 0) {
+		/*
+		 * If there's a pipesize, change the socket buffer
+		 * to that size.  Make the socket buffers an integral
+		 * number of mss units; if the mss is larger than
+		 * the socket buffer, decrease the mss.
+		 */
+#ifdef RTV_SPIPE
+		if ((bufsize = rt->rt_rmx.rmx_sendpipe) == 0)
+#endif
+			bufsize = so->so_snd.sb_hiwat;
+		if (bufsize < mss)
+			mss = bufsize;
+		else {
+			bufsize = roundup(bufsize, mss);
+			if (bufsize > sb_max)
+				bufsize = sb_max;
+			(void)sbreserve(&so->so_snd, bufsize);
+		}
+		tp->t_maxseg = mss;
+
+#ifdef RTV_RPIPE
+		if ((bufsize = rt->rt_rmx.rmx_recvpipe) == 0)
+#endif
+			bufsize = so->so_rcv.sb_hiwat;
+		if (bufsize > mss) {
+			bufsize = roundup(bufsize, mss);
+			if (bufsize > sb_max)
+				bufsize = sb_max;
+			(void)sbreserve(&so->so_rcv, bufsize);
+		}
+	}
+	tp->snd_cwnd = mss;	/* one segment, local destination or not */
+
+#ifdef RTV_SSTHRESH
+	if (rt->rt_rmx.rmx_ssthresh) {
+		/*
+		 * There's some sort of gateway or interface
+		 * buffer limit on the path.  Use this to set
+		 * the slow start threshold, but set the
+		 * threshold to no less than 2*mss.
+		 */
+		tp->snd_ssthresh = max(2 * mss, rt->rt_rmx.rmx_ssthresh);
+	}
+#endif /* RTV_SSTHRESH */
+	return (mss);
+}
+#endif /* TUBA_INCLUDE */
diff --git a/sys/netinet/tcp_seq.h b/sys/netinet/tcp_seq.h
new file mode 100644
index 00000000000..8912299ff79
--- /dev/null
+++ b/sys/netinet/tcp_seq.h
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 1982, 1986, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)tcp_seq.h	8.1 (Berkeley) 6/10/93
+ */
+
+/*
+ * TCP sequence numbers are 32 bit integers operated on with modular
+ * arithmetic.  These macros order two such numbers by the sign of their
+ * difference cast to int (assumes 32-bit int; operands < 2^31 apart).
+ */
+#define	SEQ_LT(a,b)	((int)((a)-(b)) < 0)	/* a precedes b */
+#define	SEQ_LEQ(a,b)	((int)((a)-(b)) <= 0)	/* a precedes or equals b */
+#define	SEQ_GT(a,b)	((int)((a)-(b)) > 0)	/* a follows b */
+#define	SEQ_GEQ(a,b)	((int)((a)-(b)) >= 0)	/* a follows or equals b */
+
+/*
+ * Macros to initialize a tcpcb's sequence variables from its initial
+ * receive (irs) and send (iss) sequence numbers.  Each expands to a bare
+ * assignment chain, so use them only as complete statements.
+ */
+#define	tcp_rcvseqinit(tp) \
+	(tp)->rcv_adv = (tp)->rcv_nxt = (tp)->irs + 1
+
+#define	tcp_sendseqinit(tp) \
+	(tp)->snd_una = (tp)->snd_nxt = (tp)->snd_max = (tp)->snd_up = \
+	    (tp)->iss
+
+#define	TCP_ISSINCR	(125*1024)	/* increment for tcp_iss each second */
+
+#ifdef KERNEL
+tcp_seq	tcp_iss;		/* tcp initial send seq #; defined (not extern) here */
+#endif /* KERNEL */
diff --git a/sys/netinet/tcp_subr.c b/sys/netinet/tcp_subr.c
new file mode 100644
index 00000000000..8edb853bede
--- /dev/null
+++ b/sys/netinet/tcp_subr.c
@@ -0,0 +1,445 @@
+/*
+ * Copyright (c) 1982, 1986, 1988, 1990, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)tcp_subr.c	8.1 (Berkeley) 6/10/93
+ */
+
+#include <sys/param.h>
+#include <sys/proc.h>
+#include <sys/systm.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/protosw.h>
+#include <sys/errno.h>
+
+#include <net/route.h>
+#include <net/if.h>
+
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/ip.h>
+#include <netinet/in_pcb.h>
+#include <netinet/ip_var.h>
+#include <netinet/ip_icmp.h>
+#include <netinet/tcp.h>
+#include <netinet/tcp_fsm.h>
+#include <netinet/tcp_seq.h>
+#include <netinet/tcp_timer.h>
+#include <netinet/tcp_var.h>
+#include <netinet/tcpip.h>
+
+/* patchable/settable parameters for tcp */
+int 	tcp_mssdflt = TCP_MSS;		/* initial t_maxseg of a new tcpcb */
+int 	tcp_rttdflt = TCPTV_SRTTDFLT / PR_SLOWHZ;	/* seeds t_rttvar */
+int	tcp_do_rfc1323 = 1;	/* nonzero: set TF_REQ_SCALE|TF_REQ_TSTMP */
+
+extern	struct inpcb *tcp_last_inpcb;	/* pcb cache; reset in tcp_close */
+
+/*
+ * Set up global TCP state: empty pcb list, initial iss, header sizes.
+ */
+void
+tcp_init()
+{
+
+	tcb.inp_prev = tcb.inp_next = &tcb;	/* list head points at itself */
+	tcp_iss = 1;		/* wrong */
+	if (sizeof(struct tcpiphdr) > max_protohdr)
+		max_protohdr = sizeof(struct tcpiphdr);
+	if (MHLEN < max_linkhdr + sizeof(struct tcpiphdr))
+		panic("tcp_init");
+}
+
+/*
+ * Build, or refresh in place, the skeletal tcp/ip header that serves as
+ * the send template for a connection.  When tp has no template yet an
+ * mbuf is allocated to hold one (0 is returned if none is available);
+ * the new pointer is returned but not stored in tp->t_template here.
+ */
+struct tcpiphdr *
+tcp_template(tp)
+	struct tcpcb *tp;
+{
+	register struct tcpiphdr *hdr = tp->t_template;
+	register struct inpcb *inp = tp->t_inpcb;
+	register struct mbuf *mb;
+
+	if (hdr == 0) {
+		if ((mb = m_get(M_DONTWAIT, MT_HEADER)) == NULL)
+			return (0);
+		mb->m_len = sizeof (struct tcpiphdr);
+		hdr = mtod(mb, struct tcpiphdr *);
+	}
+	/* addresses and ports are taken from the pcb */
+	hdr->ti_src = inp->inp_laddr;
+	hdr->ti_dst = inp->inp_faddr;
+	hdr->ti_sport = inp->inp_lport;
+	hdr->ti_dport = inp->inp_fport;
+	hdr->ti_pr = IPPROTO_TCP;
+	hdr->ti_len = htons(sizeof (struct tcpiphdr) - sizeof (struct ip));
+	hdr->ti_off = 5;
+	hdr->ti_next = hdr->ti_prev = 0;
+	hdr->ti_x1 = 0;
+	hdr->ti_x2 = 0;
+	hdr->ti_seq = 0;
+	hdr->ti_ack = 0;
+	hdr->ti_flags = 0;
+	hdr->ti_win = 0;
+	hdr->ti_sum = 0;
+	hdr->ti_urp = 0;
+	return (hdr);
+}
+
+/*
+ * Send a single message to the TCP at address specified by
+ * the given TCP/IP header.  If m == 0, then we make a copy
+ * of the tcpiphdr at ti in a fresh mbuf and send directly to the
+ * addressed host with flags forced to TH_ACK.  This is used to force
+ * keep alive messages out using the TCP template for a connection
+ * tp->t_template.  Otherwise the header at ti (inside m) is reused
+ * with its addresses and ports swapped, any mbufs chained after m
+ * are freed, and the message goes back to the TCP that sent ti.
+ *
+ * In any case the ack and sequence number of the transmitted
+ * segment are as specified by the parameters.  tp may be null.
+ */
+void
+tcp_respond(tp, ti, m, ack, seq, flags)
+	struct tcpcb *tp;
+	register struct tcpiphdr *ti;
+	register struct mbuf *m;
+	tcp_seq ack, seq;
+	int flags;
+{
+	register int tlen;
+	int win = 0;			/* stays 0 when there is no tp */
+	struct route *ro = 0;		/* stays null when there is no tp */
+
+	if (tp) {
+		win = sbspace(&tp->t_inpcb->inp_socket->so_rcv);
+		ro = &tp->t_inpcb->inp_route;
+	}
+	if (m == 0) {
+		m = m_gethdr(M_DONTWAIT, MT_HEADER);
+		if (m == NULL)
+			return;
+#ifdef TCP_COMPAT_42
+		tlen = 1;		/* one byte of data after the header */
+#else
+		tlen = 0;
+#endif
+		m->m_data += max_linkhdr;	/* skip max_linkhdr bytes */
+		*mtod(m, struct tcpiphdr *) = *ti;
+		ti = mtod(m, struct tcpiphdr *);
+		flags = TH_ACK;		/* caller's flags are overridden */
+	} else {
+		m_freem(m->m_next);
+		m->m_next = 0;
+		m->m_data = (caddr_t)ti;
+		m->m_len = sizeof (struct tcpiphdr);
+		tlen = 0;
+#define xchg(a,b,type) { type t; t=a; a=b; b=t; }
+		xchg(ti->ti_dst.s_addr, ti->ti_src.s_addr, u_long);
+		xchg(ti->ti_dport, ti->ti_sport, u_short);
+#undef xchg
+	}
+	ti->ti_len = htons((u_short)(sizeof (struct tcphdr) + tlen));
+	tlen += sizeof (struct tcpiphdr);	/* now the whole packet length */
+	m->m_len = tlen;
+	m->m_pkthdr.len = tlen;
+	m->m_pkthdr.rcvif = (struct ifnet *) 0;
+	ti->ti_next = ti->ti_prev = 0;
+	ti->ti_x1 = 0;
+	ti->ti_seq = htonl(seq);
+	ti->ti_ack = htonl(ack);
+	ti->ti_x2 = 0;
+	ti->ti_off = sizeof (struct tcphdr) >> 2;
+	ti->ti_flags = flags;
+	if (tp)
+		ti->ti_win = htons((u_short) (win >> tp->rcv_scale));
+	else
+		ti->ti_win = htons((u_short)win);
+	ti->ti_urp = 0;
+	ti->ti_sum = 0;			/* must be 0 while summing */
+	ti->ti_sum = in_cksum(m, tlen);
+	((struct ip *)ti)->ip_len = tlen;	/* same bytes viewed as struct ip */
+	((struct ip *)ti)->ip_ttl = ip_defttl;
+	(void) ip_output(m, NULL, ro, 0, NULL);	/* result is ignored */
+}
+
+/*
+ * Allocate and initialize a TCP control block for inp.  The block
+ * starts zeroed with an empty reassembly queue and is linked to the
+ * protocol control block in both directions.  Returns 0 if no memory.
+ */
+struct tcpcb *
+tcp_newtcpcb(inp)
+	struct inpcb *inp;
+{
+	register struct tcpcb *tp;
+
+	if ((tp = malloc(sizeof(*tp), M_PCB, M_NOWAIT)) == NULL)
+		return ((struct tcpcb *)0);
+	bzero((char *) tp, sizeof(*tp));
+
+	tp->t_inpcb = inp;
+	tp->t_maxseg = tcp_mssdflt;
+	tp->seg_prev = tp->seg_next = (struct tcpiphdr *)tp;
+	if (tcp_do_rfc1323)	/* t_flags is already 0 otherwise */
+		tp->t_flags = TF_REQ_SCALE|TF_REQ_TSTMP;
+	/*
+	 * No rtt estimate yet: srtt starts at TCPTV_SRTTBASE (0) to mark
+	 * that, and rttvar is chosen so that srtt + 2 * rttvar yields a
+	 * reasonable initial retransmit time.
+	 */
+	tp->t_rttmin = TCPTV_MIN;
+	tp->t_srtt = TCPTV_SRTTBASE;
+	tp->t_rttvar = tcp_rttdflt * PR_SLOWHZ << 2;
+	TCPT_RANGESET(tp->t_rxtcur,
+	    ((TCPTV_SRTTBASE >> 2) + (TCPTV_SRTTDFLT << 2)) >> 1,
+	    TCPTV_MIN, TCPTV_REXMTMAX);
+	tp->snd_ssthresh = TCP_MAXWIN << TCP_MAX_WINSHIFT;
+	tp->snd_cwnd = TCP_MAXWIN << TCP_MAX_WINSHIFT;
+	inp->inp_ppcb = (caddr_t)tp;
+	inp->inp_ip.ip_ttl = ip_defttl;
+	return (tp);
+}
+
+/*
+ * Abort a TCP connection and report errno on its socket.
+ * A connection that has received a SYN is moved to CLOSED and
+ * tcp_output is called first.  Returns the result of tcp_close.
+ */
+struct tcpcb *
+tcp_drop(tp, errno)
+	register struct tcpcb *tp;
+	int errno;
+{
+	struct socket *sock = tp->t_inpcb->inp_socket;
+
+	if (!TCPS_HAVERCVDSYN(tp->t_state))
+		tcpstat.tcps_conndrops++;
+	else {
+		tp->t_state = TCPS_CLOSED;
+		(void) tcp_output(tp);
+		tcpstat.tcps_drops++;
+	}
+	sock->so_error =	/* a pending soft error replaces ETIMEDOUT */
+	    (errno == ETIMEDOUT && tp->t_softerror) ? tp->t_softerror : errno;
+	return (tcp_close(tp));
+}
+
+/*
+ * Close a TCP control block: save rtt/ssthresh metrics in the route
+ * (when RTV_RTT is defined), free the reassembly queue, template and
+ * tcpcb, mark the socket disconnected and detach the inpcb.
+ * Always returns a null tcpcb pointer.
+ */
+struct tcpcb *
+tcp_close(tp)
+	register struct tcpcb *tp;
+{
+	register struct tcpiphdr *t;
+	struct inpcb *inp = tp->t_inpcb;
+	struct socket *so = inp->inp_socket;
+	register struct mbuf *m;
+#ifdef RTV_RTT
+	register struct rtentry *rt;
+
+	/*
+	 * If we sent enough data to get some meaningful characteristics,
+	 * save them in the routing entry.  'Enough' is arbitrarily
+	 * defined as the sendpipesize (default 4K) * 16.  This would
+	 * give us 16 rtt samples assuming we only get one sample per
+	 * window (the usual case on a long haul net).  16 samples is
+	 * enough for the srtt filter to converge to within 5% of the correct
+	 * value; fewer samples and we could save a very bogus rtt.
+	 *
+	 * Don't update the default route's characteristics and don't
+	 * update anything that the user "locked".
+	 */
+	if (SEQ_LT(tp->iss + so->so_snd.sb_hiwat * 16, tp->snd_max) &&
+	    (rt = inp->inp_route.ro_rt) &&
+	    ((struct sockaddr_in *)rt_key(rt))->sin_addr.s_addr != INADDR_ANY) {
+		register u_long i;
+
+		if ((rt->rt_rmx.rmx_locks & RTV_RTT) == 0) {
+			i = tp->t_srtt *
+			    (RTM_RTTUNIT / (PR_SLOWHZ * TCP_RTT_SCALE));
+			if (rt->rt_rmx.rmx_rtt && i)
+				/*
+				 * filter this update to half the old & half
+				 * the new values, converting scale.
+				 * See route.h and tcp_var.h for a
+				 * description of the scaling constants.
+				 */
+				rt->rt_rmx.rmx_rtt =
+				    (rt->rt_rmx.rmx_rtt + i) / 2;
+			else
+				rt->rt_rmx.rmx_rtt = i;
+		}
+		if ((rt->rt_rmx.rmx_locks & RTV_RTTVAR) == 0) {
+			i = tp->t_rttvar *
+			    (RTM_RTTUNIT / (PR_SLOWHZ * TCP_RTTVAR_SCALE));
+			if (rt->rt_rmx.rmx_rttvar && i)
+				rt->rt_rmx.rmx_rttvar =
+				    (rt->rt_rmx.rmx_rttvar + i) / 2;
+			else
+				rt->rt_rmx.rmx_rttvar = i;
+		}
+		/*
+		 * update the pipelimit (ssthresh), unless the user locked
+		 * it, if it has been updated already or if a pipesize was
+		 * specified & the threshold got below half the pipesize.
+		 * I.e., wait for bad news before we start updating, then
+		 * update on both good and bad news.
+		 */
+		if ((rt->rt_rmx.rmx_locks & RTV_SSTHRESH) == 0 &&
+		    (((i = tp->snd_ssthresh) && rt->rt_rmx.rmx_ssthresh) ||
+		    i < (rt->rt_rmx.rmx_sendpipe / 2))) {
+			/*
+			 * convert the limit from user data bytes to
+			 * packets then to packet data bytes.
+			 */
+			i = (i + tp->t_maxseg / 2) / tp->t_maxseg;
+			if (i < 2)
+				i = 2;
+			i *= (u_long)(tp->t_maxseg + sizeof (struct tcpiphdr));
+			if (rt->rt_rmx.rmx_ssthresh)
+				rt->rt_rmx.rmx_ssthresh =
+				    (rt->rt_rmx.rmx_ssthresh + i) / 2;
+			else
+				rt->rt_rmx.rmx_ssthresh = i;
+		}
+	}
+#endif /* RTV_RTT */
+	/* free the reassembly queue, if any */
+	t = tp->seg_next;
+	while (t != (struct tcpiphdr *)tp) {
+		t = (struct tcpiphdr *)t->ti_next;	/* advance before unlinking */
+		m = REASS_MBUF((struct tcpiphdr *)t->ti_prev);
+		remque(t->ti_prev);
+		m_freem(m);
+	}
+	if (tp->t_template)
+		(void) m_free(dtom(tp->t_template));
+	free(tp, M_PCB);
+	inp->inp_ppcb = 0;		/* tp must not be used past this point */
+	soisdisconnected(so);
+	/* clobber input pcb cache if we're closing the cached connection */
+	if (inp == tcp_last_inpcb)
+		tcp_last_inpcb = &tcb;
+	in_pcbdetach(inp);
+	tcpstat.tcps_closed++;
+	return ((struct tcpcb *)0);
+}
+
+void
+tcp_drain()
+{
+	/* Intentionally empty: no action is taken. */
+}
+
+/*
+ * Report an asynchronous error to a tcp user.  The error is normally
+ * just remembered as a soft error; except when it is ignored, the user
+ * is woken up (nothing more happens until soft errors can be selected).
+ */
+void
+tcp_notify(inp, error)
+	struct inpcb *inp;
+	int error;
+{
+	register struct socket *so = inp->inp_socket;
+	register struct tcpcb *tp = (struct tcpcb *)inp->inp_ppcb;
+
+	/*
+	 * On an established connection, unreachable/down errors are
+	 * ignored outright.  A connection still being set up that has
+	 * already retransmitted several times and holds an earlier soft
+	 * error gives up now and gets the error as a hard one, rather
+	 * than waiting a long time for something that can never complete.
+	 */
+	if (tp->t_state == TCPS_ESTABLISHED &&
+	    (error == EHOSTDOWN || error == ENETUNREACH ||
+	    error == EHOSTUNREACH))
+		return;
+	if (tp->t_state >= TCPS_ESTABLISHED || tp->t_rxtshift <= 3 ||
+	    tp->t_softerror == 0)
+		tp->t_softerror = error;
+	else
+		so->so_error = error;
+	wakeup((caddr_t) &so->so_timeo);
+	sorwakeup(so);
+	sowwakeup(so);
+}
+/* Control input for TCP: hand a PRC_* event to the matching pcb(s). */
+void
+tcp_ctlinput(cmd, sa, ip)
+	int cmd;			/* PRC_* request code */
+	struct sockaddr *sa;		/* passed through to in_pcbnotify */
+	register struct ip *ip;		/* IP hdr followed by TCP hdr, or null */
+{
+	register struct tcphdr *th;
+	extern struct in_addr zeroin_addr;
+	extern u_char inetctlerrmap[];
+	void (*notify) __P((struct inpcb *, int)) = tcp_notify;
+
+	if (cmd == PRC_QUENCH)
+		notify = tcp_quench;
+	else if (!PRC_IS_REDIRECT(cmd) &&
+		 ((unsigned)cmd >= PRC_NCMDS || inetctlerrmap[cmd] == 0))
+		return;		/* out of range (incl. negative) or unmapped */
+	if (ip) {
+		th = (struct tcphdr *)((caddr_t)ip + (ip->ip_hl << 2));
+		in_pcbnotify(&tcb, sa, th->th_dport, ip->ip_src, th->th_sport,
+			cmd, notify);
+	} else
+		in_pcbnotify(&tcb, sa, 0, zeroin_addr, 0, cmd, notify);
+}
+
+/*
+ * Source quench handler: shrink the congestion window to a single
+ * segment.  It is opened again gradually as the connection proceeds.
+ */
+void
+tcp_quench(inp, errno)
+	struct inpcb *inp;
+	int errno;
+{
+	struct tcpcb *tp;
+
+	if ((tp = intotcpcb(inp)) != 0)
+		tp->snd_cwnd = tp->t_maxseg;
+}
diff --git a/sys/netinet/tcp_timer.c b/sys/netinet/tcp_timer.c
new file mode 100644
index 00000000000..0c0f0f8c2f1
--- /dev/null
+++ b/sys/netinet/tcp_timer.c
@@ -0,0 +1,312 @@
+/*
+ * Copyright (c) 1982, 1986, 1988, 1990, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)tcp_timer.c	8.1 (Berkeley) 6/10/93
+ */
+
+#ifndef TUBA_INCLUDE
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/protosw.h>
+#include <sys/errno.h>
+
+#include <net/if.h>
+#include <net/route.h>
+
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/ip.h>
+#include <netinet/in_pcb.h>
+#include <netinet/ip_var.h>
+#include <netinet/tcp.h>
+#include <netinet/tcp_fsm.h>
+#include <netinet/tcp_seq.h>
+#include <netinet/tcp_timer.h>
+#include <netinet/tcp_var.h>
+#include <netinet/tcpip.h>
+
+int	tcp_keepidle = TCPTV_KEEP_IDLE;	/* idle time before keepalive probes */
+int	tcp_keepintvl = TCPTV_KEEPINTVL;	/* time between keepalive probes */
+int	tcp_maxidle;			/* recomputed on every tcp_slowtimo */
+#endif /* TUBA_INCLUDE */
+/*
+ * Fast timeout: turn every pending delayed ack into an immediate one.
+ */
+void
+tcp_fasttimo()
+{
+	register struct inpcb *inp;
+	register struct tcpcb *tp;
+	int s = splnet();
+
+	if ((inp = tcb.inp_next) != 0)
+		for (; inp != &tcb; inp = inp->inp_next) {
+			tp = (struct tcpcb *)inp->inp_ppcb;
+			if (tp == 0 || (tp->t_flags & TF_DELACK) == 0)
+				continue;
+			tp->t_flags &= ~TF_DELACK;
+			tp->t_flags |= TF_ACKNOW;
+			tcpstat.tcps_delack++;
+			(void) tcp_output(tp);
+		}
+	splx(s);
+}
+
+/*
+ * Tcp protocol timeout routine called every 500 ms.  At splnet, ticks
+ * the timers in all active tcb's, causing finite state machine actions
+ * when one expires, then advances tcp_iss and the timestamp clock tcp_now.
+ */
+void
+tcp_slowtimo()
+{
+	register struct inpcb *ip, *ipnxt;
+	register struct tcpcb *tp;
+	int s = splnet();
+	register int i;
+
+	tcp_maxidle = TCPTV_KEEPCNT * tcp_keepintvl;
+	/*
+	 * Search through tcb's and update active timers.  An expired
+	 * timer's index is handed to tcp_usrreq via PRU_SLOWTIMO.
+	 */
+	ip = tcb.inp_next;
+	if (ip == 0) {
+		splx(s);
+		return;
+	}
+	for (; ip != &tcb; ip = ipnxt) {
+		ipnxt = ip->inp_next;	/* saved first: ip may be unlinked below */
+		tp = intotcpcb(ip);
+		if (tp == 0)
+			continue;
+		for (i = 0; i < TCPT_NTIMERS; i++) {
+			if (tp->t_timer[i] && --tp->t_timer[i] == 0) {
+				(void) tcp_usrreq(tp->t_inpcb->inp_socket,
+				    PRU_SLOWTIMO, (struct mbuf *)0,
+				    (struct mbuf *)i, (struct mbuf *)0);
+				if (ipnxt->inp_prev != ip)
+					goto tpgone;	/* ip left the list */
+			}
+		}
+		tp->t_idle++;
+		if (tp->t_rtt)		/* only counts while nonzero */
+			tp->t_rtt++;
+tpgone:
+		;
+	}
+	tcp_iss += TCP_ISSINCR/PR_SLOWHZ;		/* increment iss */
+#ifdef TCP_COMPAT_42
+	if ((int)tcp_iss < 0)
+		tcp_iss = 0;				/* XXX */
+#endif
+	tcp_now++;					/* for timestamps */
+	splx(s);
+}
+#ifndef TUBA_INCLUDE
+
+/*
+ * Stop every timer of TCP tp by zeroing all TCPT_NTIMERS slots.
+ */
+void
+tcp_canceltimers(tp)
+	struct tcpcb *tp;
+{
+	register int slot = TCPT_NTIMERS;
+
+	while (--slot >= 0)
+		tp->t_timer[slot] = 0;
+}
+
+int	tcp_backoff[TCP_MAXRXTSHIFT + 1] =	/* rexmt multiplier by t_rxtshift */
+    { 1, 2, 4, 8, 16, 32, 64, 64, 64, 64, 64, 64, 64 };
+
+/*
+ * Handle expiry of the given timer for tp.  Returns tp, or 0 if it was closed.
+ */
+struct tcpcb *
+tcp_timers(tp, timer)
+	register struct tcpcb *tp;
+	int timer;
+{
+	register int rexmt;
+
+	switch (timer) {
+
+	/*
+	 * 2 MSL timeout in shutdown went off.  If we're closed but
+	 * still waiting for peer to close and connection has been idle
+	 * too long, or if 2MSL time is up from TIME_WAIT, delete connection
+	 * control block.  Otherwise, check again in a bit.
+	 */
+	case TCPT_2MSL:
+		if (tp->t_state != TCPS_TIME_WAIT &&
+		    tp->t_idle <= tcp_maxidle)
+			tp->t_timer[TCPT_2MSL] = tcp_keepintvl;
+		else
+			tp = tcp_close(tp);
+		break;
+
+	/*
+	 * Retransmission timer went off.  Message has not
+	 * been acked within retransmit interval.  Back off
+	 * to a longer retransmit interval and retransmit one segment.
+	 */
+	case TCPT_REXMT:
+		if (++tp->t_rxtshift > TCP_MAXRXTSHIFT) {
+			tp->t_rxtshift = TCP_MAXRXTSHIFT;
+			tcpstat.tcps_timeoutdrop++;
+			tp = tcp_drop(tp, tp->t_softerror ?
+			    tp->t_softerror : ETIMEDOUT);
+			break;
+		}
+		tcpstat.tcps_rexmttimeo++;
+		rexmt = TCP_REXMTVAL(tp) * tcp_backoff[tp->t_rxtshift];
+		TCPT_RANGESET(tp->t_rxtcur, rexmt,
+		    tp->t_rttmin, TCPTV_REXMTMAX);
+		tp->t_timer[TCPT_REXMT] = tp->t_rxtcur;
+		/*
+		 * If losing, let the lower level know and try for
+		 * a better route.  Also, if we backed off this far,
+		 * our srtt estimate is probably bogus.  Clobber it
+		 * so we'll take the next rtt measurement as our srtt;
+		 * move the current srtt into rttvar to keep the current
+		 * retransmit times until then.
+		 */
+		if (tp->t_rxtshift > TCP_MAXRXTSHIFT / 4) {
+			in_losing(tp->t_inpcb);
+			tp->t_rttvar += (tp->t_srtt >> TCP_RTT_SHIFT);
+			tp->t_srtt = 0;
+		}
+		tp->snd_nxt = tp->snd_una;	/* resend from first unacked byte */
+		/*
+		 * If timing a segment in this window, stop the timer.
+		 */
+		tp->t_rtt = 0;
+		/*
+		 * Close the congestion window down to one segment
+		 * (we'll open it by one segment for each ack we get).
+		 * Since we probably have a window's worth of unacked
+		 * data accumulated, this "slow start" keeps us from
+		 * dumping all that data as back-to-back packets (which
+		 * might overwhelm an intermediate gateway).
+		 *
+		 * There are two phases to the opening: Initially we
+		 * open by one mss on each ack.  This makes the window
+		 * size increase exponentially with time.  If the
+		 * window is larger than the path can handle, this
+		 * exponential growth results in dropped packet(s)
+		 * almost immediately.  To get more time between
+		 * drops but still "push" the network to take advantage
+		 * of improving conditions, we switch from exponential
+		 * to linear window opening at some threshold size.
+		 * For a threshold, we use half the current window
+		 * size, truncated to a multiple of the mss.
+		 *
+		 * (the minimum cwnd that will give us exponential
+		 * growth is 2 mss.  We don't allow the threshold
+		 * to go below this.)
+		 */
+		{
+		u_int win = min(tp->snd_wnd, tp->snd_cwnd) / 2 / tp->t_maxseg;
+		if (win < 2)
+			win = 2;
+		tp->snd_cwnd = tp->t_maxseg;
+		tp->snd_ssthresh = win * tp->t_maxseg;
+		tp->t_dupacks = 0;
+		}
+		(void) tcp_output(tp);
+		break;
+
+	/*
+	 * Persistence timer into zero window.
+	 * Force a byte to be output, if possible.
+	 */
+	case TCPT_PERSIST:
+		tcpstat.tcps_persisttimeo++;
+		tcp_setpersist(tp);
+		tp->t_force = 1;	/* set only around this tcp_output call */
+		(void) tcp_output(tp);
+		tp->t_force = 0;
+		break;
+
+	/*
+	 * Keep-alive timer went off; send something
+	 * or drop connection if idle for too long.
+	 */
+	case TCPT_KEEP:
+		tcpstat.tcps_keeptimeo++;
+		if (tp->t_state < TCPS_ESTABLISHED)
+			goto dropit;
+		if (tp->t_inpcb->inp_socket->so_options & SO_KEEPALIVE &&
+		    tp->t_state <= TCPS_CLOSE_WAIT) {
+		    	if (tp->t_idle >= tcp_keepidle + tcp_maxidle)
+				goto dropit;
+			/*
+			 * Send a packet designed to force a response
+			 * if the peer is up and reachable:
+			 * either an ACK if the connection is still alive,
+			 * or an RST if the peer has closed the connection
+			 * due to timeout or reboot.
+			 * Using sequence number tp->snd_una-1
+			 * causes the transmitted zero-length segment
+			 * to lie outside the receive window;
+			 * by the protocol spec, this requires the
+			 * correspondent TCP to respond.
+			 */
+			tcpstat.tcps_keepprobe++;
+#ifdef TCP_COMPAT_42
+			/*
+			 * The keepalive packet must have nonzero length
+			 * to get a 4.2 host to respond.
+			 */
+			tcp_respond(tp, tp->t_template, (struct mbuf *)NULL,
+			    tp->rcv_nxt - 1, tp->snd_una - 1, 0);
+#else
+			tcp_respond(tp, tp->t_template, (struct mbuf *)NULL,
+			    tp->rcv_nxt, tp->snd_una - 1, 0);
+#endif
+			tp->t_timer[TCPT_KEEP] = tcp_keepintvl;
+		} else
+			tp->t_timer[TCPT_KEEP] = tcp_keepidle;
+		break;
+	dropit:				/* reached only by goto from above */
+		tcpstat.tcps_keepdrops++;
+		tp = tcp_drop(tp, ETIMEDOUT);
+		break;
+	}
+	return (tp);
+}
+#endif /* TUBA_INCLUDE */
diff --git a/sys/netinet/tcp_timer.h b/sys/netinet/tcp_timer.h
new file mode 100644
index 00000000000..301a10f4034
--- /dev/null
+++ b/sys/netinet/tcp_timer.h
@@ -0,0 +1,128 @@
+/*
+ * Copyright (c) 1982, 1986, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)tcp_timer.h	8.1 (Berkeley) 6/10/93
+ */
+
+/*
+ * Definitions of the TCP timers.  These timers are counted
+ * down PR_SLOWHZ times a second.
+ */
+#define	TCPT_NTIMERS	4		/* number of t_timer[] slots */
+
+#define	TCPT_REXMT	0		/* retransmit */
+#define	TCPT_PERSIST	1		/* retransmit persistence */
+#define	TCPT_KEEP	2		/* keep alive */
+#define	TCPT_2MSL	3		/* 2*msl quiet time timer */
+
+/*
+ * The TCPT_REXMT timer is used to force retransmissions.
+ * The TCP has the TCPT_REXMT timer set whenever segments
+ * have been sent for which ACKs are expected but not yet
+ * received.  If an ACK is received which advances tp->snd_una,
+ * then the retransmit timer is cleared (if there are no more
+ * outstanding segments) or reset to the base value (if there
+ * are more ACKs expected).  Whenever the retransmit timer goes off,
+ * we retransmit one unacknowledged segment, and do a backoff
+ * on the retransmit timer.
+ *
+ * The TCPT_PERSIST timer is used to keep window size information
+ * flowing even if the window goes shut.  If all previous transmissions
+ * have been acknowledged (so that there are no retransmissions in progress),
+ * and the window is too small to bother sending anything, then we start
+ * the TCPT_PERSIST timer.  When it expires, if the window is nonzero,
+ * we go to transmit state.  Otherwise, at intervals send a single byte
+ * into the peer's window to force him to update our window information.
+ * We do this at most as often as TCPTV_PERSMIN time intervals,
+ * but no more frequently than the current estimate of round-trip
+ * packet time.  The TCPT_PERSIST timer is cleared whenever we receive
+ * a window update from the peer.
+ *
+ * The TCPT_KEEP timer is used to keep connections alive.  If a
+ * connection is idle (no segments received) for TCPTV_KEEP_INIT amount of time,
+ * but not yet established, then we drop the connection.  Once the connection
+ * is established, if the connection is idle for TCPTV_KEEP_IDLE time
+ * (and keepalives have been enabled on the socket), we begin to probe
+ * the connection.  We force the peer to send us a segment by sending:
+ *	<SEQ=SND.UNA-1><ACK=RCV.NXT><CTL=ACK>
+ * This segment is (deliberately) outside the window, and should elicit
+ * an ack segment in response from the peer.  If, despite the TCPT_KEEP
+ * initiated segments we cannot elicit a response from a peer in tcp_maxidle
+ * amount of time probing, then we drop the connection.
+ */
+
+/*
+ * Time constants.  TCPTV_* values are in ticks of 1/PR_SLOWHZ second.
+ */
+#define	TCPTV_MSL	( 30*PR_SLOWHZ)		/* max seg lifetime (hah!) */
+#define	TCPTV_SRTTBASE	0			/* base roundtrip time;
+						   if 0, no idea yet */
+#define	TCPTV_SRTTDFLT	(  3*PR_SLOWHZ)		/* assumed RTT if no info */
+
+#define	TCPTV_PERSMIN	(  5*PR_SLOWHZ)		/* retransmit persistence */
+#define	TCPTV_PERSMAX	( 60*PR_SLOWHZ)		/* maximum persist interval */
+
+#define	TCPTV_KEEP_INIT	( 75*PR_SLOWHZ)		/* initial connect keep alive */
+#define	TCPTV_KEEP_IDLE	(120*60*PR_SLOWHZ)	/* dflt time before probing */
+#define	TCPTV_KEEPINTVL	( 75*PR_SLOWHZ)		/* default probe interval */
+#define	TCPTV_KEEPCNT	8			/* max probes before drop */
+
+#define	TCPTV_MIN	(  1*PR_SLOWHZ)		/* minimum allowable value */
+#define	TCPTV_REXMTMAX	( 64*PR_SLOWHZ)		/* max allowable REXMT value */
+
+#define	TCP_LINGERTIME	120			/* linger at most 2 minutes */
+
+#define	TCP_MAXRXTSHIFT	12			/* maximum retransmits */
+
+#ifdef	TCPTIMERS
+char *tcptimers[] =		/* timer names, in TCPT_* index order */
+    { "REXMT", "PERSIST", "KEEP", "2MSL" };
+#endif /* TCPTIMERS */
+
+/*
+ * Store value in tv, clamped to [tvmin, tvmax].  Expands to a brace block
+ * that evaluates tv more than once; avoid `if (x) TCPT_RANGESET(...); else'.
+ */
+#define	TCPT_RANGESET(tv, value, tvmin, tvmax) { \
+	(tv) = (value); \
+	if ((tv) < (tvmin)) \
+		(tv) = (tvmin); \
+	else if ((tv) > (tvmax)) \
+		(tv) = (tvmax); \
+}
+
+#ifdef KERNEL
+extern int tcp_keepidle;		/* time before keepalive probes begin */
+extern int tcp_keepintvl;		/* time between keepalive probes */
+extern int tcp_maxidle;			/* time to drop after starting probes */
+extern int tcp_ttl;			/* time to live for TCP segs */
+extern int tcp_backoff[];		/* retransmit backoff multipliers */
+#endif /* KERNEL */
diff --git a/sys/netinet/tcp_timewait.c b/sys/netinet/tcp_timewait.c
new file mode 100644
index 00000000000..8edb853bede
--- /dev/null
+++ b/sys/netinet/tcp_timewait.c
@@ -0,0 +1,445 @@
+/*
+ * Copyright (c) 1982, 1986, 1988, 1990, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)tcp_subr.c	8.1 (Berkeley) 6/10/93
+ */
+
+#include <sys/param.h>
+#include <sys/proc.h>
+#include <sys/systm.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/protosw.h>
+#include <sys/errno.h>
+
+#include <net/route.h>
+#include <net/if.h>
+
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/ip.h>
+#include <netinet/in_pcb.h>
+#include <netinet/ip_var.h>
+#include <netinet/ip_icmp.h>
+#include <netinet/tcp.h>
+#include <netinet/tcp_fsm.h>
+#include <netinet/tcp_seq.h>
+#include <netinet/tcp_timer.h>
+#include <netinet/tcp_var.h>
+#include <netinet/tcpip.h>
+
+/* patchable/settable parameters for tcp */
+/* segment size given to every new control block (see tcp_newtcpcb) */
+int 	tcp_mssdflt = TCP_MSS;
+/* default RTT in units of 1/PR_SLOWHZ-free "seconds": TCPTV_SRTTDFLT divided
+   by PR_SLOWHZ here, multiplied back by PR_SLOWHZ in tcp_newtcpcb */
+int 	tcp_rttdflt = TCPTV_SRTTDFLT / PR_SLOWHZ;
+/* nonzero: new control blocks start with TF_REQ_SCALE|TF_REQ_TSTMP set */
+int	tcp_do_rfc1323 = 1;
+
+/* cached pcb, defined elsewhere; tcp_close resets it when that pcb goes away */
+extern	struct inpcb *tcp_last_inpcb;
+
+/*
+ * Tcp initialization: seed the initial send sequence number, make the
+ * pcb list head refer to itself, and check/raise the global header-size
+ * limits so that a struct tcpiphdr fits.
+ */
+void
+tcp_init()
+{
+
+	/* list head is its own successor and predecessor */
+	tcb.inp_prev = &tcb;
+	tcb.inp_next = &tcb;
+	tcp_iss = 1;		/* wrong */
+
+	/* grow the protocol-header budget if ours is the largest so far */
+	if (sizeof(struct tcpiphdr) > max_protohdr)
+		max_protohdr = sizeof(struct tcpiphdr);
+
+	/* link header plus TCP/IP header may not exceed MHLEN */
+	if (max_linkhdr + sizeof(struct tcpiphdr) > MHLEN)
+		panic("tcp_init");
+}
+
+/*
+ * Build (or refresh) the header template used when sending tcp packets
+ * on a connection.  If the control block has no template yet, an mbuf
+ * is allocated to hold one; otherwise the existing template is
+ * rewritten in place.  The skeletal tcp/ip header is filled from the
+ * pcb's addresses and ports, with every per-segment field zeroed.
+ *
+ * Returns the template, or 0 if an mbuf could not be obtained.
+ * The caller stores the result; tp->t_template is not assigned here.
+ */
+struct tcpiphdr *
+tcp_template(tp)
+	struct tcpcb *tp;
+{
+	register struct inpcb *inp = tp->t_inpcb;
+	register struct tcpiphdr *hdr = tp->t_template;
+	register struct mbuf *mb;
+
+	if (hdr == 0) {
+		/* first use: back the template with a fresh mbuf */
+		if ((mb = m_get(M_DONTWAIT, MT_HEADER)) == NULL)
+			return (0);
+		mb->m_len = sizeof (struct tcpiphdr);
+		hdr = mtod(mb, struct tcpiphdr *);
+	}
+
+	/* ip overlay portion */
+	hdr->ti_next = hdr->ti_prev = 0;
+	hdr->ti_x1 = 0;
+	hdr->ti_pr = IPPROTO_TCP;
+	hdr->ti_len = htons(sizeof (struct tcpiphdr) - sizeof (struct ip));
+	hdr->ti_src = inp->inp_laddr;
+	hdr->ti_dst = inp->inp_faddr;
+
+	/* tcp portion: ports from the pcb, everything else cleared */
+	hdr->ti_sport = inp->inp_lport;
+	hdr->ti_dport = inp->inp_fport;
+	hdr->ti_seq = 0;
+	hdr->ti_ack = 0;
+	hdr->ti_x2 = 0;
+	hdr->ti_off = 5;
+	hdr->ti_flags = 0;
+	hdr->ti_win = 0;
+	hdr->ti_sum = 0;
+	hdr->ti_urp = 0;
+	return (hdr);
+}
+
+/*
+ * Send a single message to the TCP at address specified by
+ * the given TCP/IP header.  If m == 0, then we make a copy
+ * of the tcpiphdr at ti and send directly to the addressed host.
+ * This is used to force keep alive messages out using the TCP
+ * template for a connection tp->t_template; in this case the flags
+ * argument is ignored and TH_ACK is sent.  If m is given, it is the
+ * mbuf containing ti: we send a message back to the TCP which
+ * originated the segment ti (addresses and ports are swapped), reusing
+ * that mbuf and discarding any other attached mbufs.
+ *
+ * In any case the ack and sequence number of the transmitted
+ * segment are as specified by the parameters.
+ *
+ * tp may be null; then a zero window is advertised and no cached
+ * route is handed to ip_output().
+ */
+void
+tcp_respond(tp, ti, m, ack, seq, flags)
+	struct tcpcb *tp;
+	register struct tcpiphdr *ti;
+	register struct mbuf *m;
+	tcp_seq ack, seq;
+	int flags;
+{
+	register int tlen;
+	int win = 0;
+	struct route *ro = 0;
+
+	if (tp) {
+		/* advertise the free receive-buffer space; use the pcb's route */
+		win = sbspace(&tp->t_inpcb->inp_socket->so_rcv);
+		ro = &tp->t_inpcb->inp_route;
+	}
+	if (m == 0) {
+		/* no mbuf supplied: build the segment in a new packet-header mbuf */
+		m = m_gethdr(M_DONTWAIT, MT_HEADER);
+		if (m == NULL)
+			return;
+#ifdef TCP_COMPAT_42
+		tlen = 1;
+#else
+		tlen = 0;
+#endif
+		/* leave room in front for the link-level header */
+		m->m_data += max_linkhdr;
+		*mtod(m, struct tcpiphdr *) = *ti;
+		ti = mtod(m, struct tcpiphdr *);
+		flags = TH_ACK;
+	} else {
+		/* reuse the caller's mbuf: keep only the header, drop the rest */
+		m_freem(m->m_next);
+		m->m_next = 0;
+		m->m_data = (caddr_t)ti;
+		m->m_len = sizeof (struct tcpiphdr);
+		tlen = 0;
+		/* reply goes back to the sender: swap addresses and ports */
+#define xchg(a,b,type) { type t; t=a; a=b; b=t; }
+		xchg(ti->ti_dst.s_addr, ti->ti_src.s_addr, u_long);
+		xchg(ti->ti_dport, ti->ti_sport, u_short);
+#undef xchg
+	}
+	/* ti_len covers the tcp header plus tlen data bytes ... */
+	ti->ti_len = htons((u_short)(sizeof (struct tcphdr) + tlen));
+	/* ... while tlen from here on is the whole tcp/ip packet length */
+	tlen += sizeof (struct tcpiphdr);
+	m->m_len = tlen;
+	m->m_pkthdr.len = tlen;
+	m->m_pkthdr.rcvif = (struct ifnet *) 0;
+	ti->ti_next = ti->ti_prev = 0;
+	ti->ti_x1 = 0;
+	ti->ti_seq = htonl(seq);
+	ti->ti_ack = htonl(ack);
+	ti->ti_x2 = 0;
+	ti->ti_off = sizeof (struct tcphdr) >> 2;
+	ti->ti_flags = flags;
+	if (tp)
+		ti->ti_win = htons((u_short) (win >> tp->rcv_scale));
+	else
+		ti->ti_win = htons((u_short)win);
+	ti->ti_urp = 0;
+	/* checksum field must be zero while the checksum is computed */
+	ti->ti_sum = 0;
+	ti->ti_sum = in_cksum(m, tlen);
+	/* after checksumming, the same bytes are filled in as an ip header */
+	((struct ip *)ti)->ip_len = tlen;
+	((struct ip *)ti)->ip_ttl = ip_defttl;
+	(void) ip_output(m, NULL, ro, 0, NULL);
+}
+
+/*
+ * Allocate and initialize a TCP control block for the given protocol
+ * control block.  The new block starts zeroed, with an empty
+ * reassembly queue (the queue links point back at the block itself),
+ * and is linked to inp in both directions.
+ *
+ * Returns the new block, or a null pointer if allocation fails.
+ */
+struct tcpcb *
+tcp_newtcpcb(inp)
+	struct inpcb *inp;
+{
+	register struct tcpcb *cb;
+
+	cb = malloc(sizeof(*cb), M_PCB, M_NOWAIT);
+	if (cb == NULL)
+		return ((struct tcpcb *)0);
+	bzero((char *) cb, sizeof(struct tcpcb));
+
+	/* empty reassembly queue: both links refer to the block itself */
+	cb->seg_prev = (struct tcpiphdr *)cb;
+	cb->seg_next = (struct tcpiphdr *)cb;
+	cb->t_maxseg = tcp_mssdflt;
+
+	if (tcp_do_rfc1323)
+		cb->t_flags = TF_REQ_SCALE|TF_REQ_TSTMP;
+	else
+		cb->t_flags = 0;
+	cb->t_inpcb = inp;
+
+	/*
+	 * srtt starts at TCPTV_SRTTBASE (0), which marks "no rtt estimate
+	 * yet".  rttvar is chosen so that srtt + 2 * rttvar yields a
+	 * reasonable initial retransmit time.
+	 */
+	cb->t_srtt = TCPTV_SRTTBASE;
+	cb->t_rttvar = (tcp_rttdflt * PR_SLOWHZ) << 2;
+	cb->t_rttmin = TCPTV_MIN;
+	TCPT_RANGESET(cb->t_rxtcur,
+	    ((TCPTV_SRTTBASE >> 2) + (TCPTV_SRTTDFLT << 2)) >> 1,
+	    TCPTV_MIN, TCPTV_REXMTMAX);
+
+	/* congestion window and slow-start threshold both start wide open */
+	cb->snd_ssthresh = TCP_MAXWIN << TCP_MAX_WINSHIFT;
+	cb->snd_cwnd = TCP_MAXWIN << TCP_MAX_WINSHIFT;
+
+	inp->inp_ip.ip_ttl = ip_defttl;
+	inp->inp_ppcb = (caddr_t)cb;
+	return (cb);
+}
+
+/*
+ * Drop a TCP connection and report the given error on its socket.
+ * If a SYN has been received on the connection, the state is forced to
+ * TCPS_CLOSED and tcp_output() is called first (this is what sends the
+ * RST to the peer).  An ETIMEDOUT is replaced by the remembered soft
+ * error when there is one.  Returns the result of tcp_close().
+ */
+struct tcpcb *
+tcp_drop(tp, errno)
+	register struct tcpcb *tp;
+	int errno;
+{
+	struct socket *sock = tp->t_inpcb->inp_socket;
+
+	if (!TCPS_HAVERCVDSYN(tp->t_state)) {
+		/* never got as far as seeing the peer's SYN */
+		tcpstat.tcps_conndrops++;
+	} else {
+		tp->t_state = TCPS_CLOSED;
+		(void) tcp_output(tp);
+		tcpstat.tcps_drops++;
+	}
+
+	/* prefer the more specific soft error over a plain timeout */
+	if (tp->t_softerror && errno == ETIMEDOUT)
+		errno = tp->t_softerror;
+	sock->so_error = errno;
+
+	return (tcp_close(tp));
+}
+
+/*
+ * Close a TCP control block:
+ *	save learned path characteristics in the route (if RTV_RTT is defined)
+ *	discard all space held by the tcp
+ *	discard internet protocol block
+ *	wake up any sleepers
+ *
+ * Always returns a null tcpcb pointer, so callers can write
+ * "tp = tcp_close(tp)".
+ */
+struct tcpcb *
+tcp_close(tp)
+	register struct tcpcb *tp;
+{
+	register struct tcpiphdr *t;
+	struct inpcb *inp = tp->t_inpcb;
+	struct socket *so = inp->inp_socket;
+	register struct mbuf *m;
+#ifdef RTV_RTT
+	register struct rtentry *rt;
+
+	/*
+	 * If we sent enough data to get some meaningful characteristics,
+	 * save them in the routing entry.  'Enough' is arbitrarily 
+	 * defined as the sendpipesize (default 4K) * 16.  This would
+	 * give us 16 rtt samples assuming we only get one sample per
+	 * window (the usual case on a long haul net).  16 samples is
+	 * enough for the srtt filter to converge to within 5% of the correct
+	 * value; fewer samples and we could save a very bogus rtt.
+	 *
+	 * Don't update the default route's characteristics and don't
+	 * update anything that the user "locked".
+	 */
+	if (SEQ_LT(tp->iss + so->so_snd.sb_hiwat * 16, tp->snd_max) &&
+	    (rt = inp->inp_route.ro_rt) &&
+	    ((struct sockaddr_in *)rt_key(rt))->sin_addr.s_addr != INADDR_ANY) {
+		register u_long i;
+
+		if ((rt->rt_rmx.rmx_locks & RTV_RTT) == 0) {
+			i = tp->t_srtt *
+			    (RTM_RTTUNIT / (PR_SLOWHZ * TCP_RTT_SCALE));
+			if (rt->rt_rmx.rmx_rtt && i)
+				/*
+				 * filter this update to half the old & half
+				 * the new values, converting scale.
+				 * See route.h and tcp_var.h for a
+				 * description of the scaling constants.
+				 */
+				rt->rt_rmx.rmx_rtt =
+				    (rt->rt_rmx.rmx_rtt + i) / 2;
+			else
+				rt->rt_rmx.rmx_rtt = i;
+		}
+		if ((rt->rt_rmx.rmx_locks & RTV_RTTVAR) == 0) {
+			i = tp->t_rttvar *
+			    (RTM_RTTUNIT / (PR_SLOWHZ * TCP_RTTVAR_SCALE));
+			if (rt->rt_rmx.rmx_rttvar && i)
+				rt->rt_rmx.rmx_rttvar =
+				    (rt->rt_rmx.rmx_rttvar + i) / 2;
+			else
+				rt->rt_rmx.rmx_rttvar = i;
+		}
+		/*
+		 * update the pipelimit (ssthresh) if it has been updated
+		 * already or if a pipesize was specified & the threshold
+		 * got below half the pipesize.  I.e., wait for bad news
+		 * before we start updating, then update on both good
+		 * and bad news.
+		 *
+		 * The "already updated || below half the pipesize" test is
+		 * grouped explicitly: without the inner parentheses the
+		 * || escaped the lock check, so a locked metric could be
+		 * overwritten, and i was compared before being assigned
+		 * from snd_ssthresh.
+		 */
+		if ((rt->rt_rmx.rmx_locks & RTV_SSTHRESH) == 0 &&
+		    (i = tp->snd_ssthresh) != 0 &&
+		    (rt->rt_rmx.rmx_ssthresh ||
+		     i < (rt->rt_rmx.rmx_sendpipe / 2))) {
+			/*
+			 * convert the limit from user data bytes to
+			 * packets then to packet data bytes.
+			 */
+			i = (i + tp->t_maxseg / 2) / tp->t_maxseg;
+			if (i < 2)
+				i = 2;
+			i *= (u_long)(tp->t_maxseg + sizeof (struct tcpiphdr));
+			if (rt->rt_rmx.rmx_ssthresh)
+				rt->rt_rmx.rmx_ssthresh =
+				    (rt->rt_rmx.rmx_ssthresh + i) / 2;
+			else
+				rt->rt_rmx.rmx_ssthresh = i;
+		}
+	}
+#endif /* RTV_RTT */
+	/*
+	 * free the reassembly queue, if any: step t forward first, then
+	 * unlink and free the entry just left behind (t->ti_prev).
+	 */
+	t = tp->seg_next;
+	while (t != (struct tcpiphdr *)tp) {
+		t = (struct tcpiphdr *)t->ti_next;
+		m = REASS_MBUF((struct tcpiphdr *)t->ti_prev);
+		remque(t->ti_prev);
+		m_freem(m);
+	}
+	if (tp->t_template)
+		(void) m_free(dtom(tp->t_template));
+	free(tp, M_PCB);
+	inp->inp_ppcb = 0;
+	soisdisconnected(so);
+	/* clobber input pcb cache if we're closing the cached connection */
+	if (inp == tcp_last_inpcb)
+		tcp_last_inpcb = &tcb;
+	in_pcbdetach(inp);
+	tcpstat.tcps_closed++;
+	return ((struct tcpcb *)0);
+}
+
+/*
+ * Drain routine for TCP.  Intentionally a no-op here: the body is
+ * empty and nothing is released.
+ */
+void
+tcp_drain()
+{
+
+}
+
+/*
+ * Notify a tcp user of an asynchronous error.
+ *
+ * Normally the error is only remembered as the connection's soft error
+ * and the user is woken up (for now, nothing more happens until a
+ * soft error can be selected for).  Two special cases:
+ *   - on an established connection, EHOSTUNREACH, ENETUNREACH and
+ *     EHOSTDOWN are ignored outright (no wakeup either);
+ *   - on a connection that has not completed, has retransmitted
+ *     several times and already holds a soft error, the error is
+ *     posted to the socket so the attempt fails now instead of
+ *     waiting a long time for a connection that can never complete.
+ */
+void
+tcp_notify(inp, error)
+	struct inpcb *inp;
+	int error;
+{
+	register struct tcpcb *tp = (struct tcpcb *)inp->inp_ppcb;
+	register struct socket *so = inp->inp_socket;
+
+	if (tp->t_state == TCPS_ESTABLISHED) {
+		switch (error) {
+		case EHOSTUNREACH:
+		case ENETUNREACH:
+		case EHOSTDOWN:
+			/* hooked up: ride out routing trouble silently */
+			return;
+		}
+	}
+
+	if (tp->t_state < TCPS_ESTABLISHED && tp->t_rxtshift > 3 &&
+	    tp->t_softerror) {
+		/* second error on a struggling connect: give up now */
+		so->so_error = error;
+	} else {
+		tp->t_softerror = error;
+	}
+
+	wakeup((caddr_t) &so->so_timeo);
+	sorwakeup(so);
+	sowwakeup(so);
+}
+
+/*
+ * Control input for TCP: dispatch a PRC_* command to the matching
+ * protocol control blocks via in_pcbnotify().
+ *
+ *   cmd  PRC_* command code.  PRC_QUENCH is delivered through
+ *        tcp_quench(); everything else through tcp_notify().
+ *        Commands that are not redirects and either lie outside the
+ *        inetctlerrmap[] table or map to 0 in it are ignored.
+ *   sa   address the command refers to, passed on to in_pcbnotify().
+ *   ip   if non-null, the ip header of the offending packet; the tcp
+ *        header following it supplies the ports used to narrow the
+ *        notification.  If null, wildcard address and ports are used.
+ */
+void
+tcp_ctlinput(cmd, sa, ip)
+	int cmd;
+	struct sockaddr *sa;
+	register struct ip *ip;
+{
+	register struct tcphdr *th;
+	extern struct in_addr zeroin_addr;
+	extern u_char inetctlerrmap[];
+	void (*notify) __P((struct inpcb *, int)) = tcp_notify;
+
+	if (cmd == PRC_QUENCH)
+		notify = tcp_quench;
+	/*
+	 * Valid table indices are 0 .. PRC_NCMDS-1, so cmd == PRC_NCMDS
+	 * must be rejected too; the cast also rejects negative values.
+	 */
+	else if (!PRC_IS_REDIRECT(cmd) &&
+		 ((unsigned)cmd >= PRC_NCMDS || inetctlerrmap[cmd] == 0))
+		return;
+	if (ip) {
+		/* the tcp header starts ip_hl 32-bit words into the packet */
+		th = (struct tcphdr *)((caddr_t)ip + (ip->ip_hl << 2));
+		in_pcbnotify(&tcb, sa, th->th_dport, ip->ip_src, th->th_sport,
+			cmd, notify);
+	} else
+		in_pcbnotify(&tcb, sa, 0, zeroin_addr, 0, cmd, notify);
+}
+
+/*
+ * Source quench handler: shrink the congestion window of the
+ * connection attached to inp to a single segment.  It is opened again
+ * gradually as the connection proceeds.  The errno argument is unused;
+ * a pcb without a tcp control block is left alone.
+ */
+void
+tcp_quench(inp, errno)
+	struct inpcb *inp;
+	int errno;
+{
+	struct tcpcb *cb = intotcpcb(inp);
+
+	if (cb == 0)
+		return;
+	cb->snd_cwnd = cb->t_maxseg;
+}
diff --git a/sys/netinet/tcp_usrreq.c b/sys/netinet/tcp_usrreq.c
new file mode 100644
index 00000000000..38a08d6d0c2
--- /dev/null
+++ b/sys/netinet/tcp_usrreq.c
@@ -0,0 +1,517 @@
+/*
+ * Copyright (c) 1982, 1986, 1988, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)tcp_usrreq.c	8.2 (Berkeley) 1/3/94
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/protosw.h>
+#include <sys/errno.h>
+#include <sys/stat.h>
+
+#include <net/if.h>
+#include <net/route.h>
+
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/ip.h>
+#include <netinet/in_pcb.h>
+#include <netinet/ip_var.h>
+#include <netinet/tcp.h>
+#include <netinet/tcp_fsm.h>
+#include <netinet/tcp_seq.h>
+#include <netinet/tcp_timer.h>
+#include <netinet/tcp_var.h>
+#include <netinet/tcpip.h>
+#include <netinet/tcp_debug.h>
+
+/*
+ * TCP protocol interface to socket abstraction.
+ */
+/* Defined elsewhere.  NOTE(review): presumably the printable names of
+   the TCPS_* states; confirm at the definition. */
+extern	char *tcpstates[];
+
+/*
+ * Process a TCP user request (PRU_*) on socket so.
+ *
+ * The meaning of the mbuf arguments depends on the request:
+ *   - for send requests m is the mbuf chain of send data;
+ *   - for PRU_SLOWTIMO (a timer expiration routed through here for
+ *     tracing's sake) nam is not an mbuf but the timer index, cast;
+ *   - for PRU_RCVOOB nam carries the receive flags (MSG_PEEK), cast;
+ *   - for PRU_SENSE m is really a struct stat to fill in;
+ *   - for PRU_CONTROL all three are reinterpreted and handed to
+ *     in_control().
+ * Non-empty control data is not supported and yields EINVAL.
+ *
+ * Runs at splnet.  Returns 0 or an errno value.
+ */
+/*ARGSUSED*/
+int
+tcp_usrreq(so, req, m, nam, control)
+	struct socket *so;
+	int req;
+	struct mbuf *m, *nam, *control;
+{
+	register struct inpcb *inp;
+	/*
+	 * Must start out null: on a PRU_ATTACH that fails before a pcb
+	 * exists, nothing below assigns tp, yet it is tested after the
+	 * switch for tracing.
+	 */
+	register struct tcpcb *tp = 0;
+	int s;
+	int error = 0;
+	int ostate;
+
+	if (req == PRU_CONTROL)
+		return (in_control(so, (int)m, (caddr_t)nam,
+			(struct ifnet *)control));
+	if (control && control->m_len) {
+		m_freem(control);
+		if (m)
+			m_freem(m);
+		return (EINVAL);
+	}
+
+	s = splnet();
+	inp = sotoinpcb(so);
+	/*
+	 * When a TCP is attached to a socket, then there will be
+	 * a (struct inpcb) pointed at by the socket, and this
+	 * structure will point at a subsidiary (struct tcpcb).
+	 */
+	if (inp == 0 && req != PRU_ATTACH) {
+		splx(s);
+		return (EINVAL);		/* XXX */
+	}
+	if (inp) {
+		tp = intotcpcb(inp);
+		/* WHAT IF TP IS 0? */
+#ifdef KPROF
+		tcp_acounts[tp->t_state][req]++;
+#endif
+		ostate = tp->t_state;
+	} else
+		ostate = 0;
+	switch (req) {
+
+	/*
+	 * TCP attaches to socket via PRU_ATTACH, reserving space,
+	 * and an internet control block.
+	 */
+	case PRU_ATTACH:
+		if (inp) {
+			error = EISCONN;
+			break;
+		}
+		error = tcp_attach(so);
+		if (error)
+			break;
+		if ((so->so_options & SO_LINGER) && so->so_linger == 0)
+			so->so_linger = TCP_LINGERTIME;
+		tp = sototcpcb(so);
+		break;
+
+	/*
+	 * PRU_DETACH detaches the TCP protocol from the socket.
+	 * If the protocol state is non-embryonic, then can't
+	 * do this directly: have to initiate a PRU_DISCONNECT,
+	 * which may finish later; embryonic TCB's can just
+	 * be discarded here.
+	 */
+	case PRU_DETACH:
+		if (tp->t_state > TCPS_LISTEN)
+			tp = tcp_disconnect(tp);
+		else
+			tp = tcp_close(tp);
+		break;
+
+	/*
+	 * Give the socket an address.
+	 */
+	case PRU_BIND:
+		error = in_pcbbind(inp, nam);
+		break;
+
+	/*
+	 * Prepare to accept connections.
+	 */
+	case PRU_LISTEN:
+		if (inp->inp_lport == 0)
+			error = in_pcbbind(inp, (struct mbuf *)0);
+		if (error == 0)
+			tp->t_state = TCPS_LISTEN;
+		break;
+
+	/*
+	 * Initiate connection to peer.
+	 * Create a template for use in transmissions on this connection.
+	 * Enter SYN_SENT state, and mark socket as connecting.
+	 * Start keep-alive timer, and seed output sequence space.
+	 * Send initial segment on connection.
+	 */
+	case PRU_CONNECT:
+		if (inp->inp_lport == 0) {
+			error = in_pcbbind(inp, (struct mbuf *)0);
+			if (error)
+				break;
+		}
+		error = in_pcbconnect(inp, nam);
+		if (error)
+			break;
+		tp->t_template = tcp_template(tp);
+		if (tp->t_template == 0) {
+			/* undo the connect; no mbuf for the template */
+			in_pcbdisconnect(inp);
+			error = ENOBUFS;
+			break;
+		}
+		/* Compute window scaling to request.  */
+		while (tp->request_r_scale < TCP_MAX_WINSHIFT &&
+		    (TCP_MAXWIN << tp->request_r_scale) < so->so_rcv.sb_hiwat)
+			tp->request_r_scale++;
+		soisconnecting(so);
+		tcpstat.tcps_connattempt++;
+		tp->t_state = TCPS_SYN_SENT;
+		tp->t_timer[TCPT_KEEP] = TCPTV_KEEP_INIT;
+		tp->iss = tcp_iss; tcp_iss += TCP_ISSINCR/2;
+		tcp_sendseqinit(tp);
+		error = tcp_output(tp);
+		break;
+
+	/*
+	 * Create a TCP connection between two sockets.
+	 */
+	case PRU_CONNECT2:
+		error = EOPNOTSUPP;
+		break;
+
+	/*
+	 * Initiate disconnect from peer.
+	 * If connection never passed embryonic stage, just drop;
+	 * else if don't need to let data drain, then can just drop anyways,
+	 * else have to begin TCP shutdown process: mark socket disconnecting,
+	 * drain unread data, state switch to reflect user close, and
+	 * send segment (e.g. FIN) to peer.  Socket will be really disconnected
+	 * when peer sends FIN and acks ours.
+	 *
+	 * SHOULD IMPLEMENT LATER PRU_CONNECT VIA REALLOC TCPCB.
+	 */
+	case PRU_DISCONNECT:
+		tp = tcp_disconnect(tp);
+		break;
+
+	/*
+	 * Accept a connection.  Essentially all the work is
+	 * done at higher levels; just return the address
+	 * of the peer, storing through addr.
+	 */
+	case PRU_ACCEPT:
+		in_setpeeraddr(inp, nam);
+		break;
+
+	/*
+	 * Mark the connection as being incapable of further output.
+	 */
+	case PRU_SHUTDOWN:
+		socantsendmore(so);
+		tp = tcp_usrclosed(tp);
+		if (tp)
+			error = tcp_output(tp);
+		break;
+
+	/*
+	 * After a receive, possibly send window update to peer.
+	 */
+	case PRU_RCVD:
+		(void) tcp_output(tp);
+		break;
+
+	/*
+	 * Do a send by putting data in output queue and updating urgent
+	 * marker if URG set.  Possibly send more data.
+	 */
+	case PRU_SEND:
+		sbappend(&so->so_snd, m);
+		error = tcp_output(tp);
+		break;
+
+	/*
+	 * Abort the TCP.
+	 */
+	case PRU_ABORT:
+		tp = tcp_drop(tp, ECONNABORTED);
+		break;
+
+	/*
+	 * Report the send buffer high-water mark as the block size;
+	 * returns directly, without tracing.
+	 */
+	case PRU_SENSE:
+		((struct stat *) m)->st_blksize = so->so_snd.sb_hiwat;
+		(void) splx(s);
+		return (0);
+
+	/*
+	 * Hand the single saved out-of-band byte to the caller in m.
+	 */
+	case PRU_RCVOOB:
+		if ((so->so_oobmark == 0 &&
+		    (so->so_state & SS_RCVATMARK) == 0) ||
+		    so->so_options & SO_OOBINLINE ||
+		    tp->t_oobflags & TCPOOB_HADDATA) {
+			error = EINVAL;
+			break;
+		}
+		if ((tp->t_oobflags & TCPOOB_HAVEDATA) == 0) {
+			error = EWOULDBLOCK;
+			break;
+		}
+		m->m_len = 1;
+		*mtod(m, caddr_t) = tp->t_iobc;
+		/* unless peeking, flip HAVEDATA off and HADDATA on */
+		if (((int)nam & MSG_PEEK) == 0)
+			tp->t_oobflags ^= (TCPOOB_HAVEDATA | TCPOOB_HADDATA);
+		break;
+
+	case PRU_SENDOOB:
+		if (sbspace(&so->so_snd) < -512) {
+			m_freem(m);
+			error = ENOBUFS;
+			break;
+		}
+		/*
+		 * According to RFC961 (Assigned Protocols),
+		 * the urgent pointer points to the last octet
+		 * of urgent data.  We continue, however,
+		 * to consider it to indicate the first octet
+		 * of data past the urgent section.
+		 * Otherwise, snd_up should be one lower.
+		 */
+		sbappend(&so->so_snd, m);
+		tp->snd_up = tp->snd_una + so->so_snd.sb_cc;
+		tp->t_force = 1;
+		error = tcp_output(tp);
+		tp->t_force = 0;
+		break;
+
+	case PRU_SOCKADDR:
+		in_setsockaddr(inp, nam);
+		break;
+
+	case PRU_PEERADDR:
+		in_setpeeraddr(inp, nam);
+		break;
+
+	/*
+	 * TCP slow timer went off; going through this
+	 * routine for tracing's sake.
+	 */
+	case PRU_SLOWTIMO:
+		tp = tcp_timers(tp, (int)nam);
+		req |= (int)nam << 8;		/* for debug's sake */
+		break;
+
+	default:
+		panic("tcp_usrreq");
+	}
+	/* tp is null here if the control block was never made or is gone */
+	if (tp && (so->so_options & SO_DEBUG))
+		tcp_trace(TA_USER, ostate, tp, (struct tcpiphdr *)0, req);
+	splx(s);
+	return (error);
+}
+
+/*
+ * Get or set a socket option.
+ *
+ *   op       PRCO_SETOPT or PRCO_GETOPT.
+ *   level    options not at level IPPROTO_TCP are passed to
+ *            ip_ctloutput().
+ *   optname  TCP_NODELAY or TCP_MAXSEG; anything else yields
+ *            ENOPROTOOPT.
+ *   mp       for PRCO_SETOPT, *mp holds the option value and is freed
+ *            here; for PRCO_GETOPT, *mp is set to a newly allocated
+ *            mbuf holding one int.
+ *
+ * Returns 0 or an errno value; ECONNRESET if the socket has no pcb.
+ * TCP_MAXSEG can only be lowered (or left unchanged), never raised.
+ */
+int
+tcp_ctloutput(op, so, level, optname, mp)
+	int op;
+	struct socket *so;
+	int level, optname;
+	struct mbuf **mp;
+{
+	int error = 0, s;
+	struct inpcb *inp;
+	register struct tcpcb *tp;
+	register struct mbuf *m;
+	register int i;
+
+	s = splnet();
+	inp = sotoinpcb(so);
+	if (inp == NULL) {
+		splx(s);
+		if (op == PRCO_SETOPT && *mp)
+			(void) m_free(*mp);
+		return (ECONNRESET);
+	}
+	if (level != IPPROTO_TCP) {
+		error = ip_ctloutput(op, so, level, optname, mp);
+		splx(s);
+		return (error);
+	}
+	tp = intotcpcb(inp);
+
+	switch (op) {
+
+	case PRCO_SETOPT:
+		m = *mp;
+		switch (optname) {
+
+		case TCP_NODELAY:
+			if (m == NULL || m->m_len < sizeof (int))
+				error = EINVAL;
+			else if (*mtod(m, int *))
+				tp->t_flags |= TF_NODELAY;
+			else
+				tp->t_flags &= ~TF_NODELAY;
+			break;
+
+		case TCP_MAXSEG:
+			/*
+			 * As for TCP_NODELAY, insist on a full int in the
+			 * mbuf before reading one out of it.
+			 */
+			if (m && m->m_len >= sizeof (int) &&
+			    (i = *mtod(m, int *)) > 0 && i <= tp->t_maxseg)
+				tp->t_maxseg = i;
+			else
+				error = EINVAL;
+			break;
+
+		default:
+			error = ENOPROTOOPT;
+			break;
+		}
+		if (m)
+			(void) m_free(m);
+		break;
+
+	case PRCO_GETOPT:
+		/* M_WAIT: the result is used without a null check */
+		*mp = m = m_get(M_WAIT, MT_SOOPTS);
+		m->m_len = sizeof(int);
+
+		switch (optname) {
+		case TCP_NODELAY:
+			*mtod(m, int *) = tp->t_flags & TF_NODELAY;
+			break;
+		case TCP_MAXSEG:
+			*mtod(m, int *) = tp->t_maxseg;
+			break;
+		default:
+			error = ENOPROTOOPT;
+			break;
+		}
+		break;
+	}
+	splx(s);
+	return (error);
+}
+
+/*
+ * Send and receive buffer sizes that tcp_attach() reserves for a
+ * socket whose send or receive high-water mark is still 0.
+ */
+u_long	tcp_sendspace = 1024*8;
+u_long	tcp_recvspace = 1024*8;
+
+/*
+ * Attach TCP protocol to socket: reserve buffer space if the socket
+ * has none yet, allocate the internet protocol control block and the
+ * tcp control block, and leave the new connection in TCPS_CLOSED.
+ *
+ * Returns 0 on success, the error from soreserve() or in_pcballoc(),
+ * or ENOBUFS if no tcp control block could be allocated.
+ */
+int
+tcp_attach(so)
+	struct socket *so;
+{
+	register struct tcpcb *cb;
+	struct inpcb *pcb;
+	int error, nofd;
+
+	if ((so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) &&
+	    (error = soreserve(so, tcp_sendspace, tcp_recvspace)) != 0)
+		return (error);
+
+	if ((error = in_pcballoc(so, &tcb)) != 0)
+		return (error);
+
+	pcb = sotoinpcb(so);
+	cb = tcp_newtcpcb(pcb);
+	if (cb != 0) {
+		cb->t_state = TCPS_CLOSED;
+		return (0);
+	}
+
+	/*
+	 * No control block: back out the pcb.  SS_NOFDREF is masked off
+	 * around the detach so the socket is not freed yet, then put
+	 * back the way it was.					XXX
+	 */
+	nofd = so->so_state & SS_NOFDREF;
+	so->so_state &= ~SS_NOFDREF;
+	in_pcbdetach(pcb);
+	so->so_state |= nofd;
+	return (ENOBUFS);
+}
+
+/*
+ * Initiate (or continue) disconnect.
+ *   - Not yet established: just close the control block.
+ *   - SO_LINGER set with a zero linger time: drop the connection.
+ *   - Otherwise (hard): mark the socket disconnecting, throw away
+ *     pending input, advance the state for a user close, and call
+ *     tcp_output() to send a segment to the peer (with FIN).
+ *
+ * Returns the control block, or null if it was closed on the way.
+ */
+struct tcpcb *
+tcp_disconnect(tp)
+	register struct tcpcb *tp;
+{
+	struct socket *sock = tp->t_inpcb->inp_socket;
+
+	if (tp->t_state < TCPS_ESTABLISHED)
+		return (tcp_close(tp));
+
+	if ((sock->so_options & SO_LINGER) && sock->so_linger == 0)
+		return (tcp_drop(tp, 0));
+
+	soisdisconnecting(sock);
+	sbflush(&sock->so_rcv);
+	tp = tcp_usrclosed(tp);
+	if (tp)
+		(void) tcp_output(tp);
+	return (tp);
+}
+
+/*
+ * User issued close, and wishes to trail through the shutdown states:
+ *   CLOSED, LISTEN, SYN_SENT      never received a SYN: close outright.
+ *   SYN_RECEIVED, ESTABLISHED     got a SYN but have not sent FIN: move
+ *                                 to FIN_WAIT_1 so a FIN goes to the peer.
+ *   CLOSE_WAIT                    peer's FIN already seen: almost done,
+ *                                 move to LAST_ACK.
+ * In every other state our FIN has already been sent (e.g. after
+ * PRU_SHUTDOWN) and the state is left alone; we just play the tedious
+ * game of waiting for the peer to send FIN or to stop answering
+ * keep-alives.  The user may leave the close as soon as the FIN is
+ * acked, hence the socket is marked disconnected from FIN_WAIT_2 on.
+ *
+ * Returns the control block, or null if it was closed here.
+ */
+struct tcpcb *
+tcp_usrclosed(tp)
+	register struct tcpcb *tp;
+{
+	int state = tp->t_state;
+
+	if (state == TCPS_CLOSED || state == TCPS_LISTEN ||
+	    state == TCPS_SYN_SENT) {
+		tp->t_state = TCPS_CLOSED;
+		tp = tcp_close(tp);
+	} else if (state == TCPS_SYN_RECEIVED || state == TCPS_ESTABLISHED) {
+		tp->t_state = TCPS_FIN_WAIT_1;
+	} else if (state == TCPS_CLOSE_WAIT) {
+		tp->t_state = TCPS_LAST_ACK;
+	}
+
+	if (tp && tp->t_state >= TCPS_FIN_WAIT_2)
+		soisdisconnected(tp->t_inpcb->inp_socket);
+	return (tp);
+}
diff --git a/sys/netinet/tcp_var.h b/sys/netinet/tcp_var.h
new file mode 100644
index 00000000000..8a8e7512114
--- /dev/null
+++ b/sys/netinet/tcp_var.h
@@ -0,0 +1,278 @@
+/*
+ * Copyright (c) 1982, 1986, 1993, 1994
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)tcp_var.h	8.3 (Berkeley) 4/10/94
+ */
+
+/*
+ * Kernel variables for tcp.
+ */
+
+/*
+ * Tcp control block, one per tcp; fields:
+ */
+struct tcpcb {
+	struct	tcpiphdr *seg_next;	/* sequencing queue */
+	struct	tcpiphdr *seg_prev;
+	short	t_state;		/* state of this connection */
+	short	t_timer[TCPT_NTIMERS];	/* tcp timers */
+	short	t_rxtshift;		/* log(2) of rexmt exp. backoff */
+	short	t_rxtcur;		/* current retransmit value */
+	short	t_dupacks;		/* consecutive dup acks recd */
+	u_short	t_maxseg;		/* maximum segment size */
+	char	t_force;		/* 1 if forcing out a byte */
+	u_short	t_flags;		/* TF_* bits defined below */
+#define	TF_ACKNOW	0x0001		/* ack peer immediately */
+#define	TF_DELACK	0x0002		/* ack, but try to delay it */
+#define	TF_NODELAY	0x0004		/* don't delay packets to coalesce */
+#define	TF_NOOPT	0x0008		/* don't use tcp options */
+#define	TF_SENTFIN	0x0010		/* have sent FIN */
+#define	TF_REQ_SCALE	0x0020		/* have/will request window scaling */
+#define	TF_RCVD_SCALE	0x0040		/* other side has requested scaling */
+#define	TF_REQ_TSTMP	0x0080		/* have/will request timestamps */
+#define	TF_RCVD_TSTMP	0x0100		/* a timestamp was received in SYN */
+#define	TF_SACK_PERMIT	0x0200		/* other side said I could SACK */
+
+	struct	tcpiphdr *t_template;	/* skeletal packet for transmit */
+	struct	inpcb *t_inpcb;		/* back pointer to internet pcb */
+/*
+ * The following fields are used as in the protocol specification.
+ * See RFC 793, Sep. 1981, section 3.2.
+ */
+/* send sequence variables */
+	tcp_seq	snd_una;		/* send unacknowledged */
+	tcp_seq	snd_nxt;		/* send next */
+	tcp_seq	snd_up;			/* send urgent pointer */
+	tcp_seq	snd_wl1;		/* window update seg seq number */
+	tcp_seq	snd_wl2;		/* window update seg ack number */
+	tcp_seq	iss;			/* initial send sequence number */
+	u_long	snd_wnd;		/* send window */
+/* receive sequence variables */
+	u_long	rcv_wnd;		/* receive window */
+	tcp_seq	rcv_nxt;		/* receive next */
+	tcp_seq	rcv_up;			/* receive urgent pointer */
+	tcp_seq	irs;			/* initial receive sequence number */
+/*
+ * Additional variables for this implementation.
+ */
+/* receive variables */
+	tcp_seq	rcv_adv;		/* advertised window */
+/* retransmit variables */
+	tcp_seq	snd_max;		/* highest sequence number sent;
+					 * used to recognize retransmits
+					 */
+/* congestion control (for slow start, source quench, retransmit after loss) */
+	u_long	snd_cwnd;		/* congestion-controlled window */
+	u_long	snd_ssthresh;		/* snd_cwnd size threshold for
+					 * slow start exponential to
+					 * linear switch
+					 */
+/*
+ * transmit timing stuff.  See below for scale of srtt and rttvar.
+ * "Variance" is actually smoothed difference.
+ */
+	short	t_idle;			/* inactivity time */
+	short	t_rtt;			/* round trip time */
+	tcp_seq	t_rtseq;		/* sequence number being timed */
+	short	t_srtt;			/* smoothed round-trip time */
+	short	t_rttvar;		/* variance in round-trip time */
+	u_short	t_rttmin;		/* minimum rtt allowed */
+	u_long	max_sndwnd;		/* largest window peer has offered */
+
+/* out-of-band data */
+	char	t_oobflags;		/* have some */
+	char	t_iobc;			/* input character */
+#define	TCPOOB_HAVEDATA	0x01
+#define	TCPOOB_HADDATA	0x02
+	short	t_softerror;		/* possible error not yet reported */
+
+/* RFC 1323 variables */
+	u_char	snd_scale;		/* window scaling for send window */
+	u_char	rcv_scale;		/* window scaling for recv window */
+	u_char	request_r_scale;	/* pending window scaling */
+	u_char	requested_s_scale;
+	u_long	ts_recent;		/* timestamp echo data */
+	u_long	ts_recent_age;		/* when last updated */
+	tcp_seq	last_ack_sent;
+
+/* TUBA stuff */
+	caddr_t	t_tuba_pcb;		/* next level down pcb for TCP over z */
+};
+
+#define	intotcpcb(ip)	((struct tcpcb *)(ip)->inp_ppcb)	/* inpcb -> tcpcb */
+#define	sototcpcb(so)	(intotcpcb(sotoinpcb(so)))	/* socket -> tcpcb */
+
+/*
+ * The smoothed round-trip time and estimated variance
+ * are stored as fixed point numbers scaled by the values below.
+ * For convenience, these scales are also used in smoothing the average
+ * (smoothed = (1/scale)sample + ((scale-1)/scale)smoothed).
+ * With these scales, srtt has 3 bits to the right of the binary point,
+ * and thus an "ALPHA" of 0.875.  rttvar has 2 bits to the right of the
+ * binary point, and is smoothed with an ALPHA of 0.75.
+ */
+#define	TCP_RTT_SCALE		8	/* multiplier for srtt; 3 bits frac. */
+#define	TCP_RTT_SHIFT		3	/* shift for srtt; 3 bits frac. */
+#define	TCP_RTTVAR_SCALE	4	/* multiplier for rttvar; 2 bits */
+#define	TCP_RTTVAR_SHIFT	2	/* shift for rttvar; 2 bits */
+
+/*
+ * The initial retransmission should happen at rtt + 4 * rttvar.
+ * Because of the way we do the smoothing, srtt and rttvar
+ * will each average +1/2 tick of bias.  When we compute
+ * the retransmit timer, we want 1/2 tick of rounding and
+ * 1 extra tick because of +-1/2 tick uncertainty in the
+ * firing of the timer.  The bias will give us exactly the
+ * 1.5 tick we need.  But, because the bias is
+ * statistical, we have to test that we don't drop below
+ * the minimum feasible timer (which is 2 ticks).
+ * This macro assumes that TCP_RTTVAR_SCALE equals the multiplier (4)
+ * wanted for rttvar, so the scaled t_rttvar is added without a shift.
+ */
+#define	TCP_REXMTVAL(tp) \
+	(((tp)->t_srtt >> TCP_RTT_SHIFT) + (tp)->t_rttvar)
+
+/* XXX
+ * We want to avoid doing m_pullup on incoming packets but that
+ * means avoiding dtom on the tcp reassembly code.  That in turn means
+ * keeping an mbuf pointer in the reassembly queue (since we might
+ * have a cluster).  As a quick hack, the source & destination
+ * port numbers (which are no longer needed once we've located the
+ * tcpcb) are overlaid with an mbuf pointer.
+ */
+#define REASS_MBUF(ti) (*(struct mbuf **)&((ti)->ti_t))
+
+/*
+ * TCP statistics.
+ * Many of these should be kept per connection,
+ * but that's inconvenient at the moment.
+ */
+struct	tcpstat {
+	u_long	tcps_connattempt;	/* connections initiated */
+	u_long	tcps_accepts;		/* connections accepted */
+	u_long	tcps_connects;		/* connections established */
+	u_long	tcps_drops;		/* connections dropped */
+	u_long	tcps_conndrops;		/* embryonic connections dropped */
+	u_long	tcps_closed;		/* conn. closed (includes drops) */
+	u_long	tcps_segstimed;		/* segs where we tried to get rtt */
+	u_long	tcps_rttupdated;	/* times we succeeded */
+	u_long	tcps_delack;		/* delayed acks sent */
+	u_long	tcps_timeoutdrop;	/* conn. dropped in rxmt timeout */
+	u_long	tcps_rexmttimeo;	/* retransmit timeouts */
+	u_long	tcps_persisttimeo;	/* persist timeouts */
+	u_long	tcps_keeptimeo;		/* keepalive timeouts */
+	u_long	tcps_keepprobe;		/* keepalive probes sent */
+	u_long	tcps_keepdrops;		/* connections dropped in keepalive */
+
+	u_long	tcps_sndtotal;		/* total packets sent */
+	u_long	tcps_sndpack;		/* data packets sent */
+	u_long	tcps_sndbyte;		/* data bytes sent */
+	u_long	tcps_sndrexmitpack;	/* data packets retransmitted */
+	u_long	tcps_sndrexmitbyte;	/* data bytes retransmitted */
+	u_long	tcps_sndacks;		/* ack-only packets sent */
+	u_long	tcps_sndprobe;		/* window probes sent */
+	u_long	tcps_sndurg;		/* packets sent with URG only */
+	u_long	tcps_sndwinup;		/* window update-only packets sent */
+	u_long	tcps_sndctrl;		/* control (SYN|FIN|RST) packets sent */
+
+	u_long	tcps_rcvtotal;		/* total packets received */
+	u_long	tcps_rcvpack;		/* packets received in sequence */
+	u_long	tcps_rcvbyte;		/* bytes received in sequence */
+	u_long	tcps_rcvbadsum;		/* packets received with cksum errs */
+	u_long	tcps_rcvbadoff;		/* packets received with bad offset */
+	u_long	tcps_rcvshort;		/* packets received too short */
+	u_long	tcps_rcvduppack;	/* duplicate-only packets received */
+	u_long	tcps_rcvdupbyte;	/* duplicate-only bytes received */
+	u_long	tcps_rcvpartduppack;	/* packets with some duplicate data */
+	u_long	tcps_rcvpartdupbyte;	/* dup. bytes in part-dup. packets */
+	u_long	tcps_rcvoopack;		/* out-of-order packets received */
+	u_long	tcps_rcvoobyte;		/* out-of-order bytes received */
+	u_long	tcps_rcvpackafterwin;	/* packets with data after window */
+	u_long	tcps_rcvbyteafterwin;	/* bytes rcvd after window */
+	u_long	tcps_rcvafterclose;	/* packets rcvd after "close" */
+	u_long	tcps_rcvwinprobe;	/* rcvd window probe packets */
+	u_long	tcps_rcvdupack;		/* rcvd duplicate acks */
+	u_long	tcps_rcvacktoomuch;	/* rcvd acks for unsent data */
+	u_long	tcps_rcvackpack;	/* rcvd ack packets */
+	u_long	tcps_rcvackbyte;	/* bytes acked by rcvd acks */
+	u_long	tcps_rcvwinupd;		/* rcvd window update packets */
+	u_long	tcps_pawsdrop;		/* segments dropped due to PAWS */
+	u_long	tcps_predack;		/* times hdr predict ok for acks */
+	u_long	tcps_preddat;		/* times hdr predict ok for data pkts */
+	u_long	tcps_pcbcachemiss;	/* presumably pcb cache misses; confirm */
+};
+
+#ifdef KERNEL
+struct	inpcb tcb;		/* head of queue of active tcpcb's */
+struct	tcpstat tcpstat;	/* tcp statistics */
+u_long	tcp_now;		/* for RFC 1323 timestamps */
+
+int	 tcp_attach __P((struct socket *));
+void	 tcp_canceltimers __P((struct tcpcb *));
+struct tcpcb *
+	 tcp_close __P((struct tcpcb *));
+void	 tcp_ctlinput __P((int, struct sockaddr *, struct ip *));
+int	 tcp_ctloutput __P((int, struct socket *, int, int, struct mbuf **));
+struct tcpcb *
+	 tcp_disconnect __P((struct tcpcb *));
+struct tcpcb *
+	 tcp_drop __P((struct tcpcb *, int));
+void	 tcp_dooptions __P((struct tcpcb *,
+	    u_char *, int, struct tcpiphdr *, int *, u_long *, u_long *));
+void	 tcp_drain __P((void));
+void	 tcp_fasttimo __P((void));
+void	 tcp_init __P((void));
+void	 tcp_input __P((struct mbuf *, int));
+int	 tcp_mss __P((struct tcpcb *, u_int));
+struct tcpcb *
+	 tcp_newtcpcb __P((struct inpcb *));
+void	 tcp_notify __P((struct inpcb *, int));
+int	 tcp_output __P((struct tcpcb *));
+void	 tcp_pulloutofband __P((struct socket *,
+	    struct tcpiphdr *, struct mbuf *));
+void	 tcp_quench __P((struct inpcb *, int));
+int	 tcp_reass __P((struct tcpcb *, struct tcpiphdr *, struct mbuf *));
+void	 tcp_respond __P((struct tcpcb *,
+	    struct tcpiphdr *, struct mbuf *, u_long, u_long, int));
+void	 tcp_setpersist __P((struct tcpcb *));
+void	 tcp_slowtimo __P((void));
+struct tcpiphdr *
+	 tcp_template __P((struct tcpcb *));
+struct tcpcb *
+	 tcp_timers __P((struct tcpcb *, int));
+void	 tcp_trace __P((int, int, struct tcpcb *, struct tcpiphdr *, int));
+struct tcpcb *
+	 tcp_usrclosed __P((struct tcpcb *));
+int	 tcp_usrreq __P((struct socket *,
+	    int, struct mbuf *, struct mbuf *, struct mbuf *));
+void	 tcp_xmit_timer __P((struct tcpcb *, int));
+#endif
diff --git a/sys/netinet/tcpip.h b/sys/netinet/tcpip.h
new file mode 100644
index 00000000000..5000ae303ce
--- /dev/null
+++ b/sys/netinet/tcpip.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 1982, 1986, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)tcpip.h	8.1 (Berkeley) 6/10/93
+ */
+
+/*
+ * Tcp+ip header, after ip options removed.
+ */
+struct tcpiphdr {
+	struct 	ipovly ti_i;		/* overlaid ip structure */
+	struct	tcphdr ti_t;		/* tcp header */
+};
+#define	ti_next		ti_i.ih_next	/* shorthands for ti_i members */
+#define	ti_prev		ti_i.ih_prev
+#define	ti_x1		ti_i.ih_x1
+#define	ti_pr		ti_i.ih_pr
+#define	ti_len		ti_i.ih_len
+#define	ti_src		ti_i.ih_src
+#define	ti_dst		ti_i.ih_dst
+#define	ti_sport	ti_t.th_sport	/* shorthands for ti_t members */
+#define	ti_dport	ti_t.th_dport
+#define	ti_seq		ti_t.th_seq
+#define	ti_ack		ti_t.th_ack
+#define	ti_x2		ti_t.th_x2
+#define	ti_off		ti_t.th_off
+#define	ti_flags	ti_t.th_flags
+#define	ti_win		ti_t.th_win
+#define	ti_sum		ti_t.th_sum
+#define	ti_urp		ti_t.th_urp
diff --git a/sys/netinet/udp.h b/sys/netinet/udp.h
new file mode 100644
index 00000000000..354a213cbc2
--- /dev/null
+++ b/sys/netinet/udp.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 1982, 1986, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)udp.h	8.1 (Berkeley) 6/10/93
+ */
+
+/*
+ * Udp protocol header.
+ * Per RFC 768, August, 1980.
+ */
+struct udphdr {
+	u_short	uh_sport;		/* source port */
+	u_short	uh_dport;		/* destination port */
+	short	uh_ulen;		/* udp length */
+	u_short	uh_sum;			/* udp checksum */
+};
diff --git a/sys/netinet/udp_usrreq.c b/sys/netinet/udp_usrreq.c
new file mode 100644
index 00000000000..95b1895ac0a
--- /dev/null
+++ b/sys/netinet/udp_usrreq.c
@@ -0,0 +1,640 @@
+/*
+ * Copyright (c) 1982, 1986, 1988, 1990, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)udp_usrreq.c	8.4 (Berkeley) 1/21/94
+ */
+
+#include <sys/param.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/protosw.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/errno.h>
+#include <sys/stat.h>
+
+#include <net/if.h>
+#include <net/route.h>
+
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/ip.h>
+#include <netinet/in_pcb.h>
+#include <netinet/ip_var.h>
+#include <netinet/ip_icmp.h>
+#include <netinet/udp.h>
+#include <netinet/udp_var.h>
+
+/*
+ * UDP protocol implementation.
+ * Per RFC 768, August, 1980.
+ */
+#ifndef	COMPAT_42
+int	udpcksum = 1;		/* nonzero: generate and verify checksums */
+#else
+int	udpcksum = 0;		/* XXX */
+#endif
+
+struct	sockaddr_in udp_in = { sizeof(udp_in), AF_INET }; /* src addr scratch */
+struct	inpcb *udp_last_inpcb = &udb;	/* pcb of last unicast lookup hit */
+
+static	void udp_detach __P((struct inpcb *));
+static	void udp_notify __P((struct inpcb *, int));
+static	struct mbuf *udp_saveopt __P((caddr_t, int, int));
+
+void
+udp_init()
+{
+	udb.inp_next = udb.inp_prev = &udb;	/* empty list: head links to itself */
+}
+
+/*
+ * Process an incoming UDP datagram: m holds the packet starting at the
+ * IP header, whose length (options included) is iphlen.  Valid datagrams
+ * are queued on the matching socket(s); a unicast datagram that matches
+ * no socket is answered with an ICMP port-unreachable error.
+ */
+void
+udp_input(m, iphlen)
+	register struct mbuf *m;
+	int iphlen;
+{
+	register struct ip *ip;
+	register struct udphdr *uh;
+	register struct inpcb *inp;
+	struct mbuf *opts = 0;
+	int len;
+	struct ip save_ip;
+
+	udpstat.udps_ipackets++;
+
+	/*
+	 * Strip IP options, if any; should skip this,
+	 * make available to user, and use on returned packets,
+	 * but we don't yet have a way to check the checksum
+	 * with options still present.
+	 */
+	if (iphlen > sizeof (struct ip)) {
+		ip_stripoptions(m, (struct mbuf *)0);
+		iphlen = sizeof(struct ip);
+	}
+
+	/* Get IP and UDP header together in first mbuf. */
+	ip = mtod(m, struct ip *);
+	if (m->m_len < iphlen + sizeof(struct udphdr)) {
+		if ((m = m_pullup(m, iphlen + sizeof(struct udphdr))) == 0) {
+			udpstat.udps_hdrops++;
+			return;
+		}
+		ip = mtod(m, struct ip *);
+	}
+	uh = (struct udphdr *)((caddr_t)ip + iphlen);
+
+	/*
+	 * Make mbuf data length reflect UDP length.
+	 * If not enough data to reflect UDP length, drop.
+	 */
+	len = ntohs((u_short)uh->uh_ulen);
+	if (ip->ip_len != len) {
+		if (len > ip->ip_len) {
+			udpstat.udps_badlen++;
+			goto bad;
+		}
+		m_adj(m, len - ip->ip_len);
+		/* ip->ip_len = len; */
+	}
+	/*
+	 * Save a copy of the IP header in case we want to restore it
+	 * for sending an ICMP error message in response.
+	 */
+	save_ip = *ip;
+
+	/* Checksum extended UDP header and data. */
+	if (udpcksum && uh->uh_sum) {
+		((struct ipovly *)ip)->ih_next = 0;
+		((struct ipovly *)ip)->ih_prev = 0;
+		((struct ipovly *)ip)->ih_x1 = 0;
+		((struct ipovly *)ip)->ih_len = uh->uh_ulen;
+		if (uh->uh_sum = in_cksum(m, len + sizeof (struct ip))) {
+			udpstat.udps_badsum++;
+			m_freem(m);
+			return;
+		}
+	}
+
+	if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) ||
+	    in_broadcast(ip->ip_dst, m->m_pkthdr.rcvif)) {
+		struct socket *last;
+		/*
+		 * Deliver a multicast or broadcast datagram to *all* sockets
+		 * for which the local and remote addresses and ports match
+		 * those of the incoming datagram.  This allows more than
+		 * one process to receive multi/broadcasts on the same port.
+		 * (This really ought to be done for unicast datagrams as
+		 * well, but that would cause problems with existing
+		 * applications that open both address-specific sockets and
+		 * a wildcard socket listening to the same port -- they would
+		 * end up receiving duplicates of every unicast datagram.
+		 * Those applications open the multiple sockets to overcome an
+		 * inadequacy of the UDP socket interface, but for backwards
+		 * compatibility we avoid the problem here rather than
+		 * fixing the interface.  Maybe 4.5BSD will remedy this?)
+		 */
+
+		/* Construct sockaddr format source address. */
+		udp_in.sin_port = uh->uh_sport;
+		udp_in.sin_addr = ip->ip_src;
+		/* NOTE(review): m_pkthdr.len is not trimmed here -- confirm. */
+		m->m_len -= sizeof (struct udpiphdr);
+		m->m_data += sizeof (struct udpiphdr);
+		/*
+		 * Locate pcb(s) for datagram.
+		 * (Algorithm copied from raw_intr().)
+		 */
+		last = NULL;
+		for (inp = udb.inp_next; inp != &udb; inp = inp->inp_next) {
+			if (inp->inp_lport != uh->uh_dport)
+				continue;
+			if (inp->inp_laddr.s_addr != INADDR_ANY) {
+				if (inp->inp_laddr.s_addr !=
+				    ip->ip_dst.s_addr)
+					continue;
+			}
+			if (inp->inp_faddr.s_addr != INADDR_ANY) {
+				if (inp->inp_faddr.s_addr !=
+				    ip->ip_src.s_addr ||
+				    inp->inp_fport != uh->uh_sport)
+					continue;
+			}
+
+			if (last != NULL) {
+				struct mbuf *n;
+
+				if ((n = m_copy(m, 0, M_COPYALL)) != NULL) {
+					if (sbappendaddr(&last->so_rcv,
+						(struct sockaddr *)&udp_in,
+						n, (struct mbuf *)0) == 0) {
+						m_freem(n);
+						udpstat.udps_fullsock++;
+					} else
+						sorwakeup(last);
+				}
+			}
+			last = inp->inp_socket;
+			/*
+			 * Don't look for additional matches if this one does
+			 * not have either the SO_REUSEPORT or SO_REUSEADDR
+			 * socket options set.  This heuristic avoids searching
+			 * through all pcbs in the common case of a non-shared
+			 * port.  It assumes that an application will never
+			 * clear these options after setting them.  The mask is
+			 * parenthesized because == binds tighter than &.
+			 */
+			if ((last->so_options&(SO_REUSEPORT|SO_REUSEADDR)) == 0)
+				break;
+		}
+
+		if (last == NULL) {
+			/*
+			 * No matching pcb found; discard datagram.
+			 * (No need to send an ICMP Port Unreachable
+			 * for a broadcast or multicast datagram.)
+			 */
+			udpstat.udps_noportbcast++;
+			goto bad;
+		}
+		if (sbappendaddr(&last->so_rcv, (struct sockaddr *)&udp_in,
+		     m, (struct mbuf *)0) == 0) {
+			udpstat.udps_fullsock++;
+			goto bad;
+		}
+		sorwakeup(last);
+		return;
+	}
+	/* Locate pcb for datagram. */
+	inp = udp_last_inpcb;
+	if (inp->inp_lport != uh->uh_dport ||
+	    inp->inp_fport != uh->uh_sport ||
+	    inp->inp_faddr.s_addr != ip->ip_src.s_addr ||
+	    inp->inp_laddr.s_addr != ip->ip_dst.s_addr) {
+		inp = in_pcblookup(&udb, ip->ip_src, uh->uh_sport,
+		    ip->ip_dst, uh->uh_dport, INPLOOKUP_WILDCARD);
+		if (inp)
+			udp_last_inpcb = inp;
+		udpstat.udpps_pcbcachemiss++;
+	}
+	if (inp == 0) {
+		udpstat.udps_noport++;
+		if (m->m_flags & (M_BCAST | M_MCAST)) {
+			udpstat.udps_noportbcast++;
+			goto bad;
+		}
+		*ip = save_ip;
+		ip->ip_len += iphlen;
+		icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_PORT, 0, 0);
+		return;
+	}
+
+	/*
+	 * Construct sockaddr format source address.
+	 * Stuff source address and datagram in user buffer.
+	 */
+	udp_in.sin_port = uh->uh_sport;
+	udp_in.sin_addr = ip->ip_src;
+	if (inp->inp_flags & INP_CONTROLOPTS) {
+		struct mbuf **mp = &opts;
+
+		if (inp->inp_flags & INP_RECVDSTADDR) {
+			*mp = udp_saveopt((caddr_t) &ip->ip_dst,
+			    sizeof(struct in_addr), IP_RECVDSTADDR);
+			if (*mp)
+				mp = &(*mp)->m_next;
+		}
+#ifdef notyet
+		/* options were tossed above */
+		if (inp->inp_flags & INP_RECVOPTS) {
+			*mp = udp_saveopt((caddr_t) opts_deleted_above,
+			    sizeof(struct in_addr), IP_RECVOPTS);
+			if (*mp)
+				mp = &(*mp)->m_next;
+		}
+		/* ip_srcroute doesn't do what we want here, need to fix */
+		if (inp->inp_flags & INP_RECVRETOPTS) {
+			*mp = udp_saveopt((caddr_t) ip_srcroute(),
+			    sizeof(struct in_addr), IP_RECVRETOPTS);
+			if (*mp)
+				mp = &(*mp)->m_next;
+		}
+#endif
+	}
+	iphlen += sizeof(struct udphdr);
+	m->m_len -= iphlen;
+	m->m_pkthdr.len -= iphlen;
+	m->m_data += iphlen;
+	if (sbappendaddr(&inp->inp_socket->so_rcv, (struct sockaddr *)&udp_in,
+	    m, opts) == 0) {
+		udpstat.udps_fullsock++;
+		goto bad;
+	}
+	sorwakeup(inp->inp_socket);
+	return;
+bad:
+	m_freem(m);
+	if (opts)
+		m_freem(opts);
+}
+
+/*
+ * Create a "control" mbuf holding a cmsghdr of the given type (level
+ * IPPROTO_IP) followed by size bytes copied from p; null if no mbuf.
+ */
+static struct mbuf *
+udp_saveopt(p, size, type)
+	caddr_t p;
+	register int size;
+	int type;
+{
+	register struct cmsghdr *cp;
+	struct mbuf *m;
+
+	if ((m = m_get(M_DONTWAIT, MT_CONTROL)) == NULL)
+		return ((struct mbuf *) NULL);
+	cp = mtod(m, struct cmsghdr *);
+	bcopy(p, CMSG_DATA(cp), size);	/* NOTE(review): size is not bounded */
+	size += sizeof(*cp);		/* lengths below include the header */
+	m->m_len = size;
+	cp->cmsg_len = size;
+	cp->cmsg_level = IPPROTO_IP;
+	cp->cmsg_type = type;
+	return (m);
+}
+
+/*
+ * Notify a udp user of an asynchronous error: record it in so_error,
+ * then wake up both readers and writers so the error can be collected.
+ */
+static void
+udp_notify(inp, errno)
+	register struct inpcb *inp;
+	int errno;
+{
+	inp->inp_socket->so_error = errno;
+	sorwakeup(inp->inp_socket);
+	sowwakeup(inp->inp_socket);
+}
+
+void
+udp_ctlinput(cmd, sa, ip)
+	int cmd;
+	struct sockaddr *sa;
+	register struct ip *ip;
+{
+	register struct udphdr *uh;
+	extern struct in_addr zeroin_addr;
+	extern u_char inetctlerrmap[];
+
+	if (!PRC_IS_REDIRECT(cmd) &&	/* redirects are always passed on */
+	    ((unsigned)cmd >= PRC_NCMDS || inetctlerrmap[cmd] == 0))
+		return;		/* out of range, or inetctlerrmap entry is 0 */
+	if (ip) {		/* UDP header follows the ip_hl-word IP header */
+		uh = (struct udphdr *)((caddr_t)ip + (ip->ip_hl << 2));
+		in_pcbnotify(&udb, sa, uh->uh_dport, ip->ip_src, uh->uh_sport,
+			cmd, udp_notify);
+	} else			/* no header: notify with zero ports/address */
+		in_pcbnotify(&udb, sa, 0, zeroin_addr, 0, cmd, udp_notify);
+}
+
+/*
+ * Send the datagram in m from inp.  A non-null addr connects the pcb
+ * just for this send; otherwise the pcb must already be connected.
+ * Control mbufs are freed unused.  Returns 0 or an errno value.
+ */
+int
+udp_output(inp, m, addr, control)
+	register struct inpcb *inp;
+	register struct mbuf *m;
+	struct mbuf *addr, *control;
+{
+	register struct udpiphdr *ui;
+	register int len = m->m_pkthdr.len;
+	struct in_addr laddr;
+	int s, error = 0;
+
+	if (control)
+		m_freem(control);		/* XXX */
+
+	if (addr) {
+		laddr = inp->inp_laddr;
+		if (inp->inp_faddr.s_addr != INADDR_ANY) {
+			error = EISCONN;
+			goto release;
+		}
+		/* Must block input while temporarily connected. */
+		s = splnet();
+		error = in_pcbconnect(inp, addr);
+		if (error) {
+			splx(s);
+			goto release;
+		}
+	} else if (inp->inp_faddr.s_addr == INADDR_ANY) {
+		error = ENOTCONN;
+		goto release;
+	}
+	/*
+	 * Get an mbuf for the UDP and IP headers.  If that fails m is null:
+	 * nothing is left to free, but a temporary connect must be undone.
+	 */
+	M_PREPEND(m, sizeof(struct udpiphdr), M_DONTWAIT);
+	if (m == 0) {
+		error = ENOBUFS;
+		goto disconnect;
+	}
+
+	/*
+	 * Fill in mbuf with extended UDP header
+	 * and addresses and length put into network format.
+	 */
+	ui = mtod(m, struct udpiphdr *);
+	ui->ui_next = ui->ui_prev = 0;
+	ui->ui_x1 = 0;
+	ui->ui_pr = IPPROTO_UDP;
+	ui->ui_len = htons((u_short)len + sizeof (struct udphdr));
+	ui->ui_src = inp->inp_laddr;
+	ui->ui_dst = inp->inp_faddr;
+	ui->ui_sport = inp->inp_lport;
+	ui->ui_dport = inp->inp_fport;
+	ui->ui_ulen = ui->ui_len;
+
+	/* Stuff checksum and output datagram. */
+	ui->ui_sum = 0;
+	if (udpcksum) {
+	    if ((ui->ui_sum = in_cksum(m, sizeof (struct udpiphdr) + len)) == 0)
+		ui->ui_sum = 0xffff;
+	}
+	((struct ip *)ui)->ip_len = sizeof (struct udpiphdr) + len;
+	((struct ip *)ui)->ip_ttl = inp->inp_ip.ip_ttl;	/* XXX */
+	((struct ip *)ui)->ip_tos = inp->inp_ip.ip_tos;	/* XXX */
+	udpstat.udps_opackets++;
+	error = ip_output(m, inp->inp_options, &inp->inp_route,
+	    inp->inp_socket->so_options & (SO_DONTROUTE | SO_BROADCAST),
+	    inp->inp_moptions);
+
+disconnect:
+	if (addr) {
+		in_pcbdisconnect(inp);
+		inp->inp_laddr = laddr;
+		splx(s);
+	}
+	return (error);
+
+release:
+	m_freem(m);
+	return (error);
+}
+
+u_long	udp_sendspace = 9216;		/* really max datagram size */
+u_long	udp_recvspace = 40 * (1024 + sizeof(struct sockaddr_in));
+					/* 40 1K datagrams, each with an address */
+
+/*ARGSUSED*/
+int
+udp_usrreq(so, req, m, addr, control)
+	struct socket *so;
+	int req;
+	struct mbuf *m, *addr, *control;
+{
+	struct inpcb *inp = sotoinpcb(so);
+	int error = 0;
+	int s;
+
+	if (req == PRU_CONTROL)		/* m, addr, control are recast, not freed */
+		return (in_control(so, (int)m, (caddr_t)addr,
+			(struct ifnet *)control));
+	if (inp == NULL && req != PRU_ATTACH) {
+		error = EINVAL;
+		goto release;
+	}
+	/*
+	 * Note: need to block udp_input while changing
+	 * the udp pcb queue and/or pcb addresses.
+	 */
+	switch (req) {
+
+	case PRU_ATTACH:
+		if (inp != NULL) {
+			error = EINVAL;
+			break;
+		}
+		s = splnet();
+		error = in_pcballoc(so, &udb);
+		splx(s);
+		if (error)
+			break;
+		error = soreserve(so, udp_sendspace, udp_recvspace);
+		if (error)
+			break;
+		((struct inpcb *) so->so_pcb)->inp_ip.ip_ttl = ip_defttl;
+		break;
+
+	case PRU_DETACH:
+		udp_detach(inp);
+		break;
+
+	case PRU_BIND:
+		s = splnet();
+		error = in_pcbbind(inp, addr);
+		splx(s);
+		break;
+
+	case PRU_LISTEN:
+		error = EOPNOTSUPP;
+		break;
+
+	case PRU_CONNECT:
+		if (inp->inp_faddr.s_addr != INADDR_ANY) {
+			error = EISCONN;
+			break;
+		}
+		s = splnet();
+		error = in_pcbconnect(inp, addr);
+		splx(s);
+		if (error == 0)
+			soisconnected(so);
+		break;
+
+	case PRU_CONNECT2:
+		error = EOPNOTSUPP;
+		break;
+
+	case PRU_ACCEPT:
+		error = EOPNOTSUPP;
+		break;
+
+	case PRU_DISCONNECT:
+		if (inp->inp_faddr.s_addr == INADDR_ANY) {
+			error = ENOTCONN;
+			break;
+		}
+		s = splnet();
+		in_pcbdisconnect(inp);
+		inp->inp_laddr.s_addr = INADDR_ANY;
+		splx(s);
+		so->so_state &= ~SS_ISCONNECTED;		/* XXX */
+		break;
+
+	case PRU_SHUTDOWN:
+		socantsendmore(so);
+		break;
+
+	case PRU_SEND:		/* udp_output disposes of m and control itself */
+		return (udp_output(inp, m, addr, control));
+
+	case PRU_ABORT:
+		soisdisconnected(so);
+		udp_detach(inp);
+		break;
+
+	case PRU_SOCKADDR:
+		in_setsockaddr(inp, addr);
+		break;
+
+	case PRU_PEERADDR:
+		in_setpeeraddr(inp, addr);
+		break;
+
+	case PRU_SENSE:
+		/*
+		 * stat: don't bother with a blocksize.
+		 */
+		return (0);
+
+	case PRU_SENDOOB:
+	case PRU_FASTTIMO:
+	case PRU_SLOWTIMO:
+	case PRU_PROTORCV:
+	case PRU_PROTOSEND:
+		error =  EOPNOTSUPP;
+		break;
+
+	case PRU_RCVD:
+	case PRU_RCVOOB:
+		return (EOPNOTSUPP);	/* do not free mbuf's */
+
+	default:
+		panic("udp_usrreq");
+	}
+
+release:			/* common exit: free whatever mbufs remain */
+	if (control) {
+		printf("udp control data unexpectedly retained\n");
+		m_freem(control);
+	}
+	if (m)
+		m_freem(m);
+	return (error);
+}
+
+static void
+udp_detach(inp)
+	struct inpcb *inp;
+{
+	int ospl = splnet();		/* keep udp_input away while unlinking */
+
+	if (udp_last_inpcb == inp)	/* forget the cached lookup result */
+		udp_last_inpcb = &udb;
+	in_pcbdetach(inp);
+	splx(ospl);
+}
+
+/*
+ * Sysctl for udp variables; all sysctl names at this level are terminal.
+ */
+int
+udp_sysctl(name, namelen, oldp, oldlenp, newp, newlen)
+	int *name;
+	u_int namelen;
+	void *oldp;
+	size_t *oldlenp;
+	void *newp;
+	size_t newlen;
+{
+	if (namelen != 1)		/* more than one component left */
+		return (ENOTDIR);
+
+	switch (name[0]) {
+	case UDPCTL_CHECKSUM:		/* udpcksum, via sysctl_int() */
+		return (sysctl_int(oldp, oldlenp, newp, newlen, &udpcksum));
+	default:
+		return (ENOPROTOOPT);
+	}
+	/* NOTREACHED */
+}
diff --git a/sys/netinet/udp_var.h b/sys/netinet/udp_var.h
new file mode 100644
index 00000000000..e8a21d261c5
--- /dev/null
+++ b/sys/netinet/udp_var.h
@@ -0,0 +1,92 @@
+/*
+ * Copyright (c) 1982, 1986, 1989, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)udp_var.h	8.1 (Berkeley) 6/10/93
+ */
+
+/*
+ * UDP kernel structures and variables.
+ *
+ * struct udpiphdr places an ipovly structure directly in front of a
+ * udphdr.  The ui_* macros that follow are shorthands that reach through
+ * ui_i and ui_u, so the nested members can be written as if they were
+ * direct members of udpiphdr.
+ */
+struct	udpiphdr {
+	struct 	ipovly ui_i;		/* overlaid ip structure */
+	struct	udphdr ui_u;		/* udp header */
+};
+#define	ui_next		ui_i.ih_next
+#define	ui_prev		ui_i.ih_prev
+#define	ui_x1		ui_i.ih_x1
+#define	ui_pr		ui_i.ih_pr
+#define	ui_len		ui_i.ih_len
+#define	ui_src		ui_i.ih_src
+#define	ui_dst		ui_i.ih_dst
+#define	ui_sport	ui_u.uh_sport
+#define	ui_dport	ui_u.uh_dport
+#define	ui_ulen		ui_u.uh_ulen
+#define	ui_sum		ui_u.uh_sum
+
+struct	udpstat {			/* UDP packet counters */
+				/* input statistics: */
+	u_long	udps_ipackets;		/* total input packets */
+	u_long	udps_hdrops;		/* packet shorter than header */
+	u_long	udps_badsum;		/* checksum error */
+	u_long	udps_badlen;		/* data length larger than packet */
+	u_long	udps_noport;		/* no socket on port */
+	u_long	udps_noportbcast;	/* of above, arrived as broadcast */
+	u_long	udps_fullsock;		/* not delivered, input socket full */
+	u_long	udpps_pcbcachemiss;	/* input packets missing pcb cache
+					 * (NOTE: spelled "udpps_", unlike
+					 * the other members) */
+				/* output statistics: */
+	u_long	udps_opackets;		/* total output packets */
+};
+
+/*
+ * Names for UDP sysctl objects
+ *
+ * UDPCTL_MAXID is one more than the highest id defined here.
+ * UDPCTL_NAMES lists one { name, type } pair per id, in id order; the
+ * leading { 0, 0 } fills slot 0, for which no id is defined.
+ */
+#define	UDPCTL_CHECKSUM		1	/* checksum UDP packets */
+#define UDPCTL_MAXID		2
+
+#define UDPCTL_NAMES { \
+	{ 0, 0 }, \
+	{ "checksum", CTLTYPE_INT }, \
+}
+
+#ifdef KERNEL
+struct	inpcb udb;		/* udp_detach() points udp_last_inpcb here */
+struct	udpstat udpstat;	/* the counters laid out by struct udpstat;
+				 * NOTE(review): udb and udpstat carry no
+				 * "extern", so every file including this
+				 * header gets a tentative definition */
+
+void	 udp_ctlinput __P((int, struct sockaddr *, struct ip *));
+void	 udp_init __P((void));
+void	 udp_input __P((struct mbuf *, int));
+int	 udp_output __P((struct inpcb *,
+	    struct mbuf *, struct mbuf *, struct mbuf *));
+int	 udp_sysctl __P((int *, u_int, void *, size_t *, void *, size_t));
+int	 udp_usrreq __P((struct socket *,
+	    int, struct mbuf *, struct mbuf *, struct mbuf *));
+#endif
diff --git a/sys/netiso/argo_debug.h b/sys/netiso/argo_debug.h
new file mode 100644
index 00000000000..653982f005a
--- /dev/null
+++ b/sys/netiso/argo_debug.h
@@ -0,0 +1,296 @@
+/*-
+ * Copyright (c) 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)argo_debug.h	8.1 (Berkeley) 6/10/93
+ */
+
+/*****************************************************************
+				Copyright IBM Corporation 1987
+
+                      All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and its 
+documentation for any purpose and without fee is hereby granted, 
+provided that the above copyright notice appear in all copies and that
+both that copyright notice and this permission notice appear in 
+supporting documentation, and that the name of IBM not be
+used in advertising or publicity pertaining to distribution of the
+software without specific, written prior permission.  
+
+IBM DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING
+ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL
+IBM BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR
+ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+SOFTWARE.
+
+******************************************************************/
+
+/*
+ * ARGO Project, Computer Sciences Dept., University of Wisconsin - Madison
+ */
+/* 
+ * $Header: argo_debug.h,v 4.6 88/07/19 15:53:40 hagens Exp $
+ * $Source: /usr/argo/sys/netiso/RCS/argo_debug.h,v $
+ */
+
+#ifndef __ARGO_DEBUG__
+#define __ARGO_DEBUG__
+
+#define dump_buf(a, b) Dump_buf((caddr_t)(a), (int)(b))
+
+/***********************************************
+ * Lint stuff
+ **********************************************/
+#if	defined(lint)
+/* 
+ * lint can't handle the flaky vacuous definitions 
+ * of IFDEBUG, ENDDEBUG, etc.
+ */
+#endif	/* defined(lint) */
+
+/***********************************************
+ * DEBUG ON:
+ *
+ * ARGO_DEBUG is defined here unless it already is, so every
+ * "#ifdef ARGO_DEBUG" test further down this header succeeds.
+ *
+ * IFDEBUG(x) opens "if (argo_debug[x]) {" and ENDDEBUG closes it with
+ * "; }", so the two must always be used as a pair around statements.
+ **********************************************/
+#ifndef ARGO_DEBUG
+#define ARGO_DEBUG
+#endif /* ARGO_DEBUG */
+
+
+#ifdef ARGO_DEBUG
+/*
+    #ifndef TPPT
+    #define TPPT
+    #endif TPPT
+
+    #ifndef TP_PERF_MEAS
+    #define TP_PERF_MEAS
+    #endif TP_PERF_MEAS
+*/
+
+unsigned char	argo_debug[128];	/* IFDEBUG(x) tests argo_debug[x] */
+
+#define IFDEBUG(ascii) \
+	if(argo_debug[ascii]) { 
+#define ENDDEBUG  ; }
+
+#else  /* ARGO_DEBUG */
+
+/***********************************************
+ * DEBUG OFF:
+ *
+ * As this header stands, ARGO_DEBUG is always defined by the
+ * DEBUG ON lines above, so this half is never compiled.
+ *
+ * NOTE(review): these two definitions look like an attempt to make
+ * the preprocessor assemble comment delimiters around whatever sits
+ * between IFDEBUG and ENDDEBUG.  A standard C preprocessor strips
+ * comments before it expands macros, so confirm the expansion with
+ * the compiler in use before relying on this branch.
+ **********************************************/
+
+#ifndef STAR
+#define STAR *
+#endif	/* STAR */
+#define IFDEBUG(ascii)	 //*beginning of comment*/STAR
+#define ENDDEBUG	 STAR/*end of comment*//
+
+#endif /* ARGO_DEBUG */
+
+/***********************************************
+ * ASSERT 
+ **********************************************/
+#ifdef ARGO_DEBUG
+
+#ifndef lint
+#define ASSERT(phrase) \
+if( !(phrase) ) printf("ASSERTION NOT VALID at line %d file %s\n",__LINE__,__FILE__)
+#else /* lint */
+#define ASSERT(phrase) /* phrase */
+#endif /* lint */
+
+#else /* ARGO_DEBUG */
+
+#define ASSERT(phrase) /* phrase */
+
+#endif /* ARGO_DEBUG */
+
+
+/***********************************************
+ * CLNP DEBUG OPTIONS
+ **********************************************/
+#define	D_INPUT			'\1'
+/* clnp input */
+#define	D_OUTPUT		'\2'
+/* clnp output */
+#define	D_ROUTE			'\3'
+/* clnp routing */
+#define	D_CTLINPUT		'\4'
+/* clnp control input */
+#define	D_CTLOUTPUT		'\5'
+/* clnp control output */
+#define D_OPTIONS		'\6'
+/* clnp options */
+#define	D_IOCTL			'\7'
+/* iso ioctls */
+#define D_ETHER			'\10'
+/* clnp over ethernet */
+#define D_TOKEN			'\11'
+/* clnp over token ring */
+#define D_ADCOM			'\12'
+/* clnp over the adcom */
+#define D_ISO			'\13'	
+/* iso address family */
+#define	D_FORWARD		'\14'
+/* clnp forwarding */
+#define	D_DUMPOUT		'\15'
+/* dump clnp outgoing packets */
+#define	D_DUMPIN		'\16'	
+/* dump clnp input packets */
+#define D_DISCARD		'\17'	
+/* debug clnp packet discard/er function */
+#define D_FRAG			'\20'	
+/* clnp fragmentation */
+#define	D_REASS			'\21'	
+/* clnp reassembly */
+
+char *clnp_iso_addrp();
+
+/***********************************************
+ * ESIS DEBUG OPTIONS
+ **********************************************/
+#define	D_ESISOUTPUT	'\30'
+#define	D_ESISINPUT		'\31'
+#define D_SNPA			'\32'
+
+/***********************************************
+ * ISIS DEBUG OPTIONS
+ **********************************************/
+#define D_ISISOUTPUT	'\40'
+#define D_ISISINPUT		'\41'
+
+/***********************************************
+ * EON DEBUG OPTION
+ **********************************************/
+#define	D_EON			'\57'
+
+/***********************************************
+ * CONS DEBUG OPTIONS
+ **********************************************/
+
+#define D_ECNWORK		'\60'
+#define D_ECNOUT		'\61'
+#define D_ECNFIN		'\62'
+#define D_ECNDWN		'\63'
+#define D_ECNUTIL		'\64'
+
+#define D_INCOMING		'\70'
+#define D_CDATA			'\71'
+#define D_CFIND			'\72'
+#define D_CDUMP_REQ		'\73'
+#define D_CADDR			'\74'
+#define D_CCONS			'\75'
+#define D_CCONN			'\76'
+
+
+/***********************************************
+ * TP DEBUG OPTIONS
+ **********************************************/
+
+#define D_SETPARAMS		'\137'
+#define D_RTT 			'\140'
+
+#define D_ACKRECV 		'\141'
+#define D_ACKSEND 		'\142'
+#define D_CONN 			'\143'
+#define D_CREDIT 		'\144'
+#define D_DATA 			'\145'
+#define D_DRIVER 		'\146'
+
+#define D_EMIT 			'\147'
+#define D_ERROR_EMIT 	'\150'
+#define D_TPINPUT 		'\151'
+#define D_INDICATION 	'\152'
+#define D_CHKSUM 		'\153'
+
+#define D_RENEG 		'\154'
+#define D_PERF_MEAS 	'\155'
+#define D_MBUF_MEAS 	'\156'
+#define D_RTC 			'\157'
+#define D_SB 			'\160'
+
+#define D_DISASTER_CHECK '\161'
+#define D_REQUEST 		'\162'
+#define D_STASH 		'\163'
+#define D_NEWSOCK 		'\164'
+#define D_TIMER 		'\165'
+
+#define D_TPIOCTL 		'\166'
+#define D_SIZE_CHECK 	'\167'
+#define D_2ER 			'\170'
+#define D_DISASTER_CHECK_W '\171'
+
+#define D_XPD 			'\172'
+#define D_SYSCALL 		'\173'
+#define D_DROP 			'\174'
+#define D_ZDREF 		'\175'
+#define D_TPISO			'\176'
+#define D_QUENCH		'\177'
+
+void dump_mbuf();
+
+/***********************************************
+ * New mbuf types for debugging w/ netstat -m
+ * This messes up 4.4 malloc for now. need bigger
+ * mbtypes array for now.
+ *
+ * The distinct values are therefore kept under "notdef"; in the
+ * #else half each TPMT_* name defined here maps onto a standard
+ * MT_* type (that half defines no TPMT_EOT).
+ **********************************************/
+#ifdef notdef
+
+#define 	TPMT_DATA	0x21
+#define 	TPMT_RCVRTC	0x42
+#define 	TPMT_SNDRTC	0x41
+#define 	TPMT_TPHDR	0x22
+#define 	TPMT_IPHDR	0x32
+#define 	TPMT_SONAME	0x28
+#define 	TPMT_EOT	0x40
+#define 	TPMT_XPD	0x44
+#define 	TPMT_PCB	0x23
+#define 	TPMT_PERF	0x45
+
+#else /* notdef */
+
+#define 	TPMT_DATA	MT_DATA
+#define 	TPMT_RCVRTC	MT_DATA
+#define 	TPMT_SNDRTC	MT_DATA
+#define 	TPMT_IPHDR	MT_HEADER
+#define 	TPMT_TPHDR	MT_HEADER
+#define 	TPMT_SONAME	MT_SONAME
+/* MT_EOT and MT_XPD are defined in tp_param.h */
+#define 	TPMT_XPD	MT_OOBDATA
+#define 	TPMT_PCB	MT_PCB
+#define 	TPMT_PERF	MT_PCB
+
+#endif /* notdef */
+
+#endif /* __ARGO_DEBUG__ */
diff --git a/sys/netiso/clnl.h b/sys/netiso/clnl.h
new file mode 100644
index 00000000000..87227dc273b
--- /dev/null
+++ b/sys/netiso/clnl.h
@@ -0,0 +1,64 @@
+/*-
+ * Copyright (c) 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)clnl.h	8.1 (Berkeley) 6/10/93
+ */
+
+/***********************************************************
+		Copyright IBM Corporation 1987
+
+                      All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and its 
+documentation for any purpose and without fee is hereby granted, 
+provided that the above copyright notice appear in all copies and that
+both that copyright notice and this permission notice appear in 
+supporting documentation, and that the name of IBM not be
+used in advertising or publicity pertaining to distribution of the
+software without specific, written prior permission.  
+
+IBM DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING
+ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL
+IBM BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR
+ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+SOFTWARE.
+
+******************************************************************/
+
+/*
+ * ARGO Project, Computer Sciences Dept., University of Wisconsin - Madison
+ */
+struct clnl_protosw {		/* holds a single handler pointer */
+	int (*clnl_input)();	/* input routine; argument list unspecified */
+};
diff --git a/sys/netiso/clnp.h b/sys/netiso/clnp.h
new file mode 100644
index 00000000000..4c81ba37d9c
--- /dev/null
+++ b/sys/netiso/clnp.h
@@ -0,0 +1,463 @@
+/*-
+ * Copyright (c) 1991, 1993, 1994
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)clnp.h	8.2 (Berkeley) 4/16/94
+ */
+
+/***********************************************************
+		Copyright IBM Corporation 1987
+
+                      All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and its 
+documentation for any purpose and without fee is hereby granted, 
+provided that the above copyright notice appear in all copies and that
+both that copyright notice and this permission notice appear in 
+supporting documentation, and that the name of IBM not be
+used in advertising or publicity pertaining to distribution of the
+software without specific, written prior permission.  
+
+IBM DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING
+ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL
+IBM BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR
+ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+SOFTWARE.
+
+******************************************************************/
+
+/*
+ * ARGO Project, Computer Sciences Dept., University of Wisconsin - Madison
+ */
+/* $Header: /big/BSD4.4/isis-usr/src/sys/netiso/RCS/clnp.h,v 1.1 1992/02/07 18:14:59 hagens Exp hagens $ */
+/* $Source: /big/BSD4.4/isis-usr/src/sys/netiso/RCS/clnp.h,v $ */
+
+/* should be config option but cpp breaks with too many #defines */
+#define	DECBIT
+
+/*
+ *	Return true if the mbuf is a cluster mbuf
+ */
+#define	IS_CLUSTER(m)	((m)->m_flags & M_EXT)
+
+/*
+ *	Move the halfword into the two characters: msb receives hword
+ *	shifted right by 8 (truncated to u_char), lsb the low 8 bits.
+ *	hword is evaluated twice.  The two stores are joined by the comma
+ *	operator so the macro is a single expression and stays intact as
+ *	the body of an unbraced if/else/loop.
+ */
+#define	HTOC(msb, lsb, hword)\
+	((msb) = (u_char)((hword) >> 8),\
+	(lsb) = (u_char)((hword) & 0xff))
+/*
+ *	Move the two characters into the halfword:
+ *	hword = (msb << 8) | lsb.  Parenthesized as a whole so it can be
+ *	used inside a larger expression.
+ */
+#define	CTOH(msb, lsb, hword)\
+	((hword) = ((msb) << 8) | (lsb))
+
+/*
+ *	Return true if the checksum has been set - ie. the checksum is
+ *	not zero
+ */
+#define	CKSUM_REQUIRED(clnp)\
+	(((clnp)->cnf_cksum_msb != 0) || ((clnp)->cnf_cksum_lsb != 0))
+
+/*
+ *	Fixed part of clnp header
+ *
+ *	Every member is a single u_char, so a member's offset equals its
+ *	position in the list: cnf_cksum_msb is the eighth member, which is
+ *	what CLNP_CKSUM_OFF (0x07) below names.
+ *	The CNF_* masks split one byte into a 5-bit value (CNF_TYPE) and
+ *	three flag bits; going by the comment on cnf_type they presumably
+ *	apply to that member -- confirm against the users.
+ *	clnl_fixed is simply another name for clnp_fixed.
+ */
+struct clnp_fixed {
+	u_char	cnf_proto_id;		/* network layer protocol identifier */
+	u_char	cnf_hdr_len;		/* length indicator (octets) */
+	u_char	cnf_vers;			/* version/protocol identifier extension */
+	u_char	cnf_ttl;			/* lifetime (500 milliseconds) */
+	u_char	cnf_type;			/* type code */
+								/* Includes err_ok, more_segs, and seg_ok */
+	u_char	cnf_seglen_msb;		/* pdu segment length (octets) high byte */
+	u_char	cnf_seglen_lsb;		/* pdu segment length (octets) low byte */
+	u_char	cnf_cksum_msb;		/* checksum high byte */
+	u_char	cnf_cksum_lsb;		/* checksum low byte */
+};
+#define CNF_TYPE	0x1f
+#define CNF_ERR_OK	0x20
+#define CNF_MORE_SEGS	0x40
+#define CNF_SEG_OK	0x80
+
+#define CLNP_CKSUM_OFF	0x07	/* offset of checksum */
+
+#define	clnl_fixed	clnp_fixed
+
+/*
+ *	Segmentation part of clnp header
+ */
+struct clnp_segment {
+	u_short	cng_id;				/* data unit identifier */
+	u_short	cng_off;			/* segment offset */
+	u_short	cng_tot_len;		/* total length */
+};
+
+/*
+ *	Clnp fragment reassembly structures:
+ *
+ *	All packets undergoing reassembly are linked together in 
+ *	clnp_fragl structures. Each clnp_fragl structure contains a
+ *	pointer to the original clnp packet header, as well as a 
+ *	list of packet fragments. Each packet fragment
+ *	is headed by a clnp_frag structure. This structure contains the
+ *	offset of the first and last byte of the fragment, as well as
+ *	a pointer to the data (an mbuf chain) of the fragment.
+ */
+
+/*
+ *	NOTE:
+ *		The clnp_frag structure is stored in an mbuf immediately preceding
+ *	the fragment data. Since there are words in this struct,
+ *	it must be word aligned. 
+ *
+ *	NOTE:
+ *		All the fragment code assumes that the entire clnp header is 
+ *	contained in the first mbuf.
+ */
+struct clnp_frag {
+	u_int				cfr_first;		/* offset of first byte of this frag */
+	u_int				cfr_last;		/* offset of last byte of this frag */
+	u_int				cfr_bytes;		/* bytes to shave to get to data */
+	struct mbuf			*cfr_data;		/* ptr to data for this frag */
+	struct clnp_frag	*cfr_next;		/* next fragment in list */
+};
+
+struct clnp_fragl {		/* one entry per packet being reassembled */
+	struct iso_addr		cfl_src;		/* source of the pkt */
+	struct iso_addr		cfl_dst;		/* destination of the pkt */
+	u_short				cfl_id;			/* id of the pkt */
+	u_char				cfl_ttl;		/* current ttl of pkt */
+	u_short				cfl_last;		/* offset of last byte of packet */
+	struct mbuf 		*cfl_orighdr;	/* ptr to original header */
+	struct clnp_frag	*cfl_frags;		/* linked list of fragments for pkt */
+	struct clnp_fragl	*cfl_next;		/* next pkt being reassembled */
+};
+
+/*
+ *	The following structure is used to index into an options section
+ *	of a clnp datagram. These values can be used without worry that
+ *	offset or length fields are invalid or too big, etc. That is,
+ *	the consistency of the options will be guaranteed before this
+ *	structure is filled in. Any pointer (field ending in p) is
+ *	actually the offset from the beginning of the mbuf the option
+ *	is contained in.  A value of NULL for any pointer
+ *	means that the option is not present. The length of any option
+ *	does not include the option code or option length fields.
+ *
+ *	NOTE(review): most *_len members and cni_priorp are plain char,
+ *	while the other offsets are u_short; confirm that values above
+ *	127 cannot be stored in them.
+ */
+struct clnp_optidx {
+	u_short	cni_securep;		/* ptr to beginning of security option */
+	char	cni_secure_len;		/* length of entire security option */
+
+	u_short	cni_srcrt_s;		/* offset of start of src rt option */
+	u_short	cni_srcrt_len;		/* length of entire src rt option */
+
+	u_short	cni_recrtp;			/* ptr to beginning of recrt option */
+	char	cni_recrt_len;		/* length of entire recrt option */
+
+	char	cni_priorp;			/* ptr to priority option */
+
+	u_short	cni_qos_formatp;	/* ptr to format of qos option */
+	char	cni_qos_len;		/* length of entire qos option */
+
+	u_char	cni_er_reason;		/* reason from ER pdu option */
+
+								/* ESIS options */
+
+	u_short	cni_esct;			/* value from ISH ESCT option */
+
+	u_short	cni_netmaskp;		/* ptr to beginning of netmask option */
+	char	cni_netmask_len;		/* length of entire netmask option */
+
+	u_short	cni_snpamaskp;		/* ptr to beginning of snpamask option */
+	char	cni_snpamask_len;		/* length of entire snpamask option */
+
+};
+
+#define	ER_INVALREAS	0xff	/* code for invalid ER pdu discard reason */
+
+/*
+ * Option access macros.  Every use of a macro argument is parenthesized
+ * so that an argument such as "&idx" or "p + 1" binds as the caller
+ * wrote it.  Arguments may be evaluated more than once.
+ */
+
+/* given an mbuf and addr of option, return offset from data of mbuf */
+#define CLNP_OPTTOOFF(m, opt)\
+	((u_short) ((opt) - mtod(m, caddr_t)))
+
+/* given an mbuf and offset of option, return address of option */
+#define CLNP_OFFTOOPT(m, off)\
+	((caddr_t) (mtod(m, caddr_t) + (off)))
+
+/*	return true iff oidx is non-null and its cni_srcrt_s is non-zero */
+#define	CLNPSRCRT_VALID(oidx)\
+	((oidx) && ((oidx)->cni_srcrt_s))
+
+/*	return the offset field of the src rt (second byte of the option) */
+#define CLNPSRCRT_OFF(oidx, options)\
+	(*((u_char *)(CLNP_OFFTOOPT(options, (oidx)->cni_srcrt_s) + 1)))
+
+/*	return the type field of the src rt (first byte of the option) */
+#define CLNPSRCRT_TYPE(oidx, options)\
+	((u_char)(*(CLNP_OFFTOOPT(options, (oidx)->cni_srcrt_s))))
+
+/* return the length of the current address (byte just before it) */
+#define CLNPSRCRT_CLEN(oidx, options)\
+	((u_char)(*(CLNP_OFFTOOPT(options, (oidx)->cni_srcrt_s) + CLNPSRCRT_OFF(oidx, options) - 1)))
+
+/* return the address of the current address */
+#define CLNPSRCRT_CADDR(oidx, options)\
+	((caddr_t)(CLNP_OFFTOOPT(options, (oidx)->cni_srcrt_s) + CLNPSRCRT_OFF(oidx, options)))
+
+/*
+ *	return true if the src route has run out of routes
+ *	this is true if the offset of next route is greater than the end of the rt
+ */
+#define	CLNPSRCRT_TERM(oidx, options)\
+	(CLNPSRCRT_OFF(oidx, options) > (oidx)->cni_srcrt_len)
+
+/*
+ *	Options a user can set/get
+ */
+#define	CLNPOPT_FLAGS	0x01	/* flags: seg permitted, no er xmit, etc  */
+#define	CLNPOPT_OPTS	0x02	/* datagram options */
+
+/*
+ *	Values for particular datagram options
+ */
+#define	CLNPOVAL_PAD		0xcc	/* padding */
+#define	CLNPOVAL_SECURE		0xc5	/* security */
+#define	CLNPOVAL_SRCRT		0xc8	/* source routing */
+#define	CLNPOVAL_RECRT		0xcb	/* record route */
+#define	CLNPOVAL_QOS		0xc3	/* quality of service */
+#define	CLNPOVAL_PRIOR		0xcd	/* priority */
+#define CLNPOVAL_ERREAS		0xc1	/* ER PDU ONLY: reason for discard */
+
+#define	CLNPOVAL_SRCSPEC	0x40	/* source address specific */
+#define	CLNPOVAL_DSTSPEC	0x80	/* destination address specific */
+#define	CLNPOVAL_GLOBAL		0xc0	/* globally unique */
+
+/* Globally Unique QOS */
+#define	CLNPOVAL_SEQUENCING	0x10	/* sequencing preferred */
+#define CLNPOVAL_CONGESTED	0x08	/* congestion experienced */
+#define CLNPOVAL_LOWDELAY	0x04	/* low transit delay */
+
+#define	CLNPOVAL_PARTRT		0x00	/* partial source routing */
+#define CLNPOVAL_COMPRT		0x01	/* complete source routing */
+
+/*
+ *	Clnp flags used in a control block flags field. 
+ *	NOTE: these must be out of the range of bits defined in ../net/raw_cb.h
+ */
+#define	CLNP_NO_SEG		0x010	/* segmentation not permitted */
+#define	CLNP_NO_ER		0x020	/* do not generate ERs */
+#define CLNP_SEND_RAW	0x080	/* send pkt as RAW DT rather than TP DT */
+#define	CLNP_NO_CKSUM	0x100	/* don't use clnp checksum */
+#define CLNP_ECHO		0x200	/* send echo request */
+#define	CLNP_NOCACHE	0x400	/* don't store cache information */
+#define CLNP_ECHOR		0x800	/* send echo reply */
+
+/* valid clnp flags */
+#define CLNP_VFLAGS		(CLNP_SEND_RAW|CLNP_NO_SEG|CLNP_NO_ER|CLNP_NO_CKSUM\
+	|CLNP_ECHO|CLNP_NOCACHE|CLNP_ECHOR)
+
+/*
+ *	Constants used by clnp
+ *
+ *	CLNP_TTL is parenthesized so that it stays one value when used
+ *	next to operators such as / or % (e.g. x / CLNP_TTL).
+ */
+#define	CLNP_HDR_MIN	(sizeof (struct clnp_fixed))
+#define	CLNP_HDR_MAX	(254)
+#define	CLNP_TTL_UNITS	2					/* ttl units per second (500 milliseconds each) */
+#define CLNP_TTL		(15*CLNP_TTL_UNITS)	/* time to live: 15 seconds, in ttl units */
+#define	ISO8473_V1		0x01
+
+/*
+ *	Clnp packet types
+ *	In order to test raw clnp and tp/clnp simultaneously, a third type of
+ *	packet has been defined: CLNP_RAW. This is done so that the input
+ *	routine can switch to the correct input routine (rclnp_input or
+ *	tpclnp_input) based on the type field. If clnp had a higher level protocol
+ *	field, this would not be necessary.
+ */
+#define	CLNP_DT			0x1C	/* normal data */
+#define	CLNP_ER			0x01	/* error report */
+#define	CLNP_RAW		0x1D	/* debug only */
+#define CLNP_EC			0x1E	/* echo packet */
+#define CLNP_ECR		0x1F	/* echo reply */
+
+/*
+ *	ER pdu error codes
+ */
+#define GEN_NOREAS			0x00	/* reason not specified */
+#define GEN_PROTOERR		0x01	/* protocol procedure error */
+#define GEN_BADCSUM			0x02	/* incorrect checksum */
+#define GEN_CONGEST			0x03	/* pdu discarded due to congestion */
+#define GEN_HDRSYNTAX		0x04	/* header syntax error */
+#define GEN_SEGNEEDED		0x05	/* segmentation needed, but not permitted */
+#define GEN_INCOMPLETE		0x06	/* incomplete pdu received */
+#define GEN_DUPOPT			0x07	/* duplicate option */
+
+/* address errors */
+#define ADDR_DESTUNREACH	0x80	/* destination address unreachable */
+#define ADDR_DESTUNKNOWN	0x81	/* destination address unknown */
+
+/* source routing */
+#define SRCRT_UNSPECERR		0x90	/* unspecified src rt error */
+#define SRCRT_SYNTAX		0x91	/* syntax error in src rt field */
+#define SRCRT_UNKNOWNADDR	0x92	/* unknown addr in src rt field */
+#define SRCRT_BADPATH		0x93	/* path not acceptable */
+
+/* lifetime */
+#define TTL_EXPTRANSIT		0xa0	/* lifetime expired during transit */
+#define TTL_EXPREASS		0xa1	/* lifetime expired during reassembly */
+
+/* pdu discarded */
+#define DISC_UNSUPPOPT		0xb0	/* unsupported option not specified? */
+#define DISC_UNSUPPVERS		0xb1	/* unsupported protocol version */
+#define DISC_UNSUPPSECURE	0xb2	/* unsupported security option */
+#define DISC_UNSUPPSRCRT	0xb3	/* unsupported src rt option */
+#define DISC_UNSUPPRECRT	0xb4	/* unsupported rec rt option */
+
+/* reassembly */
+#define REASS_INTERFERE		0xc0	/* reassembly interference */
+#define CLNP_ERRORS		22	/* number of ER codes defined above */
+
+
+#ifdef KERNEL
+int clnp_er_index();
+#endif
+
+#ifdef CLNP_ER_CODES
+u_char clnp_er_codes[CLNP_ERRORS] =  {	/* each code once, in definition order */
+GEN_NOREAS, GEN_PROTOERR, GEN_BADCSUM, GEN_CONGEST,
+GEN_HDRSYNTAX, GEN_SEGNEEDED, GEN_INCOMPLETE, GEN_DUPOPT,
+ADDR_DESTUNREACH, ADDR_DESTUNKNOWN,
+SRCRT_UNSPECERR, SRCRT_SYNTAX, SRCRT_UNKNOWNADDR, SRCRT_BADPATH,
+TTL_EXPTRANSIT, TTL_EXPREASS,
+DISC_UNSUPPOPT, DISC_UNSUPPVERS, DISC_UNSUPPSECURE,
+DISC_UNSUPPSRCRT, DISC_UNSUPPRECRT, REASS_INTERFERE };
+#endif
+
+/*
+ *	SN_OUTPUT(clcp, m) sends m using the interface, first hop and route
+ *	stored in *clcp; SN_MTU(ifp, rt) yields rt's rmx_mtu when rt is
+ *	non-null and that mtu is non-zero, otherwise the result of
+ *	clnp_badmtu().  With TROLL defined, output goes through
+ *	troll_output() and the mtu is reduced by trollctl.tr_mtu_adj.
+ *	Macro arguments are parenthesized at each use and may be evaluated
+ *	more than once.
+ */
+#ifdef	TROLL
+
+#define	TR_DUPEND		0x01	/* duplicate end of fragment */
+#define TR_DUPPKT		0x02	/* duplicate entire packet */
+#define	TR_DROPPKT		0x04	/* drop packet on output */
+#define TR_TRIM			0x08	/* trim bytes from packet */
+#define TR_CHANGE		0x10	/* change bytes in packet */
+#define TR_MTU			0x20	/* delta to change device mtu */
+#define	TR_CHUCK		0x40	/* drop packet in rclnp_input */
+#define	TR_BLAST		0x80	/* force rclnp_output to blast many packets */
+#define	TR_RAWLOOP		0x100	/* make if_loop call clnpintr directly */
+struct troll {
+	int		tr_ops;				/* operations to perform */
+	float	tr_dup_size;		/* % to duplicate */
+	float	tr_dup_freq;		/* frequency to duplicate packets */
+	float	tr_drop_freq;		/* frequency to drop packets */
+	int		tr_mtu_adj;			/* delta to adjust if mtu */
+	int		tr_blast_cnt;		/* # of pkts to blast out */
+};
+
+#define	SN_OUTPUT(clcp, m)\
+	troll_output((clcp)->clc_ifp, (m), (clcp)->clc_firsthop, (clcp)->clc_rt)
+
+#define	SN_MTU(ifp, rt) ((((rt) && (rt)->rt_rmx.rmx_mtu) ?\
+	(rt)->rt_rmx.rmx_mtu : clnp_badmtu((ifp), (rt), __LINE__, __FILE__))\
+		- trollctl.tr_mtu_adj)
+
+#ifdef KERNEL
+extern float troll_random;
+#endif
+
+#else	/* NO TROLL */
+
+#define	SN_OUTPUT(clcp, m)\
+	(*(clcp)->clc_ifp->if_output)((clcp)->clc_ifp, (m), (clcp)->clc_firsthop, (clcp)->clc_rt)
+
+#define	SN_MTU(ifp, rt) ((((rt) && (rt)->rt_rmx.rmx_mtu) ?\
+	(rt)->rt_rmx.rmx_mtu : clnp_badmtu((ifp), (rt), __LINE__, __FILE__)))
+
+#endif	/* TROLL */
+
+/*
+ *	Macro to remove an address from a clnp header
+ *
+ *	Reads the length byte at hoff into isoa.isoa_len and steps hoff
+ *	past it.  If the length is zero, exceeds 20, or the address would
+ *	run beyond hend, hoff is set to (caddr_t)0; callers must test for
+ *	that.  Otherwise isoa_len bytes are copied to isoa.isoa_genaddr and
+ *	hoff is advanced past them.
+ *
+ *	NOTE(review): the expansion is a braced block and the arguments
+ *	are used unparenthesized, so pass plain lvalues, and do not write
+ *	"CLNP_EXTRACT_ADDR(...);" directly in front of an "else".
+ */
+#define CLNP_EXTRACT_ADDR(isoa, hoff, hend)\
+	{\
+		isoa.isoa_len = (u_char)*hoff;\
+		if ((((++hoff) + isoa.isoa_len) > hend) ||\
+			(isoa.isoa_len > 20) || (isoa.isoa_len == 0)) {\
+			hoff = (caddr_t)0;\
+		} else {\
+			(void) bcopy(hoff, (caddr_t)isoa.isoa_genaddr, isoa.isoa_len);\
+			hoff += isoa.isoa_len;\
+		}\
+	}
+
+/*
+ *	Macro to insert an address into a clnp header
+ *
+ *	Stores isoa_len at hoff, copies isoa_len bytes of isoa_genaddr
+ *	after it, and leaves hoff just past the copied address.
+ *
+ *	NOTE(review): this expands to three separate statements, the last
+ *	already ending in ';', and hoff is used unparenthesized.  Used as
+ *	the sole body of an unbraced if/else/loop, only the first
+ *	statement would be governed by it.
+ */
+#define CLNP_INSERT_ADDR(hoff, isoa)\
+	*hoff++ = (isoa).isoa_len;\
+	(void) bcopy((caddr_t)((isoa).isoa_genaddr), hoff, (isoa).isoa_len);\
+	hoff += (isoa).isoa_len;
+
+/*
+ *	Clnp hdr cache.	Whenever a clnp packet is sent, a copy of the
+ *	header is made and kept in this cache. In addition to a copy of
+ *	the cached clnp hdr, the cache contains
+ *	information necessary to determine whether the new packet
+ *	to send requires a new header to be built.
+ */
+struct clnp_cache {
+	/* these fields are used to check the validity of the cache */
+	struct iso_addr		clc_dst;		/* destination of packet */
+	struct mbuf 		*clc_options;	/* ptr to options mbuf */
+	int					clc_flags;		/* flags passed to clnp_output */
+
+	/* these fields are state that clnp_output requires to finish the pkt */
+	int					clc_segoff;		/* offset of seg part of header */
+	struct rtentry		*clc_rt;		/* ptr to rtentry (points into
+											the route structure) */
+	struct sockaddr		*clc_firsthop;	/* first hop of packet */
+	struct ifnet		*clc_ifp;		/* ptr to interface structure */
+	struct iso_ifaddr	*clc_ifa;		/* ptr to interface address */
+	struct mbuf 		*clc_hdr;		/* cached pkt hdr (finally)! */
+};
+
+#ifndef	satosiso
+#define	satosiso(sa)\
+	((struct sockaddr_iso *)(sa))
+#endif
+
+#ifdef	KERNEL
+caddr_t			clnp_insert_addr();	/* declared without parameter types */
+struct iso_addr	*clnp_srcaddr();
+struct mbuf		*clnp_reass();
+#ifdef	TROLL
+struct troll	trollctl;	/* read by the TROLL form of SN_MTU;
+							 * NOTE(review): no "extern" here */
+#endif	/* TROLL */
+#endif	/* KERNEL */
diff --git a/sys/netiso/clnp_debug.c b/sys/netiso/clnp_debug.c
new file mode 100644
index 00000000000..964638e244b
--- /dev/null
+++ b/sys/netiso/clnp_debug.c
@@ -0,0 +1,260 @@
+/*-
+ * Copyright (c) 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)clnp_debug.c	8.1 (Berkeley) 6/10/93
+ */
+
+/***********************************************************
+		Copyright IBM Corporation 1987
+
+                      All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and its 
+documentation for any purpose and without fee is hereby granted, 
+provided that the above copyright notice appear in all copies and that
+both that copyright notice and this permission notice appear in 
+supporting documentation, and that the name of IBM not be
+used in advertising or publicity pertaining to distribution of the
+software without specific, written prior permission.  
+
+IBM DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING
+ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL
+IBM BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR
+ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+SOFTWARE.
+
+******************************************************************/
+
+/*
+ * ARGO Project, Computer Sciences Dept., University of Wisconsin - Madison
+ */
+/* $Header: clnp_debug.c,v 4.2 88/06/29 14:58:34 hagens Exp $ */
+/* $Source: /usr/argo/sys/netargo/RCS/clnp_debug.c,v $ */
+
+#include <sys/param.h>
+#include <sys/mbuf.h>
+#include <sys/domain.h>
+#include <sys/protosw.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/errno.h>
+
+#include <net/if.h>
+#include <net/route.h>
+
+#include <netiso/iso.h>
+#include <netiso/clnp.h>
+#include <netiso/clnp_stat.h>
+#include <netiso/argo_debug.h>
+
+#ifdef	ARGO_DEBUG
+
+#ifdef	TESTDEBUG	/* stand-alone driver that prints sample addresses */
+#ifdef notdef
+struct addr_37 u_37 = {
+	{0x00, 0x02, 0x00, 0x10, 0x20, 0x30, 0x35}, 
+	{0x01, 0x02, 0x03, 0x04, 0x50, 0x60, 0x70, 0x80, 0x90}
+};
+struct addr_osinet u_osinet = {
+	{0x00, 0x04},
+	{0x00, 0x02, 0x00, 0x01, 0x23, 0x42, 0x78, 0x20, 0x01, 0x05, 0x00}
+};
+#endif /* notdef -- NOTE(review): main() below still uses u_37 and u_osinet */
+struct addr_rfc986 u_rfc986 = {
+	{0x00, 0x06},
+	{0x01, 0xc0, 0x0c, 0x0c, 0xab, 0x11}
+};
+struct addr_rfc986 u_bad = {	/* used for the "bad idi" case in main() */
+	{0x00, 0x01},
+	{0x01, 0xc0, 0x0c, 0x0c, 0xab, 0x11}
+};
+#include <stdio.h>
+main()	/* NOTE(review): clnp_iso_addrp is used here before it is declared */
+{
+	struct iso_addr	a;
+
+	a.isoa_afi = AFI_37;
+	a.isoa_u.addr_37 = u_37;
+	a.isoa_len = 17;
+	printf("type 37: %s\n", clnp_iso_addrp(&a));
+
+	a.isoa_afi = AFI_OSINET;
+	a.isoa_u.addr_osinet = u_osinet;
+	a.isoa_len = 14;
+	printf("type osinet: %s\n", clnp_iso_addrp(&a));
+
+	a.isoa_afi = AFI_RFC986;
+	a.isoa_u.addr_rfc986 = u_rfc986;
+	a.isoa_len = 9;
+	printf("type rfc986: %s\n", clnp_iso_addrp(&a));
+
+	a.isoa_afi = 12;	/* the "bad afi" sample */
+	a.isoa_u.addr_rfc986 = u_rfc986;
+	a.isoa_len = 9;
+	printf("type bad afi: %s\n", clnp_iso_addrp(&a));
+
+	a.isoa_afi = AFI_RFC986;
+	a.isoa_u.addr_rfc986 = u_bad;	/* the "bad idi" sample */
+	a.isoa_len = 9;
+	printf("type bad idi: %s\n", clnp_iso_addrp(&a));
+}
+#endif	/* TESTDEBUG */
+
+unsigned int	clnp_debug;	/* NOTE(review): not referenced anywhere in this file */
+static char letters[] = "0123456789abcdef";	/* hex digit for each nibble value */
+
+/*
+ *	Write len bytes of src at where as two lowercase hex digits each.
+ *	Returns the address just past the last digit; no NUL is appended.
+ */
+char *
+clnp_hexp(src, len, where)
+char	*src;		/* bytes to format */
+int		len;		/* number of bytes at src */
+char	*where;		/* output; needs room for 2 * len chars */
+{
+	register u_char	*bp = (u_char *)src;	/* read bytes as unsigned */
+	register char	*out = where;
+
+	for (; len > 0; len--) {
+		*out++ = letters[*bp >> 4];
+		*out++ = letters[*bp++ & 0x0f];
+	}
+	return out;
+}
+
+/*
+ *	Return a ptr to a human readable form of an iso addr: "[len] " and
+ *	then the address in hex, built in a static buffer every call reuses.
+ */
+static char iso_addr_b[128];	/* clnp_saddr_isop() appends to this too */
+#define	DELIM	'.'
+
+char *
+clnp_iso_addrp(isoa)
+struct iso_addr *isoa;
+{
+	char	*cp;
+	int		len = (int)isoa->isoa_len, room;
+
+	/* print length */
+	sprintf(iso_addr_b, "[%d] ", isoa->isoa_len);
+
+	/* find the end of that, then print only the bytes that leave a NUL */
+	for (cp = iso_addr_b; *cp; cp++)
+		;
+	room = (int)((iso_addr_b + sizeof(iso_addr_b) - 1) - cp) / 2;
+	cp = clnp_hexp(isoa->isoa_genaddr, len > room ? room : len, cp);
+#ifdef notdef	/* disabled per-AFI output; NOTE(review): not length-checked */
+	*cp++ = DELIM;
+
+	/* print type specific part */
+	switch(isoa->isoa_afi) {
+		case AFI_37:
+			cp = clnp_hexp(isoa->t37_idi, ADDR37_IDI_LEN, cp);
+			*cp++ = DELIM;
+			cp = clnp_hexp(isoa->t37_dsp, ADDR37_DSP_LEN, cp);
+			break;
+		
+/* 		case AFI_OSINET:*/
+		case AFI_RFC986: {
+			u_short	idi;
+
+			/* osinet and rfc986 have idi in the same place */
+			/* print idi */
+			cp = clnp_hexp(isoa->rfc986_idi, ADDROSINET_IDI_LEN, cp);
+			*cp++ = DELIM;
+			CTOH(isoa->rfc986_idi[0], isoa->rfc986_idi[1], idi);
+
+			if (idi == IDI_OSINET) {
+				struct ovl_osinet *oosi = (struct ovl_osinet *)isoa;
+				cp = clnp_hexp(oosi->oosi_orgid, OVLOSINET_ORGID_LEN, cp);
+				*cp++ = DELIM;
+				cp = clnp_hexp(oosi->oosi_snetid, OVLOSINET_SNETID_LEN, cp);
+				*cp++ = DELIM;
+				cp = clnp_hexp(oosi->oosi_snpa, OVLOSINET_SNPA_LEN, cp);
+				*cp++ = DELIM;
+				cp = clnp_hexp(oosi->oosi_nsap, OVLOSINET_NSAP_LEN, cp);
+			} else if (idi == IDI_RFC986) {
+				struct ovl_rfc986 *o986 = (struct ovl_rfc986 *)isoa;
+				cp = clnp_hexp(&o986->o986_vers, 1, cp);
+				*cp++ = DELIM;
+#ifdef  vax
+				sprintf(cp, "%d.%d.%d.%d.%d", 
+				o986->o986_inetaddr[0] & 0xff,
+				o986->o986_inetaddr[1] & 0xff,
+				o986->o986_inetaddr[2] & 0xff,
+				o986->o986_inetaddr[3] & 0xff,
+				o986->o986_upid & 0xff);
+				return(iso_addr_b);
+#else
+				cp = clnp_hexp(&o986->o986_inetaddr[0], 1, cp);
+				*cp++ = DELIM;
+				cp = clnp_hexp(&o986->o986_inetaddr[1], 1, cp);
+				*cp++ = DELIM;
+				cp = clnp_hexp(&o986->o986_inetaddr[2], 1, cp);
+				*cp++ = DELIM;
+				cp = clnp_hexp(&o986->o986_inetaddr[3], 1, cp);
+				*cp++ = DELIM;
+				cp = clnp_hexp(&o986->o986_upid, 1, cp);
+#endif /* vax */
+			}
+			
+		} break;
+
+		default:
+			*cp++ = '?';
+			break;
+	}
+#endif /* notdef */
+	*cp = (char)0;
+	
+	return(iso_addr_b);
+}
+/* Append "(tsel-in-hex)" to clnp_iso_addrp()'s text, clipped to fit the buffer */
+char *
+clnp_saddr_isop(s)
+register struct sockaddr_iso *s;
+{
+	register char	*cp = clnp_iso_addrp(&s->siso_addr);
+	int		tlen = (int)s->siso_tlen, room;	/* room: chars left for the tsel */
+	for (room = (int)sizeof(iso_addr_b) - 3; *cp; cp++) room--; /* 3: "()" NUL */
+	if (room < 0) return (iso_addr_b);	/* buffer already full: append nothing */
+	*cp++ = '(';
+	cp = clnp_hexp(TSEL(s), tlen > room / 2 ? room / 2 : tlen, cp);
+	*cp++ = ')'; *cp = 0;
+	return (iso_addr_b);
+}
+
+#endif	/* ARGO_DEBUG */
diff --git a/sys/netiso/clnp_er.c b/sys/netiso/clnp_er.c
new file mode 100644
index 00000000000..8b7f45b77a4
--- /dev/null
+++ b/sys/netiso/clnp_er.c
@@ -0,0 +1,375 @@
+/*-
+ * Copyright (c) 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)clnp_er.c	8.1 (Berkeley) 6/10/93
+ */
+
+/***********************************************************
+		Copyright IBM Corporation 1987
+
+                      All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and its 
+documentation for any purpose and without fee is hereby granted, 
+provided that the above copyright notice appear in all copies and that
+both that copyright notice and this permission notice appear in 
+supporting documentation, and that the name of IBM not be
+used in advertising or publicity pertaining to distribution of the
+software without specific, written prior permission.  
+
+IBM DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING
+ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL
+IBM BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR
+ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+SOFTWARE.
+
+******************************************************************/
+
+/*
+ * ARGO Project, Computer Sciences Dept., University of Wisconsin - Madison
+ */
+/* $Header: /var/src/sys/netiso/RCS/clnp_er.c,v 5.1 89/02/09 16:20:18 hagens Exp $ */
+/* $Source: /var/src/sys/netiso/RCS/clnp_er.c,v $ */
+
+#include <sys/param.h>
+#include <sys/mbuf.h>
+#include <sys/domain.h>
+#include <sys/protosw.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/errno.h>
+
+#include <net/if.h>
+#include <net/route.h>
+
+#include <netiso/iso.h>
+#include <netiso/iso_var.h>
+#include <netiso/iso_pcb.h>
+#define CLNP_ER_CODES
+#include <netiso/clnp.h>
+#include <netiso/clnp_stat.h>
+#include <netiso/argo_debug.h>
+
+static struct clnp_fixed er_template = {	/* copied into each ER by clnp_emit_er */
+	ISO8473_CLNP,	/* network identifier */
+	0,				/* length; clnp_emit_er stores cnf_hdr_len itself */
+	ISO8473_V1,		/* version */
+	CLNP_TTL,		/* ttl */
+	CLNP_ER,		/* type */
+	0,				/* segment length; clnp_emit_er stores it via HTOC */
+	0				/* checksum; NOTE(review): presumably set by iso_gen_csum */
+};
+
+/*
+ * FUNCTION:		clnp_er_input
+ *
+ * PURPOSE:			Process an ER pdu: count it by reason code and,
+ *					for most reason codes, pass the matching PRC_*
+ *					command and the ER's source to tpclnp_ctlinput1.
+ *
+ * RETURNS:			nothing
+ *
+ * SIDE EFFECTS:	Bumps a cns_er_inhist counter; always frees m.
+ *
+ * NOTES:			GEN_NOREAS, GEN_PROTOERR and any reason code not
+ *					listed in the switch are counted and dropped
+ *					without calling tpclnp_ctlinput1.
+ */
+clnp_er_input(m, src, reason)
+struct mbuf		*m;		/* ptr to packet itself */
+struct iso_addr	*src;	/* ptr to src of er */
+u_char			reason;	/* reason code of er */
+{
+	extern u_char clnp_protox[];
+	int	cmd;	/* PRC_* command for tpclnp_ctlinput1, or -1 for none */
+
+	IFDEBUG(D_CTLINPUT)
+		printf("clnp_er_input: m x%x, src %s, reason x%x\n", m, 
+			clnp_iso_addrp(src), reason);
+	ENDDEBUG
+
+	/* count the ER under its reason code */
+	INCSTAT(cns_er_inhist[clnp_er_index(reason)]);
+	switch (reason) {
+		/* every reason that is reported as PRC_PARAMPROB */
+		case GEN_BADCSUM:
+		case GEN_HDRSYNTAX:
+		case GEN_INCOMPLETE:
+		case GEN_DUPOPT:
+		case DISC_UNSUPPOPT:
+		case DISC_UNSUPPVERS:
+		case DISC_UNSUPPSECURE:
+		case DISC_UNSUPPSRCRT:
+		case DISC_UNSUPPRECRT:
+			cmd = PRC_PARAMPROB;
+			break;
+		case GEN_CONGEST:
+			cmd = PRC_QUENCH;
+			break;
+		case GEN_SEGNEEDED:
+			cmd = PRC_MSGSIZE;
+			break;
+		case ADDR_DESTUNREACH:
+			cmd = PRC_UNREACH_HOST;
+			break;
+		case ADDR_DESTUNKNOWN:
+			cmd = PRC_UNREACH_PROTOCOL;
+			break;
+		case SRCRT_UNSPECERR:
+		case SRCRT_SYNTAX:
+		case SRCRT_UNKNOWNADDR:
+		case SRCRT_BADPATH:
+			cmd = PRC_UNREACH_SRCFAIL;
+			break;
+		case TTL_EXPTRANSIT:
+			cmd = PRC_TIMXCEED_INTRANS;
+			break;
+		case TTL_EXPREASS:
+		case REASS_INTERFERE:
+			cmd = PRC_TIMXCEED_REASS;
+			break;
+		/* no command for these, nor for a reason code not listed here */
+		case GEN_NOREAS:
+		case GEN_PROTOERR:
+		default:
+			cmd = -1;
+			break;
+	}
+
+	/*
+	 *	tpclnp_ctlinput1 is called directly so that we don't
+	 *	have to build an iso_sockaddr out of src.
+	 */
+	if (cmd >= 0)
+		tpclnp_ctlinput1(cmd, src);
+
+	/* the ER pdu itself is freed here in every case */
+	m_freem(m);
+}
+
+/*
+ * FUNCTION:		clnp_discard
+ *
+ * PURPOSE:			Discard a clnp datagram
+ *
+ * RETURNS:			nothing
+ *
+ * SIDE EFFECTS:	Hands m to clnp_emit_er when an ER may be sent for it;
+ *					otherwise frees m.  A NULL m is ignored.
+ *
+ * NOTES:			This code assumes that we have previously tried to pull
+ *					up the header of the datagram into one mbuf.
+ */
+clnp_discard(m, reason)
+struct mbuf	*m;		/* header of packet to discard */
+char					reason;	/* reason for discard */
+{
+	IFDEBUG(D_DISCARD)
+		printf("clnp_discard: m x%x, reason x%x\n", m, reason);
+	ENDDEBUG
+
+	if (m == NULL)
+		return;
+	/* an ER goes out only for a non-ER pdu that has CNF_ERR_OK set */
+	if (m->m_len >= sizeof(struct clnp_fixed)) {
+		register struct clnp_fixed *hdr = mtod(m, struct clnp_fixed *);
+		if ((hdr->cnf_type & CNF_TYPE) != CLNP_ER &&
+			(hdr->cnf_type & CNF_ERR_OK) != 0) {
+			clnp_emit_er(m, reason);
+			return;
+		}
+	}
+	m_freem(m);
+}
+
+/*
+ * FUNCTION:		clnp_emit_er
+ *
+ * PURPOSE:			Send an ER pdu.
+ *					The src of the ER pdu is the host that is sending
+ *					the ER (ie. us), *not* the original destination of the
+ *					packet.
+ *
+ * RETURNS:			nothing
+ *
+ * SIDE EFFECTS:	Counts the ER in cns_er_outhist when one is sent.
+ *
+ * NOTES:			Takes responsibility for freeing mbuf passed.
+ *					This function may be called with a packet that
+ *					was created by us; in this case, do not send
+ *					an ER.
+ */
+clnp_emit_er(m, reason)
+struct mbuf	*m;		/* header of packet to discard */
+char					reason;	/* reason for discard */
+{
+	register struct clnp_fixed	*clnp = mtod(m, struct clnp_fixed *);
+	register struct clnp_fixed	*er;
+	struct route_iso			route;
+	struct ifnet				*ifp;
+	struct sockaddr				*first_hop;
+	struct iso_addr				src, dst, *our_addr;
+	caddr_t						hoff, hend;
+	int							total_len;		/* total len of dg */
+	struct mbuf 				*m0;			/* contains er pdu hdr */
+	struct iso_ifaddr			*ia = 0;
+
+	IFDEBUG(D_DISCARD)
+		printf("clnp_emit_er: m x%x, hdr len %d\n", m, clnp->cnf_hdr_len);
+	ENDDEBUG
+
+	bzero((caddr_t)&route, sizeof(route));	/* so ro_rt is NULL unless set */
+
+	/*
+	 *	If header length is incorrect, or entire header is not contained
+	 *	in this mbuf, we punt
+	 */
+	if ((clnp->cnf_hdr_len < CLNP_HDR_MIN) ||
+		(clnp->cnf_hdr_len > CLNP_HDR_MAX) ||
+		(clnp->cnf_hdr_len > m->m_len))
+		goto bad;
+	
+	/* extract src, dest address; hoff comes back 0 for a bad address */
+	hend = (caddr_t)clnp + clnp->cnf_hdr_len;
+	hoff = (caddr_t)clnp + sizeof(struct clnp_fixed);
+	CLNP_EXTRACT_ADDR(dst, hoff, hend);
+	if (hoff == (caddr_t)0) {
+		goto bad;
+	}
+	CLNP_EXTRACT_ADDR(src, hoff, hend);
+	if (hoff == (caddr_t)0) {
+		goto bad;
+	}
+	
+	/*
+	 *	Do not send ER if we generated the packet.
+	 */
+	if (clnp_ours(&src))
+		goto bad;
+
+	/* 
+	 *	Trim mbuf to hold only the header.
+	 *	This mbuf will be the 'data' of the er pdu
+	 */
+	if (m->m_next != NULL) {
+		m_freem(m->m_next);
+		m->m_next = NULL;
+	}
+
+	if (m->m_len > clnp->cnf_hdr_len)
+		m_adj(m, (int)-(m->m_len - (int)clnp->cnf_hdr_len));
+
+	/* route er pdu: note we send pkt to src of original packet  */
+	if (clnp_route(&src, &route, /* flags */0, &first_hop, &ia) != 0)
+		goto bad;
+
+	/* our address is that of the interface address clnp_route set in ia */
+	if (ia)
+			our_addr = &ia->ia_addr.siso_addr;
+	else
+			goto bad;
+	ifp = ia->ia_ifp;
+
+	IFDEBUG(D_DISCARD)
+		printf("clnp_emit_er: to %s", clnp_iso_addrp(&src));
+		printf(" from %s\n", clnp_iso_addrp(our_addr));
+	ENDDEBUG
+
+	IFDEBUG(D_DISCARD)
+		printf("clnp_emit_er: packet routed to %s\n", 
+			clnp_iso_addrp(&((struct sockaddr_iso *)first_hop)->siso_addr));
+	ENDDEBUG
+
+	/* allocate mbuf for er pdu header: punt on no space */
+	MGET(m0, M_DONTWAIT, MT_HEADER);
+	if (m0 == 0)
+		goto bad;
+	
+	m0->m_next = m;
+	er = mtod(m0, struct clnp_fixed *);
+	*er = er_template;
+
+	/* setup src/dst on er pdu */
+	/* NOTE REVERSAL OF SRC/DST */
+	hoff = (caddr_t)er + sizeof(struct clnp_fixed);
+	CLNP_INSERT_ADDR(hoff, src);
+	CLNP_INSERT_ADDR(hoff, *our_addr);
+
+	/*
+	 *	TODO: if complete src rt was specified, then reverse path, and
+	 *	copy into er as option.
+	 */
+
+	/* add er option */
+	*hoff++ = CLNPOVAL_ERREAS;	/* code */
+	*hoff++ = 2;				/* length */
+	*hoff++ = reason;			/* discard reason */
+	*hoff++ = 0;				/* error localization = not specified */
+
+	/* set length: ER header alone, then ER header plus the old header */
+	er->cnf_hdr_len = m0->m_len = (u_char)(hoff - (caddr_t)er);
+	total_len = m0->m_len + m->m_len;
+	HTOC(er->cnf_seglen_msb, er->cnf_seglen_lsb, total_len);
+
+	/* compute checksum (on header only) */
+	iso_gen_csum(m0, CLNP_CKSUM_OFF, (int)er->cnf_hdr_len);
+
+	/* trim if too large; NOTE(review): seglen above is not recomputed */
+	if (total_len > ifp->if_mtu)
+		m_adj(m0, -(total_len - ifp->if_mtu));
+	
+	/* send packet */
+	INCSTAT(cns_er_outhist[clnp_er_index(reason)]);
+	(void) (*ifp->if_output)(ifp, m0, first_hop, route.ro_rt);
+	goto done;
+
+bad:
+	m_freem(m);
+
+done:
+	/* free route if it is a temp */
+	if (route.ro_rt != NULL)
+		RTFREE(route.ro_rt);
+}
+
+clnp_er_index(p)
+u_char p;
+{
+	register int i;		/* searches clnp_er_codes from its last entry down */
+
+	for (i = CLNP_ERRORS - 1; i >= 0; i--)
+		if (clnp_er_codes[i] == p)
+			return (i);
+	/* NOTE(review): callers index histograms with this; confirm their size */
+	return (CLNP_ERRORS + 1);
+}
diff --git a/sys/netiso/clnp_frag.c b/sys/netiso/clnp_frag.c
new file mode 100644
index 00000000000..546a592ccf7
--- /dev/null
+++ b/sys/netiso/clnp_frag.c
@@ -0,0 +1,859 @@
+/*-
+ * Copyright (c) 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)clnp_frag.c	8.1 (Berkeley) 6/10/93
+ */
+
+/***********************************************************
+		Copyright IBM Corporation 1987
+
+                      All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and its 
+documentation for any purpose and without fee is hereby granted, 
+provided that the above copyright notice appear in all copies and that
+both that copyright notice and this permission notice appear in 
+supporting documentation, and that the name of IBM not be
+used in advertising or publicity pertaining to distribution of the
+software without specific, written prior permission.  
+
+IBM DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING
+ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL
+IBM BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR
+ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+SOFTWARE.
+
+******************************************************************/
+
+/*
+ * ARGO Project, Computer Sciences Dept., University of Wisconsin - Madison
+ */
+/* $Header: /var/src/sys/netiso/RCS/clnp_frag.c,v 5.1 89/02/09 16:20:26 hagens Exp $ */
+/* $Source: /var/src/sys/netiso/RCS/clnp_frag.c,v $ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/mbuf.h>
+#include <sys/domain.h>
+#include <sys/protosw.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/errno.h>
+
+#include <net/if.h>
+#include <net/route.h>
+
+#include <netiso/iso.h>
+#include <netiso/iso_var.h>
+#include <netiso/clnp.h>
+#include <netiso/clnp_stat.h>
+#include <netiso/argo_debug.h>
+
+/* all fragments are hung off this list; clnp_newpkt adds at the front */
+struct clnp_fragl	*clnp_frags = NULL;
+
+struct mbuf	*clnp_comp_pdu();	/* clnp_reass returns its result as the pdu */
+
+
+/*
+ * FUNCTION:		clnp_fragment
+ *
+ * PURPOSE:			Fragment a datagram, and send the itty bitty pieces
+ *					out over an interface.
+ *
+ * RETURNS:			success - 0
+ *					failure - unix error code
+ *
+ * SIDE EFFECTS:	m is sent, freed or discarded on every path.
+ *
+ * NOTES:			If there is an error sending the packet, clnp_discard
+ *					is called to discard the packet and send an ER. If
+ *					clnp_fragment was called from clnp_output, then
+ *					we generated the packet, and should not send an
+ *					ER -- clnp_emit_er will check for this. Otherwise,
+ *					the packet was fragmented during forwarding. In this
+ *					case, we ought to send an ER back.
+ */
+clnp_fragment(ifp, m, first_hop, total_len, segoff, flags, rt)
+struct ifnet	*ifp;		/* ptr to outgoing interface */
+struct mbuf		*m;			/* ptr to packet */
+struct sockaddr	*first_hop;	/* ptr to first hop */
+int				total_len;	/* length of datagram */
+int				segoff;		/* offset of segpart in hdr */
+int				flags;		/* flags passed to clnp_output */
+struct rtentry *rt;			/* route if direct ether */
+{
+	struct clnp_fixed		*clnp = mtod(m, struct clnp_fixed *);
+	int						hdr_len = (int)clnp->cnf_hdr_len;
+	int						frag_size = (SN_MTU(ifp, rt) - hdr_len) & ~7;
+
+	total_len -= hdr_len;	/* from here on: bytes of data still to send */
+	if ((clnp->cnf_type & CNF_SEG_OK) &&
+		(total_len >= 8) &&
+		(frag_size > 8 || (frag_size == 8 && !(total_len & 7)))) {
+
+		struct mbuf			*hdr = NULL;		/* save copy of clnp hdr */
+		struct mbuf			*frag_hdr = NULL;
+		struct mbuf			*frag_data = NULL;
+		struct clnp_segment	seg_part;			/* segmentation header */
+		int					frag_base;
+		int					error = 0;
+
+
+		INCSTAT(cns_fragmented);
+        (void) bcopy(segoff + mtod(m, caddr_t), (caddr_t)&seg_part,
+            sizeof(seg_part));
+		frag_base = ntohs(seg_part.cng_off);
+		/*
+		 *	Duplicate header, and remove from packet
+		 */
+		if ((hdr = m_copy(m, 0, hdr_len)) == NULL) {
+			clnp_discard(m, GEN_CONGEST);
+			return(ENOBUFS);
+		}
+		m_adj(m, hdr_len);
+
+		while (total_len > 0) {
+			int		remaining, last_frag;
+
+			IFDEBUG(D_FRAG)
+				/* NOTE(review): NULL at first, later a chain if_output took */
+				struct mbuf *mdump = frag_hdr;
+				int tot_mlen = 0;
+				printf("clnp_fragment: total_len %d:\n", total_len);
+				while (mdump != NULL) {
+					printf("\tmbuf x%x, m_len %d\n", 
+						mdump, mdump->m_len);
+					tot_mlen += mdump->m_len;
+					mdump = mdump->m_next;
+				}
+				printf("clnp_fragment: sum of mbuf chain %d:\n", tot_mlen);
+			ENDDEBUG
+			
+			frag_size = min(total_len, frag_size);
+			if ((remaining = total_len - frag_size) == 0)
+				last_frag = 1;
+			else {
+				/*
+				 *  If this fragment will cause the last one to
+				 *	be less than 8 bytes, shorten this fragment a bit.
+				 *  The obscure test on frag_size above ensures that
+				 *  frag_size will be positive.
+				 */
+				last_frag = 0;
+				if (remaining < 8)
+						frag_size -= 8;
+			}
+			
+
+			IFDEBUG(D_FRAG)
+				printf("clnp_fragment: seg off %d, size %d, remaining %d\n", 
+					ntohs(seg_part.cng_off), frag_size, total_len-frag_size);
+				if (last_frag)
+					printf("clnp_fragment: last fragment\n");
+			ENDDEBUG
+
+			if (last_frag) {
+				/*
+				 *	this is the last fragment; we don't need to get any other
+				 *	mbufs.
+				 */
+				frag_hdr = hdr;
+				frag_data = m;
+			} else {
+				/* duplicate header and data mbufs */
+				if ((frag_hdr = m_copy(hdr, 0, (int)M_COPYALL)) == NULL) {
+					clnp_discard(hdr, GEN_CONGEST);
+					m_freem(m);
+					return(ENOBUFS);
+				}
+				if ((frag_data = m_copy(m, 0, frag_size)) == NULL) {
+					clnp_discard(hdr, GEN_CONGEST);
+					m_freem(m);
+					m_freem(frag_hdr);
+					return(ENOBUFS);
+				}
+				INCSTAT(cns_fragments);
+			}
+			clnp = mtod(frag_hdr, struct clnp_fixed *);
+
+			if (!last_frag)
+				clnp->cnf_type |= CNF_MORE_SEGS;
+			
+			/* link together */
+			m_cat(frag_hdr, frag_data);
+
+			/* insert segmentation part; updated below */
+			bcopy((caddr_t)&seg_part, mtod(frag_hdr, caddr_t) + segoff,
+				sizeof(struct clnp_segment));
+
+			{
+				int	derived_len = hdr_len + frag_size;
+				HTOC(clnp->cnf_seglen_msb, clnp->cnf_seglen_lsb, derived_len);
+				if ((frag_hdr->m_flags & M_PKTHDR) == 0)
+					panic("clnp_frag:lost header");
+				frag_hdr->m_pkthdr.len = derived_len;
+			}
+			/* compute clnp checksum (on header only) */
+			if (flags & CLNP_NO_CKSUM) {
+				HTOC(clnp->cnf_cksum_msb, clnp->cnf_cksum_lsb, 0);
+			} else {
+				iso_gen_csum(frag_hdr, CLNP_CKSUM_OFF, hdr_len);
+			}
+
+			IFDEBUG(D_DUMPOUT)
+				struct mbuf *mdump = frag_hdr;
+				printf("clnp_fragment: sending dg:\n");
+				while (mdump != NULL) {
+					printf("\tmbuf x%x, m_len %d\n", mdump, mdump->m_len);
+					mdump = mdump->m_next;
+				}
+			ENDDEBUG
+
+#ifdef	TROLL
+			error = troll_output(ifp, frag_hdr, first_hop, rt);
+#else
+			error = (*ifp->if_output)(ifp, frag_hdr, first_hop, rt);
+#endif	/* TROLL */
+
+			/*
+			 *	Tough situation: if the error occurred on the last
+			 *	fragment, we can not send an ER, as the if_output
+			 *	routine consumed the packet. If the error occurred
+			 *	on any intermediate packets, we can send an ER
+			 *	because we still have the original header in (hdr).
+			 */
+			if (error) {
+				if (frag_hdr != hdr) {
+					/*
+					 *	The error was not on the last fragment. We must
+					 *	free hdr and m before returning
+					 */
+					clnp_discard(hdr, GEN_NOREAS);
+					m_freem(m);
+				}
+				return(error);
+			}
+
+			/* bump segment offset, trim data mbuf, and decrement count left */
+#ifdef	TROLL
+			/*
+			 *	Decrement frag_size by some fraction. This will cause the
+			 *	next fragment to start 'early', thus duplicating the end
+			 *	of the current fragment.  troll.tr_dup_size controls
+			 *	the fraction. If positive, it specifies the fraction. If
+			 *	negative, a random fraction is used.
+			 */
+			if ((trollctl.tr_ops & TR_DUPEND) && (!last_frag)) {
+				int num_bytes = frag_size;
+
+				if (trollctl.tr_dup_size > 0) 
+					num_bytes *= trollctl.tr_dup_size;
+				else
+					num_bytes *= troll_random();
+				frag_size -= num_bytes;
+			}
+#endif	/* TROLL */
+			total_len -= frag_size;
+			if (!last_frag) {
+				frag_base += frag_size;
+				seg_part.cng_off = htons(frag_base);
+				m_adj(m, frag_size);
+			}
+		}
+		return(0);
+	} else {
+	cantfrag:	/* NOTE(review): no goto in this function targets this label */
+		INCSTAT(cns_cantfrag);
+		clnp_discard(m, GEN_SEGNEEDED);
+		return(EMSGSIZE);
+	}
+}
+
+/*
+ * FUNCTION:		clnp_reass
+ *
+ * PURPOSE:			Attempt to reassemble a clnp packet given the current
+ *					fragment. The fragment is added to the list that has
+ *					the same id, src and dst; if that completes the
+ *					datagram, a pointer to the mbuf chain holding the
+ *					reassembled packet is returned. This packet will
+ *					appear as if it had just arrived in one piece.
+ *
+ *					If no list matches, a new one is started with this
+ *					fragment and 0 is returned.
+ *
+ * RETURNS:			Ptr to assembled packet, or 0
+ *
+ * SIDE EFFECTS:	Discards m if a new list cannot be created for it.
+ *
+ * NOTES:
+ *		clnp_slowtimo can not affect this code because clnpintr, and thus
+ *		this code, is called at a higher priority than clnp_slowtimo.
+ */
+struct mbuf *
+clnp_reass(m, src, dst, seg)
+struct mbuf 		*m;		/* new fragment */
+struct iso_addr		*src;	/* src of new fragment */
+struct iso_addr		*dst; 	/* dst of new fragment */
+struct clnp_segment	*seg;	/* segment part of fragment header */
+{
+	register struct clnp_fragl		*cfh;
+	register struct clnp_fixed		*clnp;
+
+	/* is a datagram with this id, src and dst already being collected? */
+	for (cfh = clnp_frags; cfh != NULL; cfh = cfh->cfl_next) {
+		if (seg->cng_id != cfh->cfl_id ||
+			!iso_addrmatch1(src, &cfh->cfl_src) ||
+			!iso_addrmatch1(dst, &cfh->cfl_dst))
+			continue;
+
+		IFDEBUG(D_REASS)
+			printf("clnp_reass: found packet\n");
+		ENDDEBUG
+		/* add this fragment, then ask whether the pdu is now complete */
+		clnp_insert_frag(cfh, m, seg);
+		m = clnp_comp_pdu(cfh);
+		if (m != NULL) {
+			/* complete: its segment length becomes the total length */
+			clnp = mtod(m, struct clnp_fixed *);
+			HTOC(clnp->cnf_seglen_msb, clnp->cnf_seglen_lsb,
+				 seg->cng_tot_len);
+		}
+		return (m);
+	}
+
+	IFDEBUG(D_REASS)
+		printf("clnp_reass: new packet!\n");
+	ENDDEBUG
+
+	/*
+	 *	No list matched, so this fragment starts a new datagram.
+	 *	If the new list cannot be set up, the fragment is dropped.
+	 */
+	/* TODO: don't let one src hog all the reassembly buffers */
+	if (clnp_newpkt(m, src, dst, seg) == 0 /* || this src is a hog */) {
+		INCSTAT(cns_fragdropped);
+		clnp_discard(m, GEN_CONGEST);
+	}
+
+	return(NULL);
+}
+
+/*
+ * FUNCTION:		clnp_newpkt
+ *
+ * PURPOSE:			Create the necessary structures to handle a new
+ *					fragmented clnp packet.
+ *
+ * RETURNS:			non-zero if it succeeds, zero if fails.
+ *
+ * SIDE EFFECTS:	Links a new clnp_fragl onto the front of clnp_frags.
+ *
+ * NOTES:			Failure is only due to insufficient resources.
+ */
+clnp_newpkt(m, src, dst, seg)
+struct mbuf 		*m;		/* new fragment */
+struct iso_addr		*src;	/* src of new fragment */
+struct iso_addr		*dst; 	/* dst of new fragment */
+struct clnp_segment	*seg;	/* segment part of fragment header */
+{
+	register struct clnp_fixed		*clnp = mtod(m, struct clnp_fixed *);
+	register struct clnp_fragl		*cfh;
+	struct mbuf 					*m0;	/* mbuf whose data area holds cfh */
+
+	/*
+	 *	The clnp_fragl structure that acts as header of all
+	 *	fragments for this datagram is kept in an mbuf of its
+	 *	own; without that mbuf nothing can be set up.
+	 */
+	MGET(m0, M_DONTWAIT, MT_FTABLE);
+	if (m0 == NULL)
+		return (0);
+	cfh = mtod(m0, struct clnp_fragl *);
+
+	/*
+	 *	Save a copy of this fragment's header in cfh.
+	 *	If the copy fails, free m0 and report failure.
+	 */
+	cfh->cfl_orighdr = m_copy(m, 0, (int)clnp->cnf_hdr_len);
+	if (cfh->cfl_orighdr == NULL) {
+		m_freem(m0);
+		return (0);
+	}
+
+	/* Record which datagram this list collects */
+	cfh->cfl_id = seg->cng_id;
+	bcopy((caddr_t)src, (caddr_t)&cfh->cfl_src, sizeof(struct iso_addr));
+	bcopy((caddr_t)dst, (caddr_t)&cfh->cfl_dst, sizeof(struct iso_addr));
+	cfh->cfl_ttl = clnp->cnf_ttl;
+
+	/* Total length less the header, minus one; no fragments held yet */
+	cfh->cfl_last = (seg->cng_tot_len - clnp->cnf_hdr_len) - 1;
+	cfh->cfl_frags = NULL;
+
+	/* Push onto the front of the list of packets */
+	cfh->cfl_next = clnp_frags;
+	clnp_frags = cfh;
+
+	/* Insert this fragment into list headed by cfh */
+	clnp_insert_frag(cfh, m, seg);
+	return(1);
+}
+
+/*
+ * FUNCTION:		clnp_insert_frag
+ *
+ * PURPOSE:			Insert fragment into list headed by 'cfh', keeping the
+ *					list ordered by offset and free of overlapping data.
+ * RETURNS:			nothing
+ * SIDE EFFECTS:	Consumes m: it is either linked into the list or freed.
+ *					May trim data off the end of the preceding fragment.
+ * NOTES:			This is the 'guts' of the reassembly algorithm.
+ *					Each fragment in this list contains a clnp_frag
+ *					structure laid on top of part of the old clnp header,
+ *					followed by the data of the fragment.
+ *					NOTE(review): a new fragment starting at the same offset
+ *					as its predecessor trims that predecessor to no data.
+ */
+clnp_insert_frag(cfh, m, seg)
+struct clnp_fragl	*cfh;	/* header of list of packet fragments */
+struct mbuf 		*m;		/* new fragment */
+struct clnp_segment	*seg;	/* segment part of fragment header */
+{
+	register struct clnp_fixed	*clnp;	/* clnp hdr of fragment */
+	register struct clnp_frag	*cf;	/* generic fragment ptr */
+	register struct clnp_frag 	*cf_sub = NULL;	/* frag subsequent to new one */
+	register struct clnp_frag 	*cf_prev = NULL; /* frag previous to new one */
+	u_short						first;	/* offset of first byte of initial pdu*/
+	u_short						last;	/* offset of last byte of initial pdu */
+	u_short						fraglen;/* length of fragment data (no hdr) */
+	
+	clnp = mtod(m, struct clnp_fixed *);
+	first = seg->cng_off;
+	CTOH(clnp->cnf_seglen_msb, clnp->cnf_seglen_lsb, fraglen);
+	fraglen -= clnp->cnf_hdr_len;
+	last = (first + fraglen) - 1;
+
+	IFDEBUG(D_REASS)
+		printf("clnp_insert_frag: New fragment: [%d ... %d], len %d\n",
+			first, last, fraglen);
+		printf("clnp_insert_frag: current fragments:\n");
+		for (cf = cfh->cfl_frags; cf != NULL; cf = cf->cfr_next) {
+			printf("\tcf x%x: [%d ... %d]\n", cf, cf->cfr_first, cf->cfr_last);
+		}
+	ENDDEBUG
+
+	if (cfh->cfl_frags != NULL) {
+		/*
+		 *	Find first frag beginning after the new one; cf_prev trails it
+		 */
+		for (cf = cfh->cfl_frags; cf != NULL; cf_prev = cf, cf = cf->cfr_next) {
+			if (cf->cfr_first > first) {
+				cf_sub = cf;
+				break;
+			}
+		}
+
+		IFDEBUG(D_REASS)
+			printf("clnp_insert_frag: Previous frag is ");
+			if (cf_prev == NULL)
+				printf("NULL\n");
+			else 
+				printf("[%d ... %d]\n", cf_prev->cfr_first, cf_prev->cfr_last);
+			printf("clnp_insert_frag: Subsequent frag is ");
+			if (cf_sub == NULL)
+				printf("NULL\n");
+			else 
+				printf("[%d ... %d]\n", cf_sub->cfr_first, cf_sub->cfr_last);
+		ENDDEBUG
+
+		/*
+		 *	If there is a fragment before the new one, check if it
+		 *	overlaps the new one. Offsets are inclusive, so sharing the
+		 *	single byte at 'first' counts. If so, trim the previous one.
+		 */
+		if (cf_prev != NULL) {
+			if (cf_prev->cfr_last >= first) {
+				u_short overlap = cf_prev->cfr_last - first;
+
+				IFDEBUG(D_REASS)
+					printf("clnp_insert_frag: previous overlaps by %d\n",
+						overlap);
+				ENDDEBUG
+
+				if (overlap + 1 >= fraglen) {
+					/*
+					 *	The previous fragment reaches at least as far as
+					 *	the last byte of the new one, so the new frag is
+					 *	entirely contained in it. Punt on the new frag.
+					 */
+					m_freem(m);
+					return;
+				} else {
+					/* Trim data off of end of previous fragment */
+					/* inc overlap to prevent duplication of last byte */
+					overlap++;
+					m_adj(cf_prev->cfr_data, -(int)overlap);
+					cf_prev->cfr_last -= overlap;
+				}
+			}
+		}
+
+		/*
+		 *	For all fragments past the new one, check if any data on
+		 *	the new one overlaps data on existing fragments (again down
+		 *	to a single shared byte). If so, trim the end of the new one.
+		 */
+		for (cf = cf_sub; cf != NULL; cf = cf->cfr_next) {
+			if (cf->cfr_first <= last) {
+				u_short overlap = last - cf->cfr_first;
+
+				IFDEBUG(D_REASS)
+					printf("clnp_insert_frag: subsequent overlaps by %d\n",
+						overlap);
+				ENDDEBUG
+
+				if (overlap > fraglen) {
+					/*
+					 *	The new fragment is entirely contained in the
+					 *	succeeding one. This should not happen, because
+					 *	early on in this code we scanned for the fragment
+					 *	which started after the new one!
+					 */
+					m_freem(m);
+					printf("clnp_insert_frag: internal error!\n");
+					return;
+				} else {
+					/* Trim data off of end of new fragment */
+					/* inc overlap to prevent duplication of last byte */
+					overlap++;
+					m_adj(m, -(int)overlap);
+					last -= overlap;
+				}
+			}
+		}
+	}
+
+	/*
+	 *	Insert the new fragment beween cf_prev and cf_sub
+	 *
+	 *	Note: the clnp hdr is still in the mbuf. 
+	 *	If the data of the mbuf is not word aligned, shave off enough
+	 *	so that it is. Then, cast the clnp_frag structure on top
+	 *	of the clnp header. 
+	 *	The clnp_hdr will not be used again (as we already have
+	 *	saved a copy of it).
+	 *
+	 *	Save in cfr_bytes the number of bytes to shave off to get to
+	 *	the data of the packet. This is used when we coalesce fragments;
+	 *	the clnp_frag structure must be removed before joining mbufs.
+	 */
+	{
+		int	pad;
+		u_int	bytes;
+
+		/*
+		 *	Number of bytes from clnp up to the next word boundary;
+		 *	zero when the header is already word aligned.
+		 */
+		pad = (4 - (int)((u_long)clnp & 3)) & 3;
+		/* bytes is number of bytes left in front of data */
+		bytes = clnp->cnf_hdr_len - pad;
+
+		IFDEBUG(D_REASS)
+			printf("clnp_insert_frag: clnp x%x requires %d alignment\n",
+				clnp, pad);
+		ENDDEBUG
+
+		/* make it word aligned if necessary */
+		if (pad)
+			m_adj(m, pad);
+
+		cf = mtod(m, struct clnp_frag *);
+		cf->cfr_bytes = bytes;
+
+		IFDEBUG(D_REASS)
+			printf("clnp_insert_frag: cf now x%x, cfr_bytes %d\n", cf,
+				cf->cfr_bytes);
+		ENDDEBUG
+	}
+	cf->cfr_first = first;
+	cf->cfr_last = last;
+
+
+	/*
+	 *	The data is the mbuf itself, although we must remember that the
+	 *	first few bytes are actually a clnp_frag structure
+	 */
+	cf->cfr_data = m;
+
+	/* link into place */
+	cf->cfr_next = cf_sub;
+	if (cf_prev == NULL)
+		cfh->cfl_frags = cf;
+	else
+		cf_prev->cfr_next = cf;
+}
+
+/*
+ * FUNCTION:		clnp_comp_pdu
+ *
+ * PURPOSE:			Scan the list of fragments headed by cfh. Merge
+ *					any contiguous fragments into one. If, after
+ *					traversing all the fragments, it is determined that
+ *					the packet is complete, then return a pointer to
+ *					the packet (with header prepended). Otherwise,
+ *					return NULL.
+ *
+ * RETURNS:			NULL, or a pointer to the assembled pdu in an mbuf chain.
+ *
+ * SIDE EFFECTS:	Will collapse contiguous fragments into one. When the pdu
+ *					is complete, cfh is unlinked from clnp_frags and freed.
+ * NOTES:			This code assumes that there are no overlaps of
+ *					fragment pdus, and that cfh holds at least one fragment.
+ */
+struct mbuf *
+clnp_comp_pdu(cfh)
+struct clnp_fragl	*cfh;		/* fragment header */
+{
+	register struct clnp_frag	*cf = cfh->cfl_frags;
+	/* Walk the list, gluing each fragment to a directly adjacent successor */
+	while (cf->cfr_next != NULL) {
+		register struct clnp_frag	*cf_next = cf->cfr_next;
+
+		IFDEBUG(D_REASS)
+			printf("clnp_comp_pdu: comparing: [%d ... %d] to [%d ... %d]\n",
+				cf->cfr_first, cf->cfr_last, cf_next->cfr_first, 
+				cf_next->cfr_last);
+		ENDDEBUG
+
+		if (cf->cfr_last == (cf_next->cfr_first - 1)) {
+			/*
+			 *	Merge fragment cf and cf_next
+			 *
+			 *	- update cf header
+			 *	- trim clnp_frag structure off of cf_next
+			 *	- append cf_next to cf
+			 */
+			struct clnp_frag	cf_next_hdr;
+			struct clnp_frag	*next_frag;
+			/* cf_next is unusable after the m_adj below, so copy it first */
+			cf_next_hdr = *cf_next;
+			next_frag = cf_next->cfr_next;
+
+			IFDEBUG(D_REASS)
+				struct mbuf *mdump;
+				int l;
+				printf("clnp_comp_pdu: merging fragments\n");
+				printf("clnp_comp_pdu: 1st: [%d ... %d] (bytes %d)\n", 
+					cf->cfr_first, cf->cfr_last, cf->cfr_bytes);
+				mdump = cf->cfr_data;
+				l = 0;
+				while (mdump != NULL) {
+					printf("\tmbuf x%x, m_len %d\n", mdump, mdump->m_len);
+					l += mdump->m_len;
+					mdump = mdump->m_next;
+				}
+				printf("\ttotal len: %d\n", l);
+				printf("clnp_comp_pdu: 2nd: [%d ... %d] (bytes %d)\n", 
+					cf_next->cfr_first, cf_next->cfr_last, cf_next->cfr_bytes);
+				mdump = cf_next->cfr_data;
+				l = 0;
+				while (mdump != NULL) {
+					printf("\tmbuf x%x, m_len %d\n", mdump, mdump->m_len);
+					l += mdump->m_len;
+					mdump = mdump->m_next;
+				}
+				printf("\ttotal len: %d\n", l);
+			ENDDEBUG
+
+			cf->cfr_last = cf_next->cfr_last;
+			/*
+			 *	After this m_adj, the cf_next ptr is useless because we
+			 *	have adjusted the clnp_frag structure away...
+			 */
+			IFDEBUG(D_REASS)
+				printf("clnp_comp_pdu: shaving off %d bytes\n", 
+					cf_next_hdr.cfr_bytes);
+			ENDDEBUG
+			m_adj(cf_next_hdr.cfr_data, (int)cf_next_hdr.cfr_bytes);
+			m_cat(cf->cfr_data, cf_next_hdr.cfr_data);
+			cf->cfr_next = next_frag;
+		} else {
+			cf = cf->cfr_next;
+		}
+	}
+
+	cf = cfh->cfl_frags;
+
+	IFDEBUG(D_REASS)
+		struct mbuf *mdump = cf->cfr_data;
+		printf("clnp_comp_pdu: first frag now: [%d ... %d]\n", cf->cfr_first,
+			cf->cfr_last);
+		printf("clnp_comp_pdu: data for frag:\n");
+		while (mdump != NULL) {
+			printf("mbuf x%x, m_len %d\n", mdump, mdump->m_len);
+/* 			dump_buf(mtod(mdump, caddr_t), mdump->m_len);*/
+			mdump = mdump->m_next;
+		}
+	ENDDEBUG
+
+	/* Complete when the first fragment now spans offsets 0 through cfl_last */
+	if ((cf->cfr_first == 0) && (cf->cfr_last == cfh->cfl_last)) {
+		/*
+		 *	We have a complete pdu!
+		 *	- Remove the frag header from (only) remaining fragment
+		 *		(which is not really a fragment anymore, as the datagram is
+		 *		complete).
+		 *	- Prepend a clnp header
+		 */
+		struct mbuf	*data = cf->cfr_data;
+		struct mbuf	*hdr = cfh->cfl_orighdr;
+		struct clnp_fragl *scan;
+
+		IFDEBUG(D_REASS)
+			printf("clnp_comp_pdu: complete pdu!\n");
+		ENDDEBUG
+
+		m_adj(data, (int)cf->cfr_bytes);
+		m_cat(hdr, data);
+
+		IFDEBUG(D_DUMPIN)
+			struct mbuf *mdump = hdr;
+			printf("clnp_comp_pdu: pdu is:\n");
+			while (mdump != NULL) {
+				printf("mbuf x%x, m_len %d\n", mdump, mdump->m_len);
+/* 				dump_buf(mtod(mdump, caddr_t), mdump->m_len);*/
+				mdump = mdump->m_next;
+			}
+		ENDDEBUG
+
+		/*
+		 *	Remove cfh from the list of fragmented pdus
+		 */
+		if (clnp_frags == cfh) {
+			clnp_frags = cfh->cfl_next;
+		} else {
+			for (scan = clnp_frags; scan != NULL; scan = scan->cfl_next) {
+				if (scan->cfl_next == cfh) {
+					scan->cfl_next = cfh->cfl_next;
+					break;
+				}
+			}
+		}
+
+		/* free the mbuf holding cfh; hdr (cfl_orighdr) is returned as the pdu */
+		m_freem(dtom(cfh));
+
+		return(hdr);
+	}
+
+	return(NULL);
+}
+#ifdef	TROLL
+static int troll_cnt;	/* incremented on every call of troll_output */
+#include <sys/time.h>
+/*
+ * FUNCTION:		troll_random
+ *
+ * PURPOSE:			Produce a cheap pseudo-random fraction for the troll.
+ *
+ * RETURNS:			The value of (time.tv_usec % 100) divided by 100.
+ *
+ * SIDE EFFECTS:	None.
+ *
+ * NOTES:			Derived from the microsecond field of the external
+ *					'time' variable, so it is only as random as the clock.
+ */
+float troll_random()
+{
+	extern struct timeval time;
+
+	return((float)(time.tv_usec % 100) / (float) 100);
+}
+
+/*
+ * FUNCTION:		troll_output
+ *
+ * PURPOSE:			Do something sneaky with the datagram passed. The
+ *					first matching bit of trollctl.tr_ops decides what:
+ *						TR_DUPPKT	send every Nth packet twice
+ *						TR_DROPPKT	drop the packet
+ *						TR_CHANGE	zero cnf_cksum_msb, then send
+ *					With none of these set the packet is sent as is.
+ *
+ * RETURNS:			0, or unix error code
+ *
+ * SIDE EFFECTS:	Increments troll_cnt. The mbuf chain m is always
+ *					consumed: handed to if_output or freed here.
+ *
+ * NOTES:			The operation of this procedure is regulated by the
+ *					troll control structure (Troll).
+ */
+troll_output(ifp, m, dst, rt)
+struct ifnet	*ifp;
+struct mbuf		*m;
+struct sockaddr	*dst;
+struct rtentry *rt;
+{
+	int	err = 0;
+	troll_cnt++;
+
+	if (trollctl.tr_ops & TR_DUPPKT) {
+		/* Duplicate every Nth packet. TODO: random? */
+		float	f_freq = troll_cnt * trollctl.tr_dup_freq;
+		int		i_freq = troll_cnt * trollctl.tr_dup_freq;
+		if (i_freq == f_freq) {
+			struct mbuf *dup = m_copy(m, 0, (int)M_COPYALL);
+			if (dup != NULL)
+				err = (*ifp->if_output)(ifp, dup, dst, rt);
+		}
+		if (err) {
+			/* the original will not be sent, so do not leak it */
+			m_freem(m);
+			return(err);
+		}
+	} else if (trollctl.tr_ops & TR_DROPPKT) {
+		/* dropping still has to release the mbuf chain */
+		m_freem(m);
+		return(0);
+	} else if (trollctl.tr_ops & TR_CHANGE) {
+		struct clnp_fixed *clnp = mtod(m, struct clnp_fixed *);
+		clnp->cnf_cksum_msb = 0;
+	}
+	/* every remaining case sends the (possibly altered) packet */
+	return((*ifp->if_output)(ifp, m, dst, rt));
+}
+
+#endif	/* TROLL */
diff --git a/sys/netiso/clnp_input.c b/sys/netiso/clnp_input.c
new file mode 100644
index 00000000000..c49de95e5fa
--- /dev/null
+++ b/sys/netiso/clnp_input.c
@@ -0,0 +1,551 @@
+/*-
+ * Copyright (c) 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)clnp_input.c	8.1 (Berkeley) 6/10/93
+ */
+
+/***********************************************************
+		Copyright IBM Corporation 1987
+
+                      All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and its 
+documentation for any purpose and without fee is hereby granted, 
+provided that the above copyright notice appear in all copies and that
+both that copyright notice and this permission notice appear in 
+supporting documentation, and that the name of IBM not be
+used in advertising or publicity pertaining to distribution of the
+software without specific, written prior permission.  
+
+IBM DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING
+ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL
+IBM BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR
+ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+SOFTWARE.
+
+******************************************************************/
+
+/*
+ * ARGO Project, Computer Sciences Dept., University of Wisconsin - Madison
+ */
+/* $Header: /var/src/sys/netiso/RCS/clnp_input.c,v 5.1 89/02/09 16:20:32 hagens Exp $ */
+/* $Source: /var/src/sys/netiso/RCS/clnp_input.c,v $ */
+
+#include <sys/param.h>
+#include <sys/mbuf.h>
+#include <sys/domain.h>
+#include <sys/protosw.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/errno.h>
+#include <sys/time.h>
+
+#include <net/if.h>
+#include <net/if_types.h>
+#include <net/route.h>
+
+#include <netiso/iso.h>
+#include <netiso/iso_var.h>
+#include <netiso/iso_snpac.h>
+#include <netiso/clnp.h>
+#include <netiso/clnl.h>
+#include <netiso/esis.h>
+#include <netinet/in_systm.h>
+#include <netinet/ip.h>
+#include <netinet/if_ether.h>
+#include <netiso/eonvar.h>
+#include <netiso/clnp_stat.h>
+#include <netiso/argo_debug.h>
+
+#ifdef ISO
+u_char		clnp_protox[ISOPROTO_MAX];	/* ISOPROTO_* -> index into isosw */
+struct clnl_protosw clnl_protox[256];	/* indexed by cnf_proto_id (clnlintr) */
+int			clnpqmaxlen = IFQ_MAXLEN;	/* RAH? why is this a variable */
+struct mbuf	*clnp_data_ck();
+/* K&R style forward declarations of input routines */
+int	clnp_input();
+
+int	esis_input();
+
+#ifdef	ISO_X25ESIS
+int	x25esis_input();
+#endif	/* ISO_X25ESIS */
+
+/*
+ * FUNCTION:		clnp_init
+ *
+ * PURPOSE:			clnp initialization. Fill in clnp switch tables.
+ *
+ * RETURNS:			none
+ *
+ * SIDE EFFECTS:	fills in clnp_protox table with correct offsets into
+ *					the isosw table, registers clnp_input in clnl_protox
+ *					and sets the maximum length of the clnl input queue.
+ * NOTES:			A protocol pffindproto cannot find is only reported.
+ */
+clnp_init()
+{
+	register struct protosw *pr;
+
+	/*
+	 *	CLNP protox initialization: record each protocol's isosw index
+	 */
+	if ((pr = pffindproto(PF_ISO, ISOPROTO_RAW, SOCK_RAW)) == 0)
+		printf("clnp_init: no raw CLNP\n");
+	else
+		clnp_protox[ISOPROTO_RAW] = pr - isosw;
+
+	if ((pr = pffindproto(PF_ISO, ISOPROTO_TP, SOCK_SEQPACKET)) == 0)
+		printf("clnp_init: no tp/clnp\n");
+	else
+		clnp_protox[ISOPROTO_TP] = pr - isosw;
+
+	/*
+	 *	CLNL protox initialization
+	 */
+	clnl_protox[ISO8473_CLNP].clnl_input = clnp_input;
+
+	clnlintrq.ifq_maxlen = clnpqmaxlen;
+}
+
+/*
+ * FUNCTION:		clnlintr
+ *
+ * PURPOSE:			Drain the clnl input queue, handing each packet to the
+ *					routine registered in clnl_protox for its cnf_proto_id.
+ * RETURNS:			nothing.
+ *
+ * SIDE EFFECTS:	Frees packets that fail the checks below or that have
+ *					no registered input routine.
+ * NOTES:			For EON and ethernet interfaces the encapsulating header
+ *					is stripped and summarized in the snpa_hdr passed along.
+ */
+clnlintr()
+{
+	register struct mbuf		*m;		/* ptr to first mbuf of pkt */
+	register struct clnl_fixed	*clnl;	/* ptr to fixed part of clnl hdr */
+	int							s;		/* save and restore priority */
+	struct clnl_protosw			*clnlsw;/* ptr to protocol switch */
+	struct snpa_hdr				sh;		/* subnetwork hdr */
+
+	/*
+	 *	Get next datagram off clnl input queue (done at splimp)
+	 */
+next:
+	s = splimp();
+	/* IF_DEQUEUESNPAHDR(&clnlintrq, m, sh);*/
+	IF_DEQUEUE(&clnlintrq, m);
+	splx(s);
+
+	/* An empty queue ends the run; every other path loops back to next */
+	if (m == 0)		/* nothing to do */
+		return;
+	if ((m->m_flags & M_PKTHDR) == 0 || m->m_pkthdr.rcvif == 0) {
+		m_freem(m);
+		goto next;
+	} else {
+		register struct ifaddr *ifa;
+		for (ifa = m->m_pkthdr.rcvif->if_addrlist; ifa; ifa = ifa->ifa_next)
+			if (ifa->ifa_addr->sa_family == AF_ISO)
+				break;
+		if (ifa == 0) {
+			m_freem(m);
+			goto next;
+		}
+	}
+	bzero((caddr_t)&sh, sizeof(sh));
+	sh.snh_flags = m->m_flags & (M_MCAST|M_BCAST);
+	switch((sh.snh_ifp = m->m_pkthdr.rcvif)->if_type) {
+		extern int ether_output();
+	case IFT_EON:
+		bcopy(mtod(m, caddr_t), (caddr_t)sh.snh_dhost, sizeof(u_long));
+		bcopy(sizeof(u_long) + mtod(m, caddr_t),
+					(caddr_t)sh.snh_shost, sizeof(u_long));
+		sh.snh_dhost[4] = mtod(m, u_char *)[sizeof(struct ip) +
+								_offsetof(struct eon_hdr, eonh_class)];
+		m->m_data += EONIPLEN;
+		m->m_len -= EONIPLEN;
+		m->m_pkthdr.len -= EONIPLEN;
+		break;
+	/* NOTE(review): the 2*sizeof copy below presumably fills snh_shost too */
+	default:
+		if (sh.snh_ifp->if_output == ether_output) {
+			bcopy((caddr_t)(mtod(m, struct ether_header *)->ether_dhost),
+				(caddr_t)sh.snh_dhost, 2*sizeof(sh.snh_dhost));
+			m->m_data += sizeof (struct ether_header);
+			m->m_len -= sizeof (struct ether_header);
+			m->m_pkthdr.len -= sizeof (struct ether_header);
+		}
+	}
+	IFDEBUG(D_INPUT)
+		int i;
+		printf("clnlintr: src:");
+		for (i=0; i<6; i++)
+			printf("%x%c", sh.snh_shost[i] & 0xff, (i<5) ? ':' : ' ');
+		printf(" dst:");
+		for (i=0; i<6; i++)
+			printf("%x%c", sh.snh_dhost[i] & 0xff, (i<5) ? ':' : ' ');
+		printf("\n");
+	ENDDEBUG
+
+	/*
+	 *	Get the fixed part of the clnl header into the first mbuf.
+	 *	Drop the packet if this fails.
+	 *	m_pullup is called when the first mbuf is a cluster or when it
+	 *	holds less than the fixed part of the header.
+	 */
+	if ((IS_CLUSTER(m) || (m->m_len < sizeof(struct clnl_fixed))) &&
+		((m = m_pullup(m, sizeof(struct clnl_fixed))) == 0)) {
+		INCSTAT(cns_toosmall);	/* TODO: use clnl stats */
+		goto next;				/* m_pullup discards mbuf */
+	}
+
+	clnl = mtod(m, struct clnl_fixed *);
+
+	/*
+	 *	Drop packet if the length of the header is not reasonable.
+	 */
+	if ((clnl->cnf_hdr_len < CLNP_HDR_MIN) || 
+		(clnl->cnf_hdr_len > CLNP_HDR_MAX)) {
+		INCSTAT(cns_badhlen);	/* TODO: use clnl stats */
+		m_freem(m);
+		goto next;
+	}
+
+	/*
+	 *	If the header is not contained in this mbuf, make it so.
+	 *	Drop packet if this fails.
+	 *	Note: m_pullup will allocate a cluster mbuf if necessary
+	 */
+	if (clnl->cnf_hdr_len > m->m_len) {
+		if ((m = m_pullup(m, (int)clnl->cnf_hdr_len)) == 0) {
+			INCSTAT(cns_badhlen);	/* TODO: use clnl stats */
+			goto next;	/* m_pullup discards mbuf */
+		}
+		clnl = mtod(m, struct clnl_fixed *);
+	}
+
+	clnlsw = &clnl_protox[clnl->cnf_proto_id];
+
+	/* Dispatch on protocol id; ids with no input routine are dropped */
+	if (clnlsw->clnl_input)
+		(*clnlsw->clnl_input) (m, &sh);
+	else
+		m_freem(m);
+
+	goto next;
+}
+
+/*
+ * FUNCTION:		clnp_input
+ *
+ * PURPOSE:			process an incoming clnp packet: validate the header,
+ *					then forward, reassemble or deliver it by pdu type.
+ * RETURNS:			nothing
+ *
+ * SIDE EFFECTS:	increments fields of clnp_stat structure. m is always
+ *					passed on to another routine or discarded.
+ * NOTES:			'src' and 'dst' are macros defined in the body.
+ *	TODO: I would like to make seg_part a pointer into the mbuf, but 
+ *	will it be correctly aligned?
+ */
+clnp_input(m, shp)
+struct mbuf		*m;		/* ptr to first mbuf of pkt */
+struct snpa_hdr	*shp;	/* subnetwork header */
+{
+	register struct clnp_fixed	*clnp;	/* ptr to fixed part of header */
+	struct sockaddr_iso			source; /* source address of pkt */
+	struct sockaddr_iso			target; /* destination address of pkt */
+#define src	source.siso_addr
+#define dst	target.siso_addr
+	caddr_t						hoff;	/* current offset in packet */
+	caddr_t						hend;	/* address of end of header info */
+	struct clnp_segment			seg_part; /* segment part of hdr */
+	int							seg_off=0; /* offset of segment part of hdr */
+	int							seg_len;/* length of packet data&hdr in bytes */
+	struct clnp_optidx			oidx, *oidxp = NULL;	/* option index */
+	extern int 					iso_systype;	/* used by ESIS config resp */
+	extern struct sockaddr_iso	blank_siso;		/* used for initializing */
+	int							need_afrin = 0; 
+										/* true if congestion experienced */
+										/* which means you need afrin nose */
+										/* spray. How clever! */
+
+	IFDEBUG(D_INPUT)
+		printf(
+		   "clnp_input: proccessing dg; First mbuf m_len %d, m_type x%x, %s\n", 
+			m->m_len, m->m_type, IS_CLUSTER(m) ? "cluster" : "normal");
+	ENDDEBUG
+	need_afrin = 0;
+
+	/*
+	 *	If no iso addresses have been set, there is nothing
+	 *	to do with the packet.
+	 */
+	if (iso_ifaddr == NULL) {
+		clnp_discard(m, ADDR_DESTUNREACH);
+		return;
+	}
+	
+	INCSTAT(cns_total);
+	clnp = mtod(m, struct clnp_fixed *);
+
+	IFDEBUG(D_DUMPIN)
+		struct mbuf *mhead;
+		int			total_len = 0;
+		printf("clnp_input: clnp header:\n");
+		dump_buf(mtod(m, caddr_t), clnp->cnf_hdr_len);
+		printf("clnp_input: mbuf chain:\n");
+		for (mhead = m; mhead != NULL; mhead=mhead->m_next) {
+			printf("m x%x, len %d\n", mhead, mhead->m_len);
+			total_len += mhead->m_len;
+		}
+		printf("clnp_input: total length of mbuf chain %d:\n", total_len);
+	ENDDEBUG
+
+	/*
+	 *	Compute checksum (if necessary) and drop packet if
+	 *	checksum does not match
+	 */
+	if (CKSUM_REQUIRED(clnp) && iso_check_csum(m, (int)clnp->cnf_hdr_len)) {
+		INCSTAT(cns_badcsum);
+		clnp_discard(m, GEN_BADCSUM);
+		return;
+	}
+
+	if (clnp->cnf_vers != ISO8473_V1) {
+		INCSTAT(cns_badvers);
+		clnp_discard(m, DISC_UNSUPPVERS);
+		return;
+	}
+
+
+ 	/* check mbuf data length: clnp_data_ck will free mbuf upon error */
+	CTOH(clnp->cnf_seglen_msb, clnp->cnf_seglen_lsb, seg_len);
+	if ((m = clnp_data_ck(m, seg_len)) == 0)
+		return;
+	
+	clnp = mtod(m, struct clnp_fixed *);
+	hend = (caddr_t)clnp + clnp->cnf_hdr_len;
+
+	/* 
+	 *	extract the source and destination address
+	 *	drop packet on failure (the extract macro zeroes hoff)
+	 */
+	source = target = blank_siso;
+
+	hoff = (caddr_t)clnp + sizeof(struct clnp_fixed);
+	CLNP_EXTRACT_ADDR(dst, hoff, hend);
+	if (hoff == (caddr_t)0) {
+		INCSTAT(cns_badaddr);
+		clnp_discard(m, GEN_INCOMPLETE);
+		return;
+	}
+	CLNP_EXTRACT_ADDR(src, hoff, hend);
+	if (hoff == (caddr_t)0) {
+		INCSTAT(cns_badaddr);
+		clnp_discard(m, GEN_INCOMPLETE);
+		return;
+	}
+
+	IFDEBUG(D_INPUT)
+		printf("clnp_input: from %s", clnp_iso_addrp(&src));
+		printf(" to %s\n", clnp_iso_addrp(&dst));
+	ENDDEBUG
+
+	/*
+	 *	extract the segmentation information, if it is present.
+	 *	drop packet on failure
+	 */
+	if (((clnp->cnf_type & CNF_TYPE) != CLNP_ER) &&
+		(clnp->cnf_type & CNF_SEG_OK)) {
+		if (hoff + sizeof(struct clnp_segment) > hend) {
+			INCSTAT(cns_noseg);
+			clnp_discard(m, GEN_INCOMPLETE);
+			return;
+		} else {
+			(void) bcopy(hoff, (caddr_t)&seg_part, sizeof(struct clnp_segment));
+			/* make sure segmentation fields are in host order */
+			seg_part.cng_id = ntohs(seg_part.cng_id);
+			seg_part.cng_off = ntohs(seg_part.cng_off);
+			seg_part.cng_tot_len = ntohs(seg_part.cng_tot_len);
+			seg_off = hoff - (caddr_t)clnp;
+			hoff += sizeof(struct clnp_segment);
+		}
+	}
+
+	/*
+	 *	process options if present. If clnp_opt_sanity returns
+	 *	a non-zero error code (indicating an error was found in the
+	 *	options) or an unsupported option was found
+	 *	then drop packet and emit an ER.
+	 */
+	if (hoff < hend) {
+		int		errcode;
+
+		oidxp = &oidx;
+		errcode = clnp_opt_sanity(m, hoff, hend-hoff, oidxp);
+
+		/* we do not support security */
+		if ((errcode == 0) && (oidxp->cni_securep))
+			errcode = DISC_UNSUPPSECURE;
+
+		/* the er option is valid with ER pdus only */
+		if ((errcode == 0) && (oidxp->cni_er_reason != ER_INVALREAS) && 
+			((clnp->cnf_type & CNF_TYPE) != CLNP_ER))
+			errcode = DISC_UNSUPPOPT;
+
+#ifdef	DECBIT
+		/* check if the congestion experienced bit is set */
+		if (oidxp->cni_qos_formatp) {
+			caddr_t	qosp = CLNP_OFFTOOPT(m, oidxp->cni_qos_formatp);
+			u_char	qos = *qosp;
+
+			need_afrin = ((qos & (CLNPOVAL_GLOBAL|CLNPOVAL_CONGESTED)) ==
+				(CLNPOVAL_GLOBAL|CLNPOVAL_CONGESTED));
+			if (need_afrin)
+				INCSTAT(cns_congest_rcvd);
+		}
+#endif	/* DECBIT */
+
+		if (errcode != 0) {
+			clnp_discard(m, (char)errcode);
+			IFDEBUG(D_INPUT)
+				printf("clnp_input: dropped (err x%x) due to bad options\n",
+					errcode);
+			ENDDEBUG
+			return;
+		}
+	}
+	
+	/*
+	 *	check if this packet is for us. if not, then forward
+	 */
+	if (clnp_ours(&dst) == 0) {
+		IFDEBUG(D_INPUT)
+			printf("clnp_input: forwarding packet not for us\n");
+		ENDDEBUG
+ 		clnp_forward(m, seg_len, &dst, oidxp, seg_off, shp);
+		return;
+	}
+
+	/*
+	 *	ESIS Configuration Response Function
+	 *
+	 *	If the packet received was sent to the multicast address
+	 *	all end systems, then send an esh to the source
+	 */
+	if ((shp->snh_flags & M_MCAST) && (iso_systype == SNPA_ES)) {
+		extern short esis_holding_time;
+
+		esis_shoutput(shp->snh_ifp, ESIS_ESH, esis_holding_time,
+			shp->snh_shost, 6, &dst);
+	}
+
+	/*
+	 *	If this is a fragment, then try to reassemble it. If clnp_reass
+	 *	returns non NULL, the packet has been reassembled, and should
+	 *	be given to TP. Otherwise the fragment has been dealt with
+	 *	by the reassembly code (either stored or deleted). In either case
+	 *	we should have nothing more to do with it.
+	 */
+	if (((clnp->cnf_type & CNF_TYPE) != CLNP_ER) &&
+		(clnp->cnf_type & CNF_SEG_OK) &&
+		(seg_len != seg_part.cng_tot_len)) {
+		struct mbuf	*m0;
+
+		if ((m0 = clnp_reass(m, &src, &dst, &seg_part)) != NULL) {
+			m = m0;
+			clnp = mtod(m, struct clnp_fixed *);
+			INCSTAT(cns_reassembled);
+		} else {
+			return;
+		}
+	}
+	
+	/*
+	 *	give the packet to the higher layer
+	 *
+	 *	Note: the total length of packet
+	 *	is the total length field of the segmentation part,
+	 *	or, if absent, the segment length field of the
+	 *	header.
+	 */
+	INCSTAT(cns_delivered);
+	switch (clnp->cnf_type & CNF_TYPE) {
+	case CLNP_ER:
+		/*
+		 *	This ER must have the er option.
+		 *	If the option is not present, discard datagram.
+		 */
+		if (oidxp == NULL || oidxp->cni_er_reason == ER_INVALREAS) {
+			clnp_discard(m, GEN_HDRSYNTAX);
+		} else {
+			clnp_er_input(m, &src, oidxp->cni_er_reason);
+		}
+		break;
+
+	case CLNP_DT:
+		(*isosw[clnp_protox[ISOPROTO_TP]].pr_input)(m, &source, &target,
+			clnp->cnf_hdr_len, need_afrin);
+		break;
+
+ 	case CLNP_RAW:
+	case CLNP_ECR:
+		IFDEBUG(D_INPUT)
+			printf("clnp_input: raw input of %d bytes\n",
+				clnp->cnf_type & CNF_SEG_OK ? seg_part.cng_tot_len : seg_len);
+		ENDDEBUG
+		(*isosw[clnp_protox[ISOPROTO_RAW]].pr_input)(m, &source, &target,
+					clnp->cnf_hdr_len);
+		break;
+
+	case CLNP_EC:
+		IFDEBUG(D_INPUT)
+			printf("clnp_input: echoing packet\n");
+		ENDDEBUG
+		(void)clnp_echoreply(m,
+			(clnp->cnf_type & CNF_SEG_OK ? (int)seg_part.cng_tot_len : seg_len),
+			&source, &target, oidxp);
+		break;
+
+	default:
+ 		printf("clnp_input: unknown clnp pkt type %d\n",
+			clnp->cnf_type & CNF_TYPE);
+		clnp_stat.cns_delivered--;
+		clnp_stat.cns_noproto++;
+		clnp_discard(m, GEN_HDRSYNTAX);
+ 		break;
+	}
+}
+#endif /* ISO */
diff --git a/sys/netiso/clnp_options.c b/sys/netiso/clnp_options.c
new file mode 100644
index 00000000000..250b438664f
--- /dev/null
+++ b/sys/netiso/clnp_options.c
@@ -0,0 +1,532 @@
+/*-
+ * Copyright (c) 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)clnp_options.c	8.1 (Berkeley) 6/10/93
+ */
+
+/***********************************************************
+		Copyright IBM Corporation 1987
+
+                      All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and its 
+documentation for any purpose and without fee is hereby granted, 
+provided that the above copyright notice appear in all copies and that
+both that copyright notice and this permission notice appear in 
+supporting documentation, and that the name of IBM not be
+used in advertising or publicity pertaining to distribution of the
+software without specific, written prior permission.  
+
+IBM DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING
+ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL
+IBM BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR
+ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+SOFTWARE.
+
+******************************************************************/
+
+/*
+ * ARGO Project, Computer Sciences Dept., University of Wisconsin - Madison
+ */
+/* $Header: /var/src/sys/netiso/RCS/clnp_options.c,v 5.1 89/02/09 16:20:37 hagens Exp $ */
+/* $Source: /var/src/sys/netiso/RCS/clnp_options.c,v $ */
+
+#ifdef ISO
+
+#include <sys/param.h>
+#include <sys/mbuf.h>
+#include <sys/domain.h>
+#include <sys/protosw.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/errno.h>
+
+#include <net/if.h>
+#include <net/route.h>
+
+#include <netiso/iso.h>
+#include <netiso/clnp.h>
+#include <netiso/clnp_stat.h>
+#include <netiso/argo_debug.h>
+
+/*
+ * FUNCTION:		clnp_update_srcrt
+ *
+ * PURPOSE:			Process src rt option accompanying a clnp datagram.
+ *						- bump src route ptr if src routing and
+ *							we appear current in src route list.
+ * RETURNS:			none
+ * SIDE EFFECTS:	May advance the current-route offset stored in the option.
+ * NOTES:			If source routing has been terminated, or the current
+ *					address is too long for a struct iso_addr, do nothing.
+ */
+clnp_update_srcrt(options, oidx)
+struct mbuf			*options;	/* ptr to options mbuf */
+struct clnp_optidx	*oidx;		/* ptr to option index */
+{
+	u_char			len;	/* length of current address */
+	struct iso_addr	isoa;	/* copy current address into here */
+
+	if (CLNPSRCRT_TERM(oidx, options)) {
+		IFDEBUG(D_OPTIONS)
+			printf("clnp_update_srcrt: src rt terminated\n");
+		ENDDEBUG
+		return;
+	}
+
+	len = CLNPSRCRT_CLEN(oidx, options);
+	if (len > sizeof(isoa))	/* len comes from the option: never overrun isoa */
+		return;
+	/* NOTE(review): if isoa_len is isoa's first byte, it clobbers addr[0] */
+	bcopy(CLNPSRCRT_CADDR(oidx, options), (caddr_t)&isoa, len);
+	isoa.isoa_len = len;
+
+	IFDEBUG(D_OPTIONS)
+		printf("clnp_update_srcrt: current src rt: %s\n", 
+			clnp_iso_addrp(&isoa));
+	ENDDEBUG
+
+	if (clnp_ours(&isoa)) {
+		IFDEBUG(D_OPTIONS)
+			printf("clnp_update_srcrt: updating src rt\n");
+		ENDDEBUG
+		/* update pointer to next src route */
+		len++;	/* count length byte too! */
+		CLNPSRCRT_OFF(oidx, options) += len;
+	}
+}
+
+/*
+ * FUNCTION:		clnp_dooptions
+ *
+ * PURPOSE:			Process options accompanying a clnp datagram.
+ *					Processing includes
+ *						- log our address if recording route
+ *
+ * RETURNS:			none
+ *
+ * SIDE EFFECTS:	May append isoa to the record route option held in the
+ *					options mbuf, or mark recording terminated (0xff).
+ * NOTES:			A record route offset below 3 is left untouched.
+ */
+clnp_dooptions(options, oidx, ifp, isoa)
+struct mbuf			*options;	/* ptr to options mbuf */
+struct clnp_optidx	*oidx;		/* ptr to option index */
+struct ifnet		*ifp;		/* ptr to interface pkt is leaving on */
+struct iso_addr		*isoa;		/* ptr to our address for this ifp */
+{
+	/*
+	 *	If record route is specified, store the address passed
+	 *	(length byte first) at the first free byte of the option
+	 *	and advance the option's offset field past it.
+	 */
+	if (oidx->cni_recrtp) {
+		char 	*opt;			/* ptr to beginning of recrt option */
+		u_char	off;			/* offset from opt of first free byte */
+		char	*rec_start;		/* beginning of new rt recorded */
+
+		opt = CLNP_OFFTOOPT(options, oidx->cni_recrtp);
+		off = *(opt + 1);
+		rec_start = opt + off - 1;
+
+		IFDEBUG(D_OPTIONS)
+			printf("clnp_dooptions: record route: option x%x for %d bytes\n",
+				opt, oidx->cni_recrt_len);
+			printf("\tfree slot offset x%x\n", off);
+			printf("clnp_dooptions: recording %s\n", clnp_iso_addrp(isoa));
+			printf("clnp_dooptions: option dump:\n");
+			dump_buf(opt, oidx->cni_recrt_len);
+		ENDDEBUG
+
+		/* recording must be active; off < 3 would overwrite opt[0]/opt[1] */
+		if (off != 0xff && off >= 3) {
+			int new_addrlen = isoa->isoa_len + 1;
+			/* 
+			 *	if there is insufficient room to store the next address,
+			 *	then terminate recording. Plus 1 on isoa_len is for the
+			 *	length byte itself
+			 */
+			if (oidx->cni_recrt_len - (off - 1) < new_addrlen) {
+				*(opt + 1) = 0xff;	/* terminate recording */
+			} else {
+				IFDEBUG(D_OPTIONS)
+					printf("clnp_dooptions: new addr at x%x for %d\n",
+						rec_start, new_addrlen);
+				ENDDEBUG
+
+				bcopy((caddr_t)isoa, rec_start, new_addrlen);
+
+				/* update offset field */
+				*(opt + 1) += new_addrlen;
+
+				IFDEBUG(D_OPTIONS)
+					printf("clnp_dooptions: new option dump:\n");
+					dump_buf(opt, oidx->cni_recrt_len);
+				ENDDEBUG
+			}
+		}
+	}
+}
+
+/*
+ * FUNCTION:		clnp_set_opts
+ *
+ * PURPOSE:			Check the data mbuf passed for option sanity. If it is
+ *					ok, then set the options ptr to address the data mbuf.
+ *					If an options mbuf exists, free it. This implies that
+ *					any old options will be lost. If data is NULL, simply
+ *					free any old options.
+ *
+ * RETURNS:			unix error code
+ *
+ * SIDE EFFECTS:	
+ *
+ * NOTES:			
+ */
+clnp_set_opts(options, data)
+struct mbuf	**options;	/* in/out: option mbuf currently installed */
+struct mbuf	**data;		/* in/out: candidate options; NULLed if adopted */
+{
+	struct clnp_optidx	scratch;	/* index filled in by the sanity check */
+	struct mbuf			*cand;		/* candidate option mbuf */
+
+	/* Whatever happens next, the previously installed options go away. */
+	if (*options != NULL) {
+		m_freem(*options);
+		*options = NULL;
+	}
+
+	/* No replacement supplied: dropping the old options was the whole job. */
+	cand = *data;
+	if (cand == NULL)
+		return 0;
+
+	/*
+	 *	The candidate must parse cleanly before it is adopted.  The index
+	 *	fields are only looked at once clnp_opt_sanity has succeeded.
+	 */
+	if (clnp_opt_sanity(cand, mtod(cand, caddr_t), cand->m_len, &scratch) != 0)
+		return EINVAL;
+
+	/* Security, priority and ER reason options may not be set this way. */
+	if (scratch.cni_securep || scratch.cni_priorp ||
+		scratch.cni_er_reason != ER_INVALREAS)
+		return EINVAL;
+
+	/*
+	 *	Adopt the mbuf and clear *data so the caller won't free it.
+	 */
+	*options = cand;
+	*data = NULL;
+	return 0;
+}
+
+/*
+ * FUNCTION:		clnp_opt_sanity
+ *
+ * PURPOSE:			Check the options (beginning at opts for len bytes) for
+ *					sanity. In addition, fill in the option index structure
+ *					with information about each option discovered.
+ *
+ * RETURNS:			success (options check out) - 0
+ *					failure - an ER pdu error code describing failure
+ *
+ * SIDE EFFECTS:	*oidx is cleared and then filled in, even on failure.
+ *
+ * NOTES:			Each pointer field of the option index is filled in with
+ *					the offset from the beginning of the mbuf data, not the
+ *					actual address.
+ */
+clnp_opt_sanity(m, opts, len, oidx)
+struct mbuf 		*m;		/* mbuf options reside in */
+caddr_t				opts;	/* ptr to buffer containing options */
+int					len;	/* length of buffer */
+struct clnp_optidx	*oidx;	/* RETURN: filled in with option idx info */
+{
+	u_char	opcode;			/* code of particular option */
+	u_char	oplen;			/* length of a particular option */
+	caddr_t	opts_end;		/* ptr to end of options */
+	u_char	pad = 0, secure = 0, srcrt = 0, recrt = 0, qos = 0, prior = 0;
+							/* flags for catching duplicate options */
+
+	IFDEBUG(D_OPTIONS)
+		printf("clnp_opt_sanity: checking %d bytes of data:\n", len);
+		dump_buf(opts, len);
+	ENDDEBUG
+
+	/* start from an empty option index */
+	bzero((caddr_t)oidx, sizeof(struct clnp_optidx));
+
+	/*
+	 *	We need to indicate whether the ER option is present. This is done
+	 *	by overloading the er_reason field to also indicate presence of
+	 *	the option along with the option value. I would like ER_INVALREAS
+	 *	to have value 0, but alas, 0 is a valid er reason...
+	 */
+	oidx->cni_er_reason = ER_INVALREAS;
+
+	opts_end = opts + len;
+	while (opts < opts_end) {
+		/* must have at least 2 bytes per option (opcode and len) */
+		if (opts + 2 > opts_end)
+			return(GEN_INCOMPLETE);
+
+		opcode = *opts++;
+		oplen = *opts++;
+		IFDEBUG(D_OPTIONS)
+			printf("clnp_opt_sanity: opcode is %x and oplen %d\n",
+				opcode, oplen);
+			printf("clnp_opt_sanity: clnpoval_SRCRT is %x\n", CLNPOVAL_SRCRT);
+
+				switch (opcode) {
+					case CLNPOVAL_PAD: {
+						printf("CLNPOVAL_PAD\n");
+					} break;
+					case CLNPOVAL_SECURE: {
+						printf("CLNPOVAL_SECURE\n");
+					} break;
+					case CLNPOVAL_SRCRT: {
+							printf("CLNPOVAL_SRCRT\n");
+					} break;
+					case CLNPOVAL_RECRT: {
+						printf("CLNPOVAL_RECRT\n");
+					} break;
+					case CLNPOVAL_QOS: {
+						printf("CLNPOVAL_QOS\n");
+					} break;
+					case CLNPOVAL_PRIOR: {
+						printf("CLNPOVAL_PRIOR\n");
+					} break;
+					case CLNPOVAL_ERREAS: {
+						printf("CLNPOVAL_ERREAS\n");
+					} break;
+					default:
+						printf("UKNOWN option %x\n", opcode);
+				}
+		ENDDEBUG
+
+		/* don't allow crazy length values */
+		if (opts + oplen > opts_end)
+			return(GEN_INCOMPLETE);
+
+		switch (opcode) {
+			case CLNPOVAL_PAD:
+				/*
+				 *	Padding: increment pointer by length of padding
+				 */
+				if (pad++)						/* duplicate ? */
+					return(GEN_DUPOPT);
+				opts += oplen;
+				break;
+
+			case CLNPOVAL_SECURE: {
+				u_char	format = oplen ? *opts : 0;
+
+				if (secure++)					/* duplicate ? */
+					return(GEN_DUPOPT);
+				/*
+				 *	Security: high 2 bits of first octet indicate format
+				 *	(00 in high bits is reserved; a zero length option has
+				 *	no first octet, is read as format 0 and so rejected).
+				 *	Remaining bits must be 0. Remaining octets indicate
+				 *	actual security
+				 */
+				if (((format & 0x3f) > 0) ||	/* low 6 bits set ? */
+					((format & 0xc0) == 0))		/* high 2 bits zero ? */
+					return(GEN_HDRSYNTAX);
+
+				oidx->cni_securep = CLNP_OPTTOOFF(m, opts);
+				oidx->cni_secure_len = oplen;
+				opts += oplen;
+			} break;
+
+			case CLNPOVAL_SRCRT: {
+				u_char	type, offset;	/* type of rt, offset of start */
+				caddr_t	route_end;		/* address of end of route option */
+
+				IFDEBUG(D_OPTIONS)
+					printf("clnp_opt_sanity: SRC RT\n");
+				ENDDEBUG
+
+				if (srcrt++)					/* duplicate ? */
+					return(GEN_DUPOPT);
+				/* 
+				 *	source route: There must be 2 bytes following the length
+				 *	field: type and offset. The type must be either
+				 *	partial route or complete route. The offset field must be
+				 *	at least 3 and within the option. A single exception is
+				 *	made: the offset may be 1 greater than the length. This
+				 *	occurs when the last source route record is consumed. 
+				 *	In this case, we ignore the source route option.
+				 *	RAH? You should be able to set offset to 'ff' like in record
+				 *	route!
+				 *	Following this is a series of address fields. 
+				 *	Each address field is composed of a (length, address) pair.
+				 *	Insure that the offset and each address length is reasonable
+				 */
+				route_end = opts + oplen;
+
+				if (opts + 2 > route_end)
+					return(SRCRT_SYNTAX);
+
+				type = *opts;
+				offset = *(opts+1);
+
+				/* type is partial or complete; offset 1 and 2 are type/offset */
+				if (!((type == CLNPOVAL_PARTRT) || (type == CLNPOVAL_COMPRT)) ||
+					(offset < 3))
+					return(SRCRT_SYNTAX);
+
+				oidx->cni_srcrt_s = CLNP_OPTTOOFF(m, opts);
+				oidx->cni_srcrt_len = oplen;
+
+				opts += offset-1;	/*set opts to first addr in rt */
+
+				/* 
+				 *	Offset must be reasonable:
+				 *	less than end of options, or equal to end of options
+				 */
+				if (opts >= route_end) {
+					if (opts == route_end) {
+						IFDEBUG(D_OPTIONS)
+							printf("clnp_opt_sanity: end of src route info\n");
+						ENDDEBUG
+						break;
+					} else 
+						return(SRCRT_SYNTAX);
+				}
+
+				while (opts < route_end) {
+					u_char	addrlen = *opts++;
+					if (opts + addrlen > route_end)
+						return(SRCRT_SYNTAX);
+					opts += addrlen;
+				}
+			} break;
+			case CLNPOVAL_RECRT: {
+				u_char	type, offset;	/* type of rt, offset of start */
+				caddr_t	record_end;		/* address of end of record option */
+
+				if (recrt++)					/* duplicate ? */
+					return(GEN_DUPOPT);
+				/*
+				 *	record route: after the length field, expect a
+				 *	type and offset. Type must be partial or complete.
+				 *	Offset indicates where to start recording: at least 3
+				 *	(past type and offset) and at most 1 past the end (a
+				 *	full record). All ones means recording is terminated.
+				 */
+				record_end = opts + oplen;
+
+				oidx->cni_recrtp = CLNP_OPTTOOFF(m, opts);
+				oidx->cni_recrt_len = oplen;
+
+				if (opts + 2 > record_end)
+					return(GEN_INCOMPLETE);
+
+				type = *opts;
+				offset = *(opts+1);
+
+				/* type must be partial or complete */
+				if (!((type == CLNPOVAL_PARTRT) || (type == CLNPOVAL_COMPRT)))
+					return(GEN_HDRSYNTAX);
+
+				if ((offset < 0xff) &&
+					((offset < 3) || (opts + offset - 1 > record_end)))
+					return(GEN_HDRSYNTAX);
+				opts += oplen;
+			} break;
+			case CLNPOVAL_QOS: {
+				u_char	format = oplen ? *opts : 0;
+
+				if (qos++)					/* duplicate ? */
+					return(GEN_DUPOPT);
+				/*
+				 *	qos: high 2 bits of first octet indicate format
+				 *	(00 in high bits is reserved; a zero length option is
+				 *	read as format 0 and so rejected).
+				 *	Remaining bits must be 0 (unless format indicates
+				 *	globally unique qos, in which case remaining bits indicate
+				 *	qos (except bit 6 which is reserved)); otherwise the
+				 *	remaining octets indicate actual qos.
+				 */
+				if (((format & 0xc0) == 0) ||	/* high 2 bits zero ? */
+					(((format & 0xc0) != CLNPOVAL_GLOBAL) && 
+						((format & 0x3f) > 0))) /* not global,low bits used ? */
+					return(GEN_HDRSYNTAX);
+
+				oidx->cni_qos_formatp = CLNP_OPTTOOFF(m, opts);
+				oidx->cni_qos_len = oplen;
+
+				opts += oplen;
+			} break;
+
+			case CLNPOVAL_PRIOR: {
+				if (prior++)				/* duplicate ? */
+					return(GEN_DUPOPT);
+				/*
+				 *	priority: value must be one byte long
+				 */
+				if (oplen != 1)
+					return(GEN_HDRSYNTAX);
+
+				oidx->cni_priorp = CLNP_OPTTOOFF(m, opts);
+
+				opts += oplen;
+			} break;
+
+			case CLNPOVAL_ERREAS: {
+				/*
+				 *	er reason: value must be two bytes long
+				 */
+				if (oplen != 2)
+					return(GEN_HDRSYNTAX);
+
+				oidx->cni_er_reason = *opts;
+
+				opts += oplen;
+			} break;
+
+			default: {
+				IFDEBUG(D_OPTIONS)
+					printf("clnp_opt_sanity: UNKNOWN OPTION 0x%x\n", opcode);
+				ENDDEBUG
+				return(DISC_UNSUPPOPT);
+			}
+		}
+	}
+		IFDEBUG(D_OPTIONS)
+			printf("clnp_opt_sanity: return(0)\n");
+		ENDDEBUG
+	return(0);
+}
+#endif	/* ISO */
diff --git a/sys/netiso/clnp_output.c b/sys/netiso/clnp_output.c
new file mode 100644
index 00000000000..aba9f6e00bd
--- /dev/null
+++ b/sys/netiso/clnp_output.c
@@ -0,0 +1,561 @@
+/*-
+ * Copyright (c) 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)clnp_output.c	8.1 (Berkeley) 6/10/93
+ */
+
+/***********************************************************
+		Copyright IBM Corporation 1987
+
+                      All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and its 
+documentation for any purpose and without fee is hereby granted, 
+provided that the above copyright notice appear in all copies and that
+both that copyright notice and this permission notice appear in 
+supporting documentation, and that the name of IBM not be
+used in advertising or publicity pertaining to distribution of the
+software without specific, written prior permission.  
+
+IBM DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING
+ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL
+IBM BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR
+ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+SOFTWARE.
+
+******************************************************************/
+
+/*
+ * ARGO Project, Computer Sciences Dept., University of Wisconsin - Madison
+ */
+/* $Header: /var/src/sys/netiso/RCS/clnp_output.c,v 5.0 89/02/08 12:00:15 hagens Exp $ */
+/* $Source: /var/src/sys/netiso/RCS/clnp_output.c,v $ */
+
+#include <sys/param.h>
+#include <sys/mbuf.h>
+#include <sys/domain.h>
+#include <sys/protosw.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/errno.h>
+#include <sys/time.h>
+
+#include <net/if.h>
+#include <net/route.h>
+
+#include <netiso/iso.h>
+#include <netiso/iso_var.h>
+#include <netiso/iso_pcb.h>
+#include <netiso/clnp.h>
+#include <netiso/clnp_stat.h>
+#include <netiso/argo_debug.h>
+
+static struct clnp_fixed dt_template = {	/* default: no RAW/ECHO/ECHOR flag */
+	ISO8473_CLNP,	/* network identifier */
+	0,				/* length: filled in per packet by clnp_output */
+	ISO8473_V1,		/* version */
+	CLNP_TTL,		/* ttl */
+	CLNP_DT|CNF_SEG_OK|CNF_ERR_OK,		/* type */
+	0,				/* segment length: filled in per packet */
+	0				/* checksum: filled in per packet */
+};
+
+static struct clnp_fixed raw_template = {	/* used when flags & CLNP_SEND_RAW */
+	ISO8473_CLNP,	/* network identifier */
+	0,				/* length: filled in per packet by clnp_output */
+	ISO8473_V1,		/* version */
+	CLNP_TTL,		/* ttl */
+	CLNP_RAW|CNF_SEG_OK|CNF_ERR_OK,		/* type */
+	0,				/* segment length: filled in per packet */
+	0				/* checksum: filled in per packet */
+};
+
+static struct clnp_fixed echo_template = {	/* used when flags & CLNP_ECHO */
+	ISO8473_CLNP,	/* network identifier */
+	0,				/* length: filled in per packet by clnp_output */
+	ISO8473_V1,		/* version */
+	CLNP_TTL,		/* ttl */
+	CLNP_EC|CNF_SEG_OK|CNF_ERR_OK,		/* type */
+	0,				/* segment length: filled in per packet */
+	0				/* checksum: filled in per packet */
+};
+
+static struct clnp_fixed echor_template = {	/* used when flags & CLNP_ECHOR */
+	ISO8473_CLNP,	/* network identifier */
+	0,				/* length: filled in per packet by clnp_output */
+	ISO8473_V1,		/* version */
+	CLNP_TTL,		/* ttl */
+	CLNP_ECR|CNF_SEG_OK|CNF_ERR_OK,		/* type */
+	0,				/* segment length: filled in per packet */
+	0				/* checksum: filled in per packet */
+};
+
+#ifdef	DECBIT
+u_char qos_option[] = {CLNPOVAL_QOS, 1, 	/* option code, value length 1 */
+	CLNPOVAL_GLOBAL|CLNPOVAL_SEQUENCING|CLNPOVAL_LOWDELAY};	/* value octet */
+#endif	/* DECBIT */
+
+int				clnp_id = 0;		/* next cng_id; clnp_output post-increments it */
+
+/*
+ * FUNCTION:		clnp_output
+ *
+ * PURPOSE:			output the data in the mbuf as a clnp datagram
+ *
+ *					The data specified by m0 is sent as a clnp datagram. 
+ *					The mbuf chain m0 is consumed on every path: it is
+ *					either passed on for transmission or freed here.
+ *
+ *					If isop->isop_options is non-null, it points to an mbuf
+ *					which contains options to be sent with the datagram. The
+ *					options must be formatted in the mbuf according to clnp
+ *					rules. A copy is sent; the options will not be freed.
+ *
+ *					Datalen specifies the length of the data in m0. 
+ *
+ *					The destination is isop->isop_faddr. The source is
+ *					isop->isop_laddr or, if that is null, the address of
+ *					the interface that clnp_route selects.
+ *
+ *					isop->isop_route is handed to the routing code.
+ *
+ *					By default, a DT is sent. If flags & CLNP_SEND_RAW, the
+ *					packet is sent as raw clnp; CLNP_ECHO and CLNP_ECHOR
+ *					select the EC and ECR packet types.
+ *
+ * RETURNS:			0	success
+ *					appropriate error code
+ *
+ * SIDE EFFECTS:	Updates clnp_stat and the clnp cache hung off the pcb.
+ *
+ * NOTES:			
+ *					Flags are interpreted as follows:
+ *						CLNP_NO_SEG - do not allow this pkt to be segmented.
+ *						CLNP_NO_ER  - have pkt request ER suppression.
+ *						CLNP_SEND_RAW - send pkt as RAW DT rather than TP DT
+ *						CLNP_NO_CKSUM - don't compute clnp checksum
+ *						CLNP_ECHO - send as ECHO packet
+ *
+ *					A cached header is reused only while the options, dst,
+ *					route (still RTF_UP) and flags all match those recorded
+ *					in the cache. Any entity that alters an existing
+ *					route for an isopcb (such as when a redirect arrives)
+ *					must invalidate the clnp cache.
+ */
+clnp_output(m0, isop, datalen, flags)
+struct mbuf			*m0;		/* data for the packet */
+struct isopcb		*isop;		/* iso pcb */
+int					datalen;	/* number of bytes of data in m0 */
+int					flags;		/* flags */
+{
+	int							error = 0;		/* return value of function */
+	register struct mbuf		*m = m0;		/* mbuf for clnp header chain */
+	register struct clnp_fixed	*clnp;			/* ptr to fixed part of hdr */
+	register caddr_t			hoff;			/* offset into header */
+	int							total_len;		/* total length of packet */
+	struct iso_addr				*src;		/* ptr to source address */
+	struct iso_addr				*dst;		/* ptr to destination address */
+	struct clnp_cache			clc;		/* storage for cache information */
+	struct clnp_cache			*clcp = NULL;	/* ptr to clc */
+	int							hdrlen = 0;
+
+	dst = &isop->isop_faddr->siso_addr;
+	if (isop->isop_laddr == 0) {
+		struct iso_ifaddr *ia = 0;
+		clnp_route(dst, &isop->isop_route, flags, 0, &ia);
+		if (ia == 0 || ia->ia_ifa.ifa_addr->sa_family != AF_ISO) {
+			m_freem(m0);	/* m0 is ours to free, as on every other error */
+			return (ENETUNREACH);
+		}
+		src = &ia->ia_addr.siso_addr;
+	} else
+		src = &isop->isop_laddr->siso_addr;
+
+	IFDEBUG(D_OUTPUT)
+		printf("clnp_output: to %s", clnp_iso_addrp(dst));
+		printf(" from %s of %d bytes\n", clnp_iso_addrp(src), datalen);
+		printf("\toptions x%x, flags x%x, isop_clnpcache x%x\n", 
+			isop->isop_options, flags, isop->isop_clnpcache);
+	ENDDEBUG
+
+	if (isop->isop_clnpcache != NULL) {
+		clcp = mtod(isop->isop_clnpcache, struct clnp_cache *);
+	}
+
+	/*
+	 *	Check if cache is valid ...
+	 */
+	IFDEBUG(D_OUTPUT)
+		printf("clnp_output: ck cache: clcp %x\n", clcp);
+		if (clcp != NULL) {
+			printf("\tclc_dst %s\n", clnp_iso_addrp(&clcp->clc_dst));
+			printf("\tisop_opts x%x, clc_opts x%x\n", isop->isop_options,
+				clcp->clc_options);
+			if (isop->isop_route.ro_rt)
+				printf("\tro_rt x%x, rt_flags x%x\n",
+					isop->isop_route.ro_rt, isop->isop_route.ro_rt->rt_flags);
+			printf("\tflags x%x, clc_flags x%x\n", flags, clcp->clc_flags);
+			printf("\tclc_hdr x%x\n", clcp->clc_hdr);
+		}
+	ENDDEBUG
+	if ((clcp != NULL) &&								/* cache exists */
+		(isop->isop_options == clcp->clc_options) && 	/* same options */
+		(iso_addrmatch1(dst, &clcp->clc_dst)) &&		/* dst still same */
+		(isop->isop_route.ro_rt != NULL) &&				/* route exists */
+		(isop->isop_route.ro_rt == clcp->clc_rt) &&		/* and is cached */
+		(isop->isop_route.ro_rt->rt_flags & RTF_UP) &&	/* route still up */
+		(flags == clcp->clc_flags) &&					/* same flags */
+		(clcp->clc_hdr != NULL)) {						/* hdr mbuf exists */
+		/*
+		 *	The cache is valid
+		 */
+
+		IFDEBUG(D_OUTPUT)
+			printf("clnp_output: using cache\n");
+		ENDDEBUG
+
+		m = m_copy(clcp->clc_hdr, 0, (int)M_COPYALL);
+		if (m == NULL) {
+			/*
+			 *	No buffers left to copy cached packet header. Use
+			 *	the cached packet header this time, and
+			 *	mark the hdr as vacant
+			 */
+			m = clcp->clc_hdr;
+			clcp->clc_hdr = NULL;
+		}
+		m->m_next = m0;	/* ASSUMES pkt hdr is 1 mbuf long */
+		clnp = mtod(m, struct clnp_fixed *);
+	} else {
+		struct clnp_optidx	*oidx = NULL;		/* index to clnp options */
+
+		/*
+		 *	The cache is not valid. Allocate an mbuf (if necessary)
+		 *	to hold cached info. If one is not available, then
+		 *	don't bother with the cache
+		 */
+		INCSTAT(cns_cachemiss);
+		if (flags & CLNP_NOCACHE) {
+			clcp = &clc;
+		} else {
+			if (isop->isop_clnpcache == NULL) {
+				/*
+				 *	There is no clnpcache. Allocate an mbuf to hold one
+				 */
+				if ((isop->isop_clnpcache = m_get(M_DONTWAIT, MT_HEADER))
+					== NULL) {
+					/*
+					 *	No mbufs available. Pretend that we don't want
+					 *	caching this time.
+					 */
+					IFDEBUG(D_OUTPUT)
+						printf("clnp_output: no mbufs to allocate to cache\n");
+					ENDDEBUG
+					flags  |= CLNP_NOCACHE;
+					clcp = &clc;
+				} else {
+					clcp = mtod(isop->isop_clnpcache, struct clnp_cache *);
+				}
+			} else {
+				/*
+				 *	A clnpcache mbuf exists. If the clc_hdr is not null,
+				 *	we must free it, as a new one is about to be created.
+				 */
+				clcp = mtod(isop->isop_clnpcache, struct clnp_cache *);
+				if (clcp->clc_hdr != NULL) {
+					/*
+					 *	The clc_hdr is not null but a clnpcache mbuf exists.
+					 *	This means that there was a cache, but the existing
+					 *	copy of the hdr is no longer valid. Free the whole
+					 *	chain (the copy spans 2 mbufs when options were
+					 *	present) before we lose the pointer to it.
+					 */
+					IFDEBUG(D_OUTPUT)
+						printf("clnp_output: freeing old clc_hdr 0x%x\n",
+						clcp->clc_hdr);
+					ENDDEBUG
+					m_freem(clcp->clc_hdr);
+					IFDEBUG(D_OUTPUT)
+						printf("clnp_output: freed old clc_hdr (done)\n");
+					ENDDEBUG
+				}
+			}
+		}
+		IFDEBUG(D_OUTPUT)
+			printf("clnp_output: NEW clcp x%x\n",clcp);
+		ENDDEBUG
+		bzero((caddr_t)clcp, sizeof(struct clnp_cache));
+
+		if (isop->isop_optindex)
+			oidx = mtod(isop->isop_optindex, struct clnp_optidx *);
+
+		/*
+		 *	Don't allow packets with security, quality of service,
+		 *	priority, or error report options to be sent.
+		 */
+		if ((isop->isop_options) && (oidx)) {
+			if ((oidx->cni_securep) ||
+				(oidx->cni_priorp) ||
+				(oidx->cni_qos_formatp) ||
+				(oidx->cni_er_reason != ER_INVALREAS)) {
+				IFDEBUG(D_OUTPUT)
+					printf("clnp_output: pkt dropped - option unsupported\n");
+				ENDDEBUG
+				m_freem(m0);
+				return(EINVAL);
+			}
+		}
+
+		/*
+		 *	Don't allow any invalid flags to be set
+		 */
+		if ((flags & (CLNP_VFLAGS)) != flags) {
+			IFDEBUG(D_OUTPUT)
+				printf("clnp_output: packet dropped - flags unsupported\n");
+			ENDDEBUG
+			INCSTAT(cns_odropped);
+			m_freem(m0);
+			return(EINVAL);
+		}
+
+		/*
+		 *	Don't allow funny lengths on dst; src may be zero in which
+		 *	case we insert the source address based upon the interface
+		 */
+		if ((src->isoa_len > sizeof(struct iso_addr)) || 
+			(dst->isoa_len == 0) ||
+			(dst->isoa_len > sizeof(struct iso_addr))) {
+			m_freem(m0);
+			INCSTAT(cns_odropped);
+			return(ENAMETOOLONG);
+		}
+
+		/*
+		 *	Grab mbuf to contain header
+		 */
+		MGETHDR(m, M_DONTWAIT, MT_HEADER);
+		if (m == 0) {
+			m_freem(m0);
+			INCSTAT(cns_odropped);
+			return(ENOBUFS);
+		}
+		INCSTAT(cns_sent);
+		m->m_next = m0;
+		clnp = mtod(m, struct clnp_fixed *);
+		clcp->clc_segoff = 0;
+
+		/*
+		 *	Fill in all of fixed hdr except lengths and checksum
+		 */
+		if (flags & CLNP_SEND_RAW) {
+			*clnp = raw_template;
+		} else if (flags & CLNP_ECHO) {
+			*clnp = echo_template;
+		} else if (flags & CLNP_ECHOR) {
+			*clnp = echor_template;
+		} else {
+			*clnp = dt_template;
+		}
+		if (flags & CLNP_NO_SEG)
+			clnp->cnf_type &= ~CNF_SEG_OK;
+		if (flags & CLNP_NO_ER)
+			clnp->cnf_type &= ~CNF_ERR_OK;
+
+		/*
+		 *	Route packet; special case for source rt
+		 */
+		if ((isop->isop_options) && CLNPSRCRT_VALID(oidx)) {
+			IFDEBUG(D_OUTPUT)
+				printf("clnp_output: calling clnp_srcroute\n");
+			ENDDEBUG
+			error = clnp_srcroute(isop->isop_options, oidx, &isop->isop_route,
+				&clcp->clc_firsthop, &clcp->clc_ifa, dst);
+		} else {
+			error = clnp_route(dst, &isop->isop_route, flags, 
+				&clcp->clc_firsthop, &clcp->clc_ifa);
+		}
+		if (error || (clcp->clc_ifa == 0)) {
+			IFDEBUG(D_OUTPUT)
+				printf("clnp_output: route failed, errno %d\n", error);
+				printf("@clcp:\n");
+				dump_buf(clcp, sizeof (struct clnp_cache));
+			ENDDEBUG
+			if (error == 0)		/* no interface: don't report success */
+				error = ENETUNREACH;
+			goto bad;
+		}
+		clcp->clc_rt = isop->isop_route.ro_rt;	/* XXX */
+		clcp->clc_ifp = clcp->clc_ifa->ia_ifp;  /* XXX */
+
+		IFDEBUG(D_OUTPUT)
+			printf("clnp_output: packet routed to %s\n", 
+				clnp_iso_addrp(
+					&((struct sockaddr_iso *)clcp->clc_firsthop)->siso_addr));
+		ENDDEBUG
+
+		/*
+		 *	If src address is not yet specified, use address of 
+		 *	interface. NOTE: this will now update the laddr field in
+		 *	the isopcb. Is this desirable? RAH?
+		 */
+		if (src->isoa_len == 0) {
+			src = &(clcp->clc_ifa->ia_addr.siso_addr);
+			IFDEBUG(D_OUTPUT)
+				printf("clnp_output: new src %s\n", clnp_iso_addrp(src));
+			ENDDEBUG
+		}
+
+		/*
+		 *	Insert the source and destination address,
+		 */
+		hoff = (caddr_t)clnp + sizeof(struct clnp_fixed);
+		CLNP_INSERT_ADDR(hoff, *dst);
+		CLNP_INSERT_ADDR(hoff, *src);
+
+		/*
+		 *	Leave room for the segment part, if segmenting is selected
+		 */
+		if (clnp->cnf_type & CNF_SEG_OK) {
+			clcp->clc_segoff = hoff - (caddr_t)clnp;
+			hoff += sizeof(struct clnp_segment);
+		}
+
+		clnp->cnf_hdr_len = m->m_len = (u_char)(hoff - (caddr_t)clnp);
+		hdrlen = clnp->cnf_hdr_len;
+
+#ifdef	DECBIT
+		/*
+		 *	Add the globally unique QOS (with room for congestion experienced
+		 *	bit). I can safely assume that this option is not in the options
+		 *	mbuf below because I checked that the option was not specified
+		 *	previously
+		 */
+		if ((m->m_len + sizeof(qos_option)) < MLEN) {
+			bcopy((caddr_t)qos_option, hoff, sizeof(qos_option));
+			clnp->cnf_hdr_len += sizeof(qos_option);
+			hdrlen += sizeof(qos_option);
+			m->m_len += sizeof(qos_option);
+		}
+#endif	/* DECBIT */
+
+		/*
+		 *	If an options mbuf is present, concatenate a copy to the hdr mbuf.
+		 */
+		if (isop->isop_options) {
+			struct mbuf *opt_copy = m_copy(isop->isop_options, 0, (int)M_COPYALL);
+			if (opt_copy == NULL) {
+				error = ENOBUFS;
+				goto bad;
+			}
+			/* Link in place */
+			opt_copy->m_next = m->m_next;
+			m->m_next = opt_copy;
+
+			/* update size of header */
+			clnp->cnf_hdr_len += opt_copy->m_len;
+			hdrlen += opt_copy->m_len;
+		}
+
+		if (hdrlen > CLNP_HDR_MAX) {
+			error = EMSGSIZE;
+			goto bad;
+		}
+
+		/*
+		 *	Now set up the cache entry in the pcb
+		 */
+		if ((flags & CLNP_NOCACHE) == 0) {
+			if (clcp->clc_hdr = m_copy(m, 0, (int)clnp->cnf_hdr_len)) {
+				clcp->clc_dst  = *dst;
+				clcp->clc_flags = flags;
+				clcp->clc_options = isop->isop_options;
+			}
+		}
+	}
+	/*
+	 *	If small enough for interface, send directly
+	 *	Fill in segmentation part of hdr if using the full protocol
+	 */
+	total_len = clnp->cnf_hdr_len + datalen;
+	if (clnp->cnf_type & CNF_SEG_OK) {
+		struct clnp_segment	seg_part;		/* segment part of hdr */
+		seg_part.cng_id = htons(clnp_id++);
+		seg_part.cng_off = htons(0);
+		seg_part.cng_tot_len = htons(total_len);
+		(void) bcopy((caddr_t)&seg_part, (caddr_t) clnp + clcp->clc_segoff, 
+			sizeof(seg_part));
+	}
+	if (total_len <= SN_MTU(clcp->clc_ifp, clcp->clc_rt)) {
+		HTOC(clnp->cnf_seglen_msb, clnp->cnf_seglen_lsb, total_len);
+		m->m_pkthdr.len = total_len;
+		/*
+		 *	Compute clnp checksum (on header only)
+		 */
+		if (flags & CLNP_NO_CKSUM) {
+			HTOC(clnp->cnf_cksum_msb, clnp->cnf_cksum_lsb, 0);
+		} else {
+			iso_gen_csum(m, CLNP_CKSUM_OFF, (int)clnp->cnf_hdr_len);
+		}
+
+		IFDEBUG(D_DUMPOUT)
+			struct mbuf *mdump = m;
+			printf("clnp_output: sending dg:\n");
+			while (mdump != NULL) {
+				dump_buf(mtod(mdump, caddr_t), mdump->m_len);
+				mdump = mdump->m_next;
+			}
+		ENDDEBUG
+
+		error = SN_OUTPUT(clcp, m);
+		goto done;
+	} else {
+		/*
+		 * Too large for interface; fragment if possible.
+		 */
+		error = clnp_fragment(clcp->clc_ifp, m, clcp->clc_firsthop,
+							total_len, clcp->clc_segoff, flags, clcp->clc_rt);
+		goto done;
+	}
+bad:
+	m_freem(m);
+done:
+	if (error) {
+		clnp_stat.cns_sent--;
+		clnp_stat.cns_odropped++;
+	}
+	return (error);
+}
+int clnp_ctloutput()	/* stub: performs no option processing */
+{
+	return (0);	/* a defined result instead of falling off the end */
+}
diff --git a/sys/netiso/clnp_raw.c b/sys/netiso/clnp_raw.c
new file mode 100644
index 00000000000..0bc3dbac4b1
--- /dev/null
+++ b/sys/netiso/clnp_raw.c
@@ -0,0 +1,352 @@
+/*-
+ * Copyright (c) 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)clnp_raw.c	8.1 (Berkeley) 6/10/93
+ */
+
+/***********************************************************
+				Copyright IBM Corporation 1987
+
+                      All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and its 
+documentation for any purpose and without fee is hereby granted, 
+provided that the above copyright notice appear in all copies and that
+both that copyright notice and this permission notice appear in 
+supporting documentation, and that the name of IBM not be
+used in advertising or publicity pertaining to distribution of the
+software without specific, written prior permission.  
+
+IBM DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING
+ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL
+IBM BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR
+ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+SOFTWARE.
+
+******************************************************************/
+
+/*
+ * ARGO Project, Computer Sciences Dept., University of Wisconsin - Madison
+ */
+/* $Header: clnp_raw.c,v 4.2 88/06/29 14:58:56 hagens Exp $ */
+/* $Source: /usr/argo/sys/netiso/RCS/clnp_raw.c,v $ */
+
+#include <sys/param.h>
+#include <sys/mbuf.h>
+#include <sys/domain.h>
+#include <sys/protosw.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/errno.h>
+#include <sys/time.h>
+
+#include <net/if.h>
+#include <net/route.h>
+#include <net/raw_cb.h>
+
+#include <netiso/iso.h>
+#include <netiso/iso_pcb.h>
+#include <netiso/clnp.h>
+#include <netiso/clnp_stat.h>
+#include <netiso/argo_debug.h>
+
+#include <netiso/tp_user.h>		/* XXX -- defines SOL_NETWORK */
+
+/*
+ * Protocol descriptor handed to raw_input() by rclnp_input: family PF_ISO,
+ * protocol 0 (no particular protocol).
+ */
+struct sockproto	rclnp_proto	= { PF_ISO, 0 };
+/*
+ * FUNCTION:		rclnp_input
+ *
+ * PURPOSE:			Hand a received packet to the generic raw input
+ *					routine together with its source and destination
+ *					addresses and the raw clnp protocol descriptor.
+ *
+ * RETURNS:			none
+ *
+ * SIDE EFFECTS:	When compiled with TROLL and TR_CHUCK is set in
+ *					trollctl.tr_ops, the packet is freed instead of
+ *					being passed on.
+ *
+ * NOTES:			The protocol field of rclnp_proto is set to zero indicating
+ *					no protocol. hdrlen is accepted but not used.
+ */
+rclnp_input(m, src, dst, hdrlen)
+struct mbuf 		*m;		/* ptr to packet */
+struct sockaddr_iso	*src;	/* ptr to src address */
+struct sockaddr_iso	*dst;	/* ptr to dest address */
+int					hdrlen; /* length (in bytes) of clnp header */
+{
+	struct sockaddr	*from = (struct sockaddr *)src;	/* generic view of src */
+	struct sockaddr	*to = (struct sockaddr *)dst;	/* generic view of dst */
+
+#ifdef	TROLL
+	/* the troll may be told to throw every raw packet away */
+	if ((trollctl.tr_ops & TR_CHUCK) != 0) {
+		m_freem(m);
+		return;
+	}
+#endif	/* TROLL */
+
+	raw_input(m, &rclnp_proto, from, to);
+}
+
+/*
+ * FUNCTION:		rclnp_output
+ *
+ * PURPOSE:			Prepare to send a raw clnp packet. Check the bound
+ *					source address, copy the connected destination into
+ *					the pcb, and call clnp_output with the length taken
+ *					from the packet header.
+ *
+ * RETURNS:			success - 0
+ *					failure - an appropriate error code:
+ *						EINVAL if m0 is not a packet header mbuf,
+ *						EAFNOSUPPORT if the bound source address is
+ *						not AF_ISO or no destination is set,
+ *						otherwise whatever clnp_output returns.
+ *
+ * SIDE EFFECTS:	The packet is freed here on every local failure path;
+ *					otherwise it is handed to clnp_output.
+ *
+ * NOTES:			The header cache is never used for raw sockets
+ *					(CLNP_NOCACHE is always passed).
+ */
+rclnp_output(m0, so)
+struct mbuf		*m0;		/* packet to send */
+struct socket	*so;	/* socket to send from */
+{
+	struct rawisopcb		*rp = sotorawisopcb(so); /* ptr to raw cb */
+	int						flags;		/* flags for clnp_output */
+
+	/*
+	 *	m_pkthdr.len is used below, so a packet header is required.
+	 *	Free the packet like the other failure paths do; returning
+	 *	without freeing it would leak the chain.
+	 */
+	if (0 == (m0->m_flags & M_PKTHDR)) {
+		m_freem(m0);
+		return (EINVAL);
+	}
+	/*
+	 *	Set up src address. If user has bound socket to an address, use it
+	 *	(it must be an ISO address).
+	 *	Otherwise, do not specify src (clnp_output will fill it in).
+	 */
+	if (rp->risop_rcb.rcb_laddr &&
+		rp->risop_isop.isop_sladdr.siso_family != AF_ISO)
+		goto bad;
+
+	/* set up dest address; a destination is mandatory */
+	if (rp->risop_rcb.rcb_faddr == 0)
+		goto bad;
+	rp->risop_isop.isop_sfaddr =
+				*(struct sockaddr_iso *)rp->risop_rcb.rcb_faddr;
+	rp->risop_isop.isop_faddr = &rp->risop_isop.isop_sfaddr;
+
+	/* get flags and ship it off */
+	flags = rp->risop_flags & CLNP_VFLAGS;
+
+	return (clnp_output(m0, &rp->risop_isop, m0->m_pkthdr.len,
+												flags|CLNP_NOCACHE));
+
+bad:
+	m_freem(m0);
+	return (EAFNOSUPPORT);
+}
+
+/*
+ * FUNCTION:		rclnp_ctloutput
+ *
+ * PURPOSE:			Raw clnp socket option processing
+ *					All options are stored inside an mbuf. 
+ *
+ * RETURNS:			success - 0
+ *					failure - unix error code (EINVAL for a bad level,
+ *					operation, flag value or option length, or the
+ *					error returned by clnp_set_opts)
+ *
+ * SIDE EFFECTS:	If the options mbuf does not exist, the mbuf passed
+ *					is used.
+ *					For PRCO_SETOPT whatever is left at *m is freed and
+ *					*m is set to NULL before returning.
+ *
+ * NOTES:			CLNPOPT_FLAGS only ORs bits into risop_flags; it never
+ *					clears a flag that was set earlier.
+ *					Option names not listed under PRCO_SETOPT are
+ *					accepted silently.
+ */
+rclnp_ctloutput(op, so, level, optname, m)
+int				op;				/* type of operation */
+struct socket	*so;			/* ptr to socket */
+int 			level;			/* level of option */
+int				optname;		/* name of option */
+struct mbuf		**m;			/* ptr to ptr to option data */
+{
+	int						error = 0;
+	register struct rawisopcb	*rp = sotorawisopcb(so);/* raw cb ptr */
+
+	IFDEBUG(D_CTLOUTPUT)
+		printf("rclnp_ctloutput: op = x%x, level = x%x, name = x%x\n",
+			op, level, optname);
+		if (*m != NULL) {
+			printf("rclnp_ctloutput: %d bytes of mbuf data\n", (*m)->m_len);
+			dump_buf(mtod((*m), caddr_t), (*m)->m_len);
+		}
+	ENDDEBUG
+
+#ifdef SOL_NETWORK
+	if (level != SOL_NETWORK)
+		error = EINVAL;
+	else switch (op) {
+#else
+	switch (op) {
+#endif /* SOL_NETWORK */
+		case PRCO_SETOPT:
+			switch (optname) {
+				case CLNPOPT_FLAGS: {
+					u_short	usr_flags;
+					/* 
+					 *	Insure that the data passed has exactly one short in it 
+					 */
+					if ((*m == NULL) || ((*m)->m_len != sizeof(short))) {
+						error = EINVAL;
+						break;
+					}
+					 
+					/*
+					 *	Don't allow invalid flags to be set
+					 */
+					usr_flags = (*mtod((*m), short *));
+
+					if ((usr_flags & (CLNP_VFLAGS)) != usr_flags) {
+						error = EINVAL;
+					} else
+						rp->risop_flags |= usr_flags;
+
+					} break;
+			
+				case CLNPOPT_OPTS:
+					if (error = clnp_set_opts(&rp->risop_isop.isop_options, m))
+						break;
+					/*
+					 *	The index built for any previous options is stale
+					 *	now. Release it before building a new one so that
+					 *	setting options repeatedly does not leak an mbuf.
+					 */
+					if (rp->risop_isop.isop_optindex != NULL) {
+						m_free(rp->risop_isop.isop_optindex);
+						rp->risop_isop.isop_optindex = NULL;
+					}
+					/*
+					 *	Nothing to index if no options ended up installed.
+					 *	NOTE(review): presumably clnp_set_opts leaves
+					 *	isop_options NULL when *m is NULL -- confirm.
+					 */
+					if (rp->risop_isop.isop_options == NULL)
+						break;
+					rp->risop_isop.isop_optindex = m_get(M_WAIT, MT_SOOPTS);
+					/* run the sanity check only to fill in the index */
+					(void) clnp_opt_sanity(rp->risop_isop.isop_options, 
+						mtod(rp->risop_isop.isop_options, caddr_t),
+						rp->risop_isop.isop_options->m_len, 
+						mtod(rp->risop_isop.isop_optindex,
+							struct clnp_optidx *));
+					break;
+			} 
+			break;
+
+		case PRCO_GETOPT:
+#ifdef notdef
+			/* commented out to keep hi C quiet */
+			switch (optname) {
+				default:
+					error = EINVAL;
+					break;
+			}
+#endif /* notdef */
+			break;
+		default:
+			error = EINVAL;
+			break;
+	}
+	if (op == PRCO_SETOPT) {
+		/* note: m_freem does not barf if *m is NULL */
+		m_freem(*m);
+		*m = NULL;
+	}
+	
+	return error;
+}
+
+/*
+ * FUNCTION:		clnp_usrreq
+ *
+ * PURPOSE:			User request handler for raw clnp sockets.
+ *					PRU_ATTACH allocates the raw iso pcb, PRU_DETACH
+ *					releases what hangs off it, PRU_BIND and PRU_CONNECT
+ *					record the local and foreign address in the pcb.
+ *					PRU_ATTACH, PRU_DETACH and every request not listed
+ *					here are then passed on to raw_usrreq.
+ *
+ * RETURNS:			success - 0
+ *					failure - unix error code
+ *
+ * SIDE EFFECTS:	PRU_BIND and PRU_CONNECT return directly and never
+ *					reach raw_usrreq.
+ *
+ * NOTES:			
+ */
+/*ARGSUSED*/
+clnp_usrreq(so, req, m, nam, control)
+	register struct socket *so;
+	int req;
+	struct mbuf *m, *nam, *control;
+{
+	register int error = 0;
+	register struct rawisopcb *rp = sotorawisopcb(so);
+
+	switch (req) {
+
+	case PRU_ATTACH:
+		if (rp)
+			panic("rip_attach");
+		MALLOC(rp, struct rawisopcb *, sizeof *rp, M_PCB, M_WAITOK);
+		if (rp == 0)
+			return (ENOBUFS);
+		bzero((caddr_t)rp, sizeof *rp);
+		so->so_pcb = (caddr_t)rp;
+		break;
+
+	case PRU_DETACH:
+		if (rp == 0)
+			panic("rip_detach");
+		if (rp->risop_isop.isop_options)
+			m_freem(rp->risop_isop.isop_options);
+		if (rp->risop_isop.isop_route.ro_rt)
+			RTFREE(rp->risop_isop.isop_route.ro_rt);
+		/*
+		 * PRU_BIND points rcb_laddr at storage inside the pcb itself,
+		 * so it is only forgotten here, never freed.
+		 */
+		if (rp->risop_rcb.rcb_laddr)
+			rp->risop_rcb.rcb_laddr = 0;
+		/* free clnp cached hdr if necessary */
+		if (rp->risop_isop.isop_clnpcache != NULL) {
+			struct clnp_cache *clcp = 
+				mtod(rp->risop_isop.isop_clnpcache, struct clnp_cache *);
+			if (clcp->clc_hdr != NULL) {
+				m_free(clcp->clc_hdr);
+			}
+			m_free(rp->risop_isop.isop_clnpcache);
+		}
+		if (rp->risop_isop.isop_optindex != NULL)
+			m_free(rp->risop_isop.isop_optindex);
+
+		break;
+
+	case PRU_BIND:
+	    {
+		struct sockaddr_iso *addr = mtod(nam, struct sockaddr_iso *);
+
+		if (nam->m_len != sizeof(*addr))
+			return (EINVAL);
+		/* a non-empty address must belong to one of our interfaces */
+		if ((ifnet == 0) ||
+		    (addr->siso_family != AF_ISO) ||
+		    (addr->siso_addr.isoa_len  &&
+		     ifa_ifwithaddr((struct sockaddr *)addr) == 0))
+			return (EADDRNOTAVAIL);
+		rp->risop_isop.isop_sladdr = *addr;
+		rp->risop_rcb.rcb_laddr = (struct sockaddr *)
+			(rp->risop_isop.isop_laddr = &rp->risop_isop.isop_sladdr);
+		return (0);
+	    }
+	case PRU_CONNECT:
+	    {
+		struct sockaddr_iso *addr = mtod(nam, struct sockaddr_iso *);
+
+		if ((nam->m_len > sizeof(*addr)) || (addr->siso_len > sizeof(*addr)))
+			return (EINVAL);
+		if (ifnet == 0)
+			return (EADDRNOTAVAIL);
+		/*
+		 * Reject anything that is not an ISO address. The return was
+		 * missing here, which made the copy below conditional on the
+		 * family being wrong.
+		 */
+		if (addr->siso_family != AF_ISO)
+			return (EAFNOSUPPORT);
+		rp->risop_isop.isop_sfaddr = *addr;
+		rp->risop_rcb.rcb_faddr = (struct sockaddr *)
+			(rp->risop_isop.isop_faddr = &rp->risop_isop.isop_sfaddr);
+		soisconnected(so);
+		return (0);
+	    }
+	}
+	error =  raw_usrreq(so, req, m, nam, control);
+
+	/*
+	 * A failed attach must not leave the pcb allocated above behind,
+	 * nor leave the socket pointing at freed memory.
+	 */
+	if (error && req == PRU_ATTACH && so->so_pcb) {
+		free((caddr_t)rp, M_PCB);
+		so->so_pcb = 0;
+	}
+	return (error);
+}
diff --git a/sys/netiso/clnp_stat.h b/sys/netiso/clnp_stat.h
new file mode 100644
index 00000000000..07cd72c63e4
--- /dev/null
+++ b/sys/netiso/clnp_stat.h
@@ -0,0 +1,103 @@
+/*-
+ * Copyright (c) 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)clnp_stat.h	8.1 (Berkeley) 6/10/93
+ */
+
+/***********************************************************
+		Copyright IBM Corporation 1987
+
+                      All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and its 
+documentation for any purpose and without fee is hereby granted, 
+provided that the above copyright notice appear in all copies and that
+both that copyright notice and this permission notice appear in 
+supporting documentation, and that the name of IBM not be
+used in advertising or publicity pertaining to distribution of the
+software without specific, written prior permission.  
+
+IBM DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING
+ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL
+IBM BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR
+ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+SOFTWARE.
+
+******************************************************************/
+
+/*
+ * ARGO Project, Computer Sciences Dept., University of Wisconsin - Madison
+ */
+/* $Header: /var/src/sys/netiso/RCS/clnp_stat.h,v 5.1 89/02/09 16:20:42 hagens Exp $ */
+/* $Source: /var/src/sys/netiso/RCS/clnp_stat.h,v $ */
+
+
+#ifndef __CLNP_STAT__
+#define __CLNP_STAT__
+
+/*
+ * Counters kept by the clnp code. The declaration below also defines the
+ * single object clnp_stat, which is what INCSTAT updates.
+ */
+struct clnp_stat {
+	int cns_total;			/* total pkts received */
+	int	cns_toosmall;		/* fixed part of header too small */
+	int	cns_badhlen;		/* header length is not reasonable */
+	int	cns_badcsum;		/* checksum on packet failed */
+	int cns_badaddr;		/* address fields were not reasonable */
+	int	cns_badvers;		/* incorrect version */
+	int cns_noseg;			/* segment information forgotten */
+	int cns_noproto;		/* incorrect protocol id */
+	int	cns_delivered;		/* packets consumed by protocol */
+	int	cns_ttlexpired;		/* ttl has expired */
+	int cns_forward;		/* forwarded packets */
+	int cns_sent;			/* total packets sent */
+	int cns_odropped;		/* o.k. packets discarded, e.g. ENOBUFS */
+	int cns_cantforward;	/* non-forwarded packets */
+	int cns_fragmented;		/* packets fragmented */
+	int cns_fragments;		/* fragments received */
+	int cns_fragdropped;	/* fragments discarded */
+	int cns_fragtimeout;	/* fragments timed out */
+	int cns_ofragments;		/* fragments generated */
+	int cns_cantfrag;		/* fragmentation prohibited */
+	int cns_reassembled;	/* packets reconstructed */
+	int	cns_cachemiss;		/* cache misses */
+	int cns_congest_set;	/* congestion experienced bit set */
+	int cns_congest_rcvd;	/* congestion experienced bit received */
+	/* NOTE(review): presumably per-reason counts of error reports received -- confirm */
+	int cns_er_inhist[CLNP_ERRORS + 1];
+	/* NOTE(review): presumably per-reason counts of error reports sent -- confirm */
+	int cns_er_outhist[CLNP_ERRORS + 1];
+} clnp_stat ;
+
+/*
+ * INCSTAT(x) bumps counter x of clnp_stat. Any definition of INCSTAT made
+ * by an earlier header is dropped first.
+ */
+#undef INCSTAT
+#define INCSTAT(x) clnp_stat.x++
+
+#endif /* __CLNP_STAT__ */
diff --git a/sys/netiso/clnp_subr.c b/sys/netiso/clnp_subr.c
new file mode 100644
index 00000000000..c877811be16
--- /dev/null
+++ b/sys/netiso/clnp_subr.c
@@ -0,0 +1,658 @@
+/*-
+ * Copyright (c) 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)clnp_subr.c	8.1 (Berkeley) 6/10/93
+ */
+
+/***********************************************************
+		Copyright IBM Corporation 1987
+
+                      All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and its 
+documentation for any purpose and without fee is hereby granted, 
+provided that the above copyright notice appear in all copies and that
+both that copyright notice and this permission notice appear in 
+supporting documentation, and that the name of IBM not be
+used in advertising or publicity pertaining to distribution of the
+software without specific, written prior permission.  
+
+IBM DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING
+ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL
+IBM BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR
+ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+SOFTWARE.
+
+******************************************************************/
+
+/*
+ * ARGO Project, Computer Sciences Dept., University of Wisconsin - Madison
+ */
+/* $Header: /var/src/sys/netiso/RCS/clnp_subr.c,v 5.1 89/02/09 16:20:46 hagens Exp $ */
+/* $Source: /var/src/sys/netiso/RCS/clnp_subr.c,v $ */
+
+#ifdef ISO
+
+#include <sys/param.h>
+#include <sys/mbuf.h>
+#include <sys/domain.h>
+#include <sys/protosw.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/errno.h>
+#include <sys/time.h>
+
+#include <net/if.h>
+#include <net/route.h>
+#include <net/if_dl.h>
+
+#include <netiso/iso.h>
+#include <netiso/iso_var.h>
+#include <netiso/iso_pcb.h>
+#include <netiso/iso_snpac.h>
+#include <netiso/clnp.h>
+#include <netiso/clnp_stat.h>
+#include <netiso/argo_debug.h>
+
+/*
+ * FUNCTION:		clnp_data_ck
+ *
+ * PURPOSE:			Compare the number of bytes held in the mbuf chain
+ *					with the length the clnp header announces. Surplus
+ *					bytes are trimmed from the end of the chain; a chain
+ *					that is too short is discarded.
+ *
+ * RETURNS:			success - ptr to mbuf chain
+ *					failure - 0
+ *
+ * SIDE EFFECTS:	On failure cns_toosmall is incremented and the chain
+ *					is passed to clnp_discard with GEN_INCOMPLETE.
+ *
+ * NOTES:			The chain must contain at least one mbuf.
+ */
+struct mbuf *
+clnp_data_ck(m, length)
+register struct mbuf	*m;		/* ptr to mbuf chain containing hdr & data */
+int						length;	/* length (in bytes) of packet */
+ {
+	register struct mbuf	*tail;		/* ends up on last mbuf of chain */
+	register int 			excess;		/* bytes in chain beyond length */
+
+	/* add up the chain, stopping on (not past) its last mbuf */
+	excess = -length;
+	for (tail = m; ; tail = tail->m_next) {
+		excess += tail->m_len;
+		if (tail->m_next == 0)
+			break;
+	}
+
+	if (excess < 0) {
+		/* less data than the header promised */
+		INCSTAT(cns_toosmall);
+		clnp_discard(m, GEN_INCOMPLETE);
+		return 0;
+	}
+	if (excess > 0) {
+		/* cheap trim if the surplus sits entirely in the last mbuf */
+		if (excess <= tail->m_len)
+			tail->m_len -= excess;
+		else
+			m_adj(m, -excess);
+	}
+	return m;
+}
+
+#ifdef notdef
+/*
+ * FUNCTION:		clnp_extract_addr
+ *
+ * PURPOSE:			Extract the source and destination address from the
+ *					supplied buffer. Place them in the supplied address buffers.
+ *					If insufficient data is supplied, then fail.
+ *					The buffer holds the destination first, then the
+ *					source, each preceded by a one octet length.
+ *
+ * RETURNS:			success - Address of first byte in the packet past 
+ *						the address part.
+ *					failure - 0
+ *
+ * SIDE EFFECTS:	*destp and *srcp are written even when the function
+ *					goes on to fail.
+ *
+ * NOTES:			Compiled out (notdef).
+ */
+caddr_t
+clnp_extract_addr(bufp, buflen, srcp, destp)
+caddr_t					bufp;		/* ptr to buffer containing addresses */
+int						buflen;		/* length of buffer */
+register struct iso_addr	*srcp;		/* ptr to source address buffer */
+register struct iso_addr	*destp;		/* ptr to destination address buffer */
+ {
+	int	len;		/* argument to bcopy */
+
+	/* 
+	 *	check that we have enough data. Plus1 is for length octet
+	 */
+	if ((u_char)*bufp + 1 > buflen) {
+		return((caddr_t)0);
+	}
+	len = destp->isoa_len = (u_char)*bufp++;
+	/*
+	 * NOTE(review): the copy lands at the start of *destp; if isoa_len is
+	 * the first member it overwrites the length just stored -- confirm
+	 * against struct iso_addr.
+	 */
+	(void) bcopy(bufp, (caddr_t)destp, len);
+	/* only the address bytes are deducted here, not the length octet */
+	buflen -= len;
+	bufp += len;
+
+	/* 
+	 *	check that we have enough data. Plus1 is for length octet
+	 */
+	if ((u_char)*bufp + 1 > buflen) {
+		return((caddr_t)0);
+	}
+	len = srcp->isoa_len = (u_char)* bufp++;
+	/* NOTE(review): same copy target concern as for destp above */
+	(void) bcopy(bufp, (caddr_t)srcp, len);
+	bufp += len;
+
+	/*
+	 *	Insure that the addresses make sense
+	 */
+	if (iso_ck_addr(srcp) && iso_ck_addr(destp))
+		return bufp;
+	else
+		return (caddr_t) 0;
+}
+#endif	/* notdef */
+
+/*
+ * FUNCTION:		clnp_ours
+ *
+ * PURPOSE:			Tell whether a destination address names this system
+ *					by comparing it with the address of every entry on
+ *					the iso_ifaddr list.
+ *
+ * RETURNS:			packet is for us - 1
+ *					packet is not for us - 0
+ *
+ * SIDE EFFECTS:	
+ *
+ * NOTES:			An entry matches when the lengths are equal and the
+ *					leading (siso_nlen - siso_tlen) octets agree.
+ */
+clnp_ours(dst)
+register struct iso_addr *dst;		/* ptr to destination address */
+{
+	register struct iso_ifaddr *ia;	/* scan through interface addresses */
+	register int cmplen;			/* number of octets to compare */
+
+	ia = iso_ifaddr;
+	while (ia != 0) {
+		IFDEBUG(D_ROUTE)
+			printf("clnp_ours: ia_sis x%x, dst x%x\n", &ia->ia_addr, 
+				dst);
+		ENDDEBUG
+		/*
+		 * XXX Warning:
+		 * We are overloading siso_tlen in the if's address, as an nsel length.
+		 */
+		if (dst->isoa_len == ia->ia_addr.siso_nlen) {
+			cmplen = ia->ia_addr.siso_nlen - ia->ia_addr.siso_tlen;
+			if (bcmp((caddr_t)ia->ia_addr.siso_addr.isoa_genaddr,
+					 (caddr_t)dst->isoa_genaddr, cmplen) == 0)
+				return 1;
+		}
+		ia = ia->ia_next;
+	}
+	return 0;
+}
+
+/*
+ * Dec bit set if ifp qlen is greater than congest_threshold.
+ * Read by clnp_forward, and only when compiled with DECBIT.
+ */
+int congest_threshold = 0;
+
+/*
+ * FUNCTION:		clnp_forward
+ *
+ * PURPOSE:			Forward the datagram passed
+ *					clnpintr guarantees that the header will be
+ *					contiguous (a cluster mbuf will be used if necessary).
+ *
+ *					If oidx is NULL, no options are present.
+ *
+ *					Steps, in order: drop link level multicasts, age
+ *					the ttl, route (by source route when one is valid),
+ *					possibly ask for a redirect, update options, set
+ *					the congestion bit (DECBIT only), then send whole
+ *					or fragmented depending on the outgoing mtu.
+ *
+ * RETURNS:			nothing
+ *
+ * SIDE EFFECTS:	The packet is always consumed: it is discarded,
+ *					handed to the interface output routine, or handed
+ *					to clnp_fragment. Updates the cns_cantforward,
+ *					cns_ttlexpired, cns_forward and cns_congest_set
+ *					counters. Any route obtained is released on exit.
+ *
+ * NOTES:			The results of if_output and clnp_fragment are
+ *					ignored.
+ */
+clnp_forward(m, len, dst, oidx, seg_off, inbound_shp)
+struct mbuf			*m;		/* pkt to forward */
+int					len;	/* length of pkt */
+struct iso_addr		*dst;	/* destination address */
+struct clnp_optidx	*oidx;	/* option index */
+int					seg_off;/* offset of segmentation part */
+struct snpa_hdr		*inbound_shp;	/* subnetwork header of inbound packet */
+{
+	struct clnp_fixed		*clnp;	/* ptr to fixed part of header */
+	int						error;	/* return value of route function */
+	struct sockaddr			*next_hop;	/* next hop for dgram */
+	struct ifnet			*ifp;	/* ptr to outgoing interface */
+	struct iso_ifaddr		*ia = 0;/* ptr to iso name for ifp */
+	struct route_iso		route;	/* filled in by clnp_route */
+	extern int				iso_systype;
+
+	clnp = mtod(m, struct clnp_fixed *);
+	/* the exit path at "done:" inspects route.ro_rt, so clear it first */
+	bzero((caddr_t)&route, sizeof(route)); /* MUST be done before "bad:" */
+
+	/*
+	 *	Don't forward multicast or broadcast packets
+	 */
+	if ((inbound_shp) && (IS_MULTICAST(inbound_shp->snh_dhost))) {
+		IFDEBUG(D_FORWARD)
+			printf("clnp_forward: dropping multicast packet\n");
+		ENDDEBUG
+		clnp->cnf_type &= ~CNF_ERR_OK; /* so we don't generate an ER */
+		clnp_discard(m, 0);
+		INCSTAT(cns_cantforward);
+		goto done;
+	}
+
+	IFDEBUG(D_FORWARD)
+		printf("clnp_forward: %d bytes, to %s, options x%x\n", len,
+			clnp_iso_addrp(dst), oidx);
+	ENDDEBUG
+
+	/*
+	 *	Decrement ttl, and if zero drop datagram
+	 *	Can't compare ttl as less than zero 'cause its a unsigned
+	 */
+	if ((clnp->cnf_ttl == 0) || (--clnp->cnf_ttl == 0)) {
+		IFDEBUG(D_FORWARD)
+			printf("clnp_forward: discarding datagram because ttl is zero\n");
+		ENDDEBUG
+		INCSTAT(cns_ttlexpired);
+		clnp_discard(m, TTL_EXPTRANSIT);
+		goto done;
+	}
+	/*
+	 *	Route packet; special case for source rt
+	 */
+	if CLNPSRCRT_VALID(oidx) {
+		/*
+		 *	Update src route first
+		 */
+		clnp_update_srcrt(m, oidx);
+		error = clnp_srcroute(m, oidx, &route, &next_hop, &ia, dst);
+	} else {
+		error = clnp_route(dst, &route, 0, &next_hop, &ia);
+	}
+	/* a route without an interface address is as useless as no route */
+	if (error || ia == 0) {
+		IFDEBUG(D_FORWARD)
+			printf("clnp_forward: can't route packet (errno %d)\n", error);
+		ENDDEBUG
+		clnp_discard(m, ADDR_DESTUNREACH);
+		INCSTAT(cns_cantforward);
+		goto done;
+	}
+	ifp = ia->ia_ifp;
+
+	IFDEBUG(D_FORWARD)
+		printf("clnp_forward: packet routed to %s\n", 
+			clnp_iso_addrp(&((struct sockaddr_iso *)next_hop)->siso_addr));
+	ENDDEBUG
+
+	/* counted here; backed out below if the packet is dropped after all */
+	INCSTAT(cns_forward);
+
+	/*
+	 *	If we are an intermediate system and
+	 *	we are routing outbound on the same ifp that the packet
+	 *	arrived upon, and we know the next hop snpa, 
+	 *	then generate a redirect request
+	 */
+	if ((iso_systype & SNPA_IS) && (inbound_shp) && 
+		(ifp == inbound_shp->snh_ifp))
+		    esis_rdoutput(inbound_shp, m, oidx, dst, route.ro_rt);
+	/*
+	 *	If options are present, update them
+	 */
+	if (oidx) {
+		struct iso_addr	*mysrc = &ia->ia_addr.siso_addr;
+		/*
+		 * NOTE(review): mysrc is the address of a member of *ia, and ia
+		 * was checked against 0 above, so this test appears unable to
+		 * succeed -- confirm before relying on the branch.
+		 */
+		if (mysrc == NULL) {
+			clnp_discard(m, ADDR_DESTUNREACH);
+			INCSTAT(cns_cantforward);
+			clnp_stat.cns_forward--;
+			goto done;
+		} else {
+			(void) clnp_dooptions(m, oidx, ifp, mysrc);
+		}
+	}
+
+#ifdef	DECBIT
+	if (ifp->if_snd.ifq_len > congest_threshold) {
+		/*
+		 *	Congestion! Set the Dec Bit and thank Dave Oran
+		 */
+		IFDEBUG(D_FORWARD)
+			printf("clnp_forward: congestion experienced\n");
+		ENDDEBUG
+		/* the bit can only be set in a QOS option that is already there */
+		if ((oidx) && (oidx->cni_qos_formatp)) {
+			caddr_t	qosp = CLNP_OFFTOOPT(m, oidx->cni_qos_formatp);
+			u_char	qos = *qosp;
+			IFDEBUG(D_FORWARD)
+				printf("clnp_forward: setting congestion bit (qos x%x)\n", qos);
+			ENDDEBUG
+			if ((qos & CLNPOVAL_GLOBAL) == CLNPOVAL_GLOBAL) {
+				qos |= CLNPOVAL_CONGESTED;
+				INCSTAT(cns_congest_set);
+				*qosp = qos;
+			}
+		}
+	}
+#endif	/* DECBIT */
+	
+	/*
+	 *	Dispatch the datagram if it is small enough, otherwise fragment
+	 */
+	if (len <= SN_MTU(ifp, route.ro_rt)) {
+		/* header was modified above (ttl at least), so redo the checksum */
+		iso_gen_csum(m, CLNP_CKSUM_OFF, (int)clnp->cnf_hdr_len);
+		(void) (*ifp->if_output)(ifp, m, next_hop, route.ro_rt);
+	} else {
+		(void) clnp_fragment(ifp, m, next_hop, len, seg_off, /* flags */0, route.ro_rt);
+	}
+	
+done:
+	/*
+	 *	Free route
+	 */
+	if (route.ro_rt != NULL) {
+		RTFREE(route.ro_rt);
+	}
+}
+
+#ifdef	notdef
+/*
+ * FUNCTION:		clnp_insert_addr
+ *
+ * PURPOSE:			Insert the address part into a clnp datagram:
+ *					destination first, then source, each preceded by
+ *					a one octet length.
+ *
+ * RETURNS:			Address of first byte after address part in datagram.
+ *
+ * SIDE EFFECTS:	
+ *
+ * NOTES:			Assume that there is enough space for the address part.
+ *					Compiled out (notdef).
+ */
+caddr_t
+clnp_insert_addr(bufp, srcp, dstp)
+caddr_t						bufp;	/* address of where addr part goes */
+register struct iso_addr	*srcp;	/* ptr to src addr */
+register struct iso_addr	*dstp;	/* ptr to dst addr */
+{
+	*bufp++ = dstp->isoa_len;
+	/*
+	 * NOTE(review): the copy starts at the beginning of *dstp; if isoa_len
+	 * is the first member the length octet is copied again in place of
+	 * the first address octet -- confirm against struct iso_addr.
+	 */
+	(void) bcopy((caddr_t)dstp, bufp, dstp->isoa_len);
+	bufp += dstp->isoa_len;
+
+	*bufp++ = srcp->isoa_len;
+	/* NOTE(review): same copy source concern as for dstp above */
+	(void) bcopy((caddr_t)srcp, bufp, srcp->isoa_len);
+	bufp += srcp->isoa_len;
+
+	return bufp;
+}
+
+#endif	/* notdef */
+
+/*
+ * FUNCTION:		clnp_route
+ *
+ * PURPOSE:			Find the first hop toward the destination of a clnp
+ *					datagram. The caller supplies a route structure; a
+ *					route cached there is reused if it is still up and
+ *					leads to the same destination, otherwise it is
+ *					released and a new one is allocated.
+ *					With SO_DONTROUTE no routing entry is used at all:
+ *					the destination itself becomes the first hop and a
+ *					local interface address is looked up for it.
+ *
+ * RETURNS:			route found - 0
+ *					unix error code (EADDRNOTAVAIL, ENETUNREACH)
+ *
+ * SIDE EFFECTS:	ro->ro_dst is rewritten whenever a route is set up;
+ *					the use count of the chosen route is incremented.
+ *
+ * NOTES:			It is up to the caller to free the routing entry
+ *					allocated in route.
+ *					first_hop and ifa may each be 0 if the caller does
+ *					not want that result.
+ */
+clnp_route(dst, ro, flags, first_hop, ifa)
+	struct iso_addr	*dst;			/* ptr to datagram destination */
+	register struct	route_iso *ro;	/* existing route structure */
+	int flags;						/* flags for routing */
+	struct sockaddr **first_hop;	/* result: fill in with ptr to firsthop */
+	struct iso_ifaddr **ifa;		/* result: fill in with ptr to interface */
+{
+	register struct rtentry *rt;	/* route finally selected */
+	int stale;						/* cached route can not be reused */
+
+	if (flags & SO_DONTROUTE) {
+		struct iso_ifaddr *local;
+
+		/* any cached route is of no use in this mode */
+		if (ro->ro_rt) {
+			RTFREE(ro->ro_rt);
+			ro->ro_rt = 0;
+		}
+		bzero((caddr_t)&ro->ro_dst, sizeof(ro->ro_dst));
+		bcopy((caddr_t)dst, (caddr_t)&ro->ro_dst.siso_addr,
+			1 + (unsigned)dst->isoa_len);
+		ro->ro_dst.siso_family = AF_ISO;
+		ro->ro_dst.siso_len = sizeof(ro->ro_dst);
+		if ((local = iso_localifa(&ro->ro_dst)) == 0)
+			return EADDRNOTAVAIL;
+		if (ifa)
+			*ifa = local;
+		if (first_hop)
+			*first_hop = (struct sockaddr *)&ro->ro_dst;
+		return 0;
+	}
+
+	/*
+	 *	A cached route is stale when it is down or when it was made
+	 *	for some other destination.
+	 */
+	stale = ro->ro_rt && ((ro->ro_rt->rt_flags & RTF_UP) == 0 ||
+		(Bcmp(ro->ro_dst.siso_data, dst->isoa_genaddr, dst->isoa_len)));
+	if (stale) {
+		IFDEBUG(D_ROUTE)
+			printf("clnp_route: freeing old route: ro->ro_rt 0x%x\n",
+				ro->ro_rt);
+			printf("clnp_route: old route refcnt: 0x%x\n",
+				ro->ro_rt->rt_refcnt);
+		ENDDEBUG
+
+		/* free old route entry */
+		RTFREE(ro->ro_rt);
+		ro->ro_rt = (struct rtentry *)0;
+	} else {
+		IFDEBUG(D_ROUTE)
+			printf("clnp_route: OK route exists\n");
+		ENDDEBUG
+	}
+
+	if (ro->ro_rt == 0) {
+		/* set up new route structure */
+		bzero((caddr_t)&ro->ro_dst, sizeof(ro->ro_dst));
+		ro->ro_dst.siso_len = sizeof(ro->ro_dst);
+		ro->ro_dst.siso_family = AF_ISO;
+		Bcopy(dst, &ro->ro_dst.siso_addr, 1 + dst->isoa_len);
+		/* allocate new route */
+		IFDEBUG(D_ROUTE)
+			printf("clnp_route: allocating new route to %s\n",
+				clnp_iso_addrp(dst));
+		ENDDEBUG
+		rtalloc((struct route *)ro);
+		if (ro->ro_rt == 0)
+			return(ENETUNREACH);	/* rtalloc failed */
+	}
+
+	rt = ro->ro_rt;
+	rt->rt_use++;
+	if (ifa) {
+		*ifa = (struct iso_ifaddr *)rt->rt_ifa;
+		if (*ifa == 0)
+			panic("clnp_route");
+	}
+	/* through a gateway the first hop is the gateway, else the dst itself */
+	if (first_hop)
+		*first_hop = (rt->rt_flags & RTF_GATEWAY) ?
+			rt->rt_gateway : (struct sockaddr *)&ro->ro_dst;
+	return(0);
+}
+
+/*
+ * FUNCTION:		clnp_srcroute
+ *
+ * PURPOSE:			Route a datagram that carries a source route option.
+ *					The next address listed in the option is routed to;
+ *					once the list is used up the final destination is
+ *					routed to instead. With complete source routing the
+ *					first hop found must be that very address, else the
+ *					call fails.
+ *					Usage of first_hop,
+ *					ifp, and error return is identical to clnp_route.
+ *
+ * RETURNS:			0 or unix error code
+ *
+ * SIDE EFFECTS:	
+ *
+ * NOTES:			Remember that option index pointers are really
+ *					offsets from the beginning of the mbuf.
+ *					first_hop is dereferenced and so must not be 0.
+ */
+clnp_srcroute(options, oidx, ro, first_hop, ifa, final_dst)
+struct mbuf			*options;		/* ptr to options */
+struct clnp_optidx	*oidx;			/* index to options */
+struct route_iso	*ro;			/* route structure */
+struct sockaddr		**first_hop;	/* RETURN: fill in with ptr to firsthop */
+struct iso_addr		*final_dst;		/* final destination */
+struct iso_ifaddr	**ifa;			/* RETURN: fill in with ptr to interface */
+{
+	struct iso_addr	dst;		/* address to route toward */
+	caddr_t			hop;		/* where that address is copied from */
+	int				error;		/* result of clnp_route */
+
+	if (CLNPSRCRT_TERM(oidx, options)) {
+		/* source route exhausted: head for the real destination */
+		dst.isoa_len = final_dst->isoa_len;
+		hop = (caddr_t)final_dst->isoa_genaddr;
+	} else {
+		/* take the current entry of the source route */
+		dst.isoa_len = CLNPSRCRT_CLEN(oidx, options);
+		hop = (caddr_t)CLNPSRCRT_CADDR(oidx, options);
+	}
+	bcopy(hop, dst.isoa_genaddr, dst.isoa_len);
+
+	error = clnp_route(&dst, ro, 0, first_hop, ifa);
+	if (error != 0)
+		return error;
+
+	/*
+	 *	If complete src rt, first hop must be equal to dst
+	 */
+	if ((CLNPSRCRT_TYPE(oidx, options) == CLNPOVAL_COMPRT) &&
+	 (!iso_addrmatch1(&(*(struct sockaddr_iso **)first_hop)->siso_addr,&dst))){
+		IFDEBUG(D_OPTIONS)
+			printf("clnp_srcroute: complete src route failed\n");
+		ENDDEBUG
+		return EHOSTUNREACH; /* RAH? would like ESRCRTFAILED */
+	}
+
+	return 0;
+}
+
+/*
+ * FUNCTION:		clnp_echoreply
+ *
+ * PURPOSE:			Send an echo request back to its sender as an echo
+ *					reply, using a temporary pcb whose local address is
+ *					the request's destination and whose foreign address
+ *					is the request's source.
+ *
+ * RETURNS:			result of clnp_output
+ *
+ * SIDE EFFECTS:	
+ *
+ * NOTES:			Options of the request are not copied; ec_oidxp is
+ *					not used. If that is ever implemented, only the
+ *					record route option need be copied, reset to zero
+ *					length.
+ */
+clnp_echoreply(ec_m, ec_len, ec_src, ec_dst, ec_oidxp)
+struct mbuf			*ec_m;		/* echo request */
+int					ec_len;		/* length of ec */
+struct sockaddr_iso	*ec_src;		/* src of ec */
+struct sockaddr_iso	*ec_dst; 	/* destination of ec (i.e., us) */
+struct clnp_optidx	*ec_oidxp;	/* options index to ec packet */
+{
+	struct isopcb	reply_pcb;	/* fake pcb for the output function */
+	int				ret;
+
+	bzero(&reply_pcb, sizeof(reply_pcb));
+	reply_pcb.isop_faddr = ec_src;	/* reply goes to the requester */
+	reply_pcb.isop_laddr = ec_dst;	/* and comes from us */
+
+	ret = clnp_output(ec_m, &reply_pcb, ec_len, CLNP_NOCACHE|CLNP_ECHOR);
+
+	IFDEBUG(D_OUTPUT)
+		printf("clnp_echoreply: output returns %d\n", ret);
+	ENDDEBUG
+	return ret;
+}
+
+/*
+ * FUNCTION:		clnp_badmtu
+ *
+ * PURPOSE:			Report that a route without an initialized mtu is
+ *					being used, and supply the interface mtu instead.
+ *
+ * RETURNS:			mtu of ifp.
+ *
+ * SIDE EFFECTS:	prints notice, slows down system.
+ *					With ARGO_DEBUG the route's destination is dumped
+ *					as well.
+ */
+clnp_badmtu(ifp, rt, line, file)
+struct ifnet *ifp;	/* outgoing interface */
+struct rtentry *rt; /* dst route */
+int line;			/* source line where the dirty deed occurred */
+char *file;			/* source file where the dirty deed occurred */
+{
+	printf("sending on route 0x%x with no mtu, line %d of file %s\n",
+		rt, line, file);
+#ifdef ARGO_DEBUG
+	printf("route dst is ");
+	dump_isoaddr(rt_key(rt));
+#endif
+	return (ifp->if_mtu);
+}
+
+/*
+ * FUNCTION:		clnp_ypocb - backwards bcopy
+ *
+ * PURPOSE:			Copy len bytes from one buffer to another, last byte
+ *					first and first byte last.
+ *
+ * RETURNS:			none
+ *
+ * SIDE EFFECTS:	
+ *
+ * NOTES:			No attempt has been made to make this efficient
+ */
+clnp_ypocb(from, to, len)
+caddr_t from;		/* src buffer */
+caddr_t to;			/* dst buffer */
+u_int	len;		/* number of bytes */
+{
+	register u_int	i = len;	/* one past the next byte to copy */
+
+	while (i != 0) {
+		i--;
+		to[i] = from[i];
+	}
+}
+#endif	/* ISO */
diff --git a/sys/netiso/clnp_timer.c b/sys/netiso/clnp_timer.c
new file mode 100644
index 00000000000..718d5302f77
--- /dev/null
+++ b/sys/netiso/clnp_timer.c
@@ -0,0 +1,180 @@
+/*-
+ * Copyright (c) 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)clnp_timer.c	8.1 (Berkeley) 6/10/93
+ */
+
+/***********************************************************
+		Copyright IBM Corporation 1987
+
+                      All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and its 
+documentation for any purpose and without fee is hereby granted, 
+provided that the above copyright notice appear in all copies and that
+both that copyright notice and this permission notice appear in 
+supporting documentation, and that the name of IBM not be
+used in advertising or publicity pertaining to distribution of the
+software without specific, written prior permission.  
+
+IBM DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING
+ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL
+IBM BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR
+ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+SOFTWARE.
+
+******************************************************************/
+
+/*
+ * ARGO Project, Computer Sciences Dept., University of Wisconsin - Madison
+ */
+/* $Header: clnp_timer.c,v 4.2 88/06/29 14:59:05 hagens Exp $ */
+/* $Source: /usr/argo/sys/netiso/RCS/clnp_timer.c,v $ */
+
+#include <sys/param.h>
+#include <sys/mbuf.h>
+#include <sys/domain.h>
+#include <sys/protosw.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/errno.h>
+
+#include <net/if.h>
+#include <net/route.h>
+
+#include <netiso/iso.h>
+#include <netiso/clnp.h>
+#include <netiso/clnp_stat.h>
+#include <netiso/argo_debug.h>
+
+extern struct clnp_fragl *clnp_frags;
+
+/*
+ * FUNCTION:		clnp_freefrags
+ *
+ * PURPOSE:			Release everything held by one reassembly entry:
+ *					its queued fragments, its copy of the original
+ *					header, and the entry itself.
+ *
+ * RETURNS:			the entry that followed cfh in the list, as
+ *					sampled on entry
+ *
+ * SIDE EFFECTS:	unlinks cfh from the clnp_frags list; counts
+ *					cns_fragdropped once per fragment and once more
+ *					for the header copy
+ *
+ * NOTES:			
+ *			TODO: send ER back to source
+ */
+struct clnp_fragl *
+clnp_freefrags(cfh)
+register struct clnp_fragl	*cfh;	/* fragment header to delete */
+{
+	struct clnp_fragl	*successor = cfh->cfl_next;
+	struct clnp_fragl	**linkp;	/* link that may point at cfh */
+	struct clnp_frag	*cf, *cf_after;
+
+	/*
+	 * Drop every queued fragment.  The forward link is picked up
+	 * before the fragment's data is freed.
+	 */
+	for (cf = cfh->cfl_frags; cf != NULL; cf = cf_after) {
+		cf_after = cf->cfr_next;
+		INCSTAT(cns_fragdropped);
+		m_freem(cf->cfr_data);
+	}
+
+	/* the saved copy of the header goes too */
+	INCSTAT(cns_fragdropped);
+	m_freem(cfh->cfl_orighdr);
+
+	/*
+	 * Unlink cfh.  Walking the links themselves (starting with the
+	 * list head) covers the head and interior positions alike; if
+	 * cfh is not on the list nothing is changed.
+	 */
+	for (linkp = &clnp_frags; *linkp != NULL; linkp = &(*linkp)->cfl_next) {
+		if (*linkp == cfh) {
+			*linkp = cfh->cfl_next;
+			break;
+		}
+	}
+
+	/* finally the mbuf that holds the fragment header itself */
+	m_freem(dtom(cfh));
+
+	return(successor);
+}
+
+/*
+ * FUNCTION:		clnp_slowtimo
+ *
+ * PURPOSE:			clnp timer processing; if the ttl expires on a 
+ *					packet on the reassembly queue, discard it.
+ *
+ * RETURNS:			none
+ *
+ * SIDE EFFECTS:	decrements cfl_ttl of every entry on clnp_frags;
+ *					entries reaching zero are freed through
+ *					clnp_freefrags and counted in cns_fragtimeout
+ *
+ * NOTES:			The list head is read only after splnet() has
+ *					been raised, so the whole walk, including the
+ *					first pointer it uses, runs with network
+ *					processing blocked.
+ */
+clnp_slowtimo()
+{
+	register struct clnp_fragl	*cfh;
+	int s = splnet();
+
+	/* sample the head under splnet, not in an initializer before it */
+	cfh = clnp_frags;
+	while (cfh != NULL) {
+		if (--cfh->cfl_ttl == 0) {
+			/* clnp_freefrags hands back the following entry */
+			cfh = clnp_freefrags(cfh);
+			INCSTAT(cns_fragtimeout);
+		} else {
+			cfh = cfh->cfl_next;
+		}
+	}
+	splx(s);
+}
+
+/*
+ * FUNCTION:		clnp_drain
+ *
+ * PURPOSE:			Throw away every datagram that is waiting for
+ *					reassembly.
+ *
+ * RETURNS:			none
+ *
+ * SIDE EFFECTS:	every entry reachable from clnp_frags is released
+ *					through clnp_freefrags
+ *
+ * NOTES:			
+ *	TODO: should send back ER
+ */
+clnp_drain()
+{
+	register struct clnp_fragl	*cfh;
+
+	/* clnp_freefrags returns the next entry, which drives the loop */
+	for (cfh = clnp_frags; cfh != NULL; )
+		cfh = clnp_freefrags(cfh);
+}
diff --git a/sys/netiso/cltp_usrreq.c b/sys/netiso/cltp_usrreq.c
new file mode 100644
index 00000000000..93f8d1c398e
--- /dev/null
+++ b/sys/netiso/cltp_usrreq.c
@@ -0,0 +1,405 @@
+/*
+ * Copyright (c) 1989, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)cltp_usrreq.c	8.1 (Berkeley) 6/10/93
+ */
+
+#ifndef CLTPOVAL_SRC /* XXX -- till files gets changed */
+#include <sys/param.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/protosw.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/errno.h>
+#include <sys/stat.h>
+
+#include <net/if.h>
+#include <net/route.h>
+
+#include <netiso/argo_debug.h>
+#include <netiso/iso.h>
+#include <netiso/iso_pcb.h>
+#include <netiso/iso_var.h>
+#include <netiso/clnp.h>
+#include <netiso/cltp_var.h>
+#endif
+
+/*
+ * CLTP protocol implementation.
+ * Per ISO 8602, December, 1987.
+ */
+cltp_init()
+{
+	/*
+	 * Start with an empty pcb list: the head, cltb, is its own
+	 * successor and its own predecessor.
+	 */
+	cltb.isop_prev = &cltb;
+	cltb.isop_next = &cltb;
+}
+
+int cltp_cksum = 1;	/* nonzero: cltp_output() adds a checksum option to each datagram */
+
+
+/*
+ * CLTP input: parse the unit-data header at the front of m0, find the
+ * pcb bound to the destination TSAP, and queue the data, tagged with
+ * the sender's address, on that pcb's socket.
+ *
+ * m0		received datagram; consumed on every path (queued or freed).
+ * srcsa	sender's address, treated as a struct sockaddr_iso; its
+ *		selector length fields are overwritten from the source option.
+ * dstsa, cons_channel, output
+ *		not used here.
+ *
+ * Always returns 0.  Drops are counted in cltpstat.
+ *
+ * The header comes off the network, so each option is checked against
+ * the end of the header before its length or value is used, and a
+ * source selector that would not fit in one mbuf (or in the one-byte
+ * siso_len) is refused rather than copied.
+ */
+/* ARGSUSED */
+cltp_input(m0, srcsa, dstsa, cons_channel, output)
+	struct mbuf *m0;
+	struct sockaddr *srcsa, *dstsa;
+	u_int cons_channel;
+	int (*output)();
+{
+	register struct isopcb *isop;
+	register struct mbuf *m = m0;
+	register u_char *up = mtod(m, u_char *);
+	register struct sockaddr_iso *src = (struct sockaddr_iso *)srcsa;
+	int len, slen, hdrlen = *up + 1, dlen = 0;
+	u_char *uplim = up + hdrlen;
+	caddr_t dtsap;
+
+	for (len = 0; m; m = m->m_next)
+		len += m->m_len;
+	/*
+	 * The options are walked through a flat pointer, so the whole
+	 * header has to sit in the first mbuf.
+	 */
+	if (hdrlen > m0->m_len) {
+		cltpstat.cltps_badlen++;
+		goto bad;
+	}
+	up += 2; /* skip header */
+	while (up < uplim) switch (*up) { /* process options */
+	case CLTPOVAL_SRC:
+		/* code, length and value must all lie inside the header */
+		if (uplim - up < 2 || uplim - up < 2 + up[1]) {
+			cltpstat.cltps_badlen++;
+			goto bad;
+		}
+		/*
+		 * Work the new address length out in an int first: it is
+		 * stored in a one-byte field, and the address must also fit
+		 * in the data area of the single mbuf that may hold it.
+		 */
+		slen = up[1] + (TSEL(src) - (caddr_t)src);
+		if (slen > MLEN || slen > 0xff) {
+			cltpstat.cltps_badlen++;
+			goto bad;
+		}
+		src->siso_tlen = up[1];
+		src->siso_len = slen;
+		if (src->siso_len < sizeof(*src))
+			src->siso_len = sizeof(*src);
+		else if (src->siso_len > sizeof(*src)) {
+			/*
+			 * Too big for the caller's sockaddr: move to an mbuf.
+			 * Allocate only once, so that a repeated source
+			 * option reuses the mbuf instead of leaking it.
+			 */
+			if (src == (struct sockaddr_iso *)srcsa) {
+				MGET(m, M_DONTWAIT, MT_SONAME);
+				if (m == 0)
+					goto bad;
+				src = mtod(m, struct sockaddr_iso *);
+				/* srcsa itself is only sizeof(*src) long */
+				bcopy((caddr_t)srcsa, (caddr_t)src,
+				    sizeof(*src));
+			}
+			dtom(src)->m_len = src->siso_len;
+		}
+		bcopy((caddr_t)up + 2, TSEL(src), up[1]);
+		up += 2 + src->siso_tlen;
+		continue;
+	
+	case CLTPOVAL_DST:
+		if (uplim - up < 2 || uplim - up < 2 + up[1]) {
+			cltpstat.cltps_badlen++;
+			goto bad;
+		}
+		dtsap = 2 + (caddr_t)up;
+		dlen = up[1];
+		up += 2 + dlen;
+		continue;
+
+	case CLTPOVAL_CSM:
+		/* code, length and the two checksum bytes */
+		if (uplim - up < 4) {
+			cltpstat.cltps_badlen++;
+			goto bad;
+		}
+		if (iso_check_csum(m0, len)) {
+			cltpstat.cltps_badsum++;
+			goto bad;
+		}
+		up += 4;
+		continue;
+
+	default:
+		printf("clts: unknown option (%x)\n", up[0]);
+		cltpstat.cltps_hdrops++;
+		goto bad;
+	}
+	/* both TSAP options are required */
+	if (dlen == 0 || src->siso_tlen == 0)
+		goto bad;
+	for (isop = cltb.isop_next;; isop = isop->isop_next) {
+		if (isop == &cltb) {
+			cltpstat.cltps_noport++;
+			goto bad;
+		}
+		/*
+		 * The selectors must agree in length as well as content;
+		 * comparing dlen bytes alone would accept a mere prefix of
+		 * the bound selector, or compare past its end.
+		 */
+		if (isop->isop_laddr &&
+		    isop->isop_laddr->siso_tlen == dlen &&
+		    bcmp(TSEL(isop->isop_laddr), dtsap, dlen) == 0)
+			break;
+	}
+	/* strip the CLTP header before queueing the data */
+	m = m0;
+	m->m_len -= hdrlen;
+	m->m_data += hdrlen;
+	if (sbappendaddr(&isop->isop_socket->so_rcv, (struct sockaddr *)src,
+	    m, (struct mbuf *)0) == 0)
+		goto bad;
+	cltpstat.cltps_ipackets++;
+	sorwakeup(isop->isop_socket);
+	m0 = 0;		/* now owned by the socket buffer */
+bad:
+	/* also reached on success, with m0 cleared */
+	if (src != (struct sockaddr_iso *)srcsa)
+		m_freem(dtom(src));
+	if (m0)
+		m_freem(m0);
+	return 0;
+}
+
+/*
+ * Asynchronous error notification for a cltp pcb: wake up both the
+ * reading and the writing side of its socket so that the user can
+ * collect the error status.
+ */
+cltp_notify(isop)
+	register struct isopcb *isop;
+{
+	register struct socket *so = isop->isop_socket;
+
+	sorwakeup(so);
+	sowwakeup(so);
+}
+
+/*
+ * Control input for CLTP: turn a PRC_* notification about the address
+ * sa into calls on the cltp pcbs that iso_pcbnotify selects.
+ *
+ * cmd	PRC_* command; out-of-range values are ignored.
+ * sa	address the notification is about; ignored when null, when its
+ *	family is neither AF_ISO nor AF_CCITT, or when siso_nlen is zero.
+ *
+ * Route-change commands pass iso_rtchange to iso_pcbnotify; every other
+ * command with a nonzero inetctlerrmap[] entry passes cltp_notify.
+ */
+cltp_ctlinput(cmd, sa)
+	int cmd;
+	struct sockaddr *sa;
+{
+	extern u_char inetctlerrmap[];
+	struct sockaddr_iso *siso;
+	int iso_rtchange();
+
+	/*
+	 * cmd indexes inetctlerrmap[] below.  The table is sized by
+	 * PRC_NCMDS, so PRC_NCMDS itself is already one past the end.
+	 */
+	if ((unsigned)cmd >= PRC_NCMDS)
+		return;
+	/* test the pointer before looking through it, not afterwards */
+	if (sa == 0)
+		return;
+	if (sa->sa_family != AF_ISO && sa->sa_family != AF_CCITT)
+		return;
+	siso = (struct sockaddr_iso *)sa;
+	if (siso->siso_nlen == 0)
+		return;
+
+	switch (cmd) {
+	case PRC_ROUTEDEAD:
+	case PRC_REDIRECT_NET:
+	case PRC_REDIRECT_HOST:
+	case PRC_REDIRECT_TOSNET:
+	case PRC_REDIRECT_TOSHOST:
+		iso_pcbnotify(&cltb, siso,
+				(int)inetctlerrmap[cmd], iso_rtchange);
+		break;
+
+	default:
+		if (inetctlerrmap[cmd] == 0)
+			return;		/* XXX */
+		iso_pcbnotify(&cltb, siso, (int)inetctlerrmap[cmd],
+			cltp_notify);
+	}
+}
+
+/*
+ * Prepend a CLTP unit-data header to m and pass the datagram to
+ * tpclnp_output.
+ *
+ * isop	pcb supplying the local and foreign addresses; both must be set.
+ * m	data to send; consumed on every path.
+ *
+ * Header layout, in order:
+ *	length indicator (header length - 1), UD_TPDU_type,
+ *	CLTPOVAL_SRC, length, local transport selector,
+ *	CLTPOVAL_DST, length, foreign transport selector,
+ *	and, when cltp_cksum is nonzero,
+ *	CLTPOVAL_CSM, 2, two checksum bytes filled in by iso_gen_csum.
+ *
+ * Returns ENOTCONN if either address is missing, ENOBUFS if no mbuf
+ * could be had for the header, otherwise whatever tpclnp_output returns.
+ */
+cltp_output(isop, m)
+	register struct isopcb *isop;
+	register struct mbuf *m;
+{
+	register int len;
+	register struct sockaddr_iso *siso;
+	int hdrlen, error = 0, docsum;
+	register u_char *up;
+
+	if (isop->isop_laddr == 0 || isop->isop_faddr == 0) {
+		error = ENOTCONN;
+		goto bad;
+	}
+	/*
+	 * Calculate data length and get a mbuf for CLTP header.
+	 */
+	hdrlen = 2 + 2 + isop->isop_laddr->siso_tlen
+		   + 2 + isop->isop_faddr->siso_tlen;
+	docsum = /*isop->isop_flags & CLNP_NO_CKSUM*/ cltp_cksum;
+	if (docsum)
+		hdrlen += 4;
+	M_PREPEND(m, hdrlen, M_WAIT);
+	/*
+	 * On failure M_PREPEND leaves m null, having already released
+	 * the chain, so there is nothing left to free here.
+	 */
+	if (m == 0)
+		return (ENOBUFS);
+	len = m->m_pkthdr.len;
+	/*
+	 * Fill in mbuf with extended CLTP header
+	 */
+	up = mtod(m, u_char *);
+	up[0] = hdrlen - 1;
+	up[1] = UD_TPDU_type;
+	up[2] = CLTPOVAL_SRC;
+	up[3] = (siso = isop->isop_laddr)->siso_tlen;
+	up += 4;
+	bcopy(TSEL(siso), (caddr_t)up, siso->siso_tlen);
+	up += siso->siso_tlen;
+	up[0] = CLTPOVAL_DST;
+	up[1] = (siso = isop->isop_faddr)->siso_tlen;
+	up += 2;
+	bcopy(TSEL(siso), (caddr_t)up, siso->siso_tlen);
+	/*
+	 * Stuff checksum and output datagram.
+	 */
+	if (docsum) {
+		up += siso->siso_tlen;
+		up[0] = CLTPOVAL_CSM;
+		up[1] = 2;
+		/* the offset passed is that of the two checksum bytes */
+		iso_gen_csum(m, 2 + up - mtod(m, u_char *), len);
+	}
+	cltpstat.cltps_opackets++;
+	return (tpclnp_output(isop, m, len, !docsum));
+bad:
+	m_freem(m);
+	return (error);
+}
+
+u_long	cltp_sendspace = 9216;		/* really max datagram size */
+u_long	cltp_recvspace = 40 * (1024 + sizeof(struct sockaddr_iso));
+					/* 40 1K datagrams, each with a sockaddr_iso; both values go to soreserve() on PRU_ATTACH */
+
+
+/*
+ * User-request entry point for CLTP sockets.
+ *
+ * so		socket the request is for.
+ * req		PRU_* request code.
+ * m, nam, control
+ *		request-dependent mbufs.  For PRU_CONTROL they are not mbufs
+ *		at all but the arguments of iso_control.  For PRU_SEND, m is
+ *		the data and nam an optional destination.  For PRU_SOCKADDR
+ *		and PRU_PEERADDR the address is returned in nam.
+ *
+ * Returns 0 or an errno value.  Except where a case returns directly
+ * (PRU_CONTROL, PRU_SENSE, PRU_RCVD, PRU_RCVOOB), control and m are
+ * freed before returning if they are still non-null.
+ */
+/*ARGSUSED*/
+cltp_usrreq(so, req, m, nam, control)
+	struct socket *so;
+	int req;
+	struct mbuf *m, *nam, *control;
+{
+	register struct isopcb *isop = sotoisopcb(so);
+	int s, error = 0;
+
+	if (req == PRU_CONTROL)
+		return (iso_control(so, (int)m, (caddr_t)nam,
+			(struct ifnet *)control));
+	/* no pcb is acceptable only for attach; control data never is */
+	if ((isop == NULL && req != PRU_ATTACH) ||
+	    (control && control->m_len)) {
+		error = EINVAL;
+		goto release;
+	}
+	switch (req) {
+
+	case PRU_ATTACH:
+		if (isop != NULL) {
+			error = EINVAL;
+			break;
+		}
+		error = iso_pcballoc(so, &cltb);
+		if (error)
+			break;
+		error = soreserve(so, cltp_sendspace, cltp_recvspace);
+		if (error)
+			break;
+		break;
+
+	case PRU_DETACH:
+		iso_pcbdetach(isop);
+		break;
+
+	case PRU_BIND:
+		error = iso_pcbbind(isop, nam);
+		break;
+
+	case PRU_LISTEN:
+		error = EOPNOTSUPP;
+		break;
+
+	case PRU_CONNECT:
+		if (isop->isop_faddr) {
+			error = EISCONN;
+			break;
+		}
+		error = iso_pcbconnect(isop, nam);
+		if (error == 0)
+			soisconnected(so);
+		break;
+
+	case PRU_CONNECT2:
+		error = EOPNOTSUPP;
+		break;
+
+	case PRU_ACCEPT:
+		error = EOPNOTSUPP;
+		break;
+
+	case PRU_DISCONNECT:
+		if (isop->isop_faddr == 0) {
+			error = ENOTCONN;
+			break;
+		}
+		iso_pcbdisconnect(isop);
+		so->so_state &= ~SS_ISCONNECTED;		/* XXX */
+		break;
+
+	case PRU_SHUTDOWN:
+		socantsendmore(so);
+		break;
+
+	case PRU_SEND:
+		if (nam) {
+			if (isop->isop_faddr) {
+				error = EISCONN;
+				break;
+			}
+			/*
+			 * Must block input while temporarily connected.
+			 */
+			s = splnet();
+			error = iso_pcbconnect(isop, nam);
+			if (error) {
+				splx(s);
+				break;
+			}
+		} else {
+			if (isop->isop_faddr == 0) {
+				error = ENOTCONN;
+				break;
+			}
+		}
+		error = cltp_output(isop, m);
+		m = 0;		/* cltp_output consumed it */
+		if (nam) {
+			/* undo the temporary connection made above */
+			iso_pcbdisconnect(isop);
+			splx(s);
+		}
+		break;
+
+	case PRU_ABORT:
+		soisdisconnected(so);
+		iso_pcbdetach(isop);
+		break;
+
+	/*
+	 * The address is returned in nam: both the bytes and the length
+	 * go there.  (Copying the bytes into m while setting nam->m_len
+	 * would leave nam's data unset.)
+	 */
+	case PRU_SOCKADDR:
+		if (isop->isop_laddr)
+			bcopy((caddr_t)isop->isop_laddr, mtod(nam, caddr_t),
+				nam->m_len = isop->isop_laddr->siso_len);
+		break;
+
+	case PRU_PEERADDR:
+		if (isop->isop_faddr)
+			bcopy((caddr_t)isop->isop_faddr, mtod(nam, caddr_t),
+				nam->m_len = isop->isop_faddr->siso_len);
+		break;
+
+	case PRU_SENSE:
+		/*
+		 * stat: don't bother with a blocksize.
+		 */
+		return (0);
+
+	case PRU_SENDOOB:
+	case PRU_FASTTIMO:
+	case PRU_SLOWTIMO:
+	case PRU_PROTORCV:
+	case PRU_PROTOSEND:
+		error =  EOPNOTSUPP;
+		break;
+
+	case PRU_RCVD:
+	case PRU_RCVOOB:
+		return (EOPNOTSUPP);	/* do not free mbuf's */
+
+	default:
+		panic("cltp_usrreq");
+	}
+release:
+	if (control != NULL)
+		m_freem(control);
+	if (m != NULL)
+		m_freem(m);
+	return (error);
+}
diff --git a/sys/netiso/cltp_var.h b/sys/netiso/cltp_var.h
new file mode 100644
index 00000000000..b4e08f2c99b
--- /dev/null
+++ b/sys/netiso/cltp_var.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 1989, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)cltp_var.h	8.1 (Berkeley) 6/10/93
+ */
+
+#define UD_TPDU_type	0x40	/* packet type */
+
+#define CLTPOVAL_SRC	0xc1	/* Source TSAP -- required */
+#define CLTPOVAL_DST	0xc2	/* Destination TSAP -- required */
+#define CLTPOVAL_CSM	0xc3	/* Checksum parameter -- optional */
+
+struct	cltpstat {
+	int	cltps_hdrops;	/* input dropped: unrecognized header option */
+	int	cltps_badsum;	/* input dropped: iso_check_csum() failed */
+	int	cltps_badlen;	/* presumably input dropped for a bad length -- confirm against users */
+	int	cltps_noport;	/* input dropped: no pcb matched the destination TSAP */
+	int	cltps_ipackets;	/* datagrams queued on a socket by cltp_input */
+	int	cltps_opackets;	/* datagrams handed to tpclnp_output by cltp_output */
+};
+
+#ifdef KERNEL
+struct	isopcb cltb;		/* head of the circular list of CLTP pcbs; cltp_init() links it to itself */
+struct	cltpstat cltpstat;	/* CLTP counters, bumped by cltp_input() and cltp_output() */
+#endif
diff --git a/sys/netiso/cons.h b/sys/netiso/cons.h
new file mode 100644
index 00000000000..b0739de1eac
--- /dev/null
+++ b/sys/netiso/cons.h
@@ -0,0 +1,92 @@
+/*-
+ * Copyright (c) 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)cons.h	8.1 (Berkeley) 6/10/93
+ */
+
+/***********************************************************
+		Copyright IBM Corporation 1987
+
+                      All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and its 
+documentation for any purpose and without fee is hereby granted, 
+provided that the above copyright notice appear in all copies and that
+both that copyright notice and this permission notice appear in 
+supporting documentation, and that the name of IBM not be
+used in advertising or publicity pertaining to distribution of the
+software without specific, written prior permission.  
+
+IBM DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING
+ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL
+IBM BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR
+ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+SOFTWARE.
+
+******************************************************************/
+
+/*
+ * ARGO Project, Computer Sciences Dept., University of Wisconsin - Madison
+ */
+/*
+ * $Header: cons.h,v 4.4 88/09/09 19:01:28 nhall Exp $
+ * $Source: /usr/argo/sys/netiso/RCS/cons.h,v $
+ *
+ * interface between TP and CONS
+ */
+
+#define	CONSOPT_X25CRUD	0x01		/* set x.25 call request user data */
+
+struct dte_addr {
+	u_char 	dtea_addr[7];	/* address bytes; presumably packed BCD digits (cf. MAX_DTE_LEN in cons_pcb.h) -- confirm */
+	u_char	dtea_niblen;	/* presumably the number of nibbles (digits) of dtea_addr in use -- confirm */
+};
+
+#ifdef	KERNEL
+
+#define CONN_OPEN		0x33
+#define CONN_CONFIRM	0x30
+#define CONN_REFUSE		0x31
+#define CONN_CLOSE		0x32
+
+#define	CONS_IS_DGM		0x1
+#define	CONS_NOT_DGM	0x0
+
+#ifndef	PRC_NCMDS
+#include <sys/protosw.h>
+#endif	/* PRC_NCMDS */
+
+#define PRC_CONS_SEND_DONE 2 /* something unused in protosw.h */
+
+#endif	/* KERNEL */
diff --git a/sys/netiso/cons_pcb.h b/sys/netiso/cons_pcb.h
new file mode 100644
index 00000000000..b8adc373947
--- /dev/null
+++ b/sys/netiso/cons_pcb.h
@@ -0,0 +1,193 @@
+/*-
+ * Copyright (c) 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)cons_pcb.h	8.1 (Berkeley) 6/10/93
+ */
+
+/***********************************************************
+		Copyright IBM Corporation 1987
+
+                      All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and its 
+documentation for any purpose and without fee is hereby granted, 
+provided that the above copyright notice appear in all copies and that
+both that copyright notice and this permission notice appear in 
+supporting documentation, and that the name of IBM not be
+used in advertising or publicity pertaining to distribution of the
+software without specific, written prior permission.  
+
+IBM DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING
+ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL
+IBM BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR
+ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+SOFTWARE.
+
+******************************************************************/
+
+/*
+ * ARGO Project, Computer Sciences Dept., University of Wisconsin - Madison
+ */
+/* $Header: cons_pcb.h,v 4.2 88/06/29 14:59:08 hagens Exp $ */
+/* $Source: /usr/argo/sys/netiso/RCS/cons_pcb.h,v $ */
+
+/*
+ * protocol control block for the connection oriented network service
+ */
+
+/*
+ * legit port #s for cons "transport" are 0..23 for su users only, and
+ * 1024..1099 for public users
+ */
+#define X25_SBSIZE 	512
+#define	X25_PORT_RESERVED 24
+#define	X25_PORT_USERMAX 1099
+#define X25_FACIL_LEN_MAX  109
+#define X25_PARTIAL_PKT_LEN_MAX (MLEN - sizeof(struct cons_pcb))
+
+#ifndef ARGO_DEBUG
+#define X25_TTL 600 /* 5 min */
+#else /* ARGO_DEBUG */
+#define X25_TTL 120 /* 1 min */
+#endif /* ARGO_DEBUG */
+
+struct cons_pcb {
+	struct isopcb 	_co_isopcb;	/* embedded isopcb; the co_* macros below alias its fields */
+#define co_next	_co_isopcb.isop_next
+/* prev used for netstat only */
+#define co_prev	_co_isopcb.isop_prev
+#define co_head	_co_isopcb.isop_head
+#define co_laddr _co_isopcb.isop_laddr
+#define co_faddr _co_isopcb.isop_faddr
+#define co_lport _co_isopcb.isop_laddr.siso_tsuffix /* NOTE(review): cltp_usrreq.c uses isop_laddr as a pointer (->); '.' here looks stale -- confirm */
+#define co_fport _co_isopcb.isop_faddr.siso_tsuffix /* NOTE(review): same concern as co_lport */
+#define co_route _co_isopcb.isop_route
+#define co_socket _co_isopcb.isop_socket
+#define	co_chanmask _co_isopcb.isop_chanmask
+#define	co_negchanmask _co_isopcb.isop_negchanmask
+#define	co_x25crud _co_isopcb.isop_x25crud
+#define	co_x25crud_len _co_isopcb.isop_x25crud_len
+	u_short 		co_state; /* presumably one of the pcb states defined below (CLOSED .. OPEN) -- confirm */
+	u_char 			co_flags; /* presumably CONSF_* bits, defined below -- confirm */
+	u_short			co_ttl; /* time to live timer */
+	u_short			co_init_ttl; /* initial value of ttl  */
+	int 			co_channel; /* logical channel */
+	struct ifnet *	co_ifp; /* interface */
+	struct protosw *co_proto; 
+
+	struct ifqueue 	co_pending; /* queue data to send when connection
+						completes*/
+#define MAX_DTE_LEN 0x7 /* 17 bcd digits -- NOTE(review): 7 bytes hold only 14 BCD digits; confirm which is meant */
+	struct dte_addr	co_peer_dte;
+	struct	cons_pcb *co_myself; /* DEBUGGING AID */
+};
+
+/*
+ * X.25 Packet types 
+ */
+#define XPKT_DATA		1
+#define XPKT_INTERRUPT	2
+#define XPKT_FLOWCONTROL 3 /* not delivered? */
+
+/*
+ * pcb states
+ */
+
+#define	CLOSED		0x0
+#define	LISTENING	0x1
+#define	CLOSING		0x2
+/* USABLE STATES MUST BE LAST */
+#define	CONNECTING	0x3
+#define	ACKWAIT		0x4
+#define	OPEN		0x5
+#define MIN_USABLE_STATE CONNECTING
+
+#define	cons_NSTATES		0x6
+
+
+/* type */
+#define CONSF_OCRE	0x40 /* created on OUTPUT */
+#define CONSF_ICRE	0x20 /* created on INPUT */
+#define CONSF_unused	0x10 /* not used */
+#define CONSF_unused2	0x08 /* not used */
+#define CONSF_DGM		0x04 /* for dgm use only */
+#define CONSF_XTS		0x02 /* for cons-as-transport-service */
+#define CONSF_LOOPBACK	0x01 /* loopback was on when connection commenced */
+
+#define X_NOCHANNEL 0x80
+
+
+struct cons_stat {
+	u_int co_intr;	/* input from eicon board */
+	u_int co_restart; /* ecn_restart() request issued to board */
+	u_int co_slowtimo; /* times slowtimo called */
+	u_int co_timedout; /* connections closed by slowtimo */
+	u_int co_ack; /* ECN_ACK indication came from eicon board */
+	u_int co_receive; /* ECN_RECEIVE indication came from eicon board */
+	u_int co_send; /* ECN_SEND request issued to board */
+	u_int co_reset_in; /* ECN_RESET indication came from eicon board */
+	u_int co_reset_out; /* ECN_RESET issued to the eicon board */
+	u_int co_clear_in; /* ECN_CLEAR indication came from eicon board */
+	u_int co_clear_out; /* ECN_CLEAR request issued to board */
+	u_int co_refuse; /* ECN_REFUSE indication came from eicon board */
+	u_int co_accept; /* ECN_ACCEPT indication came from eicon board */
+	u_int co_connect; /* ECN_CONNECT indication came from eicon board */
+	u_int co_call; /* ECN_CALL request issued to board */
+	u_int co_Rdrops; /* bad pkt came from ll */
+	u_int co_Xdrops; /* can't keep up */
+
+	u_int	co_intrpt_pkts_in; /* interrupt packets in */
+	u_int co_avg_qlen;
+	u_int co_avg_qdrop;
+	u_int co_active;
+
+	u_int co_noresources;
+	u_int co_parse_facil_err;
+	u_int co_addr_proto_consist_err;
+	u_int co_no_copcb;
+} cons_stat;	/* the one instance of these counters; under KERNEL, IncStat(field) increments a member */
+
+u_char x25_error_stats[CONL_ERROR_MAX + 1];
+
+struct ifqueue consintrq; 
+
+/* reasons for clear are in a data mbuf chained to a clear ecn_request */
+struct e_clear_data 				{
+	u_char ecd_cause;	/* presumably the clearing cause code -- confirm */
+	u_char ecd_diagnostic;	/* presumably the accompanying diagnostic code -- confirm */
+};
+
+#ifdef KERNEL
+#define IncStat(XYZ) cons_stat.XYZ++
+#endif /* KERNEL */
diff --git a/sys/netiso/eonvar.h b/sys/netiso/eonvar.h
new file mode 100644
index 00000000000..93f99172f74
--- /dev/null
+++ b/sys/netiso/eonvar.h
@@ -0,0 +1,170 @@
+/*-
+ * Copyright (c) 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)eonvar.h	8.1 (Berkeley) 6/10/93
+ */
+
+/***********************************************************
+		Copyright IBM Corporation 1987
+
+                      All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and its 
+documentation for any purpose and without fee is hereby granted, 
+provided that the above copyright notice appear in all copies and that
+both that copyright notice and this permission notice appear in 
+supporting documentation, and that the name of IBM not be
+used in advertising or publicity pertaining to distribution of the
+software without specific, written prior permission.  
+
+IBM DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING
+ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL
+IBM BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR
+ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+SOFTWARE.
+
+******************************************************************/
+
+/*
+ * ARGO Project, Computer Sciences Dept., University of Wisconsin - Madison
+ */
+
+/*
+ * Version numbers.  0x3 is the value the seon_vers field comment gives
+ * for the address format, 0x1 the value the eonh_vers field comment
+ * gives for the EON header.
+ */
+#define EON_986_VERSION 0x3
+#define EON_VERSION 0x1
+
+/* NOTE(review): presumably the number of entries in the EON cache -- confirm at use site */
+#define EON_CACHESIZE 30
+
+/*
+ * NOTE(review): entry kinds, presumably for that cache (free slot, link,
+ * end system, intermediate system) -- confirm at use site.
+ */
+#define E_FREE 	1
+#define E_LINK	2
+#define E_ES 	3
+#define E_IS 	4
+ 
+
+/*
+ * An EON address.  The structure overlays a sockaddr_iso: the first
+ * bytes line up with the generic length/family/selector-length fields
+ * and the rest spells out the parts of the NSAP one by one, ending
+ * with the IP address and the NSEL byte.
+ */
+
+struct sockaddr_eon {
+	u_char 			seon_len;	/* Length */
+	u_char 			seon_family;	/* AF_ISO */
+	u_char			seon_status;	/* overlays session suffixlen */
+/* link state bits kept in seon_status */
+#define EON_ESLINK_UP		0x1
+#define EON_ESLINK_DOWN		0x2
+#define EON_ISLINK_UP		0x10
+#define EON_ISLINK_DOWN		0x20
+/* "no change" is expressed by setting neither the up nor the down bit */
+	u_char			seon_pad1;	/* 0, overlays tsfxlen */
+	u_char			seon_adrlen;
+	u_char			seon_afi;		/* 47 */
+	u_char			seon_idi[2];	/* 0006 */
+	u_char			seon_vers;		/* 03 */
+	u_char			seon_glbnum[2];	/* see RFC 1069 */
+	u_char			seon_RDN[2];	/* see RFC 1070 */
+	u_char			seon_pad2[3];	/* see RFC 1070 */
+	u_char			seon_LAREA[2];	/* see RFC 1070 */
+	u_char			seon_pad3[2];	/* see RFC 1070 */
+		/* right now ip addr is  aligned  -- be careful --
+		 * future revisions may have it u_char[4]
+		 */
+	u_int			seon_ipaddr;	/* a.b.c.d */
+	u_char			seon_protoid;	/* NSEL */
+};
+
+/*
+ * A prototype EON address, only instantiated by a file that defines
+ * EON_TEMPLATE before including this header.  Reading the initializer
+ * against the field order of struct sockaddr_eon: seon_len is the
+ * structure size, seon_family AF_ISO, seon_status and seon_pad1 zero,
+ * seon_adrlen 0x14, seon_afi 0x47, seon_idi {0x0, 0x6}, seon_vers 0x3
+ * and seon_glbnum[0] zero; everything after that is left zero.
+ */
+#ifdef EON_TEMPLATE
+struct sockaddr_eon eon_template = {
+	sizeof (eon_template), AF_ISO, 0, 0, 0x14,
+	0x47, 0x0, 0x6, 0x3, 0
+};
+#endif
+
+/* masks selecting all "link down" / all "link up" bits of seon_status */
+#define DOWNBITS ( EON_ESLINK_DOWN | EON_ISLINK_DOWN )
+#define UPBITS ( EON_ESLINK_UP | EON_ISLINK_UP )
+
+/* ioctls taking a struct iso_ifreq; both are read/write (_IOWR) */
+#define	SIOCSEONCORE _IOWR('i',10, struct iso_ifreq) /* EON core member */
+#define	SIOCGEONCORE _IOWR('i',11, struct iso_ifreq) /* EON core member */
+
+/* The EON header proper: version, address class and checksum. */
+struct eon_hdr {
+	u_char 	eonh_vers; /* value 1 */
+	u_char 	eonh_class;  /* address multicast class, below */
+#define		EON_NORMAL_ADDR		0x0
+#define		EON_MULTICAST_ES	0x1
+#define		EON_MULTICAST_IS	0x2
+#define		EON_BROADCAST		0x3
+	u_short eonh_csum;  /* osi checksum (choke)*/
+};
+/* An IP header immediately followed by an EON header. */
+struct eon_iphdr {
+	struct	ip	ei_ip;
+	struct	eon_hdr	ei_eh;
+};
+/* combined size of the two headers above */
+#define EONIPLEN (sizeof(struct eon_hdr) + sizeof(struct ip))
+
+/* two bits of the interface flags word are borrowed for I-am-ES and I-am-IS */
+#define	IFF_ES	0x400
+#define	IFF_IS	0x800
+
+/*
+ * EON counters.  This declaration also defines the single instance,
+ * eonstat, which the IncStat() macro below increments by field name.
+ * NOTE(review): the in/out counters look like per-address-class packet
+ * counts (see the EON_* classes of eonh_class) -- confirm at use sites.
+ */
+struct eon_stat {
+	int	es_in_multi_es;
+	int	es_in_multi_is;
+	int	es_in_broad;
+	int	es_in_normal;
+	int	es_out_multi_es;
+	int	es_out_multi_is;
+	int	es_out_broad;
+	int	es_out_normal;
+	int	es_ipout;
+
+	int	es_icmp[PRC_NCMDS];	/* one slot per PRC_* command code */
+	/* errors */
+	int	es_badcsum;
+	int	es_badhdr;
+} eonstat;
+
+/*
+ * Any IncStat() defined by an earlier header is discarded here, so after
+ * this point IncStat(xxx) counts in eonstat.
+ */
+#undef IncStat
+#define IncStat(xxx) eonstat.xxx++
+
+/*
+ * Doubly linked list linkage.  Note that queue_t is a POINTER to a
+ * struct qhdr, not the structure itself.
+ */
+typedef struct qhdr {
+	struct qhdr *link, *rlink;
+} *queue_t;
+
+/*
+ * Per-route EON link-level information.  The list linkage comes first,
+ * so a pointer to an eon_llinfo is also a pointer to its qhdr.
+ */
+struct eon_llinfo {
+	struct	qhdr el_qhdr;		/* keep all in a list */
+	int	el_flags;		/* cache valid ? */
+	int	el_snpaoffset;		/* IP address contained in dst nsap */
+	struct	rtentry *el_rt;		/* back pointer to parent route */
+	struct	eon_iphdr el_ei;	/* precomputed portion of hdr */
+	struct	route el_iproute;	/* if direct route cache IP info */
+					/* if gateway, cache secondary route */
+};
+/* shorthands for the two halves of the precomputed header */
+#define el_iphdr el_ei.ei_ip
+#define el_eonhdr el_ei.ei_eh
diff --git a/sys/netiso/esis.c b/sys/netiso/esis.c
new file mode 100644
index 00000000000..f4ade0f4fc9
--- /dev/null
+++ b/sys/netiso/esis.c
@@ -0,0 +1,1063 @@
+/*-
+ * Copyright (c) 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)esis.c	8.1 (Berkeley) 6/10/93
+ */
+
+/***********************************************************
+		Copyright IBM Corporation 1987
+
+                      All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and its 
+documentation for any purpose and without fee is hereby granted, 
+provided that the above copyright notice appear in all copies and that
+both that copyright notice and this permission notice appear in 
+supporting documentation, and that the name of IBM not be
+used in advertising or publicity pertaining to distribution of the
+software without specific, written prior permission.  
+
+IBM DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING
+ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL
+IBM BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR
+ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+SOFTWARE.
+
+******************************************************************/
+
+/*
+ * ARGO Project, Computer Sciences Dept., University of Wisconsin - Madison
+ */
+
+#ifdef ISO
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/mbuf.h>
+#include <sys/domain.h>
+#include <sys/protosw.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/errno.h>
+#include <sys/kernel.h>
+
+#include <net/if.h>
+#include <net/if_dl.h>
+#include <net/route.h>
+#include <net/raw_cb.h>
+
+#include <netiso/iso.h>
+#include <netiso/iso_pcb.h>
+#include <netiso/iso_var.h>
+#include <netiso/iso_snpac.h>
+#include <netiso/clnl.h>
+#include <netiso/clnp.h>
+#include <netiso/clnp_stat.h>
+#include <netiso/esis.h>
+#include <netiso/argo_debug.h>
+
+/*
+ *	Global variables to esis implementation
+ *
+ *	esis_pcb          - head of the circular list of raw control blocks;
+ *						esis_usrreq() links one in per attached socket
+ *	esis_sendspace,
+ *	esis_recvspace    - buffer sizes handed to soreserve() on attach
+ *	esis_holding_time - the holding time (sec) parameter for outgoing pdus
+ *	esis_config_time  - the frequency (sec) that hellos are generated
+ *	esis_esconfig_time - suggested es configuration time placed in the
+ *						ish.
+ *	iso_systype       - defined elsewhere; tested against SNPA_ES / SNPA_IS
+ *						to decide whether we act as end or intermediate
+ *						system
+ *	esis_dl           - scratch link-level address that isis_input() fills
+ *						in with the sender of each packet before queueing
+ *						it on the raw sockets
+ *	all_es_snpa,
+ *	all_is_snpa       - defined elsewhere; the destinations esis_config()
+ *						sends its hellos to
+ *
+ */
+struct rawcb	esis_pcb;
+void				esis_config(), snpac_age();
+int				esis_sendspace = 2048;
+int				esis_recvspace = 2048;
+short			esis_holding_time = ESIS_HT;
+short			esis_config_time = ESIS_CONFIG;
+short			esis_esconfig_time = ESIS_CONFIG;
+extern int		iso_systype;
+struct sockaddr_dl	esis_dl = { sizeof(esis_dl), AF_LINK };
+extern char		all_es_snpa[], all_is_snpa[];
+
+/*
+ * EXTEND_PACKET(m, mhdr, cp): hang a fresh mbuf from m_getclr() behind
+ * m.  On success m is advanced to the new mbuf and cp is pointed at the
+ * start of its data.  On failure es_nomem is counted, the whole chain
+ * starting at mhdr is freed and THE CALLING FUNCTION RETURNS (a bare
+ * "return;", so this is only usable where that is acceptable).
+ *
+ * The macro expands to an unbraced if/else statement and does not touch
+ * the m_len of the new mbuf; callers have to set that themselves.
+ */
+#define EXTEND_PACKET(m, mhdr, cp)\
+	if (((m)->m_next = m_getclr(M_DONTWAIT, MT_HEADER)) == NULL) {\
+		esis_stat.es_nomem++;\
+		m_freem(mhdr);\
+		return;\
+	} else {\
+		(m) = (m)->m_next;\
+		(cp) = mtod((m), caddr_t);\
+	}
+/*
+ * FUNCTION:		esis_init
+ *
+ * PURPOSE:			Set up the kernel side of esis: empty the raw pcb
+ *					and llinfo lists, start the periodic timers and hook
+ *					the input routines into the clnl protocol switch
+ *
+ * RETURNS:			nothing
+ *
+ * SIDE EFFECTS:	Schedules snpac_age() and esis_config() hz ticks from
+ *					now; overwrites the clnl_input slots for ISO9542_ESIS,
+ *					ISO10589_ISIS and, if ISO_X25ESIS is defined,
+ *					ISO9542X25_ESIS.
+ *
+ * NOTES:			
+ */
+esis_init()
+{
+	int	esis_input(), isis_input();
+#ifdef	ISO_X25ESIS
+	int	x25esis_input();
+#endif	/* ISO_X25ESIS */
+	extern struct clnl_protosw clnl_protox[256];
+
+	/* an empty circular list is a head that points at itself */
+	esis_pcb.rcb_prev = &esis_pcb;
+	esis_pcb.rcb_next = &esis_pcb;
+	llinfo_llc.lc_prev = &llinfo_llc;
+	llinfo_llc.lc_next = &llinfo_llc;
+
+	/* first run of each timer; esis_config() rearms itself */
+	timeout(snpac_age, (caddr_t)0, hz);
+	timeout(esis_config, (caddr_t)0, hz);
+
+	/* input dispatch, indexed by network layer protocol id */
+	clnl_protox[ISO9542_ESIS].clnl_input = esis_input;
+#ifdef	ISO_X25ESIS
+	clnl_protox[ISO9542X25_ESIS].clnl_input = x25esis_input;
+#endif	/* ISO_X25ESIS */
+	clnl_protox[ISO10589_ISIS].clnl_input = isis_input;
+}
+
+/*
+ * FUNCTION:		esis_usrreq
+ *
+ * PURPOSE:			Serve the user requests made on a raw esis socket
+ *
+ * RETURNS:			0 or appropriate errno: EACCES when the socket is
+ *					not privileged, EINVAL for a request that does not fit
+ *					the socket's state or lacks a name, ENOBUFS when no
+ *					control block could be had, EOPNOTSUPP for requests
+ *					not handled here, otherwise what soreserve() or
+ *					isis_output() returned
+ *
+ * SIDE EFFECTS:	m is freed on every path that leaves through the
+ *					release label unless PRU_SEND passed it on to
+ *					isis_output().  PRU_SENSE and unsupported requests
+ *					return at once and leave m alone.
+ *
+ */
+/*ARGSUSED*/
+esis_usrreq(so, req, m, nam, control)
+struct socket	*so;		/* socket: used only to get to this code */
+int				req;		/* request */
+struct mbuf		*m;			/* data for request */
+struct mbuf		*nam;		/* optional name */
+struct mbuf		*control;	/* optional control */
+{
+	struct rawcb *rp = sotorawcb(so);
+	int error = 0;
+
+	/* only privileged sockets may talk esis/isis */
+	if ((so->so_state & SS_PRIV) == 0) {
+		error = EACCES;
+		goto release;
+	}
+	/* everything but attach needs an existing control block */
+	if (req != PRU_ATTACH && rp == NULL) {
+		error = EINVAL;
+		goto release;
+	}
+
+	switch (req) {
+	case PRU_SENSE:
+		return (0);
+
+	case PRU_ATTACH:
+		if (rp != NULL) {
+			error = EINVAL;
+			break;
+		}
+		MALLOC(rp, struct rawcb *, sizeof(*rp), M_PCB, M_WAITOK);
+		so->so_pcb = (caddr_t)rp;
+		if (rp == NULL) {
+			error = ENOBUFS;
+			break;
+		}
+		bzero((caddr_t)rp, sizeof(*rp));
+		insque(rp, &esis_pcb);
+		rp->rcb_socket = so;
+		error = soreserve(so, esis_sendspace, esis_recvspace);
+		break;
+
+	case PRU_DETACH:
+		raw_detach(rp);
+		break;
+
+	case PRU_ABORT:
+		soisdisconnected(so);
+		raw_detach(rp);
+		break;
+
+	case PRU_SHUTDOWN:
+		socantsendmore(so);
+		break;
+
+	case PRU_SEND:
+		if (nam != NULL) {
+			/* error checking here */
+			error = isis_output(mtod(nam,struct sockaddr_dl *), m);
+			m = NULL;	/* isis_output() has taken care of it */
+		} else
+			error = EINVAL;
+		break;
+
+	default:
+		return (EOPNOTSUPP);
+	}
+
+release:
+	if (m != NULL)
+		m_freem(m);
+
+	return (error);
+}
+
+/*
+ * FUNCTION:		esis_input
+ *
+ * PURPOSE:			Validate an incoming esis packet and dispatch it on
+ *					its pdu type
+ *
+ * RETURNS:			nothing
+ *
+ * SIDE EFFECTS:	Counts bad checksum, bad version and unknown type in
+ *					esis_stat.  The packet is always disposed of here: it
+ *					is passed on to isis_input() when at least one raw
+ *					socket is attached, and freed otherwise.
+ *
+ * NOTES:			
+ */
+esis_input(m0, shp)
+struct mbuf		*m0;		/* ptr to first mbuf of pkt */
+struct snpa_hdr	*shp;	/* subnetwork header */
+{
+	register struct esis_fixed	*pdu = mtod(m0, struct esis_fixed *);
+	register int type;
+
+	if (ESIS_CKSUM_REQUIRED(pdu) &&
+		iso_check_csum(m0, (int)pdu->esis_hdr_len)) {
+		/* checksum present but wrong */
+		esis_stat.es_badcsum++;
+	} else if (pdu->esis_vers != ESIS_VERSION) {
+		esis_stat.es_badvers++;
+	} else {
+		/* the pdu type lives in the low five bits */
+		type = pdu->esis_type & 0x1f;
+		if (type == ESIS_ESH)
+			esis_eshinput(m0, shp);
+		else if (type == ESIS_ISH)
+			esis_ishinput(m0, shp);
+		else if (type == ESIS_RD)
+			esis_rdinput(m0, shp);
+		else
+			esis_stat.es_badtype++;
+	}
+
+	/* good or bad, raw listeners get to see the packet */
+	if (esis_pcb.rcb_next == &esis_pcb)
+		m_freem(m0);
+	else
+		isis_input(m0, shp);
+}
+
+/*
+ * FUNCTION:		esis_rdoutput
+ *
+ * PURPOSE:			Build a redirect pdu and transmit it to the system
+ *					the inbound packet came from
+ *
+ * RETURNS:			nothing
+ *
+ * SIDE EFFECTS:	Counts es_rdsent, or es_badtype when no link level
+ *					address is known for the next hop, or es_nomem when
+ *					no mbuf is available.  Hands a new mbuf chain to
+ *					ifp->if_output.
+ *
+ * NOTES:			Assumes there is enough space for fixed part of header,
+ *					DA, BSNPA and NET in first mbuf.
+ *					If rt is a gateway route the gateway itself is looked
+ *					up with rtalloc1(); the reference that call takes is
+ *					dropped again on every path out of this function.
+ */
+esis_rdoutput(inbound_shp, inbound_m, inbound_oidx, rd_dstnsap, rt)
+struct snpa_hdr		*inbound_shp;	/* snpa hdr from incoming packet */
+struct mbuf			*inbound_m;		/* incoming pkt itself */
+struct clnp_optidx	*inbound_oidx;	/* clnp options assoc with incoming pkt */
+struct iso_addr		*rd_dstnsap;	/* ultimate destination of pkt */
+struct rtentry		*rt;			/* snpa cache info regarding next hop of
+										pkt */
+{
+	struct mbuf			*m, *m0;
+	caddr_t				cp;
+	struct esis_fixed	*pdu;
+	int					len, total_len = 0;
+	struct sockaddr_iso	siso;
+	struct ifnet 		*ifp = inbound_shp->snh_ifp;
+	struct sockaddr_dl *sdl;
+	struct iso_addr *rd_gwnsap;
+	struct rtentry		*gwrt = 0;	/* route we looked up ourselves */
+
+	if (rt->rt_flags & RTF_GATEWAY) {
+		/* rd_gwnsap points into the caller's route, which stays valid */
+		rd_gwnsap = &((struct sockaddr_iso *)rt->rt_gateway)->siso_addr;
+		rt = gwrt = rtalloc1(rt->rt_gateway, 0);
+	} else
+		rd_gwnsap = &((struct sockaddr_iso *)rt_key(rt))->siso_addr;
+	if (rt == 0 || (sdl = (struct sockaddr_dl *)rt->rt_gateway) == 0 ||
+		sdl->sdl_family != AF_LINK) {
+		/* maybe we should have a function that you
+		   could put in the iso_ifaddr structure
+		   which could translate iso_addrs into snpa's
+		   where there is a known mapping for that address type */
+		if (gwrt) {
+			RTFREE(gwrt);
+		}
+		esis_stat.es_badtype++;
+		return;
+	}
+	esis_stat.es_rdsent++;
+	IFDEBUG(D_ESISOUTPUT)
+		printf("esis_rdoutput: ifp x%x (%s%d), ht %d, m x%x, oidx x%x\n",
+			ifp, ifp->if_name, ifp->if_unit, esis_holding_time, inbound_m,
+			inbound_oidx);
+		printf("\tdestination: %s\n", clnp_iso_addrp(rd_dstnsap));
+		printf("\tredirected toward:%s\n", clnp_iso_addrp(rd_gwnsap));
+	ENDDEBUG
+
+	if ((m0 = m = m_gethdr(M_DONTWAIT, MT_HEADER)) == NULL) {
+		if (gwrt) {
+			RTFREE(gwrt);
+		}
+		esis_stat.es_nomem++;
+		return;
+	}
+	bzero(mtod(m, caddr_t), MHLEN);
+
+	pdu = mtod(m, struct esis_fixed *);
+	cp = (caddr_t)(pdu + 1); /*pointer arith.; 1st byte after header */
+	len = sizeof(struct esis_fixed);
+
+	/*
+	 *	Build fixed part of header
+	 */
+	pdu->esis_proto_id = ISO9542_ESIS;
+	pdu->esis_vers = ESIS_VERSION;
+	pdu->esis_type = ESIS_RD;
+	HTOC(pdu->esis_ht_msb, pdu->esis_ht_lsb, esis_holding_time);
+
+	/*
+	 *	esis_insert_addr() measures the room left from m_len, so the
+	 *	fixed header has to be accounted for before the first insert
+	 *	(esis_shoutput does the same).
+	 */
+	m->m_len = len;
+
+	/* Insert destination address */
+	(void) esis_insert_addr(&cp, &len, rd_dstnsap, m, 0);
+
+	/* Insert the snpa of better next hop */
+	*cp++ = sdl->sdl_alen;
+	bcopy(LLADDR(sdl), cp, sdl->sdl_alen);
+	cp += sdl->sdl_alen;
+	len += (sdl->sdl_alen + 1);
+
+	/*
+	 *	sdl was the last thing needed from the gateway's route; give
+	 *	back the reference rtalloc1() took so it is not leaked.  None of
+	 *	the exits below (including the one hidden in EXTEND_PACKET)
+	 *	has to care about it any more.
+	 */
+	if (gwrt) {
+		RTFREE(gwrt);
+	}
+
+	/* 
+	 *	If the next hop is not the destination, then it ought to be
+	 *	an IS and it should be inserted next. Else, set the
+	 *	NETL to 0
+	 */
+	/* PHASE2 use mask from ifp of outgoing interface */
+	if (!iso_addrmatch1(rd_dstnsap, rd_gwnsap)) {
+		/* this should not happen: 
+		if ((nhop_sc->sc_flags & SNPA_IS) == 0) {
+			printf("esis_rdoutput: next hop is not dst and not an IS\n");
+			m_freem(m0);
+			return;
+		} */
+		(void) esis_insert_addr(&cp, &len, rd_gwnsap, m, 0);
+	} else {
+		*cp++ = 0;	/* NETL */
+		len++;
+	}
+	/* resynchronise: the snpa and NETL bytes above only advanced len */
+	m->m_len = len;
+
+	/*
+	 *	PHASE2
+	 *	If redirect is to an IS, add an address mask. The mask to be
+	 *	used should be the mask present in the routing entry used to
+	 *	forward the original data packet.
+	 */
+	
+	/*
+	 *	Copy Qos, priority, or security options present in original npdu
+	 */
+	if (inbound_oidx) {
+		/* THIS CODE IS CURRENTLY (mostly) UNTESTED */
+		int optlen = 0;
+		if (inbound_oidx->cni_qos_formatp)
+			optlen += (inbound_oidx->cni_qos_len + 2);
+		if (inbound_oidx->cni_priorp)	/* priority option is 1 byte long */
+			optlen += 3;
+		if (inbound_oidx->cni_securep)
+			optlen += (inbound_oidx->cni_secure_len + 2);
+		if (M_TRAILINGSPACE(m) < optlen) {
+			EXTEND_PACKET(m, m0, cp);
+			m->m_len = 0;
+			/* assumes MLEN > optlen */
+		}
+		/* assume MLEN-len > optlen */
+		/* 
+		 *	When copying options, copy from ptr - 2 in order to grab
+		 *	the option code and length
+		 */
+		if (inbound_oidx->cni_qos_formatp) {
+			bcopy(mtod(inbound_m, caddr_t) + inbound_oidx->cni_qos_formatp - 2,
+				cp, (unsigned)(inbound_oidx->cni_qos_len + 2));
+			cp += inbound_oidx->cni_qos_len + 2;
+		}
+		if (inbound_oidx->cni_priorp) {
+			bcopy(mtod(inbound_m, caddr_t) + inbound_oidx->cni_priorp - 2,
+					cp, 3);
+			cp += 3;
+		}
+		if (inbound_oidx->cni_securep) {
+			bcopy(mtod(inbound_m, caddr_t) + inbound_oidx->cni_securep - 2, cp, 
+				(unsigned)(inbound_oidx->cni_secure_len + 2));
+			cp += inbound_oidx->cni_secure_len + 2;
+		}
+		m->m_len += optlen;
+		len += optlen;
+	}
+
+	pdu->esis_hdr_len = m0->m_pkthdr.len = len;
+	iso_gen_csum(m0, ESIS_CKSUM_OFF, (int)pdu->esis_hdr_len);
+
+	/* address the pdu to the link level source of the inbound packet */
+	bzero((caddr_t)&siso, sizeof(siso));
+	siso.siso_family = AF_ISO;
+	siso.siso_data[0] = AFI_SNA;
+	siso.siso_nlen = 6 + 1;	/* should be taken from snpa_hdr */
+										/* +1 is for AFI */
+	bcopy(inbound_shp->snh_shost, siso.siso_data + 1, 6);
+	(ifp->if_output)(ifp, m0, (struct sockaddr *)&siso, 0);
+}
+
+/*
+ * FUNCTION:		esis_insert_addr
+ *
+ * PURPOSE:			Append an iso_addr (length byte plus address bytes,
+ *					minus the trailing nsellen bytes) to a pdu under
+ *					construction
+ *
+ * RETURNS:			true if buffer was big enough, else false
+ *
+ * SIDE EFFECTS:	On success *buf, *len and m->m_len all grow by the
+ *					number of bytes copied; on failure nothing changes.
+ *
+ * NOTES:			Plus 1 here is for length byte.
+ *					isoa->isoa_len is shortened by nsellen for the duration
+ *					of the copy, so that the length byte written out is
+ *					the shortened one, and is restored before returning.
+ */
+esis_insert_addr(buf, len, isoa, m, nsellen)
+register caddr_t			*buf;		/* ptr to buffer to put address into */
+int							*len;		/* ptr to length of buffer so far */
+register struct iso_addr	*isoa;		/* ptr to address */
+register struct mbuf		*m;			/* determine if there remains space */
+int							nsellen;
+{
+	register int nbytes, fits;
+
+	isoa->isoa_len -= nsellen;
+
+	nbytes = 1 + isoa->isoa_len;
+	fits = (nbytes <= M_TRAILINGSPACE(m));
+	if (fits) {
+		bcopy((caddr_t)isoa, *buf, nbytes);
+		*buf += nbytes;
+		*len += nbytes;
+		m->m_len += nbytes;
+	}
+
+	isoa->isoa_len += nsellen;
+	return (fits);
+}
+
+/*
+ * Parsing helpers for the input routines.  Both rely on the caller
+ * having a local "buflim" (first byte past the pdu) and a label "bad";
+ * when the step would move b past buflim they count es_toosmall and
+ * jump to bad.
+ *
+ * ESIS_EXTRACT_ADDR(d, b): d is pointed at the length-prefixed address
+ * at b, and b is moved past it (length byte plus that many bytes).
+ * ESIS_NEXT_OPTION(b): b is moved past one option (code byte, length
+ * byte, and b[1] bytes of value).
+ */
+#define ESIS_EXTRACT_ADDR(d, b) { d = (struct iso_addr *)(b); b += (1 + *b); \
+	    if (b > buflim) {esis_stat.es_toosmall++; goto bad;}}
+#define ESIS_NEXT_OPTION(b)	{ b += (2 + b[1]); \
+	    if (b > buflim) {esis_stat.es_toosmall++; goto bad;}}
+/* when non-zero, esis_ishinput() and esis_rdinput() ignore what they receive */
+int ESHonly = 0;
+
+/*
+ * FUNCTION:		esis_eshinput
+ *
+ * PURPOSE:			Process an incoming ESH pdu: enter every NSAP it
+ *					announces into the snpa cache as an end system
+ *					reachable at the sender's link address
+ *
+ * RETURNS:			nothing
+ *
+ * SIDE EFFECTS:	Counts es_eshrcvd (and es_toosmall for a truncated
+ *					pdu).  If any cache entry was new and we are an IS, an
+ *					ISH is sent straight back to the sender.
+ *
+ * NOTES:			When the pdu carries several NSAPs and the receiving
+ *					interface has an ISO address with a non-zero selector
+ *					length, an NSAP that is followed by another one equal
+ *					to it up to the selector is entered with that selector
+ *					length, so the group can be compressed.
+ *					The mbuf is not freed here; esis_input() does that.
+ */
+esis_eshinput(m, shp)
+struct mbuf		*m;	/* esh pdu */
+struct snpa_hdr	*shp;	/* subnetwork header */
+{
+	struct	esis_fixed	*pdu = mtod(m, struct esis_fixed *);
+	u_short				ht;		/* holding time */
+	struct	iso_addr	*nsap = 0;	/* stays 0 if the pdu lists no address */
+	int					naddr;
+	u_char				*buf = (u_char *)(pdu + 1);
+	u_char				*buflim = pdu->esis_hdr_len + (u_char *)pdu;
+	int					new_entry = 0;
+
+	esis_stat.es_eshrcvd++;
+
+	CTOH(pdu->esis_ht_msb, pdu->esis_ht_lsb, ht);
+
+	/* first byte after the fixed header is the number of addresses */
+	naddr = *buf++;
+	if (buf >= buflim)
+		goto bad;
+	if (naddr == 1) {
+		ESIS_EXTRACT_ADDR(nsap, buf);
+		new_entry = snpac_add(shp->snh_ifp,
+								 nsap, shp->snh_shost, SNPA_ES, ht, 0);
+	} else {
+		int nsellength = 0, nlen = 0;
+		{
+		/* See if we want to compress out multiple nsaps differing
+		   only by nsel */
+			register struct ifaddr *ifa = shp->snh_ifp->if_addrlist;
+			for (; ifa; ifa = ifa->ifa_next)
+				if (ifa->ifa_addr->sa_family == AF_ISO) {
+					nsellength = ((struct iso_ifaddr *)ifa)->ia_addr.siso_tlen;
+					break;
+				}
+		}
+		IFDEBUG(D_ESISINPUT)
+			printf("esis_eshinput: esh: ht %d, naddr %d nsellength %d\n",
+					ht, naddr, nsellength);
+		ENDDEBUG
+		while (naddr-- > 0) {
+			struct iso_addr *nsap2; u_char *buf2;
+			ESIS_EXTRACT_ADDR(nsap, buf);
+			/* see if there is at least one more nsap in ESH differing
+			   only by nsel */
+			if (nsellength != 0) for (buf2 = buf; buf2 < buflim;) {
+				ESIS_EXTRACT_ADDR(nsap2, buf2);
+				IFDEBUG(D_ESISINPUT)
+					printf("esis_eshinput: comparing %s ", 
+						clnp_iso_addrp(nsap));
+					printf("and %s\n", clnp_iso_addrp(nsap2));
+				ENDDEBUG
+				/*
+				 * An address no longer than the selector has no prefix
+				 * to compare; without this test the length handed to
+				 * Bcmp could go negative.
+				 */
+				if (nsap->isoa_len > nsellength &&
+					Bcmp(nsap->isoa_genaddr, nsap2->isoa_genaddr,
+						 nsap->isoa_len - nsellength) == 0) {
+					nlen = nsellength;
+					break;
+				}
+			}
+			new_entry |= snpac_add(shp->snh_ifp,
+									nsap, shp->snh_shost, SNPA_ES, ht, nlen);
+			nlen = 0;
+		}
+	}
+	IFDEBUG(D_ESISINPUT)
+		/* nsap is still 0 when the pdu announced no address at all */
+		if (nsap)
+			printf("esis_eshinput: nsap %s is %s\n", 
+				clnp_iso_addrp(nsap), new_entry ? "new" : "old");
+	ENDDEBUG
+	if (new_entry && (iso_systype & SNPA_IS))
+		esis_shoutput(shp->snh_ifp, ESIS_ISH, esis_holding_time,
+						shp->snh_shost, 6, (struct iso_addr *)0);
+bad:
+	return;
+}
+
+/*
+ * FUNCTION:		esis_ishinput
+ *
+ * PURPOSE:			process an incoming ISH pdu: note the sender as an
+ *					intermediate system in the snpa cache and honour the
+ *					options it carries
+ *
+ * RETURNS:			nothing
+ *
+ * SIDE EFFECTS:	Counts es_ishrcvd (and es_toosmall for a truncated
+ *					pdu).  On an end system a "suggested ES configuration
+ *					timer" option replaces esis_config_time and restarts
+ *					the esis_config() timer.  If the cache entry was new, a
+ *					hello of our own kind is sent straight back.
+ *
+ * NOTES:			Does nothing beyond counting when ESHonly is set.
+ *					A suggested timer of zero, or one that would turn
+ *					negative in the short esis_config_time, is ignored:
+ *					esis_config() rearms itself hz * esis_config_time
+ *					ticks ahead, so such a value would leave it with a
+ *					zero or negative delay.
+ *					The mbuf is not freed here; esis_input() does that.
+ */
+esis_ishinput(m, shp)
+struct mbuf		*m;	/* esh pdu */
+struct snpa_hdr	*shp;	/* subnetwork header */
+{
+	struct esis_fixed	*pdu = mtod(m, struct esis_fixed *);
+	u_short				ht, newct;			/* holding time */
+	struct iso_addr		*nsap; 				/* Network Entity Title */
+	register u_char		*buf = (u_char *) (pdu + 1);
+	register u_char		*buflim = pdu->esis_hdr_len + (u_char *)pdu;
+	int					new_entry;
+
+	esis_stat.es_ishrcvd++;
+	CTOH(pdu->esis_ht_msb, pdu->esis_ht_lsb, ht);
+
+	IFDEBUG(D_ESISINPUT)
+		printf("esis_ishinput: ish: ht %d\n", ht);
+	ENDDEBUG
+	if (ESHonly)
+		goto bad;
+
+	ESIS_EXTRACT_ADDR(nsap, buf);
+
+	while (buf < buflim) {
+		/* code and length byte must both lie inside the pdu */
+		if (buf + 2 > buflim) {
+			esis_stat.es_toosmall++;
+			goto bad;
+		}
+		switch (*buf) {
+		case ESISOVAL_ESCT:
+			if (iso_systype & SNPA_IS)
+				break;
+			if (buf[1] != 2)
+				goto bad;
+			/* ... and so must the two value bytes, before they are used */
+			if (buf + 4 > buflim) {
+				esis_stat.es_toosmall++;
+				goto bad;
+			}
+			CTOH(buf[2], buf[3], newct);
+			/* see NOTES: never accept a non-positive timer */
+			if (newct == 0 || newct > 0x7fff)
+				break;
+			if (esis_config_time != newct) {
+				untimeout(esis_config,0);
+				esis_config_time = newct;
+				esis_config();
+			}
+			break;
+		
+		default:
+			printf("Unknown ISH option: %x\n", *buf);
+		}
+		ESIS_NEXT_OPTION(buf);
+	}
+	new_entry = snpac_add(shp->snh_ifp, nsap, shp->snh_shost, SNPA_IS, ht, 0);
+	IFDEBUG(D_ESISINPUT)
+		printf("esis_ishinput: nsap %s is %s\n", 
+			clnp_iso_addrp(nsap), new_entry ? "new" : "old");
+	ENDDEBUG
+
+	if (new_entry)
+		esis_shoutput(shp->snh_ifp, 
+			iso_systype & SNPA_ES ? ESIS_ESH : ESIS_ISH,
+			esis_holding_time, shp->snh_shost, 6, (struct iso_addr *)0);
+bad:
+	return;
+}
+
+/*
+ * FUNCTION:		esis_rdinput
+ *
+ * PURPOSE:			Process an incoming RD pdu: learn the better next
+ *					hop it names for a destination
+ *
+ * RETURNS:			nothing
+ *
+ * SIDE EFFECTS:	Counts es_rdrcvd (and es_toosmall for a truncated
+ *					pdu).  Adds an snpa cache entry for the destination
+ *					(redirect to an ES) or for the named IS, in which case
+ *					a route towards the destination via that IS is added
+ *					as well.
+ *
+ * NOTES:			Ignored entirely on intermediate systems and when
+ *					ESHonly is set.  A pdu with a duplicated mask option is
+ *					dropped.
+ *					The mbuf is not freed here; esis_input() does that.
+ */
+esis_rdinput(m0, shp)
+struct mbuf		*m0;	/* esh pdu */
+struct snpa_hdr	*shp;	/* subnetwork header */
+{
+	struct esis_fixed	*pdu = mtod(m0, struct esis_fixed *);
+	u_short				ht;		/* holding time */
+	struct iso_addr		*da, *net = 0, *netmask = 0, *snpamask = 0;
+	register struct iso_addr *bsnpa;
+	register u_char		*buf = (u_char *)(pdu + 1);
+	register u_char		*buflim = pdu->esis_hdr_len + (u_char *)pdu;
+
+	esis_stat.es_rdrcvd++;
+
+	/* intermediate systems ignore redirects */
+	if (iso_systype & SNPA_IS)
+		return;
+	if (ESHonly)
+		return;
+
+	CTOH(pdu->esis_ht_msb, pdu->esis_ht_lsb, ht);
+	if (buf >= buflim)
+		return;
+
+	/* Extract DA */
+	ESIS_EXTRACT_ADDR(da, buf);
+
+	/* Extract better snpa */
+	ESIS_EXTRACT_ADDR(bsnpa, buf);
+
+	/* Extract NET if present */
+	if (buf < buflim) {
+		if (*buf == 0)
+			buf++; /* no NET present, skip NETL anyway */
+		else
+			ESIS_EXTRACT_ADDR(net, buf);
+	}
+
+	/* process options */
+	while (buf < buflim) {
+		switch (*buf) {
+		case ESISOVAL_SNPAMASK:
+			if (snpamask) /* duplicate */
+				return;
+			/* the mask starts at the option's length byte */
+			snpamask = (struct iso_addr *)(buf + 1);
+			break;
+
+		case ESISOVAL_NETMASK:
+			if (netmask) /* duplicate */
+				return;
+			netmask = (struct iso_addr *)(buf + 1);
+			break;
+
+		default:
+			/* buf still points at the option code here */
+			printf("Unknown option in ESIS RD (0x%x)\n", *buf);
+		}
+		ESIS_NEXT_OPTION(buf);
+	}
+
+	IFDEBUG(D_ESISINPUT)
+		printf("esis_rdinput: rd: ht %d, da %s\n", ht, clnp_iso_addrp(da));
+		if (net)
+			printf("\t: net %s\n", clnp_iso_addrp(net));
+	ENDDEBUG
+	/*
+	 *	If netl is zero, then redirect is to an ES. We need to add an entry
+	 *	to the snpa cache for (destination, better snpa).
+	 *	If netl is not zero, then the redirect is to an IS. In this
+	 *	case, add an snpa cache entry for (net, better snpa).
+	 *
+	 *	If the redirect is to an IS, add a route entry towards that
+	 *	IS.
+	 */
+	if (net == 0 || net->isoa_len == 0 || snpamask) {
+		/* redirect to an ES */
+		snpac_add(shp->snh_ifp, da,
+				bsnpa->isoa_genaddr, SNPA_ES, ht, 0);
+	} else {
+		snpac_add(shp->snh_ifp, net,
+				bsnpa->isoa_genaddr, SNPA_IS, ht, 0);
+		snpac_addrt(shp->snh_ifp, da, net, netmask);
+	}
+bad: ;    /* Needed by ESIS_NEXT_OPTION */
+}
+
+/*
+ * FUNCTION:		esis_config
+ *
+ * PURPOSE:			Report configuration: send a hello out of every
+ *					suitable interface
+ *
+ * RETURNS:			nothing
+ *
+ * SIDE EFFECTS:	Rearms itself to run again hz * esis_config_time
+ *					ticks from now.
+ *
+ * NOTES:			Called every esis_config_time seconds.
+ *					An end system sends an ESH to all_is_snpa, anything
+ *					else an ISH to all_es_snpa.
+ */
+void
+esis_config()
+{
+	register struct ifnet	*ifp;
+	struct ifaddr	*ia;
+
+	timeout(esis_config, (caddr_t)0, hz * esis_config_time);
+
+	/* Todo: a better way would be to construct the esh or ish
+	 * once and copy it out for all devices, possibly calling
+	 * a method in the iso_ifaddr structure to encapsulate and
+	 * transmit it.  This could work to advantage for non-broadcast media
+	 */
+	for (ifp = ifnet; ifp; ifp = ifp->if_next) {
+		/* the interface has to be UP and have BROADCAST capability ... */
+		if ((ifp->if_flags & IFF_UP) == 0 ||
+		    (ifp->if_flags & IFF_BROADCAST) == 0)
+			continue;
+
+		/* ... and carry at least one ISO address */
+		for (ia = ifp->if_addrlist; ia; ia = ia->ifa_next)
+			if (ia->ifa_addr->sa_family == AF_ISO)
+				break;
+		if (ia == 0)
+			continue;
+
+		/* one hello per interface, however many ISO addresses it has */
+		if (iso_systype & SNPA_ES)
+			esis_shoutput(ifp, ESIS_ESH, esis_holding_time,
+				(caddr_t)all_is_snpa, 6, (struct iso_addr *)0);
+		else
+			esis_shoutput(ifp, ESIS_ISH, esis_holding_time,
+				(caddr_t)all_es_snpa, 6, (struct iso_addr *)0);
+	}
+}
+
+/*
+ * FUNCTION:		esis_shoutput
+ *
+ * PURPOSE:			Transmit an esh or ish pdu
+ *
+ * RETURNS:			nothing
+ *
+ * SIDE EFFECTS:	Counts es_eshsent or es_ishsent, or es_nomem when an
+ *					mbuf cannot be had (the pdu is then dropped).  Hands
+ *					the finished chain to ifp->if_output, addressed to the
+ *					sn_len bytes at sn_addr.
+ *
+ * NOTES:			An ESH lists isoa (if given) followed by each distinct
+ *					address on the iso_ifaddr list; an ISH carries a single
+ *					address, shortened by its selector length, followed by
+ *					the suggested ES configuration timer option.
+ *					Whenever EXTEND_PACKET adds an mbuf its m_len is set to
+ *					zero before use, as esis_rdoutput does; the macro does
+ *					not initialise it.
+ */
+esis_shoutput(ifp, type, ht, sn_addr, sn_len, isoa)
+struct ifnet	*ifp;
+int				type;
+short			ht;
+caddr_t 		sn_addr;
+int				sn_len;
+struct	iso_addr *isoa;
+{
+	struct mbuf			*m, *m0;
+	caddr_t				cp, naddrp;
+	int					naddr = 0;
+	struct esis_fixed	*pdu;
+	struct iso_ifaddr	*ia;
+	int					len;
+	struct sockaddr_iso	siso;
+
+	if (type == ESIS_ESH)
+		esis_stat.es_eshsent++;
+	else if (type == ESIS_ISH) 
+		esis_stat.es_ishsent++;
+	else {
+		printf("esis_shoutput: bad pdu type\n");
+		return;
+	}
+
+	IFDEBUG(D_ESISOUTPUT)
+		int	i;
+		printf("esis_shoutput: ifp x%x (%s%d), %s, ht %d, to: [%d] ",
+			ifp, ifp->if_name, ifp->if_unit, type == ESIS_ESH ? "esh" : "ish",
+			ht, sn_len);
+		for (i=0; i<sn_len; i++)
+			printf("%x%c", *(sn_addr+i), i < (sn_len-1) ? ':' : ' ');
+		printf("\n");
+	ENDDEBUG
+
+	if ((m0 = m = m_gethdr(M_DONTWAIT, MT_HEADER)) == NULL) {
+		esis_stat.es_nomem++;
+		return;
+	}
+	bzero(mtod(m, caddr_t), MHLEN);
+
+	pdu = mtod(m, struct esis_fixed *);
+	naddrp = cp = (caddr_t)(pdu + 1);
+	len = sizeof(struct esis_fixed);
+
+	/*
+	 *	Build fixed part of header
+	 */
+	pdu->esis_proto_id = ISO9542_ESIS;
+	pdu->esis_vers = ESIS_VERSION;
+	pdu->esis_type = type;
+	HTOC(pdu->esis_ht_msb, pdu->esis_ht_lsb, ht);
+
+	/* an ESH starts with an address count, filled in through naddrp later */
+	if (type == ESIS_ESH) {
+		cp++;
+		len++;
+	}
+
+	m->m_len = len;
+	if (isoa) {
+		/*
+		 * Here we are responding to a clnp packet sent to an NSAP
+		 * that is ours which was sent to the MAC addr all_es's.
+		 * It is possible that we did not specifically advertise this
+		 * NSAP, even though it is ours, so we will respond
+		 * directly to the sender that we are here.  If we do have
+		 * multiple NSEL's we'll tack them on so he can compress them out.
+		 */
+		(void) esis_insert_addr(&cp, &len, isoa, m, 0);
+		naddr = 1;
+	}
+	for (ia = iso_ifaddr; ia; ia = ia->ia_next) {
+		int nsellen = (type == ESIS_ISH ? ia->ia_addr.siso_tlen : 0); 
+		int n = ia->ia_addr.siso_nlen;
+		register struct iso_ifaddr *ia2;
+
+		/* an ISH carries exactly one address */
+		if (type == ESIS_ISH && naddr > 0)
+			break;
+		for (ia2 = iso_ifaddr; ia2 != ia; ia2 = ia2->ia_next)
+			if (Bcmp(ia->ia_addr.siso_data, ia2->ia_addr.siso_data, n) == 0)
+					break;
+		if (ia2 != ia)
+			continue;	/* Means we have previously copied this nsap */
+		if (isoa && Bcmp(ia->ia_addr.siso_data, isoa->isoa_genaddr, n) == 0) {
+			isoa = 0;
+			continue;	/* Ditto */
+		}
+		IFDEBUG(D_ESISOUTPUT)
+			printf("esis_shoutput: adding NSAP %s\n", 
+				clnp_iso_addrp(&ia->ia_addr.siso_addr));
+		ENDDEBUG
+		if (!esis_insert_addr(&cp, &len,
+							  &ia->ia_addr.siso_addr, m, nsellen)) {
+			/* current mbuf is full: continue in a fresh, empty one */
+			EXTEND_PACKET(m, m0, cp);
+			m->m_len = 0;
+			(void) esis_insert_addr(&cp, &len, &ia->ia_addr.siso_addr, m,
+									nsellen);
+		}
+		naddr++;
+	}
+
+	if (type == ESIS_ESH)
+		*naddrp = naddr;
+	else {
+		/* add suggested es config timer option to ISH */
+		if (M_TRAILINGSPACE(m) < 4) {
+			printf("esis_shoutput: extending packet\n");
+			EXTEND_PACKET(m, m0, cp);
+			m->m_len = 0;
+		}
+		*cp++ = ESISOVAL_ESCT;
+		*cp++ = 2;
+		HTOC(*cp, *(cp+1), esis_esconfig_time);
+		len += 4;
+		m->m_len += 4;
+		IFDEBUG(D_ESISOUTPUT)
+			printf("m0 0x%x, m 0x%x, data 0x%x, len %d, cp 0x%x\n",
+			m0, m, m->m_data, m->m_len, cp);
+		ENDDEBUG
+	}
+
+	m0->m_pkthdr.len = len;
+	pdu->esis_hdr_len = len;
+	iso_gen_csum(m0, ESIS_CKSUM_OFF, (int)pdu->esis_hdr_len);
+
+	/* destination: AFI byte followed by the link level address */
+	bzero((caddr_t)&siso, sizeof(siso));
+	siso.siso_family = AF_ISO;
+	siso.siso_data[0] = AFI_SNA;
+	siso.siso_nlen = sn_len + 1;
+	bcopy(sn_addr, siso.siso_data + 1, (unsigned)sn_len);
+	(ifp->if_output)(ifp, m0, (struct sockaddr *)&siso, 0);
+}
+
+/*
+ * FUNCTION:		isis_input
+ *
+ * PURPOSE:			Deliver an incoming packet to every raw socket on
+ *					the esis_pcb list
+ *
+ * RETURNS:			nothing
+ *
+ * SIDE EFFECTS:	Overwrites the global esis_dl with the interface
+ *					index and the link level source of the packet; that is
+ *					the address each receiver gets.  m0 is consumed: the
+ *					first socket on the list receives the original, all
+ *					others a copy, and whatever could not be queued is
+ *					freed.
+ *
+ * NOTES:			A failed copy for one socket is skipped silently.
+ */
+isis_input(m0, shp)
+struct mbuf		*m0;		/* ptr to first mbuf of pkt */
+struct snpa_hdr	*shp;	/* subnetwork header */
+{
+	register struct rawcb *rp, *first_rp = 0;
+	struct ifnet *ifp = shp->snh_ifp;
+	struct mbuf *mm;
+
+	IFDEBUG(D_ISISINPUT)
+		int i;
+
+		printf("isis_input: pkt on ifp x%x (%s%d): from:", ifp, 
+			ifp->if_name, ifp->if_unit);
+		for (i=0; i<6; i++)
+			printf("%x%c", shp->snh_shost[i]&0xff, (i<5) ? ':' : ' ');
+		printf(" to:");
+		for (i=0; i<6; i++)
+			printf("%x%c", shp->snh_dhost[i]&0xff, (i<5) ? ':' : ' ');
+		printf("\n");
+	ENDDEBUG
+
+	/* source address handed to the sockets along with the data */
+	esis_dl.sdl_alen = ifp->if_addrlen;
+	esis_dl.sdl_index = ifp->if_index;
+	bcopy(shp->snh_shost, (caddr_t)esis_dl.sdl_data, esis_dl.sdl_alen);
+
+	/* the first listener, if there is one, is served last with m0 itself */
+	if (esis_pcb.rcb_next != &esis_pcb)
+		first_rp = esis_pcb.rcb_next;
+
+	/* every further listener gets a copy */
+	rp = first_rp ? first_rp->rcb_next : &esis_pcb;
+	while (rp != &esis_pcb) {
+		mm = m_copy(m0, 0, M_COPYALL);	/*can't block at interrupt level */
+		if (mm != NULL) {
+			if (sbappendaddr(&rp->rcb_socket->so_rcv,
+							  &esis_dl, mm, (struct mbuf *)0) == 0) {
+				IFDEBUG(D_ISISINPUT)
+					printf("Error in sbappenaddr, mm = 0x%x\n", mm);
+				ENDDEBUG
+				m_freem(mm);
+			} else
+				sorwakeup(rp->rcb_socket);
+		}
+		rp = rp->rcb_next;
+	}
+
+	if (first_rp == 0 || sbappendaddr(&first_rp->rcb_socket->so_rcv,
+							  &esis_dl, m0, (struct mbuf *)0) == 0) {
+		/* nobody listening, or no room: drop the original */
+		m_freem(m0);
+		return;
+	}
+	sorwakeup(first_rp->rcb_socket);
+}
+
+/*
+ * FUNCTION:		isis_output
+ *
+ * PURPOSE:			Send a packet from a raw socket out of the interface
+ *					that a link level address belongs to
+ *
+ * RETURNS:			0, EINVAL when no interface matches sdl or when the
+ *					link address in sdl does not fit into a sockaddr_iso,
+ *					otherwise whatever the interface output routine
+ *					returned
+ *
+ * SIDE EFFECTS:	m is freed on the EINVAL paths; otherwise it is
+ *					passed to ifp->if_output and not touched again here.
+ *
+ * NOTES:			The destination is passed down as an AF_ISO address
+ *					made of an AFI_SNA byte followed by the link address.
+ *					sdl comes from the user, so its address length is
+ *					checked before it is used as a copy length.
+ */
+isis_output(sdl, m)
+register struct sockaddr_dl	*sdl;
+struct mbuf *m;
+{
+	register struct ifnet *ifp;
+	struct ifaddr *ifa, *ifa_ifwithnet();
+	struct sockaddr_iso siso;
+	int error = 0;
+	unsigned sn_len;
+
+	ifa = ifa_ifwithnet((struct sockaddr *)sdl);	/* get ifp from sdl */
+	if (ifa == 0) {
+		IFDEBUG(D_ISISOUTPUT)
+			printf("isis_output: interface not found\n");
+		ENDDEBUG
+		error = EINVAL;
+		goto release;
+	}
+	ifp = ifa->ifa_ifp;
+	sn_len = sdl->sdl_alen;
+	/* one byte of siso_data is taken by the AFI; the rest must hold the address */
+	if (sn_len > sizeof(siso.siso_data) - 1) {
+		IFDEBUG(D_ISISOUTPUT)
+			printf("isis_output: link address too long (%d)\n", sn_len);
+		ENDDEBUG
+		error = EINVAL;
+		goto release;
+	}
+	IFDEBUG(D_ISISOUTPUT)
+		u_char *cp = (u_char *)LLADDR(sdl), *cplim = cp + sn_len;
+		printf("isis_output: ifp 0x%x (%s%d), to: ",
+			ifp, ifp->if_name, ifp->if_unit);
+		while (cp < cplim) {
+			printf("%x", *cp++);
+			printf("%c", (cp < cplim) ? ':' : ' ');
+		}
+		printf("\n");
+	ENDDEBUG
+	bzero((caddr_t)&siso, sizeof(siso));
+	siso.siso_family = AF_ISO; /* This convention may be useful for X.25 */
+	siso.siso_data[0] = AFI_SNA;
+	siso.siso_nlen = sn_len + 1;
+	bcopy(LLADDR(sdl), siso.siso_data + 1, sn_len);
+	error = (ifp->if_output)(ifp, m, (struct sockaddr *)&siso, 0);
+	if (error) {
+		IFDEBUG(D_ISISOUTPUT)
+			printf("isis_output: error from ether_output is %d\n", error);
+		ENDDEBUG
+	}
+	return (error);
+
+release:
+	if (m != NULL)
+		m_freem(m);
+	return(error);
+}
+
+
+/*
+ * FUNCTION:		esis_ctlinput
+ *
+ * PURPOSE:			Handle the PRC_IFDOWN transition; every other
+ *					request is ignored
+ *
+ * RETURNS:			nothing
+ *
+ * SIDE EFFECTS:	
+ *
+ * NOTES:			Calls snpac_flushifp for the interface of every entry
+ *					on the iso_ifaddr list whose address matches siso.
+ *					The loop through iso_ifaddr is stupid because
+ *					back in if_down, we knew the ifp...
+ */
+esis_ctlinput(req, siso)
+int						req;		/* request: we handle only PRC_IFDOWN */
+struct sockaddr_iso		*siso;		/* address of ifp */
+{
+	register struct iso_ifaddr *ia;	/* scan through interface addresses */
+
+	if (req != PRC_IFDOWN)
+		return;
+
+	ia = iso_ifaddr;
+	while (ia) {
+		if (iso_addrmatch(IA_SIS(ia), siso))
+			snpac_flushifp(ia->ia_ifp);
+		ia = ia->ia_next;
+	}
+}
+
+#endif	/* ISO */
diff --git a/sys/netiso/esis.h b/sys/netiso/esis.h
new file mode 100644
index 00000000000..81dd74ac310
--- /dev/null
+++ b/sys/netiso/esis.h
@@ -0,0 +1,135 @@
+/*-
+ * Copyright (c) 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)esis.h	8.1 (Berkeley) 6/10/93
+ */
+
+/***********************************************************
+		Copyright IBM Corporation 1987
+
+                      All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and its 
+documentation for any purpose and without fee is hereby granted, 
+provided that the above copyright notice appear in all copies and that
+both that copyright notice and this permission notice appear in 
+supporting documentation, and that the name of IBM not be
+used in advertising or publicity pertaining to distribution of the
+software without specific, written prior permission.  
+
+IBM DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING
+ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL
+IBM BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR
+ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+SOFTWARE.
+
+******************************************************************/
+
+/*
+ * ARGO Project, Computer Sciences Dept., University of Wisconsin - Madison
+ */
+/*
+ *	$Header: esis.h,v 4.7 88/09/15 11:24:18 hagens Exp $
+ *	$Source: /usr/argo/sys/netiso/RCS/esis.h,v $
+ */
+
+#ifndef BYTE_ORDER
+/*
+ * Fallback definitions for byte order, used only when BYTE_ORDER is not
+ * already defined; named by byte significance from low address to high.
+ */
+#define	LITTLE_ENDIAN	1234	/* least-significant byte first (vax) */
+#define	BIG_ENDIAN	4321	/* most-significant byte first (IBM, net) */
+#define	PDP_ENDIAN	3412	/* LSB first in word, MSW first in long (pdp) */
+
+#ifdef vax
+#define	BYTE_ORDER	LITTLE_ENDIAN
+#else	/* NOTE(review): every non-vax machine is taken to be big-endian here */
+#define	BYTE_ORDER	BIG_ENDIAN	/* mc68000, tahoe, most others */
+#endif
+#endif /* BYTE_ORDER */
+
+#define	SNPAC_AGE		60			/* seconds */
+#define	ESIS_CONFIG		60			/* seconds */
+#define	ESIS_HT			(ESIS_CONFIG * 2)	/* seconds: twice ESIS_CONFIG (presumably the holding time) */
+
+/*
+ *	Fixed part of an ESIS header
+ */
+struct esis_fixed {				/* every field is a single octet */
+	u_char	esis_proto_id;		/* network layer protocol identifier */
+	u_char	esis_hdr_len;		/* length indicator (octets) */
+	u_char	esis_vers;			/* version/protocol identifier extension */
+	u_char	esis_res1;			/* reserved */
+	u_char	esis_type;			/* type code */
+/* technically, esis_type should be masked with 0x1f before it is compared */
+#define ESIS_ESH	0x02		/* End System Hello */
+#define ESIS_ISH	0x04		/* Intermediate System Hello */
+#define ESIS_RD		0x06		/* Redirect */
+	u_char	esis_ht_msb;		/* holding time (seconds) high byte */
+	u_char	esis_ht_lsb;		/* holding time (seconds) low byte */
+	u_char	esis_cksum_msb;		/* checksum high byte */
+	u_char	esis_cksum_lsb;		/* checksum low byte */
+};
+/*
+ * Values for ESIS datagram options
+ */
+#define ESISOVAL_NETMASK	0xe1	/* address mask option, RD PDU only */
+#define ESISOVAL_SNPAMASK	0xe2	/* snpa mask option, RD PDU only */
+#define ESISOVAL_ESCT		0xc6	/* end system conf. timer, ISH PDU only */
+
+
+#define	ESIS_CKSUM_OFF		0x07	/* byte offset of esis_cksum_msb in struct esis_fixed */
+#define ESIS_CKSUM_REQUIRED(pdu)	/* true when either checksum byte is nonzero */ \
+	(((pdu)->esis_cksum_msb != 0) || ((pdu)->esis_cksum_lsb != 0))
+
+#define	ESIS_VERSION	1
+
+struct esis_stat {					/* ES-IS statistics (presumably event counters) */
+	u_short		es_nomem;			/* insufficient memory to send hello */
+	u_short		es_badcsum;			/* incorrect checksum */
+	u_short		es_badvers;			/* incorrect version number */
+	u_short		es_badtype;			/* unknown pdu type field */
+	u_short		es_toosmall;		/* packet too small */
+	u_short		es_eshsent;			/* ESH sent */
+	u_short		es_eshrcvd;			/* ESH rcvd */
+	u_short		es_ishsent;			/* ISH sent */
+	u_short		es_ishrcvd;			/* ISH rcvd */
+	u_short		es_rdsent;			/* RD sent */
+	u_short		es_rdrcvd;			/* RD rcvd */
+};
+
+#ifdef	KERNEL
+struct esis_stat esis_stat;	/* NOTE(review): defined, not declared extern, in a header -- confirm one shared instance results */
+#endif	/* KERNEL */
diff --git a/sys/netiso/idrp_usrreq.c b/sys/netiso/idrp_usrreq.c
new file mode 100644
index 00000000000..3109936b415
--- /dev/null
+++ b/sys/netiso/idrp_usrreq.c
@@ -0,0 +1,177 @@
+/*
+ * Copyright (c) 1992, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)idrp_usrreq.c	8.1 (Berkeley) 6/10/93
+ */
+
+#include <sys/param.h>
+#include <sys/proc.h>
+#include <sys/systm.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/protosw.h>
+#include <sys/errno.h>
+
+#include <net/route.h>
+#include <net/if.h>
+
+#include <netiso/argo_debug.h>
+#include <netiso/iso.h>
+#include <netiso/clnp.h>
+#include <netiso/clnl.h>
+#include <netiso/iso_pcb.h>
+#include <netiso/iso_var.h>
+
+int idrp_input();
+struct	isopcb	idrp_isop;	/* the one IDRP pcb; idrp_usrreq binds at most one socket to it */
+static	struct	sockaddr_iso idrp_addrs[2] =	/* idrp_input stores src in [0], dst in [1]; [0]'s length covers both */
+{  { sizeof(idrp_addrs), AF_ISO, }, { sizeof(idrp_addrs[1]), AF_ISO, } };
+/*
+ * IDRP initialization
+ */
+idrp_init()
+{
+	extern struct clnl_protosw clnl_protox[256];
+	register struct isopcb *pcb = &idrp_isop;	/* shorthand for the one IDRP pcb */
+	pcb->isop_next = pcb->isop_prev = pcb;		/* linked only to itself */
+	pcb->isop_sladdr = idrp_addrs[1];
+	pcb->isop_sfaddr = idrp_addrs[1];
+	pcb->isop_laddr = &pcb->isop_sladdr;	/* address pointers refer to the pcb's own storage */
+	pcb->isop_faddr = &pcb->isop_sfaddr;
+	clnl_protox[ISO10747_IDRP].clnl_input = idrp_input;	/* install the IDRP input routine */
+}
+
+/*
+ * CALLED FROM:
+ * 	tpclnp_input().
+ * FUNCTION and ARGUMENTS:
+ * Take a packet (m) from clnp, strip off the clnp header
+ * and make it suitable for the idrp socket.
+ * No return value.  
+ */
+idrp_input(m, src, dst)
+	register struct mbuf *m;
+	struct sockaddr_iso *src, *dst;
+{
+	if (idrp_isop.isop_socket == 0)
+		goto drop;		/* no socket attached */
+	bzero(idrp_addrs[0].siso_data, sizeof(idrp_addrs[0].siso_data));
+	bcopy((caddr_t)&(src->siso_addr), (caddr_t)&idrp_addrs[0].siso_addr,
+		1 + src->siso_nlen);	/* [0] <- source */
+	bzero(idrp_addrs[1].siso_data, sizeof(idrp_addrs[1].siso_data));
+	bcopy((caddr_t)&(dst->siso_addr), (caddr_t)&idrp_addrs[1].siso_addr,
+		1 + dst->siso_nlen);	/* [1] <- destination */
+	if (sbappendaddr(&idrp_isop.isop_socket->so_rcv,
+		(struct sockaddr *)idrp_addrs, m, (struct mbuf *)0) != 0) {
+		sorwakeup(idrp_isop.isop_socket);	/* appended to so_rcv */
+		return 0;
+	}
+drop:	m_freem(m);		/* no socket, or sbappendaddr() returned 0 */
+	return 0;
+}
+
+idrp_output(m, addr)
+	struct mbuf *m, *addr;	/* addr data: two sockaddr_iso, for isop_sfaddr then isop_sladdr */
+{
+	register struct sockaddr_iso *siso = mtod(addr, struct sockaddr_iso *);
+	int s = splnet(), i;
+	/* NOTE(review): addr is dereferenced unchecked; confirm callers never pass 0 */
+	bcopy((caddr_t)&(siso->siso_addr),
+	      (caddr_t)&idrp_isop.isop_sfaddr.siso_addr, 1 + siso->siso_nlen);
+	siso++;
+	bcopy((caddr_t)&(siso->siso_addr),
+	      (caddr_t)&idrp_isop.isop_sladdr.siso_addr, 1 + siso->siso_nlen);
+	i = clnp_output(m, &idrp_isop, m->m_pkthdr.len, 0);	/* pass the pcb by address, not by value */
+	splx(s);
+	return (i);		/* result of clnp_output() */
+}
+
+u_long	idrp_sendspace = 3072;		/* really max datagram size */
+u_long	idrp_recvspace = 40 * 1024;	/* 40 1K datagrams */
+
+/*ARGSUSED*/
+idrp_usrreq(so, req, m, addr, control)
+	struct socket *so;
+	int req;
+	struct mbuf *m, *addr, *control;
+{
+	int error = 0;
+	/* User-request entry point for the one IDRP socket (held in idrp_isop).
+	 * Returns 0 or an errno.  Note: need to block idrp_input while
+	 * changing the idrp pcb and/or its addresses.
+	 */
+	switch (req) {
+
+	case PRU_ATTACH:
+		if (idrp_isop.isop_socket != NULL) {
+			error = ENXIO;	/* a socket is already attached */
+			break;
+		}
+		error = soreserve(so, idrp_sendspace, idrp_recvspace);
+		if (error == 0)		/* record the socket only if the reservation succeeded */
+			idrp_isop.isop_socket = so;
+		break;
+
+	case PRU_SHUTDOWN:
+		socantsendmore(so);
+		break;
+
+	case PRU_SEND:
+		return (idrp_output(m, addr));	/* m is passed on, not freed here */
+
+	case PRU_ABORT:
+		soisdisconnected(so);
+		/* FALLTHROUGH */
+	case PRU_DETACH:
+		idrp_isop.isop_socket = 0;
+		break;
+
+	case PRU_SENSE:
+		/*
+		 * stat: don't bother with a blocksize.
+		 */
+		return (0);
+
+	default:
+		return (EOPNOTSUPP);	/* do not free mbuf's */
+	}
+	/* common exit for the cases that break: free any mbufs passed in */
+	if (control) {
+		printf("idrp control data unexpectedly retained\n");
+		m_freem(control);
+	}
+	if (m)
+		m_freem(m);
+	return (error);
+}
diff --git a/sys/netiso/if_cons.c b/sys/netiso/if_cons.c
new file mode 100644
index 00000000000..7724b048be5
--- /dev/null
+++ b/sys/netiso/if_cons.c
@@ -0,0 +1,960 @@
+/*-
+ * Copyright (c) 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)if_cons.c	8.1 (Berkeley) 6/10/93
+ */
+
+/***********************************************************
+		Copyright IBM Corporation 1987
+
+                      All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and its 
+documentation for any purpose and without fee is hereby granted, 
+provided that the above copyright notice appear in all copies and that
+both that copyright notice and this permission notice appear in 
+supporting documentation, and that the name of IBM not be
+used in advertising or publicity pertaining to distribution of the
+software without specific, written prior permission.  
+
+IBM DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING
+ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL
+IBM BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR
+ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+SOFTWARE.
+
+******************************************************************/
+
+/*
+ * ARGO Project, Computer Sciences Dept., University of Wisconsin - Madison
+ */
+/*
+ * $Header: if_cons.c,v 4.7 88/08/11 15:52:55 nhall Exp $
+ * $Source: /usr/argo/sys/netiso/RCS/if_cons.c,v $
+ *
+ * cons.c - Connection Oriented Network Service:
+ * including support for a) user transport-level service, 
+ *	b) COSNS below CLNP, and c) CONS below TP.
+ */
+
+#ifdef TPCONS
+#ifdef KERNEL
+#ifdef ARGO_DEBUG
+#define Static  
+unsigned LAST_CALL_PCB;
+#else /* ARGO_DEBUG */
+#define Static static
+#endif /* ARGO_DEBUG */
+
+#ifndef SOCK_STREAM
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/mbuf.h>
+#include <sys/protosw.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/errno.h>
+#include <sys/ioctl.h>
+#include <sys/tsleep.h>
+
+#include <net/if.h>
+#include <net/netisr.h>
+#include <net/route.h>
+
+#include <netiso/iso_errno.h>
+#include <netiso/argo_debug.h>
+#include <netiso/tp_trace.h>
+#include <netiso/iso.h>
+#include <netiso/cons.h>
+#include <netiso/iso_pcb.h>
+
+#include <netccitt/x25.h>
+#include <netccitt/pk.h>
+#include <netccitt/pk_var.h>
+#endif
+
+#ifdef ARGO_DEBUG
+#define MT_XCONN	0x50
+#define MT_XCLOSE	0x51
+#define MT_XCONFIRM	0x52
+#define MT_XDATA	0x53
+#define MT_XHEADER	0x54
+#else
+#define MT_XCONN	MT_DATA
+#define MT_XCLOSE	MT_DATA
+#define MT_XCONFIRM	MT_DATA
+#define MT_XDATA	MT_DATA
+#define MT_XHEADER	MT_HEADER
+#endif /* ARGO_DEBUG */
+
+#define DONTCLEAR	 -1
+
+/*********************************************************************	
+ * cons.c - CONS interface to the x.25 layer
+ *
+ * TODO: figure out what resources we might run out of besides mbufs.
+ *  If we run out of any of them (including mbufs) close and recycle
+ *  lru x% of the connections, for some parameter x.
+ *
+ * There are 2 interfaces from above:
+ * 1) from TP0: 
+ *    cons CO network service
+ *    TP associates a transport connection with a network connection.
+ * 	  cons_output( isop, m, len, isdgm==0 ) 
+ *        co_flags == 0
+ * 2) from TP4:
+ *	  It's a datagram service, like clnp is. - even though it calls
+ *			cons_output( isop, m, len, isdgm==1 ) 
+ *	  it eventually goes through
+ *			cosns_output(ifp, m, dst).
+ *    TP4 permits multiplexing (reuse, possibly simultaneously) of the 
+ *	  network connections.
+ *    This means that many sockets (many tpcbs) may be associated with
+ *    this pklcd, hence cannot have a back ptr from pklcd to a tpcb.
+ *        co_flags & CONSF_DGM 
+ *    co_socket is null since there may be many sockets that use this pklcd.
+ *
+NOTE:
+	streams would really be nice. sigh.
+NOTE:
+	PVCs could be handled by config-ing a cons with an address and with the
+	IFF_POINTTOPOINT flag on.  This code would then have to skip the
+	connection setup stuff for pt-to-pt links.  
+
+
+ *********************************************************************/
+
+
+#define CONS_IFQMAXLEN 5
+
+
+/* protosw pointers for getting to higher layer */
+Static 	struct protosw	*CLNP_proto;
+Static 	struct protosw	*TP_proto;
+Static 	struct protosw	*X25_proto;
+Static 	int				issue_clear_req();
+
+#ifndef	PHASEONE
+extern	struct ifaddr	*ifa_ifwithnet();
+#endif	/* PHASEONE */
+
+extern	struct ifaddr	*ifa_ifwithaddr();
+
+extern struct	isopcb	tp_isopcb; /* chain of all TP pcbs */
+
+
+Static 	int parse_facil(), NSAPtoDTE(), make_partial_x25_packet();
+Static	int FACILtoNSAP(), DTEtoNSAP();
+Static	struct pklcd *cons_chan_to_pcb();
+
+#define HIGH_NIBBLE 1
+#define LOW_NIBBLE 0
+
+/*
+ * NAME:	nibble_copy()
+ * FUNCTION and ARGUMENTS:
+ * 	copies (len) nibbles from (src_octet), high or low nibble
+ *  to (dst_octet), high or low nibble,
+ * src_nibble & dst_nibble should be:
+ * 	HIGH_NIBBLE (1) if leftmost 4 bits/ most significant nibble
+ * 	LOW_NIBBLE (0) if rightmost 4 bits/ least significant nibble
+ * RETURNS: VOID
+ */
+void
+nibble_copy(src_octet, src_nibble, dst_octet, dst_nibble, len)
+	register char  	*src_octet;		/* octet holding the first source nibble */
+	register char  	*dst_octet;		/* octet holding the first destination nibble */
+	register unsigned		src_nibble;	/* HIGH_NIBBLE or LOW_NIBBLE */
+	register unsigned 		dst_nibble;	/* HIGH_NIBBLE or LOW_NIBBLE */
+	int		len;				/* number of nibbles to copy */
+{
+	register 	unsigned from_shift, to_shift;	/* nibble selector << 2 */
+	register int	remaining;
+
+	IFDEBUG(D_CADDR)
+		printf("nibble_copy ( 0x%x, 0x%x, 0x%x, 0x%x 0x%x)\n", 
+		 src_octet, src_nibble, dst_octet, dst_nibble, len);
+	ENDDEBUG
+#define SHIFT 0x4
+
+	to_shift = dst_nibble << 2;
+	from_shift = src_nibble << 2;
+
+	for (remaining = len; remaining > 0; remaining--) {
+		/* wipe the destination nibble, then OR in the source nibble */
+		*dst_octet 	&= ~(0xf << to_shift);
+		*dst_octet 	|= ((*src_octet >> from_shift) & 0xf) << to_shift;
+
+		/* source: flip the nibble; the pointer moves when it flips back to 1 */
+		from_shift	^= SHIFT;
+		src_nibble 	= 1-src_nibble;
+		src_octet	+= src_nibble;
+
+		/* destination: advanced the same way */
+		to_shift	^= SHIFT;
+		dst_nibble 	= 1-dst_nibble;
+		dst_octet 	+= dst_nibble;
+	}
+	IFDEBUG(D_CADDR)
+		printf("nibble_copy DONE\n");
+	ENDDEBUG
+}
+
+/*
+ * NAME:	nibble_match()
+ * FUNCTION and ARGUMENTS:
+ * 	compares src_octet/src_nibble and dst_octet/dst_nibble  for len nibbles.
+ * RETURNS: 0 if they differ, 1 if they are the same.
+ */
+int
+nibble_match( src_octet, src_nibble, dst_octet, dst_nibble, len)
+	register char  	*src_octet;		/* octet holding the first nibble of one side */
+	register char  	*dst_octet;		/* octet holding the first nibble of the other */
+	register unsigned		src_nibble;	/* HIGH_NIBBLE or LOW_NIBBLE */
+	register unsigned 		dst_nibble;	/* HIGH_NIBBLE or LOW_NIBBLE */
+	int		len;				/* number of nibbles to compare */
+{
+	register 	unsigned from_shift, to_shift;	/* nibble selector << 2 */
+	register int	remaining;
+	u_char		lhs, rhs;
+
+	IFDEBUG(D_CADDR)
+		printf("nibble_match ( 0x%x, 0x%x, 0x%x, 0x%x 0x%x)\n", 
+		 src_octet, src_nibble, dst_octet, dst_nibble, len);
+	ENDDEBUG
+#define SHIFT 0x4
+
+	to_shift = dst_nibble << 2;
+	from_shift = src_nibble << 2;
+	for (remaining = len; remaining > 0; remaining--) {
+		/* isolate the current nibble on each side; a difference ends the scan */
+		rhs = ((*dst_octet) >> to_shift) & 0xf;
+		lhs = ( 0xf & (*src_octet >> from_shift));
+		if (lhs != rhs)
+			return 0;
+		/* source: flip the nibble; the pointer moves when it flips back to 1 */
+		from_shift	^= SHIFT;
+		src_nibble 	= 1-src_nibble;
+		src_octet	+= src_nibble;
+		/* destination: advanced the same way */
+		to_shift	^= SHIFT;
+		dst_nibble 	= 1-dst_nibble;
+		dst_octet 	+= dst_nibble;
+	}
+	IFDEBUG(D_CADDR)
+		printf("nibble_match DONE\n");
+	ENDDEBUG
+	return 1;
+}
+
+/*
+ **************************** NET PROTOCOL cons ***************************
+ */
+/*
+ * NAME:	cons_init()
+ * CALLED FROM:
+ *	autoconf
+ * FUNCTION:
+ *	initialize the protocol
+ */
+cons_init()
+{
+	int tp_incoming(), clnp_incoming();
+	/* Look up and cache the AF_ISO protosw entries for CLNP, X.25 and TP0;
+	 * the results are only printed here (under D_CCONS), not checked. */
+	CLNP_proto = pffindproto(AF_ISO, ISOPROTO_CLNP, SOCK_DGRAM); 
+	X25_proto = pffindproto(AF_ISO, ISOPROTO_X25, SOCK_STREAM);
+	TP_proto = pffindproto(AF_ISO, ISOPROTO_TP0, SOCK_SEQPACKET);
+	IFDEBUG(D_CCONS)
+		printf("cons_init end : cnlp_proto 0x%x cons proto 0x%x tp proto 0x%x\n",
+			CLNP_proto, X25_proto, TP_proto);
+	ENDDEBUG
+#ifdef notdef	/* the pk_protolisten() registrations below are compiled out */
+	pk_protolisten(0x81, 0, clnp_incoming);
+	pk_protolisten(0x82, 0, esis_incoming);
+	pk_protolisten(0x84, 0, tp8878_A_incoming);
+	pk_protolisten(0, 0, tp_incoming);
+#endif
+}
+
+tp_incoming(lcp, m)
+struct pklcd *lcp;
+register struct mbuf *m;
+{
+	register struct isopcb *isop;
+	int cons_tpinput();
+	/* Take an incoming call on lcp: allocate a pcb on tp_isopcb, or close lcp. */
+	if (iso_pcballoc((struct socket *)0, &tp_isopcb)) {
+		pk_close(lcp);
+		return;
+	}
+	isop = tp_isopcb.isop_next;	/* presumably the pcb just allocated -- TODO confirm */
+	lcp->lcd_upper = cons_tpinput;	/* cons_tpinput() becomes lcp's upper-layer handler */
+	lcp->lcd_upnext = (caddr_t)isop;	/* cons_tpinput() reads the pcb back from here */
+	lcp->lcd_send(lcp); /* Confirms call */
+	isop->isop_chan = (caddr_t)lcp;
+	isop->isop_laddr = &isop->isop_sladdr;
+	isop->isop_faddr = &isop->isop_sfaddr;
+	DTEtoNSAP(isop->isop_laddr, &lcp->lcd_laddr);	/* fill both pcb addresses from lcp's */
+	DTEtoNSAP(isop->isop_faddr, &lcp->lcd_faddr);
+	parse_facil(lcp, isop, &(mtod(m, struct x25_packet *)->packet_data),
+		m->m_pkthdr.len - PKHEADERLN);
+}
+
+cons_tpinput(lcp, m0)
+struct mbuf *m0;
+struct pklcd *lcp;
+{
+	register struct isopcb *isop = (struct isopcb *)lcp->lcd_upnext;
+	int cmd, ptype = CLEAR;
+	/* Installed as lcp->lcd_upper.  Data mbufs are passed to tpcons_input();
+	 * control packets are turned into a PRC_* call to tpcons_ctlinput(). */
+	if (isop == 0)
+		return;
+	if (m0 == 0)
+		goto dead;	/* no packet: handled exactly like a CLEAR */
+	switch(m0->m_type) {
+	case MT_DATA:
+	case MT_OOBDATA:
+		tpcons_input(m0, isop->isop_faddr, isop->isop_laddr, (caddr_t)lcp);
+		return;
+
+	case MT_CONTROL:
+		switch (ptype = pk_decode(mtod(m0, struct x25_packet *))) {
+		case RR:
+			cmd = PRC_CONS_SEND_DONE;
+			break;
+
+		case CALL_ACCEPTED:
+			if (lcp->lcd_sb.sb_mb)
+				lcp->lcd_send(lcp); /* XXX - fix this */
+			/*FALLTHROUGH*/
+		default:
+			return;
+
+		dead:
+		case CLEAR:
+		case CLEAR_CONF:
+			lcp->lcd_upper = 0;	/* unlink the circuit and the pcb from each other */
+			lcp->lcd_upnext = 0;
+			isop->isop_chan = 0;
+			/*FALLTHROUGH*/
+		case RESET:
+			cmd = PRC_ROUTEDEAD;
+		}
+		tpcons_ctlinput(cmd, isop->isop_faddr, isop);
+		if (cmd == PRC_ROUTEDEAD && isop->isop_refcnt == 0)	/* compare, do not assign */
+			iso_pcbdetach(isop);
+	}
+}
+
+/*
+ * NAME:	cons_connect()
+ * CALLED FROM:
+ *	tpcons_pcbconnect() when opening a new connection.  
+ * FUNCTION and ARGUMENTS:
+ *  Figures out which device to use, finding a route if one doesn't
+ *  already exist.
+ * RETURN VALUE:
+ *  returns E*
+ */
+cons_connect(isop)
+	register struct isopcb *isop;
+{
+	register struct pklcd *lcp = (struct pklcd *)isop->isop_chan;
+	int error;
+
+	IFDEBUG(D_CCONN)
+		printf("cons_connect(0x%x): ", isop);
+		dump_isoaddr(isop->isop_faddr);
+		printf("myaddr: ");
+		dump_isoaddr(isop->isop_laddr);
+		printf("\n" );
+	ENDDEBUG
+	/* turn the foreign NSAP into the X.25 address to call (result not checked) */
+	NSAPtoDTE(isop->isop_faddr, &lcp->lcd_faddr);
+	/* cons_tpinput() becomes the circuit's upper-layer handler, with isop as its pcb */
+	lcp->lcd_upper = cons_tpinput;
+	lcp->lcd_upnext = (caddr_t)isop;
+	IFDEBUG(D_CCONN)
+		printf(
+		"calling make_partial_x25_packet( 0x%x, 0x%x, 0x%x)\n",
+			&lcp->lcd_faddr, &lcp->lcd_laddr, 
+			isop->isop_socket->so_proto->pr_protocol); 
+	ENDDEBUG
+	if ((error = make_partial_x25_packet(isop, lcp)) == 0)	/* takes exactly two arguments */
+		error = pk_connect(lcp, &lcp->lcd_faddr);
+	return error;		/* 0, or the first error encountered */
+}
+
+/*
+ **************************** DEVICE cons ***************************
+ */
+
+
+/* 
+ * NAME:	cons_ctlinput()
+ * CALLED FROM:
+ *  lower layer when ECN_CLEAR occurs : this routine is here
+ *  for consistency - cons subnet service calls its higher layer
+ *  through the protosw entry.
+ * FUNCTION & ARGUMENTS:
+ *  cmd is a PRC_* command, list found in ../sys/protosw.h
+ *  copcb is the obvious.
+ *  This serves the higher-layer cons service.
+ * NOTE: this takes 3rd arg. because cons uses it to inform itself
+ *  of things (timeouts, etc) but has a pcb instead of an address.
+ */
+cons_ctlinput(cmd, sa, copcb)
+	int cmd;
+	struct sockaddr *sa;
+	register struct pklcd *copcb;
+{	/* currently a no-op: none of the three arguments is used */
+}
+
+
+find_error_reason( xp )
+	register struct x25_packet *xp;
+{
+	int error, cause;
+	/* Map the cause byte of an X.25 packet to a CONS error code. */
+	if (xp == 0)		/* no packet to inspect: report the generic reason */
+		return E_CO_HLI_DISCA;
+	cause = ((u_char *)xp)[4];	/* read unsigned so codes >= 0x80 can match */
+	switch (cause) {
+	case 0x00:
+	case 0x80:
+		/* DTE originated; look at the diagnostic */
+		error = (CONL_ERROR_MASK | cause);
+		goto done;
+
+	case 0x01: /* number busy */
+	case 0x81:
+	case 0x09: /* Out of order */
+	case 0x89:
+	case 0x11: /* Remote Procedure Error */
+	case 0x91:
+	case 0x19: /* reverse charging accept not subscribed */
+	case 0x99:
+	case 0x21: /* Incompatible destination */
+	case 0xa1:
+	case 0x29: /* fast select accept not subscribed */
+	case 0xa9:
+	case 0x39: /* ship absent */
+	case 0xb9:
+	case 0x03: /* invalid facil request */
+	case 0x83:
+	case 0x0b: /* access barred */
+	case 0x8b:
+	case 0x13: /* local procedure error */
+	case 0x93:
+	case 0x05: /* network congestion */
+	case 0x85:
+	case 0x8d: /* not obtainable */
+	case 0x0d:
+	case 0x95: /* RPOA out of order */
+	case 0x15:
+		/* take out bit 8 
+		 * so we don't have to have so many perror entries 
+		 */
+		error = (CONL_ERROR_MASK | 0x100 | (cause & ~0x80));
+		goto done;
+
+	case 0xc1: /* gateway-detected proc error */
+	case 0xc3: /* gateway congestion */
+
+		error = (CONL_ERROR_MASK | 0x100 | cause);
+		goto done;
+	} 
+	/* otherwise, a *hopefully* valid perror exists in the e_reason field;
+	 * a zero there is logged and replaced by E_CO_HLI_DISCA */
+	error = xp->packet_data;
+	if (error == 0) {
+		printf("Incoming PKT TYPE 0x%x with reason 0x%x\n",
+			pk_decode(xp),
+			cause);
+		error = E_CO_HLI_DISCA;
+	} 
+
+done:
+	return error;
+}
+
+
+
+#endif /* KERNEL */
+
+/*
+ * NAME:	make_partial_x25_packet()
+ *
+ * FUNCTION and ARGUMENTS:
+ *	Makes part of an X.25 call packet, for use by x25.
+ *  (isop) supplies the NSAP-addresses of source and destination.
+ *	(lcp) is the X.25 control block that receives the result.
+ *
+ *	 0			Facility length (in octets)
+ *	 1			Facility field, which is a set of:
+ *	  m			facil code
+ *	  m+1		facil param len (for >2-byte facilities) in octets
+ *	  m+2..p	facil param field
+ *  q			user data (protocol identification octet)
+ * 
+ *
+ * RETURNS: 
+ *  0 if OK
+ *  E* if failed.
+ *
+ * SIDE EFFECTS:
+ * Stores facilities mbuf in X.25 control block, where the connect
+ * routine knows where to look for it.
+ */
+
+#ifdef X25_1984 
+int cons_use_facils = 1;
+#else /* X25_1984  */
+int cons_use_facils = 0;
+#endif /* X25_1984  */
+
+int cons_use_udata = 1; /* KLUDGE FOR DEBUGGING */
+
+Static int
+make_partial_x25_packet(isop, lcp)
+	struct isopcb *isop;	/* source of both NSAPs and of the optional call user data */
+	struct pklcd *lcp;		/* receives the user data and the facilities mbuf */
+{
+	caddr_t 			buf;		/* start of the mbuf's data area */
+	register caddr_t	ptr;		/* write cursor within buf */
+	register int		len	= 0;
+	int 				buflen	=0;
+	caddr_t				facil_len;	/* octet that receives the total facility length */
+	struct mbuf *m;
+
+	IFDEBUG(D_CCONN)
+		printf("make_partial_x25_packet(0x%x, 0x%x)\n",
+			isop->isop_laddr, isop->isop_faddr);
+	ENDDEBUG
+	if (cons_use_udata) {
+		if (isop->isop_x25crud_len > 0) {
+			/*
+			 *	The user specified something. Stick it in
+			 */
+			bcopy(isop->isop_x25crud, lcp->lcd_faddr.x25_udata,
+					isop->isop_x25crud_len);
+			lcp->lcd_faddr.x25_udlen = isop->isop_x25crud_len;
+		}
+	}
+	/* with facilities disabled there is nothing more to build */
+	if (cons_use_facils == 0) {
+		lcp->lcd_facilities = 0;
+		return 0;
+	}
+	MGETHDR(m, MT_DATA, M_WAITOK);
+	if (m == 0)
+		return ENOBUFS;
+	buf = mtod(m, caddr_t);
+	ptr = buf;
+	
+	/* ptr now points to facil length (len of whole facil field in OCTETS) */
+	facil_len = ptr ++;
+	m->m_len = 0;
+	pk_build_facilities(m, &lcp->lcd_faddr, 0);
+	/* NOTE(review): ptr is not moved past what pk_build_facilities() stored -- confirm */
+	IFDEBUG(D_CADDR)
+		printf("make_partial  calling: ptr 0x%x, len 0x%x\n", ptr, 
+				isop->isop_laddr->siso_addr.isoa_len);
+	ENDDEBUG
+	if (cons_use_facils) {
+		*ptr++ = 0;	 /* Marker to separate X.25 facilities from CCITT ones */
+		*ptr++ = 0x0f;
+		*ptr = 0xcb; /* calling facility code */
+		ptr ++;
+		ptr ++; /* leave room for facil param len (in OCTETS + 1) */
+		ptr ++; /* leave room for the facil param len (in nibbles),
+				 * high two bits of which indicate full/partial NSAP
+				 */
+		len = isop->isop_laddr->siso_addr.isoa_len;
+		bcopy( isop->isop_laddr->siso_data, ptr, len);
+		*(ptr-2) = len+1; /* facil param len in octets */
+		*(ptr-1) = len<<1; /* facil param len in nibbles */
+		ptr += len;
+
+		IFDEBUG(D_CADDR)
+			printf("make_partial  called: ptr 0x%x, len 0x%x\n", ptr, 
+					isop->isop_faddr->siso_addr.isoa_len);
+		ENDDEBUG
+		*ptr = 0xc9; /* called facility code */
+		ptr ++;
+		ptr ++; /* leave room for facil param len (in OCTETS + 1) */
+		ptr ++; /* leave room for the facil param len (in nibbles),
+				 * high two bits of which indicate full/partial NSAP
+				 */
+		len = isop->isop_faddr->siso_nlen;
+		bcopy(isop->isop_faddr->siso_data, ptr, len);
+		*(ptr-2) = len+1; /* facil param len = addr len + 1 for each of these
+						  * two length fields, in octets */
+		*(ptr-1) = len<<1; /* facil param len in nibbles */
+		ptr += len;
+
+	}
+	*facil_len = ptr - facil_len - 1;
+	if (*facil_len > MAX_FACILITIES)
+		goto toolong;
+
+	buflen = (int)(ptr - buf);
+
+	IFDEBUG(D_CDUMP_REQ)
+		register int i;
+
+		printf("ECN_CONNECT DATA buf 0x%x len %d (0x%x)\n", 
+			buf, buflen, buflen);
+		for( i=0; i < buflen; ) {
+			printf("+%d: %x %x %x %x    %x %x %x %x\n",
+				i,
+				*(buf+i), *(buf+i+1), *(buf+i+2), *(buf+i+3),
+				*(buf+i+4), *(buf+i+5), *(buf+i+6), *(buf+i+7));
+			i+=8;
+		}
+	ENDDEBUG
+	IFDEBUG(D_CADDR)
+		printf("make_partial returns buf 0x%x size 0x%x bytes\n", 
+			mtod(m, caddr_t), buflen);
+	ENDDEBUG
+
+	if (buflen > MHLEN)
+		goto toolong;
+
+	m->m_pkthdr.len = m->m_len = buflen;
+	lcp->lcd_facilities = m;	/* lcp now holds the mbuf */
+	return  0;
+
+toolong:
+	m_freem(m);		/* never stored in lcp, so it must be released here */
+	return E_CO_PNA_LONG;
+}
+
+/*
+ * NAME:	NSAPtoDTE()
+ * CALLED FROM:
+ *  cons_connect()
+ * FUNCTION and ARGUMENTS: 
+ *  get a DTE address from an NSAP-address (struct sockaddr_iso)
+ *  (siso) is the NSAP-address to convert
+ *  (sx25) is the sockaddr_x25 whose x25_addr is to receive the DTE
+ *		address, as a NUL-terminated string
+ *  If the first octet of the NSAP is AFI_37 the digits are unpacked from
+ *		the octets that follow it; otherwise the address is looked up
+ *		with rtalloc1() and a gateway of family AF_CCITT is used.
+ *  A DTE address is a series of ASCII digits
+ *
+ *	A DTE address may have leading zeros. They are significant.
+ *		1 digit per nibble, may be an odd number of nibbles.
+ *
+ *  An NSAP-address has the DTE address in the IDI. Leading zeros are
+ *		significant. Trailing hex f indicates the end of the DTE address.
+ *  	The IDI is a series of BCD digits, one per nibble.
+ *
+ * RETURNS
+ *  # significant digits in the DTE address, -1 if error.
+ */
+
+Static int
+NSAPtoDTE(siso, sx25)
+	register struct sockaddr_iso *siso;
+	register struct sockaddr_x25 *sx25;
+{
+	int		dtelen = -1;
+
+	IFDEBUG(D_CADDR)
+		printf("NSAPtoDTE: nsap: %s\n", clnp_iso_addrp(&siso->siso_addr));
+	ENDDEBUG
+
+	if (siso->siso_data[0] == AFI_37) {
+		register char *out = sx25->x25_addr;
+		register char *in = siso->siso_data + 1;
+		register int nibble;
+		char *lim = siso->siso_data + siso->siso_nlen;
+		char *olim = out+15;
+		int lowNibble = 0;
+
+		while (in < lim) {
+			nibble = ((lowNibble ? *in++ : (*in >> 4)) & 0xf) | 0x30;
+			lowNibble ^= 1;
+			if (nibble != 0x3f && out < olim)
+				*out++ = nibble;
+		}
+		dtelen = out - sx25->x25_addr;
+		*out++ = 0;
+	} else {
+		/* not AFI_37: look for a route to this NSAP and use its gateway */
+		register struct rtentry *rt;
+		extern struct sockaddr_iso blank_siso;
+		struct sockaddr_iso nsiso;
+
+		nsiso = blank_siso;
+		nsiso.siso_nlen = siso->siso_nlen;
+		bcopy(siso->siso_data, nsiso.siso_data, nsiso.siso_nlen);	/* bcopy(src, dst, len) */
+		if ((rt = rtalloc1(&nsiso, 1)) != 0) {
+			register struct sockaddr_x25 *sxx =
+							(struct sockaddr_x25 *)rt->rt_gateway;
+			register char *in = sx25->x25_addr;
+			rt->rt_use--;	/* NOTE(review): rt is never released here -- confirm */
+			if (sxx && sxx->x25_family == AF_CCITT) {
+				bcopy(sxx->x25_addr, sx25->x25_addr, sizeof(sx25->x25_addr));
+				while (in < sx25->x25_addr + sizeof(sx25->x25_addr) && *in)
+					in++;	/* count digits, excluding the terminating NUL */
+				dtelen = in - sx25->x25_addr;
+			}
+		}
+	}
+	return dtelen;
+}
+
+/*
+ * NAME:	FACILtoNSAP()
+ * CALLED FROM:
+ *  parse_facil()
+ * FUNCTION and ARGUMENTS:
+ * 	Creates an NSAP in the sockaddr_iso (addr) from the x.25 facility
+ *  found at buf - 1; buf[1] holds the address type and nibble count.
+ * RETURNS:
+ *  0 if ok, -1 if error.
+ */
+
+Static int
+FACILtoNSAP(addr, buf)
+	register u_char 		*buf;
+	register struct sockaddr_iso *addr;
+{
+	int			len_in_nibbles = *++buf & 0x3f;	/* low 6 bits of buf[1] */
+	u_char		buf_len = (len_in_nibbles + 1) >> 1; /* in bytes */
+
+	IFDEBUG(D_CADDR)
+		printf("FACILtoNSAP( 0x%x, 0x%x, 0x%x )\n", 
+			buf, buf_len, addr );
+	ENDDEBUG
+
+	/* despite the fact that X.25 makes us put a length in nibbles
+	 * here, the NSAP-addrs are always in full octets */
+	switch (*buf++ & 0xc0) {	/* top two bits select the address type */
+	case 0:
+		/* Entire OSI NSAP address */
+		if (buf_len > sizeof(addr->siso_addr))	/* same limit as the partial case */
+			return -1;
+		bcopy((caddr_t)buf, addr->siso_data, addr->siso_nlen = buf_len);
+		break;
+
+	case 0x40:	/* was decimal 40, which a value masked with 0xc0 never equals */
+		/* Partial OSI NSAP address, assume trailing */
+		if (buf_len + addr->siso_nlen > sizeof(addr->siso_addr))
+			return -1;
+		bcopy((caddr_t)buf, TSEL(addr), buf_len);
+		addr->siso_nlen += buf_len;
+		break;
+
+	default:
+		/* Rather than blow away the connection, just ignore and use
+		   NSAP from DTE */;
+	}
+	return 0;
+}
+
+Static
+init_siso(siso)
+	register struct sockaddr_iso *siso;
+{	/* preset an AF_ISO sockaddr whose NSAP starts with AFI_37, length 8 */
+	siso->siso_family = AF_ISO;
+	siso->siso_len = sizeof (struct sockaddr_iso);
+	siso->siso_nlen = 8;
+	siso->siso_data[0] = AFI_37;
+}
+
+/*
+ * NAME:	DTEtoNSAP()
+ * CALLED FROM:
+ *  parse_facil()
+ * FUNCTION and ARGUMENTS:
+ *  Creates a type 37 NSAP in the sockaddr_iso (addr)
+ * 	from a DTE address found in a sockaddr_x25.
+ *  
+ * RETURNS:
+ *  0 if ok; E* otherwise.
+ */
+
+Static  int
+DTEtoNSAP(addr, sx)
+	struct sockaddr_iso *addr;
+	struct sockaddr_x25 *sx;
+{
+	register char		*digit;		/* next ASCII digit to pack */
+	register char		*octet;		/* next NSAP octet to fill */
+	int					ndigits, npairs;
+	int					odd;		/* set when the last octet needs an f pad */
+
+	init_siso(addr);
+	digit = sx->x25_addr;
+	ndigits = strlen(digit);
+	odd = ndigits & 1;
+	/* one octet for the AFI plus one per pair of digits, rounded up */
+	addr->siso_nlen = (ndigits + 3) / 2;
+	octet = addr->siso_data;
+	*octet++ = 0x37;
+
+	/*
+	 * Pack two digits per octet, first digit in the high-order nibble.
+	 * For an odd count the final pair takes its low nibble from the
+	 * string's terminating NUL; that nibble is set to f just below.
+	 */
+	for (npairs = (ndigits + odd) / 2; npairs > 0; npairs--) {
+		*octet++ = ((0xf & digit[0]) << 4) | (0xf & digit[1]);
+		digit += 2;
+	}
+	if (odd)
+		octet[-1] |= 0xf;
+
+	return 0; /* ok */
+}
+
+/*
+ * FUNCTION and ARGUMENTS:
+ *	walks the facility field of the packet at (buf), storing the calling
+ *  NSAP in isop->isop_faddr and the called NSAP in isop->isop_laddr.
+ * RETURNS:
+ *  0 if ok, E_CO_REG_ICDA if FACILtoNSAP() rejects an NSAP facility.
+ */
+
+Static int
+parse_facil(lcp, isop, buf, buf_len)
+	caddr_t 		buf;
+	u_char			buf_len; /* in bytes; only used by the debug output */
+	struct			isopcb *isop;
+	struct			pklcd *lcp;
+{
+	register int 	i;
+	register u_char 	*ptr = (u_char *)buf;
+	u_char			*ptr_lim, *facil_lim;	/* ptr_lim is never used */
+	int 			facil_param_len, facil_len;
+
+	IFDEBUG(D_CADDR)
+		printf("parse_facil(0x%x, 0x%x, 0x%x, 0x%x)\n", 
+			lcp, isop, buf, buf_len);
+		dump_buf(buf, buf_len);
+	ENDDEBUG
+
+	/* find the beginnings of the facility fields in buf 
+	 * by skipping over the called & calling DTE addresses
+	 * i <- # nibbles in called + # nibbles in calling
+	 * i += 1 so that an odd nibble gets rounded up to even  
+	 * before dividing by 2, then divide by two to get # octets
+	 */
+	i = (int)(*ptr >> 4) + (int)(*ptr&0xf);
+	i++;
+	ptr += i >> 1;
+	ptr ++; /* plus one for the DTE lengths byte */
+
+	/* ptr now is at facil_length field */
+	facil_len = *ptr++;
+	facil_lim = ptr + facil_len;	/* NOTE(review): never checked against buf_len */
+	IFDEBUG(D_CADDR)
+		printf("parse_facils: facil length is  0x%x\n", (int) facil_len);
+	ENDDEBUG
+
+	while (ptr < facil_lim) {
+		/* get NSAP addresses from facilities */
+		switch (*ptr++) {	/* ptr is left on the octet after the facility code */
+			case 0xcb:
+				/* calling NSAP; a 0 result is then skipped as variable length */
+				facil_param_len = FACILtoNSAP(isop->isop_faddr, ptr);
+				break;
+			case 0xc9:
+				/* called NSAP; a 0 result is then skipped as variable length */
+				facil_param_len = FACILtoNSAP(isop->isop_laddr, ptr);
+				break;
+
+				/* from here to default are legit cases that I ignore */
+				/* variable length */
+			case 0xca:  /* end-to-end transit delay negot */
+			case 0xc6:  /* network user id */
+			case 0xc5: 	/* charging info : indicating monetary unit */
+			case 0xc2: 	/* charging info : indicating segment count */
+			case 0xc1: 	/* charging info : indicating call duration */
+			case 0xc4: 	/* RPOA extended format */
+			case 0xc3: 	/* call redirection notification */
+				facil_param_len = 0;
+				break;
+
+				/* 1 octet */
+			case 0x0a:  /* min. throughput class negot */
+			case 0x02:  /* throughput class */
+			case 0x03:  case 0x47:  /* closed user group (CUG) */
+			case 0x0b:  /* expedited data negot */
+			case 0x01:  /* Fast select or reverse charging 
+						(example of intelligent protocol design) */
+			case 0x04: 	/* charging info : requesting service */
+			case 0x08: 	/* called line addr modified notification */
+			case 0x00:  /* marker to indicate beginning of CCITT facils */
+				facil_param_len = 1;
+				break;
+
+				/* any 2 octets */
+			case 0x42:  /* pkt size */
+			case 0x43:  /* win size */
+			case 0x44:  /* RPOA basic format */
+			case 0x41:  /* bilateral closed user group (CUG) */
+			case 0x49: 	/* transit delay selection and indication */
+				facil_param_len = 2;
+				break;
+
+			default:
+				printf(
+"BOGUS FACILITY CODE facil_lim 0x%x facil_len %d, ptr 0x%x *ptr 0x%x\n",
+					facil_lim, facil_len, ptr - 1, ptr[-1]);
+				/* facil that we don't handle
+				return E_CO_HLI_REJI; */
+				switch (ptr[-1] & 0xc0) {	/* top two bits pick the parameter size */
+				case 0x00:	facil_param_len = 1; break;
+				case 0x40:	facil_param_len = 2; break;
+				case 0x80:	facil_param_len = 3; break;
+				case 0xc0:	facil_param_len = 0; break;	/* variable length */
+				}
+		}
+		if (facil_param_len == -1)	/* only FACILtoNSAP() produces -1 */
+			return E_CO_REG_ICDA;
+		if (facil_param_len == 0) /* variable length */ 
+			facil_param_len = (int)*ptr++; /* 1 + the real facil param */
+		ptr += facil_param_len;
+	}
+	return 0;
+}
+
+#endif /* TPCONS */
diff --git a/sys/netiso/if_eon.c b/sys/netiso/if_eon.c
new file mode 100644
index 00000000000..3c05133040a
--- /dev/null
+++ b/sys/netiso/if_eon.c
@@ -0,0 +1,609 @@
+/*-
+ * Copyright (c) 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)if_eon.c	8.1 (Berkeley) 6/10/93
+ */
+
+/***********************************************************
+		Copyright IBM Corporation 1987
+
+                      All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and its 
+documentation for any purpose and without fee is hereby granted, 
+provided that the above copyright notice appear in all copies and that
+both that copyright notice and this permission notice appear in 
+supporting documentation, and that the name of IBM not be
+used in advertising or publicity pertaining to distribution of the
+software without specific, written prior permission.  
+
+IBM DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING
+ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL
+IBM BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR
+ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+SOFTWARE.
+
+******************************************************************/
+
+/*
+ * ARGO Project, Computer Sciences Dept., University of Wisconsin - Madison
+ */
+/*
+ * $Header: if_eon.c,v 1.4 88/07/19 15:53:59 hagens Exp $ 
+ * $Source: /usr/argo/sys/netiso/RCS/if_eon.c,v $ 
+ *
+ *	EON rfc 
+ *  Layer between IP and CLNL
+ *
+ * TODO:
+ * Put together a current rfc986 address format and get the right offset
+ * for the nsel
+ */
+
+#ifdef EON
+#define NEON 1
+
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/mbuf.h>
+#include <sys/buf.h>
+#include <sys/protosw.h>
+#include <sys/socket.h>
+#include <sys/ioctl.h>
+#include <sys/errno.h>
+#include <sys/types.h>
+
+#include <net/if.h>
+#include <net/if_types.h>
+#include <net/if_dl.h>
+#include <net/netisr.h>
+#include <net/route.h>
+#include <machine/mtpr.h>
+
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/in_var.h>
+#include <netinet/ip.h>
+#include <netinet/ip_var.h>
+#include <netinet/if_ether.h>
+
+#include <netiso/iso.h>
+#include <netiso/iso_var.h>
+#include <netiso/iso_snpac.h>
+#include <netiso/argo_debug.h>
+#include <netiso/iso_errno.h>
+#include <netiso/eonvar.h>
+
+extern struct ifnet loif;		/* its if_mtu seeds the route MTU in eonrtrequest() */
+extern struct timeval time;		/* copied into if_lastchange */
+
+#define EOK 0					/* result eoninput() expects from iso_check_csum() */
+
+struct ifnet	eonif[1];		/* the single eon interface */
+int		eonattach();
+int		eoninit();
+int		eonioctl();
+int		eonoutput();
+int		eoninput();
+void	eonrtrequest();
+
+eonprotoinit() {	/* attaches the eon interface; eonattach()'s result is ignored */
+	(void) eonattach();
+}
+
+struct eon_llinfo eon_llinfo;	/* its el_qhdr heads the queue eonrtrequest() inserts into */
+#define PROBE_OK 0	/* no trailing ';' so the macro can be used inside an expression */
+
+
+/*
+ * FUNCTION:		eonattach
+ *
+ * PURPOSE:			autoconf attach routine
+ *
+ * RETURNS:			void
+ */
+
+eonattach()
+{
+	register struct ifnet *ifp = &eonif[0];
+
+	IFDEBUG(D_EON)
+		printf("eonattach()\n");
+	ENDDEBUG
+	ifp->if_name = "eon";
+	ifp->if_unit = 0;
+	ifp->if_type = IFT_EON;
+	ifp->if_flags = IFF_BROADCAST;
+	ifp->if_addrlen = 5;
+	ifp->if_hdrlen = EONIPLEN;
+	/* since everything will go out over ether or token ring */
+	ifp->if_mtu = ETHERMTU;
+	ifp->if_init = eoninit;
+	ifp->if_ioctl = eonioctl;
+	ifp->if_output = eonoutput;
+	if_attach(ifp);
+	eonioctl(ifp, SIOCSIFADDR, (caddr_t)ifp->if_addrlist);
+	/* an empty queue: the header links to itself in both directions */
+	eon_llinfo.el_qhdr.rlink = &(eon_llinfo.el_qhdr);
+	eon_llinfo.el_qhdr.link = &(eon_llinfo.el_qhdr);
+
+	IFDEBUG(D_EON)
+		printf("eonattach()\n");
+	ENDDEBUG
+}
+
+
+/*
+ * FUNCTION:		eonioctl
+ *
+ * PURPOSE:			io controls - ifconfig
+ *				need commands to 
+ *					link-UP (core addr) (flags: ES, IS)
+ *					link-DOWN (core addr) (flags: ES, IS)
+ *				must be callable from kernel or user
+ *
+ * RETURNS:			0 (no request is ever rejected)
+ */
+eonioctl(ifp, cmd, data)
+	register struct ifnet *ifp;
+	int cmd;
+	register caddr_t data;
+{
+	register struct ifaddr *ifa = (struct ifaddr *)data;
+	int s = splimp();	/* restored by the splx() before returning */
+
+	IFDEBUG(D_EON)
+		printf("eonioctl (cmd 0x%x) \n", cmd);
+	ENDDEBUG
+
+	/* SIOCSIFADDR is the only request acted on; every other cmd
+	 * passes through the switch untouched and nothing ever fails */
+	switch (cmd) {
+	case SIOCSIFADDR:
+		if (ifa == 0)
+			break;
+		ifp->if_flags |= IFF_UP;
+		if (ifa->ifa_addr->sa_family != AF_LINK)
+			ifa->ifa_rtrequest = eonrtrequest;
+		break;
+	}
+	splx(s);
+	return(0);
+}
+
+
+eoniphdr(hdr, loc, ro, class, zero)
+struct route *ro;
+register struct eon_iphdr *hdr;
+caddr_t loc;
+{
+	register struct sockaddr_in *sin = (struct sockaddr_in *)&ro->ro_dst;
+	register struct rtentry *cached;
+	struct mbuf mhead;	/* local one-mbuf chain wrapped around hdr->ei_eh */
+	if (zero) {		/* caller wants hdr and ro wiped first */
+		bzero((caddr_t)ro, sizeof (*ro));
+		bzero((caddr_t)hdr, sizeof (*hdr));
+	}
+	sin->sin_len = sizeof (*sin);
+	sin->sin_family = AF_INET;
+	bcopy(loc, (caddr_t)&sin->sin_addr, sizeof(struct in_addr));
+	/*
+	 * A route already cached in ro is kept only while it is still up
+	 * and keyed on this same destination; otherwise it is released.
+	 */
+	if ((cached = ro->ro_rt) != 0) {
+		struct sockaddr_in *key = (struct sockaddr_in *)rt_key(cached);
+		if (!(cached->rt_flags & RTF_UP) ||
+		    key->sin_addr.s_addr != sin->sin_addr.s_addr) {
+			RTFREE(cached);
+			ro->ro_rt = (struct rtentry *)0;
+		}
+	}
+	rtalloc(ro);
+	if (ro->ro_rt)
+		ro->ro_rt->rt_use++;
+	hdr->ei_ip.ip_p = IPPROTO_EON;
+	hdr->ei_ip.ip_ttl = MAXTTL;	
+	hdr->ei_ip.ip_dst = sin->sin_addr;
+	hdr->ei_eh.eonh_vers = EON_VERSION;
+	hdr->ei_eh.eonh_class = class;
+	hdr->ei_eh.eonh_csum = 0;
+	/* checksum just the eon_hdr, through the local mbuf */
+	mhead.m_next = 0;
+	mhead.m_len = sizeof(struct eon_hdr);
+	mhead.m_data = (caddr_t) &hdr->ei_eh;
+	IFDEBUG(D_EON)
+		printf("eonoutput : gen csum (0x%x, offset %d, datalen %d)\n", 
+			&mhead,
+			_offsetof(struct eon_hdr, eonh_csum), sizeof(struct eon_hdr)); 
+	ENDDEBUG
+	iso_gen_csum(&mhead, 
+		_offsetof(struct eon_hdr, eonh_csum), sizeof(struct eon_hdr)); 
+}
+/*
+ * FUNCTION:		eonrtrequest
+ * PURPOSE:			maintains list of direct eon recipients.
+ *					sets up IP route for rest.
+ *					Does nothing for a route that has no eon_llinfo
+ *					unless cmd is RTM_ADD or RTM_RESOLVE.
+ * RETURNS:			nothing
+ */
+void
+eonrtrequest(cmd, rt, gate)
+register struct rtentry *rt;
+register struct sockaddr *gate;
+{
+	unsigned long zerodst = 0;
+	caddr_t	ipaddrloc = (caddr_t) &zerodst;	/* used when no gateway supplies one */
+	register struct eon_llinfo *el = (struct eon_llinfo *)rt->rt_llinfo;
+
+	/* Common Housekeeping */
+	switch (cmd) {
+	case RTM_DELETE:
+		if (el) {
+			remque(&(el->el_qhdr));
+			if (el->el_iproute.ro_rt)
+				RTFREE(el->el_iproute.ro_rt);
+			Free(el);
+			rt->rt_llinfo = 0;
+		}
+		return;
+
+	case RTM_ADD:
+	case RTM_RESOLVE:
+		rt->rt_rmx.rmx_mtu = loif.if_mtu; /* unless better below */
+		R_Malloc(el, struct eon_llinfo *, sizeof(*el));
+		rt->rt_llinfo = (caddr_t)el;
+		if (el == 0)
+			return;
+		Bzero(el, sizeof(*el));
+		insque(&(el->el_qhdr), &eon_llinfo.el_qhdr);
+		el->el_rt = rt;
+		break;
+	}
+	if (el == 0)		/* any other cmd on a route with no eon state */
+		return;
+	if (gate || (gate = rt->rt_gateway)) switch (gate->sa_family) {
+		case AF_LINK:
+#define SDL(x) ((struct sockaddr_dl *)x)
+			/* a 1-byte link address is an offset, anything else an IP addr */
+			if (SDL(gate)->sdl_alen == 1)
+				el->el_snpaoffset = *(u_char *)LLADDR(SDL(gate));
+			else
+				ipaddrloc = LLADDR(SDL(gate));
+			break;
+		case AF_INET:
+#define SIN(x) ((struct sockaddr_in *)x)
+			ipaddrloc = (caddr_t) &SIN(gate)->sin_addr;
+			break;
+		default:
+			return;
+	}
+	el->el_flags |= RTF_UP;
+	eoniphdr(&el->el_ei, ipaddrloc, &el->el_iproute, EON_NORMAL_ADDR, 0);
+	if (el->el_iproute.ro_rt)
+		rt->rt_rmx.rmx_mtu = el->el_iproute.ro_rt->rt_rmx.rmx_mtu
+							- sizeof(el->el_ei);
+}
+
+/*
+ * FUNCTION:		eoninit
+ *
+ * PURPOSE:			initialization
+ *
+ * RETURNS:			nothing
+ */
+
+eoninit(unit)
+	int unit;	/* unit number; only echoed in the message below */
+{
+	printf("eon driver-init eon%d\n", unit);	/* no other initialization is done */
+}
+
+
+/*
+ * FUNCTION:		eonoutput
+ *
+ * PURPOSE:			prepend an eon header and hand to IP
+ * ARGUMENTS:	 	(ifp) points to the ifnet structure for this unit/device
+ *					(m)  is an mbuf *, *m is a CLNL packet
+ *					(dst) is a destination address - have to interp. as
+ *					multicast or broadcast or real address.
+ *					(rt) is the route in use; its rt_llinfo, if any,
+ *					caches the eon/ip header and the IP route.
+ * RETURNS:			unix error code; m is always consumed
+ */
+eonoutput(ifp, m, dst, rt)
+	struct ifnet 	*ifp;
+	register struct mbuf	*m;		/* packet */
+	struct sockaddr_iso		*dst;		/* destination addr */
+	struct rtentry *rt;
+{
+	register struct eon_llinfo *el;
+	register struct eon_iphdr *ei;
+	struct route *ro;
+	int	datalen;
+	struct mbuf *mh;
+	int	error = 0, class = 0, alen = 0;
+	caddr_t ipaddrloc;
+	static struct eon_iphdr eon_iphdr;	/* scratch header for the no-llinfo path */
+	static struct route route;	/* NOTE(review): eoniphdr() zeroes this each call; is the old ro_rt released? */
+
+	IFDEBUG(D_EON)
+		printf("eonoutput \n" );
+	ENDDEBUG
+
+	ifp->if_lastchange = time;
+	ifp->if_opackets++;
+	if (rt == 0 || (el = (struct eon_llinfo *)rt->rt_llinfo) == 0) {
+		/* no cached state: take the IP address straight out of dst */
+		if (dst->siso_family == AF_LINK) {
+			register struct sockaddr_dl *sdl = (struct sockaddr_dl *)dst;
+
+			ipaddrloc = LLADDR(sdl);
+			alen = sdl->sdl_alen;
+		} else if (dst->siso_family == AF_ISO && dst->siso_data[0] == AFI_SNA) {
+			alen = dst->siso_nlen - 1;
+			ipaddrloc = (caddr_t) dst->siso_data + 1;
+		}
+		switch (alen) {
+		case 5:		/* a fifth byte carries the eon class */
+			class = ((u_char *)ipaddrloc)[4];
+			/* FALLTHROUGH */
+		case 4:
+			ro = &route;
+			ei = &eon_iphdr;
+			eoniphdr(ei, ipaddrloc, ro, class, 1);
+			goto send;
+		}
+einval:
+		error =  EINVAL;
+		goto flush;
+	}
+	if ((el->el_flags & RTF_UP) == 0) {
+		/* give the route one chance to come up before failing */
+		eonrtrequest(RTM_CHANGE, rt, (struct sockaddr *)0);
+		if ((el->el_flags & RTF_UP) == 0) {
+			error = EHOSTUNREACH;
+			goto flush;
+		}
+	}
+	if ((m->m_flags & M_PKTHDR) == 0) {
+		printf("eon: got non headered packet\n");
+		goto einval;
+	}
+	ei = &el->el_ei;
+	ro = &el->el_iproute;
+	if (el->el_snpaoffset) {
+		/* the IP destination is embedded in the NSAP at this offset */
+		if (dst->siso_family == AF_ISO) {
+			bcopy((caddr_t) &dst->siso_data[el->el_snpaoffset],
+					(caddr_t) &ei->ei_ip.ip_dst, sizeof(ei->ei_ip.ip_dst));
+		} else
+			goto einval;
+	}
+send:
+	/* put an eon_hdr in the buffer, prepended by an ip header */
+	datalen = m->m_pkthdr.len + EONIPLEN;
+	MGETHDR(mh, M_DONTWAIT, MT_HEADER);
+	if (mh == (struct mbuf *)0) {
+		error = ENOBUFS;	/* previously reported success for a dropped packet */
+		goto flush;
+	}
+	mh->m_next = m;
+	m = mh;
+	MH_ALIGN(m, sizeof(struct eon_iphdr));
+	m->m_len = sizeof(struct eon_iphdr);
+	ifp->if_obytes +=
+		(ei->ei_ip.ip_len = (u_short)(m->m_pkthdr.len = datalen));
+	*mtod(m, struct eon_iphdr *) = *ei;
+
+	IFDEBUG(D_EON)
+		printf("eonoutput dst ip addr : %x\n",  ei->ei_ip.ip_dst.s_addr);
+		printf("eonoutput ip_output : eonip header:\n");
+		dump_buf(ei, sizeof(struct eon_iphdr));
+	ENDDEBUG
+
+	error = ip_output(m, (struct mbuf *)0, ro, 0, NULL);
+	m = 0;	/* handed to ip_output(); must not be freed at flush */
+	if (error) {
+		ifp->if_oerrors++;
+		ifp->if_opackets--;
+		ifp->if_obytes -= datalen;
+	}
+flush:
+	if (m)
+		m_freem(m);
+	return error;
+}
+
+eoninput(m, iphlen)	/* validate an EON-in-IP packet and queue it for CLNP */
+	register struct mbuf	*m;
+	int iphlen;
+{
+	register struct eon_hdr	*eonhdr;
+	register struct ip		*iphdr;
+	struct ifnet 			*eonifp;
+	int						s;
+
+	eonifp = &eonif[0]; /* kludge - really want to give CLNP
+						* the ifp for eon, not for the real device
+						*/
+
+	IFDEBUG(D_EON)
+		printf("eoninput() 0x%x m_data 0x%x m_len 0x%x dequeued\n",
+			m, m?m->m_data:0, m?m->m_len:0);
+	ENDDEBUG
+
+	if (m == 0)
+		return;
+	if (iphlen > sizeof (struct ip))	/* IP options present: strip them */
+		ip_stripoptions(m, (struct mbuf *)0);
+	if (m->m_len < EONIPLEN) {
+		if ((m = m_pullup(m, EONIPLEN)) == 0) {
+			IncStat(es_badhdr);
+drop:		/* also reached by goto from the header checks further down */
+			IFDEBUG(D_EON)
+				printf("eoninput: DROP \n" );
+			ENDDEBUG
+			eonifp->if_ierrors ++;
+			m_freem(m);	/* NOTE(review): m is 0 after a failed m_pullup - confirm m_freem accepts that */
+			return;
+		}
+	}
+	eonif->if_ibytes += m->m_pkthdr.len;
+	eonif->if_lastchange = time;
+	iphdr = mtod(m, struct ip *);
+	/* do a few checks for debugging */
+	if( iphdr->ip_p != IPPROTO_EON ) {
+		IncStat(es_badhdr);
+		goto drop;
+	}
+	/* temporarily drop ip header from the mbuf */
+	m->m_data += sizeof(struct ip);
+	eonhdr = mtod(m, struct eon_hdr *);
+	if( iso_check_csum( m, sizeof(struct eon_hdr) )   != EOK ) {
+		IncStat(es_badcsum);
+		goto drop;	/* m_data is still advanced past the ip header here */
+	}
+	m->m_data -= sizeof(struct ip);	/* put the ip header back in view */
+		
+	IFDEBUG(D_EON)
+		printf("eoninput csum ok class 0x%x\n", eonhdr->eonh_class );
+		printf("eoninput: eon header:\n");
+		dump_buf(eonhdr, sizeof(struct eon_hdr));
+	ENDDEBUG
+
+	/* checks for debugging */
+	if( eonhdr->eonh_vers != EON_VERSION) {
+		IncStat(es_badhdr);
+		goto drop;
+	}
+	/* translate the eon class into the mbuf broadcast/multicast flags */
+	m->m_flags &= ~(M_BCAST|M_MCAST);
+	switch( eonhdr->eonh_class) {
+		case EON_BROADCAST:
+			IncStat(es_in_broad);
+			m->m_flags |= M_BCAST;
+			break;
+		case EON_NORMAL_ADDR:
+			IncStat(es_in_normal);
+			break;
+		case EON_MULTICAST_ES:
+			IncStat(es_in_multi_es);
+			m->m_flags |= M_MCAST;
+			break;
+		case EON_MULTICAST_IS:
+			IncStat(es_in_multi_is);
+			m->m_flags |= M_MCAST;
+			break;
+	}
+	eonifp->if_ipackets++;
+
+	{
+		/* put it on the CLNP queue and set soft interrupt */
+		struct ifqueue 			*ifq;
+		extern struct ifqueue 	clnlintrq;
+
+		m->m_pkthdr.rcvif = eonifp; /* KLUDGE */
+		IFDEBUG(D_EON)
+			printf("eoninput to clnl IFQ\n");
+		ENDDEBUG
+		ifq = &clnlintrq;
+		s = splimp();
+		if (IF_QFULL(ifq)) {
+			/* queue full: drop the packet and undo the ipackets count */
+			IF_DROP(ifq);
+			m_freem(m);
+			eonifp->if_iqdrops++;
+			eonifp->if_ipackets--;
+			splx(s);
+			return;
+		}
+		IF_ENQUEUE(ifq, m);
+		IFDEBUG(D_EON) 
+			printf(
+	"0x%x enqueued on clnp Q: m_len 0x%x m_type 0x%x m_data 0x%x\n", 
+				m, m->m_len, m->m_type, m->m_data);
+			dump_buf(mtod(m, caddr_t), m->m_len);
+		ENDDEBUG
+		schednetisr(NETISR_ISO);
+		splx(s);
+	}
+}
+
+int
+eonctlinput(cmd, sin)	/* counts control-input events per cmd; always returns 0 */
+	int cmd;
+	struct sockaddr_in *sin;
+{
+	extern u_char inetctlerrmap[];
+
+	IFDEBUG(D_EON)
+		printf("eonctlinput: cmd 0x%x addr: ", cmd);
+		dump_isoaddr(sin);
+		printf("\n");
+	ENDDEBUG
+
+	if (cmd < 0 || cmd >= PRC_NCMDS)	/* PRC_NCMDS itself is past the last command */
+		return 0;
+
+	IncStat(es_icmp[cmd]);
+	switch (cmd) {	/* no case acts yet; the groups only record the TODOs */
+
+		case	PRC_QUENCH:
+		case	PRC_QUENCH2:
+			/* TODO: set the dec bit */
+			break;
+		case	PRC_TIMXCEED_REASS:
+		case	PRC_ROUTEDEAD:
+		case	PRC_HOSTUNREACH:
+		case	PRC_UNREACH_NET:
+		case	PRC_IFDOWN:
+		case	PRC_UNREACH_HOST:
+		case	PRC_HOSTDEAD:
+		case	PRC_TIMXCEED_INTRANS:
+			/* TODO: mark the link down */
+			break;
+
+		case	PRC_UNREACH_PROTOCOL:
+		case	PRC_UNREACH_PORT:
+		case	PRC_UNREACH_SRCFAIL:
+		case	PRC_REDIRECT_NET:
+		case	PRC_REDIRECT_HOST:
+		case	PRC_REDIRECT_TOSNET:
+		case	PRC_REDIRECT_TOSHOST:
+		case	PRC_MSGSIZE:
+		case	PRC_PARAMPROB:
+			/* printf("eonctlinput: ICMP cmd 0x%x\n", cmd );*/
+		break;
+	}
+	return 0;
+}
+
+#endif
diff --git a/sys/netiso/iso.c b/sys/netiso/iso.c
new file mode 100644
index 00000000000..cd64e687152
--- /dev/null
+++ b/sys/netiso/iso.c
@@ -0,0 +1,919 @@
+/*-
+ * Copyright (c) 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)iso.c	8.2 (Berkeley) 11/15/93
+ */
+
+/***********************************************************
+		Copyright IBM Corporation 1987
+
+                      All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and its 
+documentation for any purpose and without fee is hereby granted, 
+provided that the above copyright notice appear in all copies and that
+both that copyright notice and this permission notice appear in 
+supporting documentation, and that the name of IBM not be
+used in advertising or publicity pertaining to distribution of the
+software without specific, written prior permission.  
+
+IBM DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING
+ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL
+IBM BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR
+ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+SOFTWARE.
+
+******************************************************************/
+
+/*
+ * ARGO Project, Computer Sciences Dept., University of Wisconsin - Madison
+ */
+/*
+ * $Header: iso.c,v 4.11 88/09/19 14:58:35 root Exp $ 
+ * $Source: /usr/argo/sys/netiso/RCS/iso.c,v $ 
+ *
+ * iso.c: miscellaneous routines to support the iso address family
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/ioctl.h>
+#include <sys/mbuf.h>
+#include <sys/domain.h>
+#include <sys/protosw.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/errno.h>
+
+#include <net/if.h>
+#include <net/route.h>
+
+#include <netiso/iso.h>
+#include <netiso/iso_var.h>
+#include <netiso/iso_snpac.h>
+#include <netiso/iso_pcb.h>
+#include <netiso/clnp.h>
+#include <netiso/argo_debug.h>
+#ifdef TUBA
+#include <netiso/tuba_table.h>
+#endif
+
+#ifdef ISO
+
+extern	struct ifnet loif;	/* loopback interface */
+int	iso_interfaces = 0;		/* number of external interfaces */
+void	llc_rtrequest();
+int	ether_output();
+
+/*
+ * FUNCTION:		iso_addrmatch1
+ *
+ * PURPOSE:			decide if the two iso_addrs passed are equal
+ *
+ * RETURNS:			true if the addrs match, false if they do not
+ *
+ * SIDE EFFECTS:	
+ *
+ * NOTES:			
+ */
+iso_addrmatch1(isoaa, isoab)
+register struct iso_addr *isoaa, *isoab;		/* addresses to check */
+{
+	u_int	compare_len = isoaa->isoa_len;	/* number of address bytes to compare */
+
+	IFDEBUG(D_ROUTE)
+		printf("iso_addrmatch1: comparing lengths: %d to %d\n", isoaa->isoa_len,
+			isoab->isoa_len);
+		printf("a:\n");
+		dump_buf(isoaa->isoa_genaddr, isoaa->isoa_len);
+		printf("b:\n");
+		dump_buf(isoab->isoa_genaddr, isoab->isoa_len);
+	ENDDEBUG
+
+	/* addresses of different lengths never match */
+	if (compare_len != isoab->isoa_len) {
+		IFDEBUG(D_ROUTE)
+			printf("iso_addrmatch1: returning false because of lengths\n");
+		ENDDEBUG
+		return 0;
+	}
+#ifdef notdef
+	/* TODO : generalize this to all afis with masks */
+	if(	isoaa->isoa_afi == AFI_37 ) {
+		/* must not compare 2 least significant digits, or for
+		 * that matter, the DSP
+		 */
+		compare_len = ADDR37_IDI_LEN - 1; 
+	}
+#endif
+
+	IFDEBUG(D_ROUTE)
+		/* same comparison, byte by byte, reporting the first difference */
+		char *a = isoaa->isoa_genaddr;
+		char *b = isoab->isoa_genaddr;
+		int i = 0;
+
+		while (i < compare_len) {
+			printf("<%x=%x>", a[i]&0xff, b[i]&0xff);
+			if (a[i] != b[i]) {
+				printf("\naddrs are not equal at byte %d\n", i);
+				return(0);
+			}
+			i++;
+		}
+		printf("\n");
+		printf("addrs are equal\n");
+		return (1);
+	ENDDEBUG
+	return (bcmp(isoaa->isoa_genaddr, isoab->isoa_genaddr, compare_len) == 0);
+}
+
+/*
+ * FUNCTION:		iso_addrmatch
+ *
+ * PURPOSE:			decide if the two sockaddr_isos passed are equal
+ *
+ * RETURNS:			true if the addrs match, false if they do not
+ *
+ * SIDE EFFECTS:	
+ *
+ * NOTES:			only the siso_addr members are compared
+ */
+iso_addrmatch(sisoa, sisob)
+struct sockaddr_iso	*sisoa, *sisob;
+{	/* hand the embedded iso_addrs to iso_addrmatch1() */
+	return iso_addrmatch1(&sisoa->siso_addr, &sisob->siso_addr);
+}
+#ifdef notdef
+/*
+ * FUNCTION:		iso_netmatch
+ *
+ * PURPOSE:			similar to iso_addrmatch but takes sockaddr_iso
+ *					as argument.
+ *
+ * RETURNS:			true if same net, false if not
+ *
+ * SIDE EFFECTS:	
+ *
+ * NOTES:			
+ */
+iso_netmatch(sisoa, sisob)
+struct sockaddr_iso *sisoa, *sisob;
+{
+	u_char			neta[sizeof(struct sockaddr_iso)];	/* network part of a */
+	u_char			netb[sizeof(struct sockaddr_iso)];	/* network part of b */
+	register int	lena = iso_netof(&sisoa->siso_addr, neta);
+	register int	lenb = iso_netof(&sisob->siso_addr, netb);
+
+	IFDEBUG(D_ROUTE)
+		printf("iso_netmatch: comparing lengths: %d to %d\n", lena, lenb);
+		printf("a:\n");
+		dump_buf(neta, lena);
+		printf("b:\n");
+		dump_buf(netb, lenb);
+	ENDDEBUG
+
+	if (lena != lenb)	/* network parts of different length never match */
+		return (0);
+	return (bcmp(neta, netb, lena) == 0);
+}
+#endif /* notdef */
+
+/*
+ * FUNCTION:		iso_hashchar
+ *
+ * PURPOSE:			Hash all characters in the buffer specified into
+ *					a long. Return the long.
+ *
+ * RETURNS:			The hash value.
+ *
+ * NOTES:			The hash is achieved by exclusive ORing 4 byte
+ *					quantities, first byte most significant; a short
+ *					final group is zero filled and len % 4 is folded in.
+ *					Bytes are taken as unsigned so values >= 0x80 do
+ *					not sign-extend over their neighbours.
+ */
+u_long
+iso_hashchar(buf, len)
+register caddr_t	buf;		/* buffer to pack from */
+register int		len;		/* length of buffer */
+{
+	register u_char	*p = (u_char *)buf;	/* unsigned view of buf */
+	register u_long	h = 0;
+	register int	i;
+
+	for (i=0; i<len; i+=4) {
+		register u_long	l = 0;
+
+		if ((len - i) < 4) {
+			/* buffer not multiple of 4 */
+			switch (len - i) {
+				case 3:
+					l |= (u_long)p[i+2] << 8;	/* FALLTHROUGH */
+				case 2:
+					l |= (u_long)p[i+1] << 16;	/* FALLTHROUGH */
+				case 1:
+					l |= (u_long)p[i] << 24;
+					break;
+				default:
+					printf("iso_hashchar: unexpected value x%x\n", len - i);
+					break;
+			}
+		} else {
+			l |= (u_long)p[i] << 24;
+			l |= (u_long)p[i+1] << 16;
+			l |= (u_long)p[i+2] << 8;
+			l |= (u_long)p[i+3];
+		}
+
+		h ^= l;
+	}
+	h ^= (u_long) (len % 4);
+
+	return(h);
+}
+#ifdef notdef
+/*
+ * FUNCTION:		iso_hash
+ *
+ * PURPOSE:			Fill in fields of afhash structure based upon addr passed.
+ *
+ * RETURNS:			none
+ *
+ * SIDE EFFECTS:	
+ *
+ * NOTES:			
+ */
+iso_hash(siso, hp)
+struct sockaddr_iso	*siso;		/* address to perform hash on */
+struct afhash		*hp;		/* RETURN: hash info here */
+{
+	/* room for sizeof(struct sockaddr_iso) bytes, in whole u_longs */
+	u_long			buf[(sizeof(struct sockaddr_iso)+sizeof(u_long)-1)/sizeof(u_long)];
+	register int	bufsize;
+
+	bzero(buf, sizeof(buf));
+
+	/* net hash: over the network portion that iso_netof() extracts */
+	bufsize = iso_netof(&siso->siso_addr, buf);
+	hp->afh_nethash = iso_hashchar((caddr_t)buf, bufsize);
+	IFDEBUG(D_ROUTE)
+		printf("iso_hash: iso_netof: bufsize = %d\n", bufsize);
+	ENDDEBUG
+
+	/* host hash: isoa_len bytes starting at the beginning of siso_addr */
+	hp->afh_hosthash = iso_hashchar((caddr_t)&siso->siso_addr, 
+		siso->siso_addr.isoa_len);
+	IFDEBUG(D_ROUTE)
+		printf("iso_hash: %s: nethash = x%x, hosthash = x%x\n",
+			clnp_iso_addrp(&siso->siso_addr), hp->afh_nethash, 
+			hp->afh_hosthash);
+	ENDDEBUG
+}
+/*
+ * FUNCTION:		iso_netof
+ *
+ * PURPOSE:			Extract the network portion of the iso address.
+ *					The network portion of the iso address varies depending
+ *					on the type of address. The network portion of the
+ *					address will include the IDP. The network portion is:
+ *			
+ *						TYPE			DESC
+ *					t37					The AFI and x.121 (IDI)
+ *					osinet				The AFI, orgid, snetid
+ *					rfc986				The AFI, vers and network part of
+ *										internet address.
+ *
+ * RETURNS:			number of bytes placed into buf.
+ *
+ * SIDE EFFECTS:	
+ *
+ * NOTES:			Buf is assumed to be big enough
+ */
+iso_netof(isoa, buf)
+struct iso_addr	*isoa;		/* address */
+caddr_t			buf;		/* RESULT: network portion of address here */
+{
+	u_int		len = 1;	/* length of afi */
+
+	switch (isoa->isoa_afi) {
+		case AFI_37:
+			/*
+			 * Due to classic x.25 tunnel vision, there is no
+			 * net portion of an x.121 address.  For our purposes
+			 * the AFI will do, so that all x.25 -type addresses
+			 * map to the single x.25 SNPA. (Cannot have more than
+			 * one, obviously).
+			 */
+
+			break;	/* len stays 1: just the AFI */
+
+/* 		case AFI_OSINET:*/
+		case AFI_RFC986: {
+			u_short	idi;	/* value of idi */
+
+			/* osinet and rfc986 have idi in the same place */
+			CTOH(isoa->rfc986_idi[0], isoa->rfc986_idi[1], idi);
+
+			if (idi == IDI_OSINET)
+/*
+ *	Network portion of OSINET address can only be the IDI. Clearly,
+ *	with one x25 interface, one could get to several orgids, and
+ *	several snetids.
+				len += (ADDROSINET_IDI_LEN + OVLOSINET_ORGID_LEN + 
+						OVLOSINET_SNETID_LEN);
+ */
+				len += ADDROSINET_IDI_LEN;
+			else if (idi == IDI_RFC986) {
+				u_long				inetaddr;
+				struct ovl_rfc986	*o986 = (struct ovl_rfc986 *)isoa;
+
+				/* bump len to include idi and version (1 byte) */
+				len += ADDRRFC986_IDI_LEN + 1;
+
+				/* get inet addr long aligned */
+				bcopy(o986->o986_inetaddr, &inetaddr, sizeof(inetaddr));
+				inetaddr = ntohl(inetaddr);	/* convert to host byte order */
+
+				IFDEBUG(D_ROUTE)
+					printf("iso_netof: isoa ");
+					dump_buf(isoa, sizeof(*isoa));
+					printf("iso_netof: inetaddr 0x%x ", inetaddr);
+				ENDDEBUG
+
+				/* bump len by size of network portion of inet address */
+				if (IN_CLASSA(inetaddr)) {
+					len += 4-IN_CLASSA_NSHIFT/8;
+					IFDEBUG(D_ROUTE)
+						printf("iso_netof: class A net len is now %d\n", len);
+					ENDDEBUG
+				} else if (IN_CLASSB(inetaddr)) {
+					len += 4-IN_CLASSB_NSHIFT/8;
+					IFDEBUG(D_ROUTE)
+						printf("iso_netof: class B net len is now %d\n", len);
+					ENDDEBUG
+				} else {
+					/* anything that is not class A or B is sized as class C */
+					len += 4-IN_CLASSC_NSHIFT/8;
+					IFDEBUG(D_ROUTE)
+						printf("iso_netof: class C net len is now %d\n", len);
+					ENDDEBUG
+				}
+			} else
+				len = 0;	/* unrecognized idi: no network portion */
+		} break;
+
+		default:
+			len = 0;	/* unrecognized afi: no network portion */
+	}
+
+	bcopy((caddr_t)isoa, buf, len);	/* the network portion is the first len bytes of *isoa */
+	IFDEBUG(D_ROUTE)
+		printf("iso_netof: isoa ");
+		dump_buf(isoa, len);
+		printf("iso_netof: net ");
+		dump_buf(buf, len);
+	ENDDEBUG
+	return len;
+}
+#endif /* notdef */
+/*
+ * Generic iso control operations (ioctl's).
+ * Ifp is 0 if not an interface-specific ioctl.
+ */
+/* ARGSUSED */
+iso_control(so, cmd, data, ifp)
+	struct socket *so;
+	int cmd;
+	caddr_t data;
+	register struct ifnet *ifp;
+{
+	register struct iso_ifreq *ifr = (struct iso_ifreq *)data;
+	register struct iso_ifaddr *ia = 0;
+	register struct ifaddr *ifa;
+	struct iso_ifaddr *oia;
+	struct iso_aliasreq *ifra = (struct iso_aliasreq *)data;
+	int error, hostIsNew, maskIsNew;
+
+	/*
+	 * Find address for this interface, if it exists.
+	 */
+	if (ifp)
+		for (ia = iso_ifaddr; ia; ia = ia->ia_next)
+			if (ia->ia_ifp == ifp)
+				break;
+
+	switch (cmd) {
+	/* pass 1: validate the request and find (or create) the address */
+	case SIOCAIFADDR_ISO:
+	case SIOCDIFADDR_ISO:	/* with an AF_ISO address, look for an exact match */
+		if (ifra->ifra_addr.siso_family == AF_ISO)
+		    for (oia = ia; ia; ia = ia->ia_next) {
+			if (ia->ia_ifp == ifp  &&
+			    SAME_ISOADDR(&ia->ia_addr, &ifra->ifra_addr))
+				break;
+		}
+		if ((so->so_state & SS_PRIV) == 0)
+			return (EPERM);
+		if (ifp == 0)
+			panic("iso_control");
+		if (ia == (struct iso_ifaddr *)0) {
+			struct iso_ifaddr *nia;
+			if (cmd == SIOCDIFADDR_ISO)	/* nothing to delete */
+				return (EADDRNOTAVAIL);
+#ifdef TUBA
+			/* XXXXXX can't be done in the proto init routines */
+			if (tuba_tree == 0)
+				tuba_table_init();
+#endif
+			MALLOC(nia, struct iso_ifaddr *, sizeof(*nia),
+				       M_IFADDR, M_WAITOK);
+			if (nia == (struct iso_ifaddr *)0)
+				return (ENOBUFS);
+			bzero((caddr_t)nia, sizeof(*nia));
+			if (ia = iso_ifaddr) {	/* append to the iso_ifaddr list */
+				for ( ; ia->ia_next; ia = ia->ia_next)
+					;
+				ia->ia_next = nia;
+			} else
+				iso_ifaddr = nia;
+			ia = nia;
+			if (ifa = ifp->if_addrlist) {	/* and to ifp's own list */
+				for ( ; ifa->ifa_next; ifa = ifa->ifa_next)
+					;
+				ifa->ifa_next = (struct ifaddr *) ia;
+			} else
+				ifp->if_addrlist = (struct ifaddr *) ia;
+			ia->ia_ifa.ifa_addr = (struct sockaddr *)&ia->ia_addr;
+			ia->ia_ifa.ifa_dstaddr
+					= (struct sockaddr *)&ia->ia_dstaddr;
+			ia->ia_ifa.ifa_netmask
+					= (struct sockaddr *)&ia->ia_sockmask;
+			ia->ia_ifp = ifp;
+			if (ifp != &loif)
+				iso_interfaces++;
+		}
+		break;
+
+#define cmdbyte(x)	(((x) >> 8) & 0xff)
+	default:
+		if (cmdbyte(cmd) == 'a')	/* group 'a' goes to snpac_ioctl */
+			return (snpac_ioctl(so, cmd, data));
+		if (ia == (struct iso_ifaddr *)0)
+			return (EADDRNOTAVAIL);
+		break;
+	}
+	switch (cmd) {
+	/* pass 2: carry out the request on ia */
+	case SIOCGIFADDR_ISO:
+		ifr->ifr_Addr = ia->ia_addr;
+		break;
+
+	case SIOCGIFDSTADDR_ISO:
+		if ((ifp->if_flags & IFF_POINTOPOINT) == 0)
+			return (EINVAL);
+		ifr->ifr_Addr = ia->ia_dstaddr;
+		break;
+
+	case SIOCGIFNETMASK_ISO:
+		ifr->ifr_Addr = ia->ia_sockmask;
+		break;
+
+	case SIOCAIFADDR_ISO:
+		maskIsNew = 0; hostIsNew = 1; error = 0;
+		if (ia->ia_addr.siso_family == AF_ISO) {
+			if (ifra->ifra_addr.siso_len == 0) {
+				ifra->ifra_addr = ia->ia_addr;
+				hostIsNew = 0;
+			} else if (SAME_ISOADDR(&ia->ia_addr, &ifra->ifra_addr))
+				hostIsNew = 0;
+		}
+		if (ifra->ifra_mask.siso_len) {
+			iso_ifscrub(ifp, ia);
+			ia->ia_sockmask = ifra->ifra_mask;
+			maskIsNew = 1;
+		}
+		if ((ifp->if_flags & IFF_POINTOPOINT) &&
+		    (ifra->ifra_dstaddr.siso_family == AF_ISO)) {
+			iso_ifscrub(ifp, ia);
+			ia->ia_dstaddr = ifra->ifra_dstaddr;
+			maskIsNew  = 1; /* We lie; but the effect's the same */
+		}
+		if (ifra->ifra_addr.siso_family == AF_ISO &&
+					    (hostIsNew || maskIsNew)) {
+			error = iso_ifinit(ifp, ia, &ifra->ifra_addr, 0);
+		}
+		if (ifra->ifra_snpaoffset)
+			ia->ia_snpaoffset = ifra->ifra_snpaoffset;
+		return (error);
+
+	case SIOCDIFADDR_ISO:	/* drop route, unlink from both lists, release */
+		iso_ifscrub(ifp, ia);
+		if ((ifa = ifp->if_addrlist) == (struct ifaddr *)ia)
+			ifp->if_addrlist = ifa->ifa_next;
+		else {
+			while (ifa->ifa_next &&
+			       (ifa->ifa_next != (struct ifaddr *)ia))
+				    ifa = ifa->ifa_next;
+			if (ifa->ifa_next)
+			    ifa->ifa_next = ((struct ifaddr *)ia)->ifa_next;
+			else
+				printf("Couldn't unlink isoifaddr from ifp\n");
+		}
+		oia = ia;	/* oia: the entry being removed from iso_ifaddr */
+		if (oia == (ia = iso_ifaddr)) {
+			iso_ifaddr = ia->ia_next;
+		} else {
+			while (ia->ia_next && (ia->ia_next != oia)) {
+				ia = ia->ia_next;
+			}
+			if (ia->ia_next)
+			    ia->ia_next = oia->ia_next;
+			else
+				printf("Didn't unlink isoifadr from list\n");
+		}
+		IFAFREE((&oia->ia_ifa));
+		break;
+
+	default:	/* anything else is passed to the interface's if_ioctl */
+		if (ifp == 0 || ifp->if_ioctl == 0)
+			return (EOPNOTSUPP);
+		return ((*ifp->if_ioctl)(ifp, cmd, data));
+	}
+	return (0);
+}
+
+/*
+ * Delete any existing route for an interface.
+ */
+iso_ifscrub(ifp, ia)
+	register struct ifnet *ifp;
+	register struct iso_ifaddr *ia;
+{
+	int saved_tlen, rtflags;
+
+	/* nothing to undo unless IFA_ROUTE says a route was installed */
+	if (!(ia->ia_flags & IFA_ROUTE))
+		return;
+	/* loopback and point-to-point addresses are deleted as host routes */
+	rtflags = (ifp->if_flags & (IFF_LOOPBACK|IFF_POINTOPOINT)) ? RTF_HOST : 0;
+	/* issue the delete with siso_tlen zeroed, then put the length back */
+	saved_tlen = ia->ia_addr.siso_tlen;
+	ia->ia_addr.siso_tlen = 0;
+	rtinit(&(ia->ia_ifa), (int)RTM_DELETE, rtflags);
+	ia->ia_addr.siso_tlen = saved_tlen;
+	ia->ia_flags &= ~IFA_ROUTE;
+}
+
+/*
+ * Initialize an interface's ISO address
+ * and routing table entry.
+ */
+iso_ifinit(ifp, ia, siso, scrub)
+	register struct ifnet *ifp;
+	register struct iso_ifaddr *ia;
+	struct sockaddr_iso *siso;
+	int scrub;		/* nonzero: delete the route for the old address */
+{
+	struct sockaddr_iso oldaddr;
+	int s = splimp(), error, nsellength;
+
+	oldaddr = ia->ia_addr;
+	ia->ia_addr = *siso;
+	/*
+	 * Give the interface a chance to initialize
+	 * if this is its first address,
+	 * and to validate the address if necessary.
+	 */
+	if (ifp->if_ioctl &&
+				(error = (*ifp->if_ioctl)(ifp, SIOCSIFADDR, (caddr_t)ia))) {
+		ia->ia_addr = oldaddr;	/* restore while still at splimp */
+		splx(s);
+		return (error);
+	}
+	if (scrub) {
+		ia->ia_ifa.ifa_addr = (struct sockaddr *)&oldaddr;
+		iso_ifscrub(ifp, ia);
+		ia->ia_ifa.ifa_addr = (struct sockaddr *)&ia->ia_addr;
+	}
+	/* XXX -- The following is here temporarily out of laziness
+	   in not changing every ethernet driver's if_ioctl routine */
+	if (ifp->if_output == ether_output) {
+		ia->ia_ifa.ifa_rtrequest = llc_rtrequest;
+		ia->ia_ifa.ifa_flags |= RTF_CLONING;
+	}
+	/* Add route for the network; siso_tlen is zeroed around the request. */
+	nsellength = ia->ia_addr.siso_tlen;
+	ia->ia_addr.siso_tlen = 0;
+	if (ifp->if_flags & IFF_LOOPBACK) {
+		ia->ia_ifa.ifa_dstaddr = ia->ia_ifa.ifa_addr;
+		error = rtinit(&(ia->ia_ifa), (int)RTM_ADD, RTF_HOST|RTF_UP);
+	} else if (ifp->if_flags & IFF_POINTOPOINT &&
+		 ia->ia_dstaddr.siso_family == AF_ISO)
+		error = rtinit(&(ia->ia_ifa), (int)RTM_ADD, RTF_HOST|RTF_UP);
+	else {
+		rt_maskedcopy(ia->ia_ifa.ifa_addr, ia->ia_ifa.ifa_dstaddr,
+			ia->ia_ifa.ifa_netmask);
+		ia->ia_dstaddr.siso_nlen =
+			min(ia->ia_addr.siso_nlen, (ia->ia_sockmask.siso_len - 6));
+		error = rtinit(&(ia->ia_ifa), (int)RTM_ADD, RTF_UP);
+	}
+	ia->ia_addr.siso_tlen = nsellength;
+	if (error == 0)		/* only claim a route that was really added */
+		ia->ia_flags |= IFA_ROUTE;
+	splx(s);
+	return (error);
+}
+#ifdef notdef
+
+struct ifaddr *
+iso_ifwithidi(addr)
+	register struct sockaddr *addr;	/* address whose type is looked for */
+{
+	register struct ifnet *ifp;
+	register struct ifaddr *ifa;
+	register u_int af = addr->sa_family;
+
+	if (af != AF_ISO)
+		return (0);
+	IFDEBUG(D_ROUTE)
+		printf(">>> iso_ifwithidi addr\n");
+		dump_isoaddr( (struct sockaddr_iso *)(addr));
+		printf("\n");
+	ENDDEBUG
+	for (ifp = ifnet; ifp; ifp = ifp->if_next) {
+		IFDEBUG(D_ROUTE)
+			printf("iso_ifwithidi ifnet %s\n", ifp->if_name);
+		ENDDEBUG
+		for (ifa = ifp->if_addrlist; ifa; ifa = ifa->ifa_next) {
+			IFDEBUG(D_ROUTE)
+				printf("iso_ifwithidi address ");
+				dump_isoaddr( (struct sockaddr_iso *)(ifa->ifa_addr));
+			ENDDEBUG
+			if (ifa->ifa_addr->sa_family != addr->sa_family)
+				continue;
+
+#define	IFA_SIS(ifa)\
+	((struct sockaddr_iso *)((ifa)->ifa_addr))
+			/* print both arguments as pointers, the way iso_eqtype gets them */
+			IFDEBUG(D_ROUTE)
+				printf(" af same, args to iso_eqtype:\n");
+				printf("0x%x ", &(IFA_SIS(ifa)->siso_addr));
+				printf(" 0x%x\n",
+				&(((struct sockaddr_iso *)addr)->siso_addr));
+			ENDDEBUG
+
+			if (iso_eqtype(&(IFA_SIS(ifa)->siso_addr), 
+				&(((struct sockaddr_iso *)addr)->siso_addr))) {
+				IFDEBUG(D_ROUTE)
+					printf("ifa_ifwithidi: ifa found\n");
+				ENDDEBUG
+				return (ifa);
+			}
+			IFDEBUG(D_ROUTE)
+				printf(" iso_eqtype failed\n");
+			ENDDEBUG
+		}
+	}
+	return ((struct ifaddr *)0);
+}
+
+#endif /* notdef */
+/*
+ * FUNCTION:		iso_ck_addr
+ *
+ * PURPOSE:			return true if the iso_addr passed is 
+ *					within the legal size limit for an iso address.
+ *
+ * RETURNS:			true or false
+ *
+ * SIDE EFFECTS:	
+ *
+ */
+iso_ck_addr(isoa)
+struct iso_addr	*isoa;	/* address whose length is validated */
+{
+	/* legal when the claimed length fits the 20-byte isoa_genaddr field */
+	return (isoa->isoa_len <= sizeof(isoa->isoa_genaddr));
+}
+
+#ifdef notdef
+/*
+ * FUNCTION:		iso_eqtype
+ *
+ * PURPOSE:			Determine if two iso addresses are of the same type.
+ *  This is flaky.  Really we should consider all type 47 addrs to be the
+ *  same - but there do exist different structures for 47 addrs.
+ *  Gosip adds a 3rd.
+ *
+ * RETURNS:			true if the addresses are the same type
+ *
+ * SIDE EFFECTS:	
+ *
+ * NOTES:			By type, I mean rfc986, t37, or osinet
+ *
+ *					This will first compare afis. If they match, then
+ *					if the addr is not t37, the idis must be compared.
+ */
+iso_eqtype(isoaa, isoab)
+struct iso_addr	*isoaa;		/* one address of the pair */
+struct iso_addr	*isoab;		/* the address it is compared with */
+{
+	/* addresses with different AFIs are never the same type */
+	if (isoaa->isoa_afi != isoab->isoa_afi)
+		return (0);
+	/* for AFI 37 the matching AFI alone settles it */
+	if (isoaa->isoa_afi == AFI_37)
+		return (1);
+	return (bcmp(&isoaa->isoa_u, &isoab->isoa_u, 2) == 0);
+}
+#endif /* notdef */
+/*
+ * FUNCTION:		iso_localifa()
+ *
+ * PURPOSE:			Find an interface address having a given destination
+ *					or at least matching the net.
+ *
+ * RETURNS:			ptr to an interface address 
+ *
+ * SIDE EFFECTS:	
+ *
+ * NOTES:			
+ */
+struct iso_ifaddr *
+iso_localifa(siso)
+	register struct sockaddr_iso *siso;
+{
+	register struct iso_ifaddr *ia;
+	register char *cp1, *cp2, *cp3;
+	register struct ifnet *ifp;
+	struct iso_ifaddr *ia_maybe = 0;
+	/*
+	 * One pass over iso_ifaddr: an exact match returns at once; a
+	 * weaker match is kept in ia_maybe (a later one replaces it).
+	 */
+	for (ia = iso_ifaddr; ia; ia = ia->ia_next) {
+		if ((ifp = ia->ia_ifp) == 0 || ((ifp->if_flags & IFF_UP) == 0))
+			continue;	/* no interface, or interface not up */
+		if (ifp->if_flags & IFF_POINTOPOINT) {
+			if ((ia->ia_dstaddr.siso_family == AF_ISO) &&
+				SAME_ISOADDR(&ia->ia_dstaddr, siso))
+				return (ia);
+			else
+				if (SAME_ISOADDR(&ia->ia_addr, siso))
+					ia_maybe = ia;
+			continue;
+		}
+		if (ia->ia_sockmask.siso_len) {	/* compare under the mask */
+			char *cplim = ia->ia_sockmask.siso_len + (char *)&ia->ia_sockmask;
+			cp1 = ia->ia_sockmask.siso_data;
+			cp2 = siso->siso_data;
+			cp3 = ia->ia_addr.siso_data;
+			while (cp1 < cplim)
+				if (*cp1++ & (*cp2++ ^ *cp3++))
+					goto next;	/* differs in a masked bit */
+			ia_maybe = ia;
+		}
+		if (SAME_ISOADDR(&ia->ia_addr, siso))
+			return ia;
+	next:;
+	}
+	return ia_maybe;
+}
+
+#ifdef	TPCONS
+#include <netiso/cons.h>
+#endif	/* TPCONS */
+/*
+ * FUNCTION:		iso_nlctloutput
+ *
+ * PURPOSE:			Set options at the network level
+ *
+ * RETURNS:			E*
+ *
+ * SIDE EFFECTS:	
+ *
+ * NOTES:			This could embody some of the functions of
+ *					rclnp_ctloutput and cons_ctloutput.
+ */
+iso_nlctloutput(cmd, optname, pcb, m)
+int			cmd;		/* command:set or get */
+int			optname;	/* option of interest */
+caddr_t		pcb;		/* nl pcb */
+struct mbuf	*m;			/* data for set, buffer for get */
+{
+	struct isopcb	*isop = (struct isopcb *)pcb;
+	int				error = 0;	/* return value */
+	caddr_t			data;		/* data for option */
+	int				data_len;	/* data's length */
+
+	IFDEBUG(D_ISO)
+		printf("iso_nlctloutput: cmd %x, opt %x, pcb %x, m %x\n",
+			cmd, optname, pcb, m);
+	ENDDEBUG
+	/* only get and set are understood */
+	if ((cmd != PRCO_GETOPT) && (cmd != PRCO_SETOPT))
+		return(EOPNOTSUPP);	/* note: m is not freed on this path */
+
+	data = mtod(m, caddr_t);
+	data_len = (m)->m_len;
+
+	IFDEBUG(D_ISO)
+		printf("iso_nlctloutput: data is:\n");
+		dump_buf(data, data_len);
+	ENDDEBUG
+
+	switch (optname) {
+
+#ifdef	TPCONS
+		case CONSOPT_X25CRUD:	/* set-only: get is refused below */
+			if (cmd == PRCO_GETOPT) {
+				error = EOPNOTSUPP;
+				break;
+			}
+			/* NOTE(review): assumes isop_x25crud holds MAXX25CRUDLEN bytes */
+			if (data_len > MAXX25CRUDLEN) {
+				error = EINVAL;
+				break;
+			}
+
+			IFDEBUG(D_ISO)
+				printf("iso_nlctloutput: setting x25 crud\n");
+			ENDDEBUG
+
+			bcopy(data, (caddr_t)isop->isop_x25crud, (unsigned)data_len);
+			isop->isop_x25crud_len = data_len;
+			break;
+#endif	/* TPCONS */
+
+		default:
+			error = EOPNOTSUPP;
+	}
+	if (cmd == PRCO_SETOPT)	/* m is freed here only for set */
+		m_freem(m);
+	return error;
+}
+#endif /* ISO */
+
+#ifdef ARGO_DEBUG
+
+/*
+ * FUNCTION:		dump_isoaddr
+ *
+ * PURPOSE:			debugging
+ *
+ * RETURNS:			nothing
+ *
+ */
+dump_isoaddr(s)
+	struct sockaddr_iso *s;
+{
+	char *clnp_saddr_isop();
+
+	/* families other than AF_ISO and AF_INET print nothing */
+	if( s->siso_family == AF_ISO) {
+		printf("ISO address: suffixlen %d, %s\n",
+			s->siso_tlen, clnp_saddr_isop(s));
+	} else if( s->siso_family == AF_INET) {
+		/* hack: reinterpret the argument as a sockaddr_in */
+		struct sockaddr_in *sin = (struct sockaddr_in *)s;
+		u_long inaddr = ntohl(sin->sin_addr.s_addr);	/* host order */
+		printf("%d.%d.%d.%d: %d", 
+			(int)((inaddr>>24)&0xff),
+			(int)((inaddr>>16)&0xff),
+			(int)((inaddr>>8)&0xff),
+			(int)(inaddr&0xff),
+			ntohs(sin->sin_port));
+	}
+}
+
+#endif /* ARGO_DEBUG */
diff --git a/sys/netiso/iso.h b/sys/netiso/iso.h
new file mode 100644
index 00000000000..9237e6aaa73
--- /dev/null
+++ b/sys/netiso/iso.h
@@ -0,0 +1,195 @@
+/*-
+ * Copyright (c) 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)iso.h	8.1 (Berkeley) 6/10/93
+ */
+
+/***********************************************************
+		Copyright IBM Corporation 1987
+
+                      All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and its 
+documentation for any purpose and without fee is hereby granted, 
+provided that the above copyright notice appear in all copies and that
+both that copyright notice and this permission notice appear in 
+supporting documentation, and that the name of IBM not be
+used in advertising or publicity pertaining to distribution of the
+software without specific, written prior permission.  
+
+IBM DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING
+ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL
+IBM BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR
+ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+SOFTWARE.
+
+******************************************************************/
+
+/*
+ * ARGO Project, Computer Sciences Dept., University of Wisconsin - Madison
+ */
+/* $Header: iso.h,v 4.9 88/09/11 18:06:38 hagens Exp $ */
+/* $Source: /usr/argo/sys/netiso/RCS/iso.h,v $ */
+
+#ifndef __ISO__
+#define __ISO__
+
+/*
+ *	Return true if this is a multicast address
+ *	This assumes that the bit transmission is lsb first. This 
+ *	assumption is valid for 802.3 but not 802.5. There is a
+ *	kludge to get around this for 802.5 -- see if_lan.c
+ *	where subnetwork header is setup.
+ */
+#define	IS_MULTICAST(snpa)\
+	((snpa)[0] & 0x01)
+	
+/*
+ * Protocols
+ */
+#define	ISOPROTO_TCP	6		/* IETF experiment */
+#define	ISOPROTO_UDP	17		/* IETF experiment */
+#define	ISOPROTO_TP0	25		/* connection oriented transport protocol */
+#define	ISOPROTO_TP1	26		/* not implemented */
+#define	ISOPROTO_TP2	27		/* not implemented */
+#define	ISOPROTO_TP3	28		/* not implemented */
+#define	ISOPROTO_TP4	29		/* connection oriented transport protocol */
+#define	ISOPROTO_TP		ISOPROTO_TP4	 /* tp-4 with negotiation */
+#define	ISOPROTO_CLTP	30		/* connectionless transport (not yet impl.) */
+#define	ISOPROTO_CLNP	31		/* connectionless internetworking protocol */
+#define	ISOPROTO_X25	32		/* cons */
+#define	ISOPROTO_INACT_NL	33	/* inactive network layer! */
+#define	ISOPROTO_ESIS	34		/* ES-IS protocol */
+#define	ISOPROTO_INTRAISIS	35		/* IS-IS protocol */
+#define	ISOPROTO_IDRP	36		/* Interdomain Routing Protocol */
+
+#define	ISOPROTO_RAW	255		/* raw clnp */
+#define	ISOPROTO_MAX	256
+
+#define	ISO_PORT_RESERVED		1024
+#define	ISO_PORT_USERRESERVED	5000
+/*
+ * Port/socket numbers: standard network functions
+ * NOT PRESENTLY USED
+ */
+#define	ISO_PORT_MAINT		501
+#define	ISO_PORT_ECHO		507
+#define	ISO_PORT_DISCARD	509
+#define	ISO_PORT_SYSTAT		511
+#define	ISO_PORT_NETSTAT	515
+/*
+ * Port/socket numbers: non-standard application functions
+ */
+#define ISO_PORT_LOGIN		513
+/*
+ * Port/socket numbers: public use
+ */
+#define ISO_PORT_PUBLIC		1024		/* high bit set --> public */
+
+/*
+ *	Network layer protocol identifiers
+ */
+#define ISO8473_CLNP	0x81
+#define	ISO9542_ESIS	0x82
+#define ISO9542X25_ESIS	0x8a
+#define ISO10589_ISIS		0x83
+#define ISO8878A_CONS		0x84
+#define ISO10747_IDRP		0x85
+
+
+#ifndef IN_CLASSA_NET
+#include <netinet/in.h>
+#endif /* IN_CLASSA_NET */
+
+
+
+/* The following looks like a sockaddr
+ * to facilitate using tree lookup routines */
+struct iso_addr {
+	u_char	isoa_len;						/* bytes of isoa_genaddr in use */
+	char	isoa_genaddr[20];				/* general opaque address */
+};
+
+struct sockaddr_iso {
+	u_char	 			siso_len;			/* length */
+	u_char	 			siso_family;		/* family (AF_ISO) */
+	u_char				siso_plen;			/* presentation selector length */
+	u_char				siso_slen;			/* session selector length */
+	u_char				siso_tlen;			/* transport selector length */
+	struct 	iso_addr	siso_addr;			/* network address; see TSEL() */
+	u_char				siso_pad[6];		/* space for gosip v2 sels */
+											/* makes struct 32 bytes long */
+};
+#define siso_nlen siso_addr.isoa_len
+#define siso_data siso_addr.isoa_genaddr
+
+#define TSEL(s) ((caddr_t)((s)->siso_data + (s)->siso_nlen))
+
+#define SAME_ISOADDR(a, b) \
+	(bcmp((a)->siso_data, (b)->siso_data, (unsigned)(a)->siso_nlen)==0)
+/*
+ * The following are specific values for siso->siso_data[0],
+ * otherwise known as the AFI:
+ */
+#define	AFI_37		0x37	/* bcd of "37" */
+#define AFI_OSINET	0x47	/* bcd of "47" */
+#define AFI_RFC986	0x47	/* bcd of "47" */
+#define	AFI_SNA		0x00	/* SubNetwork Address; invalid really...*/
+
+#ifdef KERNEL
+
+extern int iso_netmatch();
+extern int iso_hash(); 
+extern int iso_addrmatch();
+extern struct iso_ifaddr *iso_iaonnetof();
+extern	struct domain isodomain;
+extern	struct protosw isosw[];
+
+#else
+/* user utilities definitions from the iso library */
+
+#include <sys/cdefs.h>
+
+__BEGIN_DECLS
+struct iso_addr *iso_addr __P((const char *));
+char *iso_ntoa __P((const struct iso_addr *));
+
+/* THESE DON'T EXIST YET */
+struct hostent *iso_gethostbyname(), *iso_gethostbyaddr();
+__END_DECLS
+
+#endif /* KERNEL */
+
+#define _offsetof(t, m) ((int)((caddr_t)&((t *)0)->m))
+#endif /* __ISO__ */
diff --git a/sys/netiso/iso_chksum.c b/sys/netiso/iso_chksum.c
new file mode 100644
index 00000000000..5b1aae59e16
--- /dev/null
+++ b/sys/netiso/iso_chksum.c
@@ -0,0 +1,360 @@
+/*-
+ * Copyright (c) 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)iso_chksum.c	8.1 (Berkeley) 6/10/93
+ */
+
+/***********************************************************
+		Copyright IBM Corporation 1987
+
+                      All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and its 
+documentation for any purpose and without fee is hereby granted, 
+provided that the above copyright notice appear in all copies and that
+both that copyright notice and this permission notice appear in 
+supporting documentation, and that the name of IBM not be
+used in advertising or publicity pertaining to distribution of the
+software without specific, written prior permission.  
+
+IBM DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING
+ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL
+IBM BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR
+ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+SOFTWARE.
+
+******************************************************************/
+
+/*
+ * ARGO Project, Computer Sciences Dept., University of Wisconsin - Madison
+ */
+/* 
+ * $Header: iso_chksum.c,v 4.7 88/07/29 15:31:26 nhall Exp $
+ * $Source: /usr/argo/sys/netiso/RCS/iso_chksum.c,v $
+ *
+ * ISO CHECKSUM
+ *
+ * The checksum generation and check routines are here.
+ * The checksum is 2 bytes such that the sum of all the bytes b(i) == 0
+ * and the sum of i * b(i) == 0.
+ * The whole thing is complicated by the fact that the data are in mbuf
+ * chains.
+ * Furthermore, there is the possibility of wraparound in the running
+ * sums after adding up 4102 octets.  In order to avoid doing a mod
+ * operation after EACH add, we have restricted this implementation to 
+ * negotiating a maximum of 4096-octets per TPDU (for the transport layer).
+ * The routine iso_check_csum doesn't need to know where the checksum
+ * octets are.
+ * The routine iso_gen_csum takes a pointer to an mbuf chain (logically
+ * a chunk of data), an offset into the chunk at which the 2 octets are to
+ * be stuffed, and the length of the chunk.  The 2 octets have to be
+ * logically adjacent, but may be physically located in separate mbufs.
+ */
+
+#ifdef ISO
+#include <netiso/argo_debug.h>
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/mbuf.h>
+#endif /* ISO */
+
+#ifndef MNULL
+#define MNULL (struct mbuf *)0
+#endif /* MNULL */
+
+/*
+ * FUNCTION:	iso_check_csum
+ *
+ * PURPOSE:		To check the checksum of the packet in the mbuf chain (m).
+ * 				The total length of the packet is (len).
+ * 				Called from tp_input() and clnp_intr()
+ *
+ * RETURNS:		 TRUE (something non-zero) if there is a checksum error,
+ * 			 	 FALSE if there was NO checksum error.
+ *
+ * SIDE EFFECTS:  none
+ *
+ * NOTES:		 It might be possible to gain something by optimizing
+ *               this routine (unrolling loops, etc). But it is such
+ *				 a horrible thing to fiddle with anyway, it probably
+ *				 isn't worth it.
+ */
+int 
+iso_check_csum(m, len)
+	struct mbuf *m;
+	int len;
+{
+	register u_char *p = mtod(m, u_char *);
+	register u_long c0=0, c1=0;	/* byte sum, and sum of the running sums */
+	register int i=0;	/* octets summed so far */
+	int cum = 0; /* cumulative length */
+	int l;		/* total number of octets to check */
+
+	l = len;
+	/* from here on len is the part of l held by the current mbuf */
+	len = min(m->m_len, len);
+
+	IFDEBUG(D_CHKSUM)
+		printf("iso_check_csum: m x%x, l x%x, m->m_len x%x\n", m, l, m->m_len);
+	ENDDEBUG
+
+	while( i<l ) {
+		cum += len;
+		while (i<cum) {
+			c0 = c0 + *(p++);
+			c1 += c0;
+			i++;
+		}
+		if(i < l) {
+			m = m->m_next;
+			ASSERT( m != MNULL);	/* before the debug code looks inside m */
+			IFDEBUG(D_CHKSUM)
+				printf("iso_check_csum: new mbuf\n");
+				if(l-i < m->m_len)
+					printf(
+					"bad mbuf chain in check csum l 0x%x i 0x%x m_data 0x%x",
+						l,i,m->m_data);
+			ENDDEBUG
+			len = min( m->m_len, l-i);
+			p = mtod(m, u_char *);
+		}
+	}
+	if ( (c0 % 255) || (c1 % 255) ) {	/* unsigned: no (int) truncation */
+		IFDEBUG(D_CHKSUM)
+			printf("BAD iso_check_csum l 0x%x cum 0x%x len 0x%x, i 0x%x", 
+				l, cum, len, i);
+		ENDDEBUG
+		return (int)(c0 % 255)<<8 | (int)(c1 % 255);
+	}
+	return 0;
+}
+
+/*
+ * FUNCTION:	iso_gen_csum
+ *
+ * PURPOSE:		To generate the checksum of the packet in the mbuf chain (m).
+ * 				The first of the 2 (logically) adjacent checksum bytes 
+ *				(x and y) go at offset (n).
+ * 				(n) is an offset relative to the beginning of the data, 
+ *				not the beginning of the mbuf.
+ * 				(l) is the length of the total mbuf chain's data.
+ * 				Called from tp_emit(), tp_error_emit()
+ *				clnp_emit_er(), clnp_forward(), clnp_output().
+ *
+ * RETURNS:		Nothing
+ *
+ * SIDE EFFECTS: Puts the 2 checksum bytes into the packet.
+ *
+ * NOTES:		Ditto the note for iso_check_csum().
+ */
+
+void
+iso_gen_csum(m,n,l)
+	struct mbuf *m;
+	int n; /* offset of 2 checksum bytes */
+	int l;
+{
+	register u_char *p = mtod(m, u_char *);
+	register int c0=0, c1=0;
+	register int i=0;
+	int loc = n++, len=0; /* n is position, loc is offset */
+	u_char *xloc = (u_char *)0, *yloc = (u_char *)0; /* null until located */
+	int cum=0;	/* cum == cumulative length */
+
+	IFDEBUG(D_CHKSUM)
+		printf("enter gen csum m 0x%x n 0x%x l 0x%x\n",m, n-1 ,l );
+	ENDDEBUG
+
+	while(i < l) {
+		len = min(m->m_len, CLBYTES);
+		/* RAH: don't cksum more than l bytes */
+		len = min(len, l - i);
+
+		cum +=len;
+		p = mtod(m, u_char *);
+
+		if(loc>=0) {
+			if (loc < len) {
+				xloc = loc + mtod(m, u_char *);
+				IFDEBUG(D_CHKSUM)
+					printf("1: zeroing xloc 0x%x loc 0x%x\n",xloc, loc );
+				ENDDEBUG
+				*xloc = (u_char)0;
+				if (loc+1 < len) {
+					/* both xloc and yloc are in same mbuf */
+					yloc = 1  + xloc;
+					IFDEBUG(D_CHKSUM)
+						printf("2: zeroing yloc 0x%x loc 0x%x\n",yloc, loc );
+					ENDDEBUG
+					*yloc = (u_char)0;
+				} else if (m->m_next != MNULL) {
+					/* crosses boundary of mbufs */
+					yloc = mtod(m->m_next, u_char *);
+					IFDEBUG(D_CHKSUM)
+						printf("3: zeroing yloc 0x%x \n",yloc );
+					ENDDEBUG
+					*yloc = (u_char)0;
+				}
+			}
+			loc -= len;
+		}
+
+		while(i < cum) {
+			c0 = (c0 + *p);
+			c1 += c0 ;
+			i++; 
+			p++;
+		}
+		m = m->m_next;
+	}
+	IFDEBUG(D_CHKSUM)
+		printf("gen csum final xloc 0x%x yloc 0x%x\n",xloc, yloc );
+	ENDDEBUG
+	/* reduce c0 first: c0 * (l-n) can overflow an int on large packets */
+	c0 %= 255;
+	c1 = (((c0 * (l-n))-c1)%255) ;
+	if (xloc != (u_char *)0)	/* offset n may lie outside the data */
+		*xloc = (u_char) ((c1 < 0)? c1+255 : c1);
+	c1 = (-(int)(c1+c0))%255;
+	if (yloc != (u_char *)0)
+		*yloc = (u_char) (c1 < 0? c1 + 255 : c1);
+	IFDEBUG(D_CHKSUM)
+		printf("gen csum end \n");
+	ENDDEBUG
+}
+
+/*
+ * FUNCTION:	m_datalen
+ *
+ * PURPOSE:		returns length of the mbuf chain.
+ * 				used all over the iso code.
+ *
+ * RETURNS:		integer
+ *
+ * SIDE EFFECTS: none
+ *
+ * NOTES:		
+ */
+
+int
+m_datalen (m)
+	register struct mbuf *m;
+{
+	register struct mbuf *cur;	/* mbuf currently being counted */
+	register int total = 0;		/* m_len summed over the chain so far */
+	for (cur = m; cur != 0; cur = cur->m_next)
+		total += cur->m_len;
+	return total;
+}
+
+int
+m_compress(in, out)
+	register struct mbuf *in, **out;
+{
+	register 	int datalen = 0;
+	int	s = splimp();
+	/* a single-mbuf chain is handed back unchanged through *out */
+	if( in->m_next == MNULL ) {
+		*out = in;
+		IFDEBUG(D_REQUEST)
+			printf("m_compress returning 0x%x: A\n", in->m_len);
+		ENDDEBUG
+		splx(s);
+		return in->m_len;
+	}
+	MGET((*out), M_DONTWAIT, MT_DATA);	/* first mbuf of the copy */
+	if((*out) == MNULL) {
+		*out = in;	/* on failure the caller gets the input chain back */
+		IFDEBUG(D_REQUEST)
+			printf("m_compress returning -1: B\n");
+		ENDDEBUG
+		splx(s);
+		return -1; 
+	}
+	(*out)->m_len = 0;
+	(*out)->m_act = MNULL;
+	/* drain each input mbuf into *out, stepping past the empty ones */
+	while (in) {
+		IFDEBUG(D_REQUEST)
+			printf("m_compress in 0x%x *out 0x%x\n", in, *out);
+			printf("m_compress in: len 0x%x, off 0x%x\n", in->m_len, in->m_data);
+			printf("m_compress *out: len 0x%x, off 0x%x\n", (*out)->m_len, 
+				(*out)->m_data);
+		ENDDEBUG
+		if (in->m_flags & M_EXT) {
+			ASSERT(in->m_len == 0);
+		}
+		if ( in->m_len == 0) {
+			in = in->m_next;
+			continue;
+		}
+		if (((*out)->m_flags & M_EXT) == 0) {
+			int len;
+			/* NOTE(review): source is always mtod(in); m_data never advances */
+			len = M_TRAILINGSPACE(*out);
+			len = min(len, in->m_len);
+			datalen += len;
+			/* NOTE(review): when len is 0 the continue below changes nothing */
+			IFDEBUG(D_REQUEST)
+				printf("m_compress copying len %d\n", len);
+			ENDDEBUG
+			bcopy(mtod(in, caddr_t), mtod((*out), caddr_t) + (*out)->m_len,
+						(unsigned)len);
+
+			(*out)->m_len += len;
+			in->m_len -= len;
+			continue;
+		} else {
+			/* (*out) is full -- NOTE(review): reached only if *out has M_EXT */
+			if(( (*out)->m_next = m_get(M_DONTWAIT, MT_DATA) ) == MNULL) {
+				m_freem(*out);
+				*out = in;
+				IFDEBUG(D_REQUEST)
+					printf("m_compress returning -1: B\n");
+				ENDDEBUG
+				splx(s);
+				return -1;
+			}
+			(*out)->m_len = 0;	/* NOTE(review): resets the old mbuf, not the new */
+			(*out)->m_act = MNULL;
+			*out = (*out)->m_next;	/* *out now names the newest mbuf */
+		}
+	}
+	m_freem(in);	/* in is null once the loop has ended */
+	IFDEBUG(D_REQUEST)
+		printf("m_compress returning 0x%x: A\n", datalen);
+	ENDDEBUG
+	splx(s);
+	return datalen;
+}
diff --git a/sys/netiso/iso_errno.h b/sys/netiso/iso_errno.h
new file mode 100644
index 00000000000..0d75589ca0e
--- /dev/null
+++ b/sys/netiso/iso_errno.h
@@ -0,0 +1,274 @@
+/*-
+ * Copyright (c) 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)iso_errno.h	8.1 (Berkeley) 6/10/93
+ */
+
+/***********************************************************
+		Copyright IBM Corporation 1987
+
+                      All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and its 
+documentation for any purpose and without fee is hereby granted, 
+provided that the above copyright notice appear in all copies and that
+both that copyright notice and this permission notice appear in 
+supporting documentation, and that the name of IBM not be
+used in advertising or publicity pertaining to distribution of the
+software without specific, written prior permission.  
+
+IBM DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING
+ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL
+IBM BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR
+ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+SOFTWARE.
+
+******************************************************************/
+
+/*
+ * ARGO Project, Computer Sciences Dept., University of Wisconsin - Madison
+ */
+
+#ifndef __ISO_ERRNO__
+#define __ISO_ERRNO__
+
+#define ISO_ERROR_MASK 	0x8000	/* bit present in the TP, CONL and CLNL masks below */
+#define BSD_ERROR_MASK 	0x0000	/* no mask bits set */
+#define	TP_ERROR_MASK	0x8800	/* transport layer */
+#define	CONL_ERROR_MASK	0x8400	/* co network layer */ 
+#define	CLNL_ERROR_MASK	0x8200	/* cl network layer */
+#define TP_ERROR_SNDC	0x10000	/* kludge to force DC's on certain errors */
+
+#define E_CO_NOERROR	(CONL_ERROR_MASK | 0x0)	/* no add'l info; defined again, identically, in the CONL section */
+
+/******************************************************************************/
+/*                                                                            */
+/*                                                                            */
+/*                          Transport Layer                                   */
+/*                                                                            */
+/*         each code below is TP_ERROR_MASK or'ed with a reason value         */
+/******************************************************************************/
+
+#define E_TP_DR_NO_REAS	(TP_ERROR_MASK | 0x0)	 /* dr reason not specified*/
+#define E_TP_CONGEST	(TP_ERROR_MASK | 0x1)	 /* dr reason congestion */
+#define E_TP_NO_SESSION	(TP_ERROR_MASK | 0x2)	 /* dr reason no sess ent */
+#define E_TP_ADDR_UNK	(TP_ERROR_MASK | 0x3)	 /* dr reason addr unknown */
+
+#define E_TP_ER_NO_REAS (TP_ERROR_MASK | 0x40) /* er reas not specified */
+#define E_TP_INV_PCODE	(TP_ERROR_MASK | 0x41)	 /* er reas invalid parm code */
+#define E_TP_INV_TPDU	(TP_ERROR_MASK | 0x42)	 /* er reas invalid tpdu type */
+#define E_TP_INV_PVAL	(TP_ERROR_MASK | 0x43)	 /* er reas invalid parm value*/
+
+#define E_TP_NORMAL_DISC (TP_ERROR_MASK | 0x80)	 /* dr reas normal disc */
+#define E_TP_CONGEST_2	(TP_ERROR_MASK | 0x81)	 /* dr reason congestion */
+#define E_TP_NEGOT_FAILED (TP_ERROR_MASK | 0x82)	 /* dr negotiation failed */
+#define E_TP_DUPL_SRCREF (TP_ERROR_MASK | 0x83)	 /* dr duplicate src ref */
+#define E_TP_MISM_REFS 	(TP_ERROR_MASK | 0x84)	 /* dr mismatched references*/
+#define E_TP_PROTO_ERR 	(TP_ERROR_MASK | 0x85)	 /* dr protocol error*/
+/* 0x86 not used */
+#define E_TP_REF_OVERFLOW (TP_ERROR_MASK | 0x87)	 /* dr reference overflow */
+#define E_TP_NO_CR_ON_NC (TP_ERROR_MASK | 0x88)	 /* dr cr refused on this nc */
+/* 0x89 not used */
+#define E_TP_LENGTH_INVAL (TP_ERROR_MASK | 0x8a)	 /* dr inval length in hdr*/
+
+/******************************************************************************/
+/*                                                                            */
+/*                                                                            */
+/*                   Connection Less Network Layer                            */
+/*                                                                            */
+/*                                                                            */
+/******************************************************************************/
+
+#define E_CLNL_???	(CLNL_ERROR_MASK | 0x1)	 /* explanation; NOTE(review): the "???" makes this look like an unfilled template line, not a usable code -- confirm */
+
+/******************************************************************************/
+/*                                                                            */
+/*                                                                            */
+/*               Connection Oriented Network Layer                            */
+/*                                                                            */
+/*          each code below is CONL_ERROR_MASK or'ed with an offset           */
+/******************************************************************************/
+	/* see p. 149 of ISO 8208 */
+#define E_CO_NOERROR	(CONL_ERROR_MASK | 0x0)	/* no add'l info; repeats the definition near the top of this file */
+#define E_CO_INV_PS		(CONL_ERROR_MASK | 0x1)	/* invalid p(s) */
+#define E_CO_INV_PR		(CONL_ERROR_MASK | 0x2)	/* invalid p(r) */
+	/* dot dot dot */
+#define E_CO_INV_PKT_TYPE	(CONL_ERROR_MASK | 0x10)	/* packet type invalid*/
+#define E_CO_INV_PKT_R1		(CONL_ERROR_MASK | 0x11)	/* for state r1 */
+#define E_CO_INV_PKT_R2		(CONL_ERROR_MASK | 0x12)	/* for state r2 */
+#define E_CO_INV_PKT_R3		(CONL_ERROR_MASK | 0x13)	/* for state r3 */
+#define E_CO_INV_PKT_P1		(CONL_ERROR_MASK | 0x14)	/* for state p1 */
+#define E_CO_INV_PKT_P2		(CONL_ERROR_MASK | 0x15)	/* for state p2 */
+#define E_CO_INV_PKT_P3		(CONL_ERROR_MASK | 0x16)	/* for state p3 */
+#define E_CO_INV_PKT_P4		(CONL_ERROR_MASK | 0x17)	/* for state p4 */
+#define E_CO_INV_PKT_P5		(CONL_ERROR_MASK | 0x18)	/* for state p5 */
+#define E_CO_INV_PKT_P6		(CONL_ERROR_MASK | 0x19)	/* for state p6 */
+#define E_CO_INV_PKT_P7		(CONL_ERROR_MASK | 0x1a)	/* for state p7 */
+#define E_CO_INV_PKT_D1		(CONL_ERROR_MASK | 0x1b)	/* for state d1 */
+#define E_CO_INV_PKT_D2		(CONL_ERROR_MASK | 0x1c)	/* for state d2 */
+#define E_CO_INV_PKT_D3		(CONL_ERROR_MASK | 0x1d)	/* for state d3 */
+	/* dot dot dot */
+#define E_CO_PKT_NOT_ALWD	(CONL_ERROR_MASK | 0x20) /* packet not allowed */
+#define E_CO_PNA_UNIDENT	(CONL_ERROR_MASK | 0x21) /* unidentifiable pkt */
+#define E_CO_PNA_ONEWAY		(CONL_ERROR_MASK | 0x22) /* call on 1-way lc */
+#define E_CO_PNA_PVC		(CONL_ERROR_MASK | 0x23) /* inv pkt type on a pvc */
+#define E_CO_PNA_UNASSLC	(CONL_ERROR_MASK | 0x24) /* pkt on unassigned lc */
+#define E_CO_PNA_REJECT		(CONL_ERROR_MASK | 0x25) /* REJ not subscribed to*/
+#define E_CO_PNA_SHORT		(CONL_ERROR_MASK | 0x26) /* pkt too short */
+#define E_CO_PNA_LONG		(CONL_ERROR_MASK | 0x27) /* pkt too long */
+#define E_CO_PNA_INVGFI		(CONL_ERROR_MASK | 0x28) /* inv gen format id */
+#define E_CO_PNA_NZLCI		(CONL_ERROR_MASK | 0x29) \
+	/* restart or reg pkt with nonzero logical channel identifier */
+#define E_CO_PNA_FACIL		(CONL_ERROR_MASK | 0x2a) \
+	/* pkt type not compat with facility */
+#define E_CO_PNA_UINTCON	(CONL_ERROR_MASK | 0x2b)	/* unauthor intrpt conf */
+#define E_CO_PNA_UINTRPT	(CONL_ERROR_MASK | 0x2c) /* unauthorized intrpt	*/
+#define E_CO_PNA_UREJECT	(CONL_ERROR_MASK | 0x2d) /* unauthorized reject  */
+
+#define E_CO_TMR_EXP		(CONL_ERROR_MASK | 0x30) /* timer expired */
+#define E_CO_TMR_CALR		(CONL_ERROR_MASK | 0x31) /* inc. call or call req */
+#define E_CO_TMR_CLRI		(CONL_ERROR_MASK | 0x32) /* clear indication */
+#define E_CO_TMR_RSTI		(CONL_ERROR_MASK | 0x33) /* reset indication */
+#define E_CO_TMR_RRTI		(CONL_ERROR_MASK | 0x34) /* restart indication */
+
+#define E_CO_REG_PROB		(CONL_ERROR_MASK | 0x40)\
+	/* call setup, clear, or registration problem  */
+#define E_CO_REG_CODE		(CONL_ERROR_MASK | 0x41) /* code not allowed */
+#define E_CO_REG_PARM		(CONL_ERROR_MASK | 0x42) /* parameter not allowed */
+#define E_CO_REG_ICDA		(CONL_ERROR_MASK | 0x43) /* invalid called addr */
+#define E_CO_REG_ICGA		(CONL_ERROR_MASK | 0x44) /* invalid calling addr */
+#define E_CO_REG_ILEN		(CONL_ERROR_MASK | 0x45) /* invalid facil length */
+#define E_CO_REG_IBAR		(CONL_ERROR_MASK | 0x46) /* incoming call barred */
+#define E_CO_REG_NOLC		(CONL_ERROR_MASK | 0x47) /* no logical chan avail*/
+#define E_CO_REG_COLL		(CONL_ERROR_MASK | 0x48) /* call collision */
+#define E_CO_REG_DUPF		(CONL_ERROR_MASK | 0x49) /* dupl facil requested */
+#define E_CO_REG_NZAL		(CONL_ERROR_MASK | 0x4a) /* non-zero addr length */
+#define E_CO_REG_NZFL		(CONL_ERROR_MASK | 0x4b) /* non-zero facil length */
+#define E_CO_REG_EFNP		(CONL_ERROR_MASK | 0x4c) \
+	/* expected facil not provided */
+#define E_CO_REG_ICCITT		(CONL_ERROR_MASK | 0x4d) \
+	/* invalid CCITT-specified  DTE facil */
+
+#define E_CO_MISC			(CONL_ERROR_MASK | 0x50) /* miscellaneous */
+#define E_CO_MISC_CAUSE		(CONL_ERROR_MASK | 0x51) /* improper cause code */
+#define E_CO_MISC_ALIGN		(CONL_ERROR_MASK | 0x52) /* not octet-aligned */
+#define E_CO_MISC_IQBS		(CONL_ERROR_MASK | 0x53) \
+	/* inconsistent Q bit settings */
+
+#define E_CO_INTL			(CONL_ERROR_MASK | 0x70) /* international problem */
+#define E_CO_IREMNWK		(CONL_ERROR_MASK | 0x71) /* remote network problem */
+#define E_CO_INPROTO		(CONL_ERROR_MASK | 0x72) /* int'l protocol problem */
+#define E_CO_ILINKDWN		(CONL_ERROR_MASK | 0x73) /* int'l link down */
+#define E_CO_ILINKBSY		(CONL_ERROR_MASK | 0x74) /* int'l link busy */
+#define E_CO_IXNETFAC		(CONL_ERROR_MASK | 0x75) /* transit netwk facil */
+#define E_CO_IRNETFAC		(CONL_ERROR_MASK | 0x76) /* remote netwk facil */
+#define E_CO_IROUTING		(CONL_ERROR_MASK | 0x77) /* int'l routing prob */
+#define E_CO_ITMPRTG		(CONL_ERROR_MASK | 0x78) /* temporary routing prob */
+#define E_CO_IUNKDNIC		(CONL_ERROR_MASK | 0x79) /* unknown called DNIC */
+#define E_CO_IMAINT			(CONL_ERROR_MASK | 0x7a)	/* maintenance action */
+
+#define E_CO_TIMO			(CONL_ERROR_MASK | 0x90)	\
+	/* timer expired or retransmission count surpassed */
+#define E_CO_TIM_INTRP		(CONL_ERROR_MASK | 0x91)	/* for interrupt */
+#define E_CO_TIM_DATA		(CONL_ERROR_MASK | 0x92)	/*  for data */
+#define E_CO_TIM_REJ		(CONL_ERROR_MASK | 0x93)	/*  for reject */
+
+#define E_CO_DTE_SPEC		(CONL_ERROR_MASK | 0xa0)	/* DTE-specific */
+#define E_CO_DTE_OK			(CONL_ERROR_MASK | 0xa1)	/* DTE operational */
+#define E_CO_DTE_NOK		(CONL_ERROR_MASK | 0xa2)	/* DTE not operational */
+#define E_CO_DTE_RSRC		(CONL_ERROR_MASK | 0xa3)	/* DTE resource constraint*/
+#define E_CO_DTE_FSLCT		(CONL_ERROR_MASK | 0xa4)	/* fast select not subsc */
+#define E_CO_DTE_PFPKT		(CONL_ERROR_MASK | 0xa5)	/* partially full pkt */
+#define E_CO_DTE_DBIT		(CONL_ERROR_MASK | 0xa6)	/* D-bit proc not supp */
+#define E_CO_DTE_RCCON		(CONL_ERROR_MASK | 0xa7)	/* reg/cancel confirmed */
+
+#define E_CO_OSI_NSP		(CONL_ERROR_MASK | 0xe0)	/* OSI net svc problem */
+#define E_CO_OSI_DISCT		(CONL_ERROR_MASK | 0xe1)	/* disconnect transient */
+#define E_CO_OSI_DISCP		(CONL_ERROR_MASK | 0xe2)	/* disconnect permanent */
+#define E_CO_OSI_REJT		(CONL_ERROR_MASK | 0xe3)	/* reject transient */
+#define E_CO_OSI_REJP		(CONL_ERROR_MASK | 0xe4)	/* reject permanent */
+#define E_CO_OSI_QOST		(CONL_ERROR_MASK | 0xe5)	/* reject QOS transient */
+#define E_CO_OSI_QOSP		(CONL_ERROR_MASK | 0xe6)	/* reject QOS permanent */
+#define E_CO_OSI_NSAPT		(CONL_ERROR_MASK | 0xe7)	/* NSAP unreach transient */
+#define E_CO_OSI_NSAPP		(CONL_ERROR_MASK | 0xe8)	/* NSAP unreach permanent */
+#define E_CO_OSI_RESET		(CONL_ERROR_MASK | 0xe9)	/* reset no reason */
+#define E_CO_OSI_CONGEST	(CONL_ERROR_MASK | 0xea)	/* reset congestion */
+#define E_CO_OSI_UNSAP		(CONL_ERROR_MASK | 0xeb)	/* unknown NSAP permanent */
+
+#define E_CO_HLI_INIT		(CONL_ERROR_MASK | 0xf0)	/* higher level initiated*/
+#define E_CO_HLI_DISCN		(CONL_ERROR_MASK | 0xf1)	/* disconnect normal */
+#define E_CO_HLI_DISCA		(CONL_ERROR_MASK | 0xf2)	/* disconnect abnormal */
+#define E_CO_HLI_DISCI		(CONL_ERROR_MASK | 0xf3)	/* disconnect incompatible*/
+#define E_CO_HLI_REJT		(CONL_ERROR_MASK | 0xf4)	/* reject transient */
+#define E_CO_HLI_REJP		(CONL_ERROR_MASK | 0xf5)	/* reject permanent */
+#define E_CO_HLI_QOST		(CONL_ERROR_MASK | 0xf6)	/* reject QOS transient */
+#define E_CO_HLI_QOSP		(CONL_ERROR_MASK | 0xf7)	/* reject QOS permanent */
+#define E_CO_HLI_REJI		(CONL_ERROR_MASK | 0xf8)	/* reject incompatible  */
+#define E_CO_HLI_PROTOID	(CONL_ERROR_MASK | 0xf9)	/* unrecog proto id  */
+#define E_CO_HLI_RESYNC		(CONL_ERROR_MASK | 0xfa)	/* reset - user resync */
+
+/* Cause on 8208 CLEAR field
+ * Every offset in this group is odd and lies in 0x101..0x1c3.
+ */
+#define E_CO_NUMBERBUSY		(CONL_ERROR_MASK | 0x101) /* Number busy */
+#define E_CO_INVFACREQ		(CONL_ERROR_MASK | 0x103)  /* invalid facil req */
+#define E_CO_NETCONGEST		(CONL_ERROR_MASK | 0x105)  /* Network congestion */
+#define E_CO_OUTOFORDER		(CONL_ERROR_MASK | 0x109) /* Out of order */
+#define E_CO_ACCESSBAR		(CONL_ERROR_MASK | 0x10b)  /* access barred */
+#define E_CO_NOTOBTAIN		(CONL_ERROR_MASK | 0x10d)  /* not obtainable */
+#define E_CO_REMPROCERR		(CONL_ERROR_MASK | 0x111) /* Remote procedure err */
+#define E_CO_LOCPROCERR		(CONL_ERROR_MASK | 0x113)  /* Local procedure err */
+#define E_CO_RPOAOOO		(CONL_ERROR_MASK | 0x115)  /* RPOA out of order */
+#define E_CO_NOREVCHG		(CONL_ERROR_MASK | 0x119) /* Revs chg not accepted*/
+#define E_CO_INCOMPAT		(CONL_ERROR_MASK | 0x121) /* Incompatible dest */
+#define E_CO_NOFASTSEL		(CONL_ERROR_MASK | 0x129) 
+	/* Fast select accpt not subscribed (describes E_CO_NOFASTSEL above) */
+#define E_CO_NOSHIP			(CONL_ERROR_MASK | 0x139)  /* ship absent */
+#define E_CO_GWPROCERR		(CONL_ERROR_MASK | 0x1c1)  /* Gateway-detected err*/
+#define E_CO_GWCONGEST		(CONL_ERROR_MASK | 0x1c3)  /* Gateway congestion*/
+
+/* ARGO only
+ * These use even offsets (0x100, 0x102, 0x104), which the CLEAR causes
+ * defined above in this file do not use.
+ */
+#define E_CO_QFULL 	(CONL_ERROR_MASK | 0x100)	/* dropped packet - queue full*/
+#define E_CO_AIWP 	(CONL_ERROR_MASK | 0x102)   /* addr incompat w/proto */
+#define E_CO_CHAN 	(CONL_ERROR_MASK | 0x104)	/* bad channel number */
+
+/* ARGO only; driver specific */
+#define E_CO_NORESOURCES 	(CONL_ERROR_MASK | 0x1b0)	/* eicon clogged  */
+#define E_CO_PDNDOWN		(CONL_ERROR_MASK | 0x1b1)	/* physical net down */
+#define E_CO_DRVRCLRESET	(CONL_ERROR_MASK | 0x1b2)	/* driver clear/reset */
+#define E_CO_PDNCLRESET		(CONL_ERROR_MASK | 0x1b3)	/* PDN clear/reset */
+#define E_CO_DTECLRESET		(CONL_ERROR_MASK | 0x1b4)	/* board clear/reset */
+#define E_CO_UNKCLRESET		(CONL_ERROR_MASK | 0x1b5)	/* unexpected clr/rst */
+
+#define CONL_ERROR_MAX 0x1c3	/* largest CONL offset defined in this file (E_CO_GWCONGEST), without CONL_ERROR_MASK */
+
+#endif /* __ISO_ERRNO__ */
diff --git a/sys/netiso/iso_pcb.c b/sys/netiso/iso_pcb.c
new file mode 100644
index 00000000000..0b50c603422
--- /dev/null
+++ b/sys/netiso/iso_pcb.c
@@ -0,0 +1,617 @@
+/*-
+ * Copyright (c) 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)iso_pcb.c	8.1 (Berkeley) 6/10/93
+ */
+
+/***********************************************************
+		Copyright IBM Corporation 1987
+
+                      All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and its 
+documentation for any purpose and without fee is hereby granted, 
+provided that the above copyright notice appear in all copies and that
+both that copyright notice and this permission notice appear in 
+supporting documentation, and that the name of IBM not be
+used in advertising or publicity pertaining to distribution of the
+software without specific, written prior permission.  
+
+IBM DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING
+ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL
+IBM BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR
+ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+SOFTWARE.
+
+******************************************************************/
+
+/*
+ * ARGO Project, Computer Sciences Dept., University of Wisconsin - Madison
+ */
+/*
+ * $Header: iso_pcb.c,v 4.5 88/06/29 14:59:56 hagens Exp $
+ * $Source: /usr/argo/sys/netiso/RCS/iso_pcb.c,v $
+ *
+ * Iso address family net-layer(s) pcb stuff. NEH 1/29/87
+ */
+
+#ifdef ISO
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/errno.h>
+
+#include <netiso/argo_debug.h>
+#include <netiso/iso.h>
+#include <netiso/clnp.h>
+#include <netinet/in_systm.h>
+#include <net/if.h>
+#include <net/route.h>
+#include <netiso/iso_pcb.h>
+#include <netiso/iso_var.h>
+#include <sys/protosw.h>
+
+#ifdef TPCONS
+#include <netccitt/x25.h>
+#include <netccitt/pk.h>
+#include <netccitt/pk_var.h>
+#endif
+
+#define PCBNULL (struct isopcb *)0	/* null isopcb pointer constant */
+struct	iso_addr zeroiso_addr = {	/* an iso_addr initialized to zero */
+	0
+};
+
+
+/*
+ * FUNCTION:		iso_pcballoc
+ *
+ * PURPOSE:			Obtain a zeroed isopcb with MALLOC (type M_PCB,
+ *					M_NOWAIT), record its queue head (head) and its
+ *					socket (so) in it, and insert it into the queue
+ *					right behind (head).
+ *
+ * RETURNS:			0 if OK, ENOBUFS if the isopcb could not be allocated
+ *
+ * SIDE EFFECTS:	When (so) is non-null, so->so_pcb is pointed at the
+ *					new isopcb.
+ */
+int
+iso_pcballoc(so, head)
+	struct socket *so;
+	struct isopcb *head;
+{
+	register struct isopcb *pcb;
+
+	IFDEBUG(D_ISO)
+		printf("iso_pcballoc(so 0x%x)\n", so);
+	ENDDEBUG
+
+	MALLOC(pcb, struct isopcb *, sizeof(*pcb), M_PCB, M_NOWAIT);
+	if (pcb == NULL)
+		return ENOBUFS;
+
+	/* start from an all-zero pcb, then fill in the two back pointers */
+	bzero((caddr_t)pcb, sizeof(*pcb));
+	pcb->isop_socket = so;
+	pcb->isop_head = head;
+	insque(pcb, head);
+
+	/* a pcb may be created without a socket */
+	if (so != 0)
+		so->so_pcb = (caddr_t)pcb;
+	return 0;
+}
+	
+/*
+ * FUNCTION:		iso_pcbbind
+ *
+ * PURPOSE:			binds the address given in *(nam) to the socket
+ *					specified by the isopcb in *(isop).
+ *					If the given address has a network portion
+ *					(siso_nlen != 0), we look for an interface with that
+ *					network address.  If it has a transport selector, we
+ *					make sure the address isn't already in use (unless
+ *					SO_REUSEADDR is set on the socket); a selector of at
+ *					most 2 bytes whose value is below ISO_PORT_RESERVED
+ *					additionally requires SS_PRIV.
+ *					If (nam) is null or its transport selector is empty,
+ *					we allocate a port and store it in the first two
+ *					transport-selector bytes of the pcb's local address.
+ *
+ * RETURNS:			errno E* or 0 if ok:
+ *					EADDRNOTAVAIL	no interface attached, or none has
+ *									the requested network address
+ *					EADDRINUSE		pcb already bound, or address in use
+ *					ENAMETOOLONG	(nam) fails the length check
+ *					ENOBUFS			no mbuf for an oversized address
+ *					EACCES			reserved port without SS_PRIV
+ *
+ * SIDE EFFECTS:	increments head->isop_lport if it allocates a port #
+ *
+ * NOTES:			The privilege test used to read
+ *					(so_state && SS_PRIV), a logical AND that is true for
+ *					any non-zero so_state; it now tests the SS_PRIV bit.
+ *					NOTE(review): a selector longer than 2 bytes takes the
+ *					same path as "noname", so its first two bytes are
+ *					overwritten with an allocated port -- confirm that
+ *					this is intended.
+ *					NOTE(review): on the EADDRINUSE and EACCES returns
+ *					that follow the bcopy, isop_laddr stays set.
+ */
+#define	satosiso(sa)	((struct sockaddr_iso *)(sa))
+int
+iso_pcbbind(isop, nam)
+	register struct isopcb *isop;
+	struct mbuf *nam;
+{
+	register struct isopcb *head = isop->isop_head;
+	register struct sockaddr_iso *siso;
+	struct iso_ifaddr *ia;
+	union {
+		char data[2];
+		u_short s;
+	} suf;		/* 2-byte port, viewed as bytes or as a short */
+
+	IFDEBUG(D_ISO)
+		printf("iso_pcbbind(isop 0x%x, nam 0x%x)\n", isop, nam);
+	ENDDEBUG
+	suf.s = 0;
+	if (iso_ifaddr == 0) /* any interfaces attached? */
+		return EADDRNOTAVAIL;
+	if (isop->isop_laddr)  /* already bound */
+		return EADDRINUSE;
+	if(nam == (struct mbuf *)0) {
+		/* no name given: build an address with only a 2-byte selector */
+		isop->isop_laddr = &isop->isop_sladdr;
+		isop->isop_sladdr.siso_len = sizeof(struct sockaddr_iso);
+		isop->isop_sladdr.siso_family = AF_ISO;
+		isop->isop_sladdr.siso_tlen = 2;
+		isop->isop_sladdr.siso_nlen = 0;
+		isop->isop_sladdr.siso_slen = 0;
+		isop->isop_sladdr.siso_plen = 0;
+		goto noname;
+	}
+	siso = mtod(nam, struct sockaddr_iso *);
+	IFDEBUG(D_ISO)
+		printf("iso_pcbbind(name len 0x%x)\n", nam->m_len);
+		printf("The address is %s\n", clnp_iso_addrp(&siso->siso_addr));
+	ENDDEBUG
+	/*
+	 * We would like sort of length check but since some OSI addrs
+	 * do not have fixed length, we can't really do much.
+	 * The ONLY thing we can say is that an osi addr has to have
+	 * at LEAST an afi and one more byte and had better fit into
+	 * a struct iso_addr.
+	 * However, in fact the size of the whole thing is a struct
+	 * sockaddr_iso, so probably this is what we should check for.
+	 */
+	if( (nam->m_len < 2) || (nam->m_len < siso->siso_len)) {
+			return ENAMETOOLONG;
+	}
+	if (siso->siso_nlen) {
+		/* non-zero net addr- better match one of our interfaces */
+		IFDEBUG(D_ISO)
+			printf("iso_pcbbind: bind to NOT zeroisoaddr\n");
+		ENDDEBUG
+		for (ia = iso_ifaddr; ia; ia = ia->ia_next) 
+			if (SAME_ISOADDR(siso, &ia->ia_addr))
+				break;
+		if (ia == 0)
+			return EADDRNOTAVAIL;
+	} 
+	/* keep the address in the pcb if it fits, else in a copy of (nam) */
+	if (siso->siso_len <= sizeof (isop->isop_sladdr)) {
+		isop->isop_laddr = &isop->isop_sladdr;
+	} else {
+		if ((nam = m_copy(nam, 0, (int)M_COPYALL)) == 0)
+			return ENOBUFS;
+		isop->isop_laddr = mtod(nam, struct sockaddr_iso *);
+	}
+	bcopy((caddr_t)siso, (caddr_t)isop->isop_laddr, siso->siso_len);
+	if (siso->siso_tlen == 0)
+		goto noname;
+	if ((isop->isop_socket->so_options & SO_REUSEADDR) == 0 &&
+		iso_pcblookup(head, 0, (caddr_t)0, isop->isop_laddr))
+		return EADDRINUSE;
+	if (siso->siso_tlen <= 2) {
+		bcopy(TSEL(siso), suf.data, sizeof(suf.data));
+		suf.s = ntohs(suf.s);
+		/* reserved ports are for sockets that carry the SS_PRIV bit */
+		if((suf.s < ISO_PORT_RESERVED) &&
+		   (isop->isop_socket->so_state & SS_PRIV) == 0)
+			return EACCES;
+	} else {
+		register char *cp;
+noname:
+		cp = TSEL(isop->isop_laddr);
+	IFDEBUG(D_ISO)
+		printf("iso_pcbbind noname\n");
+	ENDDEBUG
+		/* step isop_lport until no other pcb is found on that port */
+		do {
+			if (head->isop_lport++ < ISO_PORT_RESERVED ||
+			    head->isop_lport > ISO_PORT_USERRESERVED)
+				head->isop_lport = ISO_PORT_RESERVED;
+			suf.s = htons(head->isop_lport);
+			cp[0] = suf.data[0];
+			cp[1] = suf.data[1];
+		} while (iso_pcblookup(head, 0, (caddr_t)0, isop->isop_laddr));
+	}
+	IFDEBUG(D_ISO)
+		printf("iso_pcbbind returns 0, suf 0x%x\n", suf.s);
+	ENDDEBUG
+	return 0;
+}
+/*
+ * FUNCTION:		iso_pcbconnect
+ *
+ * PURPOSE:			Make the isopcb (isop) look like it's connected.
+ *					In other words, give it the peer address given in 
+ *					the mbuf * (nam).   Make sure such a combination
+ *					of local, peer addresses doesn't already exist
+ *					for this protocol.  Internet mentality prevails here,
+ *					wherein a src,dst pair uniquely identifies a connection.
+ * 					Both net address and port must be specified in argument 
+ *					(nam).
+ * 					If we don't have a local address for this socket yet, 
+ *					we pick one by calling iso_pcbbind().
+ *
+ * RETURNS:			errno E* or 0 if ok.
+ *
+ * SIDE EFFECTS:	Looks up a route, which may cause one to be left
+ *					in the isopcb.
+ *					If (nam) has no network address, the address of the
+ *					first interface on iso_ifaddr is inserted into the
+ *					sockaddr held in (nam), in place.
+ *
+ * NOTES:			Mbufs for addresses that do not fit into the pcb are
+ *					obtained with m_get(M_DONTWAIT, MT_SONAME).  Both
+ *					calls used to pass the two arguments in the opposite
+ *					order, unlike m_get(M_DONTWAIT, MT_DATA) used
+ *					elsewhere.
+ *					NOTE(review): the result of iso_pcbbind() is ignored
+ *					below, as before.
+ */
+int
+iso_pcbconnect(isop, nam)
+	register struct isopcb *isop;
+	struct mbuf *nam;
+{
+	register struct sockaddr_iso	*siso = mtod(nam, struct sockaddr_iso *);
+	int								local_zero, error = 0;
+	struct iso_ifaddr 				*ia;
+
+	IFDEBUG(D_ISO)
+		printf("iso_pcbconnect(isop 0x%x sock 0x%x nam 0x%x",
+					isop, isop->isop_socket, nam);
+		printf("nam->m_len 0x%x), addr:\n", nam->m_len);
+		dump_isoaddr(siso);
+	ENDDEBUG
+	if (nam->m_len < siso->siso_len)
+		return EINVAL; 
+	if (siso->siso_family != AF_ISO)
+		return EAFNOSUPPORT;
+	if (siso->siso_nlen == 0) {
+		/* no net address given: borrow the one of the first interface */
+		if ((ia = iso_ifaddr) != 0) {
+			int nlen = ia->ia_addr.siso_nlen;
+			/* make room for the net address in front of the selectors */
+			ovbcopy(TSEL(siso), nlen + TSEL(siso),
+				siso->siso_plen + siso->siso_tlen + siso->siso_slen);
+			bcopy((caddr_t)&ia->ia_addr.siso_addr,
+				  (caddr_t)&siso->siso_addr, nlen + 1);
+			/* includes siso->siso_nlen = nlen; */
+		} else
+			return EADDRNOTAVAIL;
+	}
+	/*
+	 * Local zero means either not bound, or bound to a TSEL, but no
+	 * particular local interface.  So, if we want to send somebody
+	 * we need to choose a return address.
+	 */
+	local_zero = 
+		((isop->isop_laddr == 0) || (isop->isop_laddr->siso_nlen == 0));
+	if (local_zero) {
+		int flags;
+
+		IFDEBUG(D_ISO)
+			printf("iso_pcbconnect localzero 1\n");
+		ENDDEBUG
+		/* 
+		 * If route is known or can be allocated now,
+		 * our src addr is taken from the i/f, else punt.
+		 */
+		flags = isop->isop_socket->so_options & SO_DONTROUTE;
+		if ((error = clnp_route(&siso->siso_addr, &isop->isop_route, flags,
+						(struct sockaddr **)0, &ia)) != 0)
+			return error;
+		IFDEBUG(D_ISO)
+			printf("iso_pcbconnect localzero 2, ro->ro_rt 0x%x",
+				isop->isop_route.ro_rt);
+			printf(" ia 0x%x\n", ia);
+		ENDDEBUG
+	}
+	IFDEBUG(D_ISO)
+		printf("in iso_pcbconnect before lookup isop 0x%x isop->sock 0x%x\n", 
+			isop, isop->isop_socket);
+	ENDDEBUG
+	if (local_zero) {
+		int nlen, tlen, totlen; caddr_t oldtsel, newtsel;
+		siso = isop->isop_laddr;
+		if (siso == 0 || siso->siso_tlen == 0)
+			(void)iso_pcbbind(isop, (struct mbuf *)0);
+		/*
+		 * Here we have the problem of squeezing a definite network address
+		 * into an existing sockaddr_iso, which in fact may not have room
+		 * for it.  This gets messy.
+		 */
+		siso = isop->isop_laddr;
+		oldtsel = TSEL(siso);
+		tlen = siso->siso_tlen;
+		nlen = ia->ia_addr.siso_nlen;
+		totlen = tlen + nlen + _offsetof(struct sockaddr_iso, siso_data[0]);
+		if ((siso == &isop->isop_sladdr) &&
+			(totlen > sizeof(isop->isop_sladdr))) {
+			/* built-in address is too small: move to an mbuf */
+			struct mbuf *m = m_get(M_DONTWAIT, MT_SONAME);
+			if (m == 0)
+					return ENOBUFS;
+			m->m_len = totlen;
+			isop->isop_laddr = siso = mtod(m, struct sockaddr_iso *);
+		}
+		/* TSEL() is re-evaluated after the net address length is set */
+		siso->siso_nlen = ia->ia_addr.siso_nlen;
+		newtsel = TSEL(siso);
+		ovbcopy(oldtsel, newtsel, tlen);
+		bcopy(ia->ia_addr.siso_data, siso->siso_data, nlen);
+		siso->siso_tlen = tlen;
+		siso->siso_family = AF_ISO;
+		siso->siso_len = totlen;
+		/* from here on siso is the peer address again */
+		siso = mtod(nam, struct sockaddr_iso *);
+	}
+	IFDEBUG(D_ISO)
+		printf("in iso_pcbconnect before bcopy isop 0x%x isop->sock 0x%x\n", 
+			isop, isop->isop_socket);
+	ENDDEBUG
+	/*
+	 * If we had to allocate space to a previous big foreign address,
+	 * and for some reason we didn't free it, we reuse it knowing
+	 * that is going to be big enough, as sockaddrs are delivered in
+	 * 128 byte mbufs.
+	 * If the foreign address is small enough, we use default space;
+	 * otherwise, we grab an mbuf to copy into.
+	 */
+	if (isop->isop_faddr == 0 || isop->isop_faddr == &isop->isop_sfaddr) {
+		if (siso->siso_len <= sizeof(isop->isop_sfaddr))
+			isop->isop_faddr = &isop->isop_sfaddr;
+		else {
+			struct mbuf *m = m_get(M_DONTWAIT, MT_SONAME);
+			if (m == 0)
+				return ENOBUFS;
+			isop->isop_faddr = mtod(m, struct sockaddr_iso *);
+		}
+	}
+	bcopy((caddr_t)siso, (caddr_t)isop->isop_faddr, siso->siso_len);
+	IFDEBUG(D_ISO)
+		printf("in iso_pcbconnect after bcopy isop 0x%x isop->sock 0x%x\n", 
+			isop, isop->isop_socket);
+		printf("iso_pcbconnect connected to addr:\n");
+		dump_isoaddr(isop->isop_faddr);
+		printf("iso_pcbconnect end: src addr:\n");
+		dump_isoaddr(isop->isop_laddr);
+	ENDDEBUG
+	return 0;
+}
+
+/*
+ * FUNCTION:		iso_pcbdisconnect()
+ *
+ * PURPOSE:			Forget the peer address of (isop) so that the socket
+ *					appears to be disconnected.  A local address that has
+ *					both a network part and a transport selector has its
+ *					network part dropped; the selector is kept.
+ *
+ * RETURNS:			Nothing.
+ *
+ * SIDE EFFECTS:	Frees the mbuf holding the peer address when that
+ *					address is not the pcb's own isop_sfaddr.
+ *					Detaches the pcb when the socket has SS_NOFDREF set.
+ *
+ * NOTES:			
+ */
+void
+iso_pcbdisconnect(isop)
+	struct isopcb *isop;
+{
+	void iso_pcbdetach();
+	register struct sockaddr_iso *local = isop->isop_laddr;
+	register struct sockaddr_iso *peer = isop->isop_faddr;
+
+	IFDEBUG(D_ISO)
+		printf("iso_pcbdisconnect(isop 0x%x)\n", isop);
+	ENDDEBUG
+	/*
+	 * Preserve the binding information if already bound: zero the
+	 * network address length and move the selector bytes to wherever
+	 * TSEL() points afterwards.
+	 */
+	if (local != 0 && local->siso_nlen != 0 && local->siso_tlen != 0) {
+		caddr_t tsel_before = TSEL(local);
+
+		local->siso_nlen = 0;
+		ovbcopy(tsel_before, TSEL(local), local->siso_tlen);
+	}
+	/* a peer address outside the pcb is assumed to live in an mbuf */
+	if (peer != 0 && peer != &isop->isop_sfaddr)
+		m_freem(dtom(peer));
+	isop->isop_faddr = 0;
+	if (isop->isop_socket->so_state & SS_NOFDREF)
+		iso_pcbdetach(isop);
+}
+
+/*
+ * FUNCTION:		iso_pcbdetach
+ *
+ * PURPOSE:			detach the pcb at *(isop) from its socket, release
+ *					what the pcb holds (options mbuf, cached route,
+ *					clnp cache mbuf and its header mbuf, a local address
+ *					kept outside the pcb), dequeue (isop) from its head
+ *					and free it.
+ *
+ * RETURNS:			Nada.
+ *
+ * SIDE EFFECTS:	Clears so->so_pcb and calls sofree(so) when the pcb
+ *					has a socket.  With TPCONS, a pcb that has a channel
+ *					only drops one reference and returns while
+ *					isop_refcnt stays above zero; otherwise a channel in
+ *					DATA_TRANSFER state is passed to pk_disconnect().
+ *
+ * NOTES:			isop_options, clc_hdr and isop_clnpcache are released
+ *					with m_free(), the local address with m_freem().
+ *					NOTE(review): isop_faddr is not examined here -- a
+ *					peer address held in an mbuf is presumably released
+ *					by iso_pcbdisconnect() beforehand; confirm.
+ */
+void
+iso_pcbdetach(isop)
+	struct isopcb *isop;
+{
+	struct socket *so = isop->isop_socket;
+
+	IFDEBUG(D_ISO)
+		printf("iso_pcbdetach(isop 0x%x socket 0x%x so 0x%x)\n", 
+			isop, isop->isop_socket, so);
+	ENDDEBUG
+#ifdef TPCONS
+	if (isop->isop_chan) {
+		register struct pklcd *lcp = (struct pklcd *)isop->isop_chan;
+		if (--isop->isop_refcnt > 0)	/* other references remain */
+			return;
+		if (lcp && lcp->lcd_state == DATA_TRANSFER) {
+			lcp->lcd_upper = 0;	/* cut the channel's upward links first */
+			lcp->lcd_upnext = 0;
+			pk_disconnect(lcp);
+		}
+		isop->isop_chan = 0;
+	}
+#endif
+	if (so) { /* in the x.25 domain, we sometimes have no socket */
+		so->so_pcb = 0;
+		sofree(so); 
+	}
+	IFDEBUG(D_ISO)
+		printf("iso_pcbdetach 2 \n");
+	ENDDEBUG
+	if (isop->isop_options)
+		(void)m_free(isop->isop_options);
+	IFDEBUG(D_ISO)
+		printf("iso_pcbdetach 3 \n");
+	ENDDEBUG
+	if (isop->isop_route.ro_rt)
+		rtfree(isop->isop_route.ro_rt);
+	IFDEBUG(D_ISO)
+		printf("iso_pcbdetach 3.1\n");
+	ENDDEBUG
+	if (isop->isop_clnpcache != NULL) {
+		struct clnp_cache *clcp =
+			mtod(isop->isop_clnpcache, struct clnp_cache *);
+		IFDEBUG(D_ISO)
+			printf("iso_pcbdetach 3.2: clcp 0x%x freeing clc_hdr x%x\n", 
+				clcp, clcp->clc_hdr);
+		ENDDEBUG
+		if (clcp->clc_hdr != NULL)	/* header mbuf goes before the cache mbuf */
+			m_free(clcp->clc_hdr);
+		IFDEBUG(D_ISO)
+			printf("iso_pcbdetach 3.3: freeing cache x%x\n", 
+				isop->isop_clnpcache);
+		ENDDEBUG
+		m_free(isop->isop_clnpcache);
+	}
+	IFDEBUG(D_ISO)
+		printf("iso_pcbdetach 4 \n");
+	ENDDEBUG
+	remque(isop);
+	IFDEBUG(D_ISO)
+		printf("iso_pcbdetach 5 \n");
+	ENDDEBUG
+	if (isop->isop_laddr && (isop->isop_laddr != &isop->isop_sladdr))	/* address lives outside the pcb */
+		m_freem(dtom(isop->isop_laddr));
+	free((caddr_t)isop, M_PCB);
+}
+
+
+/*
+ * FUNCTION:		iso_pcbnotify
+ *
+ * PURPOSE:			Walk this protocol's pcb queue (head) and, for every
+ *					pcb that has a socket and whose peer address matches
+ *					(siso), record (errno) in the socket's so_error when
+ *					(errno) is non-zero and call (notify) on the pcb when
+ *					(notify) is non-null.
+ *
+ * RETURNS:			Rien.
+ *
+ * SIDE EFFECTS:	May set so_error on matching sockets.
+ *
+ * NOTES:			(notify) is called at splimp!
+ */
+void
+iso_pcbnotify(head, siso, errno, notify)
+	struct isopcb *head;
+	register struct sockaddr_iso *siso;
+	int errno, (*notify)();
+{
+	register struct isopcb *pcb;
+	int ipl = splimp();
+
+	IFDEBUG(D_ISO)
+		printf("iso_pcbnotify(head 0x%x, notify 0x%x) dst:\n", head, notify);
+	ENDDEBUG
+	for (pcb = head->isop_next; pcb != head; pcb = pcb->isop_next) {
+		if (pcb->isop_socket != 0 && pcb->isop_faddr != 0 &&
+			SAME_ISOADDR(siso, pcb->isop_faddr)) {
+			/* this connection talks to (siso): tell it */
+			if (errno) 
+				pcb->isop_socket->so_error = errno;
+			if (notify)
+				(*notify)(pcb);
+			continue;
+		}
+		/* no socket, no peer, or a different peer: skip it */
+		IFDEBUG(D_ISO)
+			printf("iso_pcbnotify: CONTINUE isop 0x%x, sock 0x%x\n" ,
+				pcb, pcb->isop_socket);
+			printf("addrmatch cmp'd with (0x%x):\n", pcb->isop_faddr);
+			dump_isoaddr(pcb->isop_faddr);
+		ENDDEBUG
+	}
+	splx(ipl);
+	IFDEBUG(D_ISO)
+		printf("END OF iso_pcbnotify\n" );
+	ENDDEBUG
+}
+
+
+/*
+ * FUNCTION:		iso_pcblookup
+ *
+ * PURPOSE:			Search the queue named by (head) for a pcb, other
+ *					than the one that owns (laddr) itself, whose local
+ *					transport selector equals that of (laddr).
+ *					When (fportlen) is non-zero and the pcb has a peer
+ *					address, the peer's selector must also match the
+ *					(fportlen) bytes at (fport).
+ *					When (laddr) has a network address, the pcb's local
+ *					address must match it as well.
+ *
+ * RETURNS:			ptr to the first isopcb that matches these
+ *					arguments, o.w. returns zero.
+ *
+ * SIDE EFFECTS:	
+ *
+ * NOTES:			
+ */
+struct isopcb *
+iso_pcblookup(head, fportlen, fport, laddr)
+	struct isopcb *head;
+	register struct sockaddr_iso *laddr;
+	caddr_t fport;
+	int fportlen;
+{
+	register struct isopcb *pcb;
+	register struct sockaddr_iso *bound;	/* local address of the candidate */
+	register caddr_t want_tsel = TSEL(laddr);
+	unsigned int want_tlen = laddr->siso_tlen;
+
+	IFDEBUG(D_ISO)
+		printf("iso_pcblookup(head 0x%x laddr 0x%x fport 0x%x)\n", 
+			head, laddr, fport);
+	ENDDEBUG
+	for (pcb = head->isop_next; pcb != head; pcb = pcb->isop_next) {
+		bound = pcb->isop_laddr;
+		/* unbound pcbs and the owner of (laddr) never match */
+		if (bound == 0 || bound == laddr)
+			continue;
+		/* local selector: same length, same bytes */
+		if (bound->siso_tlen != want_tlen ||
+			bcmp(want_tsel, TSEL(bound), want_tlen))
+			continue;
+		/* peer selector, only when one was asked for and one exists */
+		if (fportlen && pcb->isop_faddr &&
+			bcmp(fport, TSEL(pcb->isop_faddr), (unsigned)fportlen))
+			continue;
+		/*	PHASE2
+		 *	addrmatch1 should be iso_addrmatch(a, b, mask)
+		 *	where mask is taken from isop->isop_laddrmask (new field)
+		 *	isop_lnetmask will also be available in isop
+		if (laddr != &zeroiso_addr &&
+			!iso_addrmatch1(laddr, &(isop->isop_laddr.siso_addr)))
+			continue;
+		*/
+		if (laddr->siso_nlen && (!SAME_ISOADDR(laddr, bound)))
+			continue;
+		return (pcb);
+	}
+	return (struct isopcb *)0;
+}
+#endif /* ISO */
diff --git a/sys/netiso/iso_pcb.h b/sys/netiso/iso_pcb.h
new file mode 100644
index 00000000000..aad76bcc065
--- /dev/null
+++ b/sys/netiso/iso_pcb.h
@@ -0,0 +1,113 @@
+/*-
+ * Copyright (c) 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)iso_pcb.h	8.1 (Berkeley) 6/10/93
+ */
+
+/***********************************************************
+		Copyright IBM Corporation 1987
+
+                      All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and its 
+documentation for any purpose and without fee is hereby granted, 
+provided that the above copyright notice appear in all copies and that
+both that copyright notice and this permission notice appear in 
+supporting documentation, and that the name of IBM not be
+used in advertising or publicity pertaining to distribution of the
+software without specific, written prior permission.  
+
+IBM DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING
+ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL
+IBM BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR
+ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+SOFTWARE.
+
+******************************************************************/
+
+/*
+ * ARGO Project, Computer Sciences Dept., University of Wisconsin - Madison
+ */
+/* $Header: iso_pcb.h,v 4.3 88/06/29 15:00:01 hagens Exp $ */
+/* $Source: /usr/argo/sys/netiso/RCS/iso_pcb.h,v $ */
+
+#define	MAXX25CRUDLEN	16	/* 16 bytes of call request user data */
+
+/*
+ * Common pcb structure for the argo protocol implementation; pcbs are linked through isop_next/isop_prev.
+ */
+struct isopcb {
+	struct	isopcb			*isop_next,*isop_prev; /* queue links to neighbouring pcbs */
+	struct	isopcb			*isop_head;	/* pointer back to chain of pcbs for
+								this protocol */
+	struct	socket			*isop_socket;	/* back pointer to socket */
+	struct	sockaddr_iso	*isop_laddr;	/* local address; may be zero (lookups test for it) */
+	struct	sockaddr_iso	*isop_faddr;	/* foreign address; may be zero (lookups test for it) */
+	struct	route_iso {
+		struct	rtentry 	*ro_rt;		/* route entry */
+		struct	sockaddr_iso ro_dst;	/* destination the route is for */
+	}						isop_route;			/* CLNP routing entry */
+	struct	mbuf			*isop_options;		/* CLNP options */
+	struct	mbuf			*isop_optindex;		/* CLNP options index */
+	struct	mbuf			*isop_clnpcache;	/* CLNP cached hdr */
+	caddr_t					isop_chan;		/* actually struct pklcb * */
+	u_short					isop_refcnt;		/* mult TP4 tpcb's -> here */
+	u_short					isop_lport;			/* MISLEADING work var */
+	u_short					isop_tuba_cached;	/* for tuba address ref cnts */
+	int						isop_x25crud_len;	/* x25 call request ud */
+	char					isop_x25crud[MAXX25CRUDLEN];	/* the call request user data itself */
+	struct ifaddr			*isop_ifa;		/* ESIS interface assoc w/sock */
+	struct	sockaddr_iso	isop_sladdr,		/* preallocated laddr */
+							isop_sfaddr;		/* preallocated faddr */
+};
+
+#ifdef sotorawcb	/* NOTE(review): presumably defined by the raw control block header -- confirm */
+/*
+ * Common structure pcb for raw clnp protocol access.
+ * Here are clnp specific extensions to the raw control block,
+ * and space is allocated to the necessary sockaddrs.
+ */
+struct rawisopcb {
+	struct	rawcb risop_rcb;		/* common control block prefix */
+	int		risop_flags;			/* flags, e.g. raw sockopts */
+	struct	isopcb risop_isop;		/* space for bound addresses, routes etc.*/
+};
+#endif	/* sotorawcb */
+
+#define	sotoisopcb(so)	((struct isopcb *)(so)->so_pcb)	/* socket -> isopcb */
+#define	sotorawisopcb(so)	((struct rawisopcb *)(so)->so_pcb)	/* socket -> rawisopcb */
+/* Both macros above merely cast so_pcb; neither checks which kind of pcb is stored there. */
+#ifdef KERNEL
+struct	isopcb *iso_pcblookup();	/* old-style declaration: no parameter list */
+#endif	/* KERNEL */
diff --git a/sys/netiso/iso_proto.c b/sys/netiso/iso_proto.c
new file mode 100644
index 00000000000..59575c7513b
--- /dev/null
+++ b/sys/netiso/iso_proto.c
@@ -0,0 +1,197 @@
+/*-
+ * Copyright (c) 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)iso_proto.c	8.1 (Berkeley) 6/10/93
+ */
+
+/***********************************************************
+		Copyright IBM Corporation 1987
+
+                      All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and its 
+documentation for any purpose and without fee is hereby granted, 
+provided that the above copyright notice appear in all copies and that
+both that copyright notice and this permission notice appear in 
+supporting documentation, and that the name of IBM not be
+used in advertising or publicity pertaining to distribution of the
+software without specific, written prior permission.  
+
+IBM DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING
+ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL
+IBM BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR
+ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+SOFTWARE.
+
+******************************************************************/
+
+/*
+ * ARGO Project, Computer Sciences Dept., University of Wisconsin - Madison
+ */
+/* $Header: iso_proto.c,v 4.4 88/09/08 08:38:42 hagens Exp $ 
+ * $Source: /usr/argo/sys/netiso/RCS/iso_proto.c,v $ 
+ *
+ * iso_proto.c : protocol switch tables in the ISO domain
+ *
+ * ISO protocol family includes TP, CLTP, CLNP, 8208
+ * TP and CLNP are implemented here.
+ */
+
+#ifdef	ISO
+#include <sys/param.h>
+#include <sys/socket.h>
+#include <sys/protosw.h>
+#include <sys/domain.h>
+#include <sys/mbuf.h>
+
+#include <net/radix.h>
+
+#include <netiso/iso.h>
+
+int clnp_output(), clnp_init(),clnp_slowtimo(),clnp_drain();	/* CLNP and raw CLNP */
+int rclnp_input(), rclnp_output(), rclnp_ctloutput(), raw_usrreq();
+int	clnp_usrreq();
+/* TP entry points (tpclnp_* and tpcons_* are the per-network-service input hooks) */
+int	tp_ctloutput(), tpclnp_ctlinput(), tpclnp_input(), tp_usrreq();
+int	tp_init(), tp_fasttimo(), tp_slowtimo(), tp_drain();
+int	cons_init(), tpcons_input();
+/* IS-IS, ES-IS, IDRP and CLTP entry points; most are installed in isosw[] below */
+int isis_input();
+int	esis_input(), esis_ctlinput(), esis_init(), esis_usrreq();
+int	idrp_input(), idrp_init(), idrp_usrreq();
+int	cltp_input(), cltp_ctlinput(), cltp_init(), cltp_usrreq(), cltp_output();
+
+#ifdef TUBA
+int	tuba_usrreq(), tuba_ctloutput(), tuba_init(), tuba_tcpinput(); 
+int	tuba_slowtimo(), tuba_fasttimo(); 
+#endif	/* TUBA */
+
+struct protosw isosw[] = {
+/*
+ *  Entry layout: type, domain, protocol, flags; input, output, ctlinput,
+ *  ctloutput; usrreq; init, fasttimo, slowtimo, drain.
+ *
+ *  First, a minimal SOCK_DGRAM entry through which net mgmt programs can
+ *  reach iso_control (iso ioctls).  THIS ONE MUST BE FIRST: socket() says
+ *  if(!proto) call pffindtype, which gets the first entry matching the type.
+ */
+{ SOCK_DGRAM,	&isodomain,		ISOPROTO_CLTP,		PR_ATOMIC|PR_ADDR,
+	0,			cltp_output,	0,					0,
+	cltp_usrreq,
+	cltp_init,	0, 				0,					0
+},
+
+/*
+ *	A datagram interface for clnp cannot co-exist with TP/CLNP
+ *  because CLNP has no way to discriminate incoming TP packets from
+ *  packets coming in for any other higher layer protocol.
+ *  Old way: set it up so that pffindproto(... dgm, clnp) fails.
+ *  New way: let pffindproto work (for x.25, thank you) but create
+ *  	a clnp_usrreq() that returns error on PRU_ATTACH.
+ */
+{SOCK_DGRAM,	&isodomain,		ISOPROTO_CLNP,		0,
+ 0,				clnp_output,	0,					0,
+ clnp_usrreq,
+ clnp_init,		0,				clnp_slowtimo, 		clnp_drain,
+},
+
+/* raw clnp */
+{ SOCK_RAW,		&isodomain,		ISOPROTO_RAW,		PR_ATOMIC|PR_ADDR,
+  rclnp_input,	rclnp_output,	0,					rclnp_ctloutput,
+  clnp_usrreq,
+  0,			0,				0,					0
+},
+
+/* ES-IS protocol */
+{ SOCK_DGRAM,	&isodomain,		ISOPROTO_ESIS,		PR_ATOMIC|PR_ADDR,
+  esis_input,	0,				esis_ctlinput,		0,
+  esis_usrreq,
+  esis_init,	0,				0,					0
+},
+
+/* ISOPROTO_INTRAISIS (shares esis_usrreq with the ES-IS entry) */
+{ SOCK_DGRAM,	&isodomain,		ISOPROTO_INTRAISIS,	PR_ATOMIC|PR_ADDR,
+  isis_input,	0,				0,					0,
+  esis_usrreq,
+  0,			0,				0,					0
+},
+
+/* ISOPROTO_IDRP */
+{ SOCK_DGRAM,	&isodomain,		ISOPROTO_IDRP,		PR_ATOMIC|PR_ADDR,
+  idrp_input,	0,				0,					0,
+  idrp_usrreq,
+  idrp_init,	0,				0,					0
+},
+
+/* ISOPROTO_TP */
+{ SOCK_SEQPACKET,	&isodomain,	ISOPROTO_TP,		PR_CONNREQUIRED|PR_WANTRCVD,
+  tpclnp_input,	0,				tpclnp_ctlinput,	tp_ctloutput,
+  tp_usrreq,
+  tp_init,		tp_fasttimo,	tp_slowtimo,		tp_drain,
+},
+/* ISOPROTO_TCP (TUBA): the slow-timeout slot must hold tuba_slowtimo, not a second tuba_fasttimo */
+#ifdef TUBA
+{ SOCK_STREAM,	&isodomain,		ISOPROTO_TCP,		PR_CONNREQUIRED|PR_WANTRCVD,
+  tuba_tcpinput,	0,			0,					tuba_ctloutput,
+  tuba_usrreq,
+  tuba_init,	tuba_fasttimo,	tuba_slowtimo,		0
+},
+#endif
+
+#ifdef TPCONS
+/* ISOPROTO_TP0 (TPCONS) */
+{ SOCK_SEQPACKET,	&isodomain,	ISOPROTO_TP0,		PR_CONNREQUIRED|PR_WANTRCVD,
+  tpcons_input,		0,			0,					tp_ctloutput,
+  tp_usrreq,
+  cons_init,		0,			0,					0,
+},
+#endif
+
+};
+
+
+struct domain isodomain = {	/* positional initializer: order must follow struct domain */
+    AF_ISO, 			/* family */
+	"iso-domain", 		/* name */
+	0,					/* initialize routine */
+	0,					/* externalize access rights */
+	0,					/* dispose of internalized rights */
+	isosw,				/* protosw */
+	&isosw[sizeof(isosw)/sizeof(isosw[0])], /* NPROTOSW: one past the last isosw[] entry */
+	0,					/* next */
+	rn_inithead,		/* rtattach */
+	48,					/* rtoffset -- NOTE(review): magic constant; derivation not shown here */
+	sizeof(struct sockaddr_iso) /* maxkeylen */
+};
+#endif	/* ISO */
diff --git a/sys/netiso/iso_snpac.c b/sys/netiso/iso_snpac.c
new file mode 100644
index 00000000000..2473ae7a12d
--- /dev/null
+++ b/sys/netiso/iso_snpac.c
@@ -0,0 +1,736 @@
+/*-
+ * Copyright (c) 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)iso_snpac.c	8.1 (Berkeley) 6/10/93
+ */
+
+/***********************************************************
+		Copyright IBM Corporation 1987
+
+                      All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and its 
+documentation for any purpose and without fee is hereby granted, 
+provided that the above copyright notice appear in all copies and that
+both that copyright notice and this permission notice appear in 
+supporting documentation, and that the name of IBM not be
+used in advertising or publicity pertaining to distribution of the
+software without specific, written prior permission.  
+
+IBM DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING
+ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL
+IBM BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR
+ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+SOFTWARE.
+
+******************************************************************/
+
+/*
+ * ARGO Project, Computer Sciences Dept., University of Wisconsin - Madison
+ */
+/* $Header: iso_snpac.c,v 1.8 88/09/19 13:51:36 hagens Exp $ */
+/* $Source: /usr/argo/sys/netiso/RCS/iso_snpac.c,v $ */
+
+#ifdef ISO
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/mbuf.h>
+#include <sys/domain.h>
+#include <sys/protosw.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/errno.h>
+#include <sys/ioctl.h>
+#include <sys/syslog.h>
+
+#include <net/if.h>
+#include <net/if_dl.h>
+#include <net/route.h>
+
+#include <netiso/iso.h>
+#include <netiso/iso_var.h>
+#include <netiso/iso_snpac.h>
+#include <netiso/clnp.h>
+#include <netiso/clnp_stat.h>
+#include <netiso/esis.h>
+#include <netiso/argo_debug.h>
+
+int 				iso_systype = SNPA_ES;	/* default to be an ES */
+extern short	esis_holding_time, esis_config_time, esis_esconfig_time;
+extern struct	timeval time;
+extern void esis_config();
+extern int hz;
+static void snpac_fixdstandmask();
+/* blank_siso: a sockaddr_iso with only its length and family set (aliased as zsi below). */
+struct sockaddr_iso blank_siso = {sizeof(blank_siso), AF_ISO};
+extern u_long iso_hashchar();
+static struct sockaddr_iso	/* file-scope scratch sockaddrs, overwritten by the routines below */
+	dst	= {sizeof(dst), AF_ISO},
+	gte	= {sizeof(dst), AF_ISO},
+	src	= {sizeof(dst), AF_ISO},
+	msk	= {sizeof(dst), AF_ISO},
+	zmk = {0};	/* all-zero sockaddr; passed as the mask by snpac_logdefis */
+#define zsi blank_siso
+#define zero_isoa	zsi.siso_addr
+#define zap_isoaddr(a, b) {Bzero(&a.siso_addr, sizeof(*r)); r = b; \
+	   Bcopy(r, &a.siso_addr, 1 + (r)->isoa_len);}
+#define S(x) ((struct sockaddr *)&(x))
+/* NB: zap_isoaddr() uses a local `r' (struct iso_addr *) that every caller must declare. */
+static struct sockaddr_dl blank_dl = {sizeof(blank_dl), AF_LINK};
+static struct sockaddr_dl gte_dl;	/* scratch link-level gateway, filled in by snpac_add */
+#define zap_linkaddr(a, b, c, i) \
+	(*a = blank_dl, bcopy(b, a->sdl_data, a->sdl_alen = c), a->sdl_index = i)
+
+/*
+ *	We only keep track of a single IS at a time: its route entry, or zero when none is known.
+ */
+struct rtentry	*known_is;
+
+/*
+ *	Addresses taken from NBS agreements, December 1987.
+ *
+ *	These addresses assume on-the-wire transmission of least significant
+ *	bit first. This is the method used by 802.3. When these
+ *	addresses are passed to the token ring driver, (802.5), they
+ *	must be bit-swapped because 802.5 transmission order is MSb first.
+ *
+ *	Furthermore, according to IBM Austin, these addresses are not
+ *	true token ring multicast addresses. More work is necessary
+ *	to get multicast to work right on token ring.
+ *
+ *	Currently, the token ring driver does not handle multicast, so
+ *	these addresses are converted into the broadcast address in
+ *	lan_output(). That means that if these multicast addresses change
+ *	the token ring driver must be altered.
+ */
+char all_es_snpa[] = { 0x09, 0x00, 0x2b, 0x00, 0x00, 0x04 };	/* "all ES" multicast */
+char all_is_snpa[] = { 0x09, 0x00, 0x2b, 0x00, 0x00, 0x05 };	/* "all IS" multicast */
+char all_l1is_snpa[] = {0x01, 0x80, 0xc2, 0x00, 0x00, 0x14};	/* presumably all level-1 ISs -- confirm */
+char all_l2is_snpa[] = {0x01, 0x80, 0xc2, 0x00, 0x00, 0x15};	/* presumably all level-2 ISs -- confirm */
+
+union sockunion {	/* lets llc_rtrequest view a route's gateway sockaddr under several types */
+	struct sockaddr_iso siso;	/* as an ISO address */
+	struct sockaddr_dl	sdl;	/* as a link-level address */
+	struct sockaddr		sa;		/* as a generic sockaddr */
+};
+
+/*
+ * FUNCTION:		llc_rtrequest
+ * PURPOSE:			Manage routing table entries specific to LLC for ISO:
+ *					attach a struct llinfo_llc to a link-level route on
+ *					RTM_ADD/RTM_RESOLVE and release it again on RTM_DELETE.
+ * NOTES:			This does a lot of obscure magic; gateway routes are left
+ *					alone, and paths that `return' skip the final MTU default.
+ */
+llc_rtrequest(req, rt, sa)
+int req;					/* RTM_ADD, RTM_RESOLVE and RTM_DELETE are handled */
+register struct rtentry *rt;	/* route the request applies to */
+struct sockaddr *sa;		/* only printed by the debug trace below */
+{
+	register union sockunion *gate = (union sockunion *)rt->rt_gateway;
+	register struct llinfo_llc *lc = (struct llinfo_llc *)rt->rt_llinfo, *lc2;	/* lc2 is unused */
+	struct rtentry *rt2;	/* unused */
+	struct ifnet *ifp = rt->rt_ifp;
+	int addrlen = ifp->if_addrlen;
+#define LLC_SIZE 3 /* XXXXXX do this right later */
+
+	IFDEBUG (D_SNPA)
+		printf("llc_rtrequest(%d, %x, %x)\n", req, rt, sa);
+	ENDDEBUG
+	if (rt->rt_flags & RTF_GATEWAY)
+		return;
+	else switch (req) {
+	case RTM_ADD:
+		/*
+		 * Case 1: This route may come from a route to iface with mask
+		 * or from a default route.
+		 */
+		if (rt->rt_flags & RTF_CLONING) {
+			iso_setmcasts(ifp, req);	/* multicast reception follows the cloning route */
+			rt_setgate(rt, rt_key(rt), &blank_dl);	/* gateway becomes an empty link address */
+			return;
+		}
+		if (lc != 0)
+			return; /* happens on a route change */
+		/* FALLTHROUGH */
+	case RTM_RESOLVE:
+		/*
+		 * Case 2:  This route may come from cloning, or a manual route
+		 * add with a LL address.
+		 */
+		if (gate->sdl.sdl_family != AF_LINK) {
+			log(LOG_DEBUG, "llc_rtrequest: got non-link non-gateway route\n");
+			break;
+		}
+		R_Malloc(lc, struct llinfo_llc *, sizeof (*lc));
+		rt->rt_llinfo = (caddr_t)lc;	/* stored before the failure check, so zero on failure */
+		if (lc == 0) {
+			log(LOG_DEBUG, "llc_rtrequest: malloc failed\n");
+			break;
+		}
+		Bzero(lc, sizeof(*lc));
+		lc->lc_rt = rt;
+		rt->rt_flags |= RTF_LLINFO;
+		insque(lc, &llinfo_llc);	/* link onto the llinfo_llc queue walked by snpac_age */
+		if (gate->sdl.sdl_alen == sizeof(struct esis_req) + addrlen) {
+			gate->sdl.sdl_alen -= sizeof(struct esis_req);	/* an esis_req trails the link address */
+			bcopy(addrlen + LLADDR(&gate->sdl),
+				  (caddr_t)&lc->lc_er, sizeof(lc->lc_er));
+		} else if (gate->sdl.sdl_alen == addrlen)
+			lc->lc_flags = (SNPA_ES | SNPA_VALID | SNPA_PERM);	/* bare link address supplied */
+		break;
+	case RTM_DELETE:
+		if (rt->rt_flags & RTF_CLONING)
+			iso_setmcasts(ifp, req);
+		if (lc == 0)
+			return;
+		remque(lc);
+		Free(lc);
+		rt->rt_llinfo = 0;
+		rt->rt_flags &= ~RTF_LLINFO;
+		break;
+	}
+	if (rt->rt_rmx.rmx_mtu == 0) {	/* no MTU recorded yet: derive one from the interface */
+			rt->rt_rmx.rmx_mtu = rt->rt_ifp->if_mtu - LLC_SIZE;
+	}
+}
+/*
+ * FUNCTION:		iso_setmcasts
+ *
+ * PURPOSE:			Enable/Disable ESIS/ISIS multicast reception on interfaces.
+ *					RTM_ADD joins the four groups; any other req leaves them.
+ * NOTES:			Resets the interface (or complains) when a driver asks for it.
+ */
+iso_setmcasts(ifp, req)
+	struct	ifnet *ifp;
+	int		req;
+{
+	static char *addrlist[] =
+		{ all_es_snpa, all_is_snpa, all_l1is_snpa, all_l2is_snpa, 0};
+	struct ifreq ifr;
+	register caddr_t *cpp;
+	int		doreset = 0;
+
+	bzero((caddr_t)&ifr, sizeof(ifr));
+	for (cpp = (caddr_t *)addrlist; *cpp; cpp++) {
+		bcopy(*cpp, (caddr_t)ifr.ifr_addr.sa_data, 6);
+		/* the braces are required: a bare else would pair with the inner if */
+		if (req == RTM_ADD) {
+			if (ether_addmulti(&ifr, (struct arpcom *)ifp) == ENETRESET)
+				doreset++;
+		} else if (ether_delmulti(&ifr, (struct arpcom *)ifp) == ENETRESET)
+			doreset++;
+	}
+	if (doreset) {
+		if (ifp->if_reset)
+			(*ifp->if_reset)(ifp->if_unit);
+		else
+			printf("iso_setmcasts: %s%d needs reseting to receive iso mcasts\n",
+					ifp->if_name, ifp->if_unit);
+	}
+}
+/*
+ * FUNCTION:		iso_snparesolve
+ *
+ * PURPOSE:			Choose the snpa (link-level) address to use for an
+ *					iso destination that has no snpa of its own on record.
+ *
+ * RETURNS:			0 when an snpa was stored through (snpa)/(snpa_len)
+ *					ENETUNREACH when no candidate address exists
+ *
+ * SIDE EFFECTS:	writes *snpa_len, and that many bytes at snpa.
+ *
+ * NOTES:			Candidates are tried in this order:
+ *					1. an snpa embedded in the nsap (AFI_SNA), used as is;
+ *					2. unless we are an IS, the gateway address of the
+ *					   known default IS if its entry is still valid;
+ *					3. on broadcast interfaces, the "all ES" multicast
+ *					   address (query configuration function, 9542
+ *					   sec 6.5).
+ *					A mechanism is needed to prevent this function from
+ *					being invoked if the system is an IS.
+ */
+iso_snparesolve(ifp, dest, snpa, snpa_len)
+struct	ifnet *ifp;			/* outgoing interface */
+struct	sockaddr_iso *dest;	/* destination */
+caddr_t	snpa;				/* RESULT: snpa to be used */
+int		*snpa_len;			/* RESULT: length of snpa */
+{
+	struct	llinfo_llc *is_info;	/* link-level info of the default IS */
+	caddr_t	chosen;				/* address selected below */
+	int 	chosen_len;
+
+	/*
+	 *	This hack allows us to send esis packets that have the destination
+	 *	snpa address embedded in the destination nsap address: the bytes
+	 *	following the AFI are the subnetwork address, returned immediately.
+	 */
+	if (dest->siso_data[0] == AFI_SNA) {
+		IFDEBUG(D_SNPA)
+			printf("iso_snparesolve: return SN address\n");
+		ENDDEBUG
+		chosen = (caddr_t) dest->siso_data + 1;
+		chosen_len = dest->siso_nlen - 1;	/* subtract size of AFI */
+		goto copyout;
+	}
+	/*
+	 *	If we are an IS, we can't do much with the packet;
+	 *	otherwise use the default IS, provided we know a valid one.
+	 */
+	if (iso_systype != SNPA_IS && known_is != 0) {
+		is_info = (struct llinfo_llc *)known_is->rt_llinfo;
+		if (is_info && (is_info->lc_flags & SNPA_VALID)) {
+			register struct sockaddr_dl *sdl =
+				(struct sockaddr_dl *)(known_is->rt_gateway);
+			chosen = LLADDR(sdl);
+			chosen_len = sdl->sdl_alen;
+			goto copyout;
+		}
+	}
+	/*
+	 *	no IS, no match: fall back on the "all es" multicast address.
+	 *	Potential problem: a destination on the subnet that never sends an
+	 *	ESH but does answer with a TP CC is always sent to via "all es".
+	 */
+	if ((ifp->if_flags & IFF_BROADCAST) == 0)
+		return (ENETUNREACH);
+	chosen = (caddr_t)all_es_snpa;
+	chosen_len = ifp->if_addrlen;
+copyout:
+	*snpa_len = chosen_len;
+	bcopy(chosen, snpa, chosen_len);
+	return (0);
+}
+
+
+/*
+ * FUNCTION:		snpac_free
+ *
+ * PURPOSE:			free an entry in the iso address map table
+ *
+ * RETURNS:			nothing
+ *
+ * SIDE EFFECTS:	forgets known_is when it is this entry's route
+ *
+ * NOTES:			The associated route is deleted as well, but only when it
+ *					is RTF_UP and has RTF_DYNAMIC or RTF_MODIFIED set.
+ */
+snpac_free(lc)
+register struct llinfo_llc *lc;		/* entry to free */
+{
+	register struct rtentry *rt = lc->lc_rt;
+	register struct iso_addr *r;	/* unused in this function */
+
+	if (known_is == rt)
+		known_is = 0;
+	if (rt && (rt->rt_flags & RTF_UP) &&
+		(rt->rt_flags & (RTF_DYNAMIC | RTF_MODIFIED))) {
+			RTFREE(rt);	/* NOTE(review): rt is still used below and released again */
+			rtrequest(RTM_DELETE, rt_key(rt), rt->rt_gateway, rt_mask(rt),
+						rt->rt_flags, (struct rtentry **)0);
+		RTFREE(rt);	/* NOTE(review): second release -- confirm the refcount balance */
+	}
+}
+
+/*
+ * FUNCTION:		snpac_add
+ *
+ * PURPOSE:			Add an entry to the snpa cache
+ *
+ * RETURNS:			1 if a route or link address was newly set, else 0
+ *					(0 is also returned when the route cannot be added).
+ * SIDE EFFECTS:	overwrites the file-scope scratch sockaddrs dst and gte_dl
+ *					(msk too when nsellength is set); may call snpac_logdefis.
+ * NOTES:			If entry already exists, then update holding time.
+ */
+snpac_add(ifp, nsap, snpa, type, ht, nsellength)
+struct ifnet		*ifp;		/* interface info is related to */
+struct iso_addr		*nsap;		/* nsap to add */
+caddr_t				snpa;		/* translation */
+char				type;		/* SNPA_IS or SNPA_ES */
+u_short				ht;			/* holding time (in seconds) */
+int					nsellength;	/* nsaps may differ only in trailing bytes */
+{
+	register struct	llinfo_llc *lc;
+	register struct rtentry *rt;
+	struct	rtentry *mrt = 0;
+	register struct	iso_addr *r; /* for zap_isoaddr macro */
+	int		snpalen = min(ifp->if_addrlen, MAX_SNPALEN);
+	int		new_entry = 0, index = ifp->if_index, iftype = ifp->if_type;
+
+	IFDEBUG(D_SNPA)
+		printf("snpac_add(%x, %x, %x, %x, %x, %x)\n",
+			ifp, nsap, snpa, type, ht, nsellength);
+	ENDDEBUG
+	zap_isoaddr(dst, nsap);	/* dst = nsap wrapped in the scratch sockaddr */
+	rt = rtalloc1(S(dst), 0);
+	IFDEBUG(D_SNPA)
+		printf("snpac_add: rtalloc1 returns %x\n", rt);
+	ENDDEBUG
+	if (rt == 0) {
+		struct sockaddr *netmask;
+		int flags;
+		add:	/* also entered by goto from the else branch below */
+		if (nsellength) {
+			netmask = S(msk); flags = RTF_UP;
+			snpac_fixdstandmask(nsellength);	/* trim dst and build msk to match */
+		} else {
+			netmask = 0; flags = RTF_UP | RTF_HOST;
+		}
+		new_entry = 1;
+		zap_linkaddr((&gte_dl), snpa, snpalen, index);
+		gte_dl.sdl_type = iftype;
+		if (rtrequest(RTM_ADD, S(dst), S(gte_dl), netmask, flags, &mrt) ||
+			mrt == 0)
+			return (0);
+		rt = mrt;
+		rt->rt_refcnt--;	/* NOTE(review): presumably returns the reference handed out via mrt */
+	} else {
+		register struct sockaddr_dl *sdl = (struct sockaddr_dl *)rt->rt_gateway;
+		rt->rt_refcnt--;	/* NOTE(review): presumably drops the reference taken by rtalloc1 */
+		if ((rt->rt_flags & RTF_LLINFO) == 0)
+			goto add;	/* route found carries no link-level info: add one that does */
+		if (nsellength && (rt->rt_flags & RTF_HOST)) {
+			if (rt->rt_refcnt == 0) {	/* host route with no other holders: replace it */
+				rtrequest(RTM_DELETE, S(dst), (struct sockaddr *)0,
+					(struct sockaddr *)0, 0, (struct rtentry *)0);
+				rt = 0;
+				goto add;
+			} else {	/* still referenced: also add the nsap with its trailing bytes zeroed */
+				static struct iso_addr nsap2; register char *cp;
+				nsap2 = *nsap;
+				cp = nsap2.isoa_genaddr + nsap->isoa_len - nsellength;
+				while (cp < (char *)(1 + &nsap2))
+					*cp++ = 0;
+				(void) snpac_add(ifp, &nsap2, snpa, type, ht, nsellength);
+			}
+		}
+		if (sdl->sdl_family != AF_LINK || sdl->sdl_alen == 0) {	/* no link address stored yet */
+			int old_sdl_len = sdl->sdl_len;
+			if (old_sdl_len < sizeof(*sdl)) {
+				log(LOG_DEBUG, "snpac_add: cant make room for lladdr\n");
+				return (0);
+			}
+			zap_linkaddr(sdl, snpa, snpalen, index);
+			sdl->sdl_len = old_sdl_len;	/* zap_linkaddr overwrote the length; restore it */
+			sdl->sdl_type = iftype;
+			new_entry = 1;
+		}
+	}
+	if ((lc = (struct llinfo_llc *)rt->rt_llinfo) == 0)
+		panic("snpac_rtrequest");	/* NOTE(review): message names snpac_rtrequest, not this function */
+	rt->rt_rmx.rmx_expire = ht + time.tv_sec;	/* absolute expiry time, checked by snpac_age */
+	lc->lc_flags = SNPA_VALID | type;
+	if ((type & SNPA_IS) && !(iso_systype & SNPA_IS))
+		snpac_logdefis(rt);	/* we are not an IS ourselves: adopt this IS as the default */
+	return (new_entry);
+}
+
+static void
+snpac_fixdstandmask(nsellength)	/* nsellength: undeclared old-style parameter, so implicitly int */
+{
+	register char *cp = msk.siso_data, *cplim;
+	/* Trim nsellength trailing bytes off dst's nsap and build the matching mask in msk. */
+	cplim = cp + (dst.siso_nlen -= nsellength);	/* end of the nsap bytes that are kept */
+	msk.siso_len = cplim - (char *)&msk;	/* the mask sockaddr ends right after them */
+	msk.siso_nlen = 0;
+	while (cp < cplim)
+		*cp++ = -1;				/* all-ones over the kept nsap bytes */
+	while (cp < (char *)msk.siso_pad)
+		*cp++ = 0;				/* zero the rest of msk's data, up to siso_pad */
+	for (cp = dst.siso_data + dst.siso_nlen; cp < (char *)dst.siso_pad; )
+		*cp++ = 0;				/* likewise zero the trimmed tail of dst */
+}
+
+/*
+ * FUNCTION:		snpac_ioctl
+ *
+ * PURPOSE:			Set (SIOCSSTYPE) or get (SIOCGSTYPE) the system type and
+ *					the esis timer parameters
+ *
+ * RETURNS:			0 on success; EPERM if a set is attempted on a socket
+ *					without SS_PRIV; EINVAL for a bad type or unknown cmd
+ *
+ * SIDE EFFECTS:	a set that changes esis_esconfig_time restarts esis_config
+ */
+snpac_ioctl (so, cmd, data)
+struct socket *so;
+int		cmd;	/* ioctl to process */
+caddr_t	data;	/* data for the cmd */
+{
+	register struct systype_req *rq = (struct systype_req *)data;
+
+	IFDEBUG(D_IOCTL)
+		if (cmd == SIOCSSTYPE)
+			printf("snpac_ioctl: cmd set, type x%x, ht %d, ct %d\n",
+				rq->sr_type, rq->sr_holdt, rq->sr_configt);
+		else
+			printf("snpac_ioctl: cmd get\n");
+	ENDDEBUG
+	/* get: report the current settings */
+	if (cmd == SIOCGSTYPE) {
+		rq->sr_type = iso_systype;
+		rq->sr_holdt = esis_holding_time;
+		rq->sr_configt = esis_config_time;
+		rq->sr_esconfigt = esis_esconfig_time;
+		return (0);
+	}
+	if (cmd != SIOCSSTYPE)
+		return (EINVAL);
+	/* set: privileged, and exactly one of ES / IS must be requested */
+	if ((so->so_state & SS_PRIV) == 0)
+		return (EPERM);
+	if ((rq->sr_type & (SNPA_ES|SNPA_IS)) == (SNPA_ES|SNPA_IS))
+		return(EINVAL);
+	if (rq->sr_type & SNPA_ES)
+		iso_systype = SNPA_ES;
+	else if (rq->sr_type & SNPA_IS)
+		iso_systype = SNPA_IS;
+	else
+		return(EINVAL);
+	esis_holding_time = rq->sr_holdt;
+	esis_config_time = rq->sr_configt;
+	if (esis_esconfig_time != rq->sr_esconfigt) {
+		untimeout(esis_config, (caddr_t)0);
+		esis_esconfig_time = rq->sr_esconfigt;
+		esis_config();
+	}
+	return (0);
+}
+
+/*
+ * FUNCTION:		snpac_logdefis
+ *
+ * PURPOSE:			Mark the IS passed as the default IS
+ *
+ * RETURNS:			nothing
+ *
+ * SIDE EFFECTS:	known_is is replaced (old one RTFREE'd, new one gets a ref);
+ *					a route keyed by zsi is added, or an existing one re-gatewayed.
+ * NOTES:			No-op if sc is already known_is or is not a host route.
+ */
+snpac_logdefis(sc)
+register struct rtentry *sc;	/* route entry of the IS to adopt */
+{
+	register struct iso_addr *r;	/* unused in this function */
+	register struct sockaddr_dl *sdl = (struct sockaddr_dl *)sc->rt_gateway;	/* not used below */
+	register struct rtentry *rt;
+
+	if (known_is == sc || !(sc->rt_flags & RTF_HOST))
+		return;
+	if (known_is) {
+		RTFREE(known_is);
+	}
+	known_is = sc;
+	sc->rt_refcnt++;	/* known_is holds its own reference on the route */
+	rt = rtalloc1((struct sockaddr *)&zsi, 0);
+	if (rt == 0)
+		rtrequest(RTM_ADD, S(zsi), rt_key(sc), S(zmk),
+						RTF_DYNAMIC|RTF_GATEWAY, 0);
+	else {	/* NOTE(review): rt from rtalloc1 is never released here (snpac_add decrements) -- confirm */
+		if ((rt->rt_flags & RTF_DYNAMIC) && 
+		    (rt->rt_flags & RTF_GATEWAY) && rt_mask(rt)->sa_len == 0)
+			rt_setgate(rt, rt_key(rt), rt_key(sc));	/* point the existing route at the new IS */
+	}
+}
+
+/*
+ * FUNCTION:		snpac_age
+ *
+ * PURPOSE:			Time out snpac entries
+ *
+ * RETURNS:			nothing
+ *
+ * SIDE EFFECTS:	reschedules itself via timeout(); expired entries are
+ *					handed to snpac_free().
+ *
+ * NOTES:			Each valid entry's route carries an absolute expiry
+ *					time in rt_rmx.rmx_expire (set by snpac_add as current
+ *					time + holding time).  An entry is timed out once that
+ *					time is older than the current time; an expiry of
+ *					zero means the entry is never timed out here.
+ */
+void
+snpac_age()
+{
+	register struct	llinfo_llc *cur, *next;
+	register struct	rtentry *rt;
+
+	/* re-arm first, so the scan repeats every SNPAC_AGE * hz ticks */
+	timeout(snpac_age, (caddr_t)0, SNPAC_AGE * hz);
+
+	cur = llinfo_llc.lc_next;
+	while (cur != & llinfo_llc) {
+		/* remember the successor before cur is handed to snpac_free */
+		next = cur->lc_next;
+		if (cur->lc_flags & SNPA_VALID) {
+			rt = cur->lc_rt;
+			if (rt->rt_rmx.rmx_expire != 0 &&
+				rt->rt_rmx.rmx_expire < time.tv_sec)
+				snpac_free(cur);
+		}
+		cur = next;
+	}
+}
+
+/*
+ * FUNCTION:		snpac_ownmulti
+ *
+ * PURPOSE:			Determine if the snpa address is a multicast address
+ *					of the same type as the system.
+ *
+ * RETURNS:			1 (true) or 0 (false)
+ *
+ * NOTES:			Used by interface drivers when not in eavesdrop mode
+ *					as interim kludge until real multicast addresses can
+ *					be configured.  Only the first (len) bytes are compared.
+ */
+snpac_ownmulti(snpa, len)
+caddr_t	snpa;
+u_int	len;
+{
+	/* as an ES we own "all ES"; as an IS we own "all IS" */
+	if ((iso_systype & SNPA_ES) && bcmp(snpa, (caddr_t)all_es_snpa, len) == 0)
+		return (1);
+	if ((iso_systype & SNPA_IS) && bcmp(snpa, (caddr_t)all_is_snpa, len) == 0)
+		return (1);
+	return (0);
+}
+
+/*
+ * FUNCTION:		snpac_flushifp
+ *
+ * PURPOSE:			Flush entries associated with specific ifp
+ *
+ * RETURNS:			nothing
+ *
+ * NOTES:			The successor is saved before each snpac_free(), as in
+ *					snpac_age(), so the walk never reads a released entry.
+ */
+snpac_flushifp(ifp)
+struct ifnet	*ifp;
+{
+	register struct llinfo_llc	*lc, *nlc;
+
+	for (lc = llinfo_llc.lc_next; lc != & llinfo_llc; lc = nlc) {
+		nlc = lc->lc_next;	/* fetch before snpac_free() can release lc */
+		if (lc->lc_rt->rt_ifp == ifp && (lc->lc_flags & SNPA_VALID))
+			snpac_free(lc);
+	}
+}
+
+/*
+ * FUNCTION:		snpac_rtrequest
+ *
+ * PURPOSE:			Make a routing request
+ *
+ * RETURNS:			nothing
+ *
+ * SIDE EFFECTS:	overwrites the scratch sockaddrs dst, gte (and msk when
+ *					a netmask is supplied) before calling rtrequest().
+ * NOTES:			In the future, this should make a request of a user
+ *					level routing daemon.
+ */
+snpac_rtrequest(req, host, gateway, netmask, flags, ret_nrt)
+int				req;
+struct iso_addr	*host;
+struct iso_addr	*gateway;
+struct iso_addr	*netmask;
+short			flags;
+struct rtentry	**ret_nrt;
+{
+	register struct iso_addr *r;	/* scratch required by zap_isoaddr */
+	struct sockaddr *maskp = (struct sockaddr *)0;
+
+	IFDEBUG(D_SNPA)
+		printf("snpac_rtrequest: ");
+		switch (req) {
+		case RTM_ADD:		printf("add"); break;
+		case RTM_DELETE:	printf("delete"); break;
+		default:			printf("unknown command"); break;
+		}
+		printf(" dst: %s\n", clnp_iso_addrp(host));
+		printf("\tgateway: %s\n", clnp_iso_addrp(gateway));
+	ENDDEBUG
+
+	/* wrap the bare iso addresses in the file-scope scratch sockaddrs */
+	zap_isoaddr(dst, host);
+	zap_isoaddr(gte, gateway);
+	if (netmask) {
+		zap_isoaddr(msk, netmask);
+		msk.siso_nlen = 0;
+		msk.siso_len = msk.siso_pad - (u_char *)&msk;
+		maskp = S(msk);
+	}
+
+	rtrequest(req, S(dst), S(gte), maskp, flags, ret_nrt);
+}
+
+/*
+ * FUNCTION:		snpac_addrt
+ *
+ * PURPOSE:			Ask rtredirect() to route (host) through (gateway)
+ *
+ * RETURNS:			nothing
+ *
+ * SIDE EFFECTS:	overwrites the scratch sockaddrs dst, gte (and msk when
+ *					a netmask is supplied).
+ *
+ * NOTES:			S(gte) is passed both as the gateway and as the fifth
+ *					argument of rtredirect(); (ifp) is unused.
+ */
+snpac_addrt(ifp, host, gateway, netmask)
+struct ifnet *ifp;
+struct iso_addr	*host, *gateway, *netmask;
+{
+	register struct iso_addr *r;	/* scratch required by zap_isoaddr */
+	struct sockaddr *maskp;
+	int redirflags;
+
+	zap_isoaddr(dst, host);
+	zap_isoaddr(gte, gateway);
+	if (netmask == 0) {
+		/* no mask: redirect a single host */
+		maskp = (struct sockaddr *)0;
+		redirflags = RTF_DONE | RTF_HOST;
+	} else {
+		zap_isoaddr(msk, netmask);
+		msk.siso_nlen = 0;
+		msk.siso_len = msk.siso_pad - (u_char *)&msk;
+		maskp = S(msk);
+		redirflags = RTF_DONE;
+	}
+	rtredirect(S(dst), S(gte), maskp, redirflags, S(gte), 0);
+}
+#endif	/* ISO */
diff --git a/sys/netiso/iso_snpac.h b/sys/netiso/iso_snpac.h
new file mode 100644
index 00000000000..105e8dd11d6
--- /dev/null
+++ b/sys/netiso/iso_snpac.h
@@ -0,0 +1,112 @@
+/*-
+ * Copyright (c) 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)iso_snpac.h	8.1 (Berkeley) 6/10/93
+ */
+
+/***********************************************************
+		Copyright IBM Corporation 1987
+
+                      All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and its 
+documentation for any purpose and without fee is hereby granted, 
+provided that the above copyright notice appear in all copies and that
+both that copyright notice and this permission notice appear in 
+supporting documentation, and that the name of IBM not be
+used in advertising or publicity pertaining to distribution of the
+software without specific, written prior permission.  
+
+IBM DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING
+ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL
+IBM BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR
+ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+SOFTWARE.
+
+******************************************************************/
+
+/*
+ * ARGO Project, Computer Sciences Dept., University of Wisconsin - Madison
+ */
+
+#define	MAX_SNPALEN		8			/* curiously equal to sizeof x.121 (
+										plus 1 for nibble len) addr */
+/*
+ * One NSAP-to-SNPA mapping: the NSAP address, the SNPA bytes and their
+ * length, status flags and a holding time.
+ */
+struct snpa_req {
+	struct iso_addr	sr_isoa;		/* nsap address */
+	u_char			sr_len;			/* length of snpa */
+	u_char			sr_snpa[MAX_SNPALEN];	/* snpa associated 
+												with nsap address */
+	u_char			sr_flags;		/* status bits, presumably the SNPA_*
+										values below (validity etc.) */
+	u_short			sr_ht;			/* holding time */
+};
+
+/*
+ * Flag bits.  NOTE(review): presumably the values kept in sr_flags and
+ * er_flags; SNPA_ES / SNPA_IS are also the values of sr_type in
+ * struct systype_req.
+ */
+#define	SNPA_VALID		0x01
+#define	SNPA_ES			0x02
+#define SNPA_IS			0x04
+#define	SNPA_PERM		0x10
+
+/*
+ * Argument of the SIOCSSTYPE / SIOCGSTYPE ioctls defined later in this
+ * file: the system type together with its timer values.
+ */
+struct systype_req {
+	short	sr_holdt;		/* holding timer */
+	short	sr_configt;		/* configuration timer */
+	short	sr_esconfigt;	/* suggested ES configuration timer */
+	char	sr_type;		/* SNPA_ES or SNPA_IS */
+};
+
+/*
+ * Holding time and flags of one entry; embedded in struct llinfo_llc
+ * below as lc_er.
+ */
+struct esis_req {
+	short	er_ht;			/* holding time */
+	u_char	er_flags;		/* type and validity */
+};
+/*
+ * Space for this structure gets added onto the end of a route
+ * going to an ethernet or other 802.[45x] device.
+ */
+
+struct llinfo_llc {
+	struct	llinfo_llc *lc_next;	/* keep all llc routes linked */
+	struct	llinfo_llc *lc_prev;	/* keep all llc routes linked */
+	struct	rtentry *lc_rt;			/* backpointer to route */
+	struct	esis_req lc_er;			/* holding time, etc */
+/* shorthands for the members of the embedded esis_req */
+#define lc_ht		lc_er.er_ht
+#define lc_flags	lc_er.er_flags
+};
+
+
+/* ISO arp IOCTL data structures */
+
+/* both ioctls carry a struct systype_req */
+#define	SIOCSSTYPE 	_IOW('a', 39, struct systype_req) /* set system type */
+#define	SIOCGSTYPE 	_IOR('a', 40, struct systype_req) /* get system type */
+
+#ifdef	KERNEL
+/*
+ * NOTE(review): this is a definition without `extern', so each kernel
+ * source file that includes this header gets its own tentative
+ * definition; presumably the build relies on the linker merging them --
+ * confirm before changing.
+ */
+struct llinfo_llc llinfo_llc;	/* head for linked lists */
+#endif	/* KERNEL */
diff --git a/sys/netiso/iso_var.h b/sys/netiso/iso_var.h
new file mode 100644
index 00000000000..946aeea93fe
--- /dev/null
+++ b/sys/netiso/iso_var.h
@@ -0,0 +1,137 @@
+/*-
+ * Copyright (c) 1988, 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)iso_var.h	8.1 (Berkeley) 6/10/93
+ */
+
+/***********************************************************
+		Copyright IBM Corporation 1987
+
+                      All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and its 
+documentation for any purpose and without fee is hereby granted, 
+provided that the above copyright notice appear in all copies and that
+both that copyright notice and this permission notice appear in 
+supporting documentation, and that the name of IBM not be
+used in advertising or publicity pertaining to distribution of the
+software without specific, written prior permission.  
+
+IBM DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING
+ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL
+IBM BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR
+ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+SOFTWARE.
+
+******************************************************************/
+
+/*
+ * ARGO Project, Computer Sciences Dept., University of Wisconsin - Madison
+ */
+/* $Header: iso_var.h,v 4.2 88/06/29 15:00:08 hagens Exp $
+ * $Source: /usr/argo/sys/netiso/RCS/iso_var.h,v $
+ */
+
+/*
+ *	Interface address, iso version. One of these structures is 
+ *	allocated for each interface with an osi address. The ifaddr
+ *	structure contains the protocol-independent part
+ *	of the structure, and is assumed to be first.
+ */
+struct iso_ifaddr {
+	struct ifaddr		ia_ifa;		/* protocol-independent info */
+/* shorthands for members of the embedded struct ifaddr */
+#define ia_ifp		ia_ifa.ifa_ifp
+#define	ia_flags	ia_ifa.ifa_flags
+	int					ia_snpaoffset;	/* NOTE(review): presumably an offset
+											locating the SNPA part of the
+											address -- confirm */
+	struct iso_ifaddr	*ia_next;	/* next in list of iso addresses */
+	struct	sockaddr_iso ia_addr;	/* reserve space for interface name;
+									   this is the member IA_SIS() returns */
+	struct	sockaddr_iso ia_dstaddr; /* reserve space for broadcast addr */
+#define	ia_broadaddr	ia_dstaddr	/* same storage under a second name */
+	struct	sockaddr_iso ia_sockmask; /* reserve space for general netmask */
+};
+
+/*
+ * Argument of SIOCAIFADDR_ISO (defined later in this file): interface
+ * name plus address, destination address, mask and snpa offset.
+ */
+struct	iso_aliasreq {
+	char	ifra_name[IFNAMSIZ];		/* if name, e.g. "en0" */
+	struct	sockaddr_iso ifra_addr;
+	struct	sockaddr_iso ifra_dstaddr;
+	struct	sockaddr_iso ifra_mask;
+	int	ifra_snpaoffset;
+};
+
+/*
+ * Argument of the other *_ISO interface ioctls defined later in this
+ * file: interface name plus one sockaddr_iso.
+ */
+struct	iso_ifreq {
+	char	ifr_name[IFNAMSIZ];		/* if name, e.g. "en0" */
+	struct	sockaddr_iso ifr_Addr;
+};
+
+/*
+ *	Given a pointer to an iso_ifaddr (ifaddr),
+ *	return a pointer to the addr as a sockaddr_iso
+ */
+/*
+#define	IA_SIS(ia) ((struct sockaddr_iso *)(ia.ia_ifa->ifa_addr))
+ * works if sockaddr_iso becomes variable sized.
+ */
+/*
+ *	The argument is parenthesized so that the cast applies to the whole
+ *	argument expression (e.g. IA_SIS(p + 1)), not just to its first
+ *	operand.
+ */
+#define	IA_SIS(ia) (&(((struct iso_ifaddr *)(ia))->ia_addr))
+
+#define	SIOCDIFADDR_ISO	_IOW('i',25, struct iso_ifreq)	/* delete IF addr */
+#define	SIOCAIFADDR_ISO	_IOW('i',26, struct iso_aliasreq)/* add/chg IFalias */
+#define	SIOCGIFADDR_ISO	_IOWR('i',33, struct iso_ifreq)	/* get ifnet address */
+#define	SIOCGIFDSTADDR_ISO _IOWR('i',34, struct iso_ifreq) /* get dst address */
+#define	SIOCGIFNETMASK_ISO _IOWR('i',37, struct iso_ifreq) /* get netmask */
+
+/*
+ * This stuff should go in if.h or if_llc.h or someplace else,
+ * but for now . . .
+ */
+
+/*
+ * 6-byte destination and source, 2-byte length, then the three LLC
+ * bytes (dsap, ssap, ui).  NOTE(review): presumably overlaid on a
+ * received frame; all members are char, so no padding is expected.
+ */
+struct llc_etherhdr {
+	char dst[6];
+	char src[6];
+	char len[2];
+	char llc_dsap;
+	char llc_ssap;
+	char llc_ui_byte;
+};
+
+/*
+ * Interface pointer plus 6-byte destination and source host addresses
+ * and a flags word.
+ */
+struct snpa_hdr {
+	struct	ifnet *snh_ifp;
+	char	snh_dhost[6];
+	char	snh_shost[6];
+	short	snh_flags;
+};
+#ifdef KERNEL
+/*
+ * NOTE(review): the two variables below are defined here without
+ * `extern'; presumably the build relies on the linker merging the
+ * tentative definitions -- confirm before changing.
+ */
+struct iso_ifaddr	*iso_ifaddr;	/* linked list of iso address ifaces */
+struct iso_ifaddr	*iso_localifa();	/* function returning an iso_ifaddr
+										   pointer; defined elsewhere */
+struct ifqueue 		clnlintrq;		/* clnl packet input queue */
+#endif /* KERNEL */
diff --git a/sys/netiso/tp.trans b/sys/netiso/tp.trans
new file mode 100644
index 00000000000..edefc769b81
--- /dev/null
+++ b/sys/netiso/tp.trans
@@ -0,0 +1,1342 @@
+/* NEW */
+/*-
+ * Copyright (c) 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)tp.trans	8.1 (Berkeley) 6/10/93
+ */
+
+/***********************************************************
+		Copyright IBM Corporation 1987
+
+                      All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and its 
+documentation for any purpose and without fee is hereby granted, 
+provided that the above copyright notice appear in all copies and that
+both that copyright notice and this permission notice appear in 
+supporting documentation, and that the name of IBM not be
+used in advertising or publicity pertaining to distribution of the
+software without specific, written prior permission.  
+
+IBM DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING
+ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL
+IBM BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR
+ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+SOFTWARE.
+
+******************************************************************/
+
+/*
+ * ARGO Project, Computer Sciences Dept., University of Wisconsin - Madison
+ */
+/* $Header: tp.trans,v 5.1 88/10/12 12:22:07 root Exp $
+ *
+ * Transition file for TP.
+ *
+ * DO NOT:
+ * - change the order of any of the events or states.  to do so will
+ *   make tppt, netstat, etc. cease working.
+ *
+ * NOTE:
+ * some hooks exist for data on (dis)connect, but it's ***NOT***SUPPORTED***
+ * (read: may not work!)
+ *
+ * I tried to put everything that causes a change of state in here, hence 
+ * there are some seemingly trivial events  like T_DETACH and T_LISTEN_req.
+ *
+ * Almost everything having to do w/ setting & cancelling timers is here
+ * but once it was debugged, I moved the setting of the 
+ * keepalive (sendack) timer to tp_emit(), where an AK_TPDU is sent.
+ * This is so the code wouldn't be duplicated all over creation in here.
+ *
+ */
+*PROTOCOL tp
+
+*INCLUDE
+{
+/* @(#)tp.trans	8.1 (Berkeley) 6/10/93 */
+/*
+ * C text for the generated driver: headers, helper macros and
+ * declarations of the routines called from the transition actions.
+ */
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/protosw.h>
+#include <sys/mbuf.h>
+#include <sys/time.h>
+#include <sys/errno.h>
+
+#include <netiso/tp_param.h>
+#include <netiso/tp_stat.h>
+#include <netiso/tp_pcb.h>
+#include <netiso/tp_tpdu.h>
+#include <netiso/argo_debug.h>
+#include <netiso/tp_trace.h>
+#include <netiso/iso_errno.h>
+#include <netiso/tp_seq.h>
+#include <netiso/cons.h>
+
+#define DRIVERTRACE TPPTdriver
+/*
+ * sbwakeup: the expansion already ends in `;' and refers to a variable
+ * named `p' that must be in scope where the macro is used (presumably
+ * the pcb pointer of the generated driver -- confirm).
+ */
+#define sbwakeup(sb)	sowakeup(p->tp_sock, sb);
+/*
+ * MCPY: m_copym() of the whole chain d (offset 0, M_COPYALL) with wait
+ * argument w, or 0 when d is null.  d is evaluated twice and is not
+ * parenthesized in the expansion.
+ */
+#define MCPY(d, w) (d ? m_copym(d, 0, (int)M_COPYALL, w): 0)
+
+static 	trick_hc = 1;
+
+int 	tp_emit(),
+		tp_goodack(),				tp_goodXack(),
+		tp_stash()
+;
+/*
+ * The timer routines are declared as functions only under
+ * TP_DEBUG_TIMERS; otherwise they presumably come from a header as
+ * macros -- confirm.
+ */
+void	tp_indicate(),				tp_getoptions(),	
+		tp_soisdisconnecting(), 	tp_soisdisconnected(),
+		tp_recycle_tsuffix(),		
+#ifdef TP_DEBUG_TIMERS
+		tp_etimeout(),				tp_euntimeout(),
+		tp_ctimeout(),				tp_cuntimeout(),
+		tp_ctimeout_MIN(),
+#endif
+		tp_freeref(),				tp_detach(),
+		tp0_stash(), 				tp0_send(),
+		tp_netcmd(),				tp_send()
+;
+
+/* type name used by the *PCB declaration that follows this section */
+typedef  struct tp_pcb tpcb_struct;
+
+
+}
+
+*PCB    tpcb_struct 	SYNONYM  P
+
+/*
+ * Connection states.  Per the note at the top of this file the order of
+ * the states must not be changed (tppt, netstat, etc. depend on it).
+ */
+*STATES
+
+TP_CLOSED 	
+TP_CRSENT
+TP_AKWAIT
+TP_OPEN
+TP_CLOSING 
+TP_REFWAIT
+TP_LISTENING	/* Local to this implementation */
+TP_CONFIRMING	/* Local to this implementation */
+
+/*
+ * Event list.  Each event may carry its own fields in braces; the
+ * transitions read them as $$.field.  NOTE(review): the braces on the
+ * *EVENTS line presumably hold fields common to every event -- confirm
+ * against the xebec documentation.
+ */
+*EVENTS		{ struct timeval e_time; } 		SYNONYM  E
+
+ /*
+  * C (typically cancelled) timers  - 
+  *
+  * These come first so that, for convenience, their values
+  * are 0 --> n-1.
+  * DO NOT CHANGE THE ORDER OF THESE TIMER EVENTS!! 
+  */
+ TM_inact		
+ TM_retrans		
+				/* TM_retrans is used for all 
+				 * simple retransmissions - CR,CC,XPD,DR 
+				 */
+
+ TM_sendack		
+				/* TM_sendack does dual duty - keepalive AND closed-window
+				 * Probes.
+				 * It's set w/ keepalive-ticks every time an ack is sent.
+				 * (this is done in (void) tp_emit() ).
+				 * Whenever a DT arrives which doesn't require immediate acking,
+				 * a separate fast-timeout flag is set ensuring 200ms response.
+				 */
+ TM_notused	
+
+ /* 
+  * E (typically expired) timers - these may be in any order. 
+  * These cause procedures to be executed directly; may not
+  * cause an 'event' as we know them here.
+  */
+ TM_reference		{ SeqNum e_low; SeqNum e_high; int e_retrans; }
+ TM_data_retrans	{ SeqNum e_low; SeqNum e_high; int e_retrans; }
+
+/* NOTE: in tp_input is a minor optimization that assumes that
+ * for all tpdu types that can take e_data and e_datalen, these
+ * fields fall in the same place in the event structure, that is,
+ * e_data is the first field and e_datalen is the 2nd field.
+ */
+
+ ER_TPDU  	 	{
+				  u_char		e_reason;
+				}
+ CR_TPDU  	 	{ struct mbuf 	*e_data;	/* first field */
+				  int 			e_datalen; /* 2nd field */
+				  u_int			e_cdt;
+				}
+ DR_TPDU   	 	{ struct mbuf 	*e_data;	/* first field */
+				  int 			e_datalen; /* 2nd field */
+				  u_short		e_sref;
+				  u_char		e_reason;
+				}
+ DC_TPDU		
+ CC_TPDU   	 	{ struct mbuf 	*e_data;	/* first field */
+				  int 			e_datalen; /* 2nd field */
+				  u_short		e_sref;
+				  u_int			e_cdt;
+				}
+ AK_TPDU		{ u_int			e_cdt;	
+				  SeqNum 	 	e_seq;		
+				  SeqNum 	 	e_subseq;		
+				  u_char 	 	e_fcc_present;		
+				}
+ DT_TPDU		{ struct mbuf	*e_data; 	/* first field */
+				  int 			e_datalen; /* 2nd field */
+				  u_int 		e_eot;
+				  SeqNum		e_seq; 
+				}
+ XPD_TPDU		{ struct mbuf 	*e_data;	/* first field */
+				  int 			e_datalen; 	/* 2nd field */
+				  SeqNum 		e_seq;	
+				}
+ XAK_TPDU		{ SeqNum 		e_seq;		}
+
+ /*
+  * T_* events.  NOTE(review): presumably raised on behalf of the local
+  * transport user rather than by incoming TPDUs or timers -- confirm.
+  * Only T_DISC_req carries a field.
+  */
+ T_CONN_req 
+ T_DISC_req		{ u_char		e_reason; 	}
+ T_LISTEN_req
+ T_DATA_req
+ T_XPD_req	
+ T_USR_rcvd	
+ T_USR_Xrcvd	
+ T_DETACH
+ T_NETRESET
+ T_ACPT_req
+
+*TRANSITIONS
+
+
+/* TP_AKWAIT doesn't exist in TP 0 */
+/* CC, DC and XAK TPDUs arriving in TP_AKWAIT: no state change, no action. */
+SAME			<==			TP_AKWAIT			[ CC_TPDU, DC_TPDU, XAK_TPDU ]
+	DEFAULT
+	NULLACTION
+;
+
+
+/* applicable in TP4, TP0 */
+/*
+ * DR in TP_REFWAIT whose e_sref is non-zero: reply with a DC and stay
+ * in TP_REFWAIT.  A DR with e_sref == 0 is presumably picked up by the
+ * catch-all TP_REFWAIT transition that follows, which also lists
+ * DR_TPDU.
+ */
+SAME			<==			TP_REFWAIT								DR_TPDU
+	( $$.e_sref !=  0 ) 
+	{
+		(void) tp_emit(DC_TPDU_type, $P, 0, 0, MNULL);
+	}
+;
+	
+/* applicable in TP4, TP0 */
+/*
+ * Any of the listed TPDUs arriving in TP_REFWAIT and not claimed by an
+ * earlier transition: stay in TP_REFWAIT.  With TP_DEBUG defined,
+ * every event except an AK is reported with printf.
+ */
+SAME			<==			TP_REFWAIT			[ CR_TPDU, CC_TPDU, DT_TPDU, 
+					DR_TPDU, XPD_TPDU, AK_TPDU, XAK_TPDU, DC_TPDU, ER_TPDU ]
+	DEFAULT
+	{
+#		ifdef TP_DEBUG
+		if( $E.ev_number != AK_TPDU )
+			printf("TPDU 0x%x in REFWAIT!!!!\n", $E.ev_number);
+#		endif /* TP_DEBUG */
+	}
+;
+
+/* applicable in TP4, TP0 */
+/* T_DETACH and T_DISC_req in TP_REFWAIT: no state change, no action. */
+SAME			<==			TP_REFWAIT				[ T_DETACH, T_DISC_req ]
+	DEFAULT
+	NULLACTION
+;
+
+/* applicable in TP4, TP0 */
+/*
+ * AK arriving in TP_CRSENT on a class 0 connection: feed it to
+ * tp_goodack() and stay in TP_CRSENT (the comments in the action
+ * explain why).  There is no DEFAULT alternative for this state/event
+ * pair in this part of the file.
+ */
+SAME			<==			TP_CRSENT								 AK_TPDU
+	($P.tp_class == TP_CLASS_0)
+	{
+		/* oh, man is this grotesque or what? */
+		(void) tp_goodack($P, $$.e_cdt, $$.e_seq,  $$.e_subseq);
+		/* but it's necessary because this pseudo-ack may happen
+		 * before the CC arrives, but we HAVE to adjust the
+		 * snduna as a result of the ack, WHENEVER it arrives
+		 */
+	}
+;
+
+/*
+ * The next five transitions all have the same shape: the listed TPDUs
+ * arriving in the given state cause no state change and no action.
+ */
+/* applicable in TP4, TP0 */
+SAME			<==			TP_CRSENT	
+					[ CR_TPDU, DC_TPDU, DT_TPDU, XPD_TPDU,  XAK_TPDU ]
+	DEFAULT
+	NULLACTION
+;
+
+/* applicable in TP4, TP0 */
+SAME			<==			TP_CLOSED					[ DT_TPDU, XPD_TPDU,
+										ER_TPDU, DC_TPDU, AK_TPDU, XAK_TPDU ] 
+	DEFAULT
+	NULLACTION
+;
+
+/* TP_CLOSING doesn't exist in TP 0 */
+SAME 			<== 		TP_CLOSING
+					[ CC_TPDU, CR_TPDU, DT_TPDU, XPD_TPDU, AK_TPDU, XAK_TPDU ]
+	DEFAULT
+	NULLACTION
+;
+
+
+/* DC_TPDU doesn't exist in TP 0 */
+SAME			<==			TP_OPEN						  DC_TPDU
+	DEFAULT
+	NULLACTION
+;
+
+/* applicable in TP4, TP0 */
+SAME			<==		 	TP_LISTENING  [DR_TPDU, CC_TPDU, DT_TPDU, XPD_TPDU,
+										 ER_TPDU, DC_TPDU, AK_TPDU, XAK_TPDU ] 
+	DEFAULT	
+	NULLACTION
+;
+
+/* applicable in TP4, TP0 */
+/* T_LISTEN_req in TP_CLOSED: move to TP_LISTENING; no other action. */
+TP_LISTENING	<==			TP_CLOSED  							T_LISTEN_req 
+	DEFAULT
+	NULLACTION
+;
+
+/* applicable in TP4, TP0 */
+/* T_DETACH in TP_LISTENING or TP_CLOSED: call tp_detach(); end in TP_CLOSED. */
+TP_CLOSED  		<== 		[ TP_LISTENING, TP_CLOSED ] 			T_DETACH
+	DEFAULT
+	{
+		tp_detach($P);
+	}
+;
+
+/*
+ * CR arriving in TP_LISTENING moves the pcb to TP_CONFIRMING.
+ * Class 0: only tp_refstate is set.
+ */
+TP_CONFIRMING	<==		 TP_LISTENING  								CR_TPDU 
+	( $P.tp_class == TP_CLASS_0)
+	{
+		$P.tp_refstate = REF_OPEN; /* has timers ??? */
+	}
+;
+
+/*
+ * All other classes: also remember the credit carried by the CR
+ * (e_cdt) in tp_fcredit and, when the CR carried data, queue it on
+ * tp_Xrcv.
+ */
+TP_CONFIRMING		<==		 TP_LISTENING  							CR_TPDU 
+	DEFAULT
+	{
+		IFTRACE(D_CONN)
+			tptrace(TPPTmisc, "CR datalen data", $$.e_datalen, $$.e_data,0,0);
+		ENDTRACE
+		IFDEBUG(D_CONN)
+			printf("CR datalen 0x%x data 0x%x", $$.e_datalen, $$.e_data);
+		ENDDEBUG
+		$P.tp_refstate = REF_OPEN; /* has timers */
+		$P.tp_fcredit = $$.e_cdt;
+
+		if ($$.e_datalen > 0) {
+			/* n/a for class 0 */
+			ASSERT($P.tp_Xrcv.sb_cc == 0); 
+			sbappendrecord(&$P.tp_Xrcv, $$.e_data);
+			/* the mbuf now belongs to tp_Xrcv; drop the event's pointer */
+			$$.e_data = MNULL; 
+		} 
+	}
+;
+
+/*
+ * T_ACPT_req in TP_CONFIRMING has three alternatives, presumably tried
+ * in the order written:
+ *   1. class 0: mark the socket connected, emit a CC, go to TP_OPEN;
+ *   2. otherwise, if emitting the CC succeeds: go to TP_AKWAIT and
+ *      start the retransmission timer;
+ *   3. otherwise (emit failed): tear the connection down, TP_CLOSED.
+ */
+TP_OPEN		<==		 TP_CONFIRMING  								T_ACPT_req 
+	( $P.tp_class == TP_CLASS_0 )
+	{
+		IncStat(ts_tp0_conn);
+		IFTRACE(D_CONN)
+			tptrace(TPPTmisc, "Confiming", $P, 0,0,0);
+		ENDTRACE
+		IFDEBUG(D_CONN)
+			printf("Confirming connection: $P" );
+		ENDDEBUG
+		soisconnected($P.tp_sock);
+		(void) tp_emit(CC_TPDU_type, $P, 0,0, MNULL) ;
+		$P.tp_fcredit = 1;
+	}
+;
+
+/*
+ * Note that the predicate itself has a side effect: it emits the CC,
+ * with a copy of tp_ucddata made without waiting (M_NOWAIT).
+ */
+TP_AKWAIT		<==		 TP_CONFIRMING  							T_ACPT_req
+	(tp_emit(CC_TPDU_type, $P, 0,0, MCPY($P.tp_ucddata, M_NOWAIT)) == 0)
+	{
+		IncStat(ts_tp4_conn); /* even though not quite open */
+		IFTRACE(D_CONN)
+			tptrace(TPPTmisc, "Confiming", $P, 0,0,0);
+		ENDTRACE
+		IFDEBUG(D_CONN)
+			printf("Confirming connection: $P" );
+		ENDDEBUG
+		tp_getoptions($P);
+		soisconnecting($P.tp_sock);
+		if (($P.tp_rx_strat & TPRX_FASTSTART) && ($P.tp_fcredit > 0))
+			$P.tp_cong_win = $P.tp_fcredit * $P.tp_l_tpdusize;
+		/* reload the retry counter and time the CC with tp_cc_ticks */
+		$P.tp_retrans = $P.tp_Nretrans;
+		tp_ctimeout($P, TM_retrans, (int)$P.tp_cc_ticks);
+	}
+;
+
+/* TP4 only */
+TP_CLOSED		<==		 TP_CONFIRMING								T_ACPT_req
+	DEFAULT /* emit failed */
+	{
+		IFDEBUG(D_CONN)
+			printf("event: CR_TPDU emit CC failed done " );
+		ENDDEBUG
+		soisdisconnected($P.tp_sock);
+		tp_recycle_tsuffix($P);
+		tp_freeref($P.tp_lref);
+		tp_detach($P);
+	}
+;
+
+/* applicable in TP4, TP0 */
+/*
+ * T_CONN_req in TP_CLOSED: emit a CR carrying a copy of any user
+ * connect data and go to TP_CRSENT.  If the emit fails the error is
+ * returned from the action and the state is left unchanged.
+ */
+TP_CRSENT		<==		TP_CLOSED								T_CONN_req 
+	DEFAULT
+	{
+		struct mbuf *cr_data;	/* copy of the user connect data, if any */
+		int emit_err;			/* result of tp_emit() */
+
+		IFTRACE(D_CONN)
+			tptrace(TPPTmisc, "T_CONN_req flags ucddata", (int)$P.tp_flags,
+			$P.tp_ucddata, 0, 0);
+		ENDTRACE
+
+		cr_data = MCPY($P.tp_ucddata, M_WAIT);
+		if (cr_data != MNULL) {
+			IFDEBUG(D_CONN)
+				printf("T_CONN_req.trans m_copy cc 0x%x\n", 
+					$P.tp_ucddata);
+				dump_mbuf(cr_data, "sosnd @ T_CONN_req");
+			ENDDEBUG
+		}
+
+		emit_err = tp_emit(CR_TPDU_type, $P, 0, 0, cr_data);
+		if (emit_err != 0) {
+			/* driver WON'T change state; will return error */
+			return emit_err;
+		}
+
+		$P.tp_refstate = REF_OPEN; /* has timers */
+		if ($P.tp_class != TP_CLASS_0) {
+			/* every class but 0 retransmits the CR on a timer */
+			$P.tp_retrans = $P.tp_Nretrans;
+			tp_ctimeout($P, TM_retrans, (int)$P.tp_cr_ticks);
+		}
+	}
+;
+
+/* applicable in TP4, TP0, but state TP_AKWAIT doesn't apply to TP0 */
+/*
+ * DR arriving in TP_CRSENT, TP_AKWAIT or TP_OPEN: hand any disconnect
+ * data and a T_DISCONNECT indication to the user, mark the socket
+ * disconnected, cancel timers and (for classes other than 0, when
+ * e_sref is non-zero) answer with a DC.  Ends in TP_REFWAIT.
+ */
+TP_REFWAIT 		<==		[ TP_CRSENT, TP_AKWAIT, TP_OPEN ] 			DR_TPDU 
+	DEFAULT
+	{
+		sbflush(&$P.tp_Xrcv); /* purge non-delivered data */
+		if ($$.e_datalen > 0) {
+			sbappendrecord(&$P.tp_Xrcv, $$.e_data);
+			$$.e_data = MNULL;
+		} 
+		if ($P.tp_state == TP_OPEN)
+			tp_indicate(T_DISCONNECT, $P, 0);
+		else {
+			/*
+			 * Not yet open: ECONNREFUSED for the three reasons tested
+			 * below, ECONNABORTED for any other reason.
+			 */
+			int so_error = ECONNREFUSED;
+			if ($$.e_reason != (E_TP_NO_SESSION ^ TP_ERROR_MASK) &&
+			    $$.e_reason != (E_TP_NO_CR_ON_NC ^ TP_ERROR_MASK) &&
+			    $$.e_reason != (E_TP_REF_OVERFLOW ^ TP_ERROR_MASK))
+				so_error = ECONNABORTED;
+			tp_indicate(T_DISCONNECT, $P, so_error);
+		}
+		tp_soisdisconnected($P);
+		if ($P.tp_class != TP_CLASS_0) {
+			if ($P.tp_state == TP_OPEN ) {
+				tp_euntimeout($P, TM_data_retrans); /* all */
+				tp_cuntimeout($P, TM_retrans);
+				tp_cuntimeout($P, TM_inact);
+				tp_cuntimeout($P, TM_sendack);
+				$P.tp_flags &= ~TPF_DELACK;
+			}
+			/*
+			 * NOTE(review): in the TP_OPEN case TM_retrans was already
+			 * cancelled a few lines up, so this call repeats it.
+			 */
+			tp_cuntimeout($P, TM_retrans);
+			if( $$.e_sref !=  0 ) 
+				(void) tp_emit(DC_TPDU_type, $P, 0, 0, MNULL);
+		}
+	}
+;
+
+/*
+ * DR arriving in TP_CLOSED: answer with a DC when e_sref is non-zero,
+ * then cancel and re-arm the reference timer.  No state change.
+ */
+SAME 			<==		TP_CLOSED 									DR_TPDU 
+	DEFAULT
+	{
+		if( $$.e_sref != 0 )
+			(void) tp_emit(DC_TPDU_type, $P, 0, 0, MNULL); 
+		/* reference timer already set - reset it to be safe (???) */
+		tp_euntimeout($P, TM_reference); /* all */
+		tp_etimeout($P, TM_reference, (int)$P.tp_refer_ticks);
+	}
+;
+
+/* NBS(34) */
+/*
+ * ER arriving in TP_CRSENT: cancel the retransmission timer, pass the
+ * reason to the user and mark the socket disconnected.
+ */
+TP_REFWAIT 		<==  	TP_CRSENT  									ER_TPDU
+	DEFAULT
+	{	
+		tp_cuntimeout($P, TM_retrans);
+		tp_indicate(ER_TPDU, $P, $$.e_reason);
+		tp_soisdisconnected($P);
+	}
+;
+
+/* NBS(27) */
+/*
+ * DR, ER or DC arriving in TP_CLOSING (this and the next two
+ * transitions): cancel the retransmission timer, mark the socket
+ * disconnected and go to TP_REFWAIT.  Only the ER variant also
+ * indicates the reason to the user.
+ */
+TP_REFWAIT		<==		TP_CLOSING									DR_TPDU
+	DEFAULT
+	{	 
+		tp_cuntimeout($P, TM_retrans);
+		tp_soisdisconnected($P);
+	}
+;
+/* these two transitions are the same but can't be combined because xebec
+ * can't handle the use of $$.e_reason if they're combined
+ */
+/* NBS(27) */
+TP_REFWAIT		<==		TP_CLOSING									ER_TPDU
+	DEFAULT
+	{	 
+		tp_indicate(ER_TPDU, $P, $$.e_reason);
+		tp_cuntimeout($P, TM_retrans);
+		tp_soisdisconnected($P);
+	}
+;
+/* NBS(27) */
+TP_REFWAIT		<==		TP_CLOSING									DC_TPDU 
+	DEFAULT
+	{	 
+		tp_cuntimeout($P, TM_retrans);
+		tp_soisdisconnected($P);
+	}
+;
+
+/* NBS(21) */
+/*
+ * CC or CR arriving in TP_CLOSED: answer once with a DR whose reason
+ * is E_TP_NO_SESSION.  No state change and no retransmission timer.
+ */
+SAME 			<== 	TP_CLOSED 						[ CC_TPDU, CR_TPDU ]
+	DEFAULT
+	{	/* don't ask me why we have to do this - spec says so */
+		(void) tp_emit(DR_TPDU_type, $P, 0, E_TP_NO_SESSION, MNULL);
+		/* don't bother with retransmissions of the DR */
+	}
+;
+
+/* NBS(34) */
+/*
+ * ER arriving in TP_OPEN on a class 0 connection: indicate the reason,
+ * mark the socket disconnected and issue CONN_CLOSE through
+ * tp_netcmd().  Ends in TP_REFWAIT.
+ */
+TP_REFWAIT 		<== 	TP_OPEN  				 					ER_TPDU
+	($P.tp_class == TP_CLASS_0)
+	{
+		tp_soisdisconnecting($P.tp_sock);
+		tp_indicate(ER_TPDU, $P, $$.e_reason);
+		tp_soisdisconnected($P);
+		tp_netcmd( $P, CONN_CLOSE );
+	}
+;
+
+/*
+ * ER arriving in TP_AKWAIT, or in TP_OPEN for the other classes:
+ * cancel the data-phase timers (TP_OPEN only), indicate the reason,
+ * then send a DR with reason E_TP_PROTO_ERR under the retransmission
+ * timer.  Ends in TP_CLOSING.
+ */
+TP_CLOSING 		<== 	[ TP_AKWAIT, TP_OPEN ]  					ER_TPDU
+	DEFAULT
+	{
+		if ($P.tp_state == TP_OPEN) {
+			tp_euntimeout($P, TM_data_retrans); /* all */
+			tp_cuntimeout($P, TM_inact);
+			tp_cuntimeout($P, TM_sendack);
+		}
+		tp_soisdisconnecting($P.tp_sock);
+		tp_indicate(ER_TPDU, $P, $$.e_reason);
+		/* the timer is armed before the DR is emitted */
+		$P.tp_retrans = $P.tp_Nretrans;
+		tp_ctimeout($P, TM_retrans, (int)$P.tp_dr_ticks);
+		(void) tp_emit(DR_TPDU_type, $P, 0, E_TP_PROTO_ERR, MNULL);
+	}
+;
+/* NBS(6) */
+/*
+ * CC arriving in TP_CRSENT completes the connection (TP_OPEN).
+ * Class 0: cancel the retransmission timer, set tp_fcredit to 1 and
+ * mark the socket connected.
+ */
+TP_OPEN			<==		TP_CRSENT									CC_TPDU 
+	($P.tp_class == TP_CLASS_0) 
+	{	
+		tp_cuntimeout($P, TM_retrans);
+		IncStat(ts_tp0_conn);
+		$P.tp_fcredit = 1;
+		soisconnected($P.tp_sock);
+	}
+;
+
+/*
+ * Other classes: record the reference and credit carried by the CC,
+ * drop the saved user connect data, queue any data carried by the CC
+ * on tp_Xrcv, acknowledge with an AK and start the inactivity timer.
+ */
+TP_OPEN			<==		TP_CRSENT									CC_TPDU 
+	DEFAULT
+	{	
+		IFDEBUG(D_CONN)
+			printf("trans: CC_TPDU in CRSENT state flags 0x%x\n", 
+				(int)$P.tp_flags);
+		ENDDEBUG
+		IncStat(ts_tp4_conn);
+		$P.tp_fref = $$.e_sref;
+		$P.tp_fcredit = $$.e_cdt;
+		if (($P.tp_rx_strat & TPRX_FASTSTART) && ($$.e_cdt > 0))
+			$P.tp_cong_win = $$.e_cdt * $P.tp_l_tpdusize;
+		tp_getoptions($P);
+		tp_cuntimeout($P, TM_retrans);
+		if ($P.tp_ucddata) {
+			IFDEBUG(D_CONN)
+				printf("dropping user connect data cc 0x%x\n",
+					$P.tp_ucddata->m_len);
+			ENDDEBUG
+			m_freem($P.tp_ucddata);
+			$P.tp_ucddata = 0;
+		}
+		soisconnected($P.tp_sock);
+		if ($$.e_datalen > 0) {
+			ASSERT($P.tp_Xrcv.sb_cc == 0); /* should be empty */
+			sbappendrecord(&$P.tp_Xrcv, $$.e_data);
+			/* the mbuf now belongs to tp_Xrcv; drop the event's pointer */
+			$$.e_data = MNULL;
+		}
+
+		(void) tp_emit(AK_TPDU_type, $P, $P.tp_rcvnxt, 0, MNULL);
+		tp_ctimeout($P, TM_inact, (int)$P.tp_inact_ticks);
+	}
+;
+
+/* TP4 only */
+/*
+ * Retransmission timer firing in TP_CRSENT with retries left: resend
+ * the CR (with a fresh copy of the user connect data) and re-arm the
+ * timer.  Returns ENOBUFS from the action if connect data exists but
+ * could not be copied.
+ */
+SAME			<==		TP_CRSENT									TM_retrans 
+	(	$P.tp_retrans > 0 )
+	{
+		struct mbuf *data = MNULL;
+		int error;
+
+		IncStat(ts_retrans_cr);
+		$P.tp_cong_win = 1 * $P.tp_l_tpdusize;
+		data = MCPY($P.tp_ucddata, M_NOWAIT);
+		if($P.tp_ucddata) {
+			IFDEBUG(D_CONN)
+				printf("TM_retrans.trans m_copy cc 0x%x\n", data);
+				dump_mbuf($P.tp_ucddata, "sosnd @ TM_retrans");
+			ENDDEBUG
+			/* copy failed: give up for now, before tp_retrans is decremented */
+			if( data == MNULL )
+				return ENOBUFS;
+		}
+
+		$P.tp_retrans --;
+		/* an emit error is recorded on the socket; the timer is re-armed anyway */
+		if( error = tp_emit(CR_TPDU_type, $P, 0, 0, data) ) {
+			$P.tp_sock->so_error = error;
+		}
+		tp_ctimeout($P, TM_retrans, (int)$P.tp_cr_ticks);
+	}
+;
+
+/* TP4 only  */
+/*
+ * Same timer with no retries left: report ETIMEDOUT to the user, mark
+ * the socket disconnected and go to TP_REFWAIT.
+ */
+TP_REFWAIT		<==		TP_CRSENT									TM_retrans 
+	DEFAULT /* no more CR retransmissions */
+	{ 	
+		IncStat(ts_conn_gaveup);
+		$P.tp_sock->so_error = ETIMEDOUT;
+		tp_indicate(T_DISCONNECT, $P, ETIMEDOUT);
+		tp_soisdisconnected($P);
+	}
+;
+
+/* TP4 only */
+/*
+ * A second CR while waiting for the ack of our CC: send the CC again
+ * and restart the retransmission count and timer.  No state change.
+ */
+SAME 			<==	 TP_AKWAIT											CR_TPDU 
+	DEFAULT
+	/* duplicate CR (which doesn't really exist in the context of
+	 * a connectionless network layer) 
+	 * Doesn't occur in class 0.
+	 */
+	{
+		struct mbuf *cc_data;	/* copy of the saved user data, or 0 */
+		int emit_err;			/* result of tp_emit() */
+
+		cc_data = MCPY($P.tp_ucddata, M_WAIT);
+		emit_err = tp_emit(CC_TPDU_type, $P, 0, 0, cc_data);
+		if (emit_err != 0)
+			$P.tp_sock->so_error = emit_err;
+
+		/* the timer is restarted whether or not the emit succeeded */
+		$P.tp_retrans = $P.tp_Nretrans;
+		tp_ctimeout($P, TM_retrans, (int)$P.tp_cc_ticks);
+	}
+;
+
+/* TP4 only */
+/*
+ * In-window DT arriving in TP_AKWAIT: the connection becomes TP_OPEN,
+ * the saved connect data is dropped, the DT is stashed and is either
+ * acked at once or left to the sendack timer.
+ */
+TP_OPEN			<==		TP_AKWAIT 										DT_TPDU 
+	( IN_RWINDOW( $P, $$.e_seq,
+					$P.tp_rcvnxt, SEQ($P, $P.tp_rcvnxt + $P.tp_lcredit)) )
+	{
+		int doack;
+
+		/*
+		 * Get rid of any confirm or connect data, so that if we
+		 * crash or close, it isn't thought of as disconnect data.
+		 */
+		if ($P.tp_ucddata) {
+			m_freem($P.tp_ucddata);
+			$P.tp_ucddata = 0;
+		}
+		/*
+		 * NOTE(review): TM_inact is armed here and again after
+		 * soisconnected() with the same value; the second call looks
+		 * redundant -- confirm before removing either.
+		 */
+		tp_ctimeout($P, TM_inact, (int)$P.tp_inact_ticks);
+		tp_cuntimeout($P, TM_retrans);
+		soisconnected($P.tp_sock);
+		tp_ctimeout($P, TM_inact, (int)$P.tp_inact_ticks);
+
+		/* see also next 2 transitions, if you make any changes */
+
+		doack = tp_stash($P, $E);
+		IFDEBUG(D_DATA)
+			printf("tp_stash returns %d\n",doack);
+		ENDDEBUG
+
+		/* ack now (timer set to keepalive ticks) or later (sendack ticks) */
+		if (doack) {
+			(void) tp_emit(AK_TPDU_type, $P, $P.tp_rcvnxt, 0, MNULL ); 
+			tp_ctimeout($P, TM_sendack, (int)$P.tp_keepalive_ticks);
+		} else
+			tp_ctimeout( $P, TM_sendack, (int)$P.tp_sendack_ticks);
+		
+		/*
+		 * NOTE(review): the message below mentions sbwakeup, but this
+		 * action contains no sbwakeup() call.
+		 */
+		IFDEBUG(D_DATA)
+			printf("after stash calling sbwakeup\n");
+		ENDDEBUG
+	}
+;
+
+/*
+ * DT arriving in TP_OPEN on a class 0 connection: stash it with
+ * tp0_stash() and wake up the socket's receive buffer.
+ */
+SAME			<==		TP_OPEN 									DT_TPDU 
+	( $P.tp_class == TP_CLASS_0 )
+	{
+		tp0_stash($P, $E);
+		sbwakeup( &$P.tp_sock->so_rcv );
+
+		IFDEBUG(D_DATA)
+			printf("after stash calling sbwakeup\n");
+		ENDDEBUG
+	}
+;
+
+/* TP4 only */
+/*
+ * In-window DT arriving in TP_OPEN: restart the inactivity timer, wake
+ * up the receive buffer, stash the DT, then ack at once or leave the
+ * ack to the sendack timer.  Here sbwakeup() runs before tp_stash().
+ */
+SAME			<==		TP_OPEN 									DT_TPDU 
+	( IN_RWINDOW( $P, $$.e_seq,
+					$P.tp_rcvnxt, SEQ($P, $P.tp_rcvnxt + $P.tp_lcredit)) )
+	{
+		int doack; /* tells if we must ack immediately */
+
+		tp_ctimeout($P, TM_inact, (int)$P.tp_inact_ticks);
+		sbwakeup( &$P.tp_sock->so_rcv );
+
+		doack = tp_stash($P, $E);
+		IFDEBUG(D_DATA)
+			printf("tp_stash returns %d\n",doack);
+		ENDDEBUG
+
+		if(doack)
+			(void) tp_emit(AK_TPDU_type, $P, $P.tp_rcvnxt, 0, MNULL ); 
+		else
+			tp_ctimeout_MIN( $P, TM_sendack, (int)$P.tp_sendack_ticks);
+		
+		IFDEBUG(D_DATA)
+			printf("after stash calling sbwakeup\n");
+		ENDDEBUG
+	}
+;
+
+/* Not in window  - we must ack under certain circumstances, namely
+ * a) if the seq number is below lwe but > lwe - (max credit ever given)
+ * (to handle lost acks) Can use max-possible-credit for this ^^^.
+ * and 
+ * b) seq number is > uwe but < uwe + previously sent & withdrawn credit
+ *
+ * (see 12.2.3.8.1 of ISO spec, p. 73)
+ * We just always ack.
+ */
+/* TP4 only */
+/*
+ * The action counts the event, frees the DT's data, restarts the
+ * inactivity timer and emits an AK for tp_rcvnxt.  No state change.
+ */
+SAME 			<== 	[ TP_OPEN, TP_AKWAIT ]							DT_TPDU
+	DEFAULT /* Not in window */
+	{ 	
+		IFTRACE(D_DATA)
+			tptrace(TPPTmisc, "NIW seq rcvnxt lcredit ",
+				$$.e_seq, $P.tp_rcvnxt, $P.tp_lcredit, 0);
+		ENDTRACE
+		IncStat(ts_dt_niw);
+		/*
+		 * NOTE(review): unlike the transitions that queue e_data, the
+		 * pointer is not reset to MNULL after the free -- confirm the
+		 * caller does not touch it afterwards.
+		 */
+		m_freem($$.e_data);
+		tp_ctimeout($P, TM_inact, (int)$P.tp_inact_ticks);
+		(void) tp_emit(AK_TPDU_type, $P, $P.tp_rcvnxt, 0, MNULL ); 
+	}
+;
+
+/* TP4 only */
+/*
+ * AK arriving in TP_AKWAIT: the connection becomes TP_OPEN.  The saved
+ * connect data is dropped, the ack is processed, the retransmission
+ * timer is cancelled, the socket is marked connected and the sendack
+ * and inactivity timers are started.
+ */
+TP_OPEN			<==		TP_AKWAIT										AK_TPDU
+	DEFAULT
+	{
+		if ($P.tp_ucddata) {
+			m_freem($P.tp_ucddata);
+			$P.tp_ucddata = 0;
+		}
+		(void) tp_goodack($P, $$.e_cdt, $$.e_seq, $$.e_subseq);
+		tp_cuntimeout($P, TM_retrans);
+
+		soisconnected($P.tp_sock);
+		IFTRACE(D_CONN)
+			struct socket *so = $P.tp_sock;
+			tptrace(TPPTmisc, 
+			"called sosiconn: so so_state rcv.sb_sel rcv.sb_flags",
+				so, so->so_state, so->so_rcv.sb_sel, so->so_rcv.sb_flags);
+			tptrace(TPPTmisc, 
+			"called sosiconn 2: so_qlen so_error so_rcv.sb_cc so_head",
+				so->so_qlen, so->so_error, so->so_rcv.sb_cc, so->so_head);
+		ENDTRACE
+
+		tp_ctimeout($P, TM_sendack, (int)$P.tp_keepalive_ticks);
+		tp_ctimeout($P, TM_inact, (int)$P.tp_inact_ticks);
+	}
+;
+
+/* TP4 only */
+TP_OPEN 		<== 	[ TP_OPEN, TP_AKWAIT ]						XPD_TPDU
+	($P.tp_Xrcvnxt == $$.e_seq)
+	{
+		if( $P.tp_state == TP_AKWAIT ) {
+			if ($P.tp_ucddata) {
+				m_freem($P.tp_ucddata);
+				$P.tp_ucddata = 0;
+			}
+			tp_cuntimeout($P, TM_retrans);
+			soisconnected($P.tp_sock);
+			tp_ctimeout($P, TM_sendack, (int)$P.tp_keepalive_ticks);
+			tp_ctimeout($P, TM_inact, (int)$P.tp_inact_ticks);
+		} 
+		IFTRACE(D_XPD)
+		tptrace(TPPTmisc, "XPD tpdu accepted Xrcvnxt, e_seq datalen m_len\n",
+				$P.tp_Xrcvnxt,$$.e_seq,  $$.e_datalen, $$.e_data->m_len);
+		ENDTRACE
+
+		$P.tp_sock->so_state |= SS_RCVATMARK;
+		$$.e_data->m_flags |= M_EOR;
+		sbinsertoob(&$P.tp_Xrcv, $$.e_data);
+		IFDEBUG(D_XPD)
+			dump_mbuf($$.e_data, "XPD TPDU: tp_Xrcv");
+		ENDDEBUG
+		tp_indicate(T_XDATA, $P, 0);
+		sbwakeup( &$P.tp_Xrcv );
+
+		(void) tp_emit(XAK_TPDU_type, $P, $P.tp_Xrcvnxt, 0, MNULL);
+		SEQ_INC($P, $P.tp_Xrcvnxt);
+	}
+;
+
+/* TP4 only */
+SAME			<==		TP_OPEN 									T_USR_Xrcvd
+	DEFAULT
+	{
+		if( $P.tp_Xrcv.sb_cc == 0 ) {
+			/* kludge for select(): */ 
+			/* $P.tp_sock->so_state &= ~SS_OOBAVAIL; */
+		}
+	}
+	/* OLD WAY:
+	 * Ack only after the user receives the XPD.  This is better for 
+	 * users that use one XPD right after another.
+	 * Acking right away (the NEW WAY, see the prev. transition) is 
+	 * better for occasional XPD, when the receiving user doesn't 
+	 * want to read the XPD immediately (which is session's behavior).
+	 *
+		int error = tp_emit(XAK_TPDU_type, $P, $P.tp_Xrcvnxt, 0, MNULL);
+		SEQ_INC($P, $P.tp_Xrcvnxt);
+		return error;
+	*/
+;
+
+/* NOTE: presently if the user doesn't read the connection data
+ * before an expedited data PDU comes in, the connection data will
+ * be dropped. This is a bug.  To avoid it, we need somewhere else
+ * to put the connection data.
+ * On the other hand, we need not to have it sitting around forever.
+ * This is a problem with the idea of trying to accommodate
+ * data on connect w/ a passive-open user interface. 
+ */
+/* TP4 only */
+
+SAME	 		<== 	[ TP_AKWAIT, TP_OPEN ] 							XPD_TPDU
+	DEFAULT /* not in window or cdt==0 */
+	{
+		IFTRACE(D_XPD)
+			tptrace(TPPTmisc, "XPD tpdu niw (Xrcvnxt, e_seq) or not cdt (cc)\n",
+				$P.tp_Xrcvnxt, $$.e_seq,  $P.tp_Xrcv.sb_cc , 0);
+		ENDTRACE
+		if( $P.tp_Xrcvnxt != $$.e_seq )
+			IncStat(ts_xpd_niw);
+		if( $P.tp_Xrcv.sb_cc ) {
+			/* might as well kick 'em again */
+			tp_indicate(T_XDATA, $P, 0);
+			IncStat(ts_xpd_dup);
+		}
+		m_freem($$.e_data);
+		tp_ctimeout($P, TM_inact, (int)$P.tp_inact_ticks);
+		/* don't send an xack because the xak gives "last one received", not
+		 * "next one i expect" (dumb)
+		 */
+	}
+;
+
+/* Occurs (AKWAIT, OPEN) when parent (listening) socket gets aborted, and tries
+ * to detach all its "children"
+ * Also (CRSENT) when user kills a job that's doing a connect()
+ */
+TP_REFWAIT		<== 	TP_CRSENT 										T_DETACH
+	($P.tp_class == TP_CLASS_0)
+	{
+		struct socket *so = $P.tp_sock;
+
+		/* detach from parent socket so it can finish closing */
+		if (so->so_head) {
+			if (!soqremque(so, 0) && !soqremque(so, 1))
+				panic("tp: T_DETACH");
+			so->so_head = 0;
+		}
+		tp_soisdisconnecting($P.tp_sock);
+		tp_netcmd( $P, CONN_CLOSE);
+		tp_soisdisconnected($P);
+	}
+;
+
+/* TP4 only */
+TP_CLOSING		<== [ TP_CLOSING, TP_AKWAIT, TP_CRSENT, TP_CONFIRMING ]	T_DETACH
+	DEFAULT
+	{
+		struct socket *so = $P.tp_sock;
+		struct mbuf *data = MNULL;
+
+		/* detach from parent socket so it can finish closing */
+		if (so->so_head) {
+			if (!soqremque(so, 0) && !soqremque(so, 1))
+				panic("tp: T_DETACH");
+			so->so_head = 0;
+		}
+		if ($P.tp_state != TP_CLOSING) {
+			tp_soisdisconnecting($P.tp_sock);
+			data = MCPY($P.tp_ucddata, M_NOWAIT);
+			(void) tp_emit(DR_TPDU_type, $P, 0, E_TP_NORMAL_DISC, data);
+			$P.tp_retrans = $P.tp_Nretrans;
+			tp_ctimeout($P, TM_retrans, (int)$P.tp_dr_ticks);
+		}
+	}
+;
+
+TP_REFWAIT		<==		[ TP_OPEN, TP_CRSENT ]		 	  			T_DISC_req
+	( $P.tp_class == TP_CLASS_0 )
+	{
+		tp_soisdisconnecting($P.tp_sock);
+		tp_netcmd( $P, CONN_CLOSE);
+		tp_soisdisconnected($P);
+	}
+;
+
+/* TP4 only */
+TP_CLOSING		<==	[ TP_AKWAIT, TP_OPEN, TP_CRSENT, TP_CONFIRMING ]  T_DISC_req
+	DEFAULT
+	{
+		struct mbuf *data = MCPY($P.tp_ucddata, M_WAIT);
+
+		if($P.tp_state == TP_OPEN) {
+			tp_euntimeout($P, TM_data_retrans); /* all */
+			tp_cuntimeout($P, TM_inact);
+			tp_cuntimeout($P, TM_sendack);
+			$P.tp_flags &= ~TPF_DELACK;
+		}
+		if (data) {
+			IFDEBUG(D_CONN)
+				printf("T_DISC_req.trans tp_ucddata 0x%x\n", 
+					$P.tp_ucddata);
+				dump_mbuf(data, "ucddata @ T_DISC_req");
+			ENDDEBUG
+		}
+		tp_soisdisconnecting($P.tp_sock);
+		$P.tp_retrans = $P.tp_Nretrans;
+		tp_ctimeout($P, TM_retrans, (int)$P.tp_dr_ticks);
+
+		if( trick_hc )
+			return tp_emit(DR_TPDU_type, $P, 0, $$.e_reason, data);
+	}
+;
+
+/* TP4 only */
+SAME			<==		TP_AKWAIT									TM_retrans
+	( $P.tp_retrans > 0 )
+	{
+		int error;
+		struct mbuf *data = MCPY($P.tp_ucddata, M_WAIT);
+
+		IncStat(ts_retrans_cc);
+		$P.tp_retrans --;
+		$P.tp_cong_win = 1 * $P.tp_l_tpdusize;
+
+		if( error = tp_emit(CC_TPDU_type, $P, 0, 0, data) ) 
+			$P.tp_sock->so_error = error;
+		tp_ctimeout($P, TM_retrans, (int)$P.tp_cc_ticks);
+	}
+;
+
+/* TP4 only */
+TP_CLOSING		<==		TP_AKWAIT									TM_retrans
+	DEFAULT  /* out of time */
+	{
+		IncStat(ts_conn_gaveup);
+		tp_soisdisconnecting($P.tp_sock);
+		$P.tp_sock->so_error = ETIMEDOUT;
+		tp_indicate(T_DISCONNECT, $P, ETIMEDOUT);
+		(void) tp_emit(DR_TPDU_type, $P, 0, E_TP_CONGEST, MNULL);
+		$P.tp_retrans = $P.tp_Nretrans;
+		tp_ctimeout($P, TM_retrans, (int)$P.tp_dr_ticks);
+	}
+;
+
+/* the retrans timers had better go off BEFORE the inactivity timer does,
+ * if transmissions are going on.
+ * (i.e., TM_inact should be greater than timer for all retrans plus ack
+ * turnaround)
+ */
+/* TP4 only */
+TP_CLOSING 		<==		TP_OPEN		   [ TM_inact, TM_retrans, TM_data_retrans ]
+	DEFAULT
+	{
+		tp_euntimeout($P, TM_data_retrans); /* all */
+		tp_cuntimeout($P, TM_inact); 
+		tp_cuntimeout($P, TM_sendack);
+
+		IncStat(ts_conn_gaveup);
+		tp_soisdisconnecting($P.tp_sock);
+		$P.tp_sock->so_error = ETIMEDOUT;
+		tp_indicate(T_DISCONNECT, $P, ETIMEDOUT);
+		(void) tp_emit(DR_TPDU_type, $P, 0, E_TP_CONGEST_2, MNULL);
+		$P.tp_retrans = $P.tp_Nretrans;
+		tp_ctimeout($P, TM_retrans, (int)$P.tp_dr_ticks);
+	}
+;
+
+/* TP4 only: retransmit the pending XPD and back off the retrans timer */
+SAME			<==		TP_OPEN										TM_retrans
+	( $P.tp_retrans > 0 )
+	{
+		$P.tp_cong_win = 1 * $P.tp_l_tpdusize;
+		/* resume XPD */
+		if	( $P.tp_Xsnd.sb_mb )  {
+			struct mbuf *m = m_copy($P.tp_Xsnd.sb_mb, 0, (int)$P.tp_Xsnd.sb_cc);
+			int shift;
+
+			IFTRACE(D_XPD)
+				tptrace(TPPTmisc, "XPD retrans: Xuna Xsndnxt sndnxt snduna",
+					$P.tp_Xuna, $P.tp_Xsndnxt, $P.tp_sndnxt,
+					$P.tp_snduna);
+			ENDTRACE
+			IFDEBUG(D_XPD)
+				dump_mbuf(m, "XPD retrans emitting M");
+			ENDDEBUG
+			IncStat(ts_retrans_xpd);
+			$P.tp_retrans --;
+			shift = min($P.tp_Nretrans - $P.tp_retrans, 6); /* retries so far, capped at 6 */
+			(void) tp_emit(XPD_TPDU_type, $P, $P.tp_Xuna, 1, m);
+			tp_ctimeout($P, TM_retrans, ((int)$P.tp_dt_ticks) << shift);
+		}
+	}
+;
+
+/* TP4 only */
+SAME 			<==		TP_OPEN									TM_data_retrans
+	($P.tp_rxtshift < TP_NRETRANS)
+	{	
+		$P.tp_rxtshift++;
+		(void) tp_data_retrans($P);
+	}
+;
+
+/* TP4 only */
+SAME	 		<==		TP_CLOSING									TM_retrans
+	(	$P.tp_retrans > 0 )
+	{	
+		$P.tp_retrans --;
+		(void) tp_emit(DR_TPDU_type, $P, 0, E_TP_DR_NO_REAS, MNULL);
+		IncStat(ts_retrans_dr);
+		tp_ctimeout($P, TM_retrans, (int)$P.tp_dr_ticks);
+	}
+;
+
+/* TP4 only */
+TP_REFWAIT 		<==		TP_CLOSING									TM_retrans
+	DEFAULT	/* no more retrans - gave up */
+	{	
+		$P.tp_sock->so_error = ETIMEDOUT;
+		$P.tp_refstate = REF_FROZEN;
+		tp_recycle_tsuffix( $P );
+		tp_etimeout($P, TM_reference, (int)$P.tp_refer_ticks);
+	}
+;
+
+/*
+ * The resources are kept around until the ref timer goes off.
+ * The suffices are wiped out sooner so they can be reused right away.
+ */
+/* applicable in TP4, TP0 */
+TP_CLOSED 		<==		TP_REFWAIT 									TM_reference
+	DEFAULT
+	{
+		tp_freeref($P.tp_lref);
+		tp_detach($P);
+	}
+;
+
+/* applicable in TP4, TP0 */
+/* A duplicate CR from connectionless network layer can't happen */
+SAME 			<== 	TP_OPEN 							[ CR_TPDU, CC_TPDU ]
+	DEFAULT
+	{	
+		if( $P.tp_class != TP_CLASS_0) {
+			tp_ctimeout($P, TM_inact, (int)$P.tp_inact_ticks);
+			if ( $E.ev_number == CC_TPDU )
+				(void) tp_emit(AK_TPDU_type, $P, $P.tp_rcvnxt, 0, MNULL); 
+		}
+		/* ignore it if class 0 - state tables are blank for this */
+	}
+;
+
+/* applicable in TP4, TP0 */
+SAME			<== 	TP_OPEN									T_DATA_req
+	DEFAULT
+	{
+		IFTRACE(D_DATA)
+			tptrace(TPPTmisc, "T_DATA_req sndnxt snduna fcredit, tpcb",
+				$P.tp_sndnxt, $P.tp_snduna, $P.tp_fcredit, $P);
+		ENDTRACE
+
+		tp_send($P);
+	}
+;
+
+/* TP4 only */
+SAME			<==		TP_OPEN										T_XPD_req
+	DEFAULT
+		/* T_XPD_req was issued by sosend iff xpd socket buf was empty
+		 * at time of sosend(), 
+		 * AND (which means) there were no unacknowledged XPD tpdus outstanding!
+		 */
+	{
+		int error = 0;
+
+		/* resume XPD */
+		if	( $P.tp_Xsnd.sb_mb )  {
+			struct mbuf *m = m_copy($P.tp_Xsnd.sb_mb, 0, (int)$P.tp_Xsnd.sb_cc);
+			/* m_copy doesn't preserve the m_xlink field, but at this pt.
+			 * that doesn't matter
+			 */
+
+			IFTRACE(D_XPD)
+				tptrace(TPPTmisc, "XPD req: Xuna Xsndnxt sndnxt snduna",
+					$P.tp_Xuna, $P.tp_Xsndnxt, $P.tp_sndnxt, 
+					$P.tp_snduna); 
+			ENDTRACE
+			IFDEBUG(D_XPD)
+				printf("T_XPD_req: sb_cc 0x%x\n", $P.tp_Xsnd.sb_cc);
+				dump_mbuf(m, "XPD req emitting M");
+			ENDDEBUG
+			error = 
+				tp_emit(XPD_TPDU_type, $P, $P.tp_Xuna, 1, m);
+			$P.tp_retrans = $P.tp_Nretrans;
+
+			tp_ctimeout($P, TM_retrans, (int)$P.tp_rxtcur);
+			SEQ_INC($P, $P.tp_Xsndnxt);
+		} 
+		if(trick_hc)
+			return error;
+	}
+;
+
+/* TP4, faked ack in TP0 when cons send completes */
+SAME 			<==		TP_OPEN 									AK_TPDU
+	( tp_goodack($P, $$.e_cdt, $$.e_seq, $$.e_subseq)  )
+
+	/* tp_goodack == true means 
+	 * EITHER it actually acked something heretofore unacknowledged
+	 * OR no news but the credit should be processed.
+	 */
+	{
+		struct sockbuf *sb = &$P.tp_sock->so_snd;
+
+		IFDEBUG(D_ACKRECV)
+			printf("GOOD ACK seq 0x%x cdt 0x%x\n", $$.e_seq, $$.e_cdt);
+		ENDDEBUG
+		if( $P.tp_class != TP_CLASS_0) {
+			tp_ctimeout($P, TM_inact, (int)$P.tp_inact_ticks);
+		}
+		sbwakeup(sb);
+		IFDEBUG(D_ACKRECV)
+			printf("GOOD ACK new sndnxt 0x%x\n", $P.tp_sndnxt);
+		ENDDEBUG
+	}
+;
+
+/* TP4, and TP0 after sending a CC or possibly a CR */
+SAME			<==		TP_OPEN 			 						 AK_TPDU
+	DEFAULT
+	{
+		IFTRACE(D_ACKRECV)
+			tptrace(TPPTmisc, "BOGUS ACK fcc_present, tp_r_subseq e_subseq", 
+				$$.e_fcc_present, $P.tp_r_subseq, $$.e_subseq, 0);
+		ENDTRACE
+		if( $P.tp_class != TP_CLASS_0 ) {
+
+			if ( !$$.e_fcc_present ) {
+				/* send ACK with FCC */
+				IncStat( ts_ackreason[_ACK_FCC_] );
+				(void) tp_emit(AK_TPDU_type, $P, $P.tp_rcvnxt, 1, MNULL);
+			}
+			tp_ctimeout($P, TM_inact, (int)$P.tp_inact_ticks);
+		} 
+	}
+;
+
+/* NBS(47) */
+	/* goes in at *** */
+		/* just so happens that this is never true now, because we allow
+		 * only 1 packet in the queue at once (this could be changed)
+		if	( $P.tp_Xsnd.sb_mb )  {
+			struct mbuf *m = m_copy($P.tp_Xsnd.sb_mb, 0, ??);
+
+			(void) tp_emit(XPD_TPDU_type, $P, $P.tp_Xuna, 1, m);
+			$P.tp_retrans = $P.tp_Nretrans;
+			tp_ctimeout($P, TM_retrans, (int)$P.tp_xpd_ticks);
+			SEQ_INC($P, $P.tp_Xsndnxt);
+		}
+		 */
+	/* end of the above hack */
+
+/* TP4 only */
+SAME			<== 	TP_OPEN										XAK_TPDU
+	( tp_goodXack($P, $$.e_seq) )
+	/* tp_goodXack checks for good ack, removes the correct 
+	 * tpdu from the queue and  returns 1 if ack was legit, 0 if not.
+	 * also updates tp_Xuna
+	 */
+	{	
+		tp_ctimeout($P, TM_inact, (int)$P.tp_inact_ticks);
+		tp_cuntimeout($P, TM_retrans);
+
+		sbwakeup( &$P.tp_sock->so_snd );
+
+		/* resume normal data */
+		tp_send($P);
+	}
+;
+
+/* TP4, and TP0 after sending a CC or possibly a CR */
+SAME			<==		TP_OPEN 			 						XAK_TPDU
+	DEFAULT
+	{
+		IFTRACE(D_ACKRECV)
+			tptrace(TPPTmisc, "BOGUS XACK eventtype ", $E.ev_number, 0, 0,0);
+		ENDTRACE
+		if( $P.tp_class != TP_CLASS_0 ) {
+			tp_ctimeout($P, TM_inact, (int)$P.tp_inact_ticks);
+		} 
+	}
+;
+
+/* TP4 only */
+SAME			<==		TP_OPEN 								TM_sendack 
+	DEFAULT
+	{	
+		int timo;
+		IFTRACE(D_TIMER)
+			tptrace(TPPTsendack, -1, $P.tp_lcredit, $P.tp_sent_uwe, 
+			$P.tp_sent_lcdt, 0);
+		ENDTRACE
+		IncPStat($P, tps_n_TMsendack);
+		(void) tp_emit(AK_TPDU_type, $P, $P.tp_rcvnxt, 0, MNULL);
+		if ($P.tp_fcredit == 0) {
+			if ($P.tp_rxtshift < TP_MAXRXTSHIFT)
+				$P.tp_rxtshift++;
+			timo = ($P.tp_dt_ticks) << $P.tp_rxtshift;
+		} else
+			timo = $P.tp_sendack_ticks;
+		tp_ctimeout($P, TM_sendack, timo);
+	}
+;
+
+/* TP0 only */
+SAME			<==		TP_OPEN 									T_USR_rcvd
+	($P.tp_class == TP_CLASS_0)
+	{
+		if (sbspace(&$P.tp_sock->so_rcv) > 0)
+			tp0_openflow($P);
+	}
+;
+
+/* TP4 only */
+		/* If old credit was zero, 
+		 * we'd better inform other side that we now have space
+		 * But this is not enough.  Sender might not yet have
+		 * seen an ack with cdt 0 but it might still think the
+		 * window is closed, so it's going to wait.
+		 * Best to send an ack each time.
+		 * Strictly speaking, this ought to be a function of the
+		 * general ack strategy.
+		 */
+SAME			<==		TP_OPEN 									T_USR_rcvd
+	DEFAULT
+	{	
+		if( trick_hc ) {
+			SeqNum ack_thresh;
+			/*
+			 * If the upper window edge has advanced a reasonable
+			 * amount beyond what was known, send an ACK.
+			 * A reasonable amount is 2 packets, unless the max window
+			 * is only 1 or 2 packets, in which case we
+			 * should send an ack for any advance in the upper window edge.
+			 */
+			LOCAL_CREDIT($P);
+			ack_thresh = SEQ_SUB($P, $P.tp_lcredit + $P.tp_rcvnxt,
+									 ($P.tp_maxlcredit > 2 ? 2 : 1));
+			if (SEQ_GT($P, ack_thresh, $P.tp_sent_uwe)) {
+				IncStat(ts_ackreason[_ACK_USRRCV_]);
+				$P.tp_flags &= ~TPF_DELACK;
+				return tp_emit(AK_TPDU_type, $P, $P.tp_rcvnxt, 0, MNULL);
+			}
+		}
+	}
+;
+
+/* applicable in TP4, TP0 */
+SAME			<==		TP_REFWAIT 				[ T_USR_rcvd, T_USR_Xrcvd ]
+	DEFAULT
+	/* This happens if other end sent a DR when  the user was waiting 
+	 * on a receive.  
+	 * Processing the DR includes putting us in REFWAIT state.
+	 */
+	{
+		if(trick_hc)
+		return ECONNABORTED;
+	}
+;
+
+/* TP0 only */
+TP_REFWAIT		<==		[ TP_OPEN, TP_CRSENT, TP_LISTENING ] 	T_NETRESET
+	( $P.tp_class != TP_CLASS_4 ) 
+		/* 0 or (4 and 0) */
+		/* in OPEN class will be 0 or 4 but not both */
+		/* in CRSENT or LISTENING it could be in negotiation, hence both */
+		/* Actually, this shouldn't ever happen in LISTENING */
+	{
+		ASSERT( $P.tp_state != TP_LISTENING );
+		tp_indicate(T_DISCONNECT, $P, ECONNRESET);
+		tp_soisdisconnected($P);
+	}
+;
+
+/* TP4: ignore resets */
+SAME		<==		[ TP_OPEN, TP_CRSENT, TP_AKWAIT,
+						TP_CLOSING, TP_LISTENING ] 				T_NETRESET
+	DEFAULT
+	NULLACTION
+;
+ 
+/* applicable in TP4, TP0 */
+SAME			<==		[ TP_CLOSED, TP_REFWAIT ]				T_NETRESET
+	DEFAULT
+	NULLACTION
+;
+
+/* C'EST TOUT */
diff --git a/sys/netiso/tp_astring.c b/sys/netiso/tp_astring.c
new file mode 100644
index 00000000000..af08cebbc86
--- /dev/null
+++ b/sys/netiso/tp_astring.c
@@ -0,0 +1,74 @@
+/*-
+ * Copyright (c) 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)tp_astring.c	8.1 (Berkeley) 6/10/93
+ */
+
+char *tp_sstring[] = {	/* printable state names; entry i carries the value (0xi) in its text */
+"ST_ERROR(0x0)",
+"TP_CLOSED(0x1)",
+"TP_CRSENT(0x2)",
+"TP_AKWAIT(0x3)",
+"TP_OPEN(0x4)",
+"TP_CLOSING(0x5)",
+"TP_REFWAIT(0x6)",
+"TP_LISTENING(0x7)",
+"TP_CONFIRMING(0x8)",
+};
+
+char *tp_estring[] = {	/* printable event names; entry i carries the value (0xi) in its text */
+"TM_inact(0x0)",
+"TM_retrans(0x1)",
+"TM_sendack(0x2)",
+"TM_notused(0x3)",
+"TM_reference(0x4)",
+"TM_data_retrans(0x5)",
+"ER_TPDU(0x6)",
+"CR_TPDU(0x7)",
+"DR_TPDU(0x8)",
+"DC_TPDU(0x9)",
+"CC_TPDU(0xa)",
+"AK_TPDU(0xb)",
+"DT_TPDU(0xc)",
+"XPD_TPDU(0xd)",
+"XAK_TPDU(0xe)",
+"T_CONN_req(0xf)",
+"T_DISC_req(0x10)",
+"T_LISTEN_req(0x11)",
+"T_DATA_req(0x12)",
+"T_XPD_req(0x13)",
+"T_USR_rcvd(0x14)",
+"T_USR_Xrcvd(0x15)",
+"T_DETACH(0x16)",
+"T_NETRESET(0x17)",
+"T_ACPT_req(0x18)",
+};
diff --git a/sys/netiso/tp_clnp.h b/sys/netiso/tp_clnp.h
new file mode 100644
index 00000000000..81a7cffc13e
--- /dev/null
+++ b/sys/netiso/tp_clnp.h
@@ -0,0 +1,94 @@
+/*-
+ * Copyright (c) 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)tp_clnp.h	8.1 (Berkeley) 6/10/93
+ */
+
+/***********************************************************
+		Copyright IBM Corporation 1987
+
+                      All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and its 
+documentation for any purpose and without fee is hereby granted, 
+provided that the above copyright notice appear in all copies and that
+both that copyright notice and this permission notice appear in 
+supporting documentation, and that the name of IBM not be
+used in advertising or publicity pertaining to distribution of the
+software without specific, written prior permission.  
+
+IBM DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING
+ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL
+IBM BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR
+ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+SOFTWARE.
+
+******************************************************************/
+
+/*
+ * ARGO Project, Computer Sciences Dept., University of Wisconsin - Madison
+ */
+/* 
+ * ARGO TP
+ *
+ * $Header: tp_clnp.h,v 5.1 88/10/12 12:16:36 root Exp $
+ * $Source: /usr/argo/sys/netiso/RCS/tp_clnp.h,v $
+ *
+ * AF_ISO net-dependent structures and include files
+ *
+ */
+
+
+#ifndef __TP_CLNP__
+#define __TP_CLNP__
+
+#ifndef SOCK_STREAM
+#include <sys/socket.h>
+#endif /* SOCK_STREAM */
+
+#ifndef RTFREE
+#include <net/route.h>
+#endif
+#include <netiso/iso.h>
+#include <netiso/clnp.h>
+#include <netiso/iso_pcb.h>
+#ifndef IF_DEQUEUE
+#include <net/if.h>
+#endif
+#include <netiso/iso_var.h>
+
+struct isopcb tp_isopcb;	
+	/* queue of active isopcbs for tp.  NOTE(review): defined (not extern) in a header; old "dod ip" wording looked copied from the IP variant -- confirm */
+
+#endif /* __TP_CLNP__ */
diff --git a/sys/netiso/tp_cons.c b/sys/netiso/tp_cons.c
new file mode 100644
index 00000000000..797ee9ef829
--- /dev/null
+++ b/sys/netiso/tp_cons.c
@@ -0,0 +1,308 @@
+/*-
+ * Copyright (c) 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)tp_cons.c	8.1 (Berkeley) 6/10/93
+ */
+
+/***********************************************************
+		Copyright IBM Corporation 1987
+
+                      All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and its 
+documentation for any purpose and without fee is hereby granted, 
+provided that the above copyright notice appear in all copies and that
+both that copyright notice and this permission notice appear in 
+supporting documentation, and that the name of IBM not be
+used in advertising or publicity pertaining to distribution of the
+software without specific, written prior permission.  
+
+IBM DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING
+ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL
+IBM BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR
+ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+SOFTWARE.
+
+******************************************************************/
+
+/*
+ * ARGO Project, Computer Sciences Dept., University of Wisconsin - Madison
+ */
+/* 
+ * ARGO TP
+ * $Header: tp_cons.c,v 5.6 88/11/18 17:27:13 nhall Exp $
+ * $Source: /usr/argo/sys/netiso/RCS/tp_cons.c,v $
+ *
+ * Here is where you find the iso- and cons-dependent code.  We've tried to
+ * keep all net-level and (primarily) address-family-dependent stuff
+ * out of the tp source, and everything here is reached indirectly
+ * through a switch table (struct nl_protosw *) tpcb->tp_nlproto 
+ * (see tp_pcb.c). 
+ * The routines here are:
+ *	tpcons_input: pullup and call tp_input w/ correct arguments
+ *	tpcons_output: package a pkt for cons given an isopcb & some data
+ *	cons_chan_to_tpcb: find a tpcb based on the channel #
+ */
+
+#ifdef ISO
+#ifdef TPCONS
+
+#include <sys/param.h>
+#include <sys/socket.h>
+#include <sys/domain.h>
+#include <sys/mbuf.h>
+#include <sys/errno.h>
+#include <sys/time.h>
+
+#include <net/if.h>
+#include <net/route.h>
+
+#include <netiso/tp_param.h>
+#include <netiso/argo_debug.h>
+#include <netiso/tp_stat.h>
+#include <netiso/tp_pcb.h>
+#include <netiso/tp_trace.h>
+#include <netiso/tp_stat.h>
+#include <netiso/tp_tpdu.h>
+#include <netiso/iso.h>
+#include <netiso/iso_errno.h>
+#include <netiso/iso_pcb.h>
+#include <netiso/cons.h>
+#include <netiso/tp_seq.h>
+
+#undef FALSE
+#undef TRUE
+#include <netccitt/x25.h>
+#include <netccitt/pk.h>
+#include <netccitt/pk_var.h>
+
+#include <netiso/if_cons.c>
+int tpcons_output();
+
+/*
+ * CALLED FROM:
+ *  tp_route_to() for PRU_CONNECT
+ * FUNCTION, ARGUMENTS, SIDE EFFECTS and RETURN VALUE:
+ *  iso_pcbconnect() isop to nam, attach a pklcd as isop_chan, cons_connect().
+ *  Returns 0, ENOBUFS, or the error from iso_pcbconnect()/cons_connect().
+ */
+int
+tpcons_pcbconnect(isop, nam)
+struct isopcb *isop;
+register struct mbuf *nam;
+{
+	int error;
+	if ((error = iso_pcbconnect(isop, nam)) != 0)
+		return error;
+	if ((isop->isop_chan = (caddr_t) pk_attach((struct socket *)0)) == 0) {
+		IFDEBUG(D_CCONS)
+			printf("tpcons_pcbconnect: no pklcd; returns 0x%x\n", ENOBUFS);
+		ENDDEBUG
+		return ENOBUFS;	/* no pklcd could be attached */
+	}
+	if ((error = cons_connect(isop)) != 0) { /* failed: give the channel back */
+		pk_disconnect((struct pklcd *)isop->isop_chan);
+		isop->isop_chan = 0;
+	} else
+		isop->isop_refcnt = 1;
+	return error;
+}
+
+
+/*
+ * CALLED FROM:
+ * 	cons
+ * FUNCTION and ARGUMENTS:
+ * Control-input hook: handles cmd for isop (siso only goes to tpclnp_ctlinput).
+ */
+ProtoHook
+tpcons_ctlinput(cmd, siso, isop)
+	int cmd; 
+	struct sockaddr_iso *siso;
+	struct isopcb *isop;
+{
+	register struct tp_pcb *tpcb = 0;	/* stays 0 when isop has no socket */
+
+	if (isop->isop_socket)
+		tpcb = (struct tp_pcb *)isop->isop_socket->so_pcb;
+	switch (cmd) {
+
+	case PRC_CONS_SEND_DONE:
+		if (tpcb) {
+			struct 	tp_event 		E;
+			int 					error = 0;
+
+			if (tpcb->tp_class == TP_CLASS_0) {
+				/* only if class is exactly class zero, not
+				 * still in class negotiation
+				 */
+				/* fake an ack */
+				register SeqNum	seq =  SEQ_ADD(tpcb, tpcb->tp_snduna, 1);
+
+				IFTRACE(D_DATA)
+					tptrace(TPPTmisc, "FAKE ACK seq cdt 1", 
+						seq, 0,0,0);
+				ENDTRACE
+				IFDEBUG(D_DATA)
+					printf("FAKE ACK seq 0x%x cdt 1\n", seq );
+				ENDDEBUG
+				E.ATTR(AK_TPDU).e_cdt = 1;
+				E.ATTR(AK_TPDU).e_seq = seq;
+				E.ATTR(AK_TPDU).e_subseq = 0;
+				E.ATTR(AK_TPDU).e_fcc_present = 0;
+				error =  DoEvent(AK_TPDU);	/* NOTE(review): DoEvent is defined elsewhere; presumably a macro using locals E and tpcb -- confirm */
+				if( error ) {
+					tpcb->tp_sock->so_error = error;
+				}
+			} /* else ignore it */
+		}
+		break;
+	case PRC_ROUTEDEAD:
+		if (tpcb && tpcb->tp_class == TP_CLASS_0) {
+			tpiso_reset(isop);
+			break;
+		} /* else drop through */
+	default:
+		(void) tpclnp_ctlinput(cmd, siso);	/* all other cases go to tpclnp_ctlinput(); its result is discarded */
+		break;
+	}
+	return 0;	/* always 0; a DoEvent error is stored in so_error instead */
+}
+
+/*
+ * CALLED FROM:
+ * 	cons's intr routine
+ * FUNCTION and ARGUMENTS:
+ * Take a packet (m) from cons, run it through tp_inputprep(), and call
+ *  tp_input() with faddr, laddr, channel and tpcons_output.
+ * Always returns 0; a null m is ignored.
+ */
+ProtoHook
+tpcons_input(m, faddr, laddr, channel)
+	struct mbuf 		*m;
+	struct sockaddr_iso	*faddr, *laddr;
+	caddr_t				channel;
+{
+	if( m == MNULL)
+		return 0;
+
+	m = (struct mbuf *)tp_inputprep(m);
+
+	IFDEBUG(D_TPINPUT)
+		printf("tpcons_input before tp_input(m 0x%x)\n", m);
+		dump_buf( m, 12+ m->m_len);	/* NOTE(review): m from tp_inputprep() is dereferenced without a null check */
+	ENDDEBUG
+	tp_input(m, faddr, laddr, channel, tpcons_output, 0);
+	return 0;
+}
+
+
+/*
+ * CALLED FROM:
+ *  tp_emit()
+ * FUNCTION and ARGUMENTS:
+ *  Take a packet(m0) from tp and package it so that cons will accept it.
+ *  This means prepending a packet header mbuf if needed and setting its length.
+ *  isop is the isopcb structure; datalen is the length of the data in the
+ *  mbuf string m0; nochksum is not used here.
+ * RETURN VALUE:
+ *  0 for a null m0, ENOBUFS, or what cons_connect()/pk_send() returns.
+ */
+int
+tpcons_output(isop, m0, datalen, nochksum)
+	struct isopcb		*isop;
+	struct mbuf 		*m0;
+	int 				datalen;
+	int					nochksum;
+{
+	register	struct mbuf *m = m0;
+	int					error;
+
+	IFDEBUG(D_EMIT)
+		printf(
+		"tpcons_output(isop 0x%x, m 0x%x, len 0x%x socket 0x%x\n",
+			isop, m0, datalen, isop->isop_socket);
+	ENDDEBUG
+	if (m == MNULL)
+		return 0;
+	if ((m->m_flags & M_PKTHDR) == 0) {
+		MGETHDR(m, M_DONTWAIT, MT_DATA);
+		if (m == 0)
+			return ENOBUFS;	/* NOTE(review): m0 is not freed here -- confirm the caller owns it */
+		m->m_len = 0;	/* new header mbuf carries no data of its own */
+		m->m_next = m0;
+	}
+	m->m_pkthdr.len = datalen;
+	if (isop->isop_chan == 0) {
+		/* got a restart maybe?  NOTE(review): m is neither sent nor freed on this path -- confirm */
+		if ((isop->isop_chan = (caddr_t) pk_attach((struct socket *)0)) == 0) {
+			IFDEBUG(D_CCONS)
+				printf("tpcons_output: no pklcd\n");
+			ENDDEBUG
+			error = ENOBUFS;
+		} else if ((error = cons_connect(isop)) != 0) {
+			/* reconnect failed: release the channel that was just attached */
+			pk_disconnect((struct pklcd *)isop->isop_chan);
+			isop->isop_chan = 0;
+			IFDEBUG(D_CCONS)
+				printf("tpcons_output: can't reconnect\n");
+			ENDDEBUG
+		}
+	} else {
+		error = pk_send(isop->isop_chan, m);
+		IncStat(ts_tpdu_sent);
+	}
+	return error;
+}
+/*
+ * CALLED FROM:
+ *  tp_error_emit()
+ * FUNCTION and ARGUMENTS:
+ *  Wrapper around tpcons_output(): chan is treated as a struct pklcd and
+ *  its lcd_upnext is passed as the first argument; m0 and datalen are
+ *  passed through unchanged and the nochksum argument is 0.
+ * RETURN VALUE:
+ *  whatever tpcons_output() returns.
+ */
+int
+tpcons_dg_output(chan, m0, datalen)
+	caddr_t				chan;
+	struct mbuf 		*m0;
+	int 				datalen;
+{
+	struct pklcd *lcd = (struct pklcd *)chan;
+	return tpcons_output(lcd->lcd_upnext, m0, datalen, 0);
+}
+#endif /* TPCONS */
+#endif /* ISO */
diff --git a/sys/netiso/tp_driver.c b/sys/netiso/tp_driver.c
new file mode 100644
index 00000000000..586ef4e2ade
--- /dev/null
+++ b/sys/netiso/tp_driver.c
@@ -0,0 +1,999 @@
+/* $Header$ */
+/* $Source$ */
+/* Revision-control identification string; compiled out under lint. */
+#ifndef lint
+static char *rcsid = "$Header/**/$";
+#endif /* lint */
+#define _XEBEC_PG static
+
+#include "tp_states.h"
+
+/*
+ * Table of transitions, indexed by the value tp_driver() obtains from
+ * inx[][] or _Xebec_index().  For each entry:
+ *	a_newstate	value stored into the pcb's tp_state when the action
+ *			returns 0;
+ *	a_action	number passed to _Xebec_action(); 0 means there is no
+ *			action to run.
+ * Slot 0 is the {0,0} placeholder written here; the remaining entries
+ * come from the included tp_states.init.
+ */
+static struct act_ent {
+	int a_newstate;
+	int a_action;
+} statetable[] = { {0,0},
+#include "tp_states.init"
+};
+
+/* @(#)tp.trans	8.1 (Berkeley) 6/10/93 */
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/protosw.h>
+#include <sys/mbuf.h>
+#include <sys/time.h>
+#include <sys/errno.h>
+
+#include <netiso/tp_param.h>
+#include <netiso/tp_stat.h>
+#include <netiso/tp_pcb.h>
+#include <netiso/tp_tpdu.h>
+#include <netiso/argo_debug.h>
+#include <netiso/tp_trace.h>
+#include <netiso/iso_errno.h>
+#include <netiso/tp_seq.h>
+#include <netiso/cons.h>
+
+/* Trace event type used by tp_driver() when it logs a transition. */
+#define DRIVERTRACE TPPTdriver
+/*
+ * Call sowakeup() for sockbuf sb on the pcb's socket.  The expansion
+ * refers to a variable named `p' that must be in scope at the point of
+ * use.  The expansion carries no semicolon of its own, so
+ * "sbwakeup(x);" is exactly one statement and is safe in an unbraced
+ * if/else.
+ */
+#define sbwakeup(sb)	sowakeup(p->tp_sock, (sb))
+/*
+ * Copy the whole mbuf chain d with m_copym(), passing w as the wait flag;
+ * yields 0 when d is null.  d is evaluated twice, so it must not have
+ * side effects.
+ */
+#define MCPY(d, w) ((d) ? m_copym((d), 0, (int)M_COPYALL, (w)) : 0)
+
+/*
+ * Always 1.  The actions test it before returning from the middle of a
+ * case -- presumably so the generated "break" that follows is not flagged
+ * as unreachable (TODO confirm).
+ */
+static int	trick_hc = 1;
+
+/* Routines used by the actions below that return int. */
+int		tp_emit();
+int		tp_goodack();
+int		tp_goodXack();
+int		tp_stash();
+
+/* Routines used by the actions below that return nothing. */
+void	tp_indicate();
+void	tp_getoptions();
+void	tp_soisdisconnecting();
+void	tp_soisdisconnected();
+void	tp_recycle_tsuffix();
+#ifdef TP_DEBUG_TIMERS
+/* only declared as functions when timer debugging is compiled in */
+void	tp_etimeout();
+void	tp_euntimeout();
+void	tp_ctimeout();
+void	tp_cuntimeout();
+void	tp_ctimeout_MIN();
+#endif
+void	tp_freeref();
+void	tp_detach();
+void	tp0_stash();
+void	tp0_send();
+void	tp_netcmd();
+void	tp_send();
+
+/* Names the generated driver code uses for the transport pcb type. */
+typedef struct tp_pcb	tpcb_struct;
+typedef tpcb_struct		tp_PCB_;
+
+#include "tp_events.h"
+
+/*
+ * _Xebec_action: run action number `a' for event `e' on connection `p'.
+ *
+ *  a	action number; tp_driver() takes it from the a_action field of the
+ *	transition it selected.  -1 selects tp_protocol_error().
+ *  e	the event being processed.  Which member of e->ev_union is read
+ *	depends on the action.
+ *  p	the transport pcb the event applies to.
+ *
+ * RETURN VALUE:
+ *  0, or an error value.  tp_driver() installs the transition's new state
+ *  only when 0 is returned, so an action that returns nonzero leaves
+ *  tp_state as it was.  An action number with no case returns 0.
+ *
+ * Several actions set an e_data pointer in the event to MNULL after
+ * queueing the mbufs on a sockbuf; presumably this keeps the caller from
+ * freeing data that is now owned by the sockbuf (TODO confirm).
+ */
+_XEBEC_PG int _Xebec_action(a,e,p)
+int a;
+struct tp_event *e;
+tp_PCB_ *p;
+{
+switch(a) {
+case -1:  return tp_protocol_error(e,p);
+case 0x1:
+		{
+		/* answer with a DC TPDU */
+		(void) tp_emit(DC_TPDU_type, p, 0, 0, MNULL);
+	}
+		 break;
+case 0x2:
+		{
+		/* nothing to do; with TP_DEBUG, complain unless it was an AK */
+#		ifdef TP_DEBUG
+		if( e->ev_number != AK_TPDU )
+			printf("TPDU 0x%x in REFWAIT!!!!\n", e->ev_number);
+#		endif /* TP_DEBUG */
+	}
+		 break;
+case 0x3:
+		{
+		/* oh, man is this grotesque or what? */
+		(void) tp_goodack(p, e->ev_union.EV_AK_TPDU.e_cdt, e->ev_union.EV_AK_TPDU.e_seq,  e->ev_union.EV_AK_TPDU.e_subseq);
+		/* but it's necessary because this pseudo-ack may happen
+		 * before the CC arrives, but we HAVE to adjust the
+		 * snduna as a result of the ack, WHENEVER it arrives
+		 */
+	}
+		 break;
+case 0x4:
+		{
+		tp_detach(p);
+	}
+		 break;
+case 0x5:
+		{
+		p->tp_refstate = REF_OPEN; /* has timers ??? */
+	}
+		 break;
+case 0x6:
+		{
+		/* CR received: note the peer's credit and keep any CR data */
+		IFTRACE(D_CONN)
+			tptrace(TPPTmisc, "CR datalen data", e->ev_union.EV_CR_TPDU.e_datalen, e->ev_union.EV_CR_TPDU.e_data,0,0);
+		ENDTRACE
+		IFDEBUG(D_CONN)
+			printf("CR datalen 0x%x data 0x%x", e->ev_union.EV_CR_TPDU.e_datalen, e->ev_union.EV_CR_TPDU.e_data);
+		ENDDEBUG
+		p->tp_refstate = REF_OPEN; /* has timers */
+		p->tp_fcredit = e->ev_union.EV_CR_TPDU.e_cdt;
+
+		if (e->ev_union.EV_CR_TPDU.e_datalen > 0) {
+			/* n/a for class 0 */
+			ASSERT(p->tp_Xrcv.sb_cc == 0);
+			sbappendrecord(&p->tp_Xrcv, e->ev_union.EV_CR_TPDU.e_data);
+			e->ev_union.EV_CR_TPDU.e_data = MNULL;
+		}
+	}
+		 break;
+case 0x7:
+		{
+		/* counted as a class 0 connection: connected at once, send CC */
+		IncStat(ts_tp0_conn);
+		IFTRACE(D_CONN)
+			tptrace(TPPTmisc, "Confiming", p, 0,0,0);
+		ENDTRACE
+		IFDEBUG(D_CONN)
+			printf("Confirming connection: p" );
+		ENDDEBUG
+		soisconnected(p->tp_sock);
+		(void) tp_emit(CC_TPDU_type, p, 0,0, MNULL) ;
+		p->tp_fcredit = 1;
+	}
+		 break;
+case 0x8:
+		{
+		/*
+		 * Counted as a class 4 connection: only "connecting" so far.
+		 * Arm the retransmission timer with the CC interval.
+		 */
+		IncStat(ts_tp4_conn); /* even though not quite open */
+		IFTRACE(D_CONN)
+			tptrace(TPPTmisc, "Confiming", p, 0,0,0);
+		ENDTRACE
+		IFDEBUG(D_CONN)
+			printf("Confirming connection: p" );
+		ENDDEBUG
+		tp_getoptions(p);
+		soisconnecting(p->tp_sock);
+		if ((p->tp_rx_strat & TPRX_FASTSTART) && (p->tp_fcredit > 0))
+			p->tp_cong_win = p->tp_fcredit * p->tp_l_tpdusize;
+		p->tp_retrans = p->tp_Nretrans;
+		tp_ctimeout(p, TM_retrans, (int)p->tp_cc_ticks);
+	}
+		 break;
+case 0x9:
+		{
+		/* the CC could not be emitted: tear everything down */
+		IFDEBUG(D_CONN)
+			printf("event: CR_TPDU emit CC failed done " );
+		ENDDEBUG
+		soisdisconnected(p->tp_sock);
+		tp_recycle_tsuffix(p);
+		tp_freeref(p->tp_lref);
+		tp_detach(p);
+	}
+		 break;
+case 0xa:
+		{
+		/* connect request: send a CR carrying a copy of the user data */
+		int error;
+		struct mbuf *data = MNULL;
+
+		IFTRACE(D_CONN)
+			tptrace(TPPTmisc, "T_CONN_req flags ucddata", (int)p->tp_flags,
+			p->tp_ucddata, 0, 0);
+		ENDTRACE
+		data =  MCPY(p->tp_ucddata, M_WAIT);
+		if (data) {
+			IFDEBUG(D_CONN)
+				printf("T_CONN_req.trans m_copy cc 0x%x\n",
+					p->tp_ucddata);
+				dump_mbuf(data, "sosnd @ T_CONN_req");
+			ENDDEBUG
+		}
+
+		if ((error = tp_emit(CR_TPDU_type, p, 0, 0, data)) != 0)
+			return error; /* driver WON'T change state; will return error */
+
+		p->tp_refstate = REF_OPEN; /* has timers */
+		if(p->tp_class != TP_CLASS_0) {
+			p->tp_retrans = p->tp_Nretrans;
+			tp_ctimeout(p, TM_retrans, (int)p->tp_cr_ticks);
+		}
+	}
+		 break;
+case 0xb:
+		{
+		/*
+		 * DR received: replace anything queued on tp_Xrcv with the
+		 * disconnect data, tell the user, and stop the timers.
+		 */
+		sbflush(&p->tp_Xrcv); /* purge non-delivered data data */
+		if (e->ev_union.EV_DR_TPDU.e_datalen > 0) {
+			sbappendrecord(&p->tp_Xrcv, e->ev_union.EV_DR_TPDU.e_data);
+			e->ev_union.EV_DR_TPDU.e_data = MNULL;
+		}
+		if (p->tp_state == TP_OPEN)
+			tp_indicate(T_DISCONNECT, p, 0);
+		else {
+			/*
+			 * Not open yet: report "refused" for the three reasons
+			 * tested below and "aborted" for every other reason.
+			 */
+			int so_error = ECONNREFUSED;
+			if (e->ev_union.EV_DR_TPDU.e_reason != (E_TP_NO_SESSION ^ TP_ERROR_MASK) &&
+			    e->ev_union.EV_DR_TPDU.e_reason != (E_TP_NO_CR_ON_NC ^ TP_ERROR_MASK) &&
+			    e->ev_union.EV_DR_TPDU.e_reason != (E_TP_REF_OVERFLOW ^ TP_ERROR_MASK))
+				so_error = ECONNABORTED;
+			tp_indicate(T_DISCONNECT, p, so_error);
+		}
+		tp_soisdisconnected(p);
+		if (p->tp_class != TP_CLASS_0) {
+			if (p->tp_state == TP_OPEN ) {
+				tp_euntimeout(p, TM_data_retrans); /* all */
+				tp_cuntimeout(p, TM_retrans);
+				tp_cuntimeout(p, TM_inact);
+				tp_cuntimeout(p, TM_sendack);
+				p->tp_flags &= ~TPF_DELACK;
+			}
+			tp_cuntimeout(p, TM_retrans);
+			if( e->ev_union.EV_DR_TPDU.e_sref !=  0 )
+				(void) tp_emit(DC_TPDU_type, p, 0, 0, MNULL);
+		}
+	}
+		 break;
+case 0xc:
+		{
+		if( e->ev_union.EV_DR_TPDU.e_sref != 0 )
+			(void) tp_emit(DC_TPDU_type, p, 0, 0, MNULL);
+		/* reference timer already set - reset it to be safe (???) */
+		tp_euntimeout(p, TM_reference); /* all */
+		tp_etimeout(p, TM_reference, (int)p->tp_refer_ticks);
+	}
+		 break;
+case 0xd:
+		{
+		tp_cuntimeout(p, TM_retrans);
+		tp_indicate(ER_TPDU, p, e->ev_union.EV_ER_TPDU.e_reason);
+		tp_soisdisconnected(p);
+	}
+		 break;
+case 0xe:
+		{
+		tp_cuntimeout(p, TM_retrans);
+		tp_soisdisconnected(p);
+	}
+		 break;
+case 0xf:
+		{
+		tp_indicate(ER_TPDU, p, e->ev_union.EV_ER_TPDU.e_reason);
+		tp_cuntimeout(p, TM_retrans);
+		tp_soisdisconnected(p);
+	}
+		 break;
+case 0x10:
+		{
+		tp_cuntimeout(p, TM_retrans);
+		tp_soisdisconnected(p);
+	}
+		 break;
+case 0x11:
+		{	/* don't ask me why we have to do this - spec says so */
+		(void) tp_emit(DR_TPDU_type, p, 0, E_TP_NO_SESSION, MNULL);
+		/* don't bother with retransmissions of the DR */
+	}
+		 break;
+case 0x12:
+		{
+		tp_soisdisconnecting(p->tp_sock);
+		tp_indicate(ER_TPDU, p, e->ev_union.EV_ER_TPDU.e_reason);
+		tp_soisdisconnected(p);
+		tp_netcmd( p, CONN_CLOSE );
+	}
+		 break;
+case 0x13:
+		{
+		/* report the error, then start a DR exchange with retransmission */
+		if (p->tp_state == TP_OPEN) {
+			tp_euntimeout(p, TM_data_retrans); /* all */
+			tp_cuntimeout(p, TM_inact);
+			tp_cuntimeout(p, TM_sendack);
+		}
+		tp_soisdisconnecting(p->tp_sock);
+		tp_indicate(ER_TPDU, p, e->ev_union.EV_ER_TPDU.e_reason);
+		p->tp_retrans = p->tp_Nretrans;
+		tp_ctimeout(p, TM_retrans, (int)p->tp_dr_ticks);
+		(void) tp_emit(DR_TPDU_type, p, 0, E_TP_PROTO_ERR, MNULL);
+	}
+		 break;
+case 0x14:
+		{
+		tp_cuntimeout(p, TM_retrans);
+		IncStat(ts_tp0_conn);
+		p->tp_fcredit = 1;
+		soisconnected(p->tp_sock);
+	}
+		 break;
+case 0x15:
+		{
+		/*
+		 * CC received: record the peer's reference and credit, drop our
+		 * saved connect data, queue any data the CC carried, and ack.
+		 */
+		IFDEBUG(D_CONN)
+			printf("trans: CC_TPDU in CRSENT state flags 0x%x\n",
+				(int)p->tp_flags);
+		ENDDEBUG
+		IncStat(ts_tp4_conn);
+		p->tp_fref = e->ev_union.EV_CC_TPDU.e_sref;
+		p->tp_fcredit = e->ev_union.EV_CC_TPDU.e_cdt;
+		if ((p->tp_rx_strat & TPRX_FASTSTART) && (e->ev_union.EV_CC_TPDU.e_cdt > 0))
+			p->tp_cong_win = e->ev_union.EV_CC_TPDU.e_cdt * p->tp_l_tpdusize;
+		tp_getoptions(p);
+		tp_cuntimeout(p, TM_retrans);
+		if (p->tp_ucddata) {
+			IFDEBUG(D_CONN)
+				printf("dropping user connect data cc 0x%x\n",
+					p->tp_ucddata->m_len);
+			ENDDEBUG
+			m_freem(p->tp_ucddata);
+			p->tp_ucddata = 0;
+		}
+		soisconnected(p->tp_sock);
+		if (e->ev_union.EV_CC_TPDU.e_datalen > 0) {
+			ASSERT(p->tp_Xrcv.sb_cc == 0); /* should be empty */
+			sbappendrecord(&p->tp_Xrcv, e->ev_union.EV_CC_TPDU.e_data);
+			e->ev_union.EV_CC_TPDU.e_data = MNULL;
+		}
+
+		(void) tp_emit(AK_TPDU_type, p, p->tp_rcvnxt, 0, MNULL);
+		tp_ctimeout(p, TM_inact, (int)p->tp_inact_ticks);
+	}
+		 break;
+case 0x16:
+		{
+		/* retransmit the CR; give up for now if the data can't be copied */
+		struct mbuf *data = MNULL;
+		int error;
+
+		IncStat(ts_retrans_cr);
+		p->tp_cong_win = 1 * p->tp_l_tpdusize;
+		data = MCPY(p->tp_ucddata, M_NOWAIT);
+		if(p->tp_ucddata) {
+			IFDEBUG(D_CONN)
+				printf("TM_retrans.trans m_copy cc 0x%x\n", data);
+				dump_mbuf(p->tp_ucddata, "sosnd @ TM_retrans");
+			ENDDEBUG
+			if( data == MNULL )
+				return ENOBUFS;
+		}
+
+		p->tp_retrans --;
+		if ((error = tp_emit(CR_TPDU_type, p, 0, 0, data)) != 0) {
+			p->tp_sock->so_error = error;
+		}
+		tp_ctimeout(p, TM_retrans, (int)p->tp_cr_ticks);
+	}
+		 break;
+case 0x17:
+		{
+		IncStat(ts_conn_gaveup);
+		p->tp_sock->so_error = ETIMEDOUT;
+		tp_indicate(T_DISCONNECT, p, ETIMEDOUT);
+		tp_soisdisconnected(p);
+	}
+		 break;
+case 0x18:
+		{
+		/* send a CC with a copy of the user data and start retransmission */
+		int error;
+		struct mbuf *data = MCPY(p->tp_ucddata, M_WAIT);
+
+		if ((error = tp_emit(CC_TPDU_type, p, 0, 0, data)) != 0) {
+			p->tp_sock->so_error = error;
+		}
+		p->tp_retrans = p->tp_Nretrans;
+		tp_ctimeout(p, TM_retrans, (int)p->tp_cc_ticks);
+	}
+		 break;
+case 0x19:
+		{
+		int doack;
+
+		/*
+		 * Get rid of any confirm or connect data, so that if we
+		 * crash or close, it isn't thought of as disconnect data.
+		 */
+		if (p->tp_ucddata) {
+			m_freem(p->tp_ucddata);
+			p->tp_ucddata = 0;
+		}
+		tp_ctimeout(p, TM_inact, (int)p->tp_inact_ticks);
+		tp_cuntimeout(p, TM_retrans);
+		soisconnected(p->tp_sock);
+		tp_ctimeout(p, TM_inact, (int)p->tp_inact_ticks);
+
+		/* see also next 2 transitions, if you make any changes */
+
+		doack = tp_stash(p, e);
+		IFDEBUG(D_DATA)
+			printf("tp_stash returns %d\n",doack);
+		ENDDEBUG
+
+		/* ack now if tp_stash() asked for it, otherwise after a delay */
+		if (doack) {
+			(void) tp_emit(AK_TPDU_type, p, p->tp_rcvnxt, 0, MNULL );
+			tp_ctimeout(p, TM_sendack, (int)p->tp_keepalive_ticks);
+		} else
+			tp_ctimeout( p, TM_sendack, (int)p->tp_sendack_ticks);
+
+		IFDEBUG(D_DATA)
+			printf("after stash calling sbwakeup\n");
+		ENDDEBUG
+	}
+		 break;
+case 0x1a:
+		{
+		tp0_stash(p, e);
+		sbwakeup( &p->tp_sock->so_rcv );
+
+		IFDEBUG(D_DATA)
+			printf("after stash calling sbwakeup\n");
+		ENDDEBUG
+	}
+		 break;
+case 0x1b:
+		{
+		int doack; /* tells if we must ack immediately */
+
+		tp_ctimeout(p, TM_inact, (int)p->tp_inact_ticks);
+		sbwakeup( &p->tp_sock->so_rcv );
+
+		doack = tp_stash(p, e);
+		IFDEBUG(D_DATA)
+			printf("tp_stash returns %d\n",doack);
+		ENDDEBUG
+
+		if(doack)
+			(void) tp_emit(AK_TPDU_type, p, p->tp_rcvnxt, 0, MNULL );
+		else
+			tp_ctimeout_MIN( p, TM_sendack, (int)p->tp_sendack_ticks);
+
+		IFDEBUG(D_DATA)
+			printf("after stash calling sbwakeup\n");
+		ENDDEBUG
+	}
+		 break;
+case 0x1c:
+		{
+		/* DT counted as not-in-window: discard it and re-send our ack */
+		IFTRACE(D_DATA)
+			tptrace(TPPTmisc, "NIW seq rcvnxt lcredit ",
+				e->ev_union.EV_DT_TPDU.e_seq, p->tp_rcvnxt, p->tp_lcredit, 0);
+		ENDTRACE
+		IncStat(ts_dt_niw);
+		m_freem(e->ev_union.EV_DT_TPDU.e_data);
+		tp_ctimeout(p, TM_inact, (int)p->tp_inact_ticks);
+		(void) tp_emit(AK_TPDU_type, p, p->tp_rcvnxt, 0, MNULL );
+	}
+		 break;
+case 0x1d:
+		{
+		/* an ack completes the connection: drop saved data, mark connected */
+		if (p->tp_ucddata) {
+			m_freem(p->tp_ucddata);
+			p->tp_ucddata = 0;
+		}
+		(void) tp_goodack(p, e->ev_union.EV_AK_TPDU.e_cdt, e->ev_union.EV_AK_TPDU.e_seq, e->ev_union.EV_AK_TPDU.e_subseq);
+		tp_cuntimeout(p, TM_retrans);
+
+		soisconnected(p->tp_sock);
+		IFTRACE(D_CONN)
+			struct socket *so = p->tp_sock;
+			tptrace(TPPTmisc,
+			"called sosiconn: so so_state rcv.sb_sel rcv.sb_flags",
+				so, so->so_state, so->so_rcv.sb_sel, so->so_rcv.sb_flags);
+			tptrace(TPPTmisc,
+			"called sosiconn 2: so_qlen so_error so_rcv.sb_cc so_head",
+				so->so_qlen, so->so_error, so->so_rcv.sb_cc, so->so_head);
+		ENDTRACE
+
+		tp_ctimeout(p, TM_sendack, (int)p->tp_keepalive_ticks);
+		tp_ctimeout(p, TM_inact, (int)p->tp_inact_ticks);
+	}
+		 break;
+case 0x1e:
+		{
+		/*
+		 * Expedited data accepted.  If we were still waiting for the
+		 * ack of our CC, this also completes the connection.
+		 */
+		if( p->tp_state == TP_AKWAIT ) {
+			if (p->tp_ucddata) {
+				m_freem(p->tp_ucddata);
+				p->tp_ucddata = 0;
+			}
+			tp_cuntimeout(p, TM_retrans);
+			soisconnected(p->tp_sock);
+			tp_ctimeout(p, TM_sendack, (int)p->tp_keepalive_ticks);
+			tp_ctimeout(p, TM_inact, (int)p->tp_inact_ticks);
+		}
+		IFTRACE(D_XPD)
+		tptrace(TPPTmisc, "XPD tpdu accepted Xrcvnxt, e_seq datalen m_len\n",
+				p->tp_Xrcvnxt,e->ev_union.EV_XPD_TPDU.e_seq,  e->ev_union.EV_XPD_TPDU.e_datalen, e->ev_union.EV_XPD_TPDU.e_data->m_len);
+		ENDTRACE
+
+		p->tp_sock->so_state |= SS_RCVATMARK;
+		e->ev_union.EV_XPD_TPDU.e_data->m_flags |= M_EOR;
+		sbinsertoob(&p->tp_Xrcv, e->ev_union.EV_XPD_TPDU.e_data);
+		IFDEBUG(D_XPD)
+			dump_mbuf(e->ev_union.EV_XPD_TPDU.e_data, "XPD TPDU: tp_Xrcv");
+		ENDDEBUG
+		tp_indicate(T_XDATA, p, 0);
+		sbwakeup( &p->tp_Xrcv );
+
+		/* ack with the sequence number just received, then advance it */
+		(void) tp_emit(XAK_TPDU_type, p, p->tp_Xrcvnxt, 0, MNULL);
+		SEQ_INC(p, p->tp_Xrcvnxt);
+	}
+		 break;
+case 0x1f:
+		{
+		/* intentionally does nothing; the state change is left disabled */
+		if( p->tp_Xrcv.sb_cc == 0 ) {
+			/* kludge for select(): */
+			/* p->tp_sock->so_state &= ~SS_OOBAVAIL; */
+		}
+	}
+		 break;
+case 0x20:
+		{
+		/* expedited data we can't take: count it, maybe re-notify, drop it */
+		IFTRACE(D_XPD)
+			tptrace(TPPTmisc, "XPD tpdu niw (Xrcvnxt, e_seq) or not cdt (cc)\n",
+				p->tp_Xrcvnxt, e->ev_union.EV_XPD_TPDU.e_seq,  p->tp_Xrcv.sb_cc , 0);
+		ENDTRACE
+		if( p->tp_Xrcvnxt != e->ev_union.EV_XPD_TPDU.e_seq )
+			IncStat(ts_xpd_niw);
+		if( p->tp_Xrcv.sb_cc ) {
+			/* might as well kick 'em again */
+			tp_indicate(T_XDATA, p, 0);
+			IncStat(ts_xpd_dup);
+		}
+		m_freem(e->ev_union.EV_XPD_TPDU.e_data);
+		tp_ctimeout(p, TM_inact, (int)p->tp_inact_ticks);
+		/* don't send an xack because the xak gives "last one received", not
+		 * "next one i expect" (dumb)
+		 */
+	}
+		 break;
+case 0x21:
+		{
+		struct socket *so = p->tp_sock;
+
+		/* detach from parent socket so it can finish closing */
+		if (so->so_head) {
+			if (!soqremque(so, 0) && !soqremque(so, 1))
+				panic("tp: T_DETACH");
+			so->so_head = 0;
+		}
+		tp_soisdisconnecting(p->tp_sock);
+		tp_netcmd( p, CONN_CLOSE);
+		tp_soisdisconnected(p);
+	}
+		 break;
+case 0x22:
+		{
+		struct socket *so = p->tp_sock;
+		struct mbuf *data = MNULL;
+
+		/* detach from parent socket so it can finish closing */
+		if (so->so_head) {
+			if (!soqremque(so, 0) && !soqremque(so, 1))
+				panic("tp: T_DETACH");
+			so->so_head = 0;
+		}
+		/* unless a close is already under way, start one with a DR */
+		if (p->tp_state != TP_CLOSING) {
+			tp_soisdisconnecting(p->tp_sock);
+			data = MCPY(p->tp_ucddata, M_NOWAIT);
+			(void) tp_emit(DR_TPDU_type, p, 0, E_TP_NORMAL_DISC, data);
+			p->tp_retrans = p->tp_Nretrans;
+			tp_ctimeout(p, TM_retrans, (int)p->tp_dr_ticks);
+		}
+	}
+		 break;
+case 0x23:
+		{
+		tp_soisdisconnecting(p->tp_sock);
+		tp_netcmd( p, CONN_CLOSE);
+		tp_soisdisconnected(p);
+	}
+		 break;
+case 0x24:
+		{
+		/*
+		 * Disconnect request: stop the data timers, arm DR
+		 * retransmission, and return whatever emitting the DR returns.
+		 */
+		struct mbuf *data = MCPY(p->tp_ucddata, M_WAIT);
+
+		if(p->tp_state == TP_OPEN) {
+			tp_euntimeout(p, TM_data_retrans); /* all */
+			tp_cuntimeout(p, TM_inact);
+			tp_cuntimeout(p, TM_sendack);
+			p->tp_flags &= ~TPF_DELACK;
+		}
+		if (data) {
+			IFDEBUG(D_CONN)
+				printf("T_DISC_req.trans tp_ucddata 0x%x\n",
+					p->tp_ucddata);
+				dump_mbuf(data, "ucddata @ T_DISC_req");
+			ENDDEBUG
+		}
+		tp_soisdisconnecting(p->tp_sock);
+		p->tp_retrans = p->tp_Nretrans;
+		tp_ctimeout(p, TM_retrans, (int)p->tp_dr_ticks);
+
+		if( trick_hc )
+			return tp_emit(DR_TPDU_type, p, 0, e->ev_union.EV_T_DISC_req.e_reason, data);
+	}
+		 break;
+case 0x25:
+		{
+		/* retransmit the CC */
+		int error;
+		struct mbuf *data = MCPY(p->tp_ucddata, M_WAIT);
+
+		IncStat(ts_retrans_cc);
+		p->tp_retrans --;
+		p->tp_cong_win = 1 * p->tp_l_tpdusize;
+
+		if ((error = tp_emit(CC_TPDU_type, p, 0, 0, data)) != 0)
+			p->tp_sock->so_error = error;
+		tp_ctimeout(p, TM_retrans, (int)p->tp_cc_ticks);
+	}
+		 break;
+case 0x26:
+		{
+		/* gave up: report ETIMEDOUT and start a DR exchange */
+		IncStat(ts_conn_gaveup);
+		tp_soisdisconnecting(p->tp_sock);
+		p->tp_sock->so_error = ETIMEDOUT;
+		tp_indicate(T_DISCONNECT, p, ETIMEDOUT);
+		(void) tp_emit(DR_TPDU_type, p, 0, E_TP_CONGEST, MNULL);
+		p->tp_retrans = p->tp_Nretrans;
+		tp_ctimeout(p, TM_retrans, (int)p->tp_dr_ticks);
+	}
+		 break;
+case 0x27:
+		{
+		/* as 0x26, but the data timers are stopped first */
+		tp_euntimeout(p, TM_data_retrans); /* all */
+		tp_cuntimeout(p, TM_inact);
+		tp_cuntimeout(p, TM_sendack);
+
+		IncStat(ts_conn_gaveup);
+		tp_soisdisconnecting(p->tp_sock);
+		p->tp_sock->so_error = ETIMEDOUT;
+		tp_indicate(T_DISCONNECT, p, ETIMEDOUT);
+		(void) tp_emit(DR_TPDU_type, p, 0, E_TP_CONGEST_2, MNULL);
+		p->tp_retrans = p->tp_Nretrans;
+		tp_ctimeout(p, TM_retrans, (int)p->tp_dr_ticks);
+	}
+		 break;
+case 0x28:
+		{
+		p->tp_cong_win = 1 * p->tp_l_tpdusize;
+		/* resume XPD */
+		if	( p->tp_Xsnd.sb_mb )  {
+			struct mbuf *m = m_copy(p->tp_Xsnd.sb_mb, 0, (int)p->tp_Xsnd.sb_cc);
+			int shift;
+
+			IFTRACE(D_XPD)
+				tptrace(TPPTmisc, "XPD retrans: Xuna Xsndnxt sndnxt snduna",
+					p->tp_Xuna, p->tp_Xsndnxt, p->tp_sndnxt,
+					p->tp_snduna);
+			ENDTRACE
+			IFDEBUG(D_XPD)
+				dump_mbuf(m, "XPD retrans emitting M");
+			ENDDEBUG
+			IncStat(ts_retrans_xpd);
+			p->tp_retrans --;
+			/*
+			 * Double the timeout once per retransmission used so far,
+			 * but by no more than 6 bits.  This was written as
+			 * max(..., 6), which forced a shift of at least 6 and left
+			 * it unbounded above.
+			 */
+			shift = p->tp_Nretrans - p->tp_retrans;
+			if (shift > 6)
+				shift = 6;
+			(void) tp_emit(XPD_TPDU_type, p, p->tp_Xuna, 1, m);
+			tp_ctimeout(p, TM_retrans, ((int)p->tp_dt_ticks) << shift);
+		}
+	}
+		 break;
+case 0x29:
+		{
+		p->tp_rxtshift++;
+		(void) tp_data_retrans(p);
+	}
+		 break;
+case 0x2a:
+		{
+		/* retransmit the DR */
+		p->tp_retrans --;
+		(void) tp_emit(DR_TPDU_type, p, 0, E_TP_DR_NO_REAS, MNULL);
+		IncStat(ts_retrans_dr);
+		tp_ctimeout(p, TM_retrans, (int)p->tp_dr_ticks);
+	}
+		 break;
+case 0x2b:
+		{
+		/* freeze the reference and start the reference timer */
+		p->tp_sock->so_error = ETIMEDOUT;
+		p->tp_refstate = REF_FROZEN;
+		tp_recycle_tsuffix( p );
+		tp_etimeout(p, TM_reference, (int)p->tp_refer_ticks);
+	}
+		 break;
+case 0x2c:
+		{
+		tp_freeref(p->tp_lref);
+		tp_detach(p);
+	}
+		 break;
+case 0x2d:
+		{
+		if( p->tp_class != TP_CLASS_0) {
+			tp_ctimeout(p, TM_inact, (int)p->tp_inact_ticks);
+			if ( e->ev_number == CC_TPDU )
+				(void) tp_emit(AK_TPDU_type, p, p->tp_rcvnxt, 0, MNULL);
+		}
+		/* ignore it if class 0 - state tables are blank for this */
+	}
+		 break;
+case 0x2e:
+		{
+		IFTRACE(D_DATA)
+			tptrace(TPPTmisc, "T_DATA_req sndnxt snduna fcredit, tpcb",
+				p->tp_sndnxt, p->tp_snduna, p->tp_fcredit, p);
+		ENDTRACE
+
+		tp_send(p);
+	}
+		 break;
+case 0x2f:
+		{
+		/* send whatever is queued on tp_Xsnd as one XPD TPDU */
+		int error = 0;
+
+		/* resume XPD */
+		if	( p->tp_Xsnd.sb_mb )  {
+			struct mbuf *m = m_copy(p->tp_Xsnd.sb_mb, 0, (int)p->tp_Xsnd.sb_cc);
+			/* m_copy doesn't preserve the m_xlink field, but at this pt.
+			 * that doesn't matter
+			 */
+
+			IFTRACE(D_XPD)
+				tptrace(TPPTmisc, "XPD req: Xuna Xsndnxt sndnxt snduna",
+					p->tp_Xuna, p->tp_Xsndnxt, p->tp_sndnxt,
+					p->tp_snduna);
+			ENDTRACE
+			IFDEBUG(D_XPD)
+				printf("T_XPD_req: sb_cc 0x%x\n", p->tp_Xsnd.sb_cc);
+				dump_mbuf(m, "XPD req emitting M");
+			ENDDEBUG
+			error =
+				tp_emit(XPD_TPDU_type, p, p->tp_Xuna, 1, m);
+			p->tp_retrans = p->tp_Nretrans;
+
+			tp_ctimeout(p, TM_retrans, (int)p->tp_rxtcur);
+			SEQ_INC(p, p->tp_Xsndnxt);
+		}
+		if(trick_hc)
+			return error;
+	}
+		 break;
+case 0x30:
+		{
+		struct sockbuf *sb = &p->tp_sock->so_snd;
+
+		IFDEBUG(D_ACKRECV)
+			printf("GOOD ACK seq 0x%x cdt 0x%x\n", e->ev_union.EV_AK_TPDU.e_seq, e->ev_union.EV_AK_TPDU.e_cdt);
+		ENDDEBUG
+		if( p->tp_class != TP_CLASS_0) {
+			tp_ctimeout(p, TM_inact, (int)p->tp_inact_ticks);
+		}
+		sbwakeup(sb);
+		IFDEBUG(D_ACKRECV)
+			printf("GOOD ACK new sndnxt 0x%x\n", p->tp_sndnxt);
+		ENDDEBUG
+	}
+		 break;
+case 0x31:
+		{
+		IFTRACE(D_ACKRECV)
+			tptrace(TPPTmisc, "BOGUS ACK fcc_present, tp_r_subseq e_subseq",
+				e->ev_union.EV_AK_TPDU.e_fcc_present, p->tp_r_subseq, e->ev_union.EV_AK_TPDU.e_subseq, 0);
+		ENDTRACE
+		if( p->tp_class != TP_CLASS_0 ) {
+
+			if ( !e->ev_union.EV_AK_TPDU.e_fcc_present ) {
+				/* send ACK with FCC */
+				IncStat( ts_ackreason[_ACK_FCC_] );
+				(void) tp_emit(AK_TPDU_type, p, p->tp_rcvnxt, 1, MNULL);
+			}
+			tp_ctimeout(p, TM_inact, (int)p->tp_inact_ticks);
+		}
+	}
+		 break;
+case 0x32:
+		{
+		tp_ctimeout(p, TM_inact, (int)p->tp_inact_ticks);
+		tp_cuntimeout(p, TM_retrans);
+
+		sbwakeup( &p->tp_sock->so_snd );
+
+		/* resume normal data */
+		tp_send(p);
+	}
+		 break;
+case 0x33:
+		{
+		IFTRACE(D_ACKRECV)
+			tptrace(TPPTmisc, "BOGUS XACK eventtype ", e->ev_number, 0, 0,0);
+		ENDTRACE
+		if( p->tp_class != TP_CLASS_0 ) {
+			tp_ctimeout(p, TM_inact, (int)p->tp_inact_ticks);
+		}
+	}
+		 break;
+case 0x34:
+		{
+		/*
+		 * Send an ack and re-arm the sendack timer.  While the peer
+		 * gives us no credit, back the timer off (bounded by
+		 * TP_MAXRXTSHIFT); otherwise use the normal sendack interval.
+		 */
+		int timo;
+		IFTRACE(D_TIMER)
+			tptrace(TPPTsendack, -1, p->tp_lcredit, p->tp_sent_uwe,
+			p->tp_sent_lcdt, 0);
+		ENDTRACE
+		IncPStat(p, tps_n_TMsendack);
+		(void) tp_emit(AK_TPDU_type, p, p->tp_rcvnxt, 0, MNULL);
+		if (p->tp_fcredit == 0) {
+			if (p->tp_rxtshift < TP_MAXRXTSHIFT)
+				p->tp_rxtshift++;
+			timo = (p->tp_dt_ticks) << p->tp_rxtshift;
+		} else
+			timo = p->tp_sendack_ticks;
+		tp_ctimeout(p, TM_sendack, timo);
+	}
+		 break;
+case 0x35:
+		{
+		if (sbspace(&p->tp_sock->so_rcv) > 0)
+			tp0_openflow(p);
+	}
+		 break;
+case 0x36:
+		{
+		if( trick_hc ) {
+			SeqNum ack_thresh;
+			/*
+			 * If the upper window edge has advanced a reasonable
+			 * amount beyond what was known, send an ACK.
+			 * A reasonable amount is 2 packets, unless the max window
+			 * is only 1 or 2 packets, in which case we
+			 * should send an ack for any advance in the upper window edge.
+			 */
+			LOCAL_CREDIT(p);
+			ack_thresh = SEQ_SUB(p, p->tp_lcredit + p->tp_rcvnxt,
+									 (p->tp_maxlcredit > 2 ? 2 : 1));
+			if (SEQ_GT(p, ack_thresh, p->tp_sent_uwe)) {
+				IncStat(ts_ackreason[_ACK_USRRCV_]);
+				p->tp_flags &= ~TPF_DELACK;
+				return tp_emit(AK_TPDU_type, p, p->tp_rcvnxt, 0, MNULL);
+			}
+		}
+	}
+		 break;
+case 0x37:
+		{
+		/* always fails, so tp_driver() leaves the state unchanged */
+		if(trick_hc)
+		return ECONNABORTED;
+	}
+		 break;
+case 0x38:
+		{
+		ASSERT( p->tp_state != TP_LISTENING );
+		tp_indicate(T_DISCONNECT, p, ECONNRESET);
+		tp_soisdisconnected(p);
+	}
+		 break;
+default:
+		/* unknown action number: nothing to do, report success */
+		 break;
+	}
+return 0;
+}
+
+/*
+ * _Xebec_index: choose the transition for the (event, state) pairs that
+ * tp_driver() cannot look up directly, i.e. those whose inx[][] entry is
+ * negative.
+ *
+ * The event number and the pcb's current state are folded into one key,
+ * (ev_number << 4) + tp_state, and the predicate attached to that key
+ * picks the statetable[] index that is returned.  A key with no case
+ * yields 0, which tp_driver() turns into the error action.
+ *
+ * Not every predicate is a pure test: keys 0xb4 and 0xe4 call
+ * tp_goodack() / tp_goodXack(), and key 0x188 emits a CC TPDU when the
+ * class is not 0.
+ */
+_XEBEC_PG int
+_Xebec_index( e,p )
+	struct tp_event *e;
+	tp_PCB_ *p;
+{
+	/* the two tests shared by most keys */
+	int is_class0 = (p->tp_class == TP_CLASS_0);
+	int more_retrans = (p->tp_retrans > 0);
+
+	switch ((e->ev_number << 4) + p->tp_state) {
+	case 0x12:
+		return more_retrans ? 0x1e : 0x1f;
+	case 0x13:
+		return more_retrans ? 0x2f : 0x30;
+	case 0x14:
+		return more_retrans ? 0x32 : 0x31;
+	case 0x15:
+		return more_retrans ? 0x34 : 0x35;
+	case 0x54:
+		return (p->tp_rxtshift < TP_NRETRANS) ? 0x33 : 0x31;
+	case 0x64:
+		return is_class0 ? 0x1a : 0x1b;
+	case 0x77:
+		return is_class0 ? 0xd : 0xe;
+	case 0x86:
+		return (e->ev_union.EV_DR_TPDU.e_sref != 0) ? 0x2 : 0x3;
+	case 0xa2:
+		return is_class0 ? 0x1c : 0x1d;
+	case 0xb2:
+		return is_class0 ? 0x5 : 0x0;
+	case 0xb4:
+		return tp_goodack(p, e->ev_union.EV_AK_TPDU.e_cdt,
+				e->ev_union.EV_AK_TPDU.e_seq,
+				e->ev_union.EV_AK_TPDU.e_subseq) ? 0x3a : 0x3b;
+	case 0xc3:
+		return (IN_RWINDOW( p, e->ev_union.EV_DT_TPDU.e_seq,
+				p->tp_rcvnxt, SEQ(p, p->tp_rcvnxt + p->tp_lcredit)))
+			? 0x21 : 0x24;
+	case 0xc4:
+		if (is_class0)
+			return 0x22;
+		return (IN_RWINDOW( p, e->ev_union.EV_DT_TPDU.e_seq,
+				p->tp_rcvnxt, SEQ(p, p->tp_rcvnxt + p->tp_lcredit)))
+			? 0x23 : 0x25;
+	case 0xd3:
+		return (p->tp_Xrcvnxt == e->ev_union.EV_XPD_TPDU.e_seq) ? 0x27 : 0x2a;
+	case 0xd4:
+		return (p->tp_Xrcvnxt == e->ev_union.EV_XPD_TPDU.e_seq) ? 0x27 : 0x29;
+	case 0xe4:
+		return tp_goodXack(p, e->ev_union.EV_XAK_TPDU.e_seq) ? 0x3c : 0x3d;
+	case 0x102:
+	case 0x104:
+		/* both keys use the same pair of transitions */
+		return is_class0 ? 0x2d : 0x2e;
+	case 0x144:
+		return is_class0 ? 0x3f : 0x40;
+	case 0x162:
+		return is_class0 ? 0x2b : 0x2c;
+	case 0x172:
+		return (p->tp_class != TP_CLASS_4) ? 0x42 : 0x46;
+	case 0x174:
+		return (p->tp_class != TP_CLASS_4) ? 0x42 : 0x47;
+	case 0x177:
+		return (p->tp_class != TP_CLASS_4) ? 0x42 : 0x43;
+	case 0x188:
+		if (is_class0)
+			return 0xf;
+		/* the CC is emitted here; the result selects the transition */
+		return (tp_emit(CC_TPDU_type, p, 0,0, MCPY(p->tp_ucddata, M_NOWAIT)) == 0)
+			? 0x10 : 0x11;
+	default:
+		return 0;
+	} /* end switch */
+} /* _Xebec_index() */
+/*
+ * Transition lookup used by tp_driver(): the row is 1 + the event number,
+ * the column is the pcb's tp_state.  An entry is one of
+ *	> 0	an index into statetable[], used as is;
+ *	-1	the choice depends on a predicate; tp_driver() calls
+ *		_Xebec_index() to get the index;
+ *	0	no transition; tp_driver() uses its error action.
+ */
+static int inx[26][9] = { {0,0,0,0,0,0,0,0,0,},
+ {0x0,0x0,0x0,0x0,0x31,0x0,0x0,0x0,0x0, },
+ {0x0,0x0,-1,-1,-1,-1,0x0,0x0,0x0, },
+ {0x0,0x0,0x0,0x0,0x3e,0x0,0x0,0x0,0x0, },
+ {0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0, },
+ {0x0,0x0,0x0,0x0,0x0,0x0,0x36,0x0,0x0, },
+ {0x0,0x0,0x0,0x0,-1,0x0,0x0,0x0,0x0, },
+ {0x0,0x7,0x15,0x1b,-1,0x17,0x3,0xa,0x0, },
+ {0x0,0x19,0x6,0x20,0x37,0x8,0x3,-1,0x0, },
+ {0x0,0x14,0x13,0x13,0x13,0x16,-1,0xa,0x0, },
+ {0x0,0x7,0x6,0x1,0x9,0x18,0x3,0xa,0x0, },
+ {0x0,0x19,-1,0x1,0x37,0x8,0x3,0xa,0x0, },
+ {0x0,0x7,-1,0x26,-1,0x8,0x3,0xa,0x0, },
+ {0x0,0x7,0x6,-1,-1,0x8,0x3,0xa,0x0, },
+ {0x0,0x7,0x6,-1,-1,0x8,0x3,0xa,0x0, },
+ {0x0,0x7,0x6,0x1,-1,0x8,0x3,0xa,0x0, },
+ {0x0,0x12,0x0,0x0,0x0,0x0,0x0,0x0,0x0, },
+ {0x0,0x0,-1,0x2e,-1,0x0,0x4,0x0,0x2e, },
+ {0x0,0xb,0x0,0x0,0x0,0x0,0x0,0x0,0x0, },
+ {0x0,0x0,0x0,0x0,0x38,0x0,0x0,0x0,0x0, },
+ {0x0,0x0,0x0,0x0,0x39,0x0,0x0,0x0,0x0, },
+ {0x0,0x0,0x0,0x0,-1,0x0,0x41,0x0,0x0, },
+ {0x0,0x0,0x0,0x0,0x28,0x0,0x41,0x0,0x0, },
+ {0x0,0xc,-1,0x2c,0x0,0x2c,0x4,0xc,0x2c, },
+ {0x0,0x49,-1,0x45,-1,0x44,0x48,-1,0x0, },
+ {0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,-1, },
+};
+/*
+ * tp_driver: apply event e to the connection p.
+ *
+ * Looks up the transition for (event number, current state) in inx[][],
+ * asking _Xebec_index() when the table entry is negative, runs the
+ * transition's action if it has one, and, when the action returns 0,
+ * stores the transition's new state in p->tp_state.  A lookup result of
+ * 0 selects the error action (action -1, i.e. tp_protocol_error()).
+ *
+ * RETURN VALUE:
+ *  0, or the error returned by the action; on error tp_state is left
+ *  unchanged.
+ *
+ * NOTE(review): e->ev_number and p->tp_state index inx[][] unchecked;
+ * callers are assumed to keep them within the table -- confirm.
+ */
+int
+tp_driver(p, e)
+register tp_PCB_ *p;
+register struct tp_event *e;
+{
+	register int index, error=0;
+	struct act_ent *a;
+	static struct act_ent erroraction = {0,-1};
+
+	index = inx[1 + e->ev_number][p->tp_state];
+	if (index < 0)
+		index = _Xebec_index(e, p);
+	if (index == 0)
+		a = &erroraction;
+	else
+		a = &statetable[index];
+
+	if (a->a_action)
+		error = _Xebec_action( a->a_action, e, p );
+	IFTRACE(D_DRIVER)
+	tptrace(DRIVERTRACE,		a->a_newstate, p->tp_state, e->ev_number, a->a_action, 0);
+	ENDTRACE
+	if (error == 0)
+		p->tp_state = a->a_newstate;
+	return error;
+}
diff --git a/sys/netiso/tp_emit.c b/sys/netiso/tp_emit.c
new file mode 100644
index 00000000000..16ed5bc7b7b
--- /dev/null
+++ b/sys/netiso/tp_emit.c
@@ -0,0 +1,996 @@
+/*-
+ * Copyright (c) 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)tp_emit.c	8.1 (Berkeley) 6/10/93
+ */
+
+/***********************************************************
+		Copyright IBM Corporation 1987
+
+                      All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and its 
+documentation for any purpose and without fee is hereby granted, 
+provided that the above copyright notice appear in all copies and that
+both that copyright notice and this permission notice appear in 
+supporting documentation, and that the name of IBM not be
+used in advertising or publicity pertaining to distribution of the
+software without specific, written prior permission.  
+
+IBM DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING
+ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL
+IBM BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR
+ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+SOFTWARE.
+
+******************************************************************/
+
+/*
+ * ARGO Project, Computer Sciences Dept., University of Wisconsin - Madison
+ */
+/* 
+ * ARGO TP
+ *
+ * $Header: tp_emit.c,v 5.5 88/11/18 17:27:20 nhall Exp $
+ * $Source: /usr/argo/sys/netiso/RCS/tp_emit.c,v $
+ *
+ * This file contains tp_emit() and tp_error_emit(), which
+ * form TPDUs and hand them to ip.
+ * They take data in the form of mbuf chain, allocate mbufs as
+ * necessary for headers, and set the fields as appropriate from
+ * information found in the tpcb and net-level pcb.
+ *
+ * The worst thing about this code is adding the variable-length
+ * options on a machine that requires alignment for any memory access
+ * that isn't of size 1.  See the macro ADDOPTION() below.
+ *
+ * We don't do any concatenation. (There's a kludge to test the
+ * basic mechanism of separation under the 'w' tpdebug option, that's all.)
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/protosw.h>
+#include <sys/errno.h>
+#include <sys/time.h>
+
+#include <netiso/iso.h>
+#include <netiso/iso_pcb.h>
+#include <netiso/argo_debug.h>
+#include <netiso/tp_timer.h>
+#include <netiso/tp_param.h>
+#include <netiso/tp_stat.h>
+#include <netiso/tp_pcb.h>
+#include <netiso/tp_tpdu.h>
+#include <netiso/tp_trace.h>
+#include <netiso/tp_meas.h>
+#include <netiso/tp_seq.h>
+#include <netiso/iso_errno.h>
+
+#include <net/if.h>
+#ifdef TRUE
+#undef FALSE
+#undef TRUE
+#endif
+#include <netccitt/x25.h>
+#include <netccitt/pk.h>
+#include <netccitt/pk_var.h>
+
+/*
+ * Defined outside this file; tp_emit() and tp_error_emit() call it on the
+ * outgoing mbuf chain when the connection uses checksums.
+ */
+void iso_gen_csum();
+
+
+/* Here is a mighty kludge.  The token ring misorders packets if you
+ * fire them at it too fast, and TP sans checksum is "too fast", so
+ * we have introduced a delay when checksumming isn't used.
+ *
+ * tp_emit() uses the value as a loop count: when it is nonzero and the
+ * connection does not use checksums, iso_check_csum() is called that many
+ * times (result discarded) just before the tpdu is handed to the
+ * network layer.
+ */
+char tp_delay = 0x00; /* delay to keep token ring from blowing it */
+
+/*
+ * NAME:	tp_emit()
+ *
+ * CALLED FROM: tp.trans and from tp_sbsend()
+ *
+ * FUNCTION and ARGUMENTS:
+ * 	Emits one tpdu of the type (dutype), of the format appropriate
+ * 	to the connection described by the pcb (tpcb), with sequence
+ * 	number (seq) (where appropriate), end-of-tsdu bit (eot) where
+ * 	appropriate, and with the data in the mbuf chain (data).
+ * 	For DR and ER tpdus, the argument (eot) is
+ * 	the reason for issuing the tpdu rather than an end-of-tsdu indicator.
+ *
+ * RETURNS:			
+ * 	0  OK
+ * 	ENOBUFS 
+ * 	E* returned from net layer output rtn 
+ *
+ * SIDE EFFECTS:	
+ *
+ * NOTES:			
+ *  
+ * 	WE ASSUME that the tp header + all options will fit in ONE mbuf.  
+ *	If mbufs are 256 this will most likely be true, but if they are 128 it's
+ *	possible that they won't. 
+ *	If you used every option on the CR + max. user data you'd overrun 
+ *	112 but unless you used > 115 bytes for the security
+ *	parameter, it would fit in a 256-byte mbuf (240 bytes for the header)
+ *	We don't support the security parameter, so this isn't a problem.
+ *	If security is added, we ought to remove this assumption.
+ *
+ *  We do not implement the flow control confirmation "element of procedure".
+ *  A) it should not affect interoperability,
+ *  B) it should not be necessary - the protocol will eventually
+ *   	straighten things out w/o FCC, as long as we don't have severely
+ *		mismatched keepalive and inactivity timers, and
+ *	C) it appears not to be REQUIRED, and
+ *  D) it's incredibly grotesque, and no doubt will lengthen a few
+ *   	critical paths.
+ *  HOWEVER, we're thinking about putting it in anyway, for
+ *  completeness, just like we did with ack subsequencing.
+ */
+
+int
+tp_emit(dutype,	tpcb, seq, eot, data)
+	int dutype;
+	struct tp_pcb *tpcb;
+	SeqNum	seq;
+	u_int 	eot;
+	struct mbuf *data;
+{
+	register struct tpdu *hdr; 
+	register struct mbuf *m;
+	int csum_offset=0;
+	int datalen = 0;
+	int error = 0;
+ 	SeqNum olduwe;
+	int acking_ooo;
+
+	/* NOTE:
+	 * here we treat tpdu_li as if it DID include the li field, up until
+	 * the end, at which time we subtract 1
+	 * This is because if we subtract 1 right away, we end up adding
+	 * one every time we add an option.
+	 */
+	IFDEBUG(D_EMIT)
+		printf(
+	"tp_emit dutype 0x%x, tpcb 0x%x, eot 0x%x, seq 0x%x, data 0x%x",
+		dutype, tpcb, eot, seq, data);
+	ENDDEBUG
+
+	/*
+	 * CR and CC headers get a 256-byte buffer dressed up as a packet
+	 * header mbuf; every other tpdu type uses an ordinary header mbuf.
+	 */
+	if (dutype == CR_TPDU || dutype == CC_TPDU) {
+		m = (struct mbuf *) malloc((u_long)256, M_MBUF, M_DONTWAIT);
+		if (m) {
+			m->m_type = TPMT_TPHDR;
+			mbstat.m_mtypes[TPMT_TPHDR]++;
+			m->m_next = MNULL;
+			m->m_nextpkt = MNULL;
+			m->m_data = m->m_pktdat;
+			m->m_flags = M_PKTHDR;
+		}
+	} else {
+		MGETHDR(m, M_DONTWAIT, TPMT_TPHDR); 
+	}
+	/*
+	 * Check the allocation BEFORE touching the mbuf: both allocators
+	 * above may fail, and we own (data), so free it on the way out.
+	 */
+	if (m == NULL) {
+		if(data != (struct mbuf *)0)
+			m_freem(data);
+		error = ENOBUFS;
+		goto done;
+	}
+	/* leave room in front of the tpdu for lower-layer headers */
+	m->m_data += max_hdr;
+	m->m_len = sizeof(struct tpdu);
+	m->m_act = MNULL;
+
+	hdr = mtod(m, struct tpdu *);
+	bzero((caddr_t)hdr, sizeof(struct tpdu));
+
+	{
+		int 	tp_headersize();
+
+		hdr->tpdu_type = dutype;
+		hdr->tpdu_li = tp_headersize(dutype, tpcb);  
+		/*
+		 * class 0 doesn't use this for DT
+		 * it'll just get overwritten below 
+		 */
+		hdr->tpdu_dref = htons(tpcb->tp_fref); 
+		if( tpcb->tp_use_checksum || 
+			(dutype == CR_TPDU_type && (tpcb->tp_class & TP_CLASS_4) )) {
+			csum_offset =  hdr->tpdu_li + 2; /* DOESN'T include csum */
+			ADDOPTION(TPP_checksum, hdr, 2, eot /* dummy arg */);
+			IFDEBUG(D_CHKSUM)
+				printf(
+					"tp_emit: csum_offset 0x%x, hdr->tpdu_li 0x%x\n",
+						csum_offset, hdr->tpdu_li);
+			ENDDEBUG
+		} 
+		/*
+		 * VARIABLE PARTS...
+		 */
+		switch( dutype ) {
+
+		case CR_TPDU_type:
+			hdr->tpdu_CRdref_0 = 0;	/* must be zero */
+			/* FALLTHROUGH: the rest of CR is built exactly like CC */
+		case CC_TPDU_type: 
+			if (!tpcb->tp_cebit_off) {
+				tpcb->tp_win_recv = tp_start_win << 8;
+				LOCAL_CREDIT(tpcb);
+				CONG_INIT_SAMPLE(tpcb);
+			} else
+				LOCAL_CREDIT(tpcb);
+
+/* Case CC_TPDU_type used to be here */
+		{
+					u_char x;
+
+				hdr->tpdu_CCsref =  htons(tpcb->tp_lref); /* same as CRsref */
+
+				if( tpcb->tp_class > TP_CLASS_1 ) {
+					tpcb->tp_sent_uwe = tpcb->tp_lcredit -1;
+					tpcb->tp_sent_rcvnxt = 1;
+					tpcb->tp_sent_lcdt = tpcb->tp_lcredit;
+					hdr->tpdu_cdt = tpcb->tp_lcredit;
+				} else {
+#ifdef TPCONS
+					if (tpcb->tp_netservice == ISO_CONS) {
+						struct isopcb *isop = (struct isopcb *)tpcb->tp_npcb;
+						struct pklcd *lcp = (struct pklcd *)(isop->isop_chan);
+						lcp->lcd_flags &= ~X25_DG_CIRCUIT;
+					}
+#endif
+					hdr->tpdu_cdt = 0;
+				}
+				hdr->tpdu_CCclass = tp_mask_to_num(tpcb->tp_class);
+				hdr->tpdu_CCoptions = 
+					(tpcb->tp_xtd_format? TPO_XTD_FMT:0) |
+					(tpcb->tp_use_efc? TPO_USE_EFC:0);
+
+				IFPERF(tpcb)
+					u_char perf_meas = tpcb->tp_perf_on;
+					ADDOPTION(TPP_perf_meas, hdr, sizeof(perf_meas), perf_meas);
+				ENDPERF
+
+				if( dutype == CR_TPDU_type ) {
+					IncStat(ts_CR_sent);
+
+					ASSERT( tpcb->tp_lsuffixlen > 0 );
+					ASSERT( tpcb->tp_fsuffixlen > 0 );
+
+					ADDOPTION(TPP_calling_sufx, hdr,
+						tpcb->tp_lsuffixlen, tpcb->tp_lsuffix[0]);
+					ADDOPTION(TPP_called_sufx, hdr,
+						tpcb->tp_fsuffixlen, tpcb->tp_fsuffix[0]);
+				} else {
+					IncStat(ts_CC_sent);
+				}
+
+				ADDOPTION(TPP_tpdu_size, hdr, 
+					sizeof(tpcb->tp_tpdusize), tpcb->tp_tpdusize);
+
+				if (tpcb->tp_class != TP_CLASS_0) {
+					short millisec = 500*(tpcb->tp_sendack_ticks);
+
+					millisec = htons(millisec);
+					ADDOPTION(TPP_acktime, hdr, sizeof(short), millisec);
+
+					x = (tpcb->tp_use_nxpd? TPAO_USE_NXPD: 0)
+					 |	(tpcb->tp_use_rcc?  TPAO_USE_RCC : 0) 
+					 |  (tpcb->tp_use_checksum?0: TPAO_NO_CSUM)
+					 |	(tpcb->tp_xpd_service? TPAO_USE_TXPD: 0);
+					ADDOPTION(TPP_addl_opt, hdr, 1, x);
+
+					/*
+					 * The preferred-size option is only needed when the
+					 * local tpdu size is not the power of two already
+					 * announced by TPP_tpdu_size; it is expressed in
+					 * units of 128 octets (hence the >> 7).
+					 */
+					if ((tpcb->tp_l_tpdusize ^ (1 << tpcb->tp_tpdusize)) != 0) {
+						u_short size_s = tpcb->tp_l_tpdusize >> 7;
+						u_char size_c = size_s;
+						ASSERT(tpcb->tp_l_tpdusize < 65536 * 128);
+						if (dutype == CR_TPDU_type)
+							tpcb->tp_ptpdusize = size_s;
+						if (size_s < 256) {
+							ADDOPTION(TPP_ptpdu_size, hdr, 1, size_c);
+						} else {
+							size_s = htons(size_s);
+							ADDOPTION(TPP_ptpdu_size, hdr, 2, size_s);
+						}
+					}
+				}
+					
+				if( (dutype == CR_TPDU_type) && (tpcb->tp_class != TP_CLASS_0)){
+
+					ASSERT( 1 == sizeof(tpcb->tp_vers) );
+					ADDOPTION(TPP_vers, hdr, 1, tpcb->tp_vers);
+
+					/* for each alt protocol class x,
+					 * 	x = x<<4;
+					 *  option = concat(option, x);
+					 * Well, for now we only have TP0 for an
+					 * alternative so... this is easy.
+					 *
+					 * HOWEVER... There should be NO alt protocol
+					 * class over CLNS.  Need to see if the route suggests
+					 * CONS, and iff so add alt class.
+					 */
+					x = 0;
+					ADDOPTION(TPP_alt_class, hdr, 1, x);
+				}
+
+				if( hdr->tpdu_li > MLEN) 
+					panic("tp_emit CR/CC");
+			}
+			break;
+
+		case DR_TPDU_type:
+			if( hdr->tpdu_DRdref == 0 ) {
+				/*
+				 * don't issue the DR.
+				 * Nothing will be handed to the network layer, so the
+				 * header mbuf and the caller's data (which we own, as
+				 * on the ENOBUFS path above) must be released here or
+				 * they are leaked.
+				 */
+				m_freem(m);
+				if(data != (struct mbuf *)0)
+					m_freem(data);
+				goto done;
+			}
+			hdr->tpdu_cdt = 0;
+			hdr->tpdu_DRsref = htons(tpcb->tp_lref);
+			hdr->tpdu_DRreason = (u_char)eot; /* WHICH BYTE OF THIS??? */
+
+			/* forget the add'l information variable part */
+			IncStat(ts_DR_sent);
+			break;
+
+		case DC_TPDU_type: /* not used in class 0 */
+			ASSERT( tpcb->tp_class != TP_CLASS_0);
+			hdr->tpdu_DCsref =  htons(tpcb->tp_lref);
+			hdr->tpdu_cdt = 0;
+			data = (struct mbuf *)0;
+			IncStat(ts_DC_sent);
+			break;
+
+		case XAK_TPDU_type: /* xak not used in class 0 */
+			ASSERT( tpcb->tp_class != TP_CLASS_0); /* fall through */
+			hdr->tpdu_cdt = 0;
+
+			IFTRACE(D_XPD)
+				tptraceTPCB(TPPTXack, seq, 0, 0, 0, 0);
+			ENDTRACE
+			data = (struct mbuf *)0;
+			if (tpcb->tp_xtd_format) {
+#ifdef BYTE_ORDER
+				union seq_type seqeotX;
+
+				seqeotX.s_seq = seq;
+				seqeotX.s_eot = 1;
+				hdr->tpdu_seqeotX = htonl(seqeotX.s_seqeot);
+#else
+				hdr->tpdu_XAKseqX = seq;
+#endif /* BYTE_ORDER */
+			} else {
+				hdr->tpdu_XAKseq = seq;
+			}
+			IncStat(ts_XAK_sent);
+			IncPStat(tpcb, tps_XAK_sent);
+			break;
+
+		case XPD_TPDU_type: /* xpd not used in class 0 */
+			ASSERT( tpcb->tp_class != TP_CLASS_0); /* fall through */
+			hdr->tpdu_cdt = 0;
+			if (tpcb->tp_xtd_format) {
+#ifdef BYTE_ORDER
+				union seq_type seqeotX;
+
+				seqeotX.s_seq = seq;
+				seqeotX.s_eot = 1;
+				hdr->tpdu_seqeotX = htonl(seqeotX.s_seqeot);
+#else
+				hdr->tpdu_XPDseqX = seq;
+				hdr->tpdu_XPDeotX = 1; /* always 1 for XPD tpdu */
+#endif /* BYTE_ORDER */
+			} else {
+				hdr->tpdu_XPDseq = seq;
+				hdr->tpdu_XPDeot = 1; /* always 1 for XPD tpdu */
+			}
+			IncStat(ts_XPD_sent);
+			IncPStat(tpcb, tps_XPD_sent);
+
+			/* kludge to test the input size checking */
+			IFDEBUG(D_SIZE_CHECK)
+				/*if(data->m_len <= 16 && data->m_off < (MLEN-18) ) {
+					printf("Sending too much data on XPD: 18 bytes\n");
+					data->m_len = 18;
+				}*/
+			ENDDEBUG
+			break;
+
+		case DT_TPDU_type:
+			hdr->tpdu_cdt = 0;
+			IFTRACE(D_DATA)
+				tptraceTPCB(TPPTmisc, "emit DT: eot seq tpdu_li", eot, seq, 
+					hdr->tpdu_li, 0);
+			ENDTRACE
+			if (tpcb->tp_xtd_format) {
+#ifdef BYTE_ORDER
+				union seq_type seqeotX;
+
+				seqeotX.s_seq = seq;
+				seqeotX.s_eot = eot;
+				hdr->tpdu_seqeotX = htonl(seqeotX.s_seqeot);
+#else
+				hdr->tpdu_DTseqX = seq;
+				hdr->tpdu_DTeotX = eot;
+#endif /* BYTE_ORDER */
+			} else if (tpcb->tp_class == TP_CLASS_0) {
+				IFDEBUG(D_EMIT)
+					printf("DT tpdu: class 0 m 0x%x hdr 0x%x\n", m, hdr);
+					dump_buf( hdr, hdr->tpdu_li + 1 );
+				ENDDEBUG
+				((struct tp0du *)hdr)->tp0du_eot = eot;
+				((struct tp0du *)hdr)->tp0du_mbz = 0;
+				IFDEBUG(D_EMIT)
+					printf("DT 2 tpdu: class 0 m 0x%x hdr 0x%x\n", m, hdr);
+					dump_buf( hdr, hdr->tpdu_li + 1 );
+				ENDDEBUG
+			} else {
+				hdr->tpdu_DTseq = seq;
+				hdr->tpdu_DTeot = eot;
+			}
+			if(eot) {
+				IncStat(ts_EOT_sent);
+			}
+			IncStat(ts_DT_sent);
+			IncPStat(tpcb, tps_DT_sent);
+			break;
+
+		case AK_TPDU_type:/* ak not used in class 0 */
+			ASSERT( tpcb->tp_class != TP_CLASS_0); 
+			data = (struct mbuf *)0;
+			olduwe = tpcb->tp_sent_uwe;
+
+			if (seq != tpcb->tp_sent_rcvnxt || tpcb->tp_rsycnt == 0) {
+				LOCAL_CREDIT( tpcb ); 
+				tpcb->tp_sent_uwe = 
+					SEQ(tpcb,tpcb->tp_rcvnxt + tpcb->tp_lcredit -1);
+				tpcb->tp_sent_lcdt = tpcb->tp_lcredit;
+				acking_ooo = 0;
+			} else
+				acking_ooo = 1;
+
+			IFDEBUG(D_RENEG)
+				/* occasionally fake a reneging so 
+					you can test subsequencing */
+				if( olduwe & 0x1 ) {
+					tpcb->tp_reneged = 1;
+					IncStat(ts_ldebug);
+				}
+			ENDDEBUG
+			/* Are we about to reneg on credit? 
+			 * When might we do so?
+			 *	a) when using optimistic credit (which we no longer do).
+			 *  b) when drain() gets implemented (not in the plans).
+			 *  c) when D_RENEG is on.
+			 *  d) when DEC BIT response is implemented.
+			 *	(not- when we do this, we'll need to implement flow control
+			 *	confirmation)
+			 */
+			if( SEQ_LT(tpcb, tpcb->tp_sent_uwe, olduwe) ) {
+				tpcb->tp_reneged = 1;
+				IncStat(ts_lcdt_reduced);
+				IFTRACE(D_CREDIT)
+					tptraceTPCB(TPPTmisc, 
+						"RENEG: olduwe newuwe lcredit rcvnxt",
+						olduwe,
+						tpcb->tp_sent_uwe, tpcb->tp_lcredit,
+						tpcb->tp_rcvnxt);
+				ENDTRACE
+			}
+			IFPERF(tpcb)
+				/* new lwe is less than old uwe means we're
+				 * acking before we received a whole window full
+				 */
+				if( SEQ_LT( tpcb, tpcb->tp_rcvnxt, olduwe) ) {
+					/* tmp1 = number of pkts fewer than the full window */
+					register int tmp1 = 
+						(int) SEQ_SUB( tpcb, olduwe, tpcb->tp_rcvnxt);
+
+					if(tmp1 > TP_PM_MAX)
+						tmp1 = TP_PM_MAX;
+					IncPStat( tpcb,  tps_ack_early[tmp1] );
+
+					/* tmp1 = amt of new cdt we're advertising */
+					tmp1 = SEQ_SUB( tpcb, seq, tpcb->tp_sent_rcvnxt);
+					if(tmp1 > TP_PM_MAX )
+						tmp1 = TP_PM_MAX;
+
+					IncPStat( tpcb, 
+							tps_cdt_acked [ tmp1 ]
+							[ ((tpcb->tp_lcredit > TP_PM_MAX)?
+								TP_PM_MAX:tpcb->tp_lcredit) ] );
+
+				}
+			ENDPERF
+
+			IFTRACE(D_ACKSEND)
+				tptraceTPCB(TPPTack, seq, tpcb->tp_lcredit, tpcb->tp_sent_uwe, 
+					tpcb->tp_r_subseq, 0);
+			ENDTRACE
+			if (tpcb->tp_xtd_format) {
+#ifdef BYTE_ORDER
+				union seq_type seqeotX;
+
+				seqeotX.s_seq = seq;
+				seqeotX.s_eot = 0;
+				hdr->tpdu_seqeotX = htonl(seqeotX.s_seqeot);
+				hdr->tpdu_AKcdtX = htons(tpcb->tp_lcredit);
+#else
+				hdr->tpdu_cdt = 0; 
+				hdr->tpdu_AKseqX = seq;
+				hdr->tpdu_AKcdtX = tpcb->tp_lcredit;
+#endif /* BYTE_ORDER */
+			} else {
+				hdr->tpdu_AKseq = seq;
+				hdr->tpdu_AKcdt = tpcb->tp_lcredit;
+			}
+			if ((tpcb->tp_class == TP_CLASS_4) &&
+				(tpcb->tp_reneged || acking_ooo)) {
+				/* 
+				 * Ack subsequence parameter req'd if WE reneged on 
+				 * credit offered.  (ISO 8073, 12.2.3.8.2, p. 74)
+				 */
+				IFDEBUG(D_RENEG)
+					printf("Adding subseq 0x%x\n", tpcb->tp_s_subseq);
+				ENDDEBUG
+				tpcb->tp_s_subseq++;
+				/*
+				 * add tmp subseq and do a htons on it.
+				 */
+				ADDOPTION(TPP_subseq, hdr, 
+					sizeof(tpcb->tp_s_subseq), tpcb->tp_s_subseq);
+			} else
+				tpcb->tp_s_subseq = 0;
+
+			if ( tpcb->tp_sendfcc || eot ) /* overloaded to mean SEND FCC */ {
+				/* 
+				 * Rules for sending FCC ("should" send when) :
+				 * %a) received an ack from peer with NO NEWS whatsoever,
+				 *  	and it did not contain an FCC
+				 * 	b) received an ack from peer that opens its closed window.
+				 * 	c) received an ack from peer after it reneged on its
+				 *		offered credit, AND this ack raises UWE but LWE is same
+				 *		and below UWE at time of reneging (reduction)
+				 * Now, ISO 8073 12.2.3.8.3 says
+				 * that a retransmitted AK shall not contain the FCC
+				 * parameter.  Now, how the hell you tell the difference 
+				 * between a retransmitted ack and an ack that's sent in 
+				 * response to a received ack, I don't know, because without
+				 * any local activity, and w/o any received DTs, they
+				 * will contain exactly the same credit/seq# information.
+				 * Anyway, given that the "retransmission of acks"
+				 * procedure (ISO 8073 12.2.3.8.3) is optional, and we
+				 * don't do it (although the peer can't tell that), we 
+				 * ignore this last rule.
+				 *
+				 * We send FCC for reasons a) and b) only. 
+				 * To add reason c) would require a ridiculous amount of state.
+				 * 
+				 */
+				u_short 	bogus[4]; /* lwe(32), subseq(16), cdt(16) */
+				SeqNum		lwe;
+				u_short		subseq, fcredit;
+
+				tpcb->tp_sendfcc = 0;
+
+				lwe = (SeqNum) htonl(tpcb->tp_snduna);
+				subseq = htons(tpcb->tp_r_subseq);
+				fcredit = htons(tpcb->tp_fcredit);
+
+				bcopy((caddr_t) &lwe, (caddr_t)&bogus[0], sizeof(SeqNum));
+				bcopy((caddr_t) &subseq, (caddr_t)&bogus[2], sizeof(u_short));
+				bcopy((caddr_t) &fcredit, (caddr_t)&bogus[3], sizeof(u_short));
+
+				IFTRACE(D_ACKSEND)
+					tptraceTPCB(TPPTmisc, 
+						"emit w/FCC: snduna r_subseq fcredit", 
+						tpcb->tp_snduna, tpcb->tp_r_subseq,
+						tpcb->tp_fcredit, 0);
+				ENDTRACE
+
+				IFDEBUG(D_ACKSEND)
+					printf("Calling ADDOPTION 0x%x, 0x%x, 0x%x,0x%x\n",
+						TPP_flow_cntl_conf, 
+						hdr, sizeof(bogus), bogus[0]);
+				ENDDEBUG
+				ADDOPTION(TPP_flow_cntl_conf, hdr, sizeof(bogus), bogus[0]);
+				IFDEBUG(D_ACKSEND)
+					printf("after ADDOPTION hdr 0x%x hdr->tpdu_li 0x%x\n",
+						hdr, hdr->tpdu_li);
+					printf(
+					"after ADDOPTION csum_offset 0x%x, hdr->tpdu_li 0x%x\n",
+							csum_offset, hdr->tpdu_li);
+				ENDDEBUG
+					
+			}
+			tpcb->tp_reneged = 0;
+			tpcb->tp_sent_rcvnxt = seq;
+			/*
+			 * Re-arm the sendack timer.  While the peer offers no credit,
+			 * back off exponentially from tp_dt_ticks, capped at the
+			 * keepalive interval; otherwise just use the keepalive interval.
+			 */
+			if (tpcb->tp_fcredit == 0) {
+				int timo = tpcb->tp_keepalive_ticks;
+				if (tpcb->tp_rxtshift < TP_MAXRXTSHIFT)
+					tpcb->tp_rxtshift++;
+				timo = min(timo, ((int)tpcb->tp_dt_ticks) << tpcb->tp_rxtshift);
+				tp_ctimeout(tpcb, TM_sendack, timo);
+			} else
+				tp_ctimeout(tpcb, TM_sendack, tpcb->tp_keepalive_ticks);
+			IncStat(ts_AK_sent);
+			IncPStat(tpcb, tps_AK_sent);
+			IFDEBUG(D_ACKSEND)
+				printf(
+				"2 after rADDOPTION csum_offset 0x%x, hdr->tpdu_li 0x%x\n",
+						csum_offset, hdr->tpdu_li);
+			ENDDEBUG
+			break;
+
+		case ER_TPDU_type:
+			hdr->tpdu_ERreason = eot; 
+			hdr->tpdu_cdt = 0;
+			/* no user data */
+			data = (struct mbuf *)0;
+			IncStat(ts_ER_sent);
+			break;
+		}
+
+	}
+	ASSERT( ((int)hdr->tpdu_li > 0) && ((int)hdr->tpdu_li < MLEN) );
+
+	m->m_next = data;
+
+	ASSERT( hdr->tpdu_li < MLEN ); /* leave this in */
+	ASSERT( hdr->tpdu_li != 0 ); /* leave this in */
+
+	m->m_len = hdr->tpdu_li ; 
+	hdr->tpdu_li --; /* doesn't include the li field */
+
+	datalen = m_datalen( m ); /* total len */
+
+	ASSERT( datalen <= tpcb->tp_l_tpdusize ); /* may become a problem
+				when CLNP is used; leave in here for the time being */
+		IFDEBUG(D_ACKSEND)
+			printf(
+			"4 after rADDOPTION csum_offset 0x%x, hdr->tpdu_li 0x%x\n",
+					csum_offset, hdr->tpdu_li);
+		ENDDEBUG
+	if( datalen > tpcb->tp_l_tpdusize ) {
+		printf("data len 0x%x tpcb->tp_l_tpdusize 0x%x\n", 
+			datalen, tpcb->tp_l_tpdusize);
+	}
+	IFDEBUG(D_EMIT)
+		printf(
+		"tp_emit before gen_csum m_len 0x%x, csum_offset 0x%x, datalen 0x%x\n",
+		m->m_len, csum_offset, datalen);
+	ENDDEBUG
+	/* same condition as the one that reserved the checksum option above */
+	if( tpcb->tp_use_checksum || 
+		(dutype == CR_TPDU_type && (tpcb->tp_class & TP_CLASS_4)) ) {
+		iso_gen_csum(m, csum_offset, datalen);
+	}
+
+	IFDEBUG(D_EMIT)
+	printf("tp_emit before tpxxx_output tpcb 0x%x, dutype 0x%x, datalen 0x%x\n",
+		tpcb, dutype, datalen);
+		dump_buf(mtod(m, caddr_t), datalen);
+	ENDDEBUG
+
+	IFPERF(tpcb)
+		if( dutype == DT_TPDU_type ) {
+			PStat(tpcb, Nb_to_ll) += (datalen - m->m_len);
+			tpmeas( tpcb->tp_lref, TPtime_to_ll,  (struct timeval *)0,
+				seq, PStat(tpcb, Nb_to_ll), (datalen - m->m_len));
+		}
+	ENDPERF
+
+	IFTRACE(D_EMIT)
+		tptraceTPCB(TPPTtpduout, dutype, hdr, hdr->tpdu_li+1, datalen, 0);
+	ENDTRACE
+	IFDEBUG(D_EMIT)
+		printf("OUTPUT: tpcb 0x%x, isop 0x%x, so 0x%x\n",
+			tpcb,  tpcb->tp_npcb,  tpcb->tp_sock);
+	ENDDEBUG
+
+	{ extern char tp_delay;
+
+		/* burn time (see tp_delay) when no checksum slows us down */
+		if( tp_delay )
+			if( tpcb->tp_use_checksum == 0 ) {
+				register u_int i  = tp_delay;
+				for (; i!= 0; i--)
+					(void) iso_check_csum(m, datalen);
+			}
+	}
+	ASSERT( m->m_len > 0 );
+	error = (tpcb->tp_nlproto->nlp_output)(tpcb->tp_npcb, m, datalen,
+		!tpcb->tp_use_checksum);
+	IFDEBUG(D_EMIT)
+		printf("OUTPUT: returned 0x%x\n", error);
+	ENDDEBUG
+	IFTRACE(D_EMIT)
+		tptraceTPCB(TPPTmisc, 
+			"tp_emit nlproto->output netservice returns datalen", 
+			tpcb->tp_nlproto->nlp_output, tpcb->tp_netservice, error, datalen); 
+	ENDTRACE
+done:
+	if (error) {
+		/* an ack that could not be sent is retried almost immediately */
+		if (dutype == AK_TPDU_type)
+			tp_ctimeout(tpcb, TM_sendack, 1);
+		/* a full queue is reported as a quench, not as an error */
+		if (error == E_CO_QFULL) {
+			tp_quench(tpcb, PRC_QUENCH);
+			return 0;
+		}
+	}
+	return error;
+}
+/*
+ * NAME:		tp_error_emit()
+ * CALLED FROM:	tp_input() when a DR or ER is to be issued in
+ * 		response to an input error.
+ * FUNCTION and ARGUMENTS:
+ * 		The error type is the first argument.
+ * 		The argument (sref) is the source reference on the bad incoming tpdu,
+ * 		and is used for a destination reference on the outgoing packet.
+ * 		(faddr) and (laddr) are the foreign and local addresses for this 
+ *		connection.
+ * 		(erdata) is a ptr to the errant incoming tpdu, and is copied into the
+ * 		outgoing ER, if an ER is to be issued.
+ * 		(erlen)  is the number of octets of the errant tpdu that we should
+ * 		try to copy.
+ * 		(tpcb) is the pcb that describes the connection for which the bad tpdu
+ * 		arrived.
+ * RETURN VALUES:
+ * 		0 OK
+ *  	ENOBUFS
+ *  	E* from net layer datagram output routine
+ * SIDE EFFECTS:	
+ *
+ * NOTES:			
+ */
+
+int
+tp_error_emit(error, sref, faddr, laddr, erdata, erlen, tpcb, cons_channel,
+	dgout_routine)
+	int				error;
+	u_long			sref;
+	struct sockaddr_iso *faddr, *laddr;
+	struct mbuf 	*erdata;
+	int 			erlen;
+	struct tp_pcb 	*tpcb;
+	caddr_t			cons_channel;
+	int				(*dgout_routine)();
+{
+	int						dutype;
+	int 					datalen = 0;
+	register struct tpdu	*hdr; 
+	register struct mbuf	*m;
+	int						csum_offset;
+
+	IFTRACE(D_ERROR_EMIT)
+		tptrace(TPPTmisc, "tp_error_emit error sref tpcb erlen", 
+			error, sref, tpcb, erlen);
+	ENDTRACE
+	IFDEBUG(D_ERROR_EMIT)
+		printf(
+		"tp_error_emit error 0x%x sref 0x%x tpcb 0x%x erlen 0x%x chan 0x%x\n", 
+			error, sref, tpcb, erlen, cons_channel);
+	ENDDEBUG
+
+	MGET(m, M_DONTWAIT, TPMT_TPHDR); 
+	if (m == NULL) {
+		return ENOBUFS;
+	}
+	m->m_len = sizeof(struct tpdu);
+	m->m_act = MNULL;
+
+	hdr = mtod(m, struct tpdu *);
+
+	IFDEBUG(D_ERROR_EMIT)
+		printf("[error 0x%x] [error&0xff  0x%x] [(char)error 0x%x]\n",
+			error, error&0xff, (char)error);
+	ENDDEBUG
+
+
+	/*
+	 * The tpdu type is encoded in the error word: TP_ERROR_SNDC asks for
+	 * a DC, bit 0x40 asks for an ER, anything else yields a DR.  Only the
+	 * low byte is kept as the reason code.
+	 */
+	if (error & TP_ERROR_SNDC)
+		dutype = DC_TPDU_type;
+	else if (error & 0x40) {
+		error &= ~0x40;
+		dutype = ER_TPDU_type;
+	} else
+		dutype = DR_TPDU_type;
+	error &= 0xff;
+
+	hdr->tpdu_type = dutype;
+	hdr->tpdu_cdt = 0;
+
+	switch( dutype ) {
+
+	case DC_TPDU_type:
+		IncStat(ts_DC_sent);
+		hdr->tpdu_li = 6;
+		hdr->tpdu_DCdref = htons(sref);
+		hdr->tpdu_DCsref = tpcb ? htons(tpcb->tp_lref) : 0;
+		IFDEBUG(D_ERROR_EMIT)
+			printf("DC case:\n");
+			dump_buf( hdr, 6);
+		ENDDEBUG
+		/* forget the add'l information variable part */
+		break;
+
+	case DR_TPDU_type:
+		IncStat(ts_DR_sent);
+		hdr->tpdu_li = 7;
+		hdr->tpdu_DRdref = htons(sref);
+		hdr->tpdu_DRsref = 0;
+		hdr->tpdu_DRreason = (char)error;
+		IFDEBUG(D_ERROR_EMIT)
+			printf("DR case:\n");
+			dump_buf( hdr, 7);
+		ENDDEBUG
+		/* forget the add'l information variable part */
+		break;
+
+	case ER_TPDU_type:
+		IncStat(ts_ER_sent);
+		hdr->tpdu_li = 5; 
+		hdr->tpdu_ERreason = (char)error;
+		hdr->tpdu_ERdref = htons(sref);
+		break;
+
+	default:
+		ASSERT(0);
+		printf("TP PANIC: bad dutype 0x%x\n", dutype);
+	}
+
+	if(tpcb)
+		if( tpcb->tp_use_checksum ) {
+			/*
+			 * Reserve the checksum option; its value is filled in by
+			 * iso_gen_csum() below.  After ADDOPTION the two checksum
+			 * octets are the last two of the header so far.
+			 */
+			ADDOPTION(TPP_checksum, hdr, 2, csum_offset /* dummy argument */); 
+			csum_offset =  hdr->tpdu_li - 2;
+		}
+
+	ASSERT( hdr->tpdu_li < MLEN ); 
+
+	if (dutype == ER_TPDU_type) {
+		/* copy the errant tpdu into another 'variable part' */
+		register caddr_t P;
+
+		IFTRACE(D_ERROR_EMIT)
+			tptrace(TPPTmisc, "error_emit ER len tpduli", erlen, hdr->tpdu_li,
+				0,0);
+		ENDTRACE
+		IFDEBUG(D_ERROR_EMIT)
+			printf("error_emit ER len 0x%x tpduli 0x%x\n", erlen, hdr->tpdu_li);
+		ENDDEBUG
+
+		/* copy at most as many octets for which you have room */
+		if (erlen + hdr->tpdu_li + 2 > TP_MAX_HEADER_LEN)
+			erlen = TP_MAX_HEADER_LEN - hdr->tpdu_li - 2;
+			
+		/* add the "invalid tpdu" parameter : required in class 0 */
+		P = (caddr_t)hdr + (int)(hdr->tpdu_li);
+		vbptr(P)->tpv_code =  TPP_invalid_tpdu; /* parameter code */
+		vbptr(P)->tpv_len = erlen;	/* parameter length */
+		m->m_len = hdr->tpdu_li + 2; /* 1 for code, 1 for length */
+
+		/* tp_input very likely handed us an mbuf chain w/ nothing in
+		 * the first mbuf and the data following the empty mbuf
+		 */
+		if(erdata->m_len == 0) {
+			erdata = m_free(erdata); /* returns the next mbuf on the chain */
+		}
+		/*
+		 * copy only up to the bad octet
+		 * (or max that will fit in a header
+		 */
+		m->m_next = m_copy(erdata, 0, erlen);
+		hdr->tpdu_li += erlen + 2; 
+		m_freem(erdata);
+	} else {
+		IFDEBUG(D_ERROR_EMIT)
+			/* one conversion per argument: error first, then tpdu_li */
+			printf("error_emit DR error 0x%x tpduli 0x%x\n",
+				error, hdr->tpdu_li);
+			dump_buf( (char *)hdr, hdr->tpdu_li );
+		ENDDEBUG
+		m->m_len = hdr->tpdu_li ;
+		m_freem(erdata);
+	}
+
+	hdr->tpdu_li --;
+	IFTRACE(D_ERROR_EMIT)
+		tptrace(TPPTtpduout, 2, hdr, hdr->tpdu_li+1, 0, 0);
+	ENDTRACE
+
+	datalen = m_datalen( m);
+	if (tpcb) {
+		if( tpcb->tp_use_checksum ) {
+			IFTRACE(D_ERROR_EMIT)
+				tptrace(TPPTmisc, "before gen csum datalen", datalen,0,0,0);
+			ENDTRACE
+			IFDEBUG(D_ERROR_EMIT)
+				printf("before gen csum datalen 0x%x, csum_offset 0x%x\n", 
+					datalen, csum_offset);
+			ENDDEBUG
+
+			iso_gen_csum(m, csum_offset, datalen);
+		}
+
+		IFDEBUG(D_ERROR_EMIT)
+			printf("OUTPUT: tpcb 0x%x, isop 0x%x, so 0x%x\n",
+				tpcb,  tpcb->tp_npcb,  tpcb->tp_sock);
+		ENDDEBUG
+	}
+	/*
+	 * Pick an output path: an existing CONS channel if we were given one,
+	 * else the pcb's datagram output routine, else the caller-supplied
+	 * routine; with none of these the tpdu is dropped.
+	 */
+	if (cons_channel) {
+#ifdef TPCONS
+		struct pklcd *lcp = (struct pklcd *)cons_channel;
+		struct isopcb *isop = (struct isopcb *)lcp->lcd_upnext;
+
+		tpcons_dg_output(cons_channel, m, datalen);
+		/* was if (tpcb == 0) iso_pcbdetach(isop); */
+		/* but other side may want to try again over same VC,
+		   so, we'll depend on him closing it, but in case it gets forgotten
+		   we'll mark it for garbage collection */
+		lcp->lcd_flags |= X25_DG_CIRCUIT;
+		IFDEBUG(D_ERROR_EMIT)
+			printf("OUTPUT: dutype 0x%x channel 0x%x\n",
+				dutype, cons_channel);
+		ENDDEBUG
+#else
+		printf("TP panic! cons channel 0x%x but not cons configured\n",
+			cons_channel);
+		/* nobody will consume the tpdu we built; don't leak it */
+		m_freem(m);
+#endif
+	} else if (tpcb) {
+
+		IFDEBUG(D_ERROR_EMIT)
+			printf("tp_error_emit 1 sending DG: Laddr\n");
+			dump_addr((struct sockaddr *)laddr);
+			printf("Faddr\n");
+			dump_addr((struct sockaddr *)faddr);
+		ENDDEBUG
+		return (tpcb->tp_nlproto->nlp_dgoutput)(
+			&laddr->siso_addr, 
+			&faddr->siso_addr, 
+			m, datalen, 
+					/* no route */	(caddr_t)0, !tpcb->tp_use_checksum); 
+	} else if (dgout_routine) {
+			IFDEBUG(D_ERROR_EMIT)
+				printf("tp_error_emit sending DG: Laddr\n");
+				dump_addr((struct sockaddr *)laddr);
+				printf("Faddr\n");
+				dump_addr((struct sockaddr *)faddr);
+			ENDDEBUG
+				return (*dgout_routine)( &laddr->siso_addr, &faddr->siso_addr, 
+					m, datalen, /* no route */ 
+					(caddr_t)0, /* nochecksum==false */0);
+	} else {
+			IFDEBUG(D_ERROR_EMIT)
+				printf("tp_error_emit DROPPING 0x%x\n", m);
+			ENDDEBUG
+			IncStat(ts_send_drop);
+			m_freem(m);
+			return 0;
+	}
+	/*
+	 * Only the cons_channel branch falls through to here; it used to
+	 * run off the end of this int function without returning a value.
+	 */
+	return 0;
+}
diff --git a/sys/netiso/tp_events.h b/sys/netiso/tp_events.h
new file mode 100644
index 00000000000..48222830a0d
--- /dev/null
+++ b/sys/netiso/tp_events.h
@@ -0,0 +1,84 @@
+/* $Header$ */
+/* $Source$ */
+/*
+ * An event presented to the TP state machine.
+ * ev_number holds one of the event codes #defined below (0x0 .. 0x18);
+ * tp_driver() uses it, together with the pcb state, to index its
+ * dispatch table.  ev_union holds the per-event attributes; each member
+ * is named EV_<event> and is normally reached through the ATTR() macro
+ * defined after the structure.  Event codes with no EV_ member above
+ * them declare no attributes here.
+ */
+struct tp_event {
+	int ev_number;
+	 struct timeval e_time; 
+#define TM_inact 0x0
+#define TM_retrans 0x1
+#define TM_sendack 0x2
+#define TM_notused 0x3
+
+	union{
+struct { SeqNum e_low; SeqNum e_high; int e_retrans; } EV_TM_reference;
+
+#define TM_reference 0x4
+struct { SeqNum e_low; SeqNum e_high; int e_retrans; } EV_TM_data_retrans;
+
+#define TM_data_retrans 0x5
+struct {
+				  u_char		e_reason;
+				} EV_ER_TPDU;
+
+#define ER_TPDU 0x6
+/*
+ * NOTE(review): the "first field"/"2nd field" remarks below suggest that
+ * other code relies on e_data and e_datalen sitting at the same offsets
+ * in every member that carries data -- confirm before reordering.
+ */
+struct { struct mbuf 	*e_data;	/* first field */
+				  int 			e_datalen; /* 2nd field */
+				  u_int			e_cdt;
+				} EV_CR_TPDU;
+
+#define CR_TPDU 0x7
+struct { struct mbuf 	*e_data;	/* first field */
+				  int 			e_datalen; /* 2nd field */
+				  u_short		e_sref;
+				  u_char		e_reason;
+				} EV_DR_TPDU;
+
+#define DR_TPDU 0x8
+#define DC_TPDU 0x9
+struct { struct mbuf 	*e_data;	/* first field */
+				  int 			e_datalen; /* 2nd field */
+				  u_short		e_sref;
+				  u_int			e_cdt;
+				} EV_CC_TPDU;
+
+#define CC_TPDU 0xa
+struct { u_int			e_cdt;	
+				  SeqNum 	 	e_seq;		
+				  SeqNum 	 	e_subseq;		
+				  u_char 	 	e_fcc_present;		
+				} EV_AK_TPDU;
+
+#define AK_TPDU 0xb
+struct { struct mbuf	*e_data; 	/* first field */
+				  int 			e_datalen; /* 2nd field */
+				  u_int 		e_eot;
+				  SeqNum		e_seq; 
+				} EV_DT_TPDU;
+
+#define DT_TPDU 0xc
+struct { struct mbuf 	*e_data;	/* first field */
+				  int 			e_datalen; 	/* 2nd field */
+				  SeqNum 		e_seq;	
+				} EV_XPD_TPDU;
+
+#define XPD_TPDU 0xd
+struct { SeqNum 		e_seq;		} EV_XAK_TPDU;
+
+#define XAK_TPDU 0xe
+#define T_CONN_req 0xf
+struct { u_char		e_reason; 	} EV_T_DISC_req;
+
+#define T_DISC_req 0x10
+#define T_LISTEN_req 0x11
+#define T_DATA_req 0x12
+#define T_XPD_req 0x13
+#define T_USR_rcvd 0x14
+#define T_USR_Xrcvd 0x15
+#define T_DETACH 0x16
+#define T_NETRESET 0x17
+#define T_ACPT_req 0x18
+	}ev_union;
+};/* end struct event */
+
+#define tp_NEVENTS 0x19
+
+#define ATTR(X)ev_union.EV_/**/X/**/
diff --git a/sys/netiso/tp_inet.c b/sys/netiso/tp_inet.c
new file mode 100644
index 00000000000..fb013718ba2
--- /dev/null
+++ b/sys/netiso/tp_inet.c
@@ -0,0 +1,688 @@
+/*-
+ * Copyright (c) 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)tp_inet.c	8.1 (Berkeley) 6/10/93
+ */
+
+/***********************************************************
+		Copyright IBM Corporation 1987
+
+                      All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and its 
+documentation for any purpose and without fee is hereby granted, 
+provided that the above copyright notice appear in all copies and that
+both that copyright notice and this permission notice appear in 
+supporting documentation, and that the name of IBM not be
+used in advertising or publicity pertaining to distribution of the
+software without specific, written prior permission.  
+
+IBM DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING
+ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL
+IBM BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR
+ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+SOFTWARE.
+
+******************************************************************/
+
+/*
+ * ARGO Project, Computer Sciences Dept., University of Wisconsin - Madison
+ */
+/* 
+ * ARGO TP
+ * $Header: tp_inet.c,v 5.3 88/11/18 17:27:29 nhall Exp $ 
+ * $Source: /usr/argo/sys/netiso/RCS/tp_inet.c,v $
+ *
+ * Here is where you find the inet-dependent code.  We've tried to
+ * keep all net-level and (primarily) address-family-dependent stuff
+ * out of the tp source, and everything here is reached indirectly
+ * through a switch table (struct nl_protosw *) tpcb->tp_nlproto 
+ * (see tp_pcb.c). 
+ * The routines here are:
+ * 	in_getsufx: gets transport suffix out of an inpcb structure.
+ * 	in_putsufx: put transport suffix into an inpcb structure.
+ *	in_putnetaddr: put a whole net addr into an inpcb.
+ *	in_getnetaddr: get a whole net addr from an inpcb.
+ *	in_cmpnetaddr: compare a whole net addr with the one in an inpcb.
+ *	in_recycle_tsuffix: clear suffix for reuse in inpcb
+ *	tpip_mtu: figure out what size tpdu to use
+ *	tpip_input: take a pkt from ip, strip off its ip header, give to tp
+ *	tpip_output_dg: package a pkt for ip given 2 addresses & some data
+ *	tpip_output: package a pkt for ip given an inpcb & some data
+ */
+
+#ifdef INET
+
+#include <sys/param.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/mbuf.h>
+#include <sys/errno.h>
+#include <sys/time.h>
+
+#include <net/if.h>
+
+#include <netiso/tp_param.h>
+#include <netiso/argo_debug.h>
+#include <netiso/tp_stat.h>
+#include <netiso/tp_ip.h>
+#include <netiso/tp_pcb.h>
+#include <netiso/tp_trace.h>
+#include <netiso/tp_stat.h>
+#include <netiso/tp_tpdu.h>
+#include <netinet/in_var.h>
+
+#ifndef ISO
+#include <netiso/iso_chksum.c>
+#endif
+
+/*
+ * NAME:			in_getsufx()
+
+ * CALLED FROM: 	pr_usrreq() on PRU_BIND, 
+ *					PRU_CONNECT, PRU_ACCEPT, and PRU_PEERADDR
+ *
+ * FUNCTION, ARGUMENTS, and RETURN VALUE:
+ * 	Get a transport suffix from an inpcb structure (inp).
+ * 	The argument (which) takes the value TP_LOCAL or TP_FOREIGN.
+ *
+ * RETURNS:		no meaningful value; the port / transport suffix is
+ *  			stored through (data_out) and its length through (lenp)
+ *
+ * SIDE EFFECTS:	
+ *
+ * NOTES:			
+ */
+in_getsufx(inp, lenp, data_out, which)
+	struct inpcb *inp;
+	u_short *lenp;
+	caddr_t data_out;
+	int which;
+{
+	/* the caller's buffer receives the port as a u_short */
+	u_short *portp = (u_short *)data_out;
+
+	/* the length is reported even when (which) selects neither end */
+	*lenp = sizeof(u_short);
+
+	if (which == TP_LOCAL)
+		*portp = inp->inp_lport;
+	else if (which == TP_FOREIGN)
+		*portp = inp->inp_fport;
+	/* any other value of (which) leaves the buffer untouched */
+}
+
+/*
+ * NAME:		in_putsufx()
+ *
+ * CALLED FROM: tp_newsocket(); i.e., when a connection 
+ *		is being established by an incoming CR_TPDU.
+ *
+ * FUNCTION, ARGUMENTS:
+ * 	Put a transport suffix (found in name) into an inpcb structure (inp).
+ * 	The argument (which) takes the value TP_LOCAL or TP_FOREIGN.
+ *
+ * RETURNS:		Nada
+ *
+ * SIDE EFFECTS:	
+ *
+ * NOTES:			
+ */
+/*ARGSUSED*/
+void
+in_putsufx(inp, sufxloc, sufxlen, which)
+	struct inpcb *inp;
+	caddr_t sufxloc;
+	int which;
+{
+	/* only the foreign suffix is ever stored; sufxlen is not consulted */
+	if (which != TP_FOREIGN)
+		return;
+
+	bcopy(sufxloc, (caddr_t)&inp->inp_fport, sizeof(inp->inp_fport));
+}
+
+/*
+ * NAME:	in_recycle_tsuffix()	
+ *
+ * CALLED FROM:	tp.trans whenever we go into REFWAIT state.
+ *
+ * FUNCTION and ARGUMENT:
+ *	 Called when a ref is frozen, to allow the suffix to be reused. 
+ * 	(inp) is the net level pcb.  
+ *
+ * RETURNS:			Nada
+ *
+ * SIDE EFFECTS:	
+ *
+ * NOTES:	This really shouldn't have to be done in a NET level pcb 
+ *	but... for the internet world that's just the way it is done in BSD...
+ * 	The alternative is to have the port unusable until the reference
+ * 	timer goes off.
+ */
+void
+in_recycle_tsuffix(inp)
+	struct inpcb	*inp;
+{
+	/* zero both ports so the suffix pair can be bound again */
+	inp->inp_lport = 0;
+	inp->inp_fport = 0;
+}
+
+/*
+ * NAME:	in_putnetaddr()
+ *
+ * CALLED FROM:
+ * 	tp_newsocket(); i.e., when a connection is being established by an
+ * 	incoming CR_TPDU.
+ *
+ * FUNCTION and ARGUMENTS:
+ * 	Copy a whole net addr from a struct sockaddr (name).
+ * 	into an inpcb (inp).
+ * 	The argument (which) takes values TP_LOCAL or TP_FOREIGN
+ *
+ * RETURNS:		Nada
+ *
+ * SIDE EFFECTS:	
+ *
+ * NOTES:			
+ */ 
+void
+in_putnetaddr(inp, name, which)
+	register struct inpcb	*inp;
+	struct sockaddr_in	*name;
+	int which;
+{
+	if (which == TP_LOCAL) {
+		/*
+		 * (name) is used unconditionally here.
+		 * won't work if the dst address (name) is INADDR_ANY
+		 */
+		bcopy((caddr_t)&name->sin_addr,
+			(caddr_t)&inp->inp_laddr, sizeof(struct in_addr));
+	} else if (which == TP_FOREIGN && name != (struct sockaddr_in *)0) {
+		/* a null (name) leaves the foreign address as it was */
+		bcopy((caddr_t)&name->sin_addr,
+			(caddr_t)&inp->inp_faddr, sizeof(struct in_addr));
+	}
+}
+
+/*
+ * NAME:	in_cmpnetaddr()
+ *
+ * CALLED FROM:
+ * 	tp_input() when a connection is being established by an
+ * 	incoming CR_TPDU, and considered for interception.
+ *
+ * FUNCTION and ARGUMENTS:
+ * 	Compare a whole net addr from a struct sockaddr (name),
+ * 	with that implicitly stored in an inpcb (inp).
+ * 	The argument (which) takes values TP_LOCAL or TP_FOREIGN
+ *
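+ * RETURNS:		nonzero if the address (and the port, when name
+ *			carries a nonzero one) matches the inpcb, 0 otherwise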
+ *
+ * SIDE EFFECTS:	
+ *
+ * NOTES:			
+ */ 
+in_cmpnetaddr(inp, name, which)
+	register struct inpcb	*inp;
+	register struct sockaddr_in	*name;
+	int which;
+{
+	struct in_addr *addr;
+	u_short port;
+
+	/* anything other than TP_LOCAL is compared against the foreign end */
+	if (which == TP_LOCAL) {
+		addr = &inp->inp_laddr;
+		port = inp->inp_lport;
+	} else {
+		addr = &inp->inp_faddr;
+		port = inp->inp_fport;
+	}
+
+	/* a zero port in (name) acts as a wildcard */
+	if (name->sin_port != 0 && name->sin_port != port)
+		return 0;
+	return (name->sin_addr.s_addr == addr->s_addr);
+}
+
+/*
+ * NAME:	in_getnetaddr()
+ *
+ * CALLED FROM:
+ *  pr_usrreq() PRU_SOCKADDR, PRU_ACCEPT, PRU_PEERADDR
+ * FUNCTION and ARGUMENTS:
+ * 	Copy a whole net addr from an inpcb (inp) into
+ * 	an mbuf (name);
+ * 	The argument (which) takes values TP_LOCAL or TP_FOREIGN.
+ *
+ * RETURNS:		Nada
+ *
+ * SIDE EFFECTS:	
+ *
+ * NOTES:			
+ */ 
+
+void
+in_getnetaddr( inp, name, which)
+	register struct mbuf *name;
+	struct inpcb *inp;
+	int which;
+{
+	register struct sockaddr_in *sin = mtod(name, struct sockaddr_in *);
+
+	/* start from an all-zero sockaddr_in in the mbuf's data area */
+	bzero((caddr_t)sin, sizeof(*sin));
+
+	if (which == TP_LOCAL) {
+		sin->sin_port = inp->inp_lport;
+		sin->sin_addr = inp->inp_laddr;
+	} else if (which == TP_FOREIGN) {
+		sin->sin_port = inp->inp_fport;
+		sin->sin_addr = inp->inp_faddr;
+	} else {
+		/* unknown selector: leave the zeroed area, m_len unchanged */
+		return;
+	}
+
+	sin->sin_family = AF_INET;
+	sin->sin_len = sizeof (*sin);
+	name->m_len = sizeof (*sin);
+}
+
+/*
+ * NAME: 	tpip_mtu()
+ *
+ * CALLED FROM:
+ *  tp_route_to() on incoming CR, CC, and pr_usrreq() for PRU_CONNECT
+ *
+ * FUNCTION, ARGUMENTS, and RETURN VALUE:
+ *
+ * Perform subnetwork dependent part of determining MTU information.
+ * It appears that setting a double pointer to the rtentry associated with
+ * the destination, and returning the header size for the network protocol
+ * suffices.
+ * 
+ * SIDE EFFECTS:
+ * Sets tp_routep pointer in pcb.
+ *
+ * NOTES:
+ */
+
+tpip_mtu(tpcb)
+register struct tp_pcb *tpcb;
+{
+	/* the net-level pcb of an inet TP connection is an inpcb */
+	struct inpcb			*inp = (struct inpcb *)tpcb->tp_npcb;
+
+	IFDEBUG(D_CONN)
+		/* the format used to lack a conversion, so tpcb was never shown */
+		printf("tpip_mtu(tpcb 0x%x)\n", tpcb);
+		printf("tpip_mtu routing to addr 0x%x\n", inp->inp_faddr.s_addr);
+	ENDDEBUG
+	/* hand back a pointer to the cached route entry pointer */
+	tpcb->tp_routep = &(inp->inp_route.ro_rt);
+	/* the network-layer header size is that of a plain ip header */
+	return (sizeof (struct ip));
+
+}
+
+/*
+ * NAME:	tpip_output()
+ *
+ * CALLED FROM:  tp_emit()
+ *
+ * FUNCTION and ARGUMENTS:
+ *  Take a packet(m0) from tp and package it so that ip will accept it.
+ *  This means prepending space for the ip header and filling in a few
+ *  of the fields.
+ *  inp is the inpcb structure; datalen is the length of the data in the
+ *  mbuf string m0.
+ * RETURNS:			
+ *  whatever (E*) is returned from the net layer output routine.
+ *
+ * SIDE EFFECTS:	
+ *
+ * NOTES:			
+ */
+
+int
+tpip_output(inp, m0, datalen, nochksum)
+	struct inpcb		*inp;
+	struct mbuf 		*m0;
+	int 				datalen;
+	int					nochksum;
+{
+	/* unpack the pieces of the inpcb that the datagram routine wants */
+	struct in_addr	*laddr = &inp->inp_laddr;
+	struct in_addr	*faddr = &inp->inp_faddr;
+	struct route	*ro = &inp->inp_route;
+
+	return tpip_output_dg(laddr, faddr, m0, datalen, ro, nochksum);
+}
+
+/*
+ * NAME:	tpip_output_dg()
+ *
+ * CALLED FROM:  tp_error_emit()
+ *
+ * FUNCTION and ARGUMENTS:
+ *  This is a copy of tpip_output that takes the addresses
+ *  instead of a pcb.  It's used by the tp_error_emit, when we
+ *  don't have an in_pcb with which to call the normal output rtn.
+ *
+ * RETURNS:	 ENOBUFS or whatever (E*) is
+ *	returned from the net layer output routine.
+ *
+ * SIDE EFFECTS:	
+ *
+ * NOTES:			
+ */
+
+/*ARGSUSED*/
+int
+tpip_output_dg(laddr, faddr, m0, datalen, ro, nochksum)
+	struct in_addr		*laddr, *faddr;
+	struct mbuf 		*m0;
+	int 				datalen;
+	struct route 		*ro;
+	int					nochksum;
+{
+	register struct mbuf 	*m;
+	register struct ip *ip;
+	int 					error;
+
+	IFDEBUG(D_EMIT)
+		printf("tpip_output_dg  datalen 0x%x m0 0x%x\n", datalen, m0);
+	ENDDEBUG
+
+
+	/* get a packet-header mbuf to carry the ip header in front of m0 */
+	MGETHDR(m, M_DONTWAIT, TPMT_IPHDR);
+	if (m == 0) {
+		/*
+		 * No header mbuf could be had.  The data chain was never
+		 * linked to anything, so it has to be released here or it
+		 * is lost (the old code only freed the null header).
+		 * NOTE(review): assumes callers do not free m0 themselves
+		 * after an error return -- confirm against tp_emit() and
+		 * tp_error_emit().
+		 */
+		m_freem(m0);
+		IncStat(ts_send_drop);
+		return ENOBUFS;
+	}
+	m->m_next = m0;
+	MH_ALIGN(m, sizeof(struct ip));
+	m->m_len = sizeof(struct ip);
+
+	ip = mtod(m, struct ip *);
+	bzero((caddr_t)ip, sizeof *ip);
+
+	ip->ip_p = IPPROTO_TP;
+	/* total length covers the new ip header plus the tp data */
+	m->m_pkthdr.len = ip->ip_len = sizeof(struct ip) + datalen;
+	ip->ip_ttl = MAXTTL;	
+		/* don't know why you need to set ttl;
+		 * overlay doesn't even make this available
+		 */
+
+	ip->ip_src = *laddr;
+	ip->ip_dst = *faddr;
+
+	IncStat(ts_tpdu_sent);
+	IFDEBUG(D_EMIT)
+		dump_mbuf(m, "tpip_output_dg before ip_output\n");
+	ENDDEBUG
+
+	/* from here on the whole chain belongs to ip_output */
+	error = ip_output(m, (struct mbuf *)0, ro, IP_ALLOWBROADCAST, NULL);
+
+	IFDEBUG(D_EMIT)
+		printf("tpip_output_dg after ip_output\n");
+	ENDDEBUG
+
+	return error;
+}
+
+/*
+ * NAME:  tpip_input()
+ *
+ * CALLED FROM:
+ * 	ip's input routine, indirectly through the protosw.
+ *
+ * FUNCTION and ARGUMENTS:
+ * Take a packet (m) from ip, strip off the ip header and give it to tp
+ *
+ * RETURNS:  No return value.  
+ * 
+ * SIDE EFFECTS:
+ *
+ * NOTES:
+ */
+ProtoHook
+tpip_input(m, iplen)
+	struct mbuf *m;
+	int iplen;
+{
+	struct sockaddr_in 	src, dst;
+	register struct ip 		*ip;
+	int						s = splnet(), hdrlen;
+
+	IncStat(ts_pkt_rcvd);
+
+	/*
+	 * IP layer has already pulled up the IP header,
+	 * but the first byte after the IP header may not be there,
+	 * e.g. if you came in via loopback, so you have to do an
+	 * m_pullup before you can even look to see how much you
+	 * really need.  The good news is that m_pullup will round
+	 * up to almost the next mbuf's worth.
+	 */
+
+
+	if((m = m_pullup(m, iplen + 1)) == MNULL)
+		goto discard;
+	CHANGE_MTYPE(m, TPMT_DATA);
+	
+	/*
+	 * Now pull up the whole tp header:
+	 * Unfortunately, there may be IP options to skip past so we
+	 * just fetch it as an unsigned char.
+	 * The byte at offset iplen is the tp length indicator; the header
+	 * is that many bytes plus the indicator itself.
+	 */
+	hdrlen = iplen + 1 + mtod(m, u_char *)[iplen];
+
+	if( m->m_len < hdrlen ) {
+		if((m = m_pullup(m, hdrlen)) == MNULL){
+			IFDEBUG(D_TPINPUT)
+				printf("tp_input, pullup 2!\n");
+			ENDDEBUG
+			goto discard;
+		}
+	}
+	/* 
+	 * cannot use tp_inputprep() here 'cause you don't 
+	 * have quite the same situation
+	 */
+
+	IFDEBUG(D_TPINPUT)
+		dump_mbuf(m, "after tpip_input both pullups");
+	ENDDEBUG
+	/* 
+	 * m_pullup may have returned a different mbuf
+	 */
+	ip = mtod(m, struct ip *);
+
+	/*
+	 * drop the ip header from the front of the mbuf
+	 * this is necessary for the tp checksum
+	 */
+	m->m_len -= iplen;
+	m->m_data += iplen;
+
+	/* the ip header bytes are still in the mbuf, just no longer counted */
+	src.sin_addr = *(struct in_addr *)&(ip->ip_src);
+	src.sin_family  = AF_INET;
+	src.sin_len  = sizeof(src);
+	dst.sin_addr = *(struct in_addr *)&(ip->ip_dst);
+	dst.sin_family  = AF_INET; 
+	dst.sin_len  = sizeof(dst);
+
+	(void) tp_input(m, (struct sockaddr *)&src, (struct sockaddr *)&dst,
+				0, tpip_output_dg, 0);
+	/*
+	 * Restore the priority level raised by splnet() above; the
+	 * success path used to return without doing so, unlike the
+	 * discard path below.
+	 */
+	splx(s);
+	return 0;
+
+discard:
+	IFDEBUG(D_TPINPUT)
+		printf("tpip_input DISCARD\n");
+	ENDDEBUG
+	IFTRACE(D_TPINPUT)
+		tptrace(TPPTmisc, "tpip_input DISCARD m",  m,0,0,0);
+	ENDTRACE
+	m_freem(m);
+	IncStat(ts_recv_drop);
+	splx(s);
+	return 0;
+}
+
+
+#include <sys/protosw.h>
+#include <netinet/ip_icmp.h>
+
+extern void tp_quench();
+/*
+ * NAME:	tpin_quench()
+ *
+ * CALLED FROM: tpip_ctlinput()
+ *
+ * FUNCTION and ARGUMENTS:  find the tpcb pointer and pass it to tp_quench
+ *
+ * RETURNS:	Nada
+ *
+ * SIDE EFFECTS:	
+ *
+ * NOTES:			
+ */
+
+void
+tpin_quench(inp)
+	struct inpcb *inp;
+{
+	/* reach the transport pcb through the socket that owns this inpcb */
+	struct tp_pcb *tpcb = (struct tp_pcb *)inp->inp_socket->so_pcb;
+
+	tp_quench(tpcb, PRC_QUENCH);
+}
+
+/*
+ * NAME:	tpip_ctlinput()
+ *
+ * CALLED FROM:
+ *  The network layer through the protosw table.
+ *
+ * FUNCTION and ARGUMENTS:
+ *	When clnp gets an ICMP msg this gets called.
+ *	It either returns an error status to the user or
+ *	causes all connections on this address to be aborted
+ *	by calling the appropriate xx_notify() routine.
+ *	(cmd) is the type of ICMP error.   
+ * 	(sa) the address of the sender
+ *
+ * RETURNS:	 Nothing
+ *
+ * SIDE EFFECTS:	
+ *
+ * NOTES:			
+ */
+ProtoHook
+tpip_ctlinput(cmd, sin)
+	int cmd;
+	struct sockaddr_in *sin;
+{
+	extern u_char inetctlerrmap[];
+	extern struct in_addr zeroin_addr;
+	/*
+	 * NOTE(review): tpin_abort is defined further down as ProtoHook,
+	 * not void -- this forward declaration predates that and should
+	 * be reconciled.
+	 */
+	void tpin_abort __P((struct inpcb *,int));
+
+	/* ignore anything that is not about an internet address */
+	if (sin->sin_family != AF_INET && sin->sin_family != AF_IMPLINK)
+		return 0;
+	if (sin->sin_addr.s_addr == INADDR_ANY)
+		return 0;
+	/* PRC_NCMDS is the number of commands, so it is itself out of range */
+	if (cmd < 0 || cmd >= PRC_NCMDS)
+		return 0;
+	switch (cmd) {
+
+		case	PRC_QUENCH:
+			/*
+			 * in_pcbnotify calls the hook with an inpcb, so go
+			 * through tpin_quench, which finds the tpcb and then
+			 * calls tp_quench.  Passing tp_quench directly made
+			 * it treat the inpcb as a tp_pcb.
+			 */
+			in_pcbnotify(&tp_inpcb, (struct sockaddr *)sin, 0,
+				zeroin_addr, 0, cmd, tpin_quench);
+			break;
+
+		case	PRC_ROUTEDEAD:
+		case	PRC_HOSTUNREACH:
+		case	PRC_UNREACH_NET:
+		case	PRC_IFDOWN:
+		case	PRC_HOSTDEAD:
+			/* routing trouble: let in_rtchange deal with each pcb */
+			in_pcbnotify(&tp_inpcb, (struct sockaddr *)sin, 0,
+				zeroin_addr, 0, cmd, in_rtchange);
+			break;
+
+		default:
+		/*
+		case	PRC_MSGSIZE:
+		case	PRC_UNREACH_HOST:
+		case	PRC_UNREACH_PROTOCOL:
+		case	PRC_UNREACH_PORT:
+		case	PRC_UNREACH_NEEDFRAG:
+		case	PRC_UNREACH_SRCFAIL:
+		case	PRC_REDIRECT_NET:
+		case	PRC_REDIRECT_HOST:
+		case	PRC_REDIRECT_TOSNET:
+		case	PRC_REDIRECT_TOSHOST:
+		case	PRC_TIMXCEED_INTRANS:
+		case	PRC_TIMXCEED_REASS:
+		case	PRC_PARAMPROB:
+		*/
+		/* everything else aborts the connections on this address */
+		in_pcbnotify(&tp_inpcb, (struct sockaddr *)sin, 0,
+			zeroin_addr, 0, cmd, tpin_abort);
+	}
+	return 0;
+}
+
+/*
+ * NAME:	tpin_abort()
+ *
+ * CALLED FROM:
+ *	xxx_notify() from tpip_ctlinput() when
+ *  net level gets some ICMP-equiv. type event.
+ *
+ * FUNCTION and ARGUMENTS:
+ *  Cause the connection to be aborted with some sort of error
+ *  reason indicating that the network layer caused the abort.
+ *  Fakes an ER TPDU so we can go through the driver.
+ *
+ * RETURNS:	 Nothing
+ *
+ * SIDE EFFECTS:	
+ *
+ * NOTES:			
+ */
+
+ProtoHook
+tpin_abort(inp)
+	struct inpcb *inp;
+{
+	struct tp_pcb *tpcb = (struct tp_pcb *)inp->inp_ppcb;
+	struct tp_event e;
+
+	/* build a fake ER TPDU event whose reason is a network reset */
+	e.ev_number = ER_TPDU;
+	e.ATTR(ER_TPDU).e_reason = ENETRESET;
+
+	/* the driver's result is deliberately ignored */
+	(void) tp_driver(tpcb, &e);
+	return 0;
+}
+
+#ifdef ARGO_DEBUG
+/* Debug helper: print the port and address held in an internet sockaddr. */
+dump_inaddr(addr)
+	register struct sockaddr_in *addr;
+{
+	/* pass the scalar s_addr; a whole struct in_addr does not match %x */
+	printf("INET: port 0x%x; addr 0x%x\n", addr->sin_port,
+		addr->sin_addr.s_addr);
+}
+#endif /* ARGO_DEBUG */
+#endif /* INET */
diff --git a/sys/netiso/tp_input.c b/sys/netiso/tp_input.c
new file mode 100644
index 00000000000..a071a5d4add
--- /dev/null
+++ b/sys/netiso/tp_input.c
@@ -0,0 +1,1624 @@
+/*-
+ * Copyright (c) 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)tp_input.c	8.1 (Berkeley) 6/10/93
+ */
+
+/***********************************************************
+		Copyright IBM Corporation 1987
+
+                      All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and its 
+documentation for any purpose and without fee is hereby granted, 
+provided that the above copyright notice appear in all copies and that
+both that copyright notice and this permission notice appear in 
+supporting documentation, and that the name of IBM not be
+used in advertising or publicity pertaining to distribution of the
+software without specific, written prior permission.  
+
+IBM DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING
+ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL
+IBM BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR
+ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+SOFTWARE.
+
+******************************************************************/
+
+/*
+ * ARGO Project, Computer Sciences Dept., University of Wisconsin - Madison
+ */
+/* 
+ * ARGO TP
+ *
+ * $Header: tp_input.c,v 5.6 88/11/18 17:27:38 nhall Exp $
+ * $Source: /usr/argo/sys/netiso/RCS/tp_input.c,v $
+ *
+ * tp_input() gets an mbuf chain from ip.  Actually, not directly
+ * from ip, because ip calls a net-level routine that strips off
+ * the net header and then calls tp_input(), passing the proper type
+ * of addresses for the address family in use (how it figures out
+ * which AF is not yet determined.)
+ *
+ * Decomposing the tpdu is some of the most laughable code.  The variable-length
+ * parameters and the problem of non-aligned memory references
+ * necessitates such abominations as the macros WHILE_OPTIONS (q.v. below)
+ * to loop through the header and decompose it.
+ *
+ * The routine tp_newsocket() is called when a CR comes in for a listening
+ * socket.  tp_input calls sonewconn() and tp_newsocket() to set up the
+ * "child" socket.  Most tpcb values are copied from the parent tpcb into
+ * the child.
+ * 
+ * Also in here is tp_headersize() (grot) which tells the expected size
+ * of a tp header, to be used by other layers.  It's in here because it
+ * uses the static structure tpdu_info.
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/domain.h>
+#include <sys/protosw.h>
+#include <sys/errno.h>
+#include <sys/time.h>
+#include <sys/kernel.h>
+
+#include <netiso/iso.h>
+#include <netiso/iso_errno.h>
+#include <netiso/iso_pcb.h>
+#include <netiso/tp_param.h>
+#include <netiso/tp_timer.h>
+#include <netiso/tp_stat.h>
+#include <netiso/tp_pcb.h>
+#include <netiso/argo_debug.h>
+#include <netiso/tp_trace.h>
+#include <netiso/tp_tpdu.h>
+
+#include <net/if.h>
+#ifdef TRUE
+#undef FALSE
+#undef TRUE
+#endif
+#include <netccitt/x25.h>
+#include <netccitt/pk.h>
+#include <netccitt/pk_var.h>
+
+int 	iso_check_csum(), tp_driver(), tp_headersize(), tp_error_emit();
+
+/*
+	#ifdef lint
+	#undef ATTR
+	#define ATTR(X)ev_number
+	#endif lint
+*/
+
+/*
+ * tp_inputprep: get the head of an incoming mbuf chain ready for tp.
+ * Ensures the first mbuf holds at least one byte, slides the data back
+ * to a 4-byte boundary if it is misaligned, retypes the mbuf as
+ * TPMT_DATA, and pulls up the whole tp header (length indicator byte
+ * plus that many bytes).
+ * Returns the (possibly different) head mbuf, or MNULL when one of the
+ * m_pullup calls fails.
+ */
+struct mbuf *
+tp_inputprep(m) 
+	register struct mbuf *m;
+{
+	int hdrlen;
+
+	IFDEBUG(D_TPINPUT)
+		printf("tp_inputprep: m 0x%x\n", m) ;
+	ENDDEBUG
+
+	while(  m->m_len < 1 ) {
+	    /* The "m_free" logic
+	     * if( (m = m_free(m)) == MNULL )
+	     *      return (struct mbuf *)0;
+		 * would cause a system crash if ever executed.
+		 * This logic will be executed if the first mbuf
+	     * in the chain only contains a CLNP header. The m_free routine
+	     * will release the mbuf containing the CLNP header from the
+	     * chain and the new head of the chain will not have the
+	     * M_PKTHDR bit set. This routine, tp_inputprep, will
+	     * eventually call the "sbappendaddr" routine. "sbappendaddr"
+	     * calls "panic" if M_PKTHDR is not set. m_pullup is a cheap
+	     * way of keeping the head of the chain from being freed.
+		 */
+		/* note: this failure path does not bump ts_recv_drop */
+		if((m = m_pullup(m, 1)) == MNULL)
+			return (MNULL);
+	}
+	/* NOTE(review): the pointer is squeezed through an int here */
+	if(((int)m->m_data) & 0x3) {
+		/* If we are not 4-byte aligned, we have to be
+		 * above the beginning of the mbuf, and it is ok just
+		 * to slide it back. 
+		 */
+		caddr_t ocp = m->m_data;
+
+		/* round the data pointer down, then move the bytes to it */
+		m->m_data = (caddr_t)(((int)m->m_data) & ~0x3);
+		bcopy(ocp, m->m_data, (unsigned)m->m_len);
+	}
+	CHANGE_MTYPE(m, TPMT_DATA);
+
+	/* we KNOW that there is at least 1 byte in this mbuf
+	   and that it is hdr->tpdu_li XXXXXXX!  */
+
+	/* header length = the length-indicator byte itself plus its value */
+	hdrlen = 1 + *mtod( m, u_char *);
+
+	/*
+	 * now pull up the whole tp header 
+	 */
+	if ( m->m_len < hdrlen) {
+		if ((m = m_pullup(m, hdrlen)) == MNULL ) {
+			IncStat(ts_recv_drop);
+			return (struct mbuf *)0;
+		}
+	}
+	IFDEBUG(D_INPUT)
+	printf(
+	" at end: m 0x%x hdr->tpdu_li 0x%x m_len 0x%x\n",m,
+		hdrlen, m->m_len);
+	ENDDEBUG
+	return m;
+}
+
+/* begin groan
+ * -- this array and the following macros allow you to step through the
+ * parameters of the variable part of a header
+ * note that if for any reason the values of the **_TPDU macros (in tp_events.h)
+ * should change, this array has to be rearranged
+ */
+
+/*
+ * Column indices into a tpdu_info[] row.  Columns 0 and 1 hold the
+ * fixed-part length for the regular and extended formats, column 2 the
+ * class 0 length and column 3 the maximum data length (see the column
+ * headings in the table below).
+ */
+#define TP_LEN_CLASS_0_INDEX	2
+#define TP_MAX_DATA_INDEX 3
+
+/* one row per tpdu type code, 0x0 through 0xf; unused codes are all zero */
+static u_char tpdu_info[][4] =
+{
+/*								length						 max data len */
+/*								reg fmt 	xtd fmt  class 0  		 	  */
+ 	/* UNUSED		0x0 */		0x0 ,		0x0,	0x0,		0x0,
+ 	/* XPD_TPDU_type 0x1 */		0x5,		0x8,	0x0,		TP_MAX_XPD_DATA,
+ 	/* XAK_TPDU_type 0x2 */		0x5 ,		0x8,	0x0,		0x0,
+ 	/* GR_TPDU_type	0x3 */		0x0 ,		0x0,	0x0,		0x0,
+ 	/* UNUSED		0x4 */		0x0 ,		0x0,	0x0,		0x0,
+ 	/* UNUSED		0x5 */		0x0 ,		0x0,	0x0,		0x0,
+ 	/* AK_TPDU_type 0x6 */		0x5,		0xa,	0x0,		0x0,
+	/* ER_TPDU_type 0x7 */		0x5,		0x5,	0x0,		0x0,
+ 	/* DR_TPDU_type 0x8 */		0x7,		0x7,	0x7,		TP_MAX_DR_DATA,
+ 	/* UNUSED		0x9 */		0x0 ,		0x0,	0x0,		0x0,
+ 	/* UNUSED		0xa */		0x0 ,		0x0,	0x0,		0x0,
+ 	/* UNUSED		0xb */		0x0 ,		0x0,	0x0,		0x0,
+ 	/* DC_TPDU_type 0xc */		0x6,		0x6,	0x0,		0x0,
+ 	/* CC_TPDU_type 0xd */		0x7,		0x7,	0x7,		TP_MAX_CC_DATA,
+ 	/* CR_TPDU_type 0xe */		0x7,		0x7,	0x7,		TP_MAX_CR_DATA,
+ 	/* DT_TPDU_type 0xf */		0x5,		0x8,	0x3,		0x0,
+};
+
+/*
+ * CHECK: if (Phrase) holds, record (Erval) in `error' and (Loc) in
+ * `errlen', bump the statistic (Stat) and jump to the label (Whattodo).
+ * The enclosing function must have `error' and `errlen' in scope.
+ */
+#define CHECK(Phrase, Erval, Stat, Whattodo, Loc)\
+	if (Phrase) {error = (Erval); errlen = (int)(Loc); IncStat(Stat);\
+	goto Whattodo; }
+
+/* 
+ * WHENEVER YOU USE THE FOLLOWING MACRO,
+ * BE SURE THE TPDUTYPE IS A LEGIT VALUE FIRST! 
+ */
+
+/*
+ * WHILE_OPTIONS opens a block and a for loop that walks pointer P over
+ * the variable part of the header: P starts at the fixed-part length
+ * taken from tpdu_info[type][format] and advances by 2 + tpv_len per
+ * parameter.  PLIM is one past the header (1 + tpdu_li bytes from hdr).
+ * Overshooting PLIM is reported through CHECK as E_TP_LENGTH_INVAL with
+ * a jump to the label `respond'; landing exactly on PLIM ends the loop.
+ * The loop body follows the macro and is closed by END_WHILE_OPTIONS.
+ */
+#define WHILE_OPTIONS(P, hdr, format)\
+{	register caddr_t P = tpdu_info[(hdr)->tpdu_type][(format)] + (caddr_t)hdr;\
+	caddr_t PLIM = 1 + hdr->tpdu_li + (caddr_t)hdr;\
+	for (;; P += 2 + ((struct tp_vbp *)P)->tpv_len) {\
+		CHECK((P > PLIM), E_TP_LENGTH_INVAL, ts_inv_length,\
+				respond, P - (caddr_t)hdr);\
+		if (P == PLIM) break;
+
+/* closes the for loop and the block opened by WHILE_OPTIONS */
+#define END_WHILE_OPTIONS(P) } }
+
+/* end groan */
+
+/*
+ * NAME:  tp_newsocket()
+ *
+ * CALLED FROM:
+ *  tp_input() on incoming CR, when a socket w/ the called suffix
+ * is awaiting a  connection request
+ *
+ * FUNCTION and ARGUMENTS:
+ *  Create a new socket structure, attach to it a new transport pcb,
+ *  using a copy of the net level pcb for the parent socket.
+ *  (so) is the parent socket.
+ *  (fname) is the foreign address (all that's used is the nsap portion)
+ *
+ * RETURN VALUE:
+ *  a new socket structure, being this end of the newly formed connection.
+ *
+ * SIDE EFFECTS:
+ *  Sets a few things in the tpcb and net level pcb
+ *
+ * NOTES:
+ */
+static struct socket *
+tp_newsocket(so, fname, cons_channel, class_to_use, netservice)
+	struct socket				*so;
+	struct sockaddr				*fname;
+	caddr_t						cons_channel;
+	u_char						class_to_use;
+	u_int						netservice;
+{
+	register struct tp_pcb	*tpcb = sototpcb(so); /* old tpcb, needed below */
+	register struct tp_pcb	*newtpcb;
+
+	/* 
+	 * sonewconn() gets a new socket structure,
+	 * a new lower layer pcb and a new tpcb,
+	 * but the pcbs are unnamed (not bound)
+	 */
+	IFTRACE(D_NEWSOCK)
+		tptraceTPCB(TPPTmisc, "newsock: listg_so, _tpcb, so_head",
+			so, tpcb, so->so_head, 0);
+	ENDTRACE	
+
+	/* from here on (so) is the child socket; tpcb is still the parent's */
+	if ((so = sonewconn(so, SS_ISCONFIRMING)) == (struct socket *)0)
+		return so;
+	IFTRACE(D_NEWSOCK)
+		tptraceTPCB(TPPTmisc, "newsock: after newconn so, so_head",
+			so, so->so_head, 0, 0);
+	ENDTRACE	
+
+	IFDEBUG(D_NEWSOCK)
+		printf("tp_newsocket(channel 0x%x)  after sonewconn so 0x%x \n",
+				cons_channel, so);
+		dump_addr(fname);
+		{ 
+			struct socket *t, *head ;
+
+			/* walk the so_q0 links, stopping at the child or its head */
+			head = so->so_head;
+			t = so;
+			printf("so 0x%x so_head 0x%x so_q0 0x%x, q0len %d\n",
+					t, t->so_head, t->so_q0, t->so_q0len);
+			while( (t=t->so_q0)  && t!= so  && t!= head)
+				printf("so 0x%x so_head 0x%x so_q0 0x%x, q0len %d\n",
+					t, t->so_head, t->so_q0, t->so_q0len);
+		}
+	ENDDEBUG
+
+	/* 
+	 * before we clobber the old tpcb ptr, get these items from the parent pcb 
+	 */
+	newtpcb = sototpcb(so);
+	newtpcb->_tp_param = tpcb->_tp_param;
+	newtpcb->tp_flags = tpcb->tp_flags;
+	newtpcb->tp_lcredit = tpcb->tp_lcredit;
+	newtpcb->tp_l_tpdusize = tpcb->tp_l_tpdusize;
+	newtpcb->tp_lsuffixlen = tpcb->tp_lsuffixlen;
+	bcopy( tpcb->tp_lsuffix, newtpcb->tp_lsuffix, newtpcb->tp_lsuffixlen);
+
+	if( /* old */ tpcb->tp_ucddata) {
+		/* 
+		 * These data are the connect- , confirm- or disconnect- data.
+		 */
+		struct mbuf *conndata;
+
+		/* the child gets its own copy; the result is stored unchecked */
+		conndata = m_copy(tpcb->tp_ucddata, 0, (int)M_COPYALL);
+		IFDEBUG(D_CONN)
+			dump_mbuf(conndata, "conndata after mcopy");
+		ENDDEBUG
+		newtpcb->tp_ucddata = conndata;
+	}
+
+	/* switch over to the child's tpcb for the rest of the routine */
+	tpcb = newtpcb;
+	tpcb->tp_state = TP_LISTENING;
+	tpcb->tp_class = class_to_use;
+	tpcb->tp_netservice = netservice;
+
+
+	ASSERT( fname != 0 ) ; /* just checking */
+	if ( fname ) {
+		/*
+		 *	tp_route_to takes its address argument in the form of an mbuf.
+		 */
+		struct mbuf	*m;
+		int			err;
+
+		MGET(m, M_DONTWAIT, MT_SONAME);	/* mbuf type used is confusing */
+		if (m) {
+			/*
+			 * this seems a bit grotesque, but tp_route_to expects
+			 * an mbuf * instead of simply a sockaddr; it calls the ll
+			 * pcb_connect, which expects the name/addr in an mbuf as well.
+			 * sigh.
+			 */
+			/* NOTE(review): sa_len is not checked against the mbuf size */
+			bcopy((caddr_t)fname, mtod(m, caddr_t), fname->sa_len);
+			m->m_len = fname->sa_len;
+
+			/* grot  : have to say the kernel can override params in
+			 * the passive open case
+			 */
+			tpcb->tp_dont_change_params = 0;
+			err = tp_route_to( m, tpcb, cons_channel);
+			m_free(m);
+
+			if (!err)
+				goto ok;
+		}
+		/* reached when MGET failed or tp_route_to returned an error */
+		IFDEBUG(D_CONN)
+			printf("tp_route_to FAILED! detaching tpcb 0x%x, so 0x%x\n",
+				tpcb, so);
+		ENDDEBUG
+		(void) tp_detach(tpcb); 
+		return 0;
+	}
+ok:
+	IFDEBUG(D_TPINPUT)
+		printf("tp_newsocket returning so 0x%x, sototpcb(so) 0x%x\n",
+			so, sototpcb(so));
+	ENDDEBUG
+	return so;
+}
+
+#ifndef TPCONS
+/*
+ * Stand-in compiled only when TPCONS is not defined; it does nothing
+ * and always returns 0.
+ */
+tpcons_output()
+{
+	return(0);
+}
+#endif /* !TPCONS */
+
+/* 
+ * NAME: 	tp_input()
+ *
+ * CALLED FROM:
+ *  net layer input routine
+ *
+ * FUNCTION and ARGUMENTS:
+ *  Process an incoming TPDU (m), finding the associated tpcb if there
+ *  is one. Create the appropriate type of event and call the driver.
+ *  (faddr) and (laddr) are the foreign and local addresses.
+ *  (cons_channel) identifies the network-level channel the TPDU came in
+ *  on; it may be null, and under TPCONS it is treated as a struct pklcd *.
+ *  (dgout_routine) is the network output routine; it is handed to
+ *  tp_error_emit() and compared against tpcons_output to tell a
+ *  connection-oriented network service from a connectionless one.
+ *  (ce_bit) is passed on to CONG_UPDATE_SAMPLE().
+ * 
+ * 	When tp_input() is called we KNOW that the ENTIRE TP HEADER
+ * 	has been m_pullup-ed.
+ *
+ *  TPDUs that carry no data (AK, XAK, DC, ER) may be followed by further
+ *  concatenated TPDUs in the same chain; after the driver has run, the
+ *  function loops back to "again" for each of them.
+ *
+ * RETURN VALUE:
+ *  (ProtoHook) tpcb of the last TPDU handed to tp_driver(), or
+ *  (ProtoHook) 0 if the TPDU was discarded or answered through
+ *  tp_error_emit().
+ *  
+ * SIDE EFFECTS:
+ *	When using COSNS it may affect the state of the net-level pcb
+ *	A CR arriving for a socket with SO_ACCEPTCONN set creates a new
+ *	socket and tpcb through tp_newsocket().
+ *
+ * NOTE:
+ *  The initial value of acktime is 2 so that we will never
+ *  have a 0 value for tp_peer_acktime.  It gets used in the
+ *  computation of the retransmission timer value, and so it
+ *  mustn't be zero.
+ *  2 seems like a reasonable minimum.
+ *
+ *  CHECK(cond, err, stat, label, offset) is defined outside this
+ *  function; from its use here it presumably records err and offset,
+ *  bumps the statistic and jumps to label when cond holds
+ *  -- TODO confirm against the macro definition.
+ */
+ProtoHook
+tp_input(m, faddr, laddr, cons_channel, dgout_routine, ce_bit)
+	register	struct mbuf 	*m;
+	struct sockaddr 			*faddr, *laddr; /* NSAP addresses */
+	caddr_t						cons_channel;
+	int 						(*dgout_routine)();
+	int							ce_bit;
+
+{
+	register struct tp_pcb 	*tpcb;
+	register struct tpdu 	*hdr;
+	struct socket 			*so;
+	struct tp_event 		e;
+	int 					error;
+	unsigned 				dutype;
+	u_short 				dref, sref, acktime, subseq;
+	u_char 					preferred_class, class_to_use, pdusize;
+	u_char					opt, dusize, addlopt, version;
+#ifdef TP_PERF_MEAS
+	u_char					perf_meas;
+#endif /* TP_PERF_MEAS */
+	u_char					fsufxlen, lsufxlen;
+	caddr_t					fsufxloc, lsufxloc;
+	int						tpdu_len;
+	u_int 					takes_data;
+	u_int					fcc_present; 
+	int						errlen;
+	struct tp_conn_param 	tpp;
+	int						tpcons_output();
+
+	/*
+	 * Entry point for every TPDU, including each member of a
+	 * concatenated sequence: all per-TPDU state is reset here.
+	 */
+again:
+	hdr = mtod(m, struct tpdu *);
+	tpcb = 0;
+	error = errlen = tpdu_len = 0;
+	takes_data = fcc_present = FALSE;
+	acktime = 2; sref = subseq = 0;
+	fsufxloc = lsufxloc = NULL;
+	fsufxlen = lsufxlen =
+		preferred_class = class_to_use = pdusize = addlopt = 0;
+	dusize = TP_DFL_TPDUSIZE;
+	/*
+	 * version is only assigned when a CR carries a TPP_vers parameter,
+	 * yet it is always copied into tpp.p_version; give it a defined
+	 * value so a CR without that parameter does not propagate garbage.
+	 */
+	version = TP_VERSION;
+#ifdef TP_PERF_MEAS
+	GET_CUR_TIME( &e.e_time ); perf_meas = 0;
+#endif /* TP_PERF_MEAS */
+	
+	IFDEBUG(D_TPINPUT)
+		printf("tp_input(0x%x, ... 0x%x)\n", m, cons_channel);
+	ENDDEBUG
+
+
+	/* 
+	 * get the actual tpdu length - necessary for monitoring
+	 * and for checksumming
+	 * 
+	 * Also, maybe measure the mbuf chain lengths and sizes.
+	 */
+
+	{ 	register struct mbuf *n=m;
+#	ifdef ARGO_DEBUG
+		int chain_length = 0;
+#	endif /* ARGO_DEBUG */
+
+		for(;;) {
+			tpdu_len += n->m_len;
+			IFDEBUG(D_MBUF_MEAS)
+				if( n->m_flags & M_EXT) {
+					IncStat(ts_mb_cluster);
+				} else {
+					IncStat(ts_mb_small);
+				}
+				chain_length ++;
+			ENDDEBUG
+			if (n->m_next == MNULL ) {
+				break;
+			}
+			n = n->m_next;
+		}
+		IFDEBUG(D_MBUF_MEAS)
+			if(chain_length > 16)
+				chain_length = 0; /* zero used for anything > 16 */
+			tp_stat.ts_mb_len_distr[chain_length] ++;
+		ENDDEBUG
+	}
+	IFTRACE(D_TPINPUT)
+		tptraceTPCB(TPPTtpduin, hdr->tpdu_type, hdr, hdr->tpdu_li+1, tpdu_len, 
+			0);
+	ENDTRACE
+
+	dref = ntohs((short)hdr->tpdu_dref);
+	sref = ntohs((short)hdr->tpdu_sref);
+	dutype = (int)hdr->tpdu_type;
+
+	IFDEBUG(D_TPINPUT)
+		printf("input: dutype 0x%x cons_channel 0x%x dref 0x%x\n", dutype,
+			cons_channel, dref);
+		printf("input: dref 0x%x sref 0x%x\n", dref, sref);
+	ENDDEBUG
+	IFTRACE(D_TPINPUT)
+		tptrace(TPPTmisc, "channel dutype dref ", 
+			cons_channel, dutype, dref, 0);
+	ENDTRACE
+
+
+#ifdef ARGO_DEBUG
+	if( (dutype < TP_MIN_TPDUTYPE) || (dutype > TP_MAX_TPDUTYPE)) {
+		printf("BAD dutype! 0x%x, channel 0x%x dref 0x%x\n",
+			dutype, cons_channel, dref);
+		dump_buf (m, sizeof( struct mbuf ));
+
+		IncStat(ts_inv_dutype);
+		goto discard;
+	}
+#endif /* ARGO_DEBUG */
+
+	CHECK( (dutype < TP_MIN_TPDUTYPE || dutype > TP_MAX_TPDUTYPE),
+		E_TP_INV_TPDU, ts_inv_dutype, respond, 
+		2 );
+		/* unfortunately we can't take the address of the tpdu_type field,
+		 * since it's a bit field - so we just use the constant offset 2
+		 */
+
+	/* Now this isn't very neat but since you locate a pcb one way
+	 * at the beginning of connection establishment, and by
+	 * the dref for each tpdu after that, we have to treat CRs differently
+	 */
+	if ( dutype == CR_TPDU_type ) {
+		u_char alt_classes = 0;
+
+		preferred_class = 1 << hdr->tpdu_CRclass;
+		opt = hdr->tpdu_CRoptions;
+
+		WHILE_OPTIONS(P, hdr, 1 ) /* { */
+
+			switch( vbptr(P)->tpv_code ) {
+
+			case	TPP_tpdu_size: 		
+				vb_getval(P, u_char, dusize);
+				IFDEBUG(D_TPINPUT)
+					printf("CR dusize 0x%x\n", dusize);
+				ENDDEBUG
+				/* COS tests: NBS IA (Dec. 1987) Sec. 4.5.2.1 */
+				if (dusize < TP_MIN_TPDUSIZE || dusize > TP_MAX_TPDUSIZE)
+						dusize = TP_DFL_TPDUSIZE;
+				break;
+			case	TPP_ptpdu_size:
+				switch (vbptr(P)->tpv_len) {
+				case 1: pdusize = vbval(P, u_char); break;
+				case 2: pdusize = ntohs(vbval(P, u_short)); break;
+				default: ;
+				IFDEBUG(D_TPINPUT)
+					printf("malformed prefered TPDU option\n");
+				ENDDEBUG
+				}
+				break;
+			case	TPP_addl_opt:
+				vb_getval(P, u_char, addlopt);
+				break;
+			case	TPP_calling_sufx:
+				/* could use vb_getval, but we want to save the loc & len
+				 * for later use
+				 */
+				fsufxloc = (caddr_t) &vbptr(P)->tpv_val;
+				fsufxlen = vbptr(P)->tpv_len;
+				IFDEBUG(D_TPINPUT)
+					printf("CR fsufx:");
+					{ register int j;
+						for(j=0; j<fsufxlen; j++ ) {
+							printf(" 0x%x. ", *((caddr_t)(fsufxloc+j)) );
+						}
+						printf("\n");
+					}
+				ENDDEBUG
+				break;
+			case	TPP_called_sufx:
+				/* could use vb_getval, but we want to save the loc & len
+				 * for later use
+				 */
+				lsufxloc = (caddr_t) &vbptr(P)->tpv_val;
+				lsufxlen = vbptr(P)->tpv_len;
+				IFDEBUG(D_TPINPUT)
+					printf("CR lsufx:");
+					{ register int j;
+						for(j=0; j<lsufxlen; j++ ) {
+							printf(" 0x%x. ", *((u_char *)(lsufxloc+j)) );
+						}
+						printf("\n");
+					}
+				ENDDEBUG
+				break;
+
+#ifdef TP_PERF_MEAS
+			case	TPP_perf_meas:
+				vb_getval(P, u_char, perf_meas);
+				break;
+#endif /* TP_PERF_MEAS */
+
+			case	TPP_vers:
+				/* not in class 0; 1 octet; in CR_TPDU only */
+				/* COS tests says if version wrong, use default version!?XXX */
+				/*
+				 * The CHECK target is the label immediately below, so a
+				 * mismatching version is counted but still accepted.
+				 */
+				CHECK( (vbval(P, u_char) != TP_VERSION ), 
+					E_TP_INV_PVAL, ts_inv_pval, setversion,
+					(1 + (caddr_t)&vbptr(P)->tpv_val - (caddr_t)hdr) );
+			setversion:
+				version = vbval(P, u_char);
+				break;
+			case	TPP_acktime:
+				vb_getval(P, u_short, acktime);
+				acktime = ntohs(acktime);
+				acktime = acktime/500; /* convert to slowtimo ticks */
+				if((short)acktime <=0 )
+					acktime = 2; /* don't allow a bad peer to screw us up */
+				IFDEBUG(D_TPINPUT)
+					printf("CR acktime 0x%x\n", acktime);
+				ENDDEBUG
+				break;
+
+			case	TPP_alt_class:
+				{
+					u_char *aclass = 0;
+					register int i;
+					static u_char bad_alt_classes[5] =
+						{ ~0, ~3, ~5, ~0xf, ~0x1f};
+
+					aclass = 
+						(u_char *) &(((struct tp_vbp *)P)->tpv_val);
+					for (i = ((struct tp_vbp *)P)->tpv_len; i>0; i--) {
+						alt_classes |= (1<<((*aclass++)>>4));
+					}
+					/*
+					 * NOTE(review): tpdu_CRclass is taken from the received
+					 * header and bad_alt_classes has only 5 entries; no
+					 * range check on the class is visible before this
+					 * index -- confirm it cannot exceed 4 here.
+					 */
+					CHECK( (bad_alt_classes[hdr->tpdu_CRclass] & alt_classes),
+						E_TP_INV_PVAL, ts_inv_aclass, respond,
+						((caddr_t)aclass) - (caddr_t)hdr);
+					IFDEBUG(D_TPINPUT)
+						printf("alt_classes 0x%x\n", alt_classes);
+					ENDDEBUG
+				}
+				break;
+
+			case	TPP_security:
+			case	TPP_residER:
+			case	TPP_priority:
+			case	TPP_transdelay:
+			case	TPP_throughput: 
+			case	TPP_addl_info: 
+			case	TPP_subseq:
+			default:
+				IFDEBUG(D_TPINPUT)
+					printf("param ignored CR_TPDU code= 0x%x\n",
+						 vbptr(P)->tpv_code);
+				ENDDEBUG
+				IncStat(ts_param_ignored);
+				break;
+
+			case	TPP_checksum:		
+				IFDEBUG(D_TPINPUT)
+					printf("CR before cksum\n");
+				ENDDEBUG
+
+				CHECK( iso_check_csum(m, tpdu_len), 
+					E_TP_INV_PVAL, ts_bad_csum, discard, 0)
+
+				IFDEBUG(D_TPINPUT)
+					printf("CR after cksum\n");
+				ENDDEBUG
+				break;
+			}
+
+		/* } */ END_WHILE_OPTIONS(P)
+
+		if (lsufxlen == 0) {
+			/* can't look for a tpcb w/o any called sufx */
+			error =  E_TP_LENGTH_INVAL;
+			IncStat(ts_inv_sufx);
+			goto respond;
+		} else {
+			register struct tp_pcb *t;
+			/*
+			 * The intention here is to trap all CR requests
+			 * to a given nsap, for constructing transport
+			 * service bridges at user level; so these
+			 * intercepts should precede the normal listens.
+			 * Phrasing the logic in this way also allows for
+			 * mop-up listeners, which we don't currently implement.
+			 * We also wish to have a single socket be able to
+			 * listen over any network service provider,
+			 * (cons or clns or ip).
+			 */
+			for (t = tp_listeners; t ; t = t->tp_nextlisten)
+				if ((t->tp_lsuffixlen == 0 ||
+					 (lsufxlen == t->tp_lsuffixlen &&
+					  bcmp(lsufxloc, t->tp_lsuffix, lsufxlen) == 0)) &&
+					((t->tp_flags & TPF_GENERAL_ADDR) ||
+					 (laddr->sa_family == t->tp_domain &&
+					  (*t->tp_nlproto->nlp_cmpnetaddr)
+								(t->tp_npcb, laddr, TP_LOCAL))))
+					break;
+
+			CHECK(t == 0, E_TP_NO_SESSION, ts_inv_sufx, respond,
+				(1 + 2 + (caddr_t)&hdr->_tpduf - (caddr_t)hdr))
+				/* _tpduf is the fixed part; add 2 to get the dref bits of 
+				 * the fixed part (can't take the address of a bit field) 
+				 */
+			IFDEBUG(D_TPINPUT)
+				printf("checking if dup CR\n");
+			ENDDEBUG
+			tpcb = t;
+			/*
+			 * Walk the ring of tpcbs linked to the listener: a CR whose
+			 * source reference and foreign address match one of them
+			 * is a retransmission and is dropped.
+			 */
+			for (t = tpcb->tp_next; t != tpcb; t = t->tp_next) {
+				if (sref != t->tp_fref)
+					continue;
+				if ((*tpcb->tp_nlproto->nlp_cmpnetaddr)(
+						t->tp_npcb, faddr, TP_FOREIGN)) {
+					IFDEBUG(D_TPINPUT)
+						printf("duplicate CR discarded\n");
+					ENDDEBUG
+					goto discard;
+				}
+			}
+			IFTRACE(D_TPINPUT)
+				tptrace(TPPTmisc, "tp_input: tpcb *lsufxloc tpstate", 
+					tpcb, *lsufxloc, tpcb->tp_state, 0);
+			ENDTRACE
+		}
+
+		/* 
+		 * WE HAVE A TPCB 
+		 * already know that the classes in the CR match at least
+		 * one class implemented, but we don't know yet if they
+		 * include any classes permitted by this server.
+		 */
+
+		IFDEBUG(D_TPINPUT)
+			printf("HAVE A TPCB 1: 0x%x\n", tpcb);
+		ENDDEBUG
+		IFDEBUG(D_CONN)
+			printf(
+"CR: bef CHKS: flags 0x%x class_to_use 0x%x alt 0x%x opt 0x%x tp_class 0x%x\n", 
+				tpcb->tp_flags, class_to_use, alt_classes, opt, tpcb->tp_class);
+		ENDDEBUG
+		/* tpcb->tp_class doesn't include any classes not implemented  */
+		/* prefer the class the peer asked for; else fall back to its alternatives */
+		if( (class_to_use = preferred_class & tpcb->tp_class) == 0 )
+			class_to_use = alt_classes & tpcb->tp_class;
+
+		class_to_use = 1 << tp_mask_to_num(class_to_use);
+
+		{
+			tpp = tpcb->_tp_param;
+			tpp.p_class = class_to_use;
+			tpp.p_tpdusize = dusize;
+			tpp.p_ptpdusize = pdusize;
+			tpp.p_xtd_format = (opt & TPO_XTD_FMT) == TPO_XTD_FMT;
+			tpp.p_xpd_service = (addlopt & TPAO_USE_TXPD) == TPAO_USE_TXPD;
+			tpp.p_use_checksum = (tpp.p_class == TP_CLASS_0)?0:
+				(addlopt & TPAO_NO_CSUM) == 0;
+			tpp.p_version = version;
+#ifdef notdef
+			tpp.p_use_efc = (opt & TPO_USE_EFC) == TPO_USE_EFC;
+			tpp.p_use_nxpd = (addlopt & TPAO_USE_NXPD) == TPAO_USE_NXPD;
+			tpp.p_use_rcc = (addlopt & TPAO_USE_RCC) == TPAO_USE_RCC;
+#endif /* notdef */
+
+		CHECK(
+			tp_consistency(tpcb, 0 /* not force or strict */, &tpp) != 0, 
+			E_TP_NEGOT_FAILED, ts_negotfailed, clear_parent_tcb,
+			(1 + 2 + (caddr_t)&hdr->_tpdufr.CRCC - (caddr_t)hdr) 
+				/* ^ more or less the location of class */
+			)
+		}
+		IFTRACE(D_CONN)
+			tptrace(TPPTmisc, 
+				"after 1 consist class_to_use class, out, tpconsout",
+				class_to_use, 
+				tpcb->tp_class, dgout_routine, tpcons_output
+				);
+		ENDTRACE
+		/* class 0 is only accepted when the TPDU came in over tpcons_output's service */
+		CHECK(
+			((class_to_use == TP_CLASS_0)&&(dgout_routine != tpcons_output)),
+			E_TP_NEGOT_FAILED, ts_negotfailed, clear_parent_tcb,
+			(1 + 2 + (caddr_t)&hdr->_tpdufr.CRCC - (caddr_t)hdr) 
+				/* ^ more or less the location of class */
+			)
+		IFDEBUG(D_CONN)
+			printf("CR: after CRCCCHECKS: tpcb 0x%x, flags 0x%x\n", 
+				tpcb, tpcb->tp_flags);
+		ENDDEBUG
+		takes_data = TRUE;
+		e.ATTR(CR_TPDU).e_cdt  =  hdr->tpdu_CRcdt;
+		e.ev_number = CR_TPDU;
+
+		so = tpcb->tp_sock;
+		if (so->so_options & SO_ACCEPTCONN) {
+			struct tp_pcb *parent_tpcb = tpcb;
+			/* 
+			 * Create a socket, tpcb, ll pcb, etc. 
+			 * for this newborn connection, and fill in all the values. 
+			 */
+			IFDEBUG(D_CONN)
+				printf("abt to call tp_newsocket(0x%x, 0x%x, 0x%x, 0x%x)\n",
+					so, laddr, faddr, cons_channel);
+			ENDDEBUG
+			if( (so = 
+				tp_newsocket(so, faddr, cons_channel, 
+					class_to_use, 
+					((tpcb->tp_netservice == IN_CLNS) ? IN_CLNS :
+					(dgout_routine == tpcons_output)?ISO_CONS:ISO_CLNS))
+					) == (struct socket *)0 ) {
+				/* note - even if netservice is IN_CLNS, as far as
+				 * the tp entity is concerned, the only differences
+				 * are CO vs CL
+				 */
+				IFDEBUG(D_CONN)
+					printf("tp_newsocket returns 0\n");
+				ENDDEBUG
+				goto discard;
+			/*
+			 * Reached only by goto from the negotiation CHECKs above:
+			 * forget the listening tpcb so the error reply is not
+			 * attributed to it.
+			 */
+			clear_parent_tcb:
+				tpcb = 0;
+				goto respond;
+			}
+			tpcb = sototpcb(so);
+			insque(tpcb, parent_tpcb);
+
+			/*
+			 * Stash the addresses in the net level pcb 
+			 * kind of like a pcbconnect() but don't need
+			 * or want all those checks.
+			 */
+			(tpcb->tp_nlproto->nlp_putnetaddr)(tpcb->tp_npcb, faddr, TP_FOREIGN);
+			(tpcb->tp_nlproto->nlp_putnetaddr)(tpcb->tp_npcb, laddr, TP_LOCAL);
+
+			/*
+			 * NOTE(review): fsufxlen and lsufxlen are the tpv_len values
+			 * of the received CR; confirm they cannot exceed the size
+			 * of tp_fsuffix / tp_lsuffix before these copies.
+			 */
+			/* stash the f suffix in the new tpcb */
+			if (tpcb->tp_fsuffixlen = fsufxlen) {
+				bcopy(fsufxloc, tpcb->tp_fsuffix, fsufxlen);
+				(tpcb->tp_nlproto->nlp_putsufx)
+						(tpcb->tp_npcb, fsufxloc, fsufxlen, TP_FOREIGN);
+			}
+			/* stash the l suffix in the new tpcb */
+			tpcb->tp_lsuffixlen = lsufxlen;
+			bcopy(lsufxloc, tpcb->tp_lsuffix, lsufxlen);
+			(tpcb->tp_nlproto->nlp_putsufx)
+					(tpcb->tp_npcb, lsufxloc, lsufxlen, TP_LOCAL);
+#ifdef TP_PERF_MEAS
+			if( tpcb->tp_perf_on = perf_meas ) { /* assignment */
+				/* ok, let's create an mbuf for stashing the
+				 * statistics if one doesn't already exist 
+				 */
+				(void) tp_setup_perf(tpcb);
+			}
+#endif /* TP_PERF_MEAS */
+			tpcb->tp_fref = sref;
+
+			/* We've already checked for consistency with the options 
+			 * set in tpp,  but we couldn't set them earlier because 
+			 * we didn't want to change options in the LISTENING tpcb.
+			 * Now we set the options in the new socket's tpcb.
+			 */
+			(void) tp_consistency( tpcb, TP_FORCE, &tpp);
+
+			if(!tpcb->tp_use_checksum)
+				IncStat(ts_csum_off);
+			if(tpcb->tp_xpd_service)
+				IncStat(ts_use_txpd);
+			if(tpcb->tp_xtd_format)
+				IncStat(ts_xtd_fmt);
+
+			tpcb->tp_peer_acktime = acktime;
+
+			/* 
+			 * The following kludge is used to test retransmissions and 
+			 * timeout during connection establishment.
+			 */
+			IFDEBUG(D_ZDREF)
+				IncStat(ts_zdebug);
+				/*tpcb->tp_fref = 0;*/
+			ENDDEBUG
+		}
+		LOCAL_CREDIT(tpcb);
+		IncStat(ts_CR_rcvd);
+		if (!tpcb->tp_cebit_off) {
+			tpcb->tp_win_recv = tp_start_win << 8;
+			tpcb->tp_cong_sample.cs_size = 0;
+			CONG_INIT_SAMPLE(tpcb);
+			CONG_UPDATE_SAMPLE(tpcb, ce_bit);
+		}
+	} else if ( dutype == ER_TPDU_type ) {
+		/* 
+		 * ER TPDUs have to be recognized separately
+		 * because they don't necessarily have a tpcb
+		 * with them and we don't want err out looking for such
+		 * a beast.
+		 * We could put a bunch of little kludges in the 
+		 * next section of code so it would avoid references to tpcb
+		 * if dutype == ER_TPDU_type but we don't want code for ERs to
+		 * mess up code for data transfer.
+		 */
+		IncStat(ts_ER_rcvd);
+		e.ev_number = ER_TPDU;
+		e.ATTR(ER_TPDU).e_reason =  (u_char)hdr->tpdu_ERreason;
+		CHECK (((int)dref <= 0 || dref >= tp_refinfo.tpr_size || 
+			(tpcb = tp_ref[dref].tpr_pcb ) == (struct tp_pcb *) 0 ||
+			tpcb->tp_refstate == REF_FREE ||
+			tpcb->tp_refstate == REF_FROZEN),
+		       E_TP_MISM_REFS, ts_inv_dref, discard, 0)
+
+	} else {
+		/* tpdu type is CC, XPD, XAK, GR, AK, DR, DC, or DT */
+
+		/* In the next 4 checks,
+		 * _tpduf is the fixed part; add 2 to get the dref bits of 
+		 * the fixed part (can't take the address of a bit field) 
+		 */
+#ifdef TPCONS
+		if (cons_channel && dutype == DT_TPDU_type) {
+			struct isopcb *isop = ((struct isopcb *)
+				((struct pklcd *)cons_channel)->lcd_upnext);
+			if (isop && isop->isop_refcnt == 1 && isop->isop_socket &&
+				(tpcb = sototpcb(isop->isop_socket)) &&
+				 (tpcb->tp_class == TP_CLASS_0/* || == CLASS_1 */)) {
+				IFDEBUG(D_TPINPUT)
+					printf("tpinput_dt: class 0 short circuit\n");
+				ENDDEBUG
+				dref = tpcb->tp_lref;
+				sref = tpcb->tp_fref;
+				CHECK( (tpcb->tp_refstate == REF_FREE), 
+					E_TP_MISM_REFS,ts_inv_dref, nonx_dref,
+					(1 + 2 + (caddr_t)&hdr->_tpduf - (caddr_t)hdr))
+				goto tp0_data;
+			}
+
+		}
+#endif
+		{
+
+			CHECK( ((int)dref <= 0 || dref >= tp_refinfo.tpr_size) ,
+				E_TP_MISM_REFS,ts_inv_dref, nonx_dref,
+				(1 + 2 + (caddr_t)&hdr->_tpduf - (caddr_t)hdr))
+			CHECK( ((tpcb = tp_ref[dref].tpr_pcb ) == (struct tp_pcb *) 0 ), 
+				E_TP_MISM_REFS,ts_inv_dref, nonx_dref,
+				(1 + 2 + (caddr_t)&hdr->_tpduf - (caddr_t)hdr))
+			CHECK( (tpcb->tp_refstate == REF_FREE), 
+				E_TP_MISM_REFS,ts_inv_dref, nonx_dref,
+				(1 + 2 + (caddr_t)&hdr->_tpduf - (caddr_t)hdr))
+		}
+
+		IFDEBUG(D_TPINPUT)
+			printf("HAVE A TPCB 2: 0x%x\n", tpcb);
+		ENDDEBUG
+
+		/* causes a DR to be sent for CC; ER for all else */
+		CHECK( (tpcb->tp_refstate == REF_FROZEN),
+			(dutype == CC_TPDU_type?E_TP_NO_SESSION:E_TP_MISM_REFS),
+			ts_inv_dref, respond,
+			(1 + 2 + (caddr_t)&hdr->_tpduf - (caddr_t)hdr))
+
+		IFDEBUG(D_TPINPUT)
+			printf("state of dref %d ok, tpcb 0x%x\n", dref,tpcb);
+		ENDDEBUG
+		/* 
+		 * At this point the state of the dref could be
+		 * FROZEN: tpr_pcb == NULL,  has ( reference only) timers
+		 *		   for example, DC may arrive after the close() has detached
+		 *         the tpcb (e.g., if user turned off SO_LISTEN option)
+		 * OPENING : a tpcb exists but no timers yet
+		 * OPEN  : tpcb exists & timers are outstanding
+		 */
+
+        if (!tpcb->tp_cebit_off)
+            CONG_UPDATE_SAMPLE(tpcb, ce_bit);
+
+		dusize = tpcb->tp_tpdusize;
+		pdusize = tpcb->tp_ptpdusize;
+
+		dutype = hdr->tpdu_type << 8; /* for the switch below */ 
+
+		WHILE_OPTIONS(P, hdr, tpcb->tp_xtd_format) /* { */
+
+/* switch key: tpdu type in the high byte, parameter code in the low byte */
+#define caseof(x,y) case (((x)<<8)+(y))
+		switch( dutype | vbptr(P)->tpv_code ) {
+
+			caseof( CC_TPDU_type, TPP_addl_opt ): 
+					/* not in class 0; 1 octet */
+					vb_getval(P, u_char, addlopt);
+					break;
+			caseof( CC_TPDU_type, TPP_tpdu_size ): 
+				{
+					u_char odusize = dusize;
+					vb_getval(P, u_char, dusize);
+					CHECK( (dusize < TP_MIN_TPDUSIZE ||
+							dusize > TP_MAX_TPDUSIZE || dusize > odusize),
+						E_TP_INV_PVAL, ts_inv_pval, respond,
+						(1 + (caddr_t)&vbptr(P)->tpv_val - (caddr_t)hdr) )
+					IFDEBUG(D_TPINPUT)
+						printf("CC dusize 0x%x\n", dusize);
+					ENDDEBUG
+				}
+					break;
+			caseof( CC_TPDU_type, TPP_ptpdu_size ): 
+				{
+					u_short opdusize = pdusize;
+					switch (vbptr(P)->tpv_len) {
+					case 1: pdusize = vbval(P, u_char); break;
+					case 2: pdusize = ntohs(vbval(P, u_short)); break;
+					default: ;
+					IFDEBUG(D_TPINPUT)
+						printf("malformed prefered TPDU option\n");
+					ENDDEBUG
+					}
+					CHECK( (pdusize == 0 ||
+							(opdusize && (pdusize > opdusize))),
+						E_TP_INV_PVAL, ts_inv_pval, respond,
+						(1 + (caddr_t)&vbptr(P)->tpv_val - (caddr_t)hdr) )
+				}
+					break;
+			caseof( CC_TPDU_type, TPP_calling_sufx):
+					IFDEBUG(D_TPINPUT)
+						printf("CC calling (local) sufxlen 0x%x\n", lsufxlen);
+					ENDDEBUG
+					lsufxloc = (caddr_t) &vbptr(P)->tpv_val;
+					lsufxlen = vbptr(P)->tpv_len;
+					break;
+			caseof(	CC_TPDU_type, TPP_acktime ):
+					/* class 4 only, 2 octets */
+					vb_getval(P, u_short, acktime);
+					acktime = ntohs(acktime);
+					acktime = acktime/500; /* convert to slowtimo ticks */
+					if( (short)acktime <=0 )
+						acktime = 2;
+					break;
+			caseof(	CC_TPDU_type, TPP_called_sufx):
+					fsufxloc = (caddr_t) &vbptr(P)->tpv_val;
+					fsufxlen = vbptr(P)->tpv_len;
+					IFDEBUG(D_TPINPUT)
+						printf("CC called (foreign) sufx len %d\n", fsufxlen);
+					ENDDEBUG
+					break;
+
+			caseof( CC_TPDU_type,	TPP_checksum):		
+			caseof( DR_TPDU_type,	TPP_checksum):		
+			caseof( DT_TPDU_type,	TPP_checksum):		
+			caseof( XPD_TPDU_type,	TPP_checksum):		
+					if( tpcb->tp_use_checksum )  {
+						CHECK( iso_check_csum(m, tpdu_len), 
+							E_TP_INV_PVAL, ts_bad_csum, discard, 0)
+					}
+					break;
+
+			/*  this is different from the above because in the context
+			 *  of concat/ sep tpdu_len might not be the same as hdr len 
+			 */
+			caseof( AK_TPDU_type,	TPP_checksum):		
+			caseof( XAK_TPDU_type,	TPP_checksum):		
+			caseof( DC_TPDU_type,	TPP_checksum):		
+					if( tpcb->tp_use_checksum )  {
+						CHECK( iso_check_csum(m, (int)hdr->tpdu_li + 1), 
+							E_TP_INV_PVAL, ts_bad_csum, discard, 0)
+					}
+					break;
+#ifdef notdef
+			caseof( DR_TPDU_type, TPP_addl_info ):
+				/* ignore - its length and meaning are
+				 * user defined and there's no way
+				 * to pass this info to the user anyway
+				 */
+				break;
+#endif /* notdef */
+
+			caseof( AK_TPDU_type, TPP_subseq ):
+				/* used after reduction of window */
+				vb_getval(P, u_short, subseq);
+				subseq = ntohs(subseq);
+				IFDEBUG(D_ACKRECV)
+					printf("AK dref 0x%x Subseq 0x%x\n", dref, subseq);
+				ENDDEBUG
+				break;
+
+			caseof( AK_TPDU_type, TPP_flow_cntl_conf ):
+				{
+					u_int 	ylwe;
+					u_short ysubseq, ycredit;
+
+					/* the values are decoded for the debug print only;
+					 * just their presence is reported to the driver
+					 */
+					fcc_present = TRUE;
+					vb_getval(P, u_int,	 	ylwe);
+					vb_getval(P, u_short, 	ysubseq);
+					vb_getval(P, u_short, 	ycredit);
+					ylwe = ntohl(ylwe);
+					ysubseq = ntohs(ysubseq);
+					ycredit = ntohs(ycredit);
+					IFDEBUG(D_ACKRECV)
+						printf("%s%x, subseq 0x%x, cdt 0x%x dref 0x%x\n", 
+							"AK FCC lwe 0x", ylwe, ysubseq, ycredit, dref);
+					ENDDEBUG
+				}
+				break;
+
+			default: 
+				IFDEBUG(D_TPINPUT)
+					printf("param ignored dutype 0x%x, code  0x%x\n",
+						dutype, vbptr(P)->tpv_code);
+				ENDDEBUG
+				IFTRACE(D_TPINPUT)
+					tptrace(TPPTmisc, "param ignored dutype code ",
+						dutype, vbptr(P)->tpv_code ,0,0);
+				ENDTRACE
+				IncStat(ts_param_ignored);
+				break;
+#undef caseof
+		}
+		/* } */ END_WHILE_OPTIONS(P)
+
+		/* NOTE: the variable dutype has been shifted left! */
+
+		switch( hdr->tpdu_type ) {
+		case CC_TPDU_type: 
+			/* If CC comes back with an unacceptable class
+			 * respond with a DR or ER
+			 */
+
+			opt = hdr->tpdu_CCoptions; /* 1 byte */
+
+			{
+				tpp = tpcb->_tp_param;
+				tpp.p_class = (1<<hdr->tpdu_CCclass);
+				tpp.p_tpdusize = dusize;
+				tpp.p_ptpdusize = pdusize;
+				tpp.p_dont_change_params = 0;
+				tpp.p_xtd_format = (opt & TPO_XTD_FMT) == TPO_XTD_FMT;
+				tpp.p_xpd_service = (addlopt & TPAO_USE_TXPD) == TPAO_USE_TXPD;
+				tpp.p_use_checksum = (addlopt & TPAO_NO_CSUM) == 0;
+#ifdef notdef
+				tpp.p_use_efc = (opt & TPO_USE_EFC) == TPO_USE_EFC;
+				tpp.p_use_nxpd = (addlopt & TPAO_USE_NXPD) == TPAO_USE_NXPD;
+				tpp.p_use_rcc = (addlopt & TPAO_USE_RCC) == TPAO_USE_RCC;
+#endif /* notdef */
+
+			CHECK(
+				tp_consistency(tpcb, TP_FORCE, &tpp) != 0, 
+				E_TP_NEGOT_FAILED, ts_negotfailed, respond,
+				(1 + 2 + (caddr_t)&hdr->_tpdufr.CRCC - (caddr_t)hdr) 
+					/* ^ more or less the location of class */
+				)
+			IFTRACE(D_CONN)
+				tptrace(TPPTmisc, 
+					"after 1 consist class, out, tpconsout",
+					tpcb->tp_class, dgout_routine, tpcons_output, 0
+					);
+			ENDTRACE
+			/*
+			 * NOTE(review): class_to_use was reset to 0 at "again" and
+			 * is never assigned on this (non-CR) path, so the two tests
+			 * below compare that reset value against TP_CLASS_0 --
+			 * confirm whether the negotiated class was intended.
+			 */
+			CHECK(
+				((class_to_use == TP_CLASS_0)&&
+					(dgout_routine != tpcons_output)),
+				E_TP_NEGOT_FAILED, ts_negotfailed, respond,
+				(1 + 2 + (caddr_t)&hdr->_tpdufr.CRCC - (caddr_t)hdr) 
+					/* ^ more or less the location of class */
+				)
+#ifdef TPCONS
+				if (tpcb->tp_netservice == ISO_CONS &&
+					class_to_use == TP_CLASS_0) {
+					struct isopcb *isop = (struct isopcb *)tpcb->tp_npcb;
+					struct pklcd *lcp = (struct pklcd *)isop->isop_chan;
+					lcp->lcd_flags &= ~X25_DG_CIRCUIT;
+				}
+#endif
+			}
+			if( ! tpcb->tp_use_checksum)
+				IncStat(ts_csum_off);
+			if(tpcb->tp_xpd_service)
+				IncStat(ts_use_txpd);
+			if(tpcb->tp_xtd_format)
+				IncStat(ts_xtd_fmt);
+
+			IFTRACE(D_CONN)
+				tptrace(TPPTmisc, "after CC class flags dusize CCclass",
+					tpcb->tp_class, tpcb->tp_flags, tpcb->tp_tpdusize, 
+					hdr->tpdu_CCclass);
+			ENDTRACE
+
+			/* if called or calling suffices appeared on the CC, 
+			 * they'd better jive with what's in the pcb
+			 */
+			if( fsufxlen ) {
+				CHECK( ((tpcb->tp_fsuffixlen != fsufxlen) ||
+					bcmp(fsufxloc, tpcb->tp_fsuffix, fsufxlen)),
+					E_TP_INV_PVAL,ts_inv_sufx, respond, 
+					(1+fsufxloc - (caddr_t)hdr))
+			}
+			if( lsufxlen ) {
+				CHECK( ((tpcb->tp_lsuffixlen != lsufxlen) ||
+					bcmp(lsufxloc, tpcb->tp_lsuffix, lsufxlen)),
+					E_TP_INV_PVAL,ts_inv_sufx, respond, 
+					(1+lsufxloc - (caddr_t)hdr))
+			}
+
+			e.ATTR(CC_TPDU).e_sref =  sref;
+			e.ATTR(CC_TPDU).e_cdt  =  hdr->tpdu_CCcdt;
+			takes_data = TRUE;
+			e.ev_number = CC_TPDU;
+			IncStat(ts_CC_rcvd);
+			break;
+
+		case DC_TPDU_type:
+			if (sref != tpcb->tp_fref)
+				printf("INPUT: inv sufx DCsref 0x%x, tp_fref 0x%x\n",
+					sref, tpcb->tp_fref);
+					
+			CHECK( (sref != tpcb->tp_fref), 
+				E_TP_MISM_REFS, ts_inv_sufx, discard,
+				(1 + (caddr_t)&hdr->tpdu_DCsref - (caddr_t)hdr))
+		
+			e.ev_number = DC_TPDU;
+			IncStat(ts_DC_rcvd);
+			break;
+
+		case DR_TPDU_type: 
+			IFTRACE(D_TPINPUT)
+				tptrace(TPPTmisc, "DR recvd", hdr->tpdu_DRreason, 0, 0, 0);
+			ENDTRACE
+			if (sref != tpcb->tp_fref) {
+				printf("INPUT: inv sufx DRsref 0x%x tp_fref 0x%x\n",
+					sref, tpcb->tp_fref);
+			}
+					
+			CHECK( (sref != 0 && sref != tpcb->tp_fref &&
+					tpcb->tp_state != TP_CRSENT), 
+				(TP_ERROR_SNDC | E_TP_MISM_REFS),ts_inv_sufx, respond,
+				(1 + (caddr_t)&hdr->tpdu_DRsref - (caddr_t)hdr))
+
+			e.ATTR(DR_TPDU).e_reason = hdr->tpdu_DRreason;
+			e.ATTR(DR_TPDU).e_sref =  (u_short)sref;
+			takes_data = TRUE;
+			e.ev_number = DR_TPDU;
+			IncStat(ts_DR_rcvd);
+			break;
+
+		case ER_TPDU_type:
+			IFTRACE(D_TPINPUT)
+				tptrace(TPPTmisc, "ER recvd", hdr->tpdu_ERreason,0,0,0);
+			ENDTRACE
+			e.ev_number = ER_TPDU;
+			e.ATTR(ER_TPDU).e_reason = hdr->tpdu_ERreason;
+			IncStat(ts_ER_rcvd);
+			break;
+
+		case AK_TPDU_type: 
+
+			e.ATTR(AK_TPDU).e_subseq = subseq;
+			e.ATTR(AK_TPDU).e_fcc_present = fcc_present;
+
+			if (tpcb->tp_xtd_format) {
+#ifdef BYTE_ORDER
+				union seq_type seqeotX;
+
+				seqeotX.s_seqeot = ntohl(hdr->tpdu_seqeotX);
+				e.ATTR(AK_TPDU).e_seq = seqeotX.s_seq;
+				e.ATTR(AK_TPDU).e_cdt = ntohs(hdr->tpdu_AKcdtX);
+#else
+				e.ATTR(AK_TPDU).e_cdt = hdr->tpdu_AKcdtX;
+				e.ATTR(AK_TPDU).e_seq = hdr->tpdu_AKseqX;
+#endif /* BYTE_ORDER */
+			} else {
+				e.ATTR(AK_TPDU).e_cdt = hdr->tpdu_AKcdt;
+				e.ATTR(AK_TPDU).e_seq = hdr->tpdu_AKseq;
+			}
+			IFTRACE(D_TPINPUT)
+				tptrace(TPPTmisc, "AK recvd seq cdt subseq fcc_pres", 
+					e.ATTR(AK_TPDU).e_seq, e.ATTR(AK_TPDU).e_cdt,
+					subseq, fcc_present);
+			ENDTRACE
+
+			e.ev_number = AK_TPDU;
+			IncStat(ts_AK_rcvd);
+			IncPStat(tpcb, tps_AK_rcvd);
+			break;
+
+		case XAK_TPDU_type: 
+			if (tpcb->tp_xtd_format) {
+#ifdef BYTE_ORDER
+				union seq_type seqeotX;
+
+				seqeotX.s_seqeot = ntohl(hdr->tpdu_seqeotX);
+				e.ATTR(XAK_TPDU).e_seq = seqeotX.s_seq;
+#else
+				e.ATTR(XAK_TPDU).e_seq = hdr->tpdu_XAKseqX;
+#endif /* BYTE_ORDER */
+			} else {
+				e.ATTR(XAK_TPDU).e_seq = hdr->tpdu_XAKseq;
+			}
+			e.ev_number = XAK_TPDU;
+			IncStat(ts_XAK_rcvd);
+			IncPStat(tpcb, tps_XAK_rcvd);
+			break;
+
+		case XPD_TPDU_type: 
+			if (tpcb->tp_xtd_format) {
+#ifdef BYTE_ORDER
+				union seq_type seqeotX;
+
+				seqeotX.s_seqeot = ntohl(hdr->tpdu_seqeotX);
+				e.ATTR(XPD_TPDU).e_seq = seqeotX.s_seq;
+#else
+				e.ATTR(XPD_TPDU).e_seq = hdr->tpdu_XPDseqX;
+#endif /* BYTE_ORDER */
+			} else {
+				e.ATTR(XPD_TPDU).e_seq = hdr->tpdu_XPDseq;
+			}
+			takes_data = TRUE;
+			e.ev_number = XPD_TPDU;
+			IncStat(ts_XPD_rcvd);
+			IncPStat(tpcb, tps_XPD_rcvd);
+			break;
+
+		case DT_TPDU_type:
+			{ /* the y option will cause occasional packets to be dropped.
+			   * A little crude but it works.
+			   */
+
+				IFDEBUG(D_DROP)
+					if(time.tv_usec & 0x4 && hdr->tpdu_DTseq & 0x1) {
+						IncStat(ts_ydebug);
+						goto discard;
+					}
+				ENDDEBUG
+			}
+			if (tpcb->tp_class == TP_CLASS_0) {
+			/* also entered by goto from the TPCONS class 0 short circuit above */
+			tp0_data:
+				e.ATTR(DT_TPDU).e_seq = 0; /* actually don't care */
+				e.ATTR(DT_TPDU).e_eot = (((struct tp0du *)hdr)->tp0du_eot);
+			} else if (tpcb->tp_xtd_format) {
+#ifdef BYTE_ORDER
+				union seq_type seqeotX;
+
+				seqeotX.s_seqeot = ntohl(hdr->tpdu_seqeotX);
+				e.ATTR(DT_TPDU).e_seq = seqeotX.s_seq;
+				e.ATTR(DT_TPDU).e_eot = seqeotX.s_eot;
+#else
+				e.ATTR(DT_TPDU).e_seq = hdr->tpdu_DTseqX;
+				e.ATTR(DT_TPDU).e_eot = hdr->tpdu_DTeotX;
+#endif /* BYTE_ORDER */
+			} else {
+				e.ATTR(DT_TPDU).e_seq = hdr->tpdu_DTseq;
+				e.ATTR(DT_TPDU).e_eot = hdr->tpdu_DTeot;
+			}
+			if(e.ATTR(DT_TPDU).e_eot)
+				IncStat(ts_eot_input);
+			takes_data = TRUE;
+			e.ev_number = DT_TPDU;
+			IncStat(ts_DT_rcvd);
+			IncPStat(tpcb, tps_DT_rcvd);
+			break;
+
+		case GR_TPDU_type: 
+			tp_indicate(T_DISCONNECT, tpcb, ECONNABORTED);
+			/* drop through */
+		default:
+			/* this should NEVER happen because there is a
+			 * check for dutype well above here
+			 */
+			error = E_TP_INV_TPDU; /* causes an ER  */
+			IFDEBUG(D_TPINPUT)
+				printf("INVALID dutype 0x%x\n", hdr->tpdu_type);
+			ENDDEBUG
+			IncStat(ts_inv_dutype);
+			goto respond;
+		}
+	}
+	/* peel off the tp header; 
+	 * remember that the du_li doesn't count itself.
+	 * This may leave us w/ an empty mbuf at the front of a chain.
+	 * We can't just throw away the empty mbuf because hdr still points
+	 * into the mbuf's data area and we're still using hdr (the tpdu header)
+	 */
+	m->m_len -= ((int)hdr->tpdu_li + 1);
+	m->m_data += ((int)hdr->tpdu_li + 1);
+
+	if (takes_data) {
+		int max = tpdu_info[ hdr->tpdu_type ] [TP_MAX_DATA_INDEX];
+		int datalen = tpdu_len - hdr->tpdu_li - 1, mbtype = MT_DATA;
+		struct {
+			struct tp_disc_reason dr;
+			struct cmsghdr x_hdr;
+		} x;
+#define c_hdr x.x_hdr
+		register struct mbuf *n;
+
+		CHECK( (max && datalen > max), E_TP_LENGTH_INVAL,
+		        ts_inv_length, respond, (max + hdr->tpdu_li + 1) );
+		/*
+		 * CR, CC and DR data is delivered as a control message: a
+		 * cmsghdr (preceded by the disconnect reason for DR) is built
+		 * in a fresh mbuf and prepended to the data.  The cases below
+		 * deliberately fall through into one another.
+		 */
+		switch( hdr->tpdu_type ) {
+
+		case CR_TPDU_type:
+			c_hdr.cmsg_type = TPOPT_CONN_DATA;
+			goto make_control_msg;
+
+		case CC_TPDU_type:
+			c_hdr.cmsg_type = TPOPT_CFRM_DATA;
+			goto make_control_msg;
+
+		case DR_TPDU_type:
+			x.dr.dr_hdr.cmsg_len = sizeof(x) - sizeof(c_hdr);
+			x.dr.dr_hdr.cmsg_type = TPOPT_DISC_REASON;
+			x.dr.dr_hdr.cmsg_level = SOL_TRANSPORT;
+			x.dr.dr_reason = hdr->tpdu_DRreason;
+			c_hdr.cmsg_type = TPOPT_DISC_DATA;
+		make_control_msg:
+			datalen += sizeof(c_hdr);
+			c_hdr.cmsg_len = datalen;
+			c_hdr.cmsg_level = SOL_TRANSPORT;
+			mbtype = MT_CONTROL;
+			MGET(n, M_DONTWAIT, MT_DATA);
+			/* no mbuf for the header: drop the data but still run the driver */
+			if (n == 0)
+				{m_freem(m); m = 0; datalen = 0; goto invoke; }
+			if (hdr->tpdu_type == DR_TPDU_type) {
+				datalen += sizeof(x) - sizeof(c_hdr);
+				bcopy((caddr_t)&x, mtod(n, caddr_t), n->m_len = sizeof(x));
+			} else
+				bcopy((caddr_t)&c_hdr, mtod(n, caddr_t),
+					  n->m_len = sizeof(c_hdr));
+			n->m_next = m;
+			m = n;
+			/* FALLTHROUGH */
+
+		case XPD_TPDU_type:
+			if (mbtype != MT_CONTROL)
+				mbtype = MT_OOBDATA;
+			m->m_flags |= M_EOR;
+			/* FALLTHROUGH */
+
+		case DT_TPDU_type:
+			for (n = m; n; n = n->m_next) { 
+				MCHTYPE(n, mbtype);
+			}
+		invoke:
+			e.ATTR(DT_TPDU).e_datalen = datalen;
+			e.ATTR(DT_TPDU).e_data =  m;
+			break;
+
+		default:
+			printf(
+				"ERROR in tp_input! hdr->tpdu_type 0x%x takes_data 0x%x m 0x%x\n",
+				hdr->tpdu_type, takes_data, m);
+			break;
+		}
+		/* prevent m_freem() after tp_driver() from throwing it all away */
+		m = MNULL;
+	}
+
+	IncStat(ts_tpdu_rcvd);
+
+	IFDEBUG(D_TPINPUT)
+		printf( "tp_input: before driver, state 0x%x event 0x%x m 0x%x",
+			tpcb->tp_state, e.ev_number, m );
+		printf(" e.e_data 0x%x\n", e.ATTR(DT_TPDU).e_data);
+		printf("takes_data 0x%x m_len 0x%x, tpdu_len 0x%x\n",
+			takes_data, (m==MNULL)?0:m->m_len,  tpdu_len);
+	ENDDEBUG
+
+	error = tp_driver(tpcb, &e);
+
+	ASSERT(tpcb != (struct tp_pcb *)0);
+	ASSERT(tpcb->tp_sock != (struct socket *)0);
+	/* do not overwrite an error already pending on the socket */
+	if( tpcb->tp_sock->so_error == 0 )
+		tpcb->tp_sock->so_error = error;
+
+	/* Kludge to keep the state tables under control (adding
+	 * data on connect & disconnect & freeing the mbuf containing
+	 * the data would have exploded the tables and made a big mess ).
+	 */
+	switch(e.ev_number) {
+		case CC_TPDU:
+		case DR_TPDU:
+		case CR_TPDU:
+			m = e.ATTR(CC_TPDU).e_data; /* same field for all three dutypes */
+			IFDEBUG(D_TPINPUT)
+				printf("after driver, restoring m to 0x%x, takes_data 0x%x\n", 
+				m, takes_data);
+			ENDDEBUG
+			break;
+		default:
+			break;
+	}
+	/* Concatenated sequences are terminated by any tpdu that 
+	 * carries data: CR, CC, DT, XPD, DR.
+	 * All other tpdu types may be concatenated: AK, XAK, DC, ER.
+	 */
+
+separate:
+	if ( takes_data == 0 )  {
+		ASSERT( m != MNULL );
+		/* 
+		 * we already peeled off the prev. tp header so 
+		 * we can just pull up some more and repeat
+		 */
+
+		if( m = tp_inputprep(m) ) {
+		IFDEBUG(D_TPINPUT)
+			hdr = mtod(m, struct tpdu *);
+			printf("tp_input @ separate: hdr 0x%x size %d m 0x%x\n", 
+			hdr, (int) hdr->tpdu_li + 1, m);
+			dump_mbuf(m, "tp_input after driver, at separate");
+		ENDDEBUG
+
+			IncStat(ts_concat_rcvd);
+			goto again;
+		}
+	}
+	if ( m != MNULL ) {
+		IFDEBUG(D_TPINPUT)
+			printf("tp_input : m_freem(0x%x)\n", m);
+		ENDDEBUG
+		m_freem(m);
+		IFDEBUG(D_TPINPUT)
+			printf("tp_input : after m_freem 0x%x\n", m);
+		ENDDEBUG
+	}
+	return (ProtoHook) tpcb;
+
+	/* Error exit: free the TPDU without answering the peer. */
+discard:
+	/* class 4: drop the tpdu */
+	/* class 2,0: Should drop the net connection, if you can figure out
+	 * to which connection it applies
+	 */
+	IFDEBUG(D_TPINPUT)
+		printf("tp_input DISCARD\n");
+	ENDDEBUG
+	IFTRACE(D_TPINPUT)
+		tptrace(TPPTmisc, "tp_input DISCARD m",  m,0,0,0);
+	ENDTRACE
+	m_freem(m);
+	IncStat(ts_recv_drop);
+	return (ProtoHook)0;
+
+	/*
+	 * Error exit for a destination reference that does not name a usable
+	 * tpcb: only the CC and DR cases go on to respond, everything else
+	 * is discarded.
+	 * NOTE(review): on the path through WHILE_OPTIONS dutype has been
+	 * shifted left by 8, but the CHECKs that jump here all precede that
+	 * shift -- confirm no later jump is added without accounting for it.
+	 */
+nonx_dref:
+	switch (dutype) {
+	default:
+		goto discard;
+	case CC_TPDU_type:
+		/* error = E_TP_MISM_REFS; */
+		break;
+	case DR_TPDU_type:
+		error |= TP_ERROR_SNDC;
+	}
+	/*
+	 * Error exit: answer the peer through tp_error_emit(), which is
+	 * handed m.  Without a source reference there is nobody to answer,
+	 * so the TPDU is discarded instead.
+	 */
+respond:
+	IFDEBUG(D_TPINPUT)
+		printf("RESPOND: error 0x%x, errlen 0x%x\n", error, errlen);
+	ENDDEBUG
+	IFTRACE(D_TPINPUT)
+		tptrace(TPPTmisc, "tp_input RESPOND m error sref", m, error, sref, 0);
+	ENDTRACE
+	if (sref == 0)
+		goto discard;
+	(void) tp_error_emit(error, (u_long)sref, (struct sockaddr_iso *)faddr,
+				(struct sockaddr_iso *)laddr, m, errlen, tpcb,
+				cons_channel, dgout_routine);
+	IFDEBUG(D_ERROR_EMIT)
+		printf("tp_input after error_emit\n");
+	ENDDEBUG
+
+#ifdef lint
+	printf("",sref,opt);
+#endif /* lint */
+	IncStat(ts_recv_drop);
+	return (ProtoHook)0;
+}
+
+
+/*
+ * NAME: tp_headersize()
+ *
+ * CALLED FROM:
+ *  tp_emit() and tp_sbsend()
+ *  TP needs to know the header size so it can figure out how
+ *  much data to put in each tpdu.
+ *
+ * FUNCTION, ARGUMENTS, and RETURN VALUE:
+ *  Look up in tpdu_info[] the size of the tp header for a tpdu of
+ *  type (dutype) on the connection represented by (tpcb).
+ *  Class 0 connections use the TP_LEN_CLASS_0_INDEX column; every
+ *  other class uses the column selected by tp_xtd_format.
+ *
+ * RETURNS:	  the expected size of the header in bytes
+ *
+ * SIDE EFFECTS:	
+ *  Prints a diagnostic when the class is neither 0 nor 4 and the tpdu
+ *  is neither a DR nor a CR; the same condition is then ASSERTed.
+ *
+ * NOTES:	 It would be nice if it got the network header size as well.
+ */
+int
+tp_headersize(dutype, tpcb) 
+	int 			dutype;
+	struct tp_pcb 	*tpcb;
+{
+	IFTRACE(D_CONN)
+		tptrace(TPPTmisc, "tp_headersize dutype class xtd_format",
+			dutype, tpcb->tp_class, tpcb->tp_xtd_format, 0);
+	ENDTRACE
+
+	/* TODO: identify this and GET RID OF IT */
+	if (tpcb->tp_class != TP_CLASS_0 &&
+		tpcb->tp_class != TP_CLASS_4 &&
+		dutype != DR_TPDU_type &&
+		dutype != CR_TPDU_type) {
+		printf("tp_headersize:dutype 0x%x, class 0x%x", 
+			dutype, tpcb->tp_class);
+	}
+	ASSERT( (tpcb->tp_class == TP_CLASS_0) || 
+			(tpcb->tp_class == TP_CLASS_4) || 
+			(dutype == DR_TPDU_type) || 
+			(dutype == CR_TPDU_type) );
+
+	/* caller must get network level header size separately */
+	return (tpcb->tp_class == TP_CLASS_0) ?
+		tpdu_info[ dutype ] [TP_LEN_CLASS_0_INDEX] :
+		tpdu_info[ dutype ] [tpcb->tp_xtd_format];
+}
diff --git a/sys/netiso/tp_ip.h b/sys/netiso/tp_ip.h
new file mode 100644
index 00000000000..f2777676e13
--- /dev/null
+++ b/sys/netiso/tp_ip.h
@@ -0,0 +1,91 @@
+/*-
+ * Copyright (c) 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)tp_ip.h	8.1 (Berkeley) 6/10/93
+ */
+
+/***********************************************************
+		Copyright IBM Corporation 1987
+
+                      All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and its 
+documentation for any purpose and without fee is hereby granted, 
+provided that the above copyright notice appear in all copies and that
+both that copyright notice and this permission notice appear in 
+supporting documentation, and that the name of IBM not be
+used in advertising or publicity pertaining to distribution of the
+software without specific, written prior permission.  
+
+IBM DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING
+ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL
+IBM BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR
+ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+SOFTWARE.
+
+******************************************************************/
+
+/*
+ * ARGO Project, Computer Sciences Dept., University of Wisconsin - Madison
+ */
+/* 
+ * ARGO TP
+ *
+ * $Header: tp_ip.h,v 5.1 88/10/12 12:19:47 root Exp $
+ * $Source: /usr/argo/sys/netiso/RCS/tp_ip.h,v $
+ *
+ * internet IP-dependent structures and include files
+ *
+ */
+
+
+#ifndef __TP_IP__
+#define __TP_IP__
+
+#ifndef SOCK_STREAM
+#include <sys/socket.h>
+#endif
+
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/ip.h>
+#include <net/route.h>
+#include <netinet/in_pcb.h>
+#include <netinet/ip_var.h>
+
+
+struct inpcb tp_inpcb;	/* NOTE(review): no `extern' here, so this is a definition -- confirm intended */
+	/* queue of active inpcbs for tp; used when tp runs over DoD IP */
+
+#endif /* __TP_IP__ */
diff --git a/sys/netiso/tp_iso.c b/sys/netiso/tp_iso.c
new file mode 100644
index 00000000000..1cf67f86648
--- /dev/null
+++ b/sys/netiso/tp_iso.c
@@ -0,0 +1,693 @@
+/*-
+ * Copyright (c) 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)tp_iso.c	8.1 (Berkeley) 6/10/93
+ */
+
+/***********************************************************
+		Copyright IBM Corporation 1987
+
+                      All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and its 
+documentation for any purpose and without fee is hereby granted, 
+provided that the above copyright notice appear in all copies and that
+both that copyright notice and this permission notice appear in 
+supporting documentation, and that the name of IBM not be
+used in advertising or publicity pertaining to distribution of the
+software without specific, written prior permission.  
+
+IBM DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING
+ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL
+IBM BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR
+ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+SOFTWARE.
+
+******************************************************************/
+
+/*
+ * ARGO Project, Computer Sciences Dept., University of Wisconsin - Madison
+ */
+/* 
+ * ARGO TP
+ * $Header: /var/src/sys/netiso/RCS/tp_iso.c,v 5.1 89/02/09 16:20:51 hagens Exp $
+ * $Source: /var/src/sys/netiso/RCS/tp_iso.c,v $
+ *
+ * Here is where you find the iso-dependent code.  We've tried to
+ * keep all net-level and (primarily) address-family-dependent stuff
+ * out of the tp source, and everything here is reached indirectly
+ * through a switch table (struct nl_protosw *) tpcb->tp_nlproto 
+ * (see tp_pcb.c). 
+ * The routines here are:
+ * 		iso_getsufx: gets transport suffix out of an isopcb structure.
+ * 		iso_putsufx: put transport suffix into an isopcb structure.
+ *		iso_putnetaddr: put a whole net addr into an isopcb.
+ *		iso_getnetaddr: get a whole net addr from an isopcb.
+ *		iso_cmpnetaddr: compare a whole net addr from an isopcb.
+ *		iso_recycle_tsuffix: clear suffix for reuse in isopcb
+ * 		tpclnp_ctlinput: handle ER CLNPDU : icmp-like stuff
+ * 		tpclnp_mtu: figure out what size tpdu to use
+ *		tpclnp_input: take a pkt from clnp, strip off its clnp header, 
+ *				give to tp
+ *		tpclnp_output_dg: package a pkt for clnp given 2 addresses & some data
+ *		tpclnp_output: package a pkt for clnp given an isopcb & some data
+ */
+
+#ifdef ISO
+
+#include <sys/param.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/domain.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/errno.h>
+#include <sys/time.h>
+#include <sys/protosw.h>
+
+#include <net/if.h>
+#include <net/route.h>
+
+#include <netiso/argo_debug.h>
+#include <netiso/tp_param.h>
+#include <netiso/tp_stat.h>
+#include <netiso/tp_pcb.h>
+#include <netiso/tp_trace.h>
+#include <netiso/tp_stat.h>
+#include <netiso/tp_tpdu.h>
+#include <netiso/tp_clnp.h>
+#include <netiso/cltp_var.h>
+
+/*
+ * CALLED FROM:
+ * 	pr_usrreq() on PRU_BIND, PRU_CONNECT, PRU_ACCEPT, and PRU_PEERADDR
+ * FUNCTION, ARGUMENTS:
+ * 	Copy the transport suffix of one address in (isop) to (data_out)
+ * 	and store the suffix length in (*lenp).
+ * 	The argument (which) takes the value TP_LOCAL or TP_FOREIGN.
+ * 	For any other (which), or when the chosen address pointer is null,
+ * 	neither (data_out) nor (*lenp) is touched.
+ */
+
+iso_getsufx(isop, lenp, data_out, which)
+	struct isopcb *isop;
+	u_short *lenp;
+	caddr_t data_out;
+	int which;
+{
+	register struct sockaddr_iso *siso =
+		(which == TP_LOCAL) ? isop->isop_laddr :
+		(which == TP_FOREIGN) ? isop->isop_faddr : 0;
+
+	if (siso == 0)
+		return;
+	*lenp = siso->siso_tlen;
+	bcopy(TSEL(siso), data_out, *lenp);
+}
+
+/* CALLED FROM:
+ * 	tp_newsocket(); i.e., when a connection is being established by an
+ * 	incoming CR_TPDU.
+ * FUNCTION, ARGUMENTS:
+ * 	Put the (sufxlen)-byte transport suffix at (sufxloc) into the local or
+ * 	foreign address of an isopcb structure (isop).  The argument (which)
+ * 	takes the value TP_LOCAL or TP_FOREIGN; anything else is ignored.
+ */
+void
+iso_putsufx(isop, sufxloc, sufxlen, which)
+	struct isopcb *isop;
+	caddr_t sufxloc;
+	int sufxlen, which;
+{
+	struct sockaddr_iso **dst, *backup;
+	register struct sockaddr_iso *addr;
+	struct mbuf *m;
+	int len;
+
+	switch (which) {
+	default:
+		return;
+
+	case TP_LOCAL:
+		dst = &isop->isop_laddr;
+		backup = &isop->isop_sladdr;
+		break;
+
+	case TP_FOREIGN:
+		dst = &isop->isop_faddr;
+		backup = &isop->isop_sfaddr;
+	}
+	if ((addr = *dst) == 0) {	/* no address yet: use the sockaddr embedded in the pcb */
+		addr = *dst = backup;
+		addr->siso_nlen = 0;
+		addr->siso_slen = 0;
+		addr->siso_plen = 0;
+		printf("iso_putsufx on un-initialized isopcb\n");
+	}
+	len = sufxlen + addr->siso_nlen +	/* suffix + net address + ... */
+			(sizeof(*addr) - sizeof(addr->siso_data));	/* ... the part of the sockaddr outside siso_data */
+	if (addr == backup) {
+		if (len > sizeof(*addr)) {	/* too big for the embedded sockaddr: move it into an mbuf */
+				m = m_getclr(M_DONTWAIT, MT_SONAME);
+				if (m == 0)
+					return;	/* no mbuf: the suffix is not stored and nothing is reported */
+				addr = *dst = mtod(m, struct sockaddr_iso *);
+				*addr = *backup;
+				m->m_len = len;
+		}
+	}
+	bcopy(sufxloc, TSEL(addr), sufxlen);
+	addr->siso_tlen = sufxlen;
+	addr->siso_len = len;
+}
+
+/*
+ * CALLED FROM:
+ * 	tp.trans whenever we go into REFWAIT state.
+ * FUNCTION and ARGUMENT:
+ *	 Called when a ref is frozen, to allow the suffix to be reused:
+ * 	zeroes the suffix length of both addresses of (isop), the net level
+ * 	pcb.  This really shouldn't have to be done in a NET level pcb
+ * 	but... for the internet world that's just the way it is done in BSD.
+ * 	The alternative is an unusable port until the reference timer goes off.
+ */
+void
+iso_recycle_tsuffix(isop)
+	struct isopcb	*isop;
+{
+	isop->isop_faddr->siso_tlen = 0;
+	isop->isop_laddr->siso_tlen = 0;
+}
+
+/*
+ * CALLED FROM:
+ * 	tp_newsocket(); i.e., when a connection is being established by an
+ * 	incoming CR_TPDU.
+ * FUNCTION and ARGUMENTS:
+ * 	Copy the whole net addr of a struct sockaddr_iso (name) into the
+ * 	local or foreign address of an isopcb (isop); an unset address
+ * 	pointer is first aimed at the pcb's embedded sockaddr.
+ * 	(which) is TP_LOCAL or TP_FOREIGN; other values only print a message.
+ */ 
+void
+iso_putnetaddr(isop, name, which)
+	register struct isopcb	*isop;
+	struct sockaddr_iso	*name;
+	int which;
+{
+	struct sockaddr_iso **slot, *fallback;
+	register struct sockaddr_iso *target;
+
+	if (which == TP_LOCAL) {
+		slot = &isop->isop_laddr;
+		fallback = &isop->isop_sladdr;
+	} else if (which == TP_FOREIGN) {
+		slot = &isop->isop_faddr;
+		fallback = &isop->isop_sfaddr;
+	} else {
+		printf("iso_putnetaddr: should panic\n");
+		return;
+	}
+	if (*slot == 0)
+		*slot = fallback;
+	target = *slot;
+	IFDEBUG(D_TPISO)
+		printf("ISO_PUTNETADDR\n");
+		dump_isoaddr(isop->isop_faddr);
+	ENDDEBUG
+	target->siso_addr = name->siso_addr;
+}
+
+/*
+ * CALLED FROM:
+ * 	tp_input() when a connection is being established by an
+ * 	incoming CR_TPDU, and considered for interception.
+ * FUNCTION and ARGUMENTS:
+ * 	Compare a net addr from a struct sockaddr_iso (name) with the local
+ * 	or foreign one held by an isopcb (isop), chosen by (which) = TP_LOCAL
+ * 	or TP_FOREIGN.  Returns 1 on a match, 0 otherwise; a nonzero-length
+ * 	suffix in (name) must match too.  Sets an unset address pointer.
+ */ 
+iso_cmpnetaddr(isop, name, which)
+	register struct isopcb	*isop;
+	register struct sockaddr_iso	*name;
+	int which;
+{
+	struct sockaddr_iso **slot, *fallback;
+	register struct sockaddr_iso *mine;
+
+	if (which == TP_LOCAL) {
+		slot = &isop->isop_laddr;
+		fallback = &isop->isop_sladdr;
+	} else if (which == TP_FOREIGN) {
+		slot = &isop->isop_faddr;
+		fallback = &isop->isop_sfaddr;
+	} else {
+		printf("iso_cmpnetaddr: should panic\n");
+		return 0;
+	}
+	if (*slot == 0)
+		*slot = fallback;
+	mine = *slot;
+	IFDEBUG(D_TPISO)
+		printf("ISO_CMPNETADDR\n");
+		dump_isoaddr(mine);
+	ENDDEBUG
+	if (name->siso_tlen && bcmp(TSEL(name), TSEL(mine), name->siso_tlen))
+		return (0);
+	return (!bcmp((caddr_t)name->siso_data,
+			 (caddr_t)mine->siso_data, name->siso_nlen));
+}
+
+/*
+ * CALLED FROM:
+ *  pr_usrreq() PRU_SOCKADDR, PRU_ACCEPT, PRU_PEERADDR
+ * FUNCTION and ARGUMENTS:
+ * 	Copy a whole sockaddr_iso out of an isopcb (isop) into the mbuf (name),
+ * 	setting m_len to its length (0 if unset).  (which) is TP_LOCAL or TP_FOREIGN.
+ */ 
+void
+iso_getnetaddr( isop, name, which)
+	struct isopcb *isop;
+	struct mbuf *name;
+	int which;
+{
+	struct sockaddr_iso *from =
+		(which == TP_LOCAL) ? isop->isop_laddr : isop->isop_faddr;
+
+	if (from == 0) {
+		name->m_len = 0;
+		return;
+	}
+	name->m_len = from->siso_len;
+	bcopy((caddr_t)from, mtod(name, caddr_t), (unsigned)name->m_len);
+}
+/*
+ * NAME: 	tpclnp_mtu()
+ * CALLED FROM:
+ *  tp_route_to() on incoming CR, CC, and pr_usrreq() for PRU_CONNECT
+ *
+ * FUNCTION, ARGUMENTS, and RETURN VALUE:
+ *
+ * Perform the subnetwork dependent part of determining MTU information:
+ * point (tpcb)'s tp_routep at the route entry pointer kept in its isopcb
+ * and return the size of the network protocol header -- 0 when the
+ * network service is ISO_CONS, otherwise the fixed and segmentation parts
+ * of a clnp header plus two iso addresses.
+ * 
+ * SIDE EFFECTS:
+ * Sets tp_routep pointer in pcb.
+ *
+ * NOTES:
+ */
+tpclnp_mtu(tpcb)
+register struct tp_pcb *tpcb;
+{
+	struct isopcb *npcb = (struct isopcb *)tpcb->tp_npcb;
+	int hdrlen = 0;
+
+	IFDEBUG(D_CONN)
+		printf("tpclnp_mtu(tpcb)\n", tpcb);
+	ENDDEBUG
+	tpcb->tp_routep = &npcb->isop_route.ro_rt;
+	/* only a non-CONS network service contributes a header */
+	if (tpcb->tp_netservice != ISO_CONS)
+		hdrlen = sizeof(struct clnp_fixed) + sizeof(struct clnp_segment) +
+			2 * sizeof(struct iso_addr);
+	return hdrlen;
+}
+
+/*
+ * CALLED FROM:
+ *  tp_emit()
+ * FUNCTION and ARGUMENTS:
+ *  Hand a packet (m0) from tp to clnp_output() for transmission and
+ *  count it in ts_tpdu_sent.
+ *  isop is the isopcb structure; datalen is the length of the data in the
+ *  mbuf string m0; a nonzero nochksum asks clnp for CLNP_NO_CKSUM.
+ * RETURN VALUE:
+ *  whatever (E*) is returned from the net layer output routine.
+ *
+ */
+
+int
+tpclnp_output(isop, m0, datalen, nochksum)
+	struct isopcb		*isop;
+	struct mbuf 		*m0;
+	int 				datalen;
+	int					nochksum;
+{
+	int flags = nochksum ? CLNP_NO_CKSUM : 0;
+
+	IncStat(ts_tpdu_sent);
+
+	IFDEBUG(D_TPISO)
+		struct tpdu *hdr = mtod(m0, struct tpdu *);
+
+		printf(
+"abt to call clnp_output: datalen 0x%x, hdr.li 0x%x, hdr.dutype 0x%x nocsum x%x dst addr:\n",
+			datalen,
+			(int)hdr->tpdu_li, (int)hdr->tpdu_type, nochksum);
+		dump_isoaddr(isop->isop_faddr);
+		printf("\nsrc addr:\n");
+		dump_isoaddr(isop->isop_laddr);
+		dump_mbuf(m0, "at tpclnp_output");
+	ENDDEBUG
+
+	return clnp_output(m0, isop, datalen, flags);
+}
+
+/*
+ * CALLED FROM:
+ *  tp_error_emit()
+ * FUNCTION and ARGUMENTS:
+ *  A variant of tpclnp_output that takes the two addresses (laddr, faddr)
+ *  instead of a pcb.  It's used by tp_error_emit, when we
+ *  don't have an iso_pcb with which to call the normal output rtn.
+ * RETURN VALUE:
+ *  ENOBUFS or
+ *  whatever (E*) is returned from the net layer output routine.
+ */
+
+int
+tpclnp_output_dg(laddr, faddr, m0, datalen, ro, nochksum)
+	struct iso_addr		*laddr, *faddr;
+	struct mbuf 		*m0;
+	int 				datalen;
+	struct route 		*ro;
+	int					nochksum;
+{
+	struct isopcb		scratch;
+	int					error;
+	int					flags;
+
+	IFDEBUG(D_TPISO)
+		printf("tpclnp_output_dg  datalen 0x%x m0 0x%x\n", datalen, m0);
+	ENDDEBUG
+
+	/*
+	 *	Build a throw-away isopcb holding just the two addresses, which
+	 *	is all clnp needs to send the packet.
+	 */
+	bzero((caddr_t)&scratch, sizeof(scratch));
+	scratch.isop_laddr = &scratch.isop_sladdr;
+	scratch.isop_faddr = &scratch.isop_sfaddr;
+	scratch.isop_sladdr.siso_addr = *laddr;
+	scratch.isop_sfaddr.siso_addr = *faddr;
+
+	IFDEBUG(D_TPISO)
+		printf("tpclnp_output_dg  faddr: \n");
+		dump_isoaddr(&scratch.isop_sfaddr);
+		printf("\ntpclnp_output_dg  laddr: \n");
+		dump_isoaddr(&scratch.isop_sladdr);
+		printf("\n");
+	ENDDEBUG
+
+	/*
+	 *	A one shot error packet has no business in the packet cache.
+	 */
+	flags = CLNP_NOCACHE;
+	if (nochksum)
+		flags |= CLNP_NO_CKSUM;
+
+	IncStat(ts_tpdu_sent);
+	error = clnp_output(m0, &scratch, datalen, flags);
+
+	/*
+	 *	clnp may have allocated a route into the scratch pcb; release it.
+	 */
+	if (scratch.isop_route.ro_rt)
+		RTFREE(scratch.isop_route.ro_rt);
+
+	return (error);
+}
+/*
+ * CALLED FROM:
+ * 	clnp's input routine, indirectly through the protosw.
+ * FUNCTION and ARGUMENTS:
+ * Take a packet (m) from clnp, strip off the (clnp_len)-byte clnp header and
+ * give it to tp or cltp.  Returns 0, or the result of tuba/idrp input if handed off.
+ */
+ProtoHook
+tpclnp_input(m, src, dst, clnp_len, ce_bit)
+	register struct mbuf *m;
+	struct sockaddr_iso *src, *dst;
+	int clnp_len, ce_bit;
+{
+	struct mbuf *tp_inputprep();
+	int tp_input(), cltp_input(), (*input)() = tp_input;
+
+	IncStat(ts_pkt_rcvd);
+
+	IFDEBUG(D_TPINPUT)
+		printf("tpclnp_input: m 0x%x clnp_len 0x%x\n", m, clnp_len);
+		dump_mbuf(m, "at tpclnp_input");
+	ENDDEBUG
+	/*
+	 * CLNP gives us an mbuf chain WITH the clnp header pulled up,
+	 * and the length of the clnp header.
+	 * First, strip off the Clnp header. leave the mbuf there for the
+	 * pullup that follows.
+	 */
+	m->m_len -= clnp_len;
+	m->m_data += clnp_len;
+	m->m_pkthdr.len -= clnp_len;
+	/* XXXX: should probably be in clnp_input */
+	switch (dst->siso_data[dst->siso_nlen - 1]) {	/* last byte of the destination net address */
+#ifdef TUBA
+	case ISOPROTO_TCP:
+		return (tuba_tcpinput(m, src, dst));
+#endif
+	case 0:
+		if (m->m_len == 0 && (m = m_pullup(m, 1)) == 0)
+			return 0;
+		if (*(mtod(m, u_char *)) == ISO10747_IDRP)
+			return (idrp_input(m, src, dst));
+	}
+	m = tp_inputprep(m);
+	if (m == 0)
+		return 0;
+	if (mtod(m, u_char *)[1] == UD_TPDU_type)	/* unit-data tpdus go to cltp instead of tp */
+		input = cltp_input;
+
+	IFDEBUG(D_TPINPUT)
+		dump_mbuf(m, "after tpclnp_input both pullups");
+	ENDDEBUG
+
+	IFDEBUG(D_TPISO)
+		printf("calling %sinput : src 0x%x, dst 0x%x, src addr:\n", 
+			(input == tp_input ? "tp_" : "clts_"), src, dst);
+		dump_isoaddr(src);
+		printf(" dst addr:\n");
+		dump_isoaddr(dst);
+	ENDDEBUG
+
+	(void) (*input)(m, (struct sockaddr *)src, (struct sockaddr *)dst,
+				0, tpclnp_output_dg, ce_bit);
+
+	IFDEBUG(D_QUENCH)
+		{ 
+			if(time.tv_usec & 0x4 && time.tv_usec & 0x40) {
+				printf("tpclnp_input: FAKING %s\n", 
+					tp_stat.ts_pkt_rcvd & 0x1?"QUENCH":"QUENCH2");
+				if(tp_stat.ts_pkt_rcvd & 0x1) {
+					tpclnp_ctlinput(PRC_QUENCH, src);	/* takes the sockaddr itself, not &src */
+				} else {
+					tpclnp_ctlinput(PRC_QUENCH2, src);
+				}
+			}
+		}
+	ENDDEBUG
+
+	return 0;
+}
+/* notify routine that tpclnp_ctlinput() passes to iso_pcbnotify(); it does nothing and returns 0 */
+ProtoHook
+iso_rtchange()
+{
+	return 0;
+}
+
+/*
+ * CALLED FROM: tpclnp_ctlinput(), through iso_pcbnotify(), for PRC_QUENCH2
+ * FUNCTION: fetch the tp pcb of (isop)'s socket and give it to tp_quench
+ */
+void
+tpiso_decbit(isop)
+	struct isopcb *isop;
+{
+	struct tp_pcb *tpcb = (struct tp_pcb *)isop->isop_socket->so_pcb;
+
+	tp_quench(tpcb, PRC_QUENCH2);
+}
+/*
+ * CALLED FROM: tpclnp_ctlinput(), through iso_pcbnotify(), for PRC_QUENCH
+ * FUNCTION: fetch the tp pcb of (isop)'s socket and give it to tp_quench
+ */
+void
+tpiso_quench(isop)
+	struct isopcb *isop;
+{
+	struct tp_pcb *tpcb = (struct tp_pcb *)isop->isop_socket->so_pcb;
+
+	tp_quench(tpcb, PRC_QUENCH);
+}
+
+/*
+ * CALLED FROM:
+ *  The network layer through the protosw table.
+ * FUNCTION and ARGUMENTS:
+ *	When clnp gets an ICMP-like msg this gets called.
+ *	It either returns an error status to the user or
+ *	it causes all connections on this address to be aborted
+ *	by calling the appropriate xx_notify() routine.
+ *	(cmd) is the type of ICMP error; out-of-range values are ignored.
+ * 	(siso) is the address of the guy who sent the ER CLNPDU.  Returns 0.
+ */
+ProtoHook
+tpclnp_ctlinput(cmd, siso)
+	int cmd;
+	struct sockaddr_iso *siso;
+{
+	extern u_char inetctlerrmap[];
+	extern ProtoHook tpiso_abort();
+	extern ProtoHook iso_rtchange();
+	extern ProtoHook tpiso_reset();
+	void iso_pcbnotify();
+
+	IFDEBUG(D_TPINPUT)
+		printf("tpclnp_ctlinput1: cmd 0x%x addr: \n", cmd);
+		dump_isoaddr(siso);
+	ENDDEBUG
+	/* cmd indexes inetctlerrmap[] below, so PRC_NCMDS itself must be rejected too */
+	if (cmd < 0 || cmd >= PRC_NCMDS)
+		return 0;
+	if (siso->siso_family != AF_ISO)
+		return 0;
+	switch (cmd) {
+
+		case	PRC_QUENCH2:
+			iso_pcbnotify(&tp_isopcb, siso, 0, (int (*)())tpiso_decbit);
+			break;
+
+		case	PRC_QUENCH:
+			iso_pcbnotify(&tp_isopcb, siso, 0, (int (*)())tpiso_quench);
+			break;
+
+		case	PRC_TIMXCEED_REASS:
+		case	PRC_ROUTEDEAD:
+			iso_pcbnotify(&tp_isopcb, siso, 0, tpiso_reset);
+			break;
+
+		case	PRC_HOSTUNREACH:
+		case	PRC_UNREACH_NET:
+		case	PRC_IFDOWN:
+		case	PRC_HOSTDEAD:
+			iso_pcbnotify(&tp_isopcb, siso,
+					(int)inetctlerrmap[cmd], iso_rtchange);
+			break;
+
+		default:
+		/*
+		case	PRC_MSGSIZE:
+		case	PRC_UNREACH_HOST:
+		case	PRC_UNREACH_PROTOCOL:
+		case	PRC_UNREACH_PORT:
+		case	PRC_UNREACH_NEEDFRAG:
+		case	PRC_UNREACH_SRCFAIL:
+		case	PRC_REDIRECT_NET:
+		case	PRC_REDIRECT_HOST:
+		case	PRC_REDIRECT_TOSNET:
+		case	PRC_REDIRECT_TOSHOST:
+		case	PRC_TIMXCEED_INTRANS:
+		case	PRC_PARAMPROB:
+		*/
+		iso_pcbnotify(&tp_isopcb, siso, (int)inetctlerrmap[cmd], tpiso_abort);
+		break;
+	}
+	return 0;
+}
+/*
+ * XXX - Variant which is called by clnp_er.c with an isoaddr rather
+ * than a sockaddr_iso.  (isoa) is copied into the file-static (siso)
+ * below -- the length byte plus isoa_len address bytes -- and handed on.
+ */
+static struct sockaddr_iso siso = {sizeof(siso), AF_ISO};
+tpclnp_ctlinput1(cmd, isoa)
+	int cmd;
+	struct iso_addr *isoa;
+{
+	bzero((caddr_t)&siso.siso_addr, sizeof(siso.siso_addr));
+	bcopy((caddr_t)isoa, (caddr_t)&siso.siso_addr, 1 + isoa->isoa_len);
+	tpclnp_ctlinput(cmd, &siso);
+}
+
+/*
+ * These next 2 routines are
+ * CALLED FROM:
+ *	xxx_notify() from tp_ctlinput() when
+ *  net level gets some ICMP-equiv. type event.
+ * FUNCTION and ARGUMENTS:
+ *  Cause the connection to be aborted with some sort of error reason
+ *  indicating that the network layer caused the abort.  tpiso_abort fakes
+ *  an ER TPDU so we can go through the driver and always aborts the TP
+ *  connection; reset may or may not, depending on the TP class in use.
+ */
+ProtoHook
+tpiso_abort(isop)
+	struct isopcb *isop;
+{
+	struct tp_event evt;
+	struct tp_pcb *tpcb;
+	IFDEBUG(D_CONN)
+		printf("tpiso_abort 0x%x\n", isop);
+	ENDDEBUG
+	tpcb = (struct tp_pcb *)isop->isop_socket->so_pcb;
+	evt.ev_number = ER_TPDU;
+	evt.ATTR(ER_TPDU).e_reason = ECONNABORTED;
+	return tp_driver(tpcb, &evt);
+}
+/* Notify routine: push a T_NETRESET event for (isop)'s tp pcb through tp_driver. */
+ProtoHook
+tpiso_reset(isop)
+	struct isopcb *isop;
+{
+	struct tp_event evt;
+	struct tp_pcb *tpcb = (struct tp_pcb *)isop->isop_socket->so_pcb;
+
+	evt.ev_number = T_NETRESET;
+	return tp_driver(tpcb, &evt);
+}
+
+#endif /* ISO */
diff --git a/sys/netiso/tp_meas.c b/sys/netiso/tp_meas.c
new file mode 100644
index 00000000000..f8bbbe6dceb
--- /dev/null
+++ b/sys/netiso/tp_meas.c
@@ -0,0 +1,127 @@
+/*-
+ * Copyright (c) 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)tp_meas.c	8.1 (Berkeley) 6/10/93
+ */
+
+/***********************************************************
+		Copyright IBM Corporation 1987
+
+                      All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and its 
+documentation for any purpose and without fee is hereby granted, 
+provided that the above copyright notice appear in all copies and that
+both that copyright notice and this permission notice appear in 
+supporting documentation, and that the name of IBM not be
+used in advertising or publicity pertaining to distribution of the
+software without specific, written prior permission.  
+
+IBM DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING
+ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL
+IBM BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR
+ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+SOFTWARE.
+
+******************************************************************/
+
+/*
+ * ARGO Project, Computer Sciences Dept., University of Wisconsin - Madison
+ */
+/*
+ * $Header: tp_meas.c,v 5.2 88/11/18 17:28:04 nhall Exp $
+ * $Source: /usr/argo/sys/netiso/RCS/tp_meas.c,v $
+ * 
+ * tp_meas.c : create a performance measurement event
+ * in the circular buffer tp_Meas[]
+ */
+
+#include <sys/types.h>
+#include <sys/time.h>
+
+#include <netiso/argo_debug.h>
+#include <netiso/tp_meas.h>
+
+extern struct timeval time;
+
+#ifdef TP_PERF_MEAS
+int		tp_Measn = 0;
+struct tp_Meas tp_Meas[TPMEASN];
+
+/*
+ * NAME:	 Tpmeas(), reached through the tpmeas() macro in tp_meas.h
+ *
+ * CALLED FROM: tp_emit(), tp_soisdisconnecting(), tp_soisdisconnected()
+ *	tp0_stash(), tp_stash(), tp_send(), tp_goodack(), tp_usrreq()
+ *
+ * FUNCTION and ARGUMENTS:
+ *  records one performance-measurement event for the reference (ref)
+ *  in the next slot of the circular buffer tp_Meas[].
+ *  (kind) tells which kind of event, timev is the time to be stored
+ *  with a TPtime_from_ll event (other kinds use the global time);
+ *  (seq), (win), and (size) are integers that usually refer to the
+ *  sequence number, window number (on send) and size of tpdu or window.
+ *
+ * RETURNS:		Nada
+ *
+ * SIDE EFFECTS:	advances tp_Measn, wrapping at TPMEASN
+ *
+ */
+void
+Tpmeas(ref, kind, timev, seq, win, size)
+	u_int 	ref;
+	u_int	kind;
+	struct 	timeval *timev;
+	u_int	seq, win, size;
+{
+	static int mseq;
+	register struct tp_Meas *slot;
+
+	/* claim the current slot, then advance the index with wrap-around */
+	slot = &tp_Meas[tp_Measn];
+	tp_Measn = (tp_Measn + 1) % TPMEASN;
+
+	slot->tpm_tseq = mseq++;
+	slot->tpm_kind = kind;
+	slot->tpm_ref = ref;
+	slot->tpm_seq = seq;
+	slot->tpm_window = win;
+	slot->tpm_size = size;
+
+	/* TPtime_from_ll events are stamped with (timev), the rest with (time) */
+	bcopy((caddr_t)(kind == TPtime_from_ll ? timev : &time),
+		(caddr_t)&slot->tpm_time, sizeof(struct timeval));
+}
+
+#endif /* TP_PERF_MEAS */
diff --git a/sys/netiso/tp_meas.h b/sys/netiso/tp_meas.h
new file mode 100644
index 00000000000..10ef93d350b
--- /dev/null
+++ b/sys/netiso/tp_meas.h
@@ -0,0 +1,94 @@
+/*-
+ * Copyright (c) 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)tp_meas.h	8.1 (Berkeley) 6/10/93
+ */
+
+/***********************************************************
+				Copyright IBM Corporation 1987
+
+                      All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and its 
+documentation for any purpose and without fee is hereby granted, 
+provided that the above copyright notice appear in all copies and that
+both that copyright notice and this permission notice appear in 
+supporting documentation, and that the name of IBM not be
+used in advertising or publicity pertaining to distribution of the
+software without specific, written prior permission.  
+
+IBM DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING
+ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL
+IBM BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR
+ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+SOFTWARE.
+
+******************************************************************/
+
+/*
+ * ARGO Project, Computer Sciences Dept., University of Wisconsin - Madison
+ */
+#ifdef TP_PERF_MEAS
+#define tpmeas(a, b, t, c, d, e) \
+	Tpmeas((u_int)(a), (u_int)(b), t, (u_int)(c), (u_int)(d), (u_int)(e))
+/* one performance-measurement event, as filled in by Tpmeas() in tp_meas.c */
+struct tp_Meas {
+	int			tpm_tseq;	/* running event number assigned by Tpmeas() */
+	u_char		tpm_kind;	/* kind of event: one of the TP* values below */
+	u_short 	tpm_ref;	/* reference the event belongs to */
+	u_short		tpm_size;	/* usually size of tpdu or window */
+	u_short		tpm_window;	/* usually window number (on send) */
+	u_int		tpm_seq;	/* usually sequence number */
+	struct timeval	tpm_time;	/* time stamp stored with the event */
+};
+
+#define TPMEASN 4000	/* number of entries in tp_Meas[] */
+extern int tp_Measn;	/* index of the next entry Tpmeas() will fill */
+extern struct tp_Meas tp_Meas[];
+
+/*
+ * the kinds of events for packet tracing are:
+ */
+#define TPtime_from_session	0x01
+#define TPtime_to_session	0x02
+#define TPtime_ack_rcvd		0x03 
+#define TPtime_ack_sent		0x04
+#define TPtime_from_ll		0x05
+#define TPtime_to_ll		0x06
+#define TPsbsend			0x07 
+#define TPtime_open			0x08
+#define TPtime_open_X		0x28 /* xtd format */
+#define TPtime_close		0x09
+
+#endif /* TP_PERF_MEAS */
diff --git a/sys/netiso/tp_output.c b/sys/netiso/tp_output.c
new file mode 100644
index 00000000000..cdd7c4fe76b
--- /dev/null
+++ b/sys/netiso/tp_output.c
@@ -0,0 +1,712 @@
+/*-
+ * Copyright (c) 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)tp_output.c	8.1 (Berkeley) 6/10/93
+ */
+
+/***********************************************************
+		Copyright IBM Corporation 1987
+
+                      All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and its 
+documentation for any purpose and without fee is hereby granted, 
+provided that the above copyright notice appear in all copies and that
+both that copyright notice and this permission notice appear in 
+supporting documentation, and that the name of IBM not be
+used in advertising or publicity pertaining to distribution of the
+software without specific, written prior permission.  
+
+IBM DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING
+ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL
+IBM BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR
+ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+SOFTWARE.
+
+******************************************************************/
+
+/*
+ * ARGO Project, Computer Sciences Dept., University of Wisconsin - Madison
+ */
+/* 
+ * ARGO TP
+ *
+ * $Header: tp_output.c,v 5.4 88/11/18 17:28:08 nhall Exp $
+ * $Source: /usr/argo/sys/netiso/RCS/tp_output.c,v $
+ *
+ * In here is tp_ctloutput(), the guy called by [sg]etsockopt(),
+ */
+
+#include <sys/param.h>
+#include <sys/mbuf.h>
+#include <sys/systm.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/protosw.h>
+#include <sys/errno.h>
+#include <sys/time.h>
+#include <sys/kernel.h>
+
+#include <netiso/tp_param.h>
+#include <netiso/tp_user.h>
+#include <netiso/tp_stat.h>
+#include <netiso/tp_ip.h>
+#include <netiso/tp_clnp.h>
+#include <netiso/tp_timer.h>
+#include <netiso/argo_debug.h>
+#include <netiso/tp_pcb.h>
+#include <netiso/tp_trace.h>
+
+#define TPDUSIZESHIFT 24
+#define CLASSHIFT 16
+
+/*
+ * NAME: 	tp_consistency()
+ *
+ * CALLED FROM:
+ * 	tp_ctloutput(), tp_input()
+ *
+ * FUNCTION and ARGUMENTS:
+ *	Validates the connection parameters in (param) against the class
+ *	they select and against the address family of (tpcb).
+ *	(cmd) is a mask of TP_STRICT and/or TP_FORCE.
+ *	TP_STRICT: every inconsistency found yields EINVAL.  Without it,
+ *	 inconsistent options, strategies and tpdu sizes are repaired in
+ *	 (param) instead, so that negotiating down from class 4 need not
+ *	 fail.  A repair (or an error) ends the per-class checks at once.
+ *	TP_FORCE: if no error was found, the negotiated values are stored
+ *	 into (tpcb).  Ignored when param->p_dont_change_params is set.
+ *
+ * RETURNS
+ *	EOK if the parameters are acceptable for the class,
+ *	EINVAL if they are not.
+ */
+
+int
+tp_consistency( tpcb, cmd, param )
+	u_int cmd;
+	struct tp_conn_param *param;
+	struct tp_pcb *tpcb;
+{
+	int		status = EOK;
+	int		classnum = tp_mask_to_num(param->p_class);
+	u_int	strict;		/* nonzero iff TP_STRICT was requested */
+	long	pref_size;	/* param->p_ptpdusize << 7 */
+
+	IFTRACE(D_SETPARAMS)
+		tptrace(TPPTmisc,
+			"tp_consist enter class_to_use dontchange param.class cmd",
+			classnum, param->p_dont_change_params, param->p_class, cmd);
+	ENDTRACE
+	IFDEBUG(D_SETPARAMS)
+		printf("tp_consistency %s %s\n",
+			(cmd & TP_FORCE) ? "TP_FORCE" : "",
+			(cmd & TP_STRICT) ? "TP_STRICT" : "");
+	ENDDEBUG
+
+	/* p_dont_change_params overrides TP_FORCE: tpcb is left untouched */
+	if (param->p_dont_change_params)
+		cmd &= ~TP_FORCE;
+	strict = cmd & TP_STRICT;
+
+	/*
+	 * The network service may be switched within a domain, but the
+	 * domain itself may not change.  (No other value is expected here:
+	 * p_netservice is said to be a 2-bit field.)
+	 */
+	if (param->p_netservice == IN_CLNS) {
+		/* the network service of the INET domain */
+		if (tpcb->tp_domain != AF_INET) {
+			status = EINVAL;
+			goto out;
+		}
+	} else if (param->p_netservice == ISO_CONS ||
+			   param->p_netservice == ISO_CLNS ||
+			   param->p_netservice == ISO_COSNS) {
+		/* the network services of the ISO domain */
+		if (tpcb->tp_domain != AF_ISO) {
+			status = EINVAL;
+			goto out;
+		}
+	}
+	IFDEBUG(D_SETPARAMS)
+		printf("p_class 0x%x, class_to_use 0x%x\n",
+			param->p_class, classnum);
+	ENDDEBUG
+	if (param->p_netservice < 0 ||
+		param->p_netservice > TP_MAX_NETSERVICES ||
+		(param->p_class & TP_CLASSES_IMPLEMENTED) == 0) {
+		status = EINVAL;
+		goto out;
+	}
+	IFDEBUG(D_SETPARAMS)
+		printf("Nretrans 0x%x\n", param->p_Nretrans);
+	ENDDEBUG
+	/* checked for every class: negotiation is done a la class 4 */
+	if (param->p_Nretrans < 1 || param->p_cr_ticks < 1 ||
+		param->p_cc_ticks < 1) {
+		status = EINVAL;
+		goto out;
+	}
+	IFDEBUG(D_SETPARAMS)
+		printf("use_csum 0x%x\n", param->p_use_checksum);
+		printf("xtd_format 0x%x\n", param->p_xtd_format);
+		printf("xpd_service 0x%x\n", param->p_xpd_service);
+		printf("tpdusize 0x%x\n", param->p_tpdusize);
+		printf("tpcb->flags 0x%x\n", tpcb->tp_flags);
+	ENDDEBUG
+	switch (classnum) {
+	case 0:
+		/* class 0 has no checksums, no xtd format and no XPD */
+		if (param->p_use_checksum || param->p_xtd_format ||
+			param->p_xpd_service) {
+			if (strict) {
+				status = EINVAL;
+			} else {
+				param->p_use_checksum = 0;
+				param->p_xtd_format = 0;
+				param->p_xpd_service = 0;
+			}
+			break;
+		}
+		/* tpdusize must lie in [TP_MIN_TPDUSIZE, TP0_TPDUSIZE] */
+		if (param->p_tpdusize < TP_MIN_TPDUSIZE) {
+			if (strict)
+				status = EINVAL;
+			else
+				param->p_tpdusize = TP_MIN_TPDUSIZE;
+			break;
+		}
+		if (param->p_tpdusize > TP0_TPDUSIZE) {
+			if (strict)
+				status = EINVAL;
+			else
+				param->p_tpdusize = TP0_TPDUSIZE;
+			break;
+		}
+		/* connect/disc data not allowed for class 0 */
+		if (tpcb->tp_ucddata) {
+			if (strict) {
+				status = EINVAL;
+			} else if (cmd & TP_FORCE) {
+				m_freem(tpcb->tp_ucddata);
+				tpcb->tp_ucddata = 0;
+			}
+		}
+		break;
+
+	case 4:
+		IFDEBUG(D_SETPARAMS)
+			printf("dt_ticks 0x%x\n", param->p_dt_ticks);
+			printf("x_ticks 0x%x\n", param->p_x_ticks);
+			printf("dr_ticks 0x%x\n", param->p_dr_ticks);
+			printf("keepalive 0x%x\n", param->p_keepalive_ticks);
+			printf("sendack 0x%x\n", param->p_sendack_ticks);
+			printf("inact 0x%x\n", param->p_inact_ticks);
+			printf("ref 0x%x\n", param->p_ref_ticks);
+		ENDDEBUG
+		/* a bad timer value is an error even when not strict */
+		if ((param->p_class & TP_CLASS_4) &&
+			(param->p_dt_ticks < 1 || param->p_dr_ticks < 1 ||
+			 param->p_x_ticks < 1 || param->p_keepalive_ticks < 1 ||
+			 param->p_sendack_ticks < 1 || param->p_ref_ticks < 1 ||
+			 param->p_inact_ticks < 1)) {
+			status = EINVAL;
+			break;
+		}
+		IFDEBUG(D_SETPARAMS)
+			printf("rx_strat 0x%x\n", param->p_rx_strat);
+		ENDDEBUG
+		if (param->p_rx_strat >
+			(TPRX_USE_CW | TPRX_EACH | TPRX_FASTSTART)) {
+			if (strict)
+				status = EINVAL;
+			else
+				param->p_rx_strat = TPRX_USE_CW;
+			break;
+		}
+		IFDEBUG(D_SETPARAMS)
+			printf("ack_strat 0x%x\n", param->p_ack_strat);
+		ENDDEBUG
+		if (param->p_ack_strat != 0 && param->p_ack_strat != 1) {
+			if (strict)
+				status = EINVAL;
+			else
+				param->p_ack_strat = TPACK_WINDOW;
+			break;
+		}
+		/* tpdusize must lie in [TP_MIN_TPDUSIZE, TP_TPDUSIZE] */
+		if (param->p_tpdusize < TP_MIN_TPDUSIZE) {
+			if (strict)
+				status = EINVAL;
+			else
+				param->p_tpdusize = TP_MIN_TPDUSIZE;
+			break;
+		}
+		if (param->p_tpdusize > TP_TPDUSIZE) {
+			if (strict)
+				status = EINVAL;
+			else
+				param->p_tpdusize = TP_TPDUSIZE;
+		}
+		break;
+	}
+
+	if (status != 0 || (cmd & TP_FORCE) == 0)
+		goto out;
+
+	/* TP_FORCE: enforce the negotiation rules on the tpcb */
+	pref_size = ((long)param->p_ptpdusize) << 7;
+	tpcb->tp_class = param->p_class;
+	/* a checksum is used if either side asks for one */
+	if (tpcb->tp_use_checksum || param->p_use_checksum)
+		tpcb->tp_use_checksum = 1;
+	/* XPD and extended format stay on only if both sides have them on */
+	if (!param->p_xpd_service)
+		tpcb->tp_xpd_service = 0;
+	if (!param->p_xtd_format)
+		tpcb->tp_xtd_format = 0;
+	if (pref_size) {
+		/* a preferred size was given: keep the smaller of the two */
+		if (tpcb->tp_l_tpdusize > pref_size)
+			tpcb->tp_l_tpdusize = pref_size;
+		if (tpcb->tp_ptpdusize == 0 ||
+			tpcb->tp_ptpdusize > param->p_ptpdusize)
+			tpcb->tp_ptpdusize = param->p_ptpdusize;
+	} else {
+		/* none given: use the size code, tp_l_tpdusize = 2**tp_tpdusize */
+		if (param->p_tpdusize != 0 &&
+			tpcb->tp_tpdusize > param->p_tpdusize)
+			tpcb->tp_tpdusize = param->p_tpdusize;
+		tpcb->tp_l_tpdusize = 1 << tpcb->tp_tpdusize;
+	}
+
+out:
+	IFTRACE(D_CONN)
+		tptrace(TPPTmisc, "tp_consist returns class xtdfmt cmd",
+			status, tpcb->tp_class, tpcb->tp_xtd_format, cmd);
+	ENDTRACE
+	IFDEBUG(D_CONN)
+		printf("tp_consist rtns 0x%x class 0x%x xtd_fmt 0x%x cmd 0x%x\n",
+			status, tpcb->tp_class, tpcb->tp_xtd_format, cmd);
+	ENDDEBUG
+	return status;
+}
+
+/*
+ * NAME: 	tp_ctloutput()
+ *
+ * CALLED FROM:
+ * 	[sg]etsockopt(), via so[sg]etopt(). 
+ *
+ * FUNCTION and ARGUMENTS:
+ * 	Implements the socket options at transport level.
+ * 	(cmd) is either PRCO_SETOPT or PRCO_GETOPT (see ../sys/protosw.h).
+ * 	(so) is the socket.
+ * 	(level) is SOL_TRANSPORT; SOL_NETWORK and SOL_SOCKET are also examined.
+ * 	(optname) is the particular command or option to be set.
+ * 	(**mp) is an mbuf structure; one is allocated here if *mp is null.
+ *
+ * RETURN VALUE:
+ * 	ENOTSOCK if the socket hasn't got an associated tpcb
+ *  EINVAL if 
+ * 		trying to set window too big
+ * 		trying to set illegal max tpdu size 
+ * 		trying to set illegal credit fraction
+ * 		trying to use unknown or unimplemented class of TP
+ *		structure passed to TPOPT_PARAMS is too short
+ *  	illegal combination of command/GET-SET option, 
+ *			e.g., GET w/ TPOPT_CDDATA_CLEAR: 
+ *  EOPNOTSUPP if the level isn't transport, or command is neither GET nor SET
+ *   or if the transport-specific command is not implemented
+ *  EISCONN if trying a command that isn't allowed after a connection
+ *   is established
+ *  ENOTCONN if trying a command that is allowed only if a connection is
+ *   established
+ *  EMSGSIZE if trying to give too much data on connect/disconnect
+ *  ENOBUFS if no mbuf is available; EPERM, EADDRINUSE (TPOPT_INTERCEPT)
+ *
+ * SIDE EFFECTS:
+ *  Runs at splnet(); a SETOPT mbuf is freed (or kept as c/d data) at exit.
+ */
+ProtoHook 
+tp_ctloutput(cmd, so, level, optname, mp)
+	int 			cmd, level, optname;
+	struct socket	*so;
+	struct mbuf 	**mp;
+{
+	struct		tp_pcb	*tpcb = sototpcb(so);
+	int 		s = splnet();
+	caddr_t		value;
+	unsigned	val_len;
+	int			error = 0;
+
+	IFTRACE(D_REQUEST)
+		tptrace(TPPTmisc, "tp_ctloutput cmd so optname mp", 
+			cmd, so, optname, mp);
+	ENDTRACE
+	IFDEBUG(D_REQUEST)
+		printf(
+	"tp_ctloutput so 0x%x cmd 0x%x optname 0x%x, mp 0x%x *mp 0x%x tpcb 0x%x\n", 
+			so, cmd, optname, mp, mp?*mp:0, tpcb);
+	ENDDEBUG
+	if( tpcb == (struct tp_pcb *)0 ) {
+		error = ENOTSOCK; goto done;
+	}
+	if(*mp == MNULL) {
+		register struct mbuf *m;
+
+		MGET(m, M_DONTWAIT, TPMT_SONAME); /* does off, type, next */
+		if (m == NULL) {
+			splx(s);
+			return ENOBUFS;
+		}
+		m->m_len = 0;
+		m->m_act = 0;
+		*mp = m;
+	}
+
+	/* Hook so one can set network options via a tp socket. */
+	if ( level == SOL_NETWORK ) {
+		if ((tpcb->tp_nlproto == NULL) || (tpcb->tp_npcb == NULL))
+			error = ENOTSOCK;
+		else if (tpcb->tp_nlproto->nlp_ctloutput == NULL)
+			error = EOPNOTSUPP;
+		else {
+			/* as before, *mp is left alone here; but restore the spl */
+			error = (tpcb->tp_nlproto->nlp_ctloutput)(cmd, optname, 
+				tpcb->tp_npcb, *mp);
+			splx(s);
+			return error;
+		}
+		goto done;
+	} else if ( level == SOL_SOCKET) {
+		if (optname == SO_RCVBUF && cmd == PRCO_SETOPT) {
+			u_long old_credit = tpcb->tp_maxlcredit;
+			tp_rsyset(tpcb);
+			if (tpcb->tp_rhiwat != so->so_rcv.sb_hiwat &&
+			    tpcb->tp_state == TP_OPEN &&
+			    (old_credit < tpcb->tp_maxlcredit))
+				tp_emit(AK_TPDU_type, tpcb,
+					tpcb->tp_rcvnxt, 0, MNULL);
+			tpcb->tp_rhiwat = so->so_rcv.sb_hiwat;
+		}
+		goto done;
+	} else if ( level !=  SOL_TRANSPORT ) {
+		error = EOPNOTSUPP; goto done;
+	} 
+	if (cmd != PRCO_GETOPT && cmd != PRCO_SETOPT) {
+		error = EOPNOTSUPP; goto done;
+	} 
+	if ( so->so_error ) {
+		error = so->so_error; goto done;
+	}
+
+	/* The only options allowed after connection is established
+	 * are GET (anything) and SET DISC DATA and SET PERF MEAS
+	 */
+	if ( ((so->so_state & SS_ISCONNECTING)||(so->so_state & SS_ISCONNECTED))
+		&&
+		(cmd == PRCO_SETOPT  && 
+			optname != TPOPT_DISC_DATA && 
+			optname != TPOPT_CFRM_DATA && 
+			optname != TPOPT_PERF_MEAS &&
+			optname != TPOPT_CDDATA_CLEAR ) ) {
+		error = EISCONN; goto done;
+	} 
+	/* The only options allowed after disconnection are GET DISC DATA,
+	 * and TPOPT_PSTATISTICS
+	 * and they're not allowed if the ref timer has gone off, because
+	 * the tpcb is gone 
+	 */
+	if ((so->so_state & (SS_ISCONNECTED | SS_ISCONFIRMING)) ==  0) {
+		if ( so->so_pcb == (caddr_t)0 ) {
+			error = ENOTCONN; goto done;
+		}
+		if ( (tpcb->tp_state == TP_REFWAIT || tpcb->tp_state == TP_CLOSING) &&
+				(optname != TPOPT_DISC_DATA && optname != TPOPT_PSTATISTICS)) {
+			error = ENOTCONN; goto done;
+		}
+	}
+
+	value = mtod(*mp, caddr_t);	/* it's aligned, but lint complains */
+	val_len = (*mp)->m_len;
+
+	switch (optname) {
+
+	case TPOPT_INTERCEPT:
+#define INA(t) (((struct inpcb *)(t->tp_npcb))->inp_laddr.s_addr)
+#define ISOA(t) (((struct isopcb *)(t->tp_npcb))->isop_laddr->siso_addr)
+
+		if ((so->so_state & SS_PRIV) == 0) {
+			error = EPERM;
+		} else if (cmd != PRCO_SETOPT || tpcb->tp_state != TP_CLOSED ||
+					(tpcb->tp_flags & TPF_GENERAL_ADDR) ||
+					tpcb->tp_next == 0)
+			error = EINVAL;
+		else {
+			register struct tp_pcb *t;
+			error = EADDRINUSE;	/* stays set if a listener matches */
+			for (t = tp_listeners; t; t = t->tp_nextlisten)
+				if ((t->tp_flags & TPF_GENERAL_ADDR) == 0 &&
+						t->tp_domain == tpcb->tp_domain)
+					switch (tpcb->tp_domain) {
+					default:
+						goto done;
+#ifdef	INET
+					case AF_INET:
+						if (INA(t) == INA(tpcb))
+							goto done;
+						continue;
+#endif
+#ifdef ISO
+					case AF_ISO:
+						if (bcmp(ISOA(t).isoa_genaddr, ISOA(tpcb).isoa_genaddr,
+										ISOA(t).isoa_len) == 0)
+							goto done;
+						continue;
+#endif
+					}
+			/* no conflict: turn this pcb into a listener */
+			tpcb->tp_lsuffixlen = 0;
+			tpcb->tp_state = TP_LISTENING;
+			error = 0;
+			remque(tpcb);
+			tpcb->tp_next = tpcb->tp_prev = tpcb;
+			tpcb->tp_nextlisten = tp_listeners;
+			tp_listeners = tpcb;
+		}
+		break;
+
+	case TPOPT_MY_TSEL:
+		if ( cmd == PRCO_GETOPT ) {
+			ASSERT( tpcb->tp_lsuffixlen <= MAX_TSAP_SEL_LEN );
+			bcopy((caddr_t)tpcb->tp_lsuffix, value, tpcb->tp_lsuffixlen);
+			(*mp)->m_len = tpcb->tp_lsuffixlen;
+		} else /* cmd == PRCO_SETOPT  */ {
+			if( (val_len > MAX_TSAP_SEL_LEN) || (val_len <= 0 )) {
+				printf("val_len 0x%x (*mp)->m_len 0x%x\n", val_len, (*mp)->m_len);
+				error = EINVAL;
+			} else {
+				bcopy(value, (caddr_t)tpcb->tp_lsuffix, val_len);
+				tpcb->tp_lsuffixlen = val_len;
+			}
+		}
+		break;
+
+	case TPOPT_PEER_TSEL:
+		if ( cmd == PRCO_GETOPT ) {
+			ASSERT( tpcb->tp_fsuffixlen <= MAX_TSAP_SEL_LEN );
+			bcopy((caddr_t)tpcb->tp_fsuffix, value, tpcb->tp_fsuffixlen);
+			(*mp)->m_len = tpcb->tp_fsuffixlen;
+		} else /* cmd == PRCO_SETOPT  */ {
+			if( (val_len > MAX_TSAP_SEL_LEN) || (val_len <= 0 )) {
+				printf("val_len 0x%x (*mp)->m_len 0x%x\n", val_len, (*mp)->m_len);
+				error = EINVAL; 
+			} else {
+				bcopy(value, (caddr_t)tpcb->tp_fsuffix, val_len);
+				tpcb->tp_fsuffixlen = val_len;
+			}
+		}
+		break;
+
+	case TPOPT_FLAGS:
+		IFDEBUG(D_REQUEST)
+			printf("%s TPOPT_FLAGS value 0x%x *value 0x%x, flags 0x%x \n", 
+				cmd==PRCO_GETOPT?"GET":"SET", 
+				value,
+				*value, 
+				tpcb->tp_flags);
+		ENDDEBUG
+
+		if ( cmd == PRCO_GETOPT ) {
+			*(int *)value = (int)tpcb->tp_flags;
+			(*mp)->m_len = sizeof(u_int);
+		} else /* cmd == PRCO_SETOPT  */ {
+			error = EINVAL; goto done;
+		}
+		break;
+
+	case TPOPT_PARAMS:
+		/* This handles: timer values, class, use of transport
+		 * expedited data, max tpdu size, checksum, xtd format and
+		 * disconnect indications, and may get rid of connect/disc data
+		 */
+		IFDEBUG(D_SETPARAMS)
+			printf("TPOPT_PARAMS value 0x%x, cmd %s \n", value,
+				cmd==PRCO_GETOPT?"GET":"SET");
+		ENDDEBUG
+		IFDEBUG(D_REQUEST)
+			printf("TPOPT_PARAMS value 0x%x, cmd %s \n", value,
+				cmd==PRCO_GETOPT?"GET":"SET");
+		ENDDEBUG
+
+		if ( cmd == PRCO_GETOPT ) {
+			*(struct tp_conn_param *)value = tpcb->_tp_param;
+			(*mp)->m_len = sizeof(tpcb->_tp_param);
+		} else if (val_len < sizeof(tpcb->_tp_param)) {
+			/* SETOPT: don't read parameters the caller never supplied */
+			error = EINVAL;
+		} else /* cmd == PRCO_SETOPT  */ {
+			if( (error = 
+				tp_consistency(tpcb, TP_STRICT | TP_FORCE, 
+								(struct tp_conn_param *)value))==0) {
+				/* tp_consistency doesn't copy the whole set of params */
+				tpcb->_tp_param = *(struct tp_conn_param *)value;
+				(*mp)->m_len = sizeof(tpcb->_tp_param);
+			}
+		}
+		break;
+
+	case TPOPT_PSTATISTICS: 
+#ifdef TP_PERF_MEAS
+		if (cmd == PRCO_SETOPT) {
+			error = EINVAL; goto done;
+		} 
+		IFPERF(tpcb)
+			if (*mp) {
+				struct mbuf * n;
+				do {
+					MFREE(*mp, n);
+					*mp = n;
+				} while (n);
+			}
+			*mp = m_copym(tpcb->tp_p_mbuf, (int)M_COPYALL, M_WAITOK);
+		ENDPERF 
+		else {
+			error = EINVAL; goto done;
+		} 
+		break;
+#else
+		error = EOPNOTSUPP;
+		goto done;
+#endif /* TP_PERF_MEAS */
+	case TPOPT_CDDATA_CLEAR: 
+		if (cmd == PRCO_GETOPT) {
+			error = EINVAL;
+		} else {
+			if (tpcb->tp_ucddata) {
+				m_freem(tpcb->tp_ucddata);
+				tpcb->tp_ucddata = 0;
+			}
+		}
+		break;
+
+	case TPOPT_CFRM_DATA:
+	case TPOPT_DISC_DATA: 
+	case TPOPT_CONN_DATA: 
+		if( tpcb->tp_class == TP_CLASS_0 ) {
+			error = EOPNOTSUPP;
+			break;
+		}
+		IFDEBUG(D_REQUEST)
+			printf("%s\n", optname==TPOPT_DISC_DATA?"DISC data":"CONN data");
+			printf("m_len 0x%x, vallen 0x%x so_snd.cc 0x%x\n", 
+				(*mp)->m_len, val_len, so->so_snd.sb_cc);
+			dump_mbuf(so->so_snd.sb_mb, "tp_ctloutput: sosnd ");
+		ENDDEBUG
+		if (cmd == PRCO_SETOPT) {
+			int len = tpcb->tp_ucddata ?  tpcb->tp_ucddata->m_len : 0;
+			/* can append connect data in several calls */
+			if (len + val_len > 
+				(optname==TPOPT_CONN_DATA?TP_MAX_CR_DATA:TP_MAX_DR_DATA) ) {
+				error = EMSGSIZE; goto done;
+			} 
+			(*mp)->m_next = MNULL;
+			(*mp)->m_act = 0;
+			if (tpcb->tp_ucddata)
+				m_cat(tpcb->tp_ucddata, *mp);
+			else
+				tpcb->tp_ucddata = *mp;
+			IFDEBUG(D_REQUEST)
+				dump_mbuf(tpcb->tp_ucddata, "tp_ctloutput after CONN_DATA");
+			ENDDEBUG
+			IFTRACE(D_REQUEST)
+				tptrace(TPPTmisc,"C/D DATA: flags snd.sbcc val_len",
+					tpcb->tp_flags, so->so_snd.sb_cc,val_len,0);
+			ENDTRACE
+			*mp = MNULL;	/* the mbuf now belongs to tp_ucddata */
+			if (optname == TPOPT_CFRM_DATA && (so->so_state & SS_ISCONFIRMING))
+				(void) tp_confirm(tpcb);
+		}
+		break;
+
+	case TPOPT_PERF_MEAS: 
+#ifdef TP_PERF_MEAS
+		if (cmd == PRCO_GETOPT) {
+			/* m_len below covers a whole u_int: clear it all first */
+			*(u_int *)value = 0;
+			*value = (u_int)tpcb->tp_perf_on;
+			(*mp)->m_len = sizeof(u_int);
+		} else if (cmd == PRCO_SETOPT) {
+			(*mp)->m_len = 0;
+			if ((*value) != 0 && (*value) != 1 )
+				error = EINVAL;
+			else  tpcb->tp_perf_on = (*value);
+		}
+		if( tpcb->tp_perf_on ) 
+			error = tp_setup_perf(tpcb);
+#else  /* TP_PERF_MEAS */
+		error = EOPNOTSUPP;
+#endif /* TP_PERF_MEAS */
+		break;
+
+	default:
+		error = EOPNOTSUPP;
+	}
+done:
+	IFDEBUG(D_REQUEST)
+		dump_mbuf(so->so_snd.sb_mb, "tp_ctloutput sosnd at end");
+		dump_mbuf(*mp, "tp_ctloutput *mp");
+	ENDDEBUG
+	/* 
+	 * sigh: getsockopt looks only at m_len : all output data must 
+	 * reside in the first mbuf.  NOTE(review): m_compress() runs only
+	 * if ASSERT() evaluates its argument in this build -- confirm.
+	 */
+	if (*mp) {
+		if (cmd == PRCO_SETOPT) {
+			m_freem(*mp);
+			*mp = MNULL;
+		} else {
+			ASSERT ( m_compress(*mp, mp) <= MLEN );
+			if (error)
+				(*mp)->m_len = 0;
+			IFDEBUG(D_REQUEST)
+				dump_mbuf(*mp, "tp_ctloutput *mp after compress");
+			ENDDEBUG
+		}
+	}
+	splx(s);
+	return error;
+}
diff --git a/sys/netiso/tp_param.h b/sys/netiso/tp_param.h
new file mode 100644
index 00000000000..f1862a24392
--- /dev/null
+++ b/sys/netiso/tp_param.h
@@ -0,0 +1,367 @@
+/*-
+ * Copyright (c) 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)tp_param.h	8.1 (Berkeley) 6/10/93
+ */
+
+/***********************************************************
+		Copyright IBM Corporation 1987
+
+                      All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and its 
+documentation for any purpose and without fee is hereby granted, 
+provided that the above copyright notice appear in all copies and that
+both that copyright notice and this permission notice appear in 
+supporting documentation, and that the name of IBM not be
+used in advertising or publicity pertaining to distribution of the
+software without specific, written prior permission.  
+
+IBM DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING
+ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL
+IBM BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR
+ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+SOFTWARE.
+
+******************************************************************/
+
+/*
+ * ARGO Project, Computer Sciences Dept., University of Wisconsin - Madison
+ */
+/* 
+ * ARGO TP
+ *
+ * $Header: tp_param.h,v 5.3 88/11/18 17:28:18 nhall Exp $
+ * $Source: /usr/argo/sys/netiso/RCS/tp_param.h,v $
+ *
+ */
+
+#ifndef __TP_PARAM__
+#define __TP_PARAM__
+
+
+/******************************************************
+ * compile time parameters that can be changed
+ *****************************************************/
+
+#define 	TP_CLASSES_IMPLEMENTED 0x11 /* zero and 4 */
+
+#define		TP_DECBIT_CLEAR_COUNT	3
+
+/*#define 	N_TPREF				100 */
+#ifdef KERNEL
+extern int N_TPREF;
+#endif
+
+#define 	TP_SOCKBUFSIZE		((u_long)4096)
+#define 	TP0_SOCKBUFSIZE		((u_long)512)
+#define		MAX_TSAP_SEL_LEN	64
+
+/* maximum tpdu size we'll accept: */
+#define 	TP_TPDUSIZE			0xc		/* 4096 octets for classes 1-4*/
+#define 	TP0_TPDUSIZE		0xb		/* 2048 octets for class 0 */
+#define 	TP_DFL_TPDUSIZE		0x7		/* 128 octets default */
+	/* NOTE: don't ever negotiate 8192 because could get 
+	 * wraparound in checksumming
+	 * (No mtu is likely to be larger than 4K anyway...)
+	 */
+#define		TP_NRETRANS			12		/* TCP_MAXRXTSHIFT + 1 */
+#define		TP_MAXRXTSHIFT		6		/* factor of 64 */
+#define		TP_MAXPORT			0xefff
+
+/* ALPHA: to be used in the context: gain = 1/(2**alpha), or,
+ * put another way, gaintimes(x) = (x)>>alpha (forgetting the case alpha==0) 
+ */
+#define 	TP_RTT_ALPHA		3 
+#define 	TP_RTV_ALPHA		2
+#define		TP_REXMTVAL(tpcb)\
+	((tp_rttadd + (tpcb)->tp_rtt + ((tpcb)->tp_rtv << 2)) / tp_rttdiv) /* (rttadd + rtt + 4*rtv) / rttdiv; only tp_rtv is shifted */
+#define		TP_RANGESET(tv, value, min, max) (((tv) = (value)) > (max) ? \
+	((tv) = (max)) : ((tv) < (min) ? ((tv) = (min)) : (tv))) /* assigns value to tv, clamped to [min, max]; tv, min and max may be evaluated more than once */
+
+/*
+ * not sure how to treat data on disconnect 
+ */
+#define 	T_CONN_DATA			0x1
+#define 	T_DISCONNECT		0x2
+#define 	T_DISC_DATA			0x4
+#define 	T_XDATA				0x8
+
+#define ISO_CLNS	 0
+#define IN_CLNS	 	 1
+#define ISO_CONS	 2
+#define ISO_COSNS	 3
+#define TP_MAX_NETSERVICES 3
+
+/* Indices into tp stats ackreason[i] */
+#define _ACK_DONT_ 0
+#define _ACK_STRAT_EACH_ 0x1
+#define _ACK_STRAT_FULLWIN_ 0x2
+#define _ACK_DUP_ 0x3
+#define _ACK_EOT_ 0x4
+#define _ACK_REORDER_ 0x5
+#define _ACK_USRRCV_ 0x6
+#define _ACK_FCC_ 0x7
+#define _ACK_NUM_REASONS_ 0x8
+
+/* masks for use in tp_stash() */
+#define ACK_DONT 			0
+#define ACK_STRAT_EACH		(1<< _ACK_STRAT_EACH_)
+#define ACK_STRAT_FULLWIN	(1<< _ACK_STRAT_FULLWIN_)
+#define ACK_DUP 			(1<< _ACK_DUP_)
+#define ACK_EOT				(1<< _ACK_EOT_)
+#define ACK_REORDER			(1<< _ACK_REORDER_)
+
+/******************************************************
+ * constants used in the protocol 
+ *****************************************************/
+
+#define		TP_VERSION 			0x1
+
+#define 	TP_MAX_HEADER_LEN	256
+
+#define 	TP_MIN_TPDUSIZE		0x7		/* 128 octets */
+#define 	TP_MAX_TPDUSIZE		0xd		/* 8192 octets */
+
+#define		TP_MAX_XPD_DATA		0x10	/* 16 octets */
+#define		TP_MAX_CC_DATA		0x20	/* 32 octets */
+#define		TP_MAX_CR_DATA		TP_MAX_CC_DATA
+#define		TP_MAX_DR_DATA		0x40	/* 64 octets */
+
+#define		TP_XTD_FMT_BIT 	0x80000000
+#define		TP_XTD_FMT_MASK	0x7fffffff
+#define		TP_NML_FMT_BIT 	0x80
+#define		TP_NML_FMT_MASK	0x7f
+
+/*  
+ * values for the tpdu_type field, 2nd byte in a tpdu 
+ */
+
+#define TP_MIN_TPDUTYPE 0x1
+
+#define XPD_TPDU_type	0x1
+#define XAK_TPDU_type	0x2
+#define GR_TPDU_type	0x3	
+#define AK_TPDU_type	0x6
+#define ER_TPDU_type	0x7
+#define DR_TPDU_type	0x8
+#define DC_TPDU_type	0xc
+#define CC_TPDU_type	0xd
+#define CR_TPDU_type	0xe
+#define DT_TPDU_type	0xf
+
+#define TP_MAX_TPDUTYPE 0xf
+
+/*
+ * identifiers for the variable-length options in tpdus 
+ */
+
+#define		TPP_acktime			0x85
+#define		TPP_residER			0x86
+#define		TPP_priority		0x87
+#define		TPP_transdelay		0x88
+#define		TPP_throughput		0x89
+#define		TPP_subseq			0x8a
+#define		TPP_flow_cntl_conf	0x8c	/* not implemented */
+#define		TPP_addl_info		0xe0
+#define		TPP_tpdu_size		0xc0
+#define		TPP_calling_sufx	0xc1
+#define		TPP_invalid_tpdu	0xc1	/* the bozos used a value twice */
+#define		TPP_called_sufx		0xc2
+#define		TPP_checksum		0xc3
+#define		TPP_vers			0xc4
+#define		TPP_security		0xc5
+#define		TPP_addl_opt		0xc6
+#define		TPP_alt_class		0xc7
+#define		TPP_perf_meas		0xc8	/* local item : perf meas on, svp */
+#define		TPP_ptpdu_size		0xf0	/* preferred TPDU size */
+#define		TPP_inact_time		0xf2	/* inactivity time exchanged */
+
+
+/******************************************************
+ * Some fundamental data types
+ *****************************************************/
+#ifndef		TRUE
+#define		TRUE				1
+#endif		/* TRUE */
+
+#ifndef		FALSE
+#define		FALSE				0
+#endif		/* FALSE */
+
+#define		TP_LOCAL				22
+#define		TP_FOREIGN				33
+
+#ifndef 	EOK
+#define 	EOK 	0
+#endif  	/* EOK */
+
+#define 	TP_CLASS_0 	(1<<0)
+#define 	TP_CLASS_1 	(1<<1)
+#define 	TP_CLASS_2 	(1<<2)
+#define 	TP_CLASS_3 	(1<<3)
+#define 	TP_CLASS_4 	(1<<4)
+
+#define 	TP_FORCE 	0x1
+#define 	TP_STRICT 	0x2
+
+#ifndef 	MNULL
+#define 	MNULL				(struct mbuf *)0
+#endif 	/* MNULL */
+	/* if ../sys/mbuf.h gets MT_types up to 0x40, these will 
+	 * have to be changed:
+	 */
+#define 	MT_XPD 				0x44	
+#define 	MT_EOT 				0x40
+
+#define		TP_ENOREF			0x80000000
+
+typedef 	unsigned int	SeqNum;
+typedef		unsigned short	RefNum;
+typedef		int				ProtoHook;
+
+/******************************************************
+ * Macro used all over, for driver
+ *****************************************************/
+
+#define  DoEvent(x) \
+  ((E.ev_number=(x)),(tp_driver(tpcb,&E)))
+
+/******************************************************
+ * Some macros used all over, for timestamping
+ *****************************************************/
+
+#define GET_CUR_TIME(tvalp) ((*tvalp) = time)
+
+#define GET_TIME_SINCE(oldtvalp, diffp) { /* *diffp = time - *oldtvalp */\
+	(diffp)->tv_sec = time.tv_sec - (oldtvalp)->tv_sec;\
+	(diffp)->tv_usec = time.tv_usec - (oldtvalp)->tv_usec;\
+	if( (diffp)->tv_usec <0 ) {\
+		(diffp)->tv_sec --;\
+		(diffp)->tv_usec += 1000000; /* borrow the second just taken off */\
+	}\
+}
+			
+/******************************************************
+ * Some macros used for address families
+ *****************************************************/
+
+#define satosiso(ADDR) ((struct sockaddr_iso *)(ADDR))
+#define satosin(ADDR) ((struct sockaddr_in *)(ADDR))
+
+/******************************************************
+ * Macro used for changing types of mbufs
+ *****************************************************/
+
+#define CHANGE_MTYPE(m, TYPE)\
+	if((m)->m_type != TYPE) { \
+		mbstat.m_mtypes[(m)->m_type]--; mbstat.m_mtypes[TYPE]++; \
+		(m)->m_type = TYPE; \
+	} /* moves m's count from its old type to TYPE in mbstat.m_mtypes[]; NOTE: expands to a bare if statement, so an else after the invocation would bind to it */
+
+/******************************************************
+ * Macros used for adding options to a tpdu header and for
+ * parsing the headers.
+ * Options are variable-length and must be bcopy-d because on the
+ * RT your assignments must be N-word aligned for objects of length
+ * N.  Such a drag.
+ *****************************************************/
+
+struct tp_vbp {		/* layout of one variable-length option in a tpdu header */
+	u_char	tpv_code;	/* option code; ADDOPTION stores its (type) argument here */
+	char 	tpv_len;	/* length of the value; ADDOPTION stores its (len) argument here */
+	char	tpv_val;	/* first byte of the value; the vb* macros take its address */
+};
+#define vbptr(x) ((struct tp_vbp *)(x))	/* view x as an option */
+#define vbval(x,type) (*((type *)&(((struct tp_vbp *)(x))->tpv_val)))	/* value accessed in place as (type) */
+#define vbcode(x) (vbptr(x)->tpv_code)
+#define vblen(x) (vbptr(x)->tpv_len)
+
+#define vb_putval(dst,type,src)\
+	bcopy((caddr_t)&(src),(caddr_t)&(((struct tp_vbp *)(dst))->tpv_val),\
+	sizeof(type))	/* copies sizeof(type) bytes from src into the option's value */
+
+#define vb_getval(src,type,dst)\
+bcopy((caddr_t)&(((struct tp_vbp *)(src))->tpv_val),(caddr_t)&(dst),sizeof(type))	/* copies sizeof(type) bytes of the option's value into dst */
+
+#define ADDOPTION(type, DU, len, src)\
+{	register caddr_t P;	/* the new option goes at offset tpdu_li from DU */\
+	P = (caddr_t)(DU) + (int)((DU)->tpdu_li);\
+	vbptr(P)->tpv_code = (type);\
+	vbptr(P)->tpv_len = (len);\
+	bcopy((caddr_t)&(src), (caddr_t)&(vbptr(P)->tpv_val), (unsigned)(len));\
+	(DU)->tpdu_li += (len)+2;/* 1 for code, 1 for length */\
+}
+/******************************************************
+ * Macro for the local credit:
+ * uses max transmission unit for the ll
+ * (as modified by the max TPDU size negotiated) 
+ *****************************************************/
+
+#if defined(ARGO_DEBUG)&&!defined(LOCAL_CREDIT_EXPAND)
+#define LOCAL_CREDIT(tpcb) tp_local_credit(tpcb)
+#else
+#define LOCAL_CREDIT(tpcb) { if ((tpcb)->tp_rsycnt == 0) {\
+    register struct sockbuf *xxsb = &((tpcb)->tp_sock->so_rcv);\
+    register int xxi = sbspace(xxsb);\
+    xxi = (xxi<0) ? 0 : ((xxi) / (tpcb)->tp_l_tpdusize); /* free space in tpdus */\
+    xxi = min(xxi, (tpcb)->tp_maxlcredit); \
+    if (!((tpcb)->tp_cebit_off)) { \
+        (tpcb)->tp_lcredit = ROUND((tpcb)->tp_win_recv); \
+        if (xxi < (tpcb)->tp_lcredit) { \
+            (tpcb)->tp_lcredit = xxi; \
+        } \
+    } else \
+        (tpcb)->tp_lcredit = xxi; \
+} }
+#endif /* ARGO_DEBUG */
+
+#ifdef KERNEL
+extern int tp_rttadd, tp_rttdiv;
+#include <sys/syslog.h>
+#define printf logpri(LOG_DEBUG),addlog
+
+#ifndef  tp_NSTATES 
+
+#include <netiso/tp_states.h>
+#include <netiso/tp_events.h>
+#if defined(__STDC__) || defined(__cplusplus)
+#undef ATTR
+#define ATTR(X) ev_union.EV_ ## X
+#endif /* defined(__STDC__) || defined(__cplusplus) */
+
+#endif  /* tp_NSTATES  */
+#endif /* KERNEL */
+
+#endif /* __TP_PARAM__ */
diff --git a/sys/netiso/tp_pcb.c b/sys/netiso/tp_pcb.c
new file mode 100644
index 00000000000..de345c1e377
--- /dev/null
+++ b/sys/netiso/tp_pcb.c
@@ -0,0 +1,999 @@
+/*-
+ * Copyright (c) 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)tp_pcb.c	8.1 (Berkeley) 6/10/93
+ */
+
+/***********************************************************
+				Copyright IBM Corporation 1987
+
+                      All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and its 
+documentation for any purpose and without fee is hereby granted, 
+provided that the above copyright notice appear in all copies and that
+both that copyright notice and this permission notice appear in 
+supporting documentation, and that the name of IBM not be
+used in advertising or publicity pertaining to distribution of the
+software without specific, written prior permission.  
+
+IBM DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING
+ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL
+IBM BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR
+ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+SOFTWARE.
+
+******************************************************************/
+
+/*
+ * ARGO Project, Computer Sciences Dept., University of Wisconsin - Madison
+ */
+/* 
+ * ARGO TP
+ *
+ * $Header: tp_pcb.c,v 5.4 88/11/18 17:28:24 nhall Exp $
+ * $Source: /usr/argo/sys/netiso/RCS/tp_pcb.c,v $
+ *
+ *
+ * This is the initialization and cleanup stuff - 
+ * for the tp machine in general as well as  for the individual pcbs.
+ * tp_init() is called at system startup.  tp_attach() and tp_getref() are
+ * called when a socket is created.  tp_detach() and tp_freeref()
+ * are called during the closing stage and/or when the reference timer 
+ * goes off. 
+ * tp_soisdisconnecting() and tp_soisdisconnected() are tp-specific
+ * versions of soisdisconnect*
+ * and are called (obviously) during the closing phase.
+ *
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/domain.h>
+#include <sys/protosw.h>
+#include <sys/errno.h>
+#include <sys/time.h>
+
+#include <netiso/argo_debug.h>
+#include <netiso/tp_param.h>
+#include <netiso/tp_timer.h>
+#include <netiso/tp_ip.h>
+#include <netiso/tp_stat.h>
+#include <netiso/tp_pcb.h>
+#include <netiso/tp_tpdu.h>
+#include <netiso/tp_trace.h>
+#include <netiso/tp_meas.h>
+#include <netiso/tp_seq.h>
+#include <netiso/tp_clnp.h>
+
+/* ticks are in units of: 
+ * 500 nano-fortnights ;-) or
+ * 500 ms or 
+ * 1/2 second 
+ */
+/* Default connection parameters per network service; tp_attach() copies tp_conn_param[tp_netservice] into each new tpcb. */
+struct tp_conn_param tp_conn_param[] = {
+	/* [ISO_CLNS] TP4 (TP0 also permitted by p_class) over the connectionless ISO network service */
+	{
+		TP_NRETRANS, 	/* short p_Nretrans;  */
+		20,		/* 10 sec */ 	/* short p_dr_ticks;  */
+
+		20,		/* 10 sec */ 	/* short p_cc_ticks; */
+		20,		/* 10 sec */ 	/* short p_dt_ticks; */
+
+		40,		/* 20 sec */ 	/* short p_x_ticks;	 */
+		80,		/* 40 sec */ 	/* short p_cr_ticks;*/
+
+		240,	/* 2 min */ 	/* short p_keepalive_ticks;*/
+		10,		/* 5 sec */ 	/* short p_sendack_ticks;  */
+
+		600,	/* 5 min */ 	/* short p_ref_ticks;	*/
+		360,	/* 3 min */ 	/* short p_inact_ticks;	*/
+
+		(short) 100, 			/* short p_lcdtfract */
+		(short) TP_SOCKBUFSIZE,	/* short p_winsize */
+		TP_TPDUSIZE, 			/* u_char p_tpdusize */
+
+		TPACK_WINDOW, 			/* 4 bits p_ack_strat */
+		TPRX_USE_CW | TPRX_FASTSTART, 
+								/* 4 bits p_rx_strat*/
+		TP_CLASS_4 | TP_CLASS_0,/* 5 bits p_class */
+		1,						/* 1 bit xtd format */
+		1,						/* 1 bit xpd service */
+		1,						/* 1 bit use_checksum */
+		0,						/* 1 bit use net xpd */
+		0,						/* 1 bit use rcc */
+		0,						/* 1 bit use efc */
+		1,						/* no disc indications */
+		0,						/* don't change params */
+		ISO_CLNS,				/* p_netservice */
+	},
+	/* [IN_CLNS] TP4 only, over the Internet network service (chosen by tp_attach() for AF_INET sockets) */
+	{
+		TP_NRETRANS, 	/* short p_Nretrans;  */
+		20,		/* 10 sec */ 	/* short p_dr_ticks;  */
+
+		20,		/* 10 sec */ 	/* short p_cc_ticks; */
+		20,		/* 10 sec */ 	/* short p_dt_ticks; */
+
+		40,		/* 20 sec */ 	/* short p_x_ticks;	 */
+		80,		/* 40 sec */ 	/* short p_cr_ticks;*/
+
+		240,	/* 2 min */ 	/* short p_keepalive_ticks;*/
+		10,		/* 5 sec */ 	/* short p_sendack_ticks;  */
+
+		600,	/* 5 min */ 	/* short p_ref_ticks;	*/
+		360,	/* 3 min */ 	/* short p_inact_ticks;	*/
+
+		(short) 100, 			/* short p_lcdtfract */
+		(short) TP_SOCKBUFSIZE,	/* short p_winsize */
+		TP_TPDUSIZE, 			/* u_char p_tpdusize */
+
+		TPACK_WINDOW, 			/* 4 bits p_ack_strat */
+		TPRX_USE_CW | TPRX_FASTSTART, 
+								/* 4 bits p_rx_strat*/
+		TP_CLASS_4,				/* 5 bits p_class */
+		1,						/* 1 bit xtd format */
+		1,						/* 1 bit xpd service */
+		1,						/* 1 bit use_checksum */
+		0,						/* 1 bit use net xpd */
+		0,						/* 1 bit use rcc */
+		0,						/* 1 bit use efc */
+		1,						/* no disc indications */
+		0,						/* don't change params */
+		IN_CLNS,				/* p_netservice */
+	},
+	/* [ISO_CONS] TP0 over the connection-mode network service; tick values marked n/a are left 0 */
+	{
+		TP_NRETRANS, 			/* short p_Nretrans;  */
+		0,		/* n/a */		/* short p_dr_ticks; */
+
+		40,		/* 20 sec */	/* short p_cc_ticks; */
+		0,		/* n/a */		/* short p_dt_ticks; */
+
+		0,		/* n/a */		/* short p_x_ticks;	*/
+		360,	/* 3  min */	/* short p_cr_ticks;*/
+
+		0,		/* n/a */		/* short p_keepalive_ticks;*/
+		0,		/* n/a */		/* short p_sendack_ticks; */
+
+		600,	/* for cr/cc to clear *//* short p_ref_ticks;	*/
+		0,		/* n/a */		/* short p_inact_ticks;	*/
+
+		/* Use tp4 defaults just in case the user changes ONLY
+		 * the class 
+		 */
+		(short) 100, 			/* short p_lcdtfract */
+		(short) TP0_SOCKBUFSIZE,	/* short p_winsize */
+		TP0_TPDUSIZE, 			/* 8 bits p_tpdusize */
+
+		0, 						/* 4 bits p_ack_strat */
+		0, 						/* 4 bits p_rx_strat*/
+		TP_CLASS_0,				/* 5 bits p_class */
+		0,						/* 1 bit xtd format */
+		0,						/* 1 bit xpd service */
+		0,						/* 1 bit use_checksum */
+		0,						/* 1 bit use net xpd */
+		0,						/* 1 bit use rcc */
+		0,						/* 1 bit use efc */
+		0,						/* no disc indications */
+		0,						/* don't change params */
+		ISO_CONS,				/* p_netservice */
+	},
+	/* [ISO_COSNS] TP4 (TP0 also permitted by p_class) over CONS; longer timers than the CLNS entries, no fast start */
+	{
+		TP_NRETRANS, 	/* short p_Nretrans;  */
+		40,		/* 20 sec */ 	/* short p_dr_ticks;  */
+
+		40,		/* 20 sec */ 	/* short p_cc_ticks; */
+		80,		/* 40 sec */ 	/* short p_dt_ticks; */
+
+		120,		/* 1 min */ 	/* short p_x_ticks;	 */
+		360,		/* 3 min */ 	/* short p_cr_ticks;*/
+
+		360,	/* 3 min */ 	/* short p_keepalive_ticks;*/
+		20,		/* 10 sec */ 	/* short p_sendack_ticks;  */
+
+		600,	/* 5 min */ 	/* short p_ref_ticks;	*/
+		480,	/* 4 min */ 	/* short p_inact_ticks;	*/
+
+		(short) 100, 			/* short p_lcdtfract */
+		(short) TP0_SOCKBUFSIZE,	/* short p_winsize */
+		TP0_TPDUSIZE, 			/* u_char p_tpdusize */
+
+		TPACK_WINDOW, 			/* 4 bits p_ack_strat */
+		TPRX_USE_CW ,			/* No fast start */ 
+								/* 4 bits p_rx_strat*/
+		TP_CLASS_4 | TP_CLASS_0,/* 5 bits p_class */
+		0,						/* 1 bit xtd format */
+		1,						/* 1 bit xpd service */
+		1,						/* 1 bit use_checksum */
+		0,						/* 1 bit use net xpd */
+		0,						/* 1 bit use rcc */
+		0,						/* 1 bit use efc */
+		0,						/* no disc indications */
+		0,						/* don't change params */
+		ISO_COSNS,				/* p_netservice */
+	},
+};
+/* Old-style declarations of the network-layer routines entered in nl_protosw[] below, plus the pcb list heads. */
+#ifdef INET
+int		in_putnetaddr();
+int		in_getnetaddr();
+int		in_cmpnetaddr();
+int 	in_putsufx(); 
+int 	in_getsufx(); 
+int 	in_recycle_tsuffix(); 
+int 	tpip_mtu(); 
+int 	in_pcbbind(); 
+int 	in_pcbconnect(); 
+int 	in_pcbdisconnect(); 
+int 	in_pcbdetach(); 
+int 	in_pcballoc(); 
+int 	tpip_output(); 
+int 	tpip_output_dg(); 
+struct inpcb	tp_inpcb;	/* list head; linked to itself by tp_init() */
+#endif /* INET */
+#ifdef ISO
+int		iso_putnetaddr();
+int		iso_getnetaddr();
+int		iso_cmpnetaddr();
+int 	iso_putsufx(); 
+int 	iso_getsufx(); 
+int 	iso_recycle_tsuffix(); 
+int		tpclnp_mtu(); 
+int		iso_pcbbind(); 
+int		iso_pcbconnect(); 
+int		iso_pcbdisconnect(); 
+int 	iso_pcbdetach(); 
+int 	iso_pcballoc(); 
+int 	tpclnp_output(); 
+int 	tpclnp_output_dg(); 
+int		iso_nlctloutput();
+struct isopcb	tp_isopcb;	/* list head; linked to itself by tp_init() */
+#endif /* ISO */
+#ifdef TPCONS
+int		iso_putnetaddr();
+int		iso_getnetaddr();
+int		iso_cmpnetaddr();
+int 	iso_putsufx(); 
+int 	iso_getsufx(); 
+int 	iso_recycle_tsuffix(); 
+int		iso_pcbbind(); 
+int		tpcons_pcbconnect(); 
+int		tpclnp_mtu();
+int		iso_pcbdisconnect(); 
+int 	iso_pcbdetach(); 
+int 	iso_pcballoc(); 
+int 	tpcons_output(); 
+struct isopcb	tp_isopcb;	/* same object as in the ISO section above (repeated declaration) */
+#endif /* TPCONS */
+/* Network-layer switch: tp_set_npcb() points tp_nlproto at &nl_protosw[tpcb->tp_netservice]. */
+/* A service that is not configured in gets an all-zero placeholder so the later entries keep their index. */
+struct nl_protosw nl_protosw[] = {
+	/* ISO_CLNS */
+#ifdef ISO
+	{ AF_ISO, iso_putnetaddr, iso_getnetaddr, iso_cmpnetaddr,
+		iso_putsufx, iso_getsufx,
+		iso_recycle_tsuffix,
+		tpclnp_mtu, iso_pcbbind, iso_pcbconnect,
+		iso_pcbdisconnect,	iso_pcbdetach,
+		iso_pcballoc,
+		tpclnp_output, tpclnp_output_dg, iso_nlctloutput,
+		(caddr_t) &tp_isopcb,
+		},
+#else
+	{ 0 },
+#endif /* ISO */
+	/* IN_CLNS */
+#ifdef INET
+	{ AF_INET, in_putnetaddr, in_getnetaddr, in_cmpnetaddr,
+		in_putsufx, in_getsufx,
+		in_recycle_tsuffix,
+		tpip_mtu, in_pcbbind, in_pcbconnect,
+		in_pcbdisconnect,	in_pcbdetach,
+		in_pcballoc,
+		tpip_output, tpip_output_dg, /* nl_ctloutput */ NULL,
+		(caddr_t) &tp_inpcb,
+		},
+#else
+	{ 0 },
+#endif /* INET */
+	/* ISO_CONS */
+#if defined(ISO) && defined(TPCONS)
+	{ AF_ISO, iso_putnetaddr, iso_getnetaddr, iso_cmpnetaddr,
+		iso_putsufx, iso_getsufx,
+		iso_recycle_tsuffix,
+		tpclnp_mtu, iso_pcbbind, tpcons_pcbconnect,
+		iso_pcbdisconnect,	iso_pcbdetach,
+		iso_pcballoc,
+		tpcons_output, tpcons_output, iso_nlctloutput,	/* same routine fills both output slots */
+		(caddr_t) &tp_isopcb,
+		},
+#else
+	{ 0 },
+#endif /* ISO && TPCONS */
+	/* End of protosw marker */
+	{ 0 }
+};
+
+u_long tp_sendspace = 1024 * 4;	/* send-buffer size tp_attach() passes to soreserve() */
+u_long tp_recvspace = 1024 * 4;	/* receive-buffer size tp_attach() passes to soreserve() */
+
+/*
+ * NAME:  tp_init()
+ *
+ * CALLED FROM:
+ *  autoconf through the protosw structure
+ *
+ * FUNCTION:
+ *  initialize tp machine
+ *
+ * RETURNS:  Nada
+ *
+ * SIDE EFFECTS:
+ * 
+ * NOTES:
+ */
+int
+tp_init()
+{
+	static int 	init_done=0;
+	void	 	tp_timerinit();
+
+	if (init_done++)
+		return 0;
+
+
+	/* FOR INET */
+	tp_inpcb.inp_next = tp_inpcb.inp_prev = &tp_inpcb;
+	/* FOR ISO */
+	tp_isopcb.isop_next = tp_isopcb.isop_prev = &tp_isopcb;
+
+    tp_start_win = 2;
+
+	tp_timerinit();
+	bzero((caddr_t)&tp_stat, sizeof(struct tp_stat));
+	return 0;
+}
+
+/*
+ * NAME: 	tp_soisdisconnecting()
+ *
+ * CALLED FROM:
+ *  tp.trans
+ *
+ * FUNCTION and ARGUMENTS:
+ *  Mark the socket (so) as being in the process of disconnecting.
+ *
+ * RETURNS: 	Nada
+ *
+ * SIDE EFFECTS:
+ *  Inside the IFPERF section: logs a TPtime_close measurement, clears tp_perf_on.
+ * NOTES:
+ *  The generic soisdisconnecting() is called first and SS_CANTSENDMORE is
+ *  then cleared again: we don't want that flag left set because it would
+ *  cause a SIGPIPE to be delivered in sosend().
+ *  NOTE(review): the old comment also named SS_CANTRECVMORE, which this code
+ *  leaves alone - confirm which is intended.
+ */
+void
+tp_soisdisconnecting(so)
+	register struct socket *so;
+{
+	soisdisconnecting(so);
+	so->so_state = so->so_state & ~SS_CANTSENDMORE;
+	IFPERF(sototpcb(so))
+		register struct tp_pcb *tpcb = sototpcb(so);
+		u_int 	f_sufx, l_sufx;
+		/* only the first sizeof(u_int) bytes of each suffix are logged */
+		bcopy ((caddr_t)tpcb->tp_lsuffix, (caddr_t)&l_sufx, sizeof(u_int) );
+		bcopy ((caddr_t)tpcb->tp_fsuffix, (caddr_t)&f_sufx, sizeof(u_int) );
+		tpmeas(tpcb->tp_lref, TPtime_close, &time, f_sufx, l_sufx,
+			tpcb->tp_fref);
+		tpcb->tp_perf_on = 0; /* turn perf off */
+	ENDPERF
+}
+
+
+/*
+ * NAME: tp_soisdisconnected()
+ *
+ * CALLED FROM:
+ *	tp.trans
+ *
+ * FUNCTION and ARGUMENTS:
+ *  Set state of the socket of (tpcb) to reflect the fact that we're disconnectED.
+ *  Freeze the reference (REF_FROZEN), recycle the suffix, and
+ *  start the reference timer for tp_refer_ticks ticks.
+ *  Inside the IFPERF section a TPtime_close measurement is logged first.
+ *
+ * RETURNS:	Nada
+ *
+ * SIDE EFFECTS:
+ *  tp_perf_on is cleared inside the IFPERF section.
+ * NOTES:
+ *  soisdisconnecting() is called and SS_CANTSENDMORE is then cleared again,
+ *  because that flag would cause a SIGPIPE to be delivered in sosend().
+ *  NOTE(review): this calls soisdisconnecting(), not soisdisconnected(), and
+ *  leaves SS_CANTRCVMORE alone although the old comment named it - confirm.
+ *  The suffix values themselves (not their addresses) are passed to tpmeas().
+ */
+void
+tp_soisdisconnected(tpcb)
+	register struct tp_pcb	*tpcb;
+{
+	register struct socket	*so = tpcb->tp_sock;
+
+	soisdisconnecting(so);
+	so->so_state &= ~SS_CANTSENDMORE;
+	IFPERF(tpcb)
+		register struct tp_pcb *ttpcb = sototpcb(so);
+		u_int 	fsufx, lsufx;
+
+		/* log the leading sizeof(u_int) bytes of each suffix, as tp_soisdisconnecting() does */
+		bcopy ((caddr_t)ttpcb->tp_fsuffix, (caddr_t)&fsufx, sizeof(u_int) );
+		bcopy ((caddr_t)ttpcb->tp_lsuffix, (caddr_t)&lsufx, sizeof(u_int) );
+
+		tpmeas(ttpcb->tp_lref, TPtime_close, 
+		   &time, fsufx, lsufx, ttpcb->tp_fref);
+		tpcb->tp_perf_on = 0; /* turn perf off */
+	ENDPERF
+
+	tpcb->tp_refstate = REF_FROZEN;
+	tp_recycle_tsuffix(tpcb);
+	tp_etimeout(tpcb, TM_reference, (int)tpcb->tp_refer_ticks);
+}
+
+/*
+ * NAME:	tp_freeref()
+ *
+ * CALLED FROM:
+ *  tp.trans when the reference timer goes off, and
+ *  from tp_attach() and tp_detach() when a tpcb is only partially set up
+ *  (no ref timer was ever set for it) and is being discarded
+ *  because of an error or an early close()
+ *
+ * FUNCTION and ARGUMENTS:
+ *  Releases reference number (n) for re-use; does nothing if it has no pcb.
+ *
+ * RETURNS: Nothing
+ *
+ * SIDE EFFECTS:
+ *  marks the pcb REF_FREE, recomputes tpr_maxopen, decrements tpr_numopen
+ * NOTES:	better be called at clock priority !!!!!
+ */
+void
+tp_freeref(n)
+RefNum n;
+{
+	register struct tp_ref *ref = &tp_ref[n];
+	register struct tp_pcb *tpcb = ref->tpr_pcb;
+
+	IFDEBUG(D_TIMER)
+		printf("tp_freeref called for ref %d pcb %x maxrefopen %d\n", 
+		n, tpcb, tp_refinfo.tpr_maxopen);
+	ENDDEBUG
+	IFTRACE(D_TIMER)
+		tptrace(TPPTmisc, "tp_freeref ref maxrefopen pcb",
+		n, tp_refinfo.tpr_maxopen, tpcb, 0);
+	ENDTRACE
+	if (!tpcb)
+		return;
+	IFDEBUG(D_CONN)
+		printf("tp_freeref: CLEARING tpr_pcb 0x%x\n", tpcb);
+	ENDDEBUG
+	ref->tpr_pcb = (struct tp_pcb *)0;
+	tpcb->tp_refstate = REF_FREE;
+
+	/* walk down from the old high-water mark to the highest slot still in use */
+	ref = tp_ref + tp_refinfo.tpr_maxopen;
+	while (ref > tp_ref && ref->tpr_pcb == 0)
+		ref--;
+	tp_refinfo.tpr_maxopen = ref - tp_ref;
+	tp_refinfo.tpr_numopen--;
+
+	IFDEBUG(D_TIMER)
+		printf("tp_freeref ends w/ maxrefopen %d\n", tp_refinfo.tpr_maxopen);
+	ENDDEBUG
+}
+
+/*
+ * NAME:  tp_getref()
+ *
+ * CALLED FROM:
+ *  tp_attach()
+ *
+ * FUNCTION and ARGUMENTS:
+ *  finds a free slot in the reference table (growing the table when
+ *  there is none) and links that slot to (tpcb)
+ *
+ * RETURN VALUE:
+ *	the slot's index, used as the reference number,
+ *  or TP_ENOREF if the table could not be grown
+ *
+ * SIDE EFFECTS:
+ *  sets tp_refstate to REF_OPENING; may replace tp_ref / tpr_base (doubling tpr_size)
+ * NOTES:
+ */
+u_long
+tp_getref(tpcb) 
+	register struct tp_pcb *tpcb;
+{
+	register struct tp_ref	*slot, *end;
+	register int 			idx;
+	caddr_t old_table;
+	unsigned old_bytes;
+
+	if (++tp_refinfo.tpr_numopen < tp_refinfo.tpr_size) {
+		end = tp_refinfo.tpr_base + tp_refinfo.tpr_size;
+		/* tp_ref[0] is never used, so the scan starts at slot 1 */
+		for (slot = tp_refinfo.tpr_base + 1; slot < end; slot++)
+			if (slot->tpr_pcb == 0)
+				goto got_one;
+	}
+	/* nothing free: allocate a table twice the size and move the old entries over */
+	old_table = (caddr_t)tp_refinfo.tpr_base;
+	old_bytes = tp_refinfo.tpr_size * sizeof(struct tp_ref);
+	slot = (struct tp_ref *) malloc(old_bytes + old_bytes, M_PCB, M_NOWAIT);
+	if (slot == 0)
+		return (--tp_refinfo.tpr_numopen, TP_ENOREF);
+	tp_refinfo.tpr_base = tp_ref = slot;
+	tp_refinfo.tpr_size *= 2;
+	bcopy(old_table, (caddr_t)slot, old_bytes);
+	free(old_table, M_PCB);
+	slot = (struct tp_ref *)((caddr_t)slot + old_bytes);	/* first entry of the new half */
+	bzero((caddr_t)slot, old_bytes);
+got_one:
+	slot->tpr_pcb = tpcb;
+	tpcb->tp_refstate = REF_OPENING;
+	idx = slot - tp_refinfo.tpr_base;
+	if (tp_refinfo.tpr_maxopen < idx) 
+		tp_refinfo.tpr_maxopen = idx;
+	return (u_long)idx;
+}
+
+/*
+ * NAME: tp_set_npcb()
+ *
+ * CALLED FROM:
+ *	tp_attach(), tp_route_to()
+ *
+ * FUNCTION and ARGUMENTS:
+ *  given a tpcb, allocate an appropriate lower-level npcb, detaching
+ *  any old one first.  RETURNS whatever the nlp_pcballoc routine returns.
+ */
+int
+tp_set_npcb(tpcb)
+register struct tp_pcb *tpcb;
+{
+	register struct socket *so = tpcb->tp_sock;
+	int error;
+	if (tpcb->tp_nlproto && tpcb->tp_npcb) {
+		short so_state = so->so_state;	/* saved so the flags can be restored below */
+		so->so_state &= ~SS_NOFDREF;	/* presumably keeps the detach routine from releasing the socket - confirm */
+		tpcb->tp_nlproto->nlp_pcbdetach(tpcb->tp_npcb);
+		so->so_state = so_state;
+	}
+	tpcb->tp_nlproto = &nl_protosw[tpcb->tp_netservice];
+	/* xx_pcballoc sets so_pcb */
+	error = tpcb->tp_nlproto->nlp_pcballoc(so, tpcb->tp_nlproto->nlp_pcblist);
+	tpcb->tp_npcb = so->so_pcb;	/* keep the network-layer pcb on the tpcb ... */
+	so->so_pcb = (caddr_t)tpcb;	/* ... and make the socket point at the tpcb instead */
+	return (error);
+}
+/*
+ * NAME: tp_attach()
+ *
+ * CALLED FROM:
+ *	tp_usrreq, PRU_ATTACH
+ *
+ * FUNCTION and ARGUMENTS:
+ *  given a socket (so) and a protocol number (protocol), allocate a tpcb
+ *  and ref structure, initialize everything in the structures that
+ *  needs to be initialized.
+ *
+ * RETURN VALUE:
+ *  0 ok
+ *  EISCONN if this socket is already part of a connection
+ *  ENOBUFS if no memory is available for the tpcb
+ *  ETOOMANYREFS if ran out of tp reference numbers.
+ *  E* whatever error is returned from soreserve()
+ *    or from the network-layer pcb allocation routine
+ *
+ * SIDE EFFECTS:
+ *  on success so->so_pcb points at the new tpcb; on failure it is reset to 0
+ *
+ * NOTES:
+ *  a nonzero protocol below ISOPROTO_TP4 selects the ISO_CONS defaults
+ */
+int
+tp_attach(so, protocol)
+	struct socket 			*so;
+	int 					protocol;
+{
+	register struct tp_pcb	*tpcb;
+	int 					error = 0;
+	int 					dom = so->so_proto->pr_domain->dom_family;
+	u_long					lref;
+	extern struct tp_conn_param tp_conn_param[];
+
+	IFDEBUG(D_CONN)
+		printf("tp_attach:dom 0x%x so 0x%x ", dom, so);
+	ENDDEBUG
+	IFTRACE(D_CONN)
+		tptrace(TPPTmisc, "tp_attach:dom so", dom, so, 0, 0);
+	ENDTRACE
+
+	if (so->so_pcb != NULL) { 
+		return EISCONN;	/* socket already part of a connection*/
+	}
+
+	/* later an ioctl will allow reallocation IF still in closed state */
+	if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0)
+		error = soreserve(so, tp_sendspace, tp_recvspace);
+	if (error)
+		goto bad2;
+
+	MALLOC(tpcb, struct tp_pcb *, sizeof(*tpcb), M_PCB, M_NOWAIT);
+	if (tpcb == NULL) {
+		error = ENOBUFS;
+		goto bad2;
+	}
+	bzero( (caddr_t)tpcb, sizeof (struct tp_pcb) );
+
+	if ( ((lref = tp_getref(tpcb)) &  TP_ENOREF) != 0 ) { 
+		error = ETOOMANYREFS; 
+		goto bad3;
+	}
+	tpcb->tp_lref = lref;
+	tpcb->tp_sock =  so;
+	tpcb->tp_domain = dom;
+	tpcb->tp_rhiwat = so->so_rcv.sb_hiwat;
+	/* tpcb->tp_proto = protocol; someday maybe? */
+	if (protocol && protocol<ISOPROTO_TP4) {
+		tpcb->tp_netservice = ISO_CONS;
+		tpcb->tp_snduna = (SeqNum) -1;/* kludge so the pseudo-ack from the CR/CC
+								 * will generate correct fake-ack values
+								 */
+	} else {
+		tpcb->tp_netservice = (dom== AF_INET)?IN_CLNS:ISO_CLNS;
+		/* the default */
+	}
+	tpcb->_tp_param = tp_conn_param[tpcb->tp_netservice];	/* start from the per-service defaults */
+
+	tpcb->tp_state = TP_CLOSED;
+	tpcb->tp_vers  = TP_VERSION;
+	tpcb->tp_notdetached = 1;
+
+		   /* Spec says default is 128 octets,
+			* that is, if the tpdusize argument never appears, use 128.
+			* As the initiator, we will always "propose" the 2048
+			* size, that is, we will put this argument in the CR 
+			* always, but accept what the other side sends on the CC.
+			* If the initiator sends us something larger on a CR,
+			* we'll respond w/ this.
+			* Our maximum is 4096.  See tp_chksum.c comments.
+			*/
+	tpcb->tp_cong_win = 
+		tpcb->tp_l_tpdusize = 1 << tpcb->tp_tpdusize;
+
+	tpcb->tp_seqmask  = TP_NML_FMT_MASK;
+	tpcb->tp_seqbit  =  TP_NML_FMT_BIT;
+	tpcb->tp_seqhalf  =  tpcb->tp_seqbit >> 1;
+
+	/* attach to a network-layer protoswitch */
+	if ((error = tp_set_npcb(tpcb)) != 0)
+		goto bad4;
+	ASSERT( tpcb->tp_nlproto->nlp_afamily == tpcb->tp_domain);
+
+	/* nothing to do for iso case */
+	if( dom == AF_INET )
+		sotoinpcb(so)->inp_ppcb = (caddr_t) tpcb;
+
+	return 0;
+
+bad4:	/* network-layer pcb could not be set up: give the reference back */
+	IFDEBUG(D_CONN)
+		printf("BAD4 in tp_attach, so 0x%x\n", so);
+	ENDDEBUG
+	tp_freeref(tpcb->tp_lref);
+
+bad3:	/* release the tpcb itself */
+	IFDEBUG(D_CONN)
+		printf("BAD3 in tp_attach, so 0x%x\n", so);
+	ENDDEBUG
+
+	free((caddr_t)tpcb, M_PCB); /* never a cluster  */
+
+bad2:	/* leave the socket without a pcb */
+	IFDEBUG(D_CONN)
+		printf("BAD2 in tp_attach, so 0x%x\n", so);
+	ENDDEBUG
+	so->so_pcb = 0;
+
+/*bad:*/
+	IFDEBUG(D_CONN)
+		printf("BAD in tp_attach, so 0x%x\n", so);
+	ENDDEBUG
+	return error;
+}
+
+/*
+ * NAME:  tp_detach()
+ *
+ * CALLED FROM:
+ *	tp.trans, on behalf of a user close request
+ *  and when the reference timer goes off
+ * (if the disconnect  was initiated by the protocol entity 
+ * rather than by the user)
+ *
+ * FUNCTION and ARGUMENTS:
+ *  remove the tpcb structure from the list of active or
+ *  partially active connections, recycle all the mbufs
+ *  associated with the pcb, ref structure, sockbufs, etc.
+ *  Only free the ref structure if you know that a ref timer
+ *  wasn't set for this tpcb (tp_refstate still REF_OPENING).
+ *
+ * RETURNS:  Nada
+ *
+ * SIDE EFFECTS:
+ *  the tpcb itself is NOT freed here
+ * NOTES:
+ *  tp_soisdisconnected() has already been called by the time this is called
+ */
+void
+tp_detach(tpcb)
+	register struct tp_pcb 	*tpcb;
+{
+	void					tp_freeref(), tp_rsyflush();
+	register struct socket	 *so = tpcb->tp_sock;
+
+	IFDEBUG(D_CONN)
+		printf("tp_detach(tpcb 0x%x, so 0x%x)\n",
+			tpcb,so);
+	ENDDEBUG
+	IFTRACE(D_CONN)
+		tptraceTPCB(TPPTmisc, "tp_detach tpcb so lsufx", 
+			tpcb, so, *(u_short *)(tpcb->tp_lsuffix), 0);
+	ENDTRACE
+
+	IFDEBUG(D_CONN)
+		printf("so_snd at 0x%x so_rcv at 0x%x\n", &so->so_snd, &so->so_rcv);
+		dump_mbuf(so->so_snd.sb_mb, "so_snd at detach ");
+		printf("about to call LL detach, nlproto 0x%x, nl_detach 0x%x\n",
+				tpcb->tp_nlproto, tpcb->tp_nlproto->nlp_pcbdetach);
+	ENDDEBUG
+
+	if (tpcb->tp_Xsnd.sb_mb) {
+		printf("Unsent Xdata on detach; would panic");
+		sbflush(&tpcb->tp_Xsnd);
+	}
+	if (tpcb->tp_ucddata)
+		m_freem(tpcb->tp_ucddata);
+	tpcb->tp_ucddata = 0;	/* the tpcb outlives this call; don't leave a dangling pointer */
+	IFDEBUG(D_CONN)
+		printf("reassembly info cnt %d rsyq 0x%x\n",
+		    tpcb->tp_rsycnt, tpcb->tp_rsyq);
+	ENDDEBUG
+	if (tpcb->tp_rsyq)
+		tp_rsyflush(tpcb);
+
+	if (tpcb->tp_next) {
+		remque(tpcb);
+		tpcb->tp_next = tpcb->tp_prev = 0;
+	}
+	tpcb->tp_notdetached = 0;
+
+	IFDEBUG(D_CONN)
+		printf("calling (...nlproto->...)(0x%x, so 0x%x)\n", 
+			tpcb->tp_npcb, so);
+		printf("so 0x%x so_head 0x%x,  qlen %d q0len %d qlimit %d\n", 
+		so,  so->so_head,
+		so->so_qlen, so->so_q0len, so->so_qlimit);
+	ENDDEBUG
+
+	(tpcb->tp_nlproto->nlp_pcbdetach)(tpcb->tp_npcb);
+				/* does an so->so_pcb = 0; sofree(so) */
+
+	IFDEBUG(D_CONN)
+		printf("after xxx_pcbdetach\n");
+	ENDDEBUG
+
+	if (tpcb->tp_state == TP_LISTENING) {
+		register struct tp_pcb **tt;
+		for (tt = &tp_listeners; *tt; tt = &((*tt)->tp_nextlisten))	/* find the link that points at us */
+			if (*tt == tpcb)
+				break;
+		if (*tt)
+			*tt = tpcb->tp_nextlisten;	/* unlink from the listener chain */
+		else
+			printf("tp_detach from listen: should panic\n");
+	}
+	if (tpcb->tp_refstate == REF_OPENING ) {
+		/* no connection existed here so no reference timer will be called */
+		IFDEBUG(D_CONN)
+			printf("SETTING ref %d to REF_FREE\n", tpcb->tp_lref);
+		ENDDEBUG
+
+		tp_freeref(tpcb->tp_lref);
+	}
+#ifdef TP_PERF_MEAS
+	/* 
+	 * Get rid of the cluster mbuf allocated for performance measurements, if
+	 * there is one.  Note that tpcb->tp_perf_on says nothing about whether or 
+	 * not a cluster mbuf was allocated, so you have to check for a pointer 
+	 * to one (that is, we need the TP_PERF_MEASs around the following section 
+	 * of code, not the IFPERFs)
+	 */
+	if (tpcb->tp_p_mbuf) {
+		register struct mbuf *m = tpcb->tp_p_mbuf;
+		struct mbuf *n;
+		IFDEBUG(D_PERF_MEAS)
+			printf("freeing tp_p_meas 0x%x  ", tpcb->tp_p_meas);
+		ENDDEBUG
+		do {
+		    MFREE(m, n);
+		    m = n;
+		} while (n);
+		tpcb->tp_p_meas = 0;
+		tpcb->tp_p_mbuf = 0;
+	}
+#endif /* TP_PERF_MEAS */
+
+	IFDEBUG(D_CONN)
+		printf( "end of detach, NOT single, tpcb 0x%x\n", tpcb);
+	ENDDEBUG
+	/* free((caddr_t)tpcb, M_PCB); WHere to put this ? */
+}
+
+struct que {	/* bare next/prev pair; tp_pcbbind() insque()s bound tpcbs onto tp_bound_pcbs */
+	struct tp_pcb *next;
+	struct tp_pcb *prev;
+} tp_bound_pcbs =
+{(struct tp_pcb *)&tp_bound_pcbs, (struct tp_pcb *)&tp_bound_pcbs};	/* starts empty: both links point back at the head */
+
+u_short tp_unique;	/* counter tp_pcbbind() steps through when it has to invent a local suffix */
+/* tp_tselinuse(): 1 if the suffix (tlen bytes at tsel) conflicts with a bound or listening tpcb, else 0. */
+int
+tp_tselinuse(tlen, tsel, siso, reuseaddr)
+int tlen, reuseaddr;
+caddr_t tsel;
+register struct sockaddr_iso *siso;
+{
+	struct tp_pcb *b = tp_bound_pcbs.next, *l = tp_listeners;
+	register struct tp_pcb *t;
+	for (;;) {
+		if (b != (struct tp_pcb *)&tp_bound_pcbs) {	/* first every bound tpcb ... */
+			t = b; b = t->tp_next;
+		} else if (l) {				/* ... then every listener */
+			t = l; l = t->tp_nextlisten;
+		} else
+			break;
+		if (tlen == t->tp_lsuffixlen && bcmp(tsel, t->tp_lsuffix, tlen) == 0) {
+			if (t->tp_flags & TPF_GENERAL_ADDR) {	/* t was bound without a network address */
+				if (siso == 0 || reuseaddr == 0)
+					return 1;
+			} else if (siso) {	/* both have an address: compare family, then nlp_cmpnetaddr() */
+				if (siso->siso_family == t->tp_domain &&
+					t->tp_nlproto->nlp_cmpnetaddr(t->tp_npcb, siso, TP_LOCAL))
+						return 1;
+			} else if (reuseaddr == 0)
+						return 1;
+		}
+	}
+	return 0;
+}
+/* tp_pcbbind(): give a closed tpcb its local suffix (from nam, or an invented one); returns 0 or an errno value. */
+int
+tp_pcbbind(tpcb, nam)
+register struct tp_pcb *tpcb;
+register struct mbuf *nam;
+{
+	register struct sockaddr_iso *siso = 0;
+	int tlen = 0, wrapped = 0;
+	caddr_t tsel;
+	u_short tutil;
+
+	if (tpcb->tp_state != TP_CLOSED)
+		return (EINVAL);
+	if (nam) {
+		siso = mtod(nam, struct sockaddr_iso *);
+		switch (siso->siso_family) {
+		default:
+			return (EAFNOSUPPORT);
+#ifdef ISO
+		case AF_ISO:
+			tlen = siso->siso_tlen;
+			tsel = TSEL(siso);
+			if (siso->siso_nlen == 0)	/* no network address given */
+				siso = 0;
+			break;
+#endif
+#ifdef INET
+		case AF_INET:
+			tsel = (caddr_t)&tutil;
+			if ((tutil = ((struct sockaddr_in *)siso)->sin_port) != 0) {
+				tlen = 2;
+			}
+			if (((struct sockaddr_in *)siso)->sin_addr.s_addr == 0)
+				siso = 0;
+#endif
+		}	/* closes the switch; kept outside #ifdef INET so braces balance without INET */
+	}
+	if (tpcb->tp_lsuffixlen == 0) {
+		if (tlen) {
+			if (tp_tselinuse(tlen, tsel, siso,
+								tpcb->tp_sock->so_options & SO_REUSEADDR))
+				return (EINVAL);
+		} else {
+			/* no suffix supplied: step tp_unique until an unused 2-byte suffix turns up */
+			for (tsel = (caddr_t)&tutil, tlen = 2;;){
+				if (tp_unique++ < ISO_PORT_RESERVED ||
+					tp_unique > ISO_PORT_USERRESERVED) {
+						if (wrapped++)	/* second time out of range: give up */
+							return ESRCH;
+						tp_unique = ISO_PORT_RESERVED;
+				}
+				tutil = htons(tp_unique);
+				if (tp_tselinuse(tlen, tsel, siso, 0) == 0)
+					break;
+			}
+			if (siso) switch (siso->siso_family) {	/* write the chosen suffix back into nam */
+#ifdef ISO
+				case AF_ISO:
+					bcopy(tsel, TSEL(siso), tlen);
+					siso->siso_tlen = tlen;
+					break;
+#endif
+#ifdef INET
+				case AF_INET:
+					((struct sockaddr_in *)siso)->sin_port = tutil;
+#endif
+				}
+		}
+		bcopy(tsel, tpcb->tp_lsuffix, (tpcb->tp_lsuffixlen = tlen));
+		insque(tpcb, &tp_bound_pcbs);
+	} else {
+		/* a suffix is already set: only a network address may still be added */
+		if (tlen || siso == 0)
+			return (EINVAL);
+	}
+	if (siso == 0) {
+		tpcb->tp_flags |= TPF_GENERAL_ADDR;
+		return (0);
+	}
+	return tpcb->tp_nlproto->nlp_pcbbind(tpcb->tp_npcb, nam);
+}
diff --git a/sys/netiso/tp_pcb.h b/sys/netiso/tp_pcb.h
new file mode 100644
index 00000000000..0353cb47b20
--- /dev/null
+++ b/sys/netiso/tp_pcb.h
@@ -0,0 +1,356 @@
+/*-
+ * Copyright (c) 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)tp_pcb.h	8.1 (Berkeley) 6/10/93
+ */
+
+/***********************************************************
+		Copyright IBM Corporation 1987
+
+                      All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and its 
+documentation for any purpose and without fee is hereby granted, 
+provided that the above copyright notice appear in all copies and that
+both that copyright notice and this permission notice appear in 
+supporting documentation, and that the name of IBM not be
+used in advertising or publicity pertaining to distribution of the
+software without specific, written prior permission.  
+
+IBM DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING
+ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL
+IBM BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR
+ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+SOFTWARE.
+
+******************************************************************/
+
+/*
+ * ARGO Project, Computer Sciences Dept., University of Wisconsin - Madison
+ */
+/* 
+ * ARGO TP
+ *
+ * $Header: tp_pcb.h,v 5.2 88/11/18 17:09:32 nhall Exp $
+ * $Source: /usr/argo/sys/netiso/RCS/tp_pcb.h,v $
+ *
+ * 
+ * This file defines the transport protocol control block (tpcb)
+ * and a bunch of #define values that are used in the tpcb.
+ */
+
+#ifndef  __TP_PCB__
+#define  __TP_PCB__
+
+#include <netiso/tp_param.h>
+#include <netiso/tp_timer.h>
+#include <netiso/tp_user.h>
+#ifndef sblock
+#include <sys/socketvar.h>
+#endif /* sblock */
+
+/* NOTE: the code depends on REF_CLOSED > REF_OPEN > the rest, and
+ * on REF_FREE being zero
+ *
+ * Possible improvement:
+ * think about merging the tp_ref w/ the tpcb and doing a search
+ * through the tpcb list, from tpb. This would slow down lookup
+ * during data transfer
+ * It would be a little nicer also to have something based on the
+ * clock (like top n bits of the reference is part of the clock, to
+ * minimize the likelihood  of reuse after a crash)
+ * also, need to keep the timer servicing part to a minimum (although
+ * the cost of this is probably independent of whether the timers are
+ * in the pcb or in an array..
+ * Last, would have to make the number of timers a function of the amount of
+ * mbufs available, plus some for the frozen references.
+ *
+ * Possible improvement:
+ * Might not need the ref_state stuff either...
+ * REF_FREE could correspond to tp_state == CLOSED or nonexistent tpcb,
+ * REF_OPEN to tp_state anywhere from AK_WAIT or CR_SENT to CLOSING
+ * REF_OPENING could correspond to LISTENING, because that's the
+ * way it's used, not because the correspondence is exact.
+ * REF_CLOSED could correspond to REFWAIT
+ */
+/*
+ * Reference states; the tp_refstate field of struct tp_pcb holds one of
+ * these values.
+ */
+#define REF_FROZEN 3	/* has ref timer only */
+#define REF_OPEN 2		/* has timers, possibly active */
+#define REF_OPENING 1	/* in use (has a pcb) but no timers */
+#define REF_FREE 0		/* free to reallocate */
+
+/* number of slots in the per-pcb tp_timer[] array */
+#define TM_NTIMERS 		6
+
+/* A reference entry: holds only a back pointer to the pcb that owns it. */
+struct tp_ref {
+	struct tp_pcb 		*tpr_pcb;	/* back ptr to PCB */
+};
+
+/*
+ * PER system stuff (one static structure instead of a bunch of names)
+ * NOTE(review): field meanings below are inferred from the names only;
+ * confirm against the code that maintains tp_refinfo.
+ */
+struct tp_refinfo {
+	struct tp_ref		*tpr_base;		/* presumably start of the tp_ref array */
+	int					tpr_size;		/* presumably number of entries at tpr_base */
+	int					tpr_maxopen;	/* presumably highest reference in use */
+	int					tpr_numopen;	/* presumably count of references in use */
+};
+
+/*
+ * Network-layer switch: the set of lower-layer dependent routines used
+ * by TP.  Each tp_pcb points at one entry through tp_nlproto, and the
+ * entries live in the nl_protosw[] array declared below.
+ * The function pointers are declared K&R style (no prototypes), so the
+ * argument lists are not checked by the compiler.
+ */
+struct nl_protosw {
+	int		nlp_afamily;			/* address family */
+	int		(*nlp_putnetaddr)();	/* puts addresses in nl pcb */
+	int		(*nlp_getnetaddr)();	/* gets addresses from nl pcb */
+	int		(*nlp_cmpnetaddr)();	/* compares address in pcb with sockaddr */
+	int		(*nlp_putsufx)();		/* puts transport suffixes in nl pcb */
+	int		(*nlp_getsufx)();		/* gets transport suffixes from nl pcb */
+	int		(*nlp_recycle_suffix)();/* clears suffix from nl pcb */
+	int		(*nlp_mtu)();			/* figures out mtu based on nl used */
+	int		(*nlp_pcbbind)();		/* bind to pcb for net level */
+	int		(*nlp_pcbconn)();		/* connect for net level */
+	int		(*nlp_pcbdisc)();		/* disconnect net level */
+	int		(*nlp_pcbdetach)();		/* detach net level pcb */
+	int		(*nlp_pcballoc)();		/* allocate a net level pcb */
+	int		(*nlp_output)();		/* prepare a packet to give to nl */
+	int		(*nlp_dgoutput)();		/* prepare a packet to give to nl
+									 * (presumably the datagram variant) */
+	int		(*nlp_ctloutput)();		/* hook for network set/get options */
+	caddr_t	nlp_pcblist;			/* list of xx_pcb's for connections */
+};
+
+
+/*
+ * The transport protocol control block (tpcb): per-connection state for
+ * TP, linked to its socket (tp_sock) and to the lower-layer pcb
+ * (tp_npcb / tp_nlproto).  Several convenience macros are defined
+ * inline next to the fields they refer to.
+ */
+struct tp_pcb {
+	struct tp_pcb		*tp_next;
+	struct tp_pcb		*tp_prev;
+	struct tp_pcb		*tp_nextlisten; /* chain all listeners */
+	struct socket 		*tp_sock;		/* back ptr */
+	u_short 			tp_state;		/* state of fsm */
+	short 				tp_retrans;		/* # times can still retrans */
+	caddr_t				tp_npcb;		/* to lower layer pcb */
+	struct nl_protosw	*tp_nlproto;	/* lower-layer dependent routines */
+	struct rtentry		**tp_routep;	/* obtain mtu; inside npcb */
+
+
+	RefNum				tp_lref;	 	/* local reference */
+	RefNum 				tp_fref;		/* foreign reference */
+
+	u_int				tp_seqmask;		/* mask for seq space */
+	u_int				tp_seqbit;		/* bit for seq number wraparound */
+	u_int				tp_seqhalf;		/* half the seq space */
+
+	struct mbuf			*tp_ucddata;	/* user connect/disconnect data */
+
+	/* credit & sequencing info for SENDING */
+	u_short 			tp_fcredit;		/* current remote credit in # packets */
+	u_short 			tp_maxfcredit;	/* max remote credit in # packets */
+	u_short				tp_dupacks;		/* intuit packet loss before rxt timo */
+	u_long				tp_cong_win;	/* congestion window in bytes.
+										 * see profuse comments in TCP code
+										 */
+	u_long				tp_ssthresh;	/* cong_win threshold for slow start
+										 * exponential to linear switch
+										 */
+	SeqNum				tp_snduna;		/* seq # of lowest unacked DT */
+	SeqNum				tp_sndnew;		/* seq # of lowest unsent DT  */
+	SeqNum				tp_sndnum;		/* next seq # to be assigned */
+	SeqNum				tp_sndnxt;		/* what to do next; poss. rxt */
+	struct mbuf			*tp_sndnxt_m;	/* packet corres. to sndnxt*/
+	int					tp_Nwindow;		/* for perf. measurement */
+
+	/* credit & sequencing info for RECEIVING */
+	SeqNum				tp_rcvnxt;		/* next DT seq # expect to recv */
+	SeqNum	 			tp_sent_lcdt;	/* cdt according to last ack sent */
+	SeqNum	 			tp_sent_uwe;	/* uwe according to last ack sent */
+	SeqNum	 			tp_sent_rcvnxt;	/* rcvnxt according to last ack sent 
+										 * needed for perf measurements only
+										 */
+	u_short				tp_lcredit;		/* current local credit in # packets */
+	u_short				tp_maxlcredit;	/* needed for reassembly queue */
+	struct mbuf			**tp_rsyq;		/* unacked stuff recvd out of order */
+	int					tp_rsycnt;		/* number of packets "" "" "" ""    */
+	u_long				tp_rhiwat;		/* remember original RCVBUF size */
+
+	/* receiver congestion state stuff ...  */
+	u_int               tp_win_recv;
+
+	/* receive window as a scaled int (8 bit fraction part) */
+
+	struct cong_sample {
+		ushort  cs_size; 				/* current window size */
+		ushort  cs_received;   			/* PDUs received in this sample */
+		ushort  cs_ce_set;    /* PDUs received in this sample with CE bit set */
+	} tp_cong_sample;
+
+
+	/* parameters per-connection controllable by user */
+	struct tp_conn_param _tp_param; 
+
+#define	tp_Nretrans _tp_param.p_Nretrans
+#define	tp_dr_ticks _tp_param.p_dr_ticks
+#define	tp_cc_ticks _tp_param.p_cc_ticks
+#define	tp_dt_ticks _tp_param.p_dt_ticks
+#define	tp_xpd_ticks _tp_param.p_x_ticks
+#define	tp_cr_ticks _tp_param.p_cr_ticks
+#define	tp_keepalive_ticks _tp_param.p_keepalive_ticks
+#define	tp_sendack_ticks _tp_param.p_sendack_ticks
+#define	tp_refer_ticks _tp_param.p_ref_ticks
+#define	tp_inact_ticks _tp_param.p_inact_ticks
+#define	tp_xtd_format _tp_param.p_xtd_format
+#define	tp_xpd_service _tp_param.p_xpd_service
+#define	tp_ack_strat _tp_param.p_ack_strat
+#define	tp_rx_strat _tp_param.p_rx_strat
+#define	tp_use_checksum _tp_param.p_use_checksum
+#define	tp_use_efc _tp_param.p_use_efc
+#define	tp_use_nxpd _tp_param.p_use_nxpd
+#define	tp_use_rcc _tp_param.p_use_rcc
+#define	tp_tpdusize _tp_param.p_tpdusize
+#define	tp_class _tp_param.p_class
+#define	tp_winsize _tp_param.p_winsize
+#define	tp_no_disc_indications _tp_param.p_no_disc_indications
+#define	tp_dont_change_params _tp_param.p_dont_change_params
+#define	tp_netservice _tp_param.p_netservice
+#define	tp_version _tp_param.p_version
+#define	tp_ptpdusize _tp_param.p_ptpdusize
+
+	int					tp_l_tpdusize;
+		/* whereas tp_tpdusize is log2(the negotiated max size)
+		 * l_tpdusize is the size we'll use when sending, in # chars
+		 */
+
+	int					tp_rtv;			/* max round-trip time variance */
+	int					tp_rtt; 		/* smoothed round-trip time */
+	SeqNum				tp_rttseq;		/* packet being timed */
+	int					tp_rttemit;		/* when emitted, in ticks */
+	int					tp_idle;		/* last activity, in ticks */
+	short				tp_rxtcur;		/* current retransmit value */
+	short				tp_rxtshift;	/* log(2) of rexmt exp. backoff */
+	u_char				tp_cebit_off;	/* real DEC bit algorithms not in use */
+	u_char				tp_oktonagle;	/* Last unsent pckt may be append to */
+	u_char				tp_flags;		/* values: */
+#define TPF_NLQOS_PDN	 	TPFLAG_NLQOS_PDN
+#define TPF_PEER_ON_SAMENET	TPFLAG_PEER_ON_SAMENET
+#define TPF_GENERAL_ADDR	TPFLAG_GENERAL_ADDR
+#define TPF_DELACK			0x8
+#define TPF_ACKNOW			0x10
+
+/*
+ * Flag tests on tp_flags.  PEER_IS_LOCAL must name TPF_PEER_ON_SAMENET,
+ * the flag defined above; there is no TPF_PEER_ON_SAME_NET.
+ */
+#define PEER_IS_LOCAL(t)	(((t)->tp_flags & TPF_PEER_ON_SAMENET) != 0)
+#define USES_PDN(t)			(((t)->tp_flags & TPF_NLQOS_PDN) != 0)
+
+
+	unsigned 
+		tp_sendfcc:1,			/* shall next ack include FCC parameter? */
+		tp_trace:1,				/* is this pcb being traced? (not used yet) */
+		tp_perf_on:1,			/* 0/1 -> performance measuring on  */
+		tp_reneged:1,			/* have we reneged on cdt since last ack? */
+		tp_decbit:3,			/* dec bit was set, we're in reneg mode  */
+		tp_notdetached:1;		/* Call tp_detach before freeing XXXXXXX */
+
+#ifdef TP_PERF_MEAS
+	/* performance stats - see tp_stat.h */
+	struct tp_pmeas		*tp_p_meas;
+	struct mbuf			*tp_p_mbuf;
+#endif /* TP_PERF_MEAS */
+
+	/* addressing */
+	u_short				tp_domain;		/* domain (INET, ISO) */
+	/* for compatibility with the *old* way and with INET, be sure that
+	 * that lsuffix and fsuffix are aligned to a short addr.
+	 * having them follow the u_short *suffixlen should suffice (choke)
+	 */
+	u_short				tp_fsuffixlen;	/* foreign suffix */
+	char				tp_fsuffix[MAX_TSAP_SEL_LEN];
+	u_short				tp_lsuffixlen;	/* local suffix */
+	char				tp_lsuffix[MAX_TSAP_SEL_LEN];
+#define SHORT_LSUFXP(tpcb) ((short *)((tpcb)->tp_lsuffix))
+#define SHORT_FSUFXP(tpcb) ((short *)((tpcb)->tp_fsuffix))
+
+	/* Timer stuff */
+	u_char 				tp_vers;			/* protocol version */
+	u_char 				tp_peer_acktime;	/* used for DT retrans time */
+	u_char	 			tp_refstate;		/* values REF_FROZEN, etc. above */
+	struct tp_pcb		*tp_fasttimeo;		/* limit pcbs to examine */
+	u_int			 	tp_timer[TM_NTIMERS]; /* C timers */
+
+	struct sockbuf		tp_Xsnd;		/* for expedited data */
+	/* there is no separate tp_Xrcv sockbuf; the name aliases the socket's so_rcv */
+#define tp_Xrcv tp_sock->so_rcv
+	SeqNum				tp_Xsndnxt;	/* next XPD seq # to send */
+	SeqNum				tp_Xuna;		/* seq # of unacked XPD */
+	SeqNum				tp_Xrcvnxt;	/* next XPD seq # expect to recv */
+
+	/* AK subsequencing */
+	u_short				tp_s_subseq;	/* next subseq to send */
+	u_short				tp_r_subseq;	/* highest recv subseq */
+
+};
+
+/*
+ * NOTE(review): this is an object definition (not an extern declaration)
+ * in a header, so every file that includes it gets its own tentative
+ * definition; this relies on the linker merging common symbols.
+ */
+u_int	tp_start_win;
+
+/*
+ * ROUND(scaled_int): integer part of a value with an 8-bit fraction,
+ * plus one when the top fraction bit (0x80, i.e. one half) is set.
+ * The argument is evaluated twice.
+ */
+#define ROUND(scaled_int) (((scaled_int) >> 8) + (((scaled_int) & 0x80) ? 1:0))
+
+/* to round off a scaled int with an 8 bit fraction part */
+
+/*
+ * CONG_INIT_SAMPLE(pcb): start a new receiver congestion sample.
+ * Zeroes the received and CE-bit counters and sets the sample size to
+ * twice the local credit (at least 2).
+ * The body is a single braced block with the argument parenthesized, so
+ * it behaves as one statement and accepts any pointer expression; it may
+ * be followed by a semicolon or not, as before.
+ */
+#define CONG_INIT_SAMPLE(pcb) \
+	{ \
+		(pcb)->tp_cong_sample.cs_received = \
+		(pcb)->tp_cong_sample.cs_ce_set = 0; \
+		(pcb)->tp_cong_sample.cs_size = max((pcb)->tp_lcredit, 1) << 1; \
+	}
+
+/*
+ * CONG_UPDATE_SAMPLE(pcb, ce_bit): account for one received PDU.
+ * Counts the PDU (and its CE bit, if set).  Once cs_received reaches
+ * cs_size the sample is complete: if at least half of the PDUs had CE
+ * set, the scaled receive window tp_win_recv is cut by 1/8 (never below
+ * 1 << 8); otherwise it grows by 1 << 8.  tp_lcredit is then set to the
+ * rounded window and a new sample is started.
+ * The body is one braced block with both arguments parenthesized.
+ */
+#define CONG_UPDATE_SAMPLE(pcb, ce_bit) \
+    { \
+        (pcb)->tp_cong_sample.cs_received++; \
+        if (ce_bit) { \
+            (pcb)->tp_cong_sample.cs_ce_set++; \
+        } \
+        if ((pcb)->tp_cong_sample.cs_size <= (pcb)->tp_cong_sample.cs_received) { \
+            if (((pcb)->tp_cong_sample.cs_ce_set << 1) >=  \
+                        (pcb)->tp_cong_sample.cs_size ) { \
+                (pcb)->tp_win_recv -= (pcb)->tp_win_recv >> 3; /* multiply by .875 */ \
+                (pcb)->tp_win_recv = max(1 << 8, (pcb)->tp_win_recv); \
+            } \
+            else { \
+                (pcb)->tp_win_recv += (1 << 8); /* add one to the scaled int */ \
+            } \
+            (pcb)->tp_lcredit = ROUND((pcb)->tp_win_recv); \
+            CONG_INIT_SAMPLE((pcb)); \
+        } \
+    }
+
+/* Kernel-only declarations of globals that are defined in other files. */
+#ifdef KERNEL
+extern struct tp_refinfo 	tp_refinfo;
+extern struct timeval 	time;
+extern struct tp_ref	*tp_ref;
+extern struct tp_param	tp_param;
+extern struct nl_protosw  nl_protosw[];
+extern struct tp_pcb	*tp_listeners;
+extern struct tp_pcb	*tp_ftimeolist;
+#endif
+
+/*
+ * Conversions between a socket and its tp pcb.
+ * sototpcb() parenthesizes its argument so that any pointer expression
+ * may be passed.
+ * NOTE(review): sototpref() and tpcbtoref() use a tp_ref member, but
+ * struct tp_pcb as declared in this header has no such member; they look
+ * stale and will not compile if used - confirm before relying on them.
+ */
+#define	sototpcb(so) 	((struct tp_pcb *)((so)->so_pcb))
+#define	sototpref(so)	((sototpcb(so)->tp_ref))
+#define	tpcbtoso(tp)	((struct socket *)((tp)->tp_sock))
+#define	tpcbtoref(tp)	((struct tp_ref *)((tp)->tp_ref))
+
+#endif  /* __TP_PCB__ */
diff --git a/sys/netiso/tp_seq.h b/sys/netiso/tp_seq.h
new file mode 100644
index 00000000000..f14e5ae7c7d
--- /dev/null
+++ b/sys/netiso/tp_seq.h
@@ -0,0 +1,124 @@
+/*-
+ * Copyright (c) 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)tp_seq.h	8.1 (Berkeley) 6/10/93
+ */
+
+/***********************************************************
+		Copyright IBM Corporation 1987
+
+                      All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and its 
+documentation for any purpose and without fee is hereby granted, 
+provided that the above copyright notice appear in all copies and that
+both that copyright notice and this permission notice appear in 
+supporting documentation, and that the name of IBM not be
+used in advertising or publicity pertaining to distribution of the
+software without specific, written prior permission.  
+
+IBM DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING
+ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL
+IBM BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR
+ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+SOFTWARE.
+
+******************************************************************/
+
+/*
+ * ARGO Project, Computer Sciences Dept., University of Wisconsin - Madison
+ */
+/* 
+ * ARGO TP
+ *
+ * $Header: tp_seq.h,v 5.1 88/10/12 12:20:59 root Exp $
+ * $Source: /usr/argo/sys/netiso/RCS/tp_seq.h,v $
+ *
+ * These macros perform sequence number arithmetic modulo (2**7 or 2**31).
+ * The relevant fields in the tpcb are:
+ *  	tp_seqmask : the mask of bits that define the sequence space.
+ *  	tp_seqbit  : 1 + tp_seqmask
+ *  	tp_seqhalf : tp_seqbit / 2 or half the sequence space (rounded up)
+ * Not exactly fast, but at least it's maintainable.
+ */
+
+#ifndef __TP_SEQ__
+#define __TP_SEQ__
+
+/* SEQ(tpcb, x): reduce x into the pcb's sequence space by masking with tp_seqmask. */
+#define SEQ(tpcb,x) \
+	((x) & (tpcb)->tp_seqmask)
+
+/*
+ * SEQ_GT: seq > operand in modular sequence space.
+ * True when the difference seq - operand is positive and less than
+ * tp_seqhalf, or is non-positive with magnitude not less than tp_seqhalf
+ * (i.e. seq has wrapped past operand).  Arguments are evaluated more
+ * than once.
+ */
+#define SEQ_GT(tpcb, seq, operand ) \
+( ((int)((seq)-(operand)) > 0)\
+? ((int)((seq)-(operand)) < (int)(tpcb)->tp_seqhalf)\
+: !(-((int)(seq)-(operand)) < (int)(tpcb)->tp_seqhalf))
+
+/*
+ * SEQ_GEQ: seq >= operand in modular sequence space.
+ * Same test as SEQ_GT except that a zero difference takes the first
+ * branch, so equal values compare true.  Arguments are evaluated more
+ * than once.
+ */
+#define SEQ_GEQ(tpcb, seq, operand ) \
+( ((int)((seq)-(operand)) >= 0)\
+? ((int)((seq)-(operand)) < (int)(tpcb)->tp_seqhalf)\
+: !((-((int)(seq)-(operand))) < (int)(tpcb)->tp_seqhalf))
+
+/*
+ * SEQ_LEQ: seq <= operand in modular sequence space.
+ * True when the difference seq - operand is non-positive with magnitude
+ * less than tp_seqhalf, or is positive and not less than tp_seqhalf.
+ * Arguments are evaluated more than once.
+ */
+#define SEQ_LEQ(tpcb, seq, operand ) \
+( ((int)((seq)-(operand)) <= 0)\
+? ((-(int)((seq)-(operand))) < (int)(tpcb)->tp_seqhalf)\
+: !(((int)(seq)-(operand)) < (int)(tpcb)->tp_seqhalf))
+
+/*
+ * SEQ_LT: seq < operand in modular sequence space.
+ * Same test as SEQ_LEQ except that a zero difference takes the second
+ * branch, so equal values compare false.  Arguments are evaluated more
+ * than once.
+ */
+#define SEQ_LT(tpcb, seq, operand ) \
+( ((int)((seq)-(operand)) < 0)\
+? ((-(int)((seq)-(operand))) < (int)(tpcb)->tp_seqhalf)\
+: !(((int)(seq)-(operand)) < (int)(tpcb)->tp_seqhalf))
+	
+/*
+ * SEQ_MIN / SEQ_MAX: the smaller / larger of a and b under SEQ_GT
+ * ordering.  The selected operand is parenthesized so that expression
+ * arguments keep their own grouping inside the conditional.  Arguments
+ * are evaluated more than once.
+ */
+#define SEQ_MIN(tpcb, a, b) ( SEQ_GT(tpcb, a, b) ? (b) : (a))
+
+#define SEQ_MAX(tpcb, a, b) ( SEQ_GT(tpcb, a, b) ? (a) : (b))
+
+/* SEQ_INC: increment the lvalue Seq in place, then mask it back into the sequence space. */
+#define SEQ_INC(tpcb, Seq) ((++Seq), ((Seq) &= (tpcb)->tp_seqmask))
+
+/*
+ * SEQ_DEC: decrement the lvalue Seq in place, modulo the sequence space.
+ * Done by adding (tp_seqbit - 1) and masking, which avoids going below
+ * zero.
+ */
+#define SEQ_DEC(tpcb, Seq)\
+	((Seq) = (((Seq)+(unsigned)((int)(tpcb)->tp_seqbit - 1))&(tpcb)->tp_seqmask))
+
+/*
+ * SEQ_SUB / SEQ_ADD: value of Seq minus / plus amt, modulo the sequence
+ * space.  Neither macro modifies Seq.
+ * (amt) had better be less than the seq bit !
+ * amt is parenthesized so that an expression argument such as (a - b)
+ * is subtracted or cast as a whole.
+ */
+
+#define SEQ_SUB(tpcb, Seq, amt)\
+	(((Seq) + (unsigned)((int)(tpcb)->tp_seqbit - (amt))) & (tpcb)->tp_seqmask)
+#define SEQ_ADD(tpcb, Seq, amt) (((Seq) + (unsigned)(amt)) & (tpcb)->tp_seqmask)
+
+
+/* IN_RWINDOW: lwe <= seq < uwe in modular sequence space (lower edge inclusive). */
+#define IN_RWINDOW(tpcb, seq, lwe, uwe)\
+	( SEQ_GEQ(tpcb, seq, lwe) && SEQ_LT(tpcb, seq, uwe) )
+
+/* IN_SWINDOW: lwe < seq <= uwe in modular sequence space (upper edge inclusive). */
+#define IN_SWINDOW(tpcb, seq, lwe, uwe)\
+	( SEQ_GT(tpcb, seq, lwe) && SEQ_LEQ(tpcb, seq, uwe) )
+
+#endif /* __TP_SEQ__ */
diff --git a/sys/netiso/tp_stat.h b/sys/netiso/tp_stat.h
new file mode 100644
index 00000000000..bf6e1a5e124
--- /dev/null
+++ b/sys/netiso/tp_stat.h
@@ -0,0 +1,283 @@
+/*-
+ * Copyright (c) 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)tp_stat.h	8.1 (Berkeley) 6/10/93
+ */
+
+/***********************************************************
+		Copyright IBM Corporation 1987
+
+                      All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and its 
+documentation for any purpose and without fee is hereby granted, 
+provided that the above copyright notice appear in all copies and that
+both that copyright notice and this permission notice appear in 
+supporting documentation, and that the name of IBM not be
+used in advertising or publicity pertaining to distribution of the
+software without specific, written prior permission.  
+
+IBM DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING
+ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL
+IBM BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR
+ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+SOFTWARE.
+
+******************************************************************/
+
+/*
+ * ARGO Project, Computer Sciences Dept., University of Wisconsin - Madison
+ */
+/* 
+ * ARGO TP
+ *
+ * $Header: tp_stat.h,v 5.4 88/11/18 17:28:38 nhall Exp $
+ * $Source: /usr/argo/sys/netiso/RCS/tp_stat.h,v $
+ *
+ * Here are the data structures in which the global
+ * statistics(counters) are gathered.
+ */
+
+#ifndef __TP_STAT__
+#define __TP_STAT__
+
+/*
+ * Global TP statistics counters.  This declaration also defines the
+ * single instance, tp_stat, which the IncStat() macro increments.
+ * NOTE(review): because the object is defined here, every file that
+ * includes this header gets a tentative definition of tp_stat.
+ */
+struct tp_stat {
+	u_long ts_param_ignored;
+	u_long ts_unused3;
+	u_long ts_bad_csum;
+
+	u_long ts_inv_length;
+	u_long ts_inv_pcode;
+	u_long ts_inv_dutype;
+	u_long ts_negotfailed;
+	u_long ts_inv_dref;
+	u_long ts_inv_pval;
+	u_long ts_inv_sufx;
+	u_long ts_inv_aclass;
+
+	u_long ts_xtd_fmt;
+	u_long ts_use_txpd;
+	u_long ts_csum_off;
+	u_long	ts_send_drop;
+	u_long	ts_recv_drop;
+
+	u_long ts_xpd_intheway;/* xpd mark caused data flow to stop */
+	u_long ts_xpdmark_del;	/* xpd markers thrown away */
+	u_long ts_dt_ooo;		/* dt tpdus received out of order */
+	u_long ts_dt_niw;		/* dt tpdus received & not in window */
+	u_long ts_xpd_niw;		/* xpd tpdus received & not in window */
+	u_long ts_xpd_dup;		
+	u_long ts_dt_dup;		/* dt tpdus received & are duplicates */
+
+	u_long ts_zfcdt;		/* # times f credit went down to 0 */
+	u_long ts_lcdt_reduced; /* 
+		# times local cdt reduced on an acknowledgement.
+		*/
+
+	u_long	ts_pkt_rcvd; /* from ip */
+	u_long	ts_tpdu_rcvd; /* accepted as a TPDU in tp_input */
+	u_long	ts_tpdu_sent;
+	u_long	ts_unused2;
+
+	/* retransmission counts, one per TPDU type named in the suffix */
+	u_long	ts_retrans_cr;
+	u_long	ts_retrans_cc;
+	u_long	ts_retrans_dr;
+	u_long	ts_retrans_dt;
+	u_long	ts_retrans_xpd;
+	u_long	ts_conn_gaveup;
+
+	/* TPDUs sent, one counter per TPDU type */
+	u_long ts_ER_sent;
+	u_long	ts_DT_sent;
+	u_long	ts_XPD_sent;
+	u_long	ts_AK_sent;
+	u_long	ts_XAK_sent;
+	u_long	ts_DR_sent;
+	u_long	ts_DC_sent;
+	u_long	ts_CR_sent;
+	u_long	ts_CC_sent;
+
+	/* TPDUs received, one counter per TPDU type */
+	u_long ts_ER_rcvd;
+	u_long	ts_DT_rcvd;
+	u_long	ts_XPD_rcvd;
+	u_long	ts_AK_rcvd;
+	u_long	ts_XAK_rcvd;
+	u_long	ts_DR_rcvd;
+	u_long	ts_DC_rcvd;
+	u_long	ts_CR_rcvd;
+	u_long	ts_CC_rcvd;
+
+	/* timer statistics (presumably E-type and C-type timers; confirm in the timer code) */
+	u_long	ts_Eticks;
+	u_long	ts_Eexpired;
+	u_long	ts_Eset;
+	u_long	ts_Ecan_act;
+	u_long	ts_Cticks;
+	u_long	ts_Cexpired;
+	u_long	ts_Cset;
+	u_long	ts_Ccan_act;
+	u_long	ts_Ccan_inact;
+	u_long	ts_Fdelack;
+	u_long	ts_Fpruned;
+
+	u_long	ts_concat_rcvd;
+
+	u_long	ts_zdebug; /* zero dref to test timeout on conn estab tp_input.c */
+	u_long ts_ydebug; /* throw away pseudo-random pkts tp_input.c */
+	u_long ts_unused5;
+	u_long ts_unused; /* kludged concat to test separation tp_emit.c */
+	u_long ts_vdebug; /* kludge to test input size checking tp_emit.c */
+	u_long ts_unused4;
+	u_long ts_ldebug; /* faked a renegging of credit */
+
+	u_long ts_mb_small;
+	u_long ts_mb_cluster;
+	u_long ts_mb_len_distr[17];
+
+	u_long ts_eot_input;
+	u_long ts_eot_user;
+	u_long	ts_EOT_sent;
+	u_long ts_tp0_conn;
+	u_long ts_tp4_conn;
+	u_long ts_quench;
+	u_long ts_rcvdecbit;
+
+#define NRTT_CATEGORIES 4
+	/*  The 4 categories are:
+	 * 0 --> tp_flags: ~TPF_PEER_ON_SAMENET |  TPF_NL_PDN
+	 * 1 --> tp_flags: ~TPF_PEER_ON_SAMENET | ~TPF_NL_PDN
+	 * 2 --> tp_flags:  TPF_PEER_ON_SAMENET | ~TPF_NL_PDN
+	 * 3 --> tp_flags:  TPF_PEER_ON_SAMENET |  TPF_NL_PDN
+	 */
+	int ts_rtt[NRTT_CATEGORIES];
+	int ts_rtv[NRTT_CATEGORIES];
+
+	u_long ts_ackreason[_ACK_NUM_REASONS_];
+		/*  ACK_DONT 0 / ACK_STRAT_EACH 0x1 / ACK_STRAT_FULLWIN 0x4
+	  	 *	ACK_DUP 0x8 / ACK_EOT 0x10  / ACK_REORDER 0x20
+	  	 *	ACK_USRRCV **
+	  	 *	ACK_FCC **
+		 */
+} tp_stat ;
+/* Highest index of the per-window-size arrays in struct tp_pmeas (they have TP_PM_MAX+1 slots). */
+#define 	TP_PM_MAX			0xa /* 10 decimal */
+
+/*
+ * IncStat(x): increment member x of the global tp_stat.
+ * The empty comments around x are a leftover pre-ANSI idiom; the macro
+ * does not depend on token pasting, since "tp_stat. x ++" is valid.
+ */
+#define IncStat(x) tp_stat./**/x/**/++
+
+#ifdef TP_PERF_MEAS
+
+#define PStat(Tpcb, X) (Tpcb)->tp_p_meas->/**/X/**/
+#define IncPStat(Tpcb, X) if((Tpcb)->tp_perf_on) (Tpcb)->tp_p_meas->/**/X/**/++
+
+/* BEWARE OF MACROS like this ^^^ must be sure it's surrounded by {} if
+ * it's used in an if-else statement. 
+ */
+
+
+/* for perf measurement stuff: maximum window size it can handle */
+
+/*
+ * Per-connection performance measurements, reached through the pcb's
+ * tp_p_meas pointer (only present when TP_PERF_MEAS is defined).
+ */
+struct tp_pmeas {
+		/* the first few are distributions as a fn of window size 
+		 * only keep enough space for normal format plus 1 slot for
+		 * extended format, in case any windows larger than 15 are used
+		 */
+
+		/* 
+		 * tps_npdusent: for each call to tp_sbsend, we inc the 
+		 * element representing the number of pdus sent in this call
+		 * NOTE(review): no member named tps_npdusent exists in this
+		 * structure; this comment may describe a removed field.
+		 */
+		int		tps_win_lim_by_cdt[TP_PM_MAX+1]; 
+		int		tps_win_lim_by_data[TP_PM_MAX+1]; 
+		/* 
+		 * tps_sendtime: Each call to tp_sbsend() is timed.  For
+		 * Each window size, we keep the running average of the time
+		 * taken by tp_sbsend() for each window size.
+		 */
+		int	tps_sendtime[TP_PM_MAX+1]; 
+		/*
+		 * n_TMsendack: # times ack sent because timer went off
+		 * n_ack_cuz_eot: # times ack sent due to EOTSDU on incoming packet
+		 * n_ack_cuz_dup: # times ack sent for receiving a duplicate pkt.
+		 * n_ack_cuz_fullwin: # times ack sent for receiving the full window.
+		 * n_ack_cuz_doack: # times ack sent for having just reordered data.
+		 */
+		int		tps_n_TMsendack;
+		int		tps_n_ack_cuz_eot;
+		int		tps_n_ack_cuz_fullwin;
+		int		tps_n_ack_cuz_reorder;
+		int		tps_n_ack_cuz_dup;
+		int		tps_n_ack_cuz_strat;
+		/*
+		 * when we send an ack: how much less than the "expected" window
+		 * did we actually ack.  For example: if we last sent a credit
+		 * of 10, and we're acking now for whatever reason, and have
+		 * only received 6 since our last credit advertisement, we'll
+		 * keep the difference, 4, in this variable.
+		 */
+		int		tps_ack_early[TP_PM_MAX+1]; 
+		/*
+		 * when we ack, for the # pkts we actually acked w/ this ack,
+		 * how much cdt are we advertising?
+		 * [ size of window acknowledged ] [ cdt we're giving ]
+		 */
+		int		tps_cdt_acked[TP_PM_MAX+1][TP_PM_MAX+1]; 
+
+		/* per-connection TPDU counts, by TPDU type and direction */
+		int 	tps_AK_sent;
+		int 	tps_XAK_sent;
+		int 	tps_DT_sent;
+		int 	tps_XPD_sent;
+		int 	tps_AK_rcvd;
+		int 	tps_XAK_rcvd;
+		int 	tps_DT_rcvd;
+		int 	tps_XPD_rcvd;
+
+		/* presumably byte counts to/from the session and lower layers - confirm */
+		int		Nb_from_sess;
+		int		Nb_to_sess;
+		int		Nb_to_ll;
+		int		Nb_from_ll;
+};
+
+/*
+ * IFPERF(tpcb) ... ENDPERF bracket code that runs only when performance
+ * measurement is switched on for the pcb and a tp_p_meas record exists.
+ * IFPERF opens a brace that ENDPERF closes, so the two must be paired.
+ * The argument is not parenthesized; pass a plain identifier.
+ */
+#define  IFPERF(tpcb)  if (tpcb->tp_perf_on && tpcb->tp_p_meas) {
+#define  ENDPERF }
+
+#else
+
+/*
+ * Stubs used when TP_PERF_MEAS is not defined: PStat() resolves to a
+ * dummy variable, IncPStat() and tpmeas() expand to nothing / 0, and
+ * IFPERF ... ENDPERF becomes an if (0) block.
+ * NOTE(review): PStat_Junk is defined (not declared extern) in this
+ * header, so each including file gets its own tentative definition.
+ */
+int PStat_Junk;
+#define PStat(tpcb, x)  PStat_Junk
+#define IncPStat(tpcb, x)  /* no-op */
+#define tpmeas(a,b,c,d,e,f) 0
+
+#define IFPERF(x)	if (0) {
+#define ENDPERF }
+
+#endif /* TP_PERF_MEAS */
+
+#endif /* __TP_STAT__ */
diff --git a/sys/netiso/tp_states.h b/sys/netiso/tp_states.h
new file mode 100644
index 00000000000..ac6213a64d7
--- /dev/null
+++ b/sys/netiso/tp_states.h
@@ -0,0 +1,13 @@
+/* $Header$ */
+/* $Source$ */
+/*
+ * TP finite state machine state codes.  tp_pcbbind() compares the pcb's
+ * tp_state against TP_CLOSED, so these are the values held in tp_state.
+ */
+#define ST_ERROR 0x0
+#define TP_CLOSED 0x1
+#define TP_CRSENT 0x2
+#define TP_AKWAIT 0x3
+#define TP_OPEN 0x4
+#define TP_CLOSING 0x5
+#define TP_REFWAIT 0x6
+#define TP_LISTENING 0x7
+#define TP_CONFIRMING 0x8
+
+/* one more than the highest state code above (presumably the state-table dimension) */
+#define tp_NSTATES 0x9
diff --git a/sys/netiso/tp_states.init b/sys/netiso/tp_states.init
new file mode 100644
index 00000000000..89e53453866
--- /dev/null
+++ b/sys/netiso/tp_states.init
@@ -0,0 +1,75 @@
+/* $Header$ */
+/* $Source$ */
+{0x3,0x0},
+{0x6,0x1},
+{0x6,0x2},
+{0x6,0x0},
+{0x2,0x3},
+{0x2,0x0},
+{0x1,0x0},
+{0x5,0x0},
+{0x4,0x0},
+{0x7,0x0},
+{0x7,0x0},
+{0x1,0x4},
+{0x8,0x5},
+{0x8,0x6},
+{0x4,0x7},
+{0x3,0x8},
+{0x1,0x9},
+{0x2,0xa},
+{0x6,0xb},
+{0x1,0xc},
+{0x6,0xd},
+{0x6,0xe},
+{0x6,0xf},
+{0x6,0x10},
+{0x1,0x11},
+{0x6,0x12},
+{0x5,0x13},
+{0x4,0x14},
+{0x4,0x15},
+{0x2,0x16},
+{0x6,0x17},
+{0x3,0x18},
+{0x4,0x19},
+{0x4,0x1a},
+{0x4,0x1b},
+{0x3,0x1c},
+{0x4,0x1c},
+{0x4,0x1d},
+{0x4,0x1e},
+{0x4,0x1f},
+{0x4,0x20},
+{0x3,0x20},
+{0x6,0x21},
+{0x5,0x22},
+{0x6,0x23},
+{0x5,0x24},
+{0x3,0x25},
+{0x5,0x26},
+{0x5,0x27},
+{0x4,0x28},
+{0x4,0x29},
+{0x5,0x2a},
+{0x6,0x2b},
+{0x1,0x2c},
+{0x4,0x2d},
+{0x4,0x2e},
+{0x4,0x2f},
+{0x4,0x30},
+{0x4,0x31},
+{0x4,0x32},
+{0x4,0x33},
+{0x4,0x34},
+{0x4,0x35},
+{0x4,0x36},
+{0x6,0x37},
+{0x6,0x38},
+{0x7,0x0},
+{0x5,0x0},
+{0x3,0x0},
+{0x2,0x0},
+{0x4,0x0},
+{0x6,0x0},
+{0x1,0x0},
diff --git a/sys/netiso/tp_subr.c b/sys/netiso/tp_subr.c
new file mode 100644
index 00000000000..1259ee41253
--- /dev/null
+++ b/sys/netiso/tp_subr.c
@@ -0,0 +1,947 @@
+/*-
+ * Copyright (c) 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)tp_subr.c	8.1 (Berkeley) 6/10/93
+ */
+
+/***********************************************************
+		Copyright IBM Corporation 1987
+
+                      All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and its 
+documentation for any purpose and without fee is hereby granted, 
+provided that the above copyright notice appear in all copies and that
+both that copyright notice and this permission notice appear in 
+supporting documentation, and that the name of IBM not be
+used in advertising or publicity pertaining to distribution of the
+software without specific, written prior permission.  
+
+IBM DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING
+ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL
+IBM BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR
+ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+SOFTWARE.
+
+******************************************************************/
+
+/*
+ * ARGO Project, Computer Sciences Dept., University of Wisconsin - Madison
+ */
+/* 
+ * ARGO TP
+ *
+ * $Header: tp_subr.c,v 5.3 88/11/18 17:28:43 nhall Exp $
+ * $Source: /usr/argo/sys/netiso/RCS/tp_subr.c,v $
+ *
+ * The main work of data transfer is done here.
+ * These routines are called from tp.trans.
+ * They include the routines that check the validity of acks and Xacks,
+ * (tp_goodack() and tp_goodXack() )
+ * take packets from socket buffers and send them (tp_send()),
+ * drop the data from the socket buffers (tp_sbdrop()),  
+ * and put incoming packet data into socket buffers (tp_stash()).
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/protosw.h>
+#include <sys/errno.h>
+#include <sys/time.h>
+#include <sys/kernel.h>
+
+#include <netiso/tp_ip.h>
+#include <netiso/iso.h>
+#include <netiso/argo_debug.h>
+#include <netiso/tp_timer.h>
+#include <netiso/tp_param.h>
+#include <netiso/tp_stat.h>
+#include <netiso/tp_pcb.h>
+#include <netiso/tp_tpdu.h>
+#include <netiso/tp_trace.h>
+#include <netiso/tp_meas.h>
+#include <netiso/tp_seq.h>
+
+int		tp_emit(), tp_sbdrop();
+int		tprexmtthresh = 3;
+extern int	ticks;
+void	tp_send();
+
+/*
+ * CALLED FROM:
+ *	tp.trans, on arrival of an XAK tpdu
+ * FUNCTION and ARGUMENTS:
+ *	Compares (seq), the sequence number carried by the XAK, with
+ *	tp_Xuna, the oldest expedited sequence number not yet acked.
+ *	On a match tp_Xuna moves up to tp_Xsndnxt and the record at the
+ *	head of the XPD send socket buffer (tp_Xsnd) is dropped.
+ * RETURN VALUE:
+ *	1 if the XAK caused that drop, 0 if it caused no action.
+ */
+int
+tp_goodXack(tpcb, seq)
+	struct tp_pcb	*tpcb;
+	SeqNum 			seq;
+{
+
+	IFTRACE(D_XPD)
+		tptraceTPCB(TPPTgotXack, seq,
+			tpcb->tp_Xuna, tpcb->tp_Xsndnxt,
+			tpcb->tp_sndnew, tpcb->tp_snduna);
+	ENDTRACE
+
+	/* an XAK for anything but tp_Xuna acknowledges nothing new */
+	if (seq != tpcb->tp_Xuna)
+		return 0;
+
+	tpcb->tp_Xuna = tpcb->tp_Xsndnxt;
+
+	/*
+	 * Only one XPD tpdu can be in the Xsnd socket buffer at any time,
+	 * so dropping the first record drops all of it.  Should more than
+	 * one ever be allowed there, the number of characters that went
+	 * with each XPD tpdu would have to be tracked, which is messier.
+	 */
+	IFDEBUG(D_XPD)
+		dump_mbuf(tpcb->tp_Xsnd.sb_mb,
+			"tp_goodXack Xsnd before sbdrop");
+	ENDDEBUG
+
+	IFTRACE(D_XPD)
+		tptraceTPCB(TPPTmisc, "goodXack: dropping cc ",
+			(int)(tpcb->tp_Xsnd.sb_cc), 0, 0, 0);
+	ENDTRACE
+
+	sbdroprecord(&tpcb->tp_Xsnd);
+	return 1;
+}
+
+/*
+ * CALLED FROM:
+ *  tp_goodack()
+ * FUNCTION and ARGUMENTS:
+ *  Takes the clock ticks elapsed since tpcb->tp_rttemit as a new
+ *  round trip sample and folds it into
+ *  the smoothed average round trip time (tp_rtt) and the
+ *  roundtrip time variance (tp_rtv) - actually deviation, not variance.
+ * RETURN VALUE:
+ * void
+ */
+
+void
+tp_rtt_rtv(tpcb)
+register struct tp_pcb *tpcb;
+{
+	int old = tpcb->tp_rtt;
+	int delta = 0, elapsed = ticks - tpcb->tp_rttemit;
+
+	if (tpcb->tp_rtt != 0) {
+		/*
+		 * rtt is the smoothed round trip time in machine clock ticks (hz).
+		 * It is stored as a fixed point number, unscaled (unlike the tcp
+		 * srtt).  The rationale here is that it is only significant to the
+		 * nearest unit of slowtimo, which is at least 8 machine clock ticks
+		 * so there is no need to scale.  The smoothing is done according
+		 * to the same formula as TCP (rtt = rtt*7/8 + measured_rtt/8).
+		 */
+		delta = elapsed - tpcb->tp_rtt;
+		if ((tpcb->tp_rtt += (delta >> TP_RTT_ALPHA)) <= 0)
+			tpcb->tp_rtt = 1;
+		/*
+		 * rtv is a smoothed accumulated mean difference, unscaled
+		 * for reasons expressed above.
+		 * It is smoothed with an alpha of .75, and the round trip timer
+		 * will be set to rtt + 4*rtv, also as TCP does.
+		 */
+		if (delta < 0)
+			delta = -delta;
+		if ((tpcb->tp_rtv += ((delta - tpcb->tp_rtv) >> TP_RTV_ALPHA)) <= 0)
+			tpcb->tp_rtv = 1;
+	} else {
+		/*
+		 * No rtt measurement yet - use the unsmoothed rtt.
+		 * Set the variance to half the rtt (so our first
+		 * retransmit happens at 3*rtt).  delta keeps its initial 0.
+		 */
+		tpcb->tp_rtt = elapsed;
+		tpcb->tp_rtv = elapsed >> 1;
+	}
+	tpcb->tp_rttemit = 0;
+	tpcb->tp_rxtshift = 0;
+	/*
+	 * Quoting TCP: "the retransmit should happen at rtt + 4 * rttvar.
+	 * Because of the way we do the smoothing, srtt and rttvar
+	 * will each average +1/2 tick of bias.  When we compute
+	 * the retransmit timer, we want 1/2 tick of rounding and
+	 * 1 extra tick because of +-1/2 tick uncertainty in the
+	 * firing of the timer.  The bias will give us exactly the
+	 * 1.5 tick we need.  But, because the bias is
+	 * statistical, we have to test that we don't drop below
+	 * the minimum feasible timer (which is 2 ticks)."
+	 */
+	TP_RANGESET(tpcb->tp_dt_ticks, TP_REXMTVAL(tpcb),
+		tpcb->tp_peer_acktime, 128 /* XXX */);
+	IFDEBUG(D_RTT)
+		printf("%s tpcb 0x%x, elapsed %d, delta %d, rtt %d, rtv %d, old %d\n",
+			"tp_rtt_rtv:",tpcb,elapsed,delta,tpcb->tp_rtt,tpcb->tp_rtv,old);
+	ENDDEBUG
+	tpcb->tp_rxtcur = tpcb->tp_dt_ticks;
+}
+
+/*
+ * CALLED FROM:
+ *  tp.trans when an AK arrives
+ * FUNCTION and ARGUMENTS:
+ * 	Given (cdt), the credit from the AK tpdu,
+ *	(seq), the sequence number from the AK tpdu, and
+ *	(subseq), the subsequence number handed in with them,
+ *  tp_goodack() determines if the AK acknowledges something in the send
+ * 	window, and if so, drops the appropriate packets from the retransmission
+ *  list, computes the round trip time, and updates the retransmission timer
+ *  based on the new smoothed round trip time.
+ * RETURN VALUE:
+ * 	Returns 1 if
+ * 	EITHER it actually acked something heretofore unacknowledged
+ * 	OR no news but the credit should be processed.
+ * 	If something heretofore unacked was acked with this sequence number,
+ * 	the appropriate tpdus are dropped from the retransmission control list,
+ * 	by calling tp_sbdrop().  No need to see the tpdu itself.
+ */
+int
+tp_goodack(tpcb, cdt, seq, subseq)
+	register struct tp_pcb	*tpcb;
+	u_int					cdt;
+	register SeqNum			seq;
+	u_int					subseq;
+{
+	int 	old_fcredit = tpcb->tp_fcredit; /* printed on every exit path */
+	int 	bang = 0; 	/* bang --> ack for something heretofore unacked */
+	u_int	bytes_acked;
+
+	IFDEBUG(D_ACKRECV)
+		printf("goodack tpcb 0x%x seq 0x%x cdt %d una 0x%x new 0x%x nxt 0x%x\n",
+			tpcb, seq, cdt, tpcb->tp_snduna, tpcb->tp_sndnew, tpcb->tp_sndnxt);
+	ENDDEBUG
+	IFTRACE(D_ACKRECV)
+		tptraceTPCB(TPPTgotack, 
+			seq,cdt, tpcb->tp_snduna,tpcb->tp_sndnew,subseq); 
+	ENDTRACE
+
+	IFPERF(tpcb)
+		tpmeas(tpcb->tp_lref, TPtime_ack_rcvd, (struct timeval *)0, seq, 0, 0);
+	ENDPERF
+
+	if (seq == tpcb->tp_snduna) {
+		if (subseq < tpcb->tp_r_subseq ||
+			(subseq == tpcb->tp_r_subseq && cdt <= tpcb->tp_fcredit)) {
+		discard_the_ack:
+			IFDEBUG(D_ACKRECV)
+				printf("goodack discard : tpcb 0x%x subseq %d r_subseq %d\n",
+					tpcb, subseq, tpcb->tp_r_subseq);
+			ENDDEBUG
+			goto done;
+		}
+		if (cdt == tpcb->tp_fcredit /*&& thus subseq > tpcb->tp_r_subseq */) {
+			tpcb->tp_r_subseq = subseq;
+			if (tpcb->tp_timer[TM_data_retrans] == 0)
+				tpcb->tp_dupacks = 0;
+			else if (++tpcb->tp_dupacks == tprexmtthresh) {
+				/* partner went out of his way to signal with different
+				   subsequences that he has the same lack of an expected
+				   packet.  This may be an early indication of a loss */
+
+				SeqNum onxt = tpcb->tp_sndnxt;
+				struct mbuf *onxt_m = tpcb->tp_sndnxt_m;
+				u_int win = min(tpcb->tp_fcredit,
+							tpcb->tp_cong_win / tpcb->tp_l_tpdusize) / 2;
+				IFDEBUG(D_ACKRECV)
+					printf("%s tpcb 0x%x seq 0x%x rttseq 0x%x onxt 0x%x\n",
+						"goodack dupacks:", tpcb, seq, tpcb->tp_rttseq, onxt);
+				ENDDEBUG
+				if (win < 2)
+					win = 2;
+				tpcb->tp_ssthresh = win * tpcb->tp_l_tpdusize;
+				tpcb->tp_timer[TM_data_retrans] = 0;
+				tpcb->tp_rttemit = 0;
+				tpcb->tp_sndnxt = tpcb->tp_snduna;
+				tpcb->tp_sndnxt_m = 0;
+				tpcb->tp_cong_win = tpcb->tp_l_tpdusize;
+				tp_send(tpcb);
+				tpcb->tp_cong_win = tpcb->tp_ssthresh +
+					tpcb->tp_dupacks * tpcb->tp_l_tpdusize;
+				if (SEQ_GT(tpcb, onxt, tpcb->tp_sndnxt)) {
+					tpcb->tp_sndnxt = onxt;
+					tpcb->tp_sndnxt_m = onxt_m;
+				}
+
+			} else if (tpcb->tp_dupacks > tprexmtthresh) {
+				tpcb->tp_cong_win += tpcb->tp_l_tpdusize;
+			}
+			goto done;
+		}
+	} else if (SEQ_LT(tpcb, seq, tpcb->tp_snduna))
+		goto discard_the_ack;
+	/*
+	 * If the congestion window was inflated to account
+	 * for the other side's cached packets, retract it.
+	 */
+	if (tpcb->tp_dupacks > tprexmtthresh &&
+		tpcb->tp_cong_win > tpcb->tp_ssthresh)
+			tpcb->tp_cong_win = tpcb->tp_ssthresh;
+	tpcb->tp_r_subseq = subseq;
+	old_fcredit = tpcb->tp_fcredit;
+	tpcb->tp_fcredit = cdt;
+	if (cdt > tpcb->tp_maxfcredit)
+		tpcb->tp_maxfcredit = cdt;
+	tpcb->tp_dupacks = 0;
+
+	if (IN_SWINDOW(tpcb, seq, tpcb->tp_snduna, tpcb->tp_sndnew)) {
+
+		tpsbcheck(tpcb, 0);
+		bytes_acked = tp_sbdrop(tpcb, seq);
+		tpsbcheck(tpcb, 1);
+		/*
+		 * If transmit timer is running and timed sequence
+		 * number was acked, update smoothed round trip time.
+		 * Since we now have an rtt measurement, cancel the
+		 * timer backoff (cf., Phil Karn's retransmit alg.).
+		 * Recompute the initial retransmit timer.
+		 */
+		if (tpcb->tp_rttemit && SEQ_GT(tpcb, seq, tpcb->tp_rttseq))
+			tp_rtt_rtv(tpcb);
+		/*
+		 * If all outstanding data is acked, stop retransmit timer.
+		 * If there is more data to be acked, restart retransmit
+		 * timer, using current (possibly backed-off) value.
+		 * OSI combines the keepalive and persistence functions.
+		 * So, there is no persistence timer per se, to restart.
+		 */
+		if (tpcb->tp_class != TP_CLASS_0)
+			tpcb->tp_timer[TM_data_retrans] =
+				(seq == tpcb->tp_sndnew) ? 0 : tpcb->tp_rxtcur;
+		/*
+		 * When new data is acked, open the congestion window.
+		 * If the window gives us less than ssthresh packets
+		 * in flight, open exponentially (maxseg per packet).
+		 * Otherwise open linearly: maxseg per window
+		 * (maxseg^2 / cwnd per packet), plus a constant
+		 * fraction of a packet (maxseg/8) to help larger windows
+		 * open quickly enough.
+		 */
+		{
+			u_int cw = tpcb->tp_cong_win, incr = tpcb->tp_l_tpdusize;
+
+			incr = min(incr, bytes_acked);
+			if (cw > tpcb->tp_ssthresh)
+				incr = incr * incr / cw + incr / 8;
+			tpcb->tp_cong_win =
+				min(cw + incr, tpcb->tp_sock->so_snd.sb_hiwat);
+		}
+		tpcb->tp_snduna = seq;
+		if (SEQ_LT(tpcb, tpcb->tp_sndnxt, seq)) {
+				tpcb->tp_sndnxt = seq;
+				tpcb->tp_sndnxt_m = 0;
+		}
+		bang++;
+	} 
+
+	if( cdt != 0 && old_fcredit == 0 ) {
+		tpcb->tp_sendfcc = 1;
+	}
+	if (cdt == 0) {
+		if (old_fcredit != 0)
+			IncStat(ts_zfcdt);
+		/* The following might mean that the window shrunk */
+		if (tpcb->tp_timer[TM_data_retrans]) {
+			tpcb->tp_timer[TM_data_retrans] = 0;
+			tpcb->tp_timer[TM_sendack] = tpcb->tp_dt_ticks;
+			if (tpcb->tp_sndnxt != tpcb->tp_snduna) {
+				tpcb->tp_sndnxt = tpcb->tp_snduna;
+				tpcb->tp_sndnxt_m = 0;
+			}
+		}
+	}
+	tpcb->tp_fcredit = cdt;
+	bang |= (old_fcredit < cdt);
+
+done:
+	IFDEBUG(D_ACKRECV)
+		printf("goodack returns 0x%x, cdt 0x%x ocdt 0x%x cwin 0x%x\n",
+			bang, cdt, old_fcredit, tpcb->tp_cong_win);
+	ENDDEBUG
+	/* if (bang) XXXXX Very bad to remove this test, but something's broken */
+	tp_send(tpcb);	/* unconditional: the test above is commented out */
+	return (bang);
+}
+
+/*
+ * CALLED FROM:
+ *  tp_goodack()
+ * FUNCTION and ARGUMENTS:
+ *  drops everything up TO but not INCLUDING seq # (seq) from the
+ *  retransmission queue and RETURNS the number of bytes dropped.
+ */
+int
+tp_sbdrop(tpcb, seq)
+	register struct 	tp_pcb 			*tpcb;
+	SeqNum					seq;
+{
+	struct sockbuf *sb = &tpcb->tp_sock->so_snd;
+	register int i = SEQ_SUB(tpcb, seq, tpcb->tp_snduna);
+	int	oldcc = sb->sb_cc, oldi = i;
+	if (i >= tpcb->tp_seqhalf)
+		printf("tp_sbdrop: dropping too much -- should panic\n");
+	while (i-- > 0 && sb->sb_mb)	/* no records left once sb_mb is 0 */
+		sbdroprecord(sb);
+	IFDEBUG(D_ACKRECV)
+		printf("tp_sbdroping %d pkts %d bytes on %x at 0x%x\n",
+			oldi, oldcc - sb->sb_cc, tpcb, seq);
+	ENDDEBUG
+	if (sb->sb_flags & SB_NOTIFY)
+		sowwakeup(tpcb->tp_sock);
+	return (oldcc - sb->sb_cc);
+}
+
+/*
+ * CALLED FROM:
+ * 	tp.trans on user send request, arrival of AK and arrival of XAK
+ * FUNCTION and ARGUMENTS:
+ * 	Emits tpdus starting at sequence number (tpcb->tp_sndnxt).
+ * 	Emits until a) runs out of data, or  b) runs into an XPD mark, or
+ * 			c) it hits seq number (highseq) limited by cong or credit.
+ *
+ * 	If you want XPD to buffer > 1 du per socket buffer, you can
+ * 	modify this to issue XPD tpdus also, but then it'll have
+ * 	to take some argument(s) to distinguish between the type of DU to
+ * 	hand tp_emit.
+ *
+ * 	When something is sent for the first time, its time-of-send
+ * 	is stashed (in system clock ticks rather than pf_slowtimo ticks).
+ *  When the ack arrives, the smoothed round-trip time is figured
+ *  using this value.
+ */
+void
+tp_send(tpcb)
+	register struct tp_pcb	*tpcb;
+{
+	register int			len;
+	register struct mbuf	*m;
+	struct mbuf				*mb = 0;
+	struct 	sockbuf			*sb = &tpcb->tp_sock->so_snd;
+	unsigned int			eotsdu = 0;
+	SeqNum					highseq, checkseq;
+	int						idle, idleticks, off, cong_win;
+#ifdef TP_PERF_MEAS
+	int			 			send_start_time = ticks;
+	SeqNum					oldnxt = tpcb->tp_sndnxt; 
+#endif /* TP_PERF_MEAS */
+
+	idle = (tpcb->tp_snduna == tpcb->tp_sndnew);
+	if (idle) {
+		idleticks = tpcb->tp_inact_ticks - tpcb->tp_timer[TM_inact];
+		if (idleticks > tpcb->tp_dt_ticks)
+			/*
+			 * We have been idle for "a while" and no acks are
+			 * expected to clock out any data we send --
+			 * slow start to get ack "clock" running again.
+			 */
+			tpcb->tp_cong_win = tpcb->tp_l_tpdusize;
+	}
+
+	cong_win = tpcb->tp_cong_win;
+	highseq = SEQ(tpcb, tpcb->tp_fcredit + tpcb->tp_snduna);
+	if (tpcb->tp_Xsnd.sb_mb)
+		highseq = SEQ_MIN(tpcb, highseq, tpcb->tp_sndnew);
+		
+	IFDEBUG(D_DATA)
+		printf("tp_send enter tpcb 0x%x nxt 0x%x win %d high 0x%x\n",
+				tpcb, tpcb->tp_sndnxt, cong_win, highseq);
+	ENDDEBUG
+	IFTRACE(D_DATA)
+		tptraceTPCB( TPPTmisc, "tp_send sndnew snduna", 
+			tpcb->tp_sndnew,  tpcb->tp_snduna, 0, 0);
+		tptraceTPCB( TPPTmisc, "tp_send tpcb->tp_sndnxt win fcredit congwin", 
+			tpcb->tp_sndnxt, cong_win, tpcb->tp_fcredit, tpcb->tp_cong_win);
+	ENDTRACE
+	IFTRACE(D_DATA)
+		tptraceTPCB( TPPTmisc, "tp_send 2 nxt high fcredit congwin", 
+			tpcb->tp_sndnxt, highseq, tpcb->tp_fcredit, cong_win);
+	ENDTRACE
+
+	if (tpcb->tp_sndnxt_m)
+		m = tpcb->tp_sndnxt_m;
+	else {
+		off = SEQ_SUB(tpcb, tpcb->tp_sndnxt, tpcb->tp_snduna);
+		for (m = sb->sb_mb; m && off > 0; m = m->m_next)
+			off--;
+	}
+send:
+	/*
+	 * Avoid silly window syndrome here . . . figure out how!
+	 */
+	checkseq = tpcb->tp_sndnum;
+	if (idle && SEQ_LT(tpcb, tpcb->tp_sndnum, highseq))
+		checkseq = highseq; /* i.e. DON'T retain highest assigned packet */
+
+	while ((SEQ_LT(tpcb, tpcb->tp_sndnxt, highseq)) && m && cong_win > 0) {
+
+		eotsdu = (m->m_flags & M_EOR) != 0;
+		len = m->m_pkthdr.len;
+		if (tpcb->tp_sndnxt == checkseq && eotsdu == 0 &&
+			len < (tpcb->tp_l_tpdusize / 2))
+				break;  /* Nagle . . . . . */
+		cong_win -= len;
+		/* make a copy - mb goes into the retransmission list 
+		 * while m gets emitted.  m_copy won't copy a zero-length mbuf.
+		 */
+		mb = m;
+		m = m_copy(mb, 0, M_COPYALL);
+		if (m == MNULL)
+				break;
+		IFTRACE(D_STASH)
+			tptraceTPCB( TPPTmisc, 
+				"tp_send mcopy nxt high eotsdu len", 
+				tpcb->tp_sndnxt, highseq, eotsdu, len);
+		ENDTRACE
+
+		IFDEBUG(D_DATA)
+			printf("tp_sending tpcb 0x%x nxt 0x%x\n",
+				tpcb, tpcb->tp_sndnxt);
+		ENDDEBUG
+		/* when headers are precomputed, may need to fill
+			   in checksum here */
+		if (tpcb->tp_sock->so_error =
+			tp_emit(DT_TPDU_type, tpcb, tpcb->tp_sndnxt, eotsdu, m)) {
+			/* error */
+			break;
+		}
+		m = mb->m_nextpkt;
+		tpcb->tp_sndnxt_m = m;
+		if (tpcb->tp_sndnxt == tpcb->tp_sndnew) {
+			SEQ_INC(tpcb, tpcb->tp_sndnew);
+			/*
+			 * Time this transmission if not a retransmission and
+			 * not currently timing anything.
+			 */
+			if (tpcb->tp_rttemit == 0) {
+				tpcb->tp_rttemit = ticks;
+				tpcb->tp_rttseq = tpcb->tp_sndnxt;
+			}
+			tpcb->tp_sndnxt = tpcb->tp_sndnew;
+		} else
+			SEQ_INC(tpcb, tpcb->tp_sndnxt);
+		/*
+		 * Set retransmit timer if not currently set.
+		 * Initial value for retransmit timer is smoothed
+		 * round-trip time + 2 * round-trip time variance.
+		 * Initialize shift counter which is used for backoff
+		 * of retransmit time.
+		 */
+		if (tpcb->tp_timer[TM_data_retrans] == 0 &&
+			tpcb->tp_class != TP_CLASS_0) {
+			tpcb->tp_timer[TM_data_retrans] = tpcb->tp_dt_ticks;
+			tpcb->tp_timer[TM_sendack] = tpcb->tp_keepalive_ticks;
+			tpcb->tp_rxtshift = 0;
+		}
+	}
+	if (SEQ_GT(tpcb, tpcb->tp_sndnew, tpcb->tp_sndnum))
+		tpcb->tp_oktonagle = 0;
+#ifdef TP_PERF_MEAS
+	IFPERF(tpcb)
+		{
+			register int npkts;
+			int	 elapsed = ticks - send_start_time, *t;
+			struct timeval now;
+
+			npkts = SEQ_SUB(tpcb, tpcb->tp_sndnxt, oldnxt);
+			if (npkts > 0) 
+				tpcb->tp_Nwindow++;
+			if (npkts > TP_PM_MAX) 
+				npkts = TP_PM_MAX; 
+			/* move the smoothed send time for npkts toward this sample */
+			t = &(tpcb->tp_p_meas->tps_sendtime[npkts]);
+			*t += (elapsed - *t) >> TP_RTT_ALPHA;
+
+			if (mb == 0) {
+				IncPStat(tpcb, tps_win_lim_by_data[npkts] );
+			} else {
+				IncPStat(tpcb, tps_win_lim_by_cdt[npkts] );
+				/* not true with congestion-window being used */
+			}
+			/* NOTE(review): this used to pass &elapsed and the undeclared
+			   "newseq"; oldnxt is assumed to be the intended seq - confirm */
+			now.tv_sec = elapsed / hz;
+			now.tv_usec = (elapsed - (hz * now.tv_sec)) * 1000000 / hz;
+			tpmeas( tpcb->tp_lref, 
+					TPsbsend, &now, oldnxt, tpcb->tp_Nwindow, npkts);
+		}
+	ENDPERF
+#endif /* TP_PERF_MEAS */
+
+
+	IFTRACE(D_DATA)
+		tptraceTPCB( TPPTmisc, 
+			"tp_send at end: new nxt eotsdu error",
+			tpcb->tp_sndnew, tpcb->tp_sndnxt, eotsdu, tpcb->tp_sock->so_error);
+		
+	ENDTRACE
+}
+
+int TPNagleok;
+int TPNagled;
+
+/*
+ * Pre-packetize the data in the send sockbuf according to negotiated
+ * mtu: (m) is appended to so_snd as records of at most maxsize bytes,
+ * or merged into the last record when tp_oktonagle is set and it fits.
+ * Do it here where we can safely wait for mbufs.  A nonzero (eotsdu)
+ * marks the last record M_EOR.  Always returns 0.
+ * This presumes knowledge of sockbuf conventions.
+ * TODO: allocate space for header and fill it in (once!).
+ */
+tp_packetize(tpcb, m, eotsdu)
+register struct tp_pcb *tpcb;
+register struct mbuf *m;
+int eotsdu;
+{
+	register struct mbuf *n;
+	register struct sockbuf *sb = &tpcb->tp_sock->so_snd;
+	int	maxsize = tpcb->tp_l_tpdusize - tp_headersize(DT_TPDU_type, tpcb)
+			- (tpcb->tp_use_checksum?4:0) ;
+	int totlen = m->m_pkthdr.len;
+	struct mbuf *m_split();
+	IFDEBUG(D_DATA)
+		printf("SEND BF: maxsize %d totlen %d eotsdu %d sndnum 0x%x\n",
+			maxsize, totlen, eotsdu, tpcb->tp_sndnum);
+	ENDDEBUG
+	if (tpcb->tp_oktonagle) {
+		if ((n = sb->sb_mb) == 0)
+			panic("tp_packetize");
+		while (n->m_act)
+			n = n->m_act;
+		if (n->m_flags & M_EOR)
+			panic("tp_packetize 2");
+		SEQ_INC(tpcb, tpcb->tp_sndnum);
+		if (totlen + n->m_pkthdr.len < maxsize) {
+			/* There is an unsent packet with space, combine data */
+			struct mbuf *old_n = n;
+			tpsbcheck(tpcb,3);
+			n->m_pkthdr.len += totlen;
+			while (n->m_next)
+				n = n->m_next;
+			sbcompress(sb, m, n);
+			tpsbcheck(tpcb,4);
+			n = old_n;
+			TPNagled++;
+			goto out;
+		}
+	}
+	while (m) {
+		n = m;
+		if (totlen > maxsize) {
+			if ((m = m_split(n, maxsize, M_WAIT)) == 0)
+				panic("tp_packetize");
+		} else
+			m = 0;
+		totlen -= maxsize;
+		tpsbcheck(tpcb, 5);
+		sbappendrecord(sb, n);
+		tpsbcheck(tpcb, 6);
+		SEQ_INC(tpcb, tpcb->tp_sndnum);
+	}
+out:
+	if (eotsdu) {
+		n->m_flags |= M_EOR;  /* XXX belongs at end */
+		tpcb->tp_oktonagle = 0;
+	} else {
+		SEQ_DEC(tpcb, tpcb->tp_sndnum);
+		tpcb->tp_oktonagle = 1;
+		TPNagleok++;
+	}
+	IFDEBUG(D_DATA)
+		printf("SEND out: oktonagle %d sndnum 0x%x\n",
+			tpcb->tp_oktonagle, tpcb->tp_sndnum);
+	ENDDEBUG
+	return 0;
+}
+
+
+/*
+ * NAME: tp_stash()
+ * CALLED FROM:
+ *	tp.trans on arrival of a DT tpdu
+ * FUNCTION, ARGUMENTS, and RETURN VALUE:
+ * 	Returns 1 if
+ *		a) something new arrived and it's got eotsdu_reached bit on,
+ * 		b) this arrival caused other out-of-sequence things to be
+ *    	accepted, or
+ * 		c) this arrival is the highest seq # for which we last gave credit
+ *   	(sender just sent a whole window)
+ *  In other words, returns 1 if tp should send an ack immediately, 0 if
+ *  the ack can wait a while.
+ *
+ * Note: this implementation no longer renegs on credit, (except
+ * when debugging option D_RENEG is on, for the purpose of testing
+ * ack subsequencing), so we don't need to check for incoming tpdus
+ * being in a reneged portion of the window.
+ */
+
+tp_stash(tpcb, e)
+	register struct tp_pcb		*tpcb;
+	register struct tp_event	*e;
+{
+	register int		ack_reason= tpcb->tp_ack_strat & ACK_STRAT_EACH;
+									/* 0--> delay acks until full window */
+									/* 1--> ack each tpdu */
+#ifndef lint
+#define E e->ATTR(DT_TPDU)
+#else /* lint */
+#define E e->ev_union.EV_DT_TPDU
+#endif /* lint */
+	/* e_eot set: flag the data M_EOR and clear its m_act link */
+	if ( E.e_eot ) {
+		register struct mbuf *n = E.e_data;
+		n->m_flags |= M_EOR;
+		n->m_act = 0;
+	}
+		IFDEBUG(D_STASH)
+			dump_mbuf(tpcb->tp_sock->so_rcv.sb_mb, 
+				"stash: so_rcv before appending");
+			dump_mbuf(E.e_data,
+				"stash: e_data before appending");
+		ENDDEBUG
+
+	IFPERF(tpcb)
+		PStat(tpcb, Nb_from_ll) += E.e_datalen;
+		tpmeas(tpcb->tp_lref, TPtime_from_ll, &e->e_time,
+			E.e_seq, (u_int)PStat(tpcb, Nb_from_ll), (u_int)E.e_datalen);
+	ENDPERF
+	/* the expected tpdu: append it and whatever is queued right behind it */
+	if (E.e_seq == tpcb->tp_rcvnxt) {
+
+		IFDEBUG(D_STASH)
+			printf("stash EQ: seq 0x%x datalen 0x%x eot 0x%x\n", 
+			E.e_seq, E.e_datalen, E.e_eot);
+		ENDDEBUG
+
+		IFTRACE(D_STASH)
+			tptraceTPCB(TPPTmisc, "stash EQ: seq len eot", 
+			E.e_seq, E.e_datalen, E.e_eot, 0);
+		ENDTRACE
+
+		SET_DELACK(tpcb);
+
+		sbappend(&tpcb->tp_sock->so_rcv, E.e_data);
+
+		SEQ_INC( tpcb, tpcb->tp_rcvnxt );
+		/*
+		 * move chains from the reassembly queue to the socket buffer
+		 */
+		if (tpcb->tp_rsycnt) {
+			register struct mbuf **mp;
+			struct mbuf **mplim;
+
+			mp = tpcb->tp_rsyq + (tpcb->tp_rcvnxt % tpcb->tp_maxlcredit);
+			mplim = tpcb->tp_rsyq + tpcb->tp_maxlcredit;
+
+			while (tpcb->tp_rsycnt && *mp) {
+				sbappend(&tpcb->tp_sock->so_rcv, *mp);
+				tpcb->tp_rsycnt--;
+				*mp = 0;
+				SEQ_INC(tpcb, tpcb->tp_rcvnxt);
+				ack_reason |= ACK_REORDER;
+				if (++mp == mplim)
+					mp = tpcb->tp_rsyq;
+			}
+		}
+		IFDEBUG(D_STASH)
+			dump_mbuf(tpcb->tp_sock->so_rcv.sb_mb, 
+				"stash: so_rcv after appending");
+		ENDDEBUG
+
+	} else {
+		register struct mbuf **mp;
+		SeqNum uwe;
+		/* not the expected tpdu: hold it in the reassembly queue or drop it */
+		IFTRACE(D_STASH)
+			tptraceTPCB(TPPTmisc, "stash Reseq: seq rcvnxt lcdt", 
+			E.e_seq, tpcb->tp_rcvnxt, tpcb->tp_lcredit, 0);
+		ENDTRACE
+
+		if (tpcb->tp_rsyq == 0)
+			tp_rsyset(tpcb);
+		uwe = SEQ(tpcb, tpcb->tp_rcvnxt + tpcb->tp_maxlcredit);
+		if (tpcb->tp_rsyq == 0 ||
+						!IN_RWINDOW(tpcb, E.e_seq, tpcb->tp_rcvnxt, uwe)) {
+			ack_reason = ACK_DONT;
+			m_freem(E.e_data);
+		} else if (*(mp = tpcb->tp_rsyq + (E.e_seq % tpcb->tp_maxlcredit))) {
+			IFDEBUG(D_STASH)
+				printf("tp_stash - drop & ack\n");
+			ENDDEBUG
+
+			/* retransmission - drop it and force an ack */
+			IncStat(ts_dt_dup);
+			IFPERF(tpcb)
+				IncPStat(tpcb, tps_n_ack_cuz_dup);
+			ENDPERF
+
+			m_freem(E.e_data);
+			ack_reason |= ACK_DUP;
+		} else {
+			*mp = E.e_data;
+			tpcb->tp_rsycnt++;
+			ack_reason = ACK_DONT;
+		}
+	}
+	/* there were some comments of historical interest here. */
+	{
+		LOCAL_CREDIT(tpcb);
+
+		if ( E.e_seq ==  tpcb->tp_sent_uwe )
+			ack_reason |= ACK_STRAT_FULLWIN;
+
+		IFTRACE(D_STASH)
+			tptraceTPCB(TPPTmisc, 
+				"end of stash, eot, ack_reason, sent_uwe ",
+				E.e_eot, ack_reason, tpcb->tp_sent_uwe, 0); 
+		ENDTRACE
+
+		if ( ack_reason == ACK_DONT ) {
+			IncStat( ts_ackreason[ACK_DONT] );
+			return 0;
+		} else {
+			IFPERF(tpcb)
+				if(ack_reason & ACK_STRAT_EACH) {
+					IncPStat(tpcb, tps_n_ack_cuz_strat);
+				} else if(ack_reason & ACK_STRAT_FULLWIN) {
+					IncPStat(tpcb, tps_n_ack_cuz_fullwin);
+				} else if(ack_reason & ACK_REORDER) {
+					IncPStat(tpcb, tps_n_ack_cuz_reorder);
+				}
+				tpmeas(tpcb->tp_lref, TPtime_ack_sent, 0, 
+							SEQ_ADD(tpcb, E.e_seq, 1), 0, 0);
+			ENDPERF
+			{
+				register int i;
+
+				/* keep track of all reasons that apply */
+				for( i=1; i<_ACK_NUM_REASONS_ ;i++) {
+					if( ack_reason & (1<<i) ) 
+						IncStat( ts_ackreason[i] );
+				}
+			}
+			return 1;
+		}
+	}
+}
+
+/*
+ * tp_rsyflush - free every packet held in the tp_maxlcredit slots of the
+ * reassembly queue, then the queue itself.  Do this when closing the
+ * socket, or when the space in the receive socket has changed (XXX).
+ */
+tp_rsyflush(tpcb)
+register struct tp_pcb *tpcb;
+{
+	register struct mbuf **mp;
+	if (tpcb->tp_rsycnt) {
+		for (mp = tpcb->tp_rsyq + tpcb->tp_maxlcredit;
+									 --mp >= tpcb->tp_rsyq; )
+			if (*mp) {
+				tpcb->tp_rsycnt--;
+				m_freem(*mp);
+			}
+		if (tpcb->tp_rsycnt) {	/* count and slots disagree: report */
+			printf("tp_rsyflush %x\n", tpcb);
+			tpcb->tp_rsycnt = 0;
+		}
+	}
+	free((caddr_t)tpcb->tp_rsyq, M_PCB);
+	tpcb->tp_rsyq = 0;
+}
+
+tp_rsyset(tpcb)
+register struct tp_pcb *tpcb;
+{
+	register struct socket *so = tpcb->tp_sock;
+	int maxcredit  = tpcb->tp_xtd_format ? 0xffff : 0xf;
+	caddr_t	rsyq;
+	/* (re)allocate tp_rsyq, sized from the receive buffer's sb_hiwat */
+	maxcredit = min(maxcredit,
+		  (so->so_rcv.sb_hiwat + tpcb->tp_l_tpdusize)/ tpcb->tp_l_tpdusize);
+	if (maxcredit == tpcb->tp_maxlcredit && tpcb->tp_rsyq != 0)
+		return;
+	/* flush first: tp_rsyflush() walks the old queue by tp_maxlcredit */
+	if (tpcb->tp_rsyq)
+		tp_rsyflush(tpcb);
+	tpcb->tp_maxlcredit = maxcredit;
+	maxcredit *= sizeof(struct mbuf *);
+	if (rsyq = (caddr_t)malloc(maxcredit, M_PCB, M_NOWAIT))
+		bzero(rsyq, maxcredit);
+	tpcb->tp_rsyq = (struct mbuf **)rsyq;	/* 0 if the malloc failed */
+}
+
+tpsbcheck(tpcb, i)
+struct tp_pcb *tpcb;
+{
+	struct sockbuf *sb = &tpcb->tp_sock->so_snd;
+	register struct mbuf *rec, *mb;
+	register int cc = 0, space = 0, want;
+	/* cross-check so_snd's counters; (i) only tags the diagnostics */
+	for (rec = sb->sb_mb; rec != 0; rec = rec->m_nextpkt) {
+		if (!(rec->m_flags & M_PKTHDR))
+			panic("tpsbcheck nohdr");
+		want = cc + rec->m_pkthdr.len;
+		for (mb = rec; mb != 0; mb = mb->m_next) {
+			cc += mb->m_len;
+			space += MSIZE;
+			if (mb->m_flags & M_EXT)
+				space += mb->m_ext.ext_size;
+		}
+		if (cc == want)
+			continue;
+		printf("test %d; len %d != pktlen %d on mbuf 0x%x\n",
+			i, cc, want, rec);
+		panic("tpsbcheck short");
+	}
+	if (cc == sb->sb_cc && space == sb->sb_mbcnt)
+		return;
+	printf("test %d: cc %d != %d || mbcnt %d != %d\n", i, cc, sb->sb_cc,
+	    space, sb->sb_mbcnt);
+	panic("tpsbcheck");
+}
diff --git a/sys/netiso/tp_subr2.c b/sys/netiso/tp_subr2.c
new file mode 100644
index 00000000000..60c7ce2a50b
--- /dev/null
+++ b/sys/netiso/tp_subr2.c
@@ -0,0 +1,880 @@
+/*-
+ * Copyright (c) 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)tp_subr2.c	8.1 (Berkeley) 6/10/93
+ */
+
+/***********************************************************
+		Copyright IBM Corporation 1987
+
+                      All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and its 
+documentation for any purpose and without fee is hereby granted, 
+provided that the above copyright notice appear in all copies and that
+both that copyright notice and this permission notice appear in 
+supporting documentation, and that the name of IBM not be
+used in advertising or publicity pertaining to distribution of the
+software without specific, written prior permission.  
+
+IBM DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING
+ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL
+IBM BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR
+ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+SOFTWARE.
+
+******************************************************************/
+
+/*
+ * ARGO Project, Computer Sciences Dept., University of Wisconsin - Madison
+ */
+/* 
+ * ARGO TP
+ *
+ * $Header: tp_subr2.c,v 5.5 88/11/18 17:28:55 nhall Exp $
+ * $Source: /usr/argo/sys/netiso/RCS/tp_subr2.c,v $
+ *
+ * Some auxiliary routines:
+ * 	tp_protocol_error: required by xebec- called when a combo of state,
+ *	    event, predicate isn't covered for by the transition file.
+ *	tp_indicate: gives indications(signals) to the user process
+ *	tp_getoptions: initializes variables that are affected by the options
+ *	    chosen.
+ */
+
+/* this def'n is to cause the expansion of this macro in the
+ * routine tp_local_credit :
+ */
+#define LOCAL_CREDIT_EXPAND
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/domain.h>
+#include <sys/protosw.h>
+#include <sys/errno.h>
+#include <sys/time.h>
+#include <sys/kernel.h>
+
+#undef MNULL
+#include <netiso/argo_debug.h>
+#include <netiso/tp_param.h>
+#include <netiso/tp_ip.h>
+#include <netiso/iso.h>
+#include <netiso/iso_errno.h>
+#include <netiso/iso_pcb.h>
+#include <netiso/tp_timer.h>
+#include <netiso/tp_stat.h>
+#include <netiso/tp_tpdu.h>
+#include <netiso/tp_pcb.h>
+#include <netiso/tp_seq.h>
+#include <netiso/tp_trace.h>
+#include <netiso/tp_user.h>
+#include <netiso/cons.h>
+
+#include <net/if.h>
+#include <net/if_types.h>
+#ifdef TRUE
+#undef FALSE
+#undef TRUE
+#endif
+#include <netccitt/x25.h>
+#include <netccitt/pk.h>
+#include <netccitt/pk_var.h>
+
+void tp_rsyset();
+
+/*
+ * NAME: 	tp_local_credit()
+ *
+ * CALLED FROM:
+ *  tp_emit(), tp_usrreq()
+ *
+ * FUNCTION and ARGUMENTS:
+ *	Computes the local credit and stashes it in tpcb->tp_lcredit.
+ *  It's a macro in the production system rather than a procedure.
+ *
+ * RETURNS:
+ *
+ * SIDE EFFECTS:
+ *
+ * NOTES:
+ *  This doesn't actually get called in a production system - 
+ *  the macro gets expanded instead in place of calls to this proc.
+ *  But for debugging, we call this and that allows us to add
+ *  debugging messages easily here.
+ */
+void
+tp_local_credit(tpcb)
+	struct tp_pcb *tpcb;
+{
+	LOCAL_CREDIT(tpcb);	/* macro form of the credit computation */
+	IFDEBUG(D_CREDIT)
+		/* one conversion per value passed; cong_win used to have none */
+		printf("ref 0x%x lcdt 0x%x l_tpdusize 0x%x decbit 0x%x cong_win 0x%x\n",
+			tpcb->tp_lref, 
+			tpcb->tp_lcredit, 
+			tpcb->tp_l_tpdusize, 
+			tpcb->tp_decbit, 
+			tpcb->tp_cong_win);
+	ENDDEBUG
+	IFTRACE(D_CREDIT)
+		tptraceTPCB(TPPTmisc,
+			"lcdt tpdusz \n",
+			 tpcb->tp_lcredit, tpcb->tp_l_tpdusize, 0, 0);
+	ENDTRACE
+}
+
+/*
+ * NAME:  tp_protocol_error()
+ *
+ * CALLED FROM:
+ *  tp_driver(), when it doesn't know what to do with
+ * 	a combo of event, state, predicate
+ *
+ * FUNCTION and ARGUMENTS:
+ *  print error mesg 
+ *
+ * RETURN VALUE:
+ *  EIO - always
+ *
+ * SIDE EFFECTS:
+ *
+ * NOTES:
+ */
+int
+tp_protocol_error(e,tpcb)
+	struct tp_event	*e;
+	struct tp_pcb	*tpcb;
+{
+	printf("TP PROTOCOL ERROR! tpcb 0x%x event 0x%x, state 0x%x\n",	/* printed unconditionally */
+		tpcb, e->ev_number, tpcb->tp_state);
+	IFTRACE(D_DRIVER)	/* same three values go to the trace facility */
+		tptraceTPCB(TPPTmisc, "PROTOCOL ERROR tpcb event state",
+			tpcb, e->ev_number, tpcb->tp_state, 0 );
+	ENDTRACE
+	return EIO; /* for lack of anything better */
+}
+
+
+/* Not used at the moment */
+ProtoHook
+tp_drain()
+{
+	return 0;	/* stub: does nothing and always reports 0 */
+}
+
+
+/*
+ * NAME: tp_indicate()
+ *
+ * CALLED FROM:
+ * 	tp.trans when XPD arrive, when a connection is being disconnected by
+ *  the arrival of a DR or ER, and when a connection times out.
+ *
+ * FUNCTION and ARGUMENTS:
+ *  (ind) is the type of indication : T_DISCONNECT, T_XPD
+ *  (error) is an E* value that will be put in the socket structure
+ *  to be passed along to the user later.
+ * 	Gives a SIGURG to the user process or group indicated by the socket
+ * 	attached to the tpcb.
+ *
+ * RETURNS:  Rien
+ * 
+ * SIDE EFFECTS:
+ *
+ * NOTES:
+ */
+void
+tp_indicate(ind, tpcb, error)
+	int				ind; 
+	u_short			error;
+	register struct tp_pcb	*tpcb;
+{
+	register struct socket *so = tpcb->tp_sock;
+	IFTRACE(D_INDICATION)
+		tptraceTPCB(TPPTindicate, ind, *(u_short *)(tpcb->tp_lsuffix), 
+			*(u_short *)(tpcb->tp_fsuffix), error,so->so_pgid);
+	ENDTRACE
+	IFDEBUG(D_INDICATION)
+		char *ls, *fs;
+		ls = tpcb->tp_lsuffix, 
+		fs = tpcb->tp_fsuffix, 
+		/* the trailing commas above chain both assignments and the printf into one expression */
+		printf(
+"indicate 0x%x lsuf 0x%02x%02x fsuf 0x%02x%02x err 0x%x  noind 0x%x ref 0x%x\n",
+		ind, 
+		*ls, *(ls+1), *fs, *(fs+1),
+		error, /*so->so_pgrp,*/
+		tpcb->tp_no_disc_indications,
+		tpcb->tp_lref);
+	ENDDEBUG
+	/* ER_TPDU: hand the reason to the user as a control record on tp_Xrcv */
+	if (ind == ER_TPDU) {
+		register struct mbuf *m;
+		struct tp_disc_reason x;
+
+		if ((so->so_state & SS_CANTRCVMORE) == 0 &&
+				(m = m_get(M_DONTWAIT, MT_OOBDATA)) != 0) {
+
+			x.dr_hdr.cmsg_len = m->m_len = sizeof(x);
+			x.dr_hdr.cmsg_level = SOL_TRANSPORT;
+			x.dr_hdr.cmsg_type= TPOPT_DISC_REASON;
+			x.dr_reason = error;
+			*mtod(m, struct tp_disc_reason *) = x;	/* structure copy into the mbuf data area */
+			sbappendrecord(&tpcb->tp_Xrcv, m);
+			error = 0;	/* reason was delivered in-band, so no socket error */
+		} else
+			error = ECONNRESET;	/* receiver closed or no mbuf: fall back to an errno */
+	}
+	so->so_error = error;
+
+	if (ind == T_DISCONNECT)  {
+		if (error == 0)
+			so->so_error = ENOTCONN;
+		if ( tpcb->tp_no_disc_indications )
+			return;	/* so_error is set, but the out-of-band signal is skipped */
+	}
+	IFTRACE(D_INDICATION)
+		tptraceTPCB(TPPTmisc, "doing sohasoutofband(so)", so,0,0,0);
+	ENDTRACE
+	sohasoutofband(so);
+}
+
+/*
+ * NAME : tp_getoptions()
+ *
+ * CALLED FROM:
+ * 	tp.trans whenever we go into OPEN state 
+ *
+ * FUNCTION and ARGUMENTS:
+ *  sets the proper flags and values in the tpcb, to control
+ *  the appropriate actions for the given class, options,
+ *  sequence space, etc, etc.
+ * 
+ * RETURNS: Nada
+ * 
+ * SIDE EFFECTS:
+ *
+ * NOTES:
+ */
+void
+tp_getoptions(tpcb)
+struct tp_pcb *tpcb;
+{
+	int extended = tpcb->tp_xtd_format;
+
+	/* sequence-number mask, top bit and half of it for the format in use */
+	tpcb->tp_seqmask = extended ? TP_XTD_FMT_MASK : TP_NML_FMT_MASK;
+	tpcb->tp_seqbit = extended ? TP_XTD_FMT_BIT : TP_NML_FMT_BIT;
+	tpcb->tp_seqhalf = tpcb->tp_seqbit >> 1;
+	/* tp_dt_ticks is raised to at least tp_peer_acktime + 2 */
+	tpcb->tp_dt_ticks = max(tpcb->tp_dt_ticks, (tpcb->tp_peer_acktime + 2));
+	tp_rsyset(tpcb);
+}
+
+/*
+ * NAME:  tp_recycle_tsuffix()
+ *
+ * CALLED FROM:
+ *  Called when a ref is frozen.
+ *
+ * FUNCTION and ARGUMENTS:
+ *  allows the suffix to be reused. 
+ *
+ * RETURNS: zilch
+ *
+ * SIDE EFFECTS:
+ *
+ * NOTES:
+ */
+void
+tp_recycle_tsuffix(tpcb)
+	struct tp_pcb	*tpcb;
+{	/* clear both suffixes, then call the network layer's recycle hook */
+	tpcb->tp_lsuffixlen = 0;
+	tpcb->tp_fsuffixlen = 0;
+	bzero((caddr_t)tpcb->tp_fsuffix, sizeof(tpcb->tp_fsuffix));
+	bzero((caddr_t)tpcb->tp_lsuffix, sizeof(tpcb->tp_lsuffix));
+	(tpcb->tp_nlproto->nlp_recycle_suffix)(tpcb->tp_npcb);
+}
+
+/*
+ * NAME: tp_quench()
+ *
+ * CALLED FROM:
+ *  tp{af}_quench() when ICMP source quench or similar thing arrives.
+ *
+ * FUNCTION and ARGUMENTS:
+ *  Drop the congestion window back to 1.
+ *  Congestion window scheme:
+ *  Initial value is 1.  ("slow start" as Nagle, et. al. call it)
+ *  For each good ack that arrives, the congestion window is increased
+ *  by 1 (up to max size of logical infinity, which is to say, 
+ *	it doesn't wrap around).
+ *  Source quench causes it to drop back to 1.
+ *  tp_send() uses the smaller of (regular window, congestion window). 
+ *  One retransmission strategy option is to have any retransmission 
+ *	cause reset the congestion window back  to 1.
+ *
+ *	(cmd) is either PRC_QUENCH: source quench, or
+ *		PRC_QUENCH2: dest. quench (dec bit)
+ *
+ * RETURNS:
+ * 
+ * SIDE EFFECTS:
+ * 
+ * NOTES:
+ */
+void
+tp_quench( tpcb, cmd )
+	struct tp_pcb *tpcb;
+	int cmd;
+{
+	IFDEBUG(D_QUENCH)
+		printf("tp_quench tpcb 0x%x ref 0x%x sufx 0x%x\n",
+			tpcb, tpcb->tp_lref, *(u_short *)(tpcb->tp_lsuffix));
+		printf("cong_win 0x%x decbit 0x%x \n",
+			tpcb->tp_cong_win, tpcb->tp_decbit);
+	ENDDEBUG
+	if (cmd != PRC_QUENCH && cmd != PRC_QUENCH2)
+		return;		/* any other command is ignored */
+	/* either kind of quench drops the congestion window to one TPDU */
+	tpcb->tp_cong_win = tpcb->tp_l_tpdusize;
+	if (cmd == PRC_QUENCH) {
+		IncStat(ts_quench);
+	} else {
+		/* PRC_QUENCH2 additionally reloads tp_decbit */
+		tpcb->tp_decbit = TP_DECBIT_CLEAR_COUNT;
+		IncStat(ts_rcvdecbit);
+	}
+}
+
+
+/*
+ * NAME:	tp_netcmd()
+ *
+ * CALLED FROM:			
+ *
+ * FUNCTION and ARGUMENTS:			
+ *
+ * RETURNS:			
+ *
+ * SIDE EFFECTS:	
+ *
+ * NOTES:			
+ */
+tp_netcmd( tpcb, cmd )
+	struct tp_pcb *tpcb;
+	int cmd;
+{
+#ifdef TPCONS
+	struct isopcb *isop;
+	struct pklcd *lcp;
+
+	if (tpcb->tp_netservice != ISO_CONS)	/* only meaningful over ISO_CONS */
+		return;
+	isop = (struct isopcb *)tpcb->tp_npcb;
+	lcp = (struct pklcd *)isop->isop_chan;
+	switch (cmd) {
+
+	case CONN_CLOSE:
+	case CONN_REFUSE:
+		if (isop->isop_refcnt == 1) {	/* this tpcb holds the only reference */
+			/* This is really superfluous, since it would happen
+			   anyway in iso_pcbdetach, although it is a courtesy
+			   to free up the x.25 channel before the refwait timer
+			   expires. */
+			lcp->lcd_upper = 0;	/* unhook the channel from this layer first */
+			lcp->lcd_upnext = 0;
+			pk_disconnect(lcp);
+			isop->isop_chan = 0;
+			isop->isop_refcnt = 0;
+		}
+		break;
+
+	default:
+		printf("tp_netcmd(0x%x, 0x%x) NOT IMPLEMENTED\n", tpcb, cmd);
+		break;
+	}
+#else /* TPCONS */
+	printf("tp_netcmd(): X25 NOT CONFIGURED!!\n");	/* without TPCONS the call only complains */
+#endif
+}
+/*
+ * CALLED FROM:
+ *  tp_ctloutput() and tp_emit()
+ * FUNCTION and ARGUMENTS:
+ * 	Convert a class mask to the highest numeric value it represents.
+ */
+
+int
+tp_mask_to_num(x)
+	u_char x;
+{
+	register int j = 4;
+
+	/* scan bits 4..0 for the highest one set; j ends at -1 if none is */
+	while (j >= 0 && (x & (1 << j)) == 0)
+		j--;
+
+	ASSERT( (j == 4) || (j == 0) ); /* for now */
+	if (!(j == 4 || j == 0)) {
+		/* reported unconditionally, whatever ASSERT expands to */
+		printf("ASSERTION ERROR: tp_mask_to_num: x 0x%x j %d\n", x, j);
+	}
+	IFTRACE(D_TPINPUT)
+		tptrace(TPPTmisc, "tp_mask_to_num(x) returns j", x, j, 0, 0);
+	ENDTRACE
+	IFDEBUG(D_TPINPUT)
+		printf("tp_mask_to_num(0x%x) returns 0x%x\n", x, j);
+	ENDDEBUG
+	return j;
+}
+
+static 
+copyQOSparms(src, dst)
+	struct tp_conn_param *src, *dst;
+{
+	/*
+	 * The leading 12 shorts of the structure are copied wholesale;
+	 * the three members named below are then assigned one at a time.
+	 */
+	bcopy((caddr_t)src, (caddr_t)dst, (12 * sizeof(short)));
+	dst->p_rx_strat = src->p_rx_strat;
+	dst->p_ack_strat = src->p_ack_strat;
+	dst->p_tpdusize = src->p_tpdusize;
+}
+/*
+ * Determine a reasonable value for maxseg size.
+ * If the route is known, check route for mtu.
+ * We also initialize the congestion/slow start
+ * window to be a single segment if the destination isn't local.
+ * While looking at the routing entry, we also initialize other path-dependent
+ * parameters from pre-set or cached values in the routing entry.
+ */
+void
+tp_mss(tpcb, nhdr_size)
+	register struct tp_pcb *tpcb;
+	int nhdr_size;
+{
+	register struct rtentry *rt;
+	struct ifnet *ifp;
+	register int rtt, mss;
+	u_long bufsize;
+	int i, ssthresh = 0, rt_mss;
+	struct socket *so;
+
+	if (tpcb->tp_ptpdusize)
+		mss = tpcb->tp_ptpdusize << 7;
+	else
+		mss = 1 << tpcb->tp_tpdusize;
+	so = tpcb->tp_sock;
+	if ((rt = *(tpcb->tp_routep)) == 0) {
+		bufsize = so->so_rcv.sb_hiwat;
+		goto punt_route;
+	}
+	ifp = rt->rt_ifp;
+
+#ifdef RTV_MTU	/* if route characteristics exist ... */
+	/*
+	 * While we're here, check if there's an initial rtt
+	 * or rttvar.  Convert from the route-table units
+	 * to hz ticks for the smoothed timers and slow-timeout units
+	 * for other initial variables.
+	 */
+	if (tpcb->tp_rtt == 0 && (rtt = rt->rt_rmx.rmx_rtt)) {
+		tpcb->tp_rtt = rtt * hz / RTM_RTTUNIT;
+		if (rt->rt_rmx.rmx_rttvar)
+			tpcb->tp_rtv = rt->rt_rmx.rmx_rttvar
+						* hz / RTM_RTTUNIT;
+		else
+			tpcb->tp_rtv = tpcb->tp_rtt;
+	}
+	/*
+	 * if there's an mtu associated with the route, use it
+	 */
+	if (rt->rt_rmx.rmx_mtu)
+		rt_mss = rt->rt_rmx.rmx_mtu - nhdr_size;
+	else
+#endif /* RTV_MTU */
+		rt_mss = (ifp->if_mtu - nhdr_size);
+	if (tpcb->tp_ptpdusize == 0 || /* assume application doesn't care */
+	    mss > rt_mss /* network won't support what was asked for */)
+		mss = rt_mss;
+	/* can propose mtu which are multiples of 128 */
+	mss &= ~0x7f;
+	/*
+	 * If there's a pipesize, change the socket buffer
+	 * to that size.
+	 */
+#ifdef RTV_SPIPE
+	/* both tests sit wholly inside the #ifdef so braces balance either way */
+	if ((bufsize = rt->rt_rmx.rmx_sendpipe) > 0) {
+		bufsize = min(bufsize, so->so_snd.sb_hiwat);
+		(void) sbreserve(&so->so_snd, bufsize);
+	}
+	if ((bufsize = rt->rt_rmx.rmx_recvpipe) > 0) {
+		bufsize = min(bufsize, so->so_rcv.sb_hiwat);
+		(void) sbreserve(&so->so_rcv, bufsize);
+	} else
+#endif /* RTV_SPIPE */
+		/* no receive pipe size: work from the current high-water mark */
+		bufsize = so->so_rcv.sb_hiwat;
+#ifdef RTV_SSTHRESH
+	/*
+	 * There's some sort of gateway or interface
+	 * buffer limit on the path.  Use this to set
+	 * the slow start threshhold, but set the
+	 * threshold to no less than 2*mss.
+	 */
+	ssthresh = rt->rt_rmx.rmx_ssthresh;
+#endif /* RTV_SSTHRESH */
+punt_route:
+	/*
+	 * The current mss is initialized to the default value.
+	 * If we compute a smaller value, reduce the current mss.
+	 * If we compute a larger value, return it for use in sending
+	 * a max seg size option.
+	 * If we received an offer, don't exceed it.
+	 * However, do not accept offers under 128 bytes.
+	 */
+	if (tpcb->tp_l_tpdusize)
+		mss = min(mss, tpcb->tp_l_tpdusize);
+	/*
+	 * We want a minimum recv window of 4 packets to
+	 * signal packet loss by duplicate acks.
+	 */
+	mss = min(mss, bufsize >> 2) & ~0x7f;
+	mss = max(mss, 128);		/* sanity */
+	tpcb->tp_cong_win =
+		(rt == 0 || (rt->rt_flags & RTF_GATEWAY)) ? mss : bufsize;
+	tpcb->tp_l_tpdusize = mss;
+	tp_rsyset(tpcb);
+	tpcb->tp_ssthresh = max(2 * mss, ssthresh);
+	/* Calculate log2 of mss */
+	for (i = TP_MIN_TPDUSIZE + 1; i <= TP_MAX_TPDUSIZE; i++)
+		if ((1 << i) > mss)
+			break;
+	i--;
+	tpcb->tp_tpdusize = i;
+}
+
+/*
+ * CALLED FROM:
+ *  tp_usrreq on PRU_CONNECT and tp_input on receipt of CR
+ *	
+ * FUNCTION and ARGUMENTS:
+ * 	-- An mbuf containing the peer's network address.
+ *  -- Our control block, which will be modified
+ *  -- In the case of cons, a control block for that layer.
+ *
+ *	
+ * RETURNS:
+ *	errno value	 : 
+ *	EAFNOSUPPORT if can't find an nl_protosw for x.25 (really could panic)
+ *	ECONNREFUSED if trying to run TP0 with non-type 37 address
+ *  possibly other E* returned from cons_netcmd()
+ *
+ * SIDE EFFECTS:
+ *   Determines recommended tpdusize, buffering and initial delays
+ *	 based on information cached on the route.
+ */
+int
+tp_route_to( m, tpcb, channel)
+	struct mbuf					*m;
+	register struct tp_pcb		*tpcb;
+	caddr_t 					channel;
+{
+	register struct sockaddr_iso *siso;	/* NOTE: this may be a sockaddr_in */
+	extern struct tp_conn_param tp_conn_param[];
+	int error = 0, save_netservice = tpcb->tp_netservice;
+	register struct rtentry *rt = 0;
+	int nhdr_size, mtu, bufsize;	/* mtu and bufsize are not used below */
+
+	siso = mtod(m, struct sockaddr_iso *);
+	IFTRACE(D_CONN)
+		tptraceTPCB(TPPTmisc, 
+		"route_to: so  afi netservice class",
+		tpcb->tp_sock, siso->siso_addr.isoa_genaddr[0], tpcb->tp_netservice,
+			tpcb->tp_class);
+	ENDTRACE
+	IFDEBUG(D_CONN)
+		printf("tp_route_to( m x%x, channel 0x%x, tpcb 0x%x netserv 0x%x)\n", 
+			m, channel, tpcb, tpcb->tp_netservice);
+		printf("m->mlen x%x, m->m_data:\n", m->m_len);
+		dump_buf(mtod(m, caddr_t), m->m_len);
+	ENDDEBUG
+	if (channel) {	/* caller supplied a channel: adopt the isopcb hanging off it */
+#ifdef TPCONS
+		struct pklcd *lcp = (struct pklcd *)channel;
+		struct isopcb *isop = (struct isopcb *)lcp->lcd_upnext,
+			*isop_new = (struct isopcb *)tpcb->tp_npcb;
+		/* The next 2 lines believe that you haven't
+		   set any network level options or done a pcbconnect
+		   and XXXXXXX'edly apply to both inpcb's and isopcb's */
+		remque(isop_new);
+		free(isop_new, M_PCB);
+		tpcb->tp_npcb = (caddr_t)isop;
+		tpcb->tp_netservice = ISO_CONS;
+		tpcb->tp_nlproto = nl_protosw + ISO_CONS;
+		if (isop->isop_refcnt++ == 0) {	/* first user of this isopcb */
+			iso_putsufx(isop, tpcb->tp_lsuffix, tpcb->tp_lsuffixlen, TP_LOCAL);
+			isop->isop_socket = tpcb->tp_sock;
+		} else
+			/* there are already connections sharing this */;
+#endif
+	} else {
+		switch (siso->siso_family) {	/* choose the network service by address family */
+		default:
+			error = EAFNOSUPPORT;
+			goto done;
+#ifdef ISO
+		case AF_ISO:
+		{
+			struct isopcb *isop = (struct isopcb *)tpcb->tp_npcb;
+			int flags = tpcb->tp_sock->so_options & SO_DONTROUTE;
+			tpcb->tp_netservice = ISO_CLNS;
+			if (clnp_route(&siso->siso_addr, &isop->isop_route,
+							flags, (void **)0, (void **)0) == 0) {
+				rt = isop->isop_route.ro_rt;
+				if (rt && rt->rt_flags & RTF_PROTO1)	/* a route flagged RTF_PROTO1 selects ISO_CONS */
+					tpcb->tp_netservice = ISO_CONS;
+			}
+		}    break;
+#endif
+#ifdef INET
+		case AF_INET:
+			tpcb->tp_netservice = IN_CLNS;
+#endif
+		}
+		if (tpcb->tp_nlproto->nlp_afamily != siso->siso_family) {	/* current npcb is for another family */
+			IFDEBUG(D_CONN)
+				printf("tp_route_to( CHANGING nlproto old 0x%x new 0x%x)\n", 
+						save_netservice, tpcb->tp_netservice);
+			ENDDEBUG
+			if (error = tp_set_npcb(tpcb))
+				goto done;
+		}
+		IFDEBUG(D_CONN)
+			printf("tp_route_to  calling nlp_pcbconn, netserv %d\n",
+				tpcb->tp_netservice);
+		ENDDEBUG
+		tpcb->tp_nlproto = nl_protosw + tpcb->tp_netservice;
+		error = (tpcb->tp_nlproto->nlp_pcbconn)(tpcb->tp_npcb, m);
+	}
+	if (error)
+		goto done;
+	nhdr_size = tpcb->tp_nlproto->nlp_mtu(tpcb); /* only gets common info */
+	tp_mss(tpcb, nhdr_size);	/* nhdr_size is what tp_mss subtracts from the mtu */
+done:
+	IFDEBUG(D_CONN)
+		printf("tp_route_to  returns 0x%x\n", error);
+	ENDDEBUG
+	IFTRACE(D_CONN)
+		tptraceTPCB(TPPTmisc, "route_to: returns: error netserv class", error, 
+			tpcb->tp_netservice, tpcb->tp_class, 0);
+	ENDTRACE
+	return error;
+}
+
+
+/* class zero version */
+void
+tp0_stash( tpcb, e )
+	register struct tp_pcb		*tpcb;
+	register struct tp_event	*e;
+{
+#ifndef lint
+#define E e->ATTR(DT_TPDU)
+#else /* lint */
+#define E e->ev_union.EV_DT_TPDU
+#endif /* lint */
+	/* E (never #undef'd) names the DT_TPDU part of the event (e) */
+	register struct sockbuf *sb = &tpcb->tp_sock->so_rcv;
+	register struct isopcb *isop = (struct isopcb *)tpcb->tp_npcb;
+
+	IFPERF(tpcb)
+		PStat(tpcb, Nb_from_ll) += E.e_datalen;
+		tpmeas(tpcb->tp_lref, TPtime_from_ll, &e->e_time,
+				E.e_seq, PStat(tpcb, Nb_from_ll), E.e_datalen);
+	ENDPERF
+
+	IFDEBUG(D_STASH)
+		printf("stash EQ: seq 0x%x datalen 0x%x eot 0x%x", 
+		E.e_seq, E.e_datalen, E.e_eot);
+	ENDDEBUG
+
+	IFTRACE(D_STASH)
+		tptraceTPCB(TPPTmisc, "stash EQ: seq len eot", 
+		E.e_seq, E.e_datalen, E.e_eot, 0);
+	ENDTRACE
+
+	if ( E.e_eot ) {	/* end of TSDU: flag the first mbuf as end-of-record */
+		register struct mbuf *n = E.e_data;
+		n->m_flags |= M_EOR;
+		n->m_act = MNULL; /* set on tp_input */
+	}
+	sbappend(sb, E.e_data);	/* the data goes onto the socket's receive buffer */
+	IFDEBUG(D_STASH)
+		dump_mbuf(sb->sb_mb, "stash 0: so_rcv after appending");
+	ENDDEBUG
+	if (tpcb->tp_netservice != ISO_CONS)
+		printf("tp0_stash: tp running over something wierd\n");
+	else {
+		register struct pklcd *lcp = (struct pklcd *)isop->isop_chan;
+		pk_flowcontrol(lcp, sbspace(sb) <= 0, 1);	/* second argument is true once the buffer has no space left */
+	}
+} 
+
+void
+tp0_openflow(tpcb)
+register struct tp_pcb *tpcb;
+{
+	register struct pklcd *chan;
+	if (tpcb->tp_netservice != ISO_CONS) {
+		printf("tp0_openflow: tp running over something wierd\n");
+		return;
+	}
+	chan = (struct pklcd *)((struct isopcb *)tpcb->tp_npcb)->isop_chan;
+	if (chan->lcd_rxrnr_condition)	/* only act when this flag is set */
+		pk_flowcontrol(chan, 0, 0);
+}
+#ifndef TPCONS
+static
+pk_flowcontrol() {}	/* empty stand-in so the calls above still build without TPCONS */
+#endif
+
+#ifdef TP_PERF_MEAS
+/*
+ * CALLED FROM:
+ *  tp_ctloutput() when the user sets TPOPT_PERF_MEAS on
+ *  and tp_newsocket() when a new connection is made from 
+ *  a listening socket with tp_perf_on == true.
+ * FUNCTION and ARGUMENTS:
+ *  (tpcb) is the usual; this procedure gets a clear cluster mbuf for
+ *  a tp_pmeas structure, and makes tpcb->tp_p_meas point to it.
+ * RETURN VALUE:
+ *  ENOBUFS if it cannot get a cluster mbuf.
+ */
+
+int 
+tp_setup_perf(tpcb)
+	register struct tp_pcb *tpcb;
+{
+	register struct mbuf *mb;
+
+	/* measurement area already set up: nothing to do */
+	if (tpcb->tp_p_meas != 0)
+		return 0;
+	MGET(mb, M_WAITOK, MT_PCB);
+	if (mb == 0)
+		return ENOBUFS;
+	MCLGET(mb, M_WAITOK);
+	if (!(mb->m_flags & M_EXT)) {
+		/* M_EXT not set after MCLGET: release the mbuf and give up */
+		(void) m_free(mb);
+		return ENOBUFS;
+	}
+	mb->m_len = sizeof (struct tp_pmeas);
+	tpcb->tp_p_mbuf = mb;
+	tpcb->tp_p_meas = mtod(mb, struct tp_pmeas *);
+	bzero( (caddr_t)tpcb->tp_p_meas, sizeof (struct tp_pmeas) );
+	IFDEBUG(D_PERF_MEAS)
+		printf("tpcb 0x%x so 0x%x ref 0x%x tp_p_meas 0x%x tp_perf_on 0x%x\n", 
+			tpcb, tpcb->tp_sock, tpcb->tp_lref, tpcb->tp_p_meas, tpcb->tp_perf_on);
+	ENDDEBUG
+	tpcb->tp_perf_on = 1;
+	return 0;
+}
+#endif /* TP_PERF_MEAS */
+
+#ifdef ARGO_DEBUG
+dump_addr (addr)
+	register struct sockaddr *addr;
+{
+	/* hand the address to the dump routine for its family */
+	if (addr->sa_family == AF_INET) {
+		dump_inaddr((struct sockaddr_in *)addr);
+		return;
+	}
+#ifdef ISO
+	if (addr->sa_family == AF_ISO) {
+		dump_isoaddr((struct sockaddr_iso *)addr);
+		return;
+	}
+#endif /* ISO */
+
+	printf("BAD AF: 0x%x\n", addr->sa_family);
+}
+
+#define	MAX_COLUMNS	8
+/*
+ *	Dump the buffer to the screen in a readable format. Format is:
+ *
+ *		hex/dec  where hex is the hex format, dec is the decimal format.
+ *		columns of hex/dec numbers will be printed, followed by the
+ *		character representations (if printable).
+ */
+Dump_buf(buf, len)
+caddr_t	buf;
+int		len;
+{
+	register u_char *bytes = (u_char *)buf;
+	int		row, col;
+
+	printf("Dump buf 0x%x len 0x%x\n", buf, len);
+	for (row = 0; row < len; row += MAX_COLUMNS) {
+		printf("+%d:\t", row);
+		/* numeric columns: hex/decimal, tab-padded past the end of data */
+		for (col = row; col < row + MAX_COLUMNS; col++) {
+			if (col >= len)
+				printf("	");
+			else
+				printf("%x/%d\t", bytes[col], bytes[col]);
+		}
+
+		/* character column: bytes 32..127 as themselves, others as '.' */
+		for (col = row; col < row + MAX_COLUMNS && col < len; col++) {
+			if (bytes[col] <= 31 || bytes[col] >= 128)
+				printf(".");
+			else
+				printf("%c", bytes[col]);
+		}
+		printf("\n");
+	}
+}
+#endif /* ARGO_DEBUG */
diff --git a/sys/netiso/tp_timer.c b/sys/netiso/tp_timer.c
new file mode 100644
index 00000000000..b3a0be3a945
--- /dev/null
+++ b/sys/netiso/tp_timer.c
@@ -0,0 +1,377 @@
+/*-
+ * Copyright (c) 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)tp_timer.c	8.1 (Berkeley) 6/10/93
+ */
+
+/***********************************************************
+		Copyright IBM Corporation 1987
+
+                      All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and its 
+documentation for any purpose and without fee is hereby granted, 
+provided that the above copyright notice appear in all copies and that
+both that copyright notice and this permission notice appear in 
+supporting documentation, and that the name of IBM not be
+used in advertising or publicity pertaining to distribution of the
+software without specific, written prior permission.  
+
+IBM DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING
+ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL
+IBM BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR
+ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+SOFTWARE.
+
+******************************************************************/
+
+/*
+ * ARGO Project, Computer Sciences Dept., University of Wisconsin - Madison
+ */
+/* 
+ * ARGO TP
+ *
+ * $Header: tp_timer.c,v 5.2 88/11/18 17:29:07 nhall Exp $
+ * $Source: /usr/argo/sys/netiso/RCS/tp_timer.c,v $
+ *
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/time.h>
+#include <sys/malloc.h>
+#include <sys/protosw.h>
+#include <sys/socket.h>
+#include <sys/kernel.h>
+
+#include <netiso/argo_debug.h>
+#include <netiso/tp_param.h>
+#include <netiso/tp_timer.h>
+#include <netiso/tp_stat.h>
+#include <netiso/tp_pcb.h>
+#include <netiso/tp_tpdu.h>
+#include <netiso/tp_trace.h>
+#include <netiso/tp_seq.h>
+
+struct	tp_ref *tp_ref;	/* reference table; allocated in tp_timerinit() */
+int	tp_rttdiv, tp_rttadd, N_TPREF = 127;	/* first two are computed in tp_timerinit(); table starts with N_TPREF + 1 entries */
+struct	tp_refinfo tp_refinfo;
+struct	tp_pcb *tp_ftimeolist = (struct tp_pcb *)&tp_ftimeolist;	/* fast-timeout list head; points at itself when empty */
+
+/*
+ * CALLED FROM:
+ *  at autoconfig time from tp_init() 
+ * 	a combo of event, state, predicate
+ * FUNCTION and ARGUMENTS:
+ *  initialize data structures for the timers
+ */
+void
+tp_timerinit()
+{
+	register int nbytes;
+	/* the reference table is allocated once; later calls return at once */
+	if (tp_refinfo.tpr_base != 0)
+		return;
+	tp_refinfo.tpr_size = N_TPREF + 1;  /* Need to start somewhere */
+	nbytes = sizeof(*tp_ref) * tp_refinfo.tpr_size;
+	tp_ref = (struct tp_ref *) malloc(nbytes, M_PCB, M_NOWAIT);
+	if (tp_ref == 0)
+		panic("tp_timerinit");
+	bzero((caddr_t)tp_ref, (unsigned) nbytes);
+	tp_refinfo.tpr_base = tp_ref;
+	/* constants derived from the ratio of hz to PR_SLOWHZ */
+	tp_rttdiv = hz / PR_SLOWHZ;
+	tp_rttadd = (2 * tp_rttdiv) - 1;
+}
+#ifdef TP_DEBUG_TIMERS
+/**********************  e timers *************************/
+
+/*
+ * CALLED FROM:
+ *  tp.trans all over
+ * FUNCTION and ARGUMENTS:
+ * Set an E type timer.
+ */
+void
+tp_etimeout(tpcb, fun, ticks)
+	register struct tp_pcb	*tpcb;
+	int 					fun; 	/* function to be called */
+	int						ticks;
+{
+	register u_int *callp;
+
+	if (tpcb == 0)	/* must come before the debug/trace code, which dereferences tpcb */
+		return;
+	IFDEBUG(D_TIMER)
+		printf("etimeout pcb 0x%x state 0x%x\n", tpcb, tpcb->tp_state);
+	ENDDEBUG
+	IFTRACE(D_TIMER)
+		tptrace(TPPTmisc, "tp_etimeout ref refstate tks Etick", tpcb->tp_lref,
+		tpcb->tp_state, ticks, tp_stat.ts_Eticks);
+	ENDTRACE
+	IncStat(ts_Eset);
+	if (ticks == 0)
+		ticks = 1;
+	callp = tpcb->tp_timer + fun;
+	if (*callp == 0 || *callp > ticks)	/* arm it, or pull an armed timer earlier */
+		*callp = ticks;
+}
+
+/*
+ * CALLED FROM:
+ *  tp.trans all over
+ * FUNCTION and ARGUMENTS:
+ *  Cancel all occurrences of E-timer function (fun) for the connection (tpcb)
+ */
+void
+tp_euntimeout(tpcb, fun)
+	register struct tp_pcb	*tpcb;
+	int			  fun;
+{
+	if (tpcb == 0)	/* test before the trace code dereferences tpcb */
+		return;
+	IFTRACE(D_TIMER)
+		tptrace(TPPTmisc, "tp_euntimeout ref", tpcb->tp_lref, 0, 0, 0);
+	ENDTRACE
+	tpcb->tp_timer[fun] = 0;	/* zero means "not running" */
+}
+
+/****************  c timers **********************
+ *
+ * These are not chained together; they sit
+ * in the tp_ref structure. they are the kind that
+ * are typically cancelled so it's faster not to
+ * mess with the chains
+ */
+#endif
+/*
+ * CALLED FROM:
+ *  the clock, every 500 ms
+ * FUNCTION and ARGUMENTS:
+ *  Look for open references with active timers.
+ *  If they exist, call the appropriate timer routines to update
+ *  the timers and possibly generate events.
+ */
+ProtoHook
+tp_slowtimo()
+{
+	register u_int 	*cp;
+	register struct tp_ref		*rp;
+	struct tp_pcb		*tpcb;
+	struct tp_event		E;
+	int 				s = splnet(), t;
+
+	/* check only open reference structures */
+	IncStat(ts_Cticks);
+	/* tp_ref[0] is never used */
+	for (rp = tp_ref + tp_refinfo.tpr_maxopen; rp > tp_ref; rp--) {
+		if ((tpcb = rp->tpr_pcb) == 0 || tpcb->tp_refstate < REF_OPEN) 
+			continue;
+		/* check the timers */
+		for (t = 0; t < TM_NTIMERS; t++) {
+			cp = tpcb->tp_timer + t;
+			if (*cp && --(*cp) <= 0 ) {
+				*cp = 0;
+				E.ev_number = t;
+				IFDEBUG(D_TIMER)
+					printf("tp_slowtimo: pcb 0x%x t %d\n", tpcb, t);
+				ENDDEBUG
+				IncStat(ts_Cexpired);
+				tp_driver(tpcb, &E);
+				if (t == TM_reference && tpcb->tp_state == TP_CLOSED) {
+					if (tpcb->tp_notdetached) {
+						IFDEBUG(D_CONN)
+							printf("PRU_DETACH: not detached\n");
+						ENDDEBUG
+						tp_detach(tpcb);
+					}
+					/* XXX wart; where else to do it? */
+					free((caddr_t)tpcb, M_PCB);
+					break;	/* tpcb is gone: leave its remaining timers alone */
+				}
+			}
+		}
+	}
+	splx(s);
+	return 0;
+}
+
+/*
+ * Called From: tp.trans from tp_slowtimo() -- retransmission timer went off.
+ */
+tp_data_retrans(tpcb)
+register struct tp_pcb *tpcb;
+{
+	int rexmt, win;
+	tpcb->tp_rttemit = 0;	/* cancel current round trip time */
+	tpcb->tp_dupacks = 0;
+	tpcb->tp_sndnxt = tpcb->tp_snduna;	/* resume sending from the oldest unacknowledged point */
+	if (tpcb->tp_fcredit == 0) {
+		/*
+		 * We transmitted new data, started timing it and the window
+		 * got shrunk under us.  This can only happen if all data
+		 * that they wanted us to send got acked, so don't
+		 * bother shrinking the congestion windows, et. al.
+		 * The retransmission timer should have been reset in goodack()
+		 */
+		IFDEBUG(D_ACKRECV)
+			printf("tp_data_retrans: 0 window tpcb 0x%x una 0x%x\n",
+				tpcb, tpcb->tp_snduna);
+		ENDDEBUG
+		tpcb->tp_rxtshift = 0;
+		tpcb->tp_timer[TM_data_retrans] = 0;
+		tpcb->tp_timer[TM_sendack] = tpcb->tp_dt_ticks;
+		return;
+	}
+	rexmt = tpcb->tp_dt_ticks << min(tpcb->tp_rxtshift, TP_MAXRXTSHIFT);	/* backoff doubles per shift, capped */
+	win = min(tpcb->tp_fcredit, (tpcb->tp_cong_win / tpcb->tp_l_tpdusize / 2));	/* half the window, in TPDUs */
+	win = max(win, 2);
+	tpcb->tp_cong_win = tpcb->tp_l_tpdusize;	/* slow start again. */
+	tpcb->tp_ssthresh = win * tpcb->tp_l_tpdusize;
+	/* We're losing; our srtt estimate is probably bogus.
+	 * Clobber it so we'll take the next rtt measurement as our srtt;
+	 * Maintain current rxt times until then.
+	 */
+	if (++tpcb->tp_rxtshift > TP_NRETRANS / 4) {
+		/* tpcb->tp_nlprotosw->nlp_losing(tpcb->tp_npcb) someday */
+		tpcb->tp_rtt = 0;
+	}
+	TP_RANGESET(tpcb->tp_rxtcur, rexmt, tpcb->tp_peer_acktime, 128);	/* NOTE(review): presumably clamps rexmt into [tp_peer_acktime, 128] - confirm against the macro */
+	tpcb->tp_timer[TM_data_retrans] = tpcb->tp_rxtcur;
+	tp_send(tpcb);
+}
+
+int
+tp_fasttimo()
+{
+	register struct tp_pcb *t;
+	int s = splnet();
+	struct tp_event		E;
+	E.ev_number = TM_sendack;
+	while ((t = tp_ftimeolist) != (struct tp_pcb *)&tp_ftimeolist) {	/* list ends at its own head */
+		if (t == 0) {
+			printf("tp_fasttimeo: should panic");
+			tp_ftimeolist = (struct tp_pcb *)&tp_ftimeolist;
+		} else {
+			if (t->tp_flags & TPF_DELACK) {
+				IncStat(ts_Fdelack);
+				tp_driver(t, &E);
+				t->tp_flags &= ~TPF_DELACK;
+			} else
+				IncStat(ts_Fpruned);
+			tp_ftimeolist = t->tp_fasttimeo;
+			t->tp_fasttimeo = 0;
+		}
+	}
+	splx(s);
+	return 0;	/* declared int, so return a defined value as tp_slowtimo() does */
+}
+
+#ifdef TP_DEBUG_TIMERS
+/*
+ * CALLED FROM:
+ *  tp.trans, tp_emit()
+ * FUNCTION and ARGUMENTS:
+ * 	Set a C type timer of type (which) to go off after (ticks) time.
+ */
+void
+tp_ctimeout(tpcb, which, ticks)
+	register struct tp_pcb	*tpcb;
+	int 					which, ticks; 
+{
+	/* Unconditionally (re)loads timer (which) with (ticks), minimum 1. */
+	IFTRACE(D_TIMER)
+		tptrace(TPPTmisc, "tp_ctimeout ref which tpcb active", 
+			tpcb->tp_lref, which, tpcb, tpcb->tp_timer[which]);
+	ENDTRACE
+	if(tpcb->tp_timer[which])	/* timer was already running: counted as a cancel */
+		IncStat(ts_Ccan_act);
+	IncStat(ts_Cset);
+	if (ticks <= 0)
+		ticks = 1;
+	tpcb->tp_timer[which] = ticks;
+}
+
+/*
+ * CALLED FROM:
+ *  tp.trans 
+ * FUNCTION and ARGUMENTS:
+ * 	Version of tp_ctimeout that resets the C-type timer only if the 
+ * 	parameter (ticks) is < the current value of the timer.
+ */
+void
+tp_ctimeout_MIN(tpcb, which, ticks)
+	register struct tp_pcb	*tpcb;
+	int						which, ticks; 
+{
+	IFTRACE(D_TIMER)
+		tptrace(TPPTmisc, "tp_ctimeout_MIN ref which tpcb active", 
+			tpcb->tp_lref, which, tpcb, tpcb->tp_timer[which]);
+	ENDTRACE
+	IncStat(ts_Cset);
+	if (tpcb->tp_timer[which])  {
+		tpcb->tp_timer[which] = min(ticks, tpcb->tp_timer[which]);	/* running: keep the earlier expiry */
+		IncStat(ts_Ccan_act);
+	} else
+		tpcb->tp_timer[which] = ticks;	/* idle: start it; unlike tp_ctimeout, ticks is not forced to >= 1 */
+}
+
+/*
+ * CALLED FROM:
+ *  tp.trans
+ * FUNCTION and ARGUMENTS:
+ *  Cancel the (which) timer of the connection indicated by (tpcb).
+ */
+void
+tp_cuntimeout(tpcb, which)
+	register struct tp_pcb	*tpcb;
+	int						which;
+{
+	IFDEBUG(D_TIMER)
+		printf("tp_cuntimeout(0x%x, %d) active %d\n",
+				tpcb, which, tpcb->tp_timer[which]);
+	ENDDEBUG
+	/* no (refp) exists in this routine; trace tp_lref like the other timer calls */
+	IFTRACE(D_TIMER)
+		tptrace(TPPTmisc, "tp_cuntimeout ref which, active", tpcb->tp_lref, 
+			which, tpcb->tp_timer[which], 0);
+	ENDTRACE
+
+	if (tpcb->tp_timer[which])	/* count whether a running or an idle timer was cancelled */
+		IncStat(ts_Ccan_act);
+	else
+		IncStat(ts_Ccan_inact);
+	tpcb->tp_timer[which] = 0;
+}
+#endif
diff --git a/sys/netiso/tp_timer.h b/sys/netiso/tp_timer.h
new file mode 100644
index 00000000000..a6f7735586b
--- /dev/null
+++ b/sys/netiso/tp_timer.h
@@ -0,0 +1,93 @@
+/*-
+ * Copyright (c) 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)tp_timer.h	8.1 (Berkeley) 6/10/93
+ */
+
+/***********************************************************
+		Copyright IBM Corporation 1987
+
+                      All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and its 
+documentation for any purpose and without fee is hereby granted, 
+provided that the above copyright notice appear in all copies and that
+both that copyright notice and this permission notice appear in 
+supporting documentation, and that the name of IBM not be
+used in advertising or publicity pertaining to distribution of the
+software without specific, written prior permission.  
+
+IBM DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING
+ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL
+IBM BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR
+ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+SOFTWARE.
+
+******************************************************************/
+
+/*
+ * ARGO Project, Computer Sciences Dept., University of Wisconsin - Madison
+ */
+/* 
+ * ARGO TP
+ *
+ * $Header: tp_timer.h,v 5.1 88/10/12 12:21:41 root Exp $
+ * $Source: /usr/argo/sys/netiso/RCS/tp_timer.h,v $
+ *
+ * ARGO TP
+ * The callout structures used by the tp timers.
+ */
+
+#ifndef __TP_TIMER__
+#define __TP_TIMER__
+
+#define SET_DELACK(t) { /* set TPF_DELACK in (t) and queue (t) on tp_ftimeolist */ \
+    (t)->tp_flags |= TPF_DELACK; \
+    if ((t)->tp_fasttimeo == 0) /* zero link: presumably "not on the list yet" -- confirm */ \
+		{ (t)->tp_fasttimeo = tp_ftimeolist; tp_ftimeolist = (t); } } /* push at list head; NOTE(review): bare { } body, (t) is evaluated several times */
+
+#ifdef ARGO_DEBUG
+#define TP_DEBUG_TIMERS	/* selects the function versions (those guarded by TP_DEBUG_TIMERS) over the macros below */
+#endif
+
+#ifndef TP_DEBUG_TIMERS	/* macro forms: plain stores into tp_timer[], no trace or statistics calls */
+#define tp_ctimeout(tpcb, which, timo) ((tpcb)->tp_timer[which] = (timo))	/* arm; (timo) is stored as given */
+#define tp_cuntimeout(tpcb, which) ((tpcb)->tp_timer[which] = 0)	/* cancel */
+#define tp_etimeout tp_ctimeout	/* alias for tp_ctimeout */
+#define tp_euntimeout tp_cuntimeout	/* alias for tp_cuntimeout */
+#define tp_ctimeout_MIN(p, w, t) /* lowers timer (w) to (t) only when it exceeds (t); a zero timer stays zero for positive (t) */ \
+    { if((p)->tp_timer[w] > (t)) (p)->tp_timer[w] = (t);}
+#endif /* TP_DEBUG_TIMERS */
+
+#endif /* __TP_TIMER__ */
diff --git a/sys/netiso/tp_tpdu.h b/sys/netiso/tp_tpdu.h
new file mode 100644
index 00000000000..15f130d1703
--- /dev/null
+++ b/sys/netiso/tp_tpdu.h
@@ -0,0 +1,296 @@
+/*-
+ * Copyright (c) 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)tp_tpdu.h	8.1 (Berkeley) 6/10/93
+ */
+
+/***********************************************************
+		Copyright IBM Corporation 1987
+
+                      All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and its 
+documentation for any purpose and without fee is hereby granted, 
+provided that the above copyright notice appear in all copies and that
+both that copyright notice and this permission notice appear in 
+supporting documentation, and that the name of IBM not be
+used in advertising or publicity pertaining to distribution of the
+software without specific, written prior permission.  
+
+IBM DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING
+ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL
+IBM BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR
+ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+SOFTWARE.
+
+******************************************************************/
+
+/*
+ * ARGO Project, Computer Sciences Dept., University of Wisconsin - Madison
+ */
+/* 
+ * ARGO TP
+ *
+ * $Header: tp_tpdu.h,v 4.4 88/07/26 16:45:40 nhall Exp $
+ * $Source: /usr/argo/sys/netiso/RCS/tp_tpdu.h,v $
+ *
+ * This ghastly set of macros makes it possible to
+ * refer to tpdu structures without going mad.
+ */
+
+#ifndef __TP_TPDU__
+#define __TP_TPDU__
+
+#ifndef BYTE_ORDER
+/*
+ * Fallback used only when BYTE_ORDER is not already defined: byte order
+ * codes, according to byte significance from low address to high.
+ */
+#define	LITTLE_ENDIAN	1234	/* least-significant byte first (vax) */
+#define	BIG_ENDIAN	4321	/* most-significant byte first (IBM, net) */
+#define	PDP_ENDIAN	3412	/* LSB first in word, MSW first in long (pdp) */
+
+#ifdef vax	/* in this fallback only vax selects little-endian */
+#define	BYTE_ORDER	LITTLE_ENDIAN
+#else
+#define	BYTE_ORDER	BIG_ENDIAN	/* mc68000, tahoe, most others */
+#endif
+#endif /* BYTE_ORDER */
+
+/* This much of a tpdu is the same for all types of tpdus (except
+ * DT tpdus in class 0; their exceptions are handled by the data
+ * structure below, struct tp0du).
+ */
+struct tpdu_fixed {
+	u_char			_tpduf_li:8,		/* length indicator */
+#if BYTE_ORDER == LITTLE_ENDIAN	/* cdt/type are declared in mirrored order, one order per byte order */
+				_tpduf_cdt: 4,		/* credit */
+				_tpduf_type: 4;		/* type of tpdu (DT, CR, etc.) */
+#endif
+#if BYTE_ORDER == BIG_ENDIAN
+				_tpduf_type: 4,		/* type of tpdu (DT, CR, etc.) */
+				_tpduf_cdt: 4;		/* credit */
+#endif
+	u_short			_tpduf_dref;		/* destination ref; not in DT in class 0 */
+};
+/* shorthand for the fixed part, for use on a struct tpdu (its _tpduf member) */
+#define tpdu_li _tpduf._tpduf_li
+#define tpdu_type _tpduf._tpduf_type
+#define tpdu_cdt _tpduf._tpduf_cdt
+#define tpdu_dref _tpduf._tpduf_dref
+			
+struct tp0du {	/* header without a dref field; presumably the class 0 DT tpdu -- confirm */
+	u_char		_tp0_li,			/* presumably the length indicator, as _tpduf_li */
+				_tp0_cdt_type,		/* same as in tpdu_fixed */
+#if BYTE_ORDER == BIG_ENDIAN
+				_tp0_eot: 1,		/* eot */
+				_tp0_mbz: 7,		/* must be zero */
+#endif
+#if BYTE_ORDER == LITTLE_ENDIAN
+				_tp0_mbz: 7,		/* must be zero */
+				_tp0_eot: 1,		/* eot */
+#endif
+				_tp0_notused: 8;	/* data begins on this octet */
+};
+/* shorthand names for the eot and must-be-zero bit-fields of struct tp0du */
+#define tp0du_eot _tp0_eot
+#define tp0du_mbz _tp0_mbz
+			
+/*
+ * This is used when the extended format sequence numbers are
+ * being sent and received.
+ */
+				/*
+				 * the s_seqeot field is an int that overlays the seq
+				 * and eot fields; this allows the htonl operation
+				 * to be applied to the entire 32 bit quantity, and
+				 * simplifies the structure definitions.
+				 */
+union seq_type {
+	struct {
+#if BYTE_ORDER == BIG_ENDIAN
+		unsigned int	st_eot:1,		/* end-of-tsdu */
+				 		st_seq:31;		/* 31 bit sequence number */
+#endif
+#if BYTE_ORDER == LITTLE_ENDIAN
+		unsigned int	st_seq:31,		/* 31 bit sequence number */
+						st_eot:1;		/* end-of-tsdu */
+#endif
+	} st;
+	unsigned int s_seqeot;	/* seq and eot together as one word */
+#define s_eot	st.st_eot	/* shorthand usable directly on a union seq_type */
+#define s_seq	st.st_seq
+};
+
+/* Then most tpdu types have a portion that is always present but
+ * differs among the tpdu types; each variant below is one union member,
+ * and the tpdu_XX* macros name its fields as reached from a struct tpdu.
+ */
+union  tpdu_fixed_rest {
+		struct {
+			u_short		_tpdufr_sref, 		/* source reference */
+#if BYTE_ORDER == BIG_ENDIAN
+						_tpdufr_class: 4,	/* class [ ISO 8073 13.3.3.e ] */
+						_tpdufr_opt: 4,		/* options [ ISO 8073 13.3.3.e ] */
+#endif
+#if BYTE_ORDER == LITTLE_ENDIAN
+						_tpdufr_opt: 4,		/* options [ ISO 8073 13.3.3.e ] */
+						_tpdufr_class: 4,	/* class [ ISO 8073 13.3.3.e ] */
+#endif
+						_tpdufr_xx: 8;		/* unused */
+		} CRCC;	/* used by both the tpdu_CR* and tpdu_CC* macros below */
+
+#define tpdu_CRli _tpduf._tpduf_li
+#define tpdu_CRtype _tpduf._tpduf_type
+#define tpdu_CRcdt _tpduf._tpduf_cdt
+#define tpdu_CRdref_0 _tpduf._tpduf_dref
+#define tpdu_CRsref _tpdufr.CRCC._tpdufr_sref
+#define tpdu_sref _tpdufr.CRCC._tpdufr_sref
+#define tpdu_CRclass _tpdufr.CRCC._tpdufr_class
+#define tpdu_CRoptions _tpdufr.CRCC._tpdufr_opt
+
+#define tpdu_CCli _tpduf._tpduf_li
+#define tpdu_CCtype _tpduf._tpduf_type
+#define tpdu_CCcdt _tpduf._tpduf_cdt
+#define tpdu_CCdref _tpduf._tpduf_dref
+#define tpdu_CCsref _tpdufr.CRCC._tpdufr_sref
+#define tpdu_CCclass _tpdufr.CRCC._tpdufr_class
+#define tpdu_CCoptions _tpdufr.CRCC._tpdufr_opt
+
+/* OPTIONS and ADDL OPTIONS bits */
+#define TPO_USE_EFC	 			0x1
+#define TPO_XTD_FMT	 			0x2
+#define TPAO_USE_TXPD 			0x1
+#define TPAO_NO_CSUM 			0x2
+#define TPAO_USE_RCC 			0x4
+#define TPAO_USE_NXPD 			0x8
+
+		struct {
+			unsigned short _tpdufr_sref;	/* source reference */
+			unsigned char  _tpdufr_reason;	/* [ ISO 8073 13.5.3.d ] */
+		} DR;	/* reached through the tpdu_DR* macros */
+#define tpdu_DRli _tpduf._tpduf_li
+#define tpdu_DRtype _tpduf._tpduf_type
+#define tpdu_DRdref _tpduf._tpduf_dref
+#define tpdu_DRsref _tpdufr.DR._tpdufr_sref
+#define tpdu_DRreason _tpdufr.DR._tpdufr_reason
+
+		unsigned short _tpdufr_sref;	/* source reference (named by tpdu_DCsref) */
+
+#define tpdu_DCli _tpduf._tpduf_li
+#define tpdu_DCtype _tpduf._tpduf_type
+#define tpdu_DCdref _tpduf._tpduf_dref
+#define tpdu_DCsref _tpdufr._tpdufr_sref
+
+		struct {
+#if BYTE_ORDER == BIG_ENDIAN
+			unsigned char _tpdufr_eot:1,	/* end-of-tsdu */
+						  _tpdufr_seq:7; 	/* 7 bit sequence number */
+#endif
+#if BYTE_ORDER == LITTLE_ENDIAN
+			unsigned char	_tpdufr_seq:7, 	/* 7 bit sequence number */
+							_tpdufr_eot:1;	/* end-of-tsdu */
+#endif
+		}SEQEOT;	/* 7-bit form; named by the DT, XPD, AK and XAK seq/eot macros */
+		struct {
+#if BYTE_ORDER == BIG_ENDIAN
+			unsigned int	_tpdufr_Xeot:1,		/* end-of-tsdu */
+					 		_tpdufr_Xseq:31;	/* 31 bit sequence number */
+#endif
+#if BYTE_ORDER == LITTLE_ENDIAN
+			unsigned int	_tpdufr_Xseq:31,	/* 31 bit sequence number */
+							_tpdufr_Xeot:1;		/* end-of-tsdu */
+#endif
+		}SEQEOT31;	/* 31-bit form; named by the *seqX / *eotX macros */
+		unsigned int _tpdufr_Xseqeot;	/* union member overlaying SEQEOT31 as a single word */
+#define tpdu_seqeotX _tpdufr._tpdufr_Xseqeot
+
+#define tpdu_DTli _tpduf._tpduf_li
+#define tpdu_DTtype _tpduf._tpduf_type
+#define tpdu_DTdref _tpduf._tpduf_dref
+#define tpdu_DTseq _tpdufr.SEQEOT._tpdufr_seq
+#define tpdu_DTeot _tpdufr.SEQEOT._tpdufr_eot
+#define tpdu_DTseqX _tpdufr.SEQEOT31._tpdufr_Xseq
+#define tpdu_DTeotX _tpdufr.SEQEOT31._tpdufr_Xeot
+
+#define tpdu_XPDli _tpduf._tpduf_li
+#define tpdu_XPDtype _tpduf._tpduf_type
+#define tpdu_XPDdref _tpduf._tpduf_dref
+#define tpdu_XPDseq _tpdufr.SEQEOT._tpdufr_seq
+#define tpdu_XPDeot _tpdufr.SEQEOT._tpdufr_eot
+#define tpdu_XPDseqX _tpdufr.SEQEOT31._tpdufr_Xseq
+#define tpdu_XPDeotX _tpdufr.SEQEOT31._tpdufr_Xeot
+
+		struct {
+#if BYTE_ORDER == BIG_ENDIAN
+			unsigned	_tpdufr_yrseq0:1,	/* always zero */
+						_tpdufr_yrseq:31; 	/* [ ISO 8073 13.9.3.d ] */
+#endif
+#if BYTE_ORDER == LITTLE_ENDIAN
+			unsigned	_tpdufr_yrseq:31, 	/* [ ISO 8073 13.9.3.d ] */
+						_tpdufr_yrseq0:1;	/* always zero */
+#endif
+			unsigned short _tpdufr_cdt; /* [ ISO 8073 13.9.3.b ] */
+		} AK31;	/* 31-bit sequence number followed by a separate credit field */
+
+#define tpdu_AKli _tpduf._tpduf_li
+#define tpdu_AKtype _tpduf._tpduf_type
+#define tpdu_AKdref _tpduf._tpduf_dref
+#define tpdu_AKseq _tpdufr.SEQEOT._tpdufr_seq
+#define tpdu_AKseqX _tpdufr.AK31._tpdufr_yrseq
+/* location of cdt depends on size of seq. numbers */
+#define tpdu_AKcdt _tpduf._tpduf_cdt
+#define tpdu_AKcdtX _tpdufr.AK31._tpdufr_cdt
+
+#define tpdu_XAKli _tpduf._tpduf_li
+#define tpdu_XAKtype _tpduf._tpduf_type
+#define tpdu_XAKdref _tpduf._tpduf_dref
+#define tpdu_XAKseq _tpdufr.SEQEOT._tpdufr_seq
+#define tpdu_XAKseqX _tpdufr.SEQEOT31._tpdufr_Xseq
+
+		unsigned char _tpdu_ERreason;  	/* [ ISO 8073 13.12.3.c ] */
+
+#define tpdu_ERli _tpduf._tpduf_li
+#define tpdu_ERtype _tpduf._tpduf_type
+#define tpdu_ERdref _tpduf._tpduf_dref
+#define tpdu_ERreason _tpdufr._tpdu_ERreason
+
+};
+
+struct tpdu {	/* the tpdu_* accessor macros in this file expand to members of this struct */
+	struct	tpdu_fixed 		_tpduf;		/* part common to the tpdu types */
+	union 	tpdu_fixed_rest _tpdufr;	/* part that differs among the tpdu types */
+};
+
+#endif /* __TP_TPDU__ */
diff --git a/sys/netiso/tp_trace.c b/sys/netiso/tp_trace.c
new file mode 100644
index 00000000000..115597bf472
--- /dev/null
+++ b/sys/netiso/tp_trace.c
@@ -0,0 +1,175 @@
+/*-
+ * Copyright (c) 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)tp_trace.c	8.1 (Berkeley) 6/10/93
+ */
+
+/***********************************************************
+		Copyright IBM Corporation 1987
+
+                      All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and its 
+documentation for any purpose and without fee is hereby granted, 
+provided that the above copyright notice appear in all copies and that
+both that copyright notice and this permission notice appear in 
+supporting documentation, and that the name of IBM not be
+used in advertising or publicity pertaining to distribution of the
+software without specific, written prior permission.  
+
+IBM DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING
+ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL
+IBM BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR
+ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+SOFTWARE.
+
+******************************************************************/
+
+/*
+ * ARGO Project, Computer Sciences Dept., University of Wisconsin - Madison
+ */
+/* 
+ * ARGO TP
+ *
+ * $Header: tp_trace.c,v 5.3 88/11/18 17:29:14 nhall Exp $
+ * $Source: /usr/argo/sys/netiso/RCS/tp_trace.c,v $
+ *
+ * The whole protocol trace module.
+ * We keep a circular buffer of trace structures, which are big
+ * unions of different structures we might want to see.
+ * Unfortunately this gets too big pretty easily. Pcbs were removed
+ * from the tracing when the kernel got too big to boot.
+ */
+
+#define TP_TRACEFILE
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/time.h>
+
+#include <netiso/tp_param.h>
+#include <netiso/tp_timer.h>
+#include <netiso/tp_stat.h>
+#include <netiso/tp_param.h>
+#include <netiso/tp_ip.h>
+#include <netiso/tp_pcb.h>
+#include <netiso/tp_tpdu.h>
+#include <netiso/argo_debug.h>
+#include <netiso/tp_trace.h>
+
+#ifdef TPPT
+static int tp_seq = 0;	/* running count stamped into each record as tpt_tseq */
+u_char tp_traceflags[128];
+
+/*
+ * tpTrace -- fill the next slot of the circular tp_Trace[] buffer.
+ * tpcb, if non-null, supplies tp_lref for tpt_arg2; event is the type of
+ * trace event (TPPTmisc, etc.) and decides what arg, src, len, arg4 and
+ * arg5 mean.  Variable-size copies are clipped to the tpt_stuff union.
+ */
+/*ARGSUSED*/
+/*VARARGS*/
+
+void
+tpTrace(tpcb, event, arg, src, len, arg4, arg5)
+	struct tp_pcb	*tpcb;
+	u_int 			event, arg;
+	u_int	 		src;
+	u_int	 		len; 
+	u_int	 		arg4;
+	u_int	 		arg5;
+{
+	register struct tp_Trace *tp;
+
+	tp = &tp_Trace[tp_Tracen++];
+	tp_Tracen %= TPTRACEN;
+
+	tp->tpt_event = event;
+	tp->tpt_tseq = tp_seq++;
+	tp->tpt_arg = arg;
+	if(tpcb)
+		tp->tpt_arg2 = tpcb->tp_lref;
+	bcopy( (caddr_t)&time, (caddr_t)&tp->tpt_time, sizeof(struct timeval) );
+
+	switch(event) {
+	case TPPTertpdu:
+		/* the copy starts at the union, so the union's size is the limit */
+		bcopy((caddr_t)src, (caddr_t)&tp->tpt_ertpdu,
+			(unsigned)MIN((int)len, sizeof(tp->tpt_stuff)));
+		break;
+
+	case TPPTusrreq:
+	case TPPTmisc:
+		/* arg is a string; a truncated copy is still left NUL-terminated */
+		bcopy((caddr_t)arg, (caddr_t)tp->tpt_str, 
+			(unsigned)MIN(1+strlen((caddr_t) arg), TPTRACE_STRLEN));
+		tp->tpt_str[TPTRACE_STRLEN - 1] = '\0';
+		tp->tpt_m2 = src; 
+		tp->tpt_m3 = len;
+		tp->tpt_m4 = arg4;
+		tp->tpt_m1 = arg5;
+		break;
+
+	case TPPTgotXack: 
+	case TPPTXack: 
+	case TPPTsendack: 
+	case TPPTgotack: 
+	case TPPTack: 
+	case TPPTindicate: 
+	default:
+	case TPPTdriver: 
+		tp->tpt_m2 = arg; 
+		tp->tpt_m3 = src;
+		tp->tpt_m4 = len;
+		tp->tpt_m5 = arg4;
+		tp->tpt_m1 = arg5; 
+		break;
+	case TPPTparam:
+		bcopy((caddr_t)src, (caddr_t)&tp->tpt_param,
+			(unsigned)MIN(sizeof(struct tp_param), sizeof(tp->tpt_stuff)));
+		break;
+	case TPPTref:
+		bcopy((caddr_t)src, (caddr_t)&tp->tpt_ref, sizeof(struct tp_ref));
+		break;
+	case TPPTtpduin:
+	case TPPTtpduout:
+		tp->tpt_arg2 = arg4;
+		bcopy((caddr_t)src, (caddr_t)&tp->tpt_tpdu,
+		      (unsigned)MIN((int)len, sizeof(tp->tpt_stuff)));
+		break;
+	}
+}
+#endif /* TPPT */
diff --git a/sys/netiso/tp_trace.h b/sys/netiso/tp_trace.h
new file mode 100644
index 00000000000..885730549e5
--- /dev/null
+++ b/sys/netiso/tp_trace.h
@@ -0,0 +1,198 @@
+/*-
+ * Copyright (c) 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)tp_trace.h	8.1 (Berkeley) 6/10/93
+ */
+
+/***********************************************************
+		Copyright IBM Corporation 1987
+
+                      All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and its 
+documentation for any purpose and without fee is hereby granted, 
+provided that the above copyright notice appear in all copies and that
+both that copyright notice and this permission notice appear in 
+supporting documentation, and that the name of IBM not be
+used in advertising or publicity pertaining to distribution of the
+software without specific, written prior permission.  
+
+IBM DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING
+ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL
+IBM BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR
+ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+SOFTWARE.
+
+******************************************************************/
+
+/*
+ * ARGO Project, Computer Sciences Dept., University of Wisconsin - Madison
+ */
+/* 
+ * ARGO TP
+ *
+ * $Header: tp_trace.h,v 5.1 88/10/12 12:21:51 root Exp $
+ * $Source: /usr/argo/sys/netiso/RCS/tp_trace.h,v $
+ *
+ * 
+ * Definitions needed for the protocol trace mechanism.
+ */
+
+#ifndef __TP_TRACE__
+#define __TP_TRACE__
+
+
+#define TPPTsendack	1	/* events 1-6: tpTrace() records five argument words */
+#define TPPTgotack	2
+#define TPPTXack	3
+#define TPPTgotXack	4
+#define TPPTack		5
+#define TPPTindicate	6
+#define TPPTusrreq	7	/* 7, 8: tpTrace() records a string plus four words */
+#define TPPTmisc	8
+#define TPPTpcb		9	/* no case of its own in tpTrace(); handled by its default */
+#define TPPTref		10	/* tpTrace() copies a struct tp_ref */
+#define TPPTtpduin	11	/* 11, 15: tpTrace() copies tpdu bytes */
+#define TPPTparam	12	/* tpTrace() copies a parameter structure */
+#define TPPTertpdu	13	/* tpTrace() copies raw bytes over the record's union */
+#define TPPTdriver	14	/* five argument words, as for 1-6 */
+#define TPPTtpduout	15
+
+#include <netiso/tp_pcb.h>
+
+/* this #if is to avoid lint */
+
+#if  defined(TP_TRACEFILE)||!defined(KERNEL)
+
+#include <netiso/tp_tpdu.h>
+
+#define TPTRACE_STRLEN 50
+
+
+/* for packet tracing; overlaid on a trace record as tpt_stuff.tpt_Time */
+struct tp_timeval {
+	SeqNum	tptv_seq;		/* reached as tpt_seq */
+	u_int tptv_kind;		/* reached as tpt_kind */
+	u_int tptv_window;		/* reached as tpt_window */
+	u_int tptv_size;		/* reached as tpt_size */
+};
+
+struct	tp_Trace {	/* one trace record; tpTrace() fills these in */
+	u_int	tpt_event;	/* TPPT* event code */
+	u_int	tpt_arg;	/* tpTrace()'s arg, stored for every event */
+	u_int 	tpt_arg2;	/* tp_lref of the pcb when one is given; arg4 for tpdu in/out events */
+	int	tpt_tseq;	/* sequence number assigned by tpTrace() */
+	struct timeval	tpt_time;	/* copy of the kernel variable time */
+	union {	/* which member is meaningful depends on tpt_event */
+		struct inpcb	tpt_Inpcb; /* protocol control block */
+		struct tp_ref 	tpt_Ref; /* ref part of pcb */
+		struct tpdu 	tpt_Tpdu; /* header*/
+		struct tp_refinfo tpt_Param; /* ?? bytes, make sure < 128??  NOTE(review): declared tp_refinfo although named "param" -- confirm type */
+		struct tp_timeval tpt_Time;
+		struct {
+			u_int tptm_2;
+			u_int tptm_3;
+			u_int tptm_4;
+			u_int tptm_5;
+			char tpt_Str[TPTRACE_STRLEN];
+			u_int tptm_1;
+		} tptmisc;	/* argument words plus an optional message string */
+		u_char 			tpt_Ertpdu; /* use rest of structure */
+	} tpt_stuff;
+};
+#define tpt_inpcb tpt_stuff.tpt_Inpcb
+#define tpt_pcb tpt_stuff.tpt_Pcb	/* NOTE(review): the union above has no tpt_Pcb member; any use would not compile */
+#define tpt_ref tpt_stuff.tpt_Ref
+#define tpt_tpdu tpt_stuff.tpt_Tpdu
+#define tpt_param tpt_stuff.tpt_Param
+#define tpt_ertpdu tpt_stuff.tpt_Ertpdu
+#define tpt_str tpt_stuff.tptmisc.tpt_Str
+#define tpt_m1 tpt_stuff.tptmisc.tptm_1
+#define tpt_m2 tpt_stuff.tptmisc.tptm_2
+#define tpt_m3 tpt_stuff.tptmisc.tptm_3
+#define tpt_m4 tpt_stuff.tptmisc.tptm_4
+#define tpt_m5 tpt_stuff.tptmisc.tptm_5
+/* accessors for the struct tp_timeval view of the union */
+#define tpt_seq tpt_stuff.tpt_Time.tptv_seq
+#define tpt_kind tpt_stuff.tpt_Time.tptv_kind
+#define tpt_window tpt_stuff.tpt_Time.tptv_window
+#define tpt_size tpt_stuff.tpt_Time.tptv_size
+
+#endif /* defined(TP_TRACEFILE)||!defined(KERNEL) */
+
+
+#ifdef TPPT
+#define TPTRACEN 300	/* slots in the circular tp_Trace[] buffer */
+
+#define tptrace(A,B,C,D,E,F)	/* trace an event with a null pcb */ \
+	tpTrace((struct tp_pcb *)0,\
+	(u_int)(A),(u_int)(B),(u_int)(C),(u_int)(D),(u_int)(E),(u_int)(F))
+
+#define tptraceTPCB(A,B,C,D,E,F)	/* uses the caller's variable tpcb */ \
+	tpTrace(tpcb,\
+	(u_int)(A),(u_int)(B),(u_int)(C),(u_int)(D),(u_int)(E),(u_int)(F))
+
+extern void tpTrace();
+extern struct tp_Trace tp_Trace[];
+extern u_char	tp_traceflags[];
+#ifdef TP_TRACEFILE
+int tp_Tracen = 0;	/* next slot to fill; defined once, in the file defining TP_TRACEFILE */
+#else
+extern int tp_Tracen;	/* every other includer only references it */
+#endif
+
+#define IFTRACE(ascii)	/* opens a block run when tp_traceflags[ascii] != 0 */ \
+	if(tp_traceflags[ascii]) {
+/* 
+ * for some reason lint complains about tp_param being undefined no
+ * matter where or how many times I define it.
+ */
+#define ENDTRACE  }
+
+#else  /* TPPT */
+/***********************************************
+ * NO TPPT TRACE STUFF
+ **********************************************/
+#define TPTRACEN 1
+
+#define tptrace(A,B,C,D,E,F) 0
+#define tptraceTPCB(A,B,C,D,E,F) 0
+
+#define IFTRACE(ascii)	 if (0) {
+#define ENDTRACE	 }
+#endif /* TPPT */
+
+
+
+#endif /* __TP_TRACE__ */
diff --git a/sys/netiso/tp_user.h b/sys/netiso/tp_user.h
new file mode 100644
index 00000000000..b81491b7650
--- /dev/null
+++ b/sys/netiso/tp_user.h
@@ -0,0 +1,162 @@
+/*-
+ * Copyright (c) 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)tp_user.h	8.1 (Berkeley) 6/10/93
+ */
+
+/***********************************************************
+		Copyright IBM Corporation 1987
+
+                      All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and its 
+documentation for any purpose and without fee is hereby granted, 
+provided that the above copyright notice appear in all copies and that
+both that copyright notice and this permission notice appear in 
+supporting documentation, and that the name of IBM not be
+used in advertising or publicity pertaining to distribution of the
+software without specific, written prior permission.  
+
+IBM DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING
+ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL
+IBM BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR
+ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+SOFTWARE.
+
+******************************************************************/
+
+/*
+ * ARGO Project, Computer Sciences Dept., University of Wisconsin - Madison
+ */
+/* 
+ * ARGO TP
+ *
+ * $Header: tp_user.h,v 5.2 88/11/04 15:44:44 nhall Exp $
+ * $Source: /usr/argo/sys/netiso/RCS/tp_user.h,v $
+ *
+ * These are the values a real-live user ;-) needs. 
+ */
+
+#ifndef _TYPES_
+#include  <sys/types.h>
+#endif
+
+#ifndef __TP_USER__
+#define __TP_USER__
+
+struct tp_conn_param {
+	/* PER CONNECTION parameters */
+	short	p_Nretrans; 	/* presumably the retransmission limit -- confirm */
+	short	p_dr_ticks;	/* the p_*_ticks members are presumably timer values in ticks -- confirm */
+
+	short	p_cc_ticks;
+	short	p_dt_ticks;
+
+	short	p_x_ticks;
+	short	p_cr_ticks;
+
+	short	p_keepalive_ticks;
+	short	p_sendack_ticks;
+
+	short	p_ref_ticks;
+	short	p_inact_ticks;
+
+	short	p_ptpdusize;	/* preferred tpdusize/128 */
+	short	p_winsize;
+
+	u_char	p_tpdusize; 	/* log 2 of size */
+
+	u_char	p_ack_strat;	/* see comments in tp_pcb.h; presumably takes the TPACK_* values -- confirm */
+	u_char	p_rx_strat;	/* see comments in tp_pcb.h; presumably takes the TPRX_* bits -- confirm */
+	u_char	p_class;	 	/* class bitmask */
+	u_char	p_xtd_format;
+	u_char	p_xpd_service;
+	u_char	p_use_checksum;
+	u_char	p_use_nxpd; 	/* netwk expedited data: not implemented */
+	u_char	p_use_rcc;	/* receipt confirmation: not implemented */
+	u_char	p_use_efc;	/* explicit flow control: not implemented */
+	u_char	p_no_disc_indications;	/* don't deliver indic on disc */
+	u_char	p_dont_change_params;	/* use these params as they are */
+	u_char	p_netservice;
+	u_char	p_version;	/* only here for checking */
+};
+
+/*
+ * These sockopt level definitions should be considered for socket.h
+ */
+#define	SOL_TRANSPORT	0xfffe
+#define	SOL_NETWORK	0xfffd
+
+/* get/set socket opt commands */
+#define		TPACK_WINDOW	0x0 /* ack only on full window */
+#define		TPACK_EACH		0x1 /* ack every packet */
+
+#define		TPRX_USE_CW		0x8 /* use congestion window transmit */
+#define		TPRX_EACH		0x4 /* retrans each packet of a set */
+#define		TPRX_FASTSTART	0x1 /* don't use slow start */
+
+#define TPOPT_INTERCEPT		0x200	/* TPOPT_* values step by 0x100; 0xd00 is not assigned here */
+#define TPOPT_FLAGS			0x300
+#define TPOPT_CONN_DATA		0x400 
+#define TPOPT_DISC_DATA		0x500 
+#define TPOPT_CFRM_DATA		0x600 
+#define TPOPT_CDDATA_CLEAR	0x700 
+#define TPOPT_MY_TSEL		0x800 
+#define TPOPT_PEER_TSEL		0x900 
+#define TPOPT_PERF_MEAS		0xa00
+#define TPOPT_PSTATISTICS	0xb00
+#define TPOPT_PARAMS		0xc00 /* to replace a bunch of the others */
+#define TPOPT_DISC_REASON	0xe00
+
+struct tp_disc_reason {	/* a cmsghdr followed by a reason code; presumably goes with TPOPT_DISC_REASON -- confirm */
+	struct cmsghdr dr_hdr;	/* NOTE(review): this header itself includes only <sys/types.h>; presumably users include <sys/socket.h> first */
+	u_int	dr_reason;
+};
+
+/* 
+ ***********************flags**********************************
+ */
+
+/* read only flags: single-bit u_char masks (presumably reported via TPOPT_FLAGS -- confirm) */
+#define TPFLAG_NLQOS_PDN		(u_char)0x01
+#define TPFLAG_PEER_ON_SAMENET	(u_char)0x02
+#define TPFLAG_GENERAL_ADDR		(u_char)0x04 /* bound to wildcard addr */
+
+
+/* 
+ ***********************end flags******************************
+ */
+
+
+#endif /* __TP_USER__ */
diff --git a/sys/netiso/tp_usrreq.c b/sys/netiso/tp_usrreq.c
new file mode 100644
index 00000000000..8060c947f54
--- /dev/null
+++ b/sys/netiso/tp_usrreq.c
@@ -0,0 +1,756 @@
+/*-
+ * Copyright (c) 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)tp_usrreq.c	8.1 (Berkeley) 6/10/93
+ */
+
+/***********************************************************
+				Copyright IBM Corporation 1987
+
+                      All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and its 
+documentation for any purpose and without fee is hereby granted, 
+provided that the above copyright notice appear in all copies and that
+both that copyright notice and this permission notice appear in 
+supporting documentation, and that the name of IBM not be
+used in advertising or publicity pertaining to distribution of the
+software without specific, written prior permission.  
+
+IBM DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING
+ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL
+IBM BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR
+ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+SOFTWARE.
+
+******************************************************************/
+
+/*
+ * ARGO Project, Computer Sciences Dept., University of Wisconsin - Madison
+ */
+/* 
+ * ARGO TP
+ *
+ * $Header: tp_usrreq.c,v 5.4 88/11/18 17:29:18 nhall Exp $
+ * $Source: /usr/argo/sys/netiso/RCS/tp_usrreq.c,v $
+ *
+ * tp_usrreq(), the fellow that gets called from most of the socket code.
+ * Pretty straightforward.
+ * The only really awful stuff here is the OOB processing, which is done
+ * wholly here.
+ * tp_rcvoob() and tp_sendoob() are contained here and called by tp_usrreq().
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/domain.h>
+#include <sys/protosw.h>
+#include <sys/errno.h>
+#include <sys/time.h>
+
+#include <netiso/tp_param.h>
+#include <netiso/tp_timer.h>
+#include <netiso/tp_stat.h>
+#include <netiso/tp_seq.h>
+#include <netiso/tp_ip.h>
+#include <netiso/tp_pcb.h>
+#include <netiso/argo_debug.h>
+#include <netiso/tp_trace.h>
+#include <netiso/tp_meas.h>
+#include <netiso/iso.h>
+#include <netiso/iso_errno.h>
+
+int tp_attach(), tp_driver(), tp_pcbbind();
+int TNew;
+int TPNagle1, TPNagle2;
+struct tp_pcb *tp_listeners, *tp_intercepts;
+
+#ifdef ARGO_DEBUG
+/*
+ * CALLED FROM:
+ *  any place that wants to look at an mbuf chain while debugging.
+ * FUNCTION and ARGUMENTS:
+ *  Prints the label (str), then walks the chain (n): records are followed
+ *  through m_act and the mbufs of one record through m_next.  For every
+ *  mbuf the address, length, data pointer, m_act, m_next and type are
+ *  printed.  Prints "EMPTY:" for a null chain, and stops with "LOOP!"
+ *  when an mbuf's m_next points back at the mbuf itself.
+ */
+void
+dump_mbuf(n, str)
+	struct mbuf *n;
+	char *str;
+{
+	struct mbuf *rec, *mb;
+
+	printf("dump %s\n", str);
+
+	if (n == MNULL)  {
+		printf("EMPTY:\n");
+		return;
+	}
+
+	/* outer walk: one iteration per record */
+	for (rec = n; rec != MNULL; rec = rec->m_act) {
+		printf("RECORD:\n");
+		/* inner walk: one iteration per mbuf of this record */
+		for (mb = rec; mb != MNULL; mb = mb->m_next) {
+			printf("%x : Len %x Data %x A %x Nx %x Tp %x\n",
+				mb, mb->m_len, mb->m_data, mb->m_act, mb->m_next, mb->m_type);
+#ifdef notdef
+			{
+				register char *bytes = mtod(mb, char *);
+				register int k;
+
+				printf("data: ");
+				for (k = 0; k < mb->m_len; k++) {
+					if (k%8 == 0)
+						printf("\n");
+					printf("0x%x ", bytes[k]);
+				}
+				printf("\n");
+			}
+#endif /* notdef */
+			/* an mbuf linked to itself would keep us here forever */
+			if (mb->m_next == mb) {
+				printf("LOOP!\n");
+				return;
+			}
+		}
+	}
+	printf("\n");
+}
+
+#endif /* ARGO_DEBUG */
+
+/*
+ * CALLED FROM:
+ *  tp_usrreq(), PRU_RCVOOB
+ * FUNCTION and ARGUMENTS:
+ * 	Copy data from the expedited data socket buffer into
+ * 	the pre-allocated mbuf m.
+ * 	There is an isomorphism between XPD TPDUs and expedited data TSDUs.
+ * 	XPD tpdus are limited to 16 bytes of data so they fit in one mbuf.
+ * 	The first record of type MT_OOBDATA found in so->so_rcv is copied;
+ * 	unless MSG_PEEK is set in (inflags) that record is also unlinked
+ * 	from the buffer and freed.  (outflags) is not used.
+ * RETURN VALUE:
+ *  ENOBUFS if no mbuf (m) was supplied
+ *  ENOTCONN if the socket isn't connected
+ *  EWOULDBLOCK if the socket is in non-blocking mode and there's no
+ *		xpd data in the buffer
+ *  whatever sblock() or sbwait() returns when taking the buffer lock or
+ *		waiting for data fails (e.g. the sleep was interrupted)
+ *  E* whatever is returned from the fsm.
+ */
+tp_rcvoob(tpcb, so, m, outflags, inflags)
+	struct tp_pcb	*tpcb;
+	register struct socket	*so;
+	register struct mbuf 	*m;
+	int 		 	*outflags;
+	int 		 	inflags;
+{
+	register struct mbuf *n;
+	register struct sockbuf *sb = &so->so_rcv;
+	struct tp_event E;
+	int error = 0;
+	register struct mbuf **nn;
+
+	IFDEBUG(D_XPD)
+		printf("PRU_RCVOOB, sostate 0x%x\n", so->so_state);
+	ENDDEBUG
+
+	/* if you use soreceive */
+	if (m == MNULL)
+		return ENOBUFS;
+
+restart:
+	if ((((so->so_state & SS_ISCONNECTED) == 0)
+		 || (so->so_state & SS_ISDISCONNECTING) != 0) &&
+		(so->so_proto->pr_flags & PR_CONNREQUIRED)) {
+			return ENOTCONN;
+	}
+
+	/* Take the first mbuf off the chain.
+	 * Each XPD TPDU gives you a complete TSDU so the chains don't get 
+	 * coalesced, but one TSDU may span several mbufs.
+	 * Nevertheless, since n should have at most 16 bytes, it
+	 * will fit into m.  (size was checked in tp_input() )
+	 */
+
+	/*
+	 * Code for excision of OOB data should be added to
+	 * uipc_socket2.c (like sbappend).
+	 */
+
+	/*
+	 * Do not go on without the lock: if taking it fails, report the
+	 * failure to the caller instead of touching the buffer unlocked.
+	 */
+	if (error = sblock(sb, M_WAITOK))
+		return error;
+
+	/* nn is left pointing at the link that refers to the OOB record */
+	for (nn = &sb->sb_mb; n = *nn; nn = &n->m_act)
+		if (n->m_type == MT_OOBDATA)
+			break;
+
+	if (n == 0) {
+		IFDEBUG(D_XPD)
+			printf("RCVOOB: empty queue!\n");
+		ENDDEBUG
+		sbunlock(sb);
+		if (so->so_state & SS_NBIO) {
+			return  EWOULDBLOCK;
+		}
+		/*
+		 * A failed wait must end the request; retrying
+		 * unconditionally would loop here for as long as the
+		 * wait keeps failing.
+		 */
+		if (error = sbwait(sb))
+			return error;
+		goto restart;
+	}
+	m->m_len = 0;
+
+	/* Assuming at most one xpd tpdu is in the buffer at once */
+	while (n != MNULL) {
+		m->m_len += n->m_len;
+		bcopy(mtod(n, caddr_t), mtod(m, caddr_t), (unsigned)n->m_len);
+		m->m_data += n->m_len; /* so mtod() in bcopy() above gives right addr */
+		n = n->m_next;
+	}
+	/* undo the pointer advances made while copying */
+	m->m_data = m->m_dat;
+	m->m_flags |= M_EOR;
+
+	IFDEBUG(D_XPD)
+		printf("tp_rcvoob: xpdlen 0x%x\n", m->m_len);
+		dump_mbuf(so->so_rcv.sb_mb, "RCVOOB: Rcv socketbuf");
+		dump_mbuf(sb->sb_mb, "RCVOOB: Xrcv socketbuf");
+	ENDDEBUG
+
+	if ((inflags & MSG_PEEK) == 0) {
+		/* unlink the record and release every mbuf in it */
+		n = *nn;
+		*nn = n->m_act;
+		for (; n; n = m_free(n)) 
+			sbfree(sb, n);
+	}
+
+	sbunlock(sb);
+
+	IFTRACE(D_XPD)
+		tptraceTPCB(TPPTmisc, "PRU_RCVOOB @ release sb_cc m_len",
+			tpcb->tp_Xrcv.sb_cc, m->m_len, 0, 0);
+	ENDTRACE
+	if (error == 0)
+		error = DoEvent(T_USR_Xrcvd); 
+	return error;
+}
+
+/*
+ * CALLED FROM:
+ *  tp_usrreq(), PRU_SENDOOB
+ * FUNCTION and ARGUMENTS:
+ * 	Send what's in the mbuf chain (xdata) as an XPD TPDU.
+ * 	The chain may not contain more than 16 bytes of data
+ * 	(TP_MAX_XPD_DATA).
+ * 	XPD TSDUs aren't segmented, so they translate into
+ * 	exactly one XPD TPDU, with EOT bit set.
+ * 	A null (xdata) is sent as an empty expedited packet.
+ * 	This routine takes over (xdata): on success it is appended to the
+ * 	expedited send buffer, and on the EWOULDBLOCK and EMSGSIZE returns
+ * 	it is freed here, because the PRU_SENDOOB case of tp_usrreq() does
+ * 	not free it after an error.
+ * 	(outflags) is not used.
+ * RETURN VALUE:
+ *  EWOULDBLOCK if socket is in non-blocking mode and the previous
+ *   xpd data haven't been acked yet.
+ *  EMSGSIZE if trying to send > max-xpd bytes (16)
+ *  ENOBUFS if ran out of mbufs
+ *  otherwise whatever the fsm returns for T_XPD_req.
+ */
+tp_sendoob(tpcb, so, xdata, outflags)
+	struct tp_pcb	*tpcb;
+	register struct socket	*so;
+	register struct mbuf 	*xdata;
+	int 		 	*outflags; /* not used */
+{
+	/*
+	 * Each mbuf chain represents a sequence # in the XPD seq space.
+	 * The first one in the queue has sequence # tp_Xuna.
+	 * When we add to the XPD queue, we stuff a zero-length
+	 * mbuf (mark) into the DATA queue, with its sequence number in m_next
+	 * to be assigned to this XPD tpdu, so data xfer can stop
+	 * when it reaches the zero-length mbuf if this XPD TPDU hasn't
+	 * yet been acknowledged.  
+	 */
+	register struct sockbuf *sb = &(tpcb->tp_Xsnd);
+	register struct mbuf 	*xmark;
+	register int 			len=0;
+	struct tp_event E;
+
+	IFDEBUG(D_XPD)
+		printf("tp_sendoob:");
+		if (xdata)
+			printf("xdata len 0x%x\n", xdata->m_len);
+	ENDDEBUG
+	/* DO NOT LOCK the Xsnd buffer!!!! You can have at MOST one 
+	 * socket buf locked at any time!!! (otherwise you might
+	 * sleep() in sblock() w/ a signal pending and cause the
+	 * system call to be aborted w/ a locked socketbuf, which
+	 * is a problem.  So the so_snd buffer lock
+	 * (done in sosend()) serves as the lock for Xpd.
+	 */
+	if (sb->sb_mb) { /* Anything already in eXpedited data sockbuf? */
+		if (so->so_state & SS_NBIO) {
+			/* nobody else will release the caller's chain */
+			if (xdata)
+				m_freem(xdata);
+			return EWOULDBLOCK;
+		}
+		while (sb->sb_mb) {
+			sbunlock(&so->so_snd); /* already locked by sosend */
+			sbwait(&so->so_snd);
+			sblock(&so->so_snd, M_WAITOK);  /* sosend will unlock on return */
+		}
+	}
+
+	if (xdata == (struct mbuf *)0) {
+		/* empty xpd packet */
+		MGETHDR(xdata, M_WAIT, MT_OOBDATA);
+		if (xdata == NULL) {
+			return ENOBUFS;
+		}
+		xdata->m_len = 0;
+		xdata->m_pkthdr.len = 0;
+	}
+	IFDEBUG(D_XPD)
+		printf("tp_sendoob 1:");
+		if (xdata)
+			printf("xdata len 0x%x\n", xdata->m_len);
+	ENDDEBUG
+	/* add up the length of the whole chain */
+	xmark = xdata; /* temporary use of variable xmark */
+	while (xmark) {
+		len += xmark->m_len;
+		xmark = xmark->m_next;
+	}
+	if (len > TP_MAX_XPD_DATA) {
+		/* too long to send: drop the chain rather than leak it */
+		m_freem(xdata);
+		return EMSGSIZE;
+	}
+	IFDEBUG(D_XPD)
+		printf("tp_sendoob 2:");
+		if (xdata)
+			printf("xdata len 0x%x\n", len);
+	ENDDEBUG
+
+
+	IFTRACE(D_XPD)
+		tptraceTPCB(TPPTmisc, "XPD mark m_next ", xdata->m_next, 0, 0, 0);
+	ENDTRACE
+
+	/* from here on the chain belongs to the expedited send buffer */
+	sbappendrecord(sb, xdata);	
+
+	IFDEBUG(D_XPD)
+		printf("tp_sendoob len 0x%x\n", len);
+		dump_mbuf(so->so_snd.sb_mb, "XPD request Regular sndbuf:");
+		dump_mbuf(tpcb->tp_Xsnd.sb_mb, "XPD request Xsndbuf:");
+	ENDDEBUG
+	return DoEvent(T_XPD_req); 
+}
+
+/*
+ * CALLED FROM:
+ *  the socket routines
+ * FUNCTION and ARGUMENTS:
+ * 	Handles all "user requests" except the [gs]ockopts() requests.
+ * 	The argument (req) is the request type (PRU*), 
+ * 	(m) is an mbuf chain, generally used for send and
+ * 	receive type requests only.
+ * 	(nam) is used for addresses usually, in particular for the bind request;
+ * 	for PRU_ATTACH, PRU_RCVD and PRU_RCVOOB it is cast to an int instead.
+ * 	(controlp) is control data for the send requests; whatever is left
+ * 	of it at the end of the routine is freed here.
+ * 	The whole routine runs between splnet() and splx().
+ * RETURN VALUE:
+ * 	0 or an errno value.  ENOTCONN is returned for every request other
+ * 	than PRU_ATTACH when the socket has no tpcb; requests that are not
+ * 	handled yield EOPNOTSUPP.
+ */
+/*ARGSUSED*/
+ProtoHook
+tp_usrreq(so, req, m, nam, controlp)
+	struct socket *so;
+	u_int req;
+	struct mbuf *m, *nam, *controlp;
+{	
+	register struct tp_pcb *tpcb =  sototpcb(so);
+	/* value returned by splnet(); handed to splx() on every way out */
+	int s = splnet();
+	int error = 0;
+	int flags, *outflags = &flags; 
+	u_long eotsdu = 0;
+	/* NOTE(review): E is presumably what the DoEvent() macro hands to the fsm -- confirm */
+	struct tp_event E;
+
+	IFDEBUG(D_REQUEST)
+		printf("usrreq(0x%x,%d,0x%x,0x%x,0x%x)\n",so,req,m,nam,outflags);
+		if (so->so_error)
+			printf("WARNING!!! so->so_error is 0x%x\n", so->so_error);
+	ENDDEBUG
+	IFTRACE(D_REQUEST)
+		tptraceTPCB(TPPTusrreq, "req so m state [", req, so, m, 
+			tpcb?tpcb->tp_state:0);
+	ENDTRACE
+
+	/* only PRU_ATTACH may be issued on a socket that has no tpcb yet */
+	if ((u_int)tpcb == 0 && req != PRU_ATTACH) {
+		IFTRACE(D_REQUEST)
+			tptraceTPCB(TPPTusrreq, "req failed NO TPCB[", 0, 0, 0, 0);
+		ENDTRACE
+		splx(s);
+		return ENOTCONN;
+	}
+
+	switch (req) {
+
+	case PRU_ATTACH:
+		/* kludge - nam is passed to tp_attach() as an int here */
+		if (tpcb) {
+			error = EISCONN;
+		} else if ((error = tp_attach(so, (int)nam)) == 0)
+			tpcb = sototpcb(so);
+		break;
+
+	case PRU_ABORT: 	/* called from close() */
+		/* called for each incoming connect queued on the 
+		 *	parent (accepting) socket 
+		 */
+		if (tpcb->tp_state == TP_OPEN || tpcb->tp_state == TP_CONFIRMING) {
+			E.ATTR(T_DISC_req).e_reason = E_TP_NO_SESSION;
+			error = DoEvent(T_DISC_req); /* pretend it was a close() */
+			break;
+		} /* else DROP THROUGH */
+
+	case PRU_DETACH: 	/* called from close() */
+		/* called only after disconnect was called */
+		error = DoEvent(T_DETACH);
+		/* once the state is TP_CLOSED the tpcb is released right here */
+		if (tpcb->tp_state == TP_CLOSED) {
+			if (tpcb->tp_notdetached) {
+				IFDEBUG(D_CONN)
+					printf("PRU_DETACH: not detached\n");
+				ENDDEBUG
+				tp_detach(tpcb);
+			}
+			free((caddr_t)tpcb, M_PCB);
+			/* cleared so the debug and trace code below does not touch freed memory */
+			tpcb = 0;
+		}
+		break;
+
+	case PRU_SHUTDOWN:
+		/* recv end may have been released; local credit might be zero  */
+	case PRU_DISCONNECT:
+		E.ATTR(T_DISC_req).e_reason = E_TP_NORMAL_DISC;
+		error = DoEvent(T_DISC_req);
+		break;
+
+	case PRU_BIND:
+		error =  tp_pcbbind(tpcb, nam);
+		break;
+
+	case PRU_LISTEN:
+		/* only a closed tpcb with a local suffix and a non-null tp_next may listen */
+		if (tpcb->tp_state != TP_CLOSED || tpcb->tp_lsuffixlen == 0 ||
+				tpcb->tp_next == 0)
+			error = EINVAL;
+		else {
+			register struct tp_pcb **tt;
+			/* take the tpcb off the queue it is on and make it point at itself */
+			remque(tpcb);
+			tpcb->tp_next = tpcb->tp_prev = tpcb;
+			/* link it into tp_listeners in front of the first entry that has a local suffix */
+			for (tt = &tp_listeners; *tt; tt = &((*tt)->tp_nextlisten))
+				if ((*tt)->tp_lsuffixlen)
+					break;
+			tpcb->tp_nextlisten = *tt;
+			*tt = tpcb;
+			error = DoEvent(T_LISTEN_req);
+		}
+		break;
+
+	case PRU_CONNECT2:
+		error = EOPNOTSUPP; /* for unix domain sockets */
+		break;
+
+	case PRU_CONNECT:
+		IFTRACE(D_CONN)
+			tptraceTPCB(TPPTmisc, 
+			"PRU_CONNECT: so 0x%x *SHORT_LSUFXP(tpcb) 0x%x lsuflen 0x%x, class 0x%x",
+			tpcb->tp_sock, *SHORT_LSUFXP(tpcb), tpcb->tp_lsuffixlen,
+				tpcb->tp_class);
+		ENDTRACE
+		IFDEBUG(D_CONN)
+			printf("PRU_CONNECT: so *SHORT_LSUFXP(tpcb) 0x%x lsuflen 0x%x, class 0x%x",
+			tpcb->tp_sock, *SHORT_LSUFXP(tpcb), tpcb->tp_lsuffixlen,
+				tpcb->tp_class);
+		ENDDEBUG
+		/* no local suffix yet: bind with a null address first */
+		if (tpcb->tp_lsuffixlen == 0) {
+			if (error = tp_pcbbind(tpcb, MNULL)) {
+				IFDEBUG(D_CONN)
+					printf("pcbbind returns error 0x%x\n", error);
+				ENDDEBUG
+				break;
+			}
+		}
+		IFDEBUG(D_CONN)
+			printf("isop 0x%x isop->isop_socket offset 12 :\n", tpcb->tp_npcb);
+			dump_buf(tpcb->tp_npcb, 16);
+		ENDDEBUG
+		if (error = tp_route_to(nam, tpcb, /* channel */0))
+			break;
+		IFDEBUG(D_CONN)
+			printf(
+				"PRU_CONNECT after tpcb 0x%x so 0x%x npcb 0x%x flags 0x%x\n", 
+				tpcb, so, tpcb->tp_npcb, tpcb->tp_flags);
+			printf("isop 0x%x isop->isop_socket offset 12 :\n", tpcb->tp_npcb);
+			dump_buf(tpcb->tp_npcb, 16);
+		ENDDEBUG
+		if (tpcb->tp_fsuffixlen ==  0) {
+			/* didn't set peer extended suffix */
+			(tpcb->tp_nlproto->nlp_getsufx)(tpcb->tp_npcb, &tpcb->tp_fsuffixlen,
+				tpcb->tp_fsuffix, TP_FOREIGN);
+		}
+		/* a connect request is only issued from the closed state */
+		if (tpcb->tp_state == TP_CLOSED) {
+			soisconnecting(so);  
+			error = DoEvent(T_CONN_req);
+		} else {
+			(tpcb->tp_nlproto->nlp_pcbdisc)(tpcb->tp_npcb);
+			error = EISCONN;
+		}
+		IFPERF(tpcb)
+			u_int lsufx, fsufx;
+			lsufx = *(u_short *)(tpcb->tp_lsuffix);
+			fsufx = *(u_short *)(tpcb->tp_fsuffix);
+
+			tpmeas(tpcb->tp_lref, 
+				TPtime_open | (tpcb->tp_xtd_format << 4), 
+				&time, lsufx, fsufx, tpcb->tp_fref);
+		ENDPERF
+		break;
+
+	case PRU_ACCEPT: 
+		/* the network layer fills nam with the foreign address */
+		(tpcb->tp_nlproto->nlp_getnetaddr)(tpcb->tp_npcb, nam, TP_FOREIGN);
+		IFDEBUG(D_REQUEST)
+			printf("ACCEPT PEERADDDR:");
+			dump_buf(mtod(nam, char *), nam->m_len);
+		ENDDEBUG
+		IFPERF(tpcb)
+			u_int lsufx, fsufx;
+			lsufx = *(u_short *)(tpcb->tp_lsuffix);
+			fsufx = *(u_short *)(tpcb->tp_fsuffix);
+
+			tpmeas(tpcb->tp_lref, TPtime_open, 
+				&time, lsufx, fsufx, tpcb->tp_fref);
+		ENDPERF
+		break;
+
+	case PRU_RCVD:
+		/* while the socket is still confirming, this request only confirms the connection */
+		if (so->so_state & SS_ISCONFIRMING) {
+			if (tpcb->tp_state == TP_CONFIRMING)
+				error = tp_confirm(tpcb);
+			break;
+		}
+		/* NOTE(review): LOCAL_CREDIT() sits inside the trace block, so it is only reached when that block is active -- confirm this is intended */
+		IFTRACE(D_DATA)
+			tptraceTPCB(TPPTmisc,
+			"RCVD BF: lcredit sent_lcdt cc hiwat \n",
+				tpcb->tp_lcredit, tpcb->tp_sent_lcdt,
+				so->so_rcv.sb_cc, so->so_rcv.sb_hiwat);
+			LOCAL_CREDIT(tpcb);
+			tptraceTPCB(TPPTmisc, 
+				"PRU_RCVD AF sbspace lcredit hiwat cc",
+				sbspace(&so->so_rcv), tpcb->tp_lcredit,
+				so->so_rcv.sb_cc, so->so_rcv.sb_hiwat);
+		ENDTRACE
+		IFDEBUG(D_REQUEST)
+			printf("RCVD: cc %d space %d hiwat %d\n",
+				so->so_rcv.sb_cc, sbspace(&so->so_rcv),
+				so->so_rcv.sb_hiwat);
+		ENDDEBUG
+		/* kludge - nam is really flags here */
+		if (((int)nam) & MSG_OOB)
+			error = DoEvent(T_USR_Xrcvd); 
+		else 
+			error = DoEvent(T_USR_rcvd); 
+		break;
+
+	case PRU_RCVOOB:
+		if ((so->so_state & SS_ISCONNECTED) == 0) {
+			error = ENOTCONN;
+			break;
+		}
+		if (! tpcb->tp_xpd_service) {
+			error = EOPNOTSUPP;
+			break;
+		}
+		/* kludge - nam is really flags here */
+		error = tp_rcvoob(tpcb, so, m, outflags, (int)nam);
+		break;
+
+	case PRU_SEND:
+	case PRU_SENDOOB:
+		/*
+		 * NOTE(review): several of the early "break"s in this case
+		 * leave (m) neither queued nor freed -- confirm who is
+		 * expected to release it on those paths.
+		 */
+		if (controlp) {
+			/* controlp is cleared right after the call, so the check at the end of this routine will not free it again */
+			error = tp_snd_control(controlp, so, &m);
+			controlp = NULL;
+			if (error)
+				break;
+		}
+		if ((so->so_state & SS_ISCONFIRMING) &&
+		    (tpcb->tp_state == TP_CONFIRMING) &&
+		    (error = tp_confirm(tpcb)))
+			    break;
+		if (req == PRU_SENDOOB) {
+			error = (tpcb->tp_xpd_service == 0) ?
+						EOPNOTSUPP : tp_sendoob(tpcb, so, m, outflags);
+			break;
+		}
+		if (m == 0)
+			break;
+		/* remember the end-of-record mark in eotsdu and strip it from the mbuf */
+		if (m->m_flags & M_EOR) {
+			eotsdu = 1;
+			m->m_flags &= ~M_EOR;
+		}
+		/* nothing to send: no data and no end-of-tsdu mark */
+		if (eotsdu == 0 && m->m_pkthdr.len == 0)
+			break;
+		if (tpcb->tp_state != TP_AKWAIT && tpcb->tp_state != TP_OPEN) {
+			error = ENOTCONN;
+			break;
+		}
+		/*
+		 * The protocol machine copies mbuf chains,
+		 * prepends headers, assigns seq numbers, and
+		 * puts the packets on the device.
+		 * When they are acked they are removed from the socket buf.
+		 *
+		 * sosend calls this up until sbspace goes negative.
+		 * Sbspace may be made negative by appending this mbuf chain,
+		 * possibly by a whole cluster.
+		 */
+		{
+			/*
+			 * Could have eotsdu and no data.(presently MUST have
+			 * an mbuf though, even if its length == 0) 
+			 */
+			int totlen = m->m_pkthdr.len;
+			struct sockbuf *sb = &so->so_snd;
+			IFPERF(tpcb)
+			   PStat(tpcb, Nb_from_sess) += totlen;
+			   tpmeas(tpcb->tp_lref, TPtime_from_session, 0, 0, 
+					PStat(tpcb, Nb_from_sess), totlen);
+			ENDPERF
+			IFDEBUG(D_SYSCALL)
+				printf(
+				"PRU_SEND: eot %d before sbappend 0x%x len 0x%x to sb @ 0x%x\n",
+					eotsdu, m, totlen, sb);
+				dump_mbuf(sb->sb_mb, "so_snd.sb_mb");
+				dump_mbuf(m, "m : to be added");
+			ENDDEBUG
+			tp_packetize(tpcb, m, eotsdu);
+			IFDEBUG(D_SYSCALL)
+				printf("PRU_SEND: eot %d after sbappend 0x%x\n", eotsdu, m);
+				dump_mbuf(sb->sb_mb, "so_snd.sb_mb");
+			ENDDEBUG
+			/* the fsm is only driven when the connection is open; in TP_AKWAIT the data is just packetized */
+			if (tpcb->tp_state == TP_OPEN)
+				error = DoEvent(T_DATA_req); 
+			IFDEBUG(D_SYSCALL)
+				printf("PRU_SEND: after driver error 0x%x \n",error);
+				printf("so_snd 0x%x cc 0t%d mbcnt 0t%d\n",
+						sb, sb->sb_cc, sb->sb_mbcnt);
+				dump_mbuf(sb->sb_mb, "so_snd.sb_mb after driver");
+			ENDDEBUG
+		}
+		break;
+
+	case PRU_SOCKADDR:
+		(tpcb->tp_nlproto->nlp_getnetaddr)(tpcb->tp_npcb, nam, TP_LOCAL);
+		break;
+
+	case PRU_PEERADDR:
+		(tpcb->tp_nlproto->nlp_getnetaddr)(tpcb->tp_npcb, nam, TP_FOREIGN);
+		break;
+
+	case PRU_CONTROL:
+		error = EOPNOTSUPP;
+		break;
+
+	case PRU_PROTOSEND:
+	case PRU_PROTORCV:
+	case PRU_SENSE:
+	case PRU_SLOWTIMO:
+	case PRU_FASTTIMO:
+		error = EOPNOTSUPP;
+		break;
+
+	default:
+#ifdef ARGO_DEBUG
+		printf("tp_usrreq UNKNOWN PRU %d\n", req);
+#endif /* ARGO_DEBUG */
+		error = EOPNOTSUPP;
+	}
+
+	IFDEBUG(D_REQUEST)
+		printf("%s, so 0x%x, tpcb 0x%x, error %d, state %d\n",
+			"returning from tp_usrreq", so, tpcb, error,
+			tpcb ? tpcb->tp_state : 0);
+	ENDDEBUG
+	IFTRACE(D_REQUEST)
+		tptraceTPCB(TPPTusrreq, "END req so m state [", req, so, m, 
+			tpcb ? tpcb->tp_state : 0);
+	ENDTRACE
+	/* control data that reached this point was not consumed by the send case above: free it and complain */
+	if (controlp) {
+		m_freem(controlp);
+		printf("control data unexpectedly retained in tp_usrreq()");
+	}
+	splx(s);
+	return error;
+}
+/*
+ * Trace hook: when the D_DATA trace block is active and the socket (so)
+ * has a tpcb, record the socket together with the residual count and
+ * iovec count of (uio).  Does nothing else and returns no value.
+ * NOTE(review): the trace label says "sosend", so this is presumably
+ * called from there -- confirm.
+ */
+tp_ltrace(so, uio)
+struct socket *so;
+struct uio *uio;
+{
+	IFTRACE(D_DATA)
+		/* NOTE(review): the name tpcb may be referenced by the tptraceTPCB macro -- do not rename without checking */
+		register struct tp_pcb *tpcb =  sototpcb(so);
+		if (tpcb) {
+			tptraceTPCB(TPPTmisc, "sosend so resid iovcnt", so,
+				uio->uio_resid, uio->uio_iovcnt, 0);
+		}
+	ENDTRACE
+}
+
+/*
+ * Confirm a connection that is waiting in state TP_CONFIRMING by
+ * issuing T_ACPT_req and returning the result.
+ * Called in any other state it only prints a complaint and returns 0.
+ */
+tp_confirm(tpcb)
+register struct tp_pcb *tpcb;
+{
+	/* NOTE(review): E is presumably used by the DoEvent() macro -- confirm */
+	struct tp_event E;
+
+	if (tpcb->tp_state != TP_CONFIRMING) {
+		printf("Tp confirm called when not confirming; tpcb 0x%x, state 0x%x\n",
+			tpcb, tpcb->tp_state);
+		return 0;
+	}
+	return DoEvent(T_ACPT_req);
+}
+
+/*
+ * Process control data sent with sendmsg()
+ *
+ * (m) is the control mbuf; it starts with a struct cmsghdr whose level
+ * and type select the option handed to tp_ctloutput(PRCO_SETOPT, ...).
+ * (so) is the socket.  (data) points at the caller's data chain, which
+ * is discarded when the option is TPOPT_DISC_DATA, because in that case
+ * the request is turned into a PRU_DISCONNECT.
+ *
+ * The control mbuf is always consumed: whatever tp_ctloutput() leaves
+ * in (m) is freed here.
+ *
+ * RETURN VALUE:
+ *  0 when there is no control data (m null or empty)
+ *  EINVAL when the mbuf is too short to hold a struct cmsghdr
+ *  otherwise the result of tp_ctloutput(), or of the PRU_DISCONNECT
+ *  request when the option is TPOPT_DISC_DATA.
+ */
+tp_snd_control(m, so, data)
+	struct mbuf *m;
+	struct socket *so;
+	register struct mbuf **data;
+{
+	register struct cmsghdr *ch;
+	int error = 0;
+	int level, type;
+
+	if (m && m->m_len) {
+		/* refuse a header that is not completely there */
+		if ((u_int)m->m_len < sizeof (*ch)) {
+			m_freem(m);
+			return EINVAL;
+		}
+		ch = mtod(m, struct cmsghdr *);
+		/*
+		 * Copy out what we need from the header now: tp_ctloutput()
+		 * is given &m and may dispose of the mbuf that ch points into.
+		 */
+		level = ch->cmsg_level;
+		type = ch->cmsg_type;
+		/* step over the header so that only the option value is left */
+		m->m_len -= sizeof (*ch);
+		m->m_data += sizeof (*ch);
+		error = tp_ctloutput(PRCO_SETOPT, so, level, type, &m);
+		if (type == TPOPT_DISC_DATA) {
+			/* disconnect data replaces any ordinary data */
+			if (data && *data) {
+				m_freem(*data);
+				*data = 0;
+			}
+			error = tp_usrreq(so, PRU_DISCONNECT, (struct mbuf *)0,
+								(struct mbuf *)0, (struct mbuf *)0);
+		}
+	}
+	if (m)
+		m_freem(m);
+	return error;
+}
diff --git a/sys/netiso/tuba_subr.c b/sys/netiso/tuba_subr.c
new file mode 100644
index 00000000000..d346927255c
--- /dev/null
+++ b/sys/netiso/tuba_subr.c
@@ -0,0 +1,348 @@
+/*
+ * Copyright (c) 1992, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)tuba_subr.c	8.1 (Berkeley) 6/10/93
+ */
+
+#include <sys/param.h>
+#include <sys/proc.h>
+#include <sys/systm.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/protosw.h>
+#include <sys/errno.h>
+
+#include <net/route.h>
+#include <net/if.h>
+
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/ip.h>
+#include <netinet/in_pcb.h>
+#include <netinet/ip_var.h>
+#include <netinet/ip_icmp.h>
+#include <netinet/tcp.h>
+#include <netinet/tcp_fsm.h>
+#include <netinet/tcp_seq.h>
+#include <netinet/tcp_timer.h>
+#include <netinet/tcp_var.h>
+#include <netinet/tcpip.h>
+#include <netinet/tcp_debug.h>
+
+#include <netiso/argo_debug.h>
+#include <netiso/iso.h>
+#include <netiso/clnp.h>
+#include <netiso/iso_pcb.h>
+#include <netiso/iso_var.h>
+#include <netiso/tuba_table.h>
+
+static	struct	sockaddr_iso null_siso = { sizeof(null_siso), AF_ISO, };
+extern	int	tuba_table_size, tcp_keepidle, tcp_keepintvl, tcp_maxidle;
+extern	int	tcppcbcachemiss, tcppredack, tcppreddat, tcprexmtthresh;
+extern	struct	tcpiphdr tcp_saveti;
+struct	inpcb	tuba_inpcb;
+struct	inpcb	*tuba_last_inpcb = &tuba_inpcb;
+struct	isopcb	tuba_isopcb;
+/*
+ * Tuba initialization.
+ * Makes the placeholder inpcb and isopcb point at themselves, aims the
+ * isopcb's address pointers at its own embedded address storage, raises
+ * max_protohdr to TUBAHDRSIZE if it is smaller, and panics when a link
+ * header plus TUBAHDRSIZE would not fit in MHLEN bytes.
+ */
+tuba_init()
+{
+#define TUBAHDRSIZE (3 /*LLC*/ + 9 /*CLNP Fixed*/ + 42 /*Addresses*/ \
+		     + 6 /*CLNP Segment*/ + 20 /*TCP*/)
+
+	/* each placeholder pcb is its own successor and predecessor */
+	tuba_inpcb.inp_prev = &tuba_inpcb;
+	tuba_inpcb.inp_next = &tuba_inpcb;
+	tuba_isopcb.isop_prev = &tuba_isopcb;
+	tuba_isopcb.isop_next = &tuba_isopcb;
+
+	/* local and foreign address pointers refer to the built-in storage */
+	tuba_isopcb.isop_laddr = &tuba_isopcb.isop_sladdr;
+	tuba_isopcb.isop_faddr = &tuba_isopcb.isop_sfaddr;
+
+	if (TUBAHDRSIZE > max_protohdr)
+		max_protohdr = TUBAHDRSIZE;
+	if (MHLEN < max_linkhdr + TUBAHDRSIZE)
+		panic("tuba_init");
+}
+
+/*
+ * Accumulator shared by successive tuba_getaddr() calls.
+ */
+struct addr_arg {
+	int	error;		/* set to 1 when an index has no table entry */
+	int	offset;		/* grows by address length + 1 per entry; its low bit picks tc_ssum over tc_sum */
+	u_long	sum;		/* running checksum contribution */
+};
+
+/*
+ * Calculate contribution to fudge factor for TCP checksum,
+ * and coincidentally set pointer for convenience of clnp_output
+ * if we are responding when there is no isopcb around.
+ *
+ * (index) selects an entry of tuba_table.  If it lies beyond
+ * tuba_table_size or the slot is empty, arg->error is set and nothing
+ * else happens.  Otherwise *siso (when siso is non-null) is pointed at
+ * the entry's address, arg->sum is increased by the entry's checksum
+ * (tc_ssum when arg->offset is odd, tc_sum otherwise) plus
+ * (0xffff ^ index), and arg->offset advances by the address length + 1.
+ */
+static void
+tuba_getaddr(arg, siso, index)
+	register struct addr_arg *arg;
+	struct sockaddr_iso **siso;
+	u_long index;
+{
+	register struct tuba_cache *entry;
+
+	/* no such entry: flag the failure and leave the rest untouched */
+	if (index > tuba_table_size || (entry = tuba_table[index]) == 0) {
+		arg->error = 1;
+		return;
+	}
+	if (siso != 0)
+		*siso = &entry->tc_siso;
+	if (arg->offset & 1)
+		arg->sum += entry->tc_ssum + (0xffff ^ index);
+	else
+		arg->sum += entry->tc_sum + (0xffff ^ index);
+	arg->offset += entry->tc_siso.siso_nlen + 1;
+}
+
+/*
+ * Hand a TCP segment (m), which starts with a struct tcpiphdr, to
+ * clnp_output().
+ * (tp) is the tcpcb; when it is null, or has no template or no tuba pcb,
+ * the placeholder tuba_isopcb is used and its address pointers are set
+ * from the table entries selected by the header's address fields.
+ * The segment checksum is adjusted by a value derived from those table
+ * entries, then the leading sizeof(struct ip) bytes are skipped.
+ * RETURN VALUE:
+ *  EADDRNOTAVAIL (after freeing m) when an address lookup failed,
+ *  otherwise whatever clnp_output() returns.
+ */
+tuba_output(m, tp)
+	register struct mbuf *m;
+	struct tcpcb *tp;
+{
+	register struct tcpiphdr *n;
+	struct	isopcb *isop;
+	struct	addr_arg arg;
+
+	if (tp == 0 || (n = tp->t_template) == 0 || 
+	    (isop = (struct isopcb *)tp->t_tuba_pcb) == 0) {
+		/* no usable pcb: work from the header in the packet itself */
+		isop = &tuba_isopcb;
+		n = mtod(m, struct tcpiphdr *);
+		arg.error = arg.sum = arg.offset = 0;
+		tuba_getaddr(&arg, &tuba_isopcb.isop_faddr, n->ti_dst.s_addr);
+		tuba_getaddr(&arg, &tuba_isopcb.isop_laddr, n->ti_src.s_addr);
+		REDUCE(arg.sum, arg.sum);
+		/* note: this jumps into the body of the if statement below */
+		goto adjust;
+	}
+	/* a zero ti_sum in the template means the fudge factor is not there yet */
+	if (n->ti_sum == 0) {
+		arg.error = arg.sum = arg.offset = 0;
+		tuba_getaddr(&arg, (struct sockaddr_iso **)0, n->ti_dst.s_addr);
+		tuba_getaddr(&arg, (struct sockaddr_iso **)0, n->ti_src.s_addr);
+		REDUCE(arg.sum, arg.sum);
+		/* store it in the template, then switch n to the header in the packet */
+		n->ti_sum = arg.sum;
+		n = mtod(m, struct tcpiphdr *);
+	adjust:
+		if (arg.error) {
+			m_freem(m);
+			return (EADDRNOTAVAIL);
+		}
+		/* NOTE(review): REDUCE is defined elsewhere; presumably it folds the sum back into 16 bits -- confirm */
+		REDUCE(n->ti_sum, n->ti_sum + (0xffff ^ arg.sum));
+	}
+	/* skip the first sizeof(struct ip) bytes before passing the packet on */
+	m->m_len -= sizeof (struct ip);
+	m->m_pkthdr.len -= sizeof (struct ip);
+	m->m_data += sizeof (struct ip);
+	return (clnp_output(m, isop, m->m_pkthdr.len, 0));
+}
+
+/*
+ * Adjust the reference counts of the table entries for both addresses
+ * of (isop).
+ * (delta) of 1 adds a reference; any other value is treated as -1 and
+ * drops one.  Nothing is done when isop or either of its address
+ * pointers is null, or when isop_tuba_cached already agrees with the
+ * request (already cached on an add, not cached on a drop).
+ * A count is only lowered while it is still greater than zero.
+ * Each address is looked up with tuba_lookup(..., M_DONTWAIT); an
+ * address whose lookup yields 0 or whose table slot is empty is skipped.
+ */
+tuba_refcnt(isop, delta)
+	struct isopcb *isop;
+	int delta;
+{
+	register struct tuba_cache *tc;
+	unsigned index;
+
+	if (delta != 1)
+		delta = -1;
+	if (isop == 0 || isop->isop_faddr == 0 || isop->isop_laddr == 0 ||
+	    (delta == -1 && isop->isop_tuba_cached == 0) ||
+	    (delta == 1 && isop->isop_tuba_cached != 0))
+		return;
+	/* remember whether this pcb currently holds its references */
+	isop->isop_tuba_cached = (delta == 1);
+	/* foreign address first, then the local one */
+	if ((index = tuba_lookup(isop->isop_faddr, M_DONTWAIT)) != 0 &&
+	    (tc = tuba_table[index]) != 0 && (delta == 1 || tc->tc_refcnt > 0))
+		tc->tc_refcnt += delta;
+	if ((index = tuba_lookup(isop->isop_laddr, M_DONTWAIT)) != 0 &&
+	    (tc = tuba_table[index]) != 0 && (delta == 1 || tc->tc_refcnt > 0))
+		tc->tc_refcnt += delta;
+}
+
+/*
+ * Detach (isop): drop its table references, clear its socket pointer
+ * and pass it to iso_pcbdetach().  A null isop is ignored.
+ */
+tuba_pcbdetach(isop)
+	struct isopcb *isop;
+{
+	if (isop != 0) {
+		tuba_refcnt(isop, -1);
+		isop->isop_socket = 0;
+		iso_pcbdetach(isop);
+	}
+}
+
+/*
+ * Avoid  in_pcbconnect in faked out tcp_input()
+ *
+ * (inp) is the inpcb being connected and (nam) holds a sockaddr_in with
+ * the peer's address and port.  The local and foreign ISO addresses are
+ * taken from the tuba_table entries selected by the inpcb's local and
+ * foreign address fields, with the ports copied in as transport
+ * selectors.  (nam) is overwritten with the foreign sockaddr_iso and
+ * handed to iso_pcbconnect(); on success a table reference is taken.
+ * Returns the result of iso_pcbconnect().
+ */
+tuba_pcbconnect(inp, nam)
+	register struct inpcb *inp;
+	struct mbuf *nam;
+{
+	struct sockaddr_in *sin = mtod(nam, struct sockaddr_in *);
+	struct tcpcb *tp = intotcpcb(inp);
+	struct isopcb *isop = (struct isopcb *)tp->t_tuba_pcb;
+	register struct sockaddr_iso *local, *foreign;
+	int error;
+
+	/* hardwire iso_pcbbind() here */
+	local = &isop->isop_sladdr;
+	isop->isop_laddr = local;
+	*local = tuba_table[inp->inp_laddr.s_addr]->tc_siso;
+	local->siso_tlen = sizeof(inp->inp_lport);
+	bcopy((caddr_t)&inp->inp_lport, TSEL(local), sizeof(inp->inp_lport));
+
+	/*
+	 * hardwire in_pcbconnect() here without assigning route;
+	 * this has to happen before nam is overwritten below
+	 */
+	inp->inp_faddr = sin->sin_addr;
+	inp->inp_fport = sin->sin_port;
+
+	/* reuse nam argument to call iso_pcbconnect() */
+	foreign = mtod(nam, struct sockaddr_iso *);
+	nam->m_len = sizeof(*foreign);
+	*foreign = tuba_table[inp->inp_faddr.s_addr]->tc_siso;
+	foreign->siso_tlen = sizeof(inp->inp_fport);
+	bcopy((caddr_t)&inp->inp_fport, TSEL(foreign), sizeof(inp->inp_fport));
+
+	error = iso_pcbconnect(isop, nam);
+	if (error == 0)
+		tuba_refcnt(isop, 1);
+	return (error);
+}
+
+/*
+ * CALLED FROM:
+ * 	clnp's input routine, indirectly through the protosw.
+ * FUNCTION and ARGUMENTS:
+ * Take a packet (m) from clnp, strip off the clnp header
+ * and do tcp input processing.
+ * (src) and (dst) are the CLNP source and destination addresses; they
+ * are turned into table indices, which replace the IP addresses in the
+ * faked tcp/ip header once the checksum has been verified.
+ * The body of the function ends by textually including
+ * netinet/tcp_input.c with a few names redefined.
+ * No return value.  
+ */
+tuba_tcpinput(m, src, dst)
+	register struct mbuf *m;
+	struct sockaddr_iso *src, *dst;
+{
+	/*
+	 * NOTE(review): most of these locals are not used in the code
+	 * visible here; presumably they are the working variables of the
+	 * included tcp_input.c -- confirm before removing any.
+	 */
+	unsigned long sum, lindex, findex;
+	register struct tcpiphdr *ti;
+	register struct inpcb *inp;
+	caddr_t optp = NULL;
+	int optlen;
+	int len, tlen, off;
+	register struct tcpcb *tp = 0;
+	int tiflags;
+	struct socket *so;
+	int todrop, acked, ourfinisacked, needoutput = 0;
+	short ostate;
+	struct in_addr laddr;
+	int dropsocket = 0, iss = 0;
+	u_long tiwin, ts_val, ts_ecr;
+	int ts_present = 0;
+
+	if ((m->m_flags & M_PKTHDR) == 0)
+		panic("tuba_tcpinput");
+	/*
+	 * Do some housekeeping looking up CLNP addresses.
+	 * If we are out of space might as well drop the packet now.
+	 */
+	tcpstat.tcps_rcvtotal++;
+	lindex = tuba_lookup(dst, M_DONTWAIT);
+	findex = tuba_lookup(src, M_DONTWAIT);
+	/* NOTE(review): the label "drop" is not defined in the visible text; presumably it comes from the included tcp_input.c */
+	if (lindex == 0 || findex == 0)
+		goto drop;
+	/*
+	 * CLNP gave us an mbuf chain WITH the clnp header pulled up,
+	 * but the data pointer pushed past it.
+	 */
+	len = m->m_len;
+	tlen = m->m_pkthdr.len;
+	/* back the data pointer up by sizeof(struct ip) to make room for the faked header */
+	m->m_data -= sizeof(struct ip);
+	m->m_len += sizeof(struct ip);
+	m->m_pkthdr.len += sizeof(struct ip);
+	m->m_flags &= ~(M_MCAST|M_BCAST); /* XXX should do this in clnp_input */
+	/*
+	 * The reassembly code assumes it will be overwriting a useless
+	 * part of the packet, which is why we need to have it point
+	 * into the packet itself.
+	 *
+	 * Check to see if the data is properly aligned
+	 * so that we can save copying the tcp header.
+	 * This code knows way too much about the structure of mbufs!
+	 */
+	/* off is nonzero when the data does not start on a multiple of sizeof(long) within its buffer */
+	off = ((sizeof (long) - 1) & ((m->m_flags & M_EXT) ?
+		(m->m_data - m->m_ext.ext_buf) :  (m->m_data - m->m_pktdat)));
+	if (off || len < sizeof(struct tcphdr)) {
+		struct mbuf *m0 = m;
+
+		/* put a fresh header mbuf in front of the original chain */
+		MGETHDR(m, M_DONTWAIT, MT_DATA);
+		if (m == 0) { 
+			m = m0;
+			goto drop;
+		}
+		m->m_next = m0;
+		m->m_data += max_linkhdr;
+		m->m_pkthdr = m0->m_pkthdr;
+		m->m_flags = m0->m_flags & M_COPYFLAGS;
+		if (len < sizeof(struct tcphdr)) {
+			/* the first mbuf holds less than a tcp header: let m_pullup() gather it */
+			m->m_len = 0;
+			if ((m = m_pullup(m, sizeof(struct tcpiphdr))) == 0) {
+				tcpstat.tcps_rcvshort++;
+				return;
+			}
+		} else {
+			/* copy the tcp header into the new mbuf and trim it off the old one */
+			bcopy(mtod(m0, caddr_t) + sizeof(struct ip),
+			      mtod(m, caddr_t) + sizeof(struct ip),
+			      sizeof(struct tcphdr));
+			m0->m_len -= sizeof(struct tcpiphdr);
+			m0->m_data += sizeof(struct tcpiphdr);
+			m->m_len = sizeof(struct tcpiphdr);
+		}
+	}
+	/*
+	 * Calculate checksum of extended TCP header and data,
+	 * replacing what would have been IP addresses by
+	 * the IP checksum of the CLNP addresses.
+	 */
+	ti = mtod(m, struct tcpiphdr *);
+	ti->ti_dst.s_addr = tuba_table[lindex]->tc_sum;
+	/* the parity of the destination address length selects which source checksum form is used */
+	if (dst->siso_nlen & 1)
+		ti->ti_src.s_addr = tuba_table[findex]->tc_sum;
+	else
+		ti->ti_src.s_addr = tuba_table[findex]->tc_ssum;
+	ti->ti_prev = ti->ti_next = 0;
+	ti->ti_x1 = 0; ti->ti_pr = ISOPROTO_TCP;
+	ti->ti_len = htons((u_short)tlen);
+	/* a nonzero result from in_cksum() means the checksum did not verify */
+	if (ti->ti_sum = in_cksum(m, m->m_pkthdr.len)) {
+		tcpstat.tcps_rcvbadsum++;
+		goto drop;
+	}
+	/* from here on the address fields carry table indices, not checksums */
+	ti->ti_src.s_addr = findex;
+	ti->ti_dst.s_addr = lindex;
+	/*
+	 * Now include the rest of TCP input
+	 */
+#define TUBA_INCLUDE
+#define	in_pcbconnect	tuba_pcbconnect
+#define	tcb		tuba_inpcb
+#define tcp_last_inpcb	tuba_last_inpcb
+
+#include <netinet/tcp_input.c>
+}
+
+/*
+ * Pull in the TCP timer source with tcp_slowtimo and tcp_fasttimo
+ * renamed, so that the copies compiled here are called tuba_slowtimo
+ * and tuba_fasttimo.
+ */
+#define tcp_slowtimo	tuba_slowtimo
+#define tcp_fasttimo	tuba_fasttimo
+
+#include <netinet/tcp_timer.c>
diff --git a/sys/netiso/tuba_table.c b/sys/netiso/tuba_table.c
new file mode 100644
index 00000000000..a1bf5f98de0
--- /dev/null
+++ b/sys/netiso/tuba_table.c
@@ -0,0 +1,143 @@
+/*
+ * Copyright (c) 1992, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)tuba_table.c	8.2 (Berkeley) 11/15/93
+ */
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/domain.h>
+#include <sys/protosw.h>
+#include <sys/ioctl.h>
+#include <sys/time.h>
+#include <sys/kernel.h>
+
+#include <net/if.h>
+#include <net/radix.h>
+
+#include <netiso/iso.h>
+#include <netiso/tuba_table.h>
+
+int	tuba_table_size;		/* highest index usable in tuba_table */
+struct	tuba_cache **tuba_table;	/* entries by index; slot 0 is never handed out */
+struct	radix_node_head *tuba_tree;	/* the same entries, keyed by siso_addr */
+extern	int arpt_keep, arpt_prune;	/* use same values as arp cache */
+
+void
+tuba_timer()
+{
+	register struct tuba_cache *ent;	/* entry under inspection */
+	int slot, pri = splnet();		/* the sweep runs at splnet */
+	long cutoff = time.tv_sec - arpt_keep;	/* last use before this is stale */
+	/* Free every unreferenced entry whose tc_time predates the cutoff. */
+	timeout(tuba_timer, (caddr_t)0, arpt_prune * hz);	/* re-arm first */
+	for (slot = tuba_table_size; slot > 0; slot--) {
+		ent = tuba_table[slot];
+		if (ent == 0 || ent->tc_refcnt != 0 || ent->tc_time >= cutoff)
+			continue;	/* empty slot, still referenced, or recently used */
+		tuba_table[slot] = 0;
+		rn_delete(&ent->tc_siso.siso_addr, NULL, tuba_tree);
+		free((caddr_t)ent, M_RTABLE);
+	}
+	splx(pri);
+}
+
+tuba_table_init()	/* create the lookup tree and schedule the first cache sweep */
+{
+	rn_inithead((void **)&tuba_tree, 40);	/* 40: presumably the key's bit offset - TODO confirm */
+	timeout(tuba_timer, (caddr_t)0, arpt_prune * hz);	/* tuba_timer re-arms itself afterwards */
+}
+
+/* Return the tuba_table index cached for siso's NSAP, adding an entry on a miss.
+ * wait is the malloc() wait flag; 0 is returned when no entry can be created. */
+int
+tuba_lookup(siso, wait)
+	register struct sockaddr_iso *siso;
+{
+	struct radix_node *rn, *rn_match();
+	struct tuba_cache *tc, **new = 0;
+	int dupentry = 0, sum_a = 0, sum_b = 0, old_size, i;
+	/* Cache hit: refresh the last-use time and hand back the stored index. */
+	if ((rn = rn_match((caddr_t)&siso->siso_addr, tuba_tree->rnh_treetop))
+	     && ((rn->rn_flags & RNF_ROOT) == 0)) {
+		tc = (struct tuba_cache *)rn;
+		tc->tc_time = time.tv_sec;
+		return (tc->tc_index);
+	}
+	if ((tc = (struct tuba_cache *)malloc(sizeof(*tc), M_RTABLE, wait))
+		== NULL)
+		return (0);
+	bzero((caddr_t)tc, sizeof (*tc));
+	bcopy(siso->siso_data, tc->tc_siso.siso_data,
+		tc->tc_siso.siso_nlen =  siso->siso_nlen);
+	rn_insert(&tc->tc_siso.siso_addr, tuba_tree, &dupentry, tc->tc_nodes);
+	if (dupentry)
+		panic("tuba_lookup 1");
+	tc->tc_siso.siso_family = AF_ISO;
+	tc->tc_siso.siso_len = sizeof(tc->tc_siso);
+	tc->tc_time = time.tv_sec;
+	for (i = sum_a = tc->tc_siso.siso_nlen; --i >= 0; )	/* sum_a starts at nlen: the length is summed too */
+		*(i & 1 ? &sum_a : &sum_b) += (u_char)tc->tc_siso.siso_data[i];
+	REDUCE(tc->tc_sum, (sum_a << 8) + sum_b);
+	HTONS(tc->tc_sum);
+	SWAB(tc->tc_ssum, tc->tc_sum);
+	for (i = tuba_table_size; i > 0; i--)	/* reuse a free slot if there is one */
+		if (tuba_table[i] == 0)
+			goto fixup;
+	old_size = tuba_table_size;
+	if (tuba_table_size == 0)
+		tuba_table_size = 15;
+	if (tuba_table_size <= 0x7fff) {	/* growth cap; past it new stays 0 */
+		tuba_table_size = 1 + 2 * tuba_table_size;
+		i = (tuba_table_size + 1) * sizeof(tc);
+		new = (struct tuba_cache **)malloc((unsigned)i, M_RTABLE, wait);
+	}
+	if (new == 0) {	/* cap reached or malloc failed: undo the tree insert and free tc */
+		tuba_table_size = old_size;
+		rn_delete(&tc->tc_siso.siso_addr, NULL, tuba_tree);
+		free((caddr_t)tc, M_RTABLE);
+		return (0);
+	}
+	bzero((caddr_t)new, (unsigned)i);
+	if (tuba_table) {
+		bcopy((caddr_t)tuba_table, (caddr_t)new, i >> 1);	/* old table is half the new size */
+		free((caddr_t)tuba_table, M_RTABLE);
+	}
+	tuba_table = new;
+	i = tuba_table_size;
+fixup:
+	tuba_table[i] = tc;
+	return (tc->tc_index = i);
+}
diff --git a/sys/netiso/tuba_table.h b/sys/netiso/tuba_table.h
new file mode 100644
index 00000000000..6be8afaf523
--- /dev/null
+++ b/sys/netiso/tuba_table.h
@@ -0,0 +1,59 @@
+/*-
+ * Copyright (c) 1992, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)tuba_table.h	8.1 (Berkeley) 6/10/93
+ */
+
+struct tuba_cache {
+	struct	radix_node tc_nodes[2];		/* convenient lookup; tuba_lookup casts a radix_node * to this struct */
+	int	tc_refcnt;			/* tuba_timer frees the entry only while this is 0 */
+	int	tc_time;			/* last looked up */
+	int	tc_flags;			/* TCF_* bits; not examined in tuba_table.c */
+#define TCF_PERM	1
+	int	tc_index;			/* slot in tuba_table; the value tuba_lookup returns */
+	u_short	tc_sum;				/* cksum of nsap inc. length */
+	u_short	tc_ssum;			/* swab(tc_sum) */
+	struct	sockaddr_iso tc_siso;		/* for responding */
+};
+
+#define ADDCARRY(x)  ((x) >= 65535 ? (x) -= 65535 : (x))	/* one end-around carry */
+#define REDUCE(a, b) { long l_util = (b), x; /* a = 16-bit fold of b; 0 becomes 0xffff */ \
+	x = (l_util & 0xffff) + ((l_util >> 16) & 0xffff); ADDCARRY(x); \
+	if (x == 0) x = 0xffff; (a) = x;}
+#define SWAB(a, b) { union { u_char c[2]; u_short s;} s; u_char t; /* a = b, bytes swapped */ \
+	s.s = (b); t = s.c[0]; s.c[0] = s.c[1]; s.c[1] = t; (a) = s.s;}
+
+#ifdef KERNEL
+extern	int	tuba_table_size;		/* these three are defined in tuba_table.c */
+extern	struct	tuba_cache **tuba_table;
+extern	struct	radix_node_head *tuba_tree;
+#endif
diff --git a/sys/netiso/tuba_usrreq.c b/sys/netiso/tuba_usrreq.c
new file mode 100644
index 00000000000..2d9211707a4
--- /dev/null
+++ b/sys/netiso/tuba_usrreq.c
@@ -0,0 +1,312 @@
+/*
+ * Copyright (c) 1992, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)tuba_usrreq.c	8.1 (Berkeley) 6/10/93
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/protosw.h>
+#include <sys/errno.h>
+#include <sys/stat.h>
+
+#include <net/if.h>
+#include <net/route.h>
+
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/ip.h>
+#include <netinet/in_pcb.h>
+#include <netinet/ip_var.h>
+#include <netinet/tcp.h>
+#include <netinet/tcp_fsm.h>
+#include <netinet/tcp_seq.h>
+#include <netinet/tcp_timer.h>
+#include <netinet/tcp_var.h>
+#include <netinet/tcpip.h>
+#include <netinet/tcp_debug.h>
+
+#include <netiso/argo_debug.h>
+#include <netiso/iso.h>
+#include <netiso/clnp.h>
+#include <netiso/iso_pcb.h>
+#include <netiso/iso_var.h>
+#include <netiso/tuba_table.h>
+/*
+ * TCP protocol interface to socket abstraction.
+ */
+extern	char *tcpstates[];
+extern	struct inpcb tuba_inpcb;
+extern	struct isopcb tuba_isopcb;
+
+/*
+ * Process a TCP user request for a TUBA (TCP over CLNP) socket.  If this is a
+ * send request then m is the mbuf chain of send data.  Send requests, timer
+ * expirations and anything else without a case below go to tcp_usrreq().
+ */
+/*ARGSUSED*/
+tuba_usrreq(so, req, m, nam, control)
+	struct socket *so;
+	int req;
+	struct mbuf *m, *nam, *control;
+{
+	register struct inpcb *inp;
+	register struct isopcb *isop;
+	register struct tcpcb *tp;
+	int s;				/* priority level saved by splnet() */
+	int error = 0;
+	int ostate;			/* t_state on entry, handed to tcp_trace() */
+	struct sockaddr_iso *siso;
+
+	if (req == PRU_CONTROL)
+		return (iso_control(so, (int)m, (caddr_t)nam,
+			(struct ifnet *)control));
+
+	s = splnet();
+	inp = sotoinpcb(so);
+	/*
+	 * When a TCP is attached to a socket, then there will be
+	 * a (struct inpcb) pointed at by the socket, and this
+	 * structure will point at a subsidiary (struct tcpcb).
+	 */
+	if (inp == 0  && req != PRU_ATTACH) {
+		splx(s);
+		return (EINVAL);		/* XXX */
+	}
+	if (inp) {
+		tp = intotcpcb(inp);
+		if (tp == 0)
+			panic("tuba_usrreq");
+		ostate = tp->t_state;
+		isop = (struct isopcb *)tp->t_tuba_pcb;	/* stored there by PRU_ATTACH below */
+		if (isop == 0)
+			panic("tuba_usrreq 2");
+	} else
+		ostate = 0;
+	switch (req) {
+
+	/*
+	 * TCP attaches to socket via PRU_ATTACH, reserving space,
+	 * and an internet control block.  We also need to
+	 * allocate an isopcb and separate the control block from
+	 * tcp/ip ones.
+	 */
+	case PRU_ATTACH:
+		if (error = iso_pcballoc(so, &tuba_isopcb))
+			break;
+		isop = (struct isopcb *)so->so_pcb;
+		so->so_pcb = 0;		/* cleared before tcp_usrreq() does its own attach */
+		if (error = tcp_usrreq(so, req, m, nam, control)) {
+			isop->isop_socket = 0;
+			iso_pcbdetach(isop);
+		} else {
+			inp = sotoinpcb(so);
+			remque(inp);		/* move the inpcb onto the tuba_inpcb queue */
+			insque(inp, &tuba_inpcb);
+			inp->inp_head = &tuba_inpcb;
+			tp = intotcpcb(inp);
+			if (tp == 0)
+				panic("tuba_usrreq 3");
+			tp->t_tuba_pcb = (caddr_t) isop;
+		}
+		goto notrace;		/* tp is not set on the failure path */
+
+	/*
+	 * PRU_DETACH detaches the TCP protocol from the socket.
+	 * If the protocol state is non-embryonic, then can't
+	 * do this directly: have to initiate a PRU_DISCONNECT,
+	 * which may finish later; embryonic TCB's can just
+	 * be discarded here.
+	 */
+	case PRU_DETACH:
+		if (tp->t_state > TCPS_LISTEN)
+			tp = tcp_disconnect(tp);
+		else
+			tp = tcp_close(tp);
+		if (tp == 0)		/* no tcpcb came back: detach the isopcb as well */
+			tuba_pcbdetach(isop);
+		break;
+
+	/*
+	 * Give the socket an address.
+	 */
+	case PRU_BIND:
+		siso = mtod(nam, struct sockaddr_iso *);
+		if (siso->siso_tlen && siso->siso_tlen != 2) {	/* a selector, if given, must be 2 bytes */
+			error = EINVAL;
+			break;
+		}
+		if ((error = iso_pcbbind(isop, nam)) || 
+		    (siso = isop->isop_laddr) == 0)
+			break;
+		bcopy(TSEL(siso), &inp->inp_lport, 2);	/* the 2 TSEL() bytes become the local port */
+		if (siso->siso_nlen &&
+		    !(inp->inp_laddr.s_addr = tuba_lookup(siso, M_WAITOK)))
+			error = ENOBUFS;
+		break;
+
+	/*
+	 * Bind a local port if none is set; PRU_LISTEN then only enters LISTEN.
+	 */
+	case PRU_CONNECT:
+	case PRU_LISTEN:
+		if (inp->inp_lport == 0 &&
+		    (error = iso_pcbbind(isop, (struct mbuf *)0)))
+			break;
+		bcopy(TSEL(isop->isop_laddr), &inp->inp_lport, 2);
+		if (req == PRU_LISTEN) {
+			tp->t_state = TCPS_LISTEN;
+			break;
+		}
+	/*FALLTHROUGH*/
+	/*
+	 * Initiate connection to peer.
+	 * Create a template for use in transmissions on this connection.
+	 * Enter SYN_SENT state, and mark socket as connecting.
+	 * Start keep-alive timer, and seed output sequence space.
+	 * Send initial segment on connection.
+	 */
+	/* case PRU_CONNECT: */
+		if (error = iso_pcbconnect(isop, nam))
+			break;
+		if ((siso = isop->isop_laddr) && siso->siso_nlen > 1)
+			siso->siso_data[siso->siso_nlen - 1] = ISOPROTO_TCP;
+		else
+			panic("tuba_usrreq: connect");
+		siso = mtod(nam, struct sockaddr_iso *);
+		if (!(inp->inp_faddr.s_addr = tuba_lookup(siso, M_WAITOK))) {
+		unconnect:		/* also entered by goto from the two checks below */
+			iso_pcbdisconnect(isop);
+			error = ENOBUFS;
+			break;
+		}
+		bcopy(TSEL(isop->isop_faddr), &inp->inp_fport, 2);
+		if (inp->inp_laddr.s_addr == 0 &&
+		     (inp->inp_laddr.s_addr = 
+			    tuba_lookup(isop->isop_laddr, M_WAITOK)) == 0)
+			goto unconnect;
+		if ((tp->t_template = tcp_template(tp)) == 0)
+			goto unconnect;
+		soisconnecting(so);
+		tcpstat.tcps_connattempt++;
+		tp->t_state = TCPS_SYN_SENT;
+		tp->t_timer[TCPT_KEEP] = TCPTV_KEEP_INIT;
+		tp->iss = tcp_iss; tcp_iss += TCP_ISSINCR/2;
+		tcp_sendseqinit(tp);
+		error = tcp_output(tp);
+		tuba_refcnt(isop, 1);
+		break;
+
+	/*
+	 * Initiate disconnect from peer.
+	 * If connection never passed embryonic stage, just drop;
+	 * else if don't need to let data drain, then can just drop anyways,
+	 * else have to begin TCP shutdown process: mark socket disconnecting,
+	 * drain unread data, state switch to reflect user close, and
+	 * send segment (e.g. FIN) to peer.  Socket will be really disconnected
+	 * when peer sends FIN and acks ours.
+	 *
+	 * SHOULD IMPLEMENT LATER PRU_CONNECT VIA REALLOC TCPCB.
+	 */
+	case PRU_DISCONNECT:
+		if ((tp = tcp_disconnect(tp)) == 0)
+			tuba_pcbdetach(isop);
+		break;
+
+	/*
+	 * Accept a connection.  Essentially all the work is
+	 * done at higher levels; just return the address
+	 * of the peer, storing through addr.
+	 */
+	case PRU_ACCEPT:	/* NOTE(review): isop_faddr is not checked for 0 here, unlike PRU_PEERADDR */
+		bcopy((caddr_t)isop->isop_faddr, mtod(nam, caddr_t),
+			nam->m_len = isop->isop_faddr->siso_len);
+		break;
+
+	/*
+	 * Mark the connection as being incapable of further output.
+	 */
+	case PRU_SHUTDOWN:
+		socantsendmore(so);
+		tp = tcp_usrclosed(tp);
+		if (tp)
+			error = tcp_output(tp);
+		else
+			tuba_pcbdetach(isop);
+		break;
+	/*
+	 * Abort the TCP.
+	 */
+	case PRU_ABORT:
+		if ((tp = tcp_drop(tp, ECONNABORTED)) == 0)
+			tuba_pcbdetach(isop);
+		break;
+
+
+	case PRU_SOCKADDR:	/* copy out the local sockaddr_iso, if there is one */
+		if (isop->isop_laddr)
+			bcopy((caddr_t)isop->isop_laddr, mtod(nam, caddr_t),
+				nam->m_len = isop->isop_laddr->siso_len);
+		break;
+
+	case PRU_PEERADDR:	/* copy out the peer's sockaddr_iso, if there is one */
+		if (isop->isop_faddr)
+			bcopy((caddr_t)isop->isop_faddr, mtod(nam, caddr_t),
+				nam->m_len = isop->isop_faddr->siso_len);
+		break;
+
+	default:		/* every other request is handled by plain TCP */
+		error = tcp_usrreq(so, req, m, nam, control);
+		goto notrace;
+	}
+	if (tp && (so->so_options & SO_DEBUG))
+		tcp_trace(TA_USER, ostate, tp, (struct tcpiphdr *)0, req);
+notrace:
+	splx(s);
+	return(error);
+}
+
+tuba_ctloutput(op, so, level, optname, mp)
+	int op;
+	struct socket *so;
+	int level, optname;
+	struct mbuf **mp;
+{
+	int clnp_ctloutput(), tcp_ctloutput();
+	if (level == IPPROTO_TCP)	/* TCP-level options go to the TCP handler */
+		return (tcp_ctloutput(op, so, level, optname, mp));
+	return (clnp_ctloutput(op, so, level, optname, mp));	/* every other level goes to CLNP */
+}
diff --git a/sys/netiso/xebec/Makefile b/sys/netiso/xebec/Makefile
new file mode 100644
index 00000000000..fa05f9cc47e
--- /dev/null
+++ b/sys/netiso/xebec/Makefile
@@ -0,0 +1,8 @@
+#	@(#)Makefile	5.16 (Berkeley) 4/26/91
+
+PROG=	xebec
+SRCS=	llparse.c llscan.c main.c malloc.c procs.c putdriver.c sets.c xebec.c
+CFLAGS+= -DDEBUG -traditional
+NOMAN = noman
+
+.include <bsd.prog.mk>
diff --git a/sys/netiso/xebec/debug.h b/sys/netiso/xebec/debug.h
new file mode 100644
index 00000000000..2e3f16794d6
--- /dev/null
+++ b/sys/netiso/xebec/debug.h
@@ -0,0 +1,22 @@
+/* $Header: debug.h,v 2.1 88/09/19 12:56:16 nhall Exp $ */
+/* $Source: /var/home/tadl/src/argo/xebec/RCS/debug.h,v $ */
+
+#define OUT stdout
+/* One debug flag per 7-bit character code, indexed by a character constant. */
+extern int	debug[128];
+/* IFDEBUG(x) ... ENDDEBUG wraps statements that run only when the flag for x is nonzero. */
+#ifdef DEBUG
+extern int column;
+/* The argument is replaced inside the quotes only by a cpp that substitutes there (Makefile: -traditional). */
+#define IFDEBUG(letter) \
+	if(debug['letter']) { 
+#define ENDDEBUG  ; (void) fflush(stdout);}
+
+#else 
+/* Non-DEBUG build: the pair is meant to expand to comment delimiters. NOTE(review): confirm with the cpp in use. */
+#define STAR *
+#define IFDEBUG(letter)	 //*beginning of comment*/STAR
+#define ENDDEBUG	 STAR/*end of comment*//
+
+#endif DEBUG
+
diff --git a/sys/netiso/xebec/llparse.c b/sys/netiso/xebec/llparse.c
new file mode 100644
index 00000000000..fee7a9f7e47
--- /dev/null
+++ b/sys/netiso/xebec/llparse.c
@@ -0,0 +1,366 @@
+/* $Header: llparse.c,v 2.2 88/09/19 12:54:59 nhall Exp $ */
+/* $Source: /var/home/tadl/src/argo/xebec/RCS/llparse.c,v $ */
+/*
+ * ************************* NOTICE *******************************
+ * This code is in the public domain.  It cannot be copyrighted.
+ * This ll parser was originally written by Keith Thompson for the 
+ * University of Wisconsin Crystal project.
+ * It was based on an FMQ lr parser written by Jon Mauney at the
+ * University of Wisconsin.
+ * It was subsequently modified very slightly by Nancy Hall at the 
+ * University of Wisconsin for the Crystal project.
+ * ****************************************************************
+ */
+#include "xebec.h"
+#include "llparse.h"
+#include "main.h"
+#include <stdio.h>
+
+#include "debug.h"
+
+#define LLMINACTION -LLINF	/* stack values <= this are production markers */
+
+short		llparsestack[STACKSIZE];	/* parse stack; llstackptr indexes its top entry */
+short		llstackptr = 0;
+LLtoken		lltoken;	/* the token buffer llparse() passes to llgettoken() */
+
+llparse()	/* run the parse loop; returns 0 once the end symbol has been accepted */
+{
+	register		havetoken = FALSE;	/* does *t hold an unconsumed token? */
+	register		sym;
+	register LLtoken	*t = &lltoken;
+	register		parseaction;
+	register		accepted = FALSE;
+
+	llpushprod(llnprods-1); /* $$$ ::= <start symbol>  */
+
+	do {	/* each pass deals with the symbol on top of the parse stack */
+		sym = llparsestack[llstackptr];
+	IFDEBUG(L)
+		printf("llparse() top of loop, llstackptr=%d, sym=%d\n",
+			llstackptr, sym);
+	ENDDEBUG
+
+		if(sym < 0) {
+			/* action symbol */
+			if(sym <= LLMINACTION) {	/* marker: one llaction(1) per production it stands for */
+				for(;sym<=LLMINACTION;sym++) {
+					llaction(1, t); /* calls llfinprod */
+				}
+				llstackptr--;
+				continue;
+			} else { llaction(-sym, t);	/* ordinary action, numbered -sym */
+				llstackptr--;
+				continue;
+			}
+		}
+
+		if(sym < llnterms) {
+
+			/* it's a terminal symbol */
+
+			if(!havetoken) {
+				llgettoken(t);
+				havetoken = TRUE;
+			}
+
+			if(sym == t->llterm) {
+				llpushattr(t->llattrib);
+				llaccept(t);
+				llstackptr--; /* pop terminal */
+				if(t->llterm == llnterms-1) { /* end symbol $$$ */
+					accepted = TRUE;
+				} else {
+					havetoken = FALSE;
+				}
+			} else {
+				llparsererror(t); /* wrong terminal on input */
+				havetoken = FALSE;
+			}
+			continue;
+		}
+
+		/* non terminal */
+
+		if(!havetoken) {
+			llgettoken(t);
+			havetoken = TRUE;
+		}
+
+		/* consult parse table  for new production */
+		parseaction = llfindaction(sym, t->llterm);
+
+		if(parseaction == 0) {
+			/* error entry */
+			llparsererror(t);
+			havetoken = FALSE;
+			continue;
+		}
+
+		if(llepsilon[parseaction]) {
+			/* epsilon production */
+			if(llepsilonok(t->llterm)) {
+				llstackptr--; /* pop nonterminal */
+				llpushprod(parseaction); /* push rhs of production */
+			} else {
+				llparsererror(t);
+				havetoken = FALSE;
+			}
+		} else {
+			llstackptr--; /* pop nonterminal */
+			llpushprod(parseaction); /* push rhs of production */
+		}
+	} while(!accepted);
+
+	return(0);
+}
+
+llpushprod(prod) 	/* recognize production prod - push rhs on stack */
+short prod;
+{
+	register	start;
+	register	length;
+	register	count;
+
+	start = llprodindex[prod].llprodstart;
+	length = llprodindex[prod].llprodlength;
+
+	IFDEBUG(L)
+		printf("llpushprod(%d) llstackptr=0x%x(%d), length = 0x%x(%d)\n",
+		prod, llstackptr, llstackptr, length , length);
+		/*
+		dump_parse_stack();
+		*/
+	ENDDEBUG
+	/*
+	 * The push writes one slot per rhs symbol plus one for the production
+	 * marker (none when it merges with a marker already on top).  Check the
+	 * highest index that will be written before anything is stored.
+	 */
+	count = length + (llparsestack[llstackptr] <= LLMINACTION ? 0 : 1);
+	if(llstackptr+count >= STACKSIZE) {
+		fprintf(stderr,"Parse stack overflow. llstackptr=0x%x, length=0x%x\n",
+		llstackptr, length);
+		Exit(-1);
+	}
+
+	llsetattr(llprodindex[prod].llprodtlen);
+
+	/* put a marker on the stack to mark beginning of production */
+	if(llparsestack[llstackptr] <= LLMINACTION) {
+		/* already a marker there: let it stand for all the adjacent ones */
+		(llparsestack[llstackptr]) --;
+	}
+	else {
+		llstackptr++;
+		llparsestack[llstackptr] = LLMINACTION;
+	}
+
+	for(count=0; count<length; count++) {
+		llstackptr++;
+		llparsestack[llstackptr] = llproductions[start++];
+	}
+}
+
+
+llepsilonok(term)
+{
+	register	depth;	/* parse stack index being examined */
+	register	sym;
+	register	pact;
+
+	IFDEBUG(L)
+		printf("llepsilonok() enter\n");
+	ENDDEBUG
+
+	/*
+	 * Decide whether term is acceptable given what lies on the parse
+	 * stack.  Starting at the top, action symbols and nonterminals whose
+	 * table entry for term is an epsilon production are stepped over;
+	 * the first symbol that is neither of those settles the answer.
+	 * Stepping down to index 0 counts as success; slot 0 itself is
+	 * not examined.
+	 *
+	 * Returns nonzero if term is acceptable, zero if not.
+	 */
+	for(depth = llstackptr; ; ) {
+		sym = llparsestack[depth];
+
+		if(sym >= 0) {
+			if(sym < llnterms) {
+				/* a terminal: it must be term itself */
+				return(sym == term);
+			}
+
+			pact = llfindaction(sym, term);
+
+			if(pact == 0) {
+				/* nonterminal with no table entry for term */
+				return(FALSE);
+			}
+
+			if(llepsilon[pact] != TRUE) {
+				/* its production for term is not an epsilon one */
+				return(TRUE);
+			}
+		}
+
+		/* action symbol or epsilon nonterminal: look one slot deeper */
+		depth--;
+		if(depth == 0) {
+			return(TRUE);
+		}
+	}
+	/* NOTREACHED */
+}
+
+
+short llfindaction(sym, term)
+{
+	register struct llparsetable	*row;	/* table entry being compared */
+
+	IFDEBUG(L)
+		printf("llfindaction(sym=%d, term=%d) enter \n", sym, term);
+	ENDDEBUG
+	/* sym's entries start at llparseindex[sym] and end at an llterm of 0 */
+	for(row = &llparsetable[llparseindex[sym]]; row->llterm != 0; row++) {
+		if(row->llterm != term) {
+			continue;
+		}
+		return(row->llprod);	/* production listed for (sym, term) */
+	}
+	/* no entry for term: callers treat 0 as the error entry */
+	return(0);
+}
+
+
+llparsererror(token)	/* report a syntax error at token, then call Exit(-1) */
+LLtoken *token;
+{
+	IFDEBUG(L)
+		fprintf(stderr,"llparsererror() enter\n");
+		prt_token(token);
+	ENDDEBUG
+
+	fprintf(stderr, "Syntax error: ");
+	prt_token(token);	/* note: prt_token() prints on stdout, not stderr */
+	dump_buffer();
+	Exit(-1);	/* llparse() nevertheless continues after each call, as if this returned */
+}
+
+
+llgettoken(token)	/* have llscan() fill *token, then mark it NORMAL */
+LLtoken *token;
+{
+	llscan(token);
+	token->llstate = NORMAL;	/* llparse.h also defines DELETE and INSERT states */
+	IFDEBUG(L)
+		printf("llgettoken(): ");
+		prt_token(token);
+	ENDDEBUG
+}
+
+
+/******************************************************************************
+
+	Attribute support routines
+
+******************************************************************************/
+/*
+**	attribute stack
+**
+**	AttrStack =	stack of record
+**				values : array of values;
+**				ptr	: index;
+**	end;
+**
+*/
+
+LLattrib	llattributes[LLMAXATTR];	/* attribute value stack */
+int		llattrtop = 0;	/* next free slot in llattributes[] */
+/* One descriptor per production that llsetattr() has opened and llfinprod() has not yet closed. */
+struct llattr	llattrdesc[LLMAXDESC];
+/* Index of the next descriptor llsetattr() will fill; initialised to 1. */
+int	lldescindex = 1;
+
+
+llsetattr(n)
+{
+	register struct llattr *desc;	/* descriptor being opened */
+
+	IFDEBUG(L)
+		printf("llsetattr(%d) enter\n",n);
+	ENDDEBUG
+	if(LLMAXDESC <= lldescindex) {
+		fprintf(stdout, "llattribute stack overflow: desc\n");
+		fprintf(stdout, 
+			"lldescindex=0x%x, llattrtop=0x%x\n",lldescindex, llattrtop);
+		Exit(-1);
+	}
+	/* open a descriptor for a production with n rhs attributes */
+	desc = llattrdesc + lldescindex++;
+	desc->llaindex = 1;
+	desc->llacnt = 1 + n;	/* the rhs attrs plus the one the lhs ALWAYS uses */
+	desc->llabase = llattributes + llattrtop;
+	llattrtop += 1;
+	desc->lloldtop = llattrtop;	/* the lhs attr remains when the production is popped */
+}
+
+llpushattr(attr)
+LLattrib attr;
+{
+	IFDEBUG(L)
+		printf("llpushattr() enter\n");
+	ENDDEBUG
+	/* refuse the push once every slot of llattributes[] is taken */
+	if(llattrtop >= LLMAXATTR) {
+		fprintf(stderr, "ATTRIBUTE STACK OVERFLOW!\n");
+		Exit(-1);
+	}
+	llattributes[llattrtop] = attr;
+	llattrtop++;
+	/* inc count of attrs on the stack for this prod */
+	llattrdesc[lldescindex-1].llaindex++;
+}
+
+llfinprod()
+{
+	register struct llattr *done;	/* descriptor of the production just finished */
+	IFDEBUG(L)
+		printf("llfinprod() enter\n");
+	ENDDEBUG
+	done = &llattrdesc[--lldescindex];
+	llattrtop = done->lloldtop;	/* drop its rhs attrs, keep the lhs one */
+	/* the lhs attr left behind now counts as an rhs attr of the new top production */
+	done[-1].llaindex++;
+}
+
+#ifndef LINT
+#ifdef DEBUG
+dump_parse_stack()	/* print every parse stack slot, top first */
+{
+	int slot = llstackptr;
+	char *label;	/* symbol name, or a fixed tag for negative entries */
+	printf("PARSE STACK:\n");
+	while(slot >= 0) {
+		label = llparsestack[slot] >= 0 ? llstrings[llparsestack[slot]] : "Action symbol";
+		printf("%d\t%d\t%s\n", slot, llparsestack[slot], label);
+		slot--;
+	}
+}
+
+#endif DEBUG
+#endif LINT
+
+prt_token(t)	/* print token t's address, terminal number and name on stdout */
+LLtoken *t;
+{
+	fprintf(stdout, "t at 0x%lx\n", (long)t);	/* %x takes an int, which a pointer need not fit */
+	fprintf(stdout, "t->llterm=0x%x\n", t->llterm); (void) fflush(stdout);
+	fprintf(stdout, "TOK: %s\n", llstrings[t->llterm]);
+	(void) fflush(stdout);
+#ifdef LINT
+	/* to make lint shut up */
+	fprintf(stdout, "", llnterms, llnsyms, llnprods, llinfinite);
+#endif LINT
+}
diff --git a/sys/netiso/xebec/llparse.h b/sys/netiso/xebec/llparse.h
new file mode 100644
index 00000000000..1b6133b1b7b
--- /dev/null
+++ b/sys/netiso/xebec/llparse.h
@@ -0,0 +1,145 @@
+/* $Header: llparse.h,v 2.1 88/09/19 12:56:20 nhall Exp $ */
+/* $Source: /var/home/tadl/src/argo/xebec/RCS/llparse.h,v $ */
+
+	/************************************************************
+		attributes stack garbage
+	************************************************************/
+
+#define LLMAXATTR	512
+#define LLMAXDESC	256
+#define	LLATTR		/* build an attribute stack */
+
+	/*
+	**	attribute stack
+	**
+	**	AttrStack =	stack of record
+	**				values : array of values;
+	**				ptr	: index;
+	**	end;
+	**
+	*/
+
+	typedef union llattrib LLattrib;
+
+	extern LLattrib	llattributes[LLMAXATTR];
+	extern int	llattrtop;
+
+	extern struct	llattr {
+		LLattrib	*llabase; /* ptr into the attr stack (llattributes) */
+		int		llaindex;/* # attrs on the stack so far for this prod */
+		int		llacnt;/* total # ever to go on for this prod */
+
+		int		lloldtop;/* when popping this prod, restore stack to here ;
+						 one attr will remain on the stack (for the lhs) */
+	}	llattrdesc[LLMAXDESC];
+
+	extern int	lldescindex;
+
+	/************************************************************
+		token record filled in by the scanner for the parser
+	************************************************************/
+
+	extern	struct	lltoken {
+		short		llterm;		/* token number */
+		short		llstate;	/* inserted deleted normal */
+		LLattrib	llattrib; 
+	} 	lltoken;
+	typedef	struct lltoken	LLtoken;
+
+/************************************************************
+	constants used in llparse.c
+************************************************************/
+
+#define STACKSIZE	500
+#define MAXCORR		16
+
+#define	NORMAL		0
+#define	DELETE		1
+#define	INSERT		2
+
+/************************************************************
+	datatypes used to communicate with the parser
+************************************************************/
+
+struct	llinsert {
+	short	llinscost;
+	short	llinslength;
+	short	llinsert[MAXCORR];
+};
+typedef	struct llinsert	LLinsert;
+
+extern	short	llparsestack[];
+extern	short	llstackptr;
+extern	short	llinfinite;
+
+/************************************************************
+	variables used to pass information
+	specific to each grammar
+************************************************************/
+
+extern	short	llnterms;
+extern	short	llnsyms;
+extern	short	llnprods;
+
+extern	char	*llefile;
+
+extern	struct	llparsetable {
+	short	llterm;		/* lookahead terminal; 0 ends a nonterminal's run of entries */
+	short	llprod;		/* production llfindaction() returns for that terminal */
+}	llparsetable[];
+
+extern	short	llparseindex[];
+
+extern	short	llepsilon[];
+
+extern	short	llproductions[];
+
+extern	struct	llprodindex {
+	short	llprodstart;	/* index of the first rhs symbol in llproductions[] */
+	short	llprodlength;	/* number of rhs symbols llpushprod() pushes */
+	short	llprodtlen;	/* count llpushprod() hands to llsetattr() */
+}	llprodindex[];
+
+extern	struct	llcosts {
+	short	llinsert;
+	short	lldelete;
+}	llcosts[];
+
+extern	struct	llstable {
+	short	llsstart;
+	short	llslength;
+}	llstable[];
+
+extern	short	llsspace[];
+
+extern	struct	lletable {
+	short	llecost;
+	short	llelength;
+	short	llestart;
+}	lletable[];
+
+extern	long	lleindex[];
+
+extern	short	llespace[];
+
+extern	char	*llstrings[];
+
+/************************************************************
+	parser routines (llparse.c defines only some of these)
+************************************************************/
+
+extern llparse();
+extern llcopye();
+extern llcopys();
+extern llcorrector();
+extern llepsilonok();
+extern llexpand();
+extern short llfindaction();
+extern llgetprefix();
+extern llgettoken();
+extern llinsert();
+extern llinsertsym();
+extern llinserttokens();
+extern llparsererror();
+extern llpushprod();
+extern llreadetab();
diff --git a/sys/netiso/xebec/llscan.c b/sys/netiso/xebec/llscan.c
new file mode 100644
index 00000000000..ffdb9a92a60
--- /dev/null
+++ b/sys/netiso/xebec/llscan.c
@@ -0,0 +1,430 @@
+/* $Header: llscan.c,v 2.2 88/09/19 12:55:06 nhall Exp $ */
+/* $Source: /var/home/tadl/src/argo/xebec/RCS/llscan.c,v $ */
+/*
+ * ************************* NOTICE *******************************
+ * This code is in the public domain.  It cannot be copyrighted.
+ * This scanner was originally written by Keith Thompson for the 
+ * University of Wisconsin Crystal project.
+ * It was subsequently modified significantly by Nancy Hall at the 
+ * University of Wisconsin for the ARGO project.
+ * ****************************************************************
+ */
+#include "xebec.h"
+#include "llparse.h"
+
+#include "main.h"
+#include <stdio.h>
+#include "procs.h"
+#include "debug.h"
+
+#define EOFILE	0x01
+#define UNUSED	0x02
+#define IGNORE	0x04
+#define OPCHAR	0x8
+#define DIGITS	0x10
+#define	LETTER	0x20
+
+int chtype[128] = {
+/*	null,	soh ^a,	stx ^b	etx ^c	eot ^d	enq ^e	ack ^f	bel ^g	*/
+	EOFILE,	UNUSED,	UNUSED,	UNUSED,	UNUSED,	UNUSED,	UNUSED,	UNUSED,
+/*	bs ^h	ht ^i	lf ^j	vt ^k	ff ^l	cr ^m	so ^n	si ^o	*/
+	UNUSED,	IGNORE,	IGNORE,	UNUSED,	IGNORE,	IGNORE,	UNUSED,	UNUSED,
+/*	dle ^p	dc1 ^q	dc2 ^r	dc3 ^s	dc4 ^t	nak ^u	syn ^v	etb ^w	*/
+	UNUSED,	UNUSED,	UNUSED,	UNUSED,	EOFILE,	UNUSED,	UNUSED,	UNUSED,
+/*	can ^x	em ^y	sub ^z	esc ^]	fs ^\ 	gs ^}	rs ^`	us ^/	*/
+	UNUSED,	UNUSED,	UNUSED,	UNUSED,	UNUSED,	UNUSED,	UNUSED,	UNUSED,
+
+/*			!		"		#		$		%		&		'		*/
+	IGNORE,	UNUSED,	OPCHAR,	UNUSED,	OPCHAR,	UNUSED,	OPCHAR,	OPCHAR,
+/*	(		)		*		+		,		-		.		/		*/
+	OPCHAR,	OPCHAR,	OPCHAR,	OPCHAR,	OPCHAR,	OPCHAR,	OPCHAR,	OPCHAR,
+/*	0		1		2		3		4		5		6		7		*/
+	DIGITS,	DIGITS,	DIGITS,	DIGITS,	DIGITS,	DIGITS,	DIGITS,	DIGITS,
+/*	8		9		:		;		<		=		>		?		*/
+	DIGITS,	DIGITS,	OPCHAR,	OPCHAR,	OPCHAR,	OPCHAR,	OPCHAR,	OPCHAR,
+
+/*	@		A		B		C		D		E		F		G		*/
+	UNUSED,	LETTER,	LETTER,	LETTER,	LETTER,	LETTER,	LETTER,	LETTER,
+/*	H		I		J		K		L		M		N		O		*/
+	LETTER,	LETTER,	LETTER,	LETTER,	LETTER,	LETTER,	LETTER,	LETTER,
+/*	P		Q		R		S		T		U		V		W		*/
+	LETTER,	LETTER,	LETTER,	LETTER,	LETTER,	LETTER,	LETTER,	LETTER,
+/* 	X		Y		Z		[		\		]		^		_		*/
+	LETTER,	LETTER,	LETTER,	OPCHAR,	UNUSED,	OPCHAR,	OPCHAR,	LETTER,
+
+/*	`		a		b		c		d		e		f		g		*/
+	UNUSED,	LETTER,	LETTER,	LETTER,	LETTER,	LETTER,	LETTER,	LETTER,
+/*	h		i		j		k		l		m		n		o		*/
+	LETTER,	LETTER,	LETTER,	LETTER,	LETTER,	LETTER,	LETTER,	LETTER,
+/*	p		q		r		s		t		u		v		w		*/
+	LETTER,	LETTER,	LETTER,	LETTER,	LETTER,	LETTER,	LETTER,	LETTER,
+/*	x		y		z		{		|		}		~		del		*/
+	LETTER,	LETTER,	LETTER,	OPCHAR,	UNUSED,	OPCHAR,	UNUSED,	UNUSED
+};
+
+
+extern FILE *astringfile; 
+static char *buffptr;
+static char buffer[2][LINELEN];
+static int currentbuf = 1;
+
+#define addbuf(x) *buffptr++ = x
+
+static int ch = ' ';
+
+skip()
+{
+	/* consume lookahead characters classed IGNORE (blank, tab, lf, ff, cr) */
+	for ( ; chtype[ch] == IGNORE; ch = getch())
+		continue;
+}
+
+llaccept(t)	/* report on stderr a token whose llstate is INSERT or DELETE */
+LLtoken *t;
+{
+	/* NORMAL tokens, and any other state, are accepted silently */
+	if (t->llstate == INSERT) {
+		fprintf(stderr,"Insert %s\n", llstrings[t->llterm]);
+		return;
+	}
+	if (t->llstate == DELETE) {
+		fprintf(stderr,"Delete %s\n", llstrings[t->llterm]);
+		return;
+	}
+	return;
+}
+
+#define	TVAL	(t->llattrib)
+
+
+dump_buffer()	/* debug aid: write the first 20 bytes of the current token buffer to stderr */
+{
+	register char *p = &buffer[currentbuf][0], *stop = p + 20;
+	while (p < stop)
+		(void) fputc(*p++, stderr);
+	(void) fputc('\n', stderr);	/* end the dump line */
+	(void) fflush(stderr);
+}
+
+int iskey(c, buf)	/* keyword lookup: returns the token type, or 0 if c is not a keyword */
+char *c;		/* NUL-terminated candidate identifier */
+char **buf;		/* out: the matching table string, or null when nothing matches */
+{
+	static struct { char *key_word; int term_type; } keys[] = {
+			{ "SAME", T_SAME },
+			{ "DEFAULT", T_DEFAULT },
+			{ "NULLACTION", T_NULLACTION },
+			{ "STRUCT", T_STRUCT },
+			{ "SYNONYM", T_SYNONYM },
+			{ "TRANSITIONS", T_TRANSITIONS },
+			{ "STATES", T_STATES },
+			{ "EVENTS", T_EVENTS },
+			{ "PCB", T_PCB },
+			{ "INCLUDE", T_INCLUDE },
+			{ "PROTOCOL", T_PROTOCOL },
+			{ 0, 0},
+	};
+	register int i = 0;
+	while (keys[i].key_word != (char *)0) {	/* table ends with a null entry */
+		*buf = keys[i].key_word;	/* stored before the compare, so a hit leaves it set */
+		if (strcmp(c, *buf) == 0)
+			return ( keys[i].term_type );
+		i++;
+	}
+	*buf = (char *)0;
+	return(0);
+}
+
+getstr(o,c)
+	/* Copy one delimited string (predicate or action text) into the
+	 * current token buffer.  o and c are the opening and closing
+	 * delimiters; when they differ nested pairs are counted, and
+	 * \o, \c and \$ lose their backslash and are copied literally.
+	 * "$$" becomes "e->ev_union." followed by AddCurrentEventName();
+	 * "$<synonym>" becomes "p" or "e", and a '.' after it becomes "->".
+	 */
+char o,c;
+{
+	register int nested = 1;	/* open delimiters not yet closed */
+	register int allow_nesting = (o==c)?-1:1;	/* o==c: a second o closes the string */
+
+	IFDEBUG(S)
+		fprintf(stdout,"getstr: ch=%c, delimiters %c %c\n",
+			ch,o, c);
+		fprintf(stdout,"getstr: buffptr 0x%x, currentbuf 0x%x\n",
+			buffptr, currentbuf);
+	ENDDEBUG
+
+	if( ch == c ) nested--;	/* empty string: lookahead is already the closer */
+	while(nested) {
+		if(ch == '\0') {	/* getch() returns 0 at end of file */
+			fprintf(stderr,
+			"Eof inside of a string, delims= %c,%c, nesting %d",c,o, nested);
+			Exit(-1);
+			/* notreached */
+		} else if(ch == '$') {
+			/* might be an attribute */
+			IFDEBUG(S)
+				fprintf(stdout,"getstr: atttribute?\n");
+			ENDDEBUG
+
+			/* assume it's an event */
+			/* addbuf is a macro so this isn't as bad as
+			 * it looks
+			 * add "e->ev_union."
+			 */
+			if( (ch = getch()) == '$' ) {
+				addbuf('e'); addbuf('-'); addbuf('>');
+				addbuf('e'); addbuf('v'); addbuf('_');
+				addbuf('u'); addbuf('n'); addbuf('i');
+				addbuf('o'); addbuf('n');
+				addbuf('.');
+				AddCurrentEventName(& buffptr);
+			} else {
+				char *obufp = buffptr;	/* where the synonym text starts */
+
+				do {
+					addbuf(ch);
+					ch = getch();
+				} while(chtype[ch] & LETTER);
+				addbuf('\0');
+				if( !strncmp(obufp, synonyms[PCB_SYN],
+										strlen(synonyms[PCB_SYN]) )) {
+					buffptr = obufp;	/* overwrite the synonym with "p" */
+					addbuf('p');
+				} else if( !strncmp(obufp, synonyms[EVENT_SYN],
+										strlen(synonyms[EVENT_SYN]))) {
+					buffptr = obufp;	/* overwrite the synonym with "e" */
+					addbuf('e');
+				} else {
+					fprintf(stderr, "Unknown synonym %s\n", obufp);
+					Exit(-1);
+				}
+				if(ch == '.') {
+					addbuf('-'); addbuf('>');
+				} else  {
+					/* needs to be checked for nesting */
+					goto check;
+				}
+			}
+			/* end of attribute handling */
+			goto skip;
+		} else if(ch == '\\') {
+			/* possible escape - this is kludgy beyond belief:
+			 * \ is used to escape open and closing delimiters
+			 * and '$'
+			 * otherwise it's passed through to be compiled by C
+			 */
+			ch = getch();
+			if( (ch != o ) && (ch != c) && (ch != '$') ) {
+			/* may need to handle case where \ is last char in file... */
+				/* don't treat is as escape; not open or close so
+				 * don't have to worry about nesting either
+				 */
+				addbuf('\\');
+			}
+		}
+		addbuf(ch);
+	skip:
+		ch = getch();
+	check:
+		if( ch == o ) nested += allow_nesting;
+		else if( ch == c ) nested--;
+		if ( (buffptr - buffer[currentbuf]) >= LINELEN - 2) {	/* was "> LINELEN": keep room for the closer + NUL stored after the loop; '$' expansions are still only checked after they are written */
+			fprintf(stderr,
+			"%s too long.\n", (o=='{')?"Action":"Predicate"); /*}*/
+			fprintf(stderr,
+			"buffptr, currentbuf 0x%x, 0x%x\n",buffptr,currentbuf );
+			Exit(-1);
+		}
+		IFDEBUG(S)
+			fprintf(stdout,"loop in getstr: ch 0x%x,%c o=%c,c=%c nested=%d\n", 
+				ch,ch,o,c,nested);
+		ENDDEBUG
+	}
+	addbuf(ch);	/* the closing delimiter itself */
+	addbuf('\0');
+
+	IFDEBUG(S)
+		fprintf(stdout,"exit getstr: got %s\n", buffer[currentbuf]);
+		fprintf(stdout,"exit getstr: buffptr 0x%x, currentbuf 0x%x\n",
+			buffptr, currentbuf);
+	ENDDEBUG
+}
+
+getch()
+{
+	char c;
+	extern FILE *infile;
+	extern int lineno;
+
+	c = fgetc(infile) ;
+	if (c == '\n') lineno++;
+	if ((int)c ==  EOF) c = (char)0;
+	if (feof(infile)) c = (char) 0;
+	IFDEBUG(e)
+		fprintf(stdout, "getch: 0x%x\n", c);
+		(void) fputc( c, stdout);
+		fflush(stdout);
+	ENDDEBUG
+
+	return c;
+}
+
+llscan(t)
+LLtoken *t;
+{
+	char c;
+
+	t->llstate = NORMAL;
+
+	++currentbuf;
+	currentbuf&=1;
+again:
+	buffptr =  &buffer[currentbuf][0];
+
+	skip();
+
+	switch(chtype[ch]) {
+
+	case EOFILE:
+		t->llterm = T_ENDMARKER;
+		break;
+
+	case UNUSED:
+		fprintf(stderr, "Illegal character in input - 0x%x ignored.",  ch);
+		ch = getch();
+		goto again;
+
+	case OPCHAR:
+
+		switch(ch) {
+
+		case '/':
+			/* possible comment : elide ; kludge */
+			IFDEBUG(S)
+				fprintf(stdout, "Comment ch=%c\n", ch);
+			ENDDEBUG
+			c = getch();
+			if (c != '*') {
+				fprintf(stderr,"Syntax error : character(0x%x) ignored", ch);
+				ch = c;
+				goto again;
+			} else {
+				register int state = 2,  whatchar=0;
+				static int dfa[3][3] = {
+					/* 		 	done	seen-star  middle */
+					/* star */	{ 	0,	1,		1	},
+					/* /    */	{	0,	0,		2 	},
+					/* other */ {	0,	2,		2	}
+				};
+
+				while( state ) {
+					if( (c = getch()) == (char)0)
+						break;
+					whatchar = (c=='*')?0:(c=='/'?1:2);
+					IFDEBUG(S)
+						fprintf(stdout, 
+							"comment: whatchar = %d, c = 0x%x,%c, oldstate=%d",
+							whatchar, c,c, state);
+					ENDDEBUG
+					state = dfa[whatchar][state];
+					IFDEBUG(S)
+						fprintf(stdout, ", newstate=%d\n", state);
+					ENDDEBUG
+				}
+				if(state) {
+					fprintf(stderr,
+						"Syntax error: end of file inside a comment");
+					Exit(-1);
+				} else ch = getch();
+			}
+			IFDEBUG(S)
+				fprintf(stdout, "end of comment at 0x%x,%c\n",ch,ch);
+			ENDDEBUG
+			goto again;
+
+
+		case '*':
+			t->llterm = T_STAR;
+			break;
+
+		case ',':
+			t->llterm = T_COMMA;
+			break;
+
+		case ';':
+			t->llterm = T_SEMI;
+			break;
+
+		case '<':
+			t->llterm = T_LANGLE;
+			break;
+
+		case '=':
+			t->llterm = T_EQUAL;
+			break;
+
+		case '[':
+			t->llterm = T_LBRACK;
+			break;
+
+		case ']':
+			t->llterm = T_RBRACK;
+			break;
+
+#ifdef T_FSTRING
+		case '"':
+			t->llterm = T_FSTRING;
+			addbuf(ch);
+			ch = getch();
+			getstr('"', '"');
+			TVAL.FSTRING.address = stash(buffer[currentbuf]);
+			break;
+#endif T_FSTRING
+
+		case '(':
+			t->llterm = T_PREDICATE;
+			getstr(ch, ')' );
+			TVAL.PREDICATE.address = buffer[currentbuf];
+			break;
+
+		case '{':
+			t->llterm = T_ACTION;
+			getstr(ch, '}');
+			TVAL.ACTION.address = buffer[currentbuf];
+			break;
+
+		default:
+			fprintf(stderr,"Syntax error : character(0x%x) ignored", ch);
+			ch = getch();
+			goto again;
+
+		}
+		ch = getch();
+		break;
+
+	case LETTER:
+		do {
+			addbuf(ch);
+			ch = getch();
+		} while(chtype[ch] & (LETTER | DIGITS));
+
+		addbuf('\0');
+
+		t->llterm = iskey(buffer[currentbuf], &TVAL.ID.address);
+		if(!t->llterm) {
+			t->llterm = T_ID;
+			TVAL.ID.address = buffer[currentbuf];
+		}
+		IFDEBUG(S)
+			fprintf(stdout, "llscan: id or keyword 0x%x, %s\n",
+			TVAL.ID.address, TVAL.ID.address);
+		ENDDEBUG
+		break;
+
+	default:
+		fprintf(stderr, "Snark in llscan: chtype=0x%x, ch=0x%x\n",
+			chtype[ch], ch);
+	}
+}
diff --git a/sys/netiso/xebec/main.c b/sys/netiso/xebec/main.c
new file mode 100644
index 00000000000..a0b4842f30c
--- /dev/null
+++ b/sys/netiso/xebec/main.c
@@ -0,0 +1,410 @@
+/* $Header: main.c,v 2.4 88/09/19 12:55:13 nhall Exp $ */
+/* $Source: /var/home/tadl/src/argo/xebec/RCS/main.c,v $ */
+/*
+ * TODO:
+ * rewrite the command line stuff altogether - it's kludged beyond
+ * belief (as is the rest of the code...)
+ *
+ * DISCLAIMER DISCLAIMER DISCLAIMER
+ * This code is such a kludge that I don't want to put my name on it.
+ * It was a ridiculously fast hack and needs rewriting.
+ * However it does work...
+ */
+
+#include <stdio.h>
+#include <strings.h>
+#include "malloc.h"
+#include "debug.h"
+#include "main.h"
+
+int	debug[128];
+
+int lineno = 1;
+
+FILE *statefile, *actfile, *eventfile_h, *statevalfile;
+FILE *infile, *astringfile;
+char *Transfilename;
+char *astringfile_name = DEBUGFILE;
+char *actfile_name = ACTFILE;
+char *statefile_name = STATEFILE;
+char *statevalfile_name = STATEVALFILE;
+char *eventfile_h_name = EVENTFILE_H;
+int print_trans = 0;
+int print_protoerrs = 0;
+int pgoption = 0;
+char kerneldirname[50] = "\0";
+
+char protocol[50];
+
+char *synonyms[] = {
+	"EVENT",
+	"PCB",
+	0
+};
+
+usage(a)
+char *a;
+{
+	fprintf(stderr, 
+	"usage: %s <transition file> {-D<debug options>} <other options>\n",
+		a);
+	fprintf(stderr, "\t<other options> is any combination of:\n");
+	fprintf(stderr, "\t\t-A<action file name>\n");
+	fprintf(stderr, "\t\t-E<event file name>\n");
+	fprintf(stderr, "\t\t-S<state file name>\n");
+	fprintf(stderr, "\t\t-I<initial values file name>\n");
+	fprintf(stderr, "\t\t-X<debugging file name>\n");
+	fprintf(stderr, "\t\t-K<directory name>\n");
+	fprintf(stderr, 
+	"\tThese names do NOT include the suffices (.c, .h)\n");
+	fprintf(stderr, 
+	"\t\t-D<options> to turn on debug options for xebec itself\n");
+	fprintf(stderr, "\t-<nn> for levels of debugging output\n");
+	fprintf(stderr, "\t\t<nn> ranges from 1 to 3, 1 is default(everything)\n");
+	fprintf(stderr, "\t\t-T to print transitions\n");
+	fprintf(stderr, "\t\t-e to print list of combinations of\n");
+	fprintf(stderr, "\t\t\t [event,old_state] that produce protocol errors\n");
+	fprintf(stderr, "\t\t-g include profiling code in driver\n");
+	Exit(-1);
+}
+
+openfiles(proto)
+register char *proto;
+{
+	register char *junk;
+	register int lenp = strlen(proto);
+
+	IFDEBUG(b)
+		fprintf(OUT, "openfiles %s\n",proto);
+	ENDDEBUG
+
+#define HEADER Header
+#define SOURCE Source
+#define DOIT(X)\
+	/* GAG */\
+	junk = Malloc( 2 + lenp + strlen(X/**/_name) );\
+	(void) sprintf(junk, "%s_", proto);\
+	X/**/_name = strcat(junk, X/**/_name);\
+	X = fopen(X/**/_name, "w");\
+	if((X)==(FILE *)0)\
+	{ fprintf(stderr,"Open failed: %s\n", "X"); Exit(-1); }\
+	fprintf(X, "/* %cHeader%c */\n",'$', '$' );\
+	fprintf(X, "/* %cSource%c */\n",'$', '$' );
+
+	DOIT(eventfile_h);
+
+	IFDEBUG(X)
+#ifdef DEBUG
+		DOIT(astringfile);
+#endif DEBUG
+		fprintf(astringfile, 
+				"#ifndef _NFILE\n#include <stdio.h>\n#endif _NFILE\n" );
+	ENDDEBUG
+
+	DOIT(statevalfile);
+	DOIT(statefile);
+	DOIT(actfile);
+	fprintf(actfile,
+		"#ifndef lint\nstatic char *rcsid = \"$Header/**/$\";\n#endif lint\n");
+
+	if(pgoption)
+		putdriver(actfile, 15);
+	else 
+		putdriver(actfile, 14);
+
+	FakeFilename(actfile, Transfilename, lineno);
+	putdriver(actfile, 1);
+	FakeFilename(actfile, Transfilename, lineno);
+	putdriver(actfile, 12);
+	fprintf(actfile, "#include \"%s%s\"\n", kerneldirname, statevalfile_name);
+	FakeFilename(actfile, Transfilename, lineno);
+	putdriver(actfile, 2);
+
+	initsets(eventfile_h, statefile);
+}
+
+includecode(file, f)	/* write the text inside f's outermost { } to file */
+FILE *file;		/* output stream */
+register char *f;	/* string whose first character (the opener) is skipped; modified in place */
+{
+	register int count=1;	/* braces opened and not yet closed */
+	static char o='{';
+	static char c='}';
+	register char *g;
+
+	IFDEBUG(a)
+		fprintf(stdout, "including: %s, f=0x%x", f,f);
+	ENDDEBUG
+	for (g = ++f; *g != '\0'; g++) {	/* stop at NUL: never scan past the string */
+		if (*g == o)
+			count++;
+		else if (*g == c && --count == 0)
+			break;	/* g is on the brace matching the skipped opener */
+	}
+	*g = '\0';	/* drop the closing brace (no-op if the braces were unbalanced) */
+	IFDEBUG(a)
+		fprintf(stdout, "derived: %s", f);
+	ENDDEBUG
+	fprintf(file, "%s", f);
+	FakeFilename(file, Transfilename, lineno);
+}
+
+putincludes()
+{
+	FakeFilename(actfile, Transfilename, lineno);
+	fprintf(actfile, "\n#include \"%s%s\"\n", kerneldirname, eventfile_h_name);
+	IFDEBUG(X)
+		if( !debug['K'] )
+			fprintf(actfile, "\n#include \"%s\"\n", astringfile_name);
+			/* not in kernel mode */
+	ENDDEBUG
+	FakeFilename(actfile, Transfilename, lineno);
+}
+
+main(argc, argv)	/* xebec entry point: read options, run the parser, finish the generated files */
+int argc;
+char *argv[];		/* argv[1] is the transition file; options follow it */
+{
+	register int i = 2;
+	extern char *strcpy();
+	int start, finish;
+	extern int FirstEventAttribute;
+	extern int Nevents, Nstates;
+
+	start = time(0);
+	if(argc < 2) {
+		usage(argv[0]);
+	}
+	IFDEBUG(a)
+		fprintf(stdout, "infile = %s\n",argv[1]);
+	ENDDEBUG
+	Transfilename = argv[1];
+	infile = fopen(argv[1], "r");
+	if(infile == (FILE *)0) { fprintf(stderr, "Cannot open %s\n", argv[1]); exit(-1); }
+	if(argc > 2) while(i < argc) {
+		register int j=0;
+		char c;
+		char *name;
+
+		if(argv[i][j] == '-') j++;	/* the leading dash is optional */
+		switch(c = argv[i][j]) {
+
+		/* GROT */
+		case 'A':
+			name = &argv[i][++j];
+			actfile_name = Malloc( strlen(name)+4);
+			actfile_name =  (char *)strcpy(actfile_name,name);
+#ifdef LINT
+			name =
+#endif LINT
+			strcat(actfile_name, ".c");
+			fprintf(stdout, "debugging file is %s\n",actfile_name);
+			break;
+		case 'K':
+			debug[c]=1;
+			fprintf(OUT, "option %c file %s\n",c, &argv[i][j+1]);
+			if(strlen(&argv[i][++j]) + 5 > sizeof(kerneldirname)) {	/* "../" + name + "/" + NUL must fit */
+				fprintf(OUT, "K option: dir name too long!\n"); exit(-1); }
+			(void) strcpy(kerneldirname,&argv[i][j]);
+			break;
+		case 'X':
+			debug[c]=1;
+			name = &argv[i][++j];
+			astringfile_name = Malloc( strlen(name)+4);
+			astringfile_name =  (char *)strcpy(astringfile_name,name);
+#ifdef LINT
+			name =
+#endif LINT
+			strcat(astringfile_name, ".c");
+			fprintf(OUT, "option %c, astringfile name %s\n",c, name);
+			break;
+		case 'E':
+			name = &argv[i][++j];
+			eventfile_h_name = Malloc( strlen(name)+4);
+			eventfile_h_name =  (char *)strcpy(eventfile_h_name,name);
+#ifdef LINT
+			name =
+#endif LINT
+			strcat(eventfile_h_name, ".h");
+			fprintf(stdout, "event files is %s\n",eventfile_h_name);
+			break;
+		case 'I':
+			name = &argv[i][++j];
+			statevalfile_name = Malloc( strlen(name)+6 );	/* ".init" plus NUL is 6 bytes, not 4 */
+			statevalfile_name =  (char *)strcpy(statevalfile_name,name);
+#ifdef LINT
+			name =
+#endif LINT
+			strcat(statevalfile_name, ".init");
+			fprintf(stdout, "state table initial values file is %s\n",statevalfile_name);
+			break;
+		case 'S':
+			name = &argv[i][++j];
+			statefile_name = Malloc( strlen(name)+4);
+			statefile_name =  (char *)strcpy(statefile_name,name);
+#ifdef LINT
+			name =
+#endif LINT
+			strcat(statefile_name, ".h");
+			fprintf(stdout, "state file is %s\n",statefile_name);
+			break;
+		/* END GROT */
+		case '1':
+		case '2':
+		case '3':
+			debug['X']= (int)argv[i][j] - (int) '0';
+			fprintf(OUT, "value of debug['X'] is 0x%x,%d\n", debug['X'],
+				debug['X']);
+			break;
+		case 'D':
+			while( c = argv[i][++j] ) {
+				if(c ==  'X') {
+					fprintf(OUT, "debugging on");
+					if(debug['X']) fprintf(OUT,
+						" - overrides any -%d flags used\n", debug['X']);
+				}
+				debug[c & 0x7f]=1;	/* debug[] has 128 slots; c may be negative */
+				fprintf(OUT, "debug %c\n",c);
+			}
+			break;
+		case 'g':
+			pgoption = 1;
+			fprintf(stdout, "Profiling\n");
+			break;
+		case 'e':
+			print_protoerrs = 1;
+			fprintf(stdout, "Protocol error table:\n");
+			break;
+
+		case 'T':
+			print_trans = 1;
+			fprintf(stdout, "Transitions:\n");
+			break;
+		default:
+			usage(argv[0]);
+			break;
+		}
+		i++;
+	}
+	if(kerneldirname[0]) {
+		char *c;
+#ifdef notdef
+		if(debug['X']) {
+			fprintf(OUT, "Option K overrides option X\n");
+			debug['X'] = 0;
+		}
+#endif notdef
+		/* add ../name/ */
+		c = (char *) Malloc(strlen(kerneldirname)+6) ;
+		if(c == (char *)0) {
+			fprintf(OUT, "Cannot allocate %d bytes for kerneldirname\n",
+				(int)strlen(kerneldirname) + 6 );
+			fprintf(OUT, "kerneldirname is %s\n", kerneldirname  );
+			exit(-1);
+		}
+		*c = '.';
+		*(c+1) = '.';
+		*(c+2) = '/';
+		*(c+3) = '\0';	/* terminate before strcat(): Malloc() does not clear memory */
+		(void) strcat(c, kerneldirname);
+		(void) strcat(c, "/\0");
+		strcpy(kerneldirname, c);	/* fits: length was checked when -K was parsed */
+	}
+
+	init_alloc();
+
+	(void) llparse();
+
+	/* {{ */
+	if( !FirstEventAttribute )
+		fprintf(eventfile_h, "\t}ev_union;\n");
+	fprintf(eventfile_h, "};/* end struct event */\n");
+	fprintf(eventfile_h, "\n#define %s_NEVENTS 0x%x\n", protocol, Nevents);
+	fprintf(eventfile_h,
+		"\n#define ATTR(X)ev_union.%s/**/X/**/\n",EV_PREFIX);
+	(void) fclose(eventfile_h);
+
+	/* {{ */ fprintf(actfile, "\t}\nreturn 0;\n}\n"); /* end switch; end action() */
+	dump_predtable(actfile);
+
+	putdriver(actfile, 3);
+	IFDEBUG(X)
+		if(!debug['K'])
+			putdriver(actfile, 4);
+	ENDDEBUG
+	putdriver(actfile, 6);
+	IFDEBUG(X)
+		/*
+		putdriver(actfile, 10);
+		*/
+		if(debug['K']) {
+			putdriver(actfile, 11);
+		} else {
+			switch(debug['X']) {
+			case 1:
+			default:
+				putdriver(actfile, 7);
+				break;
+			case 2:
+				putdriver(actfile, 13);
+				break;
+			case 3:
+				break;
+			}
+		}
+	ENDDEBUG
+	putdriver(actfile, 8);
+	(void) fclose(actfile);
+	IFDEBUG(X)
+		/* { */
+		fprintf(astringfile, "};\n");
+		(void) fclose(astringfile);
+	ENDDEBUG
+
+	(void) fclose(statevalfile);
+
+	fprintf(statefile, "\n#define %s_NSTATES 0x%x\n", protocol, Nstates);
+	(void) fclose(statefile);
+
+	finish = time(0);
+	fprintf(stdout, "%d seconds\n", finish - start);
+	if( print_protoerrs )
+		printprotoerrs();
+	return 0;
+}
+
+int transno = 0;
+
+Exit(n)	/* fatal exit: report the input position, flush the outputs, then exit(n) */
+{
+	fprintf(stderr, "Error at line %d\n",lineno);
+	if (transno != 0)
+		fprintf(stderr, "Transition number %d\n",transno);
+	(void) fflush(stdout); (void) fflush(statefile);
+	(void) fflush(eventfile_h);
+	(void) fflush(actfile);
+	exit(n);
+}
+
+syntax() 	/* defines a summary of the input-file sections; nothing here prints or returns it */
+{
+	static char *synt[] = {	/* one entry per section of a transition file */
+		"*PROTOCOL <string>\n",
+		"*PCB <string> <optional: SYNONYM synonymstring>\n",
+		"<optional: *INCLUDE {\n<C source>\n} >\n",
+		"*STATES <string>\n",
+		"*EVENTS <string>\n",
+		"*TRANSITIONS <string>\n",
+	};
+}
+	
+FakeFilename(outfile, name, l)	/* currently a no-op: the body is commented out */
+FILE *outfile;	/* stream the disabled code would write to */
+char *name;	/* file name for the disabled "# line" output */
+int l;		/* line number for the disabled "# line" output */
+{
+	/*
+	disabled because it doesn't work: would emit a "# line" marker
+	fprintf(outfile, "\n\n\n\n# line %d \"%s\"\n", l, name);
+	*/
+}
diff --git a/sys/netiso/xebec/main.h b/sys/netiso/xebec/main.h
new file mode 100644
index 00000000000..cb5bd74f31e
--- /dev/null
+++ b/sys/netiso/xebec/main.h
@@ -0,0 +1,32 @@
+/* $Header: main.h,v 2.1 88/09/19 12:56:24 nhall Exp $ */
+/* $Source: /var/home/tadl/src/argo/xebec/RCS/main.h,v $ */
+
+#define TRUE 1
+#define FALSE 0
+#define LINELEN 2350
+	/* approx limit on token size for C compiler 
+	 * which matters for the purpose of debugging (astring.c...)
+	 */
+
+#define MSIZE 4000
+#define	 DEBUGFILE "astring.c"
+#define  ACTFILE "driver.c"
+#define  EVENTFILE_H "events.h"
+#define  STATEFILE "states.h"
+#define  STATEVALFILE "states.init"
+
+#define EV_PREFIX "EV_"
+#define ST_PREFIX "ST_"
+
+#define PCBNAME "_PCB_"
+
+extern char kerneldirname[];
+extern char protocol[];
+extern char *synonyms[];
+#define EVENT_SYN 0
+#define PCB_SYN 1
+
+extern int transno;
+extern int print_trans;
+extern char *stash();
+
diff --git a/sys/netiso/xebec/malloc.c b/sys/netiso/xebec/malloc.c
new file mode 100644
index 00000000000..5cdfc147a70
--- /dev/null
+++ b/sys/netiso/xebec/malloc.c
@@ -0,0 +1,136 @@
+/* $Header: malloc.c,v 2.2 88/09/19 12:55:18 nhall Exp $ */
+/* $Source: /var/home/tadl/src/argo/xebec/RCS/malloc.c,v $ */
+/*
+ * This code is such a kludge that I don't want to put my name on it.
+ * It was a ridiculously fast hack and needs rewriting.
+ * However it does work...
+ */
+
+/* 
+ * a simple malloc
+ * it might be brain-damaged but for the purposes of xebec
+ * it's a whole lot faster than the c library malloc 
+ */
+
+#include <stdio.h>
+#include "malloc.h"
+#include "debug.h"
+#define CHUNKSIZE 4096*2
+
+static char *hiwat, *highend;
+int bytesmalloced=0;
+int byteswasted = 0;
+
+
+init_alloc()	/* restart the allocator at the current program break */
+{
+#ifdef LINT
+	hiwat = 0;
+	highend = 0;
+#else LINT
+	extern char *sbrk();
+
+	hiwat = (char *) sbrk(0);
+	hiwat = (char *)((unsigned long)(hiwat + 3) & ~0x3L);	/* round up to 4; long so wide pointers are not truncated */
+	highend = hiwat;	/* no space in hand: the next Malloc() must call sbrk() */
+#endif LINT
+}
+
+HIWAT(s)
+char *s;
+{
+	IFDEBUG(M)
+		fprintf(stdout, "HIWAT 0x%x  %s\n", hiwat,s);
+		fflush(stdout);
+	ENDDEBUG
+}
+
+#define MIN(x,y) ((x<y)?x:y)
+
+char *Malloc(x)	/* sbrk()-based bump allocator; exits via Exit(-1) when sbrk() fails */
+int x;		/* bytes wanted; requests above CHUNKSIZE are taken in CHUNKSIZE pieces */
+{
+	char *c;
+	extern char *sbrk();
+	static int firsttime=1;
+	int total = x;	/* bytes still to hand out */
+	int first_iter = 1;
+	char *returnvalue = (char *)0;	/* stays null when x is 0 (was uninitialised) */
+
+	IFDEBUG(N)
+		fprintf(stdout, "Malloc 0x%x, %d, bytesmalloced %d\n",
+			total,total, bytesmalloced);
+		fflush(stdout);
+	ENDDEBUG
+	IFDEBUG(M)
+		fprintf(stdout, "Malloc 0x%x, %d, hiwat 0x%x\n",
+			total,total, hiwat);
+		fflush(stdout);
+	ENDDEBUG
+	if(firsttime) {
+		hiwat = sbrk(0);
+		if(((unsigned long)(hiwat) & 0x3)) {	/* pad the break up to a multiple of 4 */
+			bytesmalloced = 4 - (int) ((unsigned long)(hiwat) & 0x3);
+			hiwat = sbrk( bytesmalloced );
+		} else
+			bytesmalloced = 0;
+		firsttime = 0;
+		highend = hiwat;	/* nothing in hand yet: first request calls sbrk() */
+	}
+	while( total ) {
+		x = MIN(CHUNKSIZE, total);
+		if(total != x)  {
+			IFDEBUG(N)
+				fprintf(stdout, "BIG Malloc tot %d, x %d, left %d net %d\n",
+					total,x, total-x, bytesmalloced);
+				fflush(stdout);
+			ENDDEBUG
+		}
+		if ( (hiwat + x) > highend) {	/* piece does not fit: take a fresh chunk */
+			c = sbrk(CHUNKSIZE);
+			IFDEBUG(M)
+				fprintf(stdout, "hiwat 0x%x, x 0x%x, highend 0x%x, c 0x%x\n",
+						hiwat, x, highend, c);
+				fflush(stdout);
+			ENDDEBUG
+			if( c == (char *) -1 ) {
+				fprintf(stderr, "Ran out of memory!\n");
+				Exit(-1);
+			}
+			if(first_iter) {
+				returnvalue = c;
+				first_iter = 0;
+			}
+			bytesmalloced +=  CHUNKSIZE;
+			IFDEBUG(m)
+				if (highend != c) {
+					fprintf(OUT, "warning: %d wasted bytes!\n", highend - hiwat);
+				fprintf(OUT, " chunksize 0x%x,  x 0x%x \n", CHUNKSIZE, x);
+				}
+			ENDDEBUG
+			highend = c + CHUNKSIZE;
+			hiwat = c;	/* the tail of the previous chunk is abandoned */
+		}
+		c = hiwat;
+		if(first_iter) {
+			returnvalue = c;	/* address of the first piece is the result */
+			first_iter = 0;
+		}
+		hiwat += x;
+		total -= x;
+	}
+	if((unsigned long)hiwat & 0x3) {	/* keep the next allocation 4-byte aligned */
+		byteswasted += (int)((unsigned long)(hiwat) & 0x3);
+		hiwat = (char *)((unsigned long)(hiwat + 3) & ~0x3L);
+	}
+	IFDEBUG(M)
+		fprintf(stdout, "Malloc = 0x%x, bytesm 0x%x, wasted 0x%x, hiwat 0x%x\n",
+			returnvalue, bytesmalloced, byteswasted, hiwat);
+	ENDDEBUG
+	IFDEBUG(N)
+		fprintf(stdout, "Malloc returns 0x%x, sbrk(0) 0x%x\n", returnvalue, sbrk(0));
+		fflush(stdout);
+	ENDDEBUG
+	return(returnvalue);
+}
+
diff --git a/sys/netiso/xebec/malloc.h b/sys/netiso/xebec/malloc.h
new file mode 100644
index 00000000000..53d865bf47b
--- /dev/null
+++ b/sys/netiso/xebec/malloc.h
@@ -0,0 +1,4 @@
+/* $Header: malloc.h,v 2.1 88/09/19 12:56:27 nhall Exp $ */
+/* $Source: /var/home/tadl/src/argo/xebec/RCS/malloc.h,v $ */
+
+char *Malloc();
diff --git a/sys/netiso/xebec/procs.c b/sys/netiso/xebec/procs.c
new file mode 100644
index 00000000000..49d862ac5b9
--- /dev/null
+++ b/sys/netiso/xebec/procs.c
@@ -0,0 +1,437 @@
+/* $Header: procs.c,v 2.3 88/09/19 12:55:22 nhall Exp $ */
+/* $Source: /var/home/tadl/src/argo/xebec/RCS/procs.c,v $ */
+/*
+ * This code is such a kludge that I don't want to put my name on it.
+ * It was a ridiculously fast hack and needs rewriting.
+ * However it does work...
+ */
+
+#include <stdio.h>
+#include <strings.h>
+#include "malloc.h"
+#include "main.h"
+#include "debug.h"
+#include "sets.h"
+#include "procs.h"
+
+struct Predicate {
+	int p_index;
+	int p_transno;
+	char *p_str;
+	struct Predicate *p_next;
+};
+
+struct Stateent {
+	int s_index;
+	int s_newstate;
+	int s_action;
+	struct Stateent *s_next;
+};
+
+struct Object *SameState = (struct Object *)-1;
+int Index = 0;
+int Nstates = 0;
+int Nevents = 0;
+struct Predicate **Predlist;
+struct Stateent **Statelist;
+extern FILE *astringfile;
+
+end_events() {	/* allocate Statelist and a zeroed Predlist */
+	int size, part;	/* bytes of Predlist left to clear / cleared per pass */
+	char *addr;
+
+	IFDEBUG(X)
+		/* finish estring[], start astring[] */
+	if(debug['X'] < 2 )
+		fprintf(astringfile, "};\n\nchar *%s_astring[] = {\n\"NULLACTION\",\n",
+			protocol);
+	ENDDEBUG
+	size = (((Nevents)<<Eventshift)+Nstates)*sizeof(struct Predicate *) ;
+
+	/* NOSTRICT */
+	Statelist = 
+	  (struct Stateent **) Malloc((Nstates+1) * sizeof(struct Stateent *));
+	/* NOSTRICT */
+	Predlist = (struct Predicate **) Malloc(size);
+
+	/* Malloc() memory is not cleared: zero every list head, BZSIZE at a time */
+	addr = (char *)Predlist;
+	IFDEBUG(N)
+		fprintf(OUT, "Predlist at 0x%x, sbrk 0x%x bzero size %d at addr 0x%x\n",
+		Predlist, sbrk(0), size, addr);
+	ENDDEBUG
+#define BZSIZE 8192
+	while(size) {
+		part = size>BZSIZE?BZSIZE:size;
+	IFDEBUG(N)
+		fprintf(OUT, "bzero addr 0x%x part %d size %d\n",addr, part, size);
+	ENDDEBUG
+		bzero(addr, part);
+	IFDEBUG(N)
+		fprintf(OUT, "after bzero addr 0x%x part %d size %d\n",addr, part, size);
+	ENDDEBUG
+		addr += part;
+		size -= part;
+
+	}
+	IFDEBUG(N)
+		fprintf(OUT, "endevents..done \n");
+	ENDDEBUG
+}
+
+int acttable(f,actstring)
+char *actstring;
+FILE *f;
+{
+	static Actindex = 0;
+	extern FILE *astringfile;
+	extern int pgoption;
+
+	IFDEBUG(a)
+		fprintf(OUT,"acttable()\n");
+	ENDDEBUG
+	fprintf(f, "case 0x%x: \n", ++Actindex);
+
+	if(pgoption) {
+		fprintf(f, "asm(\" # dummy statement\");\n");
+		fprintf(f, "asm(\"_Xebec_action_%x: \");\n", Actindex );
+		fprintf(f, "asm(\".data\");\n");
+		fprintf(f, "asm(\".globl _Xebec_action_%x# X profiling\");\n",
+			Actindex );
+		fprintf(f, "asm(\".long 0 # X profiling\");\n");
+		fprintf(f, "asm(\".text # X profiling\");\n");
+		fprintf(f, "asm(\"cas r0,r15,r0 # X profiling\");\n");
+		fprintf(f, "asm(\"bali r15,mcount   # X profiling\");\n");
+	}
+
+	fprintf(f, "\t\t%s\n\t\t break;\n", actstring);
+	IFDEBUG(X)
+		if(debug['X']<2) {
+			register int len = 0;
+			fputc('"',astringfile);
+			while(*actstring) {
+				if( *actstring == '\n' ) {
+					fputc('\\', astringfile);
+					len++;
+					fputc('n', astringfile);
+				} else if (*actstring == '\\') {
+					fputc('\\', astringfile);
+					len ++;
+					fputc('\\', astringfile);
+				} else if (*actstring == '\"') {
+					fputc('\\', astringfile);
+					len ++;
+					fputc('\"', astringfile);
+				} else fputc(*actstring, astringfile);
+				actstring++;
+				len++;
+			}
+			fprintf(astringfile,"\",\n");
+			if (len > LINELEN) {
+				fprintf(stderr, "Action too long: %d\n",len); Exit(-1);
+			}
+		}
+	ENDDEBUG
+
+	return(Actindex);
+}
+
+static int Npred=0, Ndefpred=0, Ntrans=0, Ndefevent=0, Nnulla=0;
+
+statetable(string, oldstate, newstate, action, event)	/* record one transition by handing it to predtable() */
+char *string;	/* passed to predtable() as the predicate text */
+int action;	/* action index; 0 is counted as a null action */
+struct Object *oldstate, *newstate, *event; 
+{
+	register int different;	/* 0 when newstate is SameState, else 1 */
+
+	IFDEBUG(a)
+		fprintf(OUT,"statetable(0x%x, 0x%x,0x%x, 0x%x)\n",
+			string, oldstate, newstate, action);
+		fprintf(OUT,"statetable(%s, %s,%s, 0x%x)\n",
+			string, oldstate->obj_name, newstate->obj_name, action);
+	ENDDEBUG
+
+	if( !action) Nnulla++;
+	if( newstate->obj_kind == OBJ_SET) {	/* NOTE(review): newstate is dereferenced before the SameState test below; SameState starts out as (struct Object *)-1 in this file -- confirm it is re-pointed at a real object first */
+		fprintf(stderr, "Newstate cannot be a set\n");
+		Exit(-1);
+	}
+	different = (newstate != SameState);
+
+	(void) predtable( oldstate, event, string,
+				action, (newstate->obj_number) * different );	/* new state number, or 0 for SameState */
+	IFDEBUG(a)
+		fprintf(OUT,"EXIT statetable\n");
+	ENDDEBUG
+}
+
+stateentry(index, oldstate, newstate, action)
+int index, action;
+int oldstate, newstate; 
+{
+	extern FILE *statevalfile;
+
+	IFDEBUG(a)
+		fprintf(OUT,"stateentry(0x%x,0x%x,0x%x,0x%x) Statelist@0x%x, val 0x%x\n",
+			index, oldstate, newstate,action, &Statelist, Statelist);
+	ENDDEBUG
+
+
+	fprintf(statevalfile, "{0x%x,0x%x},\n", newstate, action);
+}
+
+int predtable(os, oe, str, action, newstate)	/* returns Index, or 0 if the transition is ignored */
+struct Object *os, *oe;	/* old state(s) and event(s); either may be an OBJ_SET */
+char *str;		/* predicate text, or null for the default predicate */
+int action, newstate;	/* newstate 0 means "same state as before" */
+{
+	register struct Predicate *p, **q;
+	register int event, state;
+	register struct Object *e, *s;
+	struct Object *firste;
+	extern FILE *statevalfile;	/* was used here with no declaration in scope */
+	if (oe == (struct Object *)0 ) {
+		Ndefevent ++;
+		fprintf(stderr, "DEFAULT EVENTS aren't implemented; trans ignored\n");
+		return 0;	/* was a bare "return;" in an int function */
+	}
+	Ntrans++;
+	IFDEBUG(g)
+		fprintf(stdout,
+		"PREDTAB: s %5s;  e %5s\n", os->obj_kind==OBJ_SET?"SET":"item",
+			oe->obj_kind==OBJ_SET?"SET":"item");
+	ENDDEBUG
+	if (os->obj_kind == OBJ_SET) s = os->obj_members;
+	else s = os;
+	if (oe->obj_kind == OBJ_SET) firste = oe->obj_members;
+	else firste = oe;
+	if(newstate) {
+		/* explicit new state: one table entry serves every old state */
+		fprintf(statevalfile, "{0x%x,0x%x},\n",newstate, action);
+		Index++;
+	}
+	while (s) {
+		if( !newstate ) { /* !newstate --> SAME */
+			/* i.e., use old obj_number */
+			fprintf(statevalfile, "{0x%x,0x%x},\n",s->obj_number, action);
+			Index++;
+		}
+		e = firste;
+		while (e) {
+			event = e->obj_number; state = s->obj_number;
+			IFDEBUG(g)
+				fprintf(stdout,"pred table event=0x%x, state 0x%x\n",
+				event, state);
+				fflush(stdout);
+			ENDDEBUG
+			if( !str /* DEFAULT PREDICATE */) {
+				Ndefpred++;
+				IFDEBUG(g)
+					fprintf(stdout,
+					"DEFAULT pred state 0x%x, event 0x%x, Index 0x%x\n",
+					state, event, Index);
+					fflush(stdout);
+				ENDDEBUG
+			} else
+				Npred++;
+			/* put at END of list */
+#ifndef LINT
+			IFDEBUG(g)
+				fprintf(stdout,
+				"predicate for event 0x%x, state 0x%x is 0x%x, %s\n",
+				event, state, Index, str);
+				fflush(stdout);
+			ENDDEBUG
+#endif LINT
+			for( ((q = &Predlist[(event<<Eventshift)+state]),
+					 (p = Predlist[(event<<Eventshift)+state]));
+							p ; p = p->p_next ) {
+				q = &p->p_next;	/* q trails p: ends at the last link */
+			}
+
+			p = (struct Predicate *)Malloc(sizeof(struct Predicate));
+			p->p_next = (struct Predicate *)0;
+			p->p_str = str;
+			p->p_index = Index;
+			p->p_transno = transno;
+			*q = p;
+
+			IFDEBUG(g)
+				fprintf(stdout,
+			  	  "predtable index 0x%x, transno %d, E 0x%x, S 0x%x\n",
+					 Index, transno, e, s);
+			ENDDEBUG
+
+			e = e->obj_members;	/* next event in the set, if any */
+		}
+		s = s->obj_members;	/* next old state in the set, if any */
+	}
+	return Index ;
+}
+
+printprotoerrs()	/* report each [event, state] pair whose Predlist slot is 0 */
+{
+	register int ev, st;
+	fprintf(stderr, "[ Event, State ] without any transitions :\n");
+	for (ev = 0; ev < Nevents; ev++) {
+		fprintf(stderr, "Event 0x%x: states ", ev);
+		for (st = 0; st < Nstates; st++) {
+			if (Predlist[(ev<<Eventshift)+st] != (struct Predicate *)0)
+				continue;	/* slot is in use */
+			fprintf(stderr, "0x%x ", st);
+		}
+		fprintf(stderr, "\n");
+	}
+}
+
+#ifndef LINT
+dump_predtable(f)
+FILE *f;
+{
+	struct Predicate *p;
+	register int e,s, hadapred;
+	int defaultindex;
+	int defaultItrans;
+	extern int bytesmalloced;
+	extern int byteswasted;
+
+#ifdef notdef
+	fprintf(stdout,
+		" Xebec used %8d bytes of storage, wasted %8d bytes\n", 
+		bytesmalloced, byteswasted);
+#endif notdef
+	fprintf(stdout, 
+		" %8d states\n %8d events\n %8d transitions\n",
+		Nstates, Nevents, Ntrans);
+	fprintf(stdout,
+		" %8d predicates\n %8d default predicates used\n",
+		Npred, Ndefpred);
+	fprintf(stdout,
+		" %8d null actions\n",
+		Nnulla);
+
+	putdriver(f, 5);
+	for(e = 0; e < Nevents; e++) { for(s = 0; s < Nstates; s++) {
+		p = Predlist[(e<<Eventshift)+s];
+		hadapred=0;
+		defaultindex=0;
+		defaultItrans=0;
+		if(p) {
+			IFDEBUG(d)
+				fflush(f);
+			ENDDEBUG
+			while(p) {
+				if(p->p_str) {
+					if(!hadapred)
+						fprintf(f, "case 0x%x:\n\t", (e<<Eventshift) + s);
+					hadapred = 1;
+					fprintf(f, "if %s return 0x%x;\n\t else ", 
+					p->p_str, p->p_index);
+				} else {
+					if(defaultindex) {
+						fprintf(stderr, 
+"\nConflict between transitions %d and %d: duplicate default \n",
+						p->p_transno, defaultItrans);
+						Exit(-1);
+					}
+					defaultindex = p->p_index;
+					defaultItrans = p->p_transno;
+				}
+				p = p->p_next;
+			}
+			if( hadapred)  {
+				fprintf(f, "return 0x%x;\n", defaultindex);
+			}
+			IFDEBUG(d)
+				fflush(f);
+			ENDDEBUG
+		} 
+		IFDEBUG(g)
+		fprintf(stdout, 
+		"loop: e 0x%x s 0x%x hadapred 0x%x dindex 0x%x for trans 0x%x\n",
+			e, s, hadapred, defaultindex, defaultItrans);
+		ENDDEBUG
+		if ( hadapred ) {
+			/* put a -1 in the array  - Predlist is temporary storage */
+			Predlist[(e<<Eventshift)+s] = (struct Predicate *)(-1);
+		} else {
+			/* put defaultindex in the array */
+			/* if defaultindex is zero, then the driver will
+			 * cause an erroraction (same as if no default
+			 * were given and none of the predicates were true;
+			 * also same as if no preds or defaults were given
+			 * for this combo)
+			 */
+			Predlist[(e<<Eventshift)+s] = (struct Predicate *)(defaultindex);
+		}
+	} }
+	fprintf(f, "default: return 0;\n} /* end switch */\n");
+#ifdef notdef
+	fprintf(f, "/*NOTREACHED*/return 0;\n} /* _Xebec_index() */\n");
+#else notdef
+	fprintf(f, "} /* _Xebec_index() */\n");
+#endif notdef
+	fprintf(f, "static int inx[%d][%d] = { {", Nevents+1,Nstates);
+	for(s = 0; s< Nstates; s++) fprintf(f, "0,"); /* event 0 */
+	fprintf(f, "},\n");
+
+	for(e = 0; e < Nevents; e++) { 
+		fprintf(f, " {"); 
+		for(s = 0; s < Nstates; s++) {
+			register struct Predicate *xyz = Predlist[(e<<Eventshift)+s];
+			/* this kludge is to avoid a lint msg. concerning
+			 * loss of bits 
+			 */
+			if (xyz == (struct Predicate *)(-1))
+				fprintf(f, "-1,");
+			else
+				fprintf(f, "0x%x,", Predlist[(e<<Eventshift)+s]);
+		}
+		fprintf(f, " },\n"); 
+	}
+	fprintf(f, "};");
+}
+#endif LINT
+
+char *
+stash(buf)	/* return a private copy of buf, allocated with Malloc() */
+char *buf;
+{
+	register char *copy;
+	register int nbytes = strlen(buf) + 1;	/* text plus the terminating NUL */
+
+	copy = Malloc(nbytes);
+	/* under LINT the result of strcpy() is assigned instead of discarded */
+#ifdef LINT
+	copy =
+#endif LINT
+	strcpy(copy, buf);
+
+	IFDEBUG(z)
+		fprintf(stdout,"stash %s at 0x%x\n", copy,copy);
+	ENDDEBUG
+
+	return(copy);
+}
+
+#ifdef notdef
+dump_pentry(event,state)
+int event,state;
+{
+	register struct Predicate *p, **q;
+
+	for( 
+	((q = &Predlist[(event<<Eventshift) +state]), 
+	 (p = Predlist[(event<<Eventshift) + state]));
+		p!= (struct Predicate *)0 ; p = p->p_next ) {
+#ifndef LINT
+		IFDEBUG(a)
+			fprintf(OUT, 
+			"dump_pentry for event 0x%x, state 0x%x is 0x%x\n", 
+			 event, state, p);
+		ENDDEBUG
+#endif LINT
+		q = &p->p_next;
+	}
+}
+#endif notdef
diff --git a/sys/netiso/xebec/procs.h b/sys/netiso/xebec/procs.h
new file mode 100644
index 00000000000..e41ae75995c
--- /dev/null
+++ b/sys/netiso/xebec/procs.h
@@ -0,0 +1,5 @@
+/* $Header: procs.h,v 2.1 88/09/19 12:56:30 nhall Exp $ */
+/* $Source: /var/home/tadl/src/argo/xebec/RCS/procs.h,v $ */
+
+extern char *stash();
+extern struct Object *SameState;
diff --git a/sys/netiso/xebec/putdriver.c b/sys/netiso/xebec/putdriver.c
new file mode 100644
index 00000000000..996ac643d10
--- /dev/null
+++ b/sys/netiso/xebec/putdriver.c
@@ -0,0 +1,244 @@
+/* $Header: putdriver.c,v 2.2 88/09/19 12:55:27 nhall Exp $ */
+/* $Source: /var/home/tadl/src/argo/xebec/RCS/putdriver.c,v $ */
+
+/*
+ * This code is such a kludge that I don't want to put my name on it.
+ * It was a ridiculously fast hack and needs rewriting.
+ * However it does work...
+ */
+
+/* The original idea was to put all the driver code
+ * in one place so it would be easy to modify
+ * but as hacks got thrown in it got worse and worse...
+ * It's to the point where a user would be better off
+ * writing his own driver and xebec should JUST produce
+ * the tables.
+ */
+
+#include <stdio.h>
+#include "main.h"
+#include "debug.h"
+
+extern char protocol[];
+char Eventshiftstring[10];
+static char statename[] = {'_', 's', 't', 'a', 't', 'e', 0 };
+
+static char *strings[] = {
+
+#define PART1 { 0,3 }
+
+	"\n#include \"",
+	kerneldirname,
+	protocol,
+	"_states.h\"",
+	(char *)0,
+	(char *)0,
+	(char *)0,
+	(char *)0,
+	(char *)0,
+	(char *)0,
+
+#define PART12 { 10,12 }
+	"\n\nstatic struct act_ent {\n",
+	"\tint a_newstate;\n\tint a_action;\n",
+	"} statetable[] = { {0,0},\n",
+	(char *)0,
+	(char *)0,
+	(char *)0,
+	(char *)0,
+	(char *)0,
+	(char *)0,
+	(char *)0,
+
+#define PART2 { 20,20 }
+	"};\n",
+	(char *)0,
+	(char *)0,
+	(char *)0,
+	(char *)0,
+	(char *)0,
+	(char *)0,
+	(char *)0,
+	(char *)0,
+	(char *)0,
+
+#define PART3 { 30,41 }
+	"\n",
+	protocol,
+	"_driver(p, e)\nregister ",
+	protocol,
+	PCBNAME,
+	" *p;\nregister struct ",
+	protocol,
+	"_event *e;\n",
+	"{\n",
+		"\tregister int index, error=0;\n",
+		"\tstruct act_ent *a;\n",
+		"\tstatic struct act_ent erroraction = {0,-1};\n",
+	(char *)0,
+	(char *)0,
+	(char *)0,
+	(char *)0,
+	(char *)0,
+	(char *)0,
+	(char *)0,
+	(char *)0,
+
+#define PART4 { 50,54 }
+
+	"\textern int ",
+	protocol,
+	"_debug;\n\textern FILE *",
+	protocol,
+	"_astringfile;\n", 
+	(char *)0,
+	(char *)0,
+	(char *)0,
+	(char *)0,
+	(char *)0,
+
+#define PART6 { 60, 65 }
+	"\n\tindex = inx[1 + e->ev_number][p->",
+		protocol,
+		statename,
+		"];\n\tif(index<0) index=_Xebec_index(e, p);\n",
+		"\tif (index==0) {\n\t\ta = &erroraction;\n",
+		"\t} else\n\t\ta = &statetable[index];\n\n",
+	(char *)0,
+	(char *)0,
+	(char *)0,
+	(char *)0,
+
+#define PART7 {70, 77 }
+	"\tif(",
+	protocol,
+	"_debug) fprintf(",
+	protocol,
+	"_astringfile, \"%15s <-- %15s [%15s] \\n\\t%s\\n\",\n",
+	"\t\tsstring[a->a_newstate], sstring[p->",
+	protocol,
+	"_state], estring[e->ev_number], astring[a->a_action]);\n\n",
+	(char *)0,
+	(char *)0,
+
+#define PART8 { 80, 84 }
+		"\tif(a->a_action)\n",
+		"\t\terror = _Xebec_action( a->a_action, e, p );\n",
+		"\tif(error==0)\n\tp->",
+		protocol,
+		"_state = a->a_newstate;\n\treturn error;\n}\n",
+	(char *)0,
+	(char *)0,
+	(char *)0,
+	(char *)0,
+	(char *)0,
+
+#define PART9 { 90, 99 }
+	"\n_XEBEC_PG int _Xebec_action(a,e,p)\nint a;\nstruct ",
+	protocol,
+	"_event *e;\n",
+	protocol, 
+	PCBNAME,
+	" *p;\n{\n",
+	"switch(a) {\n",
+	"case -1:  return ",
+	protocol,
+	"_protocol_error(e,p);\n",
+	(char *)0,
+
+#define PART10 { 101, 105 }
+	"\tif(",
+	protocol,
+	"_debug) fprintf(",
+	protocol,
+	"_astringfile, \"index 0x%5x\\n\", index);\n",
+	(char *)0,
+	(char *)0,
+	(char *)0,
+	(char *)0,
+
+#define PART5 { 110, 121 }
+	"\n_XEBEC_PG int\n_Xebec_index( e,p )\n",
+	"\tstruct ",
+	protocol,
+	"_event *e;\n\t", 
+	protocol, 
+	PCBNAME,
+	" *p;\n{\nswitch( (e->ev_number<<",
+	Eventshiftstring,
+	")+(p->",
+	protocol, 
+	statename,
+	") ) {\n", 
+	(char *)0,
+	(char *)0,
+	(char *)0,
+	(char *)0,
+	(char *)0,
+	(char *)0,
+	(char *)0,
+	(char *)0,
+
+#define PART11 {130, 137 }
+	"\tIFTRACE(D_DRIVER)\n",
+	"\t",
+	protocol,
+	"trace(DRIVERTRACE,",
+	"\t\ta->a_newstate, p->",
+	protocol,
+	"_state, e->ev_number, a->a_action, 0);\n\n",
+	"\tENDTRACE\n",
+	(char *)0,
+	(char *)0,
+
+#define PART13 {140, 147 }
+	"\tif(",
+	protocol,
+	"_debug) fprintf(",
+	protocol,
+	"_astringfile, \"%15s <-- %15s [%15s] \\n\",\n",
+	"\t\tsstring[a->a_newstate], sstring[p->",
+	protocol,
+	"_state], estring[e->ev_number]);\n\n",
+	(char *)0,
+	(char *)0,
+
+#define PART14 { 150,150 }
+	"#define _XEBEC_PG static\n",
+
+#define PART15 { 151,151 }
+	"#define _XEBEC_PG  \n",
+
+};
+
+static struct { int start; int finish; } parts[] = {	/* first and last strings[] index of each part, both included */
+	{ 0,0 },	/* filler so that PARTn sits at parts[n] */
+	PART1,
+	PART2,
+	PART3,
+	PART4,
+	PART5,
+	PART6,
+	PART7,
+	PART8,
+	PART9,
+	PART10,
+	PART11,
+	PART12,
+	PART13,
+	PART14,
+	PART15,
+};
+
+putdriver(f, x) 
+FILE *f;
+int x;
+{
+	register int i; 
+	/* write to f every strings[] entry from parts[x].start through parts[x].finish; x is not range-checked */
+	for( i = parts[x].start; i<= parts[x].finish; i++)
+		fprintf(f, "%s", strings[i]);
+	IFDEBUG(d)
+		fflush(f);
+	ENDDEBUG
+}
diff --git a/sys/netiso/xebec/sets.c b/sys/netiso/xebec/sets.c
new file mode 100644
index 00000000000..3bb74ed8d29
--- /dev/null
+++ b/sys/netiso/xebec/sets.c
@@ -0,0 +1,472 @@
+/* $Header: sets.c,v 2.3 88/09/19 12:55:30 nhall Exp $ */
+/* $Source: /var/home/tadl/src/argo/xebec/RCS/sets.c,v $ */
+/*
+ * This code is such a kludge that I don't want to put my name on it.
+ * It was a ridiculously fast hack and needs rewriting.
+ * However it does work...
+ */
+#include "main.h"
+#include "malloc.h"
+#include "sets.h"
+#include "debug.h"
+#include <stdio.h>
+
+struct Object *CurrentEvent = (struct Object *)0;
+struct Object *Objtree;
+struct Object dummy;
+/* 
+ * define a set w/ type and name
+ * return a set number 
+ */
+#undef NULL
+#define NULL (struct Object *)0
+
+static FILE *Sfile, *Efile;
+extern FILE *astringfile;
+char *Noname = "Unnamed set\0";
+
+initsets(f,s)
+FILE *f, *s;
+{
+	static char errorstring[20];
+	extern struct Object *SameState;
+	Efile = f;
+	Sfile = s;
+	/* f and s are kept for insert(): event #defines go to Efile, state #defines to Sfile */
+	IFDEBUG(X)
+		fprintf(astringfile, "char *%s_sstring[] = {\n", protocol);
+	ENDDEBUG
+	sprintf(errorstring, "%sERROR\0", ST_PREFIX);	/* NOTE(review): 20-byte buffer, fits only if ST_PREFIX is <= 14 chars - confirm */
+	defineitem(STATESET, errorstring, (char *)0);	/* state 0 */
+	SameState = (struct Object *) Malloc( sizeof (struct Object) );	/* built by hand; not passed to insert() here */
+	SameState->obj_kind = OBJ_ITEM;
+	SameState->obj_type = STATESET;
+	SameState->obj_name = "SAME";
+	SameState->obj_struc = (char *)0;
+	SameState->obj_number = 0;
+	SameState->obj_members = (struct Object *)0;
+	SameState->obj_left = (struct Object *)0;
+	SameState->obj_right = (struct Object *)0;
+	SameState->obj_parent = (struct Object *)0;
+}
+
+/*
+ * Search Objtree (ordered by strcmp on obj_name) for the Object called name.
+ * Returns it, set or item alike, or NULL when the name is absent or the
+ * Object found has an obj_type different from type.
+ */
+struct Object *lookup(type, name)
+unsigned char type;
+char *name;
+{
+	register struct Object *p = Objtree;
+	int val = 1 ;
+	/* val holds the latest strcmp result; the walk stops once it is 0 */
+	IFDEBUG(o)
+		fprintf(stdout,"lookup 0x%x,%s \n",
+			type, name);
+	ENDDEBUG
+
+	while( p && val ) {
+		IFDEBUG(o)
+		fprintf(OUT, "lookup strcmp 0x%x,%s, 0x%x,%s\n",
+			name, name, OBJ_NAME(p), OBJ_NAME(p));
+		ENDDEBUG
+		if( p->obj_name == (char *)0 ) {
+			fprintf(stderr, "Unnamed set in table!\n");
+			Exit(-1);
+		}
+		val =  (int) strcmp(name, OBJ_NAME(p));
+		if(val < 0) {
+			/* left */
+			p = p->obj_left;
+		} else if (val > 0) {
+			/* right */
+			p = p->obj_right;
+		}
+	}
+	if( p && ( p->obj_type != type)) {	/* right name, wrong type: report it and treat as not found */
+		fprintf(stdout, "lookup(0x%x,%s) found wrong obj type 0x%x\n",
+			type,name, p->obj_type);
+		p = NULL;
+	}
+	IFDEBUG(o)
+		fprintf(stdout,"lookup 0x%x,%s returning 0x%x\n",type, name, p);
+	ENDDEBUG
+	return(p);
+}
+
+static int states_done  = 0;
+
+end_states(f)
+FILE *f;
+{
+	register unsigned n = Nstates;
+	register int i;
+	extern char Eventshiftstring[];
+	/* closes the state list (insert() rejects states from now on), sets Eventshift, opens the event struct in f */
+	states_done = 1;
+	/* Eventshift ends up as the number of one-bit right shifts done before n becomes 0 (at least one) */
+	for( i = 0; ;i++) {
+		if( (n >>= 1) <= 0 ) break;
+	}
+	Eventshift = i+1;
+	IFDEBUG(d)
+		fprintf(OUT, "Eventshift=%d\n", Eventshift);
+	ENDDEBUG
+	sprintf(Eventshiftstring, "%d\0",Eventshift);
+	fprintf(f, "struct %s_event {\n\tint ev_number;\n", &protocol[0]);
+	IFDEBUG(X)
+		/* finish sstring[] & start estring[] */
+		fprintf(astringfile, 
+		"};\n\nchar *%s_estring[] = {\n", protocol);
+	ENDDEBUG
+}
+
+int FirstEventAttribute = 1;
+
+static 
+insert(o) 
+struct Object *o;
+{
+	struct Object *p = Objtree, *parent = (struct Object *)&Objtree;
+	struct Object **q = &Objtree; 
+	int val=1;
+	/* Number o as the next state or event, write its #define, then link it
+	 * into the name-ordered Objtree.  Errors are reported and end in Exit/exit. */
+	if (o->obj_name == (char *)0) {
+		fprintf(stderr, "Internal Error: inserting unnamed object\n");
+		Exit(-1);
+	}
+	if( o->obj_type == STATESET) {
+		if( states_done )  {
+			fprintf(stderr, "No states may be defined after *TRANSITIONS\n");
+			Exit(-1);
+		}
+		o->obj_number =  Nstates++ ; 
+		if(Nstates > MAXSTATES) {
+			fprintf(stderr, "Too many states\n");
+			Exit(-1);
+		}
+		fprintf(Sfile, "#define %s 0x%x\n", o->obj_name, o->obj_number);
+		IFDEBUG(X)
+			fprintf(astringfile, "\"%s(0x%x)\",\n", o->obj_name, o->obj_number);
+		ENDDEBUG
+	} else {
+		/* EVENTSET */ 
+		if( ! states_done )  {
+			fprintf(stderr, "states must precede events\n");
+			Exit(-1);
+		}
+		o->obj_number =  Nevents++ ;
+		if(Nevents > MAXEVENTS) {
+			fprintf(stderr, "Too many events\n");
+			Exit(-1);
+		}
+		if(o->obj_struc)  {
+			if( FirstEventAttribute ) {
+				fprintf(Efile,  "\n\tunion{\n"); /*} */
+				FirstEventAttribute = 0;
+			}
+			fprintf(Efile, 
+			"struct %s %s%s;\n\n", o->obj_struc, EV_PREFIX,  o->obj_name);
+		}
+		fprintf(Efile, "#define %s 0x%x\n", o->obj_name, o->obj_number);
+		IFDEBUG(X)
+			fprintf(astringfile, "\"%s(0x%x)\",\n", o->obj_name, o->obj_number);
+		ENDDEBUG
+	}
+	IFDEBUG(o)
+		fprintf(OUT, "insert(%s)\n", OBJ_NAME(o) );
+		if(o->obj_right != NULL) {
+			fprintf(OUT, "insert: unclean Object right\n");
+			exit(-1);
+		}
+		if(o->obj_left != NULL) {
+			fprintf(OUT, "insert: unclean Object left\n");
+			exit(-1);
+		}
+		fflush(OUT);
+	ENDDEBUG
+
+	/*
+	 * Walk down from the root.  q is the link through which p was reached
+	 * and parent is the node that owns that link; for the root that is the
+	 * (struct Object *)&Objtree sentinel which delete() tests for.
+	 */
+	while( val ) {
+		if(p == NULL) {
+			/* fell off the tree: hang o here, below parent */
+			*q = o;
+			o->obj_parent = parent;
+			break;
+		}
+		if(!(val = strcmp(o->obj_name, p->obj_name)) ) {
+			/* equal */
+			fprintf(stderr, "re-inserting %s\n",o->obj_name);
+			exit(-1);
+		}
+		parent = p;	/* the node itself, not the address of its link */
+		q = (val < 0) ? &p->obj_left : &p->obj_right;
+		p = *q;
+	}
+	IFDEBUG(a)
+		dumptree(Objtree,0);
+	ENDDEBUG
+}
+
+delete(o) 
+struct Object *o;
+{
+	register struct Object *p = o->obj_right; 
+	register struct Object *q = (struct Object *)0;	/* stays 0 when o has no right subtree */
+	register struct Object *newparent;
+	register struct Object **np_childlink;
+	/* Unlink o from Objtree without disturbing name order; o is not freed. */
+	IFDEBUG(T)
+		fprintf(stdout, "delete(0x%x)\n", o);
+		dumptree(Objtree,0);
+	ENDDEBUG
+	/* NOTE(review): relies on obj_parent naming the parent node itself (or &Objtree at the root) - confirm */
+	/* q <== lowest valued node of the right subtree */
+	while( p ) {
+		q = p;
+		p = p->obj_left;
+	}
+	/* np_childlink <== the link that points at o; newparent owns that link */
+	if (o->obj_parent == (struct Object *)&Objtree)  {
+		newparent =  (struct Object *)&Objtree;
+		np_childlink = (struct Object **)&Objtree;
+	} else if(o->obj_parent->obj_left == o)  {
+		newparent = o->obj_parent;
+		np_childlink = &(o->obj_parent->obj_left);
+	} else {
+		newparent = o->obj_parent;
+		np_childlink = &(o->obj_parent->obj_right);
+	}
+	IFDEBUG(T)
+		fprintf(OUT, "newparent=0x%x\n", newparent);
+	ENDDEBUG
+
+	if (q) { /* q gets the left, parent gets the right */
+		IFDEBUG(T)
+			fprintf(OUT, "delete: q not null\n");
+		ENDDEBUG
+		if( (q->obj_left = o->obj_left) )	/* o's left subtree hangs under q */
+			q->obj_left->obj_parent = q;
+		p = o->obj_right;
+	} else { /* parent(instead of q) gets the left ; there is no right  */
+		IFDEBUG(T)
+			fprintf(OUT, "delete: q null\n");
+		ENDDEBUG
+		p = o->obj_left;
+	}
+	*np_childlink = p;
+	if(p) 
+		p->obj_parent = newparent;
+
+	IFDEBUG(T)
+		fprintf(OUT, "After deleting 0x%x\n",o);
+		dumptree(Objtree,0);
+	ENDDEBUG
+}
+
+struct Object *
+defineset(type, adr, keep)
+unsigned char type;
+char *adr;
+int keep;
+{
+	struct Object *onew;
+	IFDEBUG(o)
+		printf("defineset(0x%x,%s, %s)\n", type , adr, keep?"KEEP":"NO_KEEP");
+	ENDDEBUG
+	/* build a zeroed OBJ_SET Object named adr (the pointer is stored, not copied) and return it */
+	onew = (struct Object *)Malloc(sizeof (struct Object));
+	bzero(onew, sizeof(struct Object));
+	onew->obj_name = adr;
+	onew->obj_kind = OBJ_SET;
+	onew->obj_type = type;
+	if(keep) 	/* only sets with keep nonzero are entered in Objtree */
+		insert( onew );
+		/* address already stashed before calling defineset */
+	IFDEBUG(o)
+		printf("defineset(0x%x,%s) returning 0x%x\n", type , adr, onew);
+		dumptree(Objtree,0);
+	ENDDEBUG
+	return(onew);
+}
+
+dumpit(o, s)
+char *o;
+char *s;
+{
+	register int i;
+	/* debug aid: for each 4-byte group of the Object at o print its int value, then its four bytes */
+IFDEBUG(o)
+	fprintf(OUT, "object 0x%x, %s\n",o, s);
+	for(i=0; i< sizeof(struct Object); i+=4) {
+		fprintf(OUT, "0x%x: 0x%x 0x%x 0x%x 0x%x\n",
+		*((int *)(o+i)), *(o+i), *(o+i+1), *(o+i+2), *(o+i+3) );
+	}
+ENDDEBUG
+}
+
+defineitem(type, adr, struc)
+unsigned char type;
+char *adr;
+char *struc;
+{
+	struct Object *onew;
+	IFDEBUG(o)
+		printf("defineitem(0x%x, %s at 0x%x, %s)\n", type, adr, adr, struc);
+	ENDDEBUG
+	/* create and insert the item named adr; a name lookup() already finds for this type is fatal */
+	if( onew = lookup( type, adr ) ) {
+		fprintf(stderr, 
+	"Internal error at defineitem: trying to redefine obj type 0x%x, adr %s\n",
+			type, adr);
+		exit(-1);
+	} else {
+		onew = (struct Object *)Malloc(sizeof (struct Object));
+		bzero(onew, sizeof(struct Object));
+		onew->obj_name = stash(adr);	/* the Object keeps its own copy of the name */
+		onew->obj_kind = OBJ_ITEM;
+		onew->obj_type =  type;
+		onew->obj_struc = struc?stash(struc):struc;	/* a null struc stays null */
+		insert( onew );
+	}
+	IFDEBUG(o)
+		fprintf(OUT, "defineitem(0x%x, %s) returning 0x%x\n", type, adr, onew);
+	ENDDEBUG
+}
+
+member(o, adr)
+struct Object *o;
+char *adr;
+{
+	struct Object *onew, *oold;
+	IFDEBUG(o)
+		printf("member(0x%x, %s)\n", o, adr);
+	ENDDEBUG
+	/* push a member named adr onto the front of set o's obj_members chain */
+	oold = lookup(  o->obj_type, adr );
+	/* NOTE(review): when adr is unknown, the same Object is both inserted in Objtree and chained below */
+	onew = (struct Object *)Malloc(sizeof (struct Object));
+	if( oold == NULL ) {
+		extern int lineno;
+
+		fprintf(stderr,
+		"Warning at line %d: set definition of %s causes definition of\n",
+			lineno, OBJ_NAME(o));
+		fprintf(stderr, "\t (previously undefined) member %s\n", adr);
+		bzero(onew, sizeof(struct Object));
+		onew->obj_name = stash(adr);
+		onew->obj_kind = OBJ_ITEM;
+		onew->obj_type = o->obj_type;
+		onew->obj_members = NULL;
+		insert( onew );
+	} else {
+		if(oold->obj_kind != OBJ_ITEM) {
+			fprintf(stderr, "Sets cannot be members of sets; %s\n", adr);
+			exit(-1);
+		}
+		bcopy(oold, onew, sizeof(struct Object));	/* the chain gets a private copy of the item */
+		onew->obj_members = onew->obj_left = onew->obj_right = NULL;
+	}
+	onew->obj_members = o->obj_members;
+	o->obj_members = onew;
+}
+
+struct Object *Lookup(type, name)
+unsigned char type;
+char *name;
+{
+	register struct Object *found = lookup(type, name);
+	/* same as lookup(), except that a miss is reported and Exit(-1) is called */
+	if (found != NULL)
+		return found;
+	fprintf(stderr, "Trying to use undefined %s: %s\n",
+		(type == STATESET) ? "state" : "event", name);
+	Exit(-1);
+	return found;
+}
+
+AddCurrentEventName(x)
+register char **x;
+{
+	register char *src;
+	register int pass;
+
+	/*
+	 * Append EV_PREFIX followed by the current event's name at *x,
+	 * advancing *x past what was written.  No terminating NUL is stored.
+	 */
+	if( ! CurrentEvent ) {
+		fprintf(stderr, "No event named!  BARF!\n"); Exit(-1);
+	}
+	if( CurrentEvent->obj_struc == (char *)0 ) {
+		fprintf(stderr, "No attributes for current event!\n"); Exit(-1);
+	}
+
+	/* pass 0 copies the prefix, pass 1 the event name */
+	for( pass = 0; pass < 2; pass++ ) {
+		src = pass ? CurrentEvent->obj_name : EV_PREFIX;
+		for( ; *src != '\0'; src++ )
+			*(*x)++ = *src;
+	}
+}
+
+dumptree(o,i)
+	register struct Object *o;
+	int i;
+{
+	register int pad;
+
+	/* in-order dump to stdout; i is the depth and also the indent width */
+	if(o != NULL)
+		dumptree(o->obj_left, i+1);
+	for(pad = i; pad > 0; pad--)
+		fputc(' ', stdout);
+	if(o == NULL) {
+		fprintf(stdout, "%3d NULL\n", i);
+		return;
+	}
+	fprintf(stdout, "%3d 0x%x: %s\n", i,o, OBJ_NAME(o));
+	dumptree(o->obj_right, i+1);
+}
+
+dump(c,a)
+{
+	register int x = 8;
+	int zero = 0;
+#include <sys/signal.h>
+	/* c and a are undeclared, hence int; they are only printed */
+	fprintf(stderr, "dump: c 0x%x, a 0x%x\n",c,a);
+	/* deliberate divide by zero, then SIGQUIT to pid 0 - presumably to force a core dump */
+	x = x/zero;
+	kill(0, SIGQUIT);
+}
+
+dump_trans( pred, oldstate, newstate, action, event )
+struct Object *oldstate, *newstate, *event;
+char *pred, *action;
+{
+	extern int transno;
+	struct Object *o;
+	/* print transition number transno on stdout: newstate <== oldstate event, then predicate and action */
+	fprintf(stdout, "\n%d:  ", transno);
+#define dumpit(x)\
+	if((x)->obj_kind == OBJ_SET) {\
+		o = (x)->obj_members; fprintf( stdout, "[ " );\
+		while(o) { fprintf(stdout, "%s ", o->obj_name); o = o->obj_members; }\
+		fprintf( stdout, " ] ");\
+	} else { fprintf(stdout, "%s ", (x)->obj_name); }
+	/* dumpit(x): a set prints as a bracketed list of member names, anything else as its own name */
+	dumpit(newstate);
+	fprintf(stdout, " <== ");
+	dumpit(oldstate);
+	dumpit(event);
+	fprintf(stdout, "\n\t\t%s\n\t\t%s\n", pred?pred:"DEFAULT", 
+		action);	/* a null pred is shown as DEFAULT */
+}
diff --git a/sys/netiso/xebec/sets.h b/sys/netiso/xebec/sets.h
new file mode 100644
index 00000000000..96eb791edc2
--- /dev/null
+++ b/sys/netiso/xebec/sets.h
@@ -0,0 +1,36 @@
+/* $Header: sets.h,v 2.1 88/09/19 12:56:33 nhall Exp $ */
+/* $Source: /var/home/tadl/src/argo/xebec/RCS/sets.h,v $ */
+
+#define MAXEVENTS 200
+#define MAXSTATES 200
+
+#define STATESET 10
+#define EVENTSET 5
+
+#define OBJ_ITEM 2
+#define OBJ_SET 3
+
+struct Object {
+	unsigned char obj_kind;
+	unsigned char obj_type; /* state or event */
+	char *obj_name;
+	char *obj_struc;
+	int obj_number; 
+	struct Object *obj_members; /* must be null for kind==item */
+	/* for the tree */
+	struct Object *obj_left;
+	struct Object *obj_right;
+	struct Object *obj_parent;
+} ;
+
+extern char *Noname;
+
+#define OBJ_NAME(o) (((o)->obj_name)?(o)->obj_name:Noname)
+
+extern int Nevents, Nstates;
+int Eventshift;
+extern struct Object *CurrentEvent;
+
+extern struct Object *Lookup();
+extern struct Object *defineset();
+
diff --git a/sys/netiso/xebec/test.trans b/sys/netiso/xebec/test.trans
new file mode 100644
index 00000000000..49db3610994
--- /dev/null
+++ b/sys/netiso/xebec/test.trans
@@ -0,0 +1,64 @@
+/* $Header: test.trans,v 0.2 88/09/19 12:58:29 nhall Exp $
+ */
+*PROTOCOL test
+
+*INCLUDE
+
+{
+#include "test_def.h"
+}
+
+*PCB    test_pcbstruct 	SYNONYM  P
+
+*STATES
+
+STATE_A
+STATE_B
+STATE_C
+ALL_STATES = [STATE_A, STATE_B, STATE_C]
+
+*EVENTS		{ int ev_all; } 		SYNONYM  E
+
+EV_1	{ char *ev1_char; }
+EV_2	{ int	ev2_int; char ev2_char; }
+EV_3
+EV_4		{ struct blah	*ev4_blahptr; 
+			  unsigned int 	ev4_uint;
+			  int 			ev4_int; 
+			}
+
+*TRANSITIONS
+
+SAME			<==			[ STATE_A, STATE_B ] [ EV_1, EV_2, EV_3 ]
+	( $E.ev_all > 0 )
+	{
+		if( $P.test_state == STATE_A )
+			printf("state is STATE_A\n"); 
+		else
+			printf("state is STATE_B\n"); 
+		printf("action first transition\n"); 
+	}
+
+;
+STATE_C			<==			[ STATE_A, STATE_B ] [ EV_1, EV_2, EV_3 ]
+	DEFAULT
+	{
+		printf("default - transition 2\n");
+		MACRO1( $P.test_pcbfield );
+	}
+;
+
+STATE_C			<==			[ STATE_A, STATE_B ] 	EV_4
+	( $$.ev4_blahptr->blahfield & 0x1 )
+	NULLACTION
+;
+
+STATE_C			<==			 ALL_STATES	EV_4
+	DEFAULT
+	{
+		printf("default - transition 4\n");
+		printf("pcb is 0x%x, event is 0x%x \n", $P, $E);
+		printf("ev4 values are : blahptr 0x%x uint 0x%x int 0x%x\n",
+			$$.ev4_blahptr, $$.ev4_uint, $$.ev4_int);
+	}
+;
diff --git a/sys/netiso/xebec/test_def.h b/sys/netiso/xebec/test_def.h
new file mode 100644
index 00000000000..6faa2dfce81
--- /dev/null
+++ b/sys/netiso/xebec/test_def.h
@@ -0,0 +1,13 @@
+
+struct blah {
+	unsigned int blahfield;
+	int		dummyi;
+	char 	dummyc;
+};
+
+struct test_pcbstruct {
+	int test_pcbfield;
+	int test_state;
+};
+
+#define MACRO1(arg) if(arg != 0) { printf("macro1\n"); }
diff --git a/sys/netiso/xebec/xebec.bnf b/sys/netiso/xebec/xebec.bnf
new file mode 100644
index 00000000000..d7406d9d5c2
--- /dev/null
+++ b/sys/netiso/xebec/xebec.bnf
@@ -0,0 +1,315 @@
+{
+#include "main.h"
+#include "sets.h"
+#include <stdio.h> 
+
+extern FILE *eventfile_h, *actfile; 
+}
+
+*fmq
+
+	novocab
+	nobnf
+	nofirst
+	nofollow
+	noparsetable
+	noerrortables
+	nos
+	noe
+
+*terminals
+
+ID		 	0	0	{ char *address; }
+STRUCT		0	0
+SYNONYM		0	0
+PREDICATE		0	0	{ char *address; }
+ACTION		0	0	{ char *address; }
+/*
+FSTRING		0	0	{ char *address; }
+*/
+PROTOCOL	0	0	
+LBRACK		0	0
+RBRACK		0	0
+LANGLE		0	0
+EQUAL		0	0
+COMMA		0	0
+STAR		0	0
+EVENTS		0	0
+TRANSITIONS	0	0
+INCLUDE		0	0
+STATES		0	0
+SEMI		0	0
+PCB			0	0		{ char *address; }
+DEFAULT		0	0
+NULLACTION	0 	0
+SAME		0 	0
+
+*nonterminals
+
+pcb				{ char *address; int isevent; }
+syn				{ int type; }
+setlist			{ struct Object *setnum; }
+setlisttail		{ struct Object *setnum; }
+part			{ unsigned char type; }
+parttail		{ unsigned char type; }
+partrest		{ unsigned char type; char *address; }
+setstruct		{ struct Object *object; }
+setdef			{ unsigned char type,keep; char *address; struct Object *object; }
+translist		
+transition	
+event			{ struct Object *object;  }
+oldstate		{	struct Object *object;	}
+newstate		{	struct Object *object;	}
+predicatepart 	{	char *string; }
+actionpart		{ 	char *string; struct Object *oldstate; struct Object *newstate; }
+
+*productions
+
+program 	::= 
+				STAR PROTOCOL ID 
+				{	
+					if(strlen($ID.address) > 50 ) {
+						fprintf(stderr, 
+						"Protocol name may not exceed 50 chars in length.\n"); 
+						Exit(-1);
+					}
+					strcpy(protocol, $ID.address); 
+					openfiles(protocol); 
+				}
+				STAR includelist
+				PCB  
+				{ 
+					$$pcb.isevent = 0; 
+				}
+				pcb 
+				{
+				  fprintf(actfile, "\ntypedef %s %s%s;\n",
+							  $pcb[7].address,protocol, PCBNAME); 
+				  $$syn.type = PCB_SYN;
+				} 
+				syn 
+				STAR STATES { $$part.type = (unsigned char) STATESET; } part
+				STAR { end_states(eventfile_h); } EVENTS 
+				{ $$pcb.isevent = 1; }
+				pcb 
+				{
+					fprintf(eventfile_h, "\t"); /* fmq gags on single chars */
+					includecode(eventfile_h, $pcb[14].address);
+					fprintf(eventfile_h, "\n"); /* fmq gags on single chars */
+					$$syn.type = EVENT_SYN;
+				}
+				syn 
+				{ 
+				  	$$part.type = (unsigned char)EVENTSET; 
+				} 
+				part 
+				STAR { end_events(); } 
+				TRANSITIONS 
+				{ 
+					putincludes();
+					putdriver(actfile, 9);
+				} 
+				translist
+;
+pcb 	::=  STRUCT  
+			 {	if($pcb.isevent)  {
+					fprintf(stderr, 
+					"Event is a list of objects enclosed by \"{}\"\n");
+					Exit(-1);
+				}
+			  fprintf(eventfile_h, "struct "); 
+			}
+			 ACTION { $pcb.address = $ACTION.address; }
+			 optsemi
+		::=	 ACTION 
+			{	if( ! $pcb.isevent)  {
+					fprintf(stderr, 
+					"Pcb requires a type or structure definition.\"{}\"\n");
+					Exit(-1);
+				}
+			   $pcb.address = $ACTION.address; 
+			}
+			 optsemi
+		::=  ID {  $pcb.address = $ID.address; } optsemi
+;
+
+syn ::= SYNONYM ID { synonyms[$syn.type] = stash( $ID.address ); }
+		::= 
+;
+
+optsemi ::= SEMI 
+		::= 
+;
+includelist ::= INCLUDE ACTION { includecode(actfile, $ACTION.address);} STAR
+			::=
+;
+part ::= ID 
+		{ 
+			$$partrest.address = $ID.address;
+			$$partrest.type = $part.type; 
+		}
+		partrest  
+		{ $$parttail.type = $part.type; } 
+		parttail
+;
+parttail ::= { $$part.type = $parttail.type; } part 
+		::=  
+;
+partrest ::=  EQUAL 
+			{ 
+			  if(  lookup( $partrest.type, $partrest.address ) ) {
+				fprintf(stderr, "bnf:trying to redefine obj type 0x%x, adr %s\n",
+					$partrest.type, $partrest.address);
+				Exit(-1);
+			  } 
+			  $$setdef.type = $partrest.type;
+			  $$setdef.address = stash( $partrest.address );
+			  $$setdef.keep = 1;
+			} setdef { $$setstruct.object = $setdef.object; } setstruct
+
+	::=  ACTION 
+		{ 
+		 defineitem($partrest.type, 
+					$partrest.address, $ACTION.address); 
+		}
+
+	::= { 
+			defineitem($partrest.type, $partrest.address, (char *)0);
+		}
+;
+
+setstruct ::= ACTION 
+			{
+				if($setstruct.object)  {
+					/* WHEN COULD THIS BE FALSE?? 
+					 * isn't it supposed to be setstruct.object???
+					 * (it used to be $ACTION.address)
+					 */
+
+					$setstruct.object->obj_struc = $ACTION.address;
+					fprintf(eventfile_h, 
+						"struct %s %s%s;\n\n", $ACTION.address, 
+						EV_PREFIX,  $setstruct.object->obj_name);
+				}
+			}
+		::=
+;
+
+setdef ::= LBRACK 
+		{ 
+			$$setlist.setnum = 
+			defineset($setdef.type, $setdef.address, $setdef.keep); 
+		} setlist RBRACK { $setdef.object = $setlist.setnum; }
+;
+
+setlist ::= ID 
+	{ 
+		member($setlist.setnum, $ID.address); 
+				$$setlisttail.setnum = $setlist.setnum; 
+	} setlisttail 
+;
+
+setlisttail ::= COMMA { $$setlist.setnum = $setlisttail.setnum; } setlist
+		::=
+;
+translist		::=	 transition  translisttail 
+;
+translisttail	::= translist
+	::=	
+;
+transition ::=  newstate { transno ++; } LANGLE EQUAL EQUAL oldstate  
+	event 
+	{ 
+	 	CurrentEvent /* GAG! */ = $event.object; 
+	 }
+	predicatepart
+	{ 
+		$$actionpart.string = $predicatepart.string; 
+		$$actionpart.newstate = $newstate.object; 
+		$$actionpart.oldstate = $oldstate.object;
+	}
+	actionpart
+	SEMI
+;
+
+predicatepart ::= PREDICATE
+	{ 
+		 $predicatepart.string = stash ( $PREDICATE.address );
+	}
+	::= DEFAULT
+	{ 
+		$predicatepart.string = (char *)0;
+	}
+;
+
+actionpart ::=  
+	ACTION
+	{
+	  statetable( $actionpart.string, $actionpart.oldstate, 
+					$actionpart.newstate,
+					acttable(actfile, $ACTION.address ), 
+					CurrentEvent ); 
+	  if( print_trans ) {
+	  	dump_trans( $actionpart.string, $actionpart.oldstate, 
+					$actionpart.newstate,
+					$ACTION.address, CurrentEvent ); 
+	  }
+	}
+	::= NULLACTION
+	{
+	  statetable($actionpart.string, $actionpart.oldstate, $actionpart.newstate,
+				  0, CurrentEvent ); /* KLUDGE - remove this */
+	  if( print_trans ) {
+	  	dump_trans( $actionpart.string, $actionpart.oldstate, 
+					$actionpart.newstate,
+					"NULLACTION", CurrentEvent ); 
+	  }
+	}
+;
+
+oldstate ::= ID 
+	{	
+		$oldstate.object = Lookup(STATESET, $ID.address);
+	}
+	::= {
+			$$setdef.address = (char *)0;
+			$$setdef.type = (unsigned char)STATESET; 
+			$$setdef.keep = 0;
+		}
+		setdef 
+		{ 
+			$oldstate.object = $setdef.object; 
+		}
+;
+
+newstate ::= ID 
+	{ 
+		$newstate.object = Lookup(STATESET, $ID.address); 
+	}
+;
+
+newstate ::= SAME 
+	{ 
+		extern struct Object *SameState;
+
+		$newstate.object = SameState;
+	}
+;
+
+event ::= ID 
+		{
+			$event.object = Lookup(EVENTSET, $ID.address); 
+		}
+	::= 
+		{
+			$$setdef.address = (char *)0;
+			$$setdef.type = (unsigned char)EVENTSET; 
+			$$setdef.keep = 0;
+		}
+		setdef 
+		{ 
+			$event.object = $setdef.object; 
+		}
+;
+
+*end
diff --git a/sys/netiso/xebec/xebec.c b/sys/netiso/xebec/xebec.c
new file mode 100644
index 00000000000..132bcb8487a
--- /dev/null
+++ b/sys/netiso/xebec/xebec.c
@@ -0,0 +1,451 @@
+/* $Header: xebec.c,v 2.2 88/09/19 12:55:37 nhall Exp $ */
+/* $Source: /var/home/tadl/src/argo/xebec/RCS/xebec.c,v $ */
+
+#include "xebec.h"
+#include "llparse.h"
+#ifndef	E_TABLE
+#define	E_TABLE "xebec.e"
+#endif	E_TABLE
+
+#include "main.h"
+#include "sets.h"
+#include <stdio.h> 
+
+extern FILE *eventfile_h, *actfile; 
+
+llaction(lln,token)
+LLtoken *token;
+{
+	struct llattr *llattr;
+	llattr = &llattrdesc[lldescindex-1];
+switch(lln) {
+case 1:
+	llfinprod();
+	break;
+
+case 10: {
+	
+					if(strlen(llattr->llabase[3].ID.address) > 50 ) {
+						fprintf(stderr, 
+						"Protocol name may not exceed 50 chars in length.\n"); 
+						Exit(-1);
+					}
+					strcpy(protocol, llattr->llabase[3].ID.address); 
+					openfiles(protocol); 
+				
+} break;
+
+case 11: {
+ 
+					llattr->llabase[7].pcb.isevent = 0; 
+				
+} break;
+
+case 12: {
+
+				  fprintf(actfile, "\ntypedef %s %s%s;\n",
+							  llattr->llabase[7].pcb.address,protocol, PCBNAME); 
+				  llattr->llabase[8].syn.type = PCB_SYN;
+				
+} break;
+
+case 13: {
+ llattr->llabase[11].part.type = (unsigned char) STATESET; 
+} break;
+
+case 14: {
+ end_states(eventfile_h); 
+} break;
+
+case 15: {
+ llattr->llabase[14].pcb.isevent = 1; 
+} break;
+
+case 16: {
+
+					fprintf(eventfile_h, "\t"); /* fmq gags on single chars */
+					includecode(eventfile_h, llattr->llabase[14].pcb.address);
+					fprintf(eventfile_h, "\n"); /* fmq gags on single chars */
+					llattr->llabase[15].syn.type = EVENT_SYN;
+				
+} break;
+
+case 17: {
+ 
+				  	llattr->llabase[16].part.type = (unsigned char)EVENTSET; 
+				
+} break;
+
+case 18: {
+ end_events(); 
+} break;
+
+case 19: {
+ 
+					putincludes();
+					putdriver(actfile, 9);
+				
+} break;
+
+case 20: {
+	if(llattr->llabase[0].pcb.isevent)  {
+					fprintf(stderr, 
+					"Event is a list of objects enclosed by \"{}\"\n");
+					Exit(-1);
+				}
+			  fprintf(eventfile_h, "struct "); 
+			
+} break;
+
+case 21: {
+ llattr->llabase[0].pcb.address = llattr->llabase[2].ACTION.address; 
+} break;
+
+case 22: {
+	if( ! llattr->llabase[0].pcb.isevent)  {
+					fprintf(stderr, 
+					"Pcb requires a type or structure definition.\"{}\"\n");
+					Exit(-1);
+				}
+			   llattr->llabase[0].pcb.address = llattr->llabase[1].ACTION.address; 
+			
+} break;
+
+case 23: {
+  llattr->llabase[0].pcb.address = llattr->llabase[1].ID.address; 
+} break;
+
+case 24: {
+ synonyms[llattr->llabase[0].syn.type] = stash( llattr->llabase[2].ID.address ); 
+} break;
+
+case 25: {
+ includecode(actfile, llattr->llabase[2].ACTION.address);
+} break;
+
+case 26: {
+ 
+			llattr->llabase[2].partrest.address = llattr->llabase[1].ID.address;
+			llattr->llabase[2].partrest.type = llattr->llabase[0].part.type; 
+		
+} break;
+
+case 27: {
+ llattr->llabase[3].parttail.type = llattr->llabase[0].part.type; 
+} break;
+
+case 28: {
+ llattr->llabase[1].part.type = llattr->llabase[0].parttail.type; 
+} break;
+
+case 29: {
+ 
+			  if(  lookup( llattr->llabase[0].partrest.type, llattr->llabase[0].partrest.address ) ) {
+				fprintf(stderr, "bnf:trying to redefine obj type 0x%x, adr %s\n",
+					llattr->llabase[0].partrest.type, llattr->llabase[0].partrest.address);
+				Exit(-1);
+			  } 
+			  llattr->llabase[2].setdef.type = llattr->llabase[0].partrest.type;
+			  llattr->llabase[2].setdef.address = stash( llattr->llabase[0].partrest.address );
+			  llattr->llabase[2].setdef.keep = 1;
+			
+} break;
+
+case 30: {
+ llattr->llabase[3].setstruct.object = llattr->llabase[2].setdef.object; 
+} break;
+
+case 31: {
+ 
+		 defineitem(llattr->llabase[0].partrest.type, 
+					llattr->llabase[0].partrest.address, llattr->llabase[1].ACTION.address); 
+		
+} break;
+
+case 32: {
+ 
+			defineitem(llattr->llabase[0].partrest.type, llattr->llabase[0].partrest.address, (char *)0);
+		
+} break;
+
+case 33: {
+
+				if(llattr->llabase[0].setstruct.object)  {
+					/* WHEN COULD THIS BE FALSE?? 
+					 * isn't it supposed to be setstruct.object???
+					 * (it used to be $ACTION.address)
+					 */
+
+					llattr->llabase[0].setstruct.object->obj_struc = llattr->llabase[1].ACTION.address;
+					fprintf(eventfile_h, 
+						"struct %s %s%s;\n\n", llattr->llabase[1].ACTION.address, 
+						EV_PREFIX,  llattr->llabase[0].setstruct.object->obj_name);
+				}
+			
+} break;
+
+case 34: {
+ 
+			llattr->llabase[2].setlist.setnum = 
+			defineset(llattr->llabase[0].setdef.type, llattr->llabase[0].setdef.address, llattr->llabase[0].setdef.keep); 
+		
+} break;
+
+case 35: {
+ llattr->llabase[0].setdef.object = llattr->llabase[2].setlist.setnum; 
+} break;
+
+case 36: {
+ 
+		member(llattr->llabase[0].setlist.setnum, llattr->llabase[1].ID.address); 
+				llattr->llabase[2].setlisttail.setnum = llattr->llabase[0].setlist.setnum; 
+	
+} break;
+
+case 37: {
+ llattr->llabase[2].setlist.setnum = llattr->llabase[0].setlisttail.setnum; 
+} break;
+
+case 38: {
+ transno ++; 
+} break;
+
+case 39: {
+ 
+	 	CurrentEvent /* GAG! */ = llattr->llabase[6].event.object; 
+	 
+} break;
+
+case 40: {
+ 
+		llattr->llabase[8].actionpart.string = llattr->llabase[7].predicatepart.string; 
+		llattr->llabase[8].actionpart.newstate = llattr->llabase[1].newstate.object; 
+		llattr->llabase[8].actionpart.oldstate = llattr->llabase[5].oldstate.object;
+	
+} break;
+
+case 41: {
+ 
+		 llattr->llabase[0].predicatepart.string = stash ( llattr->llabase[1].PREDICATE.address );
+	
+} break;
+
+case 42: {
+ 
+		llattr->llabase[0].predicatepart.string = (char *)0;
+	
+} break;
+
+case 43: {
+
+	  statetable( llattr->llabase[0].actionpart.string, llattr->llabase[0].actionpart.oldstate, 
+					llattr->llabase[0].actionpart.newstate,
+					acttable(actfile, llattr->llabase[1].ACTION.address ), 
+					CurrentEvent ); 
+	  if( print_trans ) {
+	  	dump_trans( llattr->llabase[0].actionpart.string, llattr->llabase[0].actionpart.oldstate, 
+					llattr->llabase[0].actionpart.newstate,
+					llattr->llabase[1].ACTION.address, CurrentEvent ); 
+	  }
+	
+} break;
+
+case 44: {
+
+	  statetable(llattr->llabase[0].actionpart.string, llattr->llabase[0].actionpart.oldstate, llattr->llabase[0].actionpart.newstate,
+				  0, CurrentEvent ); /* KLUDGE - remove this */
+	  if( print_trans ) {
+	  	dump_trans( llattr->llabase[0].actionpart.string, llattr->llabase[0].actionpart.oldstate, 
+					llattr->llabase[0].actionpart.newstate,
+					"NULLACTION", CurrentEvent ); 
+	  }
+	
+} break;
+
+case 45: {
+	
+		llattr->llabase[0].oldstate.object = Lookup(STATESET, llattr->llabase[1].ID.address);
+	
+} break;
+
+case 46: {
+
+			llattr->llabase[1].setdef.address = (char *)0;
+			llattr->llabase[1].setdef.type = (unsigned char)STATESET; 
+			llattr->llabase[1].setdef.keep = 0;
+		
+} break;
+
+case 47: {
+ 
+			llattr->llabase[0].oldstate.object = llattr->llabase[1].setdef.object; 
+		
+} break;
+
+case 48: {
+ 
+		llattr->llabase[0].newstate.object = Lookup(STATESET, llattr->llabase[1].ID.address); 
+	
+} break;
+
+case 49: {
+ 
+		extern struct Object *SameState;
+
+		llattr->llabase[0].newstate.object = SameState;
+	
+} break;
+
+case 50: {
+
+			llattr->llabase[0].event.object = Lookup(EVENTSET, llattr->llabase[1].ID.address); 
+		
+} break;
+
+case 51: {
+
+			llattr->llabase[1].setdef.address = (char *)0;
+			llattr->llabase[1].setdef.type = (unsigned char)EVENTSET; 
+			llattr->llabase[1].setdef.keep = 0;
+		
+} break;
+
+case 52: {
+ 
+			llattr->llabase[0].event.object = llattr->llabase[1].setdef.object; 
+		
+} break;
+}
+}
+char *llstrings[] = {	/* printable name of each grammar symbol, by number */
+	"<null>",
+	"ID",		/* 1..22: terminals, numbered like T_ID..T_ENDMARKER */
+	"STRUCT",
+	"SYNONYM",
+	"PREDICATE",
+	"ACTION",
+	"PROTOCOL",
+	"LBRACK",
+	"RBRACK",
+	"LANGLE",
+	"EQUAL",
+	"COMMA",
+	"STAR",
+	"EVENTS",
+	"TRANSITIONS",
+	"INCLUDE",
+	"STATES",
+	"SEMI",
+	"PCB",
+	"DEFAULT",
+	"NULLACTION",
+	"SAME",
+	"ENDMARKER",
+	"pcb",		/* 23..43: nonterminals */
+	"syn",
+	"setlist",
+	"setlisttail",
+	"part",
+	"parttail",
+	"partrest",
+	"setstruct",
+	"setdef",
+	"translist",
+	"transition",
+	"event",
+	"oldstate",
+	"newstate",
+	"predicatepart",
+	"actionpart",
+	"program",
+	"includelist",
+	"optsemi",
+	"translisttail",
+	"$goal$",
+	(char *) 0
+};
+short llnterms = 23;	/* same value as LLTERM; llstrings[0..22] */
+short llnsyms = 44;	/* same value as LLSYM; named entries in llstrings[] */
+short llnprods = 38;	/* same value as LLPROD */
+short llinfinite = 10000;	/* same value as LLINF */
+short llproductions[] = {	/* production bodies laid end to end; sliced by llprodindex[] */
+41, -21, 5, -20, 2, 
+41, -22, 5, 
+41, -23, 1, 
+-24, 1, 3, 
+
+26, -36, 1, 
+25, -37, 11, 
+
+28, -27, 29, -26, 1, 
+27, -28, 
+
+30, -30, 31, -29, 10, 
+-31, 5, 
+-32, 
+-33, 5, 
+
+-35, 8, 25, -34, 7, 
+42, 33, 
+17, 38, -40, 37, -39, 34, 35, 10, 10, 9, -38, 36, 
+-50, 1, 
+-52, 31, -51, 
+-45, 1, 
+-47, 31, -46, 
+-48, 1, 
+-49, 21, 
+-41, 4, 
+-42, 19, 
+-43, 5, 
+-44, 20, 
+32, -19, 14, -18, 12, 27, -17, 24, -16, 23, -15, 13, -14, 12, 27, -13, 16, 12, 24, -12, 23, -11, 18, 40, 12, -10, 1, 6, 12, 
+12, -25, 5, 15, 
+
+17, 
+
+32, 
+
+22, 39, 
+0	/* NOTE(review): negative entries look like semantic-action numbers (cf. case 34..52 of the action switch) -- confirm */
+};
+struct llprodindex llprodindex[] = {	/* per production: { start in llproductions[], length, ... } */
+{   0,   0,   0 }, {   0,   5,  19 }, {   5,   3,   3 }, {   8,   3,   2 }, 
+{  11,   3,   2 }, {  14,   0,   2 }, {  14,   3,   0 }, {  17,   3,   1 }, 
+{  20,   0,   0 }, {  20,   5,   3 }, {  25,   2,   0 }, {  27,   0,   3 }, 
+{  27,   5,   1 }, {  32,   2,   0 }, {  34,   1,   3 }, {  35,   2,   1 }, 
+{  37,   0,   0 }, {  37,   5,   1 }, {  42,   2,   0 }, {  44,  12,   3 }, 
+{  56,   2,   2 }, {  58,   3,   2 }, {  61,   2,   0 }, {  63,   3,   2 }, 
+{  66,   2,   1 }, {  68,   2,   0 }, {  70,   2,   9 }, {  72,   2,   1 }, 
+{  74,   2,   1 }, {  76,   2,   1 }, {  78,  29,   1 }, { 107,   4,   1 }, 
+{ 111,   0,   1 }, { 111,   1,   1 }, { 112,   0,   1 }, { 112,   1,   1 }, 
+{ 113,   0,   1 }, { 113,   2,   2 }, {   0,   0,   0 }
+};
+short llepsilon[] = {	/* per production: 1 when its body holds no grammar symbol (empty, or an action only) */
+ 0, 0, 0, 0, 0, 1, 0, 0, 1, 0,
+ 0, 1, 0, 0, 1, 0, 1, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 1, 0, 1, 0, 1, 0, 0
+};
+struct llparsetable llparsetable[] = {	/* runs of { terminal, production }, each run closed by { 0, nonterminal } */
+{   1,   3 }, {   2,   1 }, {   5,   2 }, {   0,  23 }, {   1,   5 }, 
+{   3,   4 }, {  12,   5 }, {   0,  24 }, {   1,   6 }, {   0,  25 }, 
+{   8,   8 }, {  11,   7 }, {   0,  26 }, {   1,   9 }, {   0,  27 }, 
+{   1,  10 }, {  12,  11 }, {   0,  28 }, {   1,  14 }, {   5,  13 }, 
+{  10,  12 }, {  12,  14 }, {   0,  29 }, {   1,  16 }, {   5,  15 }, 
+{  12,  16 }, {   0,  30 }, {   7,  17 }, {   0,  31 }, {   1,  18 }, 
+{  21,  18 }, {   0,  32 }, {   1,  19 }, {  21,  19 }, {   0,  33 }, 
+{   1,  20 }, {   7,  21 }, {   0,  34 }, {   1,  22 }, {   7,  23 }, 
+{   0,  35 }, {   1,  24 }, {  21,  25 }, {   0,  36 }, {   4,  26 }, 
+{  19,  27 }, {   0,  37 }, {   5,  28 }, {  20,  29 }, {   0,  38 }, 
+{  12,  30 }, {   0,  39 }, {  15,  31 }, {  18,  32 }, {   0,  40 }, 
+{   1,  34 }, {   3,  34 }, {  12,  34 }, {  17,  33 }, {   0,  41 }, 
+{   1,  35 }, {  21,  35 }, {  22,  36 }, {   0,  42 }, {  12,  37 }, 
+{   0,  43 }, {   0,   0 }
+};
+short llparseindex[] = {	/* per symbol: first row of its run in llparsetable[]; terminals 0..22 hold 0 */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 4, 8, 10, 13, 15, 18,
+ 23, 27, 29, 32, 35, 38, 41, 44, 47, 50,
+ 52, 55, 60, 64, 0
+};
diff --git a/sys/netiso/xebec/xebec.h b/sys/netiso/xebec/xebec.h
new file mode 100644
index 00000000000..168bb77b249
--- /dev/null
+++ b/sys/netiso/xebec/xebec.h
@@ -0,0 +1,88 @@
+/* $Header: xebec.h,v 2.1 88/09/19 12:56:35 nhall Exp $ */
+/* $Source: /var/home/tadl/src/argo/xebec/RCS/xebec.h,v $ */
+
+union llattrib {	/* attribute storage; one member per grammar symbol, named as in llstrings[] */
+	struct {
+ char *address; 	} ID;
+	int	STRUCT;
+	int	SYNONYM;
+	struct {
+ char *address; 	} PREDICATE;
+	struct {
+ char *address; 	} ACTION;
+	int	PROTOCOL;
+	int	LBRACK;
+	int	RBRACK;
+	int	LANGLE;
+	int	EQUAL;
+	int	COMMA;
+	int	STAR;
+	int	EVENTS;
+	int	TRANSITIONS;
+	int	INCLUDE;
+	int	STATES;
+	int	SEMI;
+	struct {
+ char *address; 	} PCB;
+	int	DEFAULT;
+	int	NULLACTION;
+	int	SAME;
+	struct {
+ char *address; int isevent; 	} pcb;
+	struct {
+ int type; 	} syn;
+	struct {
+ struct Object *setnum; 	} setlist;
+	struct {
+ struct Object *setnum; 	} setlisttail;
+	struct {
+ unsigned char type; 	} part;
+	struct {
+ unsigned char type; 	} parttail;
+	struct {
+ unsigned char type; char *address; 	} partrest;
+	struct {
+ struct Object *object; 	} setstruct;
+	struct {
+ unsigned char type,keep; char *address; struct Object *object; 	} setdef;
+	int	translist;
+	int	transition;
+	struct {
+ struct Object *object;  	} event;
+	struct {
+	struct Object *object;		} oldstate;
+	struct {
+	struct Object *object;		} newstate;
+	struct {
+	char *string; 	} predicatepart;
+	struct {
+ 	char *string; struct Object *oldstate; struct Object *newstate; 	} actionpart;
+};
+#define LLTERM	23	/* same value as llnterms */
+#define LLSYM	44	/* same value as llnsyms */
+#define LLPROD	38	/* same value as llnprods */
+
+#define LLINF	10000	/* same value as llinfinite */
+/* Terminal (token) numbers; each one indexes its name in llstrings[]. */
+#define T_ID                              1
+#define T_STRUCT                          2
+#define T_SYNONYM                         3
+#define T_PREDICATE                       4
+#define T_ACTION                          5
+#define T_PROTOCOL                        6
+#define T_LBRACK                          7
+#define T_RBRACK                          8
+#define T_LANGLE                          9
+#define T_EQUAL                           10
+#define T_COMMA                           11
+#define T_STAR                            12
+#define T_EVENTS                          13
+#define T_TRANSITIONS                     14
+#define T_INCLUDE                         15
+#define T_STATES                          16
+#define T_SEMI                            17
+#define T_PCB                             18
+#define T_DEFAULT                         19
+#define T_NULLACTION                      20
+#define T_SAME                            21
+#define T_ENDMARKER                       22
diff --git a/sys/netns/idp.h b/sys/netns/idp.h
new file mode 100644
index 00000000000..254208dfad8
--- /dev/null
+++ b/sys/netns/idp.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 1984, 1985, 1986, 1987, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)idp.h	8.1 (Berkeley) 6/10/93
+ */
+
+/*
+ * Definitions for NS(tm) Internet Datagram Protocol: the packet header.
+ */
+struct idp {
+	u_short	idp_sum;	/* Checksum */
+	u_short	idp_len;	/* Length, in bytes, including header */
+	u_char	idp_tc;		/* Transport Control (i.e. hop count) */
+	u_char	idp_pt;		/* Packet Type (i.e. level 2 protocol) */
+	struct ns_addr	idp_dna;	/* Destination Network Address */
+	struct ns_addr	idp_sna;	/* Source Network Address */
+};
diff --git a/sys/netns/idp_usrreq.c b/sys/netns/idp_usrreq.c
new file mode 100644
index 00000000000..b548a12574b
--- /dev/null
+++ b/sys/netns/idp_usrreq.c
@@ -0,0 +1,566 @@
+/*
+ * Copyright (c) 1984, 1985, 1986, 1987, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)idp_usrreq.c	8.1 (Berkeley) 6/10/93
+ */
+
+#include <sys/param.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/protosw.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/errno.h>
+#include <sys/stat.h>
+
+#include <net/if.h>
+#include <net/route.h>
+
+#include <netns/ns.h>
+#include <netns/ns_pcb.h>
+#include <netns/ns_if.h>
+#include <netns/idp.h>
+#include <netns/idp_var.h>
+#include <netns/ns_error.h>
+
+/*
+ * IDP protocol implementation.
+ */
+
+struct	sockaddr_ns idp_ns = { sizeof(idp_ns), AF_NS };	/* source address scratch, filled in by idp_input() */
+
+/*
+ * Deliver a received IDP packet to the socket behind nsp
+ * (raw listeners are served by this routine as well).
+ */
+idp_input(m, nsp)
+	struct mbuf *m;
+	register struct nspcb *nsp;
+{
+	register struct idp *hdr = mtod(m, struct idp *);
+	struct ifnet *rcvif = m->m_pkthdr.rcvif;
+	register struct ifaddr *ifa;
+
+	if (nsp == 0)
+		panic("No nspcb");
+	/*
+	 * Build the sockaddr-format source address handed to the user.
+	 * A zero source net is replaced by the net of the first AF_NS
+	 * address on the receiving interface, when there is one.
+	 */
+	idp_ns.sns_addr = hdr->idp_sna;
+	if (ns_neteqnn(hdr->idp_sna.x_net, ns_zeronet) && rcvif) {
+		ifa = rcvif->if_addrlist;
+		while (ifa && ifa->ifa_addr->sa_family != AF_NS)
+			ifa = ifa->ifa_next;
+		if (ifa)
+			idp_ns.sns_addr.x_net = IA_SNS(ifa)->sns_addr.x_net;
+	}
+
+	nsp->nsp_rpt = hdr->idp_pt;
+	/* Unless the listener wants headers (NSP_RAWIN), skip past the header. */
+	if ((nsp->nsp_flags & NSP_RAWIN) == 0) {
+		m->m_data += sizeof (struct idp);
+		m->m_len -= sizeof (struct idp);
+		m->m_pkthdr.len -= sizeof (struct idp);
+	}
+	/* Queue data plus source address; free the packet if that fails. */
+	if (sbappendaddr(&nsp->nsp_socket->so_rcv, (struct sockaddr *)&idp_ns,
+	    m, (struct mbuf *)0) != 0)
+		sorwakeup(nsp->nsp_socket);
+	else
+		m_freem(m);
+}
+
+idp_abort(nsp)
+	struct nspcb *nsp;
+{
+	/* Fetch the socket up front; nsp is not touched after the disconnect. */
+	struct socket *sock = nsp->nsp_socket;
+	ns_pcbdisconnect(nsp);
+	soisdisconnected(sock);
+}
+/*
+ * Drop connection, reporting the specified error on the socket.
+ * Returns a null pcb pointer.
+ */
+struct nspcb *
+idp_drop(nsp, errno)
+	register struct nspcb *nsp;
+	int errno;
+{
+	struct socket *so = nsp->nsp_socket;
+
+	/*
+	 * someday, in the xerox world we will generate error protocol
+	 * packets announcing that the socket has gone away.
+	 */
+	/*if (TCPS_HAVERCVDSYN(tp->t_state)) {
+		tp->t_state = TCPS_CLOSED;
+		(void) tcp_output(tp);
+	}*/
+	so->so_error = errno;
+	ns_pcbdisconnect(nsp);
+	soisdisconnected(so);
+	return ((struct nspcb *)0);	/* used to fall off the end */
+}
+
+int noIdpRoute;		/* nonzero: never pass the cached route to ns_output() */
+/*
+ * Send the mbuf chain m0 as one IDP datagram for nsp.  Unless NSP_RAWOUT
+ * is set on the pcb, an IDP header built from the pcb is prepended.
+ * Returns ENOBUFS when an mbuf cannot be had, else ns_output()'s result.
+ */
+idp_output(nsp, m0)
+	struct nspcb *nsp;
+	struct mbuf *m0;
+{
+	register struct mbuf *m;
+	register struct idp *idp;
+	register struct socket *so;
+	register int len = 0;
+	register struct route *ro;
+	struct mbuf *mprev;
+	extern int idpcksum;
+
+	/* Calculate data length; mprev is left at the last mbuf. */
+	for (m = m0; m; m = m->m_next) {
+		mprev = m;
+		len += m->m_len;
+	}
+	/*
+	 * Make sure packet is actually of even length (pad with a zero byte).
+	 */
+	if (len & 1) {
+		m = mprev;
+		if ((m->m_flags & M_EXT) == 0 &&
+			(m->m_len + m->m_data < &m->m_dat[MLEN])) {
+			mtod(m, char *)[m->m_len++] = 0;	/* zero the pad byte */
+		} else {
+			struct mbuf *m1 = m_get(M_DONTWAIT, MT_DATA);
+
+			if (m1 == 0) {
+				m_freem(m0);
+				return (ENOBUFS);
+			}
+			m1->m_len = 1;
+			* mtod(m1, char *) = 0;
+			m->m_next = m1;
+		}
+		m0->m_pkthdr.len++;
+	}
+
+	/*
+	 * Fill in mbuf with extended IDP header
+	 * and addresses and length put into network format.
+	 */
+	m = m0;
+	if (nsp->nsp_flags & NSP_RAWOUT) {
+		idp = mtod(m, struct idp *);	/* caller supplied the header */
+	} else {
+		M_PREPEND(m, sizeof (struct idp), M_DONTWAIT);
+		if (m == 0)
+			return (ENOBUFS);
+		idp = mtod(m, struct idp *);
+		idp->idp_tc = 0;
+		idp->idp_pt = nsp->nsp_dpt;
+		idp->idp_sna = nsp->nsp_laddr;
+		idp->idp_dna = nsp->nsp_faddr;
+		len += sizeof (struct idp);
+	}
+
+	idp->idp_len = htons((u_short)len);	/* pad byte is not counted */
+
+	if (idpcksum) {
+		idp->idp_sum = 0;
+		len = ((len - 1) | 1) + 1;	/* round up to even for the sum */
+		idp->idp_sum = ns_cksum(m, len);
+	} else
+		idp->idp_sum = 0xffff;	/* checksumming switched off */
+
+	/* Output datagram. */
+	so = nsp->nsp_socket;
+	if (so->so_options & SO_DONTROUTE)
+		return (ns_output(m, (struct route *)0,
+		    (so->so_options & SO_BROADCAST) | NS_ROUTETOIF));
+	/*
+	 * Use cached route for previous datagram if
+	 * possible.  If the previous net was the same
+	 * and the interface was a broadcast medium, or
+	 * if the previous destination was identical,
+	 * then we are ok.
+	 *
+	 * NB: We don't handle broadcasts because that
+	 *     would require 3 subroutine calls.
+	 */
+	ro = &nsp->nsp_route;
+#ifdef ancient_history
+	/*
+	 * I think that this will all be handled in ns_pcbconnect!
+	 */
+	if (ro->ro_rt) {
+		if(ns_neteq(nsp->nsp_lastdst, idp->idp_dna)) {
+			/*
+			 * This assumes we have no GH type routes
+			 */
+			if (ro->ro_rt->rt_flags & RTF_HOST) {
+				if (!ns_hosteq(nsp->nsp_lastdst, idp->idp_dna))
+					goto re_route;
+
+			}
+			if ((ro->ro_rt->rt_flags & RTF_GATEWAY) == 0) {
+				register struct ns_addr *dst =
+						&satons_addr(ro->ro_dst);
+				dst->x_host = idp->idp_dna.x_host;
+			}
+			/* 
+			 * Otherwise, we go through the same gateway
+			 * and dst is already set up.
+			 */
+		} else {
+		re_route:
+			RTFREE(ro->ro_rt);
+			ro->ro_rt = (struct rtentry *)0;
+		}
+	}
+	nsp->nsp_lastdst = idp->idp_dna;
+#endif /* ancient_history */
+	if (noIdpRoute) ro = 0;
+	return (ns_output(m, ro, so->so_options & SO_BROADCAST));
+}
+/*
+ * Get (PRCO_GETOPT) or set (PRCO_SETOPT) an IDP socket option.
+ */
+/* ARGSUSED */
+idp_ctloutput(req, so, level, name, value)
+	int req, level;
+	struct socket *so;
+	int name;
+	struct mbuf **value;
+{
+	register struct mbuf *m;
+	struct nspcb *nsp = sotonspcb(so);
+	int mask, error = 0;
+	extern long ns_pexseq;
+
+	if (nsp == NULL)
+		return (EINVAL);
+
+	switch (req) {
+
+	case PRCO_GETOPT:
+		if (value==NULL)
+			return (EINVAL);
+		m = m_get(M_DONTWAIT, MT_DATA);
+		if (m==NULL)
+			return (ENOBUFS);
+		switch (name) {
+
+		case SO_ALL_PACKETS:
+			mask = NSP_ALL_PACKETS;
+			goto get_flags;
+
+		case SO_HEADERS_ON_INPUT:
+			mask = NSP_RAWIN;
+			goto get_flags;
+			
+		case SO_HEADERS_ON_OUTPUT:
+			mask = NSP_RAWOUT;
+		get_flags:
+			m->m_len = sizeof(short);
+			*mtod(m, short *) = nsp->nsp_flags & mask;
+			break;
+
+		case SO_DEFAULT_HEADERS:
+			m->m_len = sizeof(struct idp);
+			{
+				register struct idp *idp = mtod(m, struct idp *);
+				idp->idp_len = 0;
+				idp->idp_sum = 0;
+				idp->idp_tc = 0;
+				idp->idp_pt = nsp->nsp_dpt;
+				idp->idp_dna = nsp->nsp_faddr;
+				idp->idp_sna = nsp->nsp_laddr;
+			}
+			break;
+
+		case SO_SEQNO:
+			m->m_len = sizeof(long);
+			*mtod(m, long *) = ns_pexseq++;
+			break;
+
+		default:
+			error = EINVAL;
+		}
+		*value = m;	/* handed back even when error is EINVAL */
+		break;
+
+	case PRCO_SETOPT:
+		switch (name) {
+			int *ok;
+
+		case SO_ALL_PACKETS:
+			mask = NSP_ALL_PACKETS;
+			goto set_head;
+
+		case SO_HEADERS_ON_INPUT:
+			mask = NSP_RAWIN;
+			goto set_head;
+
+		case SO_HEADERS_ON_OUTPUT:
+			mask = NSP_RAWOUT;
+		set_head:
+			if (value && *value) {
+				ok = mtod(*value, int *);
+				if (*ok)
+					nsp->nsp_flags |= mask;
+				else
+					nsp->nsp_flags &= ~mask;
+			} else error = EINVAL;
+			break;
+
+		case SO_DEFAULT_HEADERS:
+			if (value && *value)	/* same check as the flag options */
+				nsp->nsp_dpt = mtod(*value, struct idp *)->idp_pt;
+			else error = EINVAL;
+			break;
+#ifdef NSIP
+		case SO_NSIP_ROUTE:
+			error = (value && *value) ? nsip_route(*value) : EINVAL;
+			break;
+#endif /* NSIP */
+		default:
+			error = EINVAL;
+		}
+		if (value && *value)
+			m_freem(*value);	/* the option mbuf is consumed on every set */
+		break;
+	}
+	return (error);
+}
+
+/*
+ * Request switch for IDP datagram sockets.  Unless a case returns early,
+ * m and control are freed here and the request's errno is returned.
+ */
+/*ARGSUSED*/
+idp_usrreq(so, req, m, nam, control)
+	struct socket *so;
+	int req;
+	struct mbuf *m, *nam, *control;
+{
+	struct nspcb *nsp = sotonspcb(so);
+	int error = 0;
+
+	if (req == PRU_CONTROL)
+                return (ns_control(so, (int)m, (caddr_t)nam,
+			(struct ifnet *)control));
+	if (control && control->m_len) {
+		error = EINVAL;
+		goto release;
+	}
+	if (nsp == NULL && req != PRU_ATTACH) {
+		error = EINVAL;
+		goto release;
+	}
+	switch (req) {
+
+	case PRU_ATTACH:
+		if (nsp != NULL) {
+			error = EINVAL;
+			break;
+		}
+		error = ns_pcballoc(so, &nspcb);
+		if (error)
+			break;
+		error = soreserve(so, (u_long) 2048, (u_long) 2048);
+		break;
+
+	case PRU_DETACH:
+		if (nsp == NULL) {
+			error = ENOTCONN;
+			break;
+		}
+		ns_pcbdetach(nsp);
+		break;
+
+	case PRU_BIND:
+		error = ns_pcbbind(nsp, nam);
+		break;
+
+	case PRU_LISTEN:
+		error = EOPNOTSUPP;
+		break;
+
+	case PRU_CONNECT:
+		if (!ns_nullhost(nsp->nsp_faddr)) {
+			error = EISCONN;
+			break;
+		}
+		error = ns_pcbconnect(nsp, nam);
+		if (error == 0)
+			soisconnected(so);
+		break;
+
+	case PRU_CONNECT2:
+		error = EOPNOTSUPP;
+		break;
+
+	case PRU_ACCEPT:
+		error = EOPNOTSUPP;
+		break;
+
+	case PRU_DISCONNECT:
+		if (ns_nullhost(nsp->nsp_faddr)) {
+			error = ENOTCONN;
+			break;
+		}
+		ns_pcbdisconnect(nsp);
+		soisdisconnected(so);
+		break;
+
+	case PRU_SHUTDOWN:
+		socantsendmore(so);
+		break;
+
+	case PRU_SEND:
+	{
+		struct ns_addr laddr;
+		int s;
+
+		if (nam) {
+			laddr = nsp->nsp_laddr;
+			if (!ns_nullhost(nsp->nsp_faddr)) {
+				error = EISCONN;
+				break;
+			}
+			/* Must block input while temporarily connected. */
+			s = splnet();
+			error = ns_pcbconnect(nsp, nam);
+			if (error) {
+				splx(s);
+				break;
+			}
+		} else {
+			if (ns_nullhost(nsp->nsp_faddr)) {
+				error = ENOTCONN;
+				break;
+			}
+		}
+		error = idp_output(nsp, m);
+		m = NULL;	/* not ours to free at release: */
+		if (nam) {
+			ns_pcbdisconnect(nsp);
+			splx(s);
+			nsp->nsp_laddr.x_host = laddr.x_host;
+			nsp->nsp_laddr.x_port = laddr.x_port;
+		}
+	}
+		break;
+
+	case PRU_ABORT:
+		/* Touch so before it is released; sofree() may free it. */
+		/* NOTE(review): confirm ns_pcbdetach() leaves so for sofree(). */
+		soisdisconnected(so);
+		ns_pcbdetach(nsp);
+		sofree(so);
+		break;
+
+	case PRU_SOCKADDR:
+		ns_setsockaddr(nsp, nam);
+		break;
+
+	case PRU_PEERADDR:
+		ns_setpeeraddr(nsp, nam);
+		break;
+
+	case PRU_SENSE:
+		/* stat: don't bother with a blocksize. */
+		return (0);
+
+	case PRU_SENDOOB:
+	case PRU_FASTTIMO:
+	case PRU_SLOWTIMO:
+	case PRU_PROTORCV:
+	case PRU_PROTOSEND:
+		error =  EOPNOTSUPP;
+		break;
+
+	case PRU_CONTROL:
+	case PRU_RCVD:
+	case PRU_RCVOOB:
+		return (EOPNOTSUPP);	/* do not free mbuf's */
+
+	default:
+		panic("idp_usrreq");
+	}
+release:
+	if (control != NULL)
+		m_freem(control);
+	if (m != NULL)
+		m_freem(m);
+	return (error);
+}
+/*
+ * Request switch for raw IDP sockets.
+ */
+/*ARGSUSED*/
+idp_raw_usrreq(so, req, m, nam, control)
+	struct socket *so;
+	int req;
+	struct mbuf *m, *nam, *control;
+{
+	extern struct nspcb nsrawpcb;
+	struct nspcb *rawpcb = sotonspcb(so);
+	int err;
+
+	/* Everything but attach is passed straight to idp_usrreq(). */
+	if (req != PRU_ATTACH)
+		return (idp_usrreq(so, req, m, nam, control));
+
+	/* Attaching needs SS_PRIV and a socket with no pcb yet. */
+	if ((so->so_state & SS_PRIV) == 0 || rawpcb != NULL)
+		return (EINVAL);
+	if ((err = ns_pcballoc(so, &nsrawpcb)) != 0)
+		return (err);
+	if ((err = soreserve(so, (u_long) 2048, (u_long) 2048)) != 0)
+		return (err);
+	/*
+	 * Re-read the pcb (presumably installed by ns_pcballoc()) and set
+	 * the raw defaults: broadcast foreign host, headers on input and output.
+	 */
+	rawpcb = sotonspcb(so);
+	rawpcb->nsp_faddr.x_host = ns_broadhost;
+	rawpcb->nsp_flags = NSP_RAWIN | NSP_RAWOUT;
+	return (0);
+}
+
diff --git a/sys/netns/idp_var.h b/sys/netns/idp_var.h
new file mode 100644
index 00000000000..fc9a4f45d81
--- /dev/null
+++ b/sys/netns/idp_var.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 1984, 1985, 1986, 1987, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)idp_var.h	8.1 (Berkeley) 6/10/93
+ */
+
+/*
+ * IDP Kernel Structures and Variables
+ */
+struct	idpstat {	/* NOTE(review): the "ip" wording below looks inherited from the IP code -- confirm */
+	int	idps_badsum;		/* checksum bad */
+	int	idps_tooshort;		/* packet too short */
+	int	idps_toosmall;		/* not enough data */
+	int	idps_badhlen;		/* ip header length < data size */
+	int	idps_badlen;		/* ip length < ip header length */
+};
+
+#ifdef KERNEL
+struct	idpstat	idpstat;		/* the kernel's instance of these statistics */
+#endif
diff --git a/sys/netns/ns.c b/sys/netns/ns.c
new file mode 100644
index 00000000000..8b76543fce3
--- /dev/null
+++ b/sys/netns/ns.c
@@ -0,0 +1,368 @@
+/*
+ * Copyright (c) 1984, 1985, 1986, 1987, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)ns.c	8.2 (Berkeley) 11/15/93
+ */
+
+#include <sys/param.h>
+#include <sys/mbuf.h>
+#include <sys/ioctl.h>
+#include <sys/protosw.h>
+#include <sys/errno.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+
+#include <net/if.h>
+#include <net/route.h>
+
+#include <netns/ns.h>
+#include <netns/ns_if.h>
+
+#ifdef NS
+
+struct ns_ifaddr *ns_ifaddr;	/* head of the ia_next-linked list of NS interface addresses */
+int ns_interfaces;		/* count kept by ns_control(): ++ on a new entry, -- on SIOCDIFADDR */
+extern struct sockaddr_ns ns_netmask, ns_hostmask;
+
+/*
+ * Generic internet control operations (ioctl's) for the NS family.
+ * The three address reads are open to everyone; every other command
+ * requires SS_PRIV on the socket.  Returns 0 or an errno.
+ */
+/* ARGSUSED */
+ns_control(so, cmd, data, ifp)
+	struct socket *so;
+	int cmd;
+	caddr_t data;
+	register struct ifnet *ifp;
+{
+	register struct ifreq *ifr = (struct ifreq *)data;
+	register struct ns_aliasreq *ifra = (struct ns_aliasreq *)data;
+	register struct ns_ifaddr *ia;
+	struct ifaddr *ifa;
+	struct ns_ifaddr *oia;
+	int error = 0, dstIsNew, hostIsNew;
+
+	/*
+	 * Find address for this interface, if it exists.
+	 */
+	if (ifp == 0)
+		return (EADDRNOTAVAIL);
+	for (ia = ns_ifaddr; ia; ia = ia->ia_next)
+		if (ia->ia_ifp == ifp)
+			break;
+
+	switch (cmd) {
+
+	case SIOCGIFADDR:
+		if (ia == (struct ns_ifaddr *)0)
+			return (EADDRNOTAVAIL);
+		*(struct sockaddr_ns *)&ifr->ifr_addr = ia->ia_addr;
+		return (0);
+
+	case SIOCGIFBRDADDR:
+		if (ia == (struct ns_ifaddr *)0)
+			return (EADDRNOTAVAIL);
+		if ((ifp->if_flags & IFF_BROADCAST) == 0)
+			return (EINVAL);
+		*(struct sockaddr_ns *)&ifr->ifr_dstaddr = ia->ia_broadaddr;
+		return (0);
+
+	case SIOCGIFDSTADDR:
+		if (ia == (struct ns_ifaddr *)0)
+			return (EADDRNOTAVAIL);
+		if ((ifp->if_flags & IFF_POINTOPOINT) == 0)
+			return (EINVAL);
+		*(struct sockaddr_ns *)&ifr->ifr_dstaddr = ia->ia_dstaddr;
+		return (0);
+	}
+
+	if ((so->so_state & SS_PRIV) == 0)
+		return (EPERM);
+
+	switch (cmd) {
+	case SIOCAIFADDR:
+	case SIOCDIFADDR:
+		if (ifra->ifra_addr.sns_family == AF_NS)
+		    for (oia = ia; ia; ia = ia->ia_next) {
+			if (ia->ia_ifp == ifp  &&
+			    ns_neteq(ia->ia_addr.sns_addr,
+				  ifra->ifra_addr.sns_addr))
+			    break;
+		    }
+		if (cmd == SIOCDIFADDR && ia == 0)
+			return (EADDRNOTAVAIL);
+		/* FALLTHROUGH */
+
+	case SIOCSIFADDR:
+	case SIOCSIFDSTADDR:
+		if (ia == (struct ns_ifaddr *)0) {
+			oia = (struct ns_ifaddr *)
+				malloc(sizeof *ia, M_IFADDR, M_WAITOK);
+			if (oia == (struct ns_ifaddr *)NULL)
+				return (ENOBUFS);
+			bzero((caddr_t)oia, sizeof(*oia));
+			if (ia = ns_ifaddr) {
+				for ( ; ia->ia_next; ia = ia->ia_next)
+					;
+				ia->ia_next = oia;
+			} else
+				ns_ifaddr = oia;
+			ia = oia;
+			if (ifa = ifp->if_addrlist) {
+				for ( ; ifa->ifa_next; ifa = ifa->ifa_next)
+					;
+				ifa->ifa_next = (struct ifaddr *) ia;
+			} else
+				ifp->if_addrlist = (struct ifaddr *) ia;
+			ia->ia_ifp = ifp;
+			ia->ia_ifa.ifa_addr = (struct sockaddr *)&ia->ia_addr;
+
+			ia->ia_ifa.ifa_netmask =
+				(struct sockaddr *)&ns_netmask;
+
+			ia->ia_ifa.ifa_dstaddr =
+				(struct sockaddr *)&ia->ia_dstaddr;
+			if (ifp->if_flags & IFF_BROADCAST) {
+				ia->ia_broadaddr.sns_family = AF_NS;
+				ia->ia_broadaddr.sns_len = sizeof(ia->ia_addr);
+				ia->ia_broadaddr.sns_addr.x_host = ns_broadhost;
+			}
+			ns_interfaces++;
+		}
+	}
+
+	switch (cmd) {
+
+	case SIOCSIFDSTADDR:
+		if ((ifp->if_flags & IFF_POINTOPOINT) == 0)
+			return (EINVAL);
+		if (ia->ia_flags & IFA_ROUTE) {
+			rtinit(&(ia->ia_ifa), (int)RTM_DELETE, RTF_HOST);
+			ia->ia_flags &= ~IFA_ROUTE;
+		}
+		if (ifp->if_ioctl) {
+			error = (*ifp->if_ioctl)(ifp, SIOCSIFDSTADDR, ia);
+			if (error)
+				return (error);
+		}
+		*(struct sockaddr *)&ia->ia_dstaddr = ifr->ifr_dstaddr;
+		return (0);
+
+	case SIOCSIFADDR:
+		return (ns_ifinit(ifp, ia,
+				(struct sockaddr_ns *)&ifr->ifr_addr, 1));
+
+	case SIOCDIFADDR:
+		ns_ifscrub(ifp, ia);
+		if ((ifa = ifp->if_addrlist) == (struct ifaddr *)ia)
+			ifp->if_addrlist = ifa->ifa_next;
+		else {
+			while (ifa->ifa_next &&
+			       (ifa->ifa_next != (struct ifaddr *)ia))
+				    ifa = ifa->ifa_next;
+			if (ifa->ifa_next)
+			    ifa->ifa_next = ((struct ifaddr *)ia)->ifa_next;
+			else
+				printf("Couldn't unlink nsifaddr from ifp\n");
+		}
+		oia = ia;
+		if (oia == (ia = ns_ifaddr)) {
+			ns_ifaddr = ia->ia_next;
+		} else {
+			while (ia->ia_next && (ia->ia_next != oia)) {
+				ia = ia->ia_next;
+			}
+			if (ia->ia_next)
+			    ia->ia_next = oia->ia_next;
+			else
+				printf("Didn't unlink nsifadr from list\n");
+		}
+		IFAFREE((&oia->ia_ifa));
+		if (0 == --ns_interfaces) {
+			/*
+			 * We reset to virginity and start all over again
+			 */
+			ns_thishost = ns_zerohost;
+		}
+		return (0);
+	
+	case SIOCAIFADDR:
+		dstIsNew = 0; hostIsNew = 1;
+		if (ia->ia_addr.sns_family == AF_NS) {
+			if (ifra->ifra_addr.sns_len == 0) {
+				ifra->ifra_addr = ia->ia_addr;
+				hostIsNew = 0;
+			} else if (ns_neteq(ifra->ifra_addr.sns_addr,
+					 ia->ia_addr.sns_addr))
+				hostIsNew = 0;
+		}
+		if ((ifp->if_flags & IFF_POINTOPOINT) &&
+		    (ifra->ifra_dstaddr.sns_family == AF_NS)) {
+			if (hostIsNew == 0)
+				ns_ifscrub(ifp, ia);
+			ia->ia_dstaddr = ifra->ifra_dstaddr;
+			dstIsNew  = 1;
+		}
+		if (ifra->ifra_addr.sns_family == AF_NS &&
+					    (hostIsNew || dstIsNew))
+			error = ns_ifinit(ifp, ia, &ifra->ifra_addr, 0);
+		return (error);	/* still 0 when nothing had to be (re)initialized */
+
+	default:
+		if (ifp->if_ioctl == 0)
+			return (EOPNOTSUPP);
+		return ((*ifp->if_ioctl)(ifp, cmd, data));
+	}
+}
+
+/*
+ * Delete the route previously installed for an address, if any.
+ */
+ns_ifscrub(ifp, ia)
+	register struct ifnet *ifp;
+	register struct ns_ifaddr *ia;
+{
+	int rtflags;
+
+	if ((ia->ia_flags & IFA_ROUTE) == 0)
+		return;
+	rtflags = (ifp->if_flags & IFF_POINTOPOINT) ? RTF_HOST : 0;
+	rtinit(&(ia->ia_ifa), (int)RTM_DELETE, rtflags);
+	ia->ia_flags &= ~IFA_ROUTE;
+}
+/*
+ * Initialize an interface's NS address and routing table entry.
+ * Returns 0, EINVAL for an unacceptable host, or the driver's errno.
+ */
+ns_ifinit(ifp, ia, sns, scrub)
+	register struct ifnet *ifp;
+	register struct ns_ifaddr *ia;
+	register struct sockaddr_ns *sns;
+	int scrub;
+{
+	struct sockaddr_ns oldaddr;
+	register union ns_host *h = &ia->ia_addr.sns_addr.x_host;
+	int s = splimp(), error;
+
+	/*
+	 * Set up new addresses.
+	 */
+	oldaddr = ia->ia_addr;
+	ia->ia_addr = *sns;
+	/*
+	 * The convention we shall adopt for naming is that
+	 * a supplied address of zero means that "we don't care".
+	 * if there is a single interface, use the address of that
+	 * interface as our 6 byte host address.
+	 * if there are multiple interfaces, use any address already
+	 * used.
+	 *
+	 * Give the interface a chance to initialize
+	 * if this is its first address,
+	 * and to validate the address if necessary.
+	 */
+	if (ns_hosteqnh(ns_thishost, ns_zerohost)) {
+		if (ifp->if_ioctl &&
+		     (error = (*ifp->if_ioctl)(ifp, SIOCSIFADDR, ia))) {
+			ia->ia_addr = oldaddr;
+			splx(s);
+			return (error);
+		}
+		ns_thishost = *h;
+	} else if (ns_hosteqnh(sns->sns_addr.x_host, ns_zerohost)
+	    || ns_hosteqnh(sns->sns_addr.x_host, ns_thishost)) {
+		*h = ns_thishost;
+		if (ifp->if_ioctl &&
+		     (error = (*ifp->if_ioctl)(ifp, SIOCSIFADDR, ia))) {
+			ia->ia_addr = oldaddr;
+			splx(s);
+			return (error);
+		}
+		if (!ns_hosteqnh(ns_thishost,*h)) {
+			ia->ia_addr = oldaddr;
+			splx(s);
+			return (EINVAL);
+		}
+	} else {
+		ia->ia_addr = oldaddr;
+		splx(s);
+		return (EINVAL);
+	}
+	ia->ia_ifa.ifa_metric = ifp->if_metric;
+	/* Add route for the network. */
+	if (scrub) {
+		ia->ia_ifa.ifa_addr = (struct sockaddr *)&oldaddr;
+		ns_ifscrub(ifp, ia);
+		ia->ia_ifa.ifa_addr = (struct sockaddr *)&ia->ia_addr;
+	}
+	if (ifp->if_flags & IFF_POINTOPOINT)
+		rtinit(&(ia->ia_ifa), (int)RTM_ADD, RTF_HOST|RTF_UP);
+	else {
+		ia->ia_broadaddr.sns_addr.x_net = ia->ia_addr.sns_addr.x_net;
+		rtinit(&(ia->ia_ifa), (int)RTM_ADD, RTF_UP);
+	}
+	ia->ia_flags |= IFA_ROUTE;
+	splx(s);	/* the success path used to leave splimp() raised */
+	return (0);
+}
+
+/*
+ * Pick the interface address through which dst's network is reached.
+ */
+struct ns_ifaddr *
+ns_iaonnetof(dst)
+	register struct ns_addr *dst;
+{
+	register struct ns_ifaddr *ia;
+	register struct ns_addr *peer;
+	register struct ifnet *ifp;
+	struct ns_ifaddr *fallback = 0;
+	union ns_net net = dst->x_net;
+
+	for (ia = ns_ifaddr; ia; ia = ia->ia_next) {
+		if ((ifp = ia->ia_ifp) == 0)
+			continue;
+		if ((ifp->if_flags & IFF_POINTOPOINT) == 0) {
+			if (ns_neteqnn(net, ia->ia_addr.sns_addr.x_net))
+				return (ia);
+			continue;
+		}
+		peer = &satons_addr(ia->ia_dstaddr);
+		if (ns_hosteq(*dst, *peer))
+			return (ia);	/* the link's far end is dst itself */
+		if (ns_neteqnn(net, ia->ia_addr.sns_addr.x_net))
+			fallback = ia;	/* point-to-point on dst's net: keep looking */
+	}
+	return (fallback);
+}
+#endif
diff --git a/sys/netns/ns.h b/sys/netns/ns.h
new file mode 100644
index 00000000000..cf51f0047e9
--- /dev/null
+++ b/sys/netns/ns.h
@@ -0,0 +1,151 @@
+/*
+ * Copyright (c) 1984, 1985, 1986, 1987, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)ns.h	8.1 (Berkeley) 6/10/93
+ */
+
+/*
+ * Constants and Structures defined by the Xerox Network Software
+ * per "Internet Transport Protocols", XSIS 028112, December 1981
+ */
+
+/*
+ * Protocols
+ */
+#define NSPROTO_RI	1		/* Routing Information */
+#define NSPROTO_ECHO	2		/* Echo Protocol */
+#define NSPROTO_ERROR	3		/* Error Protocol */
+#define NSPROTO_PE	4		/* Packet Exchange */
+#define NSPROTO_SPP	5		/* Sequenced Packet */
+#define NSPROTO_RAW	255		/* Placemarker*/
+#define NSPROTO_MAX	256		/* Placemarker*/
+
+
+/*
+ * Port/Socket numbers: network standard functions
+ */
+
+#define NSPORT_RI	1		/* Routing Information */
+#define NSPORT_ECHO	2		/* Echo */
+#define NSPORT_RE	3		/* Router Error */
+
+/*
+ * Ports < NSPORT_RESERVED are reserved for privileged
+ * processes (e.g. root).
+ */
+#define NSPORT_RESERVED		3000
+
+/* flags passed to ns_output as last parameter */
+
+#define	NS_FORWARDING		0x1	/* most of idp header exists */
+#define	NS_ROUTETOIF		0x10	/* same as SO_DONTROUTE */
+#define	NS_ALLOWBROADCAST	SO_BROADCAST	/* can send broadcast packets */
+
+#define NS_MAXHOPS		15
+
+/* flags passed to get/set socket option */
+#define	SO_HEADERS_ON_INPUT	1
+#define	SO_HEADERS_ON_OUTPUT	2
+#define	SO_DEFAULT_HEADERS	3
+#define	SO_LAST_HEADER		4
+#define	SO_NSIP_ROUTE		5
+#define SO_SEQNO		6
+#define	SO_ALL_PACKETS		7
+#define SO_MTU			8
+
+
+/*
+ * NS addressing
+ */
+union ns_host {
+	u_char	c_host[6];	/* host number viewed as six bytes */
+	u_short	s_host[3];	/* same storage viewed as three u_shorts */
+};
+
+union ns_net {
+	u_char	c_net[4];	/* network number viewed as four bytes */
+	u_short	s_net[2];	/* same storage viewed as two u_shorts */
+};
+
+union ns_net_u {
+	union ns_net	net_e;	/* network number in its union form */
+	u_long		long_e;	/* overlay for access as one u_long */
+};
+
+struct ns_addr {
+	union ns_net	x_net;	/* network part */
+	union ns_host	x_host;	/* host part */
+	u_short	x_port;		/* port part */
+};
+
+/*
+ * Socket address, Xerox style
+ */
+struct sockaddr_ns {
+	u_char		sns_len;	/* NOTE(review): presumably the length of this sockaddr -- confirm */
+	u_char		sns_family;	/* NOTE(review): presumably the address family tag -- confirm */
+	struct ns_addr	sns_addr;	/* the net/host/port triple */
+	char		sns_zero[2];	/* NOTE(review): presumably padding -- confirm */
+};
+#define sns_port sns_addr.x_port	/* shorthand for the port inside sns_addr */
+
+#ifdef vax
+#define ns_netof(a) (*(long *) & ((a).x_net)) /* XXX - not needed */
+#endif
+#define ns_neteqnn(a,b) (((a).s_net[0]==(b).s_net[0]) && \
+					((a).s_net[1]==(b).s_net[1])) /* are two union ns_net equal? (args evaluated twice) */
+#define ns_neteq(a,b) ns_neteqnn((a).x_net, (b).x_net) /* do two ns_addr have equal net parts? */
+#define satons_addr(sa)	(((struct sockaddr_ns *)&(sa))->sns_addr) /* the ns_addr inside a sockaddr object; takes the object, not a pointer */
+#define ns_hosteqnh(s,t) ((s).s_host[0] == (t).s_host[0] && \
+	(s).s_host[1] == (t).s_host[1] && (s).s_host[2] == (t).s_host[2]) /* are two union ns_host equal? */
+#define ns_hosteq(s,t) (ns_hosteqnh((s).x_host,(t).x_host)) /* do two ns_addr have equal host parts? */
+#define ns_nullhost(x) (((x).x_host.s_host[0]==0) && \
+	((x).x_host.s_host[1]==0) && ((x).x_host.s_host[2]==0)) /* is the host part of an ns_addr all zero? */
+
+#ifdef KERNEL
+extern struct domain nsdomain;
+union ns_host ns_thishost;	/* this system's host number; read and set while an interface address is configured */
+union ns_host ns_zerohost;	/* NOTE(review): presumably the all-zero host -- confirm */
+union ns_host ns_broadhost;	/* NOTE(review): presumably the broadcast host -- confirm */
+union ns_net ns_zeronet;	/* NOTE(review): presumably the all-zero net -- confirm */
+union ns_net ns_broadnet;	/* NOTE(review): presumably the broadcast net -- confirm */
+u_short ns_cksum();		/* checksum over an mbuf chain: ns_cksum(m, len) */
+#else	/* !KERNEL: prototypes for user-level code */
+
+#include <sys/cdefs.h>
+
+__BEGIN_DECLS
+extern struct ns_addr ns_addr __P((const char *));	/* NOTE(review): presumably parses text into an ns_addr -- confirm */
+extern char *ns_ntoa __P((struct ns_addr));	/* NOTE(review): presumably formats an ns_addr as text -- confirm */
+__END_DECLS
+
+#endif	/* KERNEL */
diff --git a/sys/netns/ns_cksum.c b/sys/netns/ns_cksum.c
new file mode 100644
index 00000000000..52eba8bce81
--- /dev/null
+++ b/sys/netns/ns_cksum.c
@@ -0,0 +1,204 @@
+/*
+ * Copyright (c) 1982, 1992, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)ns_cksum.c	8.1 (Berkeley) 6/10/93
+ */
+
+#include <sys/param.h>
+#include <sys/mbuf.h>
+
+/*
+ * Checksum routine for Network Systems Protocol Packets (Big-Endian).
+ *
+ * This routine is very heavily used in the network
+ * code and should be modified for each CPU to be as fast as possible.
+ */
+
+#define ADDCARRY(x)  { if ((x) > 65535) (x) -= 65535; }	/* end-around carry: subtract 65535 once when x > 65535 */
+#define FOLD(x) {l_util.l = (x); (x) = l_util.s[0] + l_util.s[1]; ADDCARRY(x);}	/* add the two u_short halves of x; needs l_util in scope */
+
+u_short
+ns_cksum(m, len)
+	register struct mbuf *m;	/* first mbuf of the chain to sum */
+	register int len;		/* number of bytes of the chain to cover */
+{
+	register u_short *w;		/* read cursor inside the current mbuf */
+	register int sum = 0;		/* running checksum accumulator */
+	register int mlen = 0;		/* bytes left in this mbuf; -1 means an odd byte is pending */
+	register int sum2;		/* accumulator for the vv() bytes on the unaligned path */
+
+	union {
+		u_short s[2];
+		long	l;
+	} l_util;			/* scratch for FOLD -- NOTE(review): assumes a long is two u_shorts wide */
+
+	for (;m && len; m = m->m_next) {
+		if (m->m_len == 0)
+			continue;
+		/*
+		 * Each trip around the loop adds in
+		 * the words of one mbuf segment.
+		 */
+		w = mtod(m, u_short *);
+		if (mlen == -1) {
+			/*
+			 * The last segment ended in the middle of a word;
+			 * this segment's first byte supplies the other half.
+			 */
+#if BYTE_ORDER == BIG_ENDIAN
+			sum  += *(u_char *)w;
+#else
+			sum  += *(u_char *)w << 8;
+#endif
+			sum += sum;
+			w = (u_short *)(1 + (char *)w);
+			mlen = m->m_len - 1;
+			len--;
+			FOLD(sum);
+		} else
+			mlen = m->m_len;
+		if (len < mlen)
+			mlen = len;
+		len -= mlen;
+		/*
+		 * We can do a 16 bit ones complement sum using
+		 * 32 bit arithmetic registers for adding,
+		 * with carries from the low added
+		 * into the high (by normal carry-chaining)
+		 * so long as we fold back before 16 carries have occurred.
+		 */
+		if (1 & (int) w)	/* odd address: take the byte-at-a-time path */
+			goto uuuuglyy;
+#ifndef TINY
+/* -DTINY reduces the size from 1250 to 550, but slows it down by 22% */
+		while ((mlen -= 32) >= 0) {	/* 16 words per pass */
+			sum += w[0]; sum += sum; sum += w[1]; sum += sum;
+			sum += w[2]; sum += sum; sum += w[3]; sum += sum;
+			sum += w[4]; sum += sum; sum += w[5]; sum += sum;
+			sum += w[6]; sum += sum; sum += w[7]; sum += sum;
+			FOLD(sum);	/* back to 16 bits after eight add-and-double steps */
+			sum += w[8]; sum += sum; sum += w[9]; sum += sum;
+			sum += w[10]; sum += sum; sum += w[11]; sum += sum;
+			sum += w[12]; sum += sum; sum += w[13]; sum += sum;
+			sum += w[14]; sum += sum; sum += w[15]; sum += sum;
+			FOLD(sum);
+			w += 16;
+		}
+		mlen += 32;
+#endif
+		while ((mlen -= 8) >= 0) {	/* 4 words per pass */
+			sum += w[0]; sum += sum; sum += w[1]; sum += sum;
+			sum += w[2]; sum += sum; sum += w[3]; sum += sum;
+			FOLD(sum);
+			w += 4;
+		}
+		mlen += 8;
+		while ((mlen -= 2) >= 0) {	/* remaining whole words, one at a time */
+			sum += *w++; sum += sum;	/* add a word, then double (a 1-bit left rotate once folded mod 65535) */
+		}
+		goto commoncase;
+uuuuglyy:
+#if BYTE_ORDER == BIG_ENDIAN
+#define ww(n) (((u_char *)w)[n + n + 1])	/* low-order byte of word n */
+#define vv(n) (((u_char *)w)[n + n])	/* high-order byte of word n */
+#else
+#if BYTE_ORDER == LITTLE_ENDIAN
+#define vv(n) (((u_char *)w)[n + n + 1])	/* high-order byte of word n */
+#define ww(n) (((u_char *)w)[n + n])	/* low-order byte of word n */
+#endif
+#endif
+		sum2 = 0;
+#ifndef TINY
+		while ((mlen -= 32) >= 0) {	/* same unrolling as above, one byte lane at a time */
+		    sum += ww(0); sum += sum; sum += ww(1); sum += sum;
+		    sum += ww(2); sum += sum; sum += ww(3); sum += sum;
+		    sum += ww(4); sum += sum; sum += ww(5); sum += sum;
+		    sum += ww(6); sum += sum; sum += ww(7); sum += sum;
+		    FOLD(sum);
+		    sum += ww(8); sum += sum; sum += ww(9); sum += sum;
+		    sum += ww(10); sum += sum; sum += ww(11); sum += sum;
+		    sum += ww(12); sum += sum; sum += ww(13); sum += sum;
+		    sum += ww(14); sum += sum; sum += ww(15); sum += sum;
+		    FOLD(sum);
+		    sum2 += vv(0); sum2 += sum2; sum2 += vv(1); sum2 += sum2;
+		    sum2 += vv(2); sum2 += sum2; sum2 += vv(3); sum2 += sum2;
+		    sum2 += vv(4); sum2 += sum2; sum2 += vv(5); sum2 += sum2;
+		    sum2 += vv(6); sum2 += sum2; sum2 += vv(7); sum2 += sum2;
+		    FOLD(sum2);
+		    sum2 += vv(8); sum2 += sum2; sum2 += vv(9); sum2 += sum2;
+		    sum2 += vv(10); sum2 += sum2; sum2 += vv(11); sum2 += sum2;
+		    sum2 += vv(12); sum2 += sum2; sum2 += vv(13); sum2 += sum2;
+		    sum2 += vv(14); sum2 += sum2; sum2 += vv(15); sum2 += sum2;
+		    FOLD(sum2);
+		    w += 16;
+		}
+		mlen += 32;
+#endif
+		while ((mlen -= 8) >= 0) {
+		    sum += ww(0); sum += sum; sum += ww(1); sum += sum;
+		    sum += ww(2); sum += sum; sum += ww(3); sum += sum;
+		    FOLD(sum);
+		    sum2 += vv(0); sum2 += sum2; sum2 += vv(1); sum2 += sum2;
+		    sum2 += vv(2); sum2 += sum2; sum2 += vv(3); sum2 += sum2;
+		    FOLD(sum2);
+		    w += 4;
+		}
+		mlen += 8;
+		while ((mlen -= 2) >= 0) {
+			sum += ww(0); sum += sum;
+			sum2 += vv(0); sum2 += sum2;
+			w++;
+		}
+		sum += (sum2 << 8);	/* the vv() lane holds the high-order bytes: merge it in */
+commoncase:
+		if (mlen == -1) {	/* one byte left over: add it as the first byte of a word */
+#if BYTE_ORDER == BIG_ENDIAN
+			sum += *(u_char *)w << 8;
+#else
+			sum += *(u_char *)w;
+#endif
+		}
+		FOLD(sum);
+	}
+	if (mlen == -1) {
+		/* We had an odd number of bytes to sum; assume a garbage
+		   byte of zero and clean up */
+		sum += sum;
+		FOLD(sum);
+	}
+	/*
+	 * sum has already been kept to low sixteen bits.
+	 * just examine result and exit.
+	 */
+	if(sum==0xffff) sum = 0;	/* never return 0xffff -- NOTE(review): presumably because 0xffff marks "no checksum"; confirm */
+	return (sum);
+}
diff --git a/sys/netns/ns_error.c b/sys/netns/ns_error.c
new file mode 100644
index 00000000000..03473a23680
--- /dev/null
+++ b/sys/netns/ns_error.c
@@ -0,0 +1,323 @@
+/*
+ * Copyright (c) 1984, 1988, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)ns_error.c	8.1 (Berkeley) 6/10/93
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/protosw.h>
+#include <sys/socket.h>
+#include <sys/time.h>
+#include <sys/kernel.h>
+
+#include <net/route.h>
+
+#include <netns/ns.h>
+#include <netns/ns_pcb.h>
+#include <netns/idp.h>
+#include <netns/ns_error.h>
+
+#ifdef lint
+#define NS_ERRPRINTFS 1
+#endif
+
+#ifdef NS_ERRPRINTFS
+/*
+ * NS_ERR routines: error generation, receive packet processing, and
+ * routines to turnaround packets back to the originator.
+ */
+int	ns_errprintfs = 0;
+#endif
+
+ns_err_x(c)
+{
+	register u_short *codes = ns_errstat.ns_es_codes;
+	register int slot;
+	u_short code = c;	/* narrowed to the table's element type */
+
+	/* Code zero always maps to slot 0; other slots are claimed on demand. */
+	if (code == 0)
+		return (0);
+	for (slot = 1; slot < NS_ERR_MAX - 1; slot++) {
+		if (codes[slot] == 0)
+			codes[slot] = code;	/* first free slot: claim it */
+		if (codes[slot] == code)
+			return (slot);
+	}
+	/* No match and no free slot: fall back to the final index,
+	 * NS_ERR_MAX - 1, which the loop never assigns. */
+	return (slot);
+}
+
+/*
+ * Build and send an NS error packet of the given type/param in reply to
+ * the offending packet om; om is freed here unless ns_echo() took it over.
+ */
+
+ns_error(om, type, param)
+	struct mbuf *om;	/* offending packet; its data starts with the IDP header */
+	int type;		/* NS_ERR_* code to report */
+{
+	register struct ns_epidp *ep;
+	struct mbuf *m;
+	struct idp *nip;
+	register struct idp *oip = mtod(om, struct idp *);
+	extern int idpcksum;
+
+	/*
+	 * If this packet was sent to the echo port,
+	 * and nobody was there, just echo it.
+	 * (Yes, this is a wart!)
+	 */
+	if (type == NS_ERR_NOSOCK &&
+	    oip->idp_dna.x_port == htons(2) &&	/* 2 is NSPORT_ECHO */
+	    (type = ns_echo(om))==0)
+		return;		/* om is not freed here: ns_echo() must own it whenever it returns 0 (NOTE(review): NS_ERR_UNSPEC is 0 too) */
+
+#ifdef NS_ERRPRINTFS
+	if (ns_errprintfs)
+		printf("ns_err_error(%x, %d, %d)\n", oip, type, param);
+#endif
+	/*
+	 * Don't Generate error packets in response to multicasts.
+	 */
+	if (oip->idp_dna.x_host.c_host[0] & 1)	/* low bit of the first host byte */
+		goto freeit;
+
+	ns_errstat.ns_es_error++;
+	/*
+	 * Make sure that the old IDP packet had 30 bytes of data to return;
+	 * if not, don't bother.  Also don't EVER error if the old
+	 * packet protocol was NS_ERR.
+	 */
+	if (oip->idp_len < sizeof(struct idp)) {	/* NOTE(review): no ntohs() here -- presumably already host order; confirm */
+		ns_errstat.ns_es_oldshort++;
+		goto freeit;
+	}
+	if (oip->idp_pt == NSPROTO_ERROR) {
+		ns_errstat.ns_es_oldns_err++;
+		goto freeit;
+	}
+
+	/*
+	 * First, formulate ns_err message
+	 */
+	m = m_gethdr(M_DONTWAIT, MT_HEADER);
+	if (m == NULL)
+		goto freeit;	/* no mbuf available: give up quietly */
+	m->m_len = sizeof(*ep);
+	MH_ALIGN(m, m->m_len);
+	ep = mtod(m, struct ns_epidp *);
+	if ((u_int)type > NS_ERR_TOO_BIG)	/* beyond the largest defined code */
+		panic("ns_err_error");
+	ns_errstat.ns_es_outhist[ns_err_x(type)]++;
+	ep->ns_ep_errp.ns_err_num = htons((u_short)type);
+	ep->ns_ep_errp.ns_err_param = htons((u_short)param);
+	bcopy((caddr_t)oip, (caddr_t)&ep->ns_ep_errp.ns_err_idp, 42);	/* 42 = the 30-byte IDP header plus ns_err_lev2[12] that follows it */
+	nip = &ep->ns_ep_idp;
+	nip->idp_len = sizeof(*ep);
+	nip->idp_len = htons((u_short)nip->idp_len);
+	nip->idp_pt = NSPROTO_ERROR;
+	nip->idp_tc = 0;
+	nip->idp_dna = oip->idp_sna;	/* reply goes to the offender's source ... */
+	nip->idp_sna = oip->idp_dna;	/* ... from the address it was sent to */
+	if (idpcksum) {
+		nip->idp_sum = 0;	/* field must be zero while summing */
+		nip->idp_sum = ns_cksum(m, sizeof(*ep));
+	} else 
+		nip->idp_sum = 0xffff;	/* value stored when checksumming is off */
+	(void) ns_output(m, (struct route *)0, 0);
+
+freeit:
+	m_freem(om);
+}
+
+ns_printhost(p)
+register struct ns_addr *p;
+{
+	register u_short *net = p->x_net.s_net;
+	register u_short *host = p->x_host.s_host;
+
+	/*
+	 * Every 16-bit word is printed as stored, in unpadded hex, so the
+	 * words making up one field run together in the output.
+	 */
+	printf("<net:%x%x,host:%x%x%x,port:%x>",
+	    net[0], net[1], host[0], host[1], host[2], p->x_port);
+}
+
+/*
+ * Process a received NS_ERR message and pass it up to SPP or IDP; frees m.
+ */
+ns_err_input(m)
+	struct mbuf *m;		/* received packet: IDP header followed by the error body */
+{
+	register struct ns_errp *ep;
+	register struct ns_epidp *epidp = mtod(m, struct ns_epidp *);
+	register int i;
+	int type, code, param;
+
+	/*
+	 * Locate the ns_err structure in the mbuf, making sure a
+	 * complete struct ns_epidp is contiguous before it is read.
+	 */
+#ifdef NS_ERRPRINTFS
+	if (ns_errprintfs) {
+		printf("ns_err_input from ");
+		ns_printhost(&epidp->ns_ep_idp.idp_sna);
+		printf("len %d\n", ntohs(epidp->ns_ep_idp.idp_len));
+	}
+#endif
+	i = sizeof (struct ns_epidp);
+	if (((m->m_flags & M_EXT) || m->m_len < i) &&
+		(m = m_pullup(m, i)) == 0)  {
+		ns_errstat.ns_es_tooshort++;
+		return;		/* m is null here, nothing left to free */
+	}
+	ep = &(mtod(m, struct ns_epidp *)->ns_ep_errp);
+	type = ntohs(ep->ns_err_num);
+	param = ntohs(ep->ns_err_param);
+	ns_errstat.ns_es_inhist[ns_err_x(type)]++;
+
+#ifdef NS_ERRPRINTFS
+	/*
+	 * Message type specific processing.
+	 */
+	if (ns_errprintfs)
+		printf("ns_err_input, type %d param %d\n", type, param);
+#endif
+	/* NS_ERR_TOO_BIG is itself a valid code; only larger values are bad. */
+	if (type > NS_ERR_TOO_BIG)
+		goto badcode;
+	ns_errstat.ns_es_outhist[ns_err_x(type)]++;	/* NOTE(review): bumps the *out* histogram on input (inhist was counted above) -- confirm intent */
+	switch (type) {
+
+	case NS_ERR_UNREACH_HOST:
+		code = PRC_UNREACH_NET;
+		goto deliver;
+
+	case NS_ERR_TOO_OLD:
+		code = PRC_TIMXCEED_INTRANS;
+		goto deliver;
+
+	case NS_ERR_TOO_BIG:
+		code = PRC_MSGSIZE;
+		goto deliver;
+
+	case NS_ERR_FULLUP:
+		code = PRC_QUENCH;
+		goto deliver;
+
+	case NS_ERR_NOSOCK:
+		code = PRC_UNREACH_PORT;
+		goto deliver;
+
+	case NS_ERR_UNSPEC_T:
+	case NS_ERR_BADSUM_T:
+	case NS_ERR_BADSUM:
+	case NS_ERR_UNSPEC:
+		code = PRC_PARAMPROB;
+		goto deliver;
+
+	deliver:
+		/*
+		 * Problem with datagram; advise higher level routines.
+		 */
+#ifdef NS_ERRPRINTFS
+		if (ns_errprintfs)
+			printf("deliver to protocol %d\n",
+				       ep->ns_err_idp.idp_pt);
+#endif
+		switch(ep->ns_err_idp.idp_pt) {	/* protocol of the packet quoted in the error */
+		case NSPROTO_SPP:
+			spp_ctlinput(code, (caddr_t)ep);
+			break;
+
+		default:
+			idp_ctlinput(code, (caddr_t)ep);
+		}
+
+		goto freeit;
+
+	default:	/* in range, but not one of the codes handled above */
+	badcode:
+		ns_errstat.ns_es_badcode++;
+		goto freeit;
+
+	}
+freeit:
+	m_freem(m);
+}
+
+#ifdef notdef	/* compiled only if notdef is defined */
+u_long
+nstime()
+{
+	int s = splclock();	/* NOTE(review): presumably blocks clock interrupts while `time' is read -- confirm */
+	u_long t;
+
+	t = (time.tv_sec % (24*60*60)) * 1000 + time.tv_usec / 1000;	/* milliseconds into the current 24-hour span of tv_sec */
+	splx(s);
+	return (htonl(t));	/* result is handed back in network byte order */
+}
+#endif
+
+ns_echo(m)
+struct mbuf *m;
+{
+	register struct idp *idp = mtod(m, struct idp *);
+	register struct echo {
+	    struct idp	ec_idp;
+	    u_short		ec_op; /* Operation, 1 = request, 2 = reply */
+	} *ec = (struct echo *)idp;
+	struct ns_addr temp;
+
+	/* Returns 0 once m is consumed (sent or freed); a non-zero code leaves m to the caller. */
+	if (idp->idp_pt!=NSPROTO_ECHO) return(NS_ERR_NOSOCK);
+	/* NS_ERR_UNSPEC is 0, i.e. "consumed", so a bad op must free m here or it leaks. */
+	if (ec->ec_op!=htons(1)) { m_freem(m); return(NS_ERR_UNSPEC); }
+	ec->ec_op = htons(2);
+	/* Swap the endpoints so the reply travels back to the requester. */
+	temp = idp->idp_dna;
+	idp->idp_dna = idp->idp_sna;
+	idp->idp_sna = temp;
+	if (idp->idp_sum != 0xffff) {	/* 0xffff: sender did not checksum, leave it alone */
+		idp->idp_sum = 0;
+		idp->idp_sum = ns_cksum(m,
+		    (int)(((ntohs(idp->idp_len) - 1)|1)+1));	/* length rounded up to even */
+	}
+	(void) ns_output(m, (struct route *)0, NS_FORWARDING);
+	return(0);
+}
diff --git a/sys/netns/ns_error.h b/sys/netns/ns_error.h
new file mode 100644
index 00000000000..992911f1552
--- /dev/null
+++ b/sys/netns/ns_error.h
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 1984, 1988, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)ns_error.h	8.1 (Berkeley) 6/10/93
+ */
+
+/*
+ * Xerox NS error messages
+ */
+
+struct ns_errp {
+	u_short		ns_err_num;		/* Error Number */
+	u_short		ns_err_param;		/* Error Parameter */
+	struct idp	ns_err_idp;		/* Initial segment of offending
+						   packet */
+	u_char		ns_err_lev2[12];	/* at least this much higher
+						   level protocol */
+};
+struct  ns_epidp {
+	struct idp ns_ep_idp;
+	struct ns_errp ns_ep_errp;
+};
+
+#define	NS_ERR_UNSPEC	0	/* Unspecified Error detected at dest. */
+#define	NS_ERR_BADSUM	1	/* Bad Checksum detected at dest */
+#define	NS_ERR_NOSOCK	2	/* Specified socket does not exist at dest*/
+#define	NS_ERR_FULLUP	3	/* Dest. refuses packet due to resource lim.*/
+#define	NS_ERR_UNSPEC_T	0x200	/* Unspec. Error occurred before reaching dest*/
+#define	NS_ERR_BADSUM_T	0x201	/* Bad Checksum detected in transit */
+#define	NS_ERR_UNREACH_HOST	0x202	/* Dest cannot be reached from here*/
+#define	NS_ERR_TOO_OLD	0x203	/* Packet x'd 15 routers without delivery*/
+#define	NS_ERR_TOO_BIG	0x204	/* Packet too large to be forwarded through
+				   some intermediate gateway.  The error
+				   parameter field contains the max packet
+				   size that can be accommodated */
+#define NS_ERR_MAX 20	/* number of slots in each per-code table of struct ns_errstat */
+
+/*
+ * Variables related to this implementation
+ * of the network systems error message protocol.
+ */
+struct	ns_errstat {
+/* statistics related to ns_err packets generated */
+	int	ns_es_error;		/* # of calls to ns_error */
+	int	ns_es_oldshort;		/* no error 'cuz old idp too short */
+	int	ns_es_oldns_err;	/* no error 'cuz old was ns_err */
+	int	ns_es_outhist[NS_ERR_MAX];	/* per-code counts, indexed by ns_err_x() slot */
+/* statistics related to input messages processed */
+	int	ns_es_badcode;		/* ns_err_code out of range */
+	int	ns_es_tooshort;		/* could not get a whole struct ns_epidp */
+	int	ns_es_checksum;		/* bad checksum */
+	int	ns_es_badlen;		/* calculated bound mismatch */
+	int	ns_es_reflect;		/* number of responses */
+	int	ns_es_inhist[NS_ERR_MAX];	/* per-code counts, indexed by ns_err_x() slot */
+	u_short	ns_es_codes[NS_ERR_MAX];/* error code held by each histogram slot
+					   since we might not know all */
+};
+
+#ifdef KERNEL
+struct	ns_errstat ns_errstat;
+#endif
diff --git a/sys/netns/ns_if.h b/sys/netns/ns_if.h
new file mode 100644
index 00000000000..3abb284a1dd
--- /dev/null
+++ b/sys/netns/ns_if.h
@@ -0,0 +1,82 @@
+/*
+ * Copyright (c) 1984, 1985, 1986, 1987, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)ns_if.h	8.1 (Berkeley) 6/10/93
+ */
+
+/*
+ * Interface address, xerox version.  One of these structures
+ * is allocated for each interface with an XNS address.
+ * The ifaddr structure contains the protocol-independent part
+ * of the structure and is assumed to be first.
+ */
+
+struct ns_ifaddr {
+	struct	ifaddr ia_ifa;		/* protocol-independent info */
+#define	ia_ifp		ia_ifa.ifa_ifp
+#define	ia_flags	ia_ifa.ifa_flags
+	struct	ns_ifaddr *ia_next;	/* next in list of xerox addresses */
+	struct	sockaddr_ns ia_addr;	/* reserve space for my address */
+	struct	sockaddr_ns ia_dstaddr;	/* broadcast address, or the far end on point-to-point links */
+#define ia_broadaddr	ia_dstaddr
+	struct	sockaddr_ns ia_netmask;	/* space for my network mask */
+};
+
+struct	ns_aliasreq {
+	char	ifra_name[IFNAMSIZ];		/* if name, e.g. "en0" */
+	struct	sockaddr_ns ifra_addr;
+	struct	sockaddr_ns ifra_broadaddr;
+#define ifra_dstaddr ifra_broadaddr
+};
+/*
+ * Given a pointer to an ns_ifaddr (ifaddr),
+ * return a pointer to the addr as a sockaddr_ns.
+ */
+
+#define	IA_SNS(ia) (&(((struct ns_ifaddr *)(ia))->ia_addr))
+
+/* This is not the right place for this but where is? */
+#define	ETHERTYPE_NS	0x0600
+
+#ifdef	NSIP
+struct nsip_req {
+	struct sockaddr rq_ns;	/* must be ns format destination */
+	struct sockaddr rq_ip;	/* must be ip format gateway */
+	short rq_flags;
+};
+#endif
+
+#ifdef	KERNEL
+struct	ns_ifaddr *ns_ifaddr;	/* head of the address list chained through ia_next */
+struct	ns_ifaddr *ns_iaonnetof();	/* look up an entry by destination ns_addr */
+struct	ifqueue	nsintrq;	/* XNS input packet queue */
+#endif	/* KERNEL */
diff --git a/sys/netns/ns_input.c b/sys/netns/ns_input.c
new file mode 100644
index 00000000000..7a6e1babc7c
--- /dev/null
+++ b/sys/netns/ns_input.c
@@ -0,0 +1,485 @@
+/*
+ * Copyright (c) 1984, 1985, 1986, 1987, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)ns_input.c	8.1 (Berkeley) 6/10/93
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/domain.h>
+#include <sys/protosw.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/errno.h>
+#include <sys/time.h>
+#include <sys/kernel.h>
+
+#include <net/if.h>
+#include <net/route.h>
+#include <net/raw_cb.h>
+
+#include <netns/ns.h>
+#include <netns/ns_if.h>
+#include <netns/ns_pcb.h>
+#include <netns/idp.h>
+#include <netns/idp_var.h>
+#include <netns/ns_error.h>
+
+/*
+ * NS initialization.
+ *
+ * Global XNS state shared by the input, forwarding and control paths.
+ */
+union ns_host	ns_thishost;	/* our own host id; not assigned in this file */
+union ns_host	ns_zerohost;	/* never written here, so stays all zeroes */
+union ns_host	ns_broadhost;	/* set to all ones by ns_init() */
+union ns_net	ns_zeronet;	/* never written here, so stays all zeroes */
+union ns_net	ns_broadnet;	/* set to all ones by ns_init() */
+struct sockaddr_ns ns_netmask, ns_hostmask;	/* filled in by ns_init() */
+
+/* three 16-bit words of ones: source pattern for the broadcast net/host */
+static u_short allones[] = {-1, -1, -1};
+
+/* list heads; ns_init() links each one to itself (empty circular list) */
+struct nspcb nspcb;
+struct nspcb nsrawpcb;
+
+struct ifqueue	nsintrq;		/* input queue drained by nsintr() */
+int	nsqmaxlen = IFQ_MAXLEN;		/* copied into nsintrq.ifq_maxlen */
+
+int	idpcksum = 1;		/* nonzero: nsintr() verifies IDP checksums */
+long	ns_pexseq;		/* seeded from time.tv_usec in ns_init() */
+
+/*
+ * One-time set-up of the XNS globals: empty pcb lists, input queue
+ * limit, packet-exchange sequence seed, broadcast constants and the
+ * net/host masks.
+ */
+ns_init()
+{
+	extern struct timeval time;
+
+	/* An empty circular list has its head pointing at itself. */
+	nspcb.nsp_prev = &nspcb;
+	nspcb.nsp_next = &nspcb;
+	nsrawpcb.nsp_prev = &nsrawpcb;
+	nsrawpcb.nsp_next = &nsrawpcb;
+
+	nsintrq.ifq_maxlen = nsqmaxlen;
+	ns_pexseq = time.tv_usec;
+
+	/* Broadcast net and host are the all-ones bit patterns. */
+	ns_broadnet = *(union ns_net *)allones;
+	ns_broadhost = *(union ns_host *)allones;
+
+	/* Net mask covers the network number only ... */
+	ns_netmask.sns_len = 6;
+	ns_netmask.sns_addr.x_net = ns_broadnet;
+
+	/* ... the host mask covers network and host. */
+	ns_hostmask.sns_len = 12;
+	ns_hostmask.sns_addr.x_net = ns_broadnet;
+	ns_hostmask.sns_addr.x_host = ns_broadhost;
+}
+
+/*
+ * Idp input routine.  Pass to next level.
+ *
+ * Drains nsintrq: each pass through `next' dequeues one packet and the
+ * routine returns once the queue is empty.  For every packet it
+ *   - makes sure the IDP header is contiguous in the first mbuf,
+ *   - hands a copy to every raw listener on nsrawpcb,
+ *   - validates/trims the length and (optionally) the checksum,
+ *   - forwards packets that are not for this host,
+ *   - otherwise looks up the pcb and dispatches to SPP, the error
+ *     protocol, or the generic idp_input().
+ *
+ * nsintr_getpck counts dequeue attempts (including the final empty
+ * one); nsintr_swtch counts packets that reached the pcb lookup.
+ */
+int nsintr_getpck = 0;
+int nsintr_swtch = 0;
+nsintr()
+{
+	register struct idp *idp;
+	register struct mbuf *m;
+	register struct nspcb *nsp;
+	register int i;
+	int len, s, error;
+	char oddpacketp;
+
+next:
+	/*
+	 * Get next datagram off input queue and get IDP header
+	 * in first mbuf.
+	 */
+	s = splimp();
+	IF_DEQUEUE(&nsintrq, m);
+	splx(s);
+	nsintr_getpck++;
+	if (m == 0)
+		return;
+	/* pull up when the header is short or lives in external storage */
+	if ((m->m_flags & M_EXT || m->m_len < sizeof (struct idp)) &&
+	    (m = m_pullup(m, sizeof (struct idp))) == 0) {
+		idpstat.idps_toosmall++;
+		goto next;
+	}
+
+	/*
+	 * Give any raw listeners a crack at the packet
+	 * (this happens before any length or checksum validation).
+	 */
+	for (nsp = nsrawpcb.nsp_next; nsp != &nsrawpcb; nsp = nsp->nsp_next) {
+		struct mbuf *m1 = m_copy(m, 0, (int)M_COPYALL);
+		if (m1) idp_input(m1, nsp);
+	}
+
+	idp = mtod(m, struct idp *);
+	len = ntohs(idp->idp_len);
+	/* intentional assignment: remember whether the length was odd */
+	if (oddpacketp = len & 1) {
+		len++;		/* If this packet is of odd length,
+				   preserve garbage byte for checksum */
+	}
+
+	/*
+	 * Check that the amount of data in the buffers
+	 * is at least as much as the IDP header would have us expect.
+	 * Trim mbufs if longer than we expect.
+	 * Drop packet if shorter than we expect.
+	 */
+	if (m->m_pkthdr.len < len) {
+		idpstat.idps_tooshort++;
+		goto bad;
+	}
+	if (m->m_pkthdr.len > len) {
+		if (m->m_len == m->m_pkthdr.len) {
+			/* single mbuf: just shorten it in place */
+			m->m_len = len;
+			m->m_pkthdr.len = len;
+		} else
+			m_adj(m, len - m->m_pkthdr.len);	/* negative: trim tail */
+	}
+	/* a checksum field of 0xffff means the sender did not checksum */
+	if (idpcksum && ((i = idp->idp_sum)!=0xffff)) {
+		idp->idp_sum = 0;
+		if (i != (idp->idp_sum = ns_cksum(m, len))) {
+			idpstat.idps_badsum++;
+			idp->idp_sum = i;	/* restore the received value */
+			/* error code depends on whether we were the target */
+			if (ns_hosteqnh(ns_thishost, idp->idp_dna.x_host))
+				error = NS_ERR_BADSUM;
+			else
+				error = NS_ERR_BADSUM_T;
+			ns_error(m, error, 0);
+			goto next;
+		}
+	}
+	/*
+	 * Is this a directed broadcast?
+	 */
+	if (ns_hosteqnh(ns_broadhost,idp->idp_dna.x_host)) {
+		if ((!ns_neteq(idp->idp_dna, idp->idp_sna)) &&
+		    (!ns_neteqnn(idp->idp_dna.x_net, ns_broadnet)) &&
+		    (!ns_neteqnn(idp->idp_sna.x_net, ns_zeronet)) &&
+		    (!ns_neteqnn(idp->idp_dna.x_net, ns_zeronet)) ) {
+			/*
+			 * Look to see if I need to eat this packet.
+			 * Algorithm is to forward all young packets
+			 * and prematurely age any packets which will
+			 * be physically broadcast.
+			 * Any very old packets eaten without forwarding
+			 * would die anyway.
+			 *
+			 * Suggestion of Bill Nesheim, Cornell U.
+			 */
+			if (idp->idp_tc < NS_MAXHOPS) {
+				idp_forward(m);
+				goto next;
+			}
+		}
+	/*
+	 * Is this our packet? If not, forward.
+	 */
+	} else if (!ns_hosteqnh(ns_thishost,idp->idp_dna.x_host)) {
+		idp_forward(m);
+		goto next;
+	}
+	/*
+	 * Locate pcb for datagram.
+	 */
+	nsp = ns_pcblookup(&idp->idp_sna, idp->idp_dna.x_port, NS_WILDCARD);
+	/*
+	 * Switch out to protocol's input routine.
+	 */
+	nsintr_swtch++;
+	if (nsp) {
+		/* drop the pad byte that was kept only for the checksum */
+		if (oddpacketp) {
+			m_adj(m, -1);
+		}
+		/* sockets flagged NSP_ALL_PACKETS bypass the protocol switch */
+		if ((nsp->nsp_flags & NSP_ALL_PACKETS)==0)
+			switch (idp->idp_pt) {
+
+			    case NSPROTO_SPP:
+				    spp_input(m, nsp);
+				    goto next;
+
+			    case NSPROTO_ERROR:
+				    ns_err_input(m);
+				    goto next;
+			}
+		idp_input(m, nsp);
+	} else {
+		/* nobody is listening on that port: tell the sender */
+		ns_error(m, NS_ERR_NOSOCK, 0);
+	}
+	goto next;
+
+bad:
+	m_freem(m);
+	goto next;
+}
+
+/*
+ * Maps a PRC_* control command (the array index) to the errno handed to
+ * the affected sockets.  A zero entry makes idp_ctlinput() ignore the
+ * command.
+ */
+u_char nsctlerrmap[PRC_NCMDS] = {
+	ECONNABORTED,	ECONNABORTED,	0,		0,
+	0,		0,		EHOSTDOWN,	EHOSTUNREACH,
+	ENETUNREACH,	EHOSTUNREACH,	ECONNREFUSED,	ECONNREFUSED,
+	EMSGSIZE,	0,		0,		0,
+	0,		0,		0,		0
+};
+
+int idp_donosocks = 1;
+
+idp_ctlinput(cmd, arg)
+	int cmd;
+	caddr_t arg;
+{
+	struct ns_addr *ns;
+	struct nspcb *nsp;
+	struct ns_errp *errp;
+	int idp_abort();
+	extern struct nspcb *idp_drop();
+	int type;
+
+	if (cmd < 0 || cmd > PRC_NCMDS)
+		return;
+	if (nsctlerrmap[cmd] == 0)
+		return;		/* XXX */
+	type = NS_ERR_UNREACH_HOST;
+	switch (cmd) {
+		struct sockaddr_ns *sns;
+
+	case PRC_IFDOWN:
+	case PRC_HOSTDEAD:
+	case PRC_HOSTUNREACH:
+		sns = (struct sockaddr_ns *)arg;
+		if (sns->sns_family != AF_NS)
+			return;
+		ns = &sns->sns_addr;
+		break;
+
+	default:
+		errp = (struct ns_errp *)arg;
+		ns = &errp->ns_err_idp.idp_dna;
+		type = errp->ns_err_num;
+		type = ntohs((u_short)type);
+	}
+	switch (type) {
+
+	case NS_ERR_UNREACH_HOST:
+		ns_pcbnotify(ns, (int)nsctlerrmap[cmd], idp_abort, (long)0);
+		break;
+
+	case NS_ERR_NOSOCK:
+		nsp = ns_pcblookup(ns, errp->ns_err_idp.idp_sna.x_port,
+			NS_WILDCARD);
+		if(nsp && idp_donosocks && ! ns_nullhost(nsp->nsp_faddr))
+			(void) idp_drop(nsp, (int)nsctlerrmap[cmd]);
+	}
+}
+
+int	idpprintfs = 0;		/* nonzero: trace each forwarded packet */
+int	idpforwarding = 1;	/* zero: refuse to forward at all */
+/*
+ * Forward a packet.  If some error occurs return the sender
+ * an error packet.  Note we can't always generate a meaningful
+ * error message because the NS errors don't have a large enough repertoire
+ * of codes and types.
+ *
+ * The mbuf chain m is always disposed of: it is handed to ns_output(),
+ * to ns_error(), or freed here.
+ */
+struct route idp_droute;	/* scratch route towards the destination */
+struct route idp_sroute;	/* scratch route back to the source */
+
+idp_forward(m)
+struct mbuf *m;
+{
+	register struct idp *idp = mtod(m, struct idp *);
+	register int error, type, code;
+	struct mbuf *mcopy = NULL;
+	int agedelta = 1;
+	int flags = NS_FORWARDING;
+	int ok_there = 0;
+	int ok_back = 0;
+
+	if (idpprintfs) {
+		printf("forward: src ");
+		ns_printhost(&idp->idp_sna);
+		printf(", dst ");
+		ns_printhost(&idp->idp_dna);
+		printf("hop count %d\n", idp->idp_tc);
+	}
+	if (idpforwarding == 0) {
+		/* can't tell difference between net and host */
+		type = NS_ERR_UNREACH_HOST, code = 0;
+		goto senderror;
+	}
+	idp->idp_tc++;
+	if (idp->idp_tc > NS_MAXHOPS) {
+		type = NS_ERR_TOO_OLD, code = 0;
+		goto senderror;
+	}
+	/*
+	 * Save at most 42 bytes of the packet in case
+	 * we need to generate an NS error message to the src.
+	 */
+	mcopy = m_copy(m, 0, imin((int)ntohs(idp->idp_len), 42));
+
+	if ((ok_there = idp_do_route(&idp->idp_dna,&idp_droute))==0) {
+		type = NS_ERR_UNREACH_HOST, code = 0;
+		goto senderror;
+	}
+	/*
+	 * Here we think about  forwarding  broadcast packets,
+	 * so we try to insure that it doesn't go back out
+	 * on the interface it came in on.  Also, if we
+	 * are going to physically broadcast this, let us
+	 * age the packet so we can eat it safely the second time around.
+	 */
+	if (idp->idp_dna.x_host.c_host[0] & 0x1) {
+		struct ns_ifaddr *ia = ns_iaonnetof(&idp->idp_dna);
+		struct ifnet *ifp;
+		if (ia) {
+			/* I'm gonna hafta eat this packet */
+			agedelta += NS_MAXHOPS - idp->idp_tc;
+			idp->idp_tc = NS_MAXHOPS;
+		}
+		if ((ok_back = idp_do_route(&idp->idp_sna,&idp_sroute))==0) {
+			/* error = ENETUNREACH; He'll never get it! */
+			m_freem(m);
+			goto cleanup;
+		}
+		/* only broadcast out of a different interface than the way back */
+		if (idp_droute.ro_rt &&
+		    (ifp=idp_droute.ro_rt->rt_ifp) &&
+		    idp_sroute.ro_rt &&
+		    (ifp!=idp_sroute.ro_rt->rt_ifp)) {
+			flags |= NS_ALLOWBROADCAST;
+		} else {
+			type = NS_ERR_UNREACH_HOST, code = 0;
+			goto senderror;
+		}
+	}
+	/* need to adjust checksum */
+	if (idp->idp_sum!=0xffff) {
+		union bytes {
+			u_char c[4];
+			u_short s[2];
+			long l;
+		} x;
+		register int shift;
+		/* incremental update for the change made to idp_tc */
+		x.l = 0; x.c[0] = agedelta;
+		shift = (((((int)ntohs(idp->idp_len))+1)>>1)-2) & 0xf;
+		x.l = idp->idp_sum + (x.s[0] << shift);
+		/* fold the carry back in; twice, as the first fold may carry */
+		x.l = x.s[0] + x.s[1];
+		x.l = x.s[0] + x.s[1];
+		if (x.l==0xffff) idp->idp_sum = 0; else idp->idp_sum = x.l;
+	}
+	if ((error = ns_output(m, &idp_droute, flags)) &&
+	    (mcopy!=NULL)) {
+		idp = mtod(mcopy, struct idp *);
+		type = NS_ERR_UNSPEC_T, code = 0;
+		switch (error) {
+
+		case ENETUNREACH:
+		case EHOSTDOWN:
+		case EHOSTUNREACH:
+		case ENETDOWN:
+		case EPERM:
+			type = NS_ERR_UNREACH_HOST;
+			break;
+
+		case EMSGSIZE:
+			type = NS_ERR_TOO_BIG;
+			code = 576; /* too hard to figure out mtu here */
+			break;
+
+		case ENOBUFS:
+			type = NS_ERR_UNSPEC_T;
+			break;
+		}
+		/*
+		 * ns_output() has already disposed of m on failure, so the
+		 * error report must be built from the saved copy.  Ownership
+		 * of the copy moves to ns_error(); clear mcopy so that the
+		 * cleanup code does not free it a second time.
+		 */
+		m = mcopy;
+		mcopy = NULL;
+	senderror:
+		/* early-failure paths jump here with the original m intact */
+		ns_error(m, type, code);
+	}
+cleanup:
+	if (ok_there)
+		idp_undo_route(&idp_droute);
+	if (ok_back)
+		idp_undo_route(&idp_sroute);
+	if (mcopy != NULL)
+		m_freem(mcopy);
+}
+
+/*
+ * Build a fresh route in *ro towards the XNS address *src (port cleared)
+ * and look it up.  Returns 1 and bumps the route's use count when a
+ * route with an interface was found, 0 otherwise.
+ */
+idp_do_route(src, ro)
+struct ns_addr *src;
+struct route *ro;
+{
+	struct sockaddr_ns *target = (struct sockaddr_ns *)&ro->ro_dst;
+
+	/* start from a clean slate: any previous contents are discarded */
+	bzero((caddr_t)ro, sizeof (*ro));
+
+	target->sns_family = AF_NS;
+	target->sns_len = sizeof(*target);
+	target->sns_addr = *src;
+	target->sns_addr.x_port = 0;	/* routing ignores the socket number */
+
+	rtalloc(ro);
+	if (ro->ro_rt != 0 && ro->ro_rt->rt_ifp != 0) {
+		ro->ro_rt->rt_use++;
+		return (1);
+	}
+	return (0);
+}
+
+/*
+ * Release the route held in *ro, if any.  The ro_rt pointer itself is
+ * left untouched.
+ */
+idp_undo_route(ro)
+register struct route *ro;
+{
+	if (ro->ro_rt == 0)
+		return;
+	{
+		RTFREE(ro->ro_rt);
+	}
+}
+
+/*
+ * Show an outgoing packet to every raw XNS listener.
+ *
+ *	m	packet being sent; only copied, never modified or freed
+ *	ifp	outgoing interface, may be null
+ *
+ * Each listener receives a private copy with an extra struct idp
+ * prepended whose source address and length are filled in here.
+ * NOTE(review): the remaining fields of the prepended header are left
+ * as M_PREPEND delivered them -- confirm idp_input() does not read them.
+ */
+ns_watch_output(m, ifp)
+struct mbuf *m;
+struct ifnet *ifp;
+{
+	register struct nspcb *nsp;
+	register struct ifaddr *ifa;
+	/*
+	 * Give any raw listeners a crack at the packet
+	 */
+	for (nsp = nsrawpcb.nsp_next; nsp != &nsrawpcb; nsp = nsp->nsp_next) {
+		struct mbuf *m0 = m_copy(m, 0, (int)M_COPYALL);
+		if (m0) {
+			register struct idp *idp;
+
+			M_PREPEND(m0, sizeof (*idp), M_DONTWAIT);
+			if (m0 == NULL)
+				continue;
+			idp = mtod(m0, struct idp *);
+			/* default source: this host on the unspecified net */
+			idp->idp_sna.x_net = ns_zeronet;
+			idp->idp_sna.x_host = ns_thishost;
+			/* on point-to-point links use the link's own NS address */
+			if (ifp && (ifp->if_flags & IFF_POINTOPOINT))
+			    for(ifa = ifp->if_addrlist; ifa;
+						ifa = ifa->ifa_next) {
+				if (ifa->ifa_addr->sa_family==AF_NS) {
+				    idp->idp_sna = IA_SNS(ifa)->sns_addr;
+				    break;
+				}
+			    }
+			/*
+			 * idp_len is a 16-bit field in network byte order.
+			 * The former ntohl() swapped all 32 bits and, on a
+			 * little-endian host, left only the (zero) high half
+			 * after truncation.
+			 */
+			idp->idp_len = htons((u_short)m0->m_pkthdr.len);
+			idp_input(m0, nsp);
+		}
+	}
+}
diff --git a/sys/netns/ns_ip.c b/sys/netns/ns_ip.c
new file mode 100644
index 00000000000..09deb8fe7c4
--- /dev/null
+++ b/sys/netns/ns_ip.c
@@ -0,0 +1,440 @@
+/*
+ * Copyright (c) 1984, 1985, 1986, 1987, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)ns_ip.c	8.1 (Berkeley) 6/10/93
+ */
+
+/*
+ * Software interface driver for encapsulating ns in ip.
+ */
+
+#ifdef NSIP
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/errno.h>
+#include <sys/ioctl.h>
+#include <sys/protosw.h>
+
+#include <net/if.h>
+#include <net/netisr.h>
+#include <net/route.h>
+
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/in_var.h>
+#include <netinet/ip.h>
+#include <netinet/ip_var.h>
+
+#include <machine/mtpr.h>
+
+#include <netns/ns.h>
+#include <netns/ns_if.h>
+#include <netns/idp.h>
+
+/*
+ * One NS-over-IP tunnel.  The generic ifnet must be the first member:
+ * the code casts between struct ifnet * and struct ifnet_en *.
+ */
+struct ifnet_en {
+	struct ifnet ifen_ifnet;	/* the pseudo-interface itself */
+	struct route ifen_route;	/* cached IP route passed to ip_output() */
+	struct in_addr ifen_src;	/* our IP address on the tunnel */
+	struct in_addr ifen_dst;	/* IP address of the far end */
+	struct ifnet_en *ifen_next;	/* next entry on nsip_list */
+};
+
+/* forward declarations of the interface entry points defined below */
+int	nsipoutput(), nsipioctl(), nsipstart();
+/*
+ * MTU given to every nsip pseudo-interface.  No trailing semicolon: the
+ * macro must be usable inside larger expressions.
+ */
+#define LOMTU	(1024+512)
+
+struct ifnet nsipif;			/* template; also carries the next unit
+					number and aggregate counters */
+struct ifnet_en *nsip_list;		/* list of all hosts and gateways or
+					broadcast addrs */
+
+/*
+ * Allocate, initialise and attach a new nsip pseudo-interface and link
+ * it onto nsip_list.  The first call also fills in the nsipif template.
+ * Returns the new entry, or NULL when no memory is available.
+ */
+struct ifnet_en *
+nsipattach()
+{
+	register struct ifnet_en *m;
+	register struct ifnet *ifp;
+
+	/* a zero mtu marks the template as not yet initialised */
+	if (nsipif.if_mtu == 0) {
+		ifp = &nsipif;
+		ifp->if_name = "nsip";
+		ifp->if_mtu = LOMTU;
+		ifp->if_ioctl = nsipioctl;
+		ifp->if_output = nsipoutput;
+		ifp->if_start = nsipstart;
+		ifp->if_flags = IFF_POINTOPOINT;
+	}
+
+	MALLOC((m), struct ifnet_en *, sizeof(*m), M_PCB, M_NOWAIT);
+	if (m == NULL) return (NULL);
+	/*
+	 * The allocator does not clear memory.  Zero the whole record so
+	 * that the cached route, the ifnet list linkage, the address list
+	 * and the counters all start out empty before if_attach() sees it.
+	 */
+	bzero((caddr_t)m, sizeof (*m));
+	m->ifen_next = nsip_list;
+	nsip_list = m;
+	ifp = &m->ifen_ifnet;
+
+	ifp->if_name = "nsip";
+	ifp->if_mtu = LOMTU;
+	ifp->if_ioctl = nsipioctl;
+	ifp->if_output = nsipoutput;
+	ifp->if_start = nsipstart;
+	ifp->if_flags = IFF_POINTOPOINT;
+	/* units are handed out sequentially from the template's counter */
+	ifp->if_unit = nsipif.if_unit++;
+	if_attach(ifp);
+
+	return (m);
+}
+
+
+/*
+ * Process an ioctl request.
+ *
+ *	SIOCSIFADDR	marks the interface up
+ *	SIOCSIFDSTADDR	nothing to do at this level
+ *	SIOCSIFFLAGS	releases the tunnel when IFF_UP is being cleared
+ *
+ * Returns 0 on success, EINVAL for any other command.
+ */
+/* ARGSUSED */
+nsipioctl(ifp, cmd, data)
+	register struct ifnet *ifp;
+	int cmd;
+	caddr_t data;
+{
+	int error = 0;
+	struct ifreq *ifr;
+
+	switch (cmd) {
+
+	case SIOCSIFADDR:
+		ifp->if_flags |= IFF_UP;
+		/* fall into: */
+
+	case SIOCSIFDSTADDR:
+		/*
+		 * Everything else is done at a higher level.
+		 */
+		break;
+
+	case SIOCSIFFLAGS:
+		ifr = (struct ifreq *)data;
+		if ((ifr->ifr_flags & IFF_UP) == 0)
+			error = nsip_free(ifp);
+		/*
+		 * Without this break the case fell into `default' and
+		 * every SIOCSIFFLAGS request reported EINVAL.
+		 */
+		break;
+
+	default:
+		error = EINVAL;
+	}
+	return (error);
+}
+
+/* debugging aids: last packet with a bad length, and last packet seen */
+struct mbuf *nsip_badlen;
+struct mbuf *nsip_lastin;
+int nsip_hold_input;		/* nonzero: keep a copy of each input packet */
+
+/*
+ * Input routine for IDP packets encapsulated in IP.
+ *
+ *	m	packet beginning with the IP header
+ *	ifp	interface recorded as the packet's receive interface
+ *
+ * Strips the IP header (and options), sanity-checks the IDP length
+ * against the IP length and queues the packet on nsintrq for nsintr().
+ */
+idpip_input(m, ifp)
+	register struct mbuf *m;
+	struct ifnet *ifp;
+{
+	register struct ip *ip;
+	register struct idp *idp;
+	register struct ifqueue *ifq = &nsintrq;
+	int len, s;
+
+	if (nsip_hold_input) {
+		/* replace the previously held copy with this packet */
+		if (nsip_lastin) {
+			m_freem(nsip_lastin);
+		}
+		nsip_lastin = m_copym(m, 0, (int)M_COPYALL, M_DONTWAIT);
+	}
+	/*
+	 * Get IP and IDP header together in first mbuf.
+	 */
+	nsipif.if_ipackets++;
+	s = sizeof (struct ip) + sizeof (struct idp);
+	if (((m->m_flags & M_EXT) || m->m_len < s) &&
+	    (m = m_pullup(m, s)) == 0) {
+		nsipif.if_ierrors++;
+		return;
+	}
+	ip = mtod(m, struct ip *);
+	/* header longer than the fixed part means IP options are present */
+	if (ip->ip_hl > (sizeof (struct ip) >> 2)) {
+		ip_stripoptions(m, (struct mbuf *)0);
+		if (m->m_len < s) {
+			if ((m = m_pullup(m, s)) == 0) {
+				nsipif.if_ierrors++;
+				return;
+			}
+			ip = mtod(m, struct ip *);
+		}
+	}
+
+	/*
+	 * Make mbuf data length reflect IDP length.
+	 * If not enough data to reflect IDP length, drop.
+	 */
+	/* step over the IP header; ip still points at it for ip_len below */
+	m->m_data += sizeof (struct ip);
+	m->m_len -= sizeof (struct ip);
+	m->m_pkthdr.len -= sizeof (struct ip);
+	idp = mtod(m, struct idp *);
+	len = ntohs(idp->idp_len);
+	if (len & 1) len++;		/* Preserve Garbage Byte */
+	/*
+	 * NOTE(review): ip_len is compared in host order and without
+	 * subtracting the header, so it is presumably already adjusted
+	 * by the IP input code -- confirm.
+	 */
+	if (ip->ip_len != len) {
+		if (len > ip->ip_len) {
+			nsipif.if_ierrors++;
+			/* park the offender in nsip_badlen instead of freeing it */
+			if (nsip_badlen) m_freem(nsip_badlen);
+			nsip_badlen = m;
+			return;
+		}
+		/* Any extra will be trimmed off by the NS routines */
+	}
+
+	/*
+	 * Place interface pointer before the data
+	 * for the receiving protocol.
+	 */
+	m->m_pkthdr.rcvif = ifp;
+	/*
+	 * Deliver to NS
+	 */
+	s = splimp();
+	if (IF_QFULL(ifq)) {
+		IF_DROP(ifq);
+/* this label is not the target of any goto in this function */
+bad:
+		m_freem(m);
+		splx(s);
+		return;
+	}
+	IF_ENQUEUE(ifq, m);
+	schednetisr(NETISR_NS);
+	splx(s);
+	return;
+}
+
+/*
+ * Output routine of the nsip pseudo-interface: wrap an IDP packet in an
+ * IP header and hand it to ip_output().
+ *
+ *	ifn	the tunnel (its ifnet is the first member, so the generic
+ *		output path can pass the interface pointer here)
+ *	m	packet beginning with the IDP header; always consumed
+ *	dst	unused
+ *
+ * Returns 0 or an errno value.
+ */
+/* ARGSUSED */
+nsipoutput(ifn, m, dst)
+	struct ifnet_en *ifn;
+	register struct mbuf *m;
+	struct sockaddr *dst;
+{
+
+	register struct ip *ip;
+	register struct route *ro = &(ifn->ifen_route);
+	register int len = 0;
+	register struct idp *idp = mtod(m, struct idp *);
+	int error;
+
+	ifn->ifen_ifnet.if_opackets++;
+	nsipif.if_opackets++;
+
+
+	/*
+	 * Calculate data length and make space
+	 * for IP header.
+	 */
+	len =  ntohs(idp->idp_len);
+	if (len & 1) len++;		/* Preserve Garbage Byte */
+	/* following clause not necessary on vax */
+	if (3 & (int)m->m_data) {
+		/* force longword alignment of ip hdr */
+		struct mbuf *m0 = m_gethdr(MT_HEADER, M_DONTWAIT);
+		if (m0 == 0) {
+			m_freem(m);
+			return (ENOBUFS);
+		}
+		MH_ALIGN(m0, sizeof (struct ip));
+		m0->m_flags = m->m_flags & M_COPYFLAGS;
+		m0->m_next = m;
+		m0->m_len = sizeof (struct ip);
+		/*
+		 * NOTE(review): only the first mbuf's length is counted
+		 * here; a multi-mbuf packet would need m->m_pkthdr.len.
+		 */
+		m0->m_pkthdr.len = m0->m_len + m->m_len;
+		m->m_flags &= ~M_PKTHDR;
+		/*
+		 * The new header mbuf is now the head of the chain.
+		 * Without this the IP header below was written on top
+		 * of the IDP header and m0 was never sent.
+		 */
+		m = m0;
+	} else {
+		M_PREPEND(m, sizeof (struct ip), M_DONTWAIT);
+		if (m == 0)
+			return (ENOBUFS);
+	}
+	/*
+	 * Fill in IP header.
+	 */
+	ip = mtod(m, struct ip *);
+	*(long *)ip = 0;	/* clear the leading header fields in one store */
+	ip->ip_p = IPPROTO_IDP;
+	ip->ip_src = ifn->ifen_src;
+	ip->ip_dst = ifn->ifen_dst;
+	ip->ip_len = (u_short)len + sizeof (struct ip);
+	ip->ip_ttl = MAXTTL;
+
+	/*
+	 * Output final datagram.
+	 */
+	error =  (ip_output(m, (struct mbuf *)0, ro, SO_BROADCAST, NULL));
+	if (error) {
+		ifn->ifen_ifnet.if_oerrors++;
+		/* NOTE(review): stores the errno, not a count -- looks like a debugging aid */
+		ifn->ifen_ifnet.if_ierrors = error;
+	}
+	return (error);
+}
+
+/*
+ * if_start entry point of the nsip pseudo-interface.  It only panics:
+ * nsipoutput() passes packets straight to ip_output(), so nothing is
+ * expected to call this.
+ */
+nsipstart(ifp)
+struct ifnet *ifp;
+{
+	panic("nsip_start called\n");
+}
+
+/* scratch request reused by nsip_route(); the unit digit is patched in */
+struct ifreq ifr = {"nsip0"};
+
+/*
+ * Set up an NS-over-IP tunnel.
+ *
+ *	m	mbuf holding a struct nsip_req (NS destination, IP gateway)
+ *
+ * Finds a route to the IP gateway, chooses our return IP address, picks
+ * a free pseudo-interface (or attaches a new one) and configures it as a
+ * point-to-point link to the NS destination.
+ * Returns 0 or an errno value.
+ */
+nsip_route(m)
+	register struct mbuf *m;
+{
+	register struct nsip_req *rq = mtod(m, struct nsip_req *);
+	struct sockaddr_ns *ns_dst = (struct sockaddr_ns *)&rq->rq_ns;
+	struct sockaddr_in *ip_dst = (struct sockaddr_in *)&rq->rq_ip;
+	struct route ro;
+	struct ifnet_en *ifn;
+	struct sockaddr_in *src;
+
+	/*
+	 * First, make sure we already have an ns address:
+	 */
+	if (ns_hosteqnh(ns_thishost, ns_zerohost))
+		return (EADDRNOTAVAIL);
+	/*
+	 * Now, determine if we can get to the destination
+	 */
+	bzero((caddr_t)&ro, sizeof (ro));
+	ro.ro_dst = *(struct sockaddr *)ip_dst;
+	rtalloc(&ro);
+	if (ro.ro_rt == 0)
+		return (ENETUNREACH);
+	if (ro.ro_rt->rt_ifp == 0) {
+		/* a route without an interface is useless; do not leak it */
+		RTFREE(ro.ro_rt);
+		return (ENETUNREACH);
+	}
+
+	/*
+	 * And see how he's going to get back to us:
+	 * i.e., what return ip address do we use?
+	 */
+	{
+		register struct in_ifaddr *ia;
+		struct ifnet *ifp = ro.ro_rt->rt_ifp;
+
+		for (ia = in_ifaddr; ia; ia = ia->ia_next)
+			if (ia->ia_ifp == ifp)
+				break;
+		/* no address on that interface: settle for the first one */
+		if (ia == 0)
+			ia = in_ifaddr;
+		if (ia == 0) {
+			RTFREE(ro.ro_rt);
+			return (EADDRNOTAVAIL);
+		}
+		src = (struct sockaddr_in *)&ia->ia_addr;
+	}
+
+	/*
+	 * Is there a free (pseudo-)interface or space?
+	 */
+	for (ifn = nsip_list; ifn; ifn = ifn->ifen_next) {
+		if ((ifn->ifen_ifnet.if_flags & IFF_UP) == 0)
+			break;
+	}
+	if (ifn == NULL)
+		ifn = nsipattach();
+	if (ifn == NULL) {
+		RTFREE(ro.ro_rt);
+		return (ENOBUFS);
+	}
+	/* the tunnel takes over the route reference obtained above */
+	ifn->ifen_route = ro;
+	ifn->ifen_dst =  ip_dst->sin_addr;
+	ifn->ifen_src = src->sin_addr;
+
+	/*
+	 * now configure this as a point to point link
+	 */
+	/*
+	 * Name the interface actually chosen.  The global unit counter
+	 * only matches when a new interface was just attached, not when
+	 * an idle one is being reused.
+	 * NOTE(review): a single character only covers units 0-9.
+	 */
+	ifr.ifr_name[4] = '0' + ifn->ifen_ifnet.if_unit;
+	ifr.ifr_dstaddr = * (struct sockaddr *) ns_dst;
+	(void)ns_control((struct socket *)0, (int)SIOCSIFDSTADDR, (caddr_t)&ifr,
+			(struct ifnet *)ifn);
+	satons_addr(ifr.ifr_addr).x_host = ns_thishost;
+	return (ns_control((struct socket *)0, (int)SIOCSIFADDR, (caddr_t)&ifr,
+			(struct ifnet *)ifn));
+}
+
+/*
+ * Take a tunnel out of service: drop its cached route, if any, and mark
+ * the interface down so that nsip_route() may reuse it.  Always
+ * returns 0.
+ */
+nsip_free(ifp)
+struct ifnet *ifp;
+{
+	/* the ifnet is the first member of the enclosing ifnet_en */
+	register struct ifnet_en *en = (struct ifnet_en *)ifp;
+
+	if (en->ifen_route.ro_rt != 0) {
+		RTFREE(en->ifen_route.ro_rt);
+		en->ifen_route.ro_rt = 0;
+	}
+	ifp->if_flags = ifp->if_flags & ~IFF_UP;
+	return (0);
+}
+
+/*
+ * Control input from IP on behalf of the tunnels.
+ *
+ *	cmd	PRC_* command
+ *	sa	IP address the command refers to
+ *
+ * Route-dead and redirect commands invalidate the cached route of every
+ * tunnel to that address; everything else is ignored.
+ */
+nsip_ctlinput(cmd, sa)
+	int cmd;
+	struct sockaddr *sa;
+{
+	extern u_char inetctlerrmap[];
+	struct sockaddr_in *sin;
+	int in_rtchange();
+
+	if ((unsigned)cmd >= PRC_NCMDS)
+		return;
+	if (!(sa->sa_family == AF_INET || sa->sa_family == AF_IMPLINK))
+		return;
+	sin = (struct sockaddr_in *)sa;
+	if (sin->sin_addr.s_addr == INADDR_ANY)
+		return;
+
+	/* only routing changes are of interest here */
+	if (cmd == PRC_ROUTEDEAD ||
+	    cmd == PRC_REDIRECT_NET ||
+	    cmd == PRC_REDIRECT_HOST ||
+	    cmd == PRC_REDIRECT_TOSNET ||
+	    cmd == PRC_REDIRECT_TOSHOST)
+		nsip_rtchange(&sin->sin_addr);
+}
+
+/*
+ * Drop the cached route of every tunnel whose far end is *dst.
+ */
+nsip_rtchange(dst)
+	register struct in_addr *dst;
+{
+	register struct ifnet_en *ifn;
+
+	for (ifn = nsip_list; ifn != 0; ifn = ifn->ifen_next) {
+		if (ifn->ifen_dst.s_addr != dst->s_addr)
+			continue;	/* some other tunnel */
+		if (ifn->ifen_route.ro_rt == 0)
+			continue;	/* nothing cached */
+		{
+			RTFREE(ifn->ifen_route.ro_rt);
+		}
+		ifn->ifen_route.ro_rt = 0;
+	}
+}
+#endif
diff --git a/sys/netns/ns_output.c b/sys/netns/ns_output.c
new file mode 100644
index 00000000000..4c9f364f1ea
--- /dev/null
+++ b/sys/netns/ns_output.c
@@ -0,0 +1,160 @@
+/*
+ * Copyright (c) 1984, 1985, 1986, 1987, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)ns_output.c	8.1 (Berkeley) 6/10/93
+ */
+
+#include <sys/param.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/errno.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+
+#include <net/if.h>
+#include <net/route.h>
+
+#include <netns/ns.h>
+#include <netns/ns_if.h>
+#include <netns/idp.h>
+#include <netns/idp_var.h>
+
+#ifdef vax
+#include <machine/mtpr.h>
+#endif
+/* Debugging knobs and counters for ns_output(). */
+int ns_hold_output = 0;		/* nonzero: keep a copy of the last packet */
+int ns_copy_output = 0;		/* nonzero: show packets to raw listeners */
+int ns_output_cnt = 0;		/* packets handed to an interface */
+struct mbuf *ns_lastout;	/* the copy kept when ns_hold_output is set */
+
+/*
+ * Send an IDP packet.
+ *
+ *	m0	packet beginning with the IDP header; always consumed
+ *	ro	route to use and cache into, or null for a temporary one
+ *	flags	NS_ROUTETOIF    bypass routing, use the directly
+ *				attached interface
+ *		NS_ALLOWBROADCAST  permit multicast/broadcast destinations
+ *
+ * Returns 0 or an errno value (ENETUNREACH, EADDRNOTAVAIL, EACCES,
+ * EMSGSIZE, or whatever the interface output routine reports).
+ */
+ns_output(m0, ro, flags)
+	struct mbuf *m0;
+	struct route *ro;
+	int flags;
+{
+	register struct idp *idp = mtod(m0, struct idp *);
+	register struct ifnet *ifp = 0;
+	int error = 0;
+	struct route idproute;
+	struct sockaddr_ns *dst;
+	extern int idpcksum;
+
+	if (ns_hold_output) {
+		if (ns_lastout) {
+			/*
+			 * The held packet is a whole chain (it comes from
+			 * m_copy of M_COPYALL), so the whole chain has to
+			 * be released; m_free() would leak all but the
+			 * first mbuf.
+			 */
+			m_freem(ns_lastout);
+		}
+		ns_lastout = m_copy(m0, 0, (int)M_COPYALL);
+	}
+	/*
+	 * Route packet.
+	 */
+	if (ro == 0) {
+		ro = &idproute;
+		bzero((caddr_t)ro, sizeof (*ro));
+	}
+	dst = (struct sockaddr_ns *)&ro->ro_dst;
+	if (ro->ro_rt == 0) {
+		dst->sns_family = AF_NS;
+		dst->sns_len = sizeof (*dst);
+		dst->sns_addr = idp->idp_dna;
+		dst->sns_addr.x_port = 0;
+		/*
+		 * If routing to interface only,
+		 * short circuit routing lookup.
+		 */
+		if (flags & NS_ROUTETOIF) {
+			struct ns_ifaddr *ia = ns_iaonnetof(&idp->idp_dna);
+
+			if (ia == 0) {
+				error = ENETUNREACH;
+				goto bad;
+			}
+			ifp = ia->ia_ifp;
+			goto gotif;
+		}
+		rtalloc(ro);
+	} else if ((ro->ro_rt->rt_flags & RTF_UP) == 0) {
+		/*
+		 * The old route has gone away; try for a new one.
+		 */
+		rtfree(ro->ro_rt);
+		ro->ro_rt = NULL;
+		rtalloc(ro);
+	}
+	if (ro->ro_rt == 0 || (ifp = ro->ro_rt->rt_ifp) == 0) {
+		error = ENETUNREACH;
+		goto bad;
+	}
+	ro->ro_rt->rt_use++;
+	/* indirect routes: the next hop is the gateway, not the target */
+	if (ro->ro_rt->rt_flags & (RTF_GATEWAY|RTF_HOST))
+		dst = (struct sockaddr_ns *)ro->ro_rt->rt_gateway;
+gotif:
+
+	/*
+	 * Look for multicast addresses
+	 * and verify user is allowed to send
+	 * such a packet.
+	 */
+	if (dst->sns_addr.x_host.c_host[0]&1) {
+		if ((ifp->if_flags & IFF_BROADCAST) == 0) {
+			error = EADDRNOTAVAIL;
+			goto bad;
+		}
+		if ((flags & NS_ALLOWBROADCAST) == 0) {
+			error = EACCES;
+			goto bad;
+		}
+	}
+
+	/* idp_len is in network order: convert to host order to compare */
+	if (ntohs(idp->idp_len) <= ifp->if_mtu) {
+		ns_output_cnt++;
+		if (ns_copy_output) {
+			ns_watch_output(m0, ifp);
+		}
+		error = (*ifp->if_output)(ifp, m0,
+					(struct sockaddr *)dst, ro->ro_rt);
+		goto done;
+	} else error = EMSGSIZE;
+
+
+bad:
+	/* ifp may still be null here; ns_watch_output() copes with that */
+	if (ns_copy_output) {
+		ns_watch_output(m0, ifp);
+	}
+	m_freem(m0);
+done:
+	/* release the route only if it was our own temporary one */
+	if (ro == &idproute && (flags & NS_ROUTETOIF) == 0 && ro->ro_rt) {
+		RTFREE(ro->ro_rt);
+		ro->ro_rt = 0;
+	}
+	return (error);
+}
diff --git a/sys/netns/ns_pcb.c b/sys/netns/ns_pcb.c
new file mode 100644
index 00000000000..ca88472d594
--- /dev/null
+++ b/sys/netns/ns_pcb.c
@@ -0,0 +1,363 @@
+/*
+ * Copyright (c) 1984, 1985, 1986, 1987, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)ns_pcb.c	8.1 (Berkeley) 6/10/93
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/mbuf.h>
+#include <sys/errno.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/protosw.h>
+
+#include <net/if.h>
+#include <net/route.h>
+
+#include <netns/ns.h>
+#include <netns/ns_if.h>
+#include <netns/ns_pcb.h>
+
+struct	ns_addr zerons_addr;	/* all-zero address; never written in this file */
+
+ns_pcballoc(so, head)		/* make a pcb for so; returns 0 or ENOBUFS */
+	struct socket *so;	/* socket the new pcb belongs to */
+	struct nspcb *head;	/* queue the new pcb is linked onto */
+{
+	register struct nspcb *pcb;
+	struct mbuf *store = m_getclr(M_DONTWAIT, MT_PCB);
+
+	if (store == NULL)
+		return (ENOBUFS);
+	/* The pcb occupies the data area of the mbuf just obtained. */
+	pcb = mtod(store, struct nspcb *);
+	pcb->nsp_socket = so;
+	insque(pcb, head);
+	so->so_pcb = (caddr_t)pcb;
+	return (0);
+}
+	
+ns_pcbbind(nsp, nam)		/* bind local host/port; returns 0 or an errno */
+	register struct nspcb *nsp;	/* pcb to bind; must still be unbound */
+	struct mbuf *nam;	/* holds a sockaddr_ns, or 0 to just get a port */
+{
+	register struct sockaddr_ns *sns;
+	u_short lport = 0;	/* port being assigned, network byte order */
+
+	if (nsp->nsp_lport || !ns_nullhost(nsp->nsp_laddr))
+		return (EINVAL);	/* pcb already has a port or a host */
+	if (nam == 0)
+		goto noname;	/* nothing requested: pick a port below */
+	sns = mtod(nam, struct sockaddr_ns *);
+	if (nam->m_len != sizeof (*sns))
+		return (EINVAL);
+	if (!ns_nullhost(sns->sns_addr)) {
+		int tport = sns->sns_port;	/* saved across the lookup */
+
+		sns->sns_port = 0;		/* yech... look up with port zeroed */
+		if (ifa_ifwithaddr((struct sockaddr *)sns) == 0)
+			return (EADDRNOTAVAIL);	/* NB: port stays zeroed in nam */
+		sns->sns_port = tport;
+	}
+	lport = sns->sns_port;
+	if (lport) {
+		u_short aport = ntohs(lport);	/* host order, for the range test */
+
+		if (aport < NSPORT_RESERVED &&
+		    (nsp->nsp_socket->so_state & SS_PRIV) == 0)
+			return (EACCES);	/* reserved port without SS_PRIV */
+		if (ns_pcblookup(&zerons_addr, lport, 0))
+			return (EADDRINUSE);	/* lookup found a pcb on lport */
+	}
+	nsp->nsp_laddr = sns->sns_addr;	/* copies x_port (nsp_lport) as well */
+noname:
+	if (lport == 0)		/* no port requested: take the next unused one */
+		do {
+			if (nspcb.nsp_lport++ < NSPORT_RESERVED)
+				nspcb.nsp_lport = NSPORT_RESERVED;
+			lport = htons(nspcb.nsp_lport);	/* list head holds counter */
+		} while (ns_pcblookup(&zerons_addr, lport, 0));
+	nsp->nsp_lport = lport;
+	return (0);
+}
+
+/*
+ * Connect a pcb to the foreign address held in nam (a sockaddr_ns).
+ * Both host and port must be specified there.  If the pcb does not
+ * have a local address yet, pick one: the network number comes from
+ * the chosen interface and the host part from ns_thishost.
+ */
+ns_pcbconnect(nsp, nam)		/* returns 0 or an errno value */
+	struct nspcb *nsp;
+	struct mbuf *nam;
+{
+	struct ns_ifaddr *ia;
+	register struct sockaddr_ns *sns = mtod(nam, struct sockaddr_ns *);
+	register struct ns_addr *dst;	/* address part of the cached ro_dst */
+	register struct route *ro;
+	struct ifnet *ifp;
+
+	if (nam->m_len != sizeof (*sns))
+		return (EINVAL);
+	if (sns->sns_family != AF_NS)
+		return (EAFNOSUPPORT);
+	if (sns->sns_port==0 || ns_nullhost(sns->sns_addr))
+		return (EADDRNOTAVAIL);
+	/*
+	 * If we haven't bound which network number to use as ours,
+	 * we will use the number of the outgoing interface.
+	 * This depends on having done a routing lookup, which
+	 * we will probably have to do anyway, so we might
+	 * as well do it now.  On the other hand if we are
+	 * sending to multiple destinations we may have already
+	 * done the lookup, so see if we can use the route
+	 * from before.  In any case, we only
+	 * choose a port number once, even if sending to multiple
+	 * destinations.
+	 */
+	ro = &nsp->nsp_route;
+	dst = &satons_addr(ro->ro_dst);
+	if (nsp->nsp_socket->so_options & SO_DONTROUTE)
+		goto flush;	/* lands inside the else arm below */
+	if (!ns_neteq(nsp->nsp_lastdst, sns->sns_addr))
+		goto flush;	/* different network: cached route is useless */
+	if (!ns_hosteq(nsp->nsp_lastdst, sns->sns_addr)) {
+		if (ro->ro_rt && ! (ro->ro_rt->rt_flags & RTF_HOST)) {
+			/* can patch route to avoid rtalloc */
+			*dst = sns->sns_addr;
+		} else {
+	flush:
+			if (ro->ro_rt)
+				RTFREE(ro->ro_rt);
+			ro->ro_rt = (struct rtentry *)0;
+			nsp->nsp_laddr.x_net = ns_zeronet; /* re-pick net below */
+		}
+	}/* else cached route is ok; do nothing */
+	nsp->nsp_lastdst = sns->sns_addr;
+	if ((nsp->nsp_socket->so_options & SO_DONTROUTE) == 0 && /*XXX*/
+	    (ro->ro_rt == (struct rtentry *)0 ||
+	     ro->ro_rt->rt_ifp == (struct ifnet *)0)) {
+		    /* No route yet, so try to acquire one */
+		    ro->ro_dst.sa_family = AF_NS;
+		    ro->ro_dst.sa_len = sizeof(ro->ro_dst);
+		    *dst = sns->sns_addr;
+		    dst->x_port = 0;	/* route key carries no port */
+		    rtalloc(ro);
+	}
+	if (ns_neteqnn(nsp->nsp_laddr.x_net, ns_zeronet)) {
+		/*
+		 * If route is known or can be allocated now,
+		 * our src addr is taken from the i/f, else punt.
+		 */
+
+		ia = (struct ns_ifaddr *)0;
+		/*
+		 * If we found a route, use the address
+		 * corresponding to the outgoing interface
+		 */
+		if (ro->ro_rt && (ifp = ro->ro_rt->rt_ifp))
+			for (ia = ns_ifaddr; ia; ia = ia->ia_next)
+				if (ia->ia_ifp == ifp)
+					break;
+		if (ia == 0) {	/* no interface via the route: fall back in stages */
+			u_short fport = sns->sns_addr.x_port;
+			sns->sns_addr.x_port = 0;	/* look up with port zeroed */
+			ia = (struct ns_ifaddr *)
+				ifa_ifwithdstaddr((struct sockaddr *)sns);
+			sns->sns_addr.x_port = fport;	/* put the port back */
+			if (ia == 0)
+				ia = ns_iaonnetof(&sns->sns_addr);
+			if (ia == 0)
+				ia = ns_ifaddr;	/* first entry on the address list */
+			if (ia == 0)
+				return (EADDRNOTAVAIL);
+		}
+		nsp->nsp_laddr.x_net = satons_addr(ia->ia_addr).x_net;
+	}
+	if (ns_pcblookup(&sns->sns_addr, nsp->nsp_lport, 0))
+		return (EADDRINUSE);	/* a pcb already pairs this peer and lport */
+	if (ns_nullhost(nsp->nsp_laddr)) {
+		if (nsp->nsp_lport == 0)
+			(void) ns_pcbbind(nsp, (struct mbuf *)0); /* get a port */
+		nsp->nsp_laddr.x_host = ns_thishost;
+	}
+	nsp->nsp_faddr = sns->sns_addr;
+	/* Includes nsp->nsp_fport = sns->sns_port; */
+	return (0);
+}
+
+ns_pcbdisconnect(nsp)		/* clear the peer; detach if SS_NOFDREF set */
+	struct nspcb *nsp;
+{
+	nsp->nsp_faddr = zerons_addr;
+	if ((nsp->nsp_socket->so_state & SS_NOFDREF) == 0)
+		return;		/* flag clear: the pcb stays attached */
+	ns_pcbdetach(nsp);
+}
+
+ns_pcbdetach(nsp)		/* cut nsp loose from its socket and free it */
+	struct nspcb *nsp;
+{
+	register struct rtentry *rt = nsp->nsp_route.ro_rt;
+
+	nsp->nsp_socket->so_pcb = 0;
+	sofree(nsp->nsp_socket);
+	if (rt != 0)
+		rtfree(rt);	/* give up the cached route */
+	remque(nsp);		/* take the pcb off its list */
+	(void) m_free(dtom(nsp));	/* the pcb lives in an mbuf */
+}
+
+ns_setsockaddr(nsp, nam)	/* report the pcb's local address in nam */
+	register struct nspcb *nsp;
+	struct mbuf *nam;	/* overwritten with one sockaddr_ns */
+{
+	register struct sockaddr_ns *sa;
+
+	nam->m_len = sizeof (struct sockaddr_ns);
+	sa = mtod(nam, struct sockaddr_ns *);
+	bzero((caddr_t)sa, sizeof (struct sockaddr_ns));	/* clear first */
+	sa->sns_family = AF_NS;
+	sa->sns_len = sizeof (struct sockaddr_ns);
+	sa->sns_addr = nsp->nsp_laddr;
+}
+
+ns_setpeeraddr(nsp, nam)	/* report the pcb's foreign address in nam */
+	register struct nspcb *nsp;
+	struct mbuf *nam;	/* overwritten with one sockaddr_ns */
+{
+	register struct sockaddr_ns *sa;
+
+	nam->m_len = sizeof (struct sockaddr_ns);
+	sa = mtod(nam, struct sockaddr_ns *);
+	bzero((caddr_t)sa, sizeof (struct sockaddr_ns));	/* clear first */
+	sa->sns_family = AF_NS;
+	sa->sns_len = sizeof (struct sockaddr_ns);
+	sa->sns_addr = nsp->nsp_faddr;
+}
+
+/*
+ * Pass some notification to all connections of a protocol
+ * associated with address dst.  Call the protocol specific
+ * routine (notify) to handle each connection.  Also pass an
+ * extra parameter via the nspcb (which may in fact be a
+ * parameter list!); it is left in nsp_notify_param.  If errno
+ * is nonzero it is also stored in the socket's so_error.
+ *
+ * Pcbs are matched on the host part of their foreign address;
+ * pcbs without a socket are skipped.  The whole walk is done
+ * between splimp() and splx().
+ */
+ns_pcbnotify(dst, errno, notify, param)
+	register struct ns_addr *dst;
+	long param;
+	int errno, (*notify)();
+{
+	register struct nspcb *cur, *after;
+	int s = splimp();
+
+	for (cur = nspcb.nsp_next; cur != &nspcb; cur = after) {
+		/* Fetch the successor first; notify presumably may free cur. */
+		after = cur->nsp_next;
+		if (!ns_hosteq(*dst, cur->nsp_faddr) || cur->nsp_socket == 0)
+			continue;
+		if (errno)
+			cur->nsp_socket->so_error = errno;
+		cur->nsp_notify_param = param;
+		(*notify)(cur);
+	}
+	splx(s);
+}
+
+#ifdef notdef
+/*
+ * After a routing change, flush the old cached route so
+ * that a (hopefully) better one gets allocated later.
+ */
+ns_rtchange(nsp)
+	struct nspcb *nsp;
+{
+	register struct route *ro = &nsp->nsp_route;
+
+	/* Drop the cached route, if any; a new one can be
+	 * allocated the next time output is attempted. */
+	if (ro->ro_rt) {
+		rtfree(ro->ro_rt);
+		ro->ro_rt = 0;
+	}
+	/* SHOULD NOTIFY HIGHER-LEVEL PROTOCOLS */
+}
+#endif
+
+struct nspcb *
+ns_pcblookup(faddr, lport, wildp)
+	struct ns_addr *faddr;	/* foreign address (host and port) to match */
+	u_short lport;		/* local port, compared as stored in the pcb */
+{
+	register struct nspcb *pcb, *best = 0;
+	int bestwild = 3, nwild;	/* 3: above any count a pcb can score */
+	u_short fport = faddr->x_port;
+
+	/*
+	 * Among pcbs bound to lport, pick the one pairing with faddr using
+	 * the fewest wildcards; wildcard matches need a nonzero wildp.
+	 */
+	for (pcb = nspcb.nsp_next; pcb != &nspcb; pcb = pcb->nsp_next) {
+		if (pcb->nsp_lport != lport)
+			continue;
+		nwild = 0;
+		if (ns_nullhost(pcb->nsp_faddr)) {
+			if (!ns_nullhost(*faddr))
+				nwild++;
+		} else if (ns_nullhost(*faddr)) {
+			nwild++;
+		} else {
+			if (!ns_hosteq(pcb->nsp_faddr, *faddr))
+				continue;
+			if (pcb->nsp_fport != fport) {
+				if (pcb->nsp_fport != 0)
+					continue;
+				nwild++;
+			}
+		}
+		if (nwild != 0 && wildp == 0)
+			continue;
+		if (nwild >= bestwild)
+			continue;
+		best = pcb;
+		bestwild = nwild;
+		if (nwild == 0)
+			break;		/* exact match: stop looking */
+	}
+	return (best);
+}
diff --git a/sys/netns/ns_pcb.h b/sys/netns/ns_pcb.h
new file mode 100644
index 00000000000..68cf744f738
--- /dev/null
+++ b/sys/netns/ns_pcb.h
@@ -0,0 +1,80 @@
+/*
+ * Copyright (c) 1984, 1985, 1986, 1987, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)ns_pcb.h	8.1 (Berkeley) 6/10/93
+ */
+
+/*
+ * Ns protocol interface control block; one is set up per socket by
+ * ns_pcballoc() and linked on a list (head: nspcb, declared below). */
+struct nspcb {
+	struct	nspcb *nsp_next;	/* doubly linked list */
+	struct	nspcb *nsp_prev;	/* (other half of the list link) */
+	struct	nspcb *nsp_head;	/* NOTE(review): presumably list head */
+	struct	socket *nsp_socket;	/* back pointer to socket */
+	struct	ns_addr nsp_faddr;	/* destination address (has nsp_fport) */
+	struct	ns_addr nsp_laddr;	/* socket's address (has nsp_lport) */
+	caddr_t	nsp_pcb;		/* protocol specific stuff */
+	struct	route nsp_route;	/* routing information */
+	struct	ns_addr nsp_lastdst;	/* validate cached route for dg socks*/
+	long	nsp_notify_param;	/* extra info passed via ns_pcbnotify*/
+	short	nsp_flags;		/* presumably the NSP_* bits below */
+	u_char	nsp_dpt;		/* default packet type for idp_output*/
+	u_char	nsp_rpt;		/* last received packet type by
+								idp_input() */
+};
+
+/* possible flags (presumably the values kept in nsp_flags) */
+
+#define NSP_IN_ABORT	0x1		/* calling abort through socket */
+#define NSP_RAWIN	0x2		/* show headers on input */
+#define NSP_RAWOUT	0x4		/* show header on output */
+#define NSP_ALL_PACKETS	0x8		/* Turn off higher proto processing */
+
+#define	NS_WILDCARD	1	/* NOTE(review): presumably a wildp argument */
+
+#define nsp_lport nsp_laddr.x_port	/* local port lives in the ns_addr */
+#define nsp_fport nsp_faddr.x_port	/* foreign port likewise */
+
+#define	sotonspcb(so)		((struct nspcb *)((so)->so_pcb))
+
+/*
+ * Nominal space allocated to a ns socket.
+ */
+#define	NSSNDQ		2048
+#define	NSRCVQ		2048
+
+
+#ifdef KERNEL
+struct	nspcb nspcb;			/* head of list */
+struct	nspcb *ns_pcblookup();	/* (faddr, lport, wildp); 0 if no match */
+#endif
diff --git a/sys/netns/ns_proto.c b/sys/netns/ns_proto.c
new file mode 100644
index 00000000000..fc9f8238c55
--- /dev/null
+++ b/sys/netns/ns_proto.c
@@ -0,0 +1,96 @@
+/*
+ * Copyright (c) 1984, 1985, 1986, 1987, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)ns_proto.c	8.1 (Berkeley) 6/10/93
+ */
+
+#include <sys/param.h>
+#include <sys/socket.h>
+#include <sys/protosw.h>
+#include <sys/domain.h>
+#include <sys/mbuf.h>
+
+#include <net/radix.h>
+
+#include <netns/ns.h>
+
+/*
+ * NS protocol family: IDP, ERR, PE, SPP, ROUTE.  The routines
+ * declared here are the handlers entered in nssw[] below. */
+int	ns_init();
+int	idp_input(), idp_output(), idp_ctlinput(), idp_usrreq();
+int	idp_raw_usrreq(), idp_ctloutput();
+int	spp_input(), spp_ctlinput();
+int	spp_usrreq(), spp_usrreq_sp(), spp_ctloutput();
+int	spp_init(), spp_fasttimo(), spp_slowtimo();
+extern	int raw_usrreq();	/* declared, but not used in nssw[] below */
+
+extern	struct domain nsdomain;	/* defined after nssw[], which points at it */
+
+struct protosw nssw[] = {	/* one row per socket type / protocol pair */
+{ 0,		&nsdomain,	0,		0,
+  0,		idp_output,	0,		0,
+  0,			/* row 0: no socket type; only idp_output, ns_init */
+  ns_init,	0,		0,		0,
+},
+{ SOCK_DGRAM,	&nsdomain,	0,		PR_ATOMIC|PR_ADDR,
+  0,		0,		idp_ctlinput,	idp_ctloutput,
+  idp_usrreq,		/* datagram sockets: IDP, protocol number 0 */
+  0,		0,		0,		0,
+},
+{ SOCK_STREAM,	&nsdomain,	NSPROTO_SPP,	PR_CONNREQUIRED|PR_WANTRCVD,
+  spp_input,	0,		spp_ctlinput,	spp_ctloutput,
+  spp_usrreq,		/* stream sockets: SPP; row naming spp_init, timers */
+  spp_init,	spp_fasttimo,	spp_slowtimo,	0,
+},
+{ SOCK_SEQPACKET,&nsdomain,	NSPROTO_SPP,	PR_CONNREQUIRED|PR_WANTRCVD|PR_ATOMIC,
+  spp_input,	0,		spp_ctlinput,	spp_ctloutput,
+  spp_usrreq_sp,	/* seqpacket sockets: SPP again, PR_ATOMIC added */
+  0,		0,		0,		0,
+},
+{ SOCK_RAW,	&nsdomain,	NSPROTO_RAW,	PR_ATOMIC|PR_ADDR,
+  idp_input,	idp_output,	0,		idp_ctloutput,
+  idp_raw_usrreq,	/* raw sockets, NSPROTO_RAW */
+  0,		0,		0,		0,
+},
+{ SOCK_RAW,	&nsdomain,	NSPROTO_ERROR,	PR_ATOMIC|PR_ADDR,
+  idp_ctlinput,	idp_output,	0,		idp_ctloutput,
+  idp_raw_usrreq,	/* raw, NSPROTO_ERROR; first handler is idp_ctlinput */
+  0,		0,		0,		0,
+},
+};
+
+struct domain nsdomain =	/* the AF_NS domain, built around nssw[] */
+    { AF_NS, "network systems", 0, 0, 0, 
+      nssw, &nssw[sizeof(nssw)/sizeof(nssw[0])], 0,	/* start, one past end */
+      rn_inithead, 16, sizeof(struct sockaddr_ns)};	/* NOTE(review): 16? */
+
diff --git a/sys/netns/sp.h b/sys/netns/sp.h
new file mode 100644
index 00000000000..b55dac26039
--- /dev/null
+++ b/sys/netns/sp.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 1984, 1985, 1986, 1987, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)sp.h	8.1 (Berkeley) 6/10/93
+ */
+
+/*
+ * Definitions for Xerox NS style sequenced packet protocol
+ */
+
+struct sphdr {
+	u_char	sp_cc;		/* connection control (SP_* bits below) */
+	u_char	sp_dt;		/* datastream type */
+#define	SP_SP	0x80		/* system packet */
+#define	SP_SA	0x40		/* send acknowledgement */
+#define	SP_OB	0x20		/* attention (out of band data) */
+#define	SP_EM	0x10		/* end of message */
+	u_short	sp_sid;		/* source connection identifier */
+	u_short	sp_did;		/* destination connection identifier */
+	u_short	sp_seq;		/* sequence number */
+	u_short	sp_ack;		/* acknowledge number */
+	u_short	sp_alo;		/* allocation number */
+};
diff --git a/sys/netns/spidp.h b/sys/netns/spidp.h
new file mode 100644
index 00000000000..332df5be235
--- /dev/null
+++ b/sys/netns/spidp.h
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 1984, 1985, 1986, 1987, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)spidp.h	8.1 (Berkeley) 6/10/93
+ */
+
+/*
+ * Definitions for NS(tm) Internet Datagram Protocol
+ * containing a Sequenced Packet Protocol packet.
+ */
+struct spidp {
+	struct idp	si_i;	/* IDP header comes first */
+	struct sphdr 	si_s;	/* SPP header right behind it */
+};
+struct spidp_q {		/* bare link pair; presumably queue linkage */
+	struct spidp_q	*si_next;
+	struct spidp_q	*si_prev;
+};
+#define SI(x)	((struct spidp *)x)	/* cast; note x is not parenthesized */
+#define si_sum	si_i.idp_sum	/* si_sum..si_sport: shorthands into si_i */
+#define si_len	si_i.idp_len
+#define si_tc	si_i.idp_tc
+#define si_pt	si_i.idp_pt
+#define si_dna	si_i.idp_dna
+#define si_sna	si_i.idp_sna
+#define si_sport	si_i.idp_sna.x_port
+#define si_cc	si_s.sp_cc	/* si_cc..si_alo: shorthands into si_s */
+#define si_dt	si_s.sp_dt
+#define si_sid	si_s.sp_sid
+#define si_did	si_s.sp_did
+#define si_seq	si_s.sp_seq
+#define si_ack	si_s.sp_ack
+#define si_alo	si_s.sp_alo
diff --git a/sys/netns/spp_debug.c b/sys/netns/spp_debug.c
new file mode 100644
index 00000000000..eaa1d023f87
--- /dev/null
+++ b/sys/netns/spp_debug.c
@@ -0,0 +1,170 @@
+/*
+ * Copyright (c) 1984, 1985, 1986, 1987, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)spp_debug.c	8.1 (Berkeley) 6/10/93
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/protosw.h>
+#include <sys/errno.h>
+
+#include <net/route.h>
+#include <net/if.h>
+#include <netinet/tcp_fsm.h>
+
+#include <netns/ns.h>
+#include <netns/ns_pcb.h>
+#include <netns/idp.h>
+#include <netns/idp_var.h>
+#include <netns/sp.h>
+#include <netns/spidp.h>
+#define SPPTIMERS
+#include <netns/spp_timer.h>
+#include <netns/spp_var.h>
+#define	SANAMES
+#include <netns/spp_debug.h>
+
+int	sppconsdebug = 0;	/* nonzero: spp_trace also prints each event */
+/*
+ * Record one SPP event in spp_debug[]; print it if sppconsdebug is set.
+ */
+spp_trace(act, ostate, sp, si, req)
+	short act;		/* SA_* event code */
+	u_char ostate;		/* state before the event */
+	struct sppcb *sp;	/* control block, may be 0 */
+	struct spidp *si;	/* packet headers, may be 0 */
+	int req;		/* request in low 8 bits, timer index above */
+{
+#ifdef INET
+#ifdef TCPDEBUG
+	u_short seq, ack, len, alo;
+	unsigned long iptime();
+	int flags;
+	struct spp_debug *sd = &spp_debug[spp_debx++];	/* next ring slot */
+	extern char *prurequests[];
+	extern char *sanames[];
+	extern char *tcpstates[];
+	extern char *spptimers[];
+
+	if (spp_debx == SPP_NDEBUG)
+		spp_debx = 0;	/* wrap around */
+	sd->sd_time = iptime();
+	sd->sd_act = act;
+	sd->sd_ostate = ostate;
+	sd->sd_cb = (caddr_t)sp;
+	if (sp)
+		sd->sd_sp = *sp;	/* snapshot of the whole control block */
+	else
+		bzero((caddr_t)&sd->sd_sp, sizeof (*sp));
+	if (si)
+		sd->sd_si = *si;	/* snapshot of the headers */
+	else
+		bzero((caddr_t)&sd->sd_si, sizeof (*si));
+	sd->sd_req = req;	/* sd_req is a short; req is an int */
+	if (sppconsdebug == 0)
+		return;		/* record only, no console output */
+	if (ostate >= TCP_NSTATES) ostate = 0;	/* keep tcpstates[] index sane */
+	if (act >= SA_DROP) act = SA_DROP;	/* upper clamp only */
+	if (sp)
+		printf("%x %s:", sp, tcpstates[ostate]);
+	else
+		printf("???????? ");
+	printf("%s ", sanames[act]);
+	switch (act) {
+
+	case SA_RESPOND:
+	case SA_INPUT:
+	case SA_OUTPUT:
+	case SA_DROP:
+		if (si == 0)
+			break;
+		seq = si->si_seq;
+		ack = si->si_ack;
+		alo = si->si_alo;
+		len = si->si_len;
+		if (act == SA_OUTPUT) {	/* only this case is byte-swapped */
+			seq = ntohs(seq);
+			ack = ntohs(ack);
+			alo = ntohs(alo);
+			len = ntohs(len);
+		}
+#ifndef lint	/* NOTE(review): "f" shows a name only with old-style cpp */
+#define p1(f)  { printf("%s = %x, ", "f", f); }
+		p1(seq); p1(ack); p1(alo); p1(len);
+#endif
+		flags = si->si_cc;
+		if (flags) {
+			char *cp = "<";	/* separator: "<" first, then "," */
+#ifndef lint	/* NOTE(review): pf pastes via an empty comment (old cpp) */
+#define pf(f) { if (flags&SP_/**/f) { printf("%s%s", cp, "f"); cp = ","; } }
+			pf(SP); pf(SA); pf(OB); pf(EM);
+#else
+			cp = cp;	/* presumably keeps lint quiet about cp */
+#endif
+			printf(">");
+		}
+#ifndef lint
+#define p2(f)  { printf("%s = %x, ", "f", si->si_/**/f); }
+		p2(sid);p2(did);p2(dt);p2(pt);
+#endif
+		ns_printhost(&si->si_sna);
+		ns_printhost(&si->si_dna);
+
+		if (act==SA_RESPOND) {
+			printf("idp_len = %x, ",
+				((struct idp *)si)->idp_len);
+		}
+		break;
+
+	case SA_USER:
+		printf("%s", prurequests[req&0xff]);
+		if ((req & 0xff) == PRU_SLOWTIMO)
+			printf("<%s>", spptimers[req>>8]);
+		break;
+	}
+	if (sp)
+		printf(" -> %s", tcpstates[sp->s_state]);	/* state now */
+	/* print out internal state of sp !?! */
+	printf("\n");
+	if (sp == 0)
+		return;
+#ifndef lint
+#define p3(f)  { printf("%s = %x, ", "f", sp->s_/**/f); }
+	printf("\t"); p3(rack);p3(ralo);p3(smax);p3(flags); printf("\n");
+#endif
+#endif	/* TCPDEBUG */
+#endif	/* INET */
+}
diff --git a/sys/netns/spp_debug.h b/sys/netns/spp_debug.h
new file mode 100644
index 00000000000..8dfe2422069
--- /dev/null
+++ b/sys/netns/spp_debug.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 1984, 1985, 1986, 1987, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)spp_debug.h	8.1 (Berkeley) 6/10/93
+ */
+
+struct	spp_debug {		/* one trace record, filled in by spp_trace() */
+	u_long	sd_time;	/* iptime() value when recorded */
+	short	sd_act;		/* SA_* code below */
+	short	sd_ostate;	/* state before the event */
+	caddr_t	sd_cb;		/* address of the sppcb (may be 0) */
+	short	sd_req;		/* request argument, stored as a short */
+	struct	spidp sd_si;	/* copy of the packet headers, or zeros */
+	struct	sppcb sd_sp;	/* copy of the control block, or zeros */
+};
+
+#define	SA_INPUT 	0	/* SA_*: event codes; they index sanames[] */
+#define	SA_OUTPUT	1
+#define	SA_USER		2
+#define	SA_RESPOND	3
+#define	SA_DROP		4
+
+#ifdef SANAMES			/* defined by the one file that owns the table */
+char	*sanames[] =
+    { "input", "output", "user", "respond", "drop" };
+#endif
+
+#define	SPP_NDEBUG 100		/* number of records in the trace ring */
+struct	spp_debug spp_debug[SPP_NDEBUG];
+int	spp_debx;		/* index of the next record to fill */
diff --git a/sys/netns/spp_timer.h b/sys/netns/spp_timer.h
new file mode 100644
index 00000000000..f84e3282a23
--- /dev/null
+++ b/sys/netns/spp_timer.h
@@ -0,0 +1,122 @@
+/*
+ * Copyright (c) 1982, 1986, 1988, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)spp_timer.h	8.1 (Berkeley) 6/10/93
+ */
+
+/*
+ * Definitions of the SPP timers.  These timers are counted
+ * down PR_SLOWHZ times a second.
+ */
+#define	SPPT_NTIMERS	4		/* number of SPPT_* timers below */
+
+#define	SPPT_REXMT	0		/* retransmit */
+#define	SPPT_PERSIST	1		/* retransmit persistence */
+#define	SPPT_KEEP	2		/* keep alive */
+#define	SPPT_2MSL	3		/* 2*msl quiet time timer */
+
+/*
+ * The SPPT_REXMT timer is used to force retransmissions.
+ * The SPP has the SPPT_REXMT timer set whenever segments
+ * have been sent for which ACKs are expected but not yet
+ * received.  If an ACK is received which advances tp->snd_una,
+ * then the retransmit timer is cleared (if there are no more
+ * outstanding segments) or reset to the base value (if there
+ * are more ACKs expected).  Whenever the retransmit timer goes off,
+ * we retransmit one unacknowledged segment, and do a backoff
+ * on the retransmit timer.
+ *
+ * The SPPT_PERSIST timer is used to keep window size information
+ * flowing even if the window goes shut.  If all previous transmissions
+ * have been acknowledged (so that there are no retransmissions in progress),
+ * and the window is too small to bother sending anything, then we start
+ * the SPPT_PERSIST timer.  When it expires, if the window is nonzero,
+ * we go to transmit state.  Otherwise, at intervals send a single byte
+ * into the peer's window to force him to update our window information.
+ * We do this at most as often as SPPTV_PERSMIN time intervals,
+ * but no more frequently than the current estimate of round-trip
+ * packet time.  The SPPT_PERSIST timer is cleared whenever we receive
+ * a window update from the peer.
+ *
+ * The SPPT_KEEP timer is used to keep connections alive.  If a
+ * connection is idle (no segments received) for SPPTV_KEEP amount of time,
+ * but not yet established, then we drop the connection.  If the connection
+ * is established, then we force the peer to send us a segment by sending:
+ *	<SEQ=SND.UNA-1><ACK=RCV.NXT><CTL=ACK>
+ * This segment is (deliberately) outside the window, and should elicit
+ * an ack segment in response from the peer.  If, despite the SPPT_KEEP
+ * initiated segments we cannot elicit a response from a peer in SPPTV_MAXIDLE
+ * amount of time, then we drop the connection.
+ */
+
+#define	SPP_TTL		30		/* default time to live for SPP segs */
+/*
+ * Time constants, expressed as multiples of PR_SLOWHZ.
+ */
+#define	SPPTV_MSL	( 15*PR_SLOWHZ)		/* max seg lifetime */
+#define	SPPTV_SRTTBASE	0			/* base roundtrip time;
+						   if 0, no idea yet */
+#define	SPPTV_SRTTDFLT	(  3*PR_SLOWHZ)		/* assumed RTT if no info */
+
+#define	SPPTV_PERSMIN	(  5*PR_SLOWHZ)		/* minimum persist interval */
+#define	SPPTV_PERSMAX	( 60*PR_SLOWHZ)		/* maximum persist interval */
+
+#define	SPPTV_KEEP	( 75*PR_SLOWHZ)		/* keep alive - 75 secs */
+#define	SPPTV_MAXIDLE	(  8*SPPTV_KEEP)	/* maximum allowable idle
+						   time before drop conn */
+
+#define	SPPTV_MIN	(  1*PR_SLOWHZ)		/* minimum allowable value */
+#define	SPPTV_REXMTMAX	( 64*PR_SLOWHZ)		/* max allowable REXMT value */
+
+#define	SPP_LINGERTIME	120			/* linger at most 2 minutes */
+
+#define	SPP_MAXRXTSHIFT	12			/* maximum retransmits */
+
+#ifdef	SPPTIMERS
+char *spptimers[] =		/* printable names, in SPPT_* index order */
+    { "REXMT", "PERSIST", "KEEP", "2MSL" };
+#endif
+
+/*
+ * Force a time value into [tvmin, tvmax]; tv, tvmin, tvmax expand repeatedly.
+ */
+#define	SPPT_RANGESET(tv, value, tvmin, tvmax) { \
+	(tv) = (value); \
+	if ((tv) < (tvmin)) \
+		(tv) = (tvmin); \
+	else if ((tv) > (tvmax)) \
+		(tv) = (tvmax); \
+}	/* a bare brace block: "SPPT_RANGESET(...); else" will not compile */
+
+#ifdef KERNEL
+extern int spp_backoff[];	/* defined elsewhere; not visible here */
+#endif
diff --git a/sys/netns/spp_usrreq.c b/sys/netns/spp_usrreq.c
new file mode 100644
index 00000000000..062bbec5fab
--- /dev/null
+++ b/sys/netns/spp_usrreq.c
@@ -0,0 +1,1804 @@
+/*
+ * Copyright (c) 1984, 1985, 1986, 1987, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)spp_usrreq.c	8.1 (Berkeley) 6/10/93
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/protosw.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/errno.h>
+
+#include <net/if.h>
+#include <net/route.h>
+#include <netinet/tcp_fsm.h>
+
+#include <netns/ns.h>
+#include <netns/ns_pcb.h>
+#include <netns/idp.h>
+#include <netns/idp_var.h>
+#include <netns/ns_error.h>
+#include <netns/sp.h>
+#include <netns/spidp.h>
+#include <netns/spp_timer.h>
+#include <netns/spp_var.h>
+#include <netns/spp_debug.h>
+
+/*
+ * SP protocol implementation.
+ */
+spp_init()
+{
+	/* spp_iss starts at a constant; the note below calls that a stopgap. */
+	spp_iss = 1; /* WRONG !! should fish it out of TODR */
+}
+struct spidp spp_savesi;	/* header copy spp_input saves for tracing */
+int traceallspps = 0;		/* nonzero: trace even without SO_DEBUG */
+extern int sppconsdebug;
+int spp_hardnosed;		/* nonzero: LISTEN resets stray packets */
+int spp_use_delack = 0;		/* nonzero: SP_SA sets SF_DELACK, not ACKNOW */
+u_short spp_newchecks[50];	/* counters bumped on the SF_NEWCALL paths */
+
+/*
+ * SPP input: run connection setup, then hand the packet to spp_reass().
+ */
+/*ARGSUSED*/
+spp_input(m, nsp)
+	register struct mbuf *m;
+	register struct nspcb *nsp;
+{
+	register struct sppcb *cb;
+	register struct spidp *si = mtod(m, struct spidp *);
+	register struct socket *so;
+	short ostate = 0;	/* real value is captured only when tracing */
+	int dropsocket = 0;	/* nonzero: so is new and not yet committed */
+
+	sppstat.spps_rcvtotal++;
+	if (nsp == 0) {
+		panic("No nspcb in spp_input\n");
+		return;
+	}
+
+	cb = nstosppcb(nsp);
+	if (cb == 0) goto bad;
+
+	if (m->m_len < sizeof(*si)) {
+		if ((m = m_pullup(m, sizeof(*si))) == 0) {
+			sppstat.spps_rcvshort++;
+			return;
+		}
+		si = mtod(m, struct spidp *);
+	}
+	si->si_seq = ntohs(si->si_seq);
+	si->si_ack = ntohs(si->si_ack);
+	si->si_alo = ntohs(si->si_alo);
+
+	so = nsp->nsp_socket;
+	if (so->so_options & SO_DEBUG || traceallspps) {
+		ostate = cb->s_state;
+		spp_savesi = *si;
+	}
+	if (so->so_options & SO_ACCEPTCONN) {
+		struct sppcb *ocb = cb;
+
+		so = sonewconn(so, 0);
+		if (so == 0)
+			goto drop;
+		/*
+		 * This is ugly, but ....
+		 *
+		 * Mark socket as temporary until we're committed to
+		 * keeping it (below, in TCPS_LISTEN).  Until then the
+		 * code at ``drop'' and ``dropwithreset'' sees dropsocket
+		 * set and discards the socket created here.
+		 */
+		dropsocket++;
+		nsp = (struct nspcb *)so->so_pcb;
+		nsp->nsp_laddr = si->si_dna;
+		cb = nstosppcb(nsp);
+		cb->s_mtu = ocb->s_mtu;		/* preserve sockopts */
+		cb->s_flags = ocb->s_flags;	/* preserve sockopts */
+		cb->s_flags2 = ocb->s_flags2;	/* preserve sockopts */
+		cb->s_state = TCPS_LISTEN;
+	}
+
+	/*
+	 * Packet received on connection.
+	 * reset idle time and keep-alive timer;
+	 */
+	cb->s_idle = 0;
+	cb->s_timer[SPPT_KEEP] = SPPTV_KEEP;
+
+	switch (cb->s_state) {
+
+	case TCPS_LISTEN:{
+		struct mbuf *am;
+		register struct sockaddr_ns *sns;
+		struct ns_addr laddr;
+
+		/*
+		 * If somebody here was carrying on a conversation
+		 * and went away, and his pen pal thinks he can
+		 * still talk, we get the misdirected packet.
+		 */
+		if (spp_hardnosed && (si->si_did != 0 || si->si_seq != 0)) {
+			spp_istat.gonawy++;
+			goto dropwithreset;
+		}
+		am = m_get(M_DONTWAIT, MT_SONAME);
+		if (am == NULL)
+			goto drop;
+		am->m_len = sizeof (struct sockaddr_ns);
+		sns = mtod(am, struct sockaddr_ns *);
+		sns->sns_len = sizeof(*sns);
+		sns->sns_family = AF_NS;
+		sns->sns_addr = si->si_sna;
+		laddr = nsp->nsp_laddr;
+		if (ns_nullhost(laddr))
+			nsp->nsp_laddr = si->si_dna;
+		if (ns_pcbconnect(nsp, am)) {
+			nsp->nsp_laddr = laddr;
+			(void) m_free(am);
+			spp_istat.noconn++;
+			goto drop;
+		}
+		(void) m_free(am);
+		spp_template(cb);
+		dropsocket = 0;		/* committed to socket */
+		cb->s_did = si->si_sid;
+		cb->s_rack = si->si_ack;
+		cb->s_ralo = si->si_alo;
+#define THREEWAYSHAKE
+#ifdef THREEWAYSHAKE
+		cb->s_state = TCPS_SYN_RECEIVED;
+		cb->s_force = 1 + SPPT_KEEP;
+		sppstat.spps_accepts++;
+		cb->s_timer[SPPT_KEEP] = SPPTV_KEEP;
+		}
+		break;
+	/*
+	 * This state means that we have heard a response
+	 * to our acceptance of their connection
+	 * It is probably logically unnecessary in this
+	 * implementation.
+	 */
+	 case TCPS_SYN_RECEIVED: {
+		if (si->si_did!=cb->s_sid) {
+			spp_istat.wrncon++;
+			goto drop;
+		}
+#endif
+		nsp->nsp_fport =  si->si_sport;
+		cb->s_timer[SPPT_REXMT] = 0;
+		cb->s_timer[SPPT_KEEP] = SPPTV_KEEP;
+		soisconnected(so);
+		cb->s_state = TCPS_ESTABLISHED;
+		sppstat.spps_accepts++;
+		}
+		break;
+
+	/*
+	 * This state means that we have gotten a response
+	 * to our attempt to establish a connection.
+	 * We fill in the data from the other side,
+	 * telling us which port to respond to, instead of the well-
+	 * known one we might have sent to in the first place.
+	 * We also require that this is a response to our
+	 * connection id.
+	 */
+	case TCPS_SYN_SENT:
+		if (si->si_did!=cb->s_sid) {
+			spp_istat.notme++;
+			goto drop;
+		}
+		sppstat.spps_connects++;
+		cb->s_did = si->si_sid;
+		cb->s_rack = si->si_ack;
+		cb->s_ralo = si->si_alo;
+		cb->s_dport = nsp->nsp_fport =  si->si_sport;
+		cb->s_timer[SPPT_REXMT] = 0;
+		cb->s_flags |= SF_ACKNOW;
+		soisconnected(so);
+		cb->s_state = TCPS_ESTABLISHED;
+		/* Use roundtrip time of connection request for initial rtt */
+		if (cb->s_rtt) {
+			cb->s_srtt = cb->s_rtt << 3;
+			cb->s_rttvar = cb->s_rtt << 1;
+			SPPT_RANGESET(cb->s_rxtcur,
+			    ((cb->s_srtt >> 2) + cb->s_rttvar) >> 1,
+			    SPPTV_MIN, SPPTV_REXMTMAX);
+			    cb->s_rtt = 0;
+		}
+	}
+	if (so->so_options & SO_DEBUG || traceallspps)
+		spp_trace(SA_INPUT, (u_char)ostate, cb, &spp_savesi, 0);
+
+	m->m_len -= sizeof (struct idp);
+	m->m_pkthdr.len -= sizeof (struct idp);
+	m->m_data += sizeof (struct idp);
+
+	if (spp_reass(cb, si)) {
+		(void) m_freem(m);
+	}
+	if (cb->s_force || (cb->s_flags & (SF_ACKNOW|SF_WIN|SF_RXT)))
+		(void) spp_output(cb, (struct mbuf *)0);
+	cb->s_flags &= ~(SF_WIN|SF_RXT);
+	return;
+
+dropwithreset:
+	si->si_seq = htons(si->si_seq);	/* back to network order */
+	si->si_ack = htons(si->si_ack);
+	si->si_alo = htons(si->si_alo);
+	ns_error(dtom(si), NS_ERR_NOSOCK, 0);
+	if (cb->s_nspcb->nsp_socket->so_options & SO_DEBUG || traceallspps)
+		spp_trace(SA_DROP, (u_char)ostate, cb, &spp_savesi, 0);
+	if (dropsocket)		/* only after the last use of cb above */
+		(void) soabort(so);
+	return;
+
+drop:
+bad:
+	if (cb == 0 || cb->s_nspcb->nsp_socket->so_options & SO_DEBUG ||
+	    traceallspps)
+		spp_trace(SA_DROP, (u_char)ostate, cb, &spp_savesi, 0);
+	m_freem(m);
+	if (dropsocket)		/* discard the uncommitted socket, if any */
+		(void) soabort(so);
+}
+
+int spprexmtthresh = 3;		/* duplicate acks that trigger a retransmit */
+
+/*
+ * Like the tcp reassembly routine in structure, but it merely queues
+ * packets up and suppresses duplicates.  Returns 1 when the caller is
+ * left to free the packet (spp_input does so), 0 otherwise.
+ */
+spp_reass(cb, si)
+register struct sppcb *cb;
+register struct spidp *si;
+{
+	register struct spidp_q *q;
+	register struct mbuf *m;
+	register struct socket *so = cb->s_nspcb->nsp_socket;
+	char packetp = cb->s_flags & SF_HI;	/* set: deliver with headers */
+	int incr;
+	char wakeup = 0;	/* set once something has been delivered */
+
+	if (si == SI(0))	/* no packet: only deliver queued data */
+		goto present;
+	/*
+	 * Update our news from them.
+	 */
+	if (si->si_cc & SP_SA)
+		cb->s_flags |= (spp_use_delack ? SF_DELACK : SF_ACKNOW);
+	if (SSEQ_GT(si->si_alo, cb->s_ralo))
+		cb->s_flags |= SF_WIN;
+	if (SSEQ_LEQ(si->si_ack, cb->s_rack)) {
+		if ((si->si_cc & SP_SP) && cb->s_rack != (cb->s_smax + 1)) {
+			sppstat.spps_rcvdupack++;
+			/*
+			 * If this is a completely duplicate ack
+			 * and other conditions hold, we assume
+			 * a packet has been dropped and retransmit
+			 * it exactly as in tcp_input().
+			 */
+			if (si->si_ack != cb->s_rack ||
+			    si->si_alo != cb->s_ralo)
+				cb->s_dupacks = 0;
+			else if (++cb->s_dupacks == spprexmtthresh) {
+				u_short onxt = cb->s_snxt;
+				int cwnd = cb->s_cwnd;
+
+				cb->s_snxt = si->si_ack;
+				cb->s_cwnd = CUNIT;
+				cb->s_force = 1 + SPPT_REXMT;
+				(void) spp_output(cb, (struct mbuf *)0);
+				cb->s_timer[SPPT_REXMT] = cb->s_rxtcur;
+				cb->s_rtt = 0;
+				if (cwnd >= 4 * CUNIT)
+					cb->s_cwnd = cwnd / 2;
+				if (SSEQ_GT(onxt, cb->s_snxt))
+					cb->s_snxt = onxt;
+				return (1);
+			}
+		} else
+			cb->s_dupacks = 0;
+		goto update_window;
+	}
+	cb->s_dupacks = 0;
+	/*
+	 * If our correspondent acknowledges data we haven't sent
+	 * TCP would drop the packet after acking.  We'll be a little
+	 * more permissive
+	 */
+	if (SSEQ_GT(si->si_ack, (cb->s_smax + 1))) {
+		sppstat.spps_rcvacktoomuch++;
+		si->si_ack = cb->s_smax + 1;
+	}
+	sppstat.spps_rcvackpack++;
+	/*
+	 * If transmit timer is running and timed sequence
+	 * number was acked, update smoothed round trip time.
+	 * See discussion of algorithm in tcp_input.c
+	 */
+	if (cb->s_rtt && SSEQ_GT(si->si_ack, cb->s_rtseq)) {
+		sppstat.spps_rttupdated++;
+		if (cb->s_srtt != 0) {
+			register short delta;
+			delta = cb->s_rtt - (cb->s_srtt >> 3);
+			if ((cb->s_srtt += delta) <= 0)
+				cb->s_srtt = 1;
+			if (delta < 0)
+				delta = -delta;
+			delta -= (cb->s_rttvar >> 2);
+			if ((cb->s_rttvar += delta) <= 0)
+				cb->s_rttvar = 1;
+		} else {
+			/*
+			 * No rtt measurement yet
+			 */
+			cb->s_srtt = cb->s_rtt << 3;
+			cb->s_rttvar = cb->s_rtt << 1;
+		}
+		cb->s_rtt = 0;
+		cb->s_rxtshift = 0;
+		SPPT_RANGESET(cb->s_rxtcur,
+			((cb->s_srtt >> 2) + cb->s_rttvar) >> 1,
+			SPPTV_MIN, SPPTV_REXMTMAX);
+	}
+	/*
+	 * If all outstanding data is acked, stop retransmit
+	 * timer and remember to restart (more output or persist).
+	 * If there is more data to be acked, restart retransmit
+	 * timer, using current (possibly backed-off) value;
+	 */
+	if (si->si_ack == cb->s_smax + 1) {
+		cb->s_timer[SPPT_REXMT] = 0;
+		cb->s_flags |= SF_RXT;
+	} else if (cb->s_timer[SPPT_PERSIST] == 0)
+		cb->s_timer[SPPT_REXMT] = cb->s_rxtcur;
+	/*
+	 * When new data is acked, open the congestion window.
+	 * If the window gives us less than ssthresh packets
+	 * in flight, open exponentially (maxseg at a time).
+	 * Otherwise open linearly (maxseg^2 / cwnd at a time).
+	 */
+	incr = CUNIT;
+	if (cb->s_cwnd > cb->s_ssthresh)
+		incr = max(incr * incr / cb->s_cwnd, 1);
+	cb->s_cwnd = min(cb->s_cwnd + incr, cb->s_cwmx);
+	/*
+	 * Trim Acked data from output queue.
+	 */
+	while ((m = so->so_snd.sb_mb) != NULL) {
+		if (SSEQ_LT((mtod(m, struct spidp *))->si_seq, si->si_ack))
+			sbdroprecord(&so->so_snd);
+		else
+			break;
+	}
+	sowwakeup(so);
+	cb->s_rack = si->si_ack;
+update_window:
+	if (SSEQ_LT(cb->s_snxt, cb->s_rack))
+		cb->s_snxt = cb->s_rack;
+	if (SSEQ_LT(cb->s_swl1, si->si_seq) || cb->s_swl1 == si->si_seq &&
+	    (SSEQ_LT(cb->s_swl2, si->si_ack) ||
+	     cb->s_swl2 == si->si_ack && SSEQ_LT(cb->s_ralo, si->si_alo))) {
+		/* keep track of pure window updates */
+		if ((si->si_cc & SP_SP) && cb->s_swl2 == si->si_ack
+		    && SSEQ_LT(cb->s_ralo, si->si_alo)) {
+			sppstat.spps_rcvwinupd++;
+			sppstat.spps_rcvdupack--;
+		}
+		cb->s_ralo = si->si_alo;
+		cb->s_swl1 = si->si_seq;
+		cb->s_swl2 = si->si_ack;
+		cb->s_swnd = (1 + si->si_alo - si->si_ack);
+		if (cb->s_swnd > cb->s_smxw)
+			cb->s_smxw = cb->s_swnd;
+		cb->s_flags |= SF_WIN;
+	}
+	/*
+	 * If this packet number is higher than that which
+	 * we have allocated refuse it, unless urgent
+	 */
+	if (SSEQ_GT(si->si_seq, cb->s_alo)) {
+		if (si->si_cc & SP_SP) {
+			sppstat.spps_rcvwinprobe++;
+			return (1);
+		} else
+			sppstat.spps_rcvpackafterwin++;
+		if (si->si_cc & SP_OB) {
+			if (SSEQ_GT(si->si_seq, cb->s_alo + 60)) {
+				ns_error(dtom(si), NS_ERR_FULLUP, 0);
+				return (0);
+			} /* else queue this packet; */
+		} else {
+			/*register struct socket *so = cb->s_nspcb->nsp_socket;
+			if (so->so_state && SS_NOFDREF) {
+				ns_error(dtom(si), NS_ERR_NOSOCK, 0);
+				(void)spp_close(cb);
+			} else
+				       would crash system*/
+			spp_istat.notyet++;
+			ns_error(dtom(si), NS_ERR_FULLUP, 0);
+			return (0);
+		}
+	}
+	/*
+	 * If this is a system packet, we don't need to
+	 * queue it up, and won't update acknowledge #
+	 */
+	if (si->si_cc & SP_SP) {
+		return (1);
+	}
+	/*
+	 * We have already seen this packet, so drop.
+	 */
+	if (SSEQ_LT(si->si_seq, cb->s_ack)) {
+		spp_istat.bdreas++;
+		sppstat.spps_rcvduppack++;
+		if (si->si_seq == cb->s_ack - 1)
+			spp_istat.lstdup++;
+		return (1);
+	}
+	/*
+	 * Loop through all packets queued up to insert in
+	 * appropriate sequence.
+	 */
+	for (q = cb->s_q.si_next; q!=&cb->s_q; q = q->si_next) {
+		if (si->si_seq == SI(q)->si_seq) {
+			sppstat.spps_rcvduppack++;
+			return (1);
+		}
+		if (SSEQ_LT(si->si_seq, SI(q)->si_seq)) {
+			sppstat.spps_rcvoopack++;
+			break;
+		}
+	}
+	insque(si, q->si_prev);	/* link in ahead of q, keeping sequence order */
+	/*
+	 * If this packet is urgent, inform process
+	 */
+	if (si->si_cc & SP_OB) {
+		cb->s_iobc = ((char *)si)[1 + sizeof(*si)];
+		sohasoutofband(so);
+		cb->s_oobflags |= SF_IOOB;
+	}
+present:
+#define SPINC sizeof(struct sphdr)
+	/*
+	 * Loop through all packets queued up to update acknowledge
+	 * number, and present all acknowledged data to user;
+	 * If in packet interface mode, show packet headers.
+	 */
+	for (q = cb->s_q.si_next; q!=&cb->s_q; q = q->si_next) {
+		  if (SI(q)->si_seq == cb->s_ack) {
+			cb->s_ack++;
+			m = dtom(q);
+			if (SI(q)->si_cc & SP_OB) {
+				cb->s_oobflags &= ~SF_IOOB;
+				if (so->so_rcv.sb_cc)
+					so->so_oobmark = so->so_rcv.sb_cc;
+				else
+					so->so_state |= SS_RCVATMARK;
+			}
+			q = q->si_prev;
+			remque(q->si_next);
+			wakeup = 1;
+			sppstat.spps_rcvpack++;
+#ifdef SF_NEWCALL
+			if (cb->s_flags2 & SF_NEWCALL) {
+				struct sphdr *sp = mtod(m, struct sphdr *);
+				u_char dt = sp->sp_dt;
+				spp_newchecks[4]++;
+				if (dt != cb->s_rhdr.sp_dt) {
+					struct mbuf *mm =
+					   m_getclr(M_DONTWAIT, MT_CONTROL);
+					spp_newchecks[0]++;
+					if (mm != NULL) {
+						u_short *s =
+							mtod(mm, u_short *);
+						cb->s_rhdr.sp_dt = dt;
+						mm->m_len = 5; /*XXX*/
+						s[0] = 5;
+						s[1] = 1;
+						*(u_char *)(&s[2]) = dt;
+						sbappend(&so->so_rcv, mm);
+					}
+				}
+				if (sp->sp_cc & SP_OB) {
+					MCHTYPE(m, MT_OOBDATA);
+					spp_newchecks[1]++;
+					so->so_oobmark = 0;
+					so->so_state &= ~SS_RCVATMARK;
+				}
+				if (packetp == 0) {
+					m->m_data += SPINC;
+					m->m_len -= SPINC;
+					m->m_pkthdr.len -= SPINC;
+				}
+				if ((sp->sp_cc & SP_EM) || packetp) {
+					sbappendrecord(&so->so_rcv, m);
+					spp_newchecks[9]++;
+				} else
+					sbappend(&so->so_rcv, m);
+			} else
+#endif
+			if (packetp) {
+				sbappendrecord(&so->so_rcv, m);
+			} else {
+				cb->s_rhdr = *mtod(m, struct sphdr *);
+				m->m_data += SPINC;
+				m->m_len -= SPINC;
+				m->m_pkthdr.len -= SPINC;
+				sbappend(&so->so_rcv, m);
+			}
+		  } else
+			break;
+	}
+	if (wakeup) sorwakeup(so);
+	return (0);
+}
+
+/* Dispatch a PRC_* control request or an NS error report to the SPP pcbs. */
+spp_ctlinput(cmd, arg)
+	int cmd;
+	caddr_t arg;
+{
+	struct ns_addr *na;
+	extern u_char nsctlerrmap[];
+	extern spp_abort(), spp_quench();
+	extern struct nspcb *idp_drop();
+	struct ns_errp *errp;
+	struct nspcb *nsp;
+	struct sockaddr_ns *sns;
+	int type;
+
+	if (cmd < 0 || cmd >= PRC_NCMDS)	/* cmd indexes nsctlerrmap[] */
+		return;
+	type = NS_ERR_UNREACH_HOST;
+
+	switch (cmd) {
+
+	case PRC_ROUTEDEAD:
+		return;
+
+	case PRC_IFDOWN:
+	case PRC_HOSTDEAD:
+	case PRC_HOSTUNREACH:
+		/* here arg is a sockaddr_ns naming the affected host */
+		sns = (struct sockaddr_ns *)arg;
+		if (sns->sns_family != AF_NS)
+			return;
+		na = &sns->sns_addr;
+		break;
+
+	default:
+		/* otherwise arg is an NS error packet; errp is set only here */
+		errp = (struct ns_errp *)arg;
+		na = &errp->ns_err_idp.idp_dna;
+		type = ntohs((u_short)errp->ns_err_num);
+	}
+	switch (type) {
+
+	case NS_ERR_UNREACH_HOST:
+		ns_pcbnotify(na, (int)nsctlerrmap[cmd], spp_abort, (long) 0);
+		break;
+
+	case NS_ERR_TOO_BIG:
+	case NS_ERR_NOSOCK:
+		nsp = ns_pcblookup(na, errp->ns_err_idp.idp_sna.x_port,
+			NS_WILDCARD);
+		if (nsp) {
+			if(nsp->nsp_pcb)
+				(void) spp_drop((struct sppcb *)nsp->nsp_pcb,
+						(int)nsctlerrmap[cmd]);
+			else
+				(void) idp_drop(nsp, (int)nsctlerrmap[cmd]);
+		}
+		break;
+
+	case NS_ERR_FULLUP:
+		ns_pcbnotify(na, 0, spp_quench, (long) 0);
+	}
+}
+/*
+ * Source-quench handler: shrink the congestion window to a single
+ * CUNIT.  A pcb that has no SPP control block is left untouched.
+ */
+spp_quench(nsp)
+	struct nspcb *nsp;
+{
+	struct sppcb *quenched;
+
+	if ((quenched = nstosppcb(nsp)) != 0)
+		quenched->s_cwnd = CUNIT;
+}
+
+#ifdef notdef	/* unfinished (see FINISH THIS below); built only if defined */
+int
+spp_fixmtu(nsp)
+register struct nspcb *nsp;
+{
+	register struct sppcb *cb = (struct sppcb *)(nsp->nsp_pcb);
+	register struct mbuf *m;
+	register struct spidp *si;
+	struct ns_errp *ep;
+	struct sockbuf *sb;
+	int badseq, len;
+	struct mbuf *firstbad, *m0;
+
+	if (cb) {
+		/*
+		 * The notification that we have sent
+		 * too much is bad news -- we will
+		 * have to go through queued up so far
+		 * splitting ones which are too big and
+		 * reassigning sequence numbers and checksums.
+		 * we should then retransmit all packets from
+		 * one above the offending packet to the last one
+		 * we had sent (or our allocation)
+		 * then the offending one so that any queued
+		 * data at our destination will be discarded.
+		 */
+		 ep = (struct ns_errp *)nsp->nsp_notify_param;
+		 sb = &nsp->nsp_socket->so_snd;
+		 cb->s_mtu = ep->ns_err_param;
+		 badseq = SI(&ep->ns_err_idp)->si_seq;
+		 for (m = sb->sb_mb; m; m = m->m_act) {
+			si = mtod(m, struct spidp *);
+			if (si->si_seq == badseq)
+				break;
+		 }
+		 if (m == 0) return;
+		 firstbad = m;
+		 /*for (;;) {*/
+			/* calculate length */
+			for (m0 = m, len = 0; m ; m = m->m_next)
+				len += m->m_len;
+			if (len > cb->s_mtu) {
+			}
+		/* FINISH THIS
+		} */
+	}
+}
+#endif
+
+spp_output(cb, m0)
+	register struct sppcb *cb;
+	struct mbuf *m0;
+{
+	struct socket *so = cb->s_nspcb->nsp_socket;
+	register struct mbuf *m;
+	register struct spidp *si = (struct spidp *) 0;
+	register struct sockbuf *sb = &so->so_snd;
+	int len = 0, win, rcv_win;
+	short span, off, recordp = 0;
+	u_short alo;
+	int error = 0, sendalot;
+#ifdef notdef
+	int idle;
+#endif
+	struct mbuf *mprev;
+	extern int idpcksum;
+
+	if (m0) {
+		int mtu = cb->s_mtu;
+		int datalen;
+		/*
+		 * Make sure that packet isn't too big.
+		 */
+		for (m = m0; m ; m = m->m_next) {
+			mprev = m;
+			len += m->m_len;
+			if (m->m_flags & M_EOR)
+				recordp = 1;
+		}
+		datalen = (cb->s_flags & SF_HO) ?
+				len - sizeof (struct sphdr) : len;
+		if (datalen > mtu) {
+			if (cb->s_flags & SF_PI) {
+				m_freem(m0);
+				return (EMSGSIZE);
+			} else {
+				int oldEM = cb->s_cc & SP_EM;
+
+				cb->s_cc &= ~SP_EM;
+				while (len > mtu) {
+					/*
+					 * Here we are only being called
+					 * from usrreq(), so it is OK to
+					 * block.
+					 */
+					m = m_copym(m0, 0, mtu, M_WAIT);
+					if (cb->s_flags & SF_NEWCALL) {
+					    struct mbuf *mm = m;
+					    spp_newchecks[7]++;
+					    while (mm) {
+						mm->m_flags &= ~M_EOR;
+						mm = mm->m_next;
+					    }
+					}
+					error = spp_output(cb, m);
+					if (error) {
+						cb->s_cc |= oldEM;
+						m_freem(m0);
+						return(error);
+					}
+					m_adj(m0, mtu);
+					len -= mtu;
+				}
+				cb->s_cc |= oldEM;
+			}
+		}
+		/*
+		 * Force length even, by adding a "garbage byte" if
+		 * necessary.
+		 */
+		if (len & 1) {
+			m = mprev;
+			if (M_TRAILINGSPACE(m) >= 1)
+				m->m_len++;
+			else {
+				struct mbuf *m1 = m_get(M_DONTWAIT, MT_DATA);
+
+				if (m1 == 0) {
+					m_freem(m0);
+					return (ENOBUFS);
+				}
+				m1->m_len = 1;
+				*(mtod(m1, u_char *)) = 0;
+				m->m_next = m1;
+			}
+		}
+		m = m_gethdr(M_DONTWAIT, MT_HEADER);
+		if (m == 0) {
+			m_freem(m0);
+			return (ENOBUFS);
+		}
+		/*
+		 * Fill in mbuf with extended SP header
+		 * and addresses and length put into network format.
+		 */
+		MH_ALIGN(m, sizeof (struct spidp));
+		m->m_len = sizeof (struct spidp);
+		m->m_next = m0;
+		si = mtod(m, struct spidp *);
+		si->si_i = *cb->s_idp;
+		si->si_s = cb->s_shdr;
+		if ((cb->s_flags & SF_PI) && (cb->s_flags & SF_HO)) {
+			register struct sphdr *sh;
+			if (m0->m_len < sizeof (*sh)) {
+				if((m0 = m_pullup(m0, sizeof(*sh))) == NULL) {
+					(void) m_free(m);
+					m_freem(m0);
+					return (EINVAL);
+				}
+				m->m_next = m0;
+			}
+			sh = mtod(m0, struct sphdr *);
+			si->si_dt = sh->sp_dt;
+			si->si_cc |= sh->sp_cc & SP_EM;
+			m0->m_len -= sizeof (*sh);
+			m0->m_data += sizeof (*sh);
+			len -= sizeof (*sh);
+		}
+		len += sizeof(*si);
+		if ((cb->s_flags2 & SF_NEWCALL) && recordp) {
+			si->si_cc  |= SP_EM;
+			spp_newchecks[8]++;
+		}
+		if (cb->s_oobflags & SF_SOOB) {
+			/*
+			 * Per jqj@cornell:
+			 * make sure OB packets convey exactly 1 byte.
+			 * If the packet is 1 byte or larger, we
+			 * have already guaranteed there to be at least
+			 * one garbage byte for the checksum, and
+			 * extra bytes shouldn't hurt!
+			 */
+			if (len > sizeof(*si)) {
+				si->si_cc |= SP_OB;
+				len = (1 + sizeof(*si));
+			}
+		}
+		si->si_len = htons((u_short)len);
+		m->m_pkthdr.len = ((len - 1) | 1) + 1;
+		/*
+		 * queue stuff up for output
+		 */
+		sbappendrecord(sb, m);
+		cb->s_seq++;
+	}
+#ifdef notdef
+	idle = (cb->s_smax == (cb->s_rack - 1));
+#endif
+again:
+	sendalot = 0;
+	off = cb->s_snxt - cb->s_rack;
+	win = min(cb->s_swnd, (cb->s_cwnd/CUNIT));
+
+	/*
+	 * If in persist timeout with window of 0, send a probe.
+	 * Otherwise, if window is small but nonzero
+	 * and timer expired, send what we can and go into
+	 * transmit state.
+	 */
+	if (cb->s_force == 1 + SPPT_PERSIST) {
+		if (win != 0) {
+			cb->s_timer[SPPT_PERSIST] = 0;
+			cb->s_rxtshift = 0;
+		}
+	}
+	span = cb->s_seq - cb->s_rack;
+	len = min(span, win) - off;
+
+	if (len < 0) {
+		/*
+		 * Window shrank after we went into it.
+		 * If window shrank to 0, cancel pending
+		 * retransmission and pull s_snxt back
+		 * to (closed) window.  We will enter persist
+		 * state below.  If the window didn't close completely,
+		 * just wait for an ACK.
+		 */
+		len = 0;
+		if (win == 0) {
+			cb->s_timer[SPPT_REXMT] = 0;
+			cb->s_snxt = cb->s_rack;
+		}
+	}
+	if (len > 1)
+		sendalot = 1;
+	rcv_win = sbspace(&so->so_rcv);
+
+	/*
+	 * Send if we owe peer an ACK.
+	 */
+	if (cb->s_oobflags & SF_SOOB) {
+		/*
+		 * must transmit this out of band packet
+		 */
+		cb->s_oobflags &= ~ SF_SOOB;
+		sendalot = 1;
+		sppstat.spps_sndurg++;
+		goto found;
+	}
+	if (cb->s_flags & SF_ACKNOW)
+		goto send;
+	if (cb->s_state < TCPS_ESTABLISHED)
+		goto send;
+	/*
+	 * Silly window can't happen in spp.
+	 * Code from tcp deleted.
+	 */
+	if (len)
+		goto send;
+	/*
+	 * Compare available window to amount of window
+	 * known to peer (as advertised window less
+	 * next expected input.)  If the difference is at least two
+	 * packets or at least 35% of the maximum possible window,
+	 * then want to send a window update to peer.
+	 */
+	if (rcv_win > 0) {
+		u_short delta =  1 + cb->s_alo - cb->s_ack;
+		int adv = rcv_win - (delta * cb->s_mtu);
+		
+		if ((so->so_rcv.sb_cc == 0 && adv >= (2 * cb->s_mtu)) ||
+		    (100 * adv / so->so_rcv.sb_hiwat >= 35)) {
+			sppstat.spps_sndwinup++;
+			cb->s_flags |= SF_ACKNOW;
+			goto send;
+		}
+
+	}
+	/*
+	 * Many comments from tcp_output.c are appropriate here
+	 * including . . .
+	 * If send window is too small, there is data to transmit, and no
+	 * retransmit or persist is pending, then go to persist state.
+	 * If nothing happens soon, send when timer expires:
+	 * if window is nonzero, transmit what we can,
+	 * otherwise send a probe.
+	 */
+	if (so->so_snd.sb_cc && cb->s_timer[SPPT_REXMT] == 0 &&
+		cb->s_timer[SPPT_PERSIST] == 0) {
+			cb->s_rxtshift = 0;
+			spp_setpersist(cb);
+	}
+	/*
+	 * No reason to send a packet, just return.
+	 */
+	cb->s_outx = 1;
+	return (0);
+
+send:
+	/*
+	 * Find requested packet.
+	 */
+	si = 0;
+	if (len > 0) {
+		cb->s_want = cb->s_snxt;
+		for (m = sb->sb_mb; m; m = m->m_act) {
+			si = mtod(m, struct spidp *);
+			if (SSEQ_LEQ(cb->s_snxt, si->si_seq))
+				break;
+		}
+	found:
+		if (si) {
+			if (si->si_seq == cb->s_snxt)
+					cb->s_snxt++;
+				else
+					sppstat.spps_sndvoid++, si = 0;
+		}
+	}
+	/*
+	 * update window
+	 */
+	if (rcv_win < 0)
+		rcv_win = 0;
+	alo = cb->s_ack - 1 + (rcv_win / ((short)cb->s_mtu));
+	if (SSEQ_LT(alo, cb->s_alo)) 
+		alo = cb->s_alo;
+
+	if (si) {
+		/*
+		 * must make a copy of this packet for
+		 * idp_output to monkey with
+		 */
+		m = m_copy(dtom(si), 0, (int)M_COPYALL);
+		if (m == NULL) {
+			return (ENOBUFS);
+		}
+		si = mtod(m, struct spidp *);
+		if (SSEQ_LT(si->si_seq, cb->s_smax))
+			sppstat.spps_sndrexmitpack++;
+		else
+			sppstat.spps_sndpack++;
+	} else if (cb->s_force || cb->s_flags & SF_ACKNOW) {
+		/*
+		 * Must send an acknowledgement or a probe
+		 */
+		if (cb->s_force)
+			sppstat.spps_sndprobe++;
+		if (cb->s_flags & SF_ACKNOW)
+			sppstat.spps_sndacks++;
+		m = m_gethdr(M_DONTWAIT, MT_HEADER);
+		if (m == 0)
+			return (ENOBUFS);
+		/*
+		 * Fill in mbuf with extended SP header
+		 * and addresses and length put into network format.
+		 */
+		MH_ALIGN(m, sizeof (struct spidp));
+		m->m_len = sizeof (*si);
+		m->m_pkthdr.len = sizeof (*si);
+		si = mtod(m, struct spidp *);
+		si->si_i = *cb->s_idp;
+		si->si_s = cb->s_shdr;
+		si->si_seq = cb->s_smax + 1;
+		si->si_len = htons(sizeof (*si));
+		si->si_cc |= SP_SP;
+	} else {
+		cb->s_outx = 3;
+		if (so->so_options & SO_DEBUG || traceallspps)
+			spp_trace(SA_OUTPUT, cb->s_state, cb, si, 0);
+		return (0);
+	}
+	/*
+	 * Stuff checksum and output datagram.
+	 */
+	if ((si->si_cc & SP_SP) == 0) {
+		if (cb->s_force != (1 + SPPT_PERSIST) ||
+		    cb->s_timer[SPPT_PERSIST] == 0) {
+			/*
+			 * If this is a new packet and we are not currently 
+			 * timing anything, time this one.
+			 */
+			if (SSEQ_LT(cb->s_smax, si->si_seq)) {
+				cb->s_smax = si->si_seq;
+				if (cb->s_rtt == 0) {
+					sppstat.spps_segstimed++;
+					cb->s_rtseq = si->si_seq;
+					cb->s_rtt = 1;
+				}
+			}
+			/*
+			 * Set rexmt timer if not currently set,
+			 * Initial value for retransmit timer is smoothed
+			 * round-trip time + 2 * round-trip time variance.
+			 * Initialize shift counter which is used for backoff
+			 * of retransmit time.
+			 */
+			if (cb->s_timer[SPPT_REXMT] == 0 &&
+			    cb->s_snxt != cb->s_rack) {
+				cb->s_timer[SPPT_REXMT] = cb->s_rxtcur;
+				if (cb->s_timer[SPPT_PERSIST]) {
+					cb->s_timer[SPPT_PERSIST] = 0;
+					cb->s_rxtshift = 0;
+				}
+			}
+		} else if (SSEQ_LT(cb->s_smax, si->si_seq)) {
+			cb->s_smax = si->si_seq;
+		}
+	} else if (cb->s_state < TCPS_ESTABLISHED) {
+		if (cb->s_rtt == 0)
+			cb->s_rtt = 1; /* Time initial handshake */
+		if (cb->s_timer[SPPT_REXMT] == 0)
+			cb->s_timer[SPPT_REXMT] = cb->s_rxtcur;
+	}
+	{
+		/*
+		 * Do not request acks when we ack their data packets or
+		 * when we do a gratuitous window update.
+		 */
+		if (((si->si_cc & SP_SP) == 0) || cb->s_force)
+				si->si_cc |= SP_SA;
+		si->si_seq = htons(si->si_seq);
+		si->si_alo = htons(alo);
+		si->si_ack = htons(cb->s_ack);
+
+		if (idpcksum) {
+			si->si_sum = 0;
+			len = ntohs(si->si_len);
+			if (len & 1)
+				len++;
+			si->si_sum = ns_cksum(m, len);
+		} else
+			si->si_sum = 0xffff;
+
+		cb->s_outx = 4;
+		if (so->so_options & SO_DEBUG || traceallspps)
+			spp_trace(SA_OUTPUT, cb->s_state, cb, si, 0);
+
+		if (so->so_options & SO_DONTROUTE)
+			error = ns_output(m, (struct route *)0, NS_ROUTETOIF);
+		else
+			error = ns_output(m, &cb->s_nspcb->nsp_route, 0);
+	}
+	if (error) {
+		return (error);
+	}
+	sppstat.spps_sndtotal++;
+	/*
+	 * Data sent (as far as we can tell).
+	 * If this advertises a larger window than any other segment,
+	 * then remember the size of the advertized window.
+	 * Any pending ACK has now been sent.
+	 */
+	cb->s_force = 0;
+	cb->s_flags &= ~(SF_ACKNOW|SF_DELACK);
+	if (SSEQ_GT(alo, cb->s_alo))
+		cb->s_alo = alo;
+	if (sendalot)
+		goto again;
+	cb->s_outx = 5;
+	return (0);
+}
+
+/*
+ * Debugging switch: when nonzero, spp_setpersist() panics if it is
+ * entered while the retransmit timer is still running.
+ */
+int spp_do_persist_panics = 0;
+
+/*
+ * Arm (or re-arm) the persist timer of a connection.
+ * The interval is the base timeout derived from the smoothed round-trip
+ * estimate, scaled by the current backoff multiplier and clamped to
+ * [SPPTV_PERSMIN, SPPTV_PERSMAX].  The backoff shift is then advanced,
+ * saturating at SPP_MAXRXTSHIFT.
+ */
+spp_setpersist(cb)
+	register struct sppcb *cb;
+{
+	extern int spp_backoff[];
+	register int base;
+
+	base = (cb->s_rttvar + (cb->s_srtt >> 2)) >> 1;
+
+	if (spp_do_persist_panics && cb->s_timer[SPPT_REXMT])
+		panic("spp_output REXMT");
+
+	SPPT_RANGESET(cb->s_timer[SPPT_PERSIST],
+	    base * spp_backoff[cb->s_rxtshift],
+	    SPPTV_PERSMIN, SPPTV_PERSMAX);
+
+	if (cb->s_rxtshift < SPP_MAXRXTSHIFT)
+		cb->s_rxtshift += 1;
+}
+/*
+ * Get or set a socket option at the SPP level.
+ *
+ *	req	PRCO_GETOPT or PRCO_SETOPT
+ *	so	socket being queried or modified
+ *	level	protocol level; anything other than NSPROTO_SPP is
+ *		handed to idp_ctloutput()
+ *	name	option name (SO_HEADERS_ON_INPUT, SO_HEADERS_ON_OUTPUT,
+ *		SO_MTU, SO_LAST_HEADER, SO_DEFAULT_HEADERS, ...)
+ *	value	GETOPT: where the freshly allocated result mbuf is stored
+ *		SETOPT: the option mbuf, which is released here
+ *
+ * Returns 0 on success or an errno value (EINVAL, ENOBUFS).
+ */
+/*ARGSUSED*/
+spp_ctloutput(req, so, level, name, value)
+	int req, level;
+	struct socket *so;
+	int name;
+	struct mbuf **value;
+{
+	register struct mbuf *m;
+	struct nspcb *nsp = sotonspcb(so);
+	register struct sppcb *cb;
+	int mask, error = 0;
+
+	if (level != NSPROTO_SPP) {
+		/* This will have to be changed when we do more general
+		   stacking of protocols */
+		return (idp_ctloutput(req, so, level, name, value));
+	}
+	if (nsp == NULL) {
+		/*
+		 * The SETOPT path below always releases the option mbuf;
+		 * do the same here so it is not lost when there is no pcb.
+		 */
+		if (req == PRCO_SETOPT && value != NULL && *value != NULL)
+			m_freem(*value);
+		return (EINVAL);
+	}
+	cb = nstosppcb(nsp);
+
+	switch (req) {
+
+	case PRCO_GETOPT:
+		if (value == NULL)
+			return (EINVAL);
+		m = m_get(M_DONTWAIT, MT_DATA);
+		if (m == NULL)
+			return (ENOBUFS);
+		switch (name) {
+
+		case SO_HEADERS_ON_INPUT:
+			mask = SF_HI;
+			goto get_flags;
+
+		case SO_HEADERS_ON_OUTPUT:
+			mask = SF_HO;
+		get_flags:
+			m->m_len = sizeof(short);
+			*mtod(m, short *) = cb->s_flags & mask;
+			break;
+
+		case SO_MTU:
+			m->m_len = sizeof(u_short);
+			*mtod(m, u_short *) = cb->s_mtu;
+			break;
+
+		case SO_LAST_HEADER:
+			m->m_len = sizeof(struct sphdr);
+			*mtod(m, struct sphdr *) = cb->s_rhdr;
+			break;
+
+		case SO_DEFAULT_HEADERS:
+			/*
+			 * Only a struct sphdr is stored, so that is the
+			 * length to report; claiming sizeof(struct spidp)
+			 * would expose stale mbuf contents.
+			 */
+			m->m_len = sizeof(struct sphdr);
+			*mtod(m, struct sphdr *) = cb->s_shdr;
+			break;
+
+		default:
+			/* nothing valid to report */
+			m->m_len = 0;
+			error = EINVAL;
+		}
+		*value = m;
+		break;
+
+	case PRCO_SETOPT:
+		if (value == 0 || *value == 0) {
+			error = EINVAL;
+			break;
+		}
+		switch (name) {
+			int *ok;
+
+		case SO_HEADERS_ON_INPUT:
+			mask = SF_HI;
+			goto set_head;
+
+		case SO_HEADERS_ON_OUTPUT:
+			mask = SF_HO;
+		set_head:
+			/* header options only apply to the packet interface */
+			if (cb->s_flags & SF_PI) {
+				ok = mtod(*value, int *);
+				if (*ok)
+					cb->s_flags |= mask;
+				else
+					cb->s_flags &= ~mask;
+			} else error = EINVAL;
+			break;
+
+		case SO_MTU:
+			/*
+			 * s_mtu is used as a divisor when windows are
+			 * computed, so a zero MTU cannot be accepted.
+			 */
+			if (*(mtod(*value, u_short *)) == 0)
+				error = EINVAL;
+			else
+				cb->s_mtu = *(mtod(*value, u_short *));
+			break;
+
+#ifdef SF_NEWCALL
+		case SO_NEWCALL:
+			ok = mtod(*value, int *);
+			if (*ok) {
+				cb->s_flags2 |= SF_NEWCALL;
+				spp_newchecks[5]++;
+			} else {
+				cb->s_flags2 &= ~SF_NEWCALL;
+				spp_newchecks[6]++;
+			}
+			break;
+#endif
+
+		case SO_DEFAULT_HEADERS:
+			{
+				register struct sphdr *sp
+						= mtod(*value, struct sphdr *);
+				/* only the datastream type and the EM bit
+				   are taken from the caller's header */
+				cb->s_dt = sp->sp_dt;
+				cb->s_cc = sp->sp_cc & SP_EM;
+			}
+			break;
+
+		default:
+			error = EINVAL;
+		}
+		m_freem(*value);
+		break;
+	}
+	return (error);
+}
+
+/*
+ * User-request entry point for SPP sockets.
+ *
+ *	so		socket the request applies to
+ *	req		PRU_* request code
+ *	m		request dependent: data to send, a buffer to fill,
+ *			or a disguised non-mbuf argument
+ *	nam		request dependent: address mbuf, or a disguised
+ *			integer (the timer index for PRU_SLOWTIMO)
+ *	controlp	optional control mbuf (an ifnet pointer in disguise
+ *			for PRU_CONTROL)
+ *
+ * Returns 0 or an errno value.  The body runs at splnet and the
+ * caller's priority level is restored on every exit path.  Whatever is
+ * still held in m or controlp at the common exit is freed there; cases
+ * that hand the mbuf on, or that do not own it, clear the pointer first.
+ */
+/*ARGSUSED*/
+spp_usrreq(so, req, m, nam, controlp)
+	struct socket *so;
+	int req;
+	struct mbuf *m, *nam, *controlp;
+{
+	struct nspcb *nsp = sotonspcb(so);
+	/* NULL until a control block is known: PRU_ATTACH starts without one */
+	register struct sppcb *cb = NULL;
+	int s = splnet();
+	int error = 0, ostate;
+	struct mbuf *mm;
+	register struct sockbuf *sb;
+
+	if (req == PRU_CONTROL) {
+		error = ns_control(so, (int)m, (caddr_t)nam,
+			(struct ifnet *)controlp);
+		/* do not leave the priority level raised on this path */
+		splx(s);
+		return (error);
+	}
+	if (nsp == NULL) {
+		if (req != PRU_ATTACH) {
+			error = EINVAL;
+			goto release;
+		}
+	} else
+		cb = nstosppcb(nsp);
+
+	/* state before the request, for the trace record */
+	ostate = cb ? cb->s_state : 0;
+
+	switch (req) {
+
+	case PRU_ATTACH:
+		if (nsp != NULL) {
+			error = EISCONN;
+			break;
+		}
+		error = ns_pcballoc(so, &nspcb);
+		if (error)
+			break;
+		if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
+			error = soreserve(so, (u_long) 3072, (u_long) 3072);
+			if (error)
+				break;
+		}
+		nsp = sotonspcb(so);
+
+		mm = m_getclr(M_DONTWAIT, MT_PCB);
+		sb = &so->so_snd;
+
+		if (mm == NULL) {
+			error = ENOBUFS;
+			break;
+		}
+		cb = mtod(mm, struct sppcb *);
+		mm = m_getclr(M_DONTWAIT, MT_HEADER);
+		if (mm == NULL) {
+			/*
+			 * Give back the control block's own mbuf (not the
+			 * request argument m) and forget the stale pointer
+			 * so the trace hook below cannot touch it.
+			 * NOTE(review): the nspcb from ns_pcballoc() stays
+			 * attached on this path -- confirm the caller
+			 * tears it down.
+			 */
+			(void) m_free(dtom(cb));
+			cb = NULL;
+			error = ENOBUFS;
+			break;
+		}
+		cb->s_idp = mtod(mm, struct idp *);
+		cb->s_state = TCPS_LISTEN;
+		cb->s_smax = -1;
+		cb->s_swl1 = -1;
+		cb->s_q.si_next = cb->s_q.si_prev = &cb->s_q;
+		cb->s_nspcb = nsp;
+		cb->s_mtu = 576 - sizeof (struct spidp);
+		cb->s_cwnd = sbspace(sb) * CUNIT / cb->s_mtu;
+		cb->s_ssthresh = cb->s_cwnd;
+		cb->s_cwmx = sbspace(sb) * CUNIT /
+				(2 * sizeof (struct spidp));
+		/* Above is recomputed when connecting to account
+		   for changed buffering or mtu's */
+		cb->s_rtt = SPPTV_SRTTBASE;
+		cb->s_rttvar = SPPTV_SRTTDFLT << 2;
+		SPPT_RANGESET(cb->s_rxtcur,
+		    ((SPPTV_SRTTBASE >> 2) + (SPPTV_SRTTDFLT << 2)) >> 1,
+		    SPPTV_MIN, SPPTV_REXMTMAX);
+		nsp->nsp_pcb = (caddr_t) cb;
+		break;
+
+	case PRU_DETACH:
+		if (nsp == NULL) {
+			error = ENOTCONN;
+			break;
+		}
+		if (cb->s_state > TCPS_LISTEN)
+			cb = spp_disconnect(cb);
+		else
+			cb = spp_close(cb);
+		break;
+
+	case PRU_BIND:
+		error = ns_pcbbind(nsp, nam);
+		break;
+
+	case PRU_LISTEN:
+		if (nsp->nsp_lport == 0)
+			error = ns_pcbbind(nsp, (struct mbuf *)0);
+		if (error == 0)
+			cb->s_state = TCPS_LISTEN;
+		break;
+
+	/*
+	 * Initiate connection to peer.
+	 * Enter SYN_SENT state, and mark socket as connecting.
+	 * Start keep-alive timer, setup prototype header,
+	 * Send initial system packet requesting connection.
+	 */
+	case PRU_CONNECT:
+		if (nsp->nsp_lport == 0) {
+			error = ns_pcbbind(nsp, (struct mbuf *)0);
+			if (error)
+				break;
+		}
+		error = ns_pcbconnect(nsp, nam);
+		if (error)
+			break;
+		soisconnecting(so);
+		sppstat.spps_connattempt++;
+		cb->s_state = TCPS_SYN_SENT;
+		cb->s_did = 0;
+		spp_template(cb);
+		cb->s_timer[SPPT_KEEP] = SPPTV_KEEP;
+		/*
+		 * NOTE(review): spp_timers() stores 1 + a timer index in
+		 * s_force; a timer value is used here instead -- confirm
+		 * whether SPPT_KEEP was intended.
+		 */
+		cb->s_force = 1 + SPPTV_KEEP;
+		/*
+		 * Other party is required to respond to
+		 * the port I send from, but he is not
+		 * required to answer from where I am sending to,
+		 * so allow wildcarding.
+		 * original port I am sending to is still saved in
+		 * cb->s_dport.
+		 */
+		nsp->nsp_fport = 0;
+		error = spp_output(cb, (struct mbuf *) 0);
+		break;
+
+	case PRU_CONNECT2:
+		error = EOPNOTSUPP;
+		break;
+
+	/*
+	 * We may decide later to implement connection closing
+	 * handshaking at the spp level optionally.
+	 * here is the hook to do it:
+	 */
+	case PRU_DISCONNECT:
+		cb = spp_disconnect(cb);
+		break;
+
+	/*
+	 * Accept a connection.  Essentially all the work is
+	 * done at higher levels; just return the address
+	 * of the peer, storing through addr.
+	 */
+	case PRU_ACCEPT: {
+		struct sockaddr_ns *sns = mtod(nam, struct sockaddr_ns *);
+
+		nam->m_len = sizeof (struct sockaddr_ns);
+		sns->sns_family = AF_NS;
+		sns->sns_addr = nsp->nsp_faddr;
+		break;
+		}
+
+	case PRU_SHUTDOWN:
+		socantsendmore(so);
+		cb = spp_usrclosed(cb);
+		if (cb)
+			error = spp_output(cb, (struct mbuf *) 0);
+		break;
+
+	/*
+	 * After a receive, possibly send acknowledgment
+	 * updating allocation.
+	 */
+	case PRU_RCVD:
+		cb->s_flags |= SF_RVD;
+		(void) spp_output(cb, (struct mbuf *) 0);
+		cb->s_flags &= ~SF_RVD;
+		break;
+
+	case PRU_ABORT:
+		/*
+		 * spp_drop() releases the control block and returns NULL;
+		 * keep that result so the trace hook below does not use
+		 * freed memory.
+		 */
+		cb = spp_drop(cb, ECONNABORTED);
+		break;
+
+	case PRU_SENSE:
+	case PRU_CONTROL:
+		/* m is not a real mbuf here; do not free it */
+		m = NULL;
+		error = EOPNOTSUPP;
+		break;
+
+	case PRU_RCVOOB:
+		if ((cb->s_oobflags & SF_IOOB) || so->so_oobmark ||
+		    (so->so_state & SS_RCVATMARK)) {
+			m->m_len = 1;
+			*mtod(m, caddr_t) = cb->s_iobc;
+		} else
+			error = EINVAL;
+		/*
+		 * m is the buffer the out-of-band byte is returned in;
+		 * freeing it at the common exit would throw the answer
+		 * away, so leave it with the caller.
+		 */
+		m = NULL;
+		break;
+
+	case PRU_SENDOOB:
+		if (sbspace(&so->so_snd) < -512) {
+			error = ENOBUFS;
+			break;
+		}
+		cb->s_oobflags |= SF_SOOB;
+		/* fall into */
+	case PRU_SEND:
+		if (controlp) {
+			u_short *p = mtod(controlp, u_short *);
+			spp_newchecks[2]++;
+			if ((p[0] == 5) && p[1] == 1) { /* XXXX, for testing */
+				cb->s_shdr.sp_dt = *(u_char *)(&p[2]);
+				spp_newchecks[3]++;
+			}
+			m_freem(controlp);
+		}
+		controlp = NULL;
+		/* spp_output() takes over the data mbuf */
+		error = spp_output(cb, m);
+		m = NULL;
+		break;
+
+	case PRU_SOCKADDR:
+		ns_setsockaddr(nsp, nam);
+		break;
+
+	case PRU_PEERADDR:
+		ns_setpeeraddr(nsp, nam);
+		break;
+
+	case PRU_SLOWTIMO:
+		/* nam carries the index of the timer that expired */
+		cb = spp_timers(cb, (int)nam);
+		req |= ((int)nam) << 8;
+		break;
+
+	case PRU_FASTTIMO:
+	case PRU_PROTORCV:
+	case PRU_PROTOSEND:
+		error =  EOPNOTSUPP;
+		break;
+
+	default:
+		panic("sp_usrreq");
+	}
+	if (cb && (so->so_options & SO_DEBUG || traceallspps))
+		spp_trace(SA_USER, (u_char)ostate, cb, (struct spidp *)0, req);
+release:
+	if (controlp != NULL)
+		m_freem(controlp);
+	if (m != NULL)
+		m_freem(m);
+	splx(s);
+	return (error);
+}
+
+/*
+ * Variant of spp_usrreq() that, after a successful PRU_ATTACH, turns on
+ * the packet interface together with headers on input and on output
+ * (SF_PI, SF_HI, SF_HO).  Every other request is passed through as is.
+ */
+spp_usrreq_sp(so, req, m, nam, controlp)
+	struct socket *so;
+	int req;
+	struct mbuf *m, *nam, *controlp;
+{
+	struct sppcb *cb;
+	int error;
+
+	error = spp_usrreq(so, req, m, nam, controlp);
+	if (error != 0 || req != PRU_ATTACH)
+		return (error);
+
+	cb = (struct sppcb *)sotonspcb(so)->nsp_pcb;
+	cb->s_flags |= (SF_HI | SF_HO | SF_PI);
+	return (error);
+}
+
+/*
+ * Build the per-connection packet template.
+ * Fills in the skeletal IDP header kept in cb->s_idp from the local and
+ * foreign addresses of the NS pcb, picks the source connection id from
+ * spp_iss, and (re)computes the congestion window limits from the space
+ * in the send buffer and the current mtu.
+ */
+spp_template(cb)
+	register struct sppcb *cb;
+{
+	register struct nspcb *pcb = cb->s_nspcb;
+	register struct sockbuf *sndbuf = &(pcb->nsp_socket->so_snd);
+	register struct idp *hdr = cb->s_idp;
+
+	/* skeletal IDP header */
+	hdr->idp_dna = pcb->nsp_faddr;
+	hdr->idp_sna = pcb->nsp_laddr;
+	hdr->idp_pt = NSPROTO_SPP;
+
+	/* choose our connection id, then move the generator on */
+	cb->s_sid = htons(spp_iss);
+	spp_iss += SPP_ISSINCR/2;
+	cb->s_alo = 1;
+
+	/*
+	 * Start with a window of as many full-sized packets as fit in
+	 * the send buffer, and let slow start expand straight to it.
+	 */
+	cb->s_cwnd = (sbspace(sndbuf) * CUNIT) / cb->s_mtu;
+	cb->s_ssthresh = cb->s_cwnd;
+
+	/* The ceiling also has to allow for lots of little packets. */
+	cb->s_cwmx = (sbspace(sndbuf) * CUNIT) / (2 * sizeof(struct spidp));
+	cb->s_cwmx = max(cb->s_cwmx, cb->s_cwnd);
+}
+
+/*
+ * Tear down an SPP connection:
+ *	free every packet still on the out-of-order queue
+ *	free the header template and the control block itself
+ *	mark the socket disconnected and detach the NS pcb
+ * Always returns a null control block pointer so callers can write
+ * "cb = spp_close(cb)".
+ */
+struct sppcb *
+spp_close(cb)
+	register struct sppcb *cb;
+{
+	register struct spidp_q *q, *nextq;
+	register struct mbuf *m;
+	/* fetched up front: cb is gone by the time they are needed */
+	struct nspcb *nsp = cb->s_nspcb;
+	struct socket *so = nsp->nsp_socket;
+
+	for (q = cb->s_q.si_next; q != &(cb->s_q); q = nextq) {
+		nextq = q->si_next;
+		m = dtom(q);
+		remque(q);
+		m_freem(m);
+	}
+
+	(void) m_free(dtom(cb->s_idp));
+	(void) m_free(dtom(cb));
+	nsp->nsp_pcb = 0;
+
+	soisdisconnected(so);
+	ns_pcbdetach(nsp);
+	sppstat.spps_closed++;
+
+	return ((struct sppcb *)0);
+}
+/*
+ * The user has closed the sending side.
+ * A level 3 closing handshake, or a xerox style error packet, could be
+ * generated from here some day; at present the connection is simply
+ * closed.  Returns whatever spp_close() returns.
+ */
+struct sppcb *
+spp_usrclosed(cb)
+	register struct sppcb *cb;
+{
+	register struct sppcb *remaining;
+
+	remaining = spp_close(cb);
+	return (remaining);
+}
+/*
+ * Disconnect request: no handshake is performed, the connection is
+ * closed outright.  Returns whatever spp_close() returns.
+ */
+struct sppcb *
+spp_disconnect(cb)
+	register struct sppcb *cb;
+{
+	register struct sppcb *remaining;
+
+	remaining = spp_close(cb);
+	return (remaining);
+}
+/*
+ * Drop a connection and report the given error on its socket.
+ * Counts the drop as an established-connection drop or an embryonic
+ * one depending on the state, records errno in so_error, and closes
+ * the connection.  Returns whatever spp_close() returns.
+ *
+ * Someday, in the xerox world, an error protocol packet announcing
+ * that the socket has gone away would be generated here.
+ */
+struct sppcb *
+spp_drop(cb, errno)
+	register struct sppcb *cb;
+	int errno;
+{
+	struct socket *so = cb->s_nspcb->nsp_socket;
+
+	if (!TCPS_HAVERCVDSYN(cb->s_state))
+		sppstat.spps_conndrops++;
+	else {
+		sppstat.spps_drops++;
+		cb->s_state = TCPS_CLOSED;
+	}
+	so->so_error = errno;
+	return (spp_close(cb));
+}
+
+/*
+ * Abort the SPP connection attached to an NS pcb by closing it.
+ */
+spp_abort(nsp)
+	struct nspcb *nsp;
+{
+	struct sppcb *cb = nstosppcb(nsp);
+
+	(void) spp_close(cb);
+}
+
+/*
+ * Backoff multipliers, indexed by a connection's s_rxtshift.
+ * spp_setpersist() and the retransmit case of spp_timers() multiply the
+ * base timeout by the selected entry; the factor doubles per step and
+ * levels off at 64.
+ */
+int	spp_backoff[SPP_MAXRXTSHIFT+1] =
+    { 1, 2, 4, 8, 16, 32, 64, 64, 64, 64, 64, 64, 64 };
+/*
+ * Fast timeout routine.
+ * Walks the list of NS pcbs and, for every SPP connection with a
+ * delayed ack pending, converts it into an immediate ack and calls
+ * spp_output() to send it.
+ */
+spp_fasttimo()
+{
+	register struct nspcb *nsp;
+	register struct sppcb *cb;
+	int s;
+
+	s = splnet();
+	if (nspcb.nsp_next != 0) {
+		for (nsp = nspcb.nsp_next; nsp != &nspcb;
+		    nsp = nsp->nsp_next) {
+			cb = (struct sppcb *)nsp->nsp_pcb;
+			if (cb == 0)
+				continue;
+			if ((cb->s_flags & SF_DELACK) == 0)
+				continue;
+			cb->s_flags &= ~SF_DELACK;
+			cb->s_flags |= SF_ACKNOW;
+			sppstat.spps_delack++;
+			(void) spp_output(cb, (struct mbuf *) 0);
+		}
+	}
+	splx(s);
+}
+
+/*
+ * spp protocol timeout routine called every 500 ms.
+ * Updates the timers in all active pcb's and
+ * causes finite state machine actions if timers expire.
+ * Also advances the idle and round-trip counters of each connection
+ * and the connection id generator spp_iss.
+ */
+spp_slowtimo()
+{
+	register struct nspcb *ip, *ipnxt;
+	register struct sppcb *cb;
+	int s = splnet();
+	register int i;
+
+	/*
+	 * Search through tcb's and update active timers.
+	 */
+	ip = nspcb.nsp_next;
+	if (ip == 0) {
+		splx(s);
+		return;
+	}
+	while (ip != &nspcb) {
+		cb = nstosppcb(ip);
+		/* saved now: the pcb may be gone after a timer fires */
+		ipnxt = ip->nsp_next;
+		if (cb == 0)
+			goto tpgone;
+		for (i = 0; i < SPPT_NTIMERS; i++) {
+			if (cb->s_timer[i] && --cb->s_timer[i] == 0) {
+				/*
+				 * spp_usrreq() takes five arguments; the
+				 * timer index travels in the nam slot.
+				 */
+				(void) spp_usrreq(cb->s_nspcb->nsp_socket,
+				    PRU_SLOWTIMO, (struct mbuf *)0,
+				    (struct mbuf *)i, (struct mbuf *)0);
+				/* pcb unlinked: the connection was closed */
+				if (ipnxt->nsp_prev != ip)
+					goto tpgone;
+			}
+		}
+		cb->s_idle++;
+		if (cb->s_rtt)
+			cb->s_rtt++;
+tpgone:
+		ip = ipnxt;
+	}
+	spp_iss += SPP_ISSINCR/PR_SLOWHZ;		/* increment iss */
+	splx(s);
+}
+/*
+ * SPP timer processing.
+ * Called when timer number "timer" of connection cb has expired.
+ * Returns cb, or the null pointer handed back by spp_drop() when the
+ * expiry caused the connection to be dropped.
+ */
+struct sppcb *
+spp_timers(cb, timer)
+	register struct sppcb *cb;
+	int timer;
+{
+	long rexmt;
+	int win;
+
+	/* Record which timer fired, offset by one so that 0 means none. */
+	cb->s_force = 1 + timer;
+
+	switch (timer) {
+
+	/*
+	 * 2 MSL timeout.  TCP would delete the connection control block
+	 * here; for SPP the expiry is only reported and the timer cleared.
+	 */
+	case SPPT_2MSL:
+		printf("spp: SPPT_2MSL went off for no reason\n");
+		cb->s_timer[timer] = 0;
+		break;
+
+	/*
+	 * Retransmit timeout: nothing was acked within the retransmit
+	 * interval.  Give up once the backoff limit is passed; otherwise
+	 * lengthen the interval and resend one packet.
+	 */
+	case SPPT_REXMT:
+		cb->s_rxtshift++;
+		if (cb->s_rxtshift > SPP_MAXRXTSHIFT) {
+			cb->s_rxtshift = SPP_MAXRXTSHIFT;
+			sppstat.spps_timeoutdrop++;
+			cb = spp_drop(cb, ETIMEDOUT);
+			break;
+		}
+		sppstat.spps_rexmttimeo++;
+
+		rexmt = (cb->s_rttvar + (cb->s_srtt >> 2)) >> 1;
+		rexmt = rexmt * spp_backoff[cb->s_rxtshift];
+		SPPT_RANGESET(cb->s_rxtcur, rexmt, SPPTV_MIN, SPPTV_REXMTMAX);
+		cb->s_timer[SPPT_REXMT] = cb->s_rxtcur;
+
+		/*
+		 * After backing off a fair way the srtt estimate is
+		 * probably bogus.  Fold it into rttvar, which keeps the
+		 * current retransmit times, and zero it so that the next
+		 * rtt measurement is taken as the new srtt.
+		 */
+		if (cb->s_rxtshift > SPP_MAXRXTSHIFT / 4 ) {
+			cb->s_rttvar += (cb->s_srtt >> 2);
+			cb->s_srtt = 0;
+		}
+
+		/* Resend from the last acked packet; stop timing any packet. */
+		cb->s_rtt = 0;
+		cb->s_snxt = cb->s_rack;
+
+		/*
+		 * Congestion control (see the long discussion in
+		 * tcp_timer.c): the slow start threshold becomes half the
+		 * current window, at least two packets, and the congestion
+		 * window collapses to a single packet.
+		 */
+		win = min(cb->s_swnd, (cb->s_cwnd/CUNIT)) / 2;
+		if (win < 2)
+			win = 2;
+		cb->s_ssthresh = win * CUNIT;
+		cb->s_cwnd = CUNIT;
+
+		(void) spp_output(cb, (struct mbuf *) 0);
+		break;
+
+	/*
+	 * Persist timeout into a zero window:
+	 * restart the timer and force a probe out.
+	 */
+	case SPPT_PERSIST:
+		sppstat.spps_persisttimeo++;
+		spp_setpersist(cb);
+		(void) spp_output(cb, (struct mbuf *) 0);
+		break;
+
+	/*
+	 * Keep-alive timeout: drop a connection that never became
+	 * established or has been idle for too long, otherwise send
+	 * something (when keep-alives are enabled) and restart the timer.
+	 */
+	case SPPT_KEEP:
+		sppstat.spps_keeptimeo++;
+		if (cb->s_state < TCPS_ESTABLISHED ||
+		    ((cb->s_nspcb->nsp_socket->so_options & SO_KEEPALIVE) &&
+		     cb->s_idle >= SPPTV_MAXIDLE)) {
+			sppstat.spps_keepdrops++;
+			cb = spp_drop(cb, ETIMEDOUT);
+			break;
+		}
+		if ((cb->s_nspcb->nsp_socket->so_options & SO_KEEPALIVE) == 0)
+			cb->s_idle = 0;
+		else {
+			sppstat.spps_keepprobe++;
+			(void) spp_output(cb, (struct mbuf *) 0);
+		}
+		cb->s_timer[SPPT_KEEP] = SPPTV_KEEP;
+		break;
+	}
+	return (cb);
+}
+#ifndef lint
+/*
+ * Sizes of the SPP and NS protocol control blocks, kept in variables.
+ * NOTE(review): nothing in this file reads them -- presumably they are
+ * meant to be inspected from a debugger; confirm before removing.
+ */
+int SppcbSize = sizeof (struct sppcb);
+int NspcbSize = sizeof (struct nspcb);
+#endif /* lint */
diff --git a/sys/netns/spp_var.h b/sys/netns/spp_var.h
new file mode 100644
index 00000000000..0d44f63904f
--- /dev/null
+++ b/sys/netns/spp_var.h
@@ -0,0 +1,215 @@
+/*
+ * Copyright (c) 1984, 1985, 1986, 1987, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)spp_var.h	8.1 (Berkeley) 6/10/93
+ */
+
+/*
+ * Sp control block, one per connection.
+ * Holds the connection state, the prototype header used for
+ * transmission, the last header received, the send sequence and
+ * congestion-control variables, the timers, and a few debugging fields.
+ */
+struct sppcb {
+	struct	spidp_q	s_q;		/* queue for out-of-order receipt */
+	struct	nspcb	*s_nspcb;	/* backpointer to the NS pcb */
+	u_char	s_state;		/* connection state (TCPS_* values) */
+	u_char	s_flags;		/* SF_* bits defined below */
+#define	SF_ACKNOW	0x01		/* Ack peer immediately */
+#define	SF_DELACK	0x02		/* Ack, but try to delay it */
+#define	SF_HI	0x04			/* Show headers on input */
+#define	SF_HO	0x08			/* Show headers on output */
+#define	SF_PI	0x10			/* Packet (datagram) interface */
+#define SF_WIN	0x20			/* Window info changed */
+#define SF_RXT	0x40			/* Rxt info changed */
+#define SF_RVD	0x80			/* Calling from read usrreq routine */
+	u_short s_mtu;			/* Max packet size for this stream */
+/* use sequence fields in headers to store sequence numbers for this
+   connection */
+	struct	idp	*s_idp;		/* IDP header template for output */
+	struct	sphdr	s_shdr;		/* prototype header to transmit */
+/* shorthand for the fields of the prototype header */
+#define s_cc s_shdr.sp_cc		/* connection control (for EM bit) */
+#define s_dt s_shdr.sp_dt		/* datastream type */
+#define s_sid s_shdr.sp_sid		/* source connection identifier */
+#define s_did s_shdr.sp_did		/* destination connection identifier */
+#define s_seq s_shdr.sp_seq		/* sequence number */
+#define s_ack s_shdr.sp_ack		/* acknowledge number */
+#define s_alo s_shdr.sp_alo		/* allocation number */
+#define s_dport s_idp->idp_dna.x_port	/* where we are sending */
+	struct sphdr s_rhdr;		/* last received header (in effect!)*/
+	u_short s_rack;			/* their acknowledge number */
+	u_short s_ralo;			/* their allocation number */
+	u_short s_smax;			/* highest packet # we have sent */
+	u_short	s_snxt;			/* which packet to send next */
+
+/* congestion control */
+#define	CUNIT	1024			/* scaling for ... */
+	int	s_cwnd;			/* Congestion-controlled window */
+					/* in packets * CUNIT */
+	short	s_swnd;			/* == tcp snd_wnd, in packets */
+	short	s_smxw;			/* == tcp max_sndwnd */
+					/* difference of two spp_seq's can be
+					   no bigger than a short */
+	u_short	s_swl1;			/* == tcp snd_wl1 */
+	u_short	s_swl2;			/* == tcp snd_wl2 */
+	int	s_cwmx;			/* max allowable cwnd */
+	int	s_ssthresh;		/* s_cwnd size threshold for
+					 * slow start exponential-to-
+					 * linear switch */
+/* transmit timing stuff
+ * srtt and rttvar are stored as fixed point, for convenience in smoothing.
+ * srtt has 3 bits to the right of the binary point, rttvar has 2.
+ */
+	short	s_idle;			/* time idle */
+	short	s_timer[SPPT_NTIMERS];	/* timers */
+	short	s_rxtshift;		/* log(2) of rexmt exp. backoff */
+	short	s_rxtcur;		/* current retransmit value */
+	u_short	s_rtseq;		/* packet being timed */
+	short	s_rtt;			/* timer for round trips */
+	short	s_srtt;			/* averaged timer */
+	short	s_rttvar;		/* variance in round trip time */
+	char	s_force;		/* which timer expired: 1 + timer
+					 * index, 0 when none */
+	char	s_dupacks;		/* counter to intuit xmt loss */
+
+/* out of band data */
+	char	s_oobflags;
+#define SF_SOOB	0x08			/* sending out of band data */
+#define SF_IOOB 0x10			/* receiving out of band data */
+	char	s_iobc;			/* input characters */
+/* debug stuff */
+	u_short	s_want;			/* Last candidate for sending */
+	char	s_outx;			/* exit taken from spp_output */
+	char	s_inx;			/* exit taken from spp_input */
+	u_short	s_flags2;		/* more flags for testing */
+#define SF_NEWCALL	0x100		/* for new_recvmsg */
+#define SO_NEWCALL	10		/* for new_recvmsg */
+};
+
+/* NS pcb -> SPP control block hung off its nsp_pcb field */
+#define	nstosppcb(np)	((struct sppcb *)(np)->nsp_pcb)
+/* socket -> SPP control block, by way of the socket's NS pcb */
+#define	sotosppcb(so)	(nstosppcb(sotonspcb(so)))
+
+/*
+ * SPP statistics counters.
+ * In the kernel the live copy is the newstats member of spp_istat and
+ * is reached through the sppstat macro.
+ */
+struct	sppstat {
+	/* connection life cycle and timers */
+	long	spps_connattempt;	/* connections initiated */
+	long	spps_accepts;		/* connections accepted */
+	long	spps_connects;		/* connections established */
+	long	spps_drops;		/* connections dropped */
+	long	spps_conndrops;		/* embryonic connections dropped */
+	long	spps_closed;		/* conn. closed (includes drops) */
+	long	spps_segstimed;		/* segs where we tried to get rtt */
+	long	spps_rttupdated;	/* times we succeeded */
+	long	spps_delack;		/* delayed acks sent */
+	long	spps_timeoutdrop;	/* conn. dropped in rxmt timeout */
+	long	spps_rexmttimeo;	/* retransmit timeouts */
+	long	spps_persisttimeo;	/* persist timeouts */
+	long	spps_keeptimeo;		/* keepalive timeouts */
+	long	spps_keepprobe;		/* keepalive probes sent */
+	long	spps_keepdrops;		/* connections dropped in keepalive */
+
+	/* output side */
+	long	spps_sndtotal;		/* total packets sent */
+	long	spps_sndpack;		/* data packets sent */
+	long	spps_sndbyte;		/* data bytes sent */
+	long	spps_sndrexmitpack;	/* data packets retransmitted */
+	long	spps_sndrexmitbyte;	/* data bytes retransmitted */
+	long	spps_sndacks;		/* ack-only packets sent */
+	long	spps_sndprobe;		/* window probes sent */
+	long	spps_sndurg;		/* packets sent with URG only */
+	long	spps_sndwinup;		/* window update-only packets sent */
+	long	spps_sndctrl;		/* control (SYN|FIN|RST) packets sent */
+	long	spps_sndvoid;		/* couldn't find requested packet*/
+
+	/* input side */
+	long	spps_rcvtotal;		/* total packets received */
+	long	spps_rcvpack;		/* packets received in sequence */
+	long	spps_rcvbyte;		/* bytes received in sequence */
+	long	spps_rcvbadsum;		/* packets received with checksum errs */
+	long	spps_rcvbadoff;		/* packets received with bad offset */
+	long	spps_rcvshort;		/* packets received too short */
+	long	spps_rcvduppack;	/* duplicate-only packets received */
+	long	spps_rcvdupbyte;	/* duplicate-only bytes received */
+	long	spps_rcvpartduppack;	/* packets with some duplicate data */
+	long	spps_rcvpartdupbyte;	/* dup. bytes in part-dup. packets */
+	long	spps_rcvoopack;		/* out-of-order packets received */
+	long	spps_rcvoobyte;		/* out-of-order bytes received */
+	long	spps_rcvpackafterwin;	/* packets with data after window */
+	long	spps_rcvbyteafterwin;	/* bytes rcvd after window */
+	long	spps_rcvafterclose;	/* packets rcvd after "close" */
+	long	spps_rcvwinprobe;	/* rcvd window probe packets */
+	long	spps_rcvdupack;		/* rcvd duplicate acks */
+	long	spps_rcvacktoomuch;	/* rcvd acks for unsent data */
+	long	spps_rcvackpack;	/* rcvd ack packets */
+	long	spps_rcvackbyte;	/* bytes acked by rcvd acks */
+	long	spps_rcvwinupd;		/* rcvd window update packets */
+};
+/*
+ * Older set of short event counters, followed by the full sppstat
+ * block (newstats), which the kernel addresses through the sppstat
+ * macro.
+ * NOTE(review): the meaning of the individual short counters is not
+ * established in this header -- presumably they are bumped by the SPP
+ * input path; confirm against spp_input() before documenting them.
+ */
+struct	spp_istat {
+	short	hdrops;
+	short	badsum;
+	short	badlen;
+	short	slotim;
+	short	fastim;
+	short	nonucn;
+	short	noconn;
+	short	notme;
+	short	wrncon;
+	short	bdreas;
+	short	gonawy;
+	short	notyet;
+	short	lstdup;
+	struct sppstat newstats;	/* what "sppstat" expands to */
+};
+
+#ifdef KERNEL
+/* The kernel's single instance of the SPP counters. */
+struct spp_istat spp_istat;
+
+/* Following was struct sppstat sppstat; */
+/* sppstat now names the statistics block embedded in spp_istat. */
+#ifndef sppstat
+#define sppstat spp_istat.newstats
+#endif
+
+/*
+ * Source of connection ids: read by spp_template() and advanced both
+ * there and by spp_slowtimo().
+ */
+u_short spp_iss;
+/* routines that return the surviving control block, or null */
+extern struct sppcb *spp_close(), *spp_disconnect(),
+	*spp_usrclosed(), *spp_timers(), *spp_drop();
+#endif
+
+/* Scale of the steps by which spp_iss is advanced. */
+#define	SPP_ISSINCR	128
+
+/*
+ * SPP sequence numbers are 16 bit integers operated
+ * on with modular arithmetic.  The SSEQ_* macros compare two such
+ * numbers by looking at the sign of their difference taken as a short.
+ * On sun the difference is first stored in the scratch variable
+ * xnsCbug before it is tested.
+ */
+#ifdef sun
+short xnsCbug;
+#define	SSEQ_DIFF(a,b)	(xnsCbug = (short)((a)-(b)))
+#else
+#define	SSEQ_DIFF(a,b)	((short)((a)-(b)))
+#endif
+
+#define	SSEQ_LT(a,b)	(SSEQ_DIFF(a,b) < 0)
+#define	SSEQ_LEQ(a,b)	(SSEQ_DIFF(a,b) <= 0)
+#define	SSEQ_GT(a,b)	(SSEQ_DIFF(a,b) > 0)
+#define	SSEQ_GEQ(a,b)	(SSEQ_DIFF(a,b) >= 0)
diff --git a/sys/nfs/nfs.h b/sys/nfs/nfs.h
new file mode 100644
index 00000000000..261fd42657a
--- /dev/null
+++ b/sys/nfs/nfs.h
@@ -0,0 +1,297 @@
+/*
+ * Copyright (c) 1989, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Rick Macklem at The University of Guelph.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)nfs.h	8.1 (Berkeley) 6/10/93
+ */
+
+/*
+ * Tunable constants for nfs
+ */
+
+#define	NFS_MAXIOVEC	34
+#define NFS_HZ		25		/* Ticks per second for NFS timeouts */
+#define	NFS_TIMEO	(1*NFS_HZ)	/* Default timeout = 1 second */
+#define	NFS_MINTIMEO	(1*NFS_HZ)	/* Min timeout to use */
+#define	NFS_MAXTIMEO	(60*NFS_HZ)	/* Max timeout to backoff to */
+#define	NFS_MINIDEMTIMEO (5*NFS_HZ)	/* Min timeout for non-idempotent ops*/
+#define	NFS_MAXREXMIT	100		/* Stop counting after this many */
+#define	NFS_MAXWINDOW	1024		/* Max number of outstanding requests */
+#define	NFS_RETRANS	10		/* Num of retrans for soft mounts */
+#define	NFS_MAXGRPS	16		/* Max. size of groups list */
+#define	NFS_MINATTRTIMO 5		/* Min. attribute cache timeout in sec */
+#define	NFS_MAXATTRTIMO 60		/* Upper bound applied by NFS_ATTRTIMEO */
+#define	NFS_WSIZE	8192		/* Def. write data size <= 8192 */
+#define	NFS_RSIZE	8192		/* Def. read data size <= 8192 */
+#define	NFS_DEFRAHEAD	1		/* Def. read ahead # blocks */
+#define	NFS_MAXRAHEAD	4		/* Max. read ahead # blocks */
+#define	NFS_MAXREADDIR	NFS_MAXDATA	/* Max. size of directory read */
+#define	NFS_MAXUIDHASH	64		/* Max. # of hashed uid entries/mp */
+#define	NFS_MAXASYNCDAEMON 20	/* Max. number async_daemons runnable */
+#define	NFS_DIRBLKSIZ	1024		/* Size of an NFS directory block */
+#define	NMOD(a)		((a) % nfs_asyncdaemons)	/* (a) modulo nfs_asyncdaemons */
+
+/*
+ * Attribute timeout: seconds since n_mtime / 10, clamped; minimum if NMODIFIED.
+ */
+#define	NFS_ATTRTIMEO(np) \
+	((((np)->n_flag & NMODIFIED) || \
+	 (time.tv_sec - (np)->n_mtime) / 10 < NFS_MINATTRTIMO) ? NFS_MINATTRTIMO : \
+	 ((time.tv_sec - (np)->n_mtime) / 10 > NFS_MAXATTRTIMO ? NFS_MAXATTRTIMO : \
+	  (time.tv_sec - (np)->n_mtime) / 10))
+
+/*
+ * Structures for the nfssvc(2) syscall. Nothing other than nfsd and
+ * mount_nfs is expected to use them.
+ */
+struct nfsd_args {		/* describes one socket to be served */
+	int	sock;		/* Socket to serve */
+	caddr_t	name;		/* Client address for connection based sockets */
+	int	namelen;	/* Length of name */
+};
+
+struct nfsd_srvargs {			/* nfsd handle plus a uid-to-cred mapping */
+	struct nfsd	*nsd_nfsd;	/* Pointer to in kernel nfsd struct */
+	uid_t		nsd_uid;	/* Effective uid mapped to cred */
+	u_long		nsd_haddr;	/* Ip address of client */
+	struct ucred	nsd_cr;		/* Cred. uid maps to */
+	int		nsd_authlen;	/* Length of auth string (ret) */
+	char		*nsd_authstr;	/* Auth string (ret) */
+};
+
+struct nfsd_cargs {			/* mount path plus an authenticator for it */
+	char		*ncd_dirp;	/* Mount dir path */
+	uid_t		ncd_authuid;	/* Effective uid */
+	int		ncd_authtype;	/* Type of authenticator */
+	int		ncd_authlen;	/* Length of authenticator string */
+	char		*ncd_authstr;	/* Authenticator string */
+};
+
+/*
+ * Stats structure (plain int counters; the bio ones are bumped in nfs_bio.c)
+ */
+struct nfsstats {
+	int	attrcache_hits;
+	int	attrcache_misses;
+	int	lookupcache_hits;
+	int	lookupcache_misses;
+	int	direofcache_hits;
+	int	direofcache_misses;
+	int	biocache_reads;		/* per regular-file pass in nfs_bioread() */
+	int	read_bios;		/* per regular-file read in nfs_doio() */
+	int	read_physios;
+	int	biocache_writes;	/* per block pass in nfs_write() */
+	int	write_bios;		/* per write in nfs_doio() */
+	int	write_physios;
+	int	biocache_readlinks;	/* per symlink pass in nfs_bioread() */
+	int	readlink_bios;		/* per symlink read in nfs_doio() */
+	int	biocache_readdirs;	/* per directory pass in nfs_bioread() */
+	int	readdir_bios;		/* per directory read in nfs_doio() */
+	int	rpccnt[NFS_NPROCS];	/* NOTE(review): presumably indexed by procedure number -- confirm */
+	int	rpcretries;
+	int	srvrpccnt[NFS_NPROCS];	/* NOTE(review): presumably the server-side counterpart -- confirm */
+	int	srvrpc_errs;
+	int	srv_errs;
+	int	rpcrequests;
+	int	rpctimeouts;
+	int	rpcunexpected;
+	int	rpcinvalid;
+	int	srvcache_inproghits;
+	int	srvcache_idemdonehits;
+	int	srvcache_nonidemdonehits;
+	int	srvcache_misses;
+	int	srvnqnfs_leases;
+	int	srvnqnfs_maxleases;
+	int	srvnqnfs_getleases;
+};
+
+/*
+ * Flags for nfssvc() system call (bits 0x001 and 0x020 are not assigned here).
+ */
+#define	NFSSVC_BIOD	0x002
+#define	NFSSVC_NFSD	0x004
+#define	NFSSVC_ADDSOCK	0x008
+#define	NFSSVC_AUTHIN	0x010
+#define	NFSSVC_GOTAUTH	0x040
+#define	NFSSVC_AUTHINFAIL 0x080
+#define	NFSSVC_MNTD	0x100
+
+/*
+ * The set of signals that interrupt an I/O in progress for NFSMNT_INT mounts.
+ * What should be in this set is open to debate, but I believe that since
+ * I/O system calls on ufs are never interrupted by signals the set should
+ * be minimal. My reasoning is that many current programs that use signals
+ * such as SIGALRM will not expect file I/O system calls to be interrupted
+ * by them and break.
+ */
+#ifdef KERNEL
+#define	NFSINT_SIGMASK	(sigmask(SIGINT)|sigmask(SIGTERM)|sigmask(SIGKILL)| \
+			 sigmask(SIGHUP)|sigmask(SIGQUIT))
+
+/*
+ * Socket errors ignored for connectionless sockets??  For now all are ignored
+ * except EINTR, ERESTART, EWOULDBLOCK, and only if (s) lacks PR_CONNREQUIRED.
+ */
+#define	NFSIGNORE_SOERROR(s, e) \
+		((e) != EINTR && (e) != ERESTART && (e) != EWOULDBLOCK && \
+		((s) & PR_CONNREQUIRED) == 0)
+
+/*
+ * Nfs outstanding request list element (doubly linked via r_next/r_prev)
+ */
+struct nfsreq {
+	struct nfsreq	*r_next;
+	struct nfsreq	*r_prev;
+	struct mbuf	*r_mreq;	/* NOTE(review): presumably the request mbufs -- confirm */
+	struct mbuf	*r_mrep;	/* NOTE(review): presumably the reply mbufs -- confirm */
+	struct mbuf	*r_md;
+	caddr_t		r_dpos;
+	struct nfsmount *r_nmp;
+	struct vnode	*r_vp;
+	u_long		r_xid;
+	int		r_flags;	/* flags on request, see below */
+	int		r_retry;	/* max retransmission count */
+	int		r_rexmit;	/* current retrans count */
+	int		r_timer;	/* tick counter on reply */
+	int		r_procnum;	/* NFS procedure number */
+	int		r_rtt;		/* RTT for rpc */
+	struct proc	*r_procp;	/* Proc that did I/O system call */
+};
+
+/* Flag values for r_flags (one bit each, 0x01 through 0x80) */
+#define R_TIMING	0x01		/* timing request (in mntp) */
+#define R_SENT		0x02		/* request has been sent */
+#define	R_SOFTTERM	0x04		/* soft mnt, too many retries */
+#define	R_INTR		0x08		/* intr mnt, signal pending */
+#define	R_SOCKERR	0x10		/* Fatal error on socket */
+#define	R_TPRINTFMSG	0x20		/* Did a tprintf msg. */
+#define	R_MUSTRESEND	0x40		/* Must resend request */
+#define	R_GETONEREP	0x80		/* Probe for one reply only */
+
+struct nfsstats nfsstats;	/* the counters themselves, e.g. nfsstats.read_bios++ */
+
+/*
+ * A list of nfssvc_sock structures is maintained with all the sockets
+ * that require service by the nfsd.
+ * The nfsuid structs hang off of the nfssvc_sock structs in both lru
+ * and uid hash lists.
+ */
+#define	NUIDHASHSIZ	32		/* keep a power of 2: NUIDHASH masks with it */
+#define	NUIDHASH(uid)	((uid) & (NUIDHASHSIZ - 1))
+
+/*
+ * Network address hash list element (either form; see NU_INETADDR in nu_flag)
+ */
+union nethostaddr {
+	u_long had_inetaddr;
+	struct mbuf *had_nam;
+};
+
+struct nfsuid {				/* one uid-to-credential mapping */
+	struct nfsuid	*nu_lrunext;	/* MUST be first */
+	struct nfsuid	*nu_lruprev;
+	struct nfsuid	*nu_hnext;	/* NOTE(review): presumably the uid hash chain links -- confirm */
+	struct nfsuid	*nu_hprev;
+	int		nu_flag;	/* Flags */
+	uid_t		nu_uid;		/* Uid mapped by this entry */
+	union nethostaddr nu_haddr;	/* Host addr. for dgram sockets */
+	struct ucred	nu_cr;		/* Cred uid mapped to */
+};
+
+#define	nu_inetaddr	nu_haddr.had_inetaddr
+#define	nu_nam		nu_haddr.had_nam
+/* Bits for nu_flag */
+#define	NU_INETADDR	0x1
+
+struct nfssvc_sock {			/* one socket that requires service by the nfsd */
+	struct nfsuid	*ns_lrunext;	/* MUST be first */
+	struct nfsuid	*ns_lruprev;	/* NOTE(review): same leading pair as struct nfsuid, presumably so this can head the lru list -- confirm */
+	struct nfssvc_sock *ns_next;
+	struct nfssvc_sock *ns_prev;
+	int		ns_flag;	/* SLP_ bits, see below */
+	u_long		ns_sref;
+	struct file	*ns_fp;
+	struct socket	*ns_so;
+	int		ns_solock;
+	struct mbuf	*ns_nam;
+	int		ns_cc;
+	struct mbuf	*ns_raw;
+	struct mbuf	*ns_rawend;
+	int		ns_reclen;
+	struct mbuf	*ns_rec;
+	struct mbuf	*ns_recend;
+	int		ns_numuids;
+	struct nfsuid	*ns_uidh[NUIDHASHSIZ];	/* NOTE(review): presumably indexed by NUIDHASH(uid) -- confirm */
+};
+
+/* Bits for "ns_flag" */
+#define	SLP_VALID	0x01
+#define	SLP_DOREC	0x02
+#define	SLP_NEEDQ	0x04
+#define	SLP_DISCONN	0x08
+#define	SLP_GETSTREAM	0x10
+#define	SLP_INIT	0x20
+#define	SLP_WANTINIT	0x40
+
+#define SLP_ALLFLAGS	0xff		/* covers every SLP_ bit above (and 0x80) */
+
+/*
+ * One of these structures is allocated for each nfsd (linked via nd_next/nd_prev).
+ */
+struct nfsd {
+	struct nfsd	*nd_next;	/* Must be first */
+	struct nfsd	*nd_prev;
+	int		nd_flag;	/* NFSD_ flags */
+	struct nfssvc_sock *nd_slp;	/* Current socket */
+	struct mbuf	*nd_nam;	/* Client addr for datagram req. */
+	struct mbuf	*nd_mrep;	/* Req. mbuf list */
+	struct mbuf	*nd_md;
+	caddr_t		nd_dpos;	/* Position in list */
+	int		nd_procnum;	/* RPC procedure number */
+	u_long		nd_retxid;	/* RPC xid */
+	int		nd_repstat;	/* Reply status value */
+	struct ucred	nd_cr;		/* Credentials for req. */
+	int		nd_nqlflag;	/* Leasing flag */
+	int		nd_duration;	/* Lease duration */
+	int		nd_authlen;	/* Authenticator len */
+	u_char		nd_authstr[RPCAUTH_MAXSIZ]; /* Authenticator data */
+	struct proc	*nd_procp;	/* Proc ptr */
+};
+
+#define	NFSD_WAITING	0x01		/* bits for nd_flag */
+#define	NFSD_CHECKSLP	0x02
+#define	NFSD_REQINPROG	0x04
+#define	NFSD_NEEDAUTH	0x08
+#define	NFSD_AUTHFAIL	0x10
+#endif	/* KERNEL */
diff --git a/sys/nfs/nfs_bio.c b/sys/nfs/nfs_bio.c
new file mode 100644
index 00000000000..177a278b631
--- /dev/null
+++ b/sys/nfs/nfs_bio.c
@@ -0,0 +1,799 @@
+/*
+ * Copyright (c) 1989, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Rick Macklem at The University of Guelph.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)nfs_bio.c	8.5 (Berkeley) 1/4/94
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/resourcevar.h>
+#include <sys/proc.h>
+#include <sys/buf.h>
+#include <sys/vnode.h>
+#include <sys/trace.h>
+#include <sys/mount.h>
+#include <sys/kernel.h>
+
+#include <vm/vm.h>
+
+#include <nfs/nfsnode.h>
+#include <nfs/rpcv2.h>
+#include <nfs/nfsv2.h>
+#include <nfs/nfs.h>
+#include <nfs/nfsmount.h>
+#include <nfs/nqnfs.h>
+
+struct buf *incore(), *nfs_getcacheblk();	/* nfs_getcacheblk() is defined below */
+extern struct proc *nfs_iodwant[NFS_MAXASYNCDAEMON];	/* scanned and cleared in nfs_asyncio() */
+extern int nfs_numasync;	/* async I/O is attempted only when this is > 0 */
+
+/*
+ * Buffer-cache read for VREG, VLNK and VDIR vnodes.  Loops copying cached
+ * data to uio until an error, uio_resid == 0, or a pass with n <= 0.
+ */
+nfs_bioread(vp, uio, ioflag, cred)
+	register struct vnode *vp;
+	register struct uio *uio;
+	int ioflag;
+	struct ucred *cred;
+{
+	register struct nfsnode *np = VTONFS(vp);
+	register int biosize, diff;
+	struct buf *bp, *rabp;
+	struct vattr vattr;
+	struct proc *p;
+	struct nfsmount *nmp;
+	daddr_t lbn, bn, rabn;
+	caddr_t baddr;
+	int got_buf, nra, error = 0, n, on, not_readin;
+
+#ifdef lint
+	ioflag = ioflag;
+#endif /* lint */
+#ifdef DIAGNOSTIC
+	if (uio->uio_rw != UIO_READ)
+		panic("nfs_read mode");
+#endif
+	if (uio->uio_resid == 0)
+		return (0);
+	if (uio->uio_offset < 0 && vp->v_type != VDIR)
+		return (EINVAL);
+	nmp = VFSTONFS(vp->v_mount);
+	biosize = nmp->nm_rsize;
+	p = uio->uio_procp;
+	/*
+	 * For nfs, cache consistency can only be maintained approximately.
+	 * Although RFC1094 does not specify the criteria, the following is
+	 * believed to be compatible with the reference port.
+	 * For nqnfs, full cache consistency is maintained within the loop.
+	 * For nfs:
+	 * If the file's modify time on the server has changed since the
+	 * last read rpc or you have written to the file,
+	 * you may have lost data cache consistency with the
+	 * server, so flush all of the file's data out of the cache.
+	 * Then force a getattr rpc to ensure that you have up to date
+	 * attributes.
+	 * The mount flag NFSMNT_MYWRITE says "Assume that my writes are
+	 * the ones changing the modify time".
+	 * NB: This implies that cache data can be read when up to
+	 * NFS_ATTRTIMEO seconds out of date. If you find that you need current
+	 * attributes this could be forced by setting n_attrstamp to 0 before
+	 * the VOP_GETATTR() call.
+	 */
+	if ((nmp->nm_flag & NFSMNT_NQNFS) == 0 && vp->v_type != VLNK) {
+		if (np->n_flag & NMODIFIED) {
+			if ((nmp->nm_flag & NFSMNT_MYWRITE) == 0 ||
+			     vp->v_type != VREG) {
+				if (error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1))
+					return (error);
+			}
+			np->n_attrstamp = 0;
+			np->n_direofoffset = 0;
+			if (error = VOP_GETATTR(vp, &vattr, cred, p))
+				return (error);
+			np->n_mtime = vattr.va_mtime.ts_sec;
+		} else {
+			if (error = VOP_GETATTR(vp, &vattr, cred, p))
+				return (error);
+			if (np->n_mtime != vattr.va_mtime.ts_sec) {
+				np->n_direofoffset = 0;
+				if (error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1))
+					return (error);
+				np->n_mtime = vattr.va_mtime.ts_sec;
+			}
+		}
+	}
+	do {
+
+	    /*
+	     * Get a valid lease. If cached data is stale, flush it.
+	     */
+	    if (nmp->nm_flag & NFSMNT_NQNFS) {
+		if (NQNFS_CKINVALID(vp, np, NQL_READ)) {
+		    do {
+			error = nqnfs_getlease(vp, NQL_READ, cred, p);
+		    } while (error == NQNFS_EXPIRED);
+		    if (error)
+			return (error);
+		    if (np->n_lrev != np->n_brev ||
+			(np->n_flag & NQNFSNONCACHE) ||
+			((np->n_flag & NMODIFIED) && vp->v_type == VDIR)) {
+			if (vp->v_type == VDIR) {
+			    np->n_direofoffset = 0;
+			    cache_purge(vp);
+			}
+			if (error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1))
+			    return (error);
+			np->n_brev = np->n_lrev;
+		    }
+		} else if (vp->v_type == VDIR && (np->n_flag & NMODIFIED)) {
+		    np->n_direofoffset = 0;
+		    cache_purge(vp);
+		    if (error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1))
+			return (error);
+		}
+	    }
+	    if (np->n_flag & NQNFSNONCACHE) {
+		switch (vp->v_type) {
+		case VREG:
+			error = nfs_readrpc(vp, uio, cred);
+			break;
+		case VLNK:
+			error = nfs_readlinkrpc(vp, uio, cred);
+			break;
+		case VDIR:
+			error = nfs_readdirrpc(vp, uio, cred);
+			break;
+		};
+		return (error);
+	    }
+	    baddr = (caddr_t)0;
+	    switch (vp->v_type) {
+	    case VREG:
+		nfsstats.biocache_reads++;
+		lbn = uio->uio_offset / biosize;
+		on = uio->uio_offset & (biosize-1);	/* mask form: biosize must be a power of 2 */
+		bn = lbn * (biosize / DEV_BSIZE);
+		not_readin = 1;		/* cleared below if we issue the read ourselves */
+
+		/*
+		 * Start the read ahead(s), as required.
+		 */
+		if (nfs_numasync > 0 && nmp->nm_readahead > 0 &&
+		    lbn == vp->v_lastr + 1) {
+		    for (nra = 0; nra < nmp->nm_readahead &&
+			(lbn + 1 + nra) * biosize < np->n_size; nra++) {
+			rabn = (lbn + 1 + nra) * (biosize / DEV_BSIZE);
+			if (!incore(vp, rabn)) {
+			    rabp = nfs_getcacheblk(vp, rabn, biosize, p);
+			    if (!rabp)
+				return (EINTR);
+			    if ((rabp->b_flags & (B_DELWRI | B_DONE)) == 0) {
+				rabp->b_flags |= (B_READ | B_ASYNC);
+				if (nfs_asyncio(rabp, cred)) {
+				    rabp->b_flags |= B_INVAL;
+				    brelse(rabp);
+				}
+			    }
+			}
+		    }
+		}
+
+		/*
+		 * If the block is in the cache and has the required data
+		 * in a valid region, just copy it out.
+		 * Otherwise, get the block and write back/read in,
+		 * as required.
+		 */
+		if ((bp = incore(vp, bn)) &&
+		    (bp->b_flags & (B_BUSY | B_WRITEINPROG)) ==
+		    (B_BUSY | B_WRITEINPROG))
+			got_buf = 0;	/* bp is used without owning it: no brelse() at the end */
+		else {
+again:
+			bp = nfs_getcacheblk(vp, bn, biosize, p);
+			if (!bp)
+				return (EINTR);
+			got_buf = 1;
+			if ((bp->b_flags & (B_DONE | B_DELWRI)) == 0) {
+				bp->b_flags |= B_READ;
+				not_readin = 0;
+				if (error = nfs_doio(bp, cred, p)) {
+				    brelse(bp);
+				    return (error);
+				}
+			}
+		}
+		n = min((unsigned)(biosize - on), uio->uio_resid);
+		diff = np->n_size - uio->uio_offset;
+		if (diff < n)
+			n = diff;
+		if (not_readin && n > 0) {
+			if (on < bp->b_validoff || (on + n) > bp->b_validend) {
+				if (!got_buf) {
+				    bp = nfs_getcacheblk(vp, bn, biosize, p);
+				    if (!bp)
+					return (EINTR);
+				    got_buf = 1;
+				}
+				bp->b_flags |= B_INVAL;
+				if (bp->b_dirtyend > 0) {
+				    if ((bp->b_flags & B_DELWRI) == 0)
+					panic("nfsbioread");
+				    if (VOP_BWRITE(bp) == EINTR)
+					return (EINTR);
+				} else
+				    brelse(bp);
+				goto again;
+			}
+		}
+		vp->v_lastr = lbn;
+		diff = (on >= bp->b_validend) ? 0 : (bp->b_validend - on);
+		if (diff < n)
+			n = diff;
+		break;
+	    case VLNK:
+		nfsstats.biocache_readlinks++;
+		bp = nfs_getcacheblk(vp, (daddr_t)0, NFS_MAXPATHLEN, p);
+		if (!bp)
+			return (EINTR);
+		if ((bp->b_flags & B_DONE) == 0) {
+			bp->b_flags |= B_READ;
+			if (error = nfs_doio(bp, cred, p)) {
+				brelse(bp);
+				return (error);
+			}
+		}
+		n = min(uio->uio_resid, NFS_MAXPATHLEN - bp->b_resid);
+		got_buf = 1;
+		on = 0;
+		break;
+	    case VDIR:
+		nfsstats.biocache_readdirs++;
+		bn = (daddr_t)uio->uio_offset;
+		bp = nfs_getcacheblk(vp, bn, NFS_DIRBLKSIZ, p);
+		if (!bp)
+			return (EINTR);
+		if ((bp->b_flags & B_DONE) == 0) {
+			bp->b_flags |= B_READ;
+			if (error = nfs_doio(bp, cred, p)) {
+				brelse(bp);
+				return (error);
+			}
+		}
+
+		/*
+		 * If not eof and read aheads are enabled, start one.
+		 * (You need the current block first, so that you have the
+		 *  directory offset cookie of the next block.)
+		 */
+		rabn = bp->b_blkno;
+		if (nfs_numasync > 0 && nmp->nm_readahead > 0 &&
+		    rabn != 0 && rabn != np->n_direofoffset &&
+		    !incore(vp, rabn)) {
+			rabp = nfs_getcacheblk(vp, rabn, NFS_DIRBLKSIZ, p);
+			if (rabp) {
+			    if ((rabp->b_flags & (B_DONE | B_DELWRI)) == 0) {
+				rabp->b_flags |= (B_READ | B_ASYNC);
+				if (nfs_asyncio(rabp, cred)) {
+				    rabp->b_flags |= B_INVAL;
+				    brelse(rabp);
+				}
+			    }
+			}
+		}
+		on = 0;
+		n = min(uio->uio_resid, NFS_DIRBLKSIZ - bp->b_resid);
+		got_buf = 1;
+		break;
+	    };
+
+	    if (n > 0) {
+		if (!baddr)
+			baddr = bp->b_data;
+		error = uiomove(baddr + on, (int)n, uio);
+	    }
+	    switch (vp->v_type) {
+	    case VREG:
+		if (n + on == biosize || uio->uio_offset == np->n_size)
+			bp->b_flags |= B_AGE;
+		break;
+	    case VLNK:
+		n = 0;		/* makes this the last pass of the loop */
+		break;
+	    case VDIR:
+		uio->uio_offset = bp->b_blkno;	/* nfs_doio() saved the offset cookie in b_blkno */
+		break;
+	    };
+	    if (got_buf)
+		brelse(bp);
+	} while (error == 0 && uio->uio_resid > 0 && n > 0);
+	return (error);
+}
+
+/*
+ * Vnode op for write using bio; only VREG is accepted (EIO otherwise).
+ */
+nfs_write(ap)
+	struct vop_write_args /* {
+		struct vnode *a_vp;
+		struct uio *a_uio;
+		int  a_ioflag;
+		struct ucred *a_cred;
+	} */ *ap;
+{
+	register int biosize;
+	register struct uio *uio = ap->a_uio;
+	struct proc *p = uio->uio_procp;
+	register struct vnode *vp = ap->a_vp;
+	struct nfsnode *np = VTONFS(vp);
+	register struct ucred *cred = ap->a_cred;
+	int ioflag = ap->a_ioflag;
+	struct buf *bp;
+	struct vattr vattr;
+	struct nfsmount *nmp;
+	daddr_t lbn, bn;
+	int n, on, error = 0;
+
+#ifdef DIAGNOSTIC
+	if (uio->uio_rw != UIO_WRITE)
+		panic("nfs_write mode");
+	if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != curproc)
+		panic("nfs_write proc");
+#endif
+	if (vp->v_type != VREG)
+		return (EIO);
+	if (np->n_flag & NWRITEERR) {	/* a write error recorded by nfs_doio() */
+		np->n_flag &= ~NWRITEERR;	/* reported once, then cleared */
+		return (np->n_error);
+	}
+	if (ioflag & (IO_APPEND | IO_SYNC)) {
+		if (np->n_flag & NMODIFIED) {
+			np->n_attrstamp = 0;
+			if (error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1))
+				return (error);
+		}
+		if (ioflag & IO_APPEND) {
+			np->n_attrstamp = 0;
+			if (error = VOP_GETATTR(vp, &vattr, cred, p))
+				return (error);
+			uio->uio_offset = np->n_size;
+		}
+	}
+	nmp = VFSTONFS(vp->v_mount);
+	if (uio->uio_offset < 0)
+		return (EINVAL);
+	if (uio->uio_resid == 0)
+		return (0);
+	/*
+	 * Maybe this should be above the vnode op call, but so long as
+	 * file servers have no limits, i don't think it matters
+	 */
+	if (p && uio->uio_offset + uio->uio_resid >
+	      p->p_rlimit[RLIMIT_FSIZE].rlim_cur) {
+		psignal(p, SIGXFSZ);
+		return (EFBIG);
+	}
+	/*
+	 * I use nm_rsize, not nm_wsize so that all buffer cache blocks
+	 * will be the same size within a filesystem. nfs_writerpc will
+	 * still use nm_wsize when sizing the rpc's.
+	 */
+	biosize = nmp->nm_rsize;
+	do {
+
+		/*
+		 * Check for a valid write lease.
+		 * If non-cachable, just do the rpc
+		 */
+		if ((nmp->nm_flag & NFSMNT_NQNFS) &&
+		    NQNFS_CKINVALID(vp, np, NQL_WRITE)) {
+			do {
+				error = nqnfs_getlease(vp, NQL_WRITE, cred, p);
+			} while (error == NQNFS_EXPIRED);
+			if (error)
+				return (error);
+			if (np->n_lrev != np->n_brev ||
+			    (np->n_flag & NQNFSNONCACHE)) {
+				if (error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1))
+					return (error);
+				np->n_brev = np->n_lrev;
+			}
+		}
+		if (np->n_flag & NQNFSNONCACHE)
+			return (nfs_writerpc(vp, uio, cred, ioflag));
+		nfsstats.biocache_writes++;
+		lbn = uio->uio_offset / biosize;
+		on = uio->uio_offset & (biosize-1);	/* mask form: biosize must be a power of 2 */
+		n = min((unsigned)(biosize - on), uio->uio_resid);
+		bn = lbn * (biosize / DEV_BSIZE);
+again:
+		bp = nfs_getcacheblk(vp, bn, biosize, p);
+		if (!bp)
+			return (EINTR);
+		if (bp->b_wcred == NOCRED) {
+			crhold(cred);
+			bp->b_wcred = cred;
+		}
+		np->n_flag |= NMODIFIED;
+		if (uio->uio_offset + n > np->n_size) {
+			np->n_size = uio->uio_offset + n;
+			vnode_pager_setsize(vp, (u_long)np->n_size);
+		}
+
+		/*
+		 * If the new write will leave a contiguous dirty
+		 * area, just update the b_dirtyoff and b_dirtyend,
+		 * otherwise force a write rpc of the old dirty area.
+		 */
+		if (bp->b_dirtyend > 0 &&
+		    (on > bp->b_dirtyend || (on + n) < bp->b_dirtyoff)) {
+			bp->b_proc = p;
+			if (VOP_BWRITE(bp) == EINTR)
+				return (EINTR);
+			goto again;
+		}
+
+		/*
+		 * Check for valid write lease and get one as required.
+		 * In case getblk() and/or bwrite() delayed us.
+		 */
+		if ((nmp->nm_flag & NFSMNT_NQNFS) &&
+		    NQNFS_CKINVALID(vp, np, NQL_WRITE)) {
+			do {
+				error = nqnfs_getlease(vp, NQL_WRITE, cred, p);
+			} while (error == NQNFS_EXPIRED);
+			if (error) {
+				brelse(bp);
+				return (error);
+			}
+			if (np->n_lrev != np->n_brev ||
+			    (np->n_flag & NQNFSNONCACHE)) {
+				brelse(bp);
+				if (error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1))
+					return (error);
+				np->n_brev = np->n_lrev;
+				goto again;
+			}
+		}
+		if (error = uiomove((char *)bp->b_data + on, n, uio)) {
+			bp->b_flags |= B_ERROR;
+			brelse(bp);
+			return (error);
+		}
+		if (bp->b_dirtyend > 0) {
+			bp->b_dirtyoff = min(on, bp->b_dirtyoff);
+			bp->b_dirtyend = max((on + n), bp->b_dirtyend);
+		} else {
+			bp->b_dirtyoff = on;
+			bp->b_dirtyend = on + n;
+		}
+#ifndef notdef
+		if (bp->b_validend == 0 || bp->b_validend < bp->b_dirtyoff ||
+		    bp->b_validoff > bp->b_dirtyend) {
+			bp->b_validoff = bp->b_dirtyoff;
+			bp->b_validend = bp->b_dirtyend;
+		} else {
+			bp->b_validoff = min(bp->b_validoff, bp->b_dirtyoff);
+			bp->b_validend = max(bp->b_validend, bp->b_dirtyend);
+		}
+#else
+		bp->b_validoff = bp->b_dirtyoff;
+		bp->b_validend = bp->b_dirtyend;
+#endif
+		if (ioflag & IO_APPEND)
+			bp->b_flags |= B_APPENDWRITE;	/* nfs_doio() turns this into IO_APPEND */
+
+		/*
+		 * If the lease is non-cachable or IO_SYNC do bwrite().
+		 */
+		if ((np->n_flag & NQNFSNONCACHE) || (ioflag & IO_SYNC)) {
+			bp->b_proc = p;
+			if (error = VOP_BWRITE(bp))
+				return (error);
+		} else if ((n + on) == biosize &&
+			(nmp->nm_flag & NFSMNT_NQNFS) == 0) {
+			bp->b_proc = (struct proc *)0;
+			bawrite(bp);
+		} else
+			bdwrite(bp);
+	} while (uio->uio_resid > 0 && n > 0);
+	return (0);
+}
+
+/*
+ * Return the busy cache block (vp, bn) of the given size, allocating it
+ * through getblk() if it is not already cached.  On a mount with
+ * NFSMNT_INT set the wait may be abandoned: NULL is returned once
+ * nfs_sigintr() reports that the calling process has been interrupted.
+ * Otherwise getblk() is called once with no catch flag and no timeout.
+ */
+struct buf *
+nfs_getcacheblk(vp, bn, size, p)
+	struct vnode *vp;
+	daddr_t bn;
+	int size;
+	struct proc *p;
+{
+	struct nfsmount *mnt = VFSTONFS(vp->v_mount);
+	register struct buf *blk;
+
+	/* Not interruptible: one plain getblk() call does it. */
+	if ((mnt->nm_flag & NFSMNT_INT) == 0)
+		return (getblk(vp, bn, size, 0, 0));
+	/* First try passes PCATCH; retries use a 2 * hz timeout instead. */
+	for (blk = getblk(vp, bn, size, PCATCH, 0); blk == (struct buf *)0;
+	    blk = getblk(vp, bn, size, 0, 2 * hz)) {
+		if (nfs_sigintr(mnt, (struct nfsreq *)0, p))
+			return ((struct buf *)0);
+	}
+	return (blk);
+}
+
+/*
+ * Flush and invalidate vp's buffers through vinvalbuf().  Only one flush
+ * runs per nfsnode at a time (NFLUSHINPROG); later callers sleep until it
+ * finishes.  Returns 0, or EINTR once nfs_sigintr() reports an interrupt.
+ */
+nfs_vinvalbuf(vp, flags, cred, p, intrflg)
+	struct vnode *vp;
+	int flags;
+	struct ucred *cred;
+	struct proc *p;
+	int intrflg;
+{
+	struct nfsmount *mnt = VFSTONFS(vp->v_mount);
+	register struct nfsnode *node = VTONFS(vp);
+	int rv = 0, catchsig = 0, timo = 0, result = 0;
+
+	/* Interruption is honoured only on NFSMNT_INT mounts. */
+	if ((mnt->nm_flag & NFSMNT_INT) == 0)
+		intrflg = 0;
+	if (intrflg) {
+		catchsig = PCATCH;
+		timo = 2 * hz;
+	}
+
+	/*
+	 * Wait out any flush already in progress.  PCATCH is not passed to
+	 * tsleep() here; with intrflg set the timo timeout bounds each sleep.
+	 */
+	while (node->n_flag & NFLUSHINPROG) {
+		node->n_flag |= NFLUSHWANT;
+		rv = tsleep((caddr_t)&node->n_flag, PRIBIO + 2, "nfsvinval",
+			timo);
+		if (rv && intrflg && nfs_sigintr(mnt, (struct nfsreq *)0, p))
+			return (EINTR);
+	}
+
+	/* The flush is ours: retry until it succeeds or we are interrupted. */
+	node->n_flag |= NFLUSHINPROG;
+	for (rv = vinvalbuf(vp, flags, cred, p, catchsig, 0); rv != 0;
+	    rv = vinvalbuf(vp, flags, cred, p, 0, timo)) {
+		if (intrflg && nfs_sigintr(mnt, (struct nfsreq *)0, p)) {
+			result = EINTR;
+			break;
+		}
+	}
+
+	/* NMODIFIED is cleared only when the flush ran to completion. */
+	if (result == 0)
+		node->n_flag &= ~(NMODIFIED | NFLUSHINPROG);
+	else
+		node->n_flag &= ~NFLUSHINPROG;
+	/* Wake any process that queued up behind this flush. */
+	if (node->n_flag & NFLUSHWANT) {
+		node->n_flag &= ~NFLUSHWANT;
+		wakeup((caddr_t)&node->n_flag);
+	}
+	return (result);
+}
+
+/*
+ * Hand a buffer to an nfsiod for asynchronous I/O.  Returns EIO when
+ * nfs_numasync is 0 or no nfs_iodwant[] slot is set, so that requests are
+ * not queued while the nfsiods are all hung on a dead server.
+ */
+nfs_asyncio(bp, cred)
+	register struct buf *bp;
+	struct ucred *cred;
+{
+	register int slot;
+
+	if (nfs_numasync == 0)
+		return (EIO);
+	/* Pick the first nfsiod whose nfs_iodwant[] slot is set. */
+	for (slot = 0; slot < NFS_MAXASYNCDAEMON; slot++)
+		if (nfs_iodwant[slot])
+			break;
+	if (slot == NFS_MAXASYNCDAEMON)
+		return (EIO);
+	/* Give the buffer a reference to cred if it carries none yet. */
+	if (cred != NOCRED) {
+		if ((bp->b_flags & B_READ) && bp->b_rcred == NOCRED) {
+			crhold(cred);
+			bp->b_rcred = cred;
+		} else if (!(bp->b_flags & B_READ) && bp->b_wcred == NOCRED) {
+			crhold(cred);
+			bp->b_wcred = cred;
+		}
+	}
+	TAILQ_INSERT_TAIL(&nfs_bufq, bp, b_freelist);
+	nfs_iodwant[slot] = (struct proc *)0;
+	wakeup((caddr_t)&nfs_iodwant[slot]);
+	return (0);
+}
+
+/*
+ * Do an I/O operation to/from a cache block, synchronously or from an
+ * nfsiod.  Returns the rpc error (0 if none) after calling biodone(bp).
+ */
+int
+nfs_doio(bp, cr, p)
+	register struct buf *bp;
+	struct ucred *cr;
+	struct proc *p;
+{
+	register struct uio *uiop;
+	register struct vnode *vp;
+	struct nfsnode *np;
+	struct nfsmount *nmp;
+	int error = 0, diff, len;	/* error stays 0 if no rpc is issued */
+	struct uio uio;
+	struct iovec io;
+
+	vp = bp->b_vp;
+	np = VTONFS(vp);
+	nmp = VFSTONFS(vp->v_mount);
+	uiop = &uio;
+	uiop->uio_iov = &io;
+	uiop->uio_iovcnt = 1;
+	uiop->uio_segflg = UIO_SYSSPACE;
+	uiop->uio_procp = p;
+
+	/*
+	 * Historically, paging was done with physio, but no more.
+	 */
+	if (bp->b_flags & B_PHYS)
+	    panic("doio phys");
+	if (bp->b_flags & B_READ) {
+	    io.iov_len = uiop->uio_resid = bp->b_bcount;
+	    io.iov_base = bp->b_data;
+	    uiop->uio_rw = UIO_READ;
+	    switch (vp->v_type) {
+	    case VREG:
+		uiop->uio_offset = bp->b_blkno * DEV_BSIZE;
+		nfsstats.read_bios++;
+		error = nfs_readrpc(vp, uiop, cr);
+		if (!error) {
+		    bp->b_validoff = 0;
+		    if (uiop->uio_resid) {
+			/*
+			 * If len > 0, there is a hole in the file and
+			 * no writes after the hole have been pushed to
+			 * the server yet.
+			 * Just zero fill the rest of the valid area.
+			 */
+			diff = bp->b_bcount - uiop->uio_resid;
+			len = np->n_size - (bp->b_blkno * DEV_BSIZE
+				+ diff);
+			if (len > 0) {
+			    len = min(len, uiop->uio_resid);
+			    bzero((char *)bp->b_data + diff, len);
+			    bp->b_validend = diff + len;
+			} else
+			    bp->b_validend = diff;
+		    } else
+			bp->b_validend = bp->b_bcount;
+		}
+		if (p && (vp->v_flag & VTEXT) &&
+			(((nmp->nm_flag & NFSMNT_NQNFS) &&
+			  np->n_lrev != np->n_brev) ||
+			 (!(nmp->nm_flag & NFSMNT_NQNFS) &&
+			  np->n_mtime != np->n_vattr.va_mtime.ts_sec))) {
+			uprintf("Process killed due to text file modification\n");
+			psignal(p, SIGKILL);
+			p->p_flag |= P_NOSWAP;
+		}
+		break;
+	    case VLNK:
+		uiop->uio_offset = 0;
+		nfsstats.readlink_bios++;
+		error = nfs_readlinkrpc(vp, uiop, cr);
+		break;
+	    case VDIR:
+		uiop->uio_offset = bp->b_lblkno;
+		nfsstats.readdir_bios++;
+		if (VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NQNFS)
+		    error = nfs_readdirlookrpc(vp, uiop, cr);
+		else
+		    error = nfs_readdirrpc(vp, uiop, cr);
+		/*
+		 * Save offset cookie in b_blkno.
+		 */
+		bp->b_blkno = uiop->uio_offset;
+		break;
+	    };
+	    if (error) {
+		bp->b_flags |= B_ERROR;
+		bp->b_error = error;
+	    }
+	} else {
+	    io.iov_len = uiop->uio_resid = bp->b_dirtyend
+		- bp->b_dirtyoff;
+	    uiop->uio_offset = (bp->b_blkno * DEV_BSIZE)
+		+ bp->b_dirtyoff;
+	    io.iov_base = (char *)bp->b_data + bp->b_dirtyoff;
+	    uiop->uio_rw = UIO_WRITE;
+	    nfsstats.write_bios++;
+	    if (bp->b_flags & B_APPENDWRITE)
+		error = nfs_writerpc(vp, uiop, cr, IO_APPEND);
+	    else
+		error = nfs_writerpc(vp, uiop, cr, 0);
+	    bp->b_flags &= ~(B_WRITEINPROG | B_APPENDWRITE);
+
+	    /*
+	     * For an interrupted write, the buffer is still valid and the
+	     * write hasn't been pushed to the server yet, so we can't set
+	     * B_ERROR and report the interruption by setting B_EINTR. For
+	     * the B_ASYNC case, B_EINTR is not relevant, so the rpc attempt
+	     * is essentially a noop.
+	     */
+	    if (error == EINTR) {
+		bp->b_flags &= ~B_INVAL;
+		bp->b_flags |= B_DELWRI;
+
+		/*
+		 * Since for the B_ASYNC case, nfs_bwrite() has reassigned the
+		 * buffer to the clean list, we have to reassign it back to the
+		 * dirty one. Ugh.
+		 */
+		if (bp->b_flags & B_ASYNC)
+		    reassignbuf(bp, vp);
+		else
+		    bp->b_flags |= B_EINTR;
+	    } else {
+		if (error) {
+		    bp->b_flags |= B_ERROR;
+		    bp->b_error = np->n_error = error;	/* picked up later by nfs_write() */
+		    np->n_flag |= NWRITEERR;
+		}
+		bp->b_dirtyoff = bp->b_dirtyend = 0;
+	    }
+	}
+	bp->b_resid = uiop->uio_resid;
+	biodone(bp);
+	return (error);
+}
diff --git a/sys/nfs/nfs_common.c b/sys/nfs/nfs_common.c
new file mode 100644
index 00000000000..5778f7d7f01
--- /dev/null
+++ b/sys/nfs/nfs_common.c
@@ -0,0 +1,1130 @@
+/*
+ * Copyright (c) 1989, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Rick Macklem at The University of Guelph.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)nfs_subs.c	8.3 (Berkeley) 1/4/94
+ */
+
+/*
+ * These functions support the macros and help fiddle mbuf chains for
+ * the nfs op functions. They do things like create the rpc header and
+ * copy data between mbuf chains and uio lists.
+ */
+#include <sys/param.h>
+#include <sys/proc.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/mount.h>
+#include <sys/vnode.h>
+#include <sys/namei.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+
+#include <nfs/rpcv2.h>
+#include <nfs/nfsv2.h>
+#include <nfs/nfsnode.h>
+#include <nfs/nfs.h>
+#include <nfs/xdr_subs.h>
+#include <nfs/nfsm_subs.h>
+#include <nfs/nfsmount.h>
+#include <nfs/nqnfs.h>
+#include <nfs/nfsrtt.h>
+
+#include <miscfs/specfs/specdev.h>
+
+#include <netinet/in.h>
+#ifdef ISO
+#include <netiso/iso.h>
+#endif
+
+#define TRUE	1
+#define	FALSE	0
+
+/*
+ * Constants kept in xdr (network) form: nfs_init() converts them once at
+ * startup so that the request paths need not byte swap them on every use.
+ */
+u_long nfs_procids[NFS_NPROCS];		/* nfs_procids[i] is the xdr form of i */
+u_long nfs_xdrneg1;			/* xdr form of -1 */
+u_long rpc_call, rpc_vers, rpc_reply, rpc_msgdenied, rpc_autherr,
+	rpc_mismatch, rpc_auth_unix, rpc_msgaccepted, rpc_rejectedcred,
+	rpc_auth_kerb;			/* rpc_auth_kerb holds RPCAUTH_NQNFS */
+u_long nfs_vers, nfs_prog, nfs_true, nfs_false;
+
+/* Other global data */
+static u_long nfs_xid = 0;	/* last xid handed out by nfsm_rpchead() */
+enum vtype ntov_type[7] = { VNON, VREG, VDIR, VBLK, VCHR, VLNK, VNON };
+extern struct proc *nfs_iodwant[NFS_MAXASYNCDAEMON];
+extern struct nfsreq nfsreqh;
+extern int nqnfs_piggy[NFS_NPROCS];
+extern struct nfsrtt nfsrtt;
+extern time_t nqnfsstarttime;
+extern u_long nqnfs_prog, nqnfs_vers;
+extern int nqsrv_clockskew;
+extern int nqsrv_writeslack;
+extern int nqsrv_maxlease;
+
+/*
+ * Start building an nfs request.  Gets the first mbuf (with a cluster when
+ * hsiz, the size of the rest of the nfs request header, is >= MINCLSIZE)
+ * and, on an NQNFS mount, puts the lease request at the front of it.
+ */
+struct mbuf *
+nfsm_reqh(vp, procid, hsiz, bposp)
+	struct vnode *vp;
+	u_long procid;
+	int hsiz;
+	caddr_t *bposp;
+{
+	register struct mbuf *mb;
+	register u_long *tl;
+	register caddr_t bpos;
+	struct mbuf *mb2;	/* not named below; presumably for nfsm_build() */
+	struct nfsmount *nmp;
+	int nqflag;
+
+	MGET(mb, M_WAIT, MT_DATA);
+	if (hsiz >= MINCLSIZE)
+		MCLGET(mb, M_WAIT);
+	mb->m_len = 0;
+	bpos = mtod(mb, caddr_t);
+	
+	/*
+	 * NQNFS mounts only: lease flag and term, or a zero word if no lease.
+	 */
+	if (vp) {
+		nmp = VFSTONFS(vp->v_mount);
+		if (nmp->nm_flag & NFSMNT_NQNFS) {
+			nqflag = NQNFS_NEEDLEASE(vp, procid);
+			if (nqflag) {
+				nfsm_build(tl, u_long *, 2*NFSX_UNSIGNED);
+				*tl++ = txdr_unsigned(nqflag);
+				*tl = txdr_unsigned(nmp->nm_leaseterm);
+			} else {
+				nfsm_build(tl, u_long *, NFSX_UNSIGNED);
+				*tl = 0;
+			}
+		}
+	}
+	/* Hand back the mbuf and the position at which to go on building. */
+	*bposp = bpos;
+	return (mb);
+}
+
+/*
+ * Build the RPC header and fill in the authorization info, then chain the
+ * request body mrest (mrest_len bytes) behind it.  The authorization string
+ * auth_str is only copied for RPCAUTH_NQNFS.  Sets *xidp to the xid used and
+ * *mbp to the last header mbuf; returns the head of the mbuf list.
+ */
+struct mbuf *
+nfsm_rpchead(cr, nqnfs, procid, auth_type, auth_len, auth_str, mrest,
+	mrest_len, mbp, xidp)
+	register struct ucred *cr;
+	int nqnfs;
+	int procid;
+	int auth_type;
+	int auth_len;
+	char *auth_str;
+	struct mbuf *mrest;
+	int mrest_len;
+	struct mbuf **mbp;
+	u_long *xidp;
+{
+	register struct mbuf *mb;
+	register u_long *tl;
+	register caddr_t bpos;
+	register int i;
+	struct mbuf *mreq, *mb2;
+	int siz, grpsiz, authsiz;
+
+	authsiz = nfsm_rndup(auth_len);	/* length of the auth body on the wire */
+	if (auth_type == RPCAUTH_NQNFS)
+		authsiz += 2 * NFSX_UNSIGNED;	/* uid and length words built below */
+	MGETHDR(mb, M_WAIT, MT_DATA);
+	if ((authsiz + 10*NFSX_UNSIGNED) >= MINCLSIZE) {
+		MCLGET(mb, M_WAIT);
+	} else if ((authsiz + 10*NFSX_UNSIGNED) < MHLEN) {
+		MH_ALIGN(mb, authsiz + 10*NFSX_UNSIGNED);
+	} else {
+		MH_ALIGN(mb, 8*NFSX_UNSIGNED);
+	}
+	mb->m_len = 0;
+	mreq = mb;			/* remember the head; mb may move on */
+	bpos = mtod(mb, caddr_t);
+
+	/*
+	 * First the RPC header: xid, call, rpc version, program, version, proc.
+	 */
+	nfsm_build(tl, u_long *, 8*NFSX_UNSIGNED);
+	if (++nfs_xid == 0)		/* an xid of zero is never used */
+		nfs_xid++;
+	*tl++ = *xidp = txdr_unsigned(nfs_xid);
+	*tl++ = rpc_call;
+	*tl++ = rpc_vers;
+	if (nqnfs) {
+		*tl++ = txdr_unsigned(NQNFS_PROG);
+		*tl++ = txdr_unsigned(NQNFS_VER1);
+	} else {
+		*tl++ = txdr_unsigned(NFS_PROG);
+		*tl++ = txdr_unsigned(NFS_VER2);
+	}
+	*tl++ = txdr_unsigned(procid);
+
+	/*
+	 * And then the authorization cred (last two of the eight words above).
+	 */
+	*tl++ = txdr_unsigned(auth_type);
+	*tl = txdr_unsigned(authsiz);
+	switch (auth_type) {
+	case RPCAUTH_UNIX:
+		nfsm_build(tl, u_long *, auth_len);
+		*tl++ = 0;		/* stamp ?? */
+		*tl++ = 0;		/* NULL hostname */
+		*tl++ = txdr_unsigned(cr->cr_uid);
+		*tl++ = txdr_unsigned(cr->cr_groups[0]);
+		grpsiz = (auth_len >> 2) - 5;	/* words left after the 5 above */
+		*tl++ = txdr_unsigned(grpsiz);
+		for (i = 1; i <= grpsiz; i++)
+			*tl++ = txdr_unsigned(cr->cr_groups[i]);
+		break;
+	case RPCAUTH_NQNFS:
+		nfsm_build(tl, u_long *, 2*NFSX_UNSIGNED);
+		*tl++ = txdr_unsigned(cr->cr_uid);
+		*tl = txdr_unsigned(auth_len);
+		siz = auth_len;
+		while (siz > 0) {	/* copy auth_str, adding mbufs as needed */
+			if (M_TRAILINGSPACE(mb) == 0) {
+				MGET(mb2, M_WAIT, MT_DATA);
+				if (siz >= MINCLSIZE)
+					MCLGET(mb2, M_WAIT);
+				mb->m_next = mb2;
+				mb = mb2;
+				mb->m_len = 0;
+				bpos = mtod(mb, caddr_t);
+			}
+			i = min(siz, M_TRAILINGSPACE(mb));
+			bcopy(auth_str, bpos, i);
+			mb->m_len += i;
+			auth_str += i;
+			bpos += i;
+			siz -= i;
+		}
+		if ((siz = (nfsm_rndup(auth_len) - auth_len)) > 0) {
+			for (i = 0; i < siz; i++)	/* null pad to rounded size */
+				*bpos++ = '\0';
+			mb->m_len += siz;
+		}
+		break;
+	};
+	nfsm_build(tl, u_long *, 2*NFSX_UNSIGNED);	/* the verifier */
+	*tl++ = txdr_unsigned(RPCAUTH_NULL);
+	*tl = 0;			/* ... of length zero */
+	mb->m_next = mrest;
+	mreq->m_pkthdr.len = authsiz + 10*NFSX_UNSIGNED + mrest_len;
+	mreq->m_pkthdr.rcvif = (struct ifnet *)0;
+	*mbp = mb;
+	return (mreq);
+}
+
+/*
+ * Copy siz bytes (then skip the xdr pad) from an mbuf chain to a uio list.
+ */
+nfsm_mbuftouio(mrep, uiop, siz, dpos)
+	struct mbuf **mrep;
+	register struct uio *uiop;
+	int siz;
+	caddr_t *dpos;
+{
+	register char *mbufcp, *uiocp;
+	register int xfer, left, len;
+	register struct mbuf *mp;
+	long uiosiz, rem;
+	int error = 0;
+
+	mp = *mrep;
+	mbufcp = *dpos;
+	len = mtod(mp, caddr_t)+mp->m_len-mbufcp;	/* left in this mbuf */
+	rem = nfsm_rndup(siz)-siz;			/* pad bytes to skip */
+	while (siz > 0) {
+		if (uiop->uio_iovcnt <= 0 || uiop->uio_iov == NULL)
+			return (EFBIG);		/* uio has no room left */
+		left = uiop->uio_iov->iov_len;
+		uiocp = uiop->uio_iov->iov_base;
+		if (left > siz)
+			left = siz;
+		uiosiz = left;
+		while (left > 0) {
+			while (len == 0) {	/* move on to the next mbuf */
+				mp = mp->m_next;
+				if (mp == NULL)
+					return (EBADRPC);	/* chain too short */
+				mbufcp = mtod(mp, caddr_t);
+				len = mp->m_len;
+			}
+			xfer = (left > len) ? len : left;
+#ifdef notdef
+			/* Not Yet.. */
+			if (uiop->uio_iov->iov_op != NULL)
+				(*(uiop->uio_iov->iov_op))
+				(mbufcp, uiocp, xfer);
+			else
+#endif
+			if (uiop->uio_segflg == UIO_SYSSPACE)
+				bcopy(mbufcp, uiocp, xfer);
+			else if (error = copyout(mbufcp, uiocp, xfer))
+				return (error);	/* fault copying to user space */
+			left -= xfer;
+			len -= xfer;
+			mbufcp += xfer;
+			uiocp += xfer;
+			uiop->uio_offset += xfer;
+			uiop->uio_resid -= xfer;
+		}
+		if (uiop->uio_iov->iov_len <= siz) {	/* iovec used up */
+			uiop->uio_iovcnt--;
+			uiop->uio_iov++;
+		} else {
+			uiop->uio_iov->iov_base += uiosiz;
+			uiop->uio_iov->iov_len -= uiosiz;
+		}
+		siz -= uiosiz;
+	}
+	*dpos = mbufcp;
+	*mrep = mp;
+	if (rem > 0) {
+		if (len < rem)		/* pad runs into the next mbuf */
+			error = nfs_adv(mrep, dpos, rem, len);
+		else
+			*dpos += rem;
+	}
+	return (error);
+}
+
+/*
+ * Copy siz bytes from the uio scatter/gather list onto the end of the mbuf
+ * chain at *mq, getting more mbufs (clusters when siz > MLEN) as needed, and
+ * null pad to an xdr boundary.  On success *mq is the last mbuf and *bpos
+ * the next free byte.  Returns 0, EINVAL (uio used up) or a copyin() error.
+ */
+nfsm_uiotombuf(uiop, mq, siz, bpos)
+	register struct uio *uiop;
+	struct mbuf **mq;
+	int siz;
+	caddr_t *bpos;
+{
+	register char *uiocp;
+	register struct mbuf *mp, *mp2;
+	register int xfer, left, mlen;
+	int uiosiz, clflg, rem, error;
+	char *cp;
+
+	clflg = (siz > MLEN);		/* or should it >= MCLBYTES ?? */
+	rem = nfsm_rndup(siz)-siz;
+	mp = mp2 = *mq;
+	while (siz > 0) {
+		if (uiop->uio_iovcnt <= 0 || uiop->uio_iov == NULL)
+			return (EINVAL);
+		left = uiop->uio_iov->iov_len;
+		uiocp = uiop->uio_iov->iov_base;
+		if (left > siz)
+			left = siz;
+		uiosiz = left;
+		while (left > 0) {
+			mlen = M_TRAILINGSPACE(mp);
+			if (mlen == 0) {	/* current mbuf full: chain a new one */
+				MGET(mp, M_WAIT, MT_DATA);
+				if (clflg)
+					MCLGET(mp, M_WAIT);
+				mp->m_len = 0;
+				mp2->m_next = mp;
+				mp2 = mp;
+				mlen = M_TRAILINGSPACE(mp);
+			}
+			xfer = (left > mlen) ? mlen : left;
+#ifdef notdef
+			/* Not Yet.. */
+			if (uiop->uio_iov->iov_op != NULL)
+				(*(uiop->uio_iov->iov_op))
+				(uiocp, mtod(mp, caddr_t)+mp->m_len, xfer);
+			else
+#endif
+			if (uiop->uio_segflg == UIO_SYSSPACE)
+				bcopy(uiocp, mtod(mp, caddr_t)+mp->m_len, xfer);
+			else if (error = copyin(uiocp, mtod(mp, caddr_t)+mp->m_len, xfer))
+				return (error);	/* fault reading user space */
+			mp->m_len += xfer;
+			left -= xfer;
+			uiocp += xfer;
+			uiop->uio_offset += xfer;
+			uiop->uio_resid -= xfer;
+		}
+		if (uiop->uio_iov->iov_len <= siz) {	/* iovec used up */
+			uiop->uio_iovcnt--;
+			uiop->uio_iov++;
+		} else {
+			uiop->uio_iov->iov_base += uiosiz;
+			uiop->uio_iov->iov_len -= uiosiz;
+		}
+		siz -= uiosiz;
+	}
+	if (rem > 0) {
+		if (rem > M_TRAILINGSPACE(mp)) {	/* no room for the pad */
+			MGET(mp, M_WAIT, MT_DATA);
+			mp->m_len = 0;
+			mp2->m_next = mp;
+		}
+		cp = mtod(mp, caddr_t)+mp->m_len;
+		for (left = 0; left < rem; left++)
+			*cp++ = '\0';
+		mp->m_len += rem;
+		*bpos = cp;
+	} else
+		*bpos = mtod(mp, caddr_t)+mp->m_len;
+	*mq = mp;
+	return (0);
+}
+
+/*
+ * Help break down an mbuf chain by making the next siz bytes contiguous
+ * and pointing *cp2 at them; left is what remains in the current mbuf.
+ * Returns 0, or EBADRPC if the chain holds fewer than siz more bytes.
+ * Used by the macros nfsm_dissect and nfsm_dissecton for tough cases.
+ */
+nfsm_disct(mdp, dposp, siz, left, cp2)
+	struct mbuf **mdp;
+	caddr_t *dposp;
+	int siz;
+	int left;
+	caddr_t *cp2;
+{
+	register struct mbuf *mp, *mp2;
+	register int siz2, xfer;
+	register caddr_t p;
+
+	mp = *mdp;
+	while (left == 0) {		/* current mbuf used up: step forward */
+		*mdp = mp = mp->m_next;
+		if (mp == NULL)
+			return (EBADRPC);
+		left = mp->m_len;
+		*dposp = mtod(mp, caddr_t);
+	}
+	if (left >= siz) {		/* already contiguous: just advance */
+		*cp2 = *dposp;
+		*dposp += siz;
+	} else if (mp->m_next == NULL) {
+		return (EBADRPC);
+	} else if (siz > MHLEN) {
+		panic("nfs S too big");
+	} else {
+		MGET(mp2, M_WAIT, MT_DATA);	/* new mbuf to gather the bytes */
+		mp2->m_next = mp->m_next;
+		mp->m_next = mp2;
+		mp->m_len -= left;	/* the tail of mp moves into the new mbuf */
+		mp = mp2;
+		*cp2 = p = mtod(mp, caddr_t);
+		bcopy(*dposp, p, left);		/* Copy what was left */
+		siz2 = siz-left;
+		p += left;
+		mp2 = mp->m_next;
+		/* Loop around copying up the siz2 bytes */
+		while (siz2 > 0) {
+			if (mp2 == NULL)
+				return (EBADRPC);
+			xfer = (siz2 > mp2->m_len) ? mp2->m_len : siz2;
+			if (xfer > 0) {
+				bcopy(mtod(mp2, caddr_t), p, xfer);
+				NFSMADV(mp2, xfer);
+				mp2->m_len -= xfer;
+				p += xfer;
+				siz2 -= xfer;
+			}
+			if (siz2 > 0)
+				mp2 = mp2->m_next;
+		}
+		mp->m_len = siz;
+		*mdp = mp2;		/* parsing resumes in the mbuf last read */
+		*dposp = mtod(mp2, caddr_t);
+	}
+	return (0);
+}
+
+/*
+ * Step the parse position forward by offs bytes, of which only "left"
+ * remain in the current mbuf; fails with EBADRPC if the chain runs out.
+ */
+nfs_adv(mdp, dposp, offs, left)
+	struct mbuf **mdp;
+	caddr_t *dposp;
+	int offs;
+	int left;
+{
+	register struct mbuf *cur = *mdp;
+	register int avail;
+
+	/* Use up whole mbufs until the target lies inside the current one. */
+	for (avail = left; avail < offs; avail = cur->m_len) {
+		offs -= avail;
+		if ((cur = cur->m_next) == NULL)
+			return (EBADRPC);
+	}
+
+	/* The new position is offs bytes into the data of cur. */
+	*mdp = cur;
+	*dposp = mtod(cur, caddr_t) + offs;
+	return (0);
+}
+
+/*
+ * Copy a string, preceded by its xdr length word, into mbufs (hard cases).
+ */
+nfsm_strtmbuf(mb, bpos, cp, siz)
+	struct mbuf **mb;
+	char **bpos;
+	char *cp;
+	long siz;
+{
+	register struct mbuf *m1, *m2;
+	long left, xfer, len, tlen;
+	u_long *tl;
+	int putsize;
+
+	putsize = 1;			/* length word still to be written */
+	m2 = *mb;
+	left = M_TRAILINGSPACE(m2);
+	if (left > 0) {			/* start in the current mbuf */
+		tl = ((u_long *)(*bpos));
+		*tl++ = txdr_unsigned(siz);
+		putsize = 0;
+		left -= NFSX_UNSIGNED;
+		m2->m_len += NFSX_UNSIGNED;
+		if (left > 0) {		/* fill the rest of it with string bytes */
+			bcopy(cp, (caddr_t) tl, left);
+			siz -= left;
+			cp += left;
+			m2->m_len += left;
+			left = 0;
+		}
+	}
+	/* Loop around adding mbufs */
+	while (siz > 0) {
+		MGET(m1, M_WAIT, MT_DATA);
+		if (siz > MLEN)
+			MCLGET(m1, M_WAIT);
+		m1->m_len = NFSMSIZ(m1);	/* presumably the mbuf's capacity */
+		m2->m_next = m1;
+		m2 = m1;
+		tl = mtod(m1, u_long *);
+		tlen = 0;
+		if (putsize) {		/* length word leads the first new mbuf */
+			*tl++ = txdr_unsigned(siz);
+			m1->m_len -= NFSX_UNSIGNED;
+			tlen = NFSX_UNSIGNED;
+			putsize = 0;
+		}
+		if (siz < m1->m_len) {	/* the remainder fits in this mbuf */
+			len = nfsm_rndup(siz);
+			xfer = siz;
+			if (xfer < len)	/* zero the last word so the pad is null */
+				*(tl+(xfer>>2)) = 0;
+		} else {
+			xfer = len = m1->m_len;
+		}
+		bcopy(cp, (caddr_t) tl, xfer);
+		m1->m_len = len+tlen;
+		siz -= xfer;
+		cp += xfer;
+	}
+	*mb = m1;
+	*bpos = mtod(m1, caddr_t)+m1->m_len;
+	return (0);
+}
+
+/*
+ * Called once to initialize the nfs client and server data structures.
+ */
+nfs_init()
+{
+	register int i;
+
+	nfsrtt.pos = 0;
+	rpc_vers = txdr_unsigned(RPC_VER2);	/* xdr forms of the constants */
+	rpc_call = txdr_unsigned(RPC_CALL);
+	rpc_reply = txdr_unsigned(RPC_REPLY);
+	rpc_msgdenied = txdr_unsigned(RPC_MSGDENIED);
+	rpc_msgaccepted = txdr_unsigned(RPC_MSGACCEPTED);
+	rpc_mismatch = txdr_unsigned(RPC_MISMATCH);
+	rpc_autherr = txdr_unsigned(RPC_AUTHERR);
+	rpc_rejectedcred = txdr_unsigned(AUTH_REJECTCRED);
+	rpc_auth_unix = txdr_unsigned(RPCAUTH_UNIX);
+	rpc_auth_kerb = txdr_unsigned(RPCAUTH_NQNFS);
+	nfs_vers = txdr_unsigned(NFS_VER2);
+	nfs_prog = txdr_unsigned(NFS_PROG);
+	nfs_true = txdr_unsigned(TRUE);
+	nfs_false = txdr_unsigned(FALSE);
+	/* Loop thru nfs procids */
+	for (i = 0; i < NFS_NPROCS; i++)
+		nfs_procids[i] = txdr_unsigned(i);
+	/* Ensure async daemons disabled */
+	for (i = 0; i < NFS_MAXASYNCDAEMON; i++)
+		nfs_iodwant[i] = (struct proc *)0;
+	TAILQ_INIT(&nfs_bufq);
+	nfs_xdrneg1 = txdr_unsigned(-1);
+	nfs_nhinit();			/* Init the nfsnode table */
+	nfsrv_init(0);			/* Init server data structures */
+	nfsrv_initcache();		/* Init the server request cache */
+
+	/*
+	 * Initialize the nqnfs server stuff (skipped once a start time is set).
+	 */
+	if (nqnfsstarttime == 0) {
+		nqnfsstarttime = boottime.tv_sec + nqsrv_maxlease
+			+ nqsrv_clockskew + nqsrv_writeslack;
+		NQLOADNOVRAM(nqnfsstarttime);
+		nqnfs_prog = txdr_unsigned(NQNFS_PROG);
+		nqnfs_vers = txdr_unsigned(NQNFS_VER1);
+		nqthead.th_head[0] = &nqthead;	/* both links point at itself */
+		nqthead.th_head[1] = &nqthead;
+		nqfhead = hashinit(NQLCHSZ, M_NQLEASE, &nqfheadhash);
+	}
+
+	/*
+	 * Initialize reply list (head linked to itself) and start timer
+	 */
+	nfsreqh.r_prev = nfsreqh.r_next = &nfsreqh;
+	nfs_timer();
+}
+
+/*
+ * Attribute cache routines.
+ * nfs_loadattrcache() - loads or updates the cache contents from attributes
+ *	that are on the mbuf list
+ * nfs_getattrcache() - returns valid attributes if found in cache, returns
+ *	error otherwise
+ */
+
+/*
+ * Load the attribute cache (that lives in the nfsnode entry) with the
+ * nfsv2_fattr found at the current position of the mbuf list, advancing
+ * *mdp and *dposp past it.  *vpp may be replaced by an aliased device vnode.
+ * Iff vaper is not NULL, also copy the attributes to *vaper.
+ */
+nfs_loadattrcache(vpp, mdp, dposp, vaper)
+	struct vnode **vpp;
+	struct mbuf **mdp;
+	caddr_t *dposp;
+	struct vattr *vaper;
+{
+	register struct vnode *vp = *vpp;
+	register struct vattr *vap;
+	register struct nfsv2_fattr *fp;
+	extern int (**spec_nfsv2nodeop_p)();
+	register struct nfsnode *np, *nq, **nhpp;
+	register long t1;
+	caddr_t dpos, cp2;
+	int error = 0, isnq;
+	struct mbuf *md;
+	enum vtype vtyp;
+	u_short vmode;
+	long rdev;
+	struct timespec mtime;
+	struct vnode *nvp;
+
+	md = *mdp;
+	dpos = *dposp;
+	t1 = (mtod(md, caddr_t) + md->m_len) - dpos;	/* left in this mbuf */
+	isnq = (VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NQNFS);
+	if (error = nfsm_disct(&md, &dpos, NFSX_FATTR(isnq), t1, &cp2))
+		return (error);
+	fp = (struct nfsv2_fattr *)cp2;
+	vtyp = nfstov_type(fp->fa_type);
+	vmode = fxdr_unsigned(u_short, fp->fa_mode);
+	if (vtyp == VNON || vtyp == VREG)	/* take the type from the mode */
+		vtyp = IFTOVT(vmode);
+	if (isnq) {
+		rdev = fxdr_unsigned(long, fp->fa_nqrdev);
+		fxdr_nqtime(&fp->fa_nqmtime, &mtime);
+	} else {
+		rdev = fxdr_unsigned(long, fp->fa_nfsrdev);
+		fxdr_nfstime(&fp->fa_nfsmtime, &mtime);
+	}
+	/*
+	 * If v_type == VNON it is a new node, so fill in the v_type,
+	 * n_mtime fields. Check to see if it represents a special
+	 * device, and if so, check for a possible alias. Once the
+	 * correct vnode has been obtained, fill in the rest of the
+	 * information.
+	 */
+	np = VTONFS(vp);
+	if (vp->v_type == VNON) {
+		if (vtyp == VCHR && rdev == 0xffffffff)	/* taken to be a fifo */
+			vp->v_type = vtyp = VFIFO;
+		else
+			vp->v_type = vtyp;
+		if (vp->v_type == VFIFO) {
+#ifdef FIFO
+			extern int (**fifo_nfsv2nodeop_p)();
+			vp->v_op = fifo_nfsv2nodeop_p;
+#else
+			return (EOPNOTSUPP);
+#endif /* FIFO */
+		}
+		if (vp->v_type == VCHR || vp->v_type == VBLK) {
+			vp->v_op = spec_nfsv2nodeop_p;
+			if (nvp = checkalias(vp, (dev_t)rdev, vp->v_mount)) {
+				/*
+				 * Discard unneeded vnode, but save its nfsnode.
+				 */
+				if (nq = np->n_forw)	/* unlink np from its chain */
+					nq->n_back = np->n_back;
+				*np->n_back = nq;
+				nvp->v_data = vp->v_data;
+				vp->v_data = NULL;
+				vp->v_op = spec_vnodeop_p;
+				vrele(vp);
+				vgone(vp);
+				/*
+				 * Reinitialize aliased node.
+				 */
+				np->n_vnode = nvp;
+				nhpp = (struct nfsnode **)nfs_hash(&np->n_fh);
+				if (nq = *nhpp)		/* relink np at the chain head */
+					nq->n_back = &np->n_forw;
+				np->n_forw = nq;
+				np->n_back = nhpp;
+				*nhpp = np;
+				*vpp = vp = nvp;
+			}
+		}
+		np->n_mtime = mtime.ts_sec;
+	}
+	vap = &np->n_vattr;
+	vap->va_type = vtyp;
+	vap->va_mode = (vmode & 07777);
+	vap->va_nlink = fxdr_unsigned(u_short, fp->fa_nlink);
+	vap->va_uid = fxdr_unsigned(uid_t, fp->fa_uid);
+	vap->va_gid = fxdr_unsigned(gid_t, fp->fa_gid);
+	vap->va_rdev = (dev_t)rdev;
+	vap->va_mtime = mtime;
+	vap->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0];
+	if (isnq) {
+		fxdr_hyper(&fp->fa_nqsize, &vap->va_size);
+		vap->va_blocksize = fxdr_unsigned(long, fp->fa_nqblocksize);
+		fxdr_hyper(&fp->fa_nqbytes, &vap->va_bytes);
+		vap->va_fileid = fxdr_unsigned(long, fp->fa_nqfileid);
+		fxdr_nqtime(&fp->fa_nqatime, &vap->va_atime);
+		vap->va_flags = fxdr_unsigned(u_long, fp->fa_nqflags);
+		fxdr_nqtime(&fp->fa_nqctime, &vap->va_ctime);
+		vap->va_gen = fxdr_unsigned(u_long, fp->fa_nqgen);
+		fxdr_hyper(&fp->fa_nqfilerev, &vap->va_filerev);
+	} else {
+		vap->va_size = fxdr_unsigned(u_long, fp->fa_nfssize);
+		vap->va_blocksize = fxdr_unsigned(long, fp->fa_nfsblocksize);
+		vap->va_bytes = fxdr_unsigned(long, fp->fa_nfsblocks) * NFS_FABLKSIZE;
+		vap->va_fileid = fxdr_unsigned(long, fp->fa_nfsfileid);
+		fxdr_nfstime(&fp->fa_nfsatime, &vap->va_atime);
+		vap->va_flags = 0;
+		vap->va_ctime.ts_sec = fxdr_unsigned(long, fp->fa_nfsctime.nfs_sec);
+		vap->va_ctime.ts_nsec = 0;
+		vap->va_gen = fxdr_unsigned(u_long, fp->fa_nfsctime.nfs_usec);
+		vap->va_filerev = 0;
+	}
+	if (vap->va_size != np->n_size) {	/* settle the two sizes */
+		if (vap->va_type == VREG) {
+			if (np->n_flag & NMODIFIED) {	/* the larger one wins */
+				if (vap->va_size < np->n_size)
+					vap->va_size = np->n_size;
+				else
+					np->n_size = vap->va_size;
+			} else
+				np->n_size = vap->va_size;
+			vnode_pager_setsize(vp, (u_long)np->n_size);
+		} else
+			np->n_size = vap->va_size;
+	}
+	np->n_attrstamp = time.tv_sec;	/* time at which the cache was loaded */
+	*dposp = dpos;
+	*mdp = md;
+	if (vaper != NULL) {
+		bcopy((caddr_t)vap, (caddr_t)vaper, sizeof(*vap));
+#ifdef notdef
+		if ((np->n_flag & NMODIFIED) && np->n_size > vap->va_size)
+		if (np->n_size > vap->va_size)
+			vaper->va_size = np->n_size;
+#endif
+		if (np->n_flag & NCHG) {	/* nfsnode times override the copy */
+			if (np->n_flag & NACC) {
+				vaper->va_atime.ts_sec = np->n_atim.tv_sec;
+				vaper->va_atime.ts_nsec =
+				    np->n_atim.tv_usec * 1000;
+			}
+			if (np->n_flag & NUPD) {
+				vaper->va_mtime.ts_sec = np->n_mtim.tv_sec;
+				vaper->va_mtime.ts_nsec =
+				    np->n_mtim.tv_usec * 1000;
+			}
+		}
+	}
+	return (0);
+}
+
+/*
+ * Fetch attributes from the cache, provided they can still be trusted.
+ * With NFSMNT_NQLOOKLEASE set on the mount the entry is usable while
+ * NQNFS_CKCACHABLE(vp, NQL_READ) holds and it has a non-zero time stamp;
+ * without it, while the stamp is less than NFS_ATTRTIMEO(np) seconds old.
+ * On a hit copy the contents to *vaper and return 0, else return ENOENT.
+ */
+nfs_getattrcache(vp, vaper)
+	register struct vnode *vp;
+	struct vattr *vaper;
+{
+	register struct nfsnode *np = VTONFS(vp);
+	register struct vattr *cached = &np->n_vattr;
+	int stale;
+
+	if (VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NQLOOKLEASE)
+		stale = !NQNFS_CKCACHABLE(vp, NQL_READ) ||
+		    np->n_attrstamp == 0;
+	else
+		stale = (time.tv_sec - np->n_attrstamp) >= NFS_ATTRTIMEO(np);
+	if (stale) {
+		nfsstats.attrcache_misses++;
+		return (ENOENT);
+	}
+	nfsstats.attrcache_hits++;
+	/*
+	 * Settle any disagreement between the cached size and n_size.  Only
+	 * a regular file flagged NMODIFIED whose n_size is the larger keeps
+	 * n_size; in every other case the cached size wins.  Regular files
+	 * then have the pager told of the resulting size.
+	 */
+	if (cached->va_size != np->n_size) {
+		if (cached->va_type == VREG && (np->n_flag & NMODIFIED) &&
+		    cached->va_size < np->n_size)
+			cached->va_size = np->n_size;
+		else
+			np->n_size = cached->va_size;
+		if (cached->va_type == VREG)
+			vnode_pager_setsize(vp, (u_long)np->n_size);
+	}
+	bcopy((caddr_t)cached, (caddr_t)vaper, sizeof(struct vattr));
+	/*
+	 * When NCHG is set, the nfsnode's own access (NACC) and modify
+	 * (NUPD) times replace the cached ones in the caller's copy.
+	 */
+	if ((np->n_flag & NCHG) == 0)
+		return (0);
+	if (np->n_flag & NACC) {
+		vaper->va_atime.ts_sec = np->n_atim.tv_sec;
+		vaper->va_atime.ts_nsec = np->n_atim.tv_usec * 1000;
+	}
+	if (np->n_flag & NUPD) {
+		vaper->va_mtime.ts_sec = np->n_mtim.tv_sec;
+		vaper->va_mtime.ts_nsec = np->n_mtim.tv_usec * 1000;
+	}
+	return (0);
+}
+
+/*
+ * Set up nameidata from an rpc name and file handle, then call lookup()
+ */
+nfs_namei(ndp, fhp, len, slp, nam, mdp, dposp, p)
+	register struct nameidata *ndp;
+	fhandle_t *fhp;
+	int len;
+	struct nfssvc_sock *slp;
+	struct mbuf *nam;
+	struct mbuf **mdp;
+	caddr_t *dposp;
+	struct proc *p;
+{
+	register int i, rem;
+	register struct mbuf *md;
+	register char *fromcp, *tocp;
+	struct vnode *dp;
+	int error, rdonly;
+	struct componentname *cnp = &ndp->ni_cnd;
+
+	MALLOC(cnp->cn_pnbuf, char *, len + 1, M_NAMEI, M_WAITOK);
+	/*
+	 * Copy the name from the mbuf list to cnp->cn_pnbuf
+	 * and set the various ndp fields appropriately.
+	 */
+	fromcp = *dposp;
+	tocp = cnp->cn_pnbuf;
+	md = *mdp;
+	rem = mtod(md, caddr_t) + md->m_len - fromcp;	/* left in this mbuf */
+	cnp->cn_hash = 0;
+	for (i = 0; i < len; i++) {
+		while (rem == 0) {
+			md = md->m_next;
+			if (md == NULL) {
+				error = EBADRPC;	/* name runs off the chain */
+				goto out;
+			}
+			fromcp = mtod(md, caddr_t);
+			rem = md->m_len;
+		}
+		if (*fromcp == '\0' || *fromcp == '/') {
+			error = EINVAL;		/* must be a single component */
+			goto out;
+		}
+		cnp->cn_hash += (unsigned char)*fromcp;
+		*tocp++ = *fromcp++;
+		rem--;
+	}
+	*tocp = '\0';
+	*mdp = md;
+	*dposp = fromcp;
+	len = nfsm_rndup(len)-len;	/* len is now the xdr pad to skip */
+	if (len > 0) {
+		if (rem >= len)
+			*dposp += len;
+		else if (error = nfs_adv(mdp, dposp, len, rem))
+			goto out;
+	}
+	ndp->ni_pathlen = tocp - cnp->cn_pnbuf;
+	cnp->cn_nameptr = cnp->cn_pnbuf;
+	/*
+	 * Extract and set starting directory.
+	 */
+	if (error = nfsrv_fhtovp(fhp, FALSE, &dp, ndp->ni_cnd.cn_cred, slp,
+	    nam, &rdonly))
+		goto out;
+	if (dp->v_type != VDIR) {
+		vrele(dp);
+		error = ENOTDIR;
+		goto out;
+	}
+	ndp->ni_startdir = dp;
+	if (rdonly)
+		cnp->cn_flags |= (NOCROSSMOUNT | RDONLY);
+	else
+		cnp->cn_flags |= NOCROSSMOUNT;
+	/*
+	 * And call lookup() to do the real work
+	 */
+	cnp->cn_proc = p;
+	if (error = lookup(ndp))
+		goto out;
+	/*
+	 * Check for encountering a symbolic link
+	 */
+	if (cnp->cn_flags & ISSYMLINK) {
+		if ((cnp->cn_flags & LOCKPARENT) && ndp->ni_pathlen == 1)
+			vput(ndp->ni_dvp);
+		else
+			vrele(ndp->ni_dvp);
+		vput(ndp->ni_vp);
+		ndp->ni_vp = NULL;
+		error = EINVAL;
+		goto out;
+	}
+	/*
+	 * Check for saved name request
+	 */
+	if (cnp->cn_flags & (SAVENAME | SAVESTART)) {
+		cnp->cn_flags |= HASBUF;	/* the name buffer is kept */
+		return (0);
+	}
+out:
+	FREE(cnp->cn_pnbuf, M_NAMEI);	/* also reached on success, error == 0 */
+	return (error);
+}
+
+/*
+ * A fiddled version of m_adj() that only trims off the back end: take len
+ * bytes off the tail of the chain, then null the last nul bytes of what
+ * is left, so that the data is null filled to a long boundary.
+ */
+void
+nfsm_adj(mp, len, nul)
+	struct mbuf *mp;
+	register int len;
+	int nul;
+{
+	register struct mbuf *mb;
+	register int total, n;
+	register char *fill;
+
+	/*
+	 * Walk to the last mbuf, adding up the length of the whole
+	 * chain on the way.
+	 */
+	total = 0;
+	for (mb = mp; ; mb = mb->m_next) {
+		total += mb->m_len;
+		if (mb->m_next == (struct mbuf *)0)
+			break;
+	}
+
+	if (mb->m_len <= len) {
+		/*
+		 * The trim reaches back past the last mbuf.  Work out
+		 * the length the chain should end up with (never less
+		 * than zero) and rescan from the head for the mbuf that
+		 * will hold the new end of data.
+		 */
+		total -= len;
+		if (total < 0)
+			total = 0;
+		for (mb = mp; mb; mb = mb->m_next) {
+			if (mb->m_len >= total)
+				break;
+			total -= mb->m_len;
+		}
+		mb->m_len = total;
+	} else {
+		/* The adjustment only affects the last mbuf. */
+		mb->m_len -= len;
+	}
+
+	/*
+	 * Null out the final nul bytes of the (new) last mbuf.
+	 */
+	if (nul > 0) {
+		fill = mtod(mb, caddr_t) + mb->m_len - nul;
+		for (n = 0; n < nul; n++)
+			*fill++ = '\0';
+	}
+	/*
+	 * Empty, without freeing, any mbufs beyond the new end of data.
+	 */
+	while (mb = mb->m_next)
+		mb->m_len = 0;
+}
+
+/*
+ * nfsrv_fhtovp() - convert a fh to a vnode ptr (optionally locked)
+ * 	- look up fsid in mount list (if not found ret error)
+ *	- get vp and export rights by calling VFS_FHTOVP()
+ *	- if cred->cr_uid == 0 or MNT_EXPORTANON set it to credanon
+ *	  (uid, groups and group count, so no caller groups survive)
+ *	- if not lockflag unlock it with VOP_UNLOCK()
+ */
+nfsrv_fhtovp(fhp, lockflag, vpp, cred, slp, nam, rdonlyp)
+	fhandle_t *fhp;
+	int lockflag;
+	struct vnode **vpp;
+	struct ucred *cred;
+	struct nfssvc_sock *slp;
+	struct mbuf *nam;
+	int *rdonlyp;
+{
+	register struct mount *mp;
+	register struct nfsuid *uidp;
+	register int i;
+	struct ucred *credanon;
+	int error, exflags;
+
+	*vpp = (struct vnode *)0;
+	if ((mp = getvfs(&fhp->fh_fsid)) == NULL)
+		return (ESTALE);
+	if (error = VFS_FHTOVP(mp, &fhp->fh_fid, nam, vpp, &exflags, &credanon))
+		return (error);
+	/*
+	 * Check/setup credentials.
+	 */
+	if (exflags & MNT_EXKERB) {
+		uidp = slp->ns_uidh[NUIDHASH(cred->cr_uid)];
+		while (uidp) {		/* search the hash chain for this uid */
+			if (uidp->nu_uid == cred->cr_uid)
+				break;
+			uidp = uidp->nu_hnext;
+		}
+		if (uidp) {
+			cred->cr_uid = uidp->nu_cr.cr_uid;
+			for (i = 0; i < uidp->nu_cr.cr_ngroups; i++)
+				cred->cr_groups[i] = uidp->nu_cr.cr_groups[i];
+			cred->cr_ngroups = i;	/* drop any groups not copied */
+		} else {
+			vput(*vpp);	/* no entry for the uid: refuse */
+			return (NQNFS_AUTHERR);
+		}
+	} else if (cred->cr_uid == 0 || (exflags & MNT_EXPORTANON)) {
+		cred->cr_uid = credanon->cr_uid;
+		for (i = 0; i < credanon->cr_ngroups && i < NGROUPS; i++)
+			cred->cr_groups[i] = credanon->cr_groups[i];
+		cred->cr_ngroups = i;	/* drop any groups not copied */
+	}
+	*rdonlyp = (exflags & MNT_EXRDONLY) ? 1 : 0;
+	if (!lockflag)
+		VOP_UNLOCK(*vpp);
+	return (0);
+}
+
+/*
+ * Compare the net address held in the mbuf nam against the stored host
+ * address haddr, which is of the given address family, and return TRUE
+ * if they are the same host.
+ * If there is any doubt (including an unknown family), return FALSE.
+ * The AF_INET family is handled as a special case: its address is only a
+ * 4 byte "struct in_addr", kept in haddr itself, so that address mbufs
+ * don't need to be saved to store it.
+ */
+netaddr_match(family, haddr, nam)
+	int family;
+	union nethostaddr *haddr;
+	struct mbuf *nam;
+{
+	if (family == AF_INET) {
+		register struct sockaddr_in *inaddr;
+
+		/* Internet: the family and the bare address must agree. */
+		inaddr = mtod(nam, struct sockaddr_in *);
+		if (inaddr->sin_family != AF_INET)
+			return (0);
+		return (inaddr->sin_addr.s_addr == haddr->had_inetaddr);
+	}
+#ifdef ISO
+	if (family == AF_ISO) {
+		register struct sockaddr_iso *theirs, *ours;
+
+		/* Only non-empty addresses of equal length can match. */
+		theirs = mtod(nam, struct sockaddr_iso *);
+		ours = mtod(haddr->had_nam, struct sockaddr_iso *);
+		if (theirs->siso_family != AF_ISO || theirs->siso_nlen <= 0)
+			return (0);
+		if (theirs->siso_nlen != ours->siso_nlen)
+			return (0);
+		return (SAME_ISOADDR(theirs, ours) ? 1 : 0);
+	}
+#endif	/* ISO */
+
+	/* A family we know nothing about. */
+	return (0);
+}
diff --git a/sys/nfs/nfs_common.h b/sys/nfs/nfs_common.h
new file mode 100644
index 00000000000..879db360057
--- /dev/null
+++ b/sys/nfs/nfs_common.h
@@ -0,0 +1,269 @@
+/*
+ * Copyright (c) 1989, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Rick Macklem at The University of Guelph.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)nfsm_subs.h	8.1 (Berkeley) 6/16/93
+ */
+
+/*
+ * These macros do strange and peculiar things to mbuf chains for
+ * the assistance of the nfs code. To attempt to use them for any
+ * other purpose will be dangerous. (they make weird assumptions)
+ */
+
+/*
+ * First declare what the out-of-line helper returns, then the mbuf helpers
+ */
+extern struct mbuf *nfsm_reqh();
+
+#define	M_HASCL(m)	((m)->m_flags & M_EXT)	/* nonzero when M_EXT is set on m */
+#define	NFSMINOFF(m)	/* point m_data back at the start of m's buffer */ \
+		if (M_HASCL(m)) \
+			(m)->m_data = (m)->m_ext.ext_buf; \
+		else if ((m)->m_flags & M_PKTHDR) \
+			(m)->m_data = (m)->m_pktdat; \
+		else \
+			(m)->m_data = (m)->m_dat
+#define	NFSMADV(m, s)	(m)->m_data += (s)	/* move m_data forward by s bytes */
+#define	NFSMSIZ(m)	((M_HASCL(m))?MCLBYTES: /* size constant matching m's flags */ \
+				(((m)->m_flags & M_PKTHDR)?MHLEN:MLEN))
+
+/*
+ * Now for the macros that do the simple stuff and call the functions
+ * for the hard stuff.
+ * These macros use several vars. declared in nfsm_reqhead and these
+ * vars. must not be used elsewhere unless you are careful not to corrupt
+ * them. The vars. starting with pN and tN (N=1,2,3,..) are temporaries
+ * that may be used so long as the value is not expected to be retained
+ * after a macro.
+ * I know, this is kind of dorkey, but it makes the actual op functions
+ * fairly clean and deals with the mess caused by the xdr discriminating
+ * unions.
+ */
+
+#define	nfsm_build(a,c,s)	/* claim s bytes at bpos in mb; (a) = (c)bpos */ \
+		{ if ((s) > M_TRAILINGSPACE(mb)) { /* mb cannot hold s more bytes */ \
+			MGET(mb2, M_WAIT, MT_DATA); \
+			if ((s) > MLEN) \
+				panic("build > MLEN"); \
+			mb->m_next = mb2; /* chain the new mbuf on and continue in it */ \
+			mb = mb2; \
+			mb->m_len = 0; \
+			bpos = mtod(mb, caddr_t); \
+		} \
+		(a) = (c)(bpos); \
+		mb->m_len += (s); /* count the bytes handed out via (a) */ \
+		bpos += (s); }
+
+#define	nfsm_dissect(a,c,s)	/* (a) = (c)pointer to the next s bytes at dpos */ \
+		{ t1 = mtod(md, caddr_t)+md->m_len-dpos; /* bytes left in md past dpos */ \
+		if (t1 >= (s)) { \
+			(a) = (c)(dpos); \
+			dpos += (s); \
+		} else if (error = nfsm_disct(&md, &dpos, (s), t1, &cp2)) { /* md too short */ \
+			m_freem(mrep); \
+			goto nfsmout; \
+		} else { \
+			(a) = (c)cp2; \
+		} }
+
+#define nfsm_fhtom(v)	/* append the n_fh file handle of vnode v (NFSX_FH bytes) */ \
+		nfsm_build(cp,caddr_t,NFSX_FH); \
+		bcopy((caddr_t)&(VTONFS(v)->n_fh), cp, NFSX_FH)
+
+#define nfsm_srvfhtom(f)	/* append the NFSX_FH bytes found at f */ \
+		nfsm_build(cp,caddr_t,NFSX_FH); \
+		bcopy((caddr_t)(f), cp, NFSX_FH)
+
+#define nfsm_mtofh(d,v)	/* dissect a handle, nfs_nget() it on d's mount, load attrs into (v) */ \
+		{ struct nfsnode *np; nfsv2fh_t *fhp; \
+		nfsm_dissect(fhp,nfsv2fh_t *,NFSX_FH); \
+		if (error = nfs_nget((d)->v_mount, fhp, &np)) { \
+			m_freem(mrep); \
+			goto nfsmout; \
+		} \
+		(v) = NFSTOV(np); \
+		nfsm_loadattr(v, (struct vattr *)0); \
+		}
+
+#define	nfsm_loadattr(v,a)	/* run nfs_loadattrcache() at md/dpos; (v) takes the vnode it leaves */ \
+		{ struct vnode *tvp = (v); \
+		if (error = nfs_loadattrcache(&tvp, &md, &dpos, (a))) { \
+			m_freem(mrep); \
+			goto nfsmout; \
+		} \
+		(v) = tvp; }
+
+#define	nfsm_strsiz(s,m)	/* (s) = next XDR unsigned; EBADRPC when it exceeds m */ \
+		{ nfsm_dissect(tl,u_long *,NFSX_UNSIGNED); \
+		if (((s) = fxdr_unsigned(long,*tl)) > (m)) { \
+			m_freem(mrep); \
+			error = EBADRPC; \
+			goto nfsmout; \
+		} }
+
+#define	nfsm_srvstrsiz(s,m)	/* as above, also rejecting s <= 0; fails through nfsm_reply() */ \
+		{ nfsm_dissect(tl,u_long *,NFSX_UNSIGNED); \
+		if (((s) = fxdr_unsigned(long,*tl)) > (m) || (s) <= 0) { \
+			error = EBADRPC; \
+			nfsm_reply(0); \
+		} }
+
+#define nfsm_mtouio(p,s)	/* nfsm_mbuftouio() s bytes from md/dpos into (p); no-op when s <= 0 */ \
+		if ((s) > 0 && \
+		   (error = nfsm_mbuftouio(&md,(p),(s),&dpos))) { \
+			m_freem(mrep); \
+			goto nfsmout; \
+		}
+
+#define nfsm_uiotom(p,s)	/* nfsm_uiotombuf() s bytes from (p) onto mb/bpos; frees mreq on error */ \
+		if (error = nfsm_uiotombuf((p),&mb,(s),&bpos)) { \
+			m_freem(mreq); \
+			goto nfsmout; \
+		}
+
+#define	nfsm_reqhead(v,a,s)	/* start a request: mb and mreq come from nfsm_reqh() */ \
+		mb = mreq = nfsm_reqh((v),(a),(s),&bpos)
+
+#define nfsm_reqdone	m_freem(mrep); /* free mrep, then supply the nfsmout label */ \
+		nfsmout: 
+
+#define nfsm_rndup(a)	(((a)+3)&(~0x3))	/* round a up to a multiple of 4 */
+
+#define	nfsm_request(v, t, p, c)	/* call nfs_request(); on error jump to nfsmout */ \
+		if (error = nfs_request((v), mreq, (t), (p), \
+		   (c), &mrep, &md, &dpos)) \
+			goto nfsmout
+
+#define	nfsm_strtom(a,s,m)	/* append the s bytes at a, length word first; ENAMETOOLONG if s > m */ \
+		if ((s) > (m)) { \
+			m_freem(mreq); \
+			error = ENAMETOOLONG; \
+			goto nfsmout; \
+		} \
+		t2 = nfsm_rndup(s)+NFSX_UNSIGNED; /* length word plus rounded-up data */ \
+		if (t2 <= M_TRAILINGSPACE(mb)) { \
+			nfsm_build(tl,u_long *,t2); \
+			*tl++ = txdr_unsigned(s); \
+			*(tl+((t2>>2)-2)) = 0; /* clear the last data word so padding is zero */ \
+			bcopy((caddr_t)(a), (caddr_t)tl, (s)); \
+		} else if (error = nfsm_strtmbuf(&mb, &bpos, (a), (s))) { /* does not fit in mb */ \
+			m_freem(mreq); \
+			goto nfsmout; \
+		}
+
+#define	nfsm_srvdone	/* supply the nfsmout label and return error */ \
+		nfsmout: \
+		return(error)
+
+#define	nfsm_reply(s)	/* build the reply head; makes the caller return 0 when error is set */ \
+		{ \
+		nfsd->nd_repstat = error; \
+		if (error) /* on error the size argument passed on is 0 instead of (s) */ \
+		   (void) nfs_rephead(0, nfsd, error, cache, &frev, \
+			mrq, &mb, &bpos); \
+		else \
+		   (void) nfs_rephead((s), nfsd, error, cache, &frev, \
+			mrq, &mb, &bpos); \
+		m_freem(mrep); \
+		mreq = *mrq; \
+		if (error) \
+			return(0); \
+		}
+
+#define	nfsm_adv(s)	/* skip s bytes at md/dpos; nfs_adv() is used when md is too short */ \
+		t1 = mtod(md, caddr_t)+md->m_len-dpos; \
+		if (t1 >= (s)) { \
+			dpos += (s); \
+		} else if (error = nfs_adv(&md, &dpos, (s), t1)) { \
+			m_freem(mrep); \
+			goto nfsmout; \
+		}
+
+#define nfsm_srvmtofh(f)	/* copy the next NFSX_FH bytes at md/dpos to (f) */ \
+		nfsm_dissect(tl, u_long *, NFSX_FH); \
+		bcopy((caddr_t)tl, (caddr_t)(f), NFSX_FH)
+
+#define	nfsm_clget	/* tl = bp, after first chaining on a new mbuf when bp has reached be */ \
+		if (bp >= be) { \
+			if (mp == mb) /* still filling mb: add what was written since bpos */ \
+				mp->m_len += bp-bpos; \
+			MGET(mp, M_WAIT, MT_DATA); \
+			MCLGET(mp, M_WAIT); \
+			mp->m_len = NFSMSIZ(mp); \
+			mp2->m_next = mp; \
+			mp2 = mp; \
+			bp = mtod(mp, caddr_t); \
+			be = bp+mp->m_len; \
+		} \
+		tl = (u_long *)bp
+
+#define	nfsm_srvfillattr	/* fill *fp from *vap; field set depends on nfsd->nd_nqlflag */ \
+	fp->fa_type = vtonfs_type(vap->va_type); \
+	fp->fa_mode = vtonfs_mode(vap->va_type, vap->va_mode); \
+	fp->fa_nlink = txdr_unsigned(vap->va_nlink); \
+	fp->fa_uid = txdr_unsigned(vap->va_uid); \
+	fp->fa_gid = txdr_unsigned(vap->va_gid); \
+	if (nfsd->nd_nqlflag == NQL_NOVAL) { /* fa_nfs* fields: 32 bit size and block count */ \
+		fp->fa_nfsblocksize = txdr_unsigned(vap->va_blocksize); \
+		if (vap->va_type == VFIFO) \
+			fp->fa_nfsrdev = 0xffffffff; \
+		else \
+			fp->fa_nfsrdev = txdr_unsigned(vap->va_rdev); \
+		fp->fa_nfsfsid = txdr_unsigned(vap->va_fsid); \
+		fp->fa_nfsfileid = txdr_unsigned(vap->va_fileid); \
+		fp->fa_nfssize = txdr_unsigned(vap->va_size); \
+		fp->fa_nfsblocks = txdr_unsigned(vap->va_bytes / NFS_FABLKSIZE); \
+		txdr_nfstime(&vap->va_atime, &fp->fa_nfsatime); \
+		txdr_nfstime(&vap->va_mtime, &fp->fa_nfsmtime); \
+		fp->fa_nfsctime.nfs_sec = txdr_unsigned(vap->va_ctime.ts_sec); \
+		fp->fa_nfsctime.nfs_usec = txdr_unsigned(vap->va_gen); /* usec slot carries va_gen */ \
+	} else { /* fa_nq* fields: txdr_hyper sizes plus flags, gen and filerev */ \
+		fp->fa_nqblocksize = txdr_unsigned(vap->va_blocksize); \
+		if (vap->va_type == VFIFO) \
+			fp->fa_nqrdev = 0xffffffff; \
+		else \
+			fp->fa_nqrdev = txdr_unsigned(vap->va_rdev); \
+		fp->fa_nqfsid = txdr_unsigned(vap->va_fsid); \
+		fp->fa_nqfileid = txdr_unsigned(vap->va_fileid); \
+		txdr_hyper(&vap->va_size, &fp->fa_nqsize); \
+		txdr_hyper(&vap->va_bytes, &fp->fa_nqbytes); \
+		txdr_nqtime(&vap->va_atime, &fp->fa_nqatime); \
+		txdr_nqtime(&vap->va_mtime, &fp->fa_nqmtime); \
+		txdr_nqtime(&vap->va_ctime, &fp->fa_nqctime); \
+		fp->fa_nqflags = txdr_unsigned(vap->va_flags); \
+		fp->fa_nqgen = txdr_unsigned(vap->va_gen); \
+		txdr_hyper(&vap->va_filerev, &fp->fa_nqfilerev); \
+	}
+
diff --git a/sys/nfs/nfs_node.c b/sys/nfs/nfs_node.c
new file mode 100644
index 00000000000..032bdef0d5a
--- /dev/null
+++ b/sys/nfs/nfs_node.c
@@ -0,0 +1,294 @@
+/*
+ * Copyright (c) 1989, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Rick Macklem at The University of Guelph.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)nfs_node.c	8.2 (Berkeley) 12/30/93
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/mount.h>
+#include <sys/namei.h>
+#include <sys/vnode.h>
+#include <sys/kernel.h>
+#include <sys/malloc.h>
+
+#include <nfs/rpcv2.h>
+#include <nfs/nfsv2.h>
+#include <nfs/nfs.h>
+#include <nfs/nfsnode.h>
+#include <nfs/nfsmount.h>
+#include <nfs/nqnfs.h>
+
+struct nfsnode **nheadhashtbl;	/* hash chain heads; set up by nfs_nhinit() */
+u_long nheadhash;		/* filled in by hashinit(); mask used by NFSNOHASH() */
+#define	NFSNOHASH(fhsum)	((fhsum)&nheadhash)	/* handle checksum -> chain index */
+
+#define TRUE	1
+#define	FALSE	0
+
+/*
+ * Set up the nfsnode hash table (nheadhashtbl and its mask nheadhash).
+ * Non-lint builds also complain if sizeof(struct nfsnode) is not 2^n.
+ */
+nfs_nhinit()
+{
+
+#ifndef lint
+	if ((sizeof(struct nfsnode) - 1) & sizeof(struct nfsnode))
+		printf("nfs_nhinit: bad size %d\n", (int)sizeof(struct nfsnode));
+#endif /* not lint */
+	nheadhashtbl = hashinit(desiredvnodes, M_NFSNODE, &nheadhash);
+}
+
+/*
+ * Pick the hash chain for a file handle: add up all NFSX_FH bytes of
+ * the handle and reduce the sum with NFSNOHASH().  The value returned
+ * is the address of the chain head slot inside nheadhashtbl.
+ */
+struct nfsnode **
+nfs_hash(fhp)
+	register nfsv2fh_t *fhp;
+{
+	register u_char *bytep = &fhp->fh_bytes[0];
+	register u_long sum = 0;
+	int left;
+
+	for (left = NFSX_FH; left > 0; left--)
+		sum += *bytep++;
+	return (&nheadhashtbl[NFSNOHASH(sum)]);
+}
+
+/*
+ * Find the nfsnode for file handle fhp on mount mntp, making a new
+ * vnode/nfsnode pair when none is hashed.  Callers must check for
+ * mount points!!  Returns 0 with *npp set, or the getnewvnode() error
+ * with *npp zeroed.
+ */
+nfs_nget(mntp, fhp, npp)
+	struct mount *mntp;
+	register nfsv2fh_t *fhp;
+	struct nfsnode **npp;
+{
+	register struct nfsnode *np, *nq, **nhpp;
+	register struct vnode *vp;
+	extern int (**nfsv2_vnodeop_p)();
+	struct vnode *nvp;
+	int error;
+
+	nhpp = nfs_hash(fhp);
+loop:
+	for (np = *nhpp; np; np = np->n_forw) {
+		if (mntp != NFSTOV(np)->v_mount ||
+		    bcmp((caddr_t)fhp, (caddr_t)&np->n_fh, NFSX_FH))
+			continue;	/* different mount or different handle */
+		vp = NFSTOV(np);
+		if (vget(vp, 1))
+			goto loop;	/* vget() failed: search the chain again */
+		*npp = np;
+		return(0);
+	}
+	if (error = getnewvnode(VT_NFS, mntp, nfsv2_vnodeop_p, &nvp)) {
+		*npp = 0;
+		return (error);
+	}
+	vp = nvp;
+	MALLOC(np, struct nfsnode *, sizeof *np, M_NFSNODE, M_WAITOK);
+	vp->v_data = np;	/* tie the vnode and the nfsnode to each other */
+	np->n_vnode = vp;
+	/*
+	 * Insert the nfsnode at the head of the hash chain for its file handle
+	 */
+	np->n_flag = 0;
+	if (nq = *nhpp)
+		nq->n_back = &np->n_forw;
+	np->n_forw = nq;
+	np->n_back = nhpp;	/* n_back holds the address of the pointer to np */
+	*nhpp = np;
+	bcopy((caddr_t)fhp, (caddr_t)&np->n_fh, NFSX_FH);
+	np->n_attrstamp = 0;
+	np->n_direofoffset = 0;
+	np->n_sillyrename = (struct sillyrename *)0;
+	np->n_size = 0;
+	np->n_mtime = 0;
+	if (VFSTONFS(mntp)->nm_flag & NFSMNT_NQNFS) {	/* fields only cleared on NQNFS mounts */
+		np->n_brev = 0;
+		np->n_lrev = 0;
+		np->n_expiry = (time_t)0;
+		np->n_tnext = (struct nfsnode *)0;
+	}
+	*npp = np;
+	return (0);
+}
+
+nfs_inactive(ap)	/* finish any pending silly rename, trim n_flag; always returns 0 */
+	struct vop_inactive_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+	register struct nfsnode *np;
+	register struct sillyrename *sp;
+	struct proc *p = curproc;	/* XXX */
+	extern int prtactive;
+
+	np = VTONFS(ap->a_vp);
+	if (prtactive && ap->a_vp->v_usecount != 0)
+		vprint("nfs_inactive: pushing active", ap->a_vp);
+	sp = np->n_sillyrename;	/* detach the record from the node before using it */
+	np->n_sillyrename = (struct sillyrename *)0;
+	if (sp) {
+		/*
+		 * Remove the silly file that was rename'd earlier
+		 */
+		(void) nfs_vinvalbuf(ap->a_vp, 0, sp->s_cred, p, 1);
+		nfs_removeit(sp);
+		crfree(sp->s_cred);	/* let go of the cred and the s_dvp vnode kept in sp */
+		vrele(sp->s_dvp);
+#ifdef SILLYSEPARATE
+		free((caddr_t)sp, M_NFSREQ);
+#endif
+	}
+	np->n_flag &= (NMODIFIED | NFLUSHINPROG | NFLUSHWANT | NQNFSEVICTED |
+		NQNFSNONCACHE | NQNFSWRITE);	/* all other flag bits are cleared */
+	return (0);
+}
+
+/*
+ * Reclaim an nfsnode: unhash it, unlink any NQNFS timer entry and free it.
+ */
+nfs_reclaim(ap)
+	struct vop_reclaim_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+	register struct vnode *vp = ap->a_vp;
+	register struct nfsnode *np = VTONFS(vp);
+	register struct nfsmount *nmp = VFSTONFS(vp->v_mount);
+	register struct nfsnode *nq;
+	extern int prtactive;
+
+	if (prtactive && vp->v_usecount != 0)
+		vprint("nfs_reclaim: pushing active", vp);
+	/*
+	 * Remove the nfsnode from its hash chain.
+	 */
+	if (nq = np->n_forw)
+		nq->n_back = np->n_back;
+	*np->n_back = nq;	/* whatever pointed at np now points at its successor */
+
+	/*
+	 * For nqnfs, take it off the timer queue as required.
+	 */
+	if ((nmp->nm_flag & NFSMNT_NQNFS) && np->n_tnext) {
+		if (np->n_tnext == (struct nfsnode *)nmp)	/* nmp itself acts as the list head */
+			nmp->nm_tprev = np->n_tprev;
+		else
+			np->n_tnext->n_tprev = np->n_tprev;
+		if (np->n_tprev == (struct nfsnode *)nmp)
+			nmp->nm_tnext = np->n_tnext;
+		else
+			np->n_tprev->n_tnext = np->n_tnext;
+	}
+	cache_purge(vp);
+	FREE(vp->v_data, M_NFSNODE);
+	vp->v_data = (void *)0;	/* do not leave a pointer to the freed nfsnode */
+	return (0);
+}
+
+/*
+ * Lock an nfsnode.  No per-node lock is taken here; the routine only
+ * waits until VXLOCK is clear on the vnode.
+ */
+nfs_lock(ap)
+	struct vop_lock_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+	register struct vnode *lockvp;
+
+	/*
+	 * The sleep below is not interruptible, so interruptible mounts
+	 * can hang here.  Changing that means teaching every VOP_LOCK()
+	 * caller to cope with an interrupted lock request.
+	 */
+	for (lockvp = ap->a_vp; lockvp->v_flag & VXLOCK; ) {
+		lockvp->v_flag |= VXWANT;
+		sleep((caddr_t)lockvp, PINOD);
+	}
+	/* A vnode whose tag is VT_NON is reported as ENOENT. */
+	return (lockvp->v_tag == VT_NON ? ENOENT : 0);
+}
+
+/*
+ * Unlock an nfsnode.  There is nothing to release: ap is unused, returns 0.
+ */
+nfs_unlock(ap)
+	struct vop_unlock_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+
+	return (0);
+}
+
+/*
+ * Check for a locked nfsnode.  Always answers 0; ap is not looked at.
+ */
+nfs_islocked(ap)
+	struct vop_islocked_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+
+	return (0);
+}
+
+/*
+ * Nfs abort op: free the name buffer if HASBUF is set and SAVESTART is not.
+ */
+/* ARGSUSED */
+int
+nfs_abortop(ap)
+	struct vop_abortop_args /* {
+		struct vnode *a_dvp;
+		struct componentname *a_cnp;
+	} */ *ap;
+{
+	register struct componentname *cnp = ap->a_cnp;
+
+	if ((cnp->cn_flags & (HASBUF | SAVESTART)) == HASBUF)
+		FREE(cnp->cn_pnbuf, M_NAMEI);
+	return (0);
+}
diff --git a/sys/nfs/nfs_nqlease.c b/sys/nfs/nfs_nqlease.c
new file mode 100644
index 00000000000..965f46132a6
--- /dev/null
+++ b/sys/nfs/nfs_nqlease.c
@@ -0,0 +1,1228 @@
+/*
+ * Copyright (c) 1992, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Rick Macklem at The University of Guelph.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)nfs_nqlease.c	8.3 (Berkeley) 1/4/94
+ */
+
+/*
+ * References:
+ *	Cary G. Gray and David R. Cheriton, "Leases: An Efficient Fault-Tolerant
+ *		Mechanism for Distributed File Cache Consistency",
+ *		In Proc. of the Twelfth ACM Symposium on Operating Systems
+ *		Principles, pg. 202-210, Litchfield Park, AZ, Dec. 1989.
+ *	Michael N. Nelson, Brent B. Welch and John K. Ousterhout, "Caching
+ *		in the Sprite Network File System", ACM TOCS 6(1),
+ *		pages 134-154, February 1988.
+ *	V. Srinivasan and Jeffrey C. Mogul, "Spritely NFS: Implementation and
+ *		Performance of Cache-Consistency Protocols", Digital
+ *		Equipment Corporation WRL Research Report 89/5, May 1989.
+ */
+#include <sys/param.h>
+#include <sys/vnode.h>
+#include <sys/mount.h>
+#include <sys/kernel.h>
+#include <sys/proc.h>
+#include <sys/systm.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/file.h>
+#include <sys/buf.h>
+#include <sys/stat.h>
+#include <sys/protosw.h>
+
+#include <netinet/in.h>
+#include <nfs/rpcv2.h>
+#include <nfs/nfsv2.h>
+#include <nfs/nfs.h>
+#include <nfs/nfsm_subs.h>
+#include <nfs/xdr_subs.h>
+#include <nfs/nqnfs.h>
+#include <nfs/nfsnode.h>
+#include <nfs/nfsmount.h>
+
+/*
+ * List head for the lease queue and other global data.
+ * At any time a lease is linked into a list ordered by increasing expiry time.
+ */
+#define	NQFHHASH(f)	((*((u_long *)(f)))&nqfheadhash)	/* first u_long at f, masked */
+
+union nqsrvthead nqthead;	/* head of the lease timer queue */
+struct nqlease **nqfhead;	/* lease hash chains, indexed with NQFHHASH() */
+u_long nqfheadhash;		/* mask applied by NQFHHASH() */
+time_t nqnfsstarttime = (time_t)0;
+u_long nqnfs_prog, nqnfs_vers;
+int nqsrv_clockskew = NQ_CLOCKSKEW;	/* added to every expiry by nqsrv_instimeq() */
+int nqsrv_writeslack = NQ_WRITESLACK;
+int nqsrv_maxlease = NQ_MAXLEASE;	/* upper bound nqsrv_getlease() puts on *duration */
+int nqsrv_maxnumlease = NQ_MAXNUMLEASE;	/* nqsrv_getlease() sleeps while above this */
+void nqsrv_instimeq(), nqsrv_send_eviction(), nfs_sndunlock();
+void nqsrv_unlocklease(), nqsrv_waitfor_expiry(), nfsrv_slpderef();
+void nqsrv_addhost(), nqsrv_locklease(), nqnfs_serverd();
+void nqnfs_clientlease();
+struct mbuf *nfsm_rpchead();
+
+/*
+ * Signifies which rpcs can have piggybacked lease requests (one slot per rpc)
+ */
+int nqnfs_piggy[NFS_NPROCS] = {
+	0,		/* 0 */
+	NQL_READ,	/* 1 */
+	NQL_WRITE,	/* 2 */
+	0,		/* 3 */
+	NQL_READ,	/* 4 */
+	NQL_READ,	/* 5 */
+	NQL_READ,	/* 6 */
+	0,		/* 7 */
+	NQL_WRITE,	/* 8 */
+	0,		/* 9 */
+	0,		/* 10 */
+	0,		/* 11 */
+	0,		/* 12 */
+	0,		/* 13 */
+	0,		/* 14 */
+	0,		/* 15 */
+	NQL_READ,	/* 16 */
+	0,		/* 17 */
+	NQL_READ,	/* 18 */
+	0,		/* 19 */
+	0,		/* 20 */
+	0,		/* 21 */
+	0,		/* 22 */
+};
+
+int nnnnnn = sizeof (struct nqlease);	/* NOTE(review): size probes; no use visible here */
+int oooooo = sizeof (struct nfsnode);
+extern nfstype nfs_type[9];
+extern struct nfssvc_sock *nfs_udpsock, *nfs_cltpsock;	/* compared against slp below */
+extern struct nfsd nfsd_head;
+extern int nfsd_waiting;
+extern struct nfsreq nfsreqh;
+
+#define TRUE	1
+#define	FALSE	0
+
+/*
+ * Get or check for a lease for "vp", based on NQL_CHECK flag.
+ * The rules are as follows:
+ * - if a current non-caching lease, reply non-caching
+ * - if a current lease for same host only, extend lease
+ * - if a read cachable lease and a read lease request
+ *	add host to list and reply cachable
+ * - else { set non-cachable for read-write sharing }
+ *	send eviction notice messages to all other hosts that have lease
+ *	wait for lease termination { either by receiving vacated messages
+ *					from all the other hosts or expiry
+ *					via. timeout }
+ *	modify lease to non-cachable
+ * - else if no current lease, issue new one
+ * - reply
+ * - returns 0, or the error from VOP_GETATTR() or VFS_VPTOFH()
+ * NB: Since nqnfs_serverd() is called from a timer, any potential tsleep()
+ *     in here must be framed by nqsrv_locklease() and nqsrv_unlocklease().
+ *     nqsrv_locklease() is coded such that at least one of LC_LOCKED and
+ *     LC_WANTED is set whenever a process is tsleeping in it. The exception
+ *     is when a new lease is being allocated, since it is not in the timer
+ *     queue yet. (Ditto for the splsoftclock() and splx(s) calls)
+ */
+nqsrv_getlease(vp, duration, flags, nd, nam, cachablep, frev, cred)
+	struct vnode *vp;
+	u_long *duration;	/* in/out: clamped to nqsrv_maxlease */
+	int flags;
+	struct nfsd *nd;	/* nd_slp and nd_procp are the fields used here */
+	struct mbuf *nam;
+	int *cachablep;		/* out: 1 if cachable, 0 if not (set on the lease paths) */
+	u_quad_t *frev;		/* out: va_filerev from VOP_GETATTR() */
+	struct ucred *cred;
+{
+	register struct nqlease *lp, *lq, **lpp;
+	register struct nqhost *lph;
+	struct nqlease *tlp;
+	struct nqm **lphp;
+	struct vattr vattr;
+	fhandle_t fh;
+	int i, ok, error, s;
+
+	if (vp->v_type != VREG && vp->v_type != VDIR && vp->v_type != VLNK)
+		return (0);	/* only files, directories and symlinks get leases */
+	if (*duration > nqsrv_maxlease)
+		*duration = nqsrv_maxlease;
+	if (error = VOP_GETATTR(vp, &vattr, cred, nd->nd_procp))
+		return (error);
+	*frev = vattr.va_filerev;
+	s = splsoftclock();
+	tlp = vp->v_lease;
+	if ((flags & NQL_CHECK) == 0)
+		nfsstats.srvnqnfs_getleases++;
+	if (tlp == (struct nqlease *)0) {	/* no lease remembered on the vnode */
+
+		/*
+		 * Find the lease by searching the hash list.
+		 */
+		fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
+		if (error = VFS_VPTOFH(vp, &fh.fh_fid)) {
+			splx(s);
+			return (error);
+		}
+		lpp = &nqfhead[NQFHHASH(fh.fh_fid.fid_data)];
+		for (lp = *lpp; lp; lp = lp->lc_fhnext)
+			if (fh.fh_fsid.val[0] == lp->lc_fsid.val[0] &&
+			    fh.fh_fsid.val[1] == lp->lc_fsid.val[1] &&
+			    !bcmp(fh.fh_fid.fid_data, lp->lc_fiddata,
+				  fh.fh_fid.fid_len - sizeof (long))) {
+				/* Found it */
+				lp->lc_vp = vp;
+				vp->v_lease = lp;
+				tlp = lp;
+				break;
+			}
+	}
+	lp = tlp;
+	if (lp) {
+		if ((lp->lc_flag & LC_NONCACHABLE) ||
+		    (lp->lc_morehosts == (struct nqm *)0 &&
+		     nqsrv_cmpnam(nd->nd_slp, nam, &lp->lc_host)))
+			goto doreply;	/* already non-caching, or requester is the only holder */
+		if ((flags & NQL_READ) && (lp->lc_flag & LC_WRITE)==0) {
+			if (flags & NQL_CHECK)
+				goto doreply;
+			if (nqsrv_cmpnam(nd->nd_slp, nam, &lp->lc_host))
+				goto doreply;
+			i = 0;
+			if (lp->lc_morehosts) {
+				lph = lp->lc_morehosts->lpm_hosts;
+				lphp = &lp->lc_morehosts->lpm_next;
+				ok = 1;
+			} else {
+				lphp = &lp->lc_morehosts;
+				ok = 0;
+			}
+			while (ok && (lph->lph_flag & LC_VALID)) {	/* walk the extra host slots */
+				if (nqsrv_cmpnam(nd->nd_slp, nam, lph))
+					goto doreply;
+				if (++i == LC_MOREHOSTSIZ) {
+					i = 0;
+					if (*lphp) {
+						lph = (*lphp)->lpm_hosts;
+						lphp = &((*lphp)->lpm_next);
+					} else
+						ok = 0;
+				} else
+					lph++;
+			}
+			nqsrv_locklease(lp);
+			if (!ok) {	/* ran out of slots: chain on a zeroed struct nqm */
+				*lphp = (struct nqm *)
+					malloc(sizeof (struct nqm),
+						M_NQMHOST, M_WAITOK);
+				bzero((caddr_t)*lphp, sizeof (struct nqm));
+				lph = (*lphp)->lpm_hosts;
+			}
+			nqsrv_addhost(lph, nd->nd_slp, nam);
+			nqsrv_unlocklease(lp);
+		} else {
+			lp->lc_flag |= LC_NONCACHABLE;
+			nqsrv_locklease(lp);
+			nqsrv_send_eviction(vp, lp, nd->nd_slp, nam, cred);
+			nqsrv_waitfor_expiry(lp);
+			nqsrv_unlocklease(lp);
+		}
+doreply:
+		/*
+		 * Update the lease and return
+		 */
+		if ((flags & NQL_CHECK) == 0)
+			nqsrv_instimeq(lp, *duration);
+		if (lp->lc_flag & LC_NONCACHABLE)
+			*cachablep = 0;
+		else {
+			*cachablep = 1;
+			if (flags & NQL_WRITE)
+				lp->lc_flag |= LC_WRITTEN;
+		}
+		splx(s);
+		return (0);
+	}
+	splx(s);
+	if (flags & NQL_CHECK)
+		return (0);	/* a check never creates a lease; *cachablep is left alone */
+
+	/*
+	 * Allocate new lease
+	 * The value of nqsrv_maxnumlease should be set generously, so that
+	 * the following "printf" happens infrequently.
+	 */
+	if (nfsstats.srvnqnfs_leases > nqsrv_maxnumlease) {
+		printf("Nqnfs server, too many leases\n");
+		do {
+			(void) tsleep((caddr_t)&lbolt, PSOCK,
+					"nqsrvnuml", 0);
+		} while (nfsstats.srvnqnfs_leases > nqsrv_maxnumlease);
+	}
+	MALLOC(lp, struct nqlease *, sizeof (struct nqlease), M_NQLEASE, M_WAITOK);
+	bzero((caddr_t)lp, sizeof (struct nqlease));
+	if (flags & NQL_WRITE)
+		lp->lc_flag |= (LC_WRITE | LC_WRITTEN);
+	nqsrv_addhost(&lp->lc_host, nd->nd_slp, nam);
+	lp->lc_vp = vp;
+	lp->lc_fsid = fh.fh_fsid;	/* fh and lpp were set by the hash search above */
+	bcopy(fh.fh_fid.fid_data, lp->lc_fiddata, fh.fh_fid.fid_len - sizeof (long));
+	if (lq = *lpp)
+		lq->lc_fhprev = &lp->lc_fhnext;
+	lp->lc_fhnext = lq;
+	lp->lc_fhprev = lpp;
+	*lpp = lp;	/* new lease becomes the head of its hash chain */
+	vp->v_lease = lp;
+	s = splsoftclock();
+	nqsrv_instimeq(lp, *duration);
+	splx(s);
+	*cachablep = 1;
+	if (++nfsstats.srvnqnfs_leases > nfsstats.srvnqnfs_maxleases)
+		nfsstats.srvnqnfs_maxleases = nfsstats.srvnqnfs_leases;
+	return (0);
+}
+
+/*
+ * Local lease check for server syscalls: nqsrv_getlease() with NQL_CHECK.
+ */
+void
+lease_check(vp, p, cred, flag)
+	struct vnode *vp;
+	struct proc *p;
+	struct ucred *cred;
+	int flag;
+{
+	u_long duration = 0;	/* must be u_long: nqsrv_getlease() takes a u_long * */
+	int cache;
+	struct nfsd nfsd;	/* only nd_slp and nd_procp are filled in */
+	u_quad_t frev;
+
+	nfsd.nd_slp = NQLOCALSLP;	/* request comes from the local host */
+	nfsd.nd_procp = p;
+	(void) nqsrv_getlease(vp, &duration, NQL_CHECK | flag, &nfsd,
+		(struct mbuf *)0, &cache, &frev, cred);
+}
+
+/*
+ * Record the requesting host in lph; what is stored depends on slp.
+ */
+void
+nqsrv_addhost(lph, slp, nam)
+	register struct nqhost *lph;
+	struct nfssvc_sock *slp;
+	struct mbuf *nam;
+{
+	register struct sockaddr_in *saddr;
+
+	if (slp == NQLOCALSLP)
+		lph->lph_flag |= (LC_VALID | LC_LOCAL);	/* local host: no address kept */
+	else if (slp == nfs_udpsock) {
+		saddr = mtod(nam, struct sockaddr_in *);
+		lph->lph_flag |= (LC_VALID | LC_UDP);
+		lph->lph_inetaddr = saddr->sin_addr.s_addr;	/* keep just address and port */
+		lph->lph_port = saddr->sin_port;
+	} else if (slp == nfs_cltpsock) {
+		lph->lph_nam = m_copym(nam, 0, M_COPYALL, M_WAIT);	/* private copy of nam */
+		lph->lph_flag |= (LC_VALID | LC_CLTP);
+	} else {
+		lph->lph_flag |= (LC_VALID | LC_SREF);
+		lph->lph_slp = slp;
+		slp->ns_sref++;	/* the entry now holds a reference on the socket */
+	}
+}
+
+/*
+ * Set lp's expiry to now + duration + nqsrv_clockskew and (re)queue it.
+ */
+void
+nqsrv_instimeq(lp, duration)
+	register struct nqlease *lp;
+	u_long duration;
+{
+	register struct nqlease *tlp;
+	time_t newexpiry;
+
+	newexpiry = time.tv_sec + duration + nqsrv_clockskew;
+	if (lp->lc_expiry == newexpiry)
+		return;		/* expiry unchanged: leave the queue position alone */
+	if (lp->lc_chain1[0])
+		remque(lp);	/* already linked in: unlink before re-inserting */
+	lp->lc_expiry = newexpiry;
+
+	/*
+	 * Scan from th_chain[1] past leases that expire later.  nqthead is a
+	 * union nqsrvthead, not a lease, so test for it before lc_expiry.
+	 */
+	tlp = nqthead.th_chain[1];
+	while (tlp != (struct nqlease *)&nqthead && tlp->lc_expiry > newexpiry)
+		tlp = tlp->lc_chain1[1];
+	if (tlp == nqthead.th_chain[1])
+		NQSTORENOVRAM(newexpiry);	/* nothing queued expires later than lp */
+	insque(lp, tlp);
+}
+
+/*
+ * Tell whether the host making the current request is the one recorded
+ * in the lease host entry lph.  Returns nonzero on a match, 0 otherwise.
+ * slp identifies the requester's socket, or is NQLOCALSLP for a request
+ * made by the local host; nam is only consulted when slp is
+ * nfs_udpsock or nfs_cltpsock, otherwise slp->ns_nam is used.
+ * This is somewhat messy due to the union in the nqhost structure:
+ * lph_flag (LC_UDP, LC_CLTP or neither) says which form of address the
+ * entry holds.
+ */
+nqsrv_cmpnam(slp, nam, lph)
+	register struct nfssvc_sock *slp;
+	struct mbuf *nam;
+	register struct nqhost *lph;
+{
+	register struct sockaddr_in *sin;
+	struct mbuf *reqnam;
+	union nethostaddr tmpaddr;
+
+	/* A local request matches only an entry flagged LC_LOCAL. */
+	if (slp == NQLOCALSLP)
+		return ((lph->lph_flag & LC_LOCAL) ? 1 : 0);
+
+	/* Pick the mbuf that holds the requester's address. */
+	reqnam = (slp == nfs_udpsock || slp == nfs_cltpsock) ?
+	    nam : slp->ns_nam;
+
+	if (lph->lph_flag & LC_UDP)
+		return (netaddr_match(AF_INET, &lph->lph_haddr, reqnam));
+	if (lph->lph_flag & LC_CLTP)
+		return (netaddr_match(AF_ISO, &lph->lph_claddr, reqnam));
+
+	/* Otherwise the entry names a socket, which must still be valid. */
+	if ((lph->lph_slp->ns_flag & SLP_VALID) == 0)
+		return (0);
+	sin = mtod(lph->lph_slp->ns_nam, struct sockaddr_in *);
+	if (sin->sin_family != AF_INET)
+		tmpaddr.had_nam = lph->lph_slp->ns_nam;
+	else
+		tmpaddr.had_inetaddr = sin->sin_addr.s_addr;
+	return (netaddr_match(sin->sin_family, &tmpaddr, reqnam));
+}
+
+/*
+ * Send out eviction notice messages to all other hosts for the lease.
+ */
+void
+nqsrv_send_eviction(vp, lp, slp, nam, cred)
+	struct vnode *vp;
+	register struct nqlease *lp;
+	struct nfssvc_sock *slp;	/* slp and nam identify the requesting host */
+	struct mbuf *nam;
+	struct ucred *cred;
+{
+	register struct nqhost *lph = &lp->lc_host;
+	register struct mbuf *m;
+	register int siz;
+	struct nqm *lphnext = lp->lc_morehosts;
+	struct mbuf *mreq, *mb, *mb2, *nam2, *mheadend;
+	struct socket *so;
+	struct sockaddr_in *saddr;
+	fhandle_t *fhp;
+	caddr_t bpos, cp;
+	u_long xid;
+	int len = 1, ok = 1, i = 0;	/* len 1: lc_host is a single entry */
+	int sotype, *solockp;
+
+	while (ok && (lph->lph_flag & LC_VALID)) {
+		if (nqsrv_cmpnam(slp, nam, lph))
+			lph->lph_flag |= LC_VACATED;	/* the requester itself: nothing to send */
+		else if ((lph->lph_flag & (LC_LOCAL | LC_VACATED)) == 0) {
+			if (lph->lph_flag & LC_UDP) {
+				MGET(nam2, M_WAIT, MT_SONAME);	/* rebuild a sockaddr_in from the entry */
+				saddr = mtod(nam2, struct sockaddr_in *);
+				nam2->m_len = saddr->sin_len =
+					sizeof (struct sockaddr_in);
+				saddr->sin_family = AF_INET;
+				saddr->sin_addr.s_addr = lph->lph_inetaddr;
+				saddr->sin_port = lph->lph_port;
+				so = nfs_udpsock->ns_so;
+			} else if (lph->lph_flag & LC_CLTP) {
+				nam2 = lph->lph_nam;
+				so = nfs_cltpsock->ns_so;
+			} else if (lph->lph_slp->ns_flag & SLP_VALID) {
+				nam2 = (struct mbuf *)0;
+				so = lph->lph_slp->ns_so;
+			} else
+				goto nextone;	/* socket entry is no longer valid: skip this host */
+			sotype = so->so_type;
+			if (so->so_proto->pr_flags & PR_CONNREQUIRED)
+				solockp = &lph->lph_slp->ns_solock;
+			else
+				solockp = (int *)0;
+			nfsm_reqhead((struct vnode *)0, NQNFSPROC_EVICTED,
+				NFSX_FH);
+			nfsm_build(cp, caddr_t, NFSX_FH);
+			bzero(cp, NFSX_FH);
+			fhp = (fhandle_t *)cp;	/* the message body is vp's file handle */
+			fhp->fh_fsid = vp->v_mount->mnt_stat.f_fsid;
+			VFS_VPTOFH(vp, &fhp->fh_fid);
+			m = mreq;
+			siz = 0;
+			while (m) {	/* total length of the request chain */
+				siz += m->m_len;
+				m = m->m_next;
+			}
+			if (siz <= 0 || siz > NFS_MAXPACKET) {
+				printf("mbuf siz=%d\n",siz);
+				panic("Bad nfs svc reply");
+			}
+			m = nfsm_rpchead(cred, TRUE, NQNFSPROC_EVICTED,
+				RPCAUTH_UNIX, 5*NFSX_UNSIGNED, (char *)0,
+				mreq, siz, &mheadend, &xid);
+			/*
+			 * For stream protocols, prepend a Sun RPC
+			 * Record Mark.
+			 */
+			if (sotype == SOCK_STREAM) {
+				M_PREPEND(m, NFSX_UNSIGNED, M_WAIT);
+				*mtod(m, u_long *) = htonl(0x80000000 |
+					(m->m_pkthdr.len - NFSX_UNSIGNED));
+			}
+			if (((lph->lph_flag & (LC_UDP | LC_CLTP)) == 0 &&
+			    (lph->lph_slp->ns_flag & SLP_VALID) == 0) ||
+			    (solockp && (*solockp & NFSMNT_SNDLOCK)))
+				m_freem(m);	/* socket invalid or send lock held: drop the notice */
+			else {
+				if (solockp)
+					*solockp |= NFSMNT_SNDLOCK;
+				(void) nfs_send(so, nam2, m,
+						(struct nfsreq *)0);
+				if (solockp)
+					nfs_sndunlock(solockp);
+			}
+			if (lph->lph_flag & LC_UDP)
+				MFREE(nam2, m);	/* release the address mbuf from MGET above */
+		}
+nextone:
+		if (++i == len) {	/* end of this group of host entries */
+			if (lphnext) {
+				i = 0;
+				len = LC_MOREHOSTSIZ;
+				lph = lphnext->lpm_hosts;
+				lphnext = lphnext->lpm_next;
+			} else
+				ok = 0;
+		} else
+			lph++;
+	}
+}
+
+/*
+ * Wait until the lease has timed out or no valid remote host still holds it.
+ */
+void
+nqsrv_waitfor_expiry(lp)
+	register struct nqlease *lp;
+{
+	register struct nqhost *hp;
+	register int idx;
+	struct nqm *more;
+	int limit, holder;
+	for (;;) {
+		if (time.tv_sec > lp->lc_expiry)
+			return;
+		hp = &lp->lc_host;
+		more = lp->lc_morehosts;
+		limit = 1;
+		idx = holder = 0;
+		while (hp->lph_flag & LC_VALID) {
+			if ((hp->lph_flag & (LC_LOCAL | LC_VACATED)) == 0) {
+				holder = 1;
+				break;
+			}
+			if (++idx != limit) {
+				hp++;
+				continue;
+			}
+			if (more == (struct nqm *)0)
+				break;
+			idx = 0;
+			limit = LC_MOREHOSTSIZ;
+			hp = more->lpm_hosts;
+			more = more->lpm_next;
+		}
+		if (!holder)
+			return;
+		lp->lc_flag |= LC_EXPIREDWANTED;
+		(void) tsleep((caddr_t)&lp->lc_flag, PSOCK, "nqexp", 0);
+	}
+}
+
+
+/*
+ * Nqnfs server timer that maintains the server lease queue (nqthead).
+ * Walk the queue from the head, stopping at the first unexpired lease:
+ * - if someone is waiting for expiry (LC_EXPIREDWANTED), wake them up;
+ * - else, if it is neither locked nor wanted, keep it longer or free it.
+ */
+void
+nqnfs_serverd()
+{
+	register struct nqlease *lp, *lq;
+	register struct nqhost *lph;
+	struct nqlease *nextlp;
+	struct nqm *lphnext, *olphnext;	/* olphnext: host block to free */
+	struct mbuf *n;			/* scratch for MFREE() */
+	int i, len, ok;
+
+	lp = nqthead.th_chain[0];
+	while (lp != (struct nqlease *)&nqthead) {
+		if (lp->lc_expiry >= time.tv_sec)
+			break;
+		nextlp = lp->lc_chain1[0];	/* fetched now; lp may be freed */
+		if (lp->lc_flag & LC_EXPIREDWANTED) {
+			lp->lc_flag &= ~LC_EXPIREDWANTED;
+			wakeup((caddr_t)&lp->lc_flag);
+		} else if ((lp->lc_flag & (LC_LOCKED | LC_WANTED)) == 0) {
+		    /*
+		     * Make a best effort at keeping a write caching lease long
+		     * enough by not deleting it until it has been explicitly
+		     * vacated or there have been no writes in the previous
+		     * write_slack seconds since expiry and the nfsds are not
+		     * all busy. The assumption is that if the nfsds are not
+		     * all busy now (no queue of nfs requests), then the client
+		     * would have been able to do at least one write to the
+		     * file during the last write_slack seconds if it was still
+		     * trying to push writes to the server.
+		     */
+		    if ((lp->lc_flag & (LC_WRITE | LC_VACATED)) == LC_WRITE &&
+			((lp->lc_flag & LC_WRITTEN) || nfsd_waiting == 0)) {
+			lp->lc_flag &= ~LC_WRITTEN;
+			nqsrv_instimeq(lp, nqsrv_writeslack);
+		    } else {
+			remque(lp);	/* off the timer queue */
+			if (lq = lp->lc_fhnext)	/* unlink from the fh chain */
+				lq->lc_fhprev = lp->lc_fhprev;
+			*lp->lc_fhprev = lq;
+			/*
+			 * This soft reference may no longer be valid, but
+			 * no harm done. The worst case is if the vnode was
+			 * recycled and has another valid lease reference,
+			 * which is dereferenced prematurely.
+			 */
+			lp->lc_vp->v_lease = (struct nqlease *)0;
+			lph = &lp->lc_host;	/* first host lives in the lease */
+			lphnext = lp->lc_morehosts;
+			olphnext = (struct nqm *)0;
+			len = 1;
+			i = 0;
+			ok = 1;
+			while (ok && (lph->lph_flag & LC_VALID)) {
+				if (lph->lph_flag & LC_CLTP)
+					MFREE(lph->lph_nam, n);
+				if (lph->lph_flag & LC_SREF)
+					nfsrv_slpderef(lph->lph_slp);
+				if (++i == len) {	/* end of this host array */
+					if (olphnext) {
+						free((caddr_t)olphnext, M_NQMHOST);
+						olphnext = (struct nqm *)0;
+					}
+					if (lphnext) {
+						olphnext = lphnext;
+						i = 0;
+						len = LC_MOREHOSTSIZ;
+						lph = lphnext->lpm_hosts;
+						lphnext = lphnext->lpm_next;
+					} else
+						ok = 0;
+				} else
+					lph++;
+			}
+			FREE((caddr_t)lp, M_NQLEASE);
+			if (olphnext)	/* block still pending if loop ended early */
+				free((caddr_t)olphnext, M_NQMHOST);
+			nfsstats.srvnqnfs_leases--;
+		    }
+		}
+		lp = nextlp;
+	}
+}
+
+
+/*
+ * Server side of the getlease rpc, called from nfssvc_nfsd(): decode the
+ * request, call nqsrv_getlease() for the real work and build the reply.
+ */
+nqnfsrv_getlease(nfsd, mrep, md, dpos, cred, nam, mrq)
+	struct nfsd *nfsd;		/* per-request state; nd_duration is updated */
+	struct mbuf *mrep, *md;		/* request mbufs, consumed by the nfsm macros */
+	caddr_t dpos;
+	struct ucred *cred;
+	struct mbuf *nam, **mrq;
+{
+	register struct nfsv2_fattr *fp;
+	struct vattr va;
+	register struct vattr *vap = &va;
+	struct vnode *vp;
+	nfsv2fh_t nfh;
+	fhandle_t *fhp;
+	register u_long *tl;
+	register long t1;
+	u_quad_t frev;
+	caddr_t bpos;
+	int error = 0;
+	char *cp2;
+	struct mbuf *mb, *mb2, *mreq;
+	int flags, rdonly, cache;
+
+	fhp = &nfh.fh_generic;
+	nfsm_srvmtofh(fhp);
+	nfsm_dissect(tl, u_long *, 2*NFSX_UNSIGNED);
+	flags = fxdr_unsigned(int, *tl++);	/* requested lease type */
+	nfsd->nd_duration = fxdr_unsigned(int, *tl);
+	if (error = nfsrv_fhtovp(fhp,
+	    TRUE, &vp, cred, nfsd->nd_slp, nam, &rdonly))
+		nfsm_reply(0);
+	if (rdonly && flags == NQL_WRITE) {
+		error = EROFS;
+		vput(vp);	/* release the vnode before the error reply */
+		nfsm_reply(0);
+	}
+	(void) nqsrv_getlease(vp, &nfsd->nd_duration, flags, nfsd,
+		nam, &cache, &frev, cred);
+	error = VOP_GETATTR(vp, vap, cred, nfsd->nd_procp);
+	vput(vp);
+	nfsm_reply(NFSX_NQFATTR + 4*NFSX_UNSIGNED);
+	nfsm_build(tl, u_long *, 4*NFSX_UNSIGNED);
+	*tl++ = txdr_unsigned(cache);	/* reply: cachable, duration, frev, attrs */
+	*tl++ = txdr_unsigned(nfsd->nd_duration);
+	txdr_hyper(&frev, tl);
+	nfsm_build(fp, struct nfsv2_fattr *, NFSX_NQFATTR);
+	nfsm_srvfillattr;
+	nfsm_srvdone;
+}
+
+
+/*
+ * Called from nfssvc_nfsd() when a "vacated" message is received from a
+ * client. Marks the sender's host entry LC_VACATED; always returns EPERM.
+ */
+nqnfsrv_vacated(nfsd, mrep, md, dpos, cred, nam, mrq)
+	struct nfsd *nfsd;
+	struct mbuf *mrep, *md;
+	caddr_t dpos;
+	struct ucred *cred;
+	struct mbuf *nam, **mrq;
+{
+	register struct nqlease *lp;
+	register struct nqhost *lph;
+	struct nqlease *tlp = (struct nqlease *)0;	/* matching lease, if any */
+	nfsv2fh_t nfh;
+	fhandle_t *fhp;
+	register u_long *tl;
+	register long t1;
+	struct nqm *lphnext;
+	int error = 0, i, len, ok, gotit = 0;	/* gotit: sender's entry found */
+	char *cp2;
+
+	fhp = &nfh.fh_generic;
+	nfsm_srvmtofh(fhp);
+	m_freem(mrep);	/* only the file handle is read from the request */
+	/*
+	 * Find the lease by searching the hash list.
+	 */
+	for (lp = nqfhead[NQFHHASH(fhp->fh_fid.fid_data)]; lp;
+	     lp = lp->lc_fhnext)
+		if (fhp->fh_fsid.val[0] == lp->lc_fsid.val[0] &&
+		    fhp->fh_fsid.val[1] == lp->lc_fsid.val[1] &&
+		    !bcmp(fhp->fh_fid.fid_data, lp->lc_fiddata,
+			  MAXFIDSZ)) {
+			/* Found it */
+			tlp = lp;
+			break;
+		}
+	if (tlp) {
+		lp = tlp;
+		len = 1;	/* the first host entry is embedded in the lease */
+		i = 0;
+		lph = &lp->lc_host;
+		lphnext = lp->lc_morehosts;
+		ok = 1;
+		while (ok && (lph->lph_flag & LC_VALID)) {
+			if (nqsrv_cmpnam(nfsd->nd_slp, nam, lph)) {
+				lph->lph_flag |= LC_VACATED;
+				gotit++;
+				break;
+			}
+			if (++i == len) {	/* move on to the next host block */
+				if (lphnext) {
+					len = LC_MOREHOSTSIZ;
+					i = 0;
+					lph = lphnext->lpm_hosts;
+					lphnext = lphnext->lpm_next;
+				} else
+					ok = 0;
+			} else
+				lph++;
+		}
+		if ((lp->lc_flag & LC_EXPIREDWANTED) && gotit) {
+			lp->lc_flag &= ~LC_EXPIREDWANTED;
+			wakeup((caddr_t)&lp->lc_flag);
+		}
+nfsmout:	/* NOTE(review): presumably the nfsm macros' failure target */
+		return (EPERM);
+	}
+	return (EPERM);	/* returned on every path, whether or not a lease matched */
+}
+
+
+/*
+ * Client get lease rpc: request a lease on vp and record it if still valid.
+ */
+nqnfs_getlease(vp, rwflag, cred, p)
+	register struct vnode *vp;
+	int rwflag;		/* lease type sent to the server */
+	struct ucred *cred;
+	struct proc *p;
+{
+	register u_long *tl;
+	register caddr_t cp;
+	register long t1;
+	register struct nfsnode *np;
+	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
+	caddr_t bpos, dpos, cp2;
+	time_t reqtime;		/* request time, later the lease expiry time */
+	int error = 0;
+	struct mbuf *mreq, *mrep, *md, *mb, *mb2;
+	int cachable;
+	u_quad_t frev;
+	
+	nfsstats.rpccnt[NQNFSPROC_GETLEASE]++;
+	mb = mreq = nfsm_reqh(vp, NQNFSPROC_GETLEASE, NFSX_FH+2*NFSX_UNSIGNED,
+		 &bpos);
+	nfsm_fhtom(vp);
+	nfsm_build(tl, u_long *, 2*NFSX_UNSIGNED);
+	*tl++ = txdr_unsigned(rwflag);
+	*tl = txdr_unsigned(nmp->nm_leaseterm);
+	reqtime = time.tv_sec;	/* sampled before the request is issued */
+	nfsm_request(vp, NQNFSPROC_GETLEASE, p, cred);
+	np = VTONFS(vp);
+	nfsm_dissect(tl, u_long *, 4*NFSX_UNSIGNED);
+	cachable = fxdr_unsigned(int, *tl++);
+	reqtime += fxdr_unsigned(int, *tl++);	/* add the duration from the reply */
+	if (reqtime > time.tv_sec) {
+		fxdr_hyper(tl, &frev);
+		nqnfs_clientlease(nmp, np, rwflag, cachable, reqtime, frev);
+		nfsm_loadattr(vp, (struct vattr *)0);
+	} else
+		error = NQNFS_EXPIRED;	/* lease had already run out on arrival */
+	nfsm_reqdone;
+	return (error);
+}
+
+
+/*
+ * Client vacated message function: send a VACATED rpc for vp, no reply read.
+ */
+nqnfs_vacated(vp, cred)
+	register struct vnode *vp;
+	struct ucred *cred;
+{
+	register caddr_t cp;
+	register struct mbuf *m;
+	register int i;		/* total length of the request mbuf chain */
+	caddr_t bpos;
+	u_long xid;
+	int error = 0;
+	struct mbuf *mreq, *mb, *mb2, *mheadend;
+	struct nfsmount *nmp;
+	struct nfsreq myrep;	/* only r_flags and r_nmp are filled in here */
+	
+	nmp = VFSTONFS(vp->v_mount);
+	nfsstats.rpccnt[NQNFSPROC_VACATED]++;
+	nfsm_reqhead(vp, NQNFSPROC_VACATED, NFSX_FH);
+	nfsm_fhtom(vp);
+	m = mreq;
+	i = 0;
+	while (m) {
+		i += m->m_len;
+		m = m->m_next;
+	}
+	m = nfsm_rpchead(cred, TRUE, NQNFSPROC_VACATED,
+		RPCAUTH_UNIX, 5*NFSX_UNSIGNED, (char *)0,
+		mreq, i, &mheadend, &xid);
+	if (nmp->nm_sotype == SOCK_STREAM) {	/* prepend a record mark */
+		M_PREPEND(m, NFSX_UNSIGNED, M_WAIT);
+		*mtod(m, u_long *) = htonl(0x80000000 | (m->m_pkthdr.len -
+			NFSX_UNSIGNED));
+	}
+	myrep.r_flags = 0;
+	myrep.r_nmp = nmp;
+	if (nmp->nm_soflags & PR_CONNREQUIRED)
+		(void) nfs_sndlock(&nmp->nm_flag, (struct nfsreq *)0);
+	(void) nfs_send(nmp->nm_so, nmp->nm_nam, m, &myrep);	/* result ignored */
+	if (nmp->nm_soflags & PR_CONNREQUIRED)
+		nfs_sndunlock(&nmp->nm_flag);
+	return (error);
+}
+
+
+/*
+ * Called for client side callbacks; only NQNFSPROC_EVICTED is accepted.
+ */
+nqnfs_callback(nmp, mrep, md, dpos)
+	struct nfsmount *nmp;
+	struct mbuf *mrep, *md;
+	caddr_t dpos;
+{
+	register struct vnode *vp;
+	register u_long *tl;
+	register long t1;
+	nfsv2fh_t nfh;
+	fhandle_t *fhp;
+	struct nfsnode *np;
+	struct nfsd nd;		/* scratch request descriptor for nfs_getreq() */
+	int error;
+	char *cp2;
+
+	nd.nd_mrep = mrep;
+	nd.nd_md = md;
+	nd.nd_dpos = dpos;
+	if (error = nfs_getreq(&nd, FALSE))
+		return (error);
+	md = nd.nd_md;		/* pick up the position nfs_getreq() left */
+	dpos = nd.nd_dpos;
+	if (nd.nd_procnum != NQNFSPROC_EVICTED) {
+		m_freem(mrep);
+		return (EPERM);
+	}
+	fhp = &nfh.fh_generic;
+	nfsm_srvmtofh(fhp);
+	m_freem(mrep);
+	if (error = nfs_nget(nmp->nm_mountp, fhp, &np))
+		return (error);
+	vp = NFSTOV(np);
+	if (np->n_tnext) {	/* node is on the mount's lease timer list */
+		np->n_expiry = 0;
+		np->n_flag |= NQNFSEVICTED;
+		if (np->n_tprev != (struct nfsnode *)nmp) {	/* not at head */
+			if (np->n_tnext == (struct nfsnode *)nmp)
+				nmp->nm_tprev = np->n_tprev;
+			else
+				np->n_tnext->n_tprev = np->n_tprev;
+			np->n_tprev->n_tnext = np->n_tnext;
+			np->n_tnext = nmp->nm_tnext;	/* relink at the head */
+			nmp->nm_tnext = np;
+			np->n_tprev = (struct nfsnode *)nmp;
+			if (np->n_tnext == (struct nfsnode *)nmp)
+				nmp->nm_tprev = np;
+			else
+				np->n_tnext->n_tprev = np;
+		}
+	}
+	vrele(vp);
+	nfsm_srvdone;
+}
+
+
+/*
+ * Nqnfs client helper daemon. Runs periodically to expire leases.
+ * It also gets authorization strings for "kerb" mounts.
+ * It must start at the beginning of the list again after any potential
+ * "sleep" since nfs_reclaim() called from vclean() can pull a node off
+ * the list asynchronously.
+ */
+nqnfs_clientd(nmp, cred, ncd, flag, argp, p)
+	register struct nfsmount *nmp;
+	struct ucred *cred;
+	struct nfsd_cargs *ncd;	/* kernel copy of the daemon's arguments */
+	int flag;
+	caddr_t argp;		/* user address that ncd is copied back to */
+	struct proc *p;
+{
+	register struct nfsnode *np;
+	struct vnode *vp;
+	struct nfsreq myrep;
+	int error = 0, vpid;	/* error stays 0 if tsleep() is never reached */
+
+	/*
+	 * First initialize some variables
+	 */
+	nqnfs_prog = txdr_unsigned(NQNFS_PROG);
+	nqnfs_vers = txdr_unsigned(NQNFS_VER1);
+
+	/*
+	 * If an authorization string is being passed in, get it.
+	 */
+	if ((flag & NFSSVC_GOTAUTH) &&
+		(nmp->nm_flag & (NFSMNT_WAITAUTH | NFSMNT_DISMNT)) == 0) {
+		if (nmp->nm_flag & NFSMNT_HASAUTH)
+			panic("cld kerb");
+		if ((flag & NFSSVC_AUTHINFAIL) == 0) {
+			if (ncd->ncd_authlen <= RPCAUTH_MAXSIZ &&
+				copyin(ncd->ncd_authstr, nmp->nm_authstr,
+				ncd->ncd_authlen) == 0) {
+				nmp->nm_authtype = ncd->ncd_authtype;
+				nmp->nm_authlen = ncd->ncd_authlen;
+			} else
+				nmp->nm_flag |= NFSMNT_AUTHERR;
+		} else
+			nmp->nm_flag |= NFSMNT_AUTHERR;
+		nmp->nm_flag |= NFSMNT_HASAUTH;
+		wakeup((caddr_t)&nmp->nm_authlen);
+	} else
+		nmp->nm_flag |= NFSMNT_WAITAUTH;
+
+	/*
+	 * Loop updating the lease queue until the mount is being dismounted.
+	 */
+	while ((nmp->nm_flag & NFSMNT_DISMNT) == 0) {
+	    if (nmp->nm_flag & NFSMNT_NQNFS) {
+		/*
+		 * If there are no outstanding requests (and therefore no
+		 * processes in nfs_reply) and there is data in the receive
+		 * queue, poke for callbacks.
+		 */
+		if (nfsreqh.r_next == &nfsreqh && nmp->nm_so &&
+		    nmp->nm_so->so_rcv.sb_cc > 0) {
+		    myrep.r_flags = R_GETONEREP;
+		    myrep.r_nmp = nmp;
+		    myrep.r_mrep = (struct mbuf *)0;
+		    myrep.r_procp = (struct proc *)0;
+		    (void) nfs_reply(&myrep);
+		}
+
+		/*
+		 * Loop through the leases, updating as required.
+		 */
+		np = nmp->nm_tnext;
+		while (np != (struct nfsnode *)nmp &&
+		       (nmp->nm_flag & NFSMNT_DISMINPROG) == 0) {
+			vp = NFSTOV(np);
+if (vp->v_mount->mnt_stat.f_fsid.val[1] != MOUNT_NFS) panic("trash2");
+			vpid = vp->v_id;	/* to detect a recycled vnode */
+			if (np->n_expiry < time.tv_sec) {
+			   if (vget(vp, 1) == 0) {
+			     nmp->nm_inprog = vp;
+			     if (vpid == vp->v_id) {
+if (vp->v_mount->mnt_stat.f_fsid.val[1] != MOUNT_NFS) panic("trash3");
+				if (np->n_tnext == (struct nfsnode *)nmp)
+					nmp->nm_tprev = np->n_tprev;
+				else
+					np->n_tnext->n_tprev = np->n_tprev;
+				if (np->n_tprev == (struct nfsnode *)nmp)
+					nmp->nm_tnext = np->n_tnext;
+				else
+					np->n_tprev->n_tnext = np->n_tnext;
+				np->n_tnext = (struct nfsnode *)0;	/* off the list */
+				if ((np->n_flag & (NMODIFIED | NQNFSEVICTED))
+				    && vp->v_type == VREG) {
+					if (np->n_flag & NQNFSEVICTED) {
+						(void) nfs_vinvalbuf(vp,
+						       V_SAVE, cred, p, 0);
+						np->n_flag &= ~NQNFSEVICTED;
+						(void) nqnfs_vacated(vp, cred);
+					} else {
+						(void) VOP_FSYNC(vp, cred,
+						    MNT_WAIT, p);
+						np->n_flag &= ~NMODIFIED;
+					}
+				}
+			      }
+			      vrele(vp);
+			      nmp->nm_inprog = NULLVP;
+			    }
+			    if (np != nmp->nm_tnext)	/* restart from the head */
+				np = nmp->nm_tnext;
+			    else
+				break;
+			} else if ((np->n_expiry - NQ_RENEWAL) < time.tv_sec) {
+			    if ((np->n_flag & (NQNFSWRITE | NQNFSNONCACHE))
+				 == NQNFSWRITE && vp->v_dirtyblkhd.lh_first &&
+				 vget(vp, 1) == 0) {
+				 nmp->nm_inprog = vp;
+if (vp->v_mount->mnt_stat.f_fsid.val[1] != MOUNT_NFS) panic("trash4");
+				 if (vpid == vp->v_id &&
+				     nqnfs_getlease(vp, NQL_WRITE, cred, p)==0)
+					np->n_brev = np->n_lrev;
+				 vrele(vp);
+				 nmp->nm_inprog = NULLVP;
+			    }
+			    if (np != nmp->nm_tnext)	/* restart from the head */
+				np = nmp->nm_tnext;
+			    else
+				break;
+			} else
+				break;
+		}
+	    }
+
+	    /*
+	     * Get an authorization string, if required.
+	     */
+	    if ((nmp->nm_flag & (NFSMNT_WAITAUTH | NFSMNT_DISMNT | NFSMNT_HASAUTH)) == 0) {
+		ncd->ncd_authuid = nmp->nm_authuid;
+		if (copyout((caddr_t)ncd, argp, sizeof (struct nfsd_cargs)))
+			nmp->nm_flag |= NFSMNT_WAITAUTH;
+		else
+			return (ENEEDAUTH);
+	    }
+
+	    /*
+	     * Wait a bit (no pun) and do it again.
+	     */
+	    if ((nmp->nm_flag & NFSMNT_DISMNT) == 0 &&
+		(nmp->nm_flag & (NFSMNT_WAITAUTH | NFSMNT_HASAUTH))) {
+		    error = tsleep((caddr_t)&nmp->nm_authstr, PSOCK | PCATCH,
+			"nqnfstimr", hz / 3);
+		    if (error == EINTR || error == ERESTART)
+			(void) dounmount(nmp->nm_mountp, 0, p);
+	    }
+	}
+	free((caddr_t)nmp, M_NFSMNT);
+	if (error == EWOULDBLOCK)	/* a plain tsleep() timeout is not an error */
+		error = 0;
+	return (error);
+}
+
+
+/*
+ * Shift all lease expiry times by deltat after the time of day clock is
+ * changed.  Called from the settimeofday() syscall.
+ */
+void
+lease_updatetime(deltat)
+	register int deltat;
+{
+	register struct nqlease *lease;
+	register struct nfsnode *node;
+	struct mount *mnt;
+	struct nfsmount *nfsmnt;
+	int spl;
+
+	if (nqnfsstarttime != 0)
+		nqnfsstarttime += deltat;
+
+	/* Server side: adjust the lease timer queue at splsoftclock. */
+	spl = splsoftclock();
+	for (lease = nqthead.th_chain[0];
+	     lease != (struct nqlease *)&nqthead;
+	     lease = lease->lc_chain1[0])
+		lease->lc_expiry += deltat;
+	splx(spl);
+
+	/*
+	 * Client side: adjust the timer queue of every nqnfs mount found
+	 * on the mount list.
+	 */
+	for (mnt = mountlist.tqh_first; mnt != NULL;
+	     mnt = mnt->mnt_list.tqe_next) {
+		if (mnt->mnt_stat.f_fsid.val[1] != MOUNT_NFS)
+			continue;
+		nfsmnt = VFSTONFS(mnt);
+		if ((nfsmnt->nm_flag & NFSMNT_NQNFS) == 0)
+			continue;
+		for (node = nfsmnt->nm_tnext;
+		     node != (struct nfsnode *)nfsmnt; node = node->n_tnext)
+			node->n_expiry += deltat;
+	}
+}
+
+/* Lock a server lease, sleeping for as long as LC_LOCKED is already set. */
+void
+nqsrv_locklease(lp)
+	struct nqlease *lp;
+{
+
+	for (;;) {
+		if ((lp->lc_flag & LC_LOCKED) == 0)
+			break;
+		lp->lc_flag |= LC_WANTED;
+		(void) tsleep((caddr_t)lp, PSOCK, "nqlc", 0);
+	}
+	lp->lc_flag |= LC_LOCKED;
+	lp->lc_flag &= ~LC_WANTED;
+}
+
+/*
+ * Unlock a server lease and wake up any sleeper that flagged LC_WANTED.
+ */
+void
+nqsrv_unlocklease(lp)
+	struct nqlease *lp;
+{
+	lp->lc_flag &= ~LC_LOCKED;
+	if ((lp->lc_flag & LC_WANTED) == 0)
+		return;
+	wakeup((caddr_t)lp);
+}
+
+/*
+ * Record a client lease on np and (re)insert np into nmp's timer list.
+ */
+void
+nqnfs_clientlease(nmp, np, rwflag, cachable, expiry, frev)
+	register struct nfsmount *nmp;
+	register struct nfsnode *np;
+	int rwflag, cachable;
+	time_t expiry;
+	u_quad_t frev;
+{
+	register struct nfsnode *tp;
+	struct nfsnode *head = (struct nfsnode *)nmp;	/* list sentinel */
+	int queued = (np->n_tnext != (struct nfsnode *)0);
+	if (queued) {
+		if (np->n_tnext != head)
+			np->n_tnext->n_tprev = np->n_tprev;
+		else
+			nmp->nm_tprev = np->n_tprev;
+		if (np->n_tprev != head)
+			np->n_tprev->n_tnext = np->n_tnext;
+		else
+			nmp->nm_tnext = np->n_tnext;
+	}
+	/* A node that was already queued never loses NQNFSWRITE here. */
+	if (rwflag == NQL_WRITE || (!queued && rwflag != NQL_READ))
+		np->n_flag |= NQNFSWRITE;
+	else if (!queued)
+		np->n_flag &= ~NQNFSWRITE;
+	if (cachable)
+		np->n_flag &= ~NQNFSNONCACHE;
+	else
+		np->n_flag |= NQNFSNONCACHE;
+	np->n_expiry = expiry;
+	np->n_lrev = frev;
+	/* Scan back from the tail for an entry expiring no later than np. */
+	for (tp = nmp->nm_tprev; tp != head; tp = tp->n_tprev)
+		if (tp->n_expiry <= np->n_expiry)
+			break;
+	np->n_tnext = (tp == head) ? nmp->nm_tnext : tp->n_tnext;
+	if (tp == head)
+		nmp->nm_tnext = np;
+	else
+		tp->n_tnext = np;
+	np->n_tprev = tp;
+	if (np->n_tnext != head)
+		np->n_tnext->n_tprev = np;
+	else
+		nmp->nm_tprev = np;
+}
diff --git a/sys/nfs/nfs_serv.c b/sys/nfs/nfs_serv.c
new file mode 100644
index 00000000000..f31b96e02ed
--- /dev/null
+++ b/sys/nfs/nfs_serv.c
@@ -0,0 +1,1908 @@
+/*
+ * Copyright (c) 1989, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Rick Macklem at The University of Guelph.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)nfs_serv.c	8.3 (Berkeley) 1/12/94
+ */
+
+/*
+ * nfs version 2 server calls to vnode ops
+ * - these routines generally have 3 phases
+ *   1 - break down and validate rpc request in mbuf list
+ *   2 - do the vnode ops for the request
+ *       (surprisingly ?? many are very similar to syscalls in vfs_syscalls.c)
+ *   3 - build the rpc reply in an mbuf list
+ *   nb:
+ *	- do not mix the phases, since the nfsm_?? macros can return failures
+ *	  on a bad rpc or similar and do not do any vrele() or vput()'s
+ *
+ *      - the nfsm_reply() macro generates an nfs rpc reply with the nfs
+ *	error number iff error != 0 whereas
+ *	returning an error from the server function implies a fatal error
+ *	such as a badly constructed rpc request that should be dropped without
+ *	a reply.
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/file.h>
+#include <sys/namei.h>
+#include <sys/vnode.h>
+#include <sys/mount.h>
+#include <sys/mbuf.h>
+#include <sys/dirent.h>
+#include <sys/stat.h>
+
+#include <vm/vm.h>
+
+#include <nfs/nfsv2.h>
+#include <nfs/rpcv2.h>
+#include <nfs/nfs.h>
+#include <nfs/xdr_subs.h>
+#include <nfs/nfsm_subs.h>
+#include <nfs/nqnfs.h>
+
+/* Defs: boolean constants passed as flag arguments in this file */
+#define	TRUE	1
+#define	FALSE	0
+
+/* Global vars: values declared extern here are defined elsewhere */
+extern u_long nfs_procids[NFS_NPROCS];
+extern u_long nfs_xdrneg1;	/* compared with sattr fields that are unset */
+extern u_long nfs_false, nfs_true;	/* compared with wire booleans */
+nfstype nfs_type[9] = { NFNON, NFREG, NFDIR, NFBLK, NFCHR, NFLNK, NFNON,
+		      NFCHR, NFNON };	/* presumably indexed by vnode type - TODO confirm */
+
+/*
+ * nqnfs access service: check the requested read/write/exec access on a file
+ */
+nqnfsrv_access(nfsd, mrep, md, dpos, cred, nam, mrq)
+	struct nfsd *nfsd;
+	struct mbuf *mrep, *md;
+	caddr_t dpos;
+	struct ucred *cred;
+	struct mbuf *nam, **mrq;
+{
+	struct vnode *vp;
+	nfsv2fh_t nfh;
+	fhandle_t *fhp;
+	register u_long *tl;
+	register long t1;
+	caddr_t bpos;
+	int error = 0, rdonly, cache, mode = 0;	/* mode: VREAD|VWRITE|VEXEC mask */
+	char *cp2;
+	struct mbuf *mb, *mreq;
+	u_quad_t frev;
+
+	fhp = &nfh.fh_generic;
+	nfsm_srvmtofh(fhp);
+	nfsm_dissect(tl, u_long *, 3 * NFSX_UNSIGNED);	/* three boolean words */
+	if (error = nfsrv_fhtovp(fhp, TRUE, &vp, cred, nfsd->nd_slp, nam, &rdonly))
+		nfsm_reply(0);
+	if (*tl++ == nfs_true)
+		mode |= VREAD;
+	if (*tl++ == nfs_true)
+		mode |= VWRITE;
+	if (*tl == nfs_true)
+		mode |= VEXEC;
+	error = nfsrv_access(vp, mode, cred, rdonly, nfsd->nd_procp);
+	vput(vp);
+	nfsm_reply(0);	/* the reply carries no data beyond the status */
+	nfsm_srvdone;
+}
+
+/*
+ * nfs getattr service: return the attributes of the file named by the handle
+ */
+nfsrv_getattr(nfsd, mrep, md, dpos, cred, nam, mrq)
+	struct nfsd *nfsd;
+	struct mbuf *mrep, *md;
+	caddr_t dpos;
+	struct ucred *cred;
+	struct mbuf *nam, **mrq;
+{
+	register struct nfsv2_fattr *fp;
+	struct vattr va;
+	register struct vattr *vap = &va;
+	struct vnode *vp;
+	nfsv2fh_t nfh;
+	fhandle_t *fhp;
+	register u_long *tl;
+	register long t1;
+	caddr_t bpos;
+	int error = 0, rdonly, cache;
+	char *cp2;
+	struct mbuf *mb, *mb2, *mreq;
+	u_quad_t frev;
+
+	fhp = &nfh.fh_generic;
+	nfsm_srvmtofh(fhp);
+	if (error = nfsrv_fhtovp(fhp, TRUE, &vp, cred, nfsd->nd_slp, nam, &rdonly))
+		nfsm_reply(0);
+	nqsrv_getl(vp, NQL_READ);
+	error = VOP_GETATTR(vp, vap, cred, nfsd->nd_procp);
+	vput(vp);	/* vap is a local copy, so vp is not needed below */
+	nfsm_reply(NFSX_FATTR(nfsd->nd_nqlflag != NQL_NOVAL));
+	nfsm_build(fp, struct nfsv2_fattr *, NFSX_FATTR(nfsd->nd_nqlflag != NQL_NOVAL));
+	nfsm_srvfillattr;
+	nfsm_srvdone;
+}
+
+/*
+ * nfs setattr service: apply the sattr fields that are set, return new attrs
+ */
+nfsrv_setattr(nfsd, mrep, md, dpos, cred, nam, mrq)
+	struct nfsd *nfsd;
+	struct mbuf *mrep, *md;
+	caddr_t dpos;
+	struct ucred *cred;
+	struct mbuf *nam, **mrq;
+{
+	struct vattr va;
+	register struct vattr *vap = &va;
+	register struct nfsv2_sattr *sp;
+	register struct nfsv2_fattr *fp;
+	struct vnode *vp;
+	nfsv2fh_t nfh;
+	fhandle_t *fhp;
+	register u_long *tl;
+	register long t1;
+	caddr_t bpos;
+	int error = 0, rdonly, cache;
+	char *cp2;
+	struct mbuf *mb, *mb2, *mreq;
+	u_quad_t frev, frev2;
+
+	fhp = &nfh.fh_generic;
+	nfsm_srvmtofh(fhp);
+	nfsm_dissect(sp, struct nfsv2_sattr *, NFSX_SATTR(nfsd->nd_nqlflag != NQL_NOVAL));
+	if (error = nfsrv_fhtovp(fhp, TRUE, &vp, cred, nfsd->nd_slp, nam, &rdonly))
+		nfsm_reply(0);
+	nqsrv_getl(vp, NQL_WRITE);
+	VATTR_NULL(vap);
+	/*
+	 * Nah nah nah nah na nah
+	 * There is a bug in the Sun client that puts 0xffff in the mode
+	 * field of sattr when it should put in 0xffffffff. The u_short
+	 * doesn't sign extend.
+	 * --> check the low order 2 bytes for 0xffff
+	 */
+	if ((fxdr_unsigned(int, sp->sa_mode) & 0xffff) != 0xffff)
+		vap->va_mode = nfstov_mode(sp->sa_mode);
+	if (sp->sa_uid != nfs_xdrneg1)
+		vap->va_uid = fxdr_unsigned(uid_t, sp->sa_uid);
+	if (sp->sa_gid != nfs_xdrneg1)
+		vap->va_gid = fxdr_unsigned(gid_t, sp->sa_gid);
+	if (nfsd->nd_nqlflag == NQL_NOVAL) {	/* plain nfs sattr layout */
+		if (sp->sa_nfssize != nfs_xdrneg1)
+			vap->va_size = fxdr_unsigned(u_quad_t, sp->sa_nfssize);
+		if (sp->sa_nfsatime.nfs_sec != nfs_xdrneg1) {
+#ifdef notyet
+			fxdr_nfstime(&sp->sa_nfsatime, &vap->va_atime);
+#else
+			vap->va_atime.ts_sec =
+				fxdr_unsigned(long, sp->sa_nfsatime.nfs_sec);
+			vap->va_atime.ts_nsec = 0;
+#endif
+		}
+		if (sp->sa_nfsmtime.nfs_sec != nfs_xdrneg1)
+			fxdr_nfstime(&sp->sa_nfsmtime, &vap->va_mtime);
+	} else {	/* nqnfs sattr layout: fields copied unconditionally */
+		fxdr_hyper(&sp->sa_nqsize, &vap->va_size);
+		fxdr_nqtime(&sp->sa_nqatime, &vap->va_atime);
+		fxdr_nqtime(&sp->sa_nqmtime, &vap->va_mtime);
+		vap->va_flags = fxdr_unsigned(u_long, sp->sa_nqflags);
+	}
+
+	/*
+	 * If the size is being changed write access is required, otherwise
+	 * just check for a read only file system.
+	 */
+	if (vap->va_size == ((u_quad_t)((quad_t) -1))) {	/* size not given */
+		if (rdonly || (vp->v_mount->mnt_flag & MNT_RDONLY)) {
+			error = EROFS;
+			goto out;
+		}
+	} else {
+		if (vp->v_type == VDIR) {
+			error = EISDIR;
+			goto out;
+		} else if (error = nfsrv_access(vp, VWRITE, cred, rdonly,
+			nfsd->nd_procp))
+			goto out;
+	}
+	if (error = VOP_SETATTR(vp, vap, cred, nfsd->nd_procp)) {
+		vput(vp);
+		nfsm_reply(0);
+	}
+	error = VOP_GETATTR(vp, vap, cred, nfsd->nd_procp);
+out:
+	vput(vp);	/* single release point for the goto out paths */
+	nfsm_reply(NFSX_FATTR(nfsd->nd_nqlflag != NQL_NOVAL) + 2*NFSX_UNSIGNED);
+	nfsm_build(fp, struct nfsv2_fattr *, NFSX_FATTR(nfsd->nd_nqlflag != NQL_NOVAL));
+	nfsm_srvfillattr;
+	if (nfsd->nd_nqlflag != NQL_NOVAL) {
+		nfsm_build(tl, u_long *, 2*NFSX_UNSIGNED);
+		txdr_hyper(&frev2, tl);	/* NOTE(review): frev2 never assigned here */
+	}
+	nfsm_srvdone;
+}
+
+/*
+ * nfs lookup rpc: look a name up in a directory, return its handle and attrs
+ */
+nfsrv_lookup(nfsd, mrep, md, dpos, cred, nam, mrq)
+	struct nfsd *nfsd;
+	struct mbuf *mrep, *md;
+	caddr_t dpos;
+	struct ucred *cred;
+	struct mbuf *nam, **mrq;
+{
+	register struct nfsv2_fattr *fp;
+	struct nameidata nd;
+	struct vnode *vp;
+	nfsv2fh_t nfh;
+	fhandle_t *fhp;
+	register caddr_t cp;
+	register u_long *tl;
+	register long t1;
+	caddr_t bpos;
+	int error = 0, cache, duration2, cache2, len;
+	char *cp2;
+	struct mbuf *mb, *mb2, *mreq;
+	struct vattr va, *vap = &va;
+	u_quad_t frev, frev2;
+
+	fhp = &nfh.fh_generic;
+	duration2 = 0;	/* nonzero only if the request carries a lease duration */
+	if (nfsd->nd_nqlflag != NQL_NOVAL) {
+		nfsm_dissect(tl, u_long *, NFSX_UNSIGNED);
+		duration2 = fxdr_unsigned(int, *tl);
+	}
+	nfsm_srvmtofh(fhp);
+	nfsm_srvstrsiz(len, NFS_MAXNAMLEN);
+	nd.ni_cnd.cn_cred = cred;
+	nd.ni_cnd.cn_nameiop = LOOKUP;
+	nd.ni_cnd.cn_flags = LOCKLEAF | SAVESTART;
+	if (error = nfs_namei(&nd, fhp, len, nfsd->nd_slp, nam, &md, &dpos,
+	    nfsd->nd_procp))
+		nfsm_reply(0);
+	nqsrv_getl(nd.ni_startdir, NQL_READ);
+	vrele(nd.ni_startdir);
+	FREE(nd.ni_cnd.cn_pnbuf, M_NAMEI);
+	vp = nd.ni_vp;
+	bzero((caddr_t)fhp, sizeof(nfh));	/* reuse nfh for the result handle */
+	fhp->fh_fsid = vp->v_mount->mnt_stat.f_fsid;
+	if (error = VFS_VPTOFH(vp, &fhp->fh_fid)) {
+		vput(vp);
+		nfsm_reply(0);
+	}
+	if (duration2)
+		(void) nqsrv_getlease(vp, &duration2, NQL_READ, nfsd,
+			nam, &cache2, &frev2, cred);
+	error = VOP_GETATTR(vp, vap, cred, nfsd->nd_procp);
+	vput(vp);
+	nfsm_reply(NFSX_FH + NFSX_FATTR(nfsd->nd_nqlflag != NQL_NOVAL) + 5*NFSX_UNSIGNED);
+	if (nfsd->nd_nqlflag != NQL_NOVAL) {
+		if (duration2) {	/* lease info, then handle and attrs */
+			nfsm_build(tl, u_long *, 5*NFSX_UNSIGNED);
+			*tl++ = txdr_unsigned(NQL_READ);
+			*tl++ = txdr_unsigned(cache2);
+			*tl++ = txdr_unsigned(duration2);
+			txdr_hyper(&frev2, tl);
+		} else {	/* a single zero word in place of the lease info */
+			nfsm_build(tl, u_long *, NFSX_UNSIGNED);
+			*tl = 0;
+		}
+	}
+	nfsm_srvfhtom(fhp);
+	nfsm_build(fp, struct nfsv2_fattr *, NFSX_FATTR(nfsd->nd_nqlflag != NQL_NOVAL));
+	nfsm_srvfillattr;
+	nfsm_srvdone;
+}
+
+/*
+ * nfs readlink service: read a symbolic link into mbufs chained to the reply
+ */
+nfsrv_readlink(nfsd, mrep, md, dpos, cred, nam, mrq)
+	struct nfsd *nfsd;
+	struct mbuf *mrep, *md;
+	caddr_t dpos;
+	struct ucred *cred;
+	struct mbuf *nam, **mrq;
+{
+	struct iovec iv[(NFS_MAXPATHLEN+MLEN-1)/MLEN];
+	register struct iovec *ivp = iv;
+	register struct mbuf *mp;
+	register u_long *tl;
+	register long t1;
+	caddr_t bpos;
+	int error = 0, rdonly, cache, i, tlen, len;
+	char *cp2;
+	struct mbuf *mb, *mb2, *mp2, *mp3, *mreq;	/* mp3: head of data chain */
+	struct vnode *vp;
+	nfsv2fh_t nfh;
+	fhandle_t *fhp;
+	struct uio io, *uiop = &io;
+	u_quad_t frev;
+
+	fhp = &nfh.fh_generic;
+	nfsm_srvmtofh(fhp);
+	len = 0;
+	i = 0;
+	while (len < NFS_MAXPATHLEN) {	/* preallocate NFS_MAXPATHLEN bytes */
+		MGET(mp, M_WAIT, MT_DATA);
+		MCLGET(mp, M_WAIT);
+		mp->m_len = NFSMSIZ(mp);
+		if (len == 0)
+			mp3 = mp2 = mp;
+		else {
+			mp2->m_next = mp;
+			mp2 = mp;
+		}
+		if ((len+mp->m_len) > NFS_MAXPATHLEN) {	/* clip the last mbuf */
+			mp->m_len = NFS_MAXPATHLEN-len;
+			len = NFS_MAXPATHLEN;
+		} else
+			len += mp->m_len;
+		ivp->iov_base = mtod(mp, caddr_t);	/* one iovec per mbuf */
+		ivp->iov_len = mp->m_len;
+		i++;
+		ivp++;
+	}
+	uiop->uio_iov = iv;
+	uiop->uio_iovcnt = i;
+	uiop->uio_offset = 0;
+	uiop->uio_resid = len;
+	uiop->uio_rw = UIO_READ;
+	uiop->uio_segflg = UIO_SYSSPACE;
+	uiop->uio_procp = (struct proc *)0;
+	if (error = nfsrv_fhtovp(fhp, TRUE, &vp, cred, nfsd->nd_slp, nam, &rdonly)) {
+		m_freem(mp3);
+		nfsm_reply(0);
+	}
+	if (vp->v_type != VLNK) {
+		error = EINVAL;
+		goto out;
+	}
+	nqsrv_getl(vp, NQL_READ);
+	error = VOP_READLINK(vp, uiop, cred);
+out:
+	vput(vp);
+	if (error)
+		m_freem(mp3);	/* data chain is not sent on failure */
+	nfsm_reply(NFSX_UNSIGNED);
+	if (uiop->uio_resid > 0) {	/* link shorter than the preallocation */
+		len -= uiop->uio_resid;
+		tlen = nfsm_rndup(len);
+		nfsm_adj(mp3, NFS_MAXPATHLEN-tlen, tlen-len);
+	}
+	nfsm_build(tl, u_long *, NFSX_UNSIGNED);
+	*tl = txdr_unsigned(len);
+	mb->m_next = mp3;	/* append the link text after the length word */
+	nfsm_srvdone;
+}
+
+/*
+ * nfs read service: read file data directly into the reply mbuf chain
+ */
+nfsrv_read(nfsd, mrep, md, dpos, cred, nam, mrq)
+	struct nfsd *nfsd;
+	struct mbuf *mrep, *md;
+	caddr_t dpos;
+	struct ucred *cred;
+	struct mbuf *nam, **mrq;
+{
+	register struct iovec *iv;
+	struct iovec *iv2;	/* start of the iovec array, kept for FREE() */
+	register struct mbuf *m;
+	register struct nfsv2_fattr *fp;
+	register u_long *tl;
+	register long t1;
+	caddr_t bpos;
+	int error = 0, rdonly, cache, i, cnt, len, left, siz, tlen;
+	char *cp2;
+	struct mbuf *mb, *mb2, *mreq;
+	struct mbuf *m2;
+	struct vnode *vp;
+	nfsv2fh_t nfh;
+	fhandle_t *fhp;
+	struct uio io, *uiop = &io;
+	struct vattr va, *vap = &va;
+	off_t off;
+	u_quad_t frev;
+
+	fhp = &nfh.fh_generic;
+	nfsm_srvmtofh(fhp);
+	if (nfsd->nd_nqlflag == NQL_NOVAL) {	/* one-word offset */
+		nfsm_dissect(tl, u_long *, NFSX_UNSIGNED);
+		off = (off_t)fxdr_unsigned(u_long, *tl);
+	} else {	/* two-word (hyper) offset */
+		nfsm_dissect(tl, u_long *, 2 * NFSX_UNSIGNED);
+		fxdr_hyper(tl, &off);
+	}
+	nfsm_srvstrsiz(cnt, NFS_MAXDATA);
+	if (error = nfsrv_fhtovp(fhp, TRUE, &vp, cred, nfsd->nd_slp, nam, &rdonly))
+		nfsm_reply(0);
+	if (vp->v_type != VREG) {
+		error = (vp->v_type == VDIR) ? EISDIR : EACCES;
+		vput(vp);
+		nfsm_reply(0);
+	}
+	nqsrv_getl(vp, NQL_READ);
+	if ((error = nfsrv_access(vp, VREAD, cred, rdonly, nfsd->nd_procp)) &&
+	    (error = nfsrv_access(vp, VEXEC, cred, rdonly, nfsd->nd_procp))) {
+		vput(vp);	/* neither read nor execute access was granted */
+		nfsm_reply(0);
+	}
+	if (error = VOP_GETATTR(vp, vap, cred, nfsd->nd_procp)) {
+		vput(vp);
+		nfsm_reply(0);
+	}
+	if (off >= vap->va_size)	/* clamp the count to the file size */
+		cnt = 0;
+	else if ((off + cnt) > vap->va_size)
+		cnt = nfsm_rndup(vap->va_size - off);
+	nfsm_reply(NFSX_FATTR(nfsd->nd_nqlflag != NQL_NOVAL)+NFSX_UNSIGNED+nfsm_rndup(cnt));
+	nfsm_build(fp, struct nfsv2_fattr *, NFSX_FATTR(nfsd->nd_nqlflag != NQL_NOVAL));
+	nfsm_build(tl, u_long *, NFSX_UNSIGNED);	/* length word, set at the end */
+	len = left = cnt;
+	if (cnt > 0) {
+		/*
+		 * Generate the mbuf list with the uio_iov ref. to it.
+		 */
+		i = 0;
+		m = m2 = mb;
+		MALLOC(iv, struct iovec *,
+		       ((NFS_MAXDATA+MLEN-1)/MLEN) * sizeof (struct iovec),
+		       M_TEMP, M_WAITOK);
+		iv2 = iv;
+		while (left > 0) {
+			siz = min(M_TRAILINGSPACE(m), left);
+			if (siz > 0) {	/* claim the free space in this mbuf */
+				m->m_len += siz;
+				iv->iov_base = bpos;
+				iv->iov_len = siz;
+				iv++;
+				i++;
+				left -= siz;
+			}
+			if (left > 0) {	/* more to read: chain on a new mbuf */
+				MGET(m, M_WAIT, MT_DATA);
+				MCLGET(m, M_WAIT);
+				m->m_len = 0;
+				m2->m_next = m;
+				m2 = m;
+				bpos = mtod(m, caddr_t);
+			}
+		}
+		uiop->uio_iov = iv2;
+		uiop->uio_iovcnt = i;
+		uiop->uio_offset = off;
+		uiop->uio_resid = cnt;
+		uiop->uio_rw = UIO_READ;
+		uiop->uio_segflg = UIO_SYSSPACE;
+		error = VOP_READ(vp, uiop, IO_NODELOCKED, cred);
+		off = uiop->uio_offset;
+		FREE((caddr_t)iv2, M_TEMP);
+		if (error || (error = VOP_GETATTR(vp, vap, cred, nfsd->nd_procp))) {
+			m_freem(mreq);	/* discard the partly built reply */
+			vput(vp);
+			nfsm_reply(0);
+		}
+	} else
+		uiop->uio_resid = 0;
+	vput(vp);
+	nfsm_srvfillattr;
+	len -= uiop->uio_resid;	/* bytes actually read */
+	tlen = nfsm_rndup(len);
+	if (cnt != tlen || tlen != len)
+		nfsm_adj(mb, cnt-tlen, tlen-len);
+	*tl = txdr_unsigned(len);
+	nfsm_srvdone;
+}
+
+/*
+ * nfs write service: writes the data mbufs of the request to a regular file
+ */
+nfsrv_write(nfsd, mrep, md, dpos, cred, nam, mrq)
+	struct nfsd *nfsd;
+	struct mbuf *mrep, *md;
+	caddr_t dpos;
+	struct ucred *cred;
+	struct mbuf *nam, **mrq;
+{
+	register struct iovec *ivp;
+	register struct mbuf *mp;
+	register struct nfsv2_fattr *fp;
+	struct iovec iv[NFS_MAXIOVEC];
+	struct vattr va;
+	register struct vattr *vap = &va;
+	register u_long *tl;
+	register long t1;
+	caddr_t bpos;
+	int error = 0, rdonly, cache, siz, len, xfer;
+	int ioflags = IO_SYNC | IO_NODELOCKED;
+	char *cp2;
+	struct mbuf *mb, *mb2, *mreq;
+	struct vnode *vp;
+	nfsv2fh_t nfh;
+	fhandle_t *fhp;
+	struct uio io, *uiop = &io;
+	off_t off;
+	u_quad_t frev;
+
+	fhp = &nfh.fh_generic;
+	nfsm_srvmtofh(fhp);
+	nfsm_dissect(tl, u_long *, 4 * NFSX_UNSIGNED);	/* four argument words */
+	if (nfsd->nd_nqlflag == NQL_NOVAL) {
+		off = (off_t)fxdr_unsigned(u_long, *++tl);	/* offset is word 2 */
+		tl += 2;	/* length is word 4 */
+	} else {
+		fxdr_hyper(tl, &off);	/* offset is words 1-2 */
+		tl += 2;
+		if (fxdr_unsigned(u_long, *tl++))	/* word 3 non-zero: append */
+			ioflags |= IO_APPEND;
+	}
+	len = fxdr_unsigned(long, *tl);
+	if (len > NFS_MAXDATA || len <= 0) {	/* reject empty or oversized writes */
+		error = EBADRPC;
+		nfsm_reply(0);
+	}
+	if (dpos == (mtod(md, caddr_t)+md->m_len)) {	/* md used up: data starts in the next mbuf */
+		mp = md->m_next;
+		if (mp == NULL) {
+			error = EBADRPC;
+			nfsm_reply(0);
+		}
+	} else {	/* data starts part-way through md */
+		mp = md;
+		siz = dpos-mtod(mp, caddr_t);	/* bytes of md already consumed */
+		mp->m_len -= siz;
+		NFSMADV(mp, siz);
+	}
+	if (error = nfsrv_fhtovp(fhp, TRUE, &vp, cred, nfsd->nd_slp, nam, &rdonly))
+		nfsm_reply(0);
+	if (vp->v_type != VREG) {	/* only regular files can be written */
+		error = (vp->v_type == VDIR) ? EISDIR : EACCES;
+		vput(vp);
+		nfsm_reply(0);
+	}
+	nqsrv_getl(vp, NQL_WRITE);
+	if (error = nfsrv_access(vp, VWRITE, cred, rdonly, nfsd->nd_procp)) {
+		vput(vp);
+		nfsm_reply(0);
+	}
+	uiop->uio_resid = 0;
+	uiop->uio_rw = UIO_WRITE;
+	uiop->uio_segflg = UIO_SYSSPACE;
+	uiop->uio_procp = (struct proc *)0;
+	/*
+	 * Do up to NFS_MAXIOVEC mbufs of write each iteration of the
+	 * loop until done.
+	 */
+	while (len > 0 && uiop->uio_resid == 0) {	/* stops if a pass leaves data unwritten */
+		ivp = iv;
+		siz = 0;	/* bytes gathered for this pass */
+		uiop->uio_iov = ivp;
+		uiop->uio_iovcnt = 0;
+		uiop->uio_offset = off;
+		while (len > 0 && uiop->uio_iovcnt < NFS_MAXIOVEC && mp != NULL) {
+			ivp->iov_base = mtod(mp, caddr_t);
+			if (len < mp->m_len)	/* last mbuf may hold more than is wanted */
+				ivp->iov_len = xfer = len;
+			else
+				ivp->iov_len = xfer = mp->m_len;
+#ifdef notdef
+			/* Not Yet .. */
+			if (M_HASCL(mp) && (((u_long)ivp->iov_base) & CLOFSET) == 0)
+				ivp->iov_op = NULL;	/* what should it be ?? */
+			else
+				ivp->iov_op = NULL;
+#endif
+			uiop->uio_iovcnt++;
+			ivp++;
+			len -= xfer;
+			siz += xfer;
+			mp = mp->m_next;
+		}
+		if (len > 0 && mp == NULL) {	/* request holds fewer bytes than len claims */
+			error = EBADRPC;
+			vput(vp);
+			nfsm_reply(0);
+		}
+		uiop->uio_resid = siz;
+		if (error = VOP_WRITE(vp, uiop, ioflags, cred)) {
+			vput(vp);
+			nfsm_reply(0);
+		}
+		off = uiop->uio_offset;	/* next pass continues from here */
+	}
+	error = VOP_GETATTR(vp, vap, cred, nfsd->nd_procp);
+	vput(vp);
+	nfsm_reply(NFSX_FATTR(nfsd->nd_nqlflag != NQL_NOVAL));
+	nfsm_build(fp, struct nfsv2_fattr *, NFSX_FATTR(nfsd->nd_nqlflag != NQL_NOVAL));
+	nfsm_srvfillattr;
+	if (nfsd->nd_nqlflag != NQL_NOVAL) {	/* this reply form also carries va_filerev */
+		nfsm_build(tl, u_long *, 2*NFSX_UNSIGNED);
+		txdr_hyper(&vap->va_filerev, tl);
+	}
+	nfsm_srvdone;
+}
+
+/*
+ * nfs create service: makes a regular file, socket, device or fifo node;
+ * a name that already exists is resized via setattr when a size was sent
+ */
+nfsrv_create(nfsd, mrep, md, dpos, cred, nam, mrq)
+	struct nfsd *nfsd;
+	struct mbuf *mrep, *md;
+	caddr_t dpos;
+	struct ucred *cred;
+	struct mbuf *nam, **mrq;
+{
+	register struct nfsv2_fattr *fp;
+	struct vattr va;
+	register struct vattr *vap = &va;
+	register struct nfsv2_sattr *sp;
+	register u_long *tl;
+	struct nameidata nd;
+	register caddr_t cp;
+	register long t1;
+	caddr_t bpos;
+	int error = 0, rdev, cache, len, tsize;
+	char *cp2;
+	struct mbuf *mb, *mb2, *mreq;
+	struct vnode *vp;
+	nfsv2fh_t nfh;
+	fhandle_t *fhp;
+	u_quad_t frev;
+
+	nd.ni_cnd.cn_nameiop = 0;	/* tested at nfsmout below */
+	fhp = &nfh.fh_generic;
+	nfsm_srvmtofh(fhp);
+	nfsm_srvstrsiz(len, NFS_MAXNAMLEN);
+	nd.ni_cnd.cn_cred = cred;
+	nd.ni_cnd.cn_nameiop = CREATE;
+	nd.ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF | SAVESTART;
+	if (error = nfs_namei(&nd, fhp, len, nfsd->nd_slp, nam, &md, &dpos,
+	    nfsd->nd_procp))
+		nfsm_reply(0);
+	VATTR_NULL(vap);
+	nfsm_dissect(sp, struct nfsv2_sattr *, NFSX_SATTR(nfsd->nd_nqlflag != NQL_NOVAL));
+	/*
+	 * Iff doesn't exist, create it
+	 * otherwise just truncate to 0 length
+	 *   should I set the mode too ??
+	 */
+	if (nd.ni_vp == NULL) {
+		vap->va_type = IFTOVT(fxdr_unsigned(u_long, sp->sa_mode));
+		if (vap->va_type == VNON)	/* no type in the mode: regular file */
+			vap->va_type = VREG;
+		vap->va_mode = nfstov_mode(sp->sa_mode);
+		if (nfsd->nd_nqlflag == NQL_NOVAL)
+			rdev = fxdr_unsigned(long, sp->sa_nfssize);	/* device number is taken from the size field */
+		else
+			rdev = fxdr_unsigned(long, sp->sa_nqrdev);
+		if (vap->va_type == VREG || vap->va_type == VSOCK) {
+			vrele(nd.ni_startdir);
+			nqsrv_getl(nd.ni_dvp, NQL_WRITE);
+			if (error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, vap))
+				nfsm_reply(0);
+			FREE(nd.ni_cnd.cn_pnbuf, M_NAMEI);
+		} else if (vap->va_type == VCHR || vap->va_type == VBLK ||
+			vap->va_type == VFIFO) {
+			if (vap->va_type == VCHR && rdev == 0xffffffff)	/* char device with all-ones rdev means fifo */
+				vap->va_type = VFIFO;
+			if (vap->va_type == VFIFO) {
+#ifndef FIFO
+				VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
+				vput(nd.ni_dvp);
+				error = ENXIO;
+				goto out;
+#endif /* FIFO */
+			} else if (error = suser(cred, (u_short *)0)) {	/* device nodes need superuser */
+				VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
+				vput(nd.ni_dvp);
+				goto out;
+			} else
+				vap->va_rdev = (dev_t)rdev;
+			nqsrv_getl(nd.ni_dvp, NQL_WRITE);
+			if (error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, vap)) {
+				vrele(nd.ni_startdir);
+				nfsm_reply(0);
+			}
+			nd.ni_cnd.cn_nameiop = LOOKUP;	/* look the new node up to get its vnode */
+			nd.ni_cnd.cn_flags &= ~(LOCKPARENT | SAVESTART);
+			nd.ni_cnd.cn_proc = nfsd->nd_procp;
+			nd.ni_cnd.cn_cred = nfsd->nd_procp->p_ucred;
+			if (error = lookup(&nd)) {
+				free(nd.ni_cnd.cn_pnbuf, M_NAMEI);
+				nfsm_reply(0);
+			}
+			FREE(nd.ni_cnd.cn_pnbuf, M_NAMEI);
+			if (nd.ni_cnd.cn_flags & ISSYMLINK) {
+				vrele(nd.ni_dvp);
+				vput(nd.ni_vp);
+				VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);	/* NOTE(review): runs after ni_dvp was released and cn_pnbuf freed -- confirm */
+				error = EINVAL;
+				nfsm_reply(0);
+			}
+		} else {	/* any other type cannot be created here */
+			VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
+			vput(nd.ni_dvp);
+			error = ENXIO;
+			goto out;
+		}
+		vp = nd.ni_vp;
+	} else {	/* name already exists */
+		vrele(nd.ni_startdir);
+		free(nd.ni_cnd.cn_pnbuf, M_NAMEI);
+		vp = nd.ni_vp;
+		if (nd.ni_dvp == vp)
+			vrele(nd.ni_dvp);
+		else
+			vput(nd.ni_dvp);
+		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);	/* NOTE(review): other handlers abort before releasing ni_dvp -- confirm order */
+		if (nfsd->nd_nqlflag == NQL_NOVAL) {
+			tsize = fxdr_unsigned(long, sp->sa_nfssize);
+			if (tsize != -1)
+				vap->va_size = (u_quad_t)tsize;
+			else
+				vap->va_size = -1;	/* -1: leave the size alone */
+		} else
+			fxdr_hyper(&sp->sa_nqsize, &vap->va_size);
+		if (vap->va_size != -1) {	/* a size was supplied: apply it */
+			if (error = nfsrv_access(vp, VWRITE, cred,
+			    (nd.ni_cnd.cn_flags & RDONLY), nfsd->nd_procp)) {
+				vput(vp);
+				nfsm_reply(0);
+			}
+			nqsrv_getl(vp, NQL_WRITE);
+			if (error = VOP_SETATTR(vp, vap, cred, nfsd->nd_procp)) {
+				vput(vp);
+				nfsm_reply(0);
+			}
+		}
+	}
+	bzero((caddr_t)fhp, sizeof(nfh));	/* build the file handle for the reply */
+	fhp->fh_fsid = vp->v_mount->mnt_stat.f_fsid;
+	if (error = VFS_VPTOFH(vp, &fhp->fh_fid)) {
+		vput(vp);
+		nfsm_reply(0);
+	}
+	error = VOP_GETATTR(vp, vap, cred, nfsd->nd_procp);
+	vput(vp);
+	nfsm_reply(NFSX_FH+NFSX_FATTR(nfsd->nd_nqlflag != NQL_NOVAL));
+	nfsm_srvfhtom(fhp);
+	nfsm_build(fp, struct nfsv2_fattr *, NFSX_FATTR(nfsd->nd_nqlflag != NQL_NOVAL));
+	nfsm_srvfillattr;
+	return (error);
+nfsmout:	/* no goto in this function names it; presumably reached from the nfsm_* macros */
+	if (nd.ni_cnd.cn_nameiop || nd.ni_cnd.cn_flags)
+		vrele(nd.ni_startdir);
+	VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
+	if (nd.ni_dvp == nd.ni_vp)
+		vrele(nd.ni_dvp);
+	else
+		vput(nd.ni_dvp);
+	if (nd.ni_vp)
+		vput(nd.ni_vp);
+	return (error);
+
+out:	/* error exits that still hold startdir and the path buffer */
+	vrele(nd.ni_startdir);
+	free(nd.ni_cnd.cn_pnbuf, M_NAMEI);
+	nfsm_reply(0);
+}
+
+/*
+ * nfs remove service: looks the name up for DELETE and calls VOP_REMOVE()
+ */
+nfsrv_remove(nfsd, mrep, md, dpos, cred, nam, mrq)
+	struct nfsd *nfsd;
+	struct mbuf *mrep, *md;
+	caddr_t dpos;
+	struct ucred *cred;
+	struct mbuf *nam, **mrq;
+{
+	struct nameidata nd;
+	register u_long *tl;
+	register long t1;
+	caddr_t bpos;
+	int error = 0, cache, len;
+	char *cp2;
+	struct mbuf *mb, *mreq;
+	struct vnode *vp;
+	nfsv2fh_t nfh;
+	fhandle_t *fhp;
+	u_quad_t frev;
+
+	fhp = &nfh.fh_generic;
+	nfsm_srvmtofh(fhp);
+	nfsm_srvstrsiz(len, NFS_MAXNAMLEN);
+	nd.ni_cnd.cn_cred = cred;
+	nd.ni_cnd.cn_nameiop = DELETE;
+	nd.ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF;
+	if (error = nfs_namei(&nd, fhp, len, nfsd->nd_slp, nam, &md, &dpos,
+	    nfsd->nd_procp))
+		nfsm_reply(0);
+	vp = nd.ni_vp;
+	if (vp->v_type == VDIR &&
+		(error = suser(cred, (u_short *)0)))
+		goto out;	/* removing a directory needs superuser */
+	/*
+	 * The root of a mounted filesystem cannot be deleted.
+	 */
+	if (vp->v_flag & VROOT) {
+		error = EBUSY;
+		goto out;
+	}
+	if (vp->v_flag & VTEXT)	/* shared text: try to uncache it first */
+		(void) vnode_pager_uncache(vp);
+out:	/* both the success and the failure paths arrive here */
+	if (!error) {
+		nqsrv_getl(nd.ni_dvp, NQL_WRITE);
+		nqsrv_getl(vp, NQL_WRITE);
+		error = VOP_REMOVE(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
+	} else {	/* undo the lookup: abort, then release parent and leaf */
+		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
+		if (nd.ni_dvp == vp)
+			vrele(nd.ni_dvp);
+		else
+			vput(nd.ni_dvp);
+		vput(vp);
+	}
+	nfsm_reply(0);
+	nfsm_srvdone;
+}
+
+/*
+ * nfs rename service
+ * Looks up the source and target names, checks that the pair may be
+ * renamed and calls VOP_RENAME(); on a failed check both lookups are undone.
+ */
+nfsrv_rename(nfsd, mrep, md, dpos, cred, nam, mrq)
+	struct nfsd *nfsd;
+	struct mbuf *mrep, *md;
+	caddr_t dpos;
+	struct ucred *cred;
+	struct mbuf *nam, **mrq;
+{
+	register u_long *tl;
+	register long t1;
+	caddr_t bpos;
+	int error = 0, cache, len, len2;
+	char *cp2;
+	struct mbuf *mb, *mreq;
+	struct nameidata fromnd, tond;
+	struct vnode *fvp, *tvp, *tdvp;
+	nfsv2fh_t fnfh, tnfh;
+	fhandle_t *ffhp, *tfhp;
+	u_quad_t frev;
+	uid_t saved_uid;
+
+	ffhp = &fnfh.fh_generic;
+	tfhp = &tnfh.fh_generic;
+	fromnd.ni_cnd.cn_nameiop = 0;	/* tested at nfsmout below */
+	tond.ni_cnd.cn_nameiop = 0;
+	nfsm_srvmtofh(ffhp);
+	nfsm_srvstrsiz(len, NFS_MAXNAMLEN);
+	/* Save cr_uid: nfs_namei() may remap it, so it is reset before call two. */
+	saved_uid = cred->cr_uid;
+	fromnd.ni_cnd.cn_cred = cred;
+	fromnd.ni_cnd.cn_nameiop = DELETE;
+	fromnd.ni_cnd.cn_flags = WANTPARENT | SAVESTART;
+	if (error = nfs_namei(&fromnd, ffhp, len, nfsd->nd_slp, nam, &md,
+	    &dpos, nfsd->nd_procp))
+		nfsm_reply(0);
+	fvp = fromnd.ni_vp;
+	nfsm_srvmtofh(tfhp);
+	nfsm_strsiz(len2, NFS_MAXNAMLEN);
+	cred->cr_uid = saved_uid;
+	tond.ni_cnd.cn_cred = cred;
+	tond.ni_cnd.cn_nameiop = RENAME;
+	tond.ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART;
+	if (error = nfs_namei(&tond, tfhp, len2, nfsd->nd_slp, nam, &md,
+	    &dpos, nfsd->nd_procp)) {	/* target lookup failed: undo the source lookup */
+		VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd);
+		vrele(fromnd.ni_dvp);
+		vrele(fvp);
+		goto out1;
+	}
+	tdvp = tond.ni_dvp;
+	tvp = tond.ni_vp;
+	if (tvp != NULL) {	/* target exists: types must match */
+		if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
+			error = EISDIR;
+			goto out;
+		} else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
+			error = ENOTDIR;
+			goto out;
+		}
+		if (tvp->v_type == VDIR && tvp->v_mountedhere) {
+			error = EXDEV;
+			goto out;
+		}
+	}
+	if (fvp->v_type == VDIR && fvp->v_mountedhere) {
+		error = EBUSY;
+		goto out;
+	}
+	if (fvp->v_mount != tdvp->v_mount) {	/* no renames across mounts */
+		error = EXDEV;
+		goto out;
+	}
+	if (fvp == tdvp)	/* source is the target directory itself */
+		error = EINVAL;
+	/*
+	 * Renaming a name onto itself (same vnode, same directory, same
+	 * name) is a no-op: mark it with -1 so the cleanup below runs.
+	 */
+	if (fvp == tvp && fromnd.ni_dvp == tdvp &&
+	    fromnd.ni_cnd.cn_namelen == tond.ni_cnd.cn_namelen &&
+	    !bcmp(fromnd.ni_cnd.cn_nameptr, tond.ni_cnd.cn_nameptr,
+	      fromnd.ni_cnd.cn_namelen))
+		error = -1;
+out:
+	if (!error) {
+		nqsrv_getl(fromnd.ni_dvp, NQL_WRITE);
+		nqsrv_getl(tdvp, NQL_WRITE);
+		if (tvp)
+			nqsrv_getl(tvp, NQL_WRITE);
+		error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd,
+				   tond.ni_dvp, tond.ni_vp, &tond.ni_cnd);
+	} else {	/* undo both lookups */
+		VOP_ABORTOP(tond.ni_dvp, &tond.ni_cnd);
+		if (tdvp == tvp)
+			vrele(tdvp);
+		else
+			vput(tdvp);
+		if (tvp)
+			vput(tvp);
+		VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd);
+		vrele(fromnd.ni_dvp);
+		vrele(fvp);
+		if (error == -1)	/* internal no-op marker, not an errno: */
+			error = 0;	/* the client must see success */
+	}
+	vrele(tond.ni_startdir);
+	FREE(tond.ni_cnd.cn_pnbuf, M_NAMEI);
+out1:
+	vrele(fromnd.ni_startdir);
+	FREE(fromnd.ni_cnd.cn_pnbuf, M_NAMEI);
+	nfsm_reply(0);
+	return (error);
+
+nfsmout:	/* no goto in this function names it; presumably reached from the nfsm_* macros */
+	if (tond.ni_cnd.cn_nameiop || tond.ni_cnd.cn_flags) {
+		vrele(tond.ni_startdir);
+		FREE(tond.ni_cnd.cn_pnbuf, M_NAMEI);
+	}
+	if (fromnd.ni_cnd.cn_nameiop || fromnd.ni_cnd.cn_flags) {
+		vrele(fromnd.ni_startdir);
+		FREE(fromnd.ni_cnd.cn_pnbuf, M_NAMEI);
+		VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd);
+		vrele(fromnd.ni_dvp);
+		vrele(fvp);
+	}
+	return (error);
+}
+
+/*
+ * nfs link service: first handle is the file, second handle + name the link
+ */
+nfsrv_link(nfsd, mrep, md, dpos, cred, nam, mrq)
+	struct nfsd *nfsd;
+	struct mbuf *mrep, *md;
+	caddr_t dpos;
+	struct ucred *cred;
+	struct mbuf *nam, **mrq;
+{
+	struct nameidata nd;
+	register u_long *tl;
+	register long t1;
+	caddr_t bpos;
+	int error = 0, rdonly, cache, len;
+	char *cp2;
+	struct mbuf *mb, *mreq;
+	struct vnode *vp, *xp;
+	nfsv2fh_t nfh, dnfh;
+	fhandle_t *fhp, *dfhp;
+	u_quad_t frev;
+
+	fhp = &nfh.fh_generic;
+	dfhp = &dnfh.fh_generic;
+	nfsm_srvmtofh(fhp);	/* handle of the existing file */
+	nfsm_srvmtofh(dfhp);	/* handle of the directory for the new name */
+	nfsm_srvstrsiz(len, NFS_MAXNAMLEN);
+	if (error = nfsrv_fhtovp(fhp, FALSE, &vp, cred, nfsd->nd_slp, nam, &rdonly))
+		nfsm_reply(0);
+	if (vp->v_type == VDIR && (error = suser(cred, (u_short *)0)))	/* linking a directory needs superuser */
+		goto out1;
+	nd.ni_cnd.cn_cred = cred;
+	nd.ni_cnd.cn_nameiop = CREATE;
+	nd.ni_cnd.cn_flags = LOCKPARENT;
+	if (error = nfs_namei(&nd, dfhp, len, nfsd->nd_slp, nam, &md, &dpos,
+	    nfsd->nd_procp))
+		goto out1;
+	xp = nd.ni_vp;
+	if (xp != NULL) {	/* the new name is already taken */
+		error = EEXIST;
+		goto out;
+	}
+	xp = nd.ni_dvp;
+	if (vp->v_mount != xp->v_mount)	/* no links across mounts */
+		error = EXDEV;
+out:
+	if (!error) {
+		nqsrv_getl(vp, NQL_WRITE);
+		nqsrv_getl(xp, NQL_WRITE);
+		error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
+	} else {	/* undo the lookup of the new name */
+		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
+		if (nd.ni_dvp == nd.ni_vp)
+			vrele(nd.ni_dvp);
+		else
+			vput(nd.ni_dvp);
+		if (nd.ni_vp)
+			vrele(nd.ni_vp);
+	}
+out1:
+	vrele(vp);	/* drop the reference on the file being linked */
+	nfsm_reply(0);
+	nfsm_srvdone;
+}
+
+/*
+ * nfs symbolic link service: creates name -> path text via VOP_SYMLINK()
+ */
+nfsrv_symlink(nfsd, mrep, md, dpos, cred, nam, mrq)
+	struct nfsd *nfsd;
+	struct mbuf *mrep, *md;
+	caddr_t dpos;
+	struct ucred *cred;
+	struct mbuf *nam, **mrq;
+{
+	struct vattr va;
+	struct nameidata nd;
+	register struct vattr *vap = &va;
+	register u_long *tl;
+	register long t1;
+	struct nfsv2_sattr *sp;
+	caddr_t bpos;
+	struct uio io;
+	struct iovec iv;
+	int error = 0, cache, len, len2;
+	char *pathcp, *cp2;
+	struct mbuf *mb, *mreq;
+	nfsv2fh_t nfh;
+	fhandle_t *fhp;
+	u_quad_t frev;
+
+	pathcp = (char *)0;	/* the exit paths free it only when set */
+	fhp = &nfh.fh_generic;
+	nfsm_srvmtofh(fhp);
+	nfsm_srvstrsiz(len, NFS_MAXNAMLEN);
+	nd.ni_cnd.cn_cred = cred;
+	nd.ni_cnd.cn_nameiop = CREATE;
+	nd.ni_cnd.cn_flags = LOCKPARENT;
+	if (error = nfs_namei(&nd, fhp, len, nfsd->nd_slp, nam, &md, &dpos,
+	    nfsd->nd_procp))
+		goto out;
+	nfsm_strsiz(len2, NFS_MAXPATHLEN);	/* length of the link text */
+	MALLOC(pathcp, caddr_t, len2 + 1, M_TEMP, M_WAITOK);	/* +1 for the NUL stored below */
+	iv.iov_base = pathcp;
+	iv.iov_len = len2;
+	io.uio_resid = len2;
+	io.uio_offset = 0;
+	io.uio_iov = &iv;
+	io.uio_iovcnt = 1;
+	io.uio_segflg = UIO_SYSSPACE;
+	io.uio_rw = UIO_READ;
+	io.uio_procp = (struct proc *)0;
+	nfsm_mtouio(&io, len2);	/* the uio targets pathcp; presumably copies the link text from the request */
+	nfsm_dissect(sp, struct nfsv2_sattr *, NFSX_SATTR(nfsd->nd_nqlflag != NQL_NOVAL));
+	*(pathcp + len2) = '\0';
+	if (nd.ni_vp) {	/* the name already exists */
+		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
+		if (nd.ni_dvp == nd.ni_vp)
+			vrele(nd.ni_dvp);
+		else
+			vput(nd.ni_dvp);
+		vrele(nd.ni_vp);
+		error = EEXIST;
+		goto out;
+	}
+	VATTR_NULL(vap);
+	vap->va_mode = fxdr_unsigned(u_short, sp->sa_mode);	/* only the mode is taken from the sattr */
+	nqsrv_getl(nd.ni_dvp, NQL_WRITE);
+	error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, vap, pathcp);
+out:
+	if (pathcp)
+		FREE(pathcp, M_TEMP);
+	nfsm_reply(0);
+	return (error);
+nfsmout:	/* no goto in this function names it; presumably reached from the nfsm_* macros */
+	VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
+	if (nd.ni_dvp == nd.ni_vp)
+		vrele(nd.ni_dvp);
+	else
+		vput(nd.ni_dvp);
+	if (nd.ni_vp)
+		vrele(nd.ni_vp);
+	if (pathcp)
+		FREE(pathcp, M_TEMP);
+	return (error);
+}
+
+/*
+ * nfs mkdir service: makes the directory, replies with its handle and attrs
+ */
+nfsrv_mkdir(nfsd, mrep, md, dpos, cred, nam, mrq)
+	struct nfsd *nfsd;
+	struct mbuf *mrep, *md;
+	caddr_t dpos;
+	struct ucred *cred;
+	struct mbuf *nam, **mrq;
+{
+	struct vattr va;
+	register struct vattr *vap = &va;
+	register struct nfsv2_fattr *fp;
+	struct nameidata nd;
+	register caddr_t cp;
+	register u_long *tl;
+	register long t1;
+	caddr_t bpos;
+	int error = 0, cache, len;
+	char *cp2;
+	struct mbuf *mb, *mb2, *mreq;
+	struct vnode *vp;
+	nfsv2fh_t nfh;
+	fhandle_t *fhp;
+	u_quad_t frev;
+
+	fhp = &nfh.fh_generic;
+	nfsm_srvmtofh(fhp);
+	nfsm_srvstrsiz(len, NFS_MAXNAMLEN);
+	nd.ni_cnd.cn_cred = cred;
+	nd.ni_cnd.cn_nameiop = CREATE;
+	nd.ni_cnd.cn_flags = LOCKPARENT;
+	if (error = nfs_namei(&nd, fhp, len, nfsd->nd_slp, nam, &md, &dpos,
+	    nfsd->nd_procp))
+		nfsm_reply(0);
+	nfsm_dissect(tl, u_long *, NFSX_UNSIGNED);	/* only the word after the name is read */
+	VATTR_NULL(vap);
+	vap->va_type = VDIR;
+	vap->va_mode = nfstov_mode(*tl++);	/* that word is the mode */
+	vp = nd.ni_vp;
+	if (vp != NULL) {	/* the name already exists */
+		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
+		if (nd.ni_dvp == vp)
+			vrele(nd.ni_dvp);
+		else
+			vput(nd.ni_dvp);
+		vrele(vp);
+		error = EEXIST;
+		nfsm_reply(0);
+	}
+	nqsrv_getl(nd.ni_dvp, NQL_WRITE);
+	if (error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, vap))
+		nfsm_reply(0);
+	vp = nd.ni_vp;
+	bzero((caddr_t)fhp, sizeof(nfh));	/* build the file handle for the reply */
+	fhp->fh_fsid = vp->v_mount->mnt_stat.f_fsid;
+	if (error = VFS_VPTOFH(vp, &fhp->fh_fid)) {
+		vput(vp);
+		nfsm_reply(0);
+	}
+	error = VOP_GETATTR(vp, vap, cred, nfsd->nd_procp);
+	vput(vp);
+	nfsm_reply(NFSX_FH+NFSX_FATTR(nfsd->nd_nqlflag != NQL_NOVAL));
+	nfsm_srvfhtom(fhp);
+	nfsm_build(fp, struct nfsv2_fattr *, NFSX_FATTR(nfsd->nd_nqlflag != NQL_NOVAL));
+	nfsm_srvfillattr;
+	return (error);
+nfsmout:	/* no goto in this function names it; presumably reached from the nfsm_* macros */
+	VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
+	if (nd.ni_dvp == nd.ni_vp)
+		vrele(nd.ni_dvp);
+	else
+		vput(nd.ni_dvp);
+	if (nd.ni_vp)
+		vrele(nd.ni_vp);
+	return (error);
+}
+
+/*
+ * nfs rmdir service: looks the name up for DELETE and calls VOP_RMDIR()
+ */
+nfsrv_rmdir(nfsd, mrep, md, dpos, cred, nam, mrq)
+	struct nfsd *nfsd;
+	struct mbuf *mrep, *md;
+	caddr_t dpos;
+	struct ucred *cred;
+	struct mbuf *nam, **mrq;
+{
+	register u_long *tl;
+	register long t1;
+	caddr_t bpos;
+	int error = 0, cache, len;
+	char *cp2;
+	struct mbuf *mb, *mreq;
+	struct vnode *vp;
+	nfsv2fh_t nfh;
+	fhandle_t *fhp;
+	struct nameidata nd;
+	u_quad_t frev;
+
+	fhp = &nfh.fh_generic;
+	nfsm_srvmtofh(fhp);
+	nfsm_srvstrsiz(len, NFS_MAXNAMLEN);
+	nd.ni_cnd.cn_cred = cred;
+	nd.ni_cnd.cn_nameiop = DELETE;
+	nd.ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF;
+	if (error = nfs_namei(&nd, fhp, len, nfsd->nd_slp, nam, &md, &dpos,
+	    nfsd->nd_procp))
+		nfsm_reply(0);
+	vp = nd.ni_vp;
+	if (vp->v_type != VDIR) {	/* only directories can be rmdir'ed */
+		error = ENOTDIR;
+		goto out;
+	}
+	/*
+	 * No rmdir "." please.
+	 */
+	if (nd.ni_dvp == vp) {
+		error = EINVAL;
+		goto out;
+	}
+	/*
+	 * The root of a mounted filesystem cannot be deleted.
+	 */
+	if (vp->v_flag & VROOT)
+		error = EBUSY;
+out:	/* both the success and the failure paths arrive here */
+	if (!error) {
+		nqsrv_getl(nd.ni_dvp, NQL_WRITE);
+		nqsrv_getl(vp, NQL_WRITE);
+		error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
+	} else {	/* undo the lookup: abort, then release parent and leaf */
+		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
+		if (nd.ni_dvp == nd.ni_vp)
+			vrele(nd.ni_dvp);
+		else
+			vput(nd.ni_dvp);
+		vput(vp);
+	}
+	nfsm_reply(0);
+	nfsm_srvdone;
+}
+
+/*
+ * nfs readdir service
+ * - mallocs what it thinks is enough to read
+ *	count rounded up to a multiple of NFS_DIRBLKSIZ <= NFS_MAXREADDIR
+ * - calls VOP_READDIR()
+ * - loops around building the reply
+ *	if the output generated exceeds count break out of loop
+ *	The nfsm_clget macro is used here so that the reply will be packed
+ *	tightly in mbuf clusters.
+ * - it only knows that it has encountered eof when the VOP_READDIR()
+ *	reads nothing
+ * - as such one readdir rpc will return eof false although you are there
+ *	and then the next will return eof
+ * - it trims out records with d_fileno == 0
+ *	this doesn't matter for Unix clients, but such records might confuse
+ *	clients for other operating systems.
+ * NB: It is tempting to set eof to true if the VOP_READDIR() reads less
+ *	than requested, but this may not apply to all filesystems. For
+ *	example, client NFS does not { although it is never remote mounted
+ *	anyhow }
+ *     The alternate call nqnfsrv_readdirlook() does lookups as well.
+ * PS: The NFS protocol spec. does not clarify what the "count" byte
+ *	argument is a count of.. just name strings and file id's or the
+ *	entire reply rpc or ...
+ *	I tried just file name and id sizes and it confused the Sun client,
+ *	so I am using the full rpc size now. The "paranoia.." comment refers
+ *	to including the status longwords that are not a part of the dir.
+ *	"entry" structures, but are in the rpc.
+ */
+struct flrep {	/* per-entry lookup data copied out whole by nqnfsrv_readdirlook() */
+	u_long fl_cachable;	/* cachable flag from nqsrv_getlease(), xdr form */
+	u_long fl_duration;	/* lease duration in xdr form, or 0 for none */
+	u_long fl_frev[2];	/* file revision, stored by txdr_hyper() */
+	nfsv2fh_t fl_nfh;	/* file handle of the entry */
+	u_long fl_fattr[NFSX_NQFATTR / sizeof (u_long)];	/* attributes of the entry */
+};
+
+nfsrv_readdir(nfsd, mrep, md, dpos, cred, nam, mrq)
+	struct nfsd *nfsd;
+	struct mbuf *mrep, *md;
+	caddr_t dpos;
+	struct ucred *cred;
+	struct mbuf *nam, **mrq;
+{
+	register char *bp, *be;
+	register struct mbuf *mp;
+	register struct dirent *dp;
+	register caddr_t cp;
+	register u_long *tl;
+	register long t1;
+	caddr_t bpos;
+	struct mbuf *mb, *mb2, *mreq, *mp2;
+	char *cpos, *cend, *cp2, *rbuf;
+	struct vnode *vp;
+	nfsv2fh_t nfh;
+	fhandle_t *fhp;
+	struct uio io;
+	struct iovec iv;
+	int len, nlen, rem, xfer, tsiz, i, error = 0;
+	int siz, cnt, fullsiz, eofflag, rdonly, cache;
+	u_quad_t frev;
+	u_long on, off, toff;
+
+	fhp = &nfh.fh_generic;
+	nfsm_srvmtofh(fhp);
+	nfsm_dissect(tl, u_long *, 2*NFSX_UNSIGNED);
+	toff = fxdr_unsigned(u_long, *tl++);	/* directory offset asked for */
+	off = (toff & ~(NFS_DIRBLKSIZ-1));	/* start of the NFS_DIRBLKSIZ block holding it */
+	on = (toff & (NFS_DIRBLKSIZ-1));	/* position inside that block */
+	cnt = fxdr_unsigned(int, *tl);	/* NOTE(review): client value; only an upper bound is applied below */
+	siz = ((cnt+NFS_DIRBLKSIZ-1) & ~(NFS_DIRBLKSIZ-1));	/* cnt rounded up to whole blocks */
+	if (cnt > NFS_MAXREADDIR)
+		siz = NFS_MAXREADDIR;
+	fullsiz = siz;	/* siz shrinks after a short read; fullsiz does not */
+	if (error = nfsrv_fhtovp(fhp, TRUE, &vp, cred, nfsd->nd_slp, nam, &rdonly))
+		nfsm_reply(0);
+	nqsrv_getl(vp, NQL_READ);
+	if (error = nfsrv_access(vp, VEXEC, cred, rdonly, nfsd->nd_procp)) {
+		vput(vp);
+		nfsm_reply(0);
+	}
+	VOP_UNLOCK(vp);	/* reference is kept; the exits below use vrele() */
+	MALLOC(rbuf, caddr_t, siz, M_TEMP, M_WAITOK);
+again:
+	iv.iov_base = rbuf;
+	iv.iov_len = fullsiz;
+	io.uio_iov = &iv;
+	io.uio_iovcnt = 1;
+	io.uio_offset = (off_t)off;
+	io.uio_resid = fullsiz;
+	io.uio_segflg = UIO_SYSSPACE;
+	io.uio_rw = UIO_READ;
+	io.uio_procp = (struct proc *)0;
+	error = VOP_READDIR(vp, &io, cred);
+	off = (off_t)io.uio_offset;	/* where the next read would start */
+	if (error) {
+		vrele(vp);
+		free((caddr_t)rbuf, M_TEMP);
+		nfsm_reply(0);
+	}
+	if (io.uio_resid < fullsiz)	/* something was read: not at eof */
+		eofflag = 0;
+	else
+		eofflag = 1;
+	if (io.uio_resid) {
+		siz -= io.uio_resid;	/* bytes actually read */
+
+		/*
+		 * If nothing read, return eof
+		 * rpc reply
+		 */
+		if (siz == 0) {
+			vrele(vp);
+			nfsm_reply(2*NFSX_UNSIGNED);
+			nfsm_build(tl, u_long *, 2*NFSX_UNSIGNED);
+			*tl++ = nfs_false;	/* no entry follows */
+			*tl = nfs_true;	/* eof */
+			FREE((caddr_t)rbuf, M_TEMP);
+			return (0);
+		}
+	}
+
+	/*
+	 * Check for degenerate cases of nothing useful read.
+	 * If so go try again
+	 */
+	cpos = rbuf + on;
+	cend = rbuf + siz;
+	dp = (struct dirent *)cpos;
+	while (cpos < cend && dp->d_fileno == 0) {	/* skip entries with no file number */
+		cpos += dp->d_reclen;
+		dp = (struct dirent *)cpos;
+	}
+	if (cpos >= cend) {	/* nothing but empty entries: read the next chunk */
+		toff = off;
+		siz = fullsiz;
+		on = 0;
+		goto again;
+	}
+
+	cpos = rbuf + on;
+	cend = rbuf + siz;
+	dp = (struct dirent *)cpos;
+	len = 3*NFSX_UNSIGNED;	/* paranoia, probably can be 0 */
+	nfsm_reply(siz);
+	mp = mp2 = mb;
+	bp = bpos;	/* bp is the write position; be is the end of usable space in mp */
+	be = bp + M_TRAILINGSPACE(mp);
+
+	/* Loop through the records and build reply */
+	while (cpos < cend) {
+		if (dp->d_fileno != 0) {
+			nlen = dp->d_namlen;
+			rem = nfsm_rndup(nlen)-nlen;	/* pad bytes after the name */
+			len += (4*NFSX_UNSIGNED + nlen + rem);
+			if (len > cnt) {	/* this entry would exceed the client's count */
+				eofflag = 0;
+				break;
+			}
+			/*
+			 * Build the directory record xdr from
+			 * the dirent entry.
+			 */
+			nfsm_clget;	/* defined elsewhere; presumably points tl at bp, adding an mbuf when full */
+			*tl = nfs_true;	/* an entry follows */
+			bp += NFSX_UNSIGNED;
+			nfsm_clget;
+			*tl = txdr_unsigned(dp->d_fileno);
+			bp += NFSX_UNSIGNED;
+			nfsm_clget;
+			*tl = txdr_unsigned(nlen);
+			bp += NFSX_UNSIGNED;
+	
+			/* And loop around copying the name */
+			xfer = nlen;
+			cp = dp->d_name;
+			while (xfer > 0) {
+				nfsm_clget;
+				if ((bp+xfer) > be)	/* name does not fit in what is left of this mbuf */
+					tsiz = be-bp;
+				else
+					tsiz = xfer;
+				bcopy(cp, bp, tsiz);
+				bp += tsiz;
+				xfer -= tsiz;
+				if (xfer > 0)
+					cp += tsiz;
+			}
+			/* And null pad to a long boundary */
+			for (i = 0; i < rem; i++)
+				*bp++ = '\0';
+			nfsm_clget;
+	
+			/* Finish off the record */
+			toff += dp->d_reclen;	/* the entry carries the offset of the record after it */
+			*tl = txdr_unsigned(toff);
+			bp += NFSX_UNSIGNED;
+		} else
+			toff += dp->d_reclen;	/* skipped entry still advances the offset */
+		cpos += dp->d_reclen;
+		dp = (struct dirent *)cpos;
+	}
+	vrele(vp);
+	nfsm_clget;
+	*tl = nfs_false;	/* no entry follows */
+	bp += NFSX_UNSIGNED;
+	nfsm_clget;
+	if (eofflag)
+		*tl = nfs_true;
+	else
+		*tl = nfs_false;
+	bp += NFSX_UNSIGNED;
+	if (mp != mb) {	/* set the length of the last mbuf written */
+		if (bp < be)
+			mp->m_len = bp - mtod(mp, caddr_t);
+	} else
+		mp->m_len += bp - bpos;
+	FREE(rbuf, M_TEMP);
+	nfsm_srvdone;
+}
+
+nqnfsrv_readdirlook(nfsd, mrep, md, dpos, cred, nam, mrq)	/* readdir that also returns handle, attrs and lease per entry */
+	struct nfsd *nfsd;
+	struct mbuf *mrep, *md;
+	caddr_t dpos;
+	struct ucred *cred;
+	struct mbuf *nam, **mrq;
+{
+	register char *bp, *be;
+	register struct mbuf *mp;
+	register struct dirent *dp;
+	register caddr_t cp;
+	register u_long *tl;
+	register long t1;
+	caddr_t bpos;
+	struct mbuf *mb, *mb2, *mreq, *mp2;
+	char *cpos, *cend, *cp2, *rbuf;
+	struct vnode *vp, *nvp;
+	struct flrep fl;
+	nfsv2fh_t nfh;
+	fhandle_t *fhp;
+	struct uio io;
+	struct iovec iv;
+	struct vattr va, *vap = &va;
+	struct nfsv2_fattr *fp;
+	int len, nlen, rem, xfer, tsiz, i, error = 0, duration2, cache2;
+	int siz, cnt, fullsiz, eofflag, rdonly, cache;
+	u_quad_t frev, frev2;
+	u_long on, off, toff;
+
+	fhp = &nfh.fh_generic;
+	nfsm_srvmtofh(fhp);
+	nfsm_dissect(tl, u_long *, 3*NFSX_UNSIGNED);
+	toff = fxdr_unsigned(u_long, *tl++);	/* directory offset asked for */
+	off = (toff & ~(NFS_DIRBLKSIZ-1));	/* start of the NFS_DIRBLKSIZ block holding it */
+	on = (toff & (NFS_DIRBLKSIZ-1));	/* position inside that block */
+	cnt = fxdr_unsigned(int, *tl++);	/* NOTE(review): client value; only an upper bound is applied below */
+	duration2 = fxdr_unsigned(int, *tl);	/* non-zero: a lease is wanted on each entry */
+	siz = ((cnt+NFS_DIRBLKSIZ-1) & ~(NFS_DIRBLKSIZ-1));
+	if (cnt > NFS_MAXREADDIR)
+		siz = NFS_MAXREADDIR;
+	fullsiz = siz;	/* siz shrinks after a short read; fullsiz does not */
+	if (error = nfsrv_fhtovp(fhp, TRUE, &vp, cred, nfsd->nd_slp, nam, &rdonly))
+		nfsm_reply(0);
+	nqsrv_getl(vp, NQL_READ);
+	if (error = nfsrv_access(vp, VEXEC, cred, rdonly, nfsd->nd_procp)) {
+		vput(vp);
+		nfsm_reply(0);
+	}
+	VOP_UNLOCK(vp);	/* reference is kept; the exits below use vrele() */
+	MALLOC(rbuf, caddr_t, siz, M_TEMP, M_WAITOK);
+again:
+	iv.iov_base = rbuf;
+	iv.iov_len = fullsiz;
+	io.uio_iov = &iv;
+	io.uio_iovcnt = 1;
+	io.uio_offset = (off_t)off;
+	io.uio_resid = fullsiz;
+	io.uio_segflg = UIO_SYSSPACE;
+	io.uio_rw = UIO_READ;
+	io.uio_procp = (struct proc *)0;
+	error = VOP_READDIR(vp, &io, cred);
+	off = (u_long)io.uio_offset;	/* where the next read would start */
+	if (error) {
+		vrele(vp);
+		free((caddr_t)rbuf, M_TEMP);
+		nfsm_reply(0);
+	}
+	if (io.uio_resid < fullsiz)	/* something was read: not at eof */
+		eofflag = 0;
+	else
+		eofflag = 1;
+	if (io.uio_resid) {
+		siz -= io.uio_resid;	/* bytes actually read */
+
+		/*
+		 * If nothing read, return eof
+		 * rpc reply
+		 */
+		if (siz == 0) {
+			vrele(vp);
+			nfsm_reply(2 * NFSX_UNSIGNED);
+			nfsm_build(tl, u_long *, 2 * NFSX_UNSIGNED);
+			*tl++ = nfs_false;	/* no entry follows */
+			*tl = nfs_true;	/* eof */
+			FREE((caddr_t)rbuf, M_TEMP);
+			return (0);
+		}
+	}
+
+	/*
+	 * Check for degenerate cases of nothing useful read.
+	 * If so go try again
+	 */
+	cpos = rbuf + on;
+	cend = rbuf + siz;
+	dp = (struct dirent *)cpos;
+	while (cpos < cend && dp->d_fileno == 0) {	/* skip entries with no file number */
+		cpos += dp->d_reclen;
+		dp = (struct dirent *)cpos;
+	}
+	if (cpos >= cend) {	/* nothing but empty entries: read the next chunk */
+		toff = off;
+		siz = fullsiz;
+		on = 0;
+		goto again;
+	}
+
+	cpos = rbuf + on;
+	cend = rbuf + siz;
+	dp = (struct dirent *)cpos;
+	len = 3 * NFSX_UNSIGNED;	/* paranoia, probably can be 0 */
+	nfsm_reply(siz);
+	mp = mp2 = mb;
+	bp = bpos;	/* bp is the write position; be is the end of usable space in mp */
+	be = bp + M_TRAILINGSPACE(mp);
+
+	/* Loop through the records and build reply */
+	while (cpos < cend) {
+		if (dp->d_fileno != 0) {
+			nlen = dp->d_namlen;
+			rem = nfsm_rndup(nlen)-nlen;	/* pad bytes after the name */
+	
+			/*
+			 * For readdir_and_lookup get the vnode using
+			 * the file number.
+			 */
+			if (VFS_VGET(vp->v_mount, dp->d_fileno, &nvp))
+				goto invalid;	/* entry is left out of the reply */
+			bzero((caddr_t)&fl, sizeof (fl));	/* whole struct is sent: leave no stale stack bytes in it */
+			fl.fl_nfh.fh_generic.fh_fsid =
+				nvp->v_mount->mnt_stat.f_fsid;
+			if (VFS_VPTOFH(nvp, &fl.fl_nfh.fh_generic.fh_fid)) {
+				vput(nvp);
+				goto invalid;
+			}
+			if (duration2) {	/* lease wanted: record what was granted */
+				(void) nqsrv_getlease(nvp, &duration2, NQL_READ,
+					nfsd, nam, &cache2, &frev2, cred);
+				fl.fl_duration = txdr_unsigned(duration2);
+				fl.fl_cachable = txdr_unsigned(cache2);
+				txdr_hyper(&frev2, fl.fl_frev);
+			} else
+				fl.fl_duration = 0;	/* no lease; cachable and frev stay zero */
+			if (VOP_GETATTR(nvp, vap, cred, nfsd->nd_procp)) {
+				vput(nvp);
+				goto invalid;
+			}
+			vput(nvp);
+			fp = (struct nfsv2_fattr *)&fl.fl_fattr;	/* attributes go into fl, not the reply chain */
+			nfsm_srvfillattr;
+			len += (4*NFSX_UNSIGNED + nlen + rem + NFSX_FH
+				+ NFSX_NQFATTR);
+			if (len > cnt) {	/* this entry would exceed the client's count */
+				eofflag = 0;
+				break;
+			}
+			/*
+			 * Build the directory record xdr from
+			 * the dirent entry.
+			 */
+			nfsm_clget;	/* defined elsewhere; presumably points tl at bp, adding an mbuf when full */
+			*tl = nfs_true;	/* an entry follows */
+			bp += NFSX_UNSIGNED;
+
+			/*
+			 * For readdir_and_lookup copy the stuff out.
+			 */
+			xfer = sizeof (struct flrep);
+			cp = (caddr_t)&fl;
+			while (xfer > 0) {
+				nfsm_clget;
+				if ((bp+xfer) > be)	/* does not fit in what is left of this mbuf */
+					tsiz = be-bp;
+				else
+					tsiz = xfer;
+				bcopy(cp, bp, tsiz);
+				bp += tsiz;
+				xfer -= tsiz;
+				if (xfer > 0)
+					cp += tsiz;
+			}
+			nfsm_clget;
+			*tl = txdr_unsigned(dp->d_fileno);
+			bp += NFSX_UNSIGNED;
+			nfsm_clget;
+			*tl = txdr_unsigned(nlen);
+			bp += NFSX_UNSIGNED;
+	
+			/* And loop around copying the name */
+			xfer = nlen;
+			cp = dp->d_name;
+			while (xfer > 0) {
+				nfsm_clget;
+				if ((bp+xfer) > be)
+					tsiz = be-bp;
+				else
+					tsiz = xfer;
+				bcopy(cp, bp, tsiz);
+				bp += tsiz;
+				xfer -= tsiz;
+				if (xfer > 0)
+					cp += tsiz;
+			}
+			/* And null pad to a long boundary */
+			for (i = 0; i < rem; i++)
+				*bp++ = '\0';
+			nfsm_clget;
+	
+			/* Finish off the record */
+			toff += dp->d_reclen;	/* the entry carries the offset of the record after it */
+			*tl = txdr_unsigned(toff);
+			bp += NFSX_UNSIGNED;
+		} else
+invalid:
+			toff += dp->d_reclen;	/* skipped entry still advances the offset */
+		cpos += dp->d_reclen;
+		dp = (struct dirent *)cpos;
+	}
+	vrele(vp);
+	nfsm_clget;
+	*tl = nfs_false;	/* no entry follows */
+	bp += NFSX_UNSIGNED;
+	nfsm_clget;
+	if (eofflag)
+		*tl = nfs_true;
+	else
+		*tl = nfs_false;
+	bp += NFSX_UNSIGNED;
+	if (mp != mb) {	/* set the length of the last mbuf written */
+		if (bp < be)
+			mp->m_len = bp - mtod(mp, caddr_t);
+	} else
+		mp->m_len += bp - bpos;
+	FREE(rbuf, M_TEMP);
+	nfsm_srvdone;
+}
+
+/*
+ * nfs statfs service: reports VFS_STATFS() figures for the handle's mount
+ */
+nfsrv_statfs(nfsd, mrep, md, dpos, cred, nam, mrq)
+	struct nfsd *nfsd;
+	struct mbuf *mrep, *md;
+	caddr_t dpos;
+	struct ucred *cred;
+	struct mbuf *nam, **mrq;
+{
+	register struct statfs *sf;
+	register struct nfsv2_statfs *sfp;
+	register u_long *tl;
+	register long t1;
+	caddr_t bpos;
+	int error = 0, rdonly, cache, isnq;
+	char *cp2;
+	struct mbuf *mb, *mb2, *mreq;
+	struct vnode *vp;
+	nfsv2fh_t nfh;
+	fhandle_t *fhp;
+	struct statfs statfs;
+	u_quad_t frev;
+
+	fhp = &nfh.fh_generic;
+	isnq = (nfsd->nd_nqlflag != NQL_NOVAL);	/* selects the reply size and the extra fields below */
+	nfsm_srvmtofh(fhp);
+	if (error = nfsrv_fhtovp(fhp, TRUE, &vp, cred, nfsd->nd_slp, nam, &rdonly))
+		nfsm_reply(0);
+	sf = &statfs;
+	error = VFS_STATFS(vp->v_mount, sf, nfsd->nd_procp);
+	vput(vp);	/* the vnode was only needed to find its mount */
+	nfsm_reply(NFSX_STATFS(isnq));
+	nfsm_build(sfp, struct nfsv2_statfs *, NFSX_STATFS(isnq));
+	sfp->sf_tsize = txdr_unsigned(NFS_MAXDGRAMDATA);	/* a constant, not taken from sf */
+	sfp->sf_bsize = txdr_unsigned(sf->f_bsize);
+	sfp->sf_blocks = txdr_unsigned(sf->f_blocks);
+	sfp->sf_bfree = txdr_unsigned(sf->f_bfree);
+	sfp->sf_bavail = txdr_unsigned(sf->f_bavail);
+	if (isnq) {	/* file counts are sent only in this form */
+		sfp->sf_files = txdr_unsigned(sf->f_files);
+		sfp->sf_ffree = txdr_unsigned(sf->f_ffree);
+	}
+	nfsm_srvdone;
+}
+
+/* Null operation, used by clients to ping server; replies via nfsm_reply(0) */
+/* ARGSUSED */
+nfsrv_null(nfsd, mrep, md, dpos, cred, nam, mrq)
+	struct nfsd *nfsd;
+	struct mbuf *mrep, *md;
+	caddr_t dpos;
+	struct ucred *cred;
+	struct mbuf *nam, **mrq;
+{
+	caddr_t bpos;
+	int error, cache;
+	struct mbuf *mb, *mreq;
+	u_quad_t frev;
+
+	/* NOTE(review): status is VNOVAL, not 0 -- presumably a marker understood by nfsm_reply(); confirm */
+	error = VNOVAL;
+	nfsm_reply(0);
+	return (error);
+}
+
+/*
+ * No operation, used for obsolete procedures.
+ * Replies with the status already recorded in nfsd->nd_repstat when that is
+ * non-zero, and with EPROCUNAVAIL otherwise.
+ */
+/* ARGSUSED */
+nfsrv_noop(nfsd, mrep, md, dpos, cred, nam, mrq)
+	struct nfsd *nfsd;
+	struct mbuf *mrep, *md;
+	caddr_t dpos;
+	struct ucred *cred;
+	struct mbuf *nam, **mrq;
+{
+	caddr_t bpos;
+	int error, cache;
+	struct mbuf *mb, *mreq;
+	u_quad_t frev;
+
+	/* the locals keep their names: nfsm_reply() is defined elsewhere and presumably uses them */
+	error = nfsd->nd_repstat ? nfsd->nd_repstat : EPROCUNAVAIL;
+	nfsm_reply(0);
+	return (error);
+}
+
+/*
+ * Access check for a vnode reached through a file handle, i.e. a file the
+ * Unix client may already hold open.  vn_writechk() plus VOP_ACCESS() alone
+ * will not do, for two reasons:
+ * 1 - a write must be refused when the export is read-only (rdonly) as
+ *     well as when the mount itself is MNT_RDONLY
+ * 2 - the owner is let through regardless of the mode bits, so that a
+ *     process which chmods a file after opening it keeps working.  This
+ *     is a security hole, but the original author judged it small next
+ *     to the ones the nfs server already opens.
+ * Returns 0 when access is allowed, otherwise EROFS, ETXTBSY or the error
+ * from VOP_GETATTR()/VOP_ACCESS().
+ */
+nfsrv_access(vp, flags, cred, rdonly, p)
+	register struct vnode *vp;
+	int flags;
+	register struct ucred *cred;
+	int rdonly;
+	struct proc *p;
+{
+	struct vattr va;
+	int rc;
+	if (flags & VWRITE) {
+		/*
+		 * This is vn_writechk() with the rdonly test added: regular
+		 * files, directories and symlinks cannot be written on a
+		 * read-only file system; other vnode types are let through.
+		 */
+		if ((rdonly || (vp->v_mount->mnt_flag & MNT_RDONLY)) &&
+		    (vp->v_type == VREG || vp->v_type == VDIR ||
+		     vp->v_type == VLNK))
+			return (EROFS);
+		/*
+		 * Shared text attached to the vnode has to be freed first;
+		 * one attempt is made and a failure refuses the write.
+		 */
+		if ((vp->v_flag & VTEXT) && !vnode_pager_uncache(vp))
+			return (ETXTBSY);
+	}
+	rc = VOP_GETATTR(vp, &va, cred, p);
+	if (rc)
+		return (rc);
+	rc = VOP_ACCESS(vp, flags, cred, p);
+	if (rc && cred->cr_uid != va.va_uid)	/* the owner always passes */
+		return (rc);
+	return (0);
+}
diff --git a/sys/nfs/nfs_socket.c b/sys/nfs/nfs_socket.c
new file mode 100644
index 00000000000..cf88ed33d92
--- /dev/null
+++ b/sys/nfs/nfs_socket.c
@@ -0,0 +1,1990 @@
+/*
+ * Copyright (c) 1989, 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Rick Macklem at The University of Guelph.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)nfs_socket.c	8.3 (Berkeley) 1/12/94
+ */
+
+/*
+ * Socket operations for use by nfs
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/mount.h>
+#include <sys/kernel.h>
+#include <sys/mbuf.h>
+#include <sys/vnode.h>
+#include <sys/domain.h>
+#include <sys/protosw.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/syslog.h>
+#include <sys/tprintf.h>
+
+#include <netinet/in.h>
+#include <netinet/tcp.h>
+#include <nfs/rpcv2.h>
+#include <nfs/nfsv2.h>
+#include <nfs/nfs.h>
+#include <nfs/xdr_subs.h>
+#include <nfs/nfsm_subs.h>
+#include <nfs/nfsmount.h>
+#include <nfs/nfsnode.h>
+#include <nfs/nfsrtt.h>
+#include <nfs/nqnfs.h>
+
+/* Boolean constants local to this file. */
+#define	TRUE	1
+#define	FALSE	0
+
+/*
+ * Estimate rto for an nfs rpc sent via an unreliable datagram.
+ * Use the mean and mean deviation of rtt for the appropriate type of rpc
+ * for the frequent rpcs and a default for the others.
+ * The justification for doing "other" this way is that these rpcs
+ * happen so infrequently that timer est. would probably be stale.
+ * Also, since many of these rpcs are
+ * non-idempotent, a conservative timeout is desired.
+ * getattr, lookup - A+2D
+ * read, write     - A+4D
+ * other           - nm_timeo
+ */
+/*
+ * NFS_RTO(n, t): n is a mount point (struct nfsmount *), t a timer number
+ * as found in proct[].  Timer 0 yields nm_timeo; timers 1 and 2 yield
+ * A+2D and timers 3 and up A+4D, computed from slot t-1 of the scaled
+ * nm_srtt[]/nm_sdrtt[] arrays.  Every use of an argument is parenthesized
+ * so that any expression may be passed for n or t.
+ */
+#define	NFS_RTO(n, t) \
+	((t) == 0 ? (n)->nm_timeo : \
+	 ((t) < 3 ? \
+	  (((((n)->nm_srtt[(t) - 1] + 3) >> 2) + (n)->nm_sdrtt[(t) - 1] + 1) >> 1) : \
+	  ((((n)->nm_srtt[(t) - 1] + 7) >> 3) + (n)->nm_sdrtt[(t) - 1] + 1)))
+/*
+ * Smoothed rtt / rtt deviation slot for request r.  Both expand to an
+ * lvalue.  Only valid when proct[(r)->r_procnum] is non-zero, since timer 0
+ * has no slot.
+ */
+#define	NFS_SRTT(r)	((r)->r_nmp->nm_srtt[proct[(r)->r_procnum] - 1])
+#define	NFS_SDRTT(r)	((r)->r_nmp->nm_sdrtt[proct[(r)->r_procnum] - 1])
+/*
+ * External data, mostly RPC constants in XDR form
+ * (the rpc_* words are compared with, and stored into, message words
+ * directly; see nfs_reply() and nfs_rephead()).
+ * All of these are defined outside this file.
+ */
+extern u_long rpc_reply, rpc_msgdenied, rpc_mismatch, rpc_vers, rpc_auth_unix,
+	rpc_msgaccepted, rpc_call, rpc_autherr, rpc_rejectedcred,
+	rpc_auth_kerb;
+extern u_long nfs_prog, nfs_vers, nqnfs_prog, nqnfs_vers;
+extern time_t nqnfsstarttime;
+extern int nonidempotent[NFS_NPROCS];
+
+/*
+ * Maps errno values to nfs error numbers.
+ * Use NFSERR_IO as the catch all for ones not specifically defined in
+ * RFC 1094.
+ * The table is indexed by errno - 1 (nfs_rephead() uses
+ * nfsrv_errmap[err - 1]), so the first entry is the mapping for errno 1.
+ * Each row below holds five consecutive errno values.
+ */
+static int nfsrv_errmap[ELAST] = {
+  NFSERR_PERM,	NFSERR_NOENT,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,
+  NFSERR_NXIO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,
+  NFSERR_IO,	NFSERR_IO,	NFSERR_ACCES,	NFSERR_IO,	NFSERR_IO,
+  NFSERR_IO,	NFSERR_EXIST,	NFSERR_IO,	NFSERR_NODEV,	NFSERR_NOTDIR,
+  NFSERR_ISDIR,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,
+  NFSERR_IO,	NFSERR_FBIG,	NFSERR_NOSPC,	NFSERR_IO,	NFSERR_ROFS,
+  NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,
+  NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,
+  NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,
+  NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,
+  NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,
+  NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,
+  NFSERR_IO,	NFSERR_IO,	NFSERR_NAMETOL,	NFSERR_IO,	NFSERR_IO,
+  NFSERR_NOTEMPTY, NFSERR_IO,	NFSERR_IO,	NFSERR_DQUOT,	NFSERR_STALE,
+  NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,
+  NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,
+  NFSERR_IO,
+};
+
+/*
+ * Defines which timer to use for the procnum.
+ * 0 - default
+ * 1 - getattr
+ * 2 - lookup
+ * 3 - read
+ * 4 - write
+ * The table is indexed by procedure number.  A non-zero timer t keeps its
+ * rtt state in slot t-1 of nm_srtt[]/nm_sdrtt[] (see NFS_RTO, NFS_SRTT);
+ * timer 0 has no slot and uses nm_timeo.  Non-zero entries are at
+ * procedure numbers 1 (timer 1), 4 (timer 2), 5, 6, 16 and 18 (timer 3)
+ * and 8 (timer 4).
+ */
+static int proct[NFS_NPROCS] = {
+	0, 1, 0, 0, 2, 3, 3, 0, 4, 0, 0, 0, 0, 0, 0, 0, 3, 0, 3, 0, 0, 0, 0,
+};
+
+/*
+ * There is a congestion window for outstanding rpcs maintained per mount
+ * point. The cwnd size is adjusted in roughly the way that:
+ * Van Jacobson, Congestion avoidance and Control, In "Proceedings of
+ * SIGCOMM '88". ACM, August 1988.
+ * describes for TCP. The cwnd size is chopped in half on a retransmit timeout
+ * and incremented by 1/cwnd when each rpc reply is received and a full cwnd
+ * of rpcs is in progress.
+ * (The sent count and cwnd are scaled for integer arith.)
+ * Variants of "slow start" were tried and were found to be too much of a
+ * performance hit (ave. rtt 3 times larger),
+ * I suspect due to the large rtt that nfs rpcs have.
+ */
+/* One outstanding rpc counts as NFS_CWNDSCALE in nm_sent and nm_cwnd. */
+#define	NFS_CWNDSCALE	256
+/* Upper bound on nm_cwnd: 32 rpcs in scaled units. */
+#define	NFS_MAXCWND	(NFS_CWNDSCALE * 32)
+/*
+ * Multipliers applied to the NQNFS "try later" delay in nfs_request(),
+ * indexed by the number of retries so far (capped at the last entry).
+ */
+static int nfs_backoff[8] = { 2, 4, 8, 16, 32, 64, 128, 256, };
+/* Forward/external declarations of routines used below. */
+int	nfs_sbwait();
+void	nfs_disconnect(), nfs_realign(), nfsrv_wakenfsd(), nfs_sndunlock();
+void	nfs_rcvunlock(), nqnfs_serverd(), nqnfs_clientlease();
+struct mbuf *nfsm_rpchead();
+/* Non-zero makes nfs_reply() log an rtt sample into nfsrtt for each reply. */
+int nfsrtton = 0;
+struct nfsrtt nfsrtt;
+struct nfsd nfsd_head;
+
+/* Server-side handlers referenced by the nfsrv_procs[] table below. */
+int	nfsrv_null(),
+	nfsrv_getattr(),
+	nfsrv_setattr(),
+	nfsrv_lookup(),
+	nfsrv_readlink(),
+	nfsrv_read(),
+	nfsrv_write(),
+	nfsrv_create(),
+	nfsrv_remove(),
+	nfsrv_rename(),
+	nfsrv_link(),
+	nfsrv_symlink(),
+	nfsrv_mkdir(),
+	nfsrv_rmdir(),
+	nfsrv_readdir(),
+	nfsrv_statfs(),
+	nfsrv_noop(),
+	nqnfsrv_readdirlook(),
+	nqnfsrv_getlease(),
+	nqnfsrv_vacated(),
+	nqnfsrv_access();
+
+/*
+ * Table of server handlers, one slot per procedure (NFS_NPROCS entries).
+ * Presumably indexed by procedure number like proct[]; the lookup itself
+ * is not in this part of the file.  Slots 3, 7 and 21 hold nfsrv_noop.
+ */
+int (*nfsrv_procs[NFS_NPROCS])() = {
+	nfsrv_null,		/* 0 */
+	nfsrv_getattr,		/* 1 */
+	nfsrv_setattr,		/* 2 */
+	nfsrv_noop,		/* 3 */
+	nfsrv_lookup,		/* 4 */
+	nfsrv_readlink,		/* 5 */
+	nfsrv_read,		/* 6 */
+	nfsrv_noop,		/* 7 */
+	nfsrv_write,		/* 8 */
+	nfsrv_create,		/* 9 */
+	nfsrv_remove,		/* 10 */
+	nfsrv_rename,		/* 11 */
+	nfsrv_link,		/* 12 */
+	nfsrv_symlink,		/* 13 */
+	nfsrv_mkdir,		/* 14 */
+	nfsrv_rmdir,		/* 15 */
+	nfsrv_readdir,		/* 16 */
+	nfsrv_statfs,		/* 17 */
+	nqnfsrv_readdirlook,	/* 18 */
+	nqnfsrv_getlease,	/* 19 */
+	nqnfsrv_vacated,	/* 20 */
+	nfsrv_noop,		/* 21 */
+	nqnfsrv_access,		/* 22 */
+};
+
+/*
+ * Head of the circular, doubly linked (r_next/r_prev) list of outstanding
+ * client requests; nfs_request() appends to its tail.
+ */
+struct nfsreq nfsreqh;
+
+/*
+ * Initialize sockets and congestion for a new NFS connection.
+ * We do not free the sockaddr if error.
+ *
+ * nmp - mount point; nm_nam holds the server address, nm_sotype and
+ *	 nm_soproto select the socket.  On success nm_so is the new socket.
+ * rep - request on whose behalf we connect, or NULL; when set, the wait
+ *	 for connection completion can be interrupted via nfs_sigintr().
+ * Returns 0 on success.  On failure the socket (if any) is torn down with
+ * nfs_disconnect() and an errno value is returned.
+ */
+nfs_connect(nmp, rep)
+	register struct nfsmount *nmp;
+	struct nfsreq *rep;
+{
+	register struct socket *so;
+	int s, error, rcvreserve, sndreserve;
+	struct sockaddr *saddr;
+	struct sockaddr_in *sin;
+	struct mbuf *m;
+	u_short tport;
+
+	nmp->nm_so = (struct socket *)0;
+	saddr = mtod(nmp->nm_nam, struct sockaddr *);
+	if (error = socreate(saddr->sa_family,
+		&nmp->nm_so, nmp->nm_sotype, nmp->nm_soproto))
+		goto bad;
+	so = nmp->nm_so;
+	nmp->nm_soflags = so->so_proto->pr_flags;
+
+	/*
+	 * Some servers require that the client port be a reserved port number.
+	 * Walk downwards from IPPORT_RESERVED - 1 until a bind succeeds, fails
+	 * with something other than EADDRINUSE, or half the range is used up.
+	 */
+	if (saddr->sa_family == AF_INET && (nmp->nm_flag & NFSMNT_RESVPORT)) {
+		MGET(m, M_WAIT, MT_SONAME);
+		sin = mtod(m, struct sockaddr_in *);
+		sin->sin_len = m->m_len = sizeof (struct sockaddr_in);
+		sin->sin_family = AF_INET;
+		sin->sin_addr.s_addr = INADDR_ANY;
+		tport = IPPORT_RESERVED - 1;
+		sin->sin_port = htons(tport);
+		while ((error = sobind(so, m)) == EADDRINUSE &&
+		       --tport > IPPORT_RESERVED / 2)
+			sin->sin_port = htons(tport);
+		m_freem(m);
+		if (error)
+			goto bad;
+	}
+
+	/*
+	 * Protocols that do not require connections may be optionally left
+	 * unconnected for servers that reply from a port other than NFS_PORT.
+	 */
+	if (nmp->nm_flag & NFSMNT_NOCONN) {
+		if (nmp->nm_soflags & PR_CONNREQUIRED) {
+			error = ENOTCONN;
+			goto bad;
+		}
+	} else {
+		if (error = soconnect(so, nmp->nm_nam))
+			goto bad;
+
+		/*
+		 * Wait for the connection to complete. Cribbed from the
+		 * connect system call but with the wait timing out so
+		 * that interruptible mounts don't hang here for a long time.
+		 */
+		s = splnet();
+		while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
+			(void) tsleep((caddr_t)&so->so_timeo, PSOCK,
+				"nfscon", 2 * hz);
+			if ((so->so_state & SS_ISCONNECTING) &&
+			    so->so_error == 0 && rep &&
+			    (error = nfs_sigintr(nmp, rep, rep->r_procp))) {
+				so->so_state &= ~SS_ISCONNECTING;
+				splx(s);
+				goto bad;
+			}
+		}
+		if (so->so_error) {
+			error = so->so_error;
+			so->so_error = 0;
+			splx(s);
+			goto bad;
+		}
+		splx(s);
+	}
+	/* Soft or interruptible mounts get a 5 second socket buffer timeout. */
+	if (nmp->nm_flag & (NFSMNT_SOFT | NFSMNT_INT)) {
+		so->so_rcv.sb_timeo = (5 * hz);
+		so->so_snd.sb_timeo = (5 * hz);
+	} else {
+		so->so_rcv.sb_timeo = 0;
+		so->so_snd.sb_timeo = 0;
+	}
+	/*
+	 * Size the socket buffers from the mount's read/write sizes.  All but
+	 * datagram sockets reserve room for two requests; streams also leave
+	 * room for the record mark word.
+	 */
+	if (nmp->nm_sotype == SOCK_DGRAM) {
+		sndreserve = nmp->nm_wsize + NFS_MAXPKTHDR;
+		rcvreserve = nmp->nm_rsize + NFS_MAXPKTHDR;
+	} else if (nmp->nm_sotype == SOCK_SEQPACKET) {
+		sndreserve = (nmp->nm_wsize + NFS_MAXPKTHDR) * 2;
+		rcvreserve = (nmp->nm_rsize + NFS_MAXPKTHDR) * 2;
+	} else {
+		if (nmp->nm_sotype != SOCK_STREAM)
+			panic("nfscon sotype");
+		if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
+			MGET(m, M_WAIT, MT_SOOPTS);
+			*mtod(m, int *) = 1;
+			m->m_len = sizeof(int);
+			sosetopt(so, SOL_SOCKET, SO_KEEPALIVE, m);
+		}
+		if (so->so_proto->pr_protocol == IPPROTO_TCP) {
+			MGET(m, M_WAIT, MT_SOOPTS);
+			*mtod(m, int *) = 1;
+			m->m_len = sizeof(int);
+			sosetopt(so, IPPROTO_TCP, TCP_NODELAY, m);
+		}
+		sndreserve = (nmp->nm_wsize + NFS_MAXPKTHDR + sizeof (u_long))
+				* 2;
+		rcvreserve = (nmp->nm_rsize + NFS_MAXPKTHDR + sizeof (u_long))
+				* 2;
+	}
+	if (error = soreserve(so, sndreserve, rcvreserve))
+		goto bad;
+	so->so_rcv.sb_flags |= SB_NOINTR;
+	so->so_snd.sb_flags |= SB_NOINTR;
+
+	/*
+	 * Initialize other non-zero congestion variables.
+	 * Timers 1-4 of proct[] keep their state in slots 0-3 (index t-1), so
+	 * only those four slots are initialized; a fifth slot is never read
+	 * and would lie outside a four element array.
+	 */
+	nmp->nm_srtt[0] = nmp->nm_srtt[1] = nmp->nm_srtt[2] =
+		nmp->nm_srtt[3] = (NFS_TIMEO << 3);
+	nmp->nm_sdrtt[0] = nmp->nm_sdrtt[1] = nmp->nm_sdrtt[2] =
+		nmp->nm_sdrtt[3] = 0;
+	nmp->nm_cwnd = NFS_MAXCWND / 2;	    /* Initial send window */
+	nmp->nm_sent = 0;
+	nmp->nm_timeouts = 0;
+	return (0);
+
+bad:
+	nfs_disconnect(nmp);
+	return (error);
+}
+
+/*
+ * Reconnect routine:
+ * Called when a connection is broken on a reliable protocol.
+ * - throw away the old socket
+ * - keep calling nfs_connect() until it succeeds, sleeping on lbolt
+ *   between attempts
+ * - mark every outstanding request on this mount point R_MUSTRESEND
+ * Returns 0 once reconnected, or EINTR if a connect attempt failed with
+ * EINTR or ERESTART.
+ * nb: Must be called with the nfs_sndlock() set on the mount point.
+ */
+nfs_reconnect(rep)
+	register struct nfsreq *rep;
+{
+	register struct nfsreq *req;
+	register struct nfsmount *nmp = rep->r_nmp;
+	int error;
+
+	nfs_disconnect(nmp);
+	for (;;) {
+		error = nfs_connect(nmp, rep);
+		if (error == 0)
+			break;
+		if (error == EINTR || error == ERESTART)
+			return (EINTR);
+		(void) tsleep((caddr_t)&lbolt, PSOCK, "nfscon", 0);
+	}
+
+	/*
+	 * Anything still queued for this mount point went out on the old
+	 * socket and has to be sent again on the new one.
+	 */
+	for (req = nfsreqh.r_next; req != &nfsreqh; req = req->r_next) {
+		if (req->r_nmp == nmp)
+			req->r_flags |= R_MUSTRESEND;
+	}
+	return (0);
+}
+
+/*
+ * NFS disconnect.  If the mount point has a socket, detach it from the
+ * mount point (nm_so is cleared first), then shut it down and close it.
+ * A mount point without a socket is left untouched.
+ */
+void
+nfs_disconnect(nmp)
+	register struct nfsmount *nmp;
+{
+	register struct socket *so = nmp->nm_so;
+
+	if (so == NULL)
+		return;
+	nmp->nm_so = (struct socket *)0;
+	soshutdown(so, 2);
+	soclose(so);
+}
+
+/*
+ * This is the nfs send routine. For connection based socket types, it
+ * must be called with an nfs_sndlock() on the socket.
+ * "rep == NULL" indicates that it has been called from a server, in which
+ * case the socket passed in "so" is used; for a client call the socket is
+ * taken from the request's mount point instead.
+ * The mbuf chain "top" is always given away: it is either freed here or
+ * handed to sosend().
+ * For the client side:
+ * - return EINTR if the RPC is terminated, 0 otherwise
+ * - set R_MUSTRESEND if the send fails for any reason
+ * - do any cleanup required by recoverable socket errors (???)
+ * For the server side:
+ * - return EINTR or ERESTART if interrupted by a signal
+ * - return EPIPE if a connection is lost for connection based sockets (TCP...)
+ * - do any cleanup required by recoverable socket errors (???)
+ */
+nfs_send(so, nam, top, rep)
+	register struct socket *so;
+	struct mbuf *nam;
+	register struct mbuf *top;
+	struct nfsreq *rep;
+{
+	struct mbuf *sendnam;
+	int error, soflags, flags;
+
+	if (rep == NULL) {
+		/* Server side: use the caller's socket as is. */
+		soflags = so->so_proto->pr_flags;
+	} else if (rep->r_flags & R_SOFTTERM) {
+		/* The request has been terminated; drop the data. */
+		m_freem(top);
+		return (EINTR);
+	} else {
+		so = rep->r_nmp->nm_so;
+		if (so == NULL) {
+			/* No socket right now; ask for a resend later. */
+			rep->r_flags |= R_MUSTRESEND;
+			m_freem(top);
+			return (0);
+		}
+		rep->r_flags &= ~R_MUSTRESEND;
+		soflags = rep->r_nmp->nm_soflags;
+	}
+
+	/* An address is passed only to unconnected, connectionless sockets. */
+	sendnam = ((soflags & PR_CONNREQUIRED) ||
+	    (so->so_state & SS_ISCONNECTED)) ? (struct mbuf *)0 : nam;
+	flags = (so->so_type == SOCK_SEQPACKET) ? MSG_EOR : 0;
+
+	error = sosend(so, sendnam, (struct uio *)0, top,
+		(struct mbuf *)0, flags);
+	if (error == 0)
+		return (0);
+
+	if (rep == NULL)
+		log(LOG_INFO, "nfsd send error %d\n", error);
+	else {
+		log(LOG_INFO, "nfs send error %d for server %s\n",error,
+		    rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
+		/*
+		 * Client side: a terminated request reports EINTR, anything
+		 * else is queued for retransmission.
+		 */
+		if (rep->r_flags & R_SOFTTERM)
+			error = EINTR;
+		else
+			rep->r_flags |= R_MUSTRESEND;
+	}
+
+	/*
+	 * Only these four errors are passed back; every other error is
+	 * treated as recoverable (soft) and turned into success. (???)
+	 */
+	if (error == EINTR || error == ERESTART ||
+	    error == EWOULDBLOCK || error == EPIPE)
+		return (error);
+	return (0);
+}
+
+/*
+ * Receive a Sun RPC Request/Reply. For SOCK_DGRAM, the work is all
+ * done by soreceive(), but for SOCK_STREAM we must deal with the Record
+ * Mark and consolidate the data into a new mbuf list.
+ * nb: Sometimes TCP passes the data up to soreceive() in long lists of
+ *     small mbufs.
+ * For SOCK_STREAM we must be very careful to read an entire record once
+ * we have read any of it, even if the system call has been interrupted.
+ *
+ * rep   - the request being waited for; supplies the mount point/socket.
+ * aname - out: sender's address mbuf.  Only filled in on the SOCK_DGRAM
+ *	   path when the socket is not connected; otherwise left NULL.
+ * mp    - out: the received mbuf chain, or NULL on error.
+ * Returns 0 on success or an errno value.  For non-datagram sockets most
+ * receive errors trigger a reconnect and a retry from "tryagain" rather
+ * than a return.
+ */
+nfs_receive(rep, aname, mp)
+	register struct nfsreq *rep;
+	struct mbuf **aname;
+	struct mbuf **mp;
+{
+	register struct socket *so;
+	struct uio auio;
+	struct iovec aio;
+	register struct mbuf *m;
+	struct mbuf *control;
+	u_long len;
+	struct mbuf **getnam;
+	int error, sotype, rcvflg;
+	struct proc *p = curproc;	/* XXX */
+
+	/*
+	 * Set up arguments for soreceive()
+	 */
+	*mp = (struct mbuf *)0;
+	*aname = (struct mbuf *)0;
+	sotype = rep->r_nmp->nm_sotype;
+
+	/*
+	 * For reliable protocols, lock against other senders/receivers
+	 * in case a reconnect is necessary.
+	 * For SOCK_STREAM, first get the Record Mark to find out how much
+	 * more there is to get.
+	 * We must lock the socket against other receivers
+	 * until we have an entire rpc request/reply.
+	 */
+	if (sotype != SOCK_DGRAM) {
+		if (error = nfs_sndlock(&rep->r_nmp->nm_flag, rep))
+			return (error);
+tryagain:
+		/*
+		 * Check for fatal errors and resending request.
+		 */
+		/*
+		 * Ugh: If a reconnect attempt just happened, nm_so
+		 * would have changed. NULL indicates a failed
+		 * attempt that has essentially shut down this
+		 * mount point.
+		 */
+		/* The send lock is held whenever control reaches tryagain. */
+		if (rep->r_mrep || (rep->r_flags & R_SOFTTERM)) {
+			nfs_sndunlock(&rep->r_nmp->nm_flag);
+			return (EINTR);
+		}
+		if ((so = rep->r_nmp->nm_so) == NULL) {
+			if (error = nfs_reconnect(rep)) {
+				nfs_sndunlock(&rep->r_nmp->nm_flag);
+				return (error);
+			}
+			goto tryagain;
+		}
+		/*
+		 * Retransmit a copy of the request for as long as it is
+		 * flagged R_MUSTRESEND; nfs_send() clears the flag when it
+		 * has a socket to send on.
+		 */
+		while (rep->r_flags & R_MUSTRESEND) {
+			m = m_copym(rep->r_mreq, 0, M_COPYALL, M_WAIT);
+			nfsstats.rpcretries++;
+			if (error = nfs_send(so, rep->r_nmp->nm_nam, m, rep)) {
+				if (error == EINTR || error == ERESTART ||
+				    (error = nfs_reconnect(rep))) {
+					nfs_sndunlock(&rep->r_nmp->nm_flag);
+					return (error);
+				}
+				goto tryagain;
+			}
+		}
+		nfs_sndunlock(&rep->r_nmp->nm_flag);
+		if (sotype == SOCK_STREAM) {
+			/* First read the record mark word into len. */
+			aio.iov_base = (caddr_t) &len;
+			aio.iov_len = sizeof(u_long);
+			auio.uio_iov = &aio;
+			auio.uio_iovcnt = 1;
+			auio.uio_segflg = UIO_SYSSPACE;
+			auio.uio_rw = UIO_READ;
+			auio.uio_offset = 0;
+			auio.uio_resid = sizeof(u_long);
+			auio.uio_procp = p;
+			do {
+			   rcvflg = MSG_WAITALL;
+			   error = soreceive(so, (struct mbuf **)0, &auio,
+				(struct mbuf **)0, (struct mbuf **)0, &rcvflg);
+			   if (error == EWOULDBLOCK && rep) {
+				if (rep->r_flags & R_SOFTTERM)
+					return (EINTR);
+			   }
+			} while (error == EWOULDBLOCK);
+			if (!error && auio.uio_resid > 0) {
+			    /*
+			     * NOTE(review): the values passed for %d here
+			     * involve sizeof, i.e. are not plain ints.
+			     */
+			    log(LOG_INFO,
+				 "short receive (%d/%d) from nfs server %s\n",
+				 sizeof(u_long) - auio.uio_resid,
+				 sizeof(u_long),
+				 rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
+			    error = EPIPE;
+			}
+			if (error)
+				goto errout;
+			/* Strip the top (0x80000000) bit to get the length. */
+			len = ntohl(len) & ~0x80000000;
+			/*
+			 * This is SERIOUS! We are out of sync with the sender
+			 * and forcing a disconnect/reconnect is all I can do.
+			 */
+			if (len > NFS_MAXPACKET) {
+			    log(LOG_ERR, "%s (%d) from nfs server %s\n",
+				"impossible packet length",
+				len,
+				rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
+			    error = EFBIG;
+			    goto errout;
+			}
+			/*
+			 * Now read the record body.  Interruptions are retried
+			 * here, since part of the record has been consumed.
+			 */
+			auio.uio_resid = len;
+			do {
+			    rcvflg = MSG_WAITALL;
+			    error =  soreceive(so, (struct mbuf **)0,
+				&auio, mp, (struct mbuf **)0, &rcvflg);
+			} while (error == EWOULDBLOCK || error == EINTR ||
+				 error == ERESTART);
+			if (!error && auio.uio_resid > 0) {
+			    log(LOG_INFO,
+				"short receive (%d/%d) from nfs server %s\n",
+				len - auio.uio_resid, len,
+				rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
+			    error = EPIPE;
+			}
+		} else {
+			/*
+			 * NB: Since uio_resid is big, MSG_WAITALL is ignored
+			 * and soreceive() will return when it has either a
+			 * control msg or a data msg.
+			 * We have no use for control msg., but must grab them
+			 * and then throw them away so we know what is going
+			 * on.
+			 */
+			auio.uio_resid = len = 100000000; /* Anything Big */
+			auio.uio_procp = p;
+			do {
+			    rcvflg = 0;
+			    error =  soreceive(so, (struct mbuf **)0,
+				&auio, mp, &control, &rcvflg);
+			    if (control)
+				m_freem(control);
+			    if (error == EWOULDBLOCK && rep) {
+				if (rep->r_flags & R_SOFTTERM)
+					return (EINTR);
+			    }
+			} while (error == EWOULDBLOCK ||
+				 (!error && *mp == NULL && control));
+			if ((rcvflg & MSG_EOR) == 0)
+				printf("Egad!!\n");
+			if (!error && *mp == NULL)
+				error = EPIPE;
+			len -= auio.uio_resid;
+		}
+errout:
+		/*
+		 * Any error other than an interruption drops the partial data,
+		 * retakes the send lock, reconnects and starts over.
+		 */
+		if (error && error != EINTR && error != ERESTART) {
+			m_freem(*mp);
+			*mp = (struct mbuf *)0;
+			if (error != EPIPE)
+				log(LOG_INFO,
+				    "receive error %d from nfs server %s\n",
+				    error,
+				 rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
+			error = nfs_sndlock(&rep->r_nmp->nm_flag, rep);
+			if (!error)
+				error = nfs_reconnect(rep);
+			if (!error)
+				goto tryagain;
+		}
+	} else {
+		/* Datagram socket: one soreceive() call returns one message. */
+		if ((so = rep->r_nmp->nm_so) == NULL)
+			return (EACCES);
+		if (so->so_state & SS_ISCONNECTED)
+			getnam = (struct mbuf **)0;
+		else
+			getnam = aname;
+		auio.uio_resid = len = 1000000;
+		auio.uio_procp = p;
+		do {
+			rcvflg = 0;
+			error =  soreceive(so, getnam, &auio, mp,
+				(struct mbuf **)0, &rcvflg);
+			if (error == EWOULDBLOCK &&
+			    (rep->r_flags & R_SOFTTERM))
+				return (EINTR);
+		} while (error == EWOULDBLOCK);
+		len -= auio.uio_resid;
+	}
+	if (error) {
+		m_freem(*mp);
+		*mp = (struct mbuf *)0;
+	}
+	/*
+	 * Search for any mbufs that are not a multiple of 4 bytes long
+	 * or with m_data not longword aligned.
+	 * These could cause pointer alignment problems, so copy them to
+	 * well aligned mbufs.
+	 */
+	/* NOTE(review): also reached with *mp == NULL after an error. */
+	nfs_realign(*mp, 5 * NFSX_UNSIGNED);
+	return (error);
+}
+
+/*
+ * Implement receipt of reply on a socket.
+ * We must search through the list of received datagrams matching them
+ * with outstanding requests using the xid, until ours is found.
+ *
+ * myrep - the request whose reply the caller is waiting for.
+ * Every reply that arrives is matched by xid against the outstanding
+ * request list and attached to the request it belongs to (r_mrep, r_md,
+ * r_dpos), with the congestion window and rtt estimates updated.
+ * Returns 0 when myrep has its reply (or, with R_GETONEREP set, after one
+ * receive attempt has been dealt with), otherwise an errno value.
+ */
+/* ARGSUSED */
+nfs_reply(myrep)
+	struct nfsreq *myrep;
+{
+	register struct nfsreq *rep;
+	register struct nfsmount *nmp = myrep->r_nmp;
+	register long t1;
+	struct mbuf *mrep, *nam, *md;
+	u_long rxid, *tl;
+	caddr_t dpos, cp2;
+	int error;
+
+	/*
+	 * Loop around until we get our own reply
+	 */
+	for (;;) {
+		/*
+		 * Lock against other receivers so that I don't get stuck in
+		 * sbwait() after someone else has received my reply for me.
+		 * Also necessary for connection based protocols to avoid
+		 * race conditions during a reconnect.
+		 */
+		if (error = nfs_rcvlock(myrep))
+			return (error);
+		/* Already received, bye bye */
+		if (myrep->r_mrep != NULL) {
+			nfs_rcvunlock(&nmp->nm_flag);
+			return (0);
+		}
+		/*
+		 * Get the next Rpc reply off the socket
+		 */
+		error = nfs_receive(myrep, &nam, &mrep);
+		nfs_rcvunlock(&nmp->nm_flag);
+		if (error) {
+
+			/*
+			 * Ignore routing errors on connectionless protocols??
+			 * NOTE(review): nm_so is dereferenced here without a
+			 * NULL check; confirm NFSIGNORE_SOERROR() can never
+			 * be true while the mount point has no socket.
+			 */
+			if (NFSIGNORE_SOERROR(nmp->nm_soflags, error)) {
+				nmp->nm_so->so_error = 0;
+				if (myrep->r_flags & R_GETONEREP)
+					return (0);
+				continue;
+			}
+			return (error);
+		}
+		if (nam)
+			m_freem(nam);
+
+		/*
+		 * Get the xid and check that it is an rpc reply
+		 */
+		md = mrep;
+		dpos = mtod(md, caddr_t);
+		nfsm_dissect(tl, u_long *, 2*NFSX_UNSIGNED);
+		rxid = *tl++;
+		if (*tl != rpc_reply) {
+			if (nmp->nm_flag & NFSMNT_NQNFS) {
+				if (nqnfs_callback(nmp, mrep, md, dpos))
+					nfsstats.rpcinvalid++;
+			} else {
+				nfsstats.rpcinvalid++;
+				m_freem(mrep);
+			}
+nfsmout:
+			if (myrep->r_flags & R_GETONEREP)
+				return (0);
+			continue;
+		}
+
+		/*
+		 * Loop through the request list to match up the reply
+		 * Iff no match, just drop the datagram
+		 */
+		rep = nfsreqh.r_next;
+		while (rep != &nfsreqh) {
+			if (rep->r_mrep == NULL && rxid == rep->r_xid) {
+				/* Found it.. */
+				rep->r_mrep = mrep;
+				rep->r_md = md;
+				rep->r_dpos = dpos;
+				if (nfsrtton) {
+					struct rttl *rt;
+					int timer = proct[rep->r_procnum];
+
+					rt = &nfsrtt.rttl[nfsrtt.pos];
+					rt->proc = rep->r_procnum;
+					rt->rto = NFS_RTO(nmp, timer);
+					rt->sent = nmp->nm_sent;
+					rt->cwnd = nmp->nm_cwnd;
+					/*
+					 * Timer 0 (the default) has no
+					 * srtt/sdrtt slot; log zeros rather
+					 * than indexing the arrays with -1.
+					 */
+					if (timer > 0) {
+						rt->srtt = nmp->nm_srtt[timer - 1];
+						rt->sdrtt = nmp->nm_sdrtt[timer - 1];
+					} else {
+						rt->srtt = 0;
+						rt->sdrtt = 0;
+					}
+					rt->fsid = nmp->nm_mountp->mnt_stat.f_fsid;
+					rt->tstamp = time;
+					if (rep->r_flags & R_TIMING)
+						rt->rtt = rep->r_rtt;
+					else
+						rt->rtt = 1000000;
+					nfsrtt.pos = (nfsrtt.pos + 1) % NFSRTTLOGSIZ;
+				}
+				/*
+				 * Update congestion window.
+				 * Do the additive increase of
+				 * one rpc/rtt.
+				 */
+				if (nmp->nm_cwnd <= nmp->nm_sent) {
+					nmp->nm_cwnd +=
+					   (NFS_CWNDSCALE * NFS_CWNDSCALE +
+					   (nmp->nm_cwnd >> 1)) / nmp->nm_cwnd;
+					if (nmp->nm_cwnd > NFS_MAXCWND)
+						nmp->nm_cwnd = NFS_MAXCWND;
+				}
+				rep->r_flags &= ~R_SENT;
+				nmp->nm_sent -= NFS_CWNDSCALE;
+				/*
+				 * Update rtt using a gain of 0.125 on the mean
+				 * and a gain of 0.25 on the deviation.
+				 * R_TIMING is only set for procedures with a
+				 * non-zero timer (see nfs_request()), so
+				 * NFS_SRTT/NFS_SDRTT have a valid slot here.
+				 */
+				if (rep->r_flags & R_TIMING) {
+					/*
+					 * Since the timer resolution of
+					 * NFS_HZ is so coarse, it can often
+					 * result in r_rtt == 0. Since
+					 * r_rtt == N means that the actual
+					 * rtt is between N+dt and N+2-dt ticks,
+					 * add 1.
+					 */
+					t1 = rep->r_rtt + 1;
+					t1 -= (NFS_SRTT(rep) >> 3);
+					NFS_SRTT(rep) += t1;
+					if (t1 < 0)
+						t1 = -t1;
+					t1 -= (NFS_SDRTT(rep) >> 2);
+					NFS_SDRTT(rep) += t1;
+				}
+				nmp->nm_timeouts = 0;
+				break;
+			}
+			rep = rep->r_next;
+		}
+		/*
+		 * If not matched to a request, drop it.
+		 * If it's mine, get out.
+		 */
+		if (rep == &nfsreqh) {
+			nfsstats.rpcunexpected++;
+			m_freem(mrep);
+		} else if (rep == myrep) {
+			if (rep->r_mrep == NULL)
+				panic("nfsreply nil");
+			return (0);
+		}
+		if (myrep->r_flags & R_GETONEREP)
+			return (0);
+	}
+}
+
+/*
+ * nfs_request - goes something like this
+ *	- fill in request struct
+ *	- links it into list
+ *	- calls nfs_send() for first transmit
+ *	- calls nfs_receive() to get reply
+ *	- break down rpc header and return with nfs reply pointed to
+ *	  by mrep or error
+ * nb: always frees up mreq mbuf list
+ *
+ * vp      - vnode the rpc is for; its mount gives the nfsmount.
+ * mrest   - mbuf chain holding the procedure arguments.
+ * procnum - nfs procedure number.
+ * procp   - process doing the rpc (recorded in the request).
+ * cred    - credentials used to build the rpc header.
+ * mrp, mdp, dposp - out, set on success only: the reply mbuf chain, the
+ *	     mbuf currently being dissected and the position within it.
+ * Returns 0 on success or an errno value / nfs error from the reply.
+ */
+nfs_request(vp, mrest, procnum, procp, cred, mrp, mdp, dposp)
+	struct vnode *vp;
+	struct mbuf *mrest;
+	int procnum;
+	struct proc *procp;
+	struct ucred *cred;
+	struct mbuf **mrp;
+	struct mbuf **mdp;
+	caddr_t *dposp;
+{
+	register struct mbuf *m, *mrep;
+	register struct nfsreq *rep;
+	register u_long *tl;
+	register int i;
+	struct nfsmount *nmp;
+	struct mbuf *md, *mheadend;
+	struct nfsreq *reph;
+	struct nfsnode *np;
+	time_t reqtime, waituntil;
+	caddr_t dpos, cp2;
+	int t1, nqlflag, cachable, s, error = 0, mrest_len, auth_len, auth_type;
+	int trylater_delay = NQ_TRYLATERDEL, trylater_cnt = 0, failed_auth = 0;
+	u_long xid;
+	u_quad_t frev;
+	char *auth_str;
+
+	nmp = VFSTONFS(vp->v_mount);
+	MALLOC(rep, struct nfsreq *, sizeof(struct nfsreq), M_NFSREQ, M_WAITOK);
+	rep->r_nmp = nmp;
+	rep->r_vp = vp;
+	rep->r_procp = procp;
+	rep->r_procnum = procnum;
+	/* Total length of the argument mbufs, needed for the rpc header. */
+	i = 0;
+	m = mrest;
+	while (m) {
+		i += m->m_len;
+		m = m->m_next;
+	}
+	mrest_len = i;
+
+	/*
+	 * Get the RPC header with authorization.
+	 * For Kerberos mounts the first attempt goes out with a unix style
+	 * header; nfs_getauth() is only used after the server has rejected
+	 * the credential once (failed_auth).
+	 */
+kerbauth:
+	auth_str = (char *)0;
+	if (nmp->nm_flag & NFSMNT_KERB) {
+		if (failed_auth) {
+			error = nfs_getauth(nmp, rep, cred, &auth_type,
+				&auth_str, &auth_len);
+			if (error) {
+				free((caddr_t)rep, M_NFSREQ);
+				m_freem(mrest);
+				return (error);
+			}
+		} else {
+			auth_type = RPCAUTH_UNIX;
+			auth_len = 5 * NFSX_UNSIGNED;
+		}
+	} else {
+		auth_type = RPCAUTH_UNIX;
+		if (cred->cr_ngroups < 1)
+			panic("nfsreq nogrps");
+		auth_len = ((((cred->cr_ngroups - 1) > nmp->nm_numgrps) ?
+			nmp->nm_numgrps : (cred->cr_ngroups - 1)) << 2) +
+			5 * NFSX_UNSIGNED;
+	}
+	m = nfsm_rpchead(cred, (nmp->nm_flag & NFSMNT_NQNFS), procnum,
+	     auth_type, auth_len, auth_str, mrest, mrest_len, &mheadend, &xid);
+	if (auth_str)
+		free(auth_str, M_TEMP);
+
+	/*
+	 * For stream protocols, insert a Sun RPC Record Mark.
+	 */
+	if (nmp->nm_sotype == SOCK_STREAM) {
+		M_PREPEND(m, NFSX_UNSIGNED, M_WAIT);
+		*mtod(m, u_long *) = htonl(0x80000000 |
+			 (m->m_pkthdr.len - NFSX_UNSIGNED));
+	}
+	rep->r_mreq = m;
+	rep->r_xid = xid;
+tryagain:
+	if (nmp->nm_flag & NFSMNT_SOFT)
+		rep->r_retry = nmp->nm_retry;
+	else
+		rep->r_retry = NFS_MAXREXMIT + 1;	/* past clip limit */
+	rep->r_rtt = rep->r_rexmit = 0;
+	/* Only procedures that have a timer of their own are timed. */
+	if (proct[procnum] > 0)
+		rep->r_flags = R_TIMING;
+	else
+		rep->r_flags = 0;
+	rep->r_mrep = NULL;
+
+	/*
+	 * Do the client side RPC.
+	 */
+	nfsstats.rpcrequests++;
+	/*
+	 * Chain request into list of outstanding requests. Be sure
+	 * to put it LAST so timer finds oldest requests first.
+	 */
+	s = splsoftclock();
+	reph = &nfsreqh;
+	reph->r_prev->r_next = rep;
+	rep->r_prev = reph->r_prev;
+	reph->r_prev = rep;
+	rep->r_next = reph;
+
+	/* Get send time for nqnfs */
+	reqtime = time.tv_sec;
+
+	/*
+	 * If backing off another request or avoiding congestion, don't
+	 * send this one now but let timer do it. If not timing a request,
+	 * do it now.
+	 */
+	if (nmp->nm_so && (nmp->nm_sotype != SOCK_DGRAM ||
+		(nmp->nm_flag & NFSMNT_DUMBTIMR) ||
+		nmp->nm_sent < nmp->nm_cwnd)) {
+		splx(s);
+		if (nmp->nm_soflags & PR_CONNREQUIRED)
+			error = nfs_sndlock(&nmp->nm_flag, rep);
+		if (!error) {
+			/*
+			 * Always copy from the saved request.  The copy is
+			 * given away to nfs_send(), so on a later pass through
+			 * "tryagain" m no longer refers to a chain we own.
+			 */
+			m = m_copym(rep->r_mreq, 0, M_COPYALL, M_WAIT);
+			error = nfs_send(nmp->nm_so, nmp->nm_nam, m, rep);
+			if (nmp->nm_soflags & PR_CONNREQUIRED)
+				nfs_sndunlock(&nmp->nm_flag);
+		}
+		if (!error && (rep->r_flags & R_MUSTRESEND) == 0) {
+			nmp->nm_sent += NFS_CWNDSCALE;
+			rep->r_flags |= R_SENT;
+		}
+	} else {
+		splx(s);
+		rep->r_rtt = -1;
+	}
+
+	/*
+	 * Wait for the reply from our send or the timer's.
+	 */
+	if (!error || error == EPIPE)
+		error = nfs_reply(rep);
+
+	/*
+	 * RPC done, unlink the request.
+	 */
+	s = splsoftclock();
+	rep->r_prev->r_next = rep->r_next;
+	rep->r_next->r_prev = rep->r_prev;
+	splx(s);
+
+	/*
+	 * Decrement the outstanding request count.
+	 */
+	if (rep->r_flags & R_SENT) {
+		rep->r_flags &= ~R_SENT;	/* paranoia */
+		nmp->nm_sent -= NFS_CWNDSCALE;
+	}
+
+	/*
+	 * If there was a successful reply and a tprintf msg.
+	 * tprintf a response.
+	 */
+	if (!error && (rep->r_flags & R_TPRINTFMSG))
+		nfs_msg(rep->r_procp, nmp->nm_mountp->mnt_stat.f_mntfromname,
+		    "is alive again");
+	mrep = rep->r_mrep;
+	md = rep->r_md;
+	dpos = rep->r_dpos;
+	if (error) {
+		m_freem(rep->r_mreq);
+		free((caddr_t)rep, M_NFSREQ);
+		return (error);
+	}
+
+	/*
+	 * break down the rpc header and check if ok
+	 * NOTE(review): the nfsm_* macros used from here on appear to bail
+	 * out through the nfsmout label on a malformed reply; that path
+	 * frees neither rep nor rep->r_mreq.  Confirm against nfsm_subs.h.
+	 */
+	nfsm_dissect(tl, u_long *, 3*NFSX_UNSIGNED);
+	if (*tl++ == rpc_msgdenied) {
+		if (*tl == rpc_mismatch)
+			error = EOPNOTSUPP;
+		else if ((nmp->nm_flag & NFSMNT_KERB) && *tl++ == rpc_autherr) {
+			if (*tl == rpc_rejectedcred && failed_auth == 0) {
+				/*
+				 * First rejection: cut the old rpc header off
+				 * the arguments (mrest stays intact), free it
+				 * and rebuild the request with nfs_getauth().
+				 */
+				failed_auth++;
+				mheadend->m_next = (struct mbuf *)0;
+				m_freem(mrep);
+				m_freem(rep->r_mreq);
+				goto kerbauth;
+			} else
+				error = EAUTH;
+		} else
+			error = EACCES;
+		m_freem(mrep);
+		m_freem(rep->r_mreq);
+		free((caddr_t)rep, M_NFSREQ);
+		return (error);
+	}
+
+	/*
+	 * skip over the auth_verf, someday we may want to cache auth_short's
+	 * for nfs_reqhead(), but for now just dump it
+	 */
+	if (*++tl != 0) {
+		i = nfsm_rndup(fxdr_unsigned(long, *tl));
+		nfsm_adv(i);
+	}
+	nfsm_dissect(tl, u_long *, NFSX_UNSIGNED);
+	/* 0 == ok */
+	if (*tl == 0) {
+		nfsm_dissect(tl, u_long *, NFSX_UNSIGNED);
+		if (*tl != 0) {
+			error = fxdr_unsigned(int, *tl);
+			m_freem(mrep);
+			if ((nmp->nm_flag & NFSMNT_NQNFS) &&
+			    error == NQNFS_TRYLATER) {
+				/*
+				 * Server asked us to come back later: sleep
+				 * out the delay, grow it for next time and
+				 * resend the same request.
+				 */
+				error = 0;
+				waituntil = time.tv_sec + trylater_delay;
+				while (time.tv_sec < waituntil)
+					(void) tsleep((caddr_t)&lbolt,
+						PSOCK, "nqnfstry", 0);
+				trylater_delay *= nfs_backoff[trylater_cnt];
+				if (trylater_cnt < 7)
+					trylater_cnt++;
+				goto tryagain;
+			}
+
+			/*
+			 * If the File Handle was stale, invalidate the
+			 * lookup cache, just in case.
+			 */
+			if (error == ESTALE)
+				cache_purge(vp);
+			m_freem(rep->r_mreq);
+			free((caddr_t)rep, M_NFSREQ);
+			return (error);
+		}
+
+		/*
+		 * For nqnfs, get any lease in reply
+		 */
+		if (nmp->nm_flag & NFSMNT_NQNFS) {
+			nfsm_dissect(tl, u_long *, NFSX_UNSIGNED);
+			if (*tl) {
+				np = VTONFS(vp);
+				nqlflag = fxdr_unsigned(int, *tl);
+				nfsm_dissect(tl, u_long *, 4*NFSX_UNSIGNED);
+				cachable = fxdr_unsigned(int, *tl++);
+				/* Expiry = send time + granted duration. */
+				reqtime += fxdr_unsigned(int, *tl++);
+				if (reqtime > time.tv_sec) {
+				    fxdr_hyper(tl, &frev);
+				    nqnfs_clientlease(nmp, np, nqlflag,
+					cachable, reqtime, frev);
+				}
+			}
+		}
+		*mrp = mrep;
+		*mdp = md;
+		*dposp = dpos;
+		m_freem(rep->r_mreq);
+		FREE((caddr_t)rep, M_NFSREQ);
+		return (0);
+	}
+	/* Accepted, but with a non-zero rpc accept status. */
+	m_freem(mrep);
+	m_freem(rep->r_mreq);
+	free((caddr_t)rep, M_NFSREQ);
+	error = EPROTONOSUPPORT;
+nfsmout:
+	return (error);
+}
+
+/*
+ * Generate the rpc reply header; results return via mrq, mbp and bposp.
+ * siz arg. is used to decide if adding a cluster is worthwhile
+ */
+nfs_rephead(siz, nd, err, cache, frev, mrq, mbp, bposp)
+	int siz;
+	struct nfsd *nd;	/* supplies nd_retxid, nd_nqlflag, nd_duration */
+	int err;	/* selects the reply form; 0 and VNOVAL are not counted as errors */
+	int cache;	/* only encoded when a lease is piggybacked */
+	u_quad_t *frev;	/* only encoded (txdr_hyper) when a lease is piggybacked */
+	struct mbuf **mrq;	/* out: receives mreq */
+	struct mbuf **mbp;	/* out: receives mb */
+	caddr_t *bposp;	/* out: receives bpos */
+{
+	register u_long *tl;
+	register struct mbuf *mreq;
+	caddr_t bpos;
+	struct mbuf *mb, *mb2;	/* mb2 is unused here directly; presumably needed by nfsm_build() */
+
+	MGETHDR(mreq, M_WAIT, MT_DATA);
+	mb = mreq;
+	/*
+	 * If this is a big reply, use a cluster else
+	 * try and leave leading space for the lower level headers.
+	 */
+	siz += RPC_REPLYSIZ;
+	if (siz >= MINCLSIZE) {
+		MCLGET(mreq, M_WAIT);
+	} else
+		mreq->m_data += max_hdr;
+	tl = mtod(mreq, u_long *);
+	mreq->m_len = 6*NFSX_UNSIGNED;	/* six words are stored below (five for NQNFS_AUTHERR) */
+	bpos = ((caddr_t)tl)+mreq->m_len;
+	*tl++ = nd->nd_retxid;
+	*tl++ = rpc_reply;
+	if (err == ERPCMISMATCH || err == NQNFS_AUTHERR) {
+		*tl++ = rpc_msgdenied;
+		if (err == NQNFS_AUTHERR) {
+			*tl++ = rpc_autherr;
+			*tl = rpc_rejectedcred;
+			mreq->m_len -= NFSX_UNSIGNED;	/* this form is one word shorter */
+			bpos -= NFSX_UNSIGNED;
+		} else {
+			*tl++ = rpc_mismatch;
+			*tl++ = txdr_unsigned(2);
+			*tl = txdr_unsigned(2);
+		}
+	} else {
+		*tl++ = rpc_msgaccepted;
+		*tl++ = 0;	/* two zero words; presumably the (null) reply verifier -- confirm */
+		*tl++ = 0;
+		switch (err) {
+		case EPROGUNAVAIL:
+			*tl = txdr_unsigned(RPC_PROGUNAVAIL);
+			break;
+		case EPROGMISMATCH:
+			*tl = txdr_unsigned(RPC_PROGMISMATCH);
+			nfsm_build(tl, u_long *, 2*NFSX_UNSIGNED);	/* two more words follow */
+			*tl++ = txdr_unsigned(2);
+			*tl = txdr_unsigned(2);	/* someday 3 */
+			break;
+		case EPROCUNAVAIL:
+			*tl = txdr_unsigned(RPC_PROCUNAVAIL);
+			break;
+		default:
+			*tl = 0;
+			if (err != VNOVAL) {	/* VNOVAL: no status word is appended */
+				nfsm_build(tl, u_long *, NFSX_UNSIGNED);
+				if (err)
+					*tl = txdr_unsigned(nfsrv_errmap[err - 1]);	/* NOTE(review): err is not range-checked */
+				else
+					*tl = 0;
+			}
+			break;
+		};
+	}
+
+	/*
+	 * For nqnfs, piggyback lease as requested.
+	 */
+	if (nd->nd_nqlflag != NQL_NOVAL && err == 0) {
+		if (nd->nd_nqlflag) {
+			nfsm_build(tl, u_long *, 5*NFSX_UNSIGNED);	/* flag, cache, duration + frev */
+			*tl++ = txdr_unsigned(nd->nd_nqlflag);
+			*tl++ = txdr_unsigned(cache);
+			*tl++ = txdr_unsigned(nd->nd_duration);
+			txdr_hyper(frev, tl);
+		} else {
+			if (nd->nd_nqlflag != 0)	/* NOTE(review): cannot be true in this else branch */
+				panic("nqreph");
+			nfsm_build(tl, u_long *, NFSX_UNSIGNED);
+			*tl = 0;	/* single zero word: no lease */
+		}
+	}
+	*mrq = mreq;
+	*mbp = mb;
+	*bposp = bpos;
+	if (err != 0 && err != VNOVAL)
+		nfsstats.srvrpc_errs++;
+	return (0);
+}
+
+/*
+ * Nfs timer routine
+ * Scan the nfsreq list and retransmit any requests that have timed out
+ * To avoid retransmission attempts on STREAM sockets (in the future) make
+ * sure to set the r_retry field to 0 (implies nm_retry == 0).
+ */
+void
+nfs_timer(arg)
+	void *arg;	/* not referenced */
+{
+	register struct nfsreq *rep;
+	register struct mbuf *m;
+	register struct socket *so;
+	register struct nfsmount *nmp;
+	register int timeo;
+	static long lasttime = 0;	/* time.tv_sec at the last nqnfs_serverd() call */
+	int s, error;
+
+	s = splnet();	/* restored by splx(s) below */
+	for (rep = nfsreqh.r_next; rep != &nfsreqh; rep = rep->r_next) {
+		nmp = rep->r_nmp;
+		if (rep->r_mrep || (rep->r_flags & R_SOFTTERM))	/* r_mrep set or already terminated */
+			continue;
+		if (nfs_sigintr(nmp, rep, rep->r_procp)) {
+			rep->r_flags |= R_SOFTTERM;
+			continue;
+		}
+		if (rep->r_rtt >= 0) {	/* a negative r_rtt bypasses the timeout test */
+			rep->r_rtt++;
+			if (nmp->nm_flag & NFSMNT_DUMBTIMR)
+				timeo = nmp->nm_timeo;
+			else
+				timeo = NFS_RTO(nmp, proct[rep->r_procnum]);
+			if (nmp->nm_timeouts > 0)
+				timeo *= nfs_backoff[nmp->nm_timeouts - 1];
+			if (rep->r_rtt <= timeo)	/* not timed out yet */
+				continue;
+			if (nmp->nm_timeouts < 8)	/* capped at 8, so the index above stays <= 7 */
+				nmp->nm_timeouts++;
+		}
+		/*
+		 * Check for server not responding
+		 */
+		if ((rep->r_flags & R_TPRINTFMSG) == 0 &&
+		     rep->r_rexmit > nmp->nm_deadthresh) {
+			nfs_msg(rep->r_procp,
+			    nmp->nm_mountp->mnt_stat.f_mntfromname,
+			    "not responding");
+			rep->r_flags |= R_TPRINTFMSG;	/* message is printed once per request */
+		}
+		if (rep->r_rexmit >= rep->r_retry) {	/* too many */
+			nfsstats.rpctimeouts++;
+			rep->r_flags |= R_SOFTTERM;
+			continue;
+		}
+		if (nmp->nm_sotype != SOCK_DGRAM) {	/* non-datagram: count it, but do not resend */
+			if (++rep->r_rexmit > NFS_MAXREXMIT)
+				rep->r_rexmit = NFS_MAXREXMIT;
+			continue;
+		}
+		if ((so = nmp->nm_so) == NULL)
+			continue;
+
+		/*
+		 * If there is enough space and the window allows..
+		 *	Resend it
+		 * Set r_rtt to -1 in case we fail to send it now.
+		 */
+		rep->r_rtt = -1;
+		if (sbspace(&so->so_snd) >= rep->r_mreq->m_pkthdr.len &&
+		   ((nmp->nm_flag & NFSMNT_DUMBTIMR) ||
+		    (rep->r_flags & R_SENT) ||
+		    nmp->nm_sent < nmp->nm_cwnd) &&
+		   (m = m_copym(rep->r_mreq, 0, M_COPYALL, M_DONTWAIT))){
+			if ((nmp->nm_flag & NFSMNT_NOCONN) == 0)
+			    error = (*so->so_proto->pr_usrreq)(so, PRU_SEND, m,
+			    (struct mbuf *)0, (struct mbuf *)0);
+			else
+			    error = (*so->so_proto->pr_usrreq)(so, PRU_SEND, m,
+			    nmp->nm_nam, (struct mbuf *)0);	/* NFSMNT_NOCONN: pass the address */
+			if (error) {
+				if (NFSIGNORE_SOERROR(nmp->nm_soflags, error))
+					so->so_error = 0;
+			} else {
+				/*
+				 * Iff first send, start timing
+				 * else turn timing off, backoff timer
+				 * and divide congestion window by 2.
+				 */
+				if (rep->r_flags & R_SENT) {
+					rep->r_flags &= ~R_TIMING;
+					if (++rep->r_rexmit > NFS_MAXREXMIT)
+						rep->r_rexmit = NFS_MAXREXMIT;
+					nmp->nm_cwnd >>= 1;
+					if (nmp->nm_cwnd < NFS_CWNDSCALE)	/* floor for the window */
+						nmp->nm_cwnd = NFS_CWNDSCALE;
+					nfsstats.rpcretries++;
+				} else {
+					rep->r_flags |= R_SENT;
+					nmp->nm_sent += NFS_CWNDSCALE;
+				}
+				rep->r_rtt = 0;	/* sent: timing restarts from zero */
+			}
+		}
+	}
+
+	/*
+	 * Call the nqnfs server timer once a second to handle leases.
+	 */
+	if (lasttime != time.tv_sec) {
+		lasttime = time.tv_sec;
+		nqnfs_serverd();
+	}
+	splx(s);
+	timeout(nfs_timer, (void *)0, hz / NFS_HZ);	/* reschedule ourselves */
+}
+
+/*
+ * Decide whether request "rep" of process "p" on mount "nmp" should give
+ * up: EINTR if it is already soft-terminated or, on NFSMNT_INT mounts, if
+ * an unmasked, unignored NFSINT_SIGMASK signal is pending; 0 otherwise.
+ */
+nfs_sigintr(nmp, rep, p)
+	struct nfsmount *nmp;
+	struct nfsreq *rep;
+	register struct proc *p;
+{
+
+	if (rep != NULL && (rep->r_flags & R_SOFTTERM) != 0)
+		return (EINTR);
+	if ((nmp->nm_flag & NFSMNT_INT) == 0 || p == NULL)
+		return (0);
+	/* Pending signals that are neither blocked nor ignored. */
+	if (p->p_siglist & ~p->p_sigmask & ~p->p_sigignore & NFSINT_SIGMASK)
+		return (EINTR);
+	return (0);
+}
+
+/*
+ * Lock a socket against others.  Needed for STREAM sockets so that an entire
+ * rpc request/reply is transferred, and to avoid races between processes
+ * with requests in progress when a reconnect is necessary.  "rep" may be
+ * null, in which case no signal check is made.  Returns 0 or EINTR.
+ */
+nfs_sndlock(flagp, rep)
+	register int *flagp;
+	struct nfsreq *rep;
+{
+	struct proc *p;
+	int slpflag = 0, slptimeo = 0;
+
+	if (rep) {
+		p = rep->r_procp;
+		if (rep->r_nmp->nm_flag & NFSMNT_INT)
+			slpflag = PCATCH;
+	} else
+		p = (struct proc *)0;
+	while (*flagp & NFSMNT_SNDLOCK) {
+		if (rep && nfs_sigintr(rep->r_nmp, rep, p))	/* rep may be null: never dereference it blindly */
+			return (EINTR);
+		*flagp |= NFSMNT_WANTSND;	/* ask nfs_sndunlock() for a wakeup */
+		(void) tsleep((caddr_t)flagp, slpflag | (PZERO - 1), "nfsndlck",
+			slptimeo);
+		if (slpflag == PCATCH) {	/* PCATCH on the first sleep only; then time out every 2 * hz */
+			slpflag = 0;
+			slptimeo = 2 * hz;
+		}
+	}
+	*flagp |= NFSMNT_SNDLOCK;
+	return (0);
+}
+
+/*
+ * Release the send lock in *flagp and wake up any waiters.
+ */
+void
+nfs_sndunlock(flagp)
+	register int *flagp;
+{
+	register int wanted;
+
+	if (!(*flagp & NFSMNT_SNDLOCK))
+		panic("nfs sndunlock");
+	wanted = *flagp & NFSMNT_WANTSND;	/* note waiters before clearing */
+	*flagp &= ~(NFSMNT_SNDLOCK | NFSMNT_WANTSND);
+	if (wanted)
+		wakeup((caddr_t)flagp);
+}
+
+nfs_rcvlock(rep)
+	register struct nfsreq *rep;
+{
+	register struct nfsmount *nmp = rep->r_nmp;
+	register int *flagp = &nmp->nm_flag;
+	int slptimeo = 0;
+	int slpflag = (*flagp & NFSMNT_INT) ? PCATCH : 0;
+
+	/* Take NFSMNT_RCVLOCK in the mount flags; returns 0 or EINTR. */
+	/* The loop sleeps on flagp until the current holder lets go. */
+	while ((*flagp & NFSMNT_RCVLOCK) != 0) {
+		if (nfs_sigintr(nmp, rep, rep->r_procp) != 0)
+			return (EINTR);
+		*flagp |= NFSMNT_WANTRCV;
+		(void) tsleep((caddr_t)flagp, slpflag | (PZERO - 1), "nfsrcvlk",
+			slptimeo);
+		if (slpflag == PCATCH) {	/* PCATCH on the first sleep only */
+			slpflag = 0;
+			slptimeo = 2 * hz;
+		}
+	}
+	*flagp |= NFSMNT_RCVLOCK;
+	return (0);
+}
+
+/*
+ * Release the receive lock in *flagp and wake up any waiters.
+ */
+void
+nfs_rcvunlock(flagp)
+	register int *flagp;
+{
+	register int wanted;
+
+	if (!(*flagp & NFSMNT_RCVLOCK))
+		panic("nfs rcvunlock");
+	wanted = *flagp & NFSMNT_WANTRCV;	/* note waiters before clearing */
+	*flagp &= ~(NFSMNT_RCVLOCK | NFSMNT_WANTRCV);
+	if (wanted)
+		wakeup((caddr_t)flagp);
+}
+
+/*
+ * Check for badly aligned mbuf data areas and
+ * realign data in an mbuf list by copying the data areas up, as required.
+ */
+void
+nfs_realign(m, hsiz)
+	register struct mbuf *m;
+	int hsiz;	/* byte count the first rewritten mbuf is limited to when possible */
+{
+	register struct mbuf *m2;	/* next mbuf available as a copy destination */
+	register int siz, mlen, olen;	/* chunk size, room left in dest, bytes left in source */
+	register caddr_t tcp, fcp;	/* "to" and "from" copy pointers */
+	struct mbuf *mnew;	/* mbuf currently being filled */
+
+	while (m) {
+	    /*
+	     * This never happens for UDP, rarely happens for TCP
+	     * but frequently happens for iso transport.
+	     */
+	    if ((m->m_len & 0x3) || (mtod(m, int) & 0x3)) {	/* length or address not a multiple of 4 */
+		olen = m->m_len;
+		fcp = mtod(m, caddr_t);
+		if ((int)fcp & 0x3) {
+			m->m_flags &= ~M_PKTHDR;
+			if (m->m_flags & M_EXT)
+				m->m_data = m->m_ext.ext_buf +
+					((m->m_ext.ext_size - olen) & ~0x3);
+			else
+				m->m_data = m->m_dat;
+		}
+		m->m_len = 0;	/* refilled by the copy loop below */
+		tcp = mtod(m, caddr_t);
+		mnew = m;
+		m2 = m->m_next;
+	
+		/*
+		 * If possible, only put the first invariant part
+		 * of the RPC header in the first mbuf.
+		 */
+		mlen = M_TRAILINGSPACE(m);
+		if (olen <= hsiz && mlen > hsiz)
+			mlen = hsiz;
+	
+		/*
+		 * Loop through the mbuf list consolidating data.
+		 */
+		while (m) {
+			while (olen > 0) {
+				if (mlen == 0) {	/* destination full: move on to m2 */
+					m2->m_flags &= ~M_PKTHDR;
+					if (m2->m_flags & M_EXT)
+						m2->m_data = m2->m_ext.ext_buf;
+					else
+						m2->m_data = m2->m_dat;
+					m2->m_len = 0;
+					mlen = M_TRAILINGSPACE(m2);
+					tcp = mtod(m2, caddr_t);
+					mnew = m2;
+					m2 = m2->m_next;
+				}
+				siz = min(mlen, olen);
+				if (tcp != fcp)	/* skip the copy when data is already in place */
+					bcopy(fcp, tcp, siz);
+				mnew->m_len += siz;
+				mlen -= siz;
+				olen -= siz;
+				tcp += siz;
+				fcp += siz;
+			}
+			m = m->m_next;	/* next source mbuf */
+			if (m) {
+				olen = m->m_len;
+				fcp = mtod(m, caddr_t);
+			}
+		}
+	
+		/*
+		 * Finally, set m_len == 0 for any trailing mbufs that have
+		 * been copied out of.
+		 */
+		while (m2) {
+			m2->m_len = 0;
+			m2 = m2->m_next;
+		}
+		return;	/* the rest of the chain was handled by the loop above */
+	    }
+	    m = m->m_next;
+	}
+}
+
+/*
+ * Socket upcall routine for the nfsd sockets.
+ * The caddr_t arg is a pointer to the "struct nfssvc_sock".
+ * Essentially do as much as possible non-blocking, else punt and it will
+ * be called with M_WAIT from an nfsd.
+ */
+void
+nfsrv_rcv(so, arg, waitflag)
+	struct socket *so;
+	caddr_t arg;
+	int waitflag;	/* compared against M_DONTWAIT below */
+{
+	register struct nfssvc_sock *slp = (struct nfssvc_sock *)arg;
+	register struct mbuf *m;
+	struct mbuf *mp, *nam;	/* data chain and address handed back by soreceive() */
+	struct uio auio;
+	int flags, error;
+
+	if ((slp->ns_flag & SLP_VALID) == 0)
+		return;
+#ifdef notdef
+	/*
+	 * Define this to test for nfsds handling this under heavy load.
+	 */
+	if (waitflag == M_DONTWAIT) {
+		slp->ns_flag |= SLP_NEEDQ; goto dorecs;
+	}
+#endif
+	auio.uio_procp = NULL;
+	if (so->so_type == SOCK_STREAM) {
+		/*
+		 * If there are already records on the queue, defer soreceive()
+		 * to an nfsd so that there is feedback to the TCP layer that
+		 * the nfs servers are heavily loaded.
+		 */
+		if (slp->ns_rec && waitflag == M_DONTWAIT) {
+			slp->ns_flag |= SLP_NEEDQ;
+			goto dorecs;
+		}
+
+		/*
+		 * Do soreceive().
+		 */
+		auio.uio_resid = 1000000000;	/* bytes received = 1000000000 - uio_resid afterwards */
+		flags = MSG_DONTWAIT;
+		error = soreceive(so, &nam, &auio, &mp, (struct mbuf **)0, &flags);
+		if (error || mp == (struct mbuf *)0) {
+			if (error == EWOULDBLOCK)
+				slp->ns_flag |= SLP_NEEDQ;
+			else
+				slp->ns_flag |= SLP_DISCONN;	/* any other error, or no data at all */
+			goto dorecs;
+		}
+		m = mp;
+		if (slp->ns_rawend) {	/* append to raw data already held */
+			slp->ns_rawend->m_next = m;
+			slp->ns_cc += 1000000000 - auio.uio_resid;
+		} else {
+			slp->ns_raw = m;
+			slp->ns_cc = 1000000000 - auio.uio_resid;
+		}
+		while (m->m_next)
+			m = m->m_next;
+		slp->ns_rawend = m;	/* last mbuf of the raw chain */
+
+		/*
+		 * Now try and parse record(s) out of the raw stream data.
+		 */
+		if (error = nfsrv_getstream(slp, waitflag)) {
+			if (error == EPERM)
+				slp->ns_flag |= SLP_DISCONN;
+			else
+				slp->ns_flag |= SLP_NEEDQ;
+		}
+	} else {
+		do {	/* non-stream socket: loop until soreceive() returns no data */
+			auio.uio_resid = 1000000000;
+			flags = MSG_DONTWAIT;
+			error = soreceive(so, &nam, &auio, &mp,
+						(struct mbuf **)0, &flags);
+			if (mp) {
+				nfs_realign(mp, 10 * NFSX_UNSIGNED);
+				if (nam) {	/* address mbuf goes in front of the data */
+					m = nam;
+					m->m_next = mp;
+				} else
+					m = mp;
+				if (slp->ns_recend)
+					slp->ns_recend->m_nextpkt = m;
+				else
+					slp->ns_rec = m;
+				slp->ns_recend = m;	/* queue tail; records are linked by m_nextpkt */
+				m->m_nextpkt = (struct mbuf *)0;
+			}
+			if (error) {
+				if ((so->so_proto->pr_flags & PR_CONNREQUIRED)
+					&& error != EWOULDBLOCK) {
+					slp->ns_flag |= SLP_DISCONN;
+					goto dorecs;
+				}
+			}
+		} while (mp);
+	}
+
+	/*
+	 * Now try and process the request records, non-blocking.
+	 */
+dorecs:
+	if (waitflag == M_DONTWAIT &&
+		(slp->ns_rec || (slp->ns_flag & (SLP_NEEDQ | SLP_DISCONN))))
+		nfsrv_wakenfsd(slp);	/* only for M_DONTWAIT calls */
+}
+
+/*
+ * Try and extract an RPC request from the mbuf data list received on a
+ * stream socket. The "waitflag" argument indicates whether or not it
+ * can sleep. Returns 0, EPERM (bad record length) or EWOULDBLOCK.
+ */
+nfsrv_getstream(slp, waitflag)
+	register struct nfssvc_sock *slp;
+	int waitflag;	/* passed to m_copym() as its wait argument */
+{
+	register struct mbuf *m;
+	register char *cp1, *cp2;
+	register int len;
+	struct mbuf *om, *m2, *recm;	/* previous mbuf, partial copy, head of the extracted record */
+	u_long recmark;
+
+	if (slp->ns_flag & SLP_GETSTREAM)	/* flag is set for the duration of this function */
+		panic("nfs getstream");
+	slp->ns_flag |= SLP_GETSTREAM;
+	for (;;) {
+	    if (slp->ns_reclen == 0) {	/* no record length known yet: read a record mark */
+		if (slp->ns_cc < NFSX_UNSIGNED) {
+			slp->ns_flag &= ~SLP_GETSTREAM;
+			return (0);
+		}
+		m = slp->ns_raw;
+		if (m->m_len >= NFSX_UNSIGNED) {
+			bcopy(mtod(m, caddr_t), (caddr_t)&recmark, NFSX_UNSIGNED);
+			m->m_data += NFSX_UNSIGNED;
+			m->m_len -= NFSX_UNSIGNED;
+		} else {	/* mark straddles mbufs: gather it a byte at a time */
+			cp1 = (caddr_t)&recmark;
+			cp2 = mtod(m, caddr_t);
+			while (cp1 < ((caddr_t)&recmark) + NFSX_UNSIGNED) {
+				while (m->m_len == 0) {
+					m = m->m_next;
+					cp2 = mtod(m, caddr_t);
+				}
+				*cp1++ = *cp2++;
+				m->m_data++;
+				m->m_len--;
+			}
+		}
+		slp->ns_cc -= NFSX_UNSIGNED;
+		slp->ns_reclen = ntohl(recmark) & ~0x80000000;	/* top bit is masked off */
+		if (slp->ns_reclen < NFS_MINPACKET || slp->ns_reclen > NFS_MAXPACKET) {
+			slp->ns_flag &= ~SLP_GETSTREAM;
+			return (EPERM);
+		}
+	    }
+
+	    /*
+	     * Now get the record part.
+	     */
+	    if (slp->ns_cc == slp->ns_reclen) {	/* raw data is exactly one record */
+		recm = slp->ns_raw;
+		slp->ns_raw = slp->ns_rawend = (struct mbuf *)0;
+		slp->ns_cc = slp->ns_reclen = 0;
+	    } else if (slp->ns_cc > slp->ns_reclen) {	/* more than one record: split the chain */
+		len = 0;
+		m = slp->ns_raw;
+		om = (struct mbuf *)0;
+		while (len < slp->ns_reclen) {
+			if ((len + m->m_len) > slp->ns_reclen) {	/* record ends inside this mbuf */
+				m2 = m_copym(m, 0, slp->ns_reclen - len,
+					waitflag);
+				if (m2) {
+					if (om) {
+						om->m_next = m2;
+						recm = slp->ns_raw;
+					} else
+						recm = m2;
+					m->m_data += slp->ns_reclen - len;
+					m->m_len -= slp->ns_reclen - len;
+					len = slp->ns_reclen;
+				} else {
+					slp->ns_flag &= ~SLP_GETSTREAM;
+					return (EWOULDBLOCK);
+				}
+			} else if ((len + m->m_len) == slp->ns_reclen) {	/* record ends at an mbuf boundary */
+				om = m;
+				len += m->m_len;
+				m = m->m_next;
+				recm = slp->ns_raw;
+				om->m_next = (struct mbuf *)0;
+			} else {
+				om = m;
+				len += m->m_len;
+				m = m->m_next;
+			}
+		}
+		slp->ns_raw = m;	/* remaining raw data starts here */
+		slp->ns_cc -= len;
+		slp->ns_reclen = 0;
+	    } else {	/* record not complete yet */
+		slp->ns_flag &= ~SLP_GETSTREAM;
+		return (0);
+	    }
+	    nfs_realign(recm, 10 * NFSX_UNSIGNED);
+	    if (slp->ns_recend)	/* append the record to the ns_rec queue */
+		slp->ns_recend->m_nextpkt = recm;
+	    else
+		slp->ns_rec = recm;
+	    slp->ns_recend = recm;
+	}
+}
+
+/*
+ * Dequeue the first queued record of slp and parse its RPC header into nd.
+ */
+nfsrv_dorec(slp, nd)
+	register struct nfssvc_sock *slp;
+	register struct nfsd *nd;
+{
+	register struct mbuf *rec, *req;
+	int error;
+
+	if (!(slp->ns_flag & SLP_VALID) || slp->ns_rec == NULL)
+		return (ENOBUFS);
+	/* Unlink the head record from the m_nextpkt queue. */
+	rec = slp->ns_rec;
+	slp->ns_rec = rec->m_nextpkt;
+	if (slp->ns_rec == NULL)
+		slp->ns_recend = NULL;
+	else
+		rec->m_nextpkt = NULL;
+	req = rec;
+	nd->nd_nam = NULL;
+	if (rec->m_type == MT_SONAME) {	/* leading MT_SONAME mbuf: split it off */
+		nd->nd_nam = rec;
+		req = rec->m_next;
+		rec->m_next = NULL;
+	}
+	nd->nd_md = nd->nd_mrep = req;
+	nd->nd_dpos = mtod(req, caddr_t);
+	if ((error = nfs_getreq(nd, TRUE)) != 0)
+		m_freem(nd->nd_nam);
+	return (error);
+}
+
+/*
+ * Parse an RPC request
+ * - verify it
+ * - fill in the cred struct.
+ */
+nfs_getreq(nd, has_header)
+	register struct nfsd *nd;
+	int has_header;	/* nonzero: message still starts with the xid and call words */
+{
+	register int len, i;
+	register u_long *tl;
+	register long t1;	/* not used directly here; presumably needed by the nfsm_* macros */
+	struct uio uio;
+	struct iovec iov;
+	caddr_t dpos, cp2;
+	u_long nfsvers, auth_type;
+	int error = 0, nqnfs = 0;
+	struct mbuf *mrep, *md;
+
+	mrep = nd->nd_mrep;
+	md = nd->nd_md;
+	dpos = nd->nd_dpos;
+	if (has_header) {
+		nfsm_dissect(tl, u_long *, 10*NFSX_UNSIGNED);
+		nd->nd_retxid = *tl++;
+		if (*tl++ != rpc_call) {
+			m_freem(mrep);
+			return (EBADRPC);
+		}
+	} else {
+		nfsm_dissect(tl, u_long *, 8*NFSX_UNSIGNED);	/* same words minus the two read above */
+	}
+	nd->nd_repstat = 0;
+	if (*tl++ != rpc_vers) {
+		nd->nd_repstat = ERPCMISMATCH;
+		nd->nd_procnum = NFSPROC_NOOP;
+		return (0);	/* failure is reported via nd_repstat; mrep is not freed here */
+	}
+	nfsvers = nfs_vers;
+	if (*tl != nfs_prog) {
+		if (*tl == nqnfs_prog) {
+			nqnfs++;
+			nfsvers = nqnfs_vers;
+		} else {
+			nd->nd_repstat = EPROGUNAVAIL;
+			nd->nd_procnum = NFSPROC_NOOP;
+			return (0);
+		}
+	}
+	tl++;
+	if (*tl++ != nfsvers) {
+		nd->nd_repstat = EPROGMISMATCH;
+		nd->nd_procnum = NFSPROC_NOOP;
+		return (0);
+	}
+	nd->nd_procnum = fxdr_unsigned(u_long, *tl++);
+	if (nd->nd_procnum == NFSPROC_NULL)	/* null procedure: nothing further is parsed */
+		return (0);
+	if (nd->nd_procnum >= NFS_NPROCS ||
+		(!nqnfs && nd->nd_procnum > NFSPROC_STATFS) ||
+		(*tl != rpc_auth_unix && *tl != rpc_auth_kerb)) {
+		nd->nd_repstat = EPROCUNAVAIL;
+		nd->nd_procnum = NFSPROC_NOOP;
+		return (0);
+	}
+	auth_type = *tl++;
+	len = fxdr_unsigned(int, *tl++);	/* length that follows the auth type */
+	if (len < 0 || len > RPCAUTH_MAXSIZ) {
+		m_freem(mrep);
+		return (EBADRPC);
+	}
+
+	/*
+	 * Handle auth_unix or auth_kerb.
+	 */
+	if (auth_type == rpc_auth_unix) {
+		len = fxdr_unsigned(int, *++tl);	/* skips one word, then reads a length (presumably the host name's) */
+		if (len < 0 || len > NFS_MAXNAMLEN) {
+			m_freem(mrep);
+			return (EBADRPC);
+		}
+		nfsm_adv(nfsm_rndup(len));
+		nfsm_dissect(tl, u_long *, 3*NFSX_UNSIGNED);	/* uid, gid, group count */
+		nd->nd_cr.cr_uid = fxdr_unsigned(uid_t, *tl++);
+		nd->nd_cr.cr_gid = fxdr_unsigned(gid_t, *tl++);
+		len = fxdr_unsigned(int, *tl);
+		if (len < 0 || len > RPCAUTH_UNIXGIDS) {
+			m_freem(mrep);
+			return (EBADRPC);
+		}
+		nfsm_dissect(tl, u_long *, (len + 2)*NFSX_UNSIGNED);	/* the groups plus two trailing words */
+		for (i = 1; i <= len; i++)	/* filled from slot 1; extras beyond NGROUPS-1 are skipped */
+			if (i < NGROUPS)
+				nd->nd_cr.cr_groups[i] = fxdr_unsigned(gid_t, *tl++);
+			else
+				tl++;
+		nd->nd_cr.cr_ngroups = (len >= NGROUPS) ? NGROUPS : (len + 1);
+	} else if (auth_type == rpc_auth_kerb) {
+		nd->nd_cr.cr_uid = fxdr_unsigned(uid_t, *tl++);
+		nd->nd_authlen = fxdr_unsigned(int, *tl);
+		uio.uio_resid = nfsm_rndup(nd->nd_authlen);
+		if (uio.uio_resid > (len - 2 * NFSX_UNSIGNED)) {	/* must fit inside the credential length */
+			m_freem(mrep);
+			return (EBADRPC);
+		}
+		uio.uio_offset = 0;
+		uio.uio_iov = &iov;
+		uio.uio_iovcnt = 1;
+		uio.uio_segflg = UIO_SYSSPACE;
+		iov.iov_base = (caddr_t)nd->nd_authstr;	/* copied into nd_authstr */
+		iov.iov_len = RPCAUTH_MAXSIZ;
+		nfsm_mtouio(&uio, uio.uio_resid);
+		nfsm_dissect(tl, u_long *, 2 * NFSX_UNSIGNED);
+		nd->nd_flag |= NFSD_NEEDAUTH;
+	}
+
+	/*
+	 * Do we have any use for the verifier.
+	 * According to the "Remote Procedure Call Protocol Spec." it
+	 * should be AUTH_NULL, but some clients make it AUTH_UNIX?
+	 * For now, just skip over it
+	 */
+	len = fxdr_unsigned(int, *++tl);	/* skips one word, then reads the verifier length */
+	if (len < 0 || len > RPCAUTH_MAXSIZ) {
+		m_freem(mrep);
+		return (EBADRPC);
+	}
+	if (len > 0) {
+		nfsm_adv(nfsm_rndup(len));
+	}
+
+	/*
+	 * For nqnfs, get piggybacked lease request.
+	 */
+	if (nqnfs && nd->nd_procnum != NQNFSPROC_EVICTED) {
+		nfsm_dissect(tl, u_long *, NFSX_UNSIGNED);
+		nd->nd_nqlflag = fxdr_unsigned(int, *tl);
+		if (nd->nd_nqlflag) {	/* a duration word follows only a nonzero flag */
+			nfsm_dissect(tl, u_long *, NFSX_UNSIGNED);
+			nd->nd_duration = fxdr_unsigned(int, *tl);
+		} else
+			nd->nd_duration = NQ_MINLEASE;
+	} else {
+		nd->nd_nqlflag = NQL_NOVAL;
+		nd->nd_duration = NQ_MINLEASE;
+	}
+	nd->nd_md = md;	/* hand the advanced parse position back to the caller */
+	nd->nd_dpos = dpos;
+	return (0);
+nfsmout:	/* presumably the failure target of the nfsm_* macros above */
+	return (error);
+}
+
+/*
+ * Hand "slp" to the first nfsd that has NFSD_WAITING set and wake it up.
+ * SIDE EFFECT: If none is waiting, mark the socket SLP_DOREC and set
+ * NFSD_CHECKSLP on the nfsd list head.
+ */
+void
+nfsrv_wakenfsd(slp)
+	struct nfssvc_sock *slp;
+{
+	register struct nfsd *nd;
+
+	if (!(slp->ns_flag & SLP_VALID))
+		return;
+	for (nd = nfsd_head.nd_next; nd != (struct nfsd *)&nfsd_head;
+	    nd = nd->nd_next) {
+		if ((nd->nd_flag & NFSD_WAITING) == 0)
+			continue;
+		nd->nd_flag &= ~NFSD_WAITING;
+		if (nd->nd_slp != NULL)
+			panic("nfsd wakeup");
+		slp->ns_sref++;
+		nd->nd_slp = slp;
+		wakeup((caddr_t)nd);
+		return;
+	}
+	slp->ns_flag |= SLP_DOREC;
+	nfsd_head.nd_flag |= NFSD_CHECKSLP;
+}
+
+nfs_msg(p, server, msg)
+	struct proc *p;
+	char *server, *msg;
+{
+	tpr_t tpr = NULL;
+
+	/* Only open a tprintf session when a process was supplied. */
+	if (p != NULL)
+		tpr = tprintf_open(p);
+	/* Emits "nfs server <server>: <msg>". */
+	tprintf(tpr, "nfs server %s: %s\n", server, msg);
+	tprintf_close(tpr);
+}
diff --git a/sys/nfs/nfs_srvcache.c b/sys/nfs/nfs_srvcache.c
new file mode 100644
index 00000000000..63d8bb72d82
--- /dev/null
+++ b/sys/nfs/nfs_srvcache.c
@@ -0,0 +1,348 @@
+/*
+ * Copyright (c) 1989, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Rick Macklem at The University of Guelph.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)nfs_srvcache.c	8.1 (Berkeley) 6/10/93
+ */
+
+/*
+ * Reference: Chet Juszczak, "Improving the Performance and Correctness
+ *		of an NFS Server", in Proc. Winter 1989 USENIX Conference,
+ *		pages 53-63. San Diego, February 1989.
+ */
+#include <sys/param.h>
+#include <sys/vnode.h>
+#include <sys/mount.h>
+#include <sys/kernel.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/mbuf.h>
+#include <sys/malloc.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+
+#include <netinet/in.h>
+#ifdef ISO
+#include <netiso/iso.h>
+#endif
+#include <nfs/nfsm_subs.h>
+#include <nfs/rpcv2.h>
+#include <nfs/nfsv2.h>
+#include <nfs/nfs.h>
+#include <nfs/nfsrvcache.h>
+#include <nfs/nqnfs.h>
+
+long numnfsrvcache, desirednfsrvcache = NFSRVCACHESIZ;	/* entries allocated / allocation limit */
+
+#define	NFSRCHASH(xid)		(((xid) + ((xid) >> 24)) & rheadhash)	/* xid -> rheadhtbl index */
+static struct nfsrvcache *nfsrvlruhead, **nfsrvlrutail = &nfsrvlruhead;	/* LRU list; new entries go at the tail */
+static struct nfsrvcache **rheadhtbl;	/* hash chain heads, set up by nfsrv_initcache() */
+static u_long rheadhash;	/* mask applied by NFSRCHASH */
+
+#define TRUE	1
+#define	FALSE	0
+
+#define	NETFAMILY(rp) \
+		(((rp)->rc_flag & RC_INETADDR) ? AF_INET : AF_ISO)
+
+/*
+ * Array, indexed by rpc procedure number, that defines which nfs rpc's are nonidempotent
+ */
+int nonidempotent[NFS_NPROCS] = {
+	FALSE,
+	FALSE,
+	TRUE,	/* 2 */
+	FALSE,
+	FALSE,
+	FALSE,
+	FALSE,
+	FALSE,
+	TRUE,	/* 8 */
+	TRUE,	/* 9 */
+	TRUE,	/* 10 */
+	TRUE,	/* 11 */
+	TRUE,	/* 12 */
+	TRUE,	/* 13 */
+	TRUE,	/* 14 */
+	TRUE,	/* 15 */
+	FALSE,
+	FALSE,
+	FALSE,
+	FALSE,
+	FALSE,
+	FALSE,
+	FALSE,
+};
+
+/* True iff the rpc reply is an nfs status ONLY! (same indexing as above) */
+static int repliesstatus[NFS_NPROCS] = {
+	FALSE,
+	FALSE,
+	FALSE,
+	FALSE,
+	FALSE,
+	FALSE,
+	FALSE,
+	FALSE,
+	FALSE,
+	FALSE,
+	TRUE,	/* 10 */
+	TRUE,	/* 11 */
+	TRUE,	/* 12 */
+	TRUE,	/* 13 */
+	FALSE,
+	TRUE,	/* 15 */
+	FALSE,
+	FALSE,
+	FALSE,
+	FALSE,
+	FALSE,
+	FALSE,
+	TRUE,	/* 22 */
+};
+
+/*
+ * Initialize the server request cache: set up the hash table and its mask
+ */
+nfsrv_initcache()
+{
+
+	rheadhtbl = hashinit(desirednfsrvcache, M_NFSD, &rheadhash);	/* also stores rheadhash, used by NFSRCHASH */
+}
+
+/*
+ * Look for the request in the cache
+ * If found then
+ *    return action and optionally reply
+ * else
+ *    insert it in the cache
+ *
+ * The rules, as implemented below, are as follows:
+ * - if in progress, return RC_DROPIT
+ * - if done and a status or reply mbuf was cached, return RC_REPLY
+ * - if done with nothing cached, mark it in progress again, return RC_DOIT
+ * - requests carrying an nqnfs lease flag bypass the cache (RC_DOIT)
+ * Update/add new request at end of lru list
+ */
+nfsrv_getcache(nam, nd, repp)
+	struct mbuf *nam;	/* client address, matched with netaddr_match() */
+	register struct nfsd *nd;
+	struct mbuf **repp;	/* out: reply mbuf chain when RC_REPLY is returned */
+{
+	register struct nfsrvcache *rp, *rq, **rpp;
+	struct mbuf *mb;
+	struct sockaddr_in *saddr;
+	caddr_t bpos;
+	int ret;
+
+	if (nd->nd_nqlflag != NQL_NOVAL)
+		return (RC_DOIT);
+	rpp = &rheadhtbl[NFSRCHASH(nd->nd_retxid)];
+loop:
+	for (rp = *rpp; rp; rp = rp->rc_forw) {
+	    if (nd->nd_retxid == rp->rc_xid && nd->nd_procnum == rp->rc_proc &&
+		netaddr_match(NETFAMILY(rp), &rp->rc_haddr, nam)) {
+			if ((rp->rc_flag & RC_LOCKED) != 0) {
+				rp->rc_flag |= RC_WANTED;
+				(void) tsleep((caddr_t)rp, PZERO-1, "nfsrc", 0);
+				goto loop;	/* rescan the chain after sleeping */
+			}
+			rp->rc_flag |= RC_LOCKED;
+			/* If not at end of LRU chain, move it there */
+			if (rp->rc_next) {
+				/* remove from LRU chain */
+				*rp->rc_prev = rp->rc_next;
+				rp->rc_next->rc_prev = rp->rc_prev;
+				/* and replace at end of it */
+				rp->rc_next = NULL;
+				rp->rc_prev = nfsrvlrutail;
+				*nfsrvlrutail = rp;
+				nfsrvlrutail = &rp->rc_next;
+			}
+			if (rp->rc_state == RC_UNUSED)
+				panic("nfsrv cache");
+			if (rp->rc_state == RC_INPROG) {
+				nfsstats.srvcache_inproghits++;
+				ret = RC_DROPIT;
+			} else if (rp->rc_flag & RC_REPSTATUS) {	/* rebuild the reply from the saved status */
+				nfsstats.srvcache_nonidemdonehits++;
+				nfs_rephead(0, nd, rp->rc_status,
+				   0, (u_quad_t *)0, repp, &mb, &bpos);
+				ret = RC_REPLY;
+			} else if (rp->rc_flag & RC_REPMBUF) {	/* hand back a copy of the saved reply */
+				nfsstats.srvcache_nonidemdonehits++;
+				*repp = m_copym(rp->rc_reply, 0, M_COPYALL,
+						M_WAIT);
+				ret = RC_REPLY;
+			} else {
+				nfsstats.srvcache_idemdonehits++;
+				rp->rc_state = RC_INPROG;
+				ret = RC_DOIT;
+			}
+			rp->rc_flag &= ~RC_LOCKED;
+			if (rp->rc_flag & RC_WANTED) {
+				rp->rc_flag &= ~RC_WANTED;
+				wakeup((caddr_t)rp);
+			}
+			return (ret);
+		}
+	}
+	nfsstats.srvcache_misses++;
+	if (numnfsrvcache < desirednfsrvcache) {	/* below the limit: allocate a fresh entry */
+		rp = (struct nfsrvcache *)malloc((u_long)sizeof *rp,
+		    M_NFSD, M_WAITOK);
+		bzero((char *)rp, sizeof *rp);
+		numnfsrvcache++;
+		rp->rc_flag = RC_LOCKED;
+	} else {	/* at the limit: reuse the entry at the head of the LRU list */
+		rp = nfsrvlruhead;
+		while ((rp->rc_flag & RC_LOCKED) != 0) {
+			rp->rc_flag |= RC_WANTED;
+			(void) tsleep((caddr_t)rp, PZERO-1, "nfsrc", 0);
+			rp = nfsrvlruhead;
+		}
+		rp->rc_flag |= RC_LOCKED;
+		/* remove from hash chain */
+		if (rq = rp->rc_forw)
+			rq->rc_back = rp->rc_back;
+		*rp->rc_back = rq;
+		/* remove from LRU chain */
+		*rp->rc_prev = rp->rc_next;
+		rp->rc_next->rc_prev = rp->rc_prev;	/* NOTE(review): assumes the LRU head is not also the tail */
+		if (rp->rc_flag & RC_REPMBUF)
+			m_freem(rp->rc_reply);
+		if (rp->rc_flag & RC_NAM)
+			MFREE(rp->rc_nam, mb);
+		rp->rc_flag &= (RC_LOCKED | RC_WANTED);	/* drop every flag except these two */
+	}
+	/* place at end of LRU list */
+	rp->rc_next = NULL;
+	rp->rc_prev = nfsrvlrutail;
+	*nfsrvlrutail = rp;
+	nfsrvlrutail = &rp->rc_next;
+	rp->rc_state = RC_INPROG;
+	rp->rc_xid = nd->nd_retxid;
+	saddr = mtod(nam, struct sockaddr_in *);
+	switch (saddr->sin_family) {
+	case AF_INET:
+		rp->rc_flag |= RC_INETADDR;
+		rp->rc_inetaddr = saddr->sin_addr.s_addr;	/* only the address word is kept */
+		break;
+	case AF_ISO:
+	default:
+		rp->rc_flag |= RC_NAM;
+		rp->rc_nam = m_copym(nam, 0, M_COPYALL, M_WAIT);	/* keep a copy of the whole address mbuf */
+		break;
+	};
+	rp->rc_proc = nd->nd_procnum;
+	/* insert into hash chain */
+	if (rq = *rpp)
+		rq->rc_back = &rp->rc_forw;
+	rp->rc_forw = rq;
+	rp->rc_back = rpp;
+	*rpp = rp;
+	rp->rc_flag &= ~RC_LOCKED;
+	if (rp->rc_flag & RC_WANTED) {
+		rp->rc_flag &= ~RC_WANTED;
+		wakeup((caddr_t)rp);
+	}
+	return (RC_DOIT);
+}
+
+/*
+ * Update a request cache entry after the rpc has been done (no-op if no entry matches)
+ */
+void
+nfsrv_updatecache(nam, nd, repvalid, repmbuf)
+	struct mbuf *nam;	/* client address, matched with netaddr_match() */
+	register struct nfsd *nd;
+	int repvalid;	/* nonzero: the reply may be saved */
+	struct mbuf *repmbuf;	/* reply chain; copied, not kept */
+{
+	register struct nfsrvcache *rp;
+
+	if (nd->nd_nqlflag != NQL_NOVAL)	/* same bypass test as nfsrv_getcache() */
+		return;
+loop:
+	for (rp = rheadhtbl[NFSRCHASH(nd->nd_retxid)]; rp; rp = rp->rc_forw) {
+	    if (nd->nd_retxid == rp->rc_xid && nd->nd_procnum == rp->rc_proc &&
+		netaddr_match(NETFAMILY(rp), &rp->rc_haddr, nam)) {
+			if ((rp->rc_flag & RC_LOCKED) != 0) {
+				rp->rc_flag |= RC_WANTED;
+				(void) tsleep((caddr_t)rp, PZERO-1, "nfsrc", 0);
+				goto loop;	/* rescan the chain after sleeping */
+			}
+			rp->rc_flag |= RC_LOCKED;
+			rp->rc_state = RC_DONE;
+			/*
+			 * If we have a valid reply update status and save
+			 * the reply for non-idempotent rpc's.
+			 */
+			if (repvalid && nonidempotent[nd->nd_procnum]) {
+				if (repliesstatus[nd->nd_procnum]) {	/* status-only reply: save just the status */
+					rp->rc_status = nd->nd_repstat;
+					rp->rc_flag |= RC_REPSTATUS;
+				} else {
+					rp->rc_reply = m_copym(repmbuf,
+						0, M_COPYALL, M_WAIT);
+					rp->rc_flag |= RC_REPMBUF;
+				}
+			}
+			rp->rc_flag &= ~RC_LOCKED;
+			if (rp->rc_flag & RC_WANTED) {
+				rp->rc_flag &= ~RC_WANTED;
+				wakeup((caddr_t)rp);
+			}
+			return;
+		}
+	}
+}
+
+/* Clean out the cache and its mbufs. Called when the last nfsd terminates. */
+void
+nfsrv_cleancache()
+{
+	register struct nfsrvcache *rp;	/* entry being torn down */
+	while ((rp = nfsrvlruhead) != NULL) {	/* pop entries off the LRU list */
+		nfsrvlruhead = rp->rc_next;
+		if (rp->rc_flag & RC_REPMBUF)	/* saved reply chain */
+			m_freem(rp->rc_reply);
+		if (rp->rc_flag & RC_NAM)	/* copied client address */
+			m_freem(rp->rc_nam);
+		free(rp, M_NFSD);
+	}
+	bzero((char *)rheadhtbl, (rheadhash + 1) * sizeof(void *));	/* empty every hash chain */
+	nfsrvlrutail = &nfsrvlruhead;
+	numnfsrvcache = 0;
+}
diff --git a/sys/nfs/nfs_subs.c b/sys/nfs/nfs_subs.c
new file mode 100644
index 00000000000..5778f7d7f01
--- /dev/null
+++ b/sys/nfs/nfs_subs.c
@@ -0,0 +1,1130 @@
+/*
+ * Copyright (c) 1989, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Rick Macklem at The University of Guelph.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)nfs_subs.c	8.3 (Berkeley) 1/4/94
+ */
+
+/*
+ * These functions support the macros and help fiddle mbuf chains for
+ * the nfs op functions. They do things like create the rpc header and
+ * copy data between mbuf chains and uio lists.
+ */
+#include <sys/param.h>
+#include <sys/proc.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/mount.h>
+#include <sys/vnode.h>
+#include <sys/namei.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+
+#include <nfs/rpcv2.h>
+#include <nfs/nfsv2.h>
+#include <nfs/nfsnode.h>
+#include <nfs/nfs.h>
+#include <nfs/xdr_subs.h>
+#include <nfs/nfsm_subs.h>
+#include <nfs/nfsmount.h>
+#include <nfs/nqnfs.h>
+#include <nfs/nfsrtt.h>
+
+#include <miscfs/specfs/specdev.h>
+
+#include <netinet/in.h>
+#ifdef ISO
+#include <netiso/iso.h>
+#endif
+
+#define TRUE	1
+#define	FALSE	0
+
+/*
+ * Data items converted to xdr at startup (see nfs_init()), since they are
+ * constant.  This is kinda hokey, but may save a little time doing byte swaps
+ */
+u_long nfs_procids[NFS_NPROCS];	/* txdr_unsigned(i) for each procedure i */
+u_long nfs_xdrneg1;		/* txdr_unsigned(-1) */
+u_long rpc_call, rpc_vers, rpc_reply, rpc_msgdenied, rpc_autherr,
+	rpc_mismatch, rpc_auth_unix, rpc_msgaccepted, rpc_rejectedcred,
+	rpc_auth_kerb;		/* note: loaded from RPCAUTH_NQNFS */
+u_long nfs_vers, nfs_prog, nfs_true, nfs_false;
+
+/* And other global data */
+static u_long nfs_xid = 0;	/* last xid issued by nfsm_rpchead(); never 0 */
+enum vtype ntov_type[7] = { VNON, VREG, VDIR, VBLK, VCHR, VLNK, VNON };
+extern struct proc *nfs_iodwant[NFS_MAXASYNCDAEMON];
+extern struct nfsreq nfsreqh;
+extern int nqnfs_piggy[NFS_NPROCS];
+extern struct nfsrtt nfsrtt;
+extern time_t nqnfsstarttime;
+extern u_long nqnfs_prog, nqnfs_vers;
+extern int nqsrv_clockskew;
+extern int nqsrv_writeslack;
+extern int nqsrv_maxlease;
+
+/*
+ * Start the mbuf for an rpc request.  On an NQNFS mount (vp non-NULL) a lease
+ * request word pair, or a single zero word, is built first.  hsiz is the size
+ * of the rest of the nfs request header (only used to pick a cluster or not).
+ */
+struct mbuf *
+nfsm_reqh(vp, procid, hsiz, bposp)
+	struct vnode *vp;	/* may be NULL: then no lease words are added */
+	u_long procid;		/* passed to NQNFS_NEEDLEASE() */
+	int hsiz;
+	caddr_t *bposp;		/* out: build position after what was added */
+{
+	register struct mbuf *mb;
+	register u_long *tl;
+	register caddr_t bpos;
+	struct mbuf *mb2;	/* not used directly; presumably for nfsm_build() */
+	struct nfsmount *nmp;
+	int nqflag;
+
+	MGET(mb, M_WAIT, MT_DATA);
+	if (hsiz >= MINCLSIZE)
+		MCLGET(mb, M_WAIT);
+	mb->m_len = 0;
+	bpos = mtod(mb, caddr_t);
+	
+	/*
+	 * For NQNFS, add lease request.
+	 */
+	if (vp) {
+		nmp = VFSTONFS(vp->v_mount);
+		if (nmp->nm_flag & NFSMNT_NQNFS) {
+			nqflag = NQNFS_NEEDLEASE(vp, procid);
+			if (nqflag) {
+				nfsm_build(tl, u_long *, 2*NFSX_UNSIGNED);
+				*tl++ = txdr_unsigned(nqflag);
+				*tl = txdr_unsigned(nmp->nm_leaseterm);
+			} else {
+				nfsm_build(tl, u_long *, NFSX_UNSIGNED);
+				*tl = 0;	/* no lease wanted: one zero word */
+			}
+		}
+	}
+	/* Finally, return values */
+	*bposp = bpos;
+	return (mb);
+}
+
+/*
+ * Build the RPC header and fill in the authorization info.
+ * The authorization string argument is only used when the credentials
+ * come from outside of the kernel (it is read only for RPCAUTH_NQNFS).
+ * Returns the head of the mbuf list; mrest is chained on after the header.
+ */
+struct mbuf *
+nfsm_rpchead(cr, nqnfs, procid, auth_type, auth_len, auth_str, mrest,
+	mrest_len, mbp, xidp)
+	register struct ucred *cr;	/* source of the uid and groups sent */
+	int nqnfs;			/* non-zero: NQNFS_PROG/NQNFS_VER1 */
+	int procid;
+	int auth_type;			/* RPCAUTH_UNIX or RPCAUTH_NQNFS */
+	int auth_len;			/* bytes of cred body / of auth_str */
+	char *auth_str;
+	struct mbuf *mrest;		/* rest of the request, appended */
+	int mrest_len;			/* its length, for m_pkthdr.len */
+	struct mbuf **mbp;		/* out: last mbuf of the header */
+	u_long *xidp;			/* out: the xid, already in xdr form */
+{
+	register struct mbuf *mb;
+	register u_long *tl;
+	register caddr_t bpos;
+	register int i;
+	struct mbuf *mreq, *mb2;
+	int siz, grpsiz, authsiz;
+
+	authsiz = nfsm_rndup(auth_len);	/* length put in the cred length word */
+	if (auth_type == RPCAUTH_NQNFS)
+		authsiz += 2 * NFSX_UNSIGNED;	/* uid and string length words */
+	MGETHDR(mb, M_WAIT, MT_DATA);
+	if ((authsiz + 10*NFSX_UNSIGNED) >= MINCLSIZE) {
+		MCLGET(mb, M_WAIT);
+	} else if ((authsiz + 10*NFSX_UNSIGNED) < MHLEN) {
+		MH_ALIGN(mb, authsiz + 10*NFSX_UNSIGNED);
+	} else {
+		MH_ALIGN(mb, 8*NFSX_UNSIGNED);
+	}
+	mb->m_len = 0;
+	mreq = mb;
+	bpos = mtod(mb, caddr_t);
+
+	/*
+	 * First the RPC header.
+	 */
+	nfsm_build(tl, u_long *, 8*NFSX_UNSIGNED);
+	if (++nfs_xid == 0)		/* xid 0 is never handed out */
+		nfs_xid++;
+	*tl++ = *xidp = txdr_unsigned(nfs_xid);
+	*tl++ = rpc_call;
+	*tl++ = rpc_vers;
+	if (nqnfs) {
+		*tl++ = txdr_unsigned(NQNFS_PROG);
+		*tl++ = txdr_unsigned(NQNFS_VER1);
+	} else {
+		*tl++ = txdr_unsigned(NFS_PROG);
+		*tl++ = txdr_unsigned(NFS_VER2);
+	}
+	*tl++ = txdr_unsigned(procid);
+
+	/*
+	 * And then the authorization cred.
+	 */
+	*tl++ = txdr_unsigned(auth_type);
+	*tl = txdr_unsigned(authsiz);
+	switch (auth_type) {
+	case RPCAUTH_UNIX:
+		nfsm_build(tl, u_long *, auth_len);
+		*tl++ = 0;		/* stamp ?? */
+		*tl++ = 0;		/* NULL hostname */
+		*tl++ = txdr_unsigned(cr->cr_uid);
+		*tl++ = txdr_unsigned(cr->cr_groups[0]);
+		grpsiz = (auth_len >> 2) - 5;	/* words after the five here */
+		*tl++ = txdr_unsigned(grpsiz);
+		for (i = 1; i <= grpsiz; i++)	/* cr_groups[0] went out above */
+			*tl++ = txdr_unsigned(cr->cr_groups[i]);
+		break;
+	case RPCAUTH_NQNFS:
+		nfsm_build(tl, u_long *, 2*NFSX_UNSIGNED);
+		*tl++ = txdr_unsigned(cr->cr_uid);
+		*tl = txdr_unsigned(auth_len);
+		siz = auth_len;		/* bytes of auth_str still to copy */
+		while (siz > 0) {
+			if (M_TRAILINGSPACE(mb) == 0) {	/* full: chain another */
+				MGET(mb2, M_WAIT, MT_DATA);
+				if (siz >= MINCLSIZE)
+					MCLGET(mb2, M_WAIT);
+				mb->m_next = mb2;
+				mb = mb2;
+				mb->m_len = 0;
+				bpos = mtod(mb, caddr_t);
+			}
+			i = min(siz, M_TRAILINGSPACE(mb));
+			bcopy(auth_str, bpos, i);
+			mb->m_len += i;
+			auth_str += i;
+			bpos += i;
+			siz -= i;
+		}
+		if ((siz = (nfsm_rndup(auth_len) - auth_len)) > 0) {
+			for (i = 0; i < siz; i++)	/* zero pad the string */
+				*bpos++ = '\0';
+			mb->m_len += siz;
+		}
+		break;
+	};	/* any other auth_type: no cred body is built */
+	nfsm_build(tl, u_long *, 2*NFSX_UNSIGNED);
+	*tl++ = txdr_unsigned(RPCAUTH_NULL);	/* presumably the rpc verifier: */
+	*tl = 0;				/* type NULL, length zero */
+	mb->m_next = mrest;
+	mreq->m_pkthdr.len = authsiz + 10*NFSX_UNSIGNED + mrest_len;
+	mreq->m_pkthdr.rcvif = (struct ifnet *)0;
+	*mbp = mb;
+	return (mreq);
+}
+
+/*
+ * copies siz bytes of the mbuf chain to the uio scatter/gather list
+ */
+nfsm_mbuftouio(mrep, uiop, siz, dpos)
+	struct mbuf **mrep;		/* in/out: current mbuf of the chain */
+	register struct uio *uiop;	/* destination; iovecs are consumed */
+	int siz;			/* bytes of data to copy */
+	caddr_t *dpos;			/* in/out: read position in *mrep */
+{
+	register char *mbufcp, *uiocp;
+	register int xfer, left, len;
+	register struct mbuf *mp;
+	long uiosiz, rem;
+	int error = 0;
+
+	mp = *mrep;
+	mbufcp = *dpos;
+	len = mtod(mp, caddr_t)+mp->m_len-mbufcp;	/* left in this mbuf */
+	rem = nfsm_rndup(siz)-siz;	/* bytes to skip after the data */
+	while (siz > 0) {
+		if (uiop->uio_iovcnt <= 0 || uiop->uio_iov == NULL)
+			return (EFBIG);	/* ran out of iovecs before siz */
+		left = uiop->uio_iov->iov_len;
+		uiocp = uiop->uio_iov->iov_base;
+		if (left > siz)
+			left = siz;
+		uiosiz = left;		/* amount going into this iovec */
+		while (left > 0) {
+			while (len == 0) {	/* mbuf drained: take the next */
+				mp = mp->m_next;
+				if (mp == NULL)
+					return (EBADRPC);	/* chain too short */
+				mbufcp = mtod(mp, caddr_t);
+				len = mp->m_len;
+			}
+			xfer = (left > len) ? len : left;
+#ifdef notdef
+			/* Not Yet.. */
+			if (uiop->uio_iov->iov_op != NULL)
+				(*(uiop->uio_iov->iov_op))
+				(mbufcp, uiocp, xfer);
+			else
+#endif
+			if (uiop->uio_segflg == UIO_SYSSPACE)
+				bcopy(mbufcp, uiocp, xfer);
+			else	/* NOTE(review): copyout() result is not checked */
+				copyout(mbufcp, uiocp, xfer);
+			left -= xfer;
+			len -= xfer;
+			mbufcp += xfer;
+			uiocp += xfer;
+			uiop->uio_offset += xfer;
+			uiop->uio_resid -= xfer;
+		}
+		if (uiop->uio_iov->iov_len <= siz) {	/* iovec used up */
+			uiop->uio_iovcnt--;
+			uiop->uio_iov++;
+		} else {		/* partly used: advance within it */
+			uiop->uio_iov->iov_base += uiosiz;
+			uiop->uio_iov->iov_len -= uiosiz;
+		}
+		siz -= uiosiz;
+	}
+	*dpos = mbufcp;
+	*mrep = mp;
+	if (rem > 0) {
+		if (len < rem)		/* skip runs into the next mbuf */
+			error = nfs_adv(mrep, dpos, rem, len);
+		else
+			*dpos += rem;
+	}
+	return (error);
+}
+
+/*
+ * copies siz bytes of a uio scatter/gather list onto an mbuf chain...
+ */
+nfsm_uiotombuf(uiop, mq, siz, bpos)
+	register struct uio *uiop;	/* source; iovecs are consumed */
+	struct mbuf **mq;		/* in/out: mbuf being appended to */
+	int siz;			/* bytes of data to copy */
+	caddr_t *bpos;			/* out: end of data in the last mbuf */
+{
+	register char *uiocp;
+	register struct mbuf *mp, *mp2;
+	register int xfer, left, mlen;
+	int uiosiz, clflg, rem;
+	char *cp;
+
+	if (siz > MLEN)		/* or should it >= MCLBYTES ?? */
+		clflg = 1;	/* added mbufs get clusters */
+	else
+		clflg = 0;
+	rem = nfsm_rndup(siz)-siz;	/* zero bytes to add after the data */
+	mp = mp2 = *mq;
+	while (siz > 0) {
+		if (uiop->uio_iovcnt <= 0 || uiop->uio_iov == NULL)
+			return (EINVAL);	/* uio holds less than siz */
+		left = uiop->uio_iov->iov_len;
+		uiocp = uiop->uio_iov->iov_base;
+		if (left > siz)
+			left = siz;
+		uiosiz = left;		/* amount taken from this iovec */
+		while (left > 0) {
+			mlen = M_TRAILINGSPACE(mp);
+			if (mlen == 0) {	/* full: link a new mbuf after mp2 */
+				MGET(mp, M_WAIT, MT_DATA);
+				if (clflg)
+					MCLGET(mp, M_WAIT);
+				mp->m_len = 0;
+				mp2->m_next = mp;
+				mp2 = mp;
+				mlen = M_TRAILINGSPACE(mp);
+			}
+			xfer = (left > mlen) ? mlen : left;
+#ifdef notdef
+			/* Not Yet.. */
+			if (uiop->uio_iov->iov_op != NULL)
+				(*(uiop->uio_iov->iov_op))
+				(uiocp, mtod(mp, caddr_t)+mp->m_len, xfer);
+			else
+#endif
+			if (uiop->uio_segflg == UIO_SYSSPACE)
+				bcopy(uiocp, mtod(mp, caddr_t)+mp->m_len, xfer);
+			else	/* NOTE(review): copyin() result is not checked */
+				copyin(uiocp, mtod(mp, caddr_t)+mp->m_len, xfer);
+			mp->m_len += xfer;
+			left -= xfer;
+			uiocp += xfer;
+			uiop->uio_offset += xfer;
+			uiop->uio_resid -= xfer;
+		}
+		if (uiop->uio_iov->iov_len <= siz) {	/* iovec used up */
+			uiop->uio_iovcnt--;
+			uiop->uio_iov++;
+		} else {		/* partly used: advance within it */
+			uiop->uio_iov->iov_base += uiosiz;
+			uiop->uio_iov->iov_len -= uiosiz;
+		}
+		siz -= uiosiz;
+	}
+	if (rem > 0) {
+		if (rem > M_TRAILINGSPACE(mp)) {	/* no room for the fill */
+			MGET(mp, M_WAIT, MT_DATA);
+			mp->m_len = 0;
+			mp2->m_next = mp;
+		}
+		cp = mtod(mp, caddr_t)+mp->m_len;
+		for (left = 0; left < rem; left++)	/* left reused as index */
+			*cp++ = '\0';
+		mp->m_len += rem;
+		*bpos = cp;
+	} else
+		*bpos = mtod(mp, caddr_t)+mp->m_len;
+	*mq = mp;
+	return (0);
+}
+
+/*
+ * Help break down an mbuf chain by setting the first siz bytes contiguous
+ * pointed to by returned val (*cp2).  Returns 0, or EBADRPC if the chain
+ * holds fewer than siz bytes.  Used by the macros nfsm_dissect and
+ * nfsm_dissecton for tough cases. (The macros use the vars. dpos and dpos2)
+ */
+nfsm_disct(mdp, dposp, siz, left, cp2)
+	struct mbuf **mdp;	/* in/out: current mbuf */
+	caddr_t *dposp;		/* in/out: position within it */
+	int siz;		/* bytes wanted in one piece */
+	int left;		/* bytes left in *mdp from *dposp */
+	caddr_t *cp2;		/* out: start of the siz contiguous bytes */
+{
+	register struct mbuf *mp, *mp2;
+	register int siz2, xfer;
+	register caddr_t p;
+
+	mp = *mdp;
+	while (left == 0) {	/* current mbuf used up: step to the next */
+		*mdp = mp = mp->m_next;
+		if (mp == NULL)
+			return (EBADRPC);
+		left = mp->m_len;
+		*dposp = mtod(mp, caddr_t);
+	}
+	if (left >= siz) {	/* already contiguous: just hand it out */
+		*cp2 = *dposp;
+		*dposp += siz;
+	} else if (mp->m_next == NULL) {	/* nothing further to draw on */
+		return (EBADRPC);
+	} else if (siz > MHLEN) {
+		panic("nfs S too big");
+	} else {	/* gather the bytes into a new mbuf linked after mp */
+		MGET(mp2, M_WAIT, MT_DATA);
+		mp2->m_next = mp->m_next;
+		mp->m_next = mp2;
+		mp->m_len -= left;	/* those bytes move to the new mbuf */
+		mp = mp2;
+		*cp2 = p = mtod(mp, caddr_t);
+		bcopy(*dposp, p, left);		/* Copy what was left */
+		siz2 = siz-left;
+		p += left;
+		mp2 = mp->m_next;
+		/* Loop around copying up the siz2 bytes */
+		while (siz2 > 0) {
+			if (mp2 == NULL)
+				return (EBADRPC);
+			xfer = (siz2 > mp2->m_len) ? mp2->m_len : siz2;
+			if (xfer > 0) {
+				bcopy(mtod(mp2, caddr_t), p, xfer);
+				NFSMADV(mp2, xfer);	/* presumably skips the data */
+				mp2->m_len -= xfer;
+				p += xfer;
+				siz2 -= xfer;
+			}
+			if (siz2 > 0)
+				mp2 = mp2->m_next;
+		}
+		mp->m_len = siz;
+		*mdp = mp2;	/* continue in the mbuf last copied from */
+		*dposp = mtod(mp2, caddr_t);
+	}
+	return (0);
+}
+
+/*
+ * Advance the position in the mbuf chain by offs bytes, of which only
+ * left are available in the current mbuf.  Returns EBADRPC if the chain
+ * ends first (nothing is updated then), else 0 with *mdp/*dposp updated.
+ */
+nfs_adv(mdp, dposp, offs, left)
+	struct mbuf **mdp;
+	caddr_t *dposp;
+	int offs;
+	int left;
+{
+	register struct mbuf *cur = *mdp;
+	register int avail;
+
+	/* Use up one mbuf after another until the target lies inside one. */
+	for (avail = left; avail < offs; avail = cur->m_len) {
+		offs -= avail;
+		if ((cur = cur->m_next) == NULL)
+			return (EBADRPC);
+	}
+	/* What is left of offs is measured from the start of cur's data. */
+	*dposp = mtod(cur, caddr_t) + offs;
+	*mdp = cur;
+	return (0);
+}
+
+/*
+ * Copy a string, preceded by its length word, into mbufs for the hard cases...
+ */
+nfsm_strtmbuf(mb, bpos, cp, siz)
+	struct mbuf **mb;	/* in/out: mbuf being built into */
+	char **bpos;		/* in/out: build position */
+	char *cp;		/* the string bytes */
+	long siz;		/* their count */
+{
+	register struct mbuf *m1, *m2;
+	long left, xfer, len, tlen;
+	u_long *tl;
+	int putsize;
+
+	putsize = 1;		/* length word not written yet */
+	m2 = *mb;
+	left = M_TRAILINGSPACE(m2);
+	if (left > 0) {		/* start in the space the current mbuf has */
+		tl = ((u_long *)(*bpos));
+		*tl++ = txdr_unsigned(siz);
+		putsize = 0;
+		left -= NFSX_UNSIGNED;
+		m2->m_len += NFSX_UNSIGNED;
+		if (left > 0) {	/* NOTE(review): assumes left < siz - confirm */
+			bcopy(cp, (caddr_t) tl, left);
+			siz -= left;
+			cp += left;
+			m2->m_len += left;
+			left = 0;
+		}
+	}
+	/* Loop around adding mbufs */
+	while (siz > 0) {
+		MGET(m1, M_WAIT, MT_DATA);
+		if (siz > MLEN)
+			MCLGET(m1, M_WAIT);
+		m1->m_len = NFSMSIZ(m1);	/* used as capacity until reset */
+		m2->m_next = m1;
+		m2 = m1;
+		tl = mtod(m1, u_long *);
+		tlen = 0;
+		if (putsize) {
+			*tl++ = txdr_unsigned(siz);
+			m1->m_len -= NFSX_UNSIGNED;
+			tlen = NFSX_UNSIGNED;
+			putsize = 0;
+		}
+		if (siz < m1->m_len) {	/* the rest fits in this mbuf */
+			len = nfsm_rndup(siz);
+			xfer = siz;
+			if (xfer < len)	/* zero the last word before the copy */
+				*(tl+(xfer>>2)) = 0;
+		} else {
+			xfer = len = m1->m_len;
+		}
+		bcopy(cp, (caddr_t) tl, xfer);
+		m1->m_len = len+tlen;
+		siz -= xfer;
+		cp += xfer;
+	}
+	*mb = m1;	/* NOTE(review): m1 is unset if the loop never ran */
+	*bpos = mtod(m1, caddr_t)+m1->m_len;
+	return (0);
+}
+
+/*
+ * Called once to initialize data structures (xdr constants, tables, timer)
+ */
+nfs_init()
+{
+	register int i;
+
+	nfsrtt.pos = 0;
+	rpc_vers = txdr_unsigned(RPC_VER2);	/* constants kept in xdr form */
+	rpc_call = txdr_unsigned(RPC_CALL);
+	rpc_reply = txdr_unsigned(RPC_REPLY);
+	rpc_msgdenied = txdr_unsigned(RPC_MSGDENIED);
+	rpc_msgaccepted = txdr_unsigned(RPC_MSGACCEPTED);
+	rpc_mismatch = txdr_unsigned(RPC_MISMATCH);
+	rpc_autherr = txdr_unsigned(RPC_AUTHERR);
+	rpc_rejectedcred = txdr_unsigned(AUTH_REJECTCRED);
+	rpc_auth_unix = txdr_unsigned(RPCAUTH_UNIX);
+	rpc_auth_kerb = txdr_unsigned(RPCAUTH_NQNFS);
+	nfs_vers = txdr_unsigned(NFS_VER2);
+	nfs_prog = txdr_unsigned(NFS_PROG);
+	nfs_true = txdr_unsigned(TRUE);
+	nfs_false = txdr_unsigned(FALSE);
+	/* Loop thru nfs procids */
+	for (i = 0; i < NFS_NPROCS; i++)
+		nfs_procids[i] = txdr_unsigned(i);
+	/* Ensure async daemons disabled */
+	for (i = 0; i < NFS_MAXASYNCDAEMON; i++)
+		nfs_iodwant[i] = (struct proc *)0;
+	TAILQ_INIT(&nfs_bufq);
+	nfs_xdrneg1 = txdr_unsigned(-1);
+	nfs_nhinit();			/* Init the nfsnode table */
+	nfsrv_init(0);			/* Init server data structures */
+	nfsrv_initcache();		/* Init the server request cache */
+
+	/*
+	 * Initialize the nqnfs server stuff.
+	 */
+	if (nqnfsstarttime == 0) {	/* only while it is still unset */
+		nqnfsstarttime = boottime.tv_sec + nqsrv_maxlease
+			+ nqsrv_clockskew + nqsrv_writeslack;
+		NQLOADNOVRAM(nqnfsstarttime);
+		nqnfs_prog = txdr_unsigned(NQNFS_PROG);
+		nqnfs_vers = txdr_unsigned(NQNFS_VER1);
+		nqthead.th_head[0] = &nqthead;	/* list head points at itself */
+		nqthead.th_head[1] = &nqthead;
+		nqfhead = hashinit(NQLCHSZ, M_NQLEASE, &nqfheadhash);
+	}
+
+	/*
+	 * Initialize reply list and start timer
+	 */
+	nfsreqh.r_prev = nfsreqh.r_next = &nfsreqh;
+	nfs_timer();
+}
+
+/*
+ * Attribute cache routines.
+ * nfs_loadattrcache() - loads or updates the cache contents from attributes
+ *	that are on the mbuf list
+ * nfs_getattrcache() - returns valid attributes if found in cache, returns
+ *	error otherwise
+ */
+
+/*
+ * Load the attribute cache (that lives in the nfsnode entry) with
+ * the values on the mbuf list and
+ * Iff vaper not NULL
+ *    copy the attributes to *vaper (atime/mtime from the nfsnode if NCHG)
+ */
+nfs_loadattrcache(vpp, mdp, dposp, vaper)
+	struct vnode **vpp;	/* in/out: replaced if an alias vnode is used */
+	struct mbuf **mdp;	/* in/out: mbuf holding the attributes */
+	caddr_t *dposp;		/* in/out: position within it */
+	struct vattr *vaper;	/* out, optional: copy of the attributes */
+{
+	register struct vnode *vp = *vpp;
+	register struct vattr *vap;
+	register struct nfsv2_fattr *fp;
+	extern int (**spec_nfsv2nodeop_p)();
+	register struct nfsnode *np, *nq, **nhpp;
+	register long t1;
+	caddr_t dpos, cp2;
+	int error = 0, isnq;
+	struct mbuf *md;
+	enum vtype vtyp;
+	u_short vmode;
+	long rdev;
+	struct timespec mtime;
+	struct vnode *nvp;
+
+	md = *mdp;
+	dpos = *dposp;
+	t1 = (mtod(md, caddr_t) + md->m_len) - dpos;	/* left in this mbuf */
+	isnq = (VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NQNFS);
+	if (error = nfsm_disct(&md, &dpos, NFSX_FATTR(isnq), t1, &cp2))
+		return (error);
+	fp = (struct nfsv2_fattr *)cp2;
+	vtyp = nfstov_type(fp->fa_type);
+	vmode = fxdr_unsigned(u_short, fp->fa_mode);
+	if (vtyp == VNON || vtyp == VREG)	/* go by the mode bits instead */
+		vtyp = IFTOVT(vmode);
+	if (isnq) {
+		rdev = fxdr_unsigned(long, fp->fa_nqrdev);
+		fxdr_nqtime(&fp->fa_nqmtime, &mtime);
+	} else {
+		rdev = fxdr_unsigned(long, fp->fa_nfsrdev);
+		fxdr_nfstime(&fp->fa_nfsmtime, &mtime);
+	}
+	/*
+	 * If v_type == VNON it is a new node, so fill in the v_type,
+	 * n_mtime fields. Check to see if it represents a special
+	 * device, and if so, check for a possible alias. Once the
+	 * correct vnode has been obtained, fill in the rest of the
+	 * information.
+	 */
+	np = VTONFS(vp);
+	if (vp->v_type == VNON) {
+		if (vtyp == VCHR && rdev == 0xffffffff)	/* taken to be a fifo */
+			vp->v_type = vtyp = VFIFO;
+		else
+			vp->v_type = vtyp;
+		if (vp->v_type == VFIFO) {
+#ifdef FIFO
+			extern int (**fifo_nfsv2nodeop_p)();
+			vp->v_op = fifo_nfsv2nodeop_p;
+#else
+			return (EOPNOTSUPP);
+#endif /* FIFO */
+		}
+		if (vp->v_type == VCHR || vp->v_type == VBLK) {
+			vp->v_op = spec_nfsv2nodeop_p;
+			if (nvp = checkalias(vp, (dev_t)rdev, vp->v_mount)) {
+				/*
+				 * Discard unneeded vnode, but save its nfsnode.
+				 */
+				if (nq = np->n_forw)	/* unhook np from its chain */
+					nq->n_back = np->n_back;
+				*np->n_back = nq;
+				nvp->v_data = vp->v_data;
+				vp->v_data = NULL;
+				vp->v_op = spec_vnodeop_p;
+				vrele(vp);
+				vgone(vp);
+				/*
+				 * Reinitialize aliased node.
+				 */
+				np->n_vnode = nvp;
+				nhpp = (struct nfsnode **)nfs_hash(&np->n_fh);
+				if (nq = *nhpp)	/* put np at the head of the chain */
+					nq->n_back = &np->n_forw;
+				np->n_forw = nq;
+				np->n_back = nhpp;
+				*nhpp = np;
+				*vpp = vp = nvp;
+			}
+		}
+		np->n_mtime = mtime.ts_sec;
+	}
+	vap = &np->n_vattr;
+	vap->va_type = vtyp;
+	vap->va_mode = (vmode & 07777);
+	vap->va_nlink = fxdr_unsigned(u_short, fp->fa_nlink);
+	vap->va_uid = fxdr_unsigned(uid_t, fp->fa_uid);
+	vap->va_gid = fxdr_unsigned(gid_t, fp->fa_gid);
+	vap->va_rdev = (dev_t)rdev;
+	vap->va_mtime = mtime;
+	vap->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0];
+	if (isnq) {
+		fxdr_hyper(&fp->fa_nqsize, &vap->va_size);
+		vap->va_blocksize = fxdr_unsigned(long, fp->fa_nqblocksize);
+		fxdr_hyper(&fp->fa_nqbytes, &vap->va_bytes);
+		vap->va_fileid = fxdr_unsigned(long, fp->fa_nqfileid);
+		fxdr_nqtime(&fp->fa_nqatime, &vap->va_atime);
+		vap->va_flags = fxdr_unsigned(u_long, fp->fa_nqflags);
+		fxdr_nqtime(&fp->fa_nqctime, &vap->va_ctime);
+		vap->va_gen = fxdr_unsigned(u_long, fp->fa_nqgen);
+		fxdr_hyper(&fp->fa_nqfilerev, &vap->va_filerev);
+	} else {
+		vap->va_size = fxdr_unsigned(u_long, fp->fa_nfssize);
+		vap->va_blocksize = fxdr_unsigned(long, fp->fa_nfsblocksize);
+		vap->va_bytes = fxdr_unsigned(long, fp->fa_nfsblocks) * NFS_FABLKSIZE;
+		vap->va_fileid = fxdr_unsigned(long, fp->fa_nfsfileid);
+		fxdr_nfstime(&fp->fa_nfsatime, &vap->va_atime);
+		vap->va_flags = 0;
+		vap->va_ctime.ts_sec = fxdr_unsigned(long, fp->fa_nfsctime.nfs_sec);
+		vap->va_ctime.ts_nsec = 0;
+		vap->va_gen = fxdr_unsigned(u_long, fp->fa_nfsctime.nfs_usec);
+		vap->va_filerev = 0;
+	}
+	if (vap->va_size != np->n_size) {
+		if (vap->va_type == VREG) {
+			if (np->n_flag & NMODIFIED) {	/* keep the larger size */
+				if (vap->va_size < np->n_size)
+					vap->va_size = np->n_size;
+				else
+					np->n_size = vap->va_size;
+			} else
+				np->n_size = vap->va_size;
+			vnode_pager_setsize(vp, (u_long)np->n_size);
+		} else
+			np->n_size = vap->va_size;
+	}
+	np->n_attrstamp = time.tv_sec;	/* when the cache was loaded */
+	*dposp = dpos;
+	*mdp = md;
+	if (vaper != NULL) {
+		bcopy((caddr_t)vap, (caddr_t)vaper, sizeof(*vap));
+#ifdef notdef
+		if ((np->n_flag & NMODIFIED) && np->n_size > vap->va_size)
+		if (np->n_size > vap->va_size)
+			vaper->va_size = np->n_size;
+#endif
+		if (np->n_flag & NCHG) {	/* report the nfsnode's own times */
+			if (np->n_flag & NACC) {
+				vaper->va_atime.ts_sec = np->n_atim.tv_sec;
+				vaper->va_atime.ts_nsec =
+				    np->n_atim.tv_usec * 1000;
+			}
+			if (np->n_flag & NUPD) {
+				vaper->va_mtime.ts_sec = np->n_mtim.tv_sec;
+				vaper->va_mtime.ts_nsec =
+				    np->n_mtim.tv_usec * 1000;
+			}
+		}
+	}
+	return (0);
+}
+
+/*
+ * Check the time stamp (or, with NFSMNT_NQLOOKLEASE, NQNFS_CKCACHABLE())
+ * If the cache is valid, copy contents to *vaper and return 0
+ * otherwise count a miss and return ENOENT
+ */
+nfs_getattrcache(vp, vaper)
+	register struct vnode *vp;
+	struct vattr *vaper;	/* out: filled in only on a hit */
+{
+	register struct nfsnode *np = VTONFS(vp);
+	register struct vattr *vap;
+
+	if (VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NQLOOKLEASE) {
+		if (!NQNFS_CKCACHABLE(vp, NQL_READ) || np->n_attrstamp == 0) {
+			nfsstats.attrcache_misses++;
+			return (ENOENT);
+		}
+	} else if ((time.tv_sec - np->n_attrstamp) >= NFS_ATTRTIMEO(np)) {
+		nfsstats.attrcache_misses++;	/* cached attributes too old */
+		return (ENOENT);
+	}
+	nfsstats.attrcache_hits++;
+	vap = &np->n_vattr;
+	if (vap->va_size != np->n_size) {
+		if (vap->va_type == VREG) {
+			if (np->n_flag & NMODIFIED) {	/* keep the larger size */
+				if (vap->va_size < np->n_size)
+					vap->va_size = np->n_size;
+				else
+					np->n_size = vap->va_size;
+			} else
+				np->n_size = vap->va_size;
+			vnode_pager_setsize(vp, (u_long)np->n_size);
+		} else
+			np->n_size = vap->va_size;
+	}
+	bcopy((caddr_t)vap, (caddr_t)vaper, sizeof(struct vattr));
+#ifdef notdef
+	if ((np->n_flag & NMODIFIED) == 0) {
+		np->n_size = vaper->va_size;
+		vnode_pager_setsize(vp, (u_long)np->n_size);
+	} else if (np->n_size > vaper->va_size)
+	if (np->n_size > vaper->va_size)
+		vaper->va_size = np->n_size;
+#endif
+	if (np->n_flag & NCHG) {	/* report the nfsnode's own times */
+		if (np->n_flag & NACC) {
+			vaper->va_atime.ts_sec = np->n_atim.tv_sec;
+			vaper->va_atime.ts_nsec = np->n_atim.tv_usec * 1000;
+		}
+		if (np->n_flag & NUPD) {
+			vaper->va_mtime.ts_sec = np->n_mtim.tv_sec;
+			vaper->va_mtime.ts_nsec = np->n_mtim.tv_usec * 1000;
+		}
+	}
+	return (0);
+}
+
+/*
+ * Set up nameidata for a lookup() call on one name component and do it
+ */
+nfs_namei(ndp, fhp, len, slp, nam, mdp, dposp, p)
+	register struct nameidata *ndp;
+	fhandle_t *fhp;			/* handle of the starting directory */
+	int len;			/* length of the name in the mbufs */
+	struct nfssvc_sock *slp;	/* handed on to nfsrv_fhtovp() */
+	struct mbuf *nam;		/* handed on to nfsrv_fhtovp() */
+	struct mbuf **mdp;		/* in/out: mbuf holding the name */
+	caddr_t *dposp;			/* in/out: position within it */
+	struct proc *p;
+{
+	register int i, rem;
+	register struct mbuf *md;
+	register char *fromcp, *tocp;
+	struct vnode *dp;
+	int error, rdonly;
+	struct componentname *cnp = &ndp->ni_cnd;
+
+	MALLOC(cnp->cn_pnbuf, char *, len + 1, M_NAMEI, M_WAITOK);
+	/*
+	 * Copy the name from the mbuf list to cnp->cn_pnbuf
+	 * and set the various ndp fields appropriately.
+	 */
+	fromcp = *dposp;
+	tocp = cnp->cn_pnbuf;
+	md = *mdp;
+	rem = mtod(md, caddr_t) + md->m_len - fromcp;	/* left in this mbuf */
+	cnp->cn_hash = 0;
+	for (i = 0; i < len; i++) {
+		while (rem == 0) {
+			md = md->m_next;
+			if (md == NULL) {
+				error = EBADRPC;
+				goto out;
+			}
+			fromcp = mtod(md, caddr_t);
+			rem = md->m_len;
+		}
+		if (*fromcp == '\0' || *fromcp == '/') {	/* not allowed */
+			error = EINVAL;
+			goto out;
+		}
+		cnp->cn_hash += (unsigned char)*fromcp;
+		*tocp++ = *fromcp++;
+		rem--;
+	}
+	*tocp = '\0';
+	*mdp = md;
+	*dposp = fromcp;
+	len = nfsm_rndup(len)-len;	/* now the bytes to skip after the name */
+	if (len > 0) {
+		if (rem >= len)
+			*dposp += len;
+		else if (error = nfs_adv(mdp, dposp, len, rem))
+			goto out;
+	}
+	ndp->ni_pathlen = tocp - cnp->cn_pnbuf;
+	cnp->cn_nameptr = cnp->cn_pnbuf;
+	/*
+	 * Extract and set starting directory.
+	 */
+	if (error = nfsrv_fhtovp(fhp, FALSE, &dp, ndp->ni_cnd.cn_cred, slp,
+	    nam, &rdonly))
+		goto out;
+	if (dp->v_type != VDIR) {
+		vrele(dp);
+		error = ENOTDIR;
+		goto out;
+	}
+	ndp->ni_startdir = dp;
+	if (rdonly)
+		cnp->cn_flags |= (NOCROSSMOUNT | RDONLY);
+	else
+		cnp->cn_flags |= NOCROSSMOUNT;
+	/*
+	 * And call lookup() to do the real work
+	 */
+	cnp->cn_proc = p;
+	if (error = lookup(ndp))
+		goto out;
+	/*
+	 * Check for encountering a symbolic link
+	 */
+	if (cnp->cn_flags & ISSYMLINK) {
+		if ((cnp->cn_flags & LOCKPARENT) && ndp->ni_pathlen == 1)
+			vput(ndp->ni_dvp);
+		else
+			vrele(ndp->ni_dvp);
+		vput(ndp->ni_vp);
+		ndp->ni_vp = NULL;
+		error = EINVAL;
+		goto out;
+	}
+	/*
+	 * Check for saved name request
+	 */
+	if (cnp->cn_flags & (SAVENAME | SAVESTART)) {
+		cnp->cn_flags |= HASBUF;	/* cn_pnbuf is kept, not freed */
+		return (0);
+	}
+out:	/* reached on any error, and on success when the name is not saved */
+	FREE(cnp->cn_pnbuf, M_NAMEI);
+	return (error);
+}
+
+/*
+ * Variant of m_adj() that only trims from the tail of the chain: drop
+ * len bytes from the end, then zero the final nul bytes that remain.
+ * No mbuf is unlinked; those left without data just get m_len 0
+ */
+void
+nfsm_adj(mp, len, nul)
+	struct mbuf *mp;
+	register int len;
+	int nul;
+{
+	register struct mbuf *m;
+	register int keep, i;
+	register char *cp;
+
+	/*
+	 * First pass: total up the data in the chain, stopping with m
+	 * on the last mbuf.
+	 */
+	keep = mp->m_len;
+	for (m = mp; m->m_next != (struct mbuf *)0; ) {
+		m = m->m_next;
+		keep += m->m_len;
+	}
+	if (len < m->m_len) {
+		/*
+		 * The last mbuf can absorb the whole cut on its own,
+		 * so nothing ahead of it needs to be looked at again.
+		 */
+		m->m_len -= len;
+	} else {
+		/*
+		 * The cut reaches back past the last mbuf.  Work out how
+		 * many bytes the chain keeps (never fewer than none), then
+		 * rescan from the head for the mbuf that holds the last of
+		 * them and shorten it to fit.
+		 */
+		keep -= len;
+		if (keep < 0)
+			keep = 0;
+		for (m = mp; m->m_len < keep; m = m->m_next)
+			keep -= m->m_len;
+		m->m_len = keep;
+	}
+	/*
+	 * m is now the mbuf carrying the end of the data.  Null fill
+	 * backwards from that end, which covers the nul bytes lying
+	 * just below it.
+	 */
+	if (nul > 0) {
+		cp = mtod(m, caddr_t) + m->m_len;
+		for (i = nul; i > 0; i--)
+			*--cp = '\0';
+	}
+	/*
+	 * Anything after m holds only trimmed data; mark it empty.
+	 * (Nothing to do when m is the last mbuf.)
+	 */
+	while ((m = m->m_next) != (struct mbuf *)0)
+		m->m_len = 0;
+}
+
+/*
+ * nfsrv_fhtovp() - convert a fh to a vnode ptr (optionally locked)
+ *	- look up fsid in mount list (if not found ret ESTALE)
+ *	- get vp and export rights by calling VFS_FHTOVP()
+ *	- if MNT_EXKERB take uid/groups from slp's uid hash (else auth error)
+ *	- else if cr_uid == 0 or MNT_EXPORTANON take uid/groups from credanon
+ *	- if not lockflag unlock it with VOP_UNLOCK()
+ */
+nfsrv_fhtovp(fhp, lockflag, vpp, cred, slp, nam, rdonlyp)
+	fhandle_t *fhp;
+	int lockflag;			/* non-zero: skip the VOP_UNLOCK() */
+	struct vnode **vpp;		/* out: the vnode found */
+	struct ucred *cred;		/* in/out: uid and groups may be remapped */
+	struct nfssvc_sock *slp;	/* its ns_uidh table serves MNT_EXKERB */
+	struct mbuf *nam;		/* client address, given to VFS_FHTOVP() */
+	int *rdonlyp;			/* out: 1 if MNT_EXRDONLY is set, else 0 */
+{
+	register struct mount *mp;
+	register struct nfsuid *uidp;
+	register int i;
+	struct ucred *credanon;
+	int error, exflags;
+
+	*vpp = (struct vnode *)0;
+	if ((mp = getvfs(&fhp->fh_fsid)) == NULL)
+		return (ESTALE);
+	if (error = VFS_FHTOVP(mp, &fhp->fh_fid, nam, vpp, &exflags, &credanon))
+		return (error);
+	/*
+	 * Check/setup credentials.
+	 */
+	if (exflags & MNT_EXKERB) {
+		uidp = slp->ns_uidh[NUIDHASH(cred->cr_uid)];
+		while (uidp) {
+			if (uidp->nu_uid == cred->cr_uid)
+				break;
+			uidp = uidp->nu_hnext;
+		}
+		if (uidp) {
+			cred->cr_uid = uidp->nu_cr.cr_uid;
+			for (i = 0; i < uidp->nu_cr.cr_ngroups; i++)
+				cred->cr_groups[i] = uidp->nu_cr.cr_groups[i];
+			cred->cr_ngroups = i;	/* count must match the copy */
+		} else {
+			vput(*vpp);
+			return (NQNFS_AUTHERR);
+		}
+	} else if (cred->cr_uid == 0 || (exflags & MNT_EXPORTANON)) {
+		cred->cr_uid = credanon->cr_uid;
+		for (i = 0; i < credanon->cr_ngroups && i < NGROUPS; i++)
+			cred->cr_groups[i] = credanon->cr_groups[i];
+		cred->cr_ngroups = i;	/* else the caller's extra groups stay */
+	}
+	*rdonlyp = (exflags & MNT_EXRDONLY) ? 1 : 0;
+	if (!lockflag)
+		VOP_UNLOCK(*vpp);
+	return (0);
+}
+
+/*
+ * Decide whether the address in mbuf nam names the same host as the saved
+ * address haddr, which is of the given family.  Returns 1 on a match and
+ * 0 on a mismatch, an unhandled family, or any doubt.
+ * AF_INET is special-cased: haddr then holds the 4 byte address itself
+ * (had_inetaddr) rather than an address mbuf (had_nam).
+ */
+netaddr_match(family, haddr, nam)
+	int family;
+	union nethostaddr *haddr;
+	struct mbuf *nam;
+{
+	/* Internet: compare with the address kept inline in haddr. */
+	if (family == AF_INET) {
+		register struct sockaddr_in *saddr;
+
+		saddr = mtod(nam, struct sockaddr_in *);
+		if (saddr->sin_family != AF_INET)
+			return (0);
+		return (saddr->sin_addr.s_addr == haddr->had_inetaddr);
+	}
+#ifdef ISO
+	/* ISO: the saved address sits in an mbuf of its own (had_nam). */
+	if (family == AF_ISO) {
+		register struct sockaddr_iso *theirs, *ours;
+
+		theirs = mtod(nam, struct sockaddr_iso *);
+		ours = mtod(haddr->had_nam, struct sockaddr_iso *);
+		/*
+		 * Family and a matching non-zero length come before the bytes.
+		 */
+		if (theirs->siso_family != AF_ISO || theirs->siso_nlen <= 0 ||
+		    theirs->siso_nlen != ours->siso_nlen)
+			return (0);
+		if (SAME_ISOADDR(theirs, ours))
+			return (1);
+	}
+#endif	/* ISO */
+	/* Unhandled family, or the addresses differ. */
+	return (0);
+}
diff --git a/sys/nfs/nfs_syscalls.c b/sys/nfs/nfs_syscalls.c
new file mode 100644
index 00000000000..5d86b42ee20
--- /dev/null
+++ b/sys/nfs/nfs_syscalls.c
@@ -0,0 +1,874 @@
+/*
+ * Copyright (c) 1989, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Rick Macklem at The University of Guelph.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)nfs_syscalls.c	8.3 (Berkeley) 1/4/94
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/file.h>
+#include <sys/stat.h>
+#include <sys/vnode.h>
+#include <sys/mount.h>
+#include <sys/proc.h>
+#include <sys/uio.h>
+#include <sys/malloc.h>
+#include <sys/buf.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/domain.h>
+#include <sys/protosw.h>
+#include <sys/namei.h>
+#include <sys/syslog.h>
+
+#include <netinet/in.h>
+#include <netinet/tcp.h>
+#ifdef ISO
+#include <netiso/iso.h>
+#endif
+#include <nfs/rpcv2.h>
+#include <nfs/nfsv2.h>
+#include <nfs/nfs.h>
+#include <nfs/nfsrvcache.h>
+#include <nfs/nfsmount.h>
+#include <nfs/nfsnode.h>
+#include <nfs/nqnfs.h>
+#include <nfs/nfsrtt.h>
+
+/*
+ * Global defs.
+ * The extern objects are defined in other NFS source files; the
+ * remaining objects are state owned by this file.
+ */
+extern u_long nfs_prog, nfs_vers;
+/* Server procedure table, indexed by nd_procnum in nfssvc_nfsd(). */
+extern int (*nfsrv_procs[NFS_NPROCS])();
+/* Per nfsiod slot: proc sleeping for work, set in nfssvc_iod(). */
+extern struct proc *nfs_iodwant[NFS_MAXASYNCDAEMON];
+/* Number of nfsiods currently running, maintained by nfssvc_iod(). */
+extern int nfs_numasync;
+extern time_t nqnfsstarttime;
+extern struct nfsrv_req nsrvq_head;
+/* Head of the list of nfsd structures; nd_flag carries NFSD_CHECKSLP. */
+extern struct nfsd nfsd_head;
+extern int nqsrv_writeslack;
+/* When nonzero nfssvc_nfsd() logs each request through nfsd_rt(). */
+extern int nfsrtton;
+/* Entries allocated by nfsrv_init() and used by nfssvc_addsock(). */
+struct nfssvc_sock *nfs_udpsock, *nfs_cltpsock;
+/* Upper bound on ns_numuids, the per socket uid mapping count. */
+int nuidhash_max = NFS_MAXUIDHASH;
+/* Number of nfsd structures allocated by nfssvc_nfsd(). */
+static int nfs_numnfsd = 0;
+/* Number of nfsds asleep in nfssvc_nfsd() waiting for a socket. */
+int nfsd_waiting = 0;
+/* NQNFS startup state examined for every request in nfssvc_nfsd(). */
+static int notstarted = 1;
+static int modify_flag = 0;
+/* Server monitor log, written by nfsd_rt(). */
+static struct nfsdrt nfsdrt;
+void nfsrv_cleancache(), nfsrv_rcv(), nfsrv_wakenfsd(), nfs_sndunlock();
+static void nfsd_rt();
+void nfsrv_slpderef(), nfsrv_init();
+
+#define	TRUE	1
+#define	FALSE	0
+
+/* Nonzero entry means that nfsiod slot is taken, see nfssvc_iod(). */
+static int nfs_asyncdaemon[NFS_MAXASYNCDAEMON];
+/*
+ * NFS server system calls
+ * getfh() lives here too, but maybe should move to kern/vfs_syscalls.c
+ */
+
+/*
+ * Get file handle system call.
+ * Looks up the path uap->fname, builds the file handle for the vnode it
+ * names (file system id plus the fid produced by VFS_VPTOFH) and copies
+ * it out to uap->fhp.  Restricted to the super user.
+ * Returns 0 or the errno from suser, namei, VFS_VPTOFH or copyout.
+ */
+struct getfh_args {
+	char	*fname;
+	fhandle_t *fhp;
+};
+getfh(p, uap, retval)
+	struct proc *p;
+	register struct getfh_args *uap;
+	int *retval;
+{
+	struct nameidata ndp;
+	register struct vnode *leaf;
+	fhandle_t handle;
+	int error;
+
+	/* Only root may ask for file handles. */
+	error = suser(p->p_ucred, &p->p_acflag);
+	if (error != 0)
+		return (error);
+
+	/* Resolve the name; the vnode comes back locked (LOCKLEAF). */
+	NDINIT(&ndp, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, uap->fname, p);
+	error = namei(&ndp);
+	if (error != 0)
+		return (error);
+	leaf = ndp.ni_vp;
+
+	/* Start from a zeroed handle so no stack garbage is copied out. */
+	bzero((caddr_t)&handle, sizeof(handle));
+	handle.fh_fsid = leaf->v_mount->mnt_stat.f_fsid;
+	error = VFS_VPTOFH(leaf, &handle.fh_fid);
+	vput(leaf);
+	if (error == 0)
+		error = copyout((caddr_t)&handle, (caddr_t)uap->fhp,
+		    sizeof (handle));
+	return (error);
+}
+
+static struct nfssvc_sock nfssvc_sockhead;
+
+/*
+ * Nfs server pseudo system call for the nfsd's
+ * Based on the flag value it either:
+ * - adds a socket to the selection list (NFSSVC_ADDSOCK)
+ * - remains in the kernel as an nfsd (no mode flag set)
+ * - remains in the kernel as an nfsiod (NFSSVC_BIOD)
+ * - hands the caller to nqnfs_clientd() for an nfs mount (NFSSVC_MNTD)
+ *
+ * uap->argp points at the user space argument structure for the chosen
+ * mode (nfsd_cargs, nfsd_args or nfsd_srvargs).  The caller must be the
+ * super user.  Returns 0 or an errno; EINTR and ERESTART coming back
+ * from the service routines are reported as 0.
+ */
+struct nfssvc_args {
+	int flag;
+	caddr_t argp;
+};
+nfssvc(p, uap, retval)
+	struct proc *p;
+	register struct nfssvc_args *uap;
+	int *retval;
+{
+	struct nameidata nd;
+	struct file *fp;
+	struct mbuf *nam;
+	struct nfsd_args nfsdarg;
+	struct nfsd_srvargs nfsd_srvargs, *nsd = &nfsd_srvargs;
+	struct nfsd_cargs ncd;
+	struct nfsd *nfsd;
+	struct nfssvc_sock *slp;
+	struct nfsuid *nuidp, **nuh;
+	struct nfsmount *nmp;
+	int error;
+
+	/*
+	 * Must be super user
+	 */
+	if (error = suser(p->p_ucred, &p->p_acflag))
+		return (error);
+	/*
+	 * Wait out any nfsrv_init() in progress; it wakes us through
+	 * SLP_WANTINIT when the server structures are consistent again.
+	 */
+	while (nfssvc_sockhead.ns_flag & SLP_INIT) {
+		nfssvc_sockhead.ns_flag |= SLP_WANTINIT;
+		(void) tsleep((caddr_t)&nfssvc_sockhead, PSOCK, "nfsd init", 0);
+	}
+	if (uap->flag & NFSSVC_BIOD)
+		error = nfssvc_iod(p);
+	else if (uap->flag & NFSSVC_MNTD) {
+		if (error = copyin(uap->argp, (caddr_t)&ncd, sizeof (ncd)))
+			return (error);
+		NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
+			ncd.ncd_dirp, p);
+		if (error = namei(&nd))
+			return (error);
+		/* The path must name the root of a mounted file system. */
+		if ((nd.ni_vp->v_flag & VROOT) == 0)
+			error = EINVAL;
+		nmp = VFSTONFS(nd.ni_vp->v_mount);
+		vput(nd.ni_vp);
+		if (error)
+			return (error);
+		/*
+		 * Only one daemon per mount point, unless this call is
+		 * delivering an authenticator (NFSSVC_GOTAUTH).
+		 */
+		if ((nmp->nm_flag & NFSMNT_MNTD) &&
+			(uap->flag & NFSSVC_GOTAUTH) == 0)
+			return (0);
+		nmp->nm_flag |= NFSMNT_MNTD;
+		error = nqnfs_clientd(nmp, p->p_ucred, &ncd, uap->flag,
+			uap->argp, p);
+	} else if (uap->flag & NFSSVC_ADDSOCK) {
+		if (error = copyin(uap->argp, (caddr_t)&nfsdarg,
+		    sizeof(nfsdarg)))
+			return (error);
+		if (error = getsock(p->p_fd, nfsdarg.sock, &fp))
+			return (error);
+		/*
+		 * Get the client address for connected sockets.
+		 */
+		if (nfsdarg.name == NULL || nfsdarg.namelen == 0)
+			nam = (struct mbuf *)0;
+		else if (error = sockargs(&nam, nfsdarg.name, nfsdarg.namelen,
+			MT_SONAME))
+			return (error);
+		error = nfssvc_addsock(fp, nam);
+	} else {
+		if (error = copyin(uap->argp, (caddr_t)nsd, sizeof (*nsd)))
+			return (error);
+		if ((uap->flag & NFSSVC_AUTHIN) && (nfsd = nsd->nsd_nfsd) &&
+			(nfsd->nd_slp->ns_flag & SLP_VALID)) {
+			slp = nfsd->nd_slp;
+
+			/*
+			 * First check to see if another nfsd has already
+			 * added this credential.
+			 */
+			nuidp = slp->ns_uidh[NUIDHASH(nsd->nsd_uid)];
+			while (nuidp) {
+				if (nuidp->nu_uid == nsd->nsd_uid)
+					break;
+				nuidp = nuidp->nu_hnext;
+			}
+			if (!nuidp) {
+			    /*
+			     * Nope, so we will.
+			     * Allocate a new entry while below the limit,
+			     * otherwise recycle the least recently used one.
+			     */
+			    if (slp->ns_numuids < nuidhash_max) {
+				slp->ns_numuids++;
+				nuidp = (struct nfsuid *)
+				   malloc(sizeof (struct nfsuid), M_NFSUID,
+					M_WAITOK);
+			    } else
+				nuidp = (struct nfsuid *)0;
+			    /*
+			     * malloc() may have slept, so look at the
+			     * socket state again before touching its lists.
+			     */
+			    if ((slp->ns_flag & SLP_VALID) == 0) {
+				if (nuidp)
+				    free((caddr_t)nuidp, M_NFSUID);
+			    } else {
+				if (nuidp == (struct nfsuid *)0) {
+				    nuidp = slp->ns_lruprev;
+				    remque(nuidp);
+				    /*
+				     * Unlink from its hash chain.  An entry
+				     * with no predecessor is the chain head,
+				     * so the bucket itself has to be moved on
+				     * or it would keep pointing at the entry
+				     * we are about to reuse.
+				     */
+				    if (nuidp->nu_hprev)
+					nuidp->nu_hprev->nu_hnext =
+					    nuidp->nu_hnext;
+				    else
+					slp->ns_uidh[NUIDHASH(nuidp->nu_uid)] =
+					    nuidp->nu_hnext;
+				    if (nuidp->nu_hnext)
+					nuidp->nu_hnext->nu_hprev =
+					    nuidp->nu_hprev;
+				}
+				nuidp->nu_cr = nsd->nsd_cr;
+				if (nuidp->nu_cr.cr_ngroups > NGROUPS)
+					nuidp->nu_cr.cr_ngroups = NGROUPS;
+				nuidp->nu_cr.cr_ref = 1;
+				nuidp->nu_uid = nsd->nsd_uid;
+				/* Most recently used end of the LRU list. */
+				insque(nuidp, (struct nfsuid *)slp);
+				/* Push onto the front of its hash chain. */
+				nuh = &slp->ns_uidh[NUIDHASH(nsd->nsd_uid)];
+				if (nuidp->nu_hnext = *nuh)
+				    nuidp->nu_hnext->nu_hprev = nuidp;
+				nuidp->nu_hprev = (struct nfsuid *)0;
+				*nuh = nuidp;
+			    }
+			}
+		}
+		if ((uap->flag & NFSSVC_AUTHINFAIL) && (nfsd = nsd->nsd_nfsd))
+			nfsd->nd_flag |= NFSD_AUTHFAIL;
+		error = nfssvc_nfsd(nsd, uap->argp, p);
+	}
+	/* Being interrupted is the normal way for a daemon to leave. */
+	if (error == EINTR || error == ERESTART)
+		error = 0;
+	return (error);
+}
+
+/*
+ * Adds a socket to the list for servicing by nfsds.
+ * "fp" is the open file for the socket and "mynam" the peer address for
+ * a connected socket (may be null).  On success the nfssvc_sock keeps
+ * mynam and an extra reference on fp; on failure mynam is freed.
+ * Returns 0, EPERM when the UDP (or CLTP) entry is already valid, or
+ * the error from soreserve().
+ */
+nfssvc_addsock(fp, mynam)
+	struct file *fp;
+	struct mbuf *mynam;
+{
+	register struct mbuf *opt;
+	register int space;
+	register struct nfssvc_sock *slp;
+	register struct socket *so;
+	struct nfssvc_sock *fixed;
+	int error, s, busy;
+
+	so = (struct socket *)fp->f_data;
+
+	/*
+	 * The datagram transports have a preallocated entry each; pick
+	 * it up and note whether somebody already owns it.
+	 */
+	fixed = (struct nfssvc_sock *)0;
+	busy = 0;
+	if (so->so_proto->pr_protocol == IPPROTO_UDP) {
+		fixed = nfs_udpsock;
+		busy = fixed->ns_flag & SLP_VALID;
+	}
+#ifdef ISO
+	else if (so->so_proto->pr_protocol == ISOPROTO_CLTP) {
+		fixed = nfs_cltpsock;
+		busy = fixed->ns_flag & SLP_VALID;
+	}
+#endif /* ISO */
+	if (busy) {
+		m_freem(mynam);
+		return (EPERM);
+	}
+
+	/*
+	 * Reserve buffer space; stream sockets need room for the record
+	 * mark as well.
+	 */
+	space = NFS_MAXPACKET;
+	if (so->so_type == SOCK_STREAM)
+		space = NFS_MAXPACKET + sizeof (u_long);
+	error = soreserve(so, space, space);
+	if (error) {
+		m_freem(mynam);
+		return (error);
+	}
+
+	/*
+	 * Protocol specific options { for now TCP only }.  For datagram
+	 * sockets this code can run more than once for the same socket,
+	 * which does no harm.
+	 */
+	if (so->so_type == SOCK_STREAM) {
+		MGET(opt, M_WAIT, MT_SOOPTS);
+		*mtod(opt, int *) = 1;
+		opt->m_len = sizeof(int);
+		sosetopt(so, SOL_SOCKET, SO_KEEPALIVE, opt);
+	}
+	if (so->so_proto->pr_domain->dom_family == AF_INET &&
+	    so->so_proto->pr_protocol == IPPROTO_TCP) {
+		MGET(opt, M_WAIT, MT_SOOPTS);
+		*mtod(opt, int *) = 1;
+		opt->m_len = sizeof(int);
+		sosetopt(so, IPPROTO_TCP, TCP_NODELAY, opt);
+	}
+
+	/* Interruptible, untimed socket buffers in both directions. */
+	so->so_rcv.sb_flags &= ~SB_NOINTR;
+	so->so_rcv.sb_timeo = 0;
+	so->so_snd.sb_flags &= ~SB_NOINTR;
+	so->so_snd.sb_timeo = 0;
+
+	/*
+	 * Use the preallocated entry if there is one, otherwise make a
+	 * new one, append it to the socket list and give it an empty
+	 * uid lru list.
+	 */
+	slp = fixed;
+	if (slp == (struct nfssvc_sock *)0) {
+		slp = (struct nfssvc_sock *)
+			malloc(sizeof (struct nfssvc_sock), M_NFSSVC, M_WAITOK);
+		bzero((caddr_t)slp, sizeof (struct nfssvc_sock));
+		slp->ns_prev = nfssvc_sockhead.ns_prev;
+		slp->ns_prev->ns_next = slp;
+		slp->ns_next = &nfssvc_sockhead;
+		nfssvc_sockhead.ns_prev = slp;
+		slp->ns_lrunext = slp->ns_lruprev = (struct nfsuid *)slp;
+	}
+	slp->ns_so = so;
+	slp->ns_nam = mynam;
+	fp->f_count++;
+	slp->ns_fp = fp;
+
+	/* Hook up the receive upcall and announce the socket to the nfsds. */
+	s = splnet();
+	so->so_upcallarg = (caddr_t)slp;
+	so->so_upcall = nfsrv_rcv;
+	slp->ns_flag = (SLP_VALID | SLP_NEEDQ);
+	nfsrv_wakenfsd(slp);
+	splx(s);
+	return (0);
+}
+
+/*
+ * Called by nfssvc() for nfsds. Just loops around servicing rpc requests
+ * until it is killed by a signal.
+ *
+ * nsd is the kernel copy of the caller's nfsd_srvargs and argp its user
+ * space address; nsd->nsd_nfsd is null on the first call of a process
+ * and otherwise the nfsd structure allocated by an earlier call.
+ * Returns ENEEDAUTH after copying an authentication request out to
+ * argp (the nfsd structure is kept so that the request can be resumed
+ * by the next call), otherwise the error that ended the loop after the
+ * nfsd structure has been released.
+ */
+nfssvc_nfsd(nsd, argp, p)
+	struct nfsd_srvargs *nsd;
+	caddr_t argp;
+	struct proc *p;
+{
+	register struct mbuf *m, *nam2;
+	register int siz;
+	register struct nfssvc_sock *slp;
+	register struct socket *so;
+	register int *solockp;
+	struct nfsd *nd = nsd->nsd_nfsd;
+	struct mbuf *mreq, *nam;
+	struct timeval starttime;
+	struct nfsuid *uidp;
+	int error, cacherep, s;
+	int sotype;
+
+	s = splnet();
+	/*
+	 * First call for this daemon: allocate its nfsd structure and put
+	 * it on the nfsd list.
+	 */
+	if (nd == (struct nfsd *)0) {
+		nsd->nsd_nfsd = nd = (struct nfsd *)
+			malloc(sizeof (struct nfsd), M_NFSD, M_WAITOK);
+		bzero((caddr_t)nd, sizeof (struct nfsd));
+		nd->nd_procp = p;
+		nd->nd_cr.cr_ref = 1;
+		insque(nd, &nfsd_head);
+		nd->nd_nqlflag = NQL_NOVAL;
+		nfs_numnfsd++;
+	}
+	/*
+	 * Loop getting rpc requests until SIGKILL.
+	 */
+	for (;;) {
+		/*
+		 * NFSD_REQINPROG is still set when we come back after an
+		 * ENEEDAUTH return; in that case the request already
+		 * dissected into nd is picked up again below.
+		 */
+		if ((nd->nd_flag & NFSD_REQINPROG) == 0) {
+			/* Sleep until a socket is assigned or needs checking. */
+			while (nd->nd_slp == (struct nfssvc_sock *)0 &&
+				 (nfsd_head.nd_flag & NFSD_CHECKSLP) == 0) {
+				nd->nd_flag |= NFSD_WAITING;
+				nfsd_waiting++;
+				error = tsleep((caddr_t)nd, PSOCK | PCATCH, "nfsd", 0);
+				nfsd_waiting--;
+				if (error)
+					goto done;
+			}
+			/*
+			 * Nothing assigned: scan the socket list for a valid
+			 * socket with records pending and take a reference.
+			 */
+			if (nd->nd_slp == (struct nfssvc_sock *)0 &&
+				(nfsd_head.nd_flag & NFSD_CHECKSLP)) {
+				slp = nfssvc_sockhead.ns_next;
+				while (slp != &nfssvc_sockhead) {
+				    if ((slp->ns_flag & (SLP_VALID | SLP_DOREC))
+					== (SLP_VALID | SLP_DOREC)) {
+					    slp->ns_flag &= ~SLP_DOREC;
+					    slp->ns_sref++;
+					    nd->nd_slp = slp;
+					    break;
+				    }
+				    slp = slp->ns_next;
+				}
+				if (slp == &nfssvc_sockhead)
+					nfsd_head.nd_flag &= ~NFSD_CHECKSLP;
+			}
+			if ((slp = nd->nd_slp) == (struct nfssvc_sock *)0)
+				continue;
+			if (slp->ns_flag & SLP_VALID) {
+				if (slp->ns_flag & SLP_DISCONN)
+					nfsrv_zapsock(slp);
+				else if (slp->ns_flag & SLP_NEEDQ) {
+					slp->ns_flag &= ~SLP_NEEDQ;
+					(void) nfs_sndlock(&slp->ns_solock,
+						(struct nfsreq *)0);
+					nfsrv_rcv(slp->ns_so, (caddr_t)slp,
+						M_WAIT);
+					nfs_sndunlock(&slp->ns_solock);
+				}
+				error = nfsrv_dorec(slp, nd);
+				nd->nd_flag |= NFSD_REQINPROG;
+			}
+		} else {
+			error = 0;
+			slp = nd->nd_slp;
+		}
+		/* No request or dead socket: drop the reference and retry. */
+		if (error || (slp->ns_flag & SLP_VALID) == 0) {
+			nd->nd_slp = (struct nfssvc_sock *)0;
+			nd->nd_flag &= ~NFSD_REQINPROG;
+			nfsrv_slpderef(slp);
+			continue;
+		}
+		splx(s);
+		so = slp->ns_so;
+		sotype = so->so_type;
+		starttime = time;
+		/* Connection based sockets serialize sends with ns_solock. */
+		if (so->so_proto->pr_flags & PR_CONNREQUIRED)
+			solockp = &slp->ns_solock;
+		else
+			solockp = (int *)0;
+		/*
+		 * nam == nam2 for connectionless protocols such as UDP
+		 * nam2 == NULL for connection based protocols to disable
+		 *    recent request caching.
+		 */
+		if (nam2 = nd->nd_nam) {
+			nam = nam2;
+			cacherep = RC_CHECKIT;
+		} else {
+			nam = slp->ns_nam;
+			cacherep = RC_DOIT;
+		}
+
+		/*
+		 * Check to see if authorization is needed.
+		 */
+		if (nd->nd_flag & NFSD_NEEDAUTH) {
+			static int logauth = 0;
+
+			nd->nd_flag &= ~NFSD_NEEDAUTH;
+			/*
+			 * Check for a mapping already installed.
+			 */
+			uidp = slp->ns_uidh[NUIDHASH(nd->nd_cr.cr_uid)];
+			while (uidp) {
+				if (uidp->nu_uid == nd->nd_cr.cr_uid)
+					break;
+				uidp = uidp->nu_hnext;
+			}
+			if (!uidp) {
+			    /*
+			     * No mapping: pass the authenticator up to the
+			     * user level daemon.  If it cannot be copied
+			     * out the request is dropped instead.
+			     */
+			    nsd->nsd_uid = nd->nd_cr.cr_uid;
+			    if (nam2 && logauth++ == 0)
+				log(LOG_WARNING, "Kerberized NFS using UDP\n");
+			    nsd->nsd_haddr =
+			      mtod(nam, struct sockaddr_in *)->sin_addr.s_addr;
+			    nsd->nsd_authlen = nd->nd_authlen;
+			    if (copyout(nd->nd_authstr, nsd->nsd_authstr,
+				nd->nd_authlen) == 0 &&
+				copyout((caddr_t)nsd, argp, sizeof (*nsd)) == 0)
+				return (ENEEDAUTH);
+			    cacherep = RC_DROPIT;
+			}
+		}
+		if (cacherep == RC_CHECKIT)
+			cacherep = nfsrv_getcache(nam2, nd, &mreq);
+
+		/*
+		 * Check for just starting up for NQNFS and send
+		 * fake "try again later" replies to the NQNFS clients.
+		 */
+		if (notstarted && nqnfsstarttime <= time.tv_sec) {
+			if (modify_flag) {
+				nqnfsstarttime = time.tv_sec + nqsrv_writeslack;
+				modify_flag = 0;
+			} else
+				notstarted = 0;
+		}
+		if (notstarted) {
+			if (nd->nd_nqlflag == NQL_NOVAL)
+				cacherep = RC_DROPIT;
+			else if (nd->nd_procnum != NFSPROC_WRITE) {
+				nd->nd_procnum = NFSPROC_NOOP;
+				nd->nd_repstat = NQNFS_TRYLATER;
+				cacherep = RC_DOIT;
+			} else
+				modify_flag = 1;
+		} else if (nd->nd_flag & NFSD_AUTHFAIL) {
+			nd->nd_flag &= ~NFSD_AUTHFAIL;
+			nd->nd_procnum = NFSPROC_NOOP;
+			nd->nd_repstat = NQNFS_AUTHERR;
+			cacherep = RC_DOIT;
+		}
+
+		switch (cacherep) {
+		case RC_DOIT:
+			/* Run the procedure; it builds the reply in mreq. */
+			error = (*(nfsrv_procs[nd->nd_procnum]))(nd,
+				nd->nd_mrep, nd->nd_md, nd->nd_dpos, &nd->nd_cr,
+				nam, &mreq);
+			if (nd->nd_cr.cr_ref != 1) {
+				printf("nfssvc cref=%d\n", nd->nd_cr.cr_ref);
+				panic("nfssvc cref");
+			}
+			if (error) {
+				if (nd->nd_procnum != NQNFSPROC_VACATED)
+					nfsstats.srv_errs++;
+				if (nam2) {
+					nfsrv_updatecache(nam2, nd, FALSE, mreq);
+					m_freem(nam2);
+				}
+				break;
+			}
+			nfsstats.srvrpccnt[nd->nd_procnum]++;
+			if (nam2)
+				nfsrv_updatecache(nam2, nd, TRUE, mreq);
+			nd->nd_mrep = (struct mbuf *)0;
+			/* FALLTHROUGH */
+		case RC_REPLY:
+			/* Total up the reply length for the packet header. */
+			m = mreq;
+			siz = 0;
+			while (m) {
+				siz += m->m_len;
+				m = m->m_next;
+			}
+			if (siz <= 0 || siz > NFS_MAXPACKET) {
+				printf("mbuf siz=%d\n",siz);
+				panic("Bad nfs svc reply");
+			}
+			m = mreq;
+			m->m_pkthdr.len = siz;
+			m->m_pkthdr.rcvif = (struct ifnet *)0;
+			/*
+			 * For stream protocols, prepend a Sun RPC
+			 * Record Mark.
+			 */
+			if (sotype == SOCK_STREAM) {
+				M_PREPEND(m, NFSX_UNSIGNED, M_WAIT);
+				*mtod(m, u_long *) = htonl(0x80000000 | siz);
+			}
+			if (solockp)
+				(void) nfs_sndlock(solockp, (struct nfsreq *)0);
+			if (slp->ns_flag & SLP_VALID)
+			    error = nfs_send(so, nam2, m, (struct nfsreq *)0);
+			else {
+			    error = EPIPE;
+			    m_freem(m);
+			}
+			if (nfsrtton)
+				nfsd_rt(&starttime, sotype, nd, nam, cacherep);
+			if (nam2)
+				MFREE(nam2, m);
+			if (nd->nd_mrep)
+				m_freem(nd->nd_mrep);
+			if (error == EPIPE)
+				nfsrv_zapsock(slp);
+			if (solockp)
+				nfs_sndunlock(solockp);
+			if (error == EINTR || error == ERESTART) {
+				nfsrv_slpderef(slp);
+				s = splnet();
+				goto done;
+			}
+			break;
+		case RC_DROPIT:
+			if (nfsrtton)
+				nfsd_rt(&starttime, sotype, nd, nam, cacherep);
+			m_freem(nd->nd_mrep);
+			m_freem(nam2);
+			break;
+		};
+		/*
+		 * Try for another record on the same socket; if there is
+		 * none, give the socket up and go back to waiting.
+		 */
+		s = splnet();
+		if (nfsrv_dorec(slp, nd)) {
+			nd->nd_flag &= ~NFSD_REQINPROG;
+			nd->nd_slp = (struct nfssvc_sock *)0;
+			nfsrv_slpderef(slp);
+		}
+	}
+done:
+	/* Daemon is leaving: unlist and release its nfsd structure. */
+	remque(nd);
+	splx(s);
+	free((caddr_t)nd, M_NFSD);
+	nsd->nsd_nfsd = (struct nfsd *)0;
+	if (--nfs_numnfsd == 0)
+		nfsrv_init(TRUE);	/* Reinitialize everything */
+	return (error);
+}
+
+/*
+ * Asynchronous I/O daemons for client nfs.
+ * They do read-ahead and write-behind operations on the block I/O cache.
+ * Never returns unless it fails or gets killed.
+ *
+ * Returns EBUSY when all NFS_MAXASYNCDAEMON slots are taken, otherwise
+ * the error from the interrupted tsleep().  Before returning the daemon
+ * releases its slot and withdraws its nfs_iodwant[] entry, so that no
+ * stale proc pointer is left behind marking a departed daemon as idle.
+ */
+nfssvc_iod(p)
+	struct proc *p;
+{
+	register struct buf *bp;
+	register int i, myiod;
+	int error = 0;
+
+	/*
+	 * Assign my position or return error if too many already running
+	 */
+	myiod = -1;
+	for (i = 0; i < NFS_MAXASYNCDAEMON; i++)
+		if (nfs_asyncdaemon[i] == 0) {
+			nfs_asyncdaemon[i]++;
+			myiod = i;
+			break;
+		}
+	if (myiod == -1)
+		return (EBUSY);
+	nfs_numasync++;
+	/*
+	 * Just loop around doin our stuff until SIGKILL
+	 */
+	for (;;) {
+		/* Advertise ourselves as idle and sleep until there is work. */
+		while (nfs_bufq.tqh_first == NULL && error == 0) {
+			nfs_iodwant[myiod] = p;
+			error = tsleep((caddr_t)&nfs_iodwant[myiod],
+				PWAIT | PCATCH, "nfsidl", 0);
+		}
+		/* Drain the queue, even when we are about to exit. */
+		while ((bp = nfs_bufq.tqh_first) != NULL) {
+			/* Take one off the front of the list */
+			TAILQ_REMOVE(&nfs_bufq, bp, b_freelist);
+			if (bp->b_flags & B_READ)
+			    (void) nfs_doio(bp, bp->b_rcred, (struct proc *)0);
+			else
+			    (void) nfs_doio(bp, bp->b_wcred, (struct proc *)0);
+		}
+		if (error) {
+			nfs_asyncdaemon[myiod] = 0;
+			/* We are no longer available to be woken for work. */
+			nfs_iodwant[myiod] = (struct proc *)0;
+			nfs_numasync--;
+			return (error);
+		}
+	}
+}
+
+/*
+ * Shut down a socket associated with an nfssvc_sock structure.
+ * Should be called with the send lock set, if required.
+ * All the SLP_ flags (including SLP_VALID) are cleared first so that
+ * the entry is not handed out again while it is being torn down.  Then
+ * the upcall is removed, the socket shut down and its file closed, and
+ * the peer name, any queued mbufs and every cached uid mapping are
+ * freed.  The structure itself is not freed here, so every pointer and
+ * count that referred to freed storage is reset.
+ * Calling it again for an entry whose file is already gone only clears
+ * the flags.
+ */
+nfsrv_zapsock(slp)
+	register struct nfssvc_sock *slp;
+{
+	register struct nfsuid *nuidp, *onuidp;
+	register int i;
+	struct socket *so;
+	struct file *fp;
+	struct mbuf *m;
+
+	slp->ns_flag &= ~SLP_ALLFLAGS;
+	if (fp = slp->ns_fp) {
+		slp->ns_fp = (struct file *)0;
+		so = slp->ns_so;
+		so->so_upcall = NULL;
+		soshutdown(so, 2);
+		closef(fp, (struct proc *)0);
+		if (slp->ns_nam) {
+			MFREE(slp->ns_nam, m);
+			slp->ns_nam = (struct mbuf *)0;
+		}
+		m_freem(slp->ns_raw);
+		slp->ns_raw = (struct mbuf *)0;
+		m_freem(slp->ns_rec);
+		slp->ns_rec = (struct mbuf *)0;
+		/* Free every uid mapping by walking the lru list. */
+		nuidp = slp->ns_lrunext;
+		while (nuidp != (struct nfsuid *)slp) {
+			onuidp = nuidp;
+			nuidp = nuidp->nu_lrunext;
+			free((caddr_t)onuidp, M_NFSUID);
+		}
+		/* Empty lru list, empty hash table, nothing counted. */
+		slp->ns_lrunext = slp->ns_lruprev = (struct nfsuid *)slp;
+		for (i = 0; i < NUIDHASHSIZ; i++)
+			slp->ns_uidh[i] = (struct nfsuid *)0;
+		slp->ns_numuids = 0;
+	}
+}
+
+/*
+ * Get an authorization string for the uid by having the mount_nfs sitting
+ * on this mount point porpoise out of the kernel and do it.
+ * On success *auth_str is a malloc'd (M_TEMP) buffer of RPCAUTH_MAXSIZ
+ * bytes and *auth_type / *auth_len are taken from the mount structure.
+ * On failure the buffer has been freed and the error is returned: the
+ * value from nfs_sigintr(), or EAUTH when NFSMNT_AUTHERR was set.
+ */
+nfs_getauth(nmp, rep, cred, auth_type, auth_str, auth_len)
+	register struct nfsmount *nmp;
+	struct nfsreq *rep;
+	struct ucred *cred;
+	int *auth_type;
+	char **auth_str;
+	int *auth_len;
+{
+	int error = 0;
+
+	/*
+	 * Wait until NFSMNT_WAITAUTH shows up, polling every two seconds
+	 * so that an interrupt of the request is noticed.
+	 */
+	for (;;) {
+		if (nmp->nm_flag & NFSMNT_WAITAUTH)
+			break;
+		nmp->nm_flag |= NFSMNT_WANTAUTH;
+		(void) tsleep((caddr_t)&nmp->nm_authtype, PSOCK,
+			"nfsauth1", 2 * hz);
+		error = nfs_sigintr(nmp, rep, rep->r_procp);
+		if (error) {
+			nmp->nm_flag &= ~NFSMNT_WANTAUTH;
+			return (error);
+		}
+	}
+
+	/* Our turn: post the request and kick the sleeper on nm_authstr. */
+	nmp->nm_flag &= ~(NFSMNT_WAITAUTH | NFSMNT_WANTAUTH);
+	*auth_str = (char *)malloc(RPCAUTH_MAXSIZ, M_TEMP, M_WAITOK);
+	nmp->nm_authstr = *auth_str;
+	nmp->nm_authuid = cred->cr_uid;
+	wakeup((caddr_t)&nmp->nm_authstr);
+
+	/*
+	 * And wait for mount_nfs to do its stuff.
+	 */
+	while (error == 0 && (nmp->nm_flag & NFSMNT_HASAUTH) == 0) {
+		(void) tsleep((caddr_t)&nmp->nm_authlen, PSOCK,
+			"nfsauth2", 2 * hz);
+		error = nfs_sigintr(nmp, rep, rep->r_procp);
+	}
+	if (nmp->nm_flag & NFSMNT_AUTHERR) {
+		nmp->nm_flag &= ~NFSMNT_AUTHERR;
+		error = EAUTH;
+	}
+	if (error == 0) {
+		*auth_type = nmp->nm_authtype;
+		*auth_len = nmp->nm_authlen;
+	} else
+		free((caddr_t)*auth_str, M_TEMP);
+
+	/* Return to the waiting state and let the next requester in. */
+	nmp->nm_flag = (nmp->nm_flag & ~NFSMNT_HASAUTH) | NFSMNT_WAITAUTH;
+	if (nmp->nm_flag & NFSMNT_WANTAUTH) {
+		nmp->nm_flag &= ~NFSMNT_WANTAUTH;
+		wakeup((caddr_t)&nmp->nm_authtype);
+	}
+	return (error);
+}
+
+/*
+ * Dereference a server socket structure.  When the last reference goes
+ * away and the socket is no longer valid, the structure is taken off
+ * the socket list and freed.
+ */
+void
+nfsrv_slpderef(slp)
+	register struct nfssvc_sock *slp;
+{
+	slp->ns_sref--;
+	/* Still referenced, or still in service: leave it alone. */
+	if (slp->ns_sref != 0 || (slp->ns_flag & SLP_VALID) != 0)
+		return;
+	slp->ns_next->ns_prev = slp->ns_prev;
+	slp->ns_prev->ns_next = slp->ns_next;
+	free((caddr_t)slp, M_NFSSVC);
+}
+
+/*
+ * Initialize the data structures for the server.
+ * SLP_INIT is held in the list head for the duration, and nfssvc()
+ * callers that found it set are woken through SLP_WANTINIT at the end,
+ * so new nfsds never see the lists half built.
+ * With "terminating" set, every existing socket entry is first shut
+ * down and freed and the server cache is cleaned out.
+ */
+void
+nfsrv_init(terminating)
+	int terminating;
+{
+	register struct nfssvc_sock *slp;
+	struct nfssvc_sock *next;
+
+	if (nfssvc_sockhead.ns_flag & SLP_INIT)
+		panic("nfsd init");
+	nfssvc_sockhead.ns_flag |= SLP_INIT;
+
+	if (terminating) {
+		for (slp = nfssvc_sockhead.ns_next; slp != &nfssvc_sockhead;
+		    slp = next) {
+			if (slp->ns_flag & SLP_VALID)
+				nfsrv_zapsock(slp);
+			next = slp->ns_next;
+			next->ns_prev = slp->ns_prev;
+			slp->ns_prev->ns_next = next;
+			free((caddr_t)slp, M_NFSSVC);
+		}
+		nfsrv_cleancache();	/* And clear out server cache */
+	}
+
+	/* Fresh, zeroed entries for the two datagram transports. */
+	nfs_udpsock = (struct nfssvc_sock *)
+	    malloc(sizeof (struct nfssvc_sock), M_NFSSVC, M_WAITOK);
+	bzero((caddr_t)nfs_udpsock, sizeof (struct nfssvc_sock));
+	nfs_cltpsock = (struct nfssvc_sock *)
+	    malloc(sizeof (struct nfssvc_sock), M_NFSSVC, M_WAITOK);
+	bzero((caddr_t)nfs_cltpsock, sizeof (struct nfssvc_sock));
+
+	/* Socket list: head <-> udp <-> cltp <-> head. */
+	nfs_udpsock->ns_prev = &nfssvc_sockhead;
+	nfs_udpsock->ns_next = nfs_cltpsock;
+	nfs_cltpsock->ns_prev = nfs_udpsock;
+	nfs_cltpsock->ns_next = &nfssvc_sockhead;
+	nfssvc_sockhead.ns_next = nfs_udpsock;
+	nfssvc_sockhead.ns_prev = nfs_cltpsock;
+
+	/* Each entry starts with an empty uid lru list. */
+	nfs_udpsock->ns_lrunext = (struct nfsuid *)nfs_udpsock;
+	nfs_udpsock->ns_lruprev = (struct nfsuid *)nfs_udpsock;
+	nfs_cltpsock->ns_lrunext = (struct nfsuid *)nfs_cltpsock;
+	nfs_cltpsock->ns_lruprev = (struct nfsuid *)nfs_cltpsock;
+
+	/* Empty nfsd list. */
+	nfsd_head.nd_next = &nfsd_head;
+	nfsd_head.nd_prev = &nfsd_head;
+	nfsd_head.nd_flag = 0;
+
+	/* Done: let in anybody who was waiting for us. */
+	nfssvc_sockhead.ns_flag &= ~SLP_INIT;
+	if ((nfssvc_sockhead.ns_flag & SLP_WANTINIT) != 0) {
+		nfssvc_sockhead.ns_flag &= ~SLP_WANTINIT;
+		wakeup((caddr_t)&nfssvc_sockhead);
+	}
+}
+
+/*
+ * Add entries to the server monitor log.
+ * Fills in the next slot of the circular nfsdrt log with how the
+ * request was handled (cacherep), the transport, whether it was an
+ * NQNFS request, the procedure number, the client's internet address
+ * and the response time in microseconds since *startp.
+ * "nam" is the client address mbuf and may be null (a connected socket
+ * that was registered without a name); the address is then logged as
+ * INADDR_ANY, as it is for non internet addresses.
+ */
+static void
+nfsd_rt(startp, sotype, nd, nam, cacherep)
+	struct timeval *startp;
+	int sotype;
+	register struct nfsd *nd;
+	struct mbuf *nam;
+	int cacherep;
+{
+	register struct drt *rt;
+
+	rt = &nfsdrt.drt[nfsdrt.pos];
+	if (cacherep == RC_DOIT)
+		rt->flag = 0;
+	else if (cacherep == RC_REPLY)
+		rt->flag = DRT_CACHEREPLY;
+	else
+		rt->flag = DRT_CACHEDROP;
+	if (sotype == SOCK_STREAM)
+		rt->flag |= DRT_TCP;
+	if (nd->nd_nqlflag != NQL_NOVAL)
+		rt->flag |= DRT_NQNFS;
+	rt->proc = nd->nd_procnum;
+	/* Do not look inside an address we were never given. */
+	if (nam != (struct mbuf *)0 &&
+	    mtod(nam, struct sockaddr *)->sa_family == AF_INET)
+		rt->ipadr = mtod(nam, struct sockaddr_in *)->sin_addr.s_addr;
+	else
+		rt->ipadr = INADDR_ANY;
+	rt->resptime = ((time.tv_sec - startp->tv_sec) * 1000000) +
+		(time.tv_usec - startp->tv_usec);
+	rt->tstamp = time;
+	/* Advance the write position, wrapping at the end of the log. */
+	nfsdrt.pos = (nfsdrt.pos + 1) % NFSRTTLOGSIZ;
+}
diff --git a/sys/nfs/nfs_vfsops.c b/sys/nfs/nfs_vfsops.c
new file mode 100644
index 00000000000..1f186760689
--- /dev/null
+++ b/sys/nfs/nfs_vfsops.c
@@ -0,0 +1,740 @@
+/*
+ * Copyright (c) 1989, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Rick Macklem at The University of Guelph.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)nfs_vfsops.c	8.3 (Berkeley) 1/4/94
+ */
+
+#include <sys/param.h>
+#include <sys/conf.h>
+#include <sys/ioctl.h>
+#include <sys/signal.h>
+#include <sys/proc.h>
+#include <sys/namei.h>
+#include <sys/vnode.h>
+#include <sys/kernel.h>
+#include <sys/mount.h>
+#include <sys/buf.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/systm.h>
+
+#include <net/if.h>
+#include <net/route.h>
+#include <netinet/in.h>
+
+#include <nfs/rpcv2.h>
+#include <nfs/nfsv2.h>
+#include <nfs/nfsnode.h>
+#include <nfs/nfsmount.h>
+#include <nfs/nfs.h>
+#include <nfs/xdr_subs.h>
+#include <nfs/nfsm_subs.h>
+#include <nfs/nfsdiskless.h>
+#include <nfs/nqnfs.h>
+
+/*
+ * nfs vfs operations.
+ * The initializer is positional, so the entries must stay in the order
+ * in which struct vfsops declares its members (the declaration is not
+ * in this file).
+ */
+struct vfsops nfs_vfsops = {
+	nfs_mount,
+	nfs_start,
+	nfs_unmount,
+	nfs_root,
+	nfs_quotactl,
+	nfs_statfs,
+	nfs_sync,
+	nfs_vget,
+	nfs_fhtovp,
+	nfs_vptofh,
+	nfs_init,
+};
+
+/*
+ * This structure must be filled in by a primary bootstrap or bootstrap
+ * server for a diskless/dataless machine. It is initialized below just
+ * to ensure that it is allocated to initialized data (.data not .bss).
+ */
+struct nfs_diskless nfs_diskless = { 0 };
+
+extern u_long nfs_procids[NFS_NPROCS];
+extern u_long nfs_prog, nfs_vers;
+void nfs_disconnect __P((struct nfsmount *));
+void nfsargs_ntoh __P((struct nfs_args *));
+static struct mount *nfs_mountdiskless __P((char *, char *, int,
+    struct sockaddr_in *, struct nfs_args *, register struct vnode **));
+
+#define TRUE	1
+#define	FALSE	0
+
+/*
+ * nfs statfs call
+ * Sends an NFSPROC_STATFS request for the mount point's root file
+ * handle and fills in *sbp from the reply.  The file counts are only
+ * taken from the reply for NQNFS mounts and are zero otherwise.
+ * Returns 0 or the error from nfs_nget() or from the rpc.
+ */
+int
+nfs_statfs(mp, sbp, p)
+	struct mount *mp;
+	register struct statfs *sbp;
+	struct proc *p;
+{
+	register struct vnode *vp;
+	register struct nfsv2_statfs *sfp;
+	/*
+	 * cp, t1, bpos, dpos, cp2 and the mbuf pointers are not named in
+	 * the code below; presumably the nfsm_* macros use them by name
+	 * (confirm in nfsm_subs.h), so they must not be renamed or removed.
+	 */
+	register caddr_t cp;
+	register long t1;
+	caddr_t bpos, dpos, cp2;
+	int error = 0, isnq;
+	struct mbuf *mreq, *mrep, *md, *mb, *mb2;
+	struct nfsmount *nmp;
+	struct ucred *cred;
+	struct nfsnode *np;
+
+	nmp = VFSTONFS(mp);
+	isnq = (nmp->nm_flag & NFSMNT_NQNFS);
+	/* Get the vnode for the root file handle of the mount. */
+	if (error = nfs_nget(mp, &nmp->nm_fh, &np))
+		return (error);
+	vp = NFSTOV(np);
+	nfsstats.rpccnt[NFSPROC_STATFS]++;
+	/* The request goes out with a fresh credential, not the caller's. */
+	cred = crget();
+	cred->cr_ngroups = 1;
+	nfsm_reqhead(vp, NFSPROC_STATFS, NFSX_FH);
+	nfsm_fhtom(vp);
+	nfsm_request(vp, NFSPROC_STATFS, p, cred);
+	nfsm_dissect(sfp, struct nfsv2_statfs *, NFSX_STATFS(isnq));
+	sbp->f_type = MOUNT_NFS;
+	sbp->f_flags = nmp->nm_flag;
+	sbp->f_iosize = NFS_MAXDGRAMDATA;
+	sbp->f_bsize = fxdr_unsigned(long, sfp->sf_bsize);
+	sbp->f_blocks = fxdr_unsigned(long, sfp->sf_blocks);
+	sbp->f_bfree = fxdr_unsigned(long, sfp->sf_bfree);
+	sbp->f_bavail = fxdr_unsigned(long, sfp->sf_bavail);
+	if (isnq) {
+		sbp->f_files = fxdr_unsigned(long, sfp->sf_files);
+		sbp->f_ffree = fxdr_unsigned(long, sfp->sf_ffree);
+	} else {
+		sbp->f_files = 0;
+		sbp->f_ffree = 0;
+	}
+	/* Copy the mount names unless the caller passed the mount's own buffer. */
+	if (sbp != &mp->mnt_stat) {
+		bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN);
+		bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN);
+	}
+	/*
+	 * NOTE(review): the nfsm_* macros above presumably jump to a label
+	 * supplied by nfsm_reqdone when they fail, so the cleanup below
+	 * would run on the error paths too -- confirm in nfsm_subs.h.
+	 */
+	nfsm_reqdone;
+	vrele(vp);
+	crfree(cred);
+	return (error);
+}
+
+/*
+ * Mount a remote root fs via nfs. This depends on the info in the
+ * nfs_diskless structure that has been filled in properly by some primary
+ * bootstrap.
+ * It goes something like this:
+ * - do enough of "ifconfig" by calling ifioctl() so that the system
+ *   can talk to the server
+ * - If nfs_diskless.mygateway is filled in, use that address as
+ *   a default gateway.
+ * - hand craft the swap nfs vnode hanging off a fake mount point
+ *	if swdevt[0].sw_dev == NODEV
+ * - build the rootfs mount point and call mountnfs() to do the rest.
+ * - set the hostname from nfs_diskless and the clock from root_time.
+ * Always returns 0; the failures checked for here panic.
+ */
+int
+nfs_mountroot()
+{
+	register struct mount *mp;
+	register struct nfs_diskless *nd = &nfs_diskless;
+	struct socket *so;
+	struct vnode *vp;
+	struct proc *p = curproc;		/* XXX */
+	int error, i;
+
+	/*
+	 * XXX time must be non-zero when we init the interface or else
+	 * the arp code will wedge...
+	 */
+	if (time.tv_sec == 0)
+		time.tv_sec = 1;
+
+#ifdef notyet
+	/* Set up swap credentials. */
+	proc0.p_ucred->cr_uid = ntohl(nd->swap_ucred.cr_uid);
+	proc0.p_ucred->cr_gid = ntohl(nd->swap_ucred.cr_gid);
+	if ((proc0.p_ucred->cr_ngroups = ntohs(nd->swap_ucred.cr_ngroups)) >
+		NGROUPS)
+		proc0.p_ucred->cr_ngroups = NGROUPS;
+	for (i = 0; i < proc0.p_ucred->cr_ngroups; i++)
+	    proc0.p_ucred->cr_groups[i] = ntohl(nd->swap_ucred.cr_groups[i]);
+#endif
+
+	/*
+	 * Do enough of ifconfig(8) so that the critical net interface can
+	 * talk to the server.
+	 * The socket is only needed as a handle for the ioctl.
+	 */
+	if (error = socreate(nd->myif.ifra_addr.sa_family, &so, SOCK_DGRAM, 0))
+		panic("nfs_mountroot: socreate: %d", error);
+	if (error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, p))
+		panic("nfs_mountroot: SIOCAIFADDR: %d", error);
+	soclose(so);
+
+	/*
+	 * If the gateway field is filled in, set it as the default route.
+	 * Destination and mask are all zero apart from family and length.
+	 */
+	if (nd->mygateway.sin_len != 0) {
+		struct sockaddr_in mask, sin;
+
+		bzero((caddr_t)&mask, sizeof(mask));
+		sin = mask;
+		sin.sin_family = AF_INET;
+		sin.sin_len = sizeof(sin);
+		if (error = rtrequest(RTM_ADD, (struct sockaddr *)&sin,
+		    (struct sockaddr *)&nd->mygateway,
+		    (struct sockaddr *)&mask,
+		    RTF_UP | RTF_GATEWAY, (struct rtentry **)0))
+			panic("nfs_mountroot: RTM_ADD: %d", error);
+	}
+
+	/*
+	 * If swapping to an nfs node (indicated by swdevt[0].sw_dev == NODEV):
+	 * Create a fake mount point just for the swap vnode so that the
+	 * swap file can be on a different server from the rootfs.
+	 */
+	if (swdevt[0].sw_dev == NODEV) {
+		nd->swap_args.fh = (nfsv2fh_t *)nd->swap_fh;
+		(void) nfs_mountdiskless(nd->swap_hostnam, "/swap", 0,
+		    &nd->swap_saddr, &nd->swap_args, &vp);
+	
+		/*
+		 * Since the swap file is not the root dir of a file system,
+		 * hack it to a regular file.
+		 */
+		vp->v_type = VREG;
+		vp->v_flag = 0;
+		swapdev_vp = vp;
+		VREF(vp);
+		swdevt[0].sw_vp = vp;
+		swdevt[0].sw_nblks = ntohl(nd->swap_nblks);
+	} else if (bdevvp(swapdev, &swapdev_vp))
+		panic("nfs_mountroot: can't setup swapdev_vp");
+
+	/*
+	 * Create the rootfs mount point.
+	 * It is mounted read-only (MNT_RDONLY) at this stage.
+	 */
+	nd->root_args.fh = (nfsv2fh_t *)nd->root_fh;
+	mp = nfs_mountdiskless(nd->root_hostnam, "/", MNT_RDONLY,
+	    &nd->root_saddr, &nd->root_args, &vp);
+
+	/* Put it on the mount list as the root file system. */
+	if (vfs_lock(mp))
+		panic("nfs_mountroot: vfs_lock");
+	TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list);
+	mp->mnt_flag |= MNT_ROOTFS;
+	mp->mnt_vnodecovered = NULLVP;
+	vfs_unlock(mp);
+	rootvp = vp;
+
+	/*
+	 * This is not really an nfs issue, but it is much easier to
+	 * set hostname here and then let the "/etc/rc.xxx" files
+	 * mount the right /var based upon its preset value.
+	 * The copy is forcibly terminated and hostnamelen is set to the
+	 * length of the resulting string.
+	 */
+	bcopy(nd->my_hostnam, hostname, MAXHOSTNAMELEN);
+	hostname[MAXHOSTNAMELEN - 1] = '\0';
+	for (i = 0; i < MAXHOSTNAMELEN; i++)
+		if (hostname[i] == '\0')
+			break;
+	hostnamelen = i;
+	/* Initialize the clock from the time supplied in nfs_diskless. */
+	inittodr(ntohl(nd->root_time));
+	return (0);
+}
+
+/*
+ * Diskless-setup stand-in for the mount system call; any failure panics.
+ */
+static struct mount *
+nfs_mountdiskless(path, which, mountflag, sin, args, vpp)
+	char *path;
+	char *which;
+	int mountflag;
+	struct sockaddr_in *sin;
+	struct nfs_args *args;
+	register struct vnode **vpp;
+{
+	register struct mbuf *nam;
+	register struct mount *mp;
+	register int error;
+
+	mp = (struct mount *)malloc((u_long)sizeof(struct mount),
+	    M_MOUNT, M_NOWAIT);
+	if (!mp)
+		panic("nfs_mountroot: %s mount malloc", which);
+	bzero((char *)mp, (u_long)sizeof(struct mount));
+	mp->mnt_flag = mountflag;
+	mp->mnt_op = &nfs_vfsops;
+
+	MGET(nam, MT_SONAME, M_DONTWAIT);
+	if (!nam)
+		panic("nfs_mountroot: %s mount mbuf", which);
+	nam->m_len = sin->sin_len;
+	bcopy((caddr_t)sin, mtod(nam, caddr_t), sin->sin_len);
+	nfsargs_ntoh(args);
+	if ((error = mountnfs(args, mp, nam, which, path, vpp)) != 0)
+		panic("nfs_mountroot: mount %s on %s: %d", path, which, error);
+
+	return (mp);
+}
+
+/*
+ * Convert the integer fields of the nfs_args structure from net byte order
+ * to host byte order, in place. Called by nfs_mountdiskless() above.
+ */
+void
+nfsargs_ntoh(nfsp)
+	register struct nfs_args *nfsp;
+{
+
+	NTOHL(nfsp->sotype);
+	NTOHL(nfsp->proto);
+	NTOHL(nfsp->flags);
+	NTOHL(nfsp->wsize);
+	NTOHL(nfsp->rsize);
+	NTOHL(nfsp->timeo);
+	NTOHL(nfsp->retrans);
+	NTOHL(nfsp->maxgrouplist);
+	NTOHL(nfsp->readahead);
+	NTOHL(nfsp->leaseterm);
+	NTOHL(nfsp->deadthresh);
+}
+
+/*
+ * VFS Operations.
+ *
+ * mount system call
+ * The host and path strings are copied in here with copyinstr(), only for
+ * mountnfs() to bcopy() them again.  That looks a bit dumb, but it lets
+ * every copy error show up before sockargs() allocates its mbuf; an error
+ * after that point would mean having to release the mbuf.
+ */
+/* ARGSUSED */
+int
+nfs_mount(mp, path, data, ndp, p)
+	struct mount *mp;
+	char *path;
+	caddr_t data;
+	struct nameidata *ndp;
+	struct proc *p;
+{
+	char hst[MNAMELEN], pth[MNAMELEN];
+	struct nfs_args args;
+	struct mbuf *nam;
+	struct vnode *vp;
+	nfsv2fh_t nfh;
+	u_int len;
+	int error;
+
+	if ((error = copyin(data, (caddr_t)&args, sizeof (args))) != 0)
+		return (error);
+	if ((error = copyin((caddr_t)args.fh, (caddr_t)&nfh, sizeof(nfh))) != 0)
+		return (error);
+	if ((error = copyinstr(path, pth, MNAMELEN - 1, &len)) != 0)
+		return (error);
+	bzero(pth + len, MNAMELEN - len);
+	if ((error = copyinstr(args.hostname, hst, MNAMELEN - 1, &len)) != 0)
+		return (error);
+	bzero(hst + len, MNAMELEN - len);
+	/* sockargs() call must be after above copyin() calls */
+	error = sockargs(&nam, (caddr_t)args.addr, args.addrlen, MT_SONAME);
+	if (error)
+		return (error);
+	/* From here on mountnfs() keeps nam or frees it. */
+	args.fh = &nfh;
+	return (mountnfs(&args, mp, nam, pth, hst, &vp));
+}
+
+/*
+ * Common code for mount and mountroot; nam is freed on error or MNT_UPDATE.
+ */
+int
+mountnfs(argp, mp, nam, pth, hst, vpp)
+	register struct nfs_args *argp;
+	register struct mount *mp;
+	struct mbuf *nam;	/* kept in nm_nam on success */
+	char *pth, *hst;	/* MNAMELEN bytes are copied from each */
+	struct vnode **vpp;	/* out: vnode of the remote root */
+{
+	register struct nfsmount *nmp;
+	struct nfsnode *np;
+	int error;
+
+	if (mp->mnt_flag & MNT_UPDATE) {
+		nmp = VFSTONFS(mp);
+		/* update paths, file handles, etc, here	XXX */
+		m_freem(nam);	/* nothing is updated yet: drop the address */
+		return (0);
+	} else {
+		MALLOC(nmp, struct nfsmount *, sizeof (struct nfsmount),
+		    M_NFSMNT, M_WAITOK);
+		bzero((caddr_t)nmp, sizeof (struct nfsmount));
+		mp->mnt_data = (qaddr_t)nmp;
+	}
+	getnewfsid(mp, MOUNT_NFS);
+	nmp->nm_mountp = mp;
+	nmp->nm_flag = argp->flags;
+	if ((nmp->nm_flag & (NFSMNT_NQNFS | NFSMNT_MYWRITE)) ==
+		(NFSMNT_NQNFS | NFSMNT_MYWRITE)) {
+		error = EPERM;	/* these two flags may not be combined */
+		goto bad;
+	}
+	if (nmp->nm_flag & NFSMNT_NQNFS)
+		/*
+		 * We have to set mnt_maxsymlinklen to a non-zero value so
+		 * that COMPAT_43 routines will know that we are setting
+		 * the d_type field in directories (and can zero it for
+		 * unsuspecting binaries).
+		 */
+		mp->mnt_maxsymlinklen = 1;
+	nmp->nm_timeo = NFS_TIMEO;	/* defaults; the flagged fields of */
+	nmp->nm_retry = NFS_RETRANS;	/* argp override them further down */
+	nmp->nm_wsize = NFS_WSIZE;
+	nmp->nm_rsize = NFS_RSIZE;
+	nmp->nm_numgrps = NFS_MAXGRPS;
+	nmp->nm_readahead = NFS_DEFRAHEAD;
+	nmp->nm_leaseterm = NQ_DEFLEASE;
+	nmp->nm_deadthresh = NQ_DEADTHRESH;
+	nmp->nm_tnext = (struct nfsnode *)nmp;	/* both links start out */
+	nmp->nm_tprev = (struct nfsnode *)nmp;	/* pointing at the mount */
+	nmp->nm_inprog = NULLVP;
+	bcopy((caddr_t)argp->fh, (caddr_t)&nmp->nm_fh, sizeof(nfsv2fh_t));
+	mp->mnt_stat.f_type = MOUNT_NFS;
+	bcopy(hst, mp->mnt_stat.f_mntfromname, MNAMELEN);
+	bcopy(pth, mp->mnt_stat.f_mntonname, MNAMELEN);
+	nmp->nm_nam = nam;
+
+	if ((argp->flags & NFSMNT_TIMEO) && argp->timeo > 0) {
+		/* scale by NFS_HZ / 10, rounded to nearest, then clamp */
+		nmp->nm_timeo = (argp->timeo * NFS_HZ + 5) / 10;
+		if (nmp->nm_timeo < NFS_MINTIMEO)
+			nmp->nm_timeo = NFS_MINTIMEO;
+		else if (nmp->nm_timeo > NFS_MAXTIMEO)
+			nmp->nm_timeo = NFS_MAXTIMEO;
+	}
+
+	if ((argp->flags & NFSMNT_RETRANS) && argp->retrans > 1) {
+		nmp->nm_retry = argp->retrans;
+		if (nmp->nm_retry > NFS_MAXREXMIT)
+			nmp->nm_retry = NFS_MAXREXMIT;
+	}
+
+	if ((argp->flags & NFSMNT_WSIZE) && argp->wsize > 0) {
+		nmp->nm_wsize = argp->wsize;
+		/* Round down to a multiple of the 512 byte blocksize */
+		nmp->nm_wsize &= ~0x1ff;
+		if (nmp->nm_wsize <= 0)
+			nmp->nm_wsize = 512;
+		else if (nmp->nm_wsize > NFS_MAXDATA)
+			nmp->nm_wsize = NFS_MAXDATA;
+	}
+	if (nmp->nm_wsize > MAXBSIZE)	/* applies to the default too */
+		nmp->nm_wsize = MAXBSIZE;
+
+	if ((argp->flags & NFSMNT_RSIZE) && argp->rsize > 0) {
+		nmp->nm_rsize = argp->rsize;
+		/* Round down to a multiple of the 512 byte blocksize */
+		nmp->nm_rsize &= ~0x1ff;
+		if (nmp->nm_rsize <= 0)
+			nmp->nm_rsize = 512;
+		else if (nmp->nm_rsize > NFS_MAXDATA)
+			nmp->nm_rsize = NFS_MAXDATA;
+	}
+	if (nmp->nm_rsize > MAXBSIZE)	/* applies to the default too */
+		nmp->nm_rsize = MAXBSIZE;
+	if ((argp->flags & NFSMNT_MAXGRPS) && argp->maxgrouplist >= 0 &&
+		argp->maxgrouplist <= NFS_MAXGRPS)
+		nmp->nm_numgrps = argp->maxgrouplist;
+	if ((argp->flags & NFSMNT_READAHEAD) && argp->readahead >= 0 &&
+		argp->readahead <= NFS_MAXRAHEAD)
+		nmp->nm_readahead = argp->readahead;
+	if ((argp->flags & NFSMNT_LEASETERM) && argp->leaseterm >= 2 &&
+		argp->leaseterm <= NQ_MAXLEASE)
+		nmp->nm_leaseterm = argp->leaseterm;
+	if ((argp->flags & NFSMNT_DEADTHRESH) && argp->deadthresh >= 1 &&
+		argp->deadthresh <= NQ_NEVERDEAD)
+		nmp->nm_deadthresh = argp->deadthresh;
+	/* Set up the sockets and per-host congestion */
+	nmp->nm_sotype = argp->sotype;
+	nmp->nm_soproto = argp->proto;
+
+	/*
+	 * For Connection based sockets (TCP,...) defer the connect until
+	 * the first request, in case the server is not responding.
+	 * Only datagram sockets are connected right here.
+	 */
+	if (nmp->nm_sotype == SOCK_DGRAM &&
+		(error = nfs_connect(nmp, (struct nfsreq *)0)))
+		goto bad;
+
+	/*
+	 * This is silly, but it has to be set so that vinifod() works.
+	 * We do not want to do an nfs_statfs() here since we can get
+	 * stuck on a dead server and we are holding a lock on the mount
+	 * point.
+	 */
+	mp->mnt_stat.f_iosize = NFS_MAXDGRAMDATA;
+	/*
+	 * A reference count is needed on the nfsnode representing the
+	 * remote root.  If this object is not persistent, then backward
+	 * traversals of the mount point (i.e. "..") will not work if
+	 * the nfsnode gets flushed out of the cache. Ufs does not have
+	 * this problem, because one can identify root inodes by their
+	 * number == ROOTINO (2).
+	 */
+	if (error = nfs_nget(mp, &nmp->nm_fh, &np))
+		goto bad;
+	*vpp = NFSTOV(np);
+
+	return (0);
+bad:	/* common failure exit: undo the connect, free nmp and nam */
+	nfs_disconnect(nmp);
+	free((caddr_t)nmp, M_NFSMNT);
+	m_freem(nam);
+	return (error);
+}
+
+/*
+ * unmount system call; EBUSY while the root vnode has other users.
+ */
+int
+nfs_unmount(mp, mntflags, p)
+	struct mount *mp;
+	int mntflags;
+	struct proc *p;
+{
+	register struct nfsmount *nmp;
+	struct nfsnode *np;
+	struct vnode *vp;
+	int error, flags = 0;
+	extern int doforce;
+
+	if (mntflags & MNT_FORCE) {
+		/* forced unmount needs doforce and is refused for the root fs */
+		if (!doforce || (mp->mnt_flag & MNT_ROOTFS))
+			return (EINVAL);
+		flags |= FORCECLOSE;
+	}
+	nmp = VFSTONFS(mp);
+	/*
+	 * Goes something like this..
+	 * - Check for activity on the root vnode (other than ourselves).
+	 * - Call vflush() to clear out vnodes for this file system,
+	 *   except for the root vnode.
+	 * - Decrement reference on the vnode representing remote root.
+	 * - Close the socket
+	 * - Free up the data structures
+	 */
+	/*
+	 * We need to decrement the ref. count on the nfsnode representing
+	 * the remote root.  See comment in mountnfs().  The VFS unmount()
+	 * has done vput on this vnode, otherwise we would get deadlock!
+	 */
+	if (error = nfs_nget(mp, &nmp->nm_fh, &np))
+		return(error);
+	vp = NFSTOV(np);
+	if (vp->v_usecount > 2) {	/* more than mountnfs()'s ref + ours */
+		vput(vp);
+		return (EBUSY);
+	}
+
+	/*
+	 * Must handshake with nqnfs_clientd() if it is active.
+	 */
+	nmp->nm_flag |= NFSMNT_DISMINPROG;
+	while (nmp->nm_inprog != NULLVP)
+		(void) tsleep((caddr_t)&lbolt, PSOCK, "nfsdism", 0);
+	if (error = vflush(mp, vp, flags)) {
+		vput(vp);
+		nmp->nm_flag &= ~NFSMNT_DISMINPROG;	/* unmount abandoned */
+		return (error);
+	}
+
+	/*
+	 * We are now committed to the unmount.
+	 * For NQNFS, let the server daemon free the nfsmount structure.
+	 */
+	if (nmp->nm_flag & (NFSMNT_NQNFS | NFSMNT_KERB))
+		nmp->nm_flag |= NFSMNT_DISMNT;
+
+	/*
+	 * There are two reference counts to get rid of here.
+	 */
+	vrele(vp);
+	vrele(vp);
+	vgone(vp);
+	nfs_disconnect(nmp);
+	m_freem(nmp->nm_nam);
+
+	/* Without NQNFS or KERB nobody else will free nmp, so do it here. */
+	if ((nmp->nm_flag & (NFSMNT_NQNFS | NFSMNT_KERB)) == 0)
+		free((caddr_t)nmp, M_NFSMNT);
+	return (0);
+}
+
+/*
+ * Hand back the vnode of the remote root directory of a mount.
+ */
+int
+nfs_root(mp, vpp)
+	struct mount *mp;
+	struct vnode **vpp;
+{
+	register struct vnode *rvp;
+	struct nfsnode *rnp;
+	int error;
+
+	/* Look up the nfsnode for the root file handle kept in the nfsmount. */
+	error = nfs_nget(mp, &VFSTONFS(mp)->nm_fh, &rnp);
+	if (error)
+		return (error);
+	rvp = NFSTOV(rnp);
+	rvp->v_flag = VROOT;
+	rvp->v_type = VDIR;
+	*vpp = rvp;
+	return (0);
+}
+
+extern int syncprt;
+
+/*
+ * Fsync every unlocked vnode of the mount that has dirty buffers.
+ */
+/* ARGSUSED */
+int
+nfs_sync(mp, waitfor, cred, p)
+	struct mount *mp;
+	int waitfor;
+	struct ucred *cred;
+	struct proc *p;
+{
+	register struct vnode *vp;
+	int error, allerror = 0;
+
+	/*
+	 * Push out stale buffer cache information, one vnode at a time.
+	 */
+loop:
+	vp = mp->mnt_vnodelist.lh_first;
+	while (vp != NULL) {
+		/*
+		 * A vnode that is no longer associated with this mount
+		 * point means we have to start over from the list head.
+		 */
+		if (vp->v_mount != mp)
+			goto loop;
+		if (!VOP_ISLOCKED(vp) && vp->v_dirtyblkhd.lh_first != NULL) {
+			if (vget(vp, 1))
+				goto loop;
+			if ((error = VOP_FSYNC(vp, cred, waitfor, p)) != 0)
+				allerror = error;
+			vput(vp);
+		}
+		vp = vp->v_mntvnodes.le_next;
+	}
+	return (allerror);
+}
+
+/*
+ * NFS flat namespace lookup (vnode by inode number).
+ * Currently unsupported: every call fails with EOPNOTSUPP.
+ */
+/* ARGSUSED */
+int
+nfs_vget(mp, ino, vpp)
+	struct mount *mp;
+	ino_t ino;
+	struct vnode **vpp;
+{
+
+	return (EOPNOTSUPP);	/* *vpp is left untouched */
+}
+
+/*
+ * File handle to vnode pointer. At this point, this should never happen.
+ */
+/* ARGSUSED */
+int
+nfs_fhtovp(mp, fhp, nam, vpp, exflagsp, credanonp)
+	register struct mount *mp;
+	struct fid *fhp;
+	struct mbuf *nam;
+	struct vnode **vpp;
+	int *exflagsp;
+	struct ucred **credanonp;
+{
+
+	return (EINVAL);	/* no argument is examined or written */
+}
+
+/*
+ * Vnode pointer to File handle, should never happen either: always EINVAL.
+ */
+/* ARGSUSED */
+int
+nfs_vptofh(vp, fhp)
+	struct vnode *vp;
+	struct fid *fhp;
+{
+
+	return (EINVAL);	/* *fhp is left untouched */
+}
+
+/*
+ * Vfs start routine, a no-op that always reports success.
+ */
+/* ARGSUSED */
+int
+nfs_start(mp, flags, p)
+	struct mount *mp;
+	int flags;
+	struct proc *p;
+{
+
+	return (0);
+}
+
+/*
+ * Do operations associated with quotas, not supported: always EOPNOTSUPP.
+ */
+/* ARGSUSED */
+int
+nfs_quotactl(mp, cmd, uid, arg, p)
+	struct mount *mp;
+	int cmd;
+	uid_t uid;
+	caddr_t arg;
+	struct proc *p;
+{
+
+	return (EOPNOTSUPP);	/* no argument is examined */
+}
diff --git a/sys/nfs/nfs_vnops.c b/sys/nfs/nfs_vnops.c
new file mode 100644
index 00000000000..a909b48dc67
--- /dev/null
+++ b/sys/nfs/nfs_vnops.c
@@ -0,0 +1,2539 @@
+/*
+ * Copyright (c) 1989, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Rick Macklem at The University of Guelph.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)nfs_vnops.c	8.5 (Berkeley) 2/13/94
+ */
+
+/*
+ * vnode op calls for sun nfs version 2
+ */
+
+#include <sys/param.h>
+#include <sys/proc.h>
+#include <sys/kernel.h>
+#include <sys/systm.h>
+#include <sys/mount.h>
+#include <sys/buf.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/conf.h>
+#include <sys/namei.h>
+#include <sys/vnode.h>
+#include <sys/map.h>
+#include <sys/dirent.h>
+
+#include <vm/vm.h>
+
+#include <miscfs/specfs/specdev.h>
+#include <miscfs/fifofs/fifo.h>
+
+#include <nfs/rpcv2.h>
+#include <nfs/nfsv2.h>
+#include <nfs/nfs.h>
+#include <nfs/nfsnode.h>
+#include <nfs/nfsmount.h>
+#include <nfs/xdr_subs.h>
+#include <nfs/nfsm_subs.h>
+#include <nfs/nqnfs.h>
+
+/* Boolean constants */
+#define	TRUE	1
+#define	FALSE	0
+
+/*
+ * Global vfs data structures for nfs: the vnode op table for nfs files.
+ */
+int (**nfsv2_vnodeop_p)();
+struct vnodeopv_entry_desc nfsv2_vnodeop_entries[] = {
+	{ &vop_default_desc, vn_default_error },	/* default */
+	{ &vop_lookup_desc, nfs_lookup },	/* lookup */
+	{ &vop_create_desc, nfs_create },	/* create */
+	{ &vop_mknod_desc, nfs_mknod },		/* mknod */
+	{ &vop_open_desc, nfs_open },		/* open */
+	{ &vop_close_desc, nfs_close },		/* close */
+	{ &vop_access_desc, nfs_access },	/* access */
+	{ &vop_getattr_desc, nfs_getattr },	/* getattr */
+	{ &vop_setattr_desc, nfs_setattr },	/* setattr */
+	{ &vop_read_desc, nfs_read },		/* read */
+	{ &vop_write_desc, nfs_write },		/* write */
+	{ &vop_ioctl_desc, nfs_ioctl },		/* ioctl */
+	{ &vop_select_desc, nfs_select },	/* select */
+	{ &vop_mmap_desc, nfs_mmap },		/* mmap */
+	{ &vop_fsync_desc, nfs_fsync },		/* fsync */
+	{ &vop_seek_desc, nfs_seek },		/* seek */
+	{ &vop_remove_desc, nfs_remove },	/* remove */
+	{ &vop_link_desc, nfs_link },		/* link */
+	{ &vop_rename_desc, nfs_rename },	/* rename */
+	{ &vop_mkdir_desc, nfs_mkdir },		/* mkdir */
+	{ &vop_rmdir_desc, nfs_rmdir },		/* rmdir */
+	{ &vop_symlink_desc, nfs_symlink },	/* symlink */
+	{ &vop_readdir_desc, nfs_readdir },	/* readdir */
+	{ &vop_readlink_desc, nfs_readlink },	/* readlink */
+	{ &vop_abortop_desc, nfs_abortop },	/* abortop */
+	{ &vop_inactive_desc, nfs_inactive },	/* inactive */
+	{ &vop_reclaim_desc, nfs_reclaim },	/* reclaim */
+	{ &vop_lock_desc, nfs_lock },		/* lock */
+	{ &vop_unlock_desc, nfs_unlock },	/* unlock */
+	{ &vop_bmap_desc, nfs_bmap },		/* bmap */
+	{ &vop_strategy_desc, nfs_strategy },	/* strategy */
+	{ &vop_print_desc, nfs_print },		/* print */
+	{ &vop_islocked_desc, nfs_islocked },	/* islocked */
+	{ &vop_pathconf_desc, nfs_pathconf },	/* pathconf */
+	{ &vop_advlock_desc, nfs_advlock },	/* advlock */
+	{ &vop_blkatoff_desc, nfs_blkatoff },	/* blkatoff */
+	{ &vop_valloc_desc, nfs_valloc },	/* valloc */
+	{ &vop_reallocblks_desc, nfs_reallocblks },	/* reallocblks */
+	{ &vop_vfree_desc, nfs_vfree },		/* vfree */
+	{ &vop_truncate_desc, nfs_truncate },	/* truncate */
+	{ &vop_update_desc, nfs_update },	/* update */
+	{ &vop_bwrite_desc, vn_bwrite },	/* bwrite */
+	{ (struct vnodeop_desc*)NULL, (int(*)())NULL }	/* NULL entry ends it */
+};
+struct vnodeopv_desc nfsv2_vnodeop_opv_desc =
+	{ &nfsv2_vnodeop_p, nfsv2_vnodeop_entries };
+
+/*
+ * Special device vnode ops: spec_* routines mixed with nfs-specific ones.
+ */
+int (**spec_nfsv2nodeop_p)();
+struct vnodeopv_entry_desc spec_nfsv2nodeop_entries[] = {
+	{ &vop_default_desc, vn_default_error },	/* default */
+	{ &vop_lookup_desc, spec_lookup },	/* lookup */
+	{ &vop_create_desc, spec_create },	/* create */
+	{ &vop_mknod_desc, spec_mknod },	/* mknod */
+	{ &vop_open_desc, spec_open },		/* open */
+	{ &vop_close_desc, nfsspec_close },	/* close */
+	{ &vop_access_desc, nfsspec_access },	/* access */
+	{ &vop_getattr_desc, nfs_getattr },	/* getattr */
+	{ &vop_setattr_desc, nfs_setattr },	/* setattr */
+	{ &vop_read_desc, nfsspec_read },	/* read */
+	{ &vop_write_desc, nfsspec_write },	/* write */
+	{ &vop_ioctl_desc, spec_ioctl },	/* ioctl */
+	{ &vop_select_desc, spec_select },	/* select */
+	{ &vop_mmap_desc, spec_mmap },		/* mmap */
+	{ &vop_fsync_desc, nfs_fsync },		/* fsync */
+	{ &vop_seek_desc, spec_seek },		/* seek */
+	{ &vop_remove_desc, spec_remove },	/* remove */
+	{ &vop_link_desc, spec_link },		/* link */
+	{ &vop_rename_desc, spec_rename },	/* rename */
+	{ &vop_mkdir_desc, spec_mkdir },	/* mkdir */
+	{ &vop_rmdir_desc, spec_rmdir },	/* rmdir */
+	{ &vop_symlink_desc, spec_symlink },	/* symlink */
+	{ &vop_readdir_desc, spec_readdir },	/* readdir */
+	{ &vop_readlink_desc, spec_readlink },	/* readlink */
+	{ &vop_abortop_desc, spec_abortop },	/* abortop */
+	{ &vop_inactive_desc, nfs_inactive },	/* inactive */
+	{ &vop_reclaim_desc, nfs_reclaim },	/* reclaim */
+	{ &vop_lock_desc, nfs_lock },		/* lock */
+	{ &vop_unlock_desc, nfs_unlock },	/* unlock */
+	{ &vop_bmap_desc, spec_bmap },		/* bmap */
+	{ &vop_strategy_desc, spec_strategy },	/* strategy */
+	{ &vop_print_desc, nfs_print },		/* print */
+	{ &vop_islocked_desc, nfs_islocked },	/* islocked */
+	{ &vop_pathconf_desc, spec_pathconf },	/* pathconf */
+	{ &vop_advlock_desc, spec_advlock },	/* advlock */
+	{ &vop_blkatoff_desc, spec_blkatoff },	/* blkatoff */
+	{ &vop_valloc_desc, spec_valloc },	/* valloc */
+	{ &vop_reallocblks_desc, spec_reallocblks },	/* reallocblks */
+	{ &vop_vfree_desc, spec_vfree },	/* vfree */
+	{ &vop_truncate_desc, spec_truncate },	/* truncate */
+	{ &vop_update_desc, nfs_update },	/* update */
+	{ &vop_bwrite_desc, vn_bwrite },	/* bwrite */
+	{ (struct vnodeop_desc*)NULL, (int(*)())NULL }	/* NULL entry ends it */
+};
+struct vnodeopv_desc spec_nfsv2nodeop_opv_desc =
+	{ &spec_nfsv2nodeop_p, spec_nfsv2nodeop_entries };
+
+#ifdef FIFO
+int (**fifo_nfsv2nodeop_p)();	/* fifo vnode ops, only built with FIFO */
+struct vnodeopv_entry_desc fifo_nfsv2nodeop_entries[] = {
+	{ &vop_default_desc, vn_default_error },	/* default */
+	{ &vop_lookup_desc, fifo_lookup },	/* lookup */
+	{ &vop_create_desc, fifo_create },	/* create */
+	{ &vop_mknod_desc, fifo_mknod },	/* mknod */
+	{ &vop_open_desc, fifo_open },		/* open */
+	{ &vop_close_desc, nfsfifo_close },	/* close */
+	{ &vop_access_desc, nfsspec_access },	/* access */
+	{ &vop_getattr_desc, nfs_getattr },	/* getattr */
+	{ &vop_setattr_desc, nfs_setattr },	/* setattr */
+	{ &vop_read_desc, nfsfifo_read },	/* read */
+	{ &vop_write_desc, nfsfifo_write },	/* write */
+	{ &vop_ioctl_desc, fifo_ioctl },	/* ioctl */
+	{ &vop_select_desc, fifo_select },	/* select */
+	{ &vop_mmap_desc, fifo_mmap },		/* mmap */
+	{ &vop_fsync_desc, nfs_fsync },		/* fsync */
+	{ &vop_seek_desc, fifo_seek },		/* seek */
+	{ &vop_remove_desc, fifo_remove },	/* remove */
+	{ &vop_link_desc, fifo_link },		/* link */
+	{ &vop_rename_desc, fifo_rename },	/* rename */
+	{ &vop_mkdir_desc, fifo_mkdir },	/* mkdir */
+	{ &vop_rmdir_desc, fifo_rmdir },	/* rmdir */
+	{ &vop_symlink_desc, fifo_symlink },	/* symlink */
+	{ &vop_readdir_desc, fifo_readdir },	/* readdir */
+	{ &vop_readlink_desc, fifo_readlink },	/* readlink */
+	{ &vop_abortop_desc, fifo_abortop },	/* abortop */
+	{ &vop_inactive_desc, nfs_inactive },	/* inactive */
+	{ &vop_reclaim_desc, nfs_reclaim },	/* reclaim */
+	{ &vop_lock_desc, nfs_lock },		/* lock */
+	{ &vop_unlock_desc, nfs_unlock },	/* unlock */
+	{ &vop_bmap_desc, fifo_bmap },		/* bmap */
+	{ &vop_strategy_desc, fifo_badop },	/* strategy */
+	{ &vop_print_desc, nfs_print },		/* print */
+	{ &vop_islocked_desc, nfs_islocked },	/* islocked */
+	{ &vop_pathconf_desc, fifo_pathconf },	/* pathconf */
+	{ &vop_advlock_desc, fifo_advlock },	/* advlock */
+	{ &vop_blkatoff_desc, fifo_blkatoff },	/* blkatoff */
+	{ &vop_valloc_desc, fifo_valloc },	/* valloc */
+	{ &vop_reallocblks_desc, fifo_reallocblks },	/* reallocblks */
+	{ &vop_vfree_desc, fifo_vfree },	/* vfree */
+	{ &vop_truncate_desc, fifo_truncate },	/* truncate */
+	{ &vop_update_desc, nfs_update },	/* update */
+	{ &vop_bwrite_desc, vn_bwrite },	/* bwrite */
+	{ (struct vnodeop_desc*)NULL, (int(*)())NULL }	/* NULL entry ends it */
+};
+struct vnodeopv_desc fifo_nfsv2nodeop_opv_desc =
+	{ &fifo_nfsv2nodeop_p, fifo_nfsv2nodeop_entries };
+#endif /* FIFO */
+
+void nqnfs_clientlease();
+
+/*
+ * Global variables; the extern ones are defined outside this file.
+ */
+extern u_long nfs_procids[NFS_NPROCS];
+extern u_long nfs_prog, nfs_vers, nfs_true, nfs_false;
+extern char nfsiobuf[MAXPHYS+NBPG];
+struct proc *nfs_iodwant[NFS_MAXASYNCDAEMON];
+int nfs_numasync = 0;
+#define	DIRHDSIZ	(sizeof (struct dirent) - (MAXNAMLEN + 1))	/* dirent less its name buffer */
+
+/*
+ * nfs null call from vfs: an NFSPROC_NULL request with no arguments.
+ */
+int
+nfs_null(vp, cred, procp)
+	struct vnode *vp;
+	struct ucred *cred;
+	struct proc *procp;
+{
+	caddr_t bpos, dpos;
+	int error = 0;
+	struct mbuf *mreq, *mrep, *md, *mb;
+	
+	nfsm_reqhead(vp, NFSPROC_NULL, 0);	/* 0: no argument bytes */
+	nfsm_request(vp, NFSPROC_NULL, procp, cred);
+	nfsm_reqdone;
+	return (error);
+}
+
+/*
+ * nfs access vnode op.
+ *
+ * On an nqnfs mount the server is asked, via the access rpc, whether the
+ * requested combination of read, write and execute is permitted.  On a
+ * plain nfs mount the decision is left to nfsspec_access().
+ *
+ * Either way the answer is advisory: if file modes are changed on the
+ * server, accesses might still fail later.
+ */
+int
+nfs_access(ap)
+	struct vop_access_args /* {
+		struct vnode *a_vp;
+		int  a_mode;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	register struct vnode *vp = ap->a_vp;
+	register caddr_t cp;
+	register u_long *tl;
+	struct mbuf *mreq, *mrep, *md, *mb, *mb2;
+	caddr_t bpos, dpos;
+	int error = 0;
+
+	/*
+	 * Without nqnfs there is no access rpc, and we are stuck emulating
+	 * ufs_access() locally using the vattr.
+	 */
+	if ((VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NQNFS) == 0)
+		return (nfsspec_access(ap));
+
+	/*
+	 * For nqnfs, let the server decide.  The local emulation may not be
+	 * correct, since the server may apply other access criteria such as
+	 * client uid-->server uid mapping that we do not know about; asking
+	 * is better than returning anything that is lying about in the cache.
+	 */
+	nfsstats.rpccnt[NQNFSPROC_ACCESS]++;
+	nfsm_reqhead(vp, NQNFSPROC_ACCESS, NFSX_FH + 3 * NFSX_UNSIGNED);
+	nfsm_fhtom(vp);
+
+	/* Three booleans follow the file handle: read, write, execute. */
+	nfsm_build(tl, u_long *, 3 * NFSX_UNSIGNED);
+	*tl++ = (ap->a_mode & VREAD) ? nfs_true : nfs_false;
+	*tl++ = (ap->a_mode & VWRITE) ? nfs_true : nfs_false;
+	*tl = (ap->a_mode & VEXEC) ? nfs_true : nfs_false;
+
+	nfsm_request(vp, NQNFSPROC_ACCESS, ap->a_p, ap->a_cred);
+	nfsm_reqdone;
+	return (error);
+}
+
+/*
+ * nfs open vnode op
+ * Refuse anything but regular files, directories and symlinks.  For a
+ * paged in text file, flush buffers and page cache if consistency is lost.
+ */
+/* ARGSUSED */
+int
+nfs_open(ap)
+	struct vop_open_args /* {
+		struct vnode *a_vp;
+		int  a_mode;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	register struct vnode *vp = ap->a_vp;
+	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
+	struct nfsnode *np = VTONFS(vp);
+	struct ucred *cred = ap->a_cred;
+	struct proc *p = ap->a_p;
+	struct vattr vattr;
+	int error;
+
+	if (!(vp->v_type == VREG || vp->v_type == VDIR || vp->v_type == VLNK))
+		return (EACCES);
+	if ((vp->v_flag & VTEXT) == 0) {
+		if ((nmp->nm_flag & NFSMNT_NQNFS) == 0)
+			np->n_attrstamp = 0; /* For Open/Close consistency */
+		return (0);
+	}
+
+	if (nmp->nm_flag & NFSMNT_NQNFS) {
+		/* Get a valid lease. If cached data is stale, flush it. */
+		if (NQNFS_CKINVALID(vp, np, NQL_READ)) {
+			do {
+				error = nqnfs_getlease(vp, NQL_READ, cred, p);
+			} while (error == NQNFS_EXPIRED);
+			if (error)
+				return (error);
+			if (np->n_lrev != np->n_brev ||
+			    (np->n_flag & NQNFSNONCACHE)) {
+				error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
+				if (error == EINTR)
+					return (error);
+				(void) vnode_pager_uncache(vp);
+				np->n_brev = np->n_lrev;
+			}
+		}
+		return (0);
+	}
+
+	if (np->n_flag & NMODIFIED) {
+		if ((error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1)) == EINTR)
+			return (error);
+		(void) vnode_pager_uncache(vp);
+		np->n_attrstamp = 0;
+		np->n_direofoffset = 0;
+		if ((error = VOP_GETATTR(vp, &vattr, cred, p)) != 0)
+			return (error);
+		np->n_mtime = vattr.va_mtime.ts_sec;
+		return (0);
+	}
+	if ((error = VOP_GETATTR(vp, &vattr, cred, p)) != 0)
+		return (error);
+	if (np->n_mtime != vattr.va_mtime.ts_sec) {
+		np->n_direofoffset = 0;
+		if ((error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1)) == EINTR)
+			return (error);
+		(void) vnode_pager_uncache(vp);
+		np->n_mtime = vattr.va_mtime.ts_sec;
+	}
+	return (0);
+}
+
+/*
+ * nfs close vnode op
+ * For reg files, invalidate modified buffers and report a write error.
+ */
+/* ARGSUSED */
+int
+nfs_close(ap)
+	struct vop_close_args /* {
+		struct vnodeop_desc *a_desc;
+		struct vnode *a_vp;
+		int  a_fflag;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	register struct vnode *vp = ap->a_vp;
+	register struct nfsnode *np = VTONFS(vp);
+	int error = 0;
+
+	if (vp->v_type != VREG)
+		return (0);
+	if ((np->n_flag & NMODIFIED) &&
+	    (VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NQNFS) == 0) {
+		error = nfs_vinvalbuf(vp, V_SAVE, ap->a_cred, ap->a_p, 1);
+		np->n_attrstamp = 0;
+	}
+	if (np->n_flag & NWRITEERR) {
+		np->n_flag &= ~NWRITEERR;
+		error = np->n_error;
+	}
+	return (error);
+}
+
+/*
+ * nfs getattr call from vfs: attribute cache first, GETATTR rpc on a miss.
+ */
+int
+nfs_getattr(ap)
+	struct vop_getattr_args /* {
+		struct vnode *a_vp;
+		struct vattr *a_vap;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	register struct vnode *vp = ap->a_vp;
+	register struct nfsnode *np = VTONFS(vp);
+	register caddr_t cp;
+	caddr_t bpos, dpos;
+	int error = 0;
+	struct mbuf *mreq, *mrep, *md, *mb, *mb2;
+	
+	/*
+	 * Update local times for special files: a pending NACC or NUPD
+	 * also marks the node NCHG.
+	 */
+	if (np->n_flag & (NACC | NUPD))
+		np->n_flag |= NCHG;
+	/*
+	 * First look in the cache; a hit has already filled in *a_vap.
+	 */
+	if (nfs_getattrcache(vp, ap->a_vap) == 0)
+		return (0);
+	nfsstats.rpccnt[NFSPROC_GETATTR]++;
+	nfsm_reqhead(vp, NFSPROC_GETATTR, NFSX_FH);	/* args: file handle only */
+	nfsm_fhtom(vp);
+	nfsm_request(vp, NFSPROC_GETATTR, ap->a_p, ap->a_cred);
+	nfsm_loadattr(vp, ap->a_vap);
+	nfsm_reqdone;
+	return (error);
+}
+
+/*
+ * nfs setattr call: send new attributes; undo a size change if it fails.
+ */
+int
+nfs_setattr(ap)
+	struct vop_setattr_args /* {
+		struct vnodeop_desc *a_desc;
+		struct vnode *a_vp;
+		struct vattr *a_vap;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	register struct nfsv2_sattr *sp;
+	register caddr_t cp;
+	register long t1;
+	caddr_t bpos, dpos, cp2;
+	u_long *tl;
+	int error = 0, isnq;
+	struct mbuf *mreq, *mrep, *md, *mb, *mb2;
+	register struct vnode *vp = ap->a_vp;
+	register struct nfsnode *np = VTONFS(vp);
+	register struct vattr *vap = ap->a_vap;
+	u_quad_t frev, tsize = 0;	/* tsize: cached size before the change */
+
+	/*
+	 * Invalidate cached buffers first (V_SAVE unless truncating to 0).
+	 */
+	if (vap->va_size != VNOVAL || vap->va_mtime.ts_sec != VNOVAL ||
+		vap->va_atime.ts_sec != VNOVAL) {
+		if (vap->va_size != VNOVAL) {
+			if (np->n_flag & NMODIFIED) {
+			    error = nfs_vinvalbuf(vp,
+				vap->va_size == 0 ? 0 : V_SAVE,
+				ap->a_cred, ap->a_p, 1);
+			    if (error)
+				return (error);
+			}
+			tsize = np->n_size;	/* remembered for rollback */
+			np->n_size = np->n_vattr.va_size = vap->va_size;
+			vnode_pager_setsize(vp, (u_long)np->n_size);
+		} else if ((np->n_flag & NMODIFIED) &&
+			(error = nfs_vinvalbuf(vp, V_SAVE, ap->a_cred,
+			 ap->a_p, 1)) == EINTR)
+			return (error);
+	}
+	nfsstats.rpccnt[NFSPROC_SETATTR]++;
+	isnq = (VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NQNFS);
+	nfsm_reqhead(vp, NFSPROC_SETATTR, NFSX_FH+NFSX_SATTR(isnq));
+	nfsm_fhtom(vp);
+	nfsm_build(sp, struct nfsv2_sattr *, NFSX_SATTR(isnq));
+	if (vap->va_mode == (u_short)-1)	/* -1: leave the mode alone */
+		sp->sa_mode = VNOVAL;
+	else
+		sp->sa_mode = vtonfs_mode(vp->v_type, vap->va_mode);
+	if (vap->va_uid == (uid_t)-1)		/* -1: leave the owner alone */
+		sp->sa_uid = VNOVAL;
+	else
+		sp->sa_uid = txdr_unsigned(vap->va_uid);
+	if (vap->va_gid == (gid_t)-1)		/* -1: leave the group alone */
+		sp->sa_gid = VNOVAL;
+	else
+		sp->sa_gid = txdr_unsigned(vap->va_gid);
+	if (isnq) {	/* nqnfs form of the sattr structure */
+		txdr_hyper(&vap->va_size, &sp->sa_nqsize);
+		txdr_nqtime(&vap->va_atime, &sp->sa_nqatime);
+		txdr_nqtime(&vap->va_mtime, &sp->sa_nqmtime);
+		sp->sa_nqflags = txdr_unsigned(vap->va_flags);
+		sp->sa_nqrdev = VNOVAL;
+	} else {	/* plain nfs form of the sattr structure */
+		sp->sa_nfssize = txdr_unsigned(vap->va_size);
+		txdr_nfstime(&vap->va_atime, &sp->sa_nfsatime);
+		txdr_nfstime(&vap->va_mtime, &sp->sa_nfsmtime);
+	}
+	nfsm_request(vp, NFSPROC_SETATTR, ap->a_p, ap->a_cred);
+	nfsm_loadattr(vp, (struct vattr *)0);
+	if ((VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NQNFS) &&
+	    NQNFS_CKCACHABLE(vp, NQL_WRITE)) {
+		nfsm_dissect(tl, u_long *, 2*NFSX_UNSIGNED);
+		fxdr_hyper(tl, &frev);
+		if (frev > np->n_brev)	/* n_brev only ever moves forward */
+			np->n_brev = frev;
+	}
+	nfsm_reqdone;
+	if (error && vap->va_size != VNOVAL) {	/* undo only what we set */
+		np->n_size = np->n_vattr.va_size = tsize;
+		vnode_pager_setsize(vp, (u_long)np->n_size);
+	}
+	return (error);
+}
+
+/*
+ * nfs lookup call, one step at a time...
+ * First look in cache
+ * If not found, unlock the directory nfsnode and do the rpc
+ *
+ * Looks up the name described by ap->a_cnp in the directory ap->a_dvp.
+ * *ap->a_vpp is cleared on entry and set to the vnode found on success.
+ *
+ * Returns:
+ *	0		the name was found (from the cache or from the server)
+ *	ENOTDIR		a_dvp is not of type VDIR
+ *	EISDIR		a RENAME of the last component resolved to a_dvp itself
+ *	EJUSTRETURN	the server answered ENOENT for the last component of
+ *			a CREATE or RENAME
+ *	other		whatever the cache validation or the rpc reported
+ *
+ * A name cache entry is only used after it has been validated:
+ *	- nqnfs mount without NFSMNT_NQLOOKLEASE: used as is
+ *	- nqnfs mount with NFSMNT_NQLOOKLEASE: NQNFS_CKCACHABLE(dvp, NQL_READ)
+ *	  must hold, n_lrev must equal n_brev and NMODIFIED must be clear;
+ *	  otherwise the directory's cached data is flushed
+ *	- plain nfs mount: VOP_GETATTR() must succeed and the ctime it
+ *	  reports must equal the n_ctime remembered for the cached vnode
+ * When validation fails the entry is purged and the lookup rpc is done.
+ *
+ * For every operation other than LOOKUP, SAVENAME is set in cn_flags
+ * when the last component is processed.
+ *
+ * NOTE(review): the "error == EINTR" return inside the cache path happens
+ * after VREF()/vget() took a reference on vdp and no vrele() is visible
+ * on that path; confirm whether that reference is leaked.
+ */
+int
+nfs_lookup(ap)
+	struct vop_lookup_args /* {
+		struct vnodeop_desc *a_desc;
+		struct vnode *a_dvp;
+		struct vnode **a_vpp;
+		struct componentname *a_cnp;
+	} */ *ap;
+{
+	register struct componentname *cnp = ap->a_cnp;
+	register struct vnode *dvp = ap->a_dvp;
+	register struct vnode **vpp = ap->a_vpp;
+	register int flags = cnp->cn_flags;
+	register struct vnode *vdp;
+	register u_long *tl;
+	register caddr_t cp;
+	register long t1, t2;
+	struct nfsmount *nmp;
+	caddr_t bpos, dpos, cp2;
+	time_t reqtime;
+	struct mbuf *mreq, *mrep, *md, *mb, *mb2;
+	struct vnode *newvp;
+	long len;
+	nfsv2fh_t *fhp;
+	struct nfsnode *np;
+	int lockparent, wantparent, error = 0;
+	int nqlflag, cachable;
+	u_quad_t frev;
+
+	*vpp = NULL;
+	if (dvp->v_type != VDIR)
+		return (ENOTDIR);
+	lockparent = flags & LOCKPARENT;
+	wantparent = flags & (LOCKPARENT|WANTPARENT);
+	nmp = VFSTONFS(dvp->v_mount);
+	np = VTONFS(dvp);
+	if ((error = cache_lookup(dvp, vpp, cnp)) && error != ENOENT) {
+		struct vattr vattr;
+		int vpid;
+
+		vdp = *vpp;	/* candidate vnode handed back by the name cache */
+		vpid = vdp->v_id;	/* snapshot of v_id, compared again after vget() */
+		/*
+		 * See the comment starting `Step through' in ufs/ufs_lookup.c
+		 * for an explanation of the locking protocol
+		 */
+		if (dvp == vdp) {
+			VREF(vdp);
+			error = 0;
+		} else
+			error = vget(vdp, 1);
+		if (!error) {
+			if (vpid == vdp->v_id) {
+			   if (nmp->nm_flag & NFSMNT_NQNFS) {
+				if ((nmp->nm_flag & NFSMNT_NQLOOKLEASE) == 0) {
+					nfsstats.lookupcache_hits++;
+					if (cnp->cn_nameiop != LOOKUP &&
+					    (flags & ISLASTCN))
+					    cnp->cn_flags |= SAVENAME;
+					return (0);
+			        } else if (NQNFS_CKCACHABLE(dvp, NQL_READ)) {
+					if (np->n_lrev != np->n_brev ||
+					    (np->n_flag & NMODIFIED)) {
+						np->n_direofoffset = 0;
+						cache_purge(dvp);
+						error = nfs_vinvalbuf(dvp, 0,
+						    cnp->cn_cred, cnp->cn_proc,
+						    1);
+						if (error == EINTR)
+							return (error);
+						np->n_brev = np->n_lrev;
+					} else {
+						nfsstats.lookupcache_hits++;
+						if (cnp->cn_nameiop != LOOKUP &&
+						    (flags & ISLASTCN))
+						    cnp->cn_flags |= SAVENAME;
+						return (0);
+					}
+				}
+			   } else if (!VOP_GETATTR(vdp, &vattr, cnp->cn_cred, cnp->cn_proc) &&
+			       vattr.va_ctime.ts_sec == VTONFS(vdp)->n_ctime) {
+				nfsstats.lookupcache_hits++;
+				if (cnp->cn_nameiop != LOOKUP &&
+				    (flags & ISLASTCN))
+					cnp->cn_flags |= SAVENAME;
+				return (0);
+			   }
+			   cache_purge(vdp);	/* validation failed: drop the stale entry */
+			}
+			vrele(vdp);
+		}
+		*vpp = NULLVP;
+	}
+	error = 0;	/* any status left over from the cache path is discarded */
+	nfsstats.lookupcache_misses++;
+	nfsstats.rpccnt[NFSPROC_LOOKUP]++;
+	len = cnp->cn_namelen;
+	nfsm_reqhead(dvp, NFSPROC_LOOKUP, NFSX_FH+NFSX_UNSIGNED+nfsm_rndup(len));
+
+	/*
+	 * For nqnfs optionally piggyback a getlease request for the name
+	 * being looked up.
+	 */
+	if (nmp->nm_flag & NFSMNT_NQNFS) {
+		nfsm_build(tl, u_long *, NFSX_UNSIGNED);
+		if ((nmp->nm_flag & NFSMNT_NQLOOKLEASE) &&
+		    ((cnp->cn_flags & MAKEENTRY) &&
+		    (cnp->cn_nameiop != DELETE || !(flags & ISLASTCN))))
+			*tl = txdr_unsigned(nmp->nm_leaseterm);
+		else
+			*tl = 0;	/* zero lease term: no lease is being asked for */
+	}
+	nfsm_fhtom(dvp);
+	nfsm_strtom(cnp->cn_nameptr, len, NFS_MAXNAMLEN);
+	reqtime = time.tv_sec;	/* lease duration from the reply is added to this below */
+	nfsm_request(dvp, NFSPROC_LOOKUP, cnp->cn_proc, cnp->cn_cred);
+nfsmout:	/* reached by falling through; presumably also the macros' error target */
+	if (error) {
+		if ((cnp->cn_nameiop == CREATE || cnp->cn_nameiop == RENAME) &&
+		    (flags & ISLASTCN) && error == ENOENT)
+			error = EJUSTRETURN;
+		if (cnp->cn_nameiop != LOOKUP && (flags & ISLASTCN))
+			cnp->cn_flags |= SAVENAME;
+		return (error);
+	}
+	if (nmp->nm_flag & NFSMNT_NQNFS) {
+		nfsm_dissect(tl, u_long *, NFSX_UNSIGNED);
+		if (*tl) {
+			nqlflag = fxdr_unsigned(int, *tl);
+			nfsm_dissect(tl, u_long *, 4*NFSX_UNSIGNED);
+			cachable = fxdr_unsigned(int, *tl++);
+			reqtime += fxdr_unsigned(int, *tl++);
+			fxdr_hyper(tl, &frev);
+		} else
+			nqlflag = 0;	/* no lease information in the reply */
+	}
+	nfsm_dissect(fhp, nfsv2fh_t *, NFSX_FH);
+
+	/*
+	 * Handle RENAME case...
+	 */
+	if (cnp->cn_nameiop == RENAME && wantparent && (flags & ISLASTCN)) {
+		if (!bcmp(np->n_fh.fh_bytes, (caddr_t)fhp, NFSX_FH)) {
+			m_freem(mrep);
+			return (EISDIR);
+		}
+		if (error = nfs_nget(dvp->v_mount, fhp, &np)) {
+			m_freem(mrep);
+			return (error);
+		}
+		newvp = NFSTOV(np);
+		if (error =
+		    nfs_loadattrcache(&newvp, &md, &dpos, (struct vattr *)0)) {
+			vrele(newvp);
+			m_freem(mrep);
+			return (error);
+		}
+		*vpp = newvp;
+		m_freem(mrep);
+		cnp->cn_flags |= SAVENAME;
+		return (0);
+	}
+
+	if (!bcmp(np->n_fh.fh_bytes, (caddr_t)fhp, NFSX_FH)) {
+		VREF(dvp);	/* returned handle equals the directory's own handle */
+		newvp = dvp;
+	} else {
+		if (error = nfs_nget(dvp->v_mount, fhp, &np)) {
+			m_freem(mrep);
+			return (error);
+		}
+		newvp = NFSTOV(np);
+	}
+	if (error = nfs_loadattrcache(&newvp, &md, &dpos, (struct vattr *)0)) {
+		vrele(newvp);
+		m_freem(mrep);
+		return (error);
+	}
+	m_freem(mrep);
+	*vpp = newvp;
+	if (cnp->cn_nameiop != LOOKUP && (flags & ISLASTCN))
+		cnp->cn_flags |= SAVENAME;
+	if ((cnp->cn_flags & MAKEENTRY) &&
+	    (cnp->cn_nameiop != DELETE || !(flags & ISLASTCN))) {
+		if ((nmp->nm_flag & NFSMNT_NQNFS) == 0)
+			np->n_ctime = np->n_vattr.va_ctime.ts_sec;	/* value compared on later cache hits */
+		else if (nqlflag && reqtime > time.tv_sec)
+			nqnfs_clientlease(nmp, np, nqlflag, cachable, reqtime,
+				frev);
+		cache_enter(dvp, *vpp, cnp);
+	}
+	return (0);
+}
+
+/*
+ * Vnode read entry point for nfs.
+ * Only regular files (VREG) are accepted; any other vnode type is
+ * rejected with EPERM.  The transfer itself is delegated to
+ * nfs_bioread(), whose result is returned unchanged.
+ */
+int
+nfs_read(ap)
+	struct vop_read_args /* {
+		struct vnode *a_vp;
+		struct uio *a_uio;
+		int  a_ioflag;
+		struct ucred *a_cred;
+	} */ *ap;
+{
+	register struct vnode *filevp = ap->a_vp;
+	int status;
+
+	if (filevp->v_type == VREG)
+		status = nfs_bioread(filevp, ap->a_uio, ap->a_ioflag,
+		    ap->a_cred);
+	else
+		status = EPERM;
+	return (status);
+}
+
+/*
+ * Vnode readlink entry point for nfs.
+ * Accepts symbolic links (VLNK) only and answers EPERM for every other
+ * vnode type.  The link contents are fetched through nfs_bioread() with
+ * no I/O flags set.
+ */
+int
+nfs_readlink(ap)
+	struct vop_readlink_args /* {
+		struct vnode *a_vp;
+		struct uio *a_uio;
+		struct ucred *a_cred;
+	} */ *ap;
+{
+	register struct vnode *linkvp = ap->a_vp;
+	int status;
+
+	if (linkvp->v_type == VLNK)
+		status = nfs_bioread(linkvp, ap->a_uio, 0, ap->a_cred);
+	else
+		status = EPERM;
+	return (status);
+}
+
+/*
+ * Do a readlink rpc.
+ * Called by nfs_doio() from below the buffer cache.
+ *
+ *	vp	vnode whose link contents are requested
+ *	uiop	destination for the link text; uio_procp is passed to the rpc
+ *	cred	credentials passed to the rpc
+ *
+ * Returns the value left in "error" by the nfsm_* macros (0 when none of
+ * them reported a failure).  The locals tl, cp, t1, bpos, dpos, cp2 and
+ * the mbuf pointers are not used directly here; presumably they are
+ * scratch variables referenced by name inside the nfsm_* macros.
+ */
+int
+nfs_readlinkrpc(vp, uiop, cred)
+	register struct vnode *vp;
+	struct uio *uiop;
+	struct ucred *cred;
+{
+	register u_long *tl;
+	register caddr_t cp;
+	register long t1;
+	caddr_t bpos, dpos, cp2;
+	int error = 0;
+	struct mbuf *mreq, *mrep, *md, *mb, *mb2;
+	long len;
+
+	nfsstats.rpccnt[NFSPROC_READLINK]++;
+	nfsm_reqhead(vp, NFSPROC_READLINK, NFSX_FH);	/* request body is one file handle */
+	nfsm_fhtom(vp);
+	nfsm_request(vp, NFSPROC_READLINK, uiop->uio_procp, cred);
+	nfsm_strsiz(len, NFS_MAXPATHLEN);	/* presumably fetches the reply string length, bounded by NFS_MAXPATHLEN */
+	nfsm_mtouio(uiop, len);	/* presumably moves len bytes of the reply into uiop */
+	nfsm_reqdone;
+	return (error);
+}
+
+/*
+ * nfs read rpc call
+ * Ditto above
+ *
+ *	vp	vnode of the file to read
+ *	uiop	describes where to read from (uio_offset) and how much
+ *		(uio_resid); uio_procp is passed to the rpc
+ *	cred	credentials passed to the rpc
+ *
+ * The transfer is split into read rpcs of at most nm_rsize bytes each.
+ * On nqnfs mounts the offset is sent as a 64 bit quantity followed by the
+ * length; otherwise three 32 bit words are sent: offset, length and 0.
+ * The loop ends when everything was transferred or when the server
+ * returned fewer bytes than were asked for.
+ *
+ * Returns EFBIG, without sending anything, if the end of the transfer
+ * lies beyond 0xffffffff on a mount that is not nqnfs; otherwise the
+ * value left in "error" by the nfsm_* macros.
+ */
+int
+nfs_readrpc(vp, uiop, cred)
+	register struct vnode *vp;
+	struct uio *uiop;
+	struct ucred *cred;
+{
+	register u_long *tl;
+	register caddr_t cp;
+	register long t1;
+	caddr_t bpos, dpos, cp2;
+	int error = 0;
+	struct mbuf *mreq, *mrep, *md, *mb, *mb2;
+	struct nfsmount *nmp;
+	long len, retlen, tsiz;
+
+	nmp = VFSTONFS(vp->v_mount);
+	tsiz = uiop->uio_resid;	/* bytes still to be requested */
+	if (uiop->uio_offset + tsiz > 0xffffffff &&
+	    (nmp->nm_flag & NFSMNT_NQNFS) == 0)
+		return (EFBIG);
+	while (tsiz > 0) {
+		nfsstats.rpccnt[NFSPROC_READ]++;
+		len = (tsiz > nmp->nm_rsize) ? nmp->nm_rsize : tsiz;	/* min(tsiz, nm_rsize) */
+		nfsm_reqhead(vp, NFSPROC_READ, NFSX_FH+NFSX_UNSIGNED*3);
+		nfsm_fhtom(vp);
+		nfsm_build(tl, u_long *, NFSX_UNSIGNED*3);
+		if (nmp->nm_flag & NFSMNT_NQNFS) {
+			txdr_hyper(&uiop->uio_offset, tl);	/* words 0-1: 64 bit offset */
+			*(tl + 2) = txdr_unsigned(len);
+		} else {
+			*tl++ = txdr_unsigned(uiop->uio_offset);
+			*tl++ = txdr_unsigned(len);
+			*tl = 0;
+		}
+		nfsm_request(vp, NFSPROC_READ, uiop->uio_procp, cred);
+		nfsm_loadattr(vp, (struct vattr *)0);
+		nfsm_strsiz(retlen, nmp->nm_rsize);
+		nfsm_mtouio(uiop, retlen);
+		m_freem(mrep);
+		if (retlen < len)
+			tsiz = 0;	/* short read: stop asking for more */
+		else
+			tsiz -= len;
+	}
+nfsmout:	/* presumably the jump target of the nfsm_* macros on failure */
+	return (error);
+}
+
+/*
+ * nfs write call
+ *
+ *	vp	vnode of the file to write
+ *	uiop	source data, file offset (uio_offset) and byte count
+ *		(uio_resid); uio_procp is passed to the rpc
+ *	cred	credentials passed to the rpc
+ *	ioflags	only IO_APPEND is examined, and only on nqnfs mounts
+ *
+ * The data is sent in write rpcs of at most nm_wsize bytes each.  Four
+ * 32 bit words precede the data:
+ *	nqnfs:	64 bit offset, append flag (1 or 0), length
+ *	other:	word 1 = offset, word 3 = length; words 0 and 2 are not
+ *		assigned in this function
+ * After each reply the attributes are reloaded.  With NFSMNT_MYWRITE the
+ * nfsnode's n_mtime is set from the freshly loaded mtime; otherwise, on
+ * an nqnfs mount with NQNFS_CKCACHABLE(vp, NQL_WRITE) true, a 64 bit
+ * revision is read from the reply and n_brev is raised to it if larger.
+ *
+ * Returns EFBIG, without sending anything, if the end of the transfer
+ * lies beyond 0xffffffff on a mount that is not nqnfs; otherwise the
+ * value left in "error" by the nfsm_* macros.  On error uio_resid is set
+ * to the number of bytes not yet acknowledged.
+ */
+int
+nfs_writerpc(vp, uiop, cred, ioflags)
+	register struct vnode *vp;
+	struct uio *uiop;
+	struct ucred *cred;
+	int ioflags;
+{
+	register u_long *tl;
+	register caddr_t cp;
+	register long t1;
+	caddr_t bpos, dpos, cp2;
+	int error = 0;
+	struct mbuf *mreq, *mrep, *md, *mb, *mb2;
+	struct nfsmount *nmp;
+	struct nfsnode *np = VTONFS(vp);
+	u_quad_t frev;
+	long len, tsiz;
+
+	nmp = VFSTONFS(vp->v_mount);
+	tsiz = uiop->uio_resid;	/* bytes still to be written */
+	if (uiop->uio_offset + tsiz > 0xffffffff &&
+	    (nmp->nm_flag & NFSMNT_NQNFS) == 0)
+		return (EFBIG);
+	while (tsiz > 0) {
+		nfsstats.rpccnt[NFSPROC_WRITE]++;
+		len = (tsiz > nmp->nm_wsize) ? nmp->nm_wsize : tsiz;	/* min(tsiz, nm_wsize) */
+		nfsm_reqhead(vp, NFSPROC_WRITE,
+			NFSX_FH+NFSX_UNSIGNED*4+nfsm_rndup(len));
+		nfsm_fhtom(vp);
+		nfsm_build(tl, u_long *, NFSX_UNSIGNED * 4);
+		if (nmp->nm_flag & NFSMNT_NQNFS) {
+			txdr_hyper(&uiop->uio_offset, tl);
+			tl += 2;
+			if (ioflags & IO_APPEND)
+				*tl++ = txdr_unsigned(1);
+			else
+				*tl++ = 0;
+		} else {
+			*++tl = txdr_unsigned(uiop->uio_offset);	/* word 1; word 0 is skipped */
+			tl += 2;	/* skip word 2, leaving tl at word 3 */
+		}
+		*tl = txdr_unsigned(len);	/* word 3 in both layouts */
+		nfsm_uiotom(uiop, len);
+		nfsm_request(vp, NFSPROC_WRITE, uiop->uio_procp, cred);
+		nfsm_loadattr(vp, (struct vattr *)0);
+		if (nmp->nm_flag & NFSMNT_MYWRITE)
+			VTONFS(vp)->n_mtime = VTONFS(vp)->n_vattr.va_mtime.ts_sec;
+		else if ((nmp->nm_flag & NFSMNT_NQNFS) &&
+			 NQNFS_CKCACHABLE(vp, NQL_WRITE)) {
+			nfsm_dissect(tl, u_long *, 2*NFSX_UNSIGNED);
+			fxdr_hyper(tl, &frev);
+			if (frev > np->n_brev)
+				np->n_brev = frev;
+		}
+		m_freem(mrep);
+		tsiz -= len;
+	}
+nfsmout:	/* presumably the jump target of the nfsm_* macros on failure */
+	if (error)
+		uiop->uio_resid = tsiz;
+	return (error);
+}
+
+/*
+ * nfs mknod call
+ * This is a kludge. Use a create rpc but with the IFMT bits of the mode
+ * set to specify the file type and the size field for rdev.
+ *
+ * Creates the special file named by ap->a_cnp in directory ap->a_dvp
+ * with the attributes in ap->a_vap.  Character and block devices send
+ * va_rdev as the device number; with FIFO configured, fifos send
+ * 0xffffffff.  Any other type is refused with EOPNOTSUPP.
+ *
+ * The group of the new node is taken from the attributes of the
+ * directory and the owner from the caller's credentials.
+ *
+ * The vnode obtained for the new node is not passed back through
+ * ap->a_vpp; it is entered in the name cache if MAKEENTRY is set and
+ * then released.  The reference on a_dvp is always consumed.
+ *
+ * Returns 0 on success, EOPNOTSUPP for unsupported types, or the error
+ * from VOP_GETATTR() or the rpc.
+ */
+/* ARGSUSED */
+int
+nfs_mknod(ap)
+	struct vop_mknod_args /* {
+		struct vnode *a_dvp;
+		struct vnode **a_vpp;
+		struct componentname *a_cnp;
+		struct vattr *a_vap;
+	} */ *ap;
+{
+	register struct vnode *dvp = ap->a_dvp;
+	register struct vattr *vap = ap->a_vap;
+	register struct componentname *cnp = ap->a_cnp;
+	register struct nfsv2_sattr *sp;
+	register u_long *tl;
+	register caddr_t cp;
+	register long t1, t2;
+	struct vnode *newvp;
+	struct vattr vattr;
+	char *cp2;
+	caddr_t bpos, dpos;
+	int error = 0, isnq;
+	struct mbuf *mreq, *mrep, *md, *mb, *mb2;
+	u_long rdev;
+
+	isnq = (VFSTONFS(dvp->v_mount)->nm_flag & NFSMNT_NQNFS);
+	if (vap->va_type == VCHR || vap->va_type == VBLK)
+		rdev = txdr_unsigned(vap->va_rdev);
+#ifdef FIFO
+	else if (vap->va_type == VFIFO)
+		rdev = 0xffffffff;
+#endif /* FIFO */
+	else {
+		VOP_ABORTOP(dvp, cnp);
+		vput(dvp);
+		return (EOPNOTSUPP);
+	}
+	/* The directory's attributes supply the group for the new node. */
+	if (error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred, cnp->cn_proc)) {
+		VOP_ABORTOP(dvp, cnp);
+		vput(dvp);
+		return (error);
+	}
+	nfsstats.rpccnt[NFSPROC_CREATE]++;
+	nfsm_reqhead(dvp, NFSPROC_CREATE,
+	  NFSX_FH+NFSX_UNSIGNED+nfsm_rndup(cnp->cn_namelen)+NFSX_SATTR(isnq));
+	nfsm_fhtom(dvp);
+	nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN);
+	nfsm_build(sp, struct nfsv2_sattr *, NFSX_SATTR(isnq));
+	sp->sa_mode = vtonfs_mode(vap->va_type, vap->va_mode);
+	sp->sa_uid = txdr_unsigned(cnp->cn_cred->cr_uid);
+	sp->sa_gid = txdr_unsigned(vattr.va_gid);
+	if (isnq) {
+		sp->sa_nqrdev = rdev;
+		sp->sa_nqflags = 0;
+		txdr_nqtime(&vap->va_atime, &sp->sa_nqatime);
+		txdr_nqtime(&vap->va_mtime, &sp->sa_nqmtime);
+	} else {
+		/* Plain nfs has no rdev field: the size field carries it. */
+		sp->sa_nfssize = rdev;
+		txdr_nfstime(&vap->va_atime, &sp->sa_nfsatime);
+		txdr_nfstime(&vap->va_mtime, &sp->sa_nfsmtime);
+	}
+	nfsm_request(dvp, NFSPROC_CREATE, cnp->cn_proc, cnp->cn_cred);
+	nfsm_mtofh(dvp, newvp);
+	nfsm_reqdone;
+	if (!error) {
+		if (cnp->cn_flags & MAKEENTRY)
+			cache_enter(dvp, newvp, cnp);
+		/*
+		 * The new vnode is never handed to the caller through
+		 * ap->a_vpp, so the reference obtained for it must be
+		 * dropped here or it would be held forever.
+		 */
+		vrele(newvp);
+	}
+	FREE(cnp->cn_pnbuf, M_NAMEI);
+	VTONFS(dvp)->n_flag |= NMODIFIED;
+	VTONFS(dvp)->n_attrstamp = 0;
+	vrele(dvp);
+	return (error);
+}
+
+/*
+ * nfs file create call
+ *
+ * Creates the file named by ap->a_cnp in directory ap->a_dvp using the
+ * type, mode and times from ap->a_vap.  The owner sent to the server is
+ * the caller's cr_uid and the group is the va_gid of the directory, which
+ * is why the directory's attributes are fetched first.  The size sent is
+ * 0; on nqnfs mounts the flags are 0 and the rdev is -1 as well.
+ *
+ * The vnode for the new file is stored in *ap->a_vpp by nfsm_mtofh and
+ * entered in the name cache when MAKEENTRY is set.  Afterwards the
+ * pathname buffer is freed, the directory nfsnode is flagged NMODIFIED
+ * with n_attrstamp zeroed, and the reference on a_dvp is released.
+ *
+ * Returns 0 on success or the error from VOP_GETATTR() or the rpc.
+ */
+int
+nfs_create(ap)
+	struct vop_create_args /* {
+		struct vnode *a_dvp;
+		struct vnode **a_vpp;
+		struct componentname *a_cnp;
+		struct vattr *a_vap;
+	} */ *ap;
+{
+	register struct vnode *dvp = ap->a_dvp;
+	register struct vattr *vap = ap->a_vap;
+	register struct componentname *cnp = ap->a_cnp;
+	register struct nfsv2_sattr *sp;
+	register u_long *tl;
+	register caddr_t cp;
+	register long t1, t2;
+	caddr_t bpos, dpos, cp2;
+	int error = 0, isnq;
+	struct mbuf *mreq, *mrep, *md, *mb, *mb2;
+	struct vattr vattr;
+
+	if (error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred, cnp->cn_proc)) {
+		VOP_ABORTOP(dvp, cnp);
+		vput(dvp);
+		return (error);
+	}
+	nfsstats.rpccnt[NFSPROC_CREATE]++;
+	isnq = (VFSTONFS(dvp->v_mount)->nm_flag & NFSMNT_NQNFS);
+	nfsm_reqhead(dvp, NFSPROC_CREATE,
+	  NFSX_FH+NFSX_UNSIGNED+nfsm_rndup(cnp->cn_namelen)+NFSX_SATTR(isnq));
+	nfsm_fhtom(dvp);
+	nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN);
+	nfsm_build(sp, struct nfsv2_sattr *, NFSX_SATTR(isnq));
+	sp->sa_mode = vtonfs_mode(vap->va_type, vap->va_mode);
+	sp->sa_uid = txdr_unsigned(cnp->cn_cred->cr_uid);
+	sp->sa_gid = txdr_unsigned(vattr.va_gid);	/* group comes from the directory */
+	if (isnq) {
+		u_quad_t qval = 0;
+
+		txdr_hyper(&qval, &sp->sa_nqsize);	/* initial size 0 */
+		sp->sa_nqflags = 0;
+		sp->sa_nqrdev = -1;
+		txdr_nqtime(&vap->va_atime, &sp->sa_nqatime);
+		txdr_nqtime(&vap->va_mtime, &sp->sa_nqmtime);
+	} else {
+		sp->sa_nfssize = 0;
+		txdr_nfstime(&vap->va_atime, &sp->sa_nfsatime);
+		txdr_nfstime(&vap->va_mtime, &sp->sa_nfsmtime);
+	}
+	nfsm_request(dvp, NFSPROC_CREATE, cnp->cn_proc, cnp->cn_cred);
+	nfsm_mtofh(dvp, *ap->a_vpp);
+	nfsm_reqdone;
+	if (!error && (cnp->cn_flags & MAKEENTRY))
+		cache_enter(dvp, *ap->a_vpp, cnp);
+	FREE(cnp->cn_pnbuf, M_NAMEI);
+	VTONFS(dvp)->n_flag |= NMODIFIED;
+	VTONFS(dvp)->n_attrstamp = 0;
+	vrele(dvp);
+	return (error);
+}
+
+/*
+ * nfs file remove call
+ * To try and make nfs semantics closer to ufs semantics, a file that has
+ * other processes using the vnode is renamed instead of removed and then
+ * removed later on the last close.
+ * - If v_usecount > 1
+ *	  If a rename is not already in the works
+ *	     call nfs_sillyrename() to set it up
+ *     else
+ *	  do the remove rpc
+ *
+ * ap->a_dvp is the directory, ap->a_vp the file and ap->a_cnp its name.
+ * The references on a_dvp and a_vp are released on every return path.
+ * On the remove rpc branch the pathname buffer in a_cnp is freed here,
+ * including when the operation is interrupted before the rpc is sent.
+ *
+ * Returns 0 on success (ENOENT from the server is treated as success,
+ * see below), EINTR if flushing the file's buffers was interrupted, or
+ * the error from the rename/remove.
+ */
+int
+nfs_remove(ap)
+	struct vop_remove_args /* {
+		struct vnodeop_desc *a_desc;
+		struct vnode * a_dvp;
+		struct vnode * a_vp;
+		struct componentname * a_cnp;
+	} */ *ap;
+{
+	register struct vnode *vp = ap->a_vp;
+	register struct vnode *dvp = ap->a_dvp;
+	register struct componentname *cnp = ap->a_cnp;
+	register struct nfsnode *np = VTONFS(vp);
+	register u_long *tl;
+	register caddr_t cp;
+	register long t2;
+	caddr_t bpos, dpos;
+	int error = 0;
+	struct mbuf *mreq, *mrep, *md, *mb, *mb2;
+
+	if (vp->v_usecount > 1) {
+		if (!np->n_sillyrename)
+			error = nfs_sillyrename(dvp, vp, cnp);
+	} else {
+		/*
+		 * Purge the name cache so that the chance of a lookup for
+		 * the name succeeding while the remove is in progress is
+		 * minimized. Without node locking it can still happen, such
+		 * that an I/O op returns ESTALE, but since you get this if
+		 * another host removes the file..
+		 */
+		cache_purge(vp);
+		/*
+		 * Throw away biocache buffers. Mainly to avoid
+		 * unnecessary delayed writes.
+		 */
+		error = nfs_vinvalbuf(vp, 0, cnp->cn_cred, cnp->cn_proc, 1);
+		if (error == EINTR) {
+			/*
+			 * Interrupted before the rpc was sent.  Release
+			 * everything the normal path below releases, so
+			 * that neither the vnode references nor the
+			 * pathname buffer are leaked.
+			 */
+			FREE(cnp->cn_pnbuf, M_NAMEI);
+			vrele(dvp);
+			vrele(vp);
+			return (error);
+		}
+		/* Do the rpc */
+		nfsstats.rpccnt[NFSPROC_REMOVE]++;
+		nfsm_reqhead(dvp, NFSPROC_REMOVE,
+			NFSX_FH+NFSX_UNSIGNED+nfsm_rndup(cnp->cn_namelen));
+		nfsm_fhtom(dvp);
+		nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN);
+		nfsm_request(dvp, NFSPROC_REMOVE, cnp->cn_proc, cnp->cn_cred);
+		nfsm_reqdone;
+		FREE(cnp->cn_pnbuf, M_NAMEI);
+		VTONFS(dvp)->n_flag |= NMODIFIED;
+		VTONFS(dvp)->n_attrstamp = 0;
+		/*
+		 * Kludge City: If the first reply to the remove rpc is lost..
+		 *   the reply to the retransmitted request will be ENOENT
+		 *   since the file was in fact removed
+		 *   Therefore, we cheat and return success.
+		 */
+		if (error == ENOENT)
+			error = 0;
+	}
+	np->n_attrstamp = 0;
+	vrele(dvp);
+	vrele(vp);
+	return (error);
+}
+
+/*
+ * nfs file remove rpc called from nfs_inactive
+ *
+ * Sends a remove rpc for the name sp->s_name (length sp->s_namlen) in
+ * the directory sp->s_dvp, using the credentials sp->s_cred and a NULL
+ * process pointer.  Afterwards the directory nfsnode is flagged
+ * NMODIFIED and its n_attrstamp is zeroed.
+ *
+ * Returns the value left in "error" by the nfsm_* macros.  Unlike
+ * nfs_remove(), ENOENT is not mapped to success here.
+ */
+int
+nfs_removeit(sp)
+	register struct sillyrename *sp;
+{
+	register u_long *tl;
+	register caddr_t cp;
+	register long t2;
+	caddr_t bpos, dpos;
+	int error = 0;
+	struct mbuf *mreq, *mrep, *md, *mb, *mb2;
+
+	nfsstats.rpccnt[NFSPROC_REMOVE]++;
+	nfsm_reqhead(sp->s_dvp, NFSPROC_REMOVE,
+		NFSX_FH+NFSX_UNSIGNED+nfsm_rndup(sp->s_namlen));
+	nfsm_fhtom(sp->s_dvp);
+	nfsm_strtom(sp->s_name, sp->s_namlen, NFS_MAXNAMLEN);
+	nfsm_request(sp->s_dvp, NFSPROC_REMOVE, NULL, sp->s_cred);	/* no process context is supplied */
+	nfsm_reqdone;
+	VTONFS(sp->s_dvp)->n_flag |= NMODIFIED;
+	VTONFS(sp->s_dvp)->n_attrstamp = 0;
+	return (error);
+}
+
+/*
+ * nfs file rename call
+ *
+ * Renames a_fcnp in directory a_fdvp (vnode a_fvp) to a_tcnp in
+ * directory a_tdvp; a_tvp is the vnode currently at the destination, if
+ * there is one.  All four vnodes are released before returning, whether
+ * or not the rpc was sent.
+ *
+ * Returns EXDEV when the source and the destination are on different
+ * mounts, 0 on success, or the error from the rpc.  ENOENT is mapped to
+ * 0 on the assumption that it answers a retransmitted request.
+ */
+int
+nfs_rename(ap)
+	struct vop_rename_args  /* {
+		struct vnode *a_fdvp;
+		struct vnode *a_fvp;
+		struct componentname *a_fcnp;
+		struct vnode *a_tdvp;
+		struct vnode *a_tvp;
+		struct componentname *a_tcnp;
+	} */ *ap;
+{
+	register struct vnode *fvp = ap->a_fvp;
+	register struct vnode *tvp = ap->a_tvp;
+	register struct vnode *fdvp = ap->a_fdvp;
+	register struct vnode *tdvp = ap->a_tdvp;
+	register struct componentname *tcnp = ap->a_tcnp;
+	register struct componentname *fcnp = ap->a_fcnp;
+	register u_long *tl;
+	register caddr_t cp;
+	register long t2;
+	caddr_t bpos, dpos;
+	int error = 0;
+	struct mbuf *mreq, *mrep, *md, *mb, *mb2;
+
+	/* Check for cross-device rename */
+	if ((fvp->v_mount != tdvp->v_mount) ||
+	    (tvp && (fvp->v_mount != tvp->v_mount))) {
+		error = EXDEV;
+		goto out;
+	}
+
+
+	nfsstats.rpccnt[NFSPROC_RENAME]++;
+	/*
+	 * The request holds two (file handle, name) pairs, so the size
+	 * estimate has to count the source name and the target name.
+	 */
+	nfsm_reqhead(fdvp, NFSPROC_RENAME,
+		(NFSX_FH+NFSX_UNSIGNED)*2+nfsm_rndup(fcnp->cn_namelen)+
+		nfsm_rndup(tcnp->cn_namelen));
+	nfsm_fhtom(fdvp);
+	nfsm_strtom(fcnp->cn_nameptr, fcnp->cn_namelen, NFS_MAXNAMLEN);
+	nfsm_fhtom(tdvp);
+	nfsm_strtom(tcnp->cn_nameptr, tcnp->cn_namelen, NFS_MAXNAMLEN);
+	/* The target's process and credentials are used (or fcnp->cn_cred?) */
+	nfsm_request(fdvp, NFSPROC_RENAME, tcnp->cn_proc, tcnp->cn_cred);
+	nfsm_reqdone;
+	VTONFS(fdvp)->n_flag |= NMODIFIED;
+	VTONFS(fdvp)->n_attrstamp = 0;
+	VTONFS(tdvp)->n_flag |= NMODIFIED;
+	VTONFS(tdvp)->n_attrstamp = 0;
+	if (fvp->v_type == VDIR) {
+		if (tvp != NULL && tvp->v_type == VDIR)
+			cache_purge(tdvp);
+		cache_purge(fdvp);
+	}
+out:
+	if (tdvp == tvp)
+		vrele(tdvp);
+	else
+		vput(tdvp);
+	if (tvp)
+		vput(tvp);
+	vrele(fdvp);
+	vrele(fvp);
+	/*
+	 * Kludge: Map ENOENT => 0 assuming that it is a reply to a retry.
+	 */
+	if (error == ENOENT)
+		error = 0;
+	return (error);
+}
+
+/*
+ * nfs file rename rpc called from nfs_remove() above
+ *
+ *	sdvp	directory that holds both the old and the new name
+ *	scnp	component name of the existing entry; its process and
+ *		credentials are used for the rpc
+ *	sp	supplies the new name (s_name, s_namlen)
+ *
+ * The same directory handle is sent for the source and for the
+ * destination, so the entry is renamed in place.  The pathname buffer
+ * of scnp is freed and the directory nfsnode is flagged NMODIFIED with
+ * n_attrstamp zeroed, whatever the outcome of the rpc.
+ *
+ * Returns the value left in "error" by the nfsm_* macros.
+ */
+int
+nfs_renameit(sdvp, scnp, sp)
+	struct vnode *sdvp;
+	struct componentname *scnp;
+	register struct sillyrename *sp;
+{
+	register u_long *tl;
+	register caddr_t cp;
+	register long t2;
+	caddr_t bpos, dpos;
+	int error = 0;
+	struct mbuf *mreq, *mrep, *md, *mb, *mb2;
+
+	nfsstats.rpccnt[NFSPROC_RENAME]++;
+	nfsm_reqhead(sdvp, NFSPROC_RENAME,
+		(NFSX_FH+NFSX_UNSIGNED)*2+nfsm_rndup(scnp->cn_namelen)+
+		nfsm_rndup(sp->s_namlen));
+	nfsm_fhtom(sdvp);	/* source directory */
+	nfsm_strtom(scnp->cn_nameptr, scnp->cn_namelen, NFS_MAXNAMLEN);
+	nfsm_fhtom(sdvp);	/* destination directory: the same one */
+	nfsm_strtom(sp->s_name, sp->s_namlen, NFS_MAXNAMLEN);
+	nfsm_request(sdvp, NFSPROC_RENAME, scnp->cn_proc, scnp->cn_cred);
+	nfsm_reqdone;
+	FREE(scnp->cn_pnbuf, M_NAMEI);
+	VTONFS(sdvp)->n_flag |= NMODIFIED;
+	VTONFS(sdvp)->n_attrstamp = 0;
+	return (error);
+}
+
+/*
+ * nfs hard link create call
+ *
+ * Sends a link rpc whose arguments are, in order: the file handle of
+ * ap->a_tdvp, the file handle of ap->a_vp and the name in ap->a_cnp.
+ * NOTE(review): despite the field names, that ordering suggests a_tdvp
+ * is the existing file and a_vp the directory receiving the new name;
+ * confirm against the caller.
+ *
+ * After the rpc the pathname buffer is freed, n_attrstamp is zeroed on
+ * both nfsnodes, a_tdvp's nfsnode is flagged NMODIFIED and the reference
+ * on a_vp is released.
+ *
+ * Returns EXDEV if the two vnodes are on different mounts, 0 on success,
+ * or the error from the rpc.  EEXIST is mapped to 0 on the assumption
+ * that it answers a retransmitted request.
+ * NOTE(review): the EXDEV path releases a_vp but does not free
+ * cn_pnbuf (the VOP_ABORTOP call is commented out); confirm whether the
+ * caller takes care of that.
+ */
+int
+nfs_link(ap)
+	struct vop_link_args /* {
+		struct vnode *a_vp;
+		struct vnode *a_tdvp;
+		struct componentname *a_cnp;
+	} */ *ap;
+{
+	register struct vnode *vp = ap->a_vp;
+	register struct vnode *tdvp = ap->a_tdvp;
+	register struct componentname *cnp = ap->a_cnp;
+	register u_long *tl;
+	register caddr_t cp;
+	register long t2;
+	caddr_t bpos, dpos;
+	int error = 0;
+	struct mbuf *mreq, *mrep, *md, *mb, *mb2;
+
+	if (vp->v_mount != tdvp->v_mount) {
+		/*VOP_ABORTOP(vp, cnp);*/
+		if (tdvp == vp)
+			vrele(vp);
+		else
+			vput(vp);
+		return (EXDEV);
+	}
+
+	nfsstats.rpccnt[NFSPROC_LINK]++;
+	nfsm_reqhead(tdvp, NFSPROC_LINK,
+		NFSX_FH*2+NFSX_UNSIGNED+nfsm_rndup(cnp->cn_namelen));
+	nfsm_fhtom(tdvp);	/* first handle in the request */
+	nfsm_fhtom(vp);	/* second handle in the request */
+	nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN);
+	nfsm_request(tdvp, NFSPROC_LINK, cnp->cn_proc, cnp->cn_cred);
+	nfsm_reqdone;
+	FREE(cnp->cn_pnbuf, M_NAMEI);
+	VTONFS(tdvp)->n_attrstamp = 0;
+	VTONFS(tdvp)->n_flag |= NMODIFIED;
+	VTONFS(vp)->n_attrstamp = 0;
+	vrele(vp);
+	/*
+	 * Kludge: Map EEXIST => 0 assuming that it is a reply to a retry.
+	 */
+	if (error == EEXIST)
+		error = 0;
+	return (error);
+}
+
+/*
+ * nfs symbolic link create call
+ *
+ * Creates a symbolic link named by ap->a_cnp in directory ap->a_dvp
+ * whose contents are the NUL terminated string ap->a_target.  The mode
+ * and times come from ap->a_vap; the owner and group sent are the
+ * caller's cr_uid and cr_gid.  The size is sent as -1.
+ *
+ * Nothing is stored through ap->a_vpp.  The pathname buffer is freed,
+ * the directory nfsnode is flagged NMODIFIED with n_attrstamp zeroed and
+ * the reference on a_dvp is released.
+ *
+ * Returns 0 on success or the error from the rpc.  EEXIST is mapped to
+ * 0 on the assumption that it answers a retransmitted request.
+ */
+/* start here */
+int
+nfs_symlink(ap)
+	struct vop_symlink_args /* {
+		struct vnode *a_dvp;
+		struct vnode **a_vpp;
+		struct componentname *a_cnp;
+		struct vattr *a_vap;
+		char *a_target;
+	} */ *ap;
+{
+	register struct vnode *dvp = ap->a_dvp;
+	register struct vattr *vap = ap->a_vap;
+	register struct componentname *cnp = ap->a_cnp;
+	register struct nfsv2_sattr *sp;
+	register u_long *tl;
+	register caddr_t cp;
+	register long t2;
+	caddr_t bpos, dpos;
+	int slen, error = 0, isnq;
+	struct mbuf *mreq, *mrep, *md, *mb, *mb2;
+
+	nfsstats.rpccnt[NFSPROC_SYMLINK]++;
+	slen = strlen(ap->a_target);
+	isnq = (VFSTONFS(dvp->v_mount)->nm_flag & NFSMNT_NQNFS);
+	nfsm_reqhead(dvp, NFSPROC_SYMLINK, NFSX_FH+2*NFSX_UNSIGNED+
+	    nfsm_rndup(cnp->cn_namelen)+nfsm_rndup(slen)+NFSX_SATTR(isnq));
+	nfsm_fhtom(dvp);
+	nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN);
+	nfsm_strtom(ap->a_target, slen, NFS_MAXPATHLEN);
+	nfsm_build(sp, struct nfsv2_sattr *, NFSX_SATTR(isnq));
+	sp->sa_mode = vtonfs_mode(VLNK, vap->va_mode);
+	sp->sa_uid = txdr_unsigned(cnp->cn_cred->cr_uid);
+	sp->sa_gid = txdr_unsigned(cnp->cn_cred->cr_gid);
+	if (isnq) {
+		quad_t qval = -1;
+
+		txdr_hyper(&qval, &sp->sa_nqsize);
+		sp->sa_nqflags = 0;
+		/*
+		 * Give the rdev word a defined value, as nfs_create() does,
+		 * so that no unset memory is sent to the server.
+		 */
+		sp->sa_nqrdev = -1;
+		txdr_nqtime(&vap->va_atime, &sp->sa_nqatime);
+		txdr_nqtime(&vap->va_mtime, &sp->sa_nqmtime);
+	} else {
+		sp->sa_nfssize = -1;
+		txdr_nfstime(&vap->va_atime, &sp->sa_nfsatime);
+		txdr_nfstime(&vap->va_mtime, &sp->sa_nfsmtime);
+	}
+	nfsm_request(dvp, NFSPROC_SYMLINK, cnp->cn_proc, cnp->cn_cred);
+	nfsm_reqdone;
+	FREE(cnp->cn_pnbuf, M_NAMEI);
+	VTONFS(dvp)->n_flag |= NMODIFIED;
+	VTONFS(dvp)->n_attrstamp = 0;
+	vrele(dvp);
+	/*
+	 * Kludge: Map EEXIST => 0 assuming that it is a reply to a retry.
+	 */
+	if (error == EEXIST)
+		error = 0;
+	return (error);
+}
+
+/*
+ * nfs make dir call
+ *
+ * Creates the directory named by ap->a_cnp in ap->a_dvp with the mode
+ * and times from ap->a_vap.  The owner sent is the caller's cr_uid and
+ * the group is the va_gid of the parent directory, which is why the
+ * parent's attributes are fetched first.  The size is sent as -1.
+ *
+ * On success the vnode of the new directory is stored in *ap->a_vpp.
+ * The pathname buffer is freed and the reference on a_dvp is released
+ * once the rpc has been attempted.
+ *
+ * Returns 0 on success or the error from VOP_GETATTR() or the rpc.  An
+ * EEXIST reply is followed by a lookup of the same name: if that finds
+ * a directory the call succeeds, otherwise EEXIST is returned and
+ * *ap->a_vpp is cleared.
+ */
+int
+nfs_mkdir(ap)
+	struct vop_mkdir_args /* {
+		struct vnode *a_dvp;
+		struct vnode **a_vpp;
+		struct componentname *a_cnp;
+		struct vattr *a_vap;
+	} */ *ap;
+{
+	register struct vnode *dvp = ap->a_dvp;
+	register struct vattr *vap = ap->a_vap;
+	register struct componentname *cnp = ap->a_cnp;
+	register struct vnode **vpp = ap->a_vpp;
+	register struct nfsv2_sattr *sp;
+	register u_long *tl;
+	register caddr_t cp;
+	register long t1, t2;
+	register int len;
+	caddr_t bpos, dpos, cp2;
+	int error = 0, firsttry = 1, isnq;
+	struct mbuf *mreq, *mrep, *md, *mb, *mb2;
+	struct vattr vattr;
+
+	if (error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred, cnp->cn_proc)) {
+		VOP_ABORTOP(dvp, cnp);
+		vput(dvp);
+		return (error);
+	}
+	len = cnp->cn_namelen;
+	isnq = (VFSTONFS(dvp->v_mount)->nm_flag & NFSMNT_NQNFS);
+	nfsstats.rpccnt[NFSPROC_MKDIR]++;
+	nfsm_reqhead(dvp, NFSPROC_MKDIR,
+	  NFSX_FH+NFSX_UNSIGNED+nfsm_rndup(len)+NFSX_SATTR(isnq));
+	nfsm_fhtom(dvp);
+	nfsm_strtom(cnp->cn_nameptr, len, NFS_MAXNAMLEN);
+	nfsm_build(sp, struct nfsv2_sattr *, NFSX_SATTR(isnq));
+	sp->sa_mode = vtonfs_mode(VDIR, vap->va_mode);
+	sp->sa_uid = txdr_unsigned(cnp->cn_cred->cr_uid);
+	sp->sa_gid = txdr_unsigned(vattr.va_gid);
+	if (isnq) {
+		quad_t qval = -1;
+
+		txdr_hyper(&qval, &sp->sa_nqsize);
+		sp->sa_nqflags = 0;
+		/*
+		 * Give the rdev word a defined value, as nfs_create() does,
+		 * so that no unset memory is sent to the server.
+		 */
+		sp->sa_nqrdev = -1;
+		txdr_nqtime(&vap->va_atime, &sp->sa_nqatime);
+		txdr_nqtime(&vap->va_mtime, &sp->sa_nqmtime);
+	} else {
+		sp->sa_nfssize = -1;
+		txdr_nfstime(&vap->va_atime, &sp->sa_nfsatime);
+		txdr_nfstime(&vap->va_mtime, &sp->sa_nfsmtime);
+	}
+	nfsm_request(dvp, NFSPROC_MKDIR, cnp->cn_proc, cnp->cn_cred);
+	nfsm_mtofh(dvp, *vpp);
+	nfsm_reqdone;
+	VTONFS(dvp)->n_flag |= NMODIFIED;
+	VTONFS(dvp)->n_attrstamp = 0;
+	/*
+	 * Kludge: Map EEXIST => 0 assuming that you have a reply to a retry
+	 * if we can succeed in looking up the directory.
+	 * "firsttry" is necessary since the macros may "goto nfsmout" which
+	 * is above the if on errors. (Ugh)
+	 */
+	if (error == EEXIST && firsttry) {
+		firsttry = 0;
+		error = 0;
+		nfsstats.rpccnt[NFSPROC_LOOKUP]++;
+		*vpp = NULL;
+		nfsm_reqhead(dvp, NFSPROC_LOOKUP,
+		    NFSX_FH+NFSX_UNSIGNED+nfsm_rndup(len));
+		nfsm_fhtom(dvp);
+		nfsm_strtom(cnp->cn_nameptr, len, NFS_MAXNAMLEN);
+		nfsm_request(dvp, NFSPROC_LOOKUP, cnp->cn_proc, cnp->cn_cred);
+		nfsm_mtofh(dvp, *vpp);
+		if ((*vpp)->v_type != VDIR) {
+			vput(*vpp);
+			/* Do not leave a released vnode visible to the caller. */
+			*vpp = NULL;
+			error = EEXIST;
+		}
+		m_freem(mrep);
+	}
+	FREE(cnp->cn_pnbuf, M_NAMEI);
+	vrele(dvp);
+	return (error);
+}
+
+/*
+ * nfs remove directory call
+ *
+ * Removes the directory ap->a_vp, named by ap->a_cnp, from its parent
+ * ap->a_dvp.  If both vnodes are the same, both references are dropped,
+ * the pathname buffer is freed and EINVAL is returned without sending
+ * anything.
+ *
+ * After the rpc the pathname buffer is freed, the parent nfsnode is
+ * flagged NMODIFIED with n_attrstamp zeroed, the name cache is purged
+ * for both vnodes and both references are released.
+ *
+ * Returns 0 on success or the error from the rpc.  ENOENT is mapped to
+ * 0 on the assumption that it answers a retransmitted request.
+ */
+int
+nfs_rmdir(ap)
+	struct vop_rmdir_args /* {
+		struct vnode *a_dvp;
+		struct vnode *a_vp;
+		struct componentname *a_cnp;
+	} */ *ap;
+{
+	register struct vnode *vp = ap->a_vp;
+	register struct vnode *dvp = ap->a_dvp;
+	register struct componentname *cnp = ap->a_cnp;
+	register u_long *tl;
+	register caddr_t cp;
+	register long t2;
+	caddr_t bpos, dpos;
+	int error = 0;
+	struct mbuf *mreq, *mrep, *md, *mb, *mb2;
+
+	if (dvp == vp) {
+		vrele(dvp);	/* one reference was held as a_dvp ... */
+		vrele(dvp);	/* ... and one as a_vp, on the same vnode */
+		FREE(cnp->cn_pnbuf, M_NAMEI);
+		return (EINVAL);
+	}
+	nfsstats.rpccnt[NFSPROC_RMDIR]++;
+	nfsm_reqhead(dvp, NFSPROC_RMDIR,
+		NFSX_FH+NFSX_UNSIGNED+nfsm_rndup(cnp->cn_namelen));
+	nfsm_fhtom(dvp);
+	nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN);
+	nfsm_request(dvp, NFSPROC_RMDIR, cnp->cn_proc, cnp->cn_cred);
+	nfsm_reqdone;
+	FREE(cnp->cn_pnbuf, M_NAMEI);
+	VTONFS(dvp)->n_flag |= NMODIFIED;
+	VTONFS(dvp)->n_attrstamp = 0;
+	cache_purge(dvp);
+	cache_purge(vp);
+	vrele(vp);
+	vrele(dvp);
+	/*
+	 * Kludge: Map ENOENT => 0 assuming that you have a reply to a retry.
+	 */
+	if (error == ENOENT)
+		error = 0;
+	return (error);
+}
+
+/*
+ * nfs readdir call
+ * Although cookie is defined as opaque, I translate it to/from net byte
+ * order so that it looks more sensible. This appears consistent with the
+ * Ultrix implementation of NFS.
+ *
+ * Only directories are accepted (EPERM otherwise).  A read that starts
+ * exactly at the remembered end-of-directory offset of an unmodified
+ * directory is answered with 0 and no data, provided the remembered
+ * offset can still be trusted: on nqnfs mounts that means
+ * NQNFS_CKCACHABLE(vp, NQL_READ) holds, on other mounts that a fresh
+ * VOP_GETATTR() reports the modification time recorded in the nfsnode.
+ * Everything else is handed to nfs_bioread().
+ */
+int
+nfs_readdir(ap)
+	struct vop_readdir_args /* {
+		struct vnode *a_vp;
+		struct uio *a_uio;
+		struct ucred *a_cred;
+	} */ *ap;
+{
+	register struct vnode *vp = ap->a_vp;
+	register struct nfsnode *np = VTONFS(vp);
+	register struct uio *uio = ap->a_uio;
+	int resid_before, status;
+	int eofhit = 0;
+	struct vattr vattr;
+
+	if (vp->v_type != VDIR)
+		return (EPERM);
+
+	/* Is this a request at the cached EOF offset of a clean directory? */
+	if (uio->uio_offset != 0 && uio->uio_offset == np->n_direofoffset &&
+	    !(np->n_flag & NMODIFIED)) {
+		if (VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NQNFS) {
+			if (NQNFS_CKCACHABLE(vp, NQL_READ))
+				eofhit = 1;
+		} else {
+			if (VOP_GETATTR(vp, &vattr, ap->a_cred,
+			    uio->uio_procp) == 0 &&
+			    vattr.va_mtime.ts_sec == np->n_mtime)
+				eofhit = 1;
+		}
+		if (eofhit) {
+			nfsstats.direofcache_hits++;
+			return (0);
+		}
+	}
+
+	/* No usable EOF information: let nfs_bioread() fetch the entries. */
+	resid_before = uio->uio_resid;
+	status = nfs_bioread(vp, uio, 0, ap->a_cred);
+
+	/* A successful read that moved no data counts as an EOF cache miss. */
+	if (status == 0 && uio->uio_resid == resid_before)
+		nfsstats.direofcache_misses++;
+	return (status);
+}
+
+/*
+ * Readdir rpc call.
+ * Called from below the buffer cache by nfs_doio().
+ *
+ *	vp	directory vnode
+ *	uiop	destination buffer; uio_offset is the directory cookie to
+ *		start from and is advanced as entries are copied out
+ *	cred	credentials passed to the rpc
+ *
+ * Each reply is converted in place, inside the reply mbufs, into a run
+ * of struct dirent records: every entry's leading words are overwritten
+ * with d_fileno, d_namlen, d_type and d_reclen, and the cookie word that
+ * follows the name is zeroed so that the name is NUL terminated.  The
+ * converted bytes are then copied to uiop in one piece.
+ *
+ * After the last rpc the final record copied out is lengthened so that
+ * the data returned ends on a NFS_DIRBLKSIZ boundary.
+ *
+ * When the server signals end of directory the last cookie is stored in
+ * the nfsnode's n_direofoffset.
+ *
+ * Returns EBADRPC if an entry's name length is not in the range
+ * 1..NFS_MAXNAMLEN, otherwise the value left in "error" by the nfsm_*
+ * macros.
+ */
+int
+nfs_readdirrpc(vp, uiop, cred)
+	register struct vnode *vp;
+	struct uio *uiop;
+	struct ucred *cred;
+{
+	register long len;
+	register struct dirent *dp;
+	register u_long *tl;
+	register caddr_t cp;
+	register long t1;
+	long tlen, lastlen;
+	caddr_t bpos, dpos, cp2;
+	int error = 0;
+	struct mbuf *mreq, *mrep, *md, *mb, *mb2;
+	struct mbuf *md2;
+	caddr_t dpos2;
+	int siz;
+	int more_dirs = 1;
+	u_long off, savoff;
+	struct dirent *savdp;
+	struct nfsmount *nmp;
+	struct nfsnode *np = VTONFS(vp);
+	long tresid;
+
+	nmp = VFSTONFS(vp->v_mount);
+	tresid = uiop->uio_resid;	/* compared at the end to see whether anything was copied */
+	/*
+	 * Loop around doing readdir rpc's of size uio_resid or nm_rsize,
+	 * whichever is smaller, truncated to a multiple of NFS_DIRBLKSIZ.
+	 * The stopping criteria is EOF or buffer full.
+	 */
+	while (more_dirs && uiop->uio_resid >= NFS_DIRBLKSIZ) {
+		nfsstats.rpccnt[NFSPROC_READDIR]++;
+		nfsm_reqhead(vp, NFSPROC_READDIR,
+			NFSX_FH + 2 * NFSX_UNSIGNED);
+		nfsm_fhtom(vp);
+		nfsm_build(tl, u_long *, 2 * NFSX_UNSIGNED);
+		off = (u_long)uiop->uio_offset;
+		*tl++ = txdr_unsigned(off);	/* cookie to resume from */
+		*tl = txdr_unsigned(((uiop->uio_resid > nmp->nm_rsize) ?
+			nmp->nm_rsize : uiop->uio_resid) & ~(NFS_DIRBLKSIZ-1));
+		nfsm_request(vp, NFSPROC_READDIR, uiop->uio_procp, cred);
+		siz = 0;	/* bytes of converted dirent data in this reply */
+		nfsm_dissect(tl, u_long *, NFSX_UNSIGNED);
+		more_dirs = fxdr_unsigned(int, *tl);
+	
+		/* Save the position so that we can do nfsm_mtouio() later */
+		dpos2 = dpos;
+		md2 = md;
+	
+		/* loop thru the dir entries, doctoring them to 4bsd form */
+#ifdef lint
+		dp = (struct dirent *)0;
+#endif /* lint */
+		while (more_dirs && siz < uiop->uio_resid) {
+			savoff = off;		/* Hold onto offset and dp */
+			savdp = dp;
+			nfsm_dissect(tl, u_long *, 2 * NFSX_UNSIGNED);
+			dp = (struct dirent *)tl;	/* the dirent overlays the entry's first words */
+			dp->d_fileno = fxdr_unsigned(u_long, *tl++);
+			len = fxdr_unsigned(int, *tl);
+			if (len <= 0 || len > NFS_MAXNAMLEN) {
+				error = EBADRPC;
+				m_freem(mrep);
+				goto nfsmout;
+			}
+			dp->d_namlen = (u_char)len;
+			dp->d_type = DT_UNKNOWN;
+			nfsm_adv(len);		/* Point past name */
+			tlen = nfsm_rndup(len);
+			/*
+			 * This should not be necessary, but some servers have
+			 * broken XDR such that these bytes are not null filled.
+			 */
+			if (tlen != len) {
+				*dpos = '\0';	/* Null-terminate */
+				nfsm_adv(tlen - len);
+				len = tlen;
+			}
+			nfsm_dissect(tl, u_long *, 2 * NFSX_UNSIGNED);
+			off = fxdr_unsigned(u_long, *tl);	/* cookie following this entry */
+			*tl++ = 0;	/* Ensures null termination of name */
+			more_dirs = fxdr_unsigned(int, *tl);
+			dp->d_reclen = len + 4 * NFSX_UNSIGNED;
+			siz += dp->d_reclen;
+		}
+		/*
+		 * If at end of rpc data, get the eof boolean
+		 */
+		if (!more_dirs) {
+			nfsm_dissect(tl, u_long *, NFSX_UNSIGNED);
+			more_dirs = (fxdr_unsigned(int, *tl) == 0);
+
+			/*
+			 * If at EOF, cache directory offset
+			 */
+			if (!more_dirs)
+				np->n_direofoffset = off;
+		}
+		/*
+		 * If there is too much to fit in the data buffer, use savoff and
+		 * savdp to trim off the last record.
+		 * --> we are not at eof
+		 */
+		if (siz > uiop->uio_resid) {
+			off = savoff;
+			siz -= dp->d_reclen;
+			dp = savdp;
+			more_dirs = 0;	/* Paranoia */
+		}
+		if (siz > 0) {
+			lastlen = dp->d_reclen;	/* used after the loop to find the last record in the buffer */
+			md = md2;	/* rewind to the first entry of the reply */
+			dpos = dpos2;
+			nfsm_mtouio(uiop, siz);
+			uiop->uio_offset = (off_t)off;
+		} else
+			more_dirs = 0;	/* Ugh, never happens, but in case.. */
+		m_freem(mrep);
+	}
+	/*
+	 * Fill last record, iff any, out to a multiple of NFS_DIRBLKSIZ
+	 * by increasing d_reclen for the last record.
+	 */
+	if (uiop->uio_resid < tresid) {
+		len = uiop->uio_resid & (NFS_DIRBLKSIZ - 1);
+		if (len > 0) {
+			dp = (struct dirent *)
+				(uiop->uio_iov->iov_base - lastlen);
+			dp->d_reclen += len;
+			uiop->uio_iov->iov_base += len;
+			uiop->uio_iov->iov_len -= len;
+			uiop->uio_resid -= len;
+		}
+	}
+nfsmout:	/* target of the EBADRPC goto above; presumably also of the nfsm_* macros */
+	return (error);
+}
+
+/*
+ * Nqnfs readdir_and_lookup RPC. Used in place of nfs_readdirrpc().
+ * Reads entries of directory vp into uiop as 4bsd dirents. Every entry's
+ * attributes go through nfs_loadattrcache(); entries that fit are also
+ * entered in the name cache. Returns 0 or an errno (EBADRPC on a bad name).
+ */
+int
+nfs_readdirlookrpc(vp, uiop, cred)
+	struct vnode *vp;
+	register struct uio *uiop;
+	struct ucred *cred;
+{
+	register int len;
+	register struct dirent *dp;
+	register u_long *tl;
+	register caddr_t cp;
+	register long t1;		/* scratch for nfsm_dissect/nfsm_adv */
+	caddr_t bpos, dpos, cp2;
+	struct mbuf *mreq, *mrep, *md, *mb, *mb2;
+	struct nameidata nami, *ndp = &nami;
+	struct componentname *cnp = &ndp->ni_cnd;
+	u_long off, endoff, fileno;
+	time_t reqtime, ltime;
+	struct nfsmount *nmp;
+	struct nfsnode *np;
+	struct vnode *newvp;
+	nfsv2fh_t *fhp;
+	u_quad_t frev;
+	int error = 0, tlen, more_dirs = 1, tresid, doit, bigenough, i;
+	int cachable;
+
+	if (uiop->uio_iovcnt != 1)	/* only a single iovec is handled */
+		panic("nfs rdirlook");
+	nmp = VFSTONFS(vp->v_mount);
+	tresid = uiop->uio_resid;	/* starting resid; progress is judged against it */
+	ndp->ni_dvp = vp;
+	newvp = NULLVP;
+	/*
+	 * Loop around doing readdir rpc's of size uio_resid or nm_rsize,
+	 * whichever is smaller, truncated to a multiple of NFS_DIRBLKSIZ.
+	 * The loop stops at EOF or when the buffer is full.
+	 */
+	while (more_dirs && uiop->uio_resid >= NFS_DIRBLKSIZ) {
+		nfsstats.rpccnt[NQNFSPROC_READDIRLOOK]++;
+		nfsm_reqhead(vp, NQNFSPROC_READDIRLOOK,
+			NFSX_FH + 3 * NFSX_UNSIGNED);
+		nfsm_fhtom(vp);
+ 		nfsm_build(tl, u_long *, 3 * NFSX_UNSIGNED);	/* offset, count, lease term */
+		off = (u_long)uiop->uio_offset;
+		*tl++ = txdr_unsigned(off);
+		*tl++ = txdr_unsigned(((uiop->uio_resid > nmp->nm_rsize) ?
+			nmp->nm_rsize : uiop->uio_resid) & ~(NFS_DIRBLKSIZ-1));
+		if (nmp->nm_flag & NFSMNT_NQLOOKLEASE)
+			*tl = txdr_unsigned(nmp->nm_leaseterm);
+		else
+			*tl = 0;
+		reqtime = time.tv_sec;	/* ltime below is computed relative to this */
+		nfsm_request(vp, NQNFSPROC_READDIRLOOK, uiop->uio_procp, cred);
+		nfsm_dissect(tl, u_long *, NFSX_UNSIGNED);
+		more_dirs = fxdr_unsigned(int, *tl);
+	
+		/* loop thru the dir entries, doctoring them to 4bsd form */
+		bigenough = 1;
+		while (more_dirs && bigenough) {
+			doit = 1;
+			nfsm_dissect(tl, u_long *, 4 * NFSX_UNSIGNED);	/* always consumed; decoded only for lease mounts */
+			if (nmp->nm_flag & NFSMNT_NQLOOKLEASE) {
+				cachable = fxdr_unsigned(int, *tl++);
+				ltime = reqtime + fxdr_unsigned(int, *tl++);
+				fxdr_hyper(tl, &frev);
+			}
+			nfsm_dissect(fhp, nfsv2fh_t *, NFSX_FH);
+			if (!bcmp(VTONFS(vp)->n_fh.fh_bytes, (caddr_t)fhp, NFSX_FH)) {
+				VREF(vp);	/* entry is the directory itself */
+				newvp = vp;
+				np = VTONFS(vp);
+			} else {
+				if (error = nfs_nget(vp->v_mount, fhp, &np))
+					doit = 0;
+				/* NOTE(review): np is only set via nfs_nget(); is it valid when that fails? */
+				newvp = NFSTOV(np);
+			}
+			if (error = nfs_loadattrcache(&newvp, &md, &dpos,
+				(struct vattr *)0))
+				doit = 0;
+			nfsm_dissect(tl, u_long *, 2 * NFSX_UNSIGNED);
+			fileno = fxdr_unsigned(u_long, *tl++);
+			len = fxdr_unsigned(int, *tl);
+			if (len <= 0 || len > NFS_MAXNAMLEN) {
+				error = EBADRPC;
+				m_freem(mrep);
+				goto nfsmout;
+			}
+			tlen = (len + 4) & ~0x3;	/* name plus 1 to 4 pad bytes, a multiple of 4 */
+			if ((tlen + DIRHDSIZ) > uiop->uio_resid)
+				bigenough = 0;
+			if (bigenough && doit) {
+				dp = (struct dirent *)uiop->uio_iov->iov_base;
+				dp->d_fileno = fileno;
+				dp->d_namlen = len;
+				dp->d_reclen = tlen + DIRHDSIZ;
+				dp->d_type =
+				    IFTODT(VTTOIF(np->n_vattr.va_type));
+				uiop->uio_resid -= DIRHDSIZ;
+				uiop->uio_iov->iov_base += DIRHDSIZ;
+				uiop->uio_iov->iov_len -= DIRHDSIZ;
+				cnp->cn_nameptr = uiop->uio_iov->iov_base;	/* the name is copied to here next */
+				cnp->cn_namelen = len;
+				ndp->ni_vp = newvp;
+				nfsm_mtouio(uiop, len);	/* name bytes: reply mbufs -> uiop */
+				cp = uiop->uio_iov->iov_base;
+				tlen -= len;	/* what is left is the padding */
+				for (i = 0; i < tlen; i++)
+					*cp++ = '\0';
+				uiop->uio_iov->iov_base += tlen;
+				uiop->uio_iov->iov_len -= tlen;
+				uiop->uio_resid -= tlen;
+				cnp->cn_hash = 0;	/* presumably has to match the hash the lookup path computes -- confirm */
+				for (cp = cnp->cn_nameptr, i = 1; i <= len; i++, cp++)
+					cnp->cn_hash += (unsigned char)*cp * i;
+				if ((nmp->nm_flag & NFSMNT_NQLOOKLEASE) &&
+					ltime > time.tv_sec)
+					nqnfs_clientlease(nmp, np, NQL_READ,
+						cachable, ltime, frev);
+				if (cnp->cn_namelen <= NCHNAMLEN)
+				    cache_enter(ndp->ni_dvp, ndp->ni_vp, cnp);
+			} else {
+				nfsm_adv(nfsm_rndup(len));	/* entry skipped: step over its name */
+			}
+			if (newvp != NULLVP) {
+				vrele(newvp);
+				newvp = NULLVP;
+			}
+			nfsm_dissect(tl, u_long *, 2 * NFSX_UNSIGNED);
+			if (bigenough)	/* off advances unless this entry did not fit */
+				endoff = off = fxdr_unsigned(u_long, *tl++);
+			else
+				endoff = fxdr_unsigned(u_long, *tl++);
+			more_dirs = fxdr_unsigned(int, *tl);
+		}
+		/* If at end of rpc data, get the eof boolean */
+		if (!more_dirs) {
+			nfsm_dissect(tl, u_long *, NFSX_UNSIGNED);
+			more_dirs = (fxdr_unsigned(int, *tl) == 0);
+
+			/* If at EOF, cache directory offset */
+			if (!more_dirs)
+				VTONFS(vp)->n_direofoffset = endoff;
+		}
+		if (uiop->uio_resid < tresid)
+			uiop->uio_offset = (off_t)off;
+		else
+			more_dirs = 0;	/* nothing has been copied out at all: give up */
+		m_freem(mrep);
+	}
+	/*
+	 * Fill last record, iff any, out to a multiple of NFS_DIRBLKSIZ
+	 * by increasing d_reclen for the last record.
+	 */
+	if (uiop->uio_resid < tresid) {
+		len = uiop->uio_resid & (NFS_DIRBLKSIZ - 1);
+		if (len > 0) {
+			dp->d_reclen += len;	/* dp is the last record filled in above */
+			uiop->uio_iov->iov_base += len;
+			uiop->uio_iov->iov_len -= len;
+			uiop->uio_resid -= len;
+		}
+	}
+nfsmout:
+	if (newvp != NULLVP)	/* an error exit may still hold a reference */
+		vrele(newvp);
+	return (error);
+}
+static char hextoasc[] = "0123456789abcdef";	/* 4-bit value -> hex digit, for nfs_sillyrename() */
+
+/*
+ * Silly rename.
+ * NFS is stateless, so to look a little more like "ufs" the remove of an
+ * active vnode is turned into a rename to a funny looking filename, which
+ * nfs_inactive removes later on the nfsnode. A process on another client
+ * could still create the same funny name after the nfs_lookitup() probe
+ * has failed and before the rename completes, but...
+ */
+int
+nfs_sillyrename(dvp, vp, cnp)
+	struct vnode *dvp, *vp;
+	struct componentname *cnp;
+{
+	struct nfsnode *node;
+	struct sillyrename *silly;
+	short procid;
+	int digit, error;
+
+	cache_purge(dvp);
+	node = VTONFS(vp);
+#ifdef SILLYSEPARATE
+	MALLOC(silly, struct sillyrename *, sizeof (struct sillyrename),
+		M_NFSREQ, M_WAITOK);
+#else
+	silly = &node->n_silly;
+#endif
+	silly->s_cred = crdup(cnp->cn_cred);
+	silly->s_dvp = dvp;
+	VREF(dvp);
+
+	/* Template: 'A' is the probe letter, "xxxx" becomes the pid in hex */
+	procid = cnp->cn_proc->p_pid;
+	bcopy(".nfsAxxxx4.4", silly->s_name, 13);
+	silly->s_namlen = 12;
+	for (digit = 0; digit < 4; digit++)
+		silly->s_name[8 - digit] = hextoasc[(procid >> (4 * digit)) & 0xf];
+
+	/* Step the probe letter until a lookup fails, i.e. the name is unused */
+	for (;;) {
+		if (nfs_lookitup(silly, (nfsv2fh_t *)0, cnp->cn_proc) != 0)
+			break;
+		if (++silly->s_name[4] > 'z') {
+			error = EINVAL;
+			goto bad;
+		}
+	}
+	error = nfs_renameit(dvp, cnp, silly);
+	if (error)
+		goto bad;
+	nfs_lookitup(silly, &node->n_fh, cnp->cn_proc);
+	node->n_sillyrename = silly;
+	return (0);
+bad:
+	vrele(silly->s_dvp);
+	crfree(silly->s_cred);
+#ifdef SILLYSEPARATE
+	free((caddr_t)silly, M_NFSREQ);
+#endif
+	return (error);
+}
+
+/*
+ * Look up sp->s_name in directory sp->s_dvp for the silly rename code.
+ * Just like nfs_lookup() except that it doesn't load returned values
+ * into the nfsnode table. Returns 0 on success, else the error from the
+ * request or from parsing the reply. If fhp != NULL the file handle
+ * in the reply is copied out to it.
+ */
+int
+nfs_lookitup(sp, fhp, procp)
+	register struct sillyrename *sp;
+	nfsv2fh_t *fhp;
+	struct proc *procp;
+{
+	register struct vnode *vp = sp->s_dvp;
+	register u_long *tl;
+	register caddr_t cp;
+	register long t1, t2;		/* scratch used inside the nfsm_* macros */
+	caddr_t bpos, dpos, cp2;
+	int error = 0, isnq;
+	struct mbuf *mreq, *mrep, *md, *mb, *mb2;
+	long len;
+
+	isnq = (VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NQNFS);
+	nfsstats.rpccnt[NFSPROC_LOOKUP]++;
+	len = sp->s_namlen;
+	nfsm_reqhead(vp, NFSPROC_LOOKUP, NFSX_FH+NFSX_UNSIGNED+nfsm_rndup(len));
+	if (isnq) {
+		nfsm_build(tl, u_long *, NFSX_UNSIGNED);	/* nqnfs requests carry one extra leading word */
+		*tl = 0;	/* presumably "no lease wanted" -- confirm */
+	}
+	nfsm_fhtom(vp);
+	nfsm_strtom(sp->s_name, len, NFS_MAXNAMLEN);
+	nfsm_request(vp, NFSPROC_LOOKUP, procp, sp->s_cred);	/* jumps to nfsmout on error */
+	if (fhp != NULL) {
+		if (isnq)
+			nfsm_dissect(tl, u_long *, NFSX_UNSIGNED);	/* step over the extra nqnfs reply word */
+		nfsm_dissect(cp, caddr_t, NFSX_FH);
+		bcopy(cp, (caddr_t)fhp, NFSX_FH);
+	}
+	nfsm_reqdone;	/* frees mrep and supplies the nfsmout label */
+	return (error);
+}
+
+/*
+ * Kludge City..
+ * - nfs_bmap() is essentially a no-op that does no translation: *a_vpp gets
+ *   the vnode itself and *a_bnp gets a_bn times btodb(f_iosize). Either
+ *   pointer may be NULL, a_runp is not touched, and 0 is always returned.
+ * - do nfs_strategy() by faking physical I/O with nfs_readrpc/nfs_writerpc
+ *   after mapping the physical addresses into Kernel Virtual space in the
+ *   nfsiobuf area. (The process's page mapping might do, but Kernel Write
+ *   might not be enabled, copyout() would do a lot more work than bcopy(),
+ *   and it currently happens in the context of the swapper process (2).)
+ */
+int
+nfs_bmap(ap)
+	struct vop_bmap_args /* {
+		struct vnode *a_vp;
+		daddr_t  a_bn;
+		struct vnode **a_vpp;
+		daddr_t *a_bnp;
+		int *a_runp;
+	} */ *ap;
+{
+
+	if (ap->a_vpp != NULL)
+		*ap->a_vpp = ap->a_vp;
+	if (ap->a_bnp == NULL)
+		return (0);
+	*ap->a_bnp = ap->a_bn * btodb(ap->a_vp->v_mount->mnt_stat.f_iosize);
+	return (0);
+}
+
+/*
+ * Strategy routine.
+ * Synchronous requests are performed at once by calling nfs_doio().
+ * Asynchronous requests are first offered to nfs_asyncio(), which queues
+ * them for an nfsiod; if it returns non-zero the request is performed
+ * here as well. B_PHYS buffers are not supported and cause a panic.
+ * nfs_doio() is given b_rcred for reads and b_wcred for writes, and the
+ * current process for synchronous requests only.
+ * Argument used: a_bp, the buffer describing the transfer.
+ * Returns 0 or the error from nfs_doio().
+ */
+int
+nfs_strategy(ap)
+	struct vop_strategy_args *ap;
+{
+	register struct buf *bp = ap->a_bp;
+	struct ucred *cred;
+	struct proc *procp;
+
+	if (bp->b_flags & B_PHYS)
+		panic("nfs physio");
+
+	/* No process is associated with asynchronous i/o */
+	procp = (bp->b_flags & B_ASYNC) ? (struct proc *)0 : curproc; /* XXX */
+	/* Use the credential that matches the transfer direction */
+	cred = (bp->b_flags & B_READ) ? bp->b_rcred : bp->b_wcred;
+
+	/*
+	 * An asynchronous request that nfs_asyncio() accepted (returned 0)
+	 * needs nothing more from us; anything else we do ourselves.
+	 */
+	if ((bp->b_flags & B_ASYNC) && nfs_asyncio(bp, NOCRED) == 0)
+		return (0);
+	return (nfs_doio(bp, cred, procp));
+}
+
+/*
+ * Mmap a file
+ *
+ * NB Currently unsupported: every call fails with EINVAL, arguments ignored.
+ */
+/* ARGSUSED */
+int
+nfs_mmap(ap)
+	struct vop_mmap_args /* {
+		struct vnode *a_vp;
+		int  a_fflags;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+
+	return (EINVAL);
+}
+
+/*
+ * Flush all the dirty buffers associated with a vnode: each buffer on
+ * v_dirtyblkhd is started as an async write and, for MNT_WAIT, v_numoutput
+ * is waited on too. Returns 0, EINTR, a tsleep error or a saved write error.
+ */
+/* ARGSUSED */
+int
+nfs_fsync(ap)
+	struct vop_fsync_args /* {
+		struct vnodeop_desc *a_desc;
+		struct vnode * a_vp;
+		struct ucred * a_cred;
+		int  a_waitfor;
+		struct proc * a_p;
+	} */ *ap;
+{
+	register struct vnode *vp = ap->a_vp;
+	register struct nfsnode *np = VTONFS(vp);
+	register struct buf *bp;
+	struct buf *nbp;
+	struct nfsmount *nmp;
+	int s, error = 0, slptimeo = 0, slpflag = 0;
+
+	nmp = VFSTONFS(vp->v_mount);
+	if (nmp->nm_flag & NFSMNT_INT)
+		slpflag = PCATCH;	/* NFSMNT_INT mounts sleep with PCATCH set */
+loop:
+	s = splbio();
+	for (bp = vp->v_dirtyblkhd.lh_first; bp; bp = nbp) {
+		nbp = bp->b_vnbufs.le_next;
+		if (bp->b_flags & B_BUSY) {
+			if (ap->a_waitfor != MNT_WAIT)
+				continue;	/* not waiting: leave busy buffers alone */
+			bp->b_flags |= B_WANTED;
+			error = tsleep((caddr_t)bp, slpflag | (PRIBIO + 1),
+				"nfsfsync", slptimeo);
+			splx(s);
+			if (error) {
+			    if (nfs_sigintr(nmp, (struct nfsreq *)0, ap->a_p))
+				return (EINTR);
+			    if (slpflag == PCATCH) {	/* from now on: no PCATCH, wake every 2 seconds */
+				slpflag = 0;
+				slptimeo = 2 * hz;
+			    }
+			}
+			goto loop;	/* rescan the dirty list from its head */
+		}
+		if ((bp->b_flags & B_DELWRI) == 0)
+			panic("nfs_fsync: not dirty");
+		bremfree(bp);
+		bp->b_flags |= B_BUSY;
+		splx(s);
+		bp->b_flags |= B_ASYNC;	/* start the write without waiting for it here */
+		VOP_BWRITE(bp);
+		goto loop;	/* rescan from the head after every write started */
+	}
+	splx(s);
+	if (ap->a_waitfor == MNT_WAIT) {
+		while (vp->v_numoutput) {
+			vp->v_flag |= VBWAIT;
+			error = tsleep((caddr_t)&vp->v_numoutput,
+				slpflag | (PRIBIO + 1), "nfsfsync", slptimeo);
+			if (error) {
+			    if (nfs_sigintr(nmp, (struct nfsreq *)0, ap->a_p))
+				return (EINTR);
+			    if (slpflag == PCATCH) {	/* same fallback as in the first loop */
+				slpflag = 0;
+				slptimeo = 2 * hz;
+			    }
+			}
+		}
+		if (vp->v_dirtyblkhd.lh_first) {	/* more buffers got dirty meanwhile: go round again */
+#ifdef DIAGNOSTIC
+			vprint("nfs_fsync: dirty", vp);
+#endif
+			goto loop;
+		}
+	}
+	if (np->n_flag & NWRITEERR) {	/* report a saved write error once, then clear it */
+		error = np->n_error;
+		np->n_flag &= ~NWRITEERR;
+	}
+	return (error);
+}
+
+/*
+ * Return POSIX pathconf information applicable to nfs.
+ * Currently the NFS protocol does not support getting such information
+ * from the remote server, so every request fails with EINVAL.
+ */
+/* ARGSUSED */
+int
+nfs_pathconf(ap)
+	struct vop_pathconf_args /* {
+		struct vnode *a_vp;
+		int a_name;
+		int *a_retval;
+	} */ *ap;
+{
+
+	return (EINVAL);
+}
+
+/*
+ * NFS advisory byte-level locks.
+ * Currently unsupported: every request fails with EOPNOTSUPP.
+ */
+int
+nfs_advlock(ap)
+	struct vop_advlock_args /* {
+		struct vnode *a_vp;
+		caddr_t  a_id;
+		int  a_op;
+		struct flock *a_fl;
+		int  a_flags;
+	} */ *ap;
+{
+
+	return (EOPNOTSUPP);
+}
+
+/* Print an nfsnode's fileid and fsid (and fifo info under FIFO); returns 0. */
+int
+nfs_print(ap)
+	struct vop_print_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+	register struct vnode *vp = ap->a_vp;
+	register struct nfsnode *np = VTONFS(vp);
+
+	printf("tag VT_NFS, fileid %d fsid 0x%x",
+		np->n_vattr.va_fileid, np->n_vattr.va_fsid);
+#ifdef FIFO
+	if (vp->v_type == VFIFO)
+		fifo_printinfo(vp);
+#endif /* FIFO */
+	printf("\n");
+	/* Declared int: return a defined value instead of running off the end. */
+	return (0);
+}
+
+/*
+ * NFS directory offset lookup.
+ * Currently unsupported: every request fails with EOPNOTSUPP.
+ */
+int
+nfs_blkatoff(ap)
+	struct vop_blkatoff_args /* {
+		struct vnode *a_vp;
+		off_t a_offset;
+		char **a_res;
+		struct buf **a_bpp;
+	} */ *ap;
+{
+
+	return (EOPNOTSUPP);
+}
+
+/*
+ * NFS flat namespace allocation.
+ * Currently unsupported: every request fails with EOPNOTSUPP.
+ */
+int
+nfs_valloc(ap)
+	struct vop_valloc_args /* {
+		struct vnode *a_pvp;
+		int a_mode;
+		struct ucred *a_cred;
+		struct vnode **a_vpp;
+	} */ *ap;
+{
+
+	return (EOPNOTSUPP);
+}
+
+/*
+ * NFS flat namespace free.
+ * Currently unsupported: every request fails with EOPNOTSUPP.
+ */
+int
+nfs_vfree(ap)
+	struct vop_vfree_args /* {
+		struct vnode *a_pvp;
+		ino_t a_ino;
+		int a_mode;
+	} */ *ap;
+{
+
+	return (EOPNOTSUPP);
+}
+
+/*
+ * NFS file truncation. Not implemented: prints a notice, fails with EOPNOTSUPP.
+ */
+int
+nfs_truncate(ap)
+	struct vop_truncate_args /* {
+		struct vnode *a_vp;
+		off_t a_length;
+		int a_flags;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+
+	/* Use nfs_setattr. NOTE(review): the notice below has no trailing newline */
+	printf("nfs_truncate: need to implement!!");
+	return (EOPNOTSUPP);
+}
+
+/*
+ * NFS update. Not implemented: prints a notice, fails with EOPNOTSUPP.
+ */
+int
+nfs_update(ap)
+	struct vop_update_args /* {
+		struct vnode *a_vp;
+		struct timeval *a_ta;
+		struct timeval *a_tm;
+		int a_waitfor;
+	} */ *ap;
+{
+
+	/* Use nfs_setattr. NOTE(review): the notice below has no trailing newline */
+	printf("nfs_update: need to implement!!");
+	return (EOPNOTSUPP);
+}
+
+/*
+ * nfs special file access vnode op.
+ * Essentially just get vattr and then imitate iaccess() since the device is
+ * local to the client: a_mode is taken as owner-class permission bits and
+ * is shifted down to the group or public class when the caller is not the
+ * owner. Every requested bit must be present in va_mode.
+ * Returns 0, EACCES, or the error from VOP_GETATTR().
+ */
+int
+nfsspec_access(ap)
+	struct vop_access_args /* {
+		struct vnode *a_vp;
+		int  a_mode;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	register struct ucred *cred = ap->a_cred;
+	mode_t want = ap->a_mode;
+	struct vattr va;
+	int error, g;
+
+	/*
+	 * uid 0 is let in unconditionally,
+	 * without even fetching the attributes.
+	 */
+	if (cred->cr_uid == 0)
+		return (0);
+	error = VOP_GETATTR(ap->a_vp, &va, cred, ap->a_p);
+	if (error)
+		return (error);
+	/*
+	 * Exactly one permission class is consulted. A non-owner is checked
+	 * against the group bits if va_gid is among the caller's groups,
+	 * and against the public bits otherwise.
+	 */
+	if (cred->cr_uid != va.va_uid) {
+		want >>= 3;
+		for (g = 0; g < cred->cr_ngroups; g++)
+			if (cred->cr_groups[g] == va.va_gid)
+				break;
+		if (g >= cred->cr_ngroups)
+			want >>= 3;
+	}
+	if ((va.va_mode & want) != want)
+		return (EACCES);
+	return (0);
+}
+
+/*
+ * Read wrapper for special devices: note the access, then do the spec read.
+ */
+int
+nfsspec_read(ap)
+	struct vop_read_args /* {
+		struct vnode *a_vp;
+		struct uio *a_uio;
+		int  a_ioflag;
+		struct ucred *a_cred;
+	} */ *ap;
+{
+	struct nfsnode *node;
+
+	/* Flag the nfsnode as accessed and remember when; */
+	/* the arguments go on to the spec read op untouched. */
+	node = VTONFS(ap->a_vp);
+	node->n_flag |= NACC;
+	node->n_atim = time;
+	return (VOCALL(spec_vnodeop_p, VOFFSET(vop_read), ap));
+}
+
+/*
+ * Write wrapper for special devices: note the update, then do the spec write.
+ */
+int
+nfsspec_write(ap)
+	struct vop_write_args /* {
+		struct vnode *a_vp;
+		struct uio *a_uio;
+		int  a_ioflag;
+		struct ucred *a_cred;
+	} */ *ap;
+{
+	struct nfsnode *node;
+
+	/* Flag the nfsnode as updated and remember when; */
+	/* the arguments go on to the spec write op untouched. */
+	node = VTONFS(ap->a_vp);
+	node->n_flag |= NUPD;
+	node->n_mtim = time;
+	return (VOCALL(spec_vnodeop_p, VOFFSET(vop_write), ap));
+}
+
+/*
+ * Close wrapper for special devices.
+ *
+ * Push the access/update times noted on the nfsnode with VOP_SETATTR()
+ * (v_usecount 1, writable mount only), then do the device close.
+ */
+int
+nfsspec_close(ap)
+	struct vop_close_args /* {
+		struct vnode *a_vp;
+		int  a_fflag;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	register struct vnode *devvp = ap->a_vp;
+	register struct nfsnode *node = VTONFS(devvp);
+	struct vattr va;
+
+	if ((node->n_flag & (NACC | NUPD)) == 0)
+		goto doclose;
+	node->n_flag |= NCHG;
+	if (devvp->v_usecount != 1 ||
+	    (devvp->v_mount->mnt_flag & MNT_RDONLY) != 0)
+		goto doclose;
+	/* Start from a null vattr; only the times noted are filled in */
+	VATTR_NULL(&va);
+	if (node->n_flag & NACC) {
+		va.va_atime.ts_sec = node->n_atim.tv_sec;
+		va.va_atime.ts_nsec = node->n_atim.tv_usec * 1000;
+	}
+	if (node->n_flag & NUPD) {
+		va.va_mtime.ts_sec = node->n_mtim.tv_sec;
+		va.va_mtime.ts_nsec = node->n_mtim.tv_usec * 1000;
+	}
+	(void)VOP_SETATTR(devvp, &va, ap->a_cred, ap->a_p);
+doclose:
+	return (VOCALL(spec_vnodeop_p, VOFFSET(vop_close), ap));
+}
+
+#ifdef FIFO
+/*
+ * Read wrapper for fifos: note the access, then do the fifo read.
+ */
+int
+nfsfifo_read(ap)
+	struct vop_read_args /* {
+		struct vnode *a_vp;
+		struct uio *a_uio;
+		int  a_ioflag;
+		struct ucred *a_cred;
+	} */ *ap;
+{
+	extern int (**fifo_vnodeop_p)();
+	struct nfsnode *node;
+
+	/* Flag the nfsnode as accessed and remember when; */
+	/* the arguments go on to the fifo read op untouched. */
+	node = VTONFS(ap->a_vp);
+	node->n_flag |= NACC;
+	node->n_atim = time;
+	return (VOCALL(fifo_vnodeop_p, VOFFSET(vop_read), ap));
+}
+
+/*
+ * Write wrapper for fifos: note the update, then do the fifo write.
+ */
+int
+nfsfifo_write(ap)
+	struct vop_write_args /* {
+		struct vnode *a_vp;
+		struct uio *a_uio;
+		int  a_ioflag;
+		struct ucred *a_cred;
+	} */ *ap;
+{
+	extern int (**fifo_vnodeop_p)();
+	struct nfsnode *node;
+
+	/* Flag the nfsnode as updated and remember when; */
+	/* the arguments go on to the fifo write op untouched. */
+	node = VTONFS(ap->a_vp);
+	node->n_flag |= NUPD;
+	node->n_mtim = time;
+	return (VOCALL(fifo_vnodeop_p, VOFFSET(vop_write), ap));
+}
+
+/*
+ * Close wrapper for fifos.
+ *
+ * Restamp the noted access/update times with the current time, push them
+ * with VOP_SETATTR() (v_usecount 1, writable mount), then do fifo close.
+ */
+int
+nfsfifo_close(ap)
+	struct vop_close_args /* {
+		struct vnode *a_vp;
+		int  a_fflag;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	register struct vnode *fvp = ap->a_vp;
+	register struct nfsnode *node = VTONFS(fvp);
+	struct vattr va;
+	extern int (**fifo_vnodeop_p)();
+
+	if ((node->n_flag & (NACC | NUPD)) == 0)
+		goto doclose;
+	if (node->n_flag & NACC)
+		node->n_atim = time;
+	if (node->n_flag & NUPD)
+		node->n_mtim = time;
+	node->n_flag |= NCHG;
+	if (fvp->v_usecount != 1 ||
+	    (fvp->v_mount->mnt_flag & MNT_RDONLY) != 0)
+		goto doclose;
+	/* Start from a null vattr; only the times noted are filled in */
+	VATTR_NULL(&va);
+	if (node->n_flag & NACC) {
+		va.va_atime.ts_sec = node->n_atim.tv_sec;
+		va.va_atime.ts_nsec = node->n_atim.tv_usec * 1000;
+	}
+	if (node->n_flag & NUPD) {
+		va.va_mtime.ts_sec = node->n_mtim.tv_sec;
+		va.va_mtime.ts_nsec = node->n_mtim.tv_usec * 1000;
+	}
+	(void)VOP_SETATTR(fvp, &va, ap->a_cred, ap->a_p);
+doclose:
+	return (VOCALL(fifo_vnodeop_p, VOFFSET(vop_close), ap));
+}
+#endif /* FIFO */
diff --git a/sys/nfs/nfsdiskless.h b/sys/nfs/nfsdiskless.h
new file mode 100644
index 00000000000..74e6b7bca43
--- /dev/null
+++ b/sys/nfs/nfsdiskless.h
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Rick Macklem at The University of Guelph.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)nfsdiskless.h	8.1 (Berkeley) 6/10/93
+ */
+
+/*
+ * Structure that must be initialized for a diskless nfs client.
+ * nfs_mountroot() uses it to set up the root and swap vnodes and to do a
+ * partial ifconfig(8) and route(8) so that the critical net interface can
+ * communicate with the server. The primary bootstrap is expected to fill in
+ * the appropriate fields before starting vmunix. swdevt[0] equal to NODEV
+ * means swap over nfs. Currently only works for AF_INET protocols.
+ * NB: All fields are stored in net byte order to avoid hassles with
+ * client/server byte ordering differences.
+ * NOTE(review): since the bootstrap fills this in, the field layout is
+ * presumably shared with it -- confirm before reordering or resizing.
+ */
+struct nfs_diskless {
+	struct ifaliasreq myif;			/* Default interface */
+	struct sockaddr_in mygateway;		/* Default gateway */
+	struct nfs_args	swap_args;		/* Mount args for swap file */
+	u_char		swap_fh[NFS_FHSIZE];	/* Swap file's file handle */
+	struct sockaddr_in swap_saddr;		/* Address of swap server */
+	char		swap_hostnam[MNAMELEN];	/* Host name for mount pt */
+	int		swap_nblks;		/* Size of server swap file */
+	struct ucred	swap_ucred;		/* Swap credentials */
+	struct nfs_args	root_args;		/* Mount args for root fs */
+	u_char		root_fh[NFS_FHSIZE];	/* File handle of root dir */
+	struct sockaddr_in root_saddr;		/* Address of root server */
+	char		root_hostnam[MNAMELEN];	/* Host name for mount pt */
+	long		root_time;		/* Timestamp of root fs */
+	char		my_hostnam[MAXHOSTNAMELEN]; /* Client host name */
+};
diff --git a/sys/nfs/nfsm_subs.h b/sys/nfs/nfsm_subs.h
new file mode 100644
index 00000000000..879db360057
--- /dev/null
+++ b/sys/nfs/nfsm_subs.h
@@ -0,0 +1,269 @@
+/*
+ * Copyright (c) 1989, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Rick Macklem at The University of Guelph.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)nfsm_subs.h	8.1 (Berkeley) 6/16/93
+ */
+
+/*
+ * These macros do strange and peculiar things to mbuf chains for
+ * the assistance of the nfs code. To attempt to use them for any
+ * other purpose will be dangerous. (they make weird assumptions)
+ */
+
+/*
+ * First define what the actual subs. return
+ */
+extern struct mbuf *nfsm_reqh();	/* called by nfsm_reqhead(); old-style declaration, arguments unchecked */
+
+#define	M_HASCL(m)	((m)->m_flags & M_EXT)	/* non-zero when the mbuf has M_EXT set */
+#define	NFSMINOFF(m) /* point m_data back at the start of the mbuf's storage */ \
+		if (M_HASCL(m)) \
+			(m)->m_data = (m)->m_ext.ext_buf; \
+		else if ((m)->m_flags & M_PKTHDR) \
+			(m)->m_data = (m)->m_pktdat; \
+		else /* bare if/else chain: brace it when used next to a caller's own else */ \
+			(m)->m_data = (m)->m_dat
+#define	NFSMADV(m, s)	(m)->m_data += (s)	/* advance m_data by s bytes; m_len is not adjusted */
+#define	NFSMSIZ(m)	((M_HASCL(m))?MCLBYTES: /* size of the mbuf's data area */ \
+				(((m)->m_flags & M_PKTHDR)?MHLEN:MLEN))
+
+/*
+ * Now for the macros that do the simple stuff and call the functions
+ * for the hard stuff.
+ * These macros use several vars. declared in nfsm_reqhead and these
+ * vars. must not be used elsewhere unless you are careful not to corrupt
+ * them. The vars. starting with pN and tN (N=1,2,3,..) are temporaries
+ * that may be used so long as the value is not expected to be retained
+ * after a macro.
+ * I know, this is kind of dorkey, but it makes the actual op functions
+ * fairly clean and deals with the mess caused by the xdr discriminating
+ * unions.
+ */
+
+#define	nfsm_build(a,c,s) /* a = (c) pointer to s fresh bytes at the tail of the chain */ \
+		{ if ((s) > M_TRAILINGSPACE(mb)) { /* no room: append a new mbuf */ \
+			MGET(mb2, M_WAIT, MT_DATA); \
+			if ((s) > MLEN) \
+				panic("build > MLEN"); \
+			mb->m_next = mb2; \
+			mb = mb2; \
+			mb->m_len = 0; \
+			bpos = mtod(mb, caddr_t); \
+		} \
+		(a) = (c)(bpos); \
+		mb->m_len += (s); \
+		bpos += (s); } /* uses the caller's mb, mb2 and bpos */
+
+#define	nfsm_dissect(a,c,s) /* a = (c) pointer to the next s bytes being parsed */ \
+		{ t1 = mtod(md, caddr_t)+md->m_len-dpos; /* bytes left in this mbuf */ \
+		if (t1 >= (s)) { \
+			(a) = (c)(dpos); \
+			dpos += (s); \
+		} else if (error = nfsm_disct(&md, &dpos, (s), t1, &cp2)) { \
+			m_freem(mrep); /* failure: drop the message and leave via nfsmout */ \
+			goto nfsmout; \
+		} else { \
+			(a) = (c)cp2; \
+		} }
+
+#define nfsm_fhtom(v) /* append vnode v's file handle; two statements, not one */ \
+		nfsm_build(cp,caddr_t,NFSX_FH); \
+		bcopy((caddr_t)&(VTONFS(v)->n_fh), cp, NFSX_FH)
+
+#define nfsm_srvfhtom(f) /* append the NFSX_FH bytes at f; two statements, not one */ \
+		nfsm_build(cp,caddr_t,NFSX_FH); \
+		bcopy((caddr_t)(f), cp, NFSX_FH)
+
+#define nfsm_mtofh(d,v) /* parse a file handle, nfs_nget() its node into v, load attrs */ \
+		{ struct nfsnode *np; nfsv2fh_t *fhp; \
+		nfsm_dissect(fhp,nfsv2fh_t *,NFSX_FH); \
+		if (error = nfs_nget((d)->v_mount, fhp, &np)) { \
+			m_freem(mrep); \
+			goto nfsmout; \
+		} \
+		(v) = NFSTOV(np); \
+		nfsm_loadattr(v, (struct vattr *)0); \
+		}
+
+#define	nfsm_loadattr(v,a) /* nfs_loadattrcache() for v from the message; a is passed through */ \
+		{ struct vnode *tvp = (v); \
+		if (error = nfs_loadattrcache(&tvp, &md, &dpos, (a))) { \
+			m_freem(mrep); \
+			goto nfsmout; \
+		} \
+		(v) = tvp; } /* nfs_loadattrcache() takes &tvp, so v is reassigned from it */
+
+#define	nfsm_strsiz(s,m) /* s = string length word; EBADRPC exit if it exceeds m */ \
+		{ nfsm_dissect(tl,u_long *,NFSX_UNSIGNED); \
+		if (((s) = fxdr_unsigned(long,*tl)) > (m)) { /* only the upper bound is checked */ \
+			m_freem(mrep); \
+			error = EBADRPC; \
+			goto nfsmout; \
+		} }
+
+#define	nfsm_srvstrsiz(s,m) /* like nfsm_strsiz, but also rejects s <= 0 and fails via nfsm_reply */ \
+		{ nfsm_dissect(tl,u_long *,NFSX_UNSIGNED); \
+		if (((s) = fxdr_unsigned(long,*tl)) > (m) || (s) <= 0) { \
+			error = EBADRPC; \
+			nfsm_reply(0); /* with error set this returns from the caller */ \
+		} }
+
+#define nfsm_mtouio(p,s) /* copy s message bytes into uio p; does nothing when s <= 0 */ \
+		if ((s) > 0 && \
+		   (error = nfsm_mbuftouio(&md,(p),(s),&dpos))) { \
+			m_freem(mrep); \
+			goto nfsmout; \
+		}
+
+#define nfsm_uiotom(p,s) /* copy s bytes from uio p onto the request being built */ \
+		if (error = nfsm_uiotombuf((p),&mb,(s),&bpos)) { \
+			m_freem(mreq); /* failure frees the request, not a reply */ \
+			goto nfsmout; \
+		}
+
+#define	nfsm_reqhead(v,a,s) /* start a request: sets the caller's mreq, mb and bpos */ \
+		mb = mreq = nfsm_reqh((v),(a),(s),&bpos)
+
+#define nfsm_reqdone	m_freem(mrep); /* free the reply, then define the error-exit label */ \
+		nfsmout: 
+
+#define nfsm_rndup(a)	(((a)+3)&(~0x3))	/* round a up to a multiple of 4 */
+
+#define	nfsm_request(v, t, p, c) /* send mreq; sets mrep, md, dpos for the reply */	\
+		if (error = nfs_request((v), mreq, (t), (p), \
+		   (c), &mrep, &md, &dpos)) \
+			goto nfsmout /* nothing is freed here on failure */
+
+#define	nfsm_strtom(a,s,m) /* append string a of length s (length word + padded bytes) */ \
+		if ((s) > (m)) { /* longer than the limit m: ENAMETOOLONG exit */ \
+			m_freem(mreq); \
+			error = ENAMETOOLONG; \
+			goto nfsmout; \
+		} \
+		t2 = nfsm_rndup(s)+NFSX_UNSIGNED; \
+		if (t2 <= M_TRAILINGSPACE(mb)) { /* fits in the current mbuf */ \
+			nfsm_build(tl,u_long *,t2); \
+			*tl++ = txdr_unsigned(s); \
+			*(tl+((t2>>2)-2)) = 0; /* zero the last word first, so the pad bytes are 0 */ \
+			bcopy((caddr_t)(a), (caddr_t)tl, (s)); \
+		} else if (error = nfsm_strtmbuf(&mb, &bpos, (a), (s))) { \
+			m_freem(mreq); \
+			goto nfsmout; \
+		}
+
+#define	nfsm_srvdone /* supplies the nfsmout label and returns error */ \
+		nfsmout: \
+		return(error)
+
+#define	nfsm_reply(s) /* build the reply header; on error also return 0 from the caller */ \
+		{ \
+		nfsd->nd_repstat = error; \
+		if (error) /* error replies are built with size 0 */ \
+		   (void) nfs_rephead(0, nfsd, error, cache, &frev, \
+			mrq, &mb, &bpos); \
+		else \
+		   (void) nfs_rephead((s), nfsd, error, cache, &frev, \
+			mrq, &mb, &bpos); \
+		m_freem(mrep); /* mrep is freed on both paths */ \
+		mreq = *mrq; \
+		if (error) \
+			return(0); \
+		}
+
+#define	nfsm_adv(s) /* skip s bytes of the message being parsed */ \
+		t1 = mtod(md, caddr_t)+md->m_len-dpos; /* bytes left in this mbuf */ \
+		if (t1 >= (s)) { \
+			dpos += (s); \
+		} else if (error = nfs_adv(&md, &dpos, (s), t1)) { \
+			m_freem(mrep); \
+			goto nfsmout; \
+		}
+
+#define nfsm_srvmtofh(f) /* copy the next NFSX_FH message bytes to f; two statements */ \
+		nfsm_dissect(tl, u_long *, NFSX_FH); \
+		bcopy((caddr_t)tl, (caddr_t)(f), NFSX_FH)
+
+#define	nfsm_clget /* when bp has reached be, chain on a new mbuf; then tl = bp */ \
+		if (bp >= be) { \
+			if (mp == mb) /* still on the mbuf being built: account for the bytes used */ \
+				mp->m_len += bp-bpos; \
+			MGET(mp, M_WAIT, MT_DATA); \
+			MCLGET(mp, M_WAIT); \
+			mp->m_len = NFSMSIZ(mp); \
+			mp2->m_next = mp; \
+			mp2 = mp; \
+			bp = mtod(mp, caddr_t); \
+			be = bp+mp->m_len; \
+		} \
+		tl = (u_long *)bp
+
+#define	nfsm_srvfillattr /* fill *fp from *vap; nfsd->nd_nqlflag picks the field set */ \
+	fp->fa_type = vtonfs_type(vap->va_type); \
+	fp->fa_mode = vtonfs_mode(vap->va_type, vap->va_mode); \
+	fp->fa_nlink = txdr_unsigned(vap->va_nlink); \
+	fp->fa_uid = txdr_unsigned(vap->va_uid); \
+	fp->fa_gid = txdr_unsigned(vap->va_gid); \
+	if (nfsd->nd_nqlflag == NQL_NOVAL) { /* the fa_nfs* fields */ \
+		fp->fa_nfsblocksize = txdr_unsigned(vap->va_blocksize); \
+		if (vap->va_type == VFIFO) \
+			fp->fa_nfsrdev = 0xffffffff; \
+		else \
+			fp->fa_nfsrdev = txdr_unsigned(vap->va_rdev); \
+		fp->fa_nfsfsid = txdr_unsigned(vap->va_fsid); \
+		fp->fa_nfsfileid = txdr_unsigned(vap->va_fileid); \
+		fp->fa_nfssize = txdr_unsigned(vap->va_size); \
+		fp->fa_nfsblocks = txdr_unsigned(vap->va_bytes / NFS_FABLKSIZE); \
+		txdr_nfstime(&vap->va_atime, &fp->fa_nfsatime); \
+		txdr_nfstime(&vap->va_mtime, &fp->fa_nfsmtime); \
+		fp->fa_nfsctime.nfs_sec = txdr_unsigned(vap->va_ctime.ts_sec); \
+		fp->fa_nfsctime.nfs_usec = txdr_unsigned(vap->va_gen); /* va_gen goes in the ctime usec slot */ \
+	} else { /* the fa_nq* fields */ \
+		fp->fa_nqblocksize = txdr_unsigned(vap->va_blocksize); \
+		if (vap->va_type == VFIFO) \
+			fp->fa_nqrdev = 0xffffffff; \
+		else \
+			fp->fa_nqrdev = txdr_unsigned(vap->va_rdev); \
+		fp->fa_nqfsid = txdr_unsigned(vap->va_fsid); \
+		fp->fa_nqfileid = txdr_unsigned(vap->va_fileid); \
+		txdr_hyper(&vap->va_size, &fp->fa_nqsize); \
+		txdr_hyper(&vap->va_bytes, &fp->fa_nqbytes); \
+		txdr_nqtime(&vap->va_atime, &fp->fa_nqatime); \
+		txdr_nqtime(&vap->va_mtime, &fp->fa_nqmtime); \
+		txdr_nqtime(&vap->va_ctime, &fp->fa_nqctime); \
+		fp->fa_nqflags = txdr_unsigned(vap->va_flags); \
+		fp->fa_nqgen = txdr_unsigned(vap->va_gen); \
+		txdr_hyper(&vap->va_filerev, &fp->fa_nqfilerev); \
+	}
+
diff --git a/sys/nfs/nfsmount.h b/sys/nfs/nfsmount.h
new file mode 100644
index 00000000000..4d74acb38a5
--- /dev/null
+++ b/sys/nfs/nfsmount.h
@@ -0,0 +1,127 @@
+/*
+ * Copyright (c) 1989, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Rick Macklem at The University of Guelph.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)nfsmount.h	8.1 (Berkeley) 6/10/93
+ */
+
+/*
+ * Mount structure.
+ * One allocated on every NFS mount.
+ * Holds NFS specific information for mount.
+ */
+struct	nfsmount {
+	int	nm_flag;		/* Flags for soft/hard... */
+	struct	mount *nm_mountp;	/* Vfs structure for this filesystem */
+	int	nm_numgrps;		/* Max. size of groupslist */
+	nfsv2fh_t nm_fh;		/* File handle of root dir */
+	struct	socket *nm_so;		/* Rpc socket */
+	int	nm_sotype;		/* Type of socket */
+	int	nm_soproto;		/* and protocol */
+	int	nm_soflags;		/* pr_flags for socket protocol */
+	struct	mbuf *nm_nam;		/* Addr of server */
+	int	nm_timeo;		/* Init timer for NFSMNT_DUMBTIMR */
+	int	nm_retry;		/* Max retries */
+	int	nm_srtt[4];		/* Timers for rpcs */
+	int	nm_sdrtt[4];		/* presumably RTT deviations, parallel to nm_srtt -- confirm */
+	int	nm_sent;		/* Request send count */
+	int	nm_cwnd;		/* Request send window */
+	int	nm_timeouts;		/* Request timeouts */
+	int	nm_deadthresh;		/* Threshold of timeouts-->dead server*/
+	int	nm_rsize;		/* Max size of read rpc */
+	int	nm_wsize;		/* Max size of write rpc */
+	int	nm_readahead;		/* Num. of blocks to readahead */
+	int	nm_leaseterm;		/* Term (sec) for NQNFS lease */
+	struct nfsnode *nm_tnext;	/* Head of lease timer queue */
+	struct nfsnode *nm_tprev;	/* presumably tail of lease timer queue -- confirm */
+	struct vnode *nm_inprog;	/* Vnode in prog by nqnfs_clientd() */
+	uid_t	nm_authuid;		/* Uid for authenticator */
+	int	nm_authtype;		/* Authenticator type */
+	int	nm_authlen;		/* and length */
+	char	*nm_authstr;		/* Authenticator string */
+};
+
+#ifdef KERNEL
+/*
+ * Convert mount ptr to nfsmount ptr (a cast of the mount's mnt_data).
+ */
+#define VFSTONFS(mp)	((struct nfsmount *)((mp)->mnt_data))
+#endif /* KERNEL */
+
+/*
+ * Prototypes for NFS mount operations (parameter lists wrapped in __P())
+ */
+int	nfs_mount __P((
+		struct mount *mp,
+		char *path,
+		caddr_t data,
+		struct nameidata *ndp,
+		struct proc *p));
+int	nfs_start __P((
+		struct mount *mp,
+		int flags,
+		struct proc *p));
+int	nfs_unmount __P((
+		struct mount *mp,
+		int mntflags,
+		struct proc *p));
+int	nfs_root __P((
+		struct mount *mp,
+		struct vnode **vpp));	/* vpp: out pointer, presumably the root vnode */
+int	nfs_quotactl __P((
+		struct mount *mp,
+		int cmds,
+		uid_t uid,
+		caddr_t arg,
+		struct proc *p));
+int	nfs_statfs __P((
+		struct mount *mp,
+		struct statfs *sbp,
+		struct proc *p));
+int	nfs_sync __P((
+		struct mount *mp,
+		int waitfor,
+		struct ucred *cred,
+		struct proc *p));
+int	nfs_fhtovp __P((
+		struct mount *mp,
+		struct fid *fhp,
+		struct mbuf *nam,
+		struct vnode **vpp,
+		int *exflagsp,
+		struct ucred **credanonp));
+int	nfs_vptofh __P((
+		struct vnode *vp,
+		struct fid *fhp));
+int	nfs_init __P(());	/* declared with an empty parameter list */
diff --git a/sys/nfs/nfsnode.h b/sys/nfs/nfsnode.h
new file mode 100644
index 00000000000..f5fee5bf2f3
--- /dev/null
+++ b/sys/nfs/nfsnode.h
@@ -0,0 +1,166 @@
+/*
+ * Copyright (c) 1989, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Rick Macklem at The University of Guelph.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)nfsnode.h	8.4 (Berkeley) 2/13/94
+ */
+
+/*
+ * Silly rename structure that hangs off the nfsnode until the name
+ * can be removed by nfs_inactive()
+ */
+struct sillyrename {
+	struct	ucred *s_cred;		/* credential pointer kept with the entry */
+	struct	vnode *s_dvp;		/* presumably the directory vnode -- confirm */
+	long	s_namlen;		/* presumably length of s_name -- confirm */
+	char	s_name[20];		/* name buffer, 20 bytes */
+};
+
+/*
+ * The nfsnode is the nfs equivalent to ufs's inode. Any similarity
+ * is purely coincidental.
+ * There is a unique nfsnode allocated for each active file,
+ * each current directory, each mounted-on file, text file, and the root.
+ * An nfsnode is 'named' by its file handle. (nget/nfs_node.c)
+ */
+
+struct nfsnode {
+	struct	nfsnode *n_forw;	/* hash, forward */
+	struct	nfsnode **n_back;	/* hash, backward (pointer to a pointer) */
+	nfsv2fh_t n_fh;			/* NFS File Handle */
+	long	n_flag;			/* Flag for locking.. (N* bits below) */
+	struct	vnode *n_vnode;		/* vnode associated with this node */
+	struct	vattr n_vattr;		/* Vnode attribute cache */
+	time_t	n_attrstamp;		/* Time stamp for cached attributes */
+	struct	sillyrename *n_sillyrename; /* Ptr to silly rename struct */
+	u_quad_t n_size;		/* Current size of file */
+	int	n_error;		/* Save write error value */
+	u_long	n_direofoffset;		/* Dir. EOF offset cache */
+	time_t	n_mtime;		 /* Prev modify time. */
+	time_t	n_ctime;		 /* Prev create time. */
+	u_quad_t n_brev;		 /* Modify rev when cached */
+	u_quad_t n_lrev;		 /* Modify rev for lease */
+	time_t	n_expiry;		 /* Lease expiry time */
+	struct	nfsnode *n_tnext;	 /* Nqnfs timer chain */
+	struct	nfsnode *n_tprev;	 /* presumably back link of the same chain -- confirm */
+	long	spare1;			/* To 8 byte boundary */
+	struct	sillyrename n_silly;	/* Silly rename struct */
+	struct	timeval n_atim;		/* Special file times */
+	struct	timeval n_mtim;		/* Special file times (with n_atim) */
+};
+
+/*
+ * Flags for n_flag (bit masks; 0x0010 and 0x0001..0x0008 gaps: only 0x0010 is unassigned)
+ */
+#define	NFLUSHWANT	0x0001	/* Want wakeup from a flush in prog. */
+#define	NFLUSHINPROG	0x0002	/* Avoid multiple calls to vinvalbuf() */
+#define	NMODIFIED	0x0004	/* Might have a modified buffer in bio */
+#define	NWRITEERR	0x0008	/* Flag write errors so close will know */
+#define	NQNFSNONCACHE	0x0020	/* Non-cachable lease */
+#define	NQNFSWRITE	0x0040	/* Write lease */
+#define	NQNFSEVICTED	0x0080	/* Has been evicted */
+#define	NACC		0x0100	/* Special file accessed */
+#define	NUPD		0x0200	/* Special file updated */
+#define	NCHG		0x0400	/* Special file times changed */
+
+/*
+ * Convert between nfsnode pointers and vnode pointers (via v_data / n_vnode)
+ */
+#define VTONFS(vp)	((struct nfsnode *)(vp)->v_data)
+#define NFSTOV(np)	((struct vnode *)(np)->n_vnode)
+
+/*
+ * Queue head for nfsiod's. NOTE(review): nfs_bufq is defined (not extern) here, before #ifdef KERNEL
+ */
+TAILQ_HEAD(nfsbufs, buf) nfs_bufq;
+
+#ifdef KERNEL
+/*
+ * Prototypes for NFS vnode operations (#define'd names alias other routines)
+ */
+int	nfs_lookup __P((struct vop_lookup_args *));
+int	nfs_create __P((struct vop_create_args *));
+int	nfs_mknod __P((struct vop_mknod_args *));
+int	nfs_open __P((struct vop_open_args *));
+int	nfs_close __P((struct vop_close_args *));
+int	nfsspec_close __P((struct vop_close_args *));
+#ifdef FIFO	/* fifo close op is declared only when FIFO is defined */
+int	nfsfifo_close __P((struct vop_close_args *));
+#endif /* FIFO */
+int	nfs_access __P((struct vop_access_args *));
+int	nfsspec_access __P((struct vop_access_args *));
+int	nfs_getattr __P((struct vop_getattr_args *));
+int	nfs_setattr __P((struct vop_setattr_args *));
+int	nfs_read __P((struct vop_read_args *));
+int	nfs_write __P((struct vop_write_args *));
+int	nfsspec_read __P((struct vop_read_args *));
+int	nfsspec_write __P((struct vop_write_args *));
+#ifdef FIFO	/* fifo read/write ops are declared only when FIFO is defined */
+int	nfsfifo_read __P((struct vop_read_args *));
+int	nfsfifo_write __P((struct vop_write_args *));
+#endif /* FIFO */
+#define nfs_ioctl ((int (*) __P((struct  vop_ioctl_args *)))enoioctl)	/* enoioctl, cast to the op type */
+#define nfs_select ((int (*) __P((struct  vop_select_args *)))seltrue)	/* seltrue, cast to the op type */
+int	nfs_mmap __P((struct vop_mmap_args *));
+int	nfs_fsync __P((struct vop_fsync_args *));
+#define nfs_seek ((int (*) __P((struct  vop_seek_args *)))nullop)	/* nullop, cast to the op type */
+int	nfs_remove __P((struct vop_remove_args *));
+int	nfs_link __P((struct vop_link_args *));
+int	nfs_rename __P((struct vop_rename_args *));
+int	nfs_mkdir __P((struct vop_mkdir_args *));
+int	nfs_rmdir __P((struct vop_rmdir_args *));
+int	nfs_symlink __P((struct vop_symlink_args *));
+int	nfs_readdir __P((struct vop_readdir_args *));
+int	nfs_readlink __P((struct vop_readlink_args *));
+int	nfs_abortop __P((struct vop_abortop_args *));
+int	nfs_inactive __P((struct vop_inactive_args *));
+int	nfs_reclaim __P((struct vop_reclaim_args *));
+int	nfs_lock __P((struct vop_lock_args *));
+int	nfs_unlock __P((struct vop_unlock_args *));
+int	nfs_bmap __P((struct vop_bmap_args *));
+int	nfs_strategy __P((struct vop_strategy_args *));
+int	nfs_print __P((struct vop_print_args *));
+int	nfs_islocked __P((struct vop_islocked_args *));
+int	nfs_pathconf __P((struct vop_pathconf_args *));
+int	nfs_advlock __P((struct vop_advlock_args *));
+int	nfs_blkatoff __P((struct vop_blkatoff_args *));
+int	nfs_vget __P((struct mount *, ino_t, struct vnode **));	/* plain arguments, not a vop_*_args */
+int	nfs_valloc __P((struct vop_valloc_args *));
+#define nfs_reallocblks \
+	((int (*) __P((struct  vop_reallocblks_args *)))eopnotsupp)	/* eopnotsupp, cast to the op type */
+int	nfs_vfree __P((struct vop_vfree_args *));
+int	nfs_truncate __P((struct vop_truncate_args *));
+int	nfs_update __P((struct vop_update_args *));
+int	nfs_bwrite __P((struct vop_bwrite_args *));
+#endif /* KERNEL */
diff --git a/sys/nfs/nfsrtt.h b/sys/nfs/nfsrtt.h
new file mode 100644
index 00000000000..0d23880019b
--- /dev/null
+++ b/sys/nfs/nfsrtt.h
@@ -0,0 +1,96 @@
+/*
+ * Copyright (c) 1992, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Rick Macklem at The University of Guelph.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)nfsrtt.h	8.1 (Berkeley) 6/10/93
+ */
+
+/*
+ * Definitions for performance monitor.
+ * The client and server logging are turned on by setting the global
+ * constant "nfsrtton" to 1.
+ */
+#define	NFSRTTLOGSIZ	128	/* entries in each log array (rttl[] and drt[]) */
+
+/*
+ * Circular log of client side rpc activity. Each log entry is for one
+ * rpc filled in upon completion. (ie. in order of completion)
+ * The "pos" is the table index for the "next" entry, therefore the
+ * list goes from nfsrtt.rttl[pos] --> nfsrtt.rttl[pos - 1] in
+ * chronological order of completion.
+ */
+struct nfsrtt {
+	int pos;			/* Position in array for next entry */
+	struct rttl {			/* one log entry */
+		int	proc;		/* NFS procedure number */
+		int	rtt;		/* Measured round trip time */
+		int	rto;		/* Round Trip Timeout */
+		int	sent;		/* # rpcs in progress */
+		int	cwnd;		/* Send window */
+		int	srtt;		/* Ave Round Trip Time */
+		int	sdrtt;		/* Ave mean deviation of RTT */
+		fsid_t	fsid;		/* Fsid for mount point */
+		struct timeval tstamp;	/* Timestamp of log entry */
+	} rttl[NFSRTTLOGSIZ];		/* the log itself, NFSRTTLOGSIZ entries */
+};
+
+/*
+ * And definitions for server side performance monitor.
+ * The log organization is the same as above except it is filled in at the
+ * time the server sends the rpc reply.
+ */
+
+/*
+ * Bits for the "flag" field of struct drt.
+ */
+#define	DRT_NQNFS	0x01	/* Rpc used Nqnfs protocol */
+#define	DRT_TCP		0x02	/* Client used TCP transport */
+#define	DRT_CACHEREPLY	0x04	/* Reply was from recent request cache */
+#define	DRT_CACHEDROP	0x08	/* Rpc request dropped, due to recent reply */
+
+/*
+ * Server log structure
+ * NB: ipadr == INADDR_ANY indicates a client using a non IP protocol.
+ *	(ISO perhaps?)
+ */
+struct nfsdrt {
+	int pos;			/* Position of next log entry */
+	struct drt {			/* one log entry */
+		int	flag;		/* Bits as defined above (DRT_*) */
+		int	proc;		/* NFS procedure number */
+		u_long	ipadr;		/* IP address of client */
+		int	resptime;	/* Response time (usec) */
+		struct timeval tstamp;	/* Timestamp of log entry */
+	} drt[NFSRTTLOGSIZ];		/* the log itself, NFSRTTLOGSIZ entries */
+};
diff --git a/sys/nfs/nfsrvcache.h b/sys/nfs/nfsrvcache.h
new file mode 100644
index 00000000000..26da2c275df
--- /dev/null
+++ b/sys/nfs/nfsrvcache.h
@@ -0,0 +1,84 @@
+/*
+ * Copyright (c) 1989, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Rick Macklem at The University of Guelph.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)nfsrvcache.h	8.1 (Berkeley) 6/10/93
+ */
+
+/*
+ * Definitions for the server recent request cache
+ */
+
+#define	NFSRVCACHESIZ	256	/* presumably the number of cache entries -- confirm against users */
+
+struct nfsrvcache {
+	struct	nfsrvcache *rc_forw;		/* Hash chain links */
+	struct	nfsrvcache **rc_back;		/* Hash chain links */
+	struct	nfsrvcache *rc_next;		/* Lru list */
+	struct	nfsrvcache **rc_prev;		/* Lru list */
+	u_long	rc_xid;				/* rpc id number */
+	union {
+		struct mbuf *ru_repmb;		/* Reply mbuf list OR */
+		int ru_repstat;			/* Reply status */
+	} rc_un;				/* only one member is meaningful at a time */
+	union nethostaddr rc_haddr;		/* Host address */
+	short	rc_proc;			/* rpc proc number */
+	u_char	rc_state;		/* Current state of request (RC_UNUSED/RC_INPROG/RC_DONE) */
+	u_char	rc_flag;		/* Flag bits */
+};
+
+#define	rc_reply	rc_un.ru_repmb		/* reply mbuf list arm of rc_un */
+#define	rc_status	rc_un.ru_repstat	/* reply status arm of rc_un */
+#define	rc_inetaddr	rc_haddr.had_inetaddr	/* member of union nethostaddr (declared elsewhere) */
+#define	rc_nam		rc_haddr.had_nam	/* member of union nethostaddr (declared elsewhere) */
+
+/* Cache entry states (values for rc_state) */
+#define	RC_UNUSED	0
+#define	RC_INPROG	1
+#define	RC_DONE		2
+
+/* Return values (presumably of the cache lookup routine -- confirm) */
+#define	RC_DROPIT	0
+#define	RC_REPLY	1
+#define	RC_DOIT		2
+#define	RC_CHECKIT	3
+
+/* Flag bits (masks; presumably for rc_flag, which is a u_char) */
+#define	RC_LOCKED	0x01
+#define	RC_WANTED	0x02
+#define	RC_REPSTATUS	0x04
+#define	RC_REPMBUF	0x08
+#define	RC_NQNFS	0x10
+#define	RC_INETADDR	0x20
+#define	RC_NAM		0x40
diff --git a/sys/nfs/nfsv2.h b/sys/nfs/nfsv2.h
new file mode 100644
index 00000000000..e9d2985efac
--- /dev/null
+++ b/sys/nfs/nfsv2.h
@@ -0,0 +1,260 @@
+/*
+ * Copyright (c) 1989, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Rick Macklem at The University of Guelph.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)nfsv2.h	8.1 (Berkeley) 6/10/93
+ */
+
+/*
+ * nfs definitions as per the version 2 specs
+ */
+
+/*
+ * Constants as defined in the Sun NFS Version 2 spec.
+ * "NFS: Network File System Protocol Specification" RFC1094
+ */
+
+#define NFS_PORT	2049
+#define	NFS_PROG	100003
+#define NFS_VER2	2
+#define	NFS_MAXDGRAMDATA 8192
+#define	NFS_MAXDATA	32768
+#define	NFS_MAXPATHLEN	1024
+#define	NFS_MAXNAMLEN	255
+#define	NFS_FHSIZE	32	/* same value as NFSX_FH */
+#define	NFS_MAXPKTHDR	404
+#define NFS_MAXPACKET	(NFS_MAXPKTHDR+NFS_MAXDATA)	/* 404 + 32768 = 33172 */
+#define	NFS_MINPACKET	20
+#define	NFS_FABLKSIZE	512	/* Size in bytes of a block wrt fa_blocks */
+
+/* Stat numbers for rpc returns (values look like BSD errno numbers -- confirm against RFC1094) */
+#define	NFS_OK		0
+#define	NFSERR_PERM	1
+#define	NFSERR_NOENT	2
+#define	NFSERR_IO	5
+#define	NFSERR_NXIO	6
+#define	NFSERR_ACCES	13
+#define	NFSERR_EXIST	17
+#define	NFSERR_NODEV	19
+#define	NFSERR_NOTDIR	20
+#define	NFSERR_ISDIR	21
+#define	NFSERR_FBIG	27
+#define	NFSERR_NOSPC	28
+#define	NFSERR_ROFS	30
+#define	NFSERR_NAMETOL	63
+#define	NFSERR_NOTEMPTY	66
+#define	NFSERR_DQUOT	69
+#define	NFSERR_STALE	70
+#define	NFSERR_WFLUSH	99
+
+/* Sizes in bytes of various nfs rpc components */
+#define	NFSX_FH		32
+#define	NFSX_UNSIGNED	4
+#define	NFSX_NFSFATTR	68	/* 17 * NFSX_UNSIGNED: nfsv2_fattr with the fa_nfsv2 arm */
+#define	NFSX_NQFATTR	92	/* 23 * NFSX_UNSIGNED: nfsv2_fattr with the fa_nqnfs arm */
+#define	NFSX_NFSSATTR	32	/* 8 * NFSX_UNSIGNED: nfsv2_sattr with the sa_nfsv2 arm */
+#define	NFSX_NQSATTR	44	/* 11 * NFSX_UNSIGNED: nfsv2_sattr with the sa_nqnfs arm */
+#define	NFSX_COOKIE	4
+#define NFSX_NFSSTATFS	20	/* 5 * NFSX_UNSIGNED: nfsv2_statfs through sf_bavail */
+#define	NFSX_NQSTATFS	28	/* 7 * NFSX_UNSIGNED: all of nfsv2_statfs */
+#define	NFSX_FATTR(isnq)	((isnq) ? NFSX_NQFATTR : NFSX_NFSFATTR)	/* isnq nonzero selects the NQNFS size */
+#define	NFSX_SATTR(isnq)	((isnq) ? NFSX_NQSATTR : NFSX_NFSSATTR)	/* isnq nonzero selects the NQNFS size */
+#define	NFSX_STATFS(isnq)	((isnq) ? NFSX_NQSTATFS : NFSX_NFSSTATFS)	/* isnq nonzero selects the NQNFS size */
+
+/* nfs rpc procedure numbers (7 is not assigned to any name here) */
+#define	NFSPROC_NULL		0
+#define	NFSPROC_GETATTR		1
+#define	NFSPROC_SETATTR		2
+#define	NFSPROC_NOOP		3
+#define	NFSPROC_ROOT		NFSPROC_NOOP	/* Obsolete */
+#define	NFSPROC_LOOKUP		4
+#define	NFSPROC_READLINK	5
+#define	NFSPROC_READ		6
+#define	NFSPROC_WRITECACHE	NFSPROC_NOOP	/* Obsolete */
+#define	NFSPROC_WRITE		8
+#define	NFSPROC_CREATE		9
+#define	NFSPROC_REMOVE		10
+#define	NFSPROC_RENAME		11
+#define	NFSPROC_LINK		12
+#define	NFSPROC_SYMLINK		13
+#define	NFSPROC_MKDIR		14
+#define	NFSPROC_RMDIR		15
+#define	NFSPROC_READDIR		16
+#define	NFSPROC_STATFS		17
+
+/* NQ nfs numbers (continue after NFSPROC_STATFS) */
+#define	NQNFSPROC_READDIRLOOK	18
+#define	NQNFSPROC_GETLEASE	19
+#define	NQNFSPROC_VACATED	20
+#define	NQNFSPROC_EVICTED	21
+#define	NQNFSPROC_ACCESS	22
+
+#define	NFS_NPROCS		23	/* highest number above (22) plus one */
+/* Conversion macros between vnode types/modes and their rpc encodings */
+extern int		vttoif_tab[];	/* defined elsewhere; presumably used by MAKEIMODE -- confirm */
+#define	vtonfs_mode(t,m) /* VFIFO is passed to MAKEIMODE as VCHR */ \
+		txdr_unsigned(((t) == VFIFO) ? MAKEIMODE(VCHR, (m)) : \
+				MAKEIMODE((t), (m)))
+#define	nfstov_mode(a)	(fxdr_unsigned(u_short, (a))&07777)	/* keeps the low 12 bits only */
+#define	vtonfs_type(a)	txdr_unsigned(nfs_type[((long)(a))])	/* lookup in nfs_type[] */
+#define	nfstov_type(a)	ntov_type[fxdr_unsigned(u_long,(a))&0x7]	/* index masked to 0..7 */
+
+/* File types (no FIFO value is listed; VFIFO is special-cased by vtonfs_mode) */
+typedef enum { NFNON=0, NFREG=1, NFDIR=2, NFBLK=3, NFCHR=4, NFLNK=5 } nfstype;
+
+/* Structs for common parts of the rpc's (two u_longs each) */
+struct nfsv2_time {
+	u_long	nfs_sec;	/* seconds */
+	u_long	nfs_usec;	/* by name, microseconds */
+};
+
+struct nqnfs_time {
+	u_long	nq_sec;		/* seconds */
+	u_long	nq_nsec;	/* by name, nanoseconds */
+};
+
+/*
+ * File attributes and setable attributes. These structures cover both
+ * NFS version 2 and the NQNFS protocol. Note that the union is only
+ * used so that one pointer can refer to both variants. These structures
+ * go out on the wire and must be densely packed, so no quad data types
+ * are used. (all fields are longs or u_longs or structures of same)
+ * NB: You can't do sizeof(struct nfsv2_fattr), you must use the
+ *     NFSX_FATTR(isnq) macro.
+ */
+struct nfsv2_fattr {
+	u_long	fa_type;	/* the first five fields are common to both variants */
+	u_long	fa_mode;
+	u_long	fa_nlink;
+	u_long	fa_uid;
+	u_long	fa_gid;
+	union {
+		struct {
+			u_long	nfsfa_size;
+			u_long	nfsfa_blocksize;
+			u_long	nfsfa_rdev;
+			u_long	nfsfa_blocks;	/* in NFS_FABLKSIZE units */
+			u_long	nfsfa_fsid;
+			u_long	nfsfa_fileid;
+			struct nfsv2_time nfsfa_atime;
+			struct nfsv2_time nfsfa_mtime;
+			struct nfsv2_time nfsfa_ctime;
+		} fa_nfsv2;	/* NFS version 2 variant */
+		struct {
+			struct {
+				u_long	nqfa_qsize[2];	/* size as two u_longs, not a quad */
+			} nqfa_size;
+			u_long	nqfa_blocksize;
+			u_long	nqfa_rdev;
+			struct {
+				u_long	nqfa_qbytes[2];	/* bytes as two u_longs, not a quad */
+			} nqfa_bytes;
+			u_long	nqfa_fsid;
+			u_long	nqfa_fileid;
+			struct nqnfs_time nqfa_atime;
+			struct nqnfs_time nqfa_mtime;
+			struct nqnfs_time nqfa_ctime;
+			u_long	nqfa_flags;
+			u_long	nqfa_gen;
+			struct {
+				u_long	nqfa_qfilerev[2];	/* filerev as two u_longs, not a quad */
+			} nqfa_filerev;
+		} fa_nqnfs;	/* NQNFS variant */
+	} fa_un;
+};
+
+/* and some ugly defines for accessing union components (fa_nfs* -> fa_nfsv2, fa_nq* -> fa_nqnfs) */
+#define	fa_nfssize		fa_un.fa_nfsv2.nfsfa_size
+#define	fa_nfsblocksize		fa_un.fa_nfsv2.nfsfa_blocksize
+#define	fa_nfsrdev		fa_un.fa_nfsv2.nfsfa_rdev
+#define	fa_nfsblocks		fa_un.fa_nfsv2.nfsfa_blocks
+#define	fa_nfsfsid		fa_un.fa_nfsv2.nfsfa_fsid
+#define	fa_nfsfileid		fa_un.fa_nfsv2.nfsfa_fileid
+#define	fa_nfsatime		fa_un.fa_nfsv2.nfsfa_atime
+#define	fa_nfsmtime		fa_un.fa_nfsv2.nfsfa_mtime
+#define	fa_nfsctime		fa_un.fa_nfsv2.nfsfa_ctime
+#define	fa_nqsize		fa_un.fa_nqnfs.nqfa_size
+#define	fa_nqblocksize		fa_un.fa_nqnfs.nqfa_blocksize
+#define	fa_nqrdev		fa_un.fa_nqnfs.nqfa_rdev
+#define	fa_nqbytes		fa_un.fa_nqnfs.nqfa_bytes
+#define	fa_nqfsid		fa_un.fa_nqnfs.nqfa_fsid
+#define	fa_nqfileid		fa_un.fa_nqnfs.nqfa_fileid
+#define	fa_nqatime		fa_un.fa_nqnfs.nqfa_atime
+#define	fa_nqmtime		fa_un.fa_nqnfs.nqfa_mtime
+#define	fa_nqctime		fa_un.fa_nqnfs.nqfa_ctime
+#define	fa_nqflags		fa_un.fa_nqnfs.nqfa_flags
+#define	fa_nqgen		fa_un.fa_nqnfs.nqfa_gen
+#define	fa_nqfilerev		fa_un.fa_nqnfs.nqfa_filerev
+
+struct nfsv2_sattr {
+	u_long	sa_mode;	/* the first three fields are common to both variants */
+	u_long	sa_uid;
+	u_long	sa_gid;
+	union {
+		struct {
+			u_long	nfssa_size;
+			struct nfsv2_time nfssa_atime;
+			struct nfsv2_time nfssa_mtime;
+		} sa_nfsv2;	/* NFS version 2 variant */
+		struct {
+			struct {
+				u_long	nqsa_qsize[2];	/* size as two u_longs, not a quad */
+			} nqsa_size;
+			struct nqnfs_time nqsa_atime;
+			struct nqnfs_time nqsa_mtime;
+			u_long	nqsa_flags;
+			u_long	nqsa_rdev;
+		} sa_nqnfs;	/* NQNFS variant */
+	} sa_un;
+};
+
+/* and some ugly defines for accessing the unions (sa_nfs* -> sa_nfsv2, sa_nq* -> sa_nqnfs) */
+#define	sa_nfssize		sa_un.sa_nfsv2.nfssa_size
+#define	sa_nfsatime		sa_un.sa_nfsv2.nfssa_atime
+#define	sa_nfsmtime		sa_un.sa_nfsv2.nfssa_mtime
+#define	sa_nqsize		sa_un.sa_nqnfs.nqsa_size
+#define	sa_nqatime		sa_un.sa_nqnfs.nqsa_atime
+#define	sa_nqmtime		sa_un.sa_nqnfs.nqsa_mtime
+#define	sa_nqflags		sa_un.sa_nqnfs.nqsa_flags
+#define	sa_nqrdev		sa_un.sa_nqnfs.nqsa_rdev
+
+struct nfsv2_statfs {	/* first 5 fields = NFSX_NFSSTATFS bytes; all 7 = NFSX_NQSTATFS */
+	u_long	sf_tsize;
+	u_long	sf_bsize;
+	u_long	sf_blocks;
+	u_long	sf_bfree;
+	u_long	sf_bavail;
+	u_long	sf_files;	/* Nqnfs only */
+	u_long	sf_ffree;	/* ditto      */
+};
diff --git a/sys/nfs/nqnfs.h b/sys/nfs/nqnfs.h
new file mode 100644
index 00000000000..730741a4137
--- /dev/null
+++ b/sys/nfs/nqnfs.h
@@ -0,0 +1,198 @@
+/*
+ * Copyright (c) 1992, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Rick Macklem at The University of Guelph.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)nqnfs.h	8.1 (Berkeley) 6/10/93
+ */
+
+/*
+ * Definitions for NQNFS (Not Quite NFS) cache consistency protocol.
+ */
+
+/* Tunable constants */
+#define	NQ_CLOCKSKEW	3	/* Clock skew factor (sec) */
+#define	NQ_WRITESLACK	5	/* Delay for write cache flushing */
+#define	NQ_MAXLEASE	60	/* Max lease duration (sec) */
+#define	NQ_MINLEASE	5	/* Min lease duration (sec) */
+#define	NQ_DEFLEASE	30	/* Default lease duration (sec) */
+#define	NQ_RENEWAL	3	/* Time before expiry (sec) to renew */
+#define	NQ_TRYLATERDEL	15	/* Initial try later delay (sec) */
+#define	NQ_MAXNUMLEASE	2048	/* Upper bound on number of server leases */
+#define	NQ_DEADTHRESH	NQ_NEVERDEAD	/* Default nm_deadthresh (see next line) */
+#define	NQ_NEVERDEAD	9	/* Greater than max. nm_timeouts */
+#define	NQLCHSZ		256	/* Server hash table size */
+
+#define	NQNFS_PROG	300105	/* As assigned by Sun */
+#define	NQNFS_VER1	1	/* Protocol version number */
+#define	NQNFS_EVICTSIZ	156	/* Size of eviction request in bytes */
+
+/*
+ * Definitions used for saving the "last lease expires" time in Non-volatile
+ * RAM on the server. The default definitions below assume that NOVRAM is not
+ * available.
+ */
+#define	NQSTORENOVRAM(t)	/* expands to nothing: no NOVRAM */
+#define	NQLOADNOVRAM(t)		/* expands to nothing: no NOVRAM */
+
+/*
+ * Defn and structs used on the server to maintain state for current leases.
+ * The list of host(s) that hold the lease are kept as nqhost structures.
+ * The first one lives in nqlease and any others are held in a linked
+ * list of nqm structures hanging off of nqlease.
+ *
+ * Each nqlease structure is chained into two lists. The first is a list
+ * ordered by increasing expiry time for nqsrv_timer() and the second is a chain
+ * hashed on lc_fh.
+ */
+#define	LC_MOREHOSTSIZ	10	/* nqhost slots in each struct nqm */
+
+struct nqhost {
+	union {			/* every variant begins with a u_short flag */
+		struct {
+			u_short udp_flag;	/* aliased as lph_flag and lc_flag */
+			u_short	udp_port;
+			union nethostaddr udp_haddr;
+		} un_udp;
+		struct {
+			u_short connless_flag;
+			u_short connless_spare;
+			union nethostaddr connless_haddr;
+		} un_connless;
+		struct {
+			u_short conn_flag;
+			u_short conn_spare;
+			struct nfssvc_sock *conn_slp;	/* aliased as lph_slp */
+		} un_conn;
+	} lph_un;
+};
+#define	lph_flag	lph_un.un_udp.udp_flag
+#define	lph_port	lph_un.un_udp.udp_port
+#define	lph_haddr	lph_un.un_udp.udp_haddr
+#define	lph_inetaddr	lph_un.un_udp.udp_haddr.had_inetaddr
+#define	lph_claddr	lph_un.un_connless.connless_haddr
+#define	lph_nam		lph_un.un_connless.connless_haddr.had_nam
+#define	lph_slp		lph_un.un_conn.conn_slp
+
+struct nqlease {
+	struct nqlease *lc_chain1[2];	/* Timer queue list (must be first) */
+	struct nqlease *lc_fhnext;	/* Fhandle hash list */
+	struct nqlease **lc_fhprev;	/* presumably hash-list back link; confirm */
+	time_t		lc_expiry;	/* Expiry time (sec) */
+	struct nqhost	lc_host;	/* Host that got lease */
+	struct nqm	*lc_morehosts;	/* Other hosts that share read lease */
+	fsid_t		lc_fsid;	/* Fhandle */
+	char		lc_fiddata[MAXFIDSZ];
+	struct vnode	*lc_vp;		/* Soft reference to associated vnode */
+};
+#define	lc_flag		lc_host.lph_un.un_udp.udp_flag	/* flags live in lc_host */
+
+/* lc_flag bits */
+#define	LC_VALID	0x0001	/* Host address valid */
+#define	LC_WRITE	0x0002	/* Write cache */
+#define	LC_NONCACHABLE	0x0004	/* Non-cachable lease */
+#define	LC_LOCKED	0x0008	/* Locked */
+#define	LC_WANTED	0x0010	/* Lock wanted */
+#define	LC_EXPIREDWANTED 0x0020	/* Want lease when expired */
+#define	LC_UDP		0x0040	/* Host address for udp socket */
+#define	LC_CLTP		0x0080	/* Host address for other connectionless */
+#define	LC_LOCAL	0x0100	/* Host is server */
+#define	LC_VACATED	0x0200	/* Host has vacated lease */
+#define	LC_WRITTEN	0x0400	/* Recently wrote to the leased file */
+#define	LC_SREF		0x0800	/* Holds a nfssvc_sock reference */
+
+struct nqm {
+	struct nqm	*lpm_next;	/* next nqm in the lc_morehosts list */
+	struct nqhost	lpm_hosts[LC_MOREHOSTSIZ];	/* additional lease holders */
+};
+
+/*
+ * Flag bits for flags argument to nqsrv_getlease.
+ */
+#define	NQL_READ	LEASE_READ	/* Read Request */
+#define	NQL_WRITE	LEASE_WRITE	/* Write Request */
+#define	NQL_CHECK	0x4		/* Check for lease */
+#define	NQL_NOVAL	0xffffffff	/* Invalid */
+
+/*
+ * Special value for slp for local server calls.
+ */
+#define	NQLOCALSLP	((struct nfssvc_sock *) -1)
+
+/*
+ * Server side macros (nfsd, nam, cache, frev, cred come from the expansion site).
+ */
+#define	nqsrv_getl(v, l) \
+		(void) nqsrv_getlease((v), &nfsd->nd_duration, \
+		 ((nfsd->nd_nqlflag != 0 && nfsd->nd_nqlflag != NQL_NOVAL) ? nfsd->nd_nqlflag : \
+		 ((l) | NQL_CHECK)), /* else fall back to the caller's (l) */ \
+		 nfsd, nam, &cache, &frev, cred)
+
+/*
+ * Client side macros that check for a valid lease (args are evaluated repeatedly).
+ */
+#define	NQNFS_CKINVALID(v, n, f) \
+ ((time.tv_sec > (n)->n_expiry && \
+ VFSTONFS((v)->v_mount)->nm_timeouts < VFSTONFS((v)->v_mount)->nm_deadthresh) \
+  || ((f) == NQL_WRITE && ((n)->n_flag & NQNFSWRITE) == 0))
+
+#define	NQNFS_CKCACHABLE(v, f) \
+ ((time.tv_sec <= VTONFS(v)->n_expiry || \
+  VFSTONFS((v)->v_mount)->nm_timeouts >= VFSTONFS((v)->v_mount)->nm_deadthresh) \
+   && (VTONFS(v)->n_flag & NQNFSNONCACHE) == 0 && \
+   ((f) == NQL_READ || (VTONFS(v)->n_flag & NQNFSWRITE)))
+
+#define	NQNFS_NEEDLEASE(v, p) \
+		(time.tv_sec > VTONFS(v)->n_expiry ? \
+		 ((VTONFS(v)->n_flag & NQNFSEVICTED) ? 0 : nqnfs_piggy[p]) : \
+		 (((time.tv_sec + NQ_RENEWAL) > VTONFS(v)->n_expiry && \
+		   nqnfs_piggy[p]) ? \
+		   ((VTONFS(v)->n_flag & NQNFSWRITE) ? \
+		    NQL_WRITE : nqnfs_piggy[p]) : 0))
+
+/*
+ * List head for timer queue.
+ */
+extern union nqsrvthead {
+	union	nqsrvthead *th_head[2];
+	struct	nqlease *th_chain[2];	/* same shape as nqlease.lc_chain1 */
+} nqthead;
+extern struct nqlease **nqfhead;	/* presumably the lc_fh hash table -- confirm */
+extern u_long nqfheadhash;		/* presumably its hash mask/size -- confirm */
+
+/*
+ * Nqnfs return status numbers.
+ */
+#define	NQNFS_EXPIRED	500
+#define	NQNFS_TRYLATER	501
+#define NQNFS_AUTHERR	502
diff --git a/sys/nfs/rpcv2.h b/sys/nfs/rpcv2.h
new file mode 100644
index 00000000000..9c793a7f875
--- /dev/null
+++ b/sys/nfs/rpcv2.h
@@ -0,0 +1,88 @@
+/*
+ * Copyright (c) 1989, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Rick Macklem at The University of Guelph.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)rpcv2.h	8.1 (Berkeley) 6/10/93
+ */
+
+/*
+ * Definitions for Sun RPC Version 2, from
+ * "RPC: Remote Procedure Call Protocol Specification" RFC1057
+ */
+
+/* Version # */
+#define	RPC_VER2	2
+
+/* Authentication */
+#define	RPCAUTH_NULL	0		/* RFC1057 auth_flavor values */
+#define	RPCAUTH_UNIX	1
+#define	RPCAUTH_SHORT	2
+#define	RPCAUTH_NQNFS	300000
+#define	RPCAUTH_MAXSIZ	400		/* max opaque_auth body, in bytes */
+#define	RPCAUTH_UNIXGIDS 16		/* max gids in an AUTH_UNIX credential */
+
+/* Rpc Constants */
+#define	RPC_CALL	0		/* RFC1057 msg_type */
+#define	RPC_REPLY	1
+#define	RPC_MSGACCEPTED	0		/* RFC1057 reply_stat */
+#define	RPC_MSGDENIED	1
+#define	RPC_PROGUNAVAIL	1		/* RFC1057 accept_stat (1..4) */
+#define	RPC_PROGMISMATCH	2
+#define	RPC_PROCUNAVAIL	3
+#define	RPC_GARBAGE	4		/* I like this one */
+#define	RPC_MISMATCH	0		/* RFC1057 reject_stat */
+#define	RPC_AUTHERR	1
+
+/* Authentication failures */
+#define	AUTH_BADCRED	1		/* RFC1057 auth_stat values */
+#define	AUTH_REJECTCRED	2
+#define	AUTH_BADVERF	3
+#define	AUTH_REJECTVERF	4
+#define	AUTH_TOOWEAK	5		/* Give em wheaties */
+
+/* Sizes of rpc header parts */
+#define	RPC_SIZ		24
+#define	RPC_REPLYSIZ	28
+
+/* RPC Prog definitions */
+#define	RPCPROG_MNT	100005
+#define	RPCMNT_VER1	1
+#define	RPCMNT_MOUNT	1
+#define	RPCMNT_DUMP	2
+#define	RPCMNT_UMOUNT	3
+#define	RPCMNT_UMNTALL	4
+#define	RPCMNT_EXPORT	5
+#define	RPCMNT_NAMELEN	255
+#define	RPCMNT_PATHLEN	1024
+#define	RPCPROG_NFS	100003
diff --git a/sys/nfs/xdr_subs.h b/sys/nfs/xdr_subs.h
new file mode 100644
index 00000000000..c2aa4f3f343
--- /dev/null
+++ b/sys/nfs/xdr_subs.h
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 1989, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Rick Macklem at The University of Guelph.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)xdr_subs.h	8.1 (Berkeley) 6/10/93
+ */
+
+/*
+ * Macros used for conversion to/from xdr representation by nfs...
+ * These use the MACHINE DEPENDENT routines ntohl, htonl
+ * As defined by "XDR: External Data Representation Standard" RFC1014
+ *
+ * To simplify the implementation, we use ntohl/htonl even on big-endian
+ * machines, and count on them being `#define'd away.  Some of these
+ * might be slightly more efficient as quad_t copies on a big-endian,
+ * but we cannot count on their alignment anyway.
+ */
+
+#define	fxdr_unsigned(t, v)	((t)ntohl((long)(v)))	/* XDR -> host, cast to t */
+#define	txdr_unsigned(v)	(htonl((long)(v)))	/* host -> XDR */
+
+#define	fxdr_nfstime(f, t) {	/* XDR nfsv2_time (f) -> ts_sec/ts_nsec (t) */ \
+	(t)->ts_sec = ntohl(((struct nfsv2_time *)(f))->nfs_sec); \
+	(t)->ts_nsec = 1000 * ntohl(((struct nfsv2_time *)(f))->nfs_usec); /* usec -> nsec */ \
+}
+#define	txdr_nfstime(f, t) {	/* host ts_sec/ts_nsec (f) -> XDR nfsv2_time (t) */ \
+	((struct nfsv2_time *)(t))->nfs_sec = htonl((f)->ts_sec); \
+	((struct nfsv2_time *)(t))->nfs_usec = htonl((f)->ts_nsec / 1000); /* scale nsec -> usec BEFORE the byte swap */ \
+}
+
+#define	fxdr_nqtime(f, t) {	/* XDR nqnfs_time (f) -> ts_sec/ts_nsec (t) */ \
+	(t)->ts_sec = ntohl(((struct nqnfs_time *)(f))->nq_sec); \
+	(t)->ts_nsec = ntohl(((struct nqnfs_time *)(f))->nq_nsec); \
+}
+#define	txdr_nqtime(f, t) {	/* ts_sec/ts_nsec (f) -> XDR nqnfs_time (t) */ \
+	((struct nqnfs_time *)(t))->nq_sec = htonl((f)->ts_sec); \
+	((struct nqnfs_time *)(t))->nq_nsec = htonl((f)->ts_nsec); \
+}
+
+#define	fxdr_hyper(f, t) {	/* XDR word 0 becomes the high half of the quad */ \
+	((long *)(t))[_QUAD_HIGHWORD] = ntohl(((long *)(f))[0]); \
+	((long *)(t))[_QUAD_LOWWORD] = ntohl(((long *)(f))[1]); \
+}
+#define	txdr_hyper(f, t) {	/* the high half of the quad goes out first */ \
+	((long *)(t))[0] = htonl(((long *)(f))[_QUAD_HIGHWORD]); \
+	((long *)(t))[1] = htonl(((long *)(f))[_QUAD_LOWWORD]); \
+}
diff --git a/sys/nfsclient/nfs.h b/sys/nfsclient/nfs.h
new file mode 100644
index 00000000000..261fd42657a
--- /dev/null
+++ b/sys/nfsclient/nfs.h
@@ -0,0 +1,297 @@
+/*
+ * Copyright (c) 1989, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Rick Macklem at The University of Guelph.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)nfs.h	8.1 (Berkeley) 6/10/93
+ */
+
+/*
+ * Tunable constants for nfs
+ */
+
+#define	NFS_MAXIOVEC	34
+#define NFS_HZ		25		/* Ticks per second for NFS timeouts */
+#define	NFS_TIMEO	(1*NFS_HZ)	/* Default timeout = 1 second */
+#define	NFS_MINTIMEO	(1*NFS_HZ)	/* Min timeout to use */
+#define	NFS_MAXTIMEO	(60*NFS_HZ)	/* Max timeout to backoff to */
+#define	NFS_MINIDEMTIMEO (5*NFS_HZ)	/* Min timeout for non-idempotent ops*/
+#define	NFS_MAXREXMIT	100		/* Stop counting after this many */
+#define	NFS_MAXWINDOW	1024		/* Max number of outstanding requests */
+#define	NFS_RETRANS	10		/* Num of retrans for soft mounts */
+#define	NFS_MAXGRPS	16		/* Max. size of groups list */
+#define	NFS_MINATTRTIMO 5		/* Min attribute cache timeout in sec */
+#define	NFS_MAXATTRTIMO 60		/* Max attribute cache timeout in sec */
+#define	NFS_WSIZE	8192		/* Def. write data size <= 8192 */
+#define	NFS_RSIZE	8192		/* Def. read data size <= 8192 */
+#define	NFS_DEFRAHEAD	1		/* Def. read ahead # blocks */
+#define	NFS_MAXRAHEAD	4		/* Max. read ahead # blocks */
+#define	NFS_MAXREADDIR	NFS_MAXDATA	/* Max. size of directory read */
+#define	NFS_MAXUIDHASH	64		/* Max. # of hashed uid entries/mp */
+#define	NFS_MAXASYNCDAEMON 20	/* Max. number async_daemons runnable */
+#define	NFS_DIRBLKSIZ	1024		/* Size of an NFS directory block */
+#define	NMOD(a)		((a) % nfs_asyncdaemons)	/* a modulo nfs_asyncdaemons */
+
+/*
+ * Attr timeout: (now - n_mtime)/10 clamped to MIN..MAX; MIN when NMODIFIED.
+ */
+#define	NFS_ATTRTIMEO(np) \
+	((!((np)->n_flag & NMODIFIED) && \
+	  !((time.tv_sec - (np)->n_mtime) / 10 < NFS_MINATTRTIMO)) ? \
+	 ((time.tv_sec - (np)->n_mtime) / 10 > NFS_MAXATTRTIMO ? NFS_MAXATTRTIMO : \
+	  (time.tv_sec - (np)->n_mtime) / 10) : NFS_MINATTRTIMO)
+
+/*
+ * Structures for the nfssvc(2) syscall. Not that anyone but nfsd and mount_nfs
+ * should ever try and use it.
+ */
+struct nfsd_args {
+	int	sock;		/* Socket to serve */
+	caddr_t	name;		/* Client address for connection based sockets */
+	int	namelen;	/* Length of name */
+};
+
+struct nfsd_srvargs {
+	struct nfsd	*nsd_nfsd;	/* Pointer to in kernel nfsd struct */
+	uid_t		nsd_uid;	/* Effective uid mapped to cred */
+	u_long		nsd_haddr;	/* Ip address of client */
+	struct ucred	nsd_cr;		/* Cred. uid maps to */
+	int		nsd_authlen;	/* Length of auth string (ret) */
+	char		*nsd_authstr;	/* Auth string (ret) */
+};
+
+struct nfsd_cargs {
+	char		*ncd_dirp;	/* Mount dir path */
+	uid_t		ncd_authuid;	/* Effective uid */
+	int		ncd_authtype;	/* Type of authenticator */
+	int		ncd_authlen;	/* Length of authenticator string */
+	char		*ncd_authstr;	/* Authenticator string */
+};
+
+/*
+ * Stats structure
+ */
+struct nfsstats {		/* every member is a plain int counter */
+	int	attrcache_hits;
+	int	attrcache_misses;
+	int	lookupcache_hits;
+	int	lookupcache_misses;
+	int	direofcache_hits;
+	int	direofcache_misses;
+	int	biocache_reads;
+	int	read_bios;
+	int	read_physios;
+	int	biocache_writes;
+	int	write_bios;
+	int	write_physios;
+	int	biocache_readlinks;
+	int	readlink_bios;
+	int	biocache_readdirs;
+	int	readdir_bios;
+	int	rpccnt[NFS_NPROCS];	/* NFS_NPROCS slots; presumably one per procedure */
+	int	rpcretries;
+	int	srvrpccnt[NFS_NPROCS];	/* sized like rpccnt */
+	int	srvrpc_errs;
+	int	srv_errs;
+	int	rpcrequests;
+	int	rpctimeouts;
+	int	rpcunexpected;
+	int	rpcinvalid;
+	int	srvcache_inproghits;
+	int	srvcache_idemdonehits;
+	int	srvcache_nonidemdonehits;
+	int	srvcache_misses;
+	int	srvnqnfs_leases;
+	int	srvnqnfs_maxleases;
+	int	srvnqnfs_getleases;
+};
+
+/*
+ * Flags for nfssvc() system call.
+ */
+#define	NFSSVC_BIOD	0x002	/* 0x001 is not assigned in this list */
+#define	NFSSVC_NFSD	0x004
+#define	NFSSVC_ADDSOCK	0x008
+#define	NFSSVC_AUTHIN	0x010
+#define	NFSSVC_GOTAUTH	0x040	/* 0x020 is not assigned in this list */
+#define	NFSSVC_AUTHINFAIL 0x080
+#define	NFSSVC_MNTD	0x100
+
+/*
+ * The set of signals that interrupt an I/O in progress for NFSMNT_INT mounts.
+ * What should be in this set is open to debate, but I believe that since
+ * I/O system calls on ufs are never interrupted by signals the set should
+ * be minimal. My reasoning is that many current programs that use signals
+ * such as SIGALRM will not expect file I/O system calls to be interrupted
+ * by them and break.
+ */
+#ifdef KERNEL
+#define	NFSINT_SIGMASK	(sigmask(SIGINT)|sigmask(SIGTERM)|sigmask(SIGKILL)| \
+			 sigmask(SIGHUP)|sigmask(SIGQUIT))
+
+/*
+ * Which socket errors to ignore on connectionless sockets is unsettled; for
+ * now ignore all but EINTR/ERESTART/EWOULDBLOCK when PR_CONNREQUIRED is clear.
+ */
+#define	NFSIGNORE_SOERROR(s, e) \
+		(!((e) == EINTR || (e) == ERESTART || (e) == EWOULDBLOCK || \
+		   ((s) & PR_CONNREQUIRED) != 0))
+
+/*
+ * Nfs outstanding request list element
+ */
+struct nfsreq {
+	struct nfsreq	*r_next;	/* links in the outstanding request list */
+	struct nfsreq	*r_prev;
+	struct mbuf	*r_mreq;
+	struct mbuf	*r_mrep;
+	struct mbuf	*r_md;
+	caddr_t		r_dpos;
+	struct nfsmount *r_nmp;
+	struct vnode	*r_vp;
+	u_long		r_xid;
+	int		r_flags;	/* flags on request, see R_* below */
+	int		r_retry;	/* max retransmission count */
+	int		r_rexmit;	/* current retrans count */
+	int		r_timer;	/* tick counter on reply */
+	int		r_procnum;	/* NFS procedure number */
+	int		r_rtt;		/* RTT for rpc */
+	struct proc	*r_procp;	/* Proc that did I/O system call */
+};
+
+/* Flag values for r_flags */
+#define R_TIMING	0x01		/* timing request (in mntp) */
+#define R_SENT		0x02		/* request has been sent */
+#define	R_SOFTTERM	0x04		/* soft mnt, too many retries */
+#define	R_INTR		0x08		/* intr mnt, signal pending */
+#define	R_SOCKERR	0x10		/* Fatal error on socket */
+#define	R_TPRINTFMSG	0x20		/* Did a tprintf msg. */
+#define	R_MUSTRESEND	0x40		/* Must resend request */
+#define	R_GETONEREP	0x80		/* Probe for one reply only */
+
+struct nfsstats nfsstats;	/* NOTE(review): defined, not extern, in a header -- confirm intent */
+
+/*
+ * A list of nfssvc_sock structures is maintained with all the sockets
+ * that require service by the nfsd.
+ * The nfsuid structs hang off of the nfssvc_sock structs in both lru
+ * and uid hash lists.
+ */
+#define	NUIDHASHSIZ	32	/* must stay a power of two for NUIDHASH's mask */
+#define	NUIDHASH(uid)	((uid) & (NUIDHASHSIZ - 1))
+
+/*
+ * Network address hash list element
+ */
+union nethostaddr {
+	u_long had_inetaddr;	/* accessed via nu_inetaddr / lph_inetaddr */
+	struct mbuf *had_nam;	/* accessed via nu_nam / lph_nam */
+};
+
+struct nfsuid {
+	struct nfsuid	*nu_lrunext;	/* MUST be first */
+	struct nfsuid	*nu_lruprev;
+	struct nfsuid	*nu_hnext;	/* uid hash list links */
+	struct nfsuid	*nu_hprev;
+	int		nu_flag;	/* Flags */
+	uid_t		nu_uid;		/* Uid mapped by this entry */
+	union nethostaddr nu_haddr;	/* Host addr. for dgram sockets */
+	struct ucred	nu_cr;		/* Cred uid mapped to */
+};
+
+#define	nu_inetaddr	nu_haddr.had_inetaddr
+#define	nu_nam		nu_haddr.had_nam
+/* Bits for nu_flag */
+#define	NU_INETADDR	0x1	/* presumably: nu_haddr holds had_inetaddr -- confirm */
+
+struct nfssvc_sock {
+	struct nfsuid	*ns_lrunext;	/* MUST be first */
+	struct nfsuid	*ns_lruprev;
+	struct nfssvc_sock *ns_next;	/* links in the nfssvc_sock list */
+	struct nfssvc_sock *ns_prev;
+	int		ns_flag;	/* SLP_* bits below */
+	u_long		ns_sref;
+	struct file	*ns_fp;
+	struct socket	*ns_so;
+	int		ns_solock;
+	struct mbuf	*ns_nam;
+	int		ns_cc;
+	struct mbuf	*ns_raw;
+	struct mbuf	*ns_rawend;
+	int		ns_reclen;
+	struct mbuf	*ns_rec;
+	struct mbuf	*ns_recend;
+	int		ns_numuids;
+	struct nfsuid	*ns_uidh[NUIDHASHSIZ];	/* uid hash chains, NUIDHASHSIZ buckets */
+};
+
+/* Bits for "ns_flag" */
+#define	SLP_VALID	0x01
+#define	SLP_DOREC	0x02
+#define	SLP_NEEDQ	0x04
+#define	SLP_DISCONN	0x08
+#define	SLP_GETSTREAM	0x10
+#define	SLP_INIT	0x20
+#define	SLP_WANTINIT	0x40
+
+#define SLP_ALLFLAGS	0xff	/* also covers 0x80, which has no name here */
+
+/*
+ * One of these structures is allocated for each nfsd.
+ */
+struct nfsd {
+	struct nfsd	*nd_next;	/* Must be first */
+	struct nfsd	*nd_prev;
+	int		nd_flag;	/* NFSD_ flags */
+	struct nfssvc_sock *nd_slp;	/* Current socket */
+	struct mbuf	*nd_nam;	/* Client addr for datagram req. */
+	struct mbuf	*nd_mrep;	/* Req. mbuf list */
+	struct mbuf	*nd_md;
+	caddr_t		nd_dpos;	/* Position in list */
+	int		nd_procnum;	/* RPC procedure number */
+	u_long		nd_retxid;	/* RPC xid */
+	int		nd_repstat;	/* Reply status value */
+	struct ucred	nd_cr;		/* Credentials for req. */
+	int		nd_nqlflag;	/* Leasing flag */
+	int		nd_duration;	/* Lease duration */
+	int		nd_authlen;	/* Authenticator len */
+	u_char		nd_authstr[RPCAUTH_MAXSIZ]; /* Authenticator data */
+	struct proc	*nd_procp;	/* Proc ptr */
+};
+
+#define	NFSD_WAITING	0x01
+#define	NFSD_CHECKSLP	0x02
+#define	NFSD_REQINPROG	0x04
+#define	NFSD_NEEDAUTH	0x08
+#define	NFSD_AUTHFAIL	0x10
+#endif	/* KERNEL */
diff --git a/sys/nfsclient/nfs_bio.c b/sys/nfsclient/nfs_bio.c
new file mode 100644
index 00000000000..177a278b631
--- /dev/null
+++ b/sys/nfsclient/nfs_bio.c
@@ -0,0 +1,799 @@
+/*
+ * Copyright (c) 1989, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Rick Macklem at The University of Guelph.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)nfs_bio.c	8.5 (Berkeley) 1/4/94
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/resourcevar.h>
+#include <sys/proc.h>
+#include <sys/buf.h>
+#include <sys/vnode.h>
+#include <sys/trace.h>
+#include <sys/mount.h>
+#include <sys/kernel.h>
+
+#include <vm/vm.h>
+
+#include <nfs/nfsnode.h>
+#include <nfs/rpcv2.h>
+#include <nfs/nfsv2.h>
+#include <nfs/nfs.h>
+#include <nfs/nfsmount.h>
+#include <nfs/nqnfs.h>
+
+struct buf *incore(), *nfs_getcacheblk();
+extern struct proc *nfs_iodwant[NFS_MAXASYNCDAEMON];	/* non-null slot = nfsiod that nfs_asyncio() may wake */
+extern int nfs_numasync;	/* nfs_asyncio() refuses all requests while this is 0 */
+
+/*
+ * Vnode op for read using bio (VREG, VLNK and VDIR); returns 0 or an errno.
+ * Any similarity to readip() is purely coincidental
+ */
+nfs_bioread(vp, uio, ioflag, cred)
+	register struct vnode *vp;
+	register struct uio *uio;
+	int ioflag;
+	struct ucred *cred;
+{
+	register struct nfsnode *np = VTONFS(vp);
+	register int biosize, diff;
+	struct buf *bp, *rabp;
+	struct vattr vattr;
+	struct proc *p;
+	struct nfsmount *nmp;
+	daddr_t lbn, bn, rabn;
+	caddr_t baddr;
+	int got_buf, nra, error = 0, n, on, not_readin;
+
+#ifdef lint
+	ioflag = ioflag;
+#endif /* lint */
+#ifdef DIAGNOSTIC
+	if (uio->uio_rw != UIO_READ)
+		panic("nfs_read mode");
+#endif
+	if (uio->uio_resid == 0)
+		return (0);
+	if (uio->uio_offset < 0 && vp->v_type != VDIR)
+		return (EINVAL);
+	nmp = VFSTONFS(vp->v_mount);
+	biosize = nmp->nm_rsize;	/* cache block size is the mount's read size */
+	p = uio->uio_procp;
+	/*
+	 * For nfs, cache consistency can only be maintained approximately.
+	 * Although RFC1094 does not specify the criteria, the following is
+	 * believed to be compatible with the reference port.
+	 * For nqnfs, full cache consistency is maintained within the loop.
+	 * For nfs:
+	 * If the file's modify time on the server has changed since the
+	 * last read rpc or you have written to the file,
+	 * you may have lost data cache consistency with the
+	 * server, so flush all of the file's data out of the cache.
+	 * Then force a getattr rpc to ensure that you have up to date
+	 * attributes.
+	 * The mount flag NFSMNT_MYWRITE says "Assume that my writes are
+	 * the ones changing the modify time".
+	 * NB: This implies that cache data can be read when up to
+	 * NFS_ATTRTIMEO seconds out of date. If you find that you need current
+	 * attributes this could be forced by setting n_attrstamp to 0 before
+	 * the VOP_GETATTR() call.
+	 */
+	if ((nmp->nm_flag & NFSMNT_NQNFS) == 0 && vp->v_type != VLNK) {
+		if (np->n_flag & NMODIFIED) {
+			if ((nmp->nm_flag & NFSMNT_MYWRITE) == 0 ||
+			     vp->v_type != VREG) {
+				if (error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1))
+					return (error);
+			}
+			np->n_attrstamp = 0;	/* force the getattr below to go to the server */
+			np->n_direofoffset = 0;
+			if (error = VOP_GETATTR(vp, &vattr, cred, p))
+				return (error);
+			np->n_mtime = vattr.va_mtime.ts_sec;
+		} else {
+			if (error = VOP_GETATTR(vp, &vattr, cred, p))
+				return (error);
+			if (np->n_mtime != vattr.va_mtime.ts_sec) {	/* changed on the server */
+				np->n_direofoffset = 0;
+				if (error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1))
+					return (error);
+				np->n_mtime = vattr.va_mtime.ts_sec;
+			}
+		}
+	}
+	do {
+
+	    /*
+	     * Get a valid lease. If cached data is stale, flush it.
+	     */
+	    if (nmp->nm_flag & NFSMNT_NQNFS) {
+		if (NQNFS_CKINVALID(vp, np, NQL_READ)) {
+		    do {
+			error = nqnfs_getlease(vp, NQL_READ, cred, p);
+		    } while (error == NQNFS_EXPIRED);
+		    if (error)
+			return (error);
+		    if (np->n_lrev != np->n_brev ||
+			(np->n_flag & NQNFSNONCACHE) ||
+			((np->n_flag & NMODIFIED) && vp->v_type == VDIR)) {
+			if (vp->v_type == VDIR) {
+			    np->n_direofoffset = 0;
+			    cache_purge(vp);
+			}
+			if (error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1))
+			    return (error);
+			np->n_brev = np->n_lrev;
+		    }
+		} else if (vp->v_type == VDIR && (np->n_flag & NMODIFIED)) {
+		    np->n_direofoffset = 0;
+		    cache_purge(vp);
+		    if (error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1))
+			return (error);
+		}
+	    }
+	    if (np->n_flag & NQNFSNONCACHE) {	/* bypass the buffer cache: one direct rpc */
+		switch (vp->v_type) {
+		case VREG:
+			error = nfs_readrpc(vp, uio, cred);
+			break;
+		case VLNK:
+			error = nfs_readlinkrpc(vp, uio, cred);
+			break;
+		case VDIR:
+			error = nfs_readdirrpc(vp, uio, cred);
+			break;
+		};
+		return (error);
+	    }
+	    baddr = (caddr_t)0;
+	    switch (vp->v_type) {
+	    case VREG:
+		nfsstats.biocache_reads++;
+		lbn = uio->uio_offset / biosize;
+		on = uio->uio_offset & (biosize-1);	/* offset in block; mask needs biosize = 2^k */
+		bn = lbn * (biosize / DEV_BSIZE);	/* block number in DEV_BSIZE units */
+		not_readin = 1;				/* cleared if the block is read in below */
+
+		/*
+		 * Start the read ahead(s), as required.
+		 */
+		if (nfs_numasync > 0 && nmp->nm_readahead > 0 &&
+		    lbn == vp->v_lastr + 1) {		/* sequential with the previous read */
+		    for (nra = 0; nra < nmp->nm_readahead &&
+			(lbn + 1 + nra) * biosize < np->n_size; nra++) {
+			rabn = (lbn + 1 + nra) * (biosize / DEV_BSIZE);
+			if (!incore(vp, rabn)) {
+			    rabp = nfs_getcacheblk(vp, rabn, biosize, p);
+			    if (!rabp)
+				return (EINTR);
+			    if ((rabp->b_flags & (B_DELWRI | B_DONE)) == 0) {
+				rabp->b_flags |= (B_READ | B_ASYNC);
+				if (nfs_asyncio(rabp, cred)) {	/* no nfsiod took it: drop the block */
+				    rabp->b_flags |= B_INVAL;
+				    brelse(rabp);
+				}
+			    }
+			}
+		    }
+		}
+
+		/*
+		 * If the block is in the cache and has the required data
+		 * in a valid region, just copy it out.
+		 * Otherwise, get the block and write back/read in,
+		 * as required.
+		 */
+		if ((bp = incore(vp, bn)) &&
+		    (bp->b_flags & (B_BUSY | B_WRITEINPROG)) ==
+		    (B_BUSY | B_WRITEINPROG))
+			got_buf = 0;	/* bp is used without being acquired: no brelse() later */
+		else {
+again:
+			bp = nfs_getcacheblk(vp, bn, biosize, p);
+			if (!bp)
+				return (EINTR);
+			got_buf = 1;
+			if ((bp->b_flags & (B_DONE | B_DELWRI)) == 0) {
+				bp->b_flags |= B_READ;
+				not_readin = 0;
+				if (error = nfs_doio(bp, cred, p)) {
+				    brelse(bp);
+				    return (error);
+				}
+			}
+		}
+		n = min((unsigned)(biosize - on), uio->uio_resid);
+		diff = np->n_size - uio->uio_offset;	/* bytes left before end of file */
+		if (diff < n)
+			n = diff;
+		if (not_readin && n > 0) {
+			if (on < bp->b_validoff || (on + n) > bp->b_validend) {
+				if (!got_buf) {
+				    bp = nfs_getcacheblk(vp, bn, biosize, p);
+				    if (!bp)
+					return (EINTR);
+				    got_buf = 1;
+				}
+				bp->b_flags |= B_INVAL;	/* wanted range not valid: discard and re-read */
+				if (bp->b_dirtyend > 0) {
+				    if ((bp->b_flags & B_DELWRI) == 0)
+					panic("nfsbioread");
+				    if (VOP_BWRITE(bp) == EINTR)
+					return (EINTR);
+				} else
+				    brelse(bp);
+				goto again;
+			}
+		}
+		vp->v_lastr = lbn;	/* consulted by the read-ahead test above */
+		diff = (on >= bp->b_validend) ? 0 : (bp->b_validend - on);	/* clip to valid data */
+		if (diff < n)
+			n = diff;
+		break;
+	    case VLNK:
+		nfsstats.biocache_readlinks++;
+		bp = nfs_getcacheblk(vp, (daddr_t)0, NFS_MAXPATHLEN, p);
+		if (!bp)
+			return (EINTR);
+		if ((bp->b_flags & B_DONE) == 0) {
+			bp->b_flags |= B_READ;
+			if (error = nfs_doio(bp, cred, p)) {
+				brelse(bp);
+				return (error);
+			}
+		}
+		n = min(uio->uio_resid, NFS_MAXPATHLEN - bp->b_resid);
+		got_buf = 1;
+		on = 0;
+		break;
+	    case VDIR:
+		nfsstats.biocache_readdirs++;
+		bn = (daddr_t)uio->uio_offset;	/* the directory offset itself keys the cache block */
+		bp = nfs_getcacheblk(vp, bn, NFS_DIRBLKSIZ, p);
+		if (!bp)
+			return (EINTR);
+		if ((bp->b_flags & B_DONE) == 0) {
+			bp->b_flags |= B_READ;
+			if (error = nfs_doio(bp, cred, p)) {
+				brelse(bp);
+				return (error);
+			}
+		}
+
+		/*
+		 * If not eof and read aheads are enabled, start one.
+		 * (You need the current block first, so that you have the
+		 *  directory offset cookie of the next block.)
+		 */
+		rabn = bp->b_blkno;
+		if (nfs_numasync > 0 && nmp->nm_readahead > 0 &&
+		    rabn != 0 && rabn != np->n_direofoffset &&
+		    !incore(vp, rabn)) {
+			rabp = nfs_getcacheblk(vp, rabn, NFS_DIRBLKSIZ, p);
+			if (rabp) {
+			    if ((rabp->b_flags & (B_DONE | B_DELWRI)) == 0) {
+				rabp->b_flags |= (B_READ | B_ASYNC);
+				if (nfs_asyncio(rabp, cred)) {
+				    rabp->b_flags |= B_INVAL;
+				    brelse(rabp);
+				}
+			    }
+			}
+		}
+		on = 0;
+		n = min(uio->uio_resid, NFS_DIRBLKSIZ - bp->b_resid);
+		got_buf = 1;
+		break;
+	    };
+
+	    if (n > 0) {
+		if (!baddr)
+			baddr = bp->b_data;
+		error = uiomove(baddr + on, (int)n, uio);
+	    }
+	    switch (vp->v_type) {
+	    case VREG:
+		if (n + on == biosize || uio->uio_offset == np->n_size)
+			bp->b_flags |= B_AGE;	/* block consumed to its end or to end of file */
+		break;
+	    case VLNK:
+		n = 0;			/* ends the loop: a link is read in one pass */
+		break;
+	    case VDIR:
+		uio->uio_offset = bp->b_blkno;	/* continue from the cookie nfs_doio() saved */
+		break;
+	    };
+	    if (got_buf)
+		brelse(bp);
+	} while (error == 0 && uio->uio_resid > 0 && n > 0);
+	return (error);
+}
+
+/*
+ * Vnode op for write using bio (VREG only, else EIO); returns 0 or an errno.
+ */
+nfs_write(ap)
+	struct vop_write_args /* {
+		struct vnode *a_vp;
+		struct uio *a_uio;
+		int  a_ioflag;
+		struct ucred *a_cred;
+	} */ *ap;
+{
+	register int biosize;
+	register struct uio *uio = ap->a_uio;
+	struct proc *p = uio->uio_procp;
+	register struct vnode *vp = ap->a_vp;
+	struct nfsnode *np = VTONFS(vp);
+	register struct ucred *cred = ap->a_cred;
+	int ioflag = ap->a_ioflag;
+	struct buf *bp;
+	struct vattr vattr;
+	struct nfsmount *nmp;
+	daddr_t lbn, bn;
+	int n, on, error = 0;
+
+#ifdef DIAGNOSTIC
+	if (uio->uio_rw != UIO_WRITE)
+		panic("nfs_write mode");
+	if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != curproc)
+		panic("nfs_write proc");
+#endif
+	if (vp->v_type != VREG)
+		return (EIO);
+	if (np->n_flag & NWRITEERR) {	/* report the error nfs_doio() left for us, once */
+		np->n_flag &= ~NWRITEERR;
+		return (np->n_error);
+	}
+	if (ioflag & (IO_APPEND | IO_SYNC)) {
+		if (np->n_flag & NMODIFIED) {
+			np->n_attrstamp = 0;
+			if (error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1))
+				return (error);
+		}
+		if (ioflag & IO_APPEND) {
+			np->n_attrstamp = 0;
+			if (error = VOP_GETATTR(vp, &vattr, cred, p))
+				return (error);
+			uio->uio_offset = np->n_size;	/* append: start at end of file */
+		}
+	}
+	nmp = VFSTONFS(vp->v_mount);
+	if (uio->uio_offset < 0)
+		return (EINVAL);
+	if (uio->uio_resid == 0)
+		return (0);
+	/*
+	 * Maybe this should be above the vnode op call, but so long as
+	 * file servers have no limits, i don't think it matters
+	 */
+	if (p && uio->uio_offset + uio->uio_resid >
+	      p->p_rlimit[RLIMIT_FSIZE].rlim_cur) {
+		psignal(p, SIGXFSZ);
+		return (EFBIG);
+	}
+	/*
+	 * I use nm_rsize, not nm_wsize so that all buffer cache blocks
+	 * will be the same size within a filesystem. nfs_writerpc will
+	 * still use nm_wsize when sizing the rpc's.
+	 */
+	biosize = nmp->nm_rsize;
+	do {
+
+		/*
+		 * Check for a valid write lease.
+		 * If non-cachable, just do the rpc
+		 */
+		if ((nmp->nm_flag & NFSMNT_NQNFS) &&
+		    NQNFS_CKINVALID(vp, np, NQL_WRITE)) {
+			do {
+				error = nqnfs_getlease(vp, NQL_WRITE, cred, p);
+			} while (error == NQNFS_EXPIRED);
+			if (error)
+				return (error);
+			if (np->n_lrev != np->n_brev ||
+			    (np->n_flag & NQNFSNONCACHE)) {
+				if (error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1))
+					return (error);
+				np->n_brev = np->n_lrev;
+			}
+		}
+		if (np->n_flag & NQNFSNONCACHE)
+			return (nfs_writerpc(vp, uio, cred, ioflag));
+		nfsstats.biocache_writes++;
+		lbn = uio->uio_offset / biosize;
+		on = uio->uio_offset & (biosize-1);	/* offset in block; mask needs biosize = 2^k */
+		n = min((unsigned)(biosize - on), uio->uio_resid);
+		bn = lbn * (biosize / DEV_BSIZE);	/* block number in DEV_BSIZE units */
+again:
+		bp = nfs_getcacheblk(vp, bn, biosize, p);
+		if (!bp)
+			return (EINTR);
+		if (bp->b_wcred == NOCRED) {
+			crhold(cred);
+			bp->b_wcred = cred;
+		}
+		np->n_flag |= NMODIFIED;
+		if (uio->uio_offset + n > np->n_size) {	/* this write extends the file */
+			np->n_size = uio->uio_offset + n;
+			vnode_pager_setsize(vp, (u_long)np->n_size);
+		}
+
+		/*
+		 * If the new write will leave a contiguous dirty
+		 * area, just update the b_dirtyoff and b_dirtyend,
+		 * otherwise force a write rpc of the old dirty area.
+		 */
+		if (bp->b_dirtyend > 0 &&
+		    (on > bp->b_dirtyend || (on + n) < bp->b_dirtyoff)) {
+			bp->b_proc = p;
+			if (VOP_BWRITE(bp) == EINTR)
+				return (EINTR);
+			goto again;
+		}
+
+		/*
+		 * Check for valid write lease and get one as required.
+		 * In case getblk() and/or bwrite() delayed us.
+		 */
+		if ((nmp->nm_flag & NFSMNT_NQNFS) &&
+		    NQNFS_CKINVALID(vp, np, NQL_WRITE)) {
+			do {
+				error = nqnfs_getlease(vp, NQL_WRITE, cred, p);
+			} while (error == NQNFS_EXPIRED);
+			if (error) {
+				brelse(bp);
+				return (error);
+			}
+			if (np->n_lrev != np->n_brev ||
+			    (np->n_flag & NQNFSNONCACHE)) {
+				brelse(bp);
+				if (error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1))
+					return (error);
+				np->n_brev = np->n_lrev;
+				goto again;
+			}
+		}
+		if (error = uiomove((char *)bp->b_data + on, n, uio)) {
+			bp->b_flags |= B_ERROR;
+			brelse(bp);
+			return (error);
+		}
+		if (bp->b_dirtyend > 0) {	/* grow the existing dirty region */
+			bp->b_dirtyoff = min(on, bp->b_dirtyoff);
+			bp->b_dirtyend = max((on + n), bp->b_dirtyend);
+		} else {
+			bp->b_dirtyoff = on;
+			bp->b_dirtyend = on + n;
+		}
+#ifndef notdef
+		if (bp->b_validend == 0 || bp->b_validend < bp->b_dirtyoff ||
+		    bp->b_validoff > bp->b_dirtyend) {	/* valid and dirty regions are disjoint */
+			bp->b_validoff = bp->b_dirtyoff;
+			bp->b_validend = bp->b_dirtyend;
+		} else {				/* they touch: take the union */
+			bp->b_validoff = min(bp->b_validoff, bp->b_dirtyoff);
+			bp->b_validend = max(bp->b_validend, bp->b_dirtyend);
+		}
+#else
+		bp->b_validoff = bp->b_dirtyoff;
+		bp->b_validend = bp->b_dirtyend;
+#endif
+		if (ioflag & IO_APPEND)
+			bp->b_flags |= B_APPENDWRITE;	/* nfs_doio() then passes IO_APPEND to the rpc */
+
+		/*
+		 * If the lease is non-cachable or IO_SYNC do bwrite().
+		 */
+		if ((np->n_flag & NQNFSNONCACHE) || (ioflag & IO_SYNC)) {
+			bp->b_proc = p;
+			if (error = VOP_BWRITE(bp))
+				return (error);
+		} else if ((n + on) == biosize &&
+			(nmp->nm_flag & NFSMNT_NQNFS) == 0) {
+			bp->b_proc = (struct proc *)0;
+			bawrite(bp);		/* written up to the block's end: start it now */
+		} else
+			bdwrite(bp);		/* otherwise leave it as a delayed write */
+	} while (uio->uio_resid > 0 && n > 0);
+	return (0);
+}
+
+/*
+ * Return the nfs cache block (vp, bn) of the given size, marked busy,
+ * allocating it via getblk() when it is not in the cache yet.
+ * Without NFSMNT_INT on the mount this is a plain getblk().  With it,
+ * every getblk() that comes back empty is followed by nfs_sigintr();
+ * NULL is returned once that reports an interrupt.
+ */
+struct buf *
+nfs_getcacheblk(vp, bn, size, p)
+	struct vnode *vp;
+	daddr_t bn;
+	int size;
+	struct proc *p;
+{
+	struct nfsmount *mnt = VFSTONFS(vp->v_mount);
+	register struct buf *blk;
+
+	if ((mnt->nm_flag & NFSMNT_INT) == 0)
+		return (getblk(vp, bn, size, 0, 0));
+
+	/* First try catches signals; retries time out after 2 * hz. */
+	for (blk = getblk(vp, bn, size, PCATCH, 0); blk == (struct buf *)0;
+	    blk = getblk(vp, bn, size, 0, 2 * hz)) {
+		if (nfs_sigintr(mnt, (struct nfsreq *)0, p))
+			return ((struct buf *)0);
+	}
+	return (blk);
+}
+
+/*
+ * Write back and invalidate the cached buffers of vp, serialising with
+ * any flush another process already has in progress on the same nfsnode.
+ *
+ *	flags	handed unchanged to vinvalbuf()
+ *	cred, p	handed to vinvalbuf(); p is also passed to nfs_sigintr()
+ *	intrflg	non-zero to allow interruption; treated as 0 unless the
+ *		mount has NFSMNT_INT set
+ *
+ * Returns 0 once the flush has completed (NMODIFIED is then cleared), or
+ * EINTR when nfs_sigintr() reported an interrupt while waiting or flushing.
+ */
+nfs_vinvalbuf(vp, flags, cred, p, intrflg)
+	struct vnode *vp;
+	int flags;
+	struct ucred *cred;
+	struct proc *p;
+	int intrflg;
+{
+	register struct nfsnode *node = VTONFS(vp);
+	struct nfsmount *mnt = VFSTONFS(vp->v_mount);
+	int rv, interrupted = 0, slpcatch, timo;
+
+	/* Interrupts are only honoured on NFSMNT_INT mounts. */
+	if ((mnt->nm_flag & NFSMNT_INT) == 0)
+		intrflg = 0;
+	slpcatch = intrflg ? PCATCH : 0;
+	timo = intrflg ? 2 * hz : 0;
+
+	/* Wait out any flush that another process has in progress. */
+	while (node->n_flag & NFLUSHINPROG) {
+		node->n_flag |= NFLUSHWANT;
+		rv = tsleep((caddr_t)&node->n_flag, PRIBIO + 2, "nfsvinval",
+		    timo);
+		if (rv && intrflg && nfs_sigintr(mnt, (struct nfsreq *)0, p))
+			return (EINTR);
+	}
+
+	/* Our turn: flush, retrying until it succeeds or we are interrupted. */
+	node->n_flag |= NFLUSHINPROG;
+	for (rv = vinvalbuf(vp, flags, cred, p, slpcatch, 0); rv != 0;
+	    rv = vinvalbuf(vp, flags, cred, p, 0, timo)) {
+		if (intrflg && nfs_sigintr(mnt, (struct nfsreq *)0, p)) {
+			interrupted = 1;
+			break;
+		}
+	}
+	/* NMODIFIED is cleared only when the flush ran to completion. */
+	if (interrupted)
+		node->n_flag &= ~NFLUSHINPROG;
+	else
+		node->n_flag &= ~(NMODIFIED | NFLUSHINPROG);
+	if (node->n_flag & NFLUSHWANT) {	/* release anyone waiting above */
+		node->n_flag &= ~NFLUSHWANT;
+		wakeup((caddr_t)&node->n_flag);
+	}
+	return (interrupted ? EINTR : 0);
+}
+
+/*
+ * Hand bp to an nfsiod for asynchronous I/O.  Returns 0 when a daemon
+ * was woken for it and EIO when none is available, so that requests are
+ * not queued while the nfsiods are all hung on a dead server.
+ */
+nfs_asyncio(bp, cred)
+	register struct buf *bp;
+	struct ucred *cred;
+{
+	register int slot;
+
+	if (nfs_numasync == 0)
+		return (EIO);
+	/* Find the first nfsiod that has an entry in nfs_iodwant[]. */
+	for (slot = 0; slot < NFS_MAXASYNCDAEMON; slot++)
+		if (nfs_iodwant[slot])
+			break;
+	if (slot == NFS_MAXASYNCDAEMON)
+		return (EIO);
+	/* Give the buffer the caller's credential if it has none yet. */
+	if (bp->b_flags & B_READ) {
+		if (cred != NOCRED && bp->b_rcred == NOCRED) {
+			crhold(cred);
+			bp->b_rcred = cred;
+		}
+	} else if (cred != NOCRED && bp->b_wcred == NOCRED) {
+		crhold(cred);
+		bp->b_wcred = cred;
+	}
+	TAILQ_INSERT_TAIL(&nfs_bufq, bp, b_freelist);
+	nfs_iodwant[slot] = (struct proc *)0;	/* slot is taken from here on */
+	wakeup((caddr_t)&nfs_iodwant[slot]);
+	return (0);
+}
+
+/*
+ * Do the rpc for cache block bp (a read if B_READ is set, else a write of
+ * its dirty region) and biodone() it.  Called synchronously or from an nfsiod.
+ */
+int
+nfs_doio(bp, cr, p)
+	register struct buf *bp;
+	struct ucred *cr;		/* credential handed on to the rpc routines */
+	struct proc *p;
+{
+	register struct uio *uiop;
+	register struct vnode *vp;
+	struct nfsnode *np;
+	struct nfsmount *nmp;
+	int error = 0, diff, len;	/* error stays 0 if the read switch matches no type */
+	struct uio uio;
+	struct iovec io;
+
+	vp = bp->b_vp;
+	np = VTONFS(vp);
+	nmp = VFSTONFS(vp->v_mount);
+	uiop = &uio;
+	uiop->uio_iov = &io;
+	uiop->uio_iovcnt = 1;
+	uiop->uio_segflg = UIO_SYSSPACE;
+	uiop->uio_procp = p;
+
+	/*
+	 * Historically, paging was done with physio, but no more.
+	 */
+	if (bp->b_flags & B_PHYS)
+	    panic("doio phys");
+	if (bp->b_flags & B_READ) {
+	    io.iov_len = uiop->uio_resid = bp->b_bcount;
+	    io.iov_base = bp->b_data;
+	    uiop->uio_rw = UIO_READ;
+	    switch (vp->v_type) {
+	    case VREG:
+		uiop->uio_offset = bp->b_blkno * DEV_BSIZE;
+		nfsstats.read_bios++;
+		error = nfs_readrpc(vp, uiop, cr);
+		if (!error) {
+		    bp->b_validoff = 0;
+		    if (uiop->uio_resid) {
+			/*
+			 * If len > 0, there is a hole in the file and
+			 * no writes after the hole have been pushed to
+			 * the server yet.
+			 * Just zero fill the rest of the valid area.
+			 */
+			diff = bp->b_bcount - uiop->uio_resid;	/* bytes actually read */
+			len = np->n_size - (bp->b_blkno * DEV_BSIZE
+				+ diff);
+			if (len > 0) {
+			    len = min(len, uiop->uio_resid);
+			    bzero((char *)bp->b_data + diff, len);
+			    bp->b_validend = diff + len;
+			} else
+			    bp->b_validend = diff;
+		    } else
+			bp->b_validend = bp->b_bcount;
+		}
+		if (p && (vp->v_flag & VTEXT) &&
+			(((nmp->nm_flag & NFSMNT_NQNFS) &&
+			  np->n_lrev != np->n_brev) ||
+			 (!(nmp->nm_flag & NFSMNT_NQNFS) &&
+			  np->n_mtime != np->n_vattr.va_mtime.ts_sec))) {
+			uprintf("Process killed due to text file modification\n");
+			psignal(p, SIGKILL);
+			p->p_flag |= P_NOSWAP;
+		}
+		break;
+	    case VLNK:
+		uiop->uio_offset = 0;
+		nfsstats.readlink_bios++;
+		error = nfs_readlinkrpc(vp, uiop, cr);
+		break;
+	    case VDIR:
+		uiop->uio_offset = bp->b_lblkno;	/* directory offset this block was keyed by */
+		nfsstats.readdir_bios++;
+		if (nmp->nm_flag & NFSMNT_NQNFS)
+		    error = nfs_readdirlookrpc(vp, uiop, cr);
+		else
+		    error = nfs_readdirrpc(vp, uiop, cr);
+		/*
+		 * Save offset cookie in b_blkno.
+		 */
+		bp->b_blkno = uiop->uio_offset;
+		break;
+	    };
+	    if (error) {
+		bp->b_flags |= B_ERROR;
+		bp->b_error = error;
+	    }
+	} else {
+	    io.iov_len = uiop->uio_resid = bp->b_dirtyend
+		- bp->b_dirtyoff;			/* only the dirty region is written */
+	    uiop->uio_offset = (bp->b_blkno * DEV_BSIZE)
+		+ bp->b_dirtyoff;
+	    io.iov_base = (char *)bp->b_data + bp->b_dirtyoff;
+	    uiop->uio_rw = UIO_WRITE;
+	    nfsstats.write_bios++;
+	    if (bp->b_flags & B_APPENDWRITE)
+		error = nfs_writerpc(vp, uiop, cr, IO_APPEND);
+	    else
+		error = nfs_writerpc(vp, uiop, cr, 0);
+	    bp->b_flags &= ~(B_WRITEINPROG | B_APPENDWRITE);
+
+	    /*
+	     * For an interrupted write, the buffer is still valid and the
+	     * write hasn't been pushed to the server yet, so we can't set
+	     * B_ERROR and report the interruption by setting B_EINTR. For
+	     * the B_ASYNC case, B_EINTR is not relevant, so the rpc attempt
+	     * is essentially a noop.
+	     */
+	    if (error == EINTR) {
+		bp->b_flags &= ~B_INVAL;
+		bp->b_flags |= B_DELWRI;
+
+		/*
+		 * Since for the B_ASYNC case, nfs_bwrite() has reassigned the
+		 * buffer to the clean list, we have to reassign it back to the
+		 * dirty one. Ugh.
+		 */
+		if (bp->b_flags & B_ASYNC)
+		    reassignbuf(bp, vp);
+		else
+		    bp->b_flags |= B_EINTR;
+	    } else {
+		if (error) {
+		    bp->b_flags |= B_ERROR;
+		    bp->b_error = np->n_error = error;
+		    np->n_flag |= NWRITEERR;	/* nfs_write() returns n_error on its next call */
+		}
+		bp->b_dirtyoff = bp->b_dirtyend = 0;	/* dirty region is dropped even on error */
+	    }
+	}
+	bp->b_resid = uiop->uio_resid;
+	biodone(bp);
+	return (error);			/* 0 or the errno of the rpc */
+}
diff --git a/sys/nfsclient/nfs_nfsiod.c b/sys/nfsclient/nfs_nfsiod.c
new file mode 100644
index 00000000000..5d86b42ee20
--- /dev/null
+++ b/sys/nfsclient/nfs_nfsiod.c
@@ -0,0 +1,874 @@
+/*
+ * Copyright (c) 1989, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Rick Macklem at The University of Guelph.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)nfs_syscalls.c	8.3 (Berkeley) 1/4/94
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/file.h>
+#include <sys/stat.h>
+#include <sys/vnode.h>
+#include <sys/mount.h>
+#include <sys/proc.h>
+#include <sys/uio.h>
+#include <sys/malloc.h>
+#include <sys/buf.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/domain.h>
+#include <sys/protosw.h>
+#include <sys/namei.h>
+#include <sys/syslog.h>
+
+#include <netinet/in.h>
+#include <netinet/tcp.h>
+#ifdef ISO
+#include <netiso/iso.h>
+#endif
+#include <nfs/rpcv2.h>
+#include <nfs/nfsv2.h>
+#include <nfs/nfs.h>
+#include <nfs/nfsrvcache.h>
+#include <nfs/nfsmount.h>
+#include <nfs/nfsnode.h>
+#include <nfs/nqnfs.h>
+#include <nfs/nfsrtt.h>
+
+/* Global defs. */
+extern u_long nfs_prog, nfs_vers;
+extern int (*nfsrv_procs[NFS_NPROCS])();
+extern struct proc *nfs_iodwant[NFS_MAXASYNCDAEMON];
+extern int nfs_numasync;
+extern time_t nqnfsstarttime;
+extern struct nfsrv_req nsrvq_head;
+extern struct nfsd nfsd_head;
+extern int nqsrv_writeslack;
+extern int nfsrtton;
+struct nfssvc_sock *nfs_udpsock, *nfs_cltpsock;	/* reused by nfssvc_addsock() for the UDP / CLTP socket */
+int nuidhash_max = NFS_MAXUIDHASH;	/* per-socket cap on ns_numuids, enforced in nfssvc() */
+static int nfs_numnfsd = 0;
+int nfsd_waiting = 0;
+static int notstarted = 1;
+static int modify_flag = 0;
+static struct nfsdrt nfsdrt;
+void nfsrv_cleancache(), nfsrv_rcv(), nfsrv_wakenfsd(), nfs_sndunlock();
+static void nfsd_rt();
+void nfsrv_slpderef(), nfsrv_init();
+
+#define	TRUE	1
+#define	FALSE	0
+
+static int nfs_asyncdaemon[NFS_MAXASYNCDAEMON];
+/*
+ * NFS server system calls
+ * getfh() lives here too, but maybe should move to kern/vfs_syscalls.c
+ */
+
+/*
+ * Get file handle system call: copy out the handle of fname (root only)
+ */
+struct getfh_args {
+	char	*fname;		/* user-space path to look up */
+	fhandle_t *fhp;		/* user-space buffer receiving the handle */
+};
+getfh(p, uap, retval)
+	struct proc *p;
+	register struct getfh_args *uap;
+	int *retval;
+{
+	struct nameidata ndata;
+	register struct vnode *node;
+	fhandle_t handle;
+	int rv;
+
+	/* Must be super user */
+	rv = suser(p->p_ucred, &p->p_acflag);
+	if (rv != 0)
+		return (rv);
+	NDINIT(&ndata, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, uap->fname, p);
+	rv = namei(&ndata);
+	if (rv != 0)
+		return (rv);
+	node = ndata.ni_vp;
+	/* Build the handle in a zeroed buffer, then let go of the vnode. */
+	bzero((caddr_t)&handle, sizeof(handle));
+	handle.fh_fsid = node->v_mount->mnt_stat.f_fsid;
+	rv = VFS_VPTOFH(node, &handle.fh_fid);
+	vput(node);
+	if (rv != 0)
+		return (rv);
+	return (copyout((caddr_t)&handle, (caddr_t)uap->fhp, sizeof (handle)));
+}
+
+static struct nfssvc_sock nfssvc_sockhead;	/* head of the circular list of nfssvc_sock's */
+
+/*
+ * Nfs server pseudo system call for the nfsd's (super-user only).
+ * Based on the flag value it either:
+ * - adds a socket to the selection list
+ * - remains in the kernel as an nfsd or as an nfsiod
+ * - runs nqnfs_clientd() for an nfs mount point (NFSSVC_MNTD)
+ */
+struct nfssvc_args {
+	int flag;		/* NFSSVC_ bits selecting the operation */
+	caddr_t argp;		/* user address of the operation's argument structure */
+};
+nfssvc(p, uap, retval)
+	struct proc *p;
+	register struct nfssvc_args *uap;
+	int *retval;
+{
+	struct nameidata nd;
+	struct file *fp;
+	struct mbuf *nam;
+	struct nfsd_args nfsdarg;
+	struct nfsd_srvargs nfsd_srvargs, *nsd = &nfsd_srvargs;
+	struct nfsd_cargs ncd;
+	struct nfsd *nfsd;
+	struct nfssvc_sock *slp;
+	struct nfsuid *nuidp, **nuh;
+	struct nfsmount *nmp;
+	int error;
+
+	/*
+	 * Must be super user
+	 */
+	if (error = suser(p->p_ucred, &p->p_acflag))
+		return (error);
+	while (nfssvc_sockhead.ns_flag & SLP_INIT) {
+		nfssvc_sockhead.ns_flag |= SLP_WANTINIT;
+		(void) tsleep((caddr_t)&nfssvc_sockhead, PSOCK, "nfsd init", 0);
+	}
+	if (uap->flag & NFSSVC_BIOD)
+		error = nfssvc_iod(p);
+	else if (uap->flag & NFSSVC_MNTD) {
+		if (error = copyin(uap->argp, (caddr_t)&ncd, sizeof (ncd)))
+			return (error);
+		NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
+			ncd.ncd_dirp, p);
+		if (error = namei(&nd))
+			return (error);
+		if ((nd.ni_vp->v_flag & VROOT) == 0)	/* must name the root of a mount */
+			error = EINVAL;
+		nmp = VFSTONFS(nd.ni_vp->v_mount);
+		vput(nd.ni_vp);
+		if (error)
+			return (error);
+		if ((nmp->nm_flag & NFSMNT_MNTD) &&
+			(uap->flag & NFSSVC_GOTAUTH) == 0)
+			return (0);
+		nmp->nm_flag |= NFSMNT_MNTD;
+		error = nqnfs_clientd(nmp, p->p_ucred, &ncd, uap->flag,
+			uap->argp, p);
+	} else if (uap->flag & NFSSVC_ADDSOCK) {
+		if (error = copyin(uap->argp, (caddr_t)&nfsdarg,
+		    sizeof(nfsdarg)))
+			return (error);
+		if (error = getsock(p->p_fd, nfsdarg.sock, &fp))
+			return (error);
+		/*
+		 * Get the client address for connected sockets.
+		 */
+		if (nfsdarg.name == NULL || nfsdarg.namelen == 0)
+			nam = (struct mbuf *)0;
+		else if (error = sockargs(&nam, nfsdarg.name, nfsdarg.namelen,
+			MT_SONAME))
+			return (error);
+		error = nfssvc_addsock(fp, nam);
+	} else {
+		if (error = copyin(uap->argp, (caddr_t)nsd, sizeof (*nsd)))
+			return (error);
+		if ((uap->flag & NFSSVC_AUTHIN) && (nfsd = nsd->nsd_nfsd) &&
+			(nfsd->nd_slp->ns_flag & SLP_VALID)) {
+			slp = nfsd->nd_slp;
+
+			/*
+			 * First check to see if another nfsd has already
+			 * added this credential.
+			 */
+			nuidp = slp->ns_uidh[NUIDHASH(nsd->nsd_uid)];
+			while (nuidp) {
+				if (nuidp->nu_uid == nsd->nsd_uid)
+					break;
+				nuidp = nuidp->nu_hnext;
+			}
+			if (!nuidp) {
+			    /*
+			     * Nope, so we will.
+			     */
+			    if (slp->ns_numuids < nuidhash_max) {
+				slp->ns_numuids++;
+				nuidp = (struct nfsuid *)
+				   malloc(sizeof (struct nfsuid), M_NFSUID,
+					M_WAITOK);
+			    } else
+				nuidp = (struct nfsuid *)0;
+			    if ((slp->ns_flag & SLP_VALID) == 0) {	/* re-checked after the M_WAITOK malloc */
+				if (nuidp)
+				    free((caddr_t)nuidp, M_NFSUID);
+			    } else {
+				if (nuidp == (struct nfsuid *)0) {	/* at the cap: recycle the lru entry */
+				    nuidp = slp->ns_lruprev;
+				    remque(nuidp);
+				    if (nuidp->nu_hprev)
+					nuidp->nu_hprev->nu_hnext = nuidp->nu_hnext;
+				    else	/* first in its chain: the bucket head must move on too */
+					slp->ns_uidh[NUIDHASH(nuidp->nu_uid)] = nuidp->nu_hnext;
+				    if (nuidp->nu_hnext)
+					nuidp->nu_hnext->nu_hprev = nuidp->nu_hprev;
+				}
+				nuidp->nu_cr = nsd->nsd_cr;
+				if (nuidp->nu_cr.cr_ngroups > NGROUPS)
+					nuidp->nu_cr.cr_ngroups = NGROUPS;
+				nuidp->nu_cr.cr_ref = 1;
+				nuidp->nu_uid = nsd->nsd_uid;
+				insque(nuidp, (struct nfsuid *)slp);	/* slp doubles as the lru list head */
+				nuh = &slp->ns_uidh[NUIDHASH(nsd->nsd_uid)];
+				if (nuidp->nu_hnext = *nuh)
+				    nuidp->nu_hnext->nu_hprev = nuidp;
+				nuidp->nu_hprev = (struct nfsuid *)0;
+				*nuh = nuidp;
+			    }
+			}
+		}
+		if ((uap->flag & NFSSVC_AUTHINFAIL) && (nfsd = nsd->nsd_nfsd))
+			nfsd->nd_flag |= NFSD_AUTHFAIL;
+		error = nfssvc_nfsd(nsd, uap->argp, p);
+	}
+	if (error == EINTR || error == ERESTART)	/* interruption is reported as success */
+		error = 0;
+	return (error);
+}
+
+/*
+ * Adds a socket to the list for servicing by nfsds; mynam is freed on error.
+ */
+nfssvc_addsock(fp, mynam)
+	struct file *fp;
+	struct mbuf *mynam;
+{
+	register struct mbuf *m;
+	register int siz;
+	register struct nfssvc_sock *slp;
+	register struct socket *so;
+	struct nfssvc_sock *tslp;
+	int error, s;
+
+	so = (struct socket *)fp->f_data;
+	tslp = (struct nfssvc_sock *)0;
+	/*
+	 * Add it to the list, as required.
+	 */
+	if (so->so_proto->pr_protocol == IPPROTO_UDP) {
+		tslp = nfs_udpsock;	/* UDP reuses the existing nfs_udpsock entry */
+		if (tslp->ns_flag & SLP_VALID) {	/* ... which is already in use */
+			m_freem(mynam);
+			return (EPERM);
+		}
+#ifdef ISO
+	} else if (so->so_proto->pr_protocol == ISOPROTO_CLTP) {
+		tslp = nfs_cltpsock;
+		if (tslp->ns_flag & SLP_VALID) {
+			m_freem(mynam);
+			return (EPERM);
+		}
+#endif /* ISO */
+	}
+	if (so->so_type == SOCK_STREAM)
+		siz = NFS_MAXPACKET + sizeof (u_long);	/* NOTE(review): extra u_long is presumably the record mark -- confirm */
+	else
+		siz = NFS_MAXPACKET;
+	if (error = soreserve(so, siz, siz)) {
+		m_freem(mynam);
+		return (error);
+	}
+
+	/*
+	 * Set protocol specific options { for now TCP only } and
+	 * reserve some space. For datagram sockets, this can get called
+	 * repeatedly for the same socket, but that isn't harmful.
+	 */
+	if (so->so_type == SOCK_STREAM) {
+		MGET(m, M_WAIT, MT_SOOPTS);
+		*mtod(m, int *) = 1;
+		m->m_len = sizeof(int);
+		sosetopt(so, SOL_SOCKET, SO_KEEPALIVE, m);
+	}
+	if (so->so_proto->pr_domain->dom_family == AF_INET &&
+	    so->so_proto->pr_protocol == IPPROTO_TCP) {
+		MGET(m, M_WAIT, MT_SOOPTS);
+		*mtod(m, int *) = 1;
+		m->m_len = sizeof(int);
+		sosetopt(so, IPPROTO_TCP, TCP_NODELAY, m);
+	}
+	so->so_rcv.sb_flags &= ~SB_NOINTR;
+	so->so_rcv.sb_timeo = 0;
+	so->so_snd.sb_flags &= ~SB_NOINTR;
+	so->so_snd.sb_timeo = 0;
+	if (tslp)
+		slp = tslp;
+	else {
+		slp = (struct nfssvc_sock *)
+			malloc(sizeof (struct nfssvc_sock), M_NFSSVC, M_WAITOK);
+		bzero((caddr_t)slp, sizeof (struct nfssvc_sock));
+		slp->ns_prev = nfssvc_sockhead.ns_prev;	/* link in at the tail of the list */
+		slp->ns_prev->ns_next = slp;
+		slp->ns_next = &nfssvc_sockhead;
+		nfssvc_sockhead.ns_prev = slp;
+		slp->ns_lrunext = slp->ns_lruprev = (struct nfsuid *)slp;	/* empty lru list */
+	}
+	slp->ns_so = so;
+	slp->ns_nam = mynam;
+	fp->f_count++;		/* reference held on behalf of ns_fp */
+	slp->ns_fp = fp;
+	s = splnet();
+	so->so_upcallarg = (caddr_t)slp;
+	so->so_upcall = nfsrv_rcv;	/* installed as the socket's upcall, with slp as argument */
+	slp->ns_flag = (SLP_VALID | SLP_NEEDQ);
+	nfsrv_wakenfsd(slp);
+	splx(s);
+	return (0);
+}
+
+/*
+ * Called by nfssvc() for nfsds. Just loops around servicing rpc requests
+ * until it is killed by a signal.
+ */
+nfssvc_nfsd(nsd, argp, p)
+	struct nfsd_srvargs *nsd;	/* daemon arguments; copied out to argp */
+	caddr_t argp;			/* copyout() target for *nsd */
+	struct proc *p;
+{
+	register struct mbuf *m, *nam2;
+	register int siz;
+	register struct nfssvc_sock *slp;
+	register struct socket *so;
+	register int *solockp;
+	struct nfsd *nd = nsd->nsd_nfsd;
+	struct mbuf *mreq, *nam;
+	struct timeval starttime;
+	struct nfsuid *uidp;
+	int error, cacherep, s;
+	int sotype;
+
+	s = splnet();
+	if (nd == (struct nfsd *)0) {	/* no nfsd structure yet: make one */
+		nsd->nsd_nfsd = nd = (struct nfsd *)
+			malloc(sizeof (struct nfsd), M_NFSD, M_WAITOK);
+		bzero((caddr_t)nd, sizeof (struct nfsd));
+		nd->nd_procp = p;
+		nd->nd_cr.cr_ref = 1;
+		insque(nd, &nfsd_head);
+		nd->nd_nqlflag = NQL_NOVAL;
+		nfs_numnfsd++;
+	}
+	/*
+	 * Loop getting rpc requests until SIGKILL.
+	 */
+	for (;;) {
+		if ((nd->nd_flag & NFSD_REQINPROG) == 0) {
+			while (nd->nd_slp == (struct nfssvc_sock *)0 &&
+				 (nfsd_head.nd_flag & NFSD_CHECKSLP) == 0) {
+				nd->nd_flag |= NFSD_WAITING;
+				nfsd_waiting++;
+				error = tsleep((caddr_t)nd, PSOCK | PCATCH, "nfsd", 0);
+				nfsd_waiting--;
+				if (error)
+					goto done;	/* still at splnet here */
+			}
+			if (nd->nd_slp == (struct nfssvc_sock *)0 &&
+				(nfsd_head.nd_flag & NFSD_CHECKSLP)) {
+				slp = nfssvc_sockhead.ns_next;
+				while (slp != &nfssvc_sockhead) {
+				    if ((slp->ns_flag & (SLP_VALID | SLP_DOREC))
+					== (SLP_VALID | SLP_DOREC)) {
+					    slp->ns_flag &= ~SLP_DOREC;
+					    slp->ns_sref++;	/* nfsrv_slpderef() drops it */
+					    nd->nd_slp = slp;
+					    break;
+				    }
+				    slp = slp->ns_next;
+				}
+				if (slp == &nfssvc_sockhead)	/* nothing found */
+					nfsd_head.nd_flag &= ~NFSD_CHECKSLP;
+			}
+			if ((slp = nd->nd_slp) == (struct nfssvc_sock *)0)
+				continue;
+			if (slp->ns_flag & SLP_VALID) {
+				if (slp->ns_flag & SLP_DISCONN)
+					nfsrv_zapsock(slp);
+				else if (slp->ns_flag & SLP_NEEDQ) {
+					slp->ns_flag &= ~SLP_NEEDQ;
+					(void) nfs_sndlock(&slp->ns_solock,
+						(struct nfsreq *)0);
+					nfsrv_rcv(slp->ns_so, (caddr_t)slp,
+						M_WAIT);
+					nfs_sndunlock(&slp->ns_solock);
+				}
+				error = nfsrv_dorec(slp, nd);
+				nd->nd_flag |= NFSD_REQINPROG;
+			}	/* NOTE(review): error is not assigned when !SLP_VALID */
+		} else {
+			error = 0;
+			slp = nd->nd_slp;
+		}
+		if (error || (slp->ns_flag & SLP_VALID) == 0) {
+			nd->nd_slp = (struct nfssvc_sock *)0;	/* give the socket up */
+			nd->nd_flag &= ~NFSD_REQINPROG;
+			nfsrv_slpderef(slp);
+			continue;
+		}
+		splx(s);	/* the request itself is handled below splnet */
+		so = slp->ns_so;
+		sotype = so->so_type;
+		starttime = time;
+		if (so->so_proto->pr_flags & PR_CONNREQUIRED)
+			solockp = &slp->ns_solock;
+		else
+			solockp = (int *)0;
+		/*
+		 * nam == nam2 for connectionless protocols such as UDP
+		 * nam2 == NULL for connection based protocols to disable
+		 *    recent request caching.
+		 */
+		if (nam2 = nd->nd_nam) {
+			nam = nam2;
+			cacherep = RC_CHECKIT;
+		} else {
+			nam = slp->ns_nam;
+			cacherep = RC_DOIT;
+		}
+
+		/*
+		 * Check to see if authorization is needed.
+		 */
+		if (nd->nd_flag & NFSD_NEEDAUTH) {
+			static int logauth = 0;	/* makes the warning below one-shot */
+
+			nd->nd_flag &= ~NFSD_NEEDAUTH;
+			/*
+			 * Check for a mapping already installed.
+			 */
+			uidp = slp->ns_uidh[NUIDHASH(nd->nd_cr.cr_uid)];
+			while (uidp) {
+				if (uidp->nu_uid == nd->nd_cr.cr_uid)
+					break;
+				uidp = uidp->nu_hnext;
+			}
+			if (!uidp) {	/* no mapping: hand the request to user space */
+			    nsd->nsd_uid = nd->nd_cr.cr_uid;
+			    if (nam2 && logauth++ == 0)
+				log(LOG_WARNING, "Kerberized NFS using UDP\n");
+			    nsd->nsd_haddr =
+			      mtod(nam, struct sockaddr_in *)->sin_addr.s_addr;
+			    nsd->nsd_authlen = nd->nd_authlen;
+			    if (copyout(nd->nd_authstr, nsd->nsd_authstr,
+				nd->nd_authlen) == 0 &&
+				copyout((caddr_t)nsd, argp, sizeof (*nsd)) == 0)
+				return (ENEEDAUTH);	/* nd is left allocated */
+			    cacherep = RC_DROPIT;	/* a copyout failed */
+			}
+		}
+		if (cacherep == RC_CHECKIT)
+			cacherep = nfsrv_getcache(nam2, nd, &mreq);
+
+		/*
+		 * Check for just starting up for NQNFS and send
+		 * fake "try again later" replies to the NQNFS clients.
+		 */
+		if (notstarted && nqnfsstarttime <= time.tv_sec) {
+			if (modify_flag) {
+				nqnfsstarttime = time.tv_sec + nqsrv_writeslack;
+				modify_flag = 0;
+			} else
+				notstarted = 0;
+		}
+		if (notstarted) {
+			if (nd->nd_nqlflag == NQL_NOVAL)
+				cacherep = RC_DROPIT;
+			else if (nd->nd_procnum != NFSPROC_WRITE) {
+				nd->nd_procnum = NFSPROC_NOOP;
+				nd->nd_repstat = NQNFS_TRYLATER;
+				cacherep = RC_DOIT;
+			} else
+				modify_flag = 1;
+		} else if (nd->nd_flag & NFSD_AUTHFAIL) {
+			nd->nd_flag &= ~NFSD_AUTHFAIL;
+			nd->nd_procnum = NFSPROC_NOOP;
+			nd->nd_repstat = NQNFS_AUTHERR;
+			cacherep = RC_DOIT;
+		}
+
+		switch (cacherep) {
+		case RC_DOIT:
+			error = (*(nfsrv_procs[nd->nd_procnum]))(nd,
+				nd->nd_mrep, nd->nd_md, nd->nd_dpos, &nd->nd_cr,
+				nam, &mreq);
+			if (nd->nd_cr.cr_ref != 1) {
+				printf("nfssvc cref=%d\n", nd->nd_cr.cr_ref);
+				panic("nfssvc cref");
+			}
+			if (error) {
+				if (nd->nd_procnum != NQNFSPROC_VACATED)
+					nfsstats.srv_errs++;
+				if (nam2) {
+					nfsrv_updatecache(nam2, nd, FALSE, mreq);
+					m_freem(nam2);
+				}
+				break;	/* no reply is sent on this path */
+			}
+			nfsstats.srvrpccnt[nd->nd_procnum]++;
+			if (nam2)
+				nfsrv_updatecache(nam2, nd, TRUE, mreq);
+			nd->nd_mrep = (struct mbuf *)0;	/* FALLTHROUGH: send mreq */
+		case RC_REPLY:
+			m = mreq;
+			siz = 0;
+			while (m) {	/* total length of the reply chain */
+				siz += m->m_len;
+				m = m->m_next;
+			}
+			if (siz <= 0 || siz > NFS_MAXPACKET) {
+				printf("mbuf siz=%d\n",siz);
+				panic("Bad nfs svc reply");
+			}
+			m = mreq;
+			m->m_pkthdr.len = siz;
+			m->m_pkthdr.rcvif = (struct ifnet *)0;
+			/*
+			 * For stream protocols, prepend a Sun RPC
+			 * Record Mark.
+			 */
+			if (sotype == SOCK_STREAM) {
+				M_PREPEND(m, NFSX_UNSIGNED, M_WAIT);
+				*mtod(m, u_long *) = htonl(0x80000000 | siz);
+			}
+			if (solockp)
+				(void) nfs_sndlock(solockp, (struct nfsreq *)0);
+			if (slp->ns_flag & SLP_VALID)
+			    error = nfs_send(so, nam2, m, (struct nfsreq *)0);
+			else {
+			    error = EPIPE;
+			    m_freem(m);
+			}
+			if (nfsrtton)
+				nfsd_rt(&starttime, sotype, nd, nam, cacherep);
+			if (nam2)
+				MFREE(nam2, m);	/* m is only scratch here */
+			if (nd->nd_mrep)
+				m_freem(nd->nd_mrep);
+			if (error == EPIPE)
+				nfsrv_zapsock(slp);
+			if (solockp)
+				nfs_sndunlock(solockp);
+			if (error == EINTR || error == ERESTART) {
+				nfsrv_slpderef(slp);
+				s = splnet();	/* done: expects splnet */
+				goto done;
+			}
+			break;
+		case RC_DROPIT:
+			if (nfsrtton)
+				nfsd_rt(&starttime, sotype, nd, nam, cacherep);
+			m_freem(nd->nd_mrep);
+			m_freem(nam2);
+			break;
+		};
+		s = splnet();
+		if (nfsrv_dorec(slp, nd)) {	/* nothing more queued on slp */
+			nd->nd_flag &= ~NFSD_REQINPROG;
+			nd->nd_slp = (struct nfssvc_sock *)0;
+			nfsrv_slpderef(slp);
+		}
+	}
+done:
+	remque(nd);
+	splx(s);
+	free((caddr_t)nd, M_NFSD);
+	nsd->nsd_nfsd = (struct nfsd *)0;
+	if (--nfs_numnfsd == 0)	/* the last nfsd is leaving */
+		nfsrv_init(TRUE);	/* Reinitialize everything */
+	return (error);
+}
+
+/*
+ * Asynchronous I/O daemons for client nfs.
+ * They do read-ahead and write-behind operations on the block I/O cache.
+ * Never returns unless it fails or gets killed: EBUSY when every daemon
+ * slot is taken, otherwise the error from the interrupted tsleep().
+ */
+nfssvc_iod(p)
+	struct proc *p;
+{
+	register struct buf *bp;
+	register int i, myiod;
+	int error = 0;
+
+	/* Assign my position or return error if too many already running */
+	myiod = -1;
+	for (i = 0; i < NFS_MAXASYNCDAEMON; i++)
+		if (nfs_asyncdaemon[i] == 0) {
+			nfs_asyncdaemon[i]++;
+			myiod = i;
+			break;
+		}
+	if (myiod == -1)
+		return (EBUSY);
+	nfs_numasync++;
+	/* Just loop around doing our stuff until SIGKILL */
+	for (;;) {
+		while (nfs_bufq.tqh_first == NULL && error == 0) {
+			/* Advertise this daemon as idle, then wait for work */
+			nfs_iodwant[myiod] = p;
+			error = tsleep((caddr_t)&nfs_iodwant[myiod],
+				PWAIT | PCATCH, "nfsidl", 0);
+		}
+		while ((bp = nfs_bufq.tqh_first) != NULL) {
+			/* Take one off the front of the list */
+			TAILQ_REMOVE(&nfs_bufq, bp, b_freelist);
+			if (bp->b_flags & B_READ)
+			    (void) nfs_doio(bp, bp->b_rcred, (struct proc *)0);
+			else
+			    (void) nfs_doio(bp, bp->b_wcred, (struct proc *)0);
+		}
+		if (error) {
+			/* Do not leave a stale idle entry behind */
+			nfs_iodwant[myiod] = (struct proc *)0;
+			nfs_asyncdaemon[myiod] = 0;
+			nfs_numasync--;
+			return (error);
+		}
+	}
+}
+
+/*
+ * Shut down a socket associated with an nfssvc_sock structure.
+ * Should be called with the send lock set, if required.
+ * Every SLP_ALLFLAGS bit (presumably including SLP_VALID) is cleared up
+ * front; the entry itself is neither unlinked nor freed here.
+ */
+nfsrv_zapsock(slp)
+	register struct nfssvc_sock *slp;
+{
+	register struct nfsuid *nuidp, *onuidp;
+	register int i;
+	struct socket *so;
+	struct file *fp;
+	struct mbuf *m;		/* scratch for MFREE() */
+
+	slp->ns_flag &= ~SLP_ALLFLAGS;
+	if (fp = slp->ns_fp) {
+		slp->ns_fp = (struct file *)0;
+		so = slp->ns_so;
+		so->so_upcall = NULL;	/* no more receive upcalls */
+		soshutdown(so, 2);
+		closef(fp, (struct proc *)0);
+		if (slp->ns_nam)
+			MFREE(slp->ns_nam, m);
+		m_freem(slp->ns_raw);
+		m_freem(slp->ns_rec);	/* freed pointers are cleared just below */
+		slp->ns_nam = slp->ns_raw = slp->ns_rec = (struct mbuf *)0;
+		nuidp = slp->ns_lrunext;
+		while (nuidp != (struct nfsuid *)slp) {
+			onuidp = nuidp;
+			nuidp = nuidp->nu_lrunext;	/* step on before freeing */
+			free((caddr_t)onuidp, M_NFSUID);
+		}
+		slp->ns_lrunext = slp->ns_lruprev = (struct nfsuid *)slp;
+		for (i = 0; i < NUIDHASHSIZ; i++)
+			slp->ns_uidh[i] = (struct nfsuid *)0;
+	}
+}
+
+/*
+ * Get an authorization string for the uid by having the mount_nfs sitting
+ * on this mount point porpoise out of the kernel and do it.
+ */
+nfs_getauth(nmp, rep, cred, auth_type, auth_str, auth_len)
+	register struct nfsmount *nmp;
+	struct nfsreq *rep;
+	struct ucred *cred;
+	int *auth_type;		/* out: nm_authtype, set only on success */
+	char **auth_str;	/* out: RPCAUTH_MAXSIZ buffer, freed on error */
+	int *auth_len;		/* out: nm_authlen, set only on success */
+{
+	int error = 0;
+
+	while ((nmp->nm_flag & NFSMNT_WAITAUTH) == 0) {
+		nmp->nm_flag |= NFSMNT_WANTAUTH;
+		(void) tsleep((caddr_t)&nmp->nm_authtype, PSOCK,
+			"nfsauth1", 2 * hz);	/* re-check at least every 2s */
+		if (error = nfs_sigintr(nmp, rep, rep->r_procp)) {
+			nmp->nm_flag &= ~NFSMNT_WANTAUTH;
+			return (error);
+		}
+	}
+	nmp->nm_flag &= ~(NFSMNT_WAITAUTH | NFSMNT_WANTAUTH);
+	nmp->nm_authstr = *auth_str = (char *)malloc(RPCAUTH_MAXSIZ, M_TEMP, M_WAITOK);
+	nmp->nm_authuid = cred->cr_uid;
+	wakeup((caddr_t)&nmp->nm_authstr);
+
+	/*
+	 * And wait for mount_nfs to do its stuff.
+	 */
+	while ((nmp->nm_flag & NFSMNT_HASAUTH) == 0 && error == 0) {
+		(void) tsleep((caddr_t)&nmp->nm_authlen, PSOCK,
+			"nfsauth2", 2 * hz);
+		error = nfs_sigintr(nmp, rep, rep->r_procp);
+	}
+	if (nmp->nm_flag & NFSMNT_AUTHERR) {
+		nmp->nm_flag &= ~NFSMNT_AUTHERR;
+		error = EAUTH;
+	}
+	if (error)	/* NOTE(review): nm_authstr still holds the freed address */
+		free((caddr_t)*auth_str, M_TEMP);
+	else {
+		*auth_type = nmp->nm_authtype;
+		*auth_len = nmp->nm_authlen;
+	}
+	nmp->nm_flag &= ~NFSMNT_HASAUTH;
+	nmp->nm_flag |= NFSMNT_WAITAUTH;
+	if (nmp->nm_flag & NFSMNT_WANTAUTH) {	/* someone sleeps in loop one */
+		nmp->nm_flag &= ~NFSMNT_WANTAUTH;
+		wakeup((caddr_t)&nmp->nm_authtype);
+	}
+	return (error);
+}
+
+/*
+ * Drop one reference on a server socket structure; once the count hits
+ * zero on an entry that is no longer valid, unlink it and free it.
+ */
+void
+nfsrv_slpderef(slp)
+	register struct nfssvc_sock *slp;
+{
+	if (--slp->ns_sref != 0 || (slp->ns_flag & SLP_VALID) != 0)
+		return;
+	slp->ns_next->ns_prev = slp->ns_prev;
+	slp->ns_prev->ns_next = slp->ns_next;
+	free((caddr_t)slp, M_NFSSVC);
+}
+
+/*
+ * Initialize the data structures for the server.
+ * Handshake with any new nfsds starting up to avoid any chance of
+ * corruption.
+ */
+void
+nfsrv_init(terminating)
+	int terminating;	/* non-zero: tear down the existing sockets first */
+{
+	register struct nfssvc_sock *slp;
+	struct nfssvc_sock *oslp;
+
+	if (nfssvc_sockhead.ns_flag & SLP_INIT)	/* already in progress */
+		panic("nfsd init");
+	nfssvc_sockhead.ns_flag |= SLP_INIT;
+	if (terminating) {
+		slp = nfssvc_sockhead.ns_next;
+		while (slp != &nfssvc_sockhead) {
+			if (slp->ns_flag & SLP_VALID)
+				nfsrv_zapsock(slp);
+			slp->ns_next->ns_prev = slp->ns_prev;
+			slp->ns_prev->ns_next = slp->ns_next;
+			oslp = slp;
+			slp = slp->ns_next;	/* step on before freeing oslp */
+			free((caddr_t)oslp, M_NFSSVC);
+		}
+		nfsrv_cleancache();	/* And clear out server cache */
+	}
+	nfs_udpsock = (struct nfssvc_sock *)
+	    malloc(sizeof (struct nfssvc_sock), M_NFSSVC, M_WAITOK);
+	bzero((caddr_t)nfs_udpsock, sizeof (struct nfssvc_sock));
+	nfs_cltpsock = (struct nfssvc_sock *)
+	    malloc(sizeof (struct nfssvc_sock), M_NFSSVC, M_WAITOK);
+	bzero((caddr_t)nfs_cltpsock, sizeof (struct nfssvc_sock));
+	nfssvc_sockhead.ns_next = nfs_udpsock;	/* head -> udp -> cltp -> head */
+	nfs_udpsock->ns_next = nfs_cltpsock;
+	nfs_cltpsock->ns_next = &nfssvc_sockhead;
+	nfssvc_sockhead.ns_prev = nfs_cltpsock;
+	nfs_cltpsock->ns_prev = nfs_udpsock;
+	nfs_udpsock->ns_prev = &nfssvc_sockhead;
+	nfs_udpsock->ns_lrunext = nfs_udpsock->ns_lruprev =
+		(struct nfsuid *)nfs_udpsock;	/* empty list: points at itself */
+	nfs_cltpsock->ns_lrunext = nfs_cltpsock->ns_lruprev =
+		(struct nfsuid *)nfs_cltpsock;
+	nfsd_head.nd_next = nfsd_head.nd_prev = &nfsd_head;
+	nfsd_head.nd_flag = 0;
+	nfssvc_sockhead.ns_flag &= ~SLP_INIT;
+	if (nfssvc_sockhead.ns_flag & SLP_WANTINIT) {
+		nfssvc_sockhead.ns_flag &= ~SLP_WANTINIT;
+		wakeup((caddr_t)&nfssvc_sockhead);	/* release any waiter */
+	}
+}
+
+/*
+ * Record one request in the circular server monitor log (nfsdrt).
+ */
+static void
+nfsd_rt(startp, sotype, nd, nam, cacherep)
+	struct timeval *startp;		/* start time taken by the caller */
+	int sotype;
+	register struct nfsd *nd;
+	struct mbuf *nam;		/* client address; may be a null pointer */
+	int cacherep;			/* RC_DOIT, RC_REPLY, else a cache drop */
+{
+	register struct drt *rt;
+
+	rt = &nfsdrt.drt[nfsdrt.pos];
+	if (cacherep == RC_DOIT)
+		rt->flag = 0;
+	else if (cacherep == RC_REPLY)
+		rt->flag = DRT_CACHEREPLY;
+	else
+		rt->flag = DRT_CACHEDROP;
+	if (sotype == SOCK_STREAM)
+		rt->flag |= DRT_TCP;
+	if (nd->nd_nqlflag != NQL_NOVAL)
+		rt->flag |= DRT_NQNFS;
+	rt->proc = nd->nd_procnum;
+	if (nam && mtod(nam, struct sockaddr *)->sa_family == AF_INET)
+		rt->ipadr = mtod(nam, struct sockaddr_in *)->sin_addr.s_addr;
+	else
+		rt->ipadr = INADDR_ANY;	/* no address, or not an AF_INET one */
+	rt->resptime = ((time.tv_sec - startp->tv_sec) * 1000000) +
+		(time.tv_usec - startp->tv_usec);	/* microseconds */
+	rt->tstamp = time;
+	nfsdrt.pos = (nfsdrt.pos + 1) % NFSRTTLOGSIZ;	/* wrap around */
+}
diff --git a/sys/nfsclient/nfs_node.c b/sys/nfsclient/nfs_node.c
new file mode 100644
index 00000000000..032bdef0d5a
--- /dev/null
+++ b/sys/nfsclient/nfs_node.c
@@ -0,0 +1,294 @@
+/*
+ * Copyright (c) 1989, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Rick Macklem at The University of Guelph.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)nfs_node.c	8.2 (Berkeley) 12/30/93
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/mount.h>
+#include <sys/namei.h>
+#include <sys/vnode.h>
+#include <sys/kernel.h>
+#include <sys/malloc.h>
+
+#include <nfs/rpcv2.h>
+#include <nfs/nfsv2.h>
+#include <nfs/nfs.h>
+#include <nfs/nfsnode.h>
+#include <nfs/nfsmount.h>
+#include <nfs/nqnfs.h>
+
+struct nfsnode **nheadhashtbl;	/* hash chain heads, set by nfs_nhinit() */
+u_long nheadhash;		/* filled in by hashinit(); used as a mask */
+#define	NFSNOHASH(fhsum)	((fhsum)&nheadhash)	/* sum -> table index */
+
+#define TRUE	1
+#define	FALSE	0
+
+/*
+ * Allocate the nfsnode hash table (nheadhashtbl/nheadhash) via hashinit()
+ * and complain if sizeof (struct nfsnode) is not a power of two.
+ */
+nfs_nhinit()
+{
+
+#ifndef lint
+	if ((sizeof(struct nfsnode) - 1) & sizeof(struct nfsnode))
+		printf("nfs_nhinit: bad size %d\n", (int)sizeof(struct nfsnode));
+#endif /* not lint */
+	nheadhashtbl = hashinit(desiredvnodes, M_NFSNODE, &nheadhash);
+}
+
+/*
+ * Return the hash chain head for a file handle: its NFSX_FH bytes are
+ * summed and the sum is reduced with NFSNOHASH().
+ */
+struct nfsnode **
+nfs_hash(fhp)
+	register nfsv2fh_t *fhp;
+{
+	register u_char *cp;
+	register u_char *ep;
+	register u_long sum = 0;
+
+	cp = &fhp->fh_bytes[0];
+	for (ep = cp + NFSX_FH; cp < ep; cp++)
+		sum += *cp;
+	return (nheadhashtbl + NFSNOHASH(sum));
+}
+
+/*
+ * Look up a vnode/nfsnode by file handle.
+ * Callers must check for mount points!!
+ * On success 0 is returned and *npp points to the nfsnode; if
+ * getnewvnode() fails, its error is returned and *npp is set to 0.
+ */
+nfs_nget(mntp, fhp, npp)
+	struct mount *mntp;
+	register nfsv2fh_t *fhp;
+	struct nfsnode **npp;
+{
+	register struct nfsnode *np, *nq, **nhpp;
+	register struct vnode *vp;
+	extern int (**nfsv2_vnodeop_p)();
+	struct vnode *nvp;
+	int error;
+
+	nhpp = nfs_hash(fhp);
+loop:
+	for (np = *nhpp; np; np = np->n_forw) {
+		if (mntp != NFSTOV(np)->v_mount ||
+		    bcmp((caddr_t)fhp, (caddr_t)&np->n_fh, NFSX_FH))
+			continue;	/* other mount or other file handle */
+		vp = NFSTOV(np);
+		if (vget(vp, 1))
+			goto loop;	/* vget failed: rescan the chain */
+		*npp = np;
+		return(0);
+	}
+	if (error = getnewvnode(VT_NFS, mntp, nfsv2_vnodeop_p, &nvp)) {
+		*npp = 0;
+		return (error);
+	}
+	vp = nvp;	/* NOTE(review): chain is not rechecked after allocating */
+	MALLOC(np, struct nfsnode *, sizeof *np, M_NFSNODE, M_WAITOK);
+	vp->v_data = np;	/* only the fields set below are initialized here */
+	np->n_vnode = vp;
+	/*
+	 * Insert the nfsnode in the hash queue for its new file handle
+	 */
+	np->n_flag = 0;
+	if (nq = *nhpp)
+		nq->n_back = &np->n_forw;
+	np->n_forw = nq;
+	np->n_back = nhpp;	/* address of the link that points at np */
+	*nhpp = np;
+	bcopy((caddr_t)fhp, (caddr_t)&np->n_fh, NFSX_FH);
+	np->n_attrstamp = 0;
+	np->n_direofoffset = 0;
+	np->n_sillyrename = (struct sillyrename *)0;
+	np->n_size = 0;
+	np->n_mtime = 0;
+	if (VFSTONFS(mntp)->nm_flag & NFSMNT_NQNFS) {
+		np->n_brev = 0;
+		np->n_lrev = 0;
+		np->n_expiry = (time_t)0;
+		np->n_tnext = (struct nfsnode *)0;	/* not on the timer queue */
+	}
+	*npp = np;
+	return (0);
+}
+
+nfs_inactive(ap)	/* finish any pending silly rename, then trim n_flag */
+	struct vop_inactive_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+	register struct nfsnode *np;
+	register struct sillyrename *sp;
+	struct proc *p = curproc;	/* XXX */
+	extern int prtactive;
+
+	np = VTONFS(ap->a_vp);
+	if (prtactive && ap->a_vp->v_usecount != 0)
+		vprint("nfs_inactive: pushing active", ap->a_vp);
+	sp = np->n_sillyrename;
+	np->n_sillyrename = (struct sillyrename *)0;	/* detach before use */
+	if (sp) {
+		/*
+		 * Remove the silly file that was rename'd earlier
+		 */
+		(void) nfs_vinvalbuf(ap->a_vp, 0, sp->s_cred, p, 1);
+		nfs_removeit(sp);
+		crfree(sp->s_cred);	/* drop the credential sp held */
+		vrele(sp->s_dvp);	/* drop the directory vnode sp held */
+#ifdef SILLYSEPARATE	/* otherwise sp is not freed here -- confirm ownership */
+		free((caddr_t)sp, M_NFSREQ);
+#endif
+	}
+	np->n_flag &= (NMODIFIED | NFLUSHINPROG | NFLUSHWANT | NQNFSEVICTED |
+		NQNFSNONCACHE | NQNFSWRITE);	/* every other flag is cleared */
+	return (0);
+}
+
+/*
+ * Reclaim an nfsnode: unhook it from its queues and free it.
+ */
+nfs_reclaim(ap)
+	struct vop_reclaim_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+	extern int prtactive;
+	register struct vnode *vp;
+	register struct nfsnode *np, *fwd, *tnext, *tprev;
+	register struct nfsmount *nmp;
+
+	vp = ap->a_vp;
+	np = VTONFS(vp);
+	nmp = VFSTONFS(vp->v_mount);
+	if (prtactive && vp->v_usecount != 0)
+		vprint("nfs_reclaim: pushing active", vp);
+	/* Unlink from the hash chain; n_back addresses the previous link. */
+	fwd = np->n_forw;
+	if (fwd)
+		fwd->n_back = np->n_back;
+	*np->n_back = fwd;
+	/* For nqnfs, also unlink from the timer queue headed by the mount. */
+	if ((nmp->nm_flag & NFSMNT_NQNFS) != 0 && np->n_tnext) {
+		tnext = np->n_tnext;
+		tprev = np->n_tprev;
+		if (tnext != (struct nfsnode *)nmp)
+			tnext->n_tprev = tprev;
+		else
+			nmp->nm_tprev = tprev;
+		if (tprev != (struct nfsnode *)nmp)
+			tprev->n_tnext = tnext;
+		else
+			nmp->nm_tnext = tnext;
+	}
+	cache_purge(vp);
+	FREE(vp->v_data, M_NFSNODE);
+	vp->v_data = (void *)0;
+	return (0);
+}
+
+/*
+ * Lock an nfsnode: wait out VXLOCK; ENOENT if the vnode's tag is VT_NON.
+ */
+nfs_lock(ap)
+	struct vop_lock_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+	register struct vnode *vp;
+
+	vp = ap->a_vp;
+	/*
+	 * This sleep cannot be interrupted, so interruptible mounts can
+	 * hang here. Making it interruptible means first teaching every
+	 * VOP_LOCK() caller to expect an interrupted lock.
+	 */
+	while ((vp->v_flag & VXLOCK) != 0) {
+		vp->v_flag |= VXWANT;
+		sleep((caddr_t)vp, PINOD);
+	}
+
+	return (vp->v_tag == VT_NON ? ENOENT : 0);
+}
+
+/*
+ * Unlock an nfsnode: a no-op that ignores ap and always returns 0.
+ */
+nfs_unlock(ap)
+	struct vop_unlock_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+
+	return (0);
+}
+
+/*
+ * Check for a locked nfsnode: ap is not examined, the answer is always 0.
+ */
+nfs_islocked(ap)
+	struct vop_islocked_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+
+	return (0);
+}
+
+/*
+ * Nfs abort op, called after namei() when a CREATE/DELETE isn't actually
+ * done; frees the pathname buffer when HASBUF is set and SAVESTART is not.
+ */
+int
+nfs_abortop(ap)
+	struct vop_abortop_args /* {
+		struct vnode *a_dvp;
+		struct componentname *a_cnp;
+	} */ *ap;
+{
+	register struct componentname *cnp = ap->a_cnp;
+
+	if ((cnp->cn_flags & HASBUF) == HASBUF && !(cnp->cn_flags & SAVESTART))
+		FREE(cnp->cn_pnbuf, M_NAMEI);
+	return (0);
+}
diff --git a/sys/nfsclient/nfs_socket.c b/sys/nfsclient/nfs_socket.c
new file mode 100644
index 00000000000..cf88ed33d92
--- /dev/null
+++ b/sys/nfsclient/nfs_socket.c
@@ -0,0 +1,1990 @@
+/*
+ * Copyright (c) 1989, 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Rick Macklem at The University of Guelph.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)nfs_socket.c	8.3 (Berkeley) 1/12/94
+ */
+
+/*
+ * Socket operations for use by nfs
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/mount.h>
+#include <sys/kernel.h>
+#include <sys/mbuf.h>
+#include <sys/vnode.h>
+#include <sys/domain.h>
+#include <sys/protosw.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/syslog.h>
+#include <sys/tprintf.h>
+
+#include <netinet/in.h>
+#include <netinet/tcp.h>
+#include <nfs/rpcv2.h>
+#include <nfs/nfsv2.h>
+#include <nfs/nfs.h>
+#include <nfs/xdr_subs.h>
+#include <nfs/nfsm_subs.h>
+#include <nfs/nfsmount.h>
+#include <nfs/nfsnode.h>
+#include <nfs/nfsrtt.h>
+#include <nfs/nqnfs.h>
+
+#define	TRUE	1
+#define	FALSE	0
+
+/*
+ * Estimate rto for an nfs rpc sent via an unreliable datagram.
+ * Use the mean and mean deviation of rtt for the appropriate type of rpc
+ * for the frequent rpcs and a default for the others, because the others
+ * happen so infrequently that timer est. would probably be stale. Also,
+ * since many of them are non-idempotent, a conservative timeout is desired.
+ * getattr, lookup - A+2D
+ * read, write     - A+4D
+ * other           - nm_timeo
+ * t is a timer number from proct[]; NFS_SRTT()/NFS_SDRTT() index with
+ * timer - 1 and so must not be used when proct[] gives 0.
+ */
+#define	NFS_RTO(n, t) \
+	((t) == 0 ? (n)->nm_timeo : ((t) < 3 ? \
+	  (((((n)->nm_srtt[(t) - 1] + 3) >> 2) + \
+	    (n)->nm_sdrtt[(t) - 1] + 1) >> 1) : \
+	  ((((n)->nm_srtt[(t) - 1] + 7) >> 3) + (n)->nm_sdrtt[(t) - 1] + 1)))
+#define	NFS_SRTT(r)	(r)->r_nmp->nm_srtt[proct[(r)->r_procnum] - 1]
+#define	NFS_SDRTT(r)	(r)->r_nmp->nm_sdrtt[proct[(r)->r_procnum] - 1]
+/*
+ * External data, mostly RPC constants in XDR form
+ */
+extern u_long rpc_reply, rpc_msgdenied, rpc_mismatch, rpc_vers, rpc_auth_unix,
+	rpc_msgaccepted, rpc_call, rpc_autherr, rpc_rejectedcred,
+	rpc_auth_kerb;
+extern u_long nfs_prog, nfs_vers, nqnfs_prog, nqnfs_vers;
+extern time_t nqnfsstarttime;
+extern int nonidempotent[NFS_NPROCS];
+
+/*
+ * Maps errno values to nfs error numbers (81 initializers; presumably
+ * indexed by errno - 1, given the ELAST bound -- confirm where it is used).
+ * Use NFSERR_IO as the catch all for ones not defined in RFC 1094.
+ */
+static int nfsrv_errmap[ELAST] = {
+  NFSERR_PERM,	NFSERR_NOENT,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,
+  NFSERR_NXIO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,
+  NFSERR_IO,	NFSERR_IO,	NFSERR_ACCES,	NFSERR_IO,	NFSERR_IO,
+  NFSERR_IO,	NFSERR_EXIST,	NFSERR_IO,	NFSERR_NODEV,	NFSERR_NOTDIR,
+  NFSERR_ISDIR,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,
+  NFSERR_IO,	NFSERR_FBIG,	NFSERR_NOSPC,	NFSERR_IO,	NFSERR_ROFS,
+  NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,
+  NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,
+  NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,
+  NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,
+  NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,
+  NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,
+  NFSERR_IO,	NFSERR_IO,	NFSERR_NAMETOL,	NFSERR_IO,	NFSERR_IO,
+  NFSERR_NOTEMPTY, NFSERR_IO,	NFSERR_IO,	NFSERR_DQUOT,	NFSERR_STALE,
+  NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,
+  NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,
+  NFSERR_IO,
+};
+
+/*
+ * Defines which timer to use for the procnum (same order as nfsrv_procs[]).
+ * 0 - default
+ * 1 - getattr
+ * 2 - lookup
+ * 3 - read (going by that order: also readlink, readdir, readdirlook)
+ * 4 - write
+ */
+static int proct[NFS_NPROCS] = {
+	0, 1, 0, 0, 2, 3, 3, 0, 4, 0, 0, 0, 0, 0, 0, 0, 3, 0, 3, 0, 0, 0, 0,
+};
+
+/*
+ * There is a congestion window for outstanding rpcs maintained per mount
+ * point. The cwnd size is adjusted in roughly the way that:
+ * Van Jacobson, Congestion avoidance and Control, In "Proceedings of
+ * SIGCOMM '88". ACM, August 1988.
+ * describes for TCP. The cwnd size is chopped in half on a retransmit timeout
+ * and incremented by 1/cwnd when each rpc reply is received and a full cwnd
+ * of rpcs is in progress.
+ * (The sent count and cwnd are scaled for integer arith.)
+ * Variants of "slow start" were tried and were found to be too much of a
+ * performance hit (ave. rtt 3 times larger),
+ * I suspect due to the large rtt that nfs rpcs have.
+ */
+#define	NFS_CWNDSCALE	256	/* integer scale factor for cwnd arithmetic */
+#define	NFS_MAXCWND	(NFS_CWNDSCALE * 32)	/* 32 in scaled units */
+static int nfs_backoff[8] = { 2, 4, 8, 16, 32, 64, 128, 256, };
+int	nfs_sbwait();
+void	nfs_disconnect(), nfs_realign(), nfsrv_wakenfsd(), nfs_sndunlock();
+void	nfs_rcvunlock(), nqnfs_serverd(), nqnfs_clientlease();
+struct mbuf *nfsm_rpchead();
+int nfsrtton = 0;	/* presumably the switch for round trip logging */
+struct nfsrtt nfsrtt;
+struct nfsd nfsd_head;	/* presumably the head of the list of nfsds */
+
+int	nfsrv_null(),		/* handlers referenced by nfsrv_procs[] below */
+	nfsrv_getattr(),
+	nfsrv_setattr(),
+	nfsrv_lookup(),
+	nfsrv_readlink(),
+	nfsrv_read(),
+	nfsrv_write(),
+	nfsrv_create(),
+	nfsrv_remove(),
+	nfsrv_rename(),
+	nfsrv_link(),
+	nfsrv_symlink(),
+	nfsrv_mkdir(),
+	nfsrv_rmdir(),
+	nfsrv_readdir(),
+	nfsrv_statfs(),
+	nfsrv_noop(),
+	nqnfsrv_readdirlook(),
+	nqnfsrv_getlease(),
+	nqnfsrv_vacated(),
+	nqnfsrv_access();
+
+int (*nfsrv_procs[NFS_NPROCS])() = {	/* 23 entries, slots 0 to 22 */
+	nfsrv_null,		/* 0 */
+	nfsrv_getattr,		/* 1 */
+	nfsrv_setattr,		/* 2 */
+	nfsrv_noop,		/* 3 */
+	nfsrv_lookup,		/* 4 */
+	nfsrv_readlink,		/* 5 */
+	nfsrv_read,		/* 6 */
+	nfsrv_noop,		/* 7 */
+	nfsrv_write,		/* 8 */
+	nfsrv_create,		/* 9 */
+	nfsrv_remove,		/* 10 */
+	nfsrv_rename,		/* 11 */
+	nfsrv_link,		/* 12 */
+	nfsrv_symlink,		/* 13 */
+	nfsrv_mkdir,		/* 14 */
+	nfsrv_rmdir,		/* 15 */
+	nfsrv_readdir,		/* 16 */
+	nfsrv_statfs,		/* 17 */
+	nqnfsrv_readdirlook,	/* 18 */
+	nqnfsrv_getlease,	/* 19 */
+	nqnfsrv_vacated,	/* 20 */
+	nfsrv_noop,		/* 21 */
+	nqnfsrv_access,		/* 22 */
+};
+
+struct nfsreq nfsreqh;	/* presumably heads the list of requests -- confirm */
+
+/*
+ * Set up the socket and congestion state for mount nmp (rep may be NULL).
+ * Returns 0, or an errno after nfs_disconnect(); nm_nam is not freed here.
+ */
+nfs_connect(nmp, rep)
+	register struct nfsmount *nmp;
+	struct nfsreq *rep;
+{
+	register struct socket *so;
+	int s, error, rcvreserve, sndreserve;
+	struct sockaddr *saddr;
+	struct sockaddr_in *sin;
+	struct mbuf *m;
+	u_short tport;
+
+	nmp->nm_so = (struct socket *)0;
+	saddr = mtod(nmp->nm_nam, struct sockaddr *);
+	if (error = socreate(saddr->sa_family,
+		&nmp->nm_so, nmp->nm_sotype, nmp->nm_soproto))
+		goto bad;
+	so = nmp->nm_so;
+	nmp->nm_soflags = so->so_proto->pr_flags;	/* cached; nfs_send() reads it */
+
+	/*
+	 * Some servers require that the client port be a reserved port number.
+	 */
+	if (saddr->sa_family == AF_INET && (nmp->nm_flag & NFSMNT_RESVPORT)) {
+		MGET(m, M_WAIT, MT_SONAME);
+		sin = mtod(m, struct sockaddr_in *);
+		sin->sin_len = m->m_len = sizeof (struct sockaddr_in);
+		sin->sin_family = AF_INET;
+		sin->sin_addr.s_addr = INADDR_ANY;
+		tport = IPPORT_RESERVED - 1;	/* scan downward from the top */
+		sin->sin_port = htons(tport);
+		while ((error = sobind(so, m)) == EADDRINUSE &&
+		       --tport > IPPORT_RESERVED / 2)
+			sin->sin_port = htons(tport);
+		m_freem(m);
+		if (error)
+			goto bad;
+	}
+
+	/*
+	 * Protocols that do not require connections may be optionally left
+	 * unconnected for servers that reply from a port other than NFS_PORT.
+	 */
+	if (nmp->nm_flag & NFSMNT_NOCONN) {
+		if (nmp->nm_soflags & PR_CONNREQUIRED) {
+			error = ENOTCONN;
+			goto bad;
+		}
+	} else {
+		if (error = soconnect(so, nmp->nm_nam))
+			goto bad;
+
+		/*
+		 * Wait for the connection to complete. Cribbed from the
+		 * connect system call but with the wait timing out so
+		 * that interruptible mounts don't hang here for a long time.
+		 */
+		s = splnet();
+		while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
+			(void) tsleep((caddr_t)&so->so_timeo, PSOCK,
+				"nfscon", 2 * hz);
+			if ((so->so_state & SS_ISCONNECTING) &&
+			    so->so_error == 0 && rep &&
+			    (error = nfs_sigintr(nmp, rep, rep->r_procp))) {
+				so->so_state &= ~SS_ISCONNECTING;
+				splx(s);
+				goto bad;
+			}
+		}
+		if (so->so_error) {
+			error = so->so_error;
+			so->so_error = 0;
+			splx(s);
+			goto bad;
+		}
+		splx(s);
+	}
+	if (nmp->nm_flag & (NFSMNT_SOFT | NFSMNT_INT)) {	/* 5 * hz buffer timeouts */
+		so->so_rcv.sb_timeo = (5 * hz);
+		so->so_snd.sb_timeo = (5 * hz);
+	} else {
+		so->so_rcv.sb_timeo = 0;
+		so->so_snd.sb_timeo = 0;
+	}
+	if (nmp->nm_sotype == SOCK_DGRAM) {	/* room for one max-size packet */
+		sndreserve = nmp->nm_wsize + NFS_MAXPKTHDR;
+		rcvreserve = nmp->nm_rsize + NFS_MAXPKTHDR;
+	} else if (nmp->nm_sotype == SOCK_SEQPACKET) {	/* room for two */
+		sndreserve = (nmp->nm_wsize + NFS_MAXPKTHDR) * 2;
+		rcvreserve = (nmp->nm_rsize + NFS_MAXPKTHDR) * 2;
+	} else {
+		if (nmp->nm_sotype != SOCK_STREAM)
+			panic("nfscon sotype");
+		if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
+			MGET(m, M_WAIT, MT_SOOPTS);
+			*mtod(m, int *) = 1;
+			m->m_len = sizeof(int);
+			sosetopt(so, SOL_SOCKET, SO_KEEPALIVE, m);
+		}
+		if (so->so_proto->pr_protocol == IPPROTO_TCP) {
+			MGET(m, M_WAIT, MT_SOOPTS);
+			*mtod(m, int *) = 1;
+			m->m_len = sizeof(int);
+			sosetopt(so, IPPROTO_TCP, TCP_NODELAY, m);
+		}
+		sndreserve = (nmp->nm_wsize + NFS_MAXPKTHDR + sizeof (u_long))
+				* 2;	/* the extra u_long matches the record mark size */
+		rcvreserve = (nmp->nm_rsize + NFS_MAXPKTHDR + sizeof (u_long))
+				* 2;
+	}
+	if (error = soreserve(so, sndreserve, rcvreserve))
+		goto bad;
+	so->so_rcv.sb_flags |= SB_NOINTR;
+	so->so_snd.sb_flags |= SB_NOINTR;
+
+	/* Initialize other non-zero congestion variables */
+	nmp->nm_srtt[0] = nmp->nm_srtt[1] = nmp->nm_srtt[2] = nmp->nm_srtt[3] =
+		nmp->nm_srtt[4] = (NFS_TIMEO << 3);
+	nmp->nm_sdrtt[0] = nmp->nm_sdrtt[1] = nmp->nm_sdrtt[2] =
+		nmp->nm_sdrtt[3] = nmp->nm_sdrtt[4] = 0;
+	nmp->nm_cwnd = NFS_MAXCWND / 2;	    /* Initial send window */
+	nmp->nm_sent = 0;
+	nmp->nm_timeouts = 0;
+	return (0);
+
+bad:
+	nfs_disconnect(nmp);	/* closes nm_so if socreate() got as far as setting it */
+	return (error);
+}
+
+/*
+ * Re-establish the connection for the mount that rep belongs to, after
+ * a reliable-protocol connection has broken:
+ * - drop the old socket with nfs_disconnect()
+ * - retry nfs_connect(), pausing on lbolt, until it succeeds
+ * - mark every outstanding request on this mount R_MUSTRESEND
+ * Returns 0 on success or EINTR if a connect attempt was interrupted.
+ * nb: Must be called with the nfs_sndlock() set on the mount point.
+ */
+nfs_reconnect(rep)
+	register struct nfsreq *rep;
+{
+	register struct nfsmount *nmp = rep->r_nmp;
+	register struct nfsreq *req;
+	int error;
+
+	nfs_disconnect(nmp);
+	for (;;) {
+		if ((error = nfs_connect(nmp, rep)) == 0)
+			break;
+		if (error == EINTR || error == ERESTART)
+			return (EINTR);
+		(void) tsleep((caddr_t)&lbolt, PSOCK, "nfscon", 0);
+	}
+
+	/*
+	 * Walk the outstanding request list and flag everything that
+	 * belongs to this mount for retransmission.
+	 */
+	for (req = nfsreqh.r_next; req != &nfsreqh; req = req->r_next) {
+		if (req->r_nmp == nmp)
+			req->r_flags |= R_MUSTRESEND;
+	}
+	return (0);
+}
+
+/*
+ * Drop the mount's socket, if it has one: clear nm_so, then shut down and close.
+ */
+void
+nfs_disconnect(nmp)
+	register struct nfsmount *nmp;
+{
+	register struct socket *oso = nmp->nm_so;
+
+	if (oso == (struct socket *)0)
+		return;
+	/* nm_so is cleared before the socket itself is shut down and closed. */
+	nmp->nm_so = (struct socket *)0;
+	soshutdown(oso, 2);
+	soclose(oso);
+}
+
+/*
+ * Send the mbuf chain top with sosend(). For connection based socket types,
+ * it must be called with an nfs_sndlock() on the socket.
+ * "rep == NULL" indicates that it has been called from a server.
+ * For the client side (the socket used is rep->r_nmp->nm_so, not so):
+ * - return EINTR if the RPC is terminated, 0 otherwise
+ * - set R_MUSTRESEND if the send fails for any reason
+ * - do any cleanup required by recoverable socket errors (???)
+ * For the server side:
+ * - return EINTR or ERESTART if interrupted by a signal
+ * - return EPIPE if a connection is lost for connection based sockets (TCP...)
+ * - do any cleanup required by recoverable socket errors (???)
+ */
+nfs_send(so, nam, top, rep)
+	register struct socket *so;
+	struct mbuf *nam;
+	register struct mbuf *top;
+	struct nfsreq *rep;
+{
+	struct mbuf *sendnam;
+	int error, soflags, flags;
+
+	if (rep) {
+		if (rep->r_flags & R_SOFTTERM) {
+			m_freem(top);
+			return (EINTR);
+		}
+		if ((so = rep->r_nmp->nm_so) == NULL) {	/* no socket right now */
+			rep->r_flags |= R_MUSTRESEND;
+			m_freem(top);
+			return (0);
+		}
+		rep->r_flags &= ~R_MUSTRESEND;	/* set again below if the send fails */
+		soflags = rep->r_nmp->nm_soflags;
+	} else
+		soflags = so->so_proto->pr_flags;
+	if ((soflags & PR_CONNREQUIRED) || (so->so_state & SS_ISCONNECTED))
+		sendnam = (struct mbuf *)0;	/* connected: no destination needed */
+	else
+		sendnam = nam;
+	if (so->so_type == SOCK_SEQPACKET)
+		flags = MSG_EOR;
+	else
+		flags = 0;
+
+	error = sosend(so, sendnam, (struct uio *)0, top,
+		(struct mbuf *)0, flags);
+	if (error) {
+		if (rep) {
+			log(LOG_INFO, "nfs send error %d for server %s\n",error,
+			    rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
+			/*
+			 * Deal with errors for the client side.
+			 */
+			if (rep->r_flags & R_SOFTTERM)
+				error = EINTR;
+			else
+				rep->r_flags |= R_MUSTRESEND;
+		} else
+			log(LOG_INFO, "nfsd send error %d\n", error);
+
+		/*
+		 * Handle any recoverable (soft) socket errors here. (???)
+		 * Every error other than the four tested below is returned as 0.
+		 */
+		if (error != EINTR && error != ERESTART &&
+			error != EWOULDBLOCK && error != EPIPE)
+			error = 0;
+	}
+	return (error);
+}
+
+/*
+ * Receive a Sun RPC Request/Reply. For SOCK_DGRAM, the work is all
+ * done by soreceive(), but for SOCK_STREAM we must deal with the Record
+ * Mark and consolidate the data into a new mbuf list.
+ * nb: Sometimes TCP passes the data up to soreceive() in long lists of
+ *     small mbufs.
+ * For SOCK_STREAM we must be very careful to read an entire record once
+ * we have read any of it, even if the system call has been interrupted.
+ * Returns 0 with the message in *mp (sender name, if any, in *aname) or errno.
+ */
+nfs_receive(rep, aname, mp)
+	register struct nfsreq *rep;
+	struct mbuf **aname;
+	struct mbuf **mp;
+{
+	register struct socket *so;
+	struct uio auio;
+	struct iovec aio;
+	register struct mbuf *m;
+	struct mbuf *control;
+	u_long len;
+	struct mbuf **getnam;
+	int error, sotype, rcvflg;
+	struct proc *p = curproc;	/* XXX */
+
+	/*
+	 * Set up arguments for soreceive()
+	 */
+	*mp = (struct mbuf *)0;
+	*aname = (struct mbuf *)0;
+	sotype = rep->r_nmp->nm_sotype;
+
+	/*
+	 * For reliable protocols, lock against other senders/receivers
+	 * in case a reconnect is necessary.
+	 * For SOCK_STREAM, first get the Record Mark to find out how much
+	 * more there is to get.
+	 * We must lock the socket against other receivers
+	 * until we have an entire rpc request/reply.
+	 */
+	if (sotype != SOCK_DGRAM) {
+		if (error = nfs_sndlock(&rep->r_nmp->nm_flag, rep))
+			return (error);
+tryagain:
+		/*
+		 * Check for fatal errors and resending request.
+		 */
+		/*
+		 * Ugh: If a reconnect attempt just happened, nm_so
+		 * would have changed. NULL indicates a failed
+		 * attempt that has essentially shut down this
+		 * mount point.
+		 */
+		if (rep->r_mrep || (rep->r_flags & R_SOFTTERM)) {	/* answered or terminated */
+			nfs_sndunlock(&rep->r_nmp->nm_flag);
+			return (EINTR);
+		}
+		if ((so = rep->r_nmp->nm_so) == NULL) {
+			if (error = nfs_reconnect(rep)) {
+				nfs_sndunlock(&rep->r_nmp->nm_flag);
+				return (error);
+			}
+			goto tryagain;
+		}
+		while (rep->r_flags & R_MUSTRESEND) {	/* nfs_send() clears it on entry */
+			m = m_copym(rep->r_mreq, 0, M_COPYALL, M_WAIT);
+			nfsstats.rpcretries++;
+			if (error = nfs_send(so, rep->r_nmp->nm_nam, m, rep)) {
+				if (error == EINTR || error == ERESTART ||
+				    (error = nfs_reconnect(rep))) {
+					nfs_sndunlock(&rep->r_nmp->nm_flag);
+					return (error);
+				}
+				goto tryagain;
+			}
+		}
+		nfs_sndunlock(&rep->r_nmp->nm_flag);	/* send lock is not held across soreceive() */
+		if (sotype == SOCK_STREAM) {
+			aio.iov_base = (caddr_t) &len;
+			aio.iov_len = sizeof(u_long);
+			auio.uio_iov = &aio;
+			auio.uio_iovcnt = 1;
+			auio.uio_segflg = UIO_SYSSPACE;
+			auio.uio_rw = UIO_READ;
+			auio.uio_offset = 0;
+			auio.uio_resid = sizeof(u_long);
+			auio.uio_procp = p;
+			do {
+			   rcvflg = MSG_WAITALL;
+			   error = soreceive(so, (struct mbuf **)0, &auio,
+				(struct mbuf **)0, (struct mbuf **)0, &rcvflg);
+			   if (error == EWOULDBLOCK && rep) {
+				if (rep->r_flags & R_SOFTTERM)
+					return (EINTR);
+			   }
+			} while (error == EWOULDBLOCK);
+			if (!error && auio.uio_resid > 0) {
+			    log(LOG_INFO,
+				 "short receive (%d/%d) from nfs server %s\n",
+				 sizeof(u_long) - auio.uio_resid,
+				 sizeof(u_long),
+				 rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
+			    error = EPIPE;
+			}
+			if (error)
+				goto errout;
+			len = ntohl(len) & ~0x80000000;	/* drop the mark's top bit, keep the length */
+			/*
+			 * This is SERIOUS! We are out of sync with the sender
+			 * and forcing a disconnect/reconnect is all I can do.
+			 */
+			if (len > NFS_MAXPACKET) {
+			    log(LOG_ERR, "%s (%d) from nfs server %s\n",
+				"impossible packet length",
+				len,
+				rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
+			    error = EFBIG;
+			    goto errout;
+			}
+			auio.uio_resid = len;	/* now read the record body into *mp */
+			do {
+			    rcvflg = MSG_WAITALL;
+			    error =  soreceive(so, (struct mbuf **)0,
+				&auio, mp, (struct mbuf **)0, &rcvflg);
+			} while (error == EWOULDBLOCK || error == EINTR ||
+				 error == ERESTART);
+			if (!error && auio.uio_resid > 0) {
+			    log(LOG_INFO,
+				"short receive (%d/%d) from nfs server %s\n",
+				len - auio.uio_resid, len,
+				rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
+			    error = EPIPE;
+			}
+		} else {
+			/*
+			 * NB: Since uio_resid is big, MSG_WAITALL is ignored
+			 * and soreceive() will return when it has either a
+			 * control msg or a data msg.
+			 * We have no use for control msg., but must grab them
+			 * and then throw them away so we know what is going
+			 * on.
+			 */
+			auio.uio_resid = len = 100000000; /* Anything Big */
+			auio.uio_procp = p;
+			do {
+			    rcvflg = 0;
+			    error =  soreceive(so, (struct mbuf **)0,
+				&auio, mp, &control, &rcvflg);
+			    if (control)
+				m_freem(control);
+			    if (error == EWOULDBLOCK && rep) {
+				if (rep->r_flags & R_SOFTTERM)
+					return (EINTR);
+			    }
+			} while (error == EWOULDBLOCK ||
+				 (!error && *mp == NULL && control));
+			if ((rcvflg & MSG_EOR) == 0)
+				printf("Egad!!\n");
+			if (!error && *mp == NULL)
+				error = EPIPE;
+			len -= auio.uio_resid;	/* bytes actually received */
+		}
+errout:
+		if (error && error != EINTR && error != ERESTART) {	/* hard error: reconnect and retry */
+			m_freem(*mp);
+			*mp = (struct mbuf *)0;
+			if (error != EPIPE)
+				log(LOG_INFO,
+				    "receive error %d from nfs server %s\n",
+				    error,
+				 rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
+			error = nfs_sndlock(&rep->r_nmp->nm_flag, rep);
+			if (!error)
+				error = nfs_reconnect(rep);
+			if (!error)
+				goto tryagain;	/* send lock is held again, as tryagain expects */
+		}
+	} else {
+		if ((so = rep->r_nmp->nm_so) == NULL)
+			return (EACCES);
+		if (so->so_state & SS_ISCONNECTED)
+			getnam = (struct mbuf **)0;
+		else
+			getnam = aname;	/* unconnected: also collect the sender's name */
+		auio.uio_resid = len = 1000000;
+		auio.uio_procp = p;
+		do {
+			rcvflg = 0;
+			error =  soreceive(so, getnam, &auio, mp,
+				(struct mbuf **)0, &rcvflg);
+			if (error == EWOULDBLOCK &&
+			    (rep->r_flags & R_SOFTTERM))
+				return (EINTR);
+		} while (error == EWOULDBLOCK);
+		len -= auio.uio_resid;
+	}
+	if (error) {
+		m_freem(*mp);
+		*mp = (struct mbuf *)0;
+	}
+	/*
+	 * Search for any mbufs that are not a multiple of 4 bytes long
+	 * or with m_data not longword aligned.
+	 * These could cause pointer alignment problems, so copy them to
+	 * well aligned mbufs.
+	 */
+	nfs_realign(*mp, 5 * NFSX_UNSIGNED);
+	return (error);
+}
+
+/*
+ * Wait for the reply to myrep: read replies off the mount's socket and hand each
+ * to the outstanding request with the same xid until myrep has its own (or one
+ * reply has been handled and R_GETONEREP is set). Returns 0 or a receive error.
+ */
+/* ARGSUSED */
+nfs_reply(myrep)
+	struct nfsreq *myrep;
+{
+	register struct nfsreq *rep;
+	register struct nfsmount *nmp = myrep->r_nmp;
+	register long t1;
+	struct mbuf *mrep, *nam, *md;
+	u_long rxid, *tl;
+	caddr_t dpos, cp2;
+	int error;
+
+	/*
+	 * Loop around until we get our own reply
+	 */
+	for (;;) {
+		/*
+		 * Lock against other receivers so that I don't get stuck in
+		 * sbwait() after someone else has received my reply for me.
+		 * Also necessary for connection based protocols to avoid
+		 * race conditions during a reconnect.
+		 */
+		if (error = nfs_rcvlock(myrep))
+			return (error);
+		/* Already received, bye bye */
+		if (myrep->r_mrep != NULL) {
+			nfs_rcvunlock(&nmp->nm_flag);
+			return (0);
+		}
+		/*
+		 * Get the next Rpc reply off the socket
+		 */
+		error = nfs_receive(myrep, &nam, &mrep);
+		nfs_rcvunlock(&nmp->nm_flag);
+		if (error) {
+
+			/*
+			 * Ignore routing errors on connectionless protocols??
+			 */
+			if (NFSIGNORE_SOERROR(nmp->nm_soflags, error)) {
+				nmp->nm_so->so_error = 0;
+				if (myrep->r_flags & R_GETONEREP)
+					return (0);
+				continue;
+			}
+			return (error);
+		}
+		if (nam)
+			m_freem(nam);
+	
+		/*
+		 * Get the xid and check that it is an rpc reply
+		 */
+		md = mrep;
+		dpos = mtod(md, caddr_t);
+		nfsm_dissect(tl, u_long *, 2*NFSX_UNSIGNED);
+		rxid = *tl++;
+		if (*tl != rpc_reply) {
+			if (nmp->nm_flag & NFSMNT_NQNFS) {
+				if (nqnfs_callback(nmp, mrep, md, dpos))
+					nfsstats.rpcinvalid++;
+			} else {
+				nfsstats.rpcinvalid++;
+				m_freem(mrep);
+			}
+nfsmout:	/* NOTE(review): presumably also the nfsm_dissect() failure target -- confirm */
+			if (myrep->r_flags & R_GETONEREP)
+				return (0);
+			continue;
+		}
+
+		/*
+		 * Loop through the request list to match up the reply
+		 * Iff no match, just drop the datagram
+		 */
+		rep = nfsreqh.r_next;
+		while (rep != &nfsreqh) {
+			if (rep->r_mrep == NULL && rxid == rep->r_xid) {
+				/* Found it.. */
+				rep->r_mrep = mrep;
+				rep->r_md = md;
+				rep->r_dpos = dpos;
+				if (nfsrtton) {
+					struct rttl *rt;
+
+					rt = &nfsrtt.rttl[nfsrtt.pos];
+					rt->proc = rep->r_procnum;
+					rt->rto = NFS_RTO(nmp, proct[rep->r_procnum]);
+					rt->sent = nmp->nm_sent;
+					rt->cwnd = nmp->nm_cwnd;
+					rt->srtt = nmp->nm_srtt[proct[rep->r_procnum] - 1];
+					rt->sdrtt = nmp->nm_sdrtt[proct[rep->r_procnum] - 1];
+					rt->fsid = nmp->nm_mountp->mnt_stat.f_fsid;
+					rt->tstamp = time;
+					if (rep->r_flags & R_TIMING)
+						rt->rtt = rep->r_rtt;
+					else
+						rt->rtt = 1000000;
+					nfsrtt.pos = (nfsrtt.pos + 1) % NFSRTTLOGSIZ;
+				}
+				/*
+				 * Update congestion window: do the
+				 * additive increase of one rpc/rtt.
+				 */
+				if (nmp->nm_cwnd <= nmp->nm_sent) {
+					nmp->nm_cwnd +=
+					   (NFS_CWNDSCALE * NFS_CWNDSCALE +
+					   (nmp->nm_cwnd >> 1)) / nmp->nm_cwnd;
+					if (nmp->nm_cwnd > NFS_MAXCWND)
+						nmp->nm_cwnd = NFS_MAXCWND;
+				}
+				if (rep->r_flags & R_SENT)	/* only if it was counted */
+					nmp->nm_sent -= NFS_CWNDSCALE;
+				rep->r_flags &= ~R_SENT;
+				/*
+				 * Update rtt using a gain of 0.125 on the mean
+				 * and a gain of 0.25 on the deviation.
+				 */
+				if (rep->r_flags & R_TIMING) {
+					/*
+					 * Since the timer resolution of
+					 * NFS_HZ is so coarse, it can often
+					 * result in r_rtt == 0. Since
+					 * r_rtt == N means that the actual
+					 * rtt is between N+dt and N+2-dt ticks,
+					 * add 1.
+					 */
+					t1 = rep->r_rtt + 1;
+					t1 -= (NFS_SRTT(rep) >> 3);
+					NFS_SRTT(rep) += t1;
+					if (t1 < 0)
+						t1 = -t1;
+					t1 -= (NFS_SDRTT(rep) >> 2);
+					NFS_SDRTT(rep) += t1;
+				}
+				nmp->nm_timeouts = 0;
+				break;
+			}
+			rep = rep->r_next;
+		}
+		/*
+		 * If not matched to a request, drop it.
+		 * If it's mine, get out.
+		 */
+		if (rep == &nfsreqh) {
+			nfsstats.rpcunexpected++;
+			m_freem(mrep);
+		} else if (rep == myrep) {
+			if (rep->r_mrep == NULL)
+				panic("nfsreply nil");
+			return (0);
+		}
+		if (myrep->r_flags & R_GETONEREP)
+			return (0);
+	}
+}
+
+/*
+ * nfs_request - goes something like this
+ *	- fill in request struct
+ *	- links it into list
+ *	- calls nfs_send() for first transmit
+ *	- calls nfs_reply() (which uses nfs_receive()) to get reply
+ *	- break down rpc header and return 0 with nfs reply pointed to
+ *	  by *mrp, *mdp and *dposp, or return an error
+ * nb: always frees up mreq mbuf list
+ */
+nfs_request(vp, mrest, procnum, procp, cred, mrp, mdp, dposp)
+	struct vnode *vp;
+	struct mbuf *mrest;
+	int procnum;
+	struct proc *procp;
+	struct ucred *cred;
+	struct mbuf **mrp;
+	struct mbuf **mdp;
+	caddr_t *dposp;
+{
+	register struct mbuf *m, *mrep;
+	register struct nfsreq *rep;
+	register u_long *tl;
+	register int i;
+	struct nfsmount *nmp;
+	struct mbuf *md, *mheadend;
+	struct nfsreq *reph;
+	struct nfsnode *np;
+	time_t reqtime, waituntil;
+	caddr_t dpos, cp2;
+	int t1, nqlflag, cachable, s, error = 0, mrest_len, auth_len, auth_type;
+	int trylater_delay = NQ_TRYLATERDEL, trylater_cnt = 0, failed_auth = 0;
+	u_long xid;
+	u_quad_t frev;
+	char *auth_str;
+
+	nmp = VFSTONFS(vp->v_mount);
+	MALLOC(rep, struct nfsreq *, sizeof(struct nfsreq), M_NFSREQ, M_WAITOK);
+	rep->r_nmp = nmp;
+	rep->r_vp = vp;
+	rep->r_procp = procp;
+	rep->r_procnum = procnum;
+	i = 0;
+	m = mrest;
+	while (m) {	/* total byte length of the caller's argument chain */
+		i += m->m_len;
+		m = m->m_next;
+	}
+	mrest_len = i;
+
+	/*
+	 * Get the RPC header with authorization.
+	 */
+kerbauth:
+	auth_str = (char *)0;
+	if (nmp->nm_flag & NFSMNT_KERB) {
+		if (failed_auth) {	/* only after the server rejected the first try */
+			error = nfs_getauth(nmp, rep, cred, &auth_type,
+				&auth_str, &auth_len);
+			if (error) {
+				free((caddr_t)rep, M_NFSREQ);
+				m_freem(mrest);
+				return (error);
+			}
+		} else {
+			auth_type = RPCAUTH_UNIX;
+			auth_len = 5 * NFSX_UNSIGNED;
+		}
+	} else {
+		auth_type = RPCAUTH_UNIX;
+		if (cred->cr_ngroups < 1)
+			panic("nfsreq nogrps");
+		auth_len = ((((cred->cr_ngroups - 1) > nmp->nm_numgrps) ?
+			nmp->nm_numgrps : (cred->cr_ngroups - 1)) << 2) +
+			5 * NFSX_UNSIGNED;
+	}
+	m = nfsm_rpchead(cred, (nmp->nm_flag & NFSMNT_NQNFS), procnum,
+	     auth_type, auth_len, auth_str, mrest, mrest_len, &mheadend, &xid);
+	if (auth_str)
+		free(auth_str, M_TEMP);
+
+	/*
+	 * For stream protocols, insert a Sun RPC Record Mark.
+	 */
+	if (nmp->nm_sotype == SOCK_STREAM) {
+		M_PREPEND(m, NFSX_UNSIGNED, M_WAIT);
+		*mtod(m, u_long *) = htonl(0x80000000 |
+			 (m->m_pkthdr.len - NFSX_UNSIGNED));
+	}
+	rep->r_mreq = m;
+	rep->r_xid = xid;
+tryagain:
+	if (nmp->nm_flag & NFSMNT_SOFT)
+		rep->r_retry = nmp->nm_retry;
+	else
+		rep->r_retry = NFS_MAXREXMIT + 1;	/* past clip limit */
+	rep->r_rtt = rep->r_rexmit = 0;
+	if (proct[procnum] > 0)
+		rep->r_flags = R_TIMING;
+	else
+		rep->r_flags = 0;
+	rep->r_mrep = NULL;
+
+	/*
+	 * Do the client side RPC.
+	 */
+	nfsstats.rpcrequests++;
+	/*
+	 * Chain request into list of outstanding requests. Be sure
+	 * to put it LAST so timer finds oldest requests first.
+	 */
+	s = splsoftclock();
+	reph = &nfsreqh;
+	reph->r_prev->r_next = rep;
+	rep->r_prev = reph->r_prev;
+	reph->r_prev = rep;
+	rep->r_next = reph;
+
+	/* Get send time for nqnfs */
+	reqtime = time.tv_sec;
+
+	/*
+	 * If backing off another request or avoiding congestion, don't send
+	 * this one now but let timer do it; otherwise send a copy of r_mreq
+	 * (never of m, which a previous pass through here may have consumed).
+	 */
+	if (nmp->nm_so && (nmp->nm_sotype != SOCK_DGRAM ||
+		(nmp->nm_flag & NFSMNT_DUMBTIMR) ||
+		nmp->nm_sent < nmp->nm_cwnd)) {
+		splx(s);
+		if (nmp->nm_soflags & PR_CONNREQUIRED)
+			error = nfs_sndlock(&nmp->nm_flag, rep);
+		if (!error) {
+			m = m_copym(rep->r_mreq, 0, M_COPYALL, M_WAIT);
+			error = nfs_send(nmp->nm_so, nmp->nm_nam, m, rep);
+			if (nmp->nm_soflags & PR_CONNREQUIRED)
+				nfs_sndunlock(&nmp->nm_flag);
+		}
+		if (!error && (rep->r_flags & R_MUSTRESEND) == 0) {
+			nmp->nm_sent += NFS_CWNDSCALE;
+			rep->r_flags |= R_SENT;
+		}
+	} else {
+		splx(s);
+		rep->r_rtt = -1;	/* not sent yet; nfs_timer() does the first send */
+	}
+
+	/*
+	 * Wait for the reply from our send or the timer's.
+	 */
+	if (!error || error == EPIPE)
+		error = nfs_reply(rep);
+
+	/*
+	 * RPC done, unlink the request.
+	 */
+	s = splsoftclock();
+	rep->r_prev->r_next = rep->r_next;
+	rep->r_next->r_prev = rep->r_prev;
+	splx(s);
+
+	/*
+	 * Decrement the outstanding request count.
+	 */
+	if (rep->r_flags & R_SENT) {
+		rep->r_flags &= ~R_SENT;	/* paranoia */
+		nmp->nm_sent -= NFS_CWNDSCALE;
+	}
+
+	/*
+	 * If there was a successful reply and a tprintf msg.
+	 * tprintf a response.
+	 */
+	if (!error && (rep->r_flags & R_TPRINTFMSG))
+		nfs_msg(rep->r_procp, nmp->nm_mountp->mnt_stat.f_mntfromname,
+		    "is alive again");
+	mrep = rep->r_mrep;
+	md = rep->r_md;
+	dpos = rep->r_dpos;
+	if (error) {
+		m_freem(rep->r_mreq);
+		free((caddr_t)rep, M_NFSREQ);
+		return (error);
+	}
+
+	/*
+	 * break down the rpc header and check if ok
+	 */
+	nfsm_dissect(tl, u_long *, 3*NFSX_UNSIGNED);
+	if (*tl++ == rpc_msgdenied) {
+		if (*tl == rpc_mismatch)
+			error = EOPNOTSUPP;
+		else if ((nmp->nm_flag & NFSMNT_KERB) && *tl++ == rpc_autherr) {
+			if (*tl == rpc_rejectedcred && failed_auth == 0) {
+				failed_auth++;
+				mheadend->m_next = (struct mbuf *)0;	/* unhook mrest so it survives */
+				m_freem(mrep);
+				m_freem(rep->r_mreq);
+				goto kerbauth;
+			} else
+				error = EAUTH;
+		} else
+			error = EACCES;
+		m_freem(mrep);
+		m_freem(rep->r_mreq);
+		free((caddr_t)rep, M_NFSREQ);
+		return (error);
+	}
+
+	/*
+	 * skip over the auth_verf, someday we may want to cache auth_short's
+	 * for nfs_reqhead(), but for now just dump it
+	 */
+	if (*++tl != 0) {
+		i = nfsm_rndup(fxdr_unsigned(long, *tl));
+		nfsm_adv(i);
+	}
+	nfsm_dissect(tl, u_long *, NFSX_UNSIGNED);
+	/* 0 == ok */
+	if (*tl == 0) {
+		nfsm_dissect(tl, u_long *, NFSX_UNSIGNED);
+		if (*tl != 0) {
+			error = fxdr_unsigned(int, *tl);
+			m_freem(mrep);
+			if ((nmp->nm_flag & NFSMNT_NQNFS) &&
+			    error == NQNFS_TRYLATER) {
+				error = 0;
+				waituntil = time.tv_sec + trylater_delay;
+				while (time.tv_sec < waituntil)
+					(void) tsleep((caddr_t)&lbolt,
+						PSOCK, "nqnfstry", 0);
+				trylater_delay *= nfs_backoff[trylater_cnt];
+				if (trylater_cnt < 7)
+					trylater_cnt++;
+				goto tryagain;	/* reuses rep and rep->r_mreq as they are */
+			}
+
+			/*
+			 * If the File Handle was stale, invalidate the
+			 * lookup cache, just in case.
+			 */
+			if (error == ESTALE)
+				cache_purge(vp);
+			m_freem(rep->r_mreq);
+			free((caddr_t)rep, M_NFSREQ);
+			return (error);
+		}
+
+		/*
+		 * For nqnfs, get any lease in reply
+		 */
+		if (nmp->nm_flag & NFSMNT_NQNFS) {
+			nfsm_dissect(tl, u_long *, NFSX_UNSIGNED);
+			if (*tl) {
+				np = VTONFS(vp);
+				nqlflag = fxdr_unsigned(int, *tl);
+				nfsm_dissect(tl, u_long *, 4*NFSX_UNSIGNED);
+				cachable = fxdr_unsigned(int, *tl++);
+				reqtime += fxdr_unsigned(int, *tl++);
+				if (reqtime > time.tv_sec) {
+				    fxdr_hyper(tl, &frev);
+				    nqnfs_clientlease(nmp, np, nqlflag,
+					cachable, reqtime, frev);
+				}
+			}
+		}
+		*mrp = mrep;
+		*mdp = md;
+		*dposp = dpos;
+		m_freem(rep->r_mreq);
+		FREE((caddr_t)rep, M_NFSREQ);
+		return (0);
+	}
+	m_freem(mrep);
+	m_freem(rep->r_mreq);
+	free((caddr_t)rep, M_NFSREQ);
+	error = EPROTONOSUPPORT;
+nfsmout:
+	return (error);
+}
+
+/*
+ * Build the rpc reply header for request nd with status err in a new mbuf,
+ * handed back through mrq, mbp and bposp; siz decides if a cluster is used.
+ */
+nfs_rephead(siz, nd, err, cache, frev, mrq, mbp, bposp)
+	int siz;
+	struct nfsd *nd;
+	int err;
+	int cache;
+	u_quad_t *frev;
+	struct mbuf **mrq;
+	struct mbuf **mbp;
+	caddr_t *bposp;
+{
+	register u_long *tl;
+	register struct mbuf *mreq;
+	caddr_t bpos;
+	struct mbuf *mb, *mb2;
+
+	MGETHDR(mreq, M_WAIT, MT_DATA);
+	mb = mreq;
+	/*
+	 * If this is a big reply, use a cluster else
+	 * try and leave leading space for the lower level headers.
+	 */
+	siz += RPC_REPLYSIZ;
+	if (siz >= MINCLSIZE) {
+		MCLGET(mreq, M_WAIT);
+	} else
+		mreq->m_data += max_hdr;
+	tl = mtod(mreq, u_long *);
+	mreq->m_len = 6*NFSX_UNSIGNED;	/* six words are filled in below */
+	bpos = ((caddr_t)tl)+mreq->m_len;
+	*tl++ = nd->nd_retxid;
+	*tl++ = rpc_reply;
+	if (err == ERPCMISMATCH || err == NQNFS_AUTHERR) {
+		*tl++ = rpc_msgdenied;
+		if (err == NQNFS_AUTHERR) {
+			*tl++ = rpc_autherr;
+			*tl = rpc_rejectedcred;
+			mreq->m_len -= NFSX_UNSIGNED;	/* this form is only five words */
+			bpos -= NFSX_UNSIGNED;
+		} else {
+			*tl++ = rpc_mismatch;
+			*tl++ = txdr_unsigned(2);
+			*tl = txdr_unsigned(2);
+		}
+	} else {
+		*tl++ = rpc_msgaccepted;
+		*tl++ = 0;
+		*tl++ = 0;
+		switch (err) {
+		case EPROGUNAVAIL:
+			*tl = txdr_unsigned(RPC_PROGUNAVAIL);
+			break;
+		case EPROGMISMATCH:
+			*tl = txdr_unsigned(RPC_PROGMISMATCH);
+			nfsm_build(tl, u_long *, 2*NFSX_UNSIGNED);
+			*tl++ = txdr_unsigned(2);
+			*tl = txdr_unsigned(2);	/* someday 3 */
+			break;
+		case EPROCUNAVAIL:
+			*tl = txdr_unsigned(RPC_PROCUNAVAIL);
+			break;
+		default:
+			*tl = 0;
+			if (err != VNOVAL) {	/* with VNOVAL no status word is appended */
+				nfsm_build(tl, u_long *, NFSX_UNSIGNED);
+				if (err)
+					*tl = txdr_unsigned(nfsrv_errmap[err - 1]);
+				else
+					*tl = 0;
+			}
+			break;
+		};
+	}
+
+	/*
+	 * For nqnfs, piggyback lease as requested.
+	 */
+	if (nd->nd_nqlflag != NQL_NOVAL && err == 0) {
+		if (nd->nd_nqlflag) {
+			nfsm_build(tl, u_long *, 5*NFSX_UNSIGNED);
+			*tl++ = txdr_unsigned(nd->nd_nqlflag);
+			*tl++ = txdr_unsigned(cache);
+			*tl++ = txdr_unsigned(nd->nd_duration);
+			txdr_hyper(frev, tl);
+		} else {
+			if (nd->nd_nqlflag != 0)	/* cannot be true in this branch */
+				panic("nqreph");
+			nfsm_build(tl, u_long *, NFSX_UNSIGNED);
+			*tl = 0;
+		}
+	}
+	*mrq = mreq;
+	*mbp = mb;
+	*bposp = bpos;
+	if (err != 0 && err != VNOVAL)
+		nfsstats.srvrpc_errs++;
+	return (0);
+}
+
+/*
+ * Nfs timer routine; reschedules itself with timeout() after hz / NFS_HZ ticks.
+ * Scan the nfsreq list and retransmit any requests that have timed out
+ * To avoid retransmission attempts on STREAM sockets (in the future) make
+ * sure to set the r_retry field to 0 (implies nm_retry == 0).
+ */
+void
+nfs_timer(arg)
+	void *arg;
+{
+	register struct nfsreq *rep;
+	register struct mbuf *m;
+	register struct socket *so;
+	register struct nfsmount *nmp;
+	register int timeo;
+	static long lasttime = 0;
+	int s, error;
+
+	s = splnet();
+	for (rep = nfsreqh.r_next; rep != &nfsreqh; rep = rep->r_next) {
+		nmp = rep->r_nmp;
+		if (rep->r_mrep || (rep->r_flags & R_SOFTTERM))	/* answered or terminated */
+			continue;
+		if (nfs_sigintr(nmp, rep, rep->r_procp)) {
+			rep->r_flags |= R_SOFTTERM;
+			continue;
+		}
+		if (rep->r_rtt >= 0) {	/* negative r_rtt: still waiting to be sent */
+			rep->r_rtt++;
+			if (nmp->nm_flag & NFSMNT_DUMBTIMR)
+				timeo = nmp->nm_timeo;
+			else
+				timeo = NFS_RTO(nmp, proct[rep->r_procnum]);
+			if (nmp->nm_timeouts > 0)
+				timeo *= nfs_backoff[nmp->nm_timeouts - 1];
+			if (rep->r_rtt <= timeo)
+				continue;
+			if (nmp->nm_timeouts < 8)
+				nmp->nm_timeouts++;
+		}
+		/*
+		 * Check for server not responding
+		 */
+		if ((rep->r_flags & R_TPRINTFMSG) == 0 &&
+		     rep->r_rexmit > nmp->nm_deadthresh) {
+			nfs_msg(rep->r_procp,
+			    nmp->nm_mountp->mnt_stat.f_mntfromname,
+			    "not responding");
+			rep->r_flags |= R_TPRINTFMSG;
+		}
+		if (rep->r_rexmit >= rep->r_retry) {	/* too many */
+			nfsstats.rpctimeouts++;
+			rep->r_flags |= R_SOFTTERM;
+			continue;
+		}
+		if (nmp->nm_sotype != SOCK_DGRAM) {	/* count only; nothing is resent here */
+			if (++rep->r_rexmit > NFS_MAXREXMIT)
+				rep->r_rexmit = NFS_MAXREXMIT;
+			continue;
+		}
+		if ((so = nmp->nm_so) == NULL)
+			continue;
+
+		/*
+		 * If there is enough space and the window allows..
+		 *	Resend it
+		 * Set r_rtt to -1 in case we fail to send it now.
+		 */
+		rep->r_rtt = -1;
+		if (sbspace(&so->so_snd) >= rep->r_mreq->m_pkthdr.len &&
+		   ((nmp->nm_flag & NFSMNT_DUMBTIMR) ||
+		    (rep->r_flags & R_SENT) ||
+		    nmp->nm_sent < nmp->nm_cwnd) &&
+		   (m = m_copym(rep->r_mreq, 0, M_COPYALL, M_DONTWAIT))){
+			if ((nmp->nm_flag & NFSMNT_NOCONN) == 0)
+			    error = (*so->so_proto->pr_usrreq)(so, PRU_SEND, m,
+			    (struct mbuf *)0, (struct mbuf *)0);
+			else
+			    error = (*so->so_proto->pr_usrreq)(so, PRU_SEND, m,
+			    nmp->nm_nam, (struct mbuf *)0);
+			if (error) {
+				if (NFSIGNORE_SOERROR(nmp->nm_soflags, error))
+					so->so_error = 0;
+			} else {
+				/*
+				 * Iff first send, start timing
+				 * else turn timing off, backoff timer
+				 * and divide congestion window by 2.
+				 */
+				if (rep->r_flags & R_SENT) {
+					rep->r_flags &= ~R_TIMING;
+					if (++rep->r_rexmit > NFS_MAXREXMIT)
+						rep->r_rexmit = NFS_MAXREXMIT;
+					nmp->nm_cwnd >>= 1;
+					if (nmp->nm_cwnd < NFS_CWNDSCALE)
+						nmp->nm_cwnd = NFS_CWNDSCALE;
+					nfsstats.rpcretries++;
+				} else {
+					rep->r_flags |= R_SENT;
+					nmp->nm_sent += NFS_CWNDSCALE;
+				}
+				rep->r_rtt = 0;	/* sent: restart the timeout count */
+			}
+		}
+	}
+
+	/*
+	 * Call the nqnfs server timer once a second to handle leases.
+	 */
+	if (lasttime != time.tv_sec) {
+		lasttime = time.tv_sec;
+		nqnfs_serverd();
+	}
+	splx(s);
+	timeout(nfs_timer, (void *)0, hz / NFS_HZ);
+}
+
+/*
+ * Decide whether the RPC for rep (may be NULL) on mount nmp should be given
+ * up: EINTR if so, 0 if not. Pending signals only count on NFSMNT_INT mounts.
+ */
+nfs_sigintr(nmp, rep, p)
+	struct nfsmount *nmp;
+	struct nfsreq *rep;
+	register struct proc *p;
+{
+
+	/* A request already marked R_SOFTTERM is interrupted on any mount. */
+	if (rep != NULL && (rep->r_flags & R_SOFTTERM) != 0)
+		return (EINTR);
+	/* Without NFSMNT_INT, or without a process, signals are not checked. */
+	if ((nmp->nm_flag & NFSMNT_INT) == 0 || p == NULL)
+		return (0);
+	if ((p->p_siglist & ~p->p_sigmask & ~p->p_sigignore & NFSINT_SIGMASK) != 0)
+		return (EINTR);
+	return (0);
+}
+
+/*
+ * Take the send lock bit (NFSMNT_SNDLOCK) in *flagp, sleeping while it is held.
+ * Needed on STREAM sockets so a whole rpc request/reply stays together and so
+ * requests in progress do not race with a reconnect. rep may be NULL, in which
+ * case the wait is never interrupted. Returns 0 once locked, else EINTR.
+ */
+nfs_sndlock(flagp, rep)
+	register int *flagp;
+	struct nfsreq *rep;
+{
+	struct proc *p;
+	int slpflag = 0, slptimeo = 0;
+
+	if (rep) {
+		p = rep->r_procp;
+		if (rep->r_nmp->nm_flag & NFSMNT_INT)
+			slpflag = PCATCH;
+	} else
+		p = (struct proc *)0;
+	while (*flagp & NFSMNT_SNDLOCK) {
+		if (rep && nfs_sigintr(rep->r_nmp, rep, p))	/* no rep: no signal check */
+			return (EINTR);
+		*flagp |= NFSMNT_WANTSND;
+		(void) tsleep((caddr_t)flagp, slpflag | (PZERO - 1), "nfsndlck",
+			slptimeo);
+		if (slpflag == PCATCH) {	/* after the first sleep: poll every 2 * hz */
+			slpflag = 0;
+			slptimeo = 2 * hz;
+		}
+	}
+	*flagp |= NFSMNT_SNDLOCK;
+	return (0);
+}
+
+/*
+ * Release the send lock bit in *flagp and wake up anyone waiting for it.
+ */
+void
+nfs_sndunlock(flagp)
+	register int *flagp;
+{
+	register int waiters;
+
+	if (!(*flagp & NFSMNT_SNDLOCK))
+		panic("nfs sndunlock");
+	waiters = *flagp & NFSMNT_WANTSND;
+	*flagp &= ~(NFSMNT_SNDLOCK | NFSMNT_WANTSND);
+	if (waiters)
+		wakeup((caddr_t)flagp);
+}
+
+nfs_rcvlock(rep)
+	register struct nfsreq *rep;
+{
+	register int *lockword = &rep->r_nmp->nm_flag;
+	int sigflag = (*lockword & NFSMNT_INT) ? PCATCH : 0, timo = 0;
+
+	/*
+	 * Sleep until the mount's receive lock bit is clear, then set it.
+	 * Returns 0 with the lock held, or EINTR when nfs_sigintr() says so.
+	 */
+	while ((*lockword & NFSMNT_RCVLOCK) != 0) {
+		if (nfs_sigintr(rep->r_nmp, rep, rep->r_procp) != 0)
+			return (EINTR);
+		*lockword |= NFSMNT_WANTRCV;
+		(void) tsleep((caddr_t)lockword, sigflag | (PZERO - 1),
+			"nfsrcvlk", timo);
+		/* Only the first sleep uses PCATCH; later ones time out at 2 * hz. */
+		if (sigflag == PCATCH) {
+			sigflag = 0;
+			timo = 2 * hz;
+		}
+	}
+	*lockword |= NFSMNT_RCVLOCK;
+	return (0);
+}
+
+/*
+ * Release the receive lock bit in *flagp and wake up anyone waiting for it.
+ */
+void
+nfs_rcvunlock(flagp)
+	register int *flagp;
+{
+	register int waiters;
+
+	if (!(*flagp & NFSMNT_RCVLOCK))
+		panic("nfs rcvunlock");
+	waiters = *flagp & NFSMNT_WANTRCV;
+	*flagp &= ~(NFSMNT_RCVLOCK | NFSMNT_WANTRCV);
+	if (waiters)
+		wakeup((caddr_t)flagp);
+}
+
+/*
+ * Find the first mbuf in chain m whose length or data address is not a multiple
+ * of 4 and compact everything from there on, in place. hsiz caps the first copy.
+ */
+void
+nfs_realign(m, hsiz)
+	register struct mbuf *m;
+	int hsiz;
+{
+	register struct mbuf *m2;
+	register int siz, mlen, olen;
+	register caddr_t tcp, fcp;
+	struct mbuf *mnew;
+
+	while (m) {
+	    /*
+	     * This never happens for UDP, rarely happens for TCP
+	     * but frequently happens for iso transport.
+	     */
+	    if ((m->m_len & 0x3) || (mtod(m, int) & 0x3)) {
+		olen = m->m_len;
+		fcp = mtod(m, caddr_t);	/* fcp: copy-from pointer */
+		if ((int)fcp & 0x3) {
+			m->m_flags &= ~M_PKTHDR;
+			if (m->m_flags & M_EXT)
+				m->m_data = m->m_ext.ext_buf +
+					((m->m_ext.ext_size - olen) & ~0x3);
+			else
+				m->m_data = m->m_dat;
+		}
+		m->m_len = 0;
+		tcp = mtod(m, caddr_t);	/* tcp: copy-to pointer */
+		mnew = m;	/* mnew: mbuf currently being filled */
+		m2 = m->m_next;	/* m2: next mbuf to fill once mnew is full */
+	
+		/*
+		 * If possible, only put the first invariant part
+		 * of the RPC header in the first mbuf.
+		 */
+		mlen = M_TRAILINGSPACE(m);
+		if (olen <= hsiz && mlen > hsiz)
+			mlen = hsiz;
+	
+		/*
+		 * Loop through the mbuf list consolidating data.
+		 */
+		while (m) {
+			while (olen > 0) {
+				if (mlen == 0) {	/* destination full: move on to m2 */
+					m2->m_flags &= ~M_PKTHDR;
+					if (m2->m_flags & M_EXT)
+						m2->m_data = m2->m_ext.ext_buf;
+					else
+						m2->m_data = m2->m_dat;
+					m2->m_len = 0;
+					mlen = M_TRAILINGSPACE(m2);
+					tcp = mtod(m2, caddr_t);
+					mnew = m2;
+					m2 = m2->m_next;
+				}
+				siz = min(mlen, olen);
+				if (tcp != fcp)
+					bcopy(fcp, tcp, siz);
+				mnew->m_len += siz;
+				mlen -= siz;
+				olen -= siz;
+				tcp += siz;
+				fcp += siz;
+			}
+			m = m->m_next;
+			if (m) {
+				olen = m->m_len;
+				fcp = mtod(m, caddr_t);
+			}
+		}
+	
+		/*
+		 * Finally, set m_len == 0 for any trailing mbufs that have
+		 * been copied out of.
+		 */
+		while (m2) {
+			m2->m_len = 0;
+			m2 = m2->m_next;
+		}
+		return;	/* one compaction pass covers the rest of the chain */
+	    }
+	    m = m->m_next;
+	}
+}
+
+/*
+ * Socket upcall routine for the nfsd sockets.
+ * The caddr_t arg is a pointer to the "struct nfssvc_sock".
+ * Essentially do as much as possible non-blocking, else punt and it will
+ * be called with M_WAIT from an nfsd.
+ *
+ * Nothing is done unless SLP_VALID is set in ns_flag.
+ * For SOCK_STREAM sockets the received mbufs are appended to the raw
+ * list (ns_raw/ns_rawend, byte count in ns_cc) and nfsrv_getstream() is
+ * called to cut records out of it. For other socket types each received
+ * packet, with the name mbuf from soreceive() in front when there is
+ * one, is realigned and appended to the record list (ns_rec/ns_recend).
+ * SLP_NEEDQ is set when receiving is put off (records already queued on
+ * a stream socket, EWOULDBLOCK, or a non-EPERM error from
+ * nfsrv_getstream()). SLP_DISCONN is set on any other failed stream
+ * receive, on EPERM from nfsrv_getstream(), and on a non-EWOULDBLOCK
+ * error for a PR_CONNREQUIRED protocol.
+ * When waitflag is M_DONTWAIT and there is a record or one of those
+ * flags pending, nfsrv_wakenfsd() is called.
+ */
+void
+nfsrv_rcv(so, arg, waitflag)
+	struct socket *so;
+	caddr_t arg;
+	int waitflag;
+{
+	register struct nfssvc_sock *slp = (struct nfssvc_sock *)arg;
+	register struct mbuf *m;
+	struct mbuf *mp, *nam;
+	struct uio auio;
+	int flags, error;
+
+	if ((slp->ns_flag & SLP_VALID) == 0)
+		return;
+#ifdef notdef
+	/*
+	 * Define this to test for nfsds handling this under heavy load.
+	 */
+	if (waitflag == M_DONTWAIT) {
+		slp->ns_flag |= SLP_NEEDQ; goto dorecs;
+	}
+#endif
+	auio.uio_procp = NULL;
+	if (so->so_type == SOCK_STREAM) {
+		/*
+		 * If there are already records on the queue, defer soreceive()
+		 * to an nfsd so that there is feedback to the TCP layer that
+		 * the nfs servers are heavily loaded.
+		 */
+		if (slp->ns_rec && waitflag == M_DONTWAIT) {
+			slp->ns_flag |= SLP_NEEDQ;
+			goto dorecs;
+		}
+
+		/*
+		 * Do soreceive().
+		 * uio_resid is preset to 1000000000 so that the number of
+		 * bytes received can be computed afterwards as
+		 * 1000000000 - auio.uio_resid.
+		 */
+		auio.uio_resid = 1000000000;
+		flags = MSG_DONTWAIT;
+		error = soreceive(so, &nam, &auio, &mp, (struct mbuf **)0, &flags);
+		if (error || mp == (struct mbuf *)0) {
+			if (error == EWOULDBLOCK)
+				slp->ns_flag |= SLP_NEEDQ;
+			else
+				slp->ns_flag |= SLP_DISCONN;
+			goto dorecs;
+		}
+		m = mp;
+		if (slp->ns_rawend) {	/* append to the existing raw list */
+			slp->ns_rawend->m_next = m;
+			slp->ns_cc += 1000000000 - auio.uio_resid;
+		} else {
+			slp->ns_raw = m;
+			slp->ns_cc = 1000000000 - auio.uio_resid;
+		}
+		while (m->m_next)
+			m = m->m_next;
+		slp->ns_rawend = m;
+
+		/*
+		 * Now try and parse record(s) out of the raw stream data.
+		 */
+		if (error = nfsrv_getstream(slp, waitflag)) {
+			if (error == EPERM)
+				slp->ns_flag |= SLP_DISCONN;
+			else
+				slp->ns_flag |= SLP_NEEDQ;
+		}
+	} else {
+		do {	/* keep receiving until soreceive() hands back no data */
+			auio.uio_resid = 1000000000;
+			flags = MSG_DONTWAIT;
+			error = soreceive(so, &nam, &auio, &mp,
+						(struct mbuf **)0, &flags);
+			if (mp) {
+				nfs_realign(mp, 10 * NFSX_UNSIGNED);
+				if (nam) {	/* name mbuf goes in front of the data */
+					m = nam;
+					m->m_next = mp;
+				} else
+					m = mp;
+				if (slp->ns_recend)
+					slp->ns_recend->m_nextpkt = m;
+				else
+					slp->ns_rec = m;
+				slp->ns_recend = m;
+				m->m_nextpkt = (struct mbuf *)0;
+			}
+			if (error) {
+				if ((so->so_proto->pr_flags & PR_CONNREQUIRED)
+					&& error != EWOULDBLOCK) {
+					slp->ns_flag |= SLP_DISCONN;
+					goto dorecs;
+				}
+			}
+		} while (mp);
+	}
+
+	/*
+	 * Now try and process the request records, non-blocking.
+	 */
+dorecs:
+	if (waitflag == M_DONTWAIT &&
+		(slp->ns_rec || (slp->ns_flag & (SLP_NEEDQ | SLP_DISCONN))))
+		nfsrv_wakenfsd(slp);
+}
+
+/*
+ * Try and extract an RPC request from the mbuf data list received on a
+ * stream socket. The "waitflag" argument indicates whether or not it
+ * can sleep.
+ *
+ * The raw data is on ns_raw, ns_cc bytes in total. Each record is
+ * preceded by an NFSX_UNSIGNED byte record mark whose value, with the
+ * top bit masked off, is the record length; it is kept in ns_reclen
+ * until that many bytes are available. Each complete record is cut off
+ * the raw list, realigned and appended to the ns_rec/ns_recend list.
+ * NOTE(review): the top bit of the record mark is discarded and not
+ * otherwise examined - confirm that is intended.
+ *
+ * Returns 0 when there is no further complete record, EPERM when a
+ * record length is outside NFS_MINPACKET..NFS_MAXPACKET, and
+ * EWOULDBLOCK when m_copym() returned no mbuf. SLP_GETSTREAM is set
+ * while the function runs and cleared on every return; finding it
+ * already set on entry is a panic.
+ */
+nfsrv_getstream(slp, waitflag)
+	register struct nfssvc_sock *slp;
+	int waitflag;
+{
+	register struct mbuf *m;
+	register char *cp1, *cp2;
+	register int len;
+	struct mbuf *om, *m2, *recm;
+	u_long recmark;
+
+	if (slp->ns_flag & SLP_GETSTREAM)
+		panic("nfs getstream");
+	slp->ns_flag |= SLP_GETSTREAM;
+	for (;;) {
+	    if (slp->ns_reclen == 0) {	/* no record mark read yet */
+		if (slp->ns_cc < NFSX_UNSIGNED) {
+			slp->ns_flag &= ~SLP_GETSTREAM;
+			return (0);
+		}
+		m = slp->ns_raw;
+		if (m->m_len >= NFSX_UNSIGNED) {
+			bcopy(mtod(m, caddr_t), (caddr_t)&recmark, NFSX_UNSIGNED);
+			m->m_data += NFSX_UNSIGNED;
+			m->m_len -= NFSX_UNSIGNED;
+		} else {	/* mark is split across mbufs: gather it a byte at a time */
+			cp1 = (caddr_t)&recmark;
+			cp2 = mtod(m, caddr_t);
+			while (cp1 < ((caddr_t)&recmark) + NFSX_UNSIGNED) {
+				while (m->m_len == 0) {
+					m = m->m_next;
+					cp2 = mtod(m, caddr_t);
+				}
+				*cp1++ = *cp2++;
+				m->m_data++;
+				m->m_len--;
+			}
+		}
+		slp->ns_cc -= NFSX_UNSIGNED;
+		slp->ns_reclen = ntohl(recmark) & ~0x80000000;
+		if (slp->ns_reclen < NFS_MINPACKET || slp->ns_reclen > NFS_MAXPACKET) {
+			slp->ns_flag &= ~SLP_GETSTREAM;
+			return (EPERM);
+		}
+	    }
+
+	    /*
+	     * Now get the record part.
+	     * Three cases: the raw list holds exactly one record (take
+	     * the whole list), more than one record (split the list at
+	     * ns_reclen bytes, copying the head of the mbuf that straddles
+	     * the boundary with m_copym()), or less than one record
+	     * (return and wait for more data).
+	     */
+	    if (slp->ns_cc == slp->ns_reclen) {
+		recm = slp->ns_raw;
+		slp->ns_raw = slp->ns_rawend = (struct mbuf *)0;
+		slp->ns_cc = slp->ns_reclen = 0;
+	    } else if (slp->ns_cc > slp->ns_reclen) {
+		len = 0;
+		m = slp->ns_raw;
+		om = (struct mbuf *)0;	/* last mbuf wholly inside the record */
+		while (len < slp->ns_reclen) {
+			if ((len + m->m_len) > slp->ns_reclen) {
+				m2 = m_copym(m, 0, slp->ns_reclen - len,
+					waitflag);
+				if (m2) {
+					if (om) {
+						om->m_next = m2;
+						recm = slp->ns_raw;
+					} else
+						recm = m2;
+					m->m_data += slp->ns_reclen - len;
+					m->m_len -= slp->ns_reclen - len;
+					len = slp->ns_reclen;
+				} else {
+					slp->ns_flag &= ~SLP_GETSTREAM;
+					return (EWOULDBLOCK);
+				}
+			} else if ((len + m->m_len) == slp->ns_reclen) {
+				om = m;
+				len += m->m_len;
+				m = m->m_next;
+				recm = slp->ns_raw;
+				om->m_next = (struct mbuf *)0;
+			} else {
+				om = m;
+				len += m->m_len;
+				m = m->m_next;
+			}
+		}
+		slp->ns_raw = m;
+		slp->ns_cc -= len;
+		slp->ns_reclen = 0;
+	    } else {
+		slp->ns_flag &= ~SLP_GETSTREAM;
+		return (0);
+	    }
+	    nfs_realign(recm, 10 * NFSX_UNSIGNED);
+	    if (slp->ns_recend)
+		slp->ns_recend->m_nextpkt = recm;
+	    else
+		slp->ns_rec = recm;
+	    slp->ns_recend = recm;
+	}
+}
+
+/*
+ * Take the first queued request record off the socket and parse its
+ * RPC header into nd.
+ * Returns ENOBUFS when the socket is not SLP_VALID or has no record
+ * queued, otherwise the result of nfs_getreq(). When nfs_getreq() fails
+ * the name mbuf (nd_nam) is freed here.
+ */
+nfsrv_dorec(slp, nd)
+	register struct nfssvc_sock *slp;
+	register struct nfsd *nd;
+{
+	register struct mbuf *rec;
+	int error;
+
+	if ((slp->ns_flag & SLP_VALID) == 0)
+		return (ENOBUFS);
+	rec = slp->ns_rec;
+	if (rec == (struct mbuf *)0)
+		return (ENOBUFS);
+
+	/* Unlink the record from the front of the ns_rec list. */
+	slp->ns_rec = rec->m_nextpkt;
+	if (slp->ns_rec == (struct mbuf *)0)
+		slp->ns_recend = (struct mbuf *)0;
+	else
+		rec->m_nextpkt = (struct mbuf *)0;
+
+	/* A leading MT_SONAME mbuf is split off and kept in nd_nam. */
+	if (rec->m_type != MT_SONAME) {
+		nd->nd_nam = (struct mbuf *)0;
+		nd->nd_md = nd->nd_mrep = rec;
+	} else {
+		nd->nd_nam = rec;
+		nd->nd_md = nd->nd_mrep = rec->m_next;
+		rec->m_next = (struct mbuf *)0;
+	}
+	nd->nd_dpos = mtod(nd->nd_md, caddr_t);
+
+	error = nfs_getreq(nd, TRUE);
+	if (error)
+		m_freem(nd->nd_nam);
+	return (error);
+}
+
+/*
+ * Parse an RPC request
+ * - verify it
+ * - fill in the cred struct.
+ *
+ * nd->nd_mrep, nd->nd_md and nd->nd_dpos describe the request mbuf list
+ * and the position to start parsing at. When has_header is non-zero the
+ * xid (saved in nd_retxid) and the call/reply word are parsed first;
+ * otherwise parsing starts at the RPC version word.
+ *
+ * Return values:
+ *	0 with nd_repstat == 0: nd_procnum is set. For NFSPROC_NULL nothing
+ *	  further is parsed. Otherwise nd_cr (auth_unix) or nd_cr.cr_uid,
+ *	  nd_authstr and nd_authlen with NFSD_NEEDAUTH (auth_kerb), plus
+ *	  nd_nqlflag and nd_duration are filled in and nd_md/nd_dpos are
+ *	  updated to the position following the header.
+ *	0 with nd_repstat != 0: nd_procnum is NFSPROC_NOOP and nd_repstat
+ *	  is one of ERPCMISMATCH, EPROGUNAVAIL, EPROGMISMATCH, EPROCUNAVAIL.
+ *	EBADRPC: not a call message, or a length out of range; mrep has
+ *	  been freed.
+ *	other: value of "error" at nfsmout (presumably set by the nfsm_*
+ *	  macros when they fail - confirm in nfsm_subs.h).
+ */
+nfs_getreq(nd, has_header)
+	register struct nfsd *nd;
+	int has_header;
+{
+	register int len, i;
+	register u_long *tl;
+	register long t1;
+	struct uio uio;
+	struct iovec iov;
+	caddr_t dpos, cp2;
+	u_long nfsvers, auth_type;
+	int error = 0, nqnfs = 0;
+	struct mbuf *mrep, *md;
+
+	mrep = nd->nd_mrep;
+	md = nd->nd_md;
+	dpos = nd->nd_dpos;
+	if (has_header) {
+		nfsm_dissect(tl, u_long *, 10*NFSX_UNSIGNED);
+		nd->nd_retxid = *tl++;
+		if (*tl++ != rpc_call) {
+			m_freem(mrep);
+			return (EBADRPC);
+		}
+	} else {
+		nfsm_dissect(tl, u_long *, 8*NFSX_UNSIGNED);
+	}
+	nd->nd_repstat = 0;
+	if (*tl++ != rpc_vers) {
+		nd->nd_repstat = ERPCMISMATCH;
+		nd->nd_procnum = NFSPROC_NOOP;
+		return (0);
+	}
+	nfsvers = nfs_vers;
+	if (*tl != nfs_prog) {
+		if (*tl == nqnfs_prog) {
+			nqnfs++;
+			nfsvers = nqnfs_vers;
+		} else {
+			nd->nd_repstat = EPROGUNAVAIL;
+			nd->nd_procnum = NFSPROC_NOOP;
+			return (0);
+		}
+	}
+	tl++;
+	if (*tl++ != nfsvers) {
+		nd->nd_repstat = EPROGMISMATCH;
+		nd->nd_procnum = NFSPROC_NOOP;
+		return (0);
+	}
+	nd->nd_procnum = fxdr_unsigned(u_long, *tl++);
+	if (nd->nd_procnum == NFSPROC_NULL)
+		return (0);
+	if (nd->nd_procnum >= NFS_NPROCS ||
+		(!nqnfs && nd->nd_procnum > NFSPROC_STATFS) ||
+		(*tl != rpc_auth_unix && *tl != rpc_auth_kerb)) {
+		nd->nd_repstat = EPROCUNAVAIL;
+		nd->nd_procnum = NFSPROC_NOOP;
+		return (0);
+	}
+	auth_type = *tl++;
+	len = fxdr_unsigned(int, *tl++);	/* length of the credential */
+	if (len < 0 || len > RPCAUTH_MAXSIZ) {
+		m_freem(mrep);
+		return (EBADRPC);
+	}
+
+	/*
+	 * Handle auth_unix or auth_kerb.
+	 * At this point tl addresses the two words following the
+	 * credential length, the last two words of the initial dissect.
+	 */
+	if (auth_type == rpc_auth_unix) {
+		len = fxdr_unsigned(int, *++tl);	/* skips one word; this length is bounded by NFS_MAXNAMLEN */
+		if (len < 0 || len > NFS_MAXNAMLEN) {
+			m_freem(mrep);
+			return (EBADRPC);
+		}
+		nfsm_adv(nfsm_rndup(len));
+		nfsm_dissect(tl, u_long *, 3*NFSX_UNSIGNED);
+		nd->nd_cr.cr_uid = fxdr_unsigned(uid_t, *tl++);
+		nd->nd_cr.cr_gid = fxdr_unsigned(gid_t, *tl++);
+		len = fxdr_unsigned(int, *tl);	/* number of group ids that follow */
+		if (len < 0 || len > RPCAUTH_UNIXGIDS) {
+			m_freem(mrep);
+			return (EBADRPC);
+		}
+		nfsm_dissect(tl, u_long *, (len + 2)*NFSX_UNSIGNED);	/* groups plus the two words after them */
+		for (i = 1; i <= len; i++)	/* stored from index 1; presumably cr_gid occupies cr_groups[0] - confirm */
+			if (i < NGROUPS)
+				nd->nd_cr.cr_groups[i] = fxdr_unsigned(gid_t, *tl++);
+			else
+				tl++;
+		nd->nd_cr.cr_ngroups = (len >= NGROUPS) ? NGROUPS : (len + 1);
+	} else if (auth_type == rpc_auth_kerb) {
+		nd->nd_cr.cr_uid = fxdr_unsigned(uid_t, *tl++);
+		nd->nd_authlen = fxdr_unsigned(int, *tl);
+		uio.uio_resid = nfsm_rndup(nd->nd_authlen);
+		if (uio.uio_resid > (len - 2 * NFSX_UNSIGNED)) {
+			m_freem(mrep);
+			return (EBADRPC);
+		}
+		uio.uio_offset = 0;
+		uio.uio_iov = &iov;
+		uio.uio_iovcnt = 1;
+		uio.uio_segflg = UIO_SYSSPACE;
+		iov.iov_base = (caddr_t)nd->nd_authstr;
+		iov.iov_len = RPCAUTH_MAXSIZ;
+		nfsm_mtouio(&uio, uio.uio_resid);
+		nfsm_dissect(tl, u_long *, 2 * NFSX_UNSIGNED);
+		nd->nd_flag |= NFSD_NEEDAUTH;
+	}
+
+	/*
+	 * Do we have any use for the verifier.
+	 * According to the "Remote Procedure Call Protocol Spec." it
+	 * should be AUTH_NULL, but some clients make it AUTH_UNIX?
+	 * For now, just skip over it
+	 * (the first of the two words at tl is stepped over unread, the
+	 * second is the length of the data to skip).
+	 */
+	len = fxdr_unsigned(int, *++tl);
+	if (len < 0 || len > RPCAUTH_MAXSIZ) {
+		m_freem(mrep);
+		return (EBADRPC);
+	}
+	if (len > 0) {
+		nfsm_adv(nfsm_rndup(len));
+	}
+
+	/*
+	 * For nqnfs, get piggybacked lease request.
+	 * Otherwise (and for NQNFSPROC_EVICTED) the lease fields are set
+	 * to NQL_NOVAL / NQ_MINLEASE.
+	 */
+	if (nqnfs && nd->nd_procnum != NQNFSPROC_EVICTED) {
+		nfsm_dissect(tl, u_long *, NFSX_UNSIGNED);
+		nd->nd_nqlflag = fxdr_unsigned(int, *tl);
+		if (nd->nd_nqlflag) {
+			nfsm_dissect(tl, u_long *, NFSX_UNSIGNED);
+			nd->nd_duration = fxdr_unsigned(int, *tl);
+		} else
+			nd->nd_duration = NQ_MINLEASE;
+	} else {
+		nd->nd_nqlflag = NQL_NOVAL;
+		nd->nd_duration = NQ_MINLEASE;
+	}
+	nd->nd_md = md;
+	nd->nd_dpos = dpos;
+	return (0);
+nfsmout:
+	return (error);
+}
+
+/*
+ * Hand the socket slp to an idle nfsd, if there is one.
+ * The nfsd list is walked from nfsd_head; the first nfsd with
+ * NFSD_WAITING set gets a reference on slp (ns_sref is bumped, nd_slp
+ * is set) and is woken up.
+ * SIDE EFFECT: when no waiting nfsd exists, SLP_DOREC is set on the
+ * socket and NFSD_CHECKSLP on nfsd_head, so that a running nfsd will go
+ * look for the work in the nfssvc_sock list.
+ * Nothing is done for a socket that is not SLP_VALID.
+ */
+void
+nfsrv_wakenfsd(slp)
+	struct nfssvc_sock *slp;
+{
+	register struct nfsd *nfsd;
+
+	if (!(slp->ns_flag & SLP_VALID))
+		return;
+	for (nfsd = nfsd_head.nd_next; nfsd != (struct nfsd *)&nfsd_head;
+	    nfsd = nfsd->nd_next) {
+		if ((nfsd->nd_flag & NFSD_WAITING) == 0)
+			continue;
+		nfsd->nd_flag &= ~NFSD_WAITING;
+		/* a waiting nfsd must not already own a socket */
+		if (nfsd->nd_slp)
+			panic("nfsd wakeup");
+		slp->ns_sref++;
+		nfsd->nd_slp = slp;
+		wakeup((caddr_t)nfsd);
+		return;
+	}
+	slp->ns_flag |= SLP_DOREC;
+	nfsd_head.nd_flag |= NFSD_CHECKSLP;
+}
+
+/*
+ * Print "nfs server <server>: <msg>" through tprintf().
+ * When p is non-null a tprintf session is opened on it first; otherwise
+ * tprintf() is called with a null session. The session is closed again
+ * before returning.
+ */
+nfs_msg(p, server, msg)
+	struct proc *p;
+	char *server, *msg;
+{
+	tpr_t tpr = NULL;
+
+	if (p)
+		tpr = tprintf_open(p);
+	tprintf(tpr, "nfs server %s: %s\n", server, msg);
+	tprintf_close(tpr);
+}
diff --git a/sys/nfsclient/nfs_subs.c b/sys/nfsclient/nfs_subs.c
new file mode 100644
index 00000000000..5778f7d7f01
--- /dev/null
+++ b/sys/nfsclient/nfs_subs.c
@@ -0,0 +1,1130 @@
+/*
+ * Copyright (c) 1989, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Rick Macklem at The University of Guelph.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)nfs_subs.c	8.3 (Berkeley) 1/4/94
+ */
+
+/*
+ * These functions support the macros and help fiddle mbuf chains for
+ * the nfs op functions. They do things like create the rpc header and
+ * copy data between mbuf chains and uio lists.
+ */
+#include <sys/param.h>
+#include <sys/proc.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/mount.h>
+#include <sys/vnode.h>
+#include <sys/namei.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+
+#include <nfs/rpcv2.h>
+#include <nfs/nfsv2.h>
+#include <nfs/nfsnode.h>
+#include <nfs/nfs.h>
+#include <nfs/xdr_subs.h>
+#include <nfs/nfsm_subs.h>
+#include <nfs/nfsmount.h>
+#include <nfs/nqnfs.h>
+#include <nfs/nfsrtt.h>
+
+#include <miscfs/specfs/specdev.h>
+
+#include <netinet/in.h>
+#ifdef ISO
+#include <netiso/iso.h>
+#endif
+
+#define TRUE	1
+#define	FALSE	0
+
+/*
+ * Data items converted to xdr at startup, since they are constant
+ * This is kinda hokey, but may save a little time doing byte swaps
+ * (nfs_init() fills all of these in with txdr_unsigned()).
+ */
+u_long nfs_procids[NFS_NPROCS];
+u_long nfs_xdrneg1;
+u_long rpc_call, rpc_vers, rpc_reply, rpc_msgdenied, rpc_autherr,
+	rpc_mismatch, rpc_auth_unix, rpc_msgaccepted, rpc_rejectedcred,
+	rpc_auth_kerb;
+u_long nfs_vers, nfs_prog, nfs_true, nfs_false;
+
+/* And other global data */
+static u_long nfs_xid = 0;	/* last xid used; nfsm_rpchead() increments it and skips 0 */
+enum vtype ntov_type[7] = { VNON, VREG, VDIR, VBLK, VCHR, VLNK, VNON };	/* presumably indexed by the nfs file type - confirm */
+extern struct proc *nfs_iodwant[NFS_MAXASYNCDAEMON];
+extern struct nfsreq nfsreqh;
+extern int nqnfs_piggy[NFS_NPROCS];
+extern struct nfsrtt nfsrtt;
+extern time_t nqnfsstarttime;
+extern u_long nqnfs_prog, nqnfs_vers;
+extern int nqsrv_clockskew;
+extern int nqsrv_writeslack;
+extern int nqsrv_maxlease;
+
+/*
+ * Start building an rpc request packet: allocate the first mbuf for
+ * the nfs arguments and, for NQNFS mounts, put the lease request in it.
+ * hsiz is the size of the rest of the nfs request header and is only
+ * looked at to decide whether to attach a cluster (hsiz >= MINCLSIZE).
+ * The build position following whatever was written is returned
+ * through bposp; the function result is the mbuf being built in.
+ */
+struct mbuf *
+nfsm_reqh(vp, procid, hsiz, bposp)
+	struct vnode *vp;
+	u_long procid;
+	int hsiz;
+	caddr_t *bposp;
+{
+	register struct mbuf *mb;
+	register u_long *wp;
+	register caddr_t bpos;
+	struct mbuf *mb2;
+	struct nfsmount *nmp;
+	int leaseflag;
+
+	MGET(mb, M_WAIT, MT_DATA);
+	if (hsiz >= MINCLSIZE)
+		MCLGET(mb, M_WAIT);
+	mb->m_len = 0;
+	bpos = mtod(mb, caddr_t);
+
+	/*
+	 * On an NQNFS mount the request starts with either the lease
+	 * type followed by the lease term, or a single zero word when
+	 * no lease is wanted for this vnode/procedure.
+	 * (mb, mb2 and bpos are presumably used by name inside nfsm_build.)
+	 */
+	if (vp) {
+		nmp = VFSTONFS(vp->v_mount);
+		if (nmp->nm_flag & NFSMNT_NQNFS) {
+			leaseflag = NQNFS_NEEDLEASE(vp, procid);
+			if (leaseflag == 0) {
+				nfsm_build(wp, u_long *, NFSX_UNSIGNED);
+				*wp = 0;
+			} else {
+				nfsm_build(wp, u_long *, 2*NFSX_UNSIGNED);
+				*wp++ = txdr_unsigned(leaseflag);
+				*wp = txdr_unsigned(nmp->nm_leaseterm);
+			}
+		}
+	}
+
+	*bposp = bpos;
+	return (mb);
+}
+
+/*
+ * Build the RPC header and fill in the authorization info.
+ * The authorization string argument is only used when the credentials
+ * come from outside of the kernel.
+ * Returns the head of the mbuf list.
+ *
+ * cr: supplies the uid and groups for RPCAUTH_UNIX and the uid for
+ *	RPCAUTH_NQNFS.
+ * nqnfs: non-zero selects NQNFS_PROG/NQNFS_VER1 instead of
+ *	NFS_PROG/NFS_VER2.
+ * auth_len: for RPCAUTH_UNIX the size in bytes of the whole credential
+ *	body built here; for RPCAUTH_NQNFS the length of auth_str.
+ * mrest, mrest_len: the rest of the request and its length; mrest is
+ *	linked on after the header and mrest_len is added into the packet
+ *	header length.
+ * mbp: receives the last header mbuf (the one linked to mrest).
+ * xidp: receives the xid of this request in xdr form.
+ */
+struct mbuf *
+nfsm_rpchead(cr, nqnfs, procid, auth_type, auth_len, auth_str, mrest,
+	mrest_len, mbp, xidp)
+	register struct ucred *cr;
+	int nqnfs;
+	int procid;
+	int auth_type;
+	int auth_len;
+	char *auth_str;
+	struct mbuf *mrest;
+	int mrest_len;
+	struct mbuf **mbp;
+	u_long *xidp;
+{
+	register struct mbuf *mb;
+	register u_long *tl;
+	register caddr_t bpos;
+	register int i;
+	struct mbuf *mreq, *mb2;
+	int siz, grpsiz, authsiz;
+
+	authsiz = nfsm_rndup(auth_len);
+	if (auth_type == RPCAUTH_NQNFS)
+		authsiz += 2 * NFSX_UNSIGNED;	/* uid and length words precede the string */
+	MGETHDR(mb, M_WAIT, MT_DATA);
+	if ((authsiz + 10*NFSX_UNSIGNED) >= MINCLSIZE) {
+		MCLGET(mb, M_WAIT);
+	} else if ((authsiz + 10*NFSX_UNSIGNED) < MHLEN) {
+		MH_ALIGN(mb, authsiz + 10*NFSX_UNSIGNED);
+	} else {
+		MH_ALIGN(mb, 8*NFSX_UNSIGNED);
+	}
+	mb->m_len = 0;
+	mreq = mb;
+	bpos = mtod(mb, caddr_t);
+
+	/*
+	 * First the RPC header.
+	 * Eight words: xid, call, rpc version, program, version,
+	 * procedure, then the credential type and length filled in below.
+	 */
+	nfsm_build(tl, u_long *, 8*NFSX_UNSIGNED);
+	if (++nfs_xid == 0)	/* an xid of 0 is never used */
+		nfs_xid++;
+	*tl++ = *xidp = txdr_unsigned(nfs_xid);
+	*tl++ = rpc_call;
+	*tl++ = rpc_vers;
+	if (nqnfs) {
+		*tl++ = txdr_unsigned(NQNFS_PROG);
+		*tl++ = txdr_unsigned(NQNFS_VER1);
+	} else {
+		*tl++ = txdr_unsigned(NFS_PROG);
+		*tl++ = txdr_unsigned(NFS_VER2);
+	}
+	*tl++ = txdr_unsigned(procid);
+
+	/*
+	 * And then the authorization cred.
+	 */
+	*tl++ = txdr_unsigned(auth_type);
+	*tl = txdr_unsigned(authsiz);
+	switch (auth_type) {
+	case RPCAUTH_UNIX:
+		nfsm_build(tl, u_long *, auth_len);
+		*tl++ = 0;		/* stamp ?? */
+		*tl++ = 0;		/* NULL hostname */
+		*tl++ = txdr_unsigned(cr->cr_uid);
+		*tl++ = txdr_unsigned(cr->cr_groups[0]);
+		grpsiz = (auth_len >> 2) - 5;	/* words left after the 5 fixed ones */
+		*tl++ = txdr_unsigned(grpsiz);
+		for (i = 1; i <= grpsiz; i++)
+			*tl++ = txdr_unsigned(cr->cr_groups[i]);
+		break;
+	case RPCAUTH_NQNFS:
+		nfsm_build(tl, u_long *, 2*NFSX_UNSIGNED);
+		*tl++ = txdr_unsigned(cr->cr_uid);
+		*tl = txdr_unsigned(auth_len);
+		siz = auth_len;
+		while (siz > 0) {	/* copy auth_str, adding mbufs as each one fills */
+			if (M_TRAILINGSPACE(mb) == 0) {
+				MGET(mb2, M_WAIT, MT_DATA);
+				if (siz >= MINCLSIZE)
+					MCLGET(mb2, M_WAIT);
+				mb->m_next = mb2;
+				mb = mb2;
+				mb->m_len = 0;
+				bpos = mtod(mb, caddr_t);
+			}
+			i = min(siz, M_TRAILINGSPACE(mb));
+			bcopy(auth_str, bpos, i);
+			mb->m_len += i;
+			auth_str += i;
+			bpos += i;
+			siz -= i;
+		}
+		if ((siz = (nfsm_rndup(auth_len) - auth_len)) > 0) {	/* NOTE(review): pad written without checking M_TRAILINGSPACE(mb) - confirm room */
+			for (i = 0; i < siz; i++)
+				*bpos++ = '\0';
+			mb->m_len += siz;
+		}
+		break;
+	};
+	nfsm_build(tl, u_long *, 2*NFSX_UNSIGNED);	/* verifier: RPCAUTH_NULL with length 0 */
+	*tl++ = txdr_unsigned(RPCAUTH_NULL);
+	*tl = 0;
+	mb->m_next = mrest;
+	mreq->m_pkthdr.len = authsiz + 10*NFSX_UNSIGNED + mrest_len;
+	mreq->m_pkthdr.rcvif = (struct ifnet *)0;
+	*mbp = mb;
+	return (mreq);
+}
+
+/*
+ * copies mbuf chain to the uio scatter/gather list
+ *
+ * mrep, dpos: current mbuf and position within it; both are updated to
+ *	the position following the copied data and its padding.
+ * uiop: destination. bcopy() is used for UIO_SYSSPACE and copyout()
+ *	otherwise; uio_offset, uio_resid and the iovec are advanced.
+ * siz: number of bytes to copy. Afterwards the position is moved on a
+ *	further nfsm_rndup(siz) - siz bytes.
+ * Returns EFBIG if the uio has no iovec left, EBADRPC if the mbuf chain
+ * ends before siz bytes were copied, otherwise 0 or the result of
+ * nfs_adv(). The return value of copyout() is not checked.
+ */
+nfsm_mbuftouio(mrep, uiop, siz, dpos)
+	struct mbuf **mrep;
+	register struct uio *uiop;
+	int siz;
+	caddr_t *dpos;
+{
+	register char *mbufcp, *uiocp;
+	register int xfer, left, len;
+	register struct mbuf *mp;
+	long uiosiz, rem;
+	int error = 0;
+
+	mp = *mrep;
+	mbufcp = *dpos;
+	len = mtod(mp, caddr_t)+mp->m_len-mbufcp;	/* bytes left in the current mbuf */
+	rem = nfsm_rndup(siz)-siz;	/* pad bytes to step over at the end */
+	while (siz > 0) {
+		if (uiop->uio_iovcnt <= 0 || uiop->uio_iov == NULL)
+			return (EFBIG);
+		left = uiop->uio_iov->iov_len;
+		uiocp = uiop->uio_iov->iov_base;
+		if (left > siz)
+			left = siz;
+		uiosiz = left;	/* amount copied into this iovec on this pass */
+		while (left > 0) {
+			while (len == 0) {	/* current mbuf used up: go to the next one */
+				mp = mp->m_next;
+				if (mp == NULL)
+					return (EBADRPC);
+				mbufcp = mtod(mp, caddr_t);
+				len = mp->m_len;
+			}
+			xfer = (left > len) ? len : left;
+#ifdef notdef
+			/* Not Yet.. */
+			if (uiop->uio_iov->iov_op != NULL)
+				(*(uiop->uio_iov->iov_op))
+				(mbufcp, uiocp, xfer);
+			else
+#endif
+			if (uiop->uio_segflg == UIO_SYSSPACE)
+				bcopy(mbufcp, uiocp, xfer);
+			else
+				copyout(mbufcp, uiocp, xfer);
+			left -= xfer;
+			len -= xfer;
+			mbufcp += xfer;
+			uiocp += xfer;
+			uiop->uio_offset += xfer;
+			uiop->uio_resid -= xfer;
+		}
+		if (uiop->uio_iov->iov_len <= siz) {	/* iovec filled: move to the next one */
+			uiop->uio_iovcnt--;
+			uiop->uio_iov++;
+		} else {
+			uiop->uio_iov->iov_base += uiosiz;
+			uiop->uio_iov->iov_len -= uiosiz;
+		}
+		siz -= uiosiz;
+	}
+	*dpos = mbufcp;
+	*mrep = mp;
+	if (rem > 0) {
+		if (len < rem)
+			error = nfs_adv(mrep, dpos, rem, len);
+		else
+			*dpos += rem;
+	}
+	return (error);
+}
+
+/*
+ * copies a uio scatter/gather list to an mbuf chain...
+ *
+ * uiop: source. bcopy() is used for UIO_SYSSPACE and copyin()
+ *	otherwise; uio_offset, uio_resid and the iovec are advanced.
+ * mq: mbuf to append to. Data goes into its trailing space and then
+ *	into newly allocated mbufs (with clusters when siz > MLEN). On
+ *	return *mq is the mbuf last written to.
+ * siz: number of bytes to copy; nfsm_rndup(siz) - siz zero bytes are
+ *	appended after them.
+ * bpos: receives the position following the data and padding.
+ * Returns EINVAL if the uio has no iovec left, otherwise 0. The return
+ * value of copyin() is not checked.
+ */
+nfsm_uiotombuf(uiop, mq, siz, bpos)
+	register struct uio *uiop;
+	struct mbuf **mq;
+	int siz;
+	caddr_t *bpos;
+{
+	register char *uiocp;
+	register struct mbuf *mp, *mp2;
+	register int xfer, left, mlen;
+	int uiosiz, clflg, rem;
+	char *cp;
+
+	if (siz > MLEN)		/* or should it >= MCLBYTES ?? */
+		clflg = 1;
+	else
+		clflg = 0;
+	rem = nfsm_rndup(siz)-siz;	/* number of pad bytes to add at the end */
+	mp = mp2 = *mq;
+	while (siz > 0) {
+		if (uiop->uio_iovcnt <= 0 || uiop->uio_iov == NULL)
+			return (EINVAL);
+		left = uiop->uio_iov->iov_len;
+		uiocp = uiop->uio_iov->iov_base;
+		if (left > siz)
+			left = siz;
+		uiosiz = left;	/* amount taken from this iovec on this pass */
+		while (left > 0) {
+			mlen = M_TRAILINGSPACE(mp);
+			if (mlen == 0) {	/* current mbuf full: chain on a new one */
+				MGET(mp, M_WAIT, MT_DATA);
+				if (clflg)
+					MCLGET(mp, M_WAIT);
+				mp->m_len = 0;
+				mp2->m_next = mp;
+				mp2 = mp;
+				mlen = M_TRAILINGSPACE(mp);
+			}
+			xfer = (left > mlen) ? mlen : left;
+#ifdef notdef
+			/* Not Yet.. */
+			if (uiop->uio_iov->iov_op != NULL)
+				(*(uiop->uio_iov->iov_op))
+				(uiocp, mtod(mp, caddr_t)+mp->m_len, xfer);
+			else
+#endif
+			if (uiop->uio_segflg == UIO_SYSSPACE)
+				bcopy(uiocp, mtod(mp, caddr_t)+mp->m_len, xfer);
+			else
+				copyin(uiocp, mtod(mp, caddr_t)+mp->m_len, xfer);
+			mp->m_len += xfer;
+			left -= xfer;
+			uiocp += xfer;
+			uiop->uio_offset += xfer;
+			uiop->uio_resid -= xfer;
+		}
+		if (uiop->uio_iov->iov_len <= siz) {	/* iovec consumed: move to the next one */
+			uiop->uio_iovcnt--;
+			uiop->uio_iov++;
+		} else {
+			uiop->uio_iov->iov_base += uiosiz;
+			uiop->uio_iov->iov_len -= uiosiz;
+		}
+		siz -= uiosiz;
+	}
+	if (rem > 0) {
+		if (rem > M_TRAILINGSPACE(mp)) {	/* no room for the pad: put it in a fresh mbuf */
+			MGET(mp, M_WAIT, MT_DATA);
+			mp->m_len = 0;
+			mp2->m_next = mp;
+		}
+		cp = mtod(mp, caddr_t)+mp->m_len;
+		for (left = 0; left < rem; left++)
+			*cp++ = '\0';
+		mp->m_len += rem;
+		*bpos = cp;
+	} else
+		*bpos = mtod(mp, caddr_t)+mp->m_len;
+	*mq = mp;
+	return (0);
+}
+
+/*
+ * Help break down an mbuf chain by setting the first siz bytes contiguous
+ * pointed to by returned val.
+ * This is used by the macros nfsm_dissect and nfsm_dissecton for tough
+ * cases. (The macros use the vars. dpos and dpos2)
+ *
+ * mdp, dposp: current mbuf and position within it; both are updated.
+ * siz: number of contiguous bytes wanted.
+ * left: number of bytes remaining in the current mbuf at *dposp.
+ * cp2: receives the address of the siz contiguous bytes.
+ * When the current mbuf holds at least siz bytes, *cp2 is simply *dposp
+ * and *dposp is moved on by siz. Otherwise a new mbuf is linked in after
+ * the current one and the siz bytes are gathered into it from the
+ * current and following mbufs (it is a panic for siz to exceed MHLEN).
+ * Returns 0, or EBADRPC when the chain ends before siz bytes are found.
+ */
+nfsm_disct(mdp, dposp, siz, left, cp2)
+	struct mbuf **mdp;
+	caddr_t *dposp;
+	int siz;
+	int left;
+	caddr_t *cp2;
+{
+	register struct mbuf *mp, *mp2;
+	register int siz2, xfer;
+	register caddr_t p;
+
+	mp = *mdp;
+	while (left == 0) {	/* nothing left here: step to the next mbuf */
+		*mdp = mp = mp->m_next;
+		if (mp == NULL)
+			return (EBADRPC);
+		left = mp->m_len;
+		*dposp = mtod(mp, caddr_t);
+	}
+	if (left >= siz) {
+		*cp2 = *dposp;
+		*dposp += siz;
+	} else if (mp->m_next == NULL) {
+		return (EBADRPC);
+	} else if (siz > MHLEN) {
+		panic("nfs S too big");
+	} else {
+		MGET(mp2, M_WAIT, MT_DATA);
+		mp2->m_next = mp->m_next;
+		mp->m_next = mp2;
+		mp->m_len -= left;	/* the tail of the old mbuf moves into the new one */
+		mp = mp2;
+		*cp2 = p = mtod(mp, caddr_t);
+		bcopy(*dposp, p, left);		/* Copy what was left */
+		siz2 = siz-left;
+		p += left;
+		mp2 = mp->m_next;
+		/* Loop around copying up the siz2 bytes */
+		while (siz2 > 0) {
+			if (mp2 == NULL)
+				return (EBADRPC);
+			xfer = (siz2 > mp2->m_len) ? mp2->m_len : siz2;
+			if (xfer > 0) {
+				bcopy(mtod(mp2, caddr_t), p, xfer);
+				NFSMADV(mp2, xfer);
+				mp2->m_len -= xfer;
+				p += xfer;
+				siz2 -= xfer;
+			}
+			if (siz2 > 0)
+				mp2 = mp2->m_next;
+		}
+		mp->m_len = siz;
+		*mdp = mp2;
+		*dposp = mtod(mp2, caddr_t);
+	}
+	return (0);
+}
+
+/*
+ * Move the parse position forward in an mbuf chain.
+ * mdp/dposp are the current mbuf and position (both updated), offs is
+ * the number of bytes to skip and left the number of bytes remaining in
+ * the current mbuf. Returns 0, or EBADRPC if the chain runs out first.
+ */
+nfs_adv(mdp, dposp, offs, left)
+	struct mbuf **mdp;
+	caddr_t *dposp;
+	int offs;
+	int left;
+{
+	register struct mbuf *cur;
+	register int avail;
+
+	cur = *mdp;
+	/* Use up whole mbufs until the remaining offset fits in one. */
+	for (avail = left; avail < offs; avail = cur->m_len) {
+		offs -= avail;
+		cur = cur->m_next;
+		if (cur == NULL)
+			return (EBADRPC);
+	}
+	*mdp = cur;
+	*dposp = mtod(cur, caddr_t) + offs;
+	return (0);
+}
+
+/*
+ * Copy a string into mbufs for the hard cases...
+ *
+ * mb, bpos: mbuf being built and the build position in it; both are
+ *	updated to the last mbuf written and the position after the data.
+ * cp, siz: the string and its length in bytes. The length is written
+ *	first, in xdr form, followed by the string bytes; further mbufs
+ *	(with clusters when the remainder exceeds MLEN) are allocated as
+ *	needed.
+ * Always returns 0.
+ * NOTE(review): *mb is assigned from m1, which is only set inside the
+ * allocation loop, and the first bcopy() copies all of the space left in
+ * the current mbuf; both assume the string never fits in that space -
+ * confirm against the callers.
+ */
+nfsm_strtmbuf(mb, bpos, cp, siz)
+	struct mbuf **mb;
+	char **bpos;
+	char *cp;
+	long siz;
+{
+	register struct mbuf *m1, *m2;
+	long left, xfer, len, tlen;
+	u_long *tl;
+	int putsize;
+
+	putsize = 1;	/* set while the length word has still to be written */
+	m2 = *mb;
+	left = M_TRAILINGSPACE(m2);
+	if (left > 0) {
+		tl = ((u_long *)(*bpos));
+		*tl++ = txdr_unsigned(siz);
+		putsize = 0;
+		left -= NFSX_UNSIGNED;
+		m2->m_len += NFSX_UNSIGNED;
+		if (left > 0) {
+			bcopy(cp, (caddr_t) tl, left);
+			siz -= left;
+			cp += left;
+			m2->m_len += left;
+			left = 0;
+		}
+	}
+	/* Loop around adding mbufs */
+	while (siz > 0) {
+		MGET(m1, M_WAIT, MT_DATA);
+		if (siz > MLEN)
+			MCLGET(m1, M_WAIT);
+		m1->m_len = NFSMSIZ(m1);	/* used as the capacity until the real length is set below */
+		m2->m_next = m1;
+		m2 = m1;
+		tl = mtod(m1, u_long *);
+		tlen = 0;
+		if (putsize) {
+			*tl++ = txdr_unsigned(siz);
+			m1->m_len -= NFSX_UNSIGNED;
+			tlen = NFSX_UNSIGNED;
+			putsize = 0;
+		}
+		if (siz < m1->m_len) {
+			len = nfsm_rndup(siz);
+			xfer = siz;
+			if (xfer < len)
+				*(tl+(xfer>>2)) = 0;	/* zero the last word so the pad bytes are 0 */
+		} else {
+			xfer = len = m1->m_len;
+		}
+		bcopy(cp, (caddr_t) tl, xfer);
+		m1->m_len = len+tlen;
+		siz -= xfer;
+		cp += xfer;
+	}
+	*mb = m1;
+	*bpos = mtod(m1, caddr_t)+m1->m_len;
+	return (0);
+}
+
+/*
+ * Called once to initialize data structures...
+ *
+ * Converts the constant rpc and nfs values to xdr form, clears the
+ * nfs_iodwant table, initializes nfs_bufq, calls the nfsnode, server and
+ * server request cache init routines, sets up the nqnfs server state if
+ * nqnfsstarttime is still 0, makes the request list nfsreqh empty and
+ * calls nfs_timer().
+ */
+nfs_init()
+{
+	register int i;
+
+	nfsrtt.pos = 0;
+	rpc_vers = txdr_unsigned(RPC_VER2);
+	rpc_call = txdr_unsigned(RPC_CALL);
+	rpc_reply = txdr_unsigned(RPC_REPLY);
+	rpc_msgdenied = txdr_unsigned(RPC_MSGDENIED);
+	rpc_msgaccepted = txdr_unsigned(RPC_MSGACCEPTED);
+	rpc_mismatch = txdr_unsigned(RPC_MISMATCH);
+	rpc_autherr = txdr_unsigned(RPC_AUTHERR);
+	rpc_rejectedcred = txdr_unsigned(AUTH_REJECTCRED);
+	rpc_auth_unix = txdr_unsigned(RPCAUTH_UNIX);
+	rpc_auth_kerb = txdr_unsigned(RPCAUTH_NQNFS);
+	nfs_vers = txdr_unsigned(NFS_VER2);
+	nfs_prog = txdr_unsigned(NFS_PROG);
+	nfs_true = txdr_unsigned(TRUE);
+	nfs_false = txdr_unsigned(FALSE);
+	/* Loop thru nfs procids */
+	for (i = 0; i < NFS_NPROCS; i++)
+		nfs_procids[i] = txdr_unsigned(i);
+	/* Ensure async daemons disabled */
+	for (i = 0; i < NFS_MAXASYNCDAEMON; i++)
+		nfs_iodwant[i] = (struct proc *)0;
+	TAILQ_INIT(&nfs_bufq);
+	nfs_xdrneg1 = txdr_unsigned(-1);
+	nfs_nhinit();			/* Init the nfsnode table */
+	nfsrv_init(0);			/* Init server data structures */
+	nfsrv_initcache();		/* Init the server request cache */
+
+	/*
+	 * Initialize the nqnfs server stuff.
+	 * This part is skipped when nqnfsstarttime is already non-zero.
+	 */
+	if (nqnfsstarttime == 0) {
+		nqnfsstarttime = boottime.tv_sec + nqsrv_maxlease
+			+ nqsrv_clockskew + nqsrv_writeslack;
+		NQLOADNOVRAM(nqnfsstarttime);
+		nqnfs_prog = txdr_unsigned(NQNFS_PROG);
+		nqnfs_vers = txdr_unsigned(NQNFS_VER1);
+		nqthead.th_head[0] = &nqthead;	/* both links point back at the head */
+		nqthead.th_head[1] = &nqthead;
+		nqfhead = hashinit(NQLCHSZ, M_NQLEASE, &nqfheadhash);
+	}
+
+	/*
+	 * Initialize reply list and start timer
+	 */
+	nfsreqh.r_prev = nfsreqh.r_next = &nfsreqh;
+	nfs_timer();
+}
+
+/*
+ * Attribute cache routines.
+ * nfs_loadattrcache() - loads or updates the cache contents from attributes
+ *	that are on the mbuf list
+ * nfs_getattrcache() - returns valid attributes if found in cache, returns
+ *	error otherwise
+ */
+
+/*
+ * Load the attribute cache (that lives in the nfsnode entry) with
+ * the values on the mbuf list and
+ * Iff vaper not NULL
+ *    copy the attributes to *vaper
+ *
+ * vpp: the vnode the attributes belong to. *vpp is replaced when
+ *	checkalias() returns an existing vnode for a device node.
+ * mdp, dposp: mbuf and position of the attributes in the reply; both
+ *	are updated to the position following them.
+ * Returns 0, the error from nfsm_disct(), or EOPNOTSUPP for a fifo when
+ * FIFO is not defined.
+ */
+nfs_loadattrcache(vpp, mdp, dposp, vaper)
+	struct vnode **vpp;
+	struct mbuf **mdp;
+	caddr_t *dposp;
+	struct vattr *vaper;
+{
+	register struct vnode *vp = *vpp;
+	register struct vattr *vap;
+	register struct nfsv2_fattr *fp;
+	extern int (**spec_nfsv2nodeop_p)();
+	register struct nfsnode *np, *nq, **nhpp;
+	register long t1;
+	caddr_t dpos, cp2;
+	int error = 0, isnq;
+	struct mbuf *md;
+	enum vtype vtyp;
+	u_short vmode;
+	long rdev;
+	struct timespec mtime;
+	struct vnode *nvp;
+
+	md = *mdp;
+	dpos = *dposp;
+	t1 = (mtod(md, caddr_t) + md->m_len) - dpos;	/* bytes left in the current mbuf */
+	isnq = (VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NQNFS);
+	if (error = nfsm_disct(&md, &dpos, NFSX_FATTR(isnq), t1, &cp2))
+		return (error);
+	fp = (struct nfsv2_fattr *)cp2;
+	vtyp = nfstov_type(fp->fa_type);
+	vmode = fxdr_unsigned(u_short, fp->fa_mode);
+	if (vtyp == VNON || vtyp == VREG)	/* take the type from the mode bits instead */
+		vtyp = IFTOVT(vmode);
+	if (isnq) {
+		rdev = fxdr_unsigned(long, fp->fa_nqrdev);
+		fxdr_nqtime(&fp->fa_nqmtime, &mtime);
+	} else {
+		rdev = fxdr_unsigned(long, fp->fa_nfsrdev);
+		fxdr_nfstime(&fp->fa_nfsmtime, &mtime);
+	}
+	/*
+	 * If v_type == VNON it is a new node, so fill in the v_type,
+	 * n_mtime fields. Check to see if it represents a special 
+	 * device, and if so, check for a possible alias. Once the
+	 * correct vnode has been obtained, fill in the rest of the
+	 * information.
+	 * A VCHR with an rdev of 0xffffffff is turned into a VFIFO.
+	 */
+	np = VTONFS(vp);
+	if (vp->v_type == VNON) {
+		if (vtyp == VCHR && rdev == 0xffffffff)
+			vp->v_type = vtyp = VFIFO;
+		else
+			vp->v_type = vtyp;
+		if (vp->v_type == VFIFO) {
+#ifdef FIFO
+			extern int (**fifo_nfsv2nodeop_p)();
+			vp->v_op = fifo_nfsv2nodeop_p;
+#else
+			return (EOPNOTSUPP);
+#endif /* FIFO */
+		}
+		if (vp->v_type == VCHR || vp->v_type == VBLK) {
+			vp->v_op = spec_nfsv2nodeop_p;
+			if (nvp = checkalias(vp, (dev_t)rdev, vp->v_mount)) {
+				/*
+				 * Discard unneeded vnode, but save its nfsnode.
+				 * The nfsnode is first unlinked from its
+				 * n_forw/n_back chain and then handed to nvp.
+				 */
+				if (nq = np->n_forw)
+					nq->n_back = np->n_back;
+				*np->n_back = nq;
+				nvp->v_data = vp->v_data;
+				vp->v_data = NULL;
+				vp->v_op = spec_vnodeop_p;
+				vrele(vp);
+				vgone(vp);
+				/*
+				 * Reinitialize aliased node.
+				 * The nfsnode is linked back in at the head
+				 * of the chain returned by nfs_hash().
+				 */
+				np->n_vnode = nvp;
+				nhpp = (struct nfsnode **)nfs_hash(&np->n_fh);
+				if (nq = *nhpp)
+					nq->n_back = &np->n_forw;
+				np->n_forw = nq;
+				np->n_back = nhpp;
+				*nhpp = np;
+				*vpp = vp = nvp;
+			}
+		}
+		np->n_mtime = mtime.ts_sec;
+	}
+	vap = &np->n_vattr;
+	vap->va_type = vtyp;
+	vap->va_mode = (vmode & 07777);
+	vap->va_nlink = fxdr_unsigned(u_short, fp->fa_nlink);
+	vap->va_uid = fxdr_unsigned(uid_t, fp->fa_uid);
+	vap->va_gid = fxdr_unsigned(gid_t, fp->fa_gid);
+	vap->va_rdev = (dev_t)rdev;
+	vap->va_mtime = mtime;
+	vap->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0];
+	if (isnq) {
+		fxdr_hyper(&fp->fa_nqsize, &vap->va_size);
+		vap->va_blocksize = fxdr_unsigned(long, fp->fa_nqblocksize);
+		fxdr_hyper(&fp->fa_nqbytes, &vap->va_bytes);
+		vap->va_fileid = fxdr_unsigned(long, fp->fa_nqfileid);
+		fxdr_nqtime(&fp->fa_nqatime, &vap->va_atime);
+		vap->va_flags = fxdr_unsigned(u_long, fp->fa_nqflags);
+		fxdr_nqtime(&fp->fa_nqctime, &vap->va_ctime);
+		vap->va_gen = fxdr_unsigned(u_long, fp->fa_nqgen);
+		fxdr_hyper(&fp->fa_nqfilerev, &vap->va_filerev);
+	} else {
+		vap->va_size = fxdr_unsigned(u_long, fp->fa_nfssize);
+		vap->va_blocksize = fxdr_unsigned(long, fp->fa_nfsblocksize);
+		vap->va_bytes = fxdr_unsigned(long, fp->fa_nfsblocks) * NFS_FABLKSIZE;
+		vap->va_fileid = fxdr_unsigned(long, fp->fa_nfsfileid);
+		fxdr_nfstime(&fp->fa_nfsatime, &vap->va_atime);
+		vap->va_flags = 0;
+		vap->va_ctime.ts_sec = fxdr_unsigned(long, fp->fa_nfsctime.nfs_sec);
+		vap->va_ctime.ts_nsec = 0;
+		vap->va_gen = fxdr_unsigned(u_long, fp->fa_nfsctime.nfs_usec);	/* the usec word of ctime is stored as va_gen */
+		vap->va_filerev = 0;
+	}
+	if (vap->va_size != np->n_size) {	/* reconcile the received size with the local n_size */
+		if (vap->va_type == VREG) {
+			if (np->n_flag & NMODIFIED) {	/* locally modified: keep the larger of the two sizes */
+				if (vap->va_size < np->n_size)
+					vap->va_size = np->n_size;
+				else
+					np->n_size = vap->va_size;
+			} else
+				np->n_size = vap->va_size;
+			vnode_pager_setsize(vp, (u_long)np->n_size);
+		} else
+			np->n_size = vap->va_size;
+	}
+	np->n_attrstamp = time.tv_sec;	/* time at which the cache was loaded */
+	*dposp = dpos;
+	*mdp = md;
+	if (vaper != NULL) {
+		bcopy((caddr_t)vap, (caddr_t)vaper, sizeof(*vap));
+#ifdef notdef
+		if ((np->n_flag & NMODIFIED) && np->n_size > vap->va_size)
+		if (np->n_size > vap->va_size)
+			vaper->va_size = np->n_size;
+#endif
+		if (np->n_flag & NCHG) {	/* report the nfsnode's own atime/mtime in the copy */
+			if (np->n_flag & NACC) {
+				vaper->va_atime.ts_sec = np->n_atim.tv_sec;
+				vaper->va_atime.ts_nsec =
+				    np->n_atim.tv_usec * 1000;
+			}
+			if (np->n_flag & NUPD) {
+				vaper->va_mtime.ts_sec = np->n_mtim.tv_sec;
+				vaper->va_mtime.ts_nsec =
+				    np->n_mtim.tv_usec * 1000;
+			}
+		}
+	}
+	return (0);
+}
+
+/*
+ * Check the time stamp on the attributes cached in vp's nfsnode.
+ * If the cache is valid, copy contents to *vaper and return 0
+ * otherwise count an attribute cache miss and return ENOENT
+ */
+nfs_getattrcache(vp, vaper)
+	register struct vnode *vp;
+	struct vattr *vaper;
+{
+	register struct nfsnode *np = VTONFS(vp);
+	register struct vattr *vap;
+
+	if (VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NQLOOKLEASE) {
+		if (!NQNFS_CKCACHABLE(vp, NQL_READ) || np->n_attrstamp == 0) {
+			nfsstats.attrcache_misses++;
+			return (ENOENT);
+		}
+	} else if ((time.tv_sec - np->n_attrstamp) >= NFS_ATTRTIMEO(np)) {
+		nfsstats.attrcache_misses++;	/* cached copy is too old */
+		return (ENOENT);
+	}
+	nfsstats.attrcache_hits++;
+	vap = &np->n_vattr;
+	if (vap->va_size != np->n_size) {	/* the two sizes disagree */
+		if (vap->va_type == VREG) {
+			if (np->n_flag & NMODIFIED) {	/* use the larger one */
+				if (vap->va_size < np->n_size)
+					vap->va_size = np->n_size;
+				else
+					np->n_size = vap->va_size;
+			} else
+				np->n_size = vap->va_size;
+			vnode_pager_setsize(vp, (u_long)np->n_size);
+		} else
+			np->n_size = vap->va_size;
+	}
+	bcopy((caddr_t)vap, (caddr_t)vaper, sizeof(struct vattr));
+#ifdef notdef
+	if ((np->n_flag & NMODIFIED) == 0) {
+		np->n_size = vaper->va_size;
+		vnode_pager_setsize(vp, (u_long)np->n_size);
+	} else if (np->n_size > vaper->va_size)
+	if (np->n_size > vaper->va_size)
+		vaper->va_size = np->n_size;
+#endif
+	if (np->n_flag & NCHG) {	/* report the times kept in the nfsnode */
+		if (np->n_flag & NACC) {
+			vaper->va_atime.ts_sec = np->n_atim.tv_sec;
+			vaper->va_atime.ts_nsec = np->n_atim.tv_usec * 1000;
+		}
+		if (np->n_flag & NUPD) {
+			vaper->va_mtime.ts_sec = np->n_mtim.tv_sec;
+			vaper->va_mtime.ts_nsec = np->n_mtim.tv_usec * 1000;
+		}
+	}
+	return (0);
+}
+
+/*
+ * Set up nameidata for a lookup() call and do it; returns 0 or an errno
+ */
+nfs_namei(ndp, fhp, len, slp, nam, mdp, dposp, p)
+	register struct nameidata *ndp;
+	fhandle_t *fhp;
+	int len;
+	struct nfssvc_sock *slp;
+	struct mbuf *nam;
+	struct mbuf **mdp;
+	caddr_t *dposp;
+	struct proc *p;
+{
+	register int i, rem;
+	register struct mbuf *md;
+	register char *fromcp, *tocp;
+	struct vnode *dp;
+	int error, rdonly;
+	struct componentname *cnp = &ndp->ni_cnd;
+
+	MALLOC(cnp->cn_pnbuf, char *, len + 1, M_NAMEI, M_WAITOK);
+	/*
+	 * Copy the name from the mbuf list to cnp->cn_pnbuf
+	 * and set the various ndp fields appropriately.
+	 */
+	fromcp = *dposp;
+	tocp = cnp->cn_pnbuf;
+	md = *mdp;
+	rem = mtod(md, caddr_t) + md->m_len - fromcp;	/* left in this mbuf */
+	cnp->cn_hash = 0;
+	for (i = 0; i < len; i++) {
+		while (rem == 0) {	/* step to the next mbuf with data */
+			md = md->m_next;
+			if (md == NULL) {
+				error = EBADRPC;
+				goto out;
+			}
+			fromcp = mtod(md, caddr_t);
+			rem = md->m_len;
+		}
+		if (*fromcp == '\0' || *fromcp == '/') {	/* single component only */
+			error = EINVAL;
+			goto out;
+		}
+		cnp->cn_hash += (unsigned char)*fromcp;
+		*tocp++ = *fromcp++;
+		rem--;
+	}
+	*tocp = '\0';
+	*mdp = md;
+	*dposp = fromcp;
+	len = nfsm_rndup(len)-len;	/* NOTE(review): presumably the XDR pad */
+	if (len > 0) {
+		if (rem >= len)
+			*dposp += len;
+		else if (error = nfs_adv(mdp, dposp, len, rem))
+			goto out;
+	}
+	ndp->ni_pathlen = tocp - cnp->cn_pnbuf;
+	cnp->cn_nameptr = cnp->cn_pnbuf;
+	/*
+	 * Extract and set starting directory.
+	 */
+	if (error = nfsrv_fhtovp(fhp, FALSE, &dp, ndp->ni_cnd.cn_cred, slp,
+	    nam, &rdonly))
+		goto out;
+	if (dp->v_type != VDIR) {
+		vrele(dp);
+		error = ENOTDIR;
+		goto out;
+	}
+	ndp->ni_startdir = dp;
+	if (rdonly)
+		cnp->cn_flags |= (NOCROSSMOUNT | RDONLY);
+	else
+		cnp->cn_flags |= NOCROSSMOUNT;
+	/*
+	 * And call lookup() to do the real work
+	 */
+	cnp->cn_proc = p;
+	if (error = lookup(ndp))
+		goto out;
+	/*
+	 * Check for encountering a symbolic link
+	 */
+	if (cnp->cn_flags & ISSYMLINK) {
+		if ((cnp->cn_flags & LOCKPARENT) && ndp->ni_pathlen == 1)
+			vput(ndp->ni_dvp);
+		else
+			vrele(ndp->ni_dvp);
+		vput(ndp->ni_vp);
+		ndp->ni_vp = NULL;
+		error = EINVAL;
+		goto out;
+	}
+	/*
+	 * Check for saved name request (the name buffer is then not freed)
+	 */
+	if (cnp->cn_flags & (SAVENAME | SAVESTART)) {
+		cnp->cn_flags |= HASBUF;
+		return (0);
+	}
+out:
+	FREE(cnp->cn_pnbuf, M_NAMEI);
+	return (error);
+}
+
+/*
+ * A fiddled version of m_adj() that only trims "len" bytes off the back end
+ * and null fills the last "nul" bytes kept (null fill to a long boundary)
+ */
+void
+nfsm_adj(mp, len, nul)
+	struct mbuf *mp;
+	register int len;
+	int nul;
+{
+	register struct mbuf *m;
+	register int count, i;
+	register char *cp;
+
+	/*
+	 * Trim from tail.  Scan the mbuf chain,
+	 * calculating its length and finding the last mbuf.
+	 * If the adjustment only affects this mbuf, then just
+	 * adjust and return.  Otherwise, rescan and truncate
+	 * after the remaining size.
+	 */
+	count = 0;
+	m = mp;
+	for (;;) {
+		count += m->m_len;
+		if (m->m_next == (struct mbuf *)0)
+			break;
+		m = m->m_next;
+	}
+	if (m->m_len > len) {
+		m->m_len -= len;
+		if (nul > 0) {
+			cp = mtod(m, caddr_t)+m->m_len-nul;	/* NOTE(review): assumes m_len >= nul */
+			for (i = 0; i < nul; i++)
+				*cp++ = '\0';
+		}
+		return;
+	}
+	count -= len;
+	if (count < 0)
+		count = 0;
+	/*
+	 * Correct length for chain is "count".
+	 * Find the mbuf with last data, adjust its length,
+	 * and toss data from remaining mbufs on chain.
+	 */
+	for (m = mp; m; m = m->m_next) {
+		if (m->m_len >= count) {
+			m->m_len = count;
+			if (nul > 0) {
+				cp = mtod(m, caddr_t)+m->m_len-nul;	/* NOTE(review): assumes m_len >= nul */
+				for (i = 0; i < nul; i++)
+					*cp++ = '\0';
+			}
+			break;
+		}
+		count -= m->m_len;
+	}
+	while (m = m->m_next)	/* the mbufs stay on the chain, emptied */
+		m->m_len = 0;
+}
+
+/*
+ * nfsrv_fhtovp() - convert a fh to a vnode ptr (optionally locked)
+ * 	- look up fsid in mount list (if not found ret error)
+ *	- get vp and export rights by calling VFS_FHTOVP()
+ *	- if cred->cr_uid == 0 or MNT_EXPORTANON set it to credanon
+ *	- cr_ngroups is kept in step with any group list copied into cred
+ *	- if not lockflag unlock it with VOP_UNLOCK()
+ */
+nfsrv_fhtovp(fhp, lockflag, vpp, cred, slp, nam, rdonlyp)
+	fhandle_t *fhp;
+	int lockflag;
+	struct vnode **vpp;
+	struct ucred *cred;
+	struct nfssvc_sock *slp;
+	struct mbuf *nam;
+	int *rdonlyp;
+{
+	register struct mount *mp;
+	register struct nfsuid *uidp;
+	register int i;
+	struct ucred *credanon;
+	int error, exflags;
+
+	*vpp = (struct vnode *)0;
+	if ((mp = getvfs(&fhp->fh_fsid)) == NULL)
+		return (ESTALE);
+	if (error = VFS_FHTOVP(mp, &fhp->fh_fid, nam, vpp, &exflags, &credanon))
+		return (error);
+	/*
+	 * Check/setup credentials.
+	 */
+	if (exflags & MNT_EXKERB) {
+		uidp = slp->ns_uidh[NUIDHASH(cred->cr_uid)];
+		while (uidp) {
+			if (uidp->nu_uid == cred->cr_uid)
+				break;
+			uidp = uidp->nu_hnext;
+		}
+		if (uidp) {
+			cred->cr_uid = uidp->nu_cr.cr_uid;
+			for (i = 0; i < uidp->nu_cr.cr_ngroups && i < NGROUPS; i++)
+				cred->cr_groups[i] = uidp->nu_cr.cr_groups[i];
+			cred->cr_ngroups = i;	/* count must follow the list */
+		} else {
+			vput(*vpp);
+			return (NQNFS_AUTHERR);
+		}
+	} else if (cred->cr_uid == 0 || (exflags & MNT_EXPORTANON)) {
+		cred->cr_uid = credanon->cr_uid;
+		for (i = 0; i < credanon->cr_ngroups && i < NGROUPS; i++)
+			cred->cr_groups[i] = credanon->cr_groups[i];
+		cred->cr_ngroups = i;	/* count must follow the list */
+	}
+	*rdonlyp = (exflags & MNT_EXRDONLY) ? 1 : 0;
+	if (!lockflag)
+		VOP_UNLOCK(*vpp);
+	return (0);
+}
+
+/*
+ * Decide whether the address in mbuf "nam" names the same host as the
+ * saved address "haddr", comparing according to "family".
+ * Returns 1 when they are the same host and 0 otherwise; whenever there
+ * is any doubt the answer is 0.
+ * AF_INET is special cased: the 4 byte "struct in_addr" lives directly
+ * in haddr, so no address mbuf has to be saved just to store it.
+ */
+netaddr_match(family, haddr, nam)
+	int family;
+	union nethostaddr *haddr;
+	struct mbuf *nam;
+{
+	register struct sockaddr_in *sin;
+	int same;
+
+	same = 0;
+	if (family == AF_INET) {
+		/* Both the family tag and the host address must agree. */
+		sin = mtod(nam, struct sockaddr_in *);
+		same = (sin->sin_family == AF_INET &&
+		    sin->sin_addr.s_addr == haddr->had_inetaddr);
+	}
+#ifdef ISO
+	else if (family == AF_ISO) {
+		register struct sockaddr_iso *got, *want;
+
+		/*
+		 * A zero length address never matches; otherwise the
+		 * lengths must be equal before SAME_ISOADDR() is asked.
+		 */
+		got = mtod(nam, struct sockaddr_iso *);
+		want = mtod(haddr->had_nam, struct sockaddr_iso *);
+		same = (got->siso_family == AF_ISO &&
+		    got->siso_nlen > 0 &&
+		    got->siso_nlen == want->siso_nlen &&
+		    SAME_ISOADDR(got, want));
+	}
+#endif	/* ISO */
+	return (same);
+}
diff --git a/sys/nfsclient/nfs_vfsops.c b/sys/nfsclient/nfs_vfsops.c
new file mode 100644
index 00000000000..1f186760689
--- /dev/null
+++ b/sys/nfsclient/nfs_vfsops.c
@@ -0,0 +1,740 @@
+/*
+ * Copyright (c) 1989, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Rick Macklem at The University of Guelph.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)nfs_vfsops.c	8.3 (Berkeley) 1/4/94
+ */
+
+#include <sys/param.h>
+#include <sys/conf.h>
+#include <sys/ioctl.h>
+#include <sys/signal.h>
+#include <sys/proc.h>
+#include <sys/namei.h>
+#include <sys/vnode.h>
+#include <sys/kernel.h>
+#include <sys/mount.h>
+#include <sys/buf.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/systm.h>
+
+#include <net/if.h>
+#include <net/route.h>
+#include <netinet/in.h>
+
+#include <nfs/rpcv2.h>
+#include <nfs/nfsv2.h>
+#include <nfs/nfsnode.h>
+#include <nfs/nfsmount.h>
+#include <nfs/nfs.h>
+#include <nfs/xdr_subs.h>
+#include <nfs/nfsm_subs.h>
+#include <nfs/nfsdiskless.h>
+#include <nfs/nqnfs.h>
+
+/*
+ * nfs vfs operations (a positional initializer, so the order matters).
+ */
+struct vfsops nfs_vfsops = {
+	nfs_mount,
+	nfs_start,
+	nfs_unmount,
+	nfs_root,
+	nfs_quotactl,
+	nfs_statfs,
+	nfs_sync,
+	nfs_vget,
+	nfs_fhtovp,
+	nfs_vptofh,
+	nfs_init,	/* NOTE(review): not defined in this file */
+};
+
+/*
+ * This structure must be filled in by a primary bootstrap or bootstrap
+ * server for a diskless/dataless machine. It is initialized below just
+ * to ensure that it is allocated to initialized data (.data not .bss).
+ */
+struct nfs_diskless nfs_diskless = { 0 };	/* read by nfs_mountroot() */
+
+extern u_long nfs_procids[NFS_NPROCS];
+extern u_long nfs_prog, nfs_vers;
+void nfs_disconnect __P((struct nfsmount *));
+void nfsargs_ntoh __P((struct nfs_args *));	/* defined below */
+static struct mount *nfs_mountdiskless __P((char *, char *, int,
+    struct sockaddr_in *, struct nfs_args *, register struct vnode **));
+
+#define TRUE	1
+#define	FALSE	0
+
+/*
+ * nfs statfs call: do a STATFS rpc on the mount's root fh and fill in *sbp
+ */
+int
+nfs_statfs(mp, sbp, p)
+	struct mount *mp;
+	register struct statfs *sbp;
+	struct proc *p;
+{
+	register struct vnode *vp;
+	register struct nfsv2_statfs *sfp;
+	register caddr_t cp;	/* presumably used by the nfsm_* macros */
+	register long t1;	/* presumably used by the nfsm_* macros */
+	caddr_t bpos, dpos, cp2;	/* presumably used by the nfsm_* macros */
+	int error = 0, isnq;
+	struct mbuf *mreq, *mrep, *md, *mb, *mb2;	/* likewise */
+	struct nfsmount *nmp;
+	struct ucred *cred;
+	struct nfsnode *np;
+
+	nmp = VFSTONFS(mp);
+	isnq = (nmp->nm_flag & NFSMNT_NQNFS);
+	if (error = nfs_nget(mp, &nmp->nm_fh, &np))
+		return (error);
+	vp = NFSTOV(np);
+	nfsstats.rpccnt[NFSPROC_STATFS]++;
+	cred = crget();		/* private cred, freed before returning */
+	cred->cr_ngroups = 1;
+	nfsm_reqhead(vp, NFSPROC_STATFS, NFSX_FH);
+	nfsm_fhtom(vp);
+	nfsm_request(vp, NFSPROC_STATFS, p, cred);
+	nfsm_dissect(sfp, struct nfsv2_statfs *, NFSX_STATFS(isnq));
+	sbp->f_type = MOUNT_NFS;
+	sbp->f_flags = nmp->nm_flag;
+	sbp->f_iosize = NFS_MAXDGRAMDATA;
+	sbp->f_bsize = fxdr_unsigned(long, sfp->sf_bsize);
+	sbp->f_blocks = fxdr_unsigned(long, sfp->sf_blocks);
+	sbp->f_bfree = fxdr_unsigned(long, sfp->sf_bfree);
+	sbp->f_bavail = fxdr_unsigned(long, sfp->sf_bavail);
+	if (isnq) {	/* file counts are only read from an NQNFS reply */
+		sbp->f_files = fxdr_unsigned(long, sfp->sf_files);
+		sbp->f_ffree = fxdr_unsigned(long, sfp->sf_ffree);
+	} else {
+		sbp->f_files = 0;
+		sbp->f_ffree = 0;
+	}
+	if (sbp != &mp->mnt_stat) {	/* caller's own buffer: copy the names */
+		bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN);
+		bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN);
+	}
+	nfsm_reqdone;	/* NOTE(review): presumably where the macros bail out to */
+	vrele(vp);
+	crfree(cred);
+	return (error);
+}
+
+/*
+ * Mount a remote root fs via nfs. This depends on the info in the
+ * nfs_diskless structure that has been filled in properly by some primary
+ * bootstrap.
+ * It goes something like this:
+ * - do enough of "ifconfig" by calling ifioctl() so that the system
+ *   can talk to the server
+ * - If nfs_diskless.mygateway is filled in, use that address as
+ *   a default gateway.
+ * - hand craft the swap nfs vnode hanging off a fake mount point
+ *	if swdevt[0].sw_dev == NODEV
+ * - build the rootfs mount point via nfs_mountdiskless() and mountnfs().
+ */
+int
+nfs_mountroot()
+{
+	register struct mount *mp;
+	register struct nfs_diskless *nd = &nfs_diskless;
+	struct socket *so;
+	struct vnode *vp;
+	struct proc *p = curproc;		/* XXX */
+	int error, i;
+
+	/*
+	 * XXX time must be non-zero when we init the interface or else
+	 * the arp code will wedge...
+	 */
+	if (time.tv_sec == 0)
+		time.tv_sec = 1;
+
+#ifdef notyet
+	/* Set up swap credentials. */
+	proc0.p_ucred->cr_uid = ntohl(nd->swap_ucred.cr_uid);
+	proc0.p_ucred->cr_gid = ntohl(nd->swap_ucred.cr_gid);
+	if ((proc0.p_ucred->cr_ngroups = ntohs(nd->swap_ucred.cr_ngroups)) >
+		NGROUPS)
+		proc0.p_ucred->cr_ngroups = NGROUPS;
+	for (i = 0; i < proc0.p_ucred->cr_ngroups; i++)
+	    proc0.p_ucred->cr_groups[i] = ntohl(nd->swap_ucred.cr_groups[i]);
+#endif
+
+	/*
+	 * Do enough of ifconfig(8) so that the critical net interface can
+	 * talk to the server.
+	 */
+	if (error = socreate(nd->myif.ifra_addr.sa_family, &so, SOCK_DGRAM, 0))
+		panic("nfs_mountroot: socreate: %d", error);
+	if (error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, p))
+		panic("nfs_mountroot: SIOCAIFADDR: %d", error);
+	soclose(so);	/* the socket was only needed for the ioctl */
+
+	/*
+	 * If the gateway field is filled in, set it as the default route.
+	 */
+	if (nd->mygateway.sin_len != 0) {
+		struct sockaddr_in mask, sin;
+
+		bzero((caddr_t)&mask, sizeof(mask));
+		sin = mask;	/* destination starts out all zeroes too */
+		sin.sin_family = AF_INET;
+		sin.sin_len = sizeof(sin);
+		if (error = rtrequest(RTM_ADD, (struct sockaddr *)&sin,
+		    (struct sockaddr *)&nd->mygateway,
+		    (struct sockaddr *)&mask,
+		    RTF_UP | RTF_GATEWAY, (struct rtentry **)0))
+			panic("nfs_mountroot: RTM_ADD: %d", error);
+	}
+
+	/*
+	 * If swapping to an nfs node (indicated by swdevt[0].sw_dev == NODEV):
+	 * Create a fake mount point just for the swap vnode so that the
+	 * swap file can be on a different server from the rootfs.
+	 */
+	if (swdevt[0].sw_dev == NODEV) {
+		nd->swap_args.fh = (nfsv2fh_t *)nd->swap_fh;
+		(void) nfs_mountdiskless(nd->swap_hostnam, "/swap", 0,
+		    &nd->swap_saddr, &nd->swap_args, &vp);
+	
+		/*
+		 * Since the swap file is not the root dir of a file system,
+		 * hack it to a regular file.
+		 */
+		vp->v_type = VREG;
+		vp->v_flag = 0;
+		swapdev_vp = vp;
+		VREF(vp);
+		swdevt[0].sw_vp = vp;
+		swdevt[0].sw_nblks = ntohl(nd->swap_nblks);
+	} else if (bdevvp(swapdev, &swapdev_vp))
+		panic("nfs_mountroot: can't setup swapdev_vp");
+
+	/*
+	 * Create the rootfs mount point.
+	 */
+	nd->root_args.fh = (nfsv2fh_t *)nd->root_fh;
+	mp = nfs_mountdiskless(nd->root_hostnam, "/", MNT_RDONLY,
+	    &nd->root_saddr, &nd->root_args, &vp);
+
+	if (vfs_lock(mp))
+		panic("nfs_mountroot: vfs_lock");
+	TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list);
+	mp->mnt_flag |= MNT_ROOTFS;
+	mp->mnt_vnodecovered = NULLVP;
+	vfs_unlock(mp);
+	rootvp = vp;
+
+	/*
+	 * This is not really an nfs issue, but it is much easier to
+	 * set hostname here and then let the "/etc/rc.xxx" files
+	 * mount the right /var based upon its preset value.
+	 */
+	bcopy(nd->my_hostnam, hostname, MAXHOSTNAMELEN);
+	hostname[MAXHOSTNAMELEN - 1] = '\0';	/* force termination */
+	for (i = 0; i < MAXHOSTNAMELEN; i++)
+		if (hostname[i] == '\0')
+			break;
+	hostnamelen = i;	/* length up to the first nul */
+	inittodr(ntohl(nd->root_time));
+	return (0);
+}
+
+/*
+ * Internal version of mount system call for diskless setup; panics on error.
+ */
+static struct mount *
+nfs_mountdiskless(path, which, mountflag, sin, args, vpp)
+	char *path;	/* callers pass the server host name string here */
+	char *which;	/* callers pass the local mount point, "/" or "/swap" */
+	int mountflag;
+	struct sockaddr_in *sin;
+	struct nfs_args *args;
+	register struct vnode **vpp;
+{
+	register struct mount *mp;
+	register struct mbuf *m;
+	register int error;
+
+	mp = (struct mount *)malloc((u_long)sizeof(struct mount),
+	    M_MOUNT, M_NOWAIT);
+	if (mp == NULL)
+		panic("nfs_mountroot: %s mount malloc", which);
+	bzero((char *)mp, (u_long)sizeof(struct mount));
+	mp->mnt_op = &nfs_vfsops;
+	mp->mnt_flag = mountflag;
+
+	MGET(m, MT_SONAME, M_DONTWAIT);
+	if (m == NULL)
+		panic("nfs_mountroot: %s mount mbuf", which);
+	bcopy((caddr_t)sin, mtod(m, caddr_t), sin->sin_len);	/* NOTE(review): sin_len is not range checked */
+	m->m_len = sin->sin_len;
+	nfsargs_ntoh(args);	/* args arrive in net byte order */
+	if (error = mountnfs(args, mp, m, which, path, vpp))	/* which -> pth, path -> hst */
+		panic("nfs_mountroot: mount %s on %s: %d", path, which, error);
+
+	return (mp);
+}
+
+/*
+ * Convert the integer fields of the nfs_args structure from net byte order
+ * to host byte order, in place. Called by nfs_mountdiskless() above.
+ */
+void
+nfsargs_ntoh(nfsp)
+	register struct nfs_args *nfsp;
+{
+
+	NTOHL(nfsp->sotype);
+	NTOHL(nfsp->proto);
+	NTOHL(nfsp->flags);
+	NTOHL(nfsp->wsize);
+	NTOHL(nfsp->rsize);
+	NTOHL(nfsp->timeo);
+	NTOHL(nfsp->retrans);
+	NTOHL(nfsp->maxgrouplist);
+	NTOHL(nfsp->readahead);
+	NTOHL(nfsp->leaseterm);
+	NTOHL(nfsp->deadthresh);
+}
+
+/*
+ * VFS Operations.
+ *
+ * mount system call
+ * It seems a bit dumb to copyinstr() the host and path here and then
+ * bcopy() them in mountnfs(), but I wanted to detect errors before
+ * doing the sockargs() call because sockargs() allocates an mbuf and
+ * an error after that means that I have to release the mbuf.
+ */
+/* ARGSUSED */
+int
+nfs_mount(mp, path, data, ndp, p)
+	struct mount *mp;
+	char *path;
+	caddr_t data;
+	struct nameidata *ndp;
+	struct proc *p;
+{
+	int error;
+	struct nfs_args args;
+	struct mbuf *nam;
+	struct vnode *vp;
+	char pth[MNAMELEN], hst[MNAMELEN];
+	u_int len;
+	nfsv2fh_t nfh;
+
+	if (error = copyin(data, (caddr_t)&args, sizeof (struct nfs_args)))
+		return (error);
+	if (error = copyin((caddr_t)args.fh, (caddr_t)&nfh, sizeof (nfsv2fh_t)))
+		return (error);
+	if (error = copyinstr(path, pth, MNAMELEN-1, &len))
+		return (error);
+	bzero(&pth[len], MNAMELEN - len);	/* zero the unused tail */
+	if (error = copyinstr(args.hostname, hst, MNAMELEN-1, &len))
+		return (error);
+	bzero(&hst[len], MNAMELEN - len);	/* zero the unused tail */
+	/* sockargs() call must be after above copyin() calls */
+	if (error = sockargs(&nam, (caddr_t)args.addr,
+		args.addrlen, MT_SONAME))
+		return (error);
+	args.fh = &nfh;	/* use the copy made by copyin() above */
+	error = mountnfs(&args, mp, nam, pth, hst, &vp);
+	return (error);
+}
+
+/*
+ * Common code for mount and mountroot; nam is freed here on error or update
+ */
+int
+mountnfs(argp, mp, nam, pth, hst, vpp)
+	register struct nfs_args *argp;
+	register struct mount *mp;
+	struct mbuf *nam;
+	char *pth, *hst;
+	struct vnode **vpp;
+{
+	register struct nfsmount *nmp;
+	struct nfsnode *np;
+	int error;
+
+	if (mp->mnt_flag & MNT_UPDATE) {
+		nmp = VFSTONFS(mp);	/* not used further on this path */
+		/* update paths, file handles, etc, here	XXX */
+		m_freem(nam);
+		return (0);
+	} else {
+		MALLOC(nmp, struct nfsmount *, sizeof (struct nfsmount),
+		    M_NFSMNT, M_WAITOK);
+		bzero((caddr_t)nmp, sizeof (struct nfsmount));
+		mp->mnt_data = (qaddr_t)nmp;
+	}
+	getnewfsid(mp, MOUNT_NFS);
+	nmp->nm_mountp = mp;
+	nmp->nm_flag = argp->flags;
+	if ((nmp->nm_flag & (NFSMNT_NQNFS | NFSMNT_MYWRITE)) ==
+		(NFSMNT_NQNFS | NFSMNT_MYWRITE)) {
+		error = EPERM;	/* these two flags may not be combined */
+		goto bad;
+	}
+	if (nmp->nm_flag & NFSMNT_NQNFS)
+		/*
+		 * We have to set mnt_maxsymlinklen to a non-zero value so
+		 * that COMPAT_43 routines will know that we are setting
+		 * the d_type field in directories (and can zero it for
+		 * unsuspecting binaries).
+		 */
+		mp->mnt_maxsymlinklen = 1;
+	nmp->nm_timeo = NFS_TIMEO;	/* defaults; argp may override below */
+	nmp->nm_retry = NFS_RETRANS;
+	nmp->nm_wsize = NFS_WSIZE;
+	nmp->nm_rsize = NFS_RSIZE;
+	nmp->nm_numgrps = NFS_MAXGRPS;
+	nmp->nm_readahead = NFS_DEFRAHEAD;
+	nmp->nm_leaseterm = NQ_DEFLEASE;
+	nmp->nm_deadthresh = NQ_DEADTHRESH;
+	nmp->nm_tnext = (struct nfsnode *)nmp;
+	nmp->nm_tprev = (struct nfsnode *)nmp;
+	nmp->nm_inprog = NULLVP;
+	bcopy((caddr_t)argp->fh, (caddr_t)&nmp->nm_fh, sizeof(nfsv2fh_t));
+	mp->mnt_stat.f_type = MOUNT_NFS;
+	bcopy(hst, mp->mnt_stat.f_mntfromname, MNAMELEN);	/* reads MNAMELEN bytes */
+	bcopy(pth, mp->mnt_stat.f_mntonname, MNAMELEN);	/* reads MNAMELEN bytes */
+	nmp->nm_nam = nam;
+
+	if ((argp->flags & NFSMNT_TIMEO) && argp->timeo > 0) {
+		nmp->nm_timeo = (argp->timeo * NFS_HZ + 5) / 10;
+		if (nmp->nm_timeo < NFS_MINTIMEO)
+			nmp->nm_timeo = NFS_MINTIMEO;
+		else if (nmp->nm_timeo > NFS_MAXTIMEO)
+			nmp->nm_timeo = NFS_MAXTIMEO;
+	}
+
+	if ((argp->flags & NFSMNT_RETRANS) && argp->retrans > 1) {
+		nmp->nm_retry = argp->retrans;
+		if (nmp->nm_retry > NFS_MAXREXMIT)
+			nmp->nm_retry = NFS_MAXREXMIT;
+	}
+
+	if ((argp->flags & NFSMNT_WSIZE) && argp->wsize > 0) {
+		nmp->nm_wsize = argp->wsize;
+		/* Round down to multiple of blocksize */
+		nmp->nm_wsize &= ~0x1ff;
+		if (nmp->nm_wsize <= 0)
+			nmp->nm_wsize = 512;
+		else if (nmp->nm_wsize > NFS_MAXDATA)
+			nmp->nm_wsize = NFS_MAXDATA;
+	}
+	if (nmp->nm_wsize > MAXBSIZE)
+		nmp->nm_wsize = MAXBSIZE;
+
+	if ((argp->flags & NFSMNT_RSIZE) && argp->rsize > 0) {
+		nmp->nm_rsize = argp->rsize;
+		/* Round down to multiple of blocksize */
+		nmp->nm_rsize &= ~0x1ff;
+		if (nmp->nm_rsize <= 0)
+			nmp->nm_rsize = 512;
+		else if (nmp->nm_rsize > NFS_MAXDATA)
+			nmp->nm_rsize = NFS_MAXDATA;
+	}
+	if (nmp->nm_rsize > MAXBSIZE)
+		nmp->nm_rsize = MAXBSIZE;
+	if ((argp->flags & NFSMNT_MAXGRPS) && argp->maxgrouplist >= 0 &&
+		argp->maxgrouplist <= NFS_MAXGRPS)
+		nmp->nm_numgrps = argp->maxgrouplist;
+	if ((argp->flags & NFSMNT_READAHEAD) && argp->readahead >= 0 &&
+		argp->readahead <= NFS_MAXRAHEAD)
+		nmp->nm_readahead = argp->readahead;
+	if ((argp->flags & NFSMNT_LEASETERM) && argp->leaseterm >= 2 &&
+		argp->leaseterm <= NQ_MAXLEASE)
+		nmp->nm_leaseterm = argp->leaseterm;
+	if ((argp->flags & NFSMNT_DEADTHRESH) && argp->deadthresh >= 1 &&
+		argp->deadthresh <= NQ_NEVERDEAD)
+		nmp->nm_deadthresh = argp->deadthresh;
+	/* Set up the sockets and per-host congestion */
+	nmp->nm_sotype = argp->sotype;
+	nmp->nm_soproto = argp->proto;
+
+	/*
+	 * For Connection based sockets (TCP,...) defer the connect until
+	 * the first request, in case the server is not responding.
+	 */
+	if (nmp->nm_sotype == SOCK_DGRAM &&
+		(error = nfs_connect(nmp, (struct nfsreq *)0)))
+		goto bad;
+
+	/*
+	 * This is silly, but it has to be set so that vinifod() works.
+	 * We do not want to do an nfs_statfs() here since we can get
+	 * stuck on a dead server and we are holding a lock on the mount
+	 * point.
+	 */
+	mp->mnt_stat.f_iosize = NFS_MAXDGRAMDATA;
+	/*
+	 * A reference count is needed on the nfsnode representing the
+	 * remote root.  If this object is not persistent, then backward
+	 * traversals of the mount point (i.e. "..") will not work if
+	 * the nfsnode gets flushed out of the cache. Ufs does not have
+	 * this problem, because one can identify root inodes by their
+	 * number == ROOTINO (2).
+	 */
+	if (error = nfs_nget(mp, &nmp->nm_fh, &np))
+		goto bad;
+	*vpp = NFSTOV(np);
+
+	return (0);
+bad:
+	nfs_disconnect(nmp);
+	free((caddr_t)nmp, M_NFSMNT);	/* NOTE(review): mp->mnt_data still points here */
+	m_freem(nam);
+	return (error);
+}
+
+/*
+ * unmount system call; returns EINVAL, EBUSY, a vflush() error or 0
+ */
+int
+nfs_unmount(mp, mntflags, p)
+	struct mount *mp;
+	int mntflags;
+	struct proc *p;
+{
+	register struct nfsmount *nmp;
+	struct nfsnode *np;
+	struct vnode *vp;
+	int error, flags = 0;
+	extern int doforce;
+
+	if (mntflags & MNT_FORCE) {
+		if (!doforce || (mp->mnt_flag & MNT_ROOTFS))
+			return (EINVAL);
+		flags |= FORCECLOSE;
+	}
+	nmp = VFSTONFS(mp);
+	/*
+	 * Goes something like this..
+	 * - Check for activity on the root vnode (other than ourselves).
+	 * - Call vflush() to clear out vnodes for this file system,
+	 *   except for the root vnode.
+	 * - Decrement reference on the vnode representing remote root.
+	 * - Close the socket
+	 * - Free up the data structures
+	 */
+	/*
+	 * We need to decrement the ref. count on the nfsnode representing
+	 * the remote root.  See comment in mountnfs().  The VFS unmount()
+	 * has done vput on this vnode, otherwise we would get deadlock!
+	 */
+	if (error = nfs_nget(mp, &nmp->nm_fh, &np))
+		return(error);
+	vp = NFSTOV(np);
+	if (vp->v_usecount > 2) {	/* more than the two refs dropped below */
+		vput(vp);
+		return (EBUSY);
+	}
+
+	/*
+	 * Must handshake with nqnfs_clientd() if it is active.
+	 */
+	nmp->nm_flag |= NFSMNT_DISMINPROG;
+	while (nmp->nm_inprog != NULLVP)
+		(void) tsleep((caddr_t)&lbolt, PSOCK, "nfsdism", 0);
+	if (error = vflush(mp, vp, flags)) {
+		vput(vp);
+		nmp->nm_flag &= ~NFSMNT_DISMINPROG;	/* unmount abandoned */
+		return (error);
+	}
+
+	/*
+	 * We are now committed to the unmount.
+	 * For NQNFS, let the server daemon free the nfsmount structure.
+	 */
+	if (nmp->nm_flag & (NFSMNT_NQNFS | NFSMNT_KERB))
+		nmp->nm_flag |= NFSMNT_DISMNT;
+
+	/*
+	 * There are two reference counts to get rid of here.
+	 */
+	vrele(vp);
+	vrele(vp);
+	vgone(vp);
+	nfs_disconnect(nmp);
+	m_freem(nmp->nm_nam);
+
+	if ((nmp->nm_flag & (NFSMNT_NQNFS | NFSMNT_KERB)) == 0)
+		free((caddr_t)nmp, M_NFSMNT);	/* otherwise it is left allocated */
+	return (0);
+}
+
+/*
+ * Hand the root vnode of an nfs mount (marked VDIR and VROOT) back in *vpp
+ */
+int
+nfs_root(mp, vpp)
+	struct mount *mp;
+	struct vnode **vpp;
+{
+	struct nfsmount *nmp = VFSTONFS(mp);
+	struct nfsnode *rnp;
+	register struct vnode *rvp;
+	int error;
+
+	error = nfs_nget(mp, &nmp->nm_fh, &rnp);
+	if (error != 0)
+		return (error);
+	rvp = NFSTOV(rnp);
+	rvp->v_type = VDIR;
+	rvp->v_flag = VROOT;
+	*vpp = rvp;
+	return (0);
+}
+
+extern int syncprt;	/* not referenced by nfs_sync() below */
+
+/*
+ * Flush out the buffer cache
+ */
+/* ARGSUSED */
+int
+nfs_sync(mp, waitfor, cred, p)
+	struct mount *mp;
+	int waitfor;
+	struct ucred *cred;
+	struct proc *p;
+{
+	register struct vnode *vp;
+	int error, allerror = 0;
+
+	/*
+	 * Force stale buffer cache information to be flushed.
+	 */
+loop:
+	for (vp = mp->mnt_vnodelist.lh_first;
+	     vp != NULL;
+	     vp = vp->v_mntvnodes.le_next) {
+		/*
+		 * If the vnode that we are about to sync is no longer
+		 * associated with this mount point, start over.
+		 */
+		if (vp->v_mount != mp)
+			goto loop;
+		if (VOP_ISLOCKED(vp) || vp->v_dirtyblkhd.lh_first == NULL)
+			continue;	/* locked, or nothing dirty to write */
+		if (vget(vp, 1))
+			goto loop;	/* could not get it: rescan the list */
+		if (error = VOP_FSYNC(vp, cred, waitfor, p))
+			allerror = error;	/* only the last error is kept */
+		vput(vp);
+	}
+	return (allerror);
+}
+
+/*
+ * NFS flat namespace lookup.
+ * Currently unsupported: always returns EOPNOTSUPP.
+ */
+/* ARGSUSED */
+int
+nfs_vget(mp, ino, vpp)
+	struct mount *mp;
+	ino_t ino;
+	struct vnode **vpp;
+{
+
+	return (EOPNOTSUPP);
+}
+
+/*
+ * File handle to vnode pointer; should never happen, always returns EINVAL
+ */
+/* ARGSUSED */
+int
+nfs_fhtovp(mp, fhp, nam, vpp, exflagsp, credanonp)
+	register struct mount *mp;
+	struct fid *fhp;
+	struct mbuf *nam;
+	struct vnode **vpp;
+	int *exflagsp;
+	struct ucred **credanonp;
+{
+
+	return (EINVAL);
+}
+
+/*
+ * Vnode pointer to File handle, should never happen either (gives EINVAL)
+ */
+/* ARGSUSED */
+int
+nfs_vptofh(vp, fhp)
+	struct vnode *vp;
+	struct fid *fhp;
+{
+
+	return (EINVAL);
+}
+
+/*
+ * Vfs start routine, a no-op that always returns 0.
+ */
+/* ARGSUSED */
+int
+nfs_start(mp, flags, p)
+	struct mount *mp;
+	int flags;
+	struct proc *p;
+{
+
+	return (0);
+}
+
+/*
+ * Do operations associated with quotas, not supported (gives EOPNOTSUPP)
+ */
+/* ARGSUSED */
+int
+nfs_quotactl(mp, cmd, uid, arg, p)
+	struct mount *mp;
+	int cmd;
+	uid_t uid;
+	caddr_t arg;
+	struct proc *p;
+{
+
+	return (EOPNOTSUPP);
+}
diff --git a/sys/nfsclient/nfs_vnops.c b/sys/nfsclient/nfs_vnops.c
new file mode 100644
index 00000000000..a909b48dc67
--- /dev/null
+++ b/sys/nfsclient/nfs_vnops.c
@@ -0,0 +1,2539 @@
+/*
+ * Copyright (c) 1989, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Rick Macklem at The University of Guelph.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)nfs_vnops.c	8.5 (Berkeley) 2/13/94
+ */
+
+/*
+ * vnode op calls for sun nfs version 2
+ */
+
+#include <sys/param.h>
+#include <sys/proc.h>
+#include <sys/kernel.h>
+#include <sys/systm.h>
+#include <sys/mount.h>
+#include <sys/buf.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/conf.h>
+#include <sys/namei.h>
+#include <sys/vnode.h>
+#include <sys/map.h>
+#include <sys/dirent.h>
+
+#include <vm/vm.h>
+
+#include <miscfs/specfs/specdev.h>
+#include <miscfs/fifofs/fifo.h>
+
+#include <nfs/rpcv2.h>
+#include <nfs/nfsv2.h>
+#include <nfs/nfs.h>
+#include <nfs/nfsnode.h>
+#include <nfs/nfsmount.h>
+#include <nfs/xdr_subs.h>
+#include <nfs/nfsm_subs.h>
+#include <nfs/nqnfs.h>
+
+/* Defs */
+#define	TRUE	1
+#define	FALSE	0
+
+/*
+ * Global vfs data structures for nfs
+ *
+ * nfsv2_vnodeop_entries pairs each vnode operation descriptor with the
+ * routine that implements it for ordinary nfs vnodes.  The slot for
+ * vop_default_desc is bound to vn_default_error, and the list is
+ * terminated by a NULL/NULL pair.  nfsv2_vnodeop_opv_desc ties the table
+ * to the operations vector pointer nfsv2_vnodeop_p; presumably the vfs
+ * initialization code builds that vector from this table (not shown here).
+ */
+int (**nfsv2_vnodeop_p)();
+struct vnodeopv_entry_desc nfsv2_vnodeop_entries[] = {
+	{ &vop_default_desc, vn_default_error },
+	{ &vop_lookup_desc, nfs_lookup },	/* lookup */
+	{ &vop_create_desc, nfs_create },	/* create */
+	{ &vop_mknod_desc, nfs_mknod },		/* mknod */
+	{ &vop_open_desc, nfs_open },		/* open */
+	{ &vop_close_desc, nfs_close },		/* close */
+	{ &vop_access_desc, nfs_access },	/* access */
+	{ &vop_getattr_desc, nfs_getattr },	/* getattr */
+	{ &vop_setattr_desc, nfs_setattr },	/* setattr */
+	{ &vop_read_desc, nfs_read },		/* read */
+	{ &vop_write_desc, nfs_write },		/* write */
+	{ &vop_ioctl_desc, nfs_ioctl },		/* ioctl */
+	{ &vop_select_desc, nfs_select },	/* select */
+	{ &vop_mmap_desc, nfs_mmap },		/* mmap */
+	{ &vop_fsync_desc, nfs_fsync },		/* fsync */
+	{ &vop_seek_desc, nfs_seek },		/* seek */
+	{ &vop_remove_desc, nfs_remove },	/* remove */
+	{ &vop_link_desc, nfs_link },		/* link */
+	{ &vop_rename_desc, nfs_rename },	/* rename */
+	{ &vop_mkdir_desc, nfs_mkdir },		/* mkdir */
+	{ &vop_rmdir_desc, nfs_rmdir },		/* rmdir */
+	{ &vop_symlink_desc, nfs_symlink },	/* symlink */
+	{ &vop_readdir_desc, nfs_readdir },	/* readdir */
+	{ &vop_readlink_desc, nfs_readlink },	/* readlink */
+	{ &vop_abortop_desc, nfs_abortop },	/* abortop */
+	{ &vop_inactive_desc, nfs_inactive },	/* inactive */
+	{ &vop_reclaim_desc, nfs_reclaim },	/* reclaim */
+	{ &vop_lock_desc, nfs_lock },		/* lock */
+	{ &vop_unlock_desc, nfs_unlock },	/* unlock */
+	{ &vop_bmap_desc, nfs_bmap },		/* bmap */
+	{ &vop_strategy_desc, nfs_strategy },	/* strategy */
+	{ &vop_print_desc, nfs_print },		/* print */
+	{ &vop_islocked_desc, nfs_islocked },	/* islocked */
+	{ &vop_pathconf_desc, nfs_pathconf },	/* pathconf */
+	{ &vop_advlock_desc, nfs_advlock },	/* advlock */
+	{ &vop_blkatoff_desc, nfs_blkatoff },	/* blkatoff */
+	{ &vop_valloc_desc, nfs_valloc },	/* valloc */
+	{ &vop_reallocblks_desc, nfs_reallocblks },	/* reallocblks */
+	{ &vop_vfree_desc, nfs_vfree },		/* vfree */
+	{ &vop_truncate_desc, nfs_truncate },	/* truncate */
+	{ &vop_update_desc, nfs_update },	/* update */
+	{ &vop_bwrite_desc, vn_bwrite },	/* bwrite */
+	{ (struct vnodeop_desc*)NULL, (int(*)())NULL }
+};
+struct vnodeopv_desc nfsv2_vnodeop_opv_desc =
+	{ &nfsv2_vnodeop_p, nfsv2_vnodeop_entries };
+
+/*
+ * Special device vnode ops
+ *
+ * Operations table for special-device vnodes that live on an nfs mount.
+ * Most entries go straight to the spec_* routines.  close, access, read
+ * and write use the nfsspec_* variants, and attribute handling, fsync,
+ * inactive/reclaim, locking, print and update use the nfs_* routines.
+ * The list is terminated by a NULL/NULL pair.
+ */
+int (**spec_nfsv2nodeop_p)();
+struct vnodeopv_entry_desc spec_nfsv2nodeop_entries[] = {
+	{ &vop_default_desc, vn_default_error },
+	{ &vop_lookup_desc, spec_lookup },	/* lookup */
+	{ &vop_create_desc, spec_create },	/* create */
+	{ &vop_mknod_desc, spec_mknod },	/* mknod */
+	{ &vop_open_desc, spec_open },		/* open */
+	{ &vop_close_desc, nfsspec_close },	/* close */
+	{ &vop_access_desc, nfsspec_access },	/* access */
+	{ &vop_getattr_desc, nfs_getattr },	/* getattr */
+	{ &vop_setattr_desc, nfs_setattr },	/* setattr */
+	{ &vop_read_desc, nfsspec_read },	/* read */
+	{ &vop_write_desc, nfsspec_write },	/* write */
+	{ &vop_ioctl_desc, spec_ioctl },	/* ioctl */
+	{ &vop_select_desc, spec_select },	/* select */
+	{ &vop_mmap_desc, spec_mmap },		/* mmap */
+	{ &vop_fsync_desc, nfs_fsync },		/* fsync */
+	{ &vop_seek_desc, spec_seek },		/* seek */
+	{ &vop_remove_desc, spec_remove },	/* remove */
+	{ &vop_link_desc, spec_link },		/* link */
+	{ &vop_rename_desc, spec_rename },	/* rename */
+	{ &vop_mkdir_desc, spec_mkdir },	/* mkdir */
+	{ &vop_rmdir_desc, spec_rmdir },	/* rmdir */
+	{ &vop_symlink_desc, spec_symlink },	/* symlink */
+	{ &vop_readdir_desc, spec_readdir },	/* readdir */
+	{ &vop_readlink_desc, spec_readlink },	/* readlink */
+	{ &vop_abortop_desc, spec_abortop },	/* abortop */
+	{ &vop_inactive_desc, nfs_inactive },	/* inactive */
+	{ &vop_reclaim_desc, nfs_reclaim },	/* reclaim */
+	{ &vop_lock_desc, nfs_lock },		/* lock */
+	{ &vop_unlock_desc, nfs_unlock },	/* unlock */
+	{ &vop_bmap_desc, spec_bmap },		/* bmap */
+	{ &vop_strategy_desc, spec_strategy },	/* strategy */
+	{ &vop_print_desc, nfs_print },		/* print */
+	{ &vop_islocked_desc, nfs_islocked },	/* islocked */
+	{ &vop_pathconf_desc, spec_pathconf },	/* pathconf */
+	{ &vop_advlock_desc, spec_advlock },	/* advlock */
+	{ &vop_blkatoff_desc, spec_blkatoff },	/* blkatoff */
+	{ &vop_valloc_desc, spec_valloc },	/* valloc */
+	{ &vop_reallocblks_desc, spec_reallocblks },	/* reallocblks */
+	{ &vop_vfree_desc, spec_vfree },	/* vfree */
+	{ &vop_truncate_desc, spec_truncate },	/* truncate */
+	{ &vop_update_desc, nfs_update },	/* update */
+	{ &vop_bwrite_desc, vn_bwrite },	/* bwrite */
+	{ (struct vnodeop_desc*)NULL, (int(*)())NULL }
+};
+struct vnodeopv_desc spec_nfsv2nodeop_opv_desc =
+	{ &spec_nfsv2nodeop_p, spec_nfsv2nodeop_entries };
+
+/*
+ * Fifo vnode ops (only compiled when FIFO is defined)
+ *
+ * Operations table for fifo vnodes that live on an nfs mount.  It follows
+ * the same layout as the special-device table: most entries go to the
+ * fifo_* routines, close/read/write use the nfsfifo_* variants, access
+ * uses nfsspec_access, and attribute handling, fsync, inactive/reclaim,
+ * locking, print and update use the nfs_* routines.  strategy is bound
+ * to fifo_badop.
+ */
+#ifdef FIFO
+int (**fifo_nfsv2nodeop_p)();
+struct vnodeopv_entry_desc fifo_nfsv2nodeop_entries[] = {
+	{ &vop_default_desc, vn_default_error },
+	{ &vop_lookup_desc, fifo_lookup },	/* lookup */
+	{ &vop_create_desc, fifo_create },	/* create */
+	{ &vop_mknod_desc, fifo_mknod },	/* mknod */
+	{ &vop_open_desc, fifo_open },		/* open */
+	{ &vop_close_desc, nfsfifo_close },	/* close */
+	{ &vop_access_desc, nfsspec_access },	/* access */
+	{ &vop_getattr_desc, nfs_getattr },	/* getattr */
+	{ &vop_setattr_desc, nfs_setattr },	/* setattr */
+	{ &vop_read_desc, nfsfifo_read },	/* read */
+	{ &vop_write_desc, nfsfifo_write },	/* write */
+	{ &vop_ioctl_desc, fifo_ioctl },	/* ioctl */
+	{ &vop_select_desc, fifo_select },	/* select */
+	{ &vop_mmap_desc, fifo_mmap },		/* mmap */
+	{ &vop_fsync_desc, nfs_fsync },		/* fsync */
+	{ &vop_seek_desc, fifo_seek },		/* seek */
+	{ &vop_remove_desc, fifo_remove },	/* remove */
+	{ &vop_link_desc, fifo_link },		/* link */
+	{ &vop_rename_desc, fifo_rename },	/* rename */
+	{ &vop_mkdir_desc, fifo_mkdir },	/* mkdir */
+	{ &vop_rmdir_desc, fifo_rmdir },	/* rmdir */
+	{ &vop_symlink_desc, fifo_symlink },	/* symlink */
+	{ &vop_readdir_desc, fifo_readdir },	/* readdir */
+	{ &vop_readlink_desc, fifo_readlink },	/* readlink */
+	{ &vop_abortop_desc, fifo_abortop },	/* abortop */
+	{ &vop_inactive_desc, nfs_inactive },	/* inactive */
+	{ &vop_reclaim_desc, nfs_reclaim },	/* reclaim */
+	{ &vop_lock_desc, nfs_lock },		/* lock */
+	{ &vop_unlock_desc, nfs_unlock },	/* unlock */
+	{ &vop_bmap_desc, fifo_bmap },		/* bmap */
+	{ &vop_strategy_desc, fifo_badop },	/* strategy */
+	{ &vop_print_desc, nfs_print },		/* print */
+	{ &vop_islocked_desc, nfs_islocked },	/* islocked */
+	{ &vop_pathconf_desc, fifo_pathconf },	/* pathconf */
+	{ &vop_advlock_desc, fifo_advlock },	/* advlock */
+	{ &vop_blkatoff_desc, fifo_blkatoff },	/* blkatoff */
+	{ &vop_valloc_desc, fifo_valloc },	/* valloc */
+	{ &vop_reallocblks_desc, fifo_reallocblks },	/* reallocblks */
+	{ &vop_vfree_desc, fifo_vfree },	/* vfree */
+	{ &vop_truncate_desc, fifo_truncate },	/* truncate */
+	{ &vop_update_desc, nfs_update },	/* update */
+	{ &vop_bwrite_desc, vn_bwrite },	/* bwrite */
+	{ (struct vnodeop_desc*)NULL, (int(*)())NULL }
+};
+struct vnodeopv_desc fifo_nfsv2nodeop_opv_desc =
+	{ &fifo_nfsv2nodeop_p, fifo_nfsv2nodeop_entries };
+#endif /* FIFO */
+
+/* Defined elsewhere; called from nfs_lookup() to record a lease. */
+void nqnfs_clientlease();
+
+/*
+ * Global variables
+ *
+ * The extern objects are defined in other nfs source files.
+ * nfs_iodwant and nfs_numasync are defined here; judging by their names
+ * they track the asynchronous I/O daemons -- TODO confirm against the
+ * code that uses them.
+ */
+extern u_long nfs_procids[NFS_NPROCS];
+extern u_long nfs_prog, nfs_vers, nfs_true, nfs_false;
+extern char nfsiobuf[MAXPHYS+NBPG];
+struct proc *nfs_iodwant[NFS_MAXASYNCDAEMON];
+int nfs_numasync = 0;
+/* Size of a struct dirent without its trailing name buffer (MAXNAMLEN + 1 bytes). */
+#define	DIRHDSIZ	(sizeof (struct dirent) - (MAXNAMLEN + 1))
+
+/*
+ * nfs null call from vfs.
+ * Builds an NFSPROC_NULL request with no arguments for the mount that
+ * vp belongs to and issues it on behalf of procp with credentials cred.
+ * Returns the value left in `error', which starts out as 0.
+ */
+int
+nfs_null(vp, cred, procp)
+	struct vnode *vp;
+	struct ucred *cred;
+	struct proc *procp;
+{
+	/*
+	 * These locals are never named directly below; presumably the
+	 * nfsm_* macros refer to them (and to `error') by name, so they
+	 * must keep exactly these names.
+	 */
+	caddr_t bpos, dpos;
+	int error = 0;
+	struct mbuf *mreq, *mrep, *md, *mb;
+	
+	nfsm_reqhead(vp, NFSPROC_NULL, 0);
+	nfsm_request(vp, NFSPROC_NULL, procp, cred);
+	nfsm_reqdone;
+	return (error);
+}
+
+/*
+ * nfs access vnode op.
+ *
+ * On a plain nfs mount there is no access rpc, so the check is handed to
+ * nfsspec_access(), which works from the attributes.  That may disagree
+ * with the server (for example when it maps client uids to different
+ * server uids), but it beats trusting whatever happens to be cached.
+ *
+ * On an nqnfs mount the server is asked directly with an access rpc
+ * carrying three booleans: read, write and execute.  If file modes are
+ * changed on the server afterwards, later accesses may still fail.
+ */
+int
+nfs_access(ap)
+	struct vop_access_args /* {
+		struct vnode *a_vp;
+		int  a_mode;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	register struct vnode *vp = ap->a_vp;
+	register u_long *tl;
+	register caddr_t cp;
+	caddr_t bpos, dpos;
+	int error = 0;
+	struct mbuf *mreq, *mrep, *md, *mb, *mb2;
+
+	/* Not nqnfs: no rpc available, check locally. */
+	if ((VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NQNFS) == 0)
+		return (nfsspec_access(ap));
+
+	nfsstats.rpccnt[NQNFSPROC_ACCESS]++;
+	nfsm_reqhead(vp, NQNFSPROC_ACCESS, NFSX_FH + 3 * NFSX_UNSIGNED);
+	nfsm_fhtom(vp);
+	nfsm_build(tl, u_long *, 3 * NFSX_UNSIGNED);
+	/* One flag word per requested access type, in the order R, W, X. */
+	*tl++ = (ap->a_mode & VREAD) ? nfs_true : nfs_false;
+	*tl++ = (ap->a_mode & VWRITE) ? nfs_true : nfs_false;
+	*tl = (ap->a_mode & VEXEC) ? nfs_true : nfs_false;
+	nfsm_request(vp, NQNFSPROC_ACCESS, ap->a_p, ap->a_cred);
+	nfsm_reqdone;
+	return (error);
+}
+
+/*
+ * nfs open vnode op
+ *
+ * Only regular files, directories and symbolic links may be opened;
+ * anything else gets EACCES.
+ *
+ * For a vnode that is mapped as program text (VTEXT) the cached data is
+ * checked for staleness and, if stale, the buffers are flushed and the
+ * page cache is dropped:
+ *  - nqnfs: obtain a valid read lease, then compare lease and buffer
+ *    revisions;
+ *  - plain nfs: compare the cached modify time with fresh attributes.
+ * For any other vnode on a plain nfs mount the attribute cache is simply
+ * invalidated so that the next getattr goes to the server.
+ */
+/* ARGSUSED */
+int
+nfs_open(ap)
+	struct vop_open_args /* {
+		struct vnode *a_vp;
+		int  a_mode;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	register struct vnode *vp = ap->a_vp;
+	struct nfsnode *np = VTONFS(vp);
+	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
+	struct vattr vattr;
+	int error;
+
+	switch (vp->v_type) {
+	case VREG:
+	case VDIR:
+	case VLNK:
+		break;
+	default:
+		return (EACCES);
+	}
+
+	/* Not a text file: at most reset the attribute cache. */
+	if ((vp->v_flag & VTEXT) == 0) {
+		if ((nmp->nm_flag & NFSMNT_NQNFS) == 0)
+			np->n_attrstamp = 0; /* For Open/Close consistency */
+		return (0);
+	}
+
+	if (nmp->nm_flag & NFSMNT_NQNFS) {
+		/* A still-valid lease means the cache can be trusted. */
+		if (!NQNFS_CKINVALID(vp, np, NQL_READ))
+			return (0);
+		do {
+			error = nqnfs_getlease(vp, NQL_READ, ap->a_cred,
+			    ap->a_p);
+		} while (error == NQNFS_EXPIRED);
+		if (error)
+			return (error);
+		if (np->n_lrev == np->n_brev &&
+		    (np->n_flag & NQNFSNONCACHE) == 0)
+			return (0);
+		/* Revision moved on (or caching is off): drop cached data. */
+		error = nfs_vinvalbuf(vp, V_SAVE, ap->a_cred, ap->a_p, 1);
+		if (error == EINTR)
+			return (error);
+		(void) vnode_pager_uncache(vp);
+		np->n_brev = np->n_lrev;
+		return (0);
+	}
+
+	if (np->n_flag & NMODIFIED) {
+		/* Locally modified: flush first, then reload attributes. */
+		error = nfs_vinvalbuf(vp, V_SAVE, ap->a_cred, ap->a_p, 1);
+		if (error == EINTR)
+			return (error);
+		(void) vnode_pager_uncache(vp);
+		np->n_attrstamp = 0;
+		np->n_direofoffset = 0;
+		error = VOP_GETATTR(vp, &vattr, ap->a_cred, ap->a_p);
+		if (error)
+			return (error);
+		np->n_mtime = vattr.va_mtime.ts_sec;
+		return (0);
+	}
+
+	/* Unmodified locally: flush only if the modify time changed. */
+	error = VOP_GETATTR(vp, &vattr, ap->a_cred, ap->a_p);
+	if (error)
+		return (error);
+	if (np->n_mtime == vattr.va_mtime.ts_sec)
+		return (0);
+	np->n_direofoffset = 0;
+	error = nfs_vinvalbuf(vp, V_SAVE, ap->a_cred, ap->a_p, 1);
+	if (error == EINTR)
+		return (error);
+	(void) vnode_pager_uncache(vp);
+	np->n_mtime = vattr.va_mtime.ts_sec;
+	return (0);
+}
+
+/*
+ * nfs close vnode op
+ *
+ * Only regular files need work.  On a plain nfs mount a modified file
+ * has its buffers written back and invalidated and its attribute cache
+ * reset.  A write error recorded on the node (NWRITEERR) is reported to
+ * the caller here and the flag is cleared; it takes precedence over the
+ * result of the flush.
+ */
+/* ARGSUSED */
+int
+nfs_close(ap)
+	struct vop_close_args /* {
+		struct vnodeop_desc *a_desc;
+		struct vnode *a_vp;
+		int  a_fflag;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	register struct vnode *vp = ap->a_vp;
+	register struct nfsnode *np;
+	int error = 0;
+
+	if (vp->v_type != VREG)
+		return (0);
+
+	np = VTONFS(vp);
+	if ((np->n_flag & NMODIFIED) &&
+	    (VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NQNFS) == 0) {
+		error = nfs_vinvalbuf(vp, V_SAVE, ap->a_cred, ap->a_p, 1);
+		np->n_attrstamp = 0;
+	}
+	if (np->n_flag & NWRITEERR) {
+		np->n_flag &= ~NWRITEERR;
+		error = np->n_error;
+	}
+	return (error);
+}
+
+/*
+ * nfs getattr call from vfs.
+ * Fills in *ap->a_vap with the attributes of ap->a_vp.  The attribute
+ * cache is tried first; only on a miss is a GETATTR rpc sent and the
+ * reply loaded via nfsm_loadattr.  Returns 0 on success, otherwise the
+ * value left in `error'.
+ */
+int
+nfs_getattr(ap)
+	struct vop_getattr_args /* {
+		struct vnode *a_vp;
+		struct vattr *a_vap;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	register struct vnode *vp = ap->a_vp;
+	register struct nfsnode *np = VTONFS(vp);
+	/*
+	 * cp, bpos, dpos and the mbuf pointers are not named directly
+	 * below; presumably the nfsm_* macros use them by name.
+	 */
+	register caddr_t cp;
+	caddr_t bpos, dpos;
+	int error = 0;
+	struct mbuf *mreq, *mrep, *md, *mb, *mb2;
+	
+	/*
+	 * Update local times for special files.
+	 * (A pending access or update mark is turned into NCHG.)
+	 */
+	if (np->n_flag & (NACC | NUPD))
+		np->n_flag |= NCHG;
+	/*
+	 * First look in the cache.
+	 */
+	if (nfs_getattrcache(vp, ap->a_vap) == 0)
+		return (0);
+	/* Cache miss: ask the server. */
+	nfsstats.rpccnt[NFSPROC_GETATTR]++;
+	nfsm_reqhead(vp, NFSPROC_GETATTR, NFSX_FH);
+	nfsm_fhtom(vp);
+	nfsm_request(vp, NFSPROC_GETATTR, ap->a_p, ap->a_cred);
+	nfsm_loadattr(vp, ap->a_vap);
+	nfsm_reqdone;
+	return (error);
+}
+
+/*
+ * nfs setattr call.
+ *
+ * Sends a SETATTR rpc carrying the attributes in ap->a_vap for ap->a_vp.
+ * Before the rpc:
+ *  - if the size is being changed, cached buffers of a modified file are
+ *    invalidated (discarded when truncating to 0, written back
+ *    otherwise) and the new size is installed in the nfsnode and the
+ *    pager, with the previous size remembered in tsize;
+ *  - if only times are being changed on a modified file, its buffers
+ *    are written back.
+ * After the rpc the returned attributes are loaded and, for nqnfs with a
+ * cachable write lease, the buffer revision is advanced.
+ *
+ * If the rpc fails after the size was changed locally, the previous
+ * size is put back.  The size is NOT touched on failure when no size
+ * change was requested: tsize holds no saved value in that case.
+ *
+ * Returns 0 on success or an errno value.
+ */
+int
+nfs_setattr(ap)
+	struct vop_setattr_args /* {
+		struct vnodeop_desc *a_desc;
+		struct vnode *a_vp;
+		struct vattr *a_vap;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	register struct nfsv2_sattr *sp;
+	register caddr_t cp;
+	register long t1;
+	caddr_t bpos, dpos, cp2;
+	u_long *tl;
+	int error = 0, isnq;
+	struct mbuf *mreq, *mrep, *md, *mb, *mb2;
+	register struct vnode *vp = ap->a_vp;
+	register struct nfsnode *np = VTONFS(vp);
+	register struct vattr *vap = ap->a_vap;
+	u_quad_t frev, tsize = 0;
+
+	if (vap->va_size != VNOVAL || vap->va_mtime.ts_sec != VNOVAL ||
+		vap->va_atime.ts_sec != VNOVAL) {
+		if (vap->va_size != VNOVAL) {
+			if (np->n_flag & NMODIFIED) {
+			    /* Truncating to 0: cached data is worthless. */
+			    if (vap->va_size == 0)
+				error = nfs_vinvalbuf(vp, 0, ap->a_cred,
+					ap->a_p, 1);
+			    else
+				error = nfs_vinvalbuf(vp, V_SAVE, ap->a_cred,
+					ap->a_p, 1);
+			    if (error)
+				return (error);
+			}
+			/* Remember the old size so a failed rpc can undo this. */
+			tsize = np->n_size;
+			np->n_size = np->n_vattr.va_size = vap->va_size;
+			vnode_pager_setsize(vp, (u_long)np->n_size);
+		} else if ((np->n_flag & NMODIFIED) &&
+			(error = nfs_vinvalbuf(vp, V_SAVE, ap->a_cred,
+			 ap->a_p, 1)) == EINTR)
+			return (error);
+	}
+	nfsstats.rpccnt[NFSPROC_SETATTR]++;
+	isnq = (VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NQNFS);
+	nfsm_reqhead(vp, NFSPROC_SETATTR, NFSX_FH+NFSX_SATTR(isnq));
+	nfsm_fhtom(vp);
+	nfsm_build(sp, struct nfsv2_sattr *, NFSX_SATTR(isnq));
+	/* Fields the caller left unset go out as VNOVAL. */
+	if (vap->va_mode == (u_short)-1)
+		sp->sa_mode = VNOVAL;
+	else
+		sp->sa_mode = vtonfs_mode(vp->v_type, vap->va_mode);
+	if (vap->va_uid == (uid_t)-1)
+		sp->sa_uid = VNOVAL;
+	else
+		sp->sa_uid = txdr_unsigned(vap->va_uid);
+	if (vap->va_gid == (gid_t)-1)
+		sp->sa_gid = VNOVAL;
+	else
+		sp->sa_gid = txdr_unsigned(vap->va_gid);
+	if (isnq) {
+		txdr_hyper(&vap->va_size, &sp->sa_nqsize);
+		txdr_nqtime(&vap->va_atime, &sp->sa_nqatime);
+		txdr_nqtime(&vap->va_mtime, &sp->sa_nqmtime);
+		sp->sa_nqflags = txdr_unsigned(vap->va_flags);
+		sp->sa_nqrdev = VNOVAL;
+	} else {
+		sp->sa_nfssize = txdr_unsigned(vap->va_size);
+		txdr_nfstime(&vap->va_atime, &sp->sa_nfsatime);
+		txdr_nfstime(&vap->va_mtime, &sp->sa_nfsmtime);
+	}
+	nfsm_request(vp, NFSPROC_SETATTR, ap->a_p, ap->a_cred);
+	nfsm_loadattr(vp, (struct vattr *)0);
+	if ((VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NQNFS) &&
+	    NQNFS_CKCACHABLE(vp, NQL_WRITE)) {
+		nfsm_dissect(tl, u_long *, 2*NFSX_UNSIGNED);
+		fxdr_hyper(tl, &frev);
+		if (frev > np->n_brev)
+			np->n_brev = frev;
+	}
+	nfsm_reqdone;
+	/*
+	 * Undo the local size change if the rpc failed.  Guarded on
+	 * va_size so that a failed mode/owner/time change never writes the
+	 * unsaved tsize into the cached size.
+	 */
+	if (error && vap->va_size != VNOVAL) {
+		np->n_size = np->n_vattr.va_size = tsize;
+		vnode_pager_setsize(vp, (u_long)np->n_size);
+	}
+	return (error);
+}
+
+/*
+ * nfs lookup call, one step at a time...
+ * First look in cache
+ * If not found, unlock the directory nfsnode and do the rpc
+ *
+ * Looks up the component described by ap->a_cnp in directory ap->a_dvp
+ * and returns the resulting vnode through *ap->a_vpp (NULL on failure).
+ *
+ * Returns:
+ *	0		found; *a_vpp holds a referenced vnode
+ *	ENOTDIR		a_dvp is not a directory
+ *	EJUSTRETURN	last component of a CREATE or RENAME does not exist
+ *	EISDIR		RENAME target is the directory itself
+ *	other		error from the name cache validation or the rpc
+ *
+ * For every non-LOOKUP operation on the last component SAVENAME is set
+ * in cn_flags before returning, on hits as well as on most error paths.
+ */
+int
+nfs_lookup(ap)
+	struct vop_lookup_args /* {
+		struct vnodeop_desc *a_desc;
+		struct vnode *a_dvp;
+		struct vnode **a_vpp;
+		struct componentname *a_cnp;
+	} */ *ap;
+{
+	register struct componentname *cnp = ap->a_cnp;
+	register struct vnode *dvp = ap->a_dvp;
+	register struct vnode **vpp = ap->a_vpp;
+	register int flags = cnp->cn_flags;
+	register struct vnode *vdp;
+	register u_long *tl;
+	register caddr_t cp;
+	register long t1, t2;
+	struct nfsmount *nmp;
+	caddr_t bpos, dpos, cp2;
+	time_t reqtime;
+	struct mbuf *mreq, *mrep, *md, *mb, *mb2;
+	struct vnode *newvp;
+	long len;
+	nfsv2fh_t *fhp;
+	struct nfsnode *np;
+	int lockparent, wantparent, error = 0;
+	int nqlflag, cachable;
+	u_quad_t frev;
+
+	*vpp = NULL;
+	if (dvp->v_type != VDIR)
+		return (ENOTDIR);
+	lockparent = flags & LOCKPARENT;
+	wantparent = flags & (LOCKPARENT|WANTPARENT);
+	nmp = VFSTONFS(dvp->v_mount);
+	np = VTONFS(dvp);
+	/*
+	 * Name cache pass.  Any nonzero result other than ENOENT is
+	 * treated as a hit with *vpp filled in; the entry is then
+	 * validated before it is trusted.
+	 */
+	if ((error = cache_lookup(dvp, vpp, cnp)) && error != ENOENT) {
+		struct vattr vattr;
+		int vpid;
+
+		vdp = *vpp;
+		/* Snapshot the id so reuse of the vnode during vget() is noticed. */
+		vpid = vdp->v_id;
+		/*
+		 * See the comment starting `Step through' in ufs/ufs_lookup.c
+		 * for an explanation of the locking protocol
+		 */
+		if (dvp == vdp) {
+			VREF(vdp);
+			error = 0;
+		} else
+			error = vget(vdp, 1);
+		if (!error) {
+			if (vpid == vdp->v_id) {
+			   if (nmp->nm_flag & NFSMNT_NQNFS) {
+				/* Without lookup leases the cached entry is used as is. */
+				if ((nmp->nm_flag & NFSMNT_NQLOOKLEASE) == 0) {
+					nfsstats.lookupcache_hits++;
+					if (cnp->cn_nameiop != LOOKUP &&
+					    (flags & ISLASTCN))
+					    cnp->cn_flags |= SAVENAME;
+					return (0);
+			        } else if (NQNFS_CKCACHABLE(dvp, NQL_READ)) {
+					/*
+					 * Directory changed since it was
+					 * cached: purge and fall through to
+					 * the rpc.
+					 */
+					if (np->n_lrev != np->n_brev ||
+					    (np->n_flag & NMODIFIED)) {
+						np->n_direofoffset = 0;
+						cache_purge(dvp);
+						error = nfs_vinvalbuf(dvp, 0,
+						    cnp->cn_cred, cnp->cn_proc,
+						    1);
+						/*
+						 * NOTE(review): this return
+						 * does not release the
+						 * reference taken on vdp
+						 * above and leaves *vpp set
+						 * -- confirm whether that is
+						 * intended.
+						 */
+						if (error == EINTR)
+							return (error);
+						np->n_brev = np->n_lrev;
+					} else {
+						nfsstats.lookupcache_hits++;
+						if (cnp->cn_nameiop != LOOKUP &&
+						    (flags & ISLASTCN))
+						    cnp->cn_flags |= SAVENAME;
+						return (0);
+					}
+				}
+			   } else if (!VOP_GETATTR(vdp, &vattr, cnp->cn_cred, cnp->cn_proc) &&
+			       vattr.va_ctime.ts_sec == VTONFS(vdp)->n_ctime) {
+				/* Plain nfs: entry is good while ctime is unchanged. */
+				nfsstats.lookupcache_hits++;
+				if (cnp->cn_nameiop != LOOKUP &&
+				    (flags & ISLASTCN))
+					cnp->cn_flags |= SAVENAME;
+				return (0);
+			   }
+			   cache_purge(vdp);
+			}
+			vrele(vdp);
+		}
+		*vpp = NULLVP;
+	}
+	/* Cache miss or stale entry: do the LOOKUP rpc. */
+	error = 0;
+	nfsstats.lookupcache_misses++;
+	nfsstats.rpccnt[NFSPROC_LOOKUP]++;
+	len = cnp->cn_namelen;
+	nfsm_reqhead(dvp, NFSPROC_LOOKUP, NFSX_FH+NFSX_UNSIGNED+nfsm_rndup(len));
+
+	/*
+	 * For nqnfs optionally piggyback a getlease request for the name
+	 * being looked up.
+	 */
+	if (nmp->nm_flag & NFSMNT_NQNFS) {
+		nfsm_build(tl, u_long *, NFSX_UNSIGNED);
+		if ((nmp->nm_flag & NFSMNT_NQLOOKLEASE) &&
+		    ((cnp->cn_flags & MAKEENTRY) &&
+		    (cnp->cn_nameiop != DELETE || !(flags & ISLASTCN))))
+			*tl = txdr_unsigned(nmp->nm_leaseterm);
+		else
+			*tl = 0;
+	}
+	nfsm_fhtom(dvp);
+	nfsm_strtom(cnp->cn_nameptr, len, NFS_MAXNAMLEN);
+	/* Lease expiry below is computed relative to the request time. */
+	reqtime = time.tv_sec;
+	nfsm_request(dvp, NFSPROC_LOOKUP, cnp->cn_proc, cnp->cn_cred);
+	/*
+	 * Error exit.  The label sits directly after the request; the
+	 * nfsm_* macros, including the nfsm_dissect calls further down,
+	 * presumably jump (back) here with `error' set when they fail.
+	 */
+nfsmout:
+	if (error) {
+		if ((cnp->cn_nameiop == CREATE || cnp->cn_nameiop == RENAME) &&
+		    (flags & ISLASTCN) && error == ENOENT)
+			error = EJUSTRETURN;
+		if (cnp->cn_nameiop != LOOKUP && (flags & ISLASTCN))
+			cnp->cn_flags |= SAVENAME;
+		return (error);
+	}
+	/* nqnfs reply may start with lease information. */
+	if (nmp->nm_flag & NFSMNT_NQNFS) {
+		nfsm_dissect(tl, u_long *, NFSX_UNSIGNED);
+		if (*tl) {
+			nqlflag = fxdr_unsigned(int, *tl);
+			nfsm_dissect(tl, u_long *, 4*NFSX_UNSIGNED);
+			cachable = fxdr_unsigned(int, *tl++);
+			reqtime += fxdr_unsigned(int, *tl++);
+			fxdr_hyper(tl, &frev);
+		} else
+			nqlflag = 0;
+	}
+	nfsm_dissect(fhp, nfsv2fh_t *, NFSX_FH);
+
+	/*
+	 * Handle RENAME case...
+	 * (the result is never entered into the name cache here)
+	 */
+	if (cnp->cn_nameiop == RENAME && wantparent && (flags & ISLASTCN)) {
+		/* Returned handle equals the directory's own: refuse. */
+		if (!bcmp(np->n_fh.fh_bytes, (caddr_t)fhp, NFSX_FH)) {
+			m_freem(mrep);
+			return (EISDIR);
+		}
+		if (error = nfs_nget(dvp->v_mount, fhp, &np)) {
+			m_freem(mrep);
+			return (error);
+		}
+		newvp = NFSTOV(np);
+		if (error =
+		    nfs_loadattrcache(&newvp, &md, &dpos, (struct vattr *)0)) {
+			vrele(newvp);
+			m_freem(mrep);
+			return (error);
+		}
+		*vpp = newvp;
+		m_freem(mrep);
+		cnp->cn_flags |= SAVENAME;
+		return (0);
+	}
+
+	/* Same file handle as the directory: the name refers to dvp itself. */
+	if (!bcmp(np->n_fh.fh_bytes, (caddr_t)fhp, NFSX_FH)) {
+		VREF(dvp);
+		newvp = dvp;
+	} else {
+		if (error = nfs_nget(dvp->v_mount, fhp, &np)) {
+			m_freem(mrep);
+			return (error);
+		}
+		newvp = NFSTOV(np);
+	}
+	if (error = nfs_loadattrcache(&newvp, &md, &dpos, (struct vattr *)0)) {
+		vrele(newvp);
+		m_freem(mrep);
+		return (error);
+	}
+	m_freem(mrep);
+	*vpp = newvp;
+	if (cnp->cn_nameiop != LOOKUP && (flags & ISLASTCN))
+		cnp->cn_flags |= SAVENAME;
+	/*
+	 * Enter the result in the name cache unless this is the last
+	 * component of a DELETE.  From here on np is the looked-up node
+	 * (or still the directory's node when newvp == dvp).
+	 */
+	if ((cnp->cn_flags & MAKEENTRY) &&
+	    (cnp->cn_nameiop != DELETE || !(flags & ISLASTCN))) {
+		if ((nmp->nm_flag & NFSMNT_NQNFS) == 0)
+			np->n_ctime = np->n_vattr.va_ctime.ts_sec;
+		else if (nqlflag && reqtime > time.tv_sec)
+			nqnfs_clientlease(nmp, np, nqlflag, cachable, reqtime,
+				frev);
+		cache_enter(dvp, *vpp, cnp);
+	}
+	return (0);
+}
+
+/*
+ * nfs read call.
+ * Regular files are read through nfs_bioread(); any other vnode type
+ * is refused with EPERM.
+ */
+int
+nfs_read(ap)
+	struct vop_read_args /* {
+		struct vnode *a_vp;
+		struct uio *a_uio;
+		int  a_ioflag;
+		struct ucred *a_cred;
+	} */ *ap;
+{
+
+	if (ap->a_vp->v_type == VREG)
+		return (nfs_bioread(ap->a_vp, ap->a_uio, ap->a_ioflag,
+		    ap->a_cred));
+	return (EPERM);
+}
+
+/*
+ * nfs readlink call
+ * Symbolic links are read through nfs_bioread() with no I/O flags;
+ * any other vnode type is refused with EPERM.
+ */
+int
+nfs_readlink(ap)
+	struct vop_readlink_args /* {
+		struct vnode *a_vp;
+		struct uio *a_uio;
+		struct ucred *a_cred;
+	} */ *ap;
+{
+
+	if (ap->a_vp->v_type == VLNK)
+		return (nfs_bioread(ap->a_vp, ap->a_uio, 0, ap->a_cred));
+	return (EPERM);
+}
+
+/*
+ * Do a readlink rpc.
+ * Called by nfs_doio() from below the buffer cache.
+ *
+ * Sends a READLINK request for vp and copies the returned path, whose
+ * length is limited to NFS_MAXPATHLEN, into uiop.  The rpc is issued on
+ * behalf of uiop->uio_procp with credentials cred.  Returns the value
+ * left in `error' (0 on success).
+ */
+int
+nfs_readlinkrpc(vp, uiop, cred)
+	register struct vnode *vp;
+	struct uio *uiop;
+	struct ucred *cred;
+{
+	/*
+	 * Most of these locals are not named directly below; presumably
+	 * the nfsm_* macros use them by name.
+	 */
+	register u_long *tl;
+	register caddr_t cp;
+	register long t1;
+	caddr_t bpos, dpos, cp2;
+	int error = 0;
+	struct mbuf *mreq, *mrep, *md, *mb, *mb2;
+	long len;
+
+	nfsstats.rpccnt[NFSPROC_READLINK]++;
+	nfsm_reqhead(vp, NFSPROC_READLINK, NFSX_FH);
+	nfsm_fhtom(vp);
+	nfsm_request(vp, NFSPROC_READLINK, uiop->uio_procp, cred);
+	/* Length of the link text, then the text itself. */
+	nfsm_strsiz(len, NFS_MAXPATHLEN);
+	nfsm_mtouio(uiop, len);
+	nfsm_reqdone;
+	return (error);
+}
+
+/*
+ * nfs read rpc call
+ * Ditto above
+ *
+ * Reads uiop->uio_resid bytes starting at uiop->uio_offset from vp into
+ * uiop, issuing one READ rpc per chunk of at most nm_rsize bytes.  The
+ * loop ends early when the server returns fewer bytes than were asked
+ * for.  On a non-nqnfs mount a request reaching past offset 0xffffffff
+ * is rejected with EFBIG before any rpc is sent.  Returns the value left
+ * in `error' (0 on success).
+ */
+int
+nfs_readrpc(vp, uiop, cred)
+	register struct vnode *vp;
+	struct uio *uiop;
+	struct ucred *cred;
+{
+	register u_long *tl;
+	register caddr_t cp;
+	register long t1;
+	caddr_t bpos, dpos, cp2;
+	int error = 0;
+	struct mbuf *mreq, *mrep, *md, *mb, *mb2;
+	struct nfsmount *nmp;
+	long len, retlen, tsiz;
+
+	nmp = VFSTONFS(vp->v_mount);
+	tsiz = uiop->uio_resid;
+	if (uiop->uio_offset + tsiz > 0xffffffff &&
+	    (nmp->nm_flag & NFSMNT_NQNFS) == 0)
+		return (EFBIG);
+	while (tsiz > 0) {
+		nfsstats.rpccnt[NFSPROC_READ]++;
+		len = (tsiz > nmp->nm_rsize) ? nmp->nm_rsize : tsiz;
+		nfsm_reqhead(vp, NFSPROC_READ, NFSX_FH+NFSX_UNSIGNED*3);
+		nfsm_fhtom(vp);
+		nfsm_build(tl, u_long *, NFSX_UNSIGNED*3);
+		if (nmp->nm_flag & NFSMNT_NQNFS) {
+			/* Two words of offset followed by the length. */
+			txdr_hyper(&uiop->uio_offset, tl);
+			*(tl + 2) = txdr_unsigned(len);
+		} else {
+			/* Offset, length, and a third word that is sent as 0. */
+			*tl++ = txdr_unsigned(uiop->uio_offset);
+			*tl++ = txdr_unsigned(len);
+			*tl = 0;
+		}
+		nfsm_request(vp, NFSPROC_READ, uiop->uio_procp, cred);
+		nfsm_loadattr(vp, (struct vattr *)0);
+		nfsm_strsiz(retlen, nmp->nm_rsize);
+		nfsm_mtouio(uiop, retlen);
+		m_freem(mrep);
+		/* A short reply ends the transfer. */
+		if (retlen < len)
+			tsiz = 0;
+		else
+			tsiz -= len;
+	}
+	/* Presumably the nfsm_* macros jump here on failure. */
+nfsmout:
+	return (error);
+}
+
+/*
+ * nfs write call
+ *
+ * Writes uiop->uio_resid bytes from uiop to vp starting at
+ * uiop->uio_offset, issuing one WRITE rpc per chunk of at most nm_wsize
+ * bytes.  On a non-nqnfs mount a request reaching past offset 0xffffffff
+ * is rejected with EFBIG before any rpc is sent.  On failure
+ * uiop->uio_resid is set to the number of bytes not yet acknowledged.
+ * Returns the value left in `error' (0 on success).
+ */
+int
+nfs_writerpc(vp, uiop, cred, ioflags)
+	register struct vnode *vp;
+	struct uio *uiop;
+	struct ucred *cred;
+	int ioflags;
+{
+	register u_long *tl;
+	register caddr_t cp;
+	register long t1;
+	caddr_t bpos, dpos, cp2;
+	int error = 0;
+	struct mbuf *mreq, *mrep, *md, *mb, *mb2;
+	struct nfsmount *nmp;
+	struct nfsnode *np = VTONFS(vp);
+	u_quad_t frev;
+	long len, tsiz;
+
+	nmp = VFSTONFS(vp->v_mount);
+	tsiz = uiop->uio_resid;
+	if (uiop->uio_offset + tsiz > 0xffffffff &&
+	    (nmp->nm_flag & NFSMNT_NQNFS) == 0)
+		return (EFBIG);
+	while (tsiz > 0) {
+		nfsstats.rpccnt[NFSPROC_WRITE]++;
+		len = (tsiz > nmp->nm_wsize) ? nmp->nm_wsize : tsiz;
+		nfsm_reqhead(vp, NFSPROC_WRITE,
+			NFSX_FH+NFSX_UNSIGNED*4+nfsm_rndup(len));
+		nfsm_fhtom(vp);
+		nfsm_build(tl, u_long *, NFSX_UNSIGNED * 4);
+		if (nmp->nm_flag & NFSMNT_NQNFS) {
+			/* Words 0-1: offset; word 2: append flag. */
+			txdr_hyper(&uiop->uio_offset, tl);
+			tl += 2;
+			if (ioflags & IO_APPEND)
+				*tl++ = txdr_unsigned(1);
+			else
+				*tl++ = 0;
+		} else {
+			/*
+			 * Only word 1 (the offset) is filled in; words 0
+			 * and 2 are skipped and never written here.
+			 * NOTE(review): presumably these are the NFSv2
+			 * fields the server ignores -- confirm, and
+			 * consider zeroing them.
+			 */
+			*++tl = txdr_unsigned(uiop->uio_offset);
+			tl += 2;
+		}
+		/* Word 3 in both layouts: byte count of the data that follows. */
+		*tl = txdr_unsigned(len);
+		nfsm_uiotom(uiop, len);
+		nfsm_request(vp, NFSPROC_WRITE, uiop->uio_procp, cred);
+		nfsm_loadattr(vp, (struct vattr *)0);
+		if (nmp->nm_flag & NFSMNT_MYWRITE)
+			VTONFS(vp)->n_mtime = VTONFS(vp)->n_vattr.va_mtime.ts_sec;
+		else if ((nmp->nm_flag & NFSMNT_NQNFS) &&
+			 NQNFS_CKCACHABLE(vp, NQL_WRITE)) {
+			/* Reply carries a revision; keep the newest one seen. */
+			nfsm_dissect(tl, u_long *, 2*NFSX_UNSIGNED);
+			fxdr_hyper(tl, &frev);
+			if (frev > np->n_brev)
+				np->n_brev = frev;
+		}
+		m_freem(mrep);
+		tsiz -= len;
+	}
+	/* Presumably the nfsm_* macros jump here on failure. */
+nfsmout:
+	if (error)
+		uiop->uio_resid = tsiz;
+	return (error);
+}
+
+/*
+ * nfs mknod call
+ * This is a kludge. Use a create rpc but with the IFMT bits of the mode
+ * set to specify the file type and the size field for rdev.
+ *
+ * Character and block devices send va_rdev; fifos (when FIFO is
+ * defined) send 0xffffffff.  Any other type is refused with EOPNOTSUPP
+ * after aborting the operation and releasing the directory.  The group
+ * of the new node is taken from the directory's attributes.  On
+ * completion the pathname buffer is freed, the directory is marked
+ * modified with its attribute cache reset, and the directory reference
+ * is dropped.  Returns 0 or an errno value.
+ */
+/* ARGSUSED */
+int
+nfs_mknod(ap)
+	struct vop_mknod_args /* {
+		struct vnode *a_dvp;
+		struct vnode **a_vpp;
+		struct componentname *a_cnp;
+		struct vattr *a_vap;
+	} */ *ap;
+{
+	register struct vnode *dvp = ap->a_dvp;
+	register struct vattr *vap = ap->a_vap;
+	register struct componentname *cnp = ap->a_cnp;
+	register struct nfsv2_sattr *sp;
+	register u_long *tl;
+	register caddr_t cp;
+	register long t1, t2;
+	struct vnode *newvp;
+	struct vattr vattr;
+	char *cp2;
+	caddr_t bpos, dpos;
+	int error = 0, isnq;
+	struct mbuf *mreq, *mrep, *md, *mb, *mb2;
+	u_long rdev;
+
+	isnq = (VFSTONFS(dvp->v_mount)->nm_flag & NFSMNT_NQNFS);
+	/* rdev is kept in wire format from here on. */
+	if (vap->va_type == VCHR || vap->va_type == VBLK)
+		rdev = txdr_unsigned(vap->va_rdev);
+#ifdef FIFO
+	else if (vap->va_type == VFIFO)
+		rdev = 0xffffffff;
+#endif /* FIFO */
+	else {
+		VOP_ABORTOP(dvp, cnp);
+		vput(dvp);
+		return (EOPNOTSUPP);
+	}
+	/* Directory attributes are needed for the group id sent below. */
+	if (error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred, cnp->cn_proc)) {
+		VOP_ABORTOP(dvp, cnp);
+		vput(dvp);
+		return (error);
+	}
+	nfsstats.rpccnt[NFSPROC_CREATE]++;
+	nfsm_reqhead(dvp, NFSPROC_CREATE,
+	  NFSX_FH+NFSX_UNSIGNED+nfsm_rndup(cnp->cn_namelen)+NFSX_SATTR(isnq));
+	nfsm_fhtom(dvp);
+	nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN);
+	nfsm_build(sp, struct nfsv2_sattr *, NFSX_SATTR(isnq));
+	sp->sa_mode = vtonfs_mode(vap->va_type, vap->va_mode);
+	sp->sa_uid = txdr_unsigned(cnp->cn_cred->cr_uid);
+	sp->sa_gid = txdr_unsigned(vattr.va_gid);
+	if (isnq) {
+		sp->sa_nqrdev = rdev;
+		sp->sa_nqflags = 0;
+		txdr_nqtime(&vap->va_atime, &sp->sa_nqatime);
+		txdr_nqtime(&vap->va_mtime, &sp->sa_nqmtime);
+	} else {
+		/* Plain nfs: the size field carries the device number. */
+		sp->sa_nfssize = rdev;
+		txdr_nfstime(&vap->va_atime, &sp->sa_nfsatime);
+		txdr_nfstime(&vap->va_mtime, &sp->sa_nfsmtime);
+	}
+	nfsm_request(dvp, NFSPROC_CREATE, cnp->cn_proc, cnp->cn_cred);
+	nfsm_mtofh(dvp, newvp);
+	nfsm_reqdone;
+	/*
+	 * NOTE(review): newvp is neither released nor handed back through
+	 * ap->a_vpp in this function -- confirm who owns that reference.
+	 */
+	if (!error && (cnp->cn_flags & MAKEENTRY))
+		cache_enter(dvp, newvp, cnp);
+	FREE(cnp->cn_pnbuf, M_NAMEI);
+	VTONFS(dvp)->n_flag |= NMODIFIED;
+	VTONFS(dvp)->n_attrstamp = 0;
+	vrele(dvp);
+	return (error);
+}
+
+/*
+ * nfs file create call
+ *
+ * Sends a CREATE rpc for the name in ap->a_cnp within directory
+ * ap->a_dvp and returns the new vnode through *ap->a_vpp.  The owner is
+ * the caller's uid, the group comes from the directory's attributes,
+ * and the initial size is 0.  On completion the pathname buffer is
+ * freed, the directory is marked modified with its attribute cache
+ * reset, and the directory reference is dropped.  Returns 0 or an errno
+ * value.
+ */
+int
+nfs_create(ap)
+	struct vop_create_args /* {
+		struct vnode *a_dvp;
+		struct vnode **a_vpp;
+		struct componentname *a_cnp;
+		struct vattr *a_vap;
+	} */ *ap;
+{
+	register struct vnode *dvp = ap->a_dvp;
+	register struct vattr *vap = ap->a_vap;
+	register struct componentname *cnp = ap->a_cnp;
+	register struct nfsv2_sattr *sp;
+	register u_long *tl;
+	register caddr_t cp;
+	register long t1, t2;
+	caddr_t bpos, dpos, cp2;
+	int error = 0, isnq;
+	struct mbuf *mreq, *mrep, *md, *mb, *mb2;
+	struct vattr vattr;
+
+	/* Directory attributes are needed for the group id sent below. */
+	if (error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred, cnp->cn_proc)) {
+		VOP_ABORTOP(dvp, cnp);
+		vput(dvp);
+		return (error);
+	}
+	nfsstats.rpccnt[NFSPROC_CREATE]++;
+	isnq = (VFSTONFS(dvp->v_mount)->nm_flag & NFSMNT_NQNFS);
+	nfsm_reqhead(dvp, NFSPROC_CREATE,
+	  NFSX_FH+NFSX_UNSIGNED+nfsm_rndup(cnp->cn_namelen)+NFSX_SATTR(isnq));
+	nfsm_fhtom(dvp);
+	nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN);
+	nfsm_build(sp, struct nfsv2_sattr *, NFSX_SATTR(isnq));
+	sp->sa_mode = vtonfs_mode(vap->va_type, vap->va_mode);
+	sp->sa_uid = txdr_unsigned(cnp->cn_cred->cr_uid);
+	sp->sa_gid = txdr_unsigned(vattr.va_gid);
+	if (isnq) {
+		u_quad_t qval = 0;
+
+		/* Size 0, no flags, rdev set to -1. */
+		txdr_hyper(&qval, &sp->sa_nqsize);
+		sp->sa_nqflags = 0;
+		sp->sa_nqrdev = -1;
+		txdr_nqtime(&vap->va_atime, &sp->sa_nqatime);
+		txdr_nqtime(&vap->va_mtime, &sp->sa_nqmtime);
+	} else {
+		sp->sa_nfssize = 0;
+		txdr_nfstime(&vap->va_atime, &sp->sa_nfsatime);
+		txdr_nfstime(&vap->va_mtime, &sp->sa_nfsmtime);
+	}
+	nfsm_request(dvp, NFSPROC_CREATE, cnp->cn_proc, cnp->cn_cred);
+	nfsm_mtofh(dvp, *ap->a_vpp);
+	nfsm_reqdone;
+	if (!error && (cnp->cn_flags & MAKEENTRY))
+		cache_enter(dvp, *ap->a_vpp, cnp);
+	FREE(cnp->cn_pnbuf, M_NAMEI);
+	VTONFS(dvp)->n_flag |= NMODIFIED;
+	VTONFS(dvp)->n_attrstamp = 0;
+	vrele(dvp);
+	return (error);
+}
+
+/*
+ * nfs file remove call
+ * To try and make nfs semantics closer to ufs semantics, a file that has
+ * other processes using the vnode is renamed instead of removed and then
+ * removed later on the last close.
+ * - If v_usecount > 1
+ *	  If a rename is not already in the works
+ *	     call nfs_sillyrename() to set it up
+ *     else
+ *	  do the remove rpc
+ *
+ * The references on dvp and vp are dropped on every return path, and on
+ * the remove-rpc branch the pathname buffer is freed as well, including
+ * when the buffer flush is interrupted.
+ * Returns 0 or an error number; ENOENT from the rpc is mapped to 0.
+ */
+int
+nfs_remove(ap)
+	struct vop_remove_args /* {
+		struct vnodeop_desc *a_desc;
+		struct vnode * a_dvp;
+		struct vnode * a_vp;
+		struct componentname * a_cnp;
+	} */ *ap;
+{
+	register struct vnode *vp = ap->a_vp;
+	register struct vnode *dvp = ap->a_dvp;
+	register struct componentname *cnp = ap->a_cnp;
+	register struct nfsnode *np = VTONFS(vp);
+	register u_long *tl;
+	register caddr_t cp;
+	register long t2;
+	caddr_t bpos, dpos;
+	int error = 0;
+	struct mbuf *mreq, *mrep, *md, *mb, *mb2;
+
+	if (vp->v_usecount > 1) {
+		if (!np->n_sillyrename)
+			error = nfs_sillyrename(dvp, vp, cnp);
+	} else {
+		/*
+		 * Purge the name cache so that the chance of a lookup for
+		 * the name succeeding while the remove is in progress is
+		 * minimized. Without node locking it can still happen, such
+		 * that an I/O op returns ESTALE, but since you get this if
+		 * another host removes the file..
+		 */
+		cache_purge(vp);
+		/*
+		 * Throw away biocache buffers. Mainly to avoid
+		 * unnecessary delayed writes.
+		 */
+		error = nfs_vinvalbuf(vp, 0, cnp->cn_cred, cnp->cn_proc, 1);
+		if (error == EINTR) {
+			/*
+			 * Interrupted: skip the rpc, but still release
+			 * what the normal exit path releases so that the
+			 * name buffer and the vnode references do not leak.
+			 */
+			FREE(cnp->cn_pnbuf, M_NAMEI);
+			vrele(dvp);
+			vrele(vp);
+			return (error);
+		}
+		/* Do the rpc */
+		nfsstats.rpccnt[NFSPROC_REMOVE]++;
+		nfsm_reqhead(dvp, NFSPROC_REMOVE,
+			NFSX_FH+NFSX_UNSIGNED+nfsm_rndup(cnp->cn_namelen));
+		nfsm_fhtom(dvp);
+		nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN);
+		nfsm_request(dvp, NFSPROC_REMOVE, cnp->cn_proc, cnp->cn_cred);
+		nfsm_reqdone;
+		FREE(cnp->cn_pnbuf, M_NAMEI);
+		VTONFS(dvp)->n_flag |= NMODIFIED;
+		VTONFS(dvp)->n_attrstamp = 0;
+		/*
+		 * Kludge City: If the first reply to the remove rpc is lost..
+		 *   the reply to the retransmitted request will be ENOENT
+		 *   since the file was in fact removed
+		 *   Therefore, we cheat and return success.
+		 */
+		if (error == ENOENT)
+			error = 0;
+	}
+	/* Force the file's attributes to be refetched. */
+	np->n_attrstamp = 0;
+	vrele(dvp);
+	vrele(vp);
+	return (error);
+}
+
+/*
+ * nfs file remove rpc called from nfs_inactive
+ *
+ * Sends an NFSPROC_REMOVE rpc for the name recorded in the sillyrename
+ * structure (s_name/s_namlen in directory s_dvp) using the saved
+ * credential s_cred and no process pointer.  Afterwards the directory is
+ * flagged NMODIFIED and its attribute cache is invalidated.
+ * Returns 0 or an error number.
+ */
+int
+nfs_removeit(sp)
+	register struct sillyrename *sp;
+{
+	register u_long *tl;
+	register caddr_t cp;
+	register long t2;
+	caddr_t bpos, dpos;
+	int error = 0;
+	struct mbuf *mreq, *mrep, *md, *mb, *mb2;
+
+	nfsstats.rpccnt[NFSPROC_REMOVE]++;
+	nfsm_reqhead(sp->s_dvp, NFSPROC_REMOVE,
+		NFSX_FH+NFSX_UNSIGNED+nfsm_rndup(sp->s_namlen));
+	nfsm_fhtom(sp->s_dvp);
+	nfsm_strtom(sp->s_name, sp->s_namlen, NFS_MAXNAMLEN);
+	nfsm_request(sp->s_dvp, NFSPROC_REMOVE, NULL, sp->s_cred);
+	nfsm_reqdone;
+	VTONFS(sp->s_dvp)->n_flag |= NMODIFIED;
+	VTONFS(sp->s_dvp)->n_attrstamp = 0;
+	return (error);
+}
+
+/*
+ * nfs file rename call
+ *
+ * Refuses renames that cross mount points with EXDEV; otherwise sends an
+ * NFSPROC_RENAME rpc naming the source (fdvp, fcnp) and the destination
+ * (tdvp, tcnp).  Both directories are then flagged NMODIFIED and their
+ * attribute caches invalidated; when a directory was renamed the name
+ * cache entries of the directories involved are purged.
+ * All four vnodes are released on every path.
+ * Returns 0 or an error number; ENOENT is mapped to 0.
+ */
+int
+nfs_rename(ap)
+	struct vop_rename_args  /* {
+		struct vnode *a_fdvp;
+		struct vnode *a_fvp;
+		struct componentname *a_fcnp;
+		struct vnode *a_tdvp;
+		struct vnode *a_tvp;
+		struct componentname *a_tcnp;
+	} */ *ap;
+{
+	register struct vnode *fvp = ap->a_fvp;
+	register struct vnode *tvp = ap->a_tvp;
+	register struct vnode *fdvp = ap->a_fdvp;
+	register struct vnode *tdvp = ap->a_tdvp;
+	register struct componentname *tcnp = ap->a_tcnp;
+	register struct componentname *fcnp = ap->a_fcnp;
+	register u_long *tl;
+	register caddr_t cp;
+	register long t2;
+	caddr_t bpos, dpos;
+	int error = 0;
+	struct mbuf *mreq, *mrep, *md, *mb, *mb2;
+
+	/* Check for cross-device rename */
+	if ((fvp->v_mount != tdvp->v_mount) ||
+	    (tvp && (fvp->v_mount != tvp->v_mount))) {
+		error = EXDEV;
+		goto out;
+	}
+
+
+	nfsstats.rpccnt[NFSPROC_RENAME]++;
+	/*
+	 * The request carries two (file handle, name) pairs, so size it
+	 * for the source name plus the destination name.
+	 */
+	nfsm_reqhead(fdvp, NFSPROC_RENAME,
+		(NFSX_FH+NFSX_UNSIGNED)*2+nfsm_rndup(fcnp->cn_namelen)+
+		nfsm_rndup(tcnp->cn_namelen));
+	nfsm_fhtom(fdvp);
+	nfsm_strtom(fcnp->cn_nameptr, fcnp->cn_namelen, NFS_MAXNAMLEN);
+	nfsm_fhtom(tdvp);
+	nfsm_strtom(tcnp->cn_nameptr, tcnp->cn_namelen, NFS_MAXNAMLEN);
+	/* XXX uses tcnp's process and credential; or fcnp->cn_cred? */
+	nfsm_request(fdvp, NFSPROC_RENAME, tcnp->cn_proc, tcnp->cn_cred);
+	nfsm_reqdone;
+	VTONFS(fdvp)->n_flag |= NMODIFIED;
+	VTONFS(fdvp)->n_attrstamp = 0;
+	VTONFS(tdvp)->n_flag |= NMODIFIED;
+	VTONFS(tdvp)->n_attrstamp = 0;
+	if (fvp->v_type == VDIR) {
+		if (tvp != NULL && tvp->v_type == VDIR)
+			cache_purge(tdvp);
+		cache_purge(fdvp);
+	}
+out:
+	if (tdvp == tvp)
+		vrele(tdvp);
+	else
+		vput(tdvp);
+	if (tvp)
+		vput(tvp);
+	vrele(fdvp);
+	vrele(fvp);
+	/*
+	 * Kludge: Map ENOENT => 0 assuming that it is a reply to a retry.
+	 */
+	if (error == ENOENT)
+		error = 0;
+	return (error);
+}
+
+/*
+ * nfs file rename rpc called from nfs_remove() above
+ *
+ * Sends an NFSPROC_RENAME rpc that renames the entry named by scnp to
+ * the name held in sp (s_name/s_namlen); source and destination are both
+ * in directory sdvp.  The pathname buffer of scnp is freed and sdvp is
+ * flagged NMODIFIED with its attribute cache invalidated.
+ * Returns 0 or an error number.
+ */
+int
+nfs_renameit(sdvp, scnp, sp)
+	struct vnode *sdvp;
+	struct componentname *scnp;
+	register struct sillyrename *sp;
+{
+	register u_long *tl;
+	register caddr_t cp;
+	register long t2;
+	caddr_t bpos, dpos;
+	int error = 0;
+	struct mbuf *mreq, *mrep, *md, *mb, *mb2;
+
+	nfsstats.rpccnt[NFSPROC_RENAME]++;
+	nfsm_reqhead(sdvp, NFSPROC_RENAME,
+		(NFSX_FH+NFSX_UNSIGNED)*2+nfsm_rndup(scnp->cn_namelen)+
+		nfsm_rndup(sp->s_namlen));
+	/* From: (sdvp, original name); to: (sdvp, silly name). */
+	nfsm_fhtom(sdvp);
+	nfsm_strtom(scnp->cn_nameptr, scnp->cn_namelen, NFS_MAXNAMLEN);
+	nfsm_fhtom(sdvp);
+	nfsm_strtom(sp->s_name, sp->s_namlen, NFS_MAXNAMLEN);
+	nfsm_request(sdvp, NFSPROC_RENAME, scnp->cn_proc, scnp->cn_cred);
+	nfsm_reqdone;
+	FREE(scnp->cn_pnbuf, M_NAMEI);
+	VTONFS(sdvp)->n_flag |= NMODIFIED;
+	VTONFS(sdvp)->n_attrstamp = 0;
+	return (error);
+}
+
+/*
+ * nfs hard link create call
+ *
+ * Returns EXDEV (after dropping vp) when vp and tdvp live on different
+ * mounts.  Otherwise sends an NFSPROC_LINK rpc whose arguments are, in
+ * order, the file handle of tdvp, the file handle of vp and the name in
+ * cnp.  Afterwards the pathname buffer is freed, the attribute caches of
+ * both vnodes are invalidated, tdvp is flagged NMODIFIED and the
+ * reference on vp is dropped.
+ * Returns 0 or an error number; EEXIST is mapped to 0.
+ */
+int
+nfs_link(ap)
+	struct vop_link_args /* {
+		struct vnode *a_vp;
+		struct vnode *a_tdvp;
+		struct componentname *a_cnp;
+	} */ *ap;
+{
+	register struct vnode *vp = ap->a_vp;
+	register struct vnode *tdvp = ap->a_tdvp;
+	register struct componentname *cnp = ap->a_cnp;
+	register u_long *tl;
+	register caddr_t cp;
+	register long t2;
+	caddr_t bpos, dpos;
+	int error = 0;
+	struct mbuf *mreq, *mrep, *md, *mb, *mb2;
+
+	/* Links may not cross mount points. */
+	if (vp->v_mount != tdvp->v_mount) {
+		/*VOP_ABORTOP(vp, cnp);*/
+		if (tdvp == vp)
+			vrele(vp);
+		else
+			vput(vp);
+		return (EXDEV);
+	}
+
+	nfsstats.rpccnt[NFSPROC_LINK]++;
+	nfsm_reqhead(tdvp, NFSPROC_LINK,
+		NFSX_FH*2+NFSX_UNSIGNED+nfsm_rndup(cnp->cn_namelen));
+	nfsm_fhtom(tdvp);
+	nfsm_fhtom(vp);
+	nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN);
+	nfsm_request(tdvp, NFSPROC_LINK, cnp->cn_proc, cnp->cn_cred);
+	nfsm_reqdone;
+	FREE(cnp->cn_pnbuf, M_NAMEI);
+	VTONFS(tdvp)->n_attrstamp = 0;
+	VTONFS(tdvp)->n_flag |= NMODIFIED;
+	VTONFS(vp)->n_attrstamp = 0;
+	vrele(vp);
+	/*
+	 * Kludge: Map EEXIST => 0 assuming that it is a reply to a retry.
+	 */
+	if (error == EEXIST)
+		error = 0;
+	return (error);
+}
+
+/*
+ * nfs symbolic link create call
+ *
+ * Sends an NFSPROC_SYMLINK rpc carrying the new name (cnp), the link
+ * target string a_target and an attribute set: mode from a_vap with type
+ * VLNK, uid and gid from the caller's credential and size -1.
+ * The pathname buffer is freed, the directory is flagged NMODIFIED with
+ * its attribute cache invalidated, and the reference on dvp is dropped.
+ * Nothing is stored through a_vpp here.
+ * Returns 0 or an error number; EEXIST is mapped to 0.
+ */
+/* start here */
+int
+nfs_symlink(ap)
+	struct vop_symlink_args /* {
+		struct vnode *a_dvp;
+		struct vnode **a_vpp;
+		struct componentname *a_cnp;
+		struct vattr *a_vap;
+		char *a_target;
+	} */ *ap;
+{
+	register struct vnode *dvp = ap->a_dvp;
+	register struct vattr *vap = ap->a_vap;
+	register struct componentname *cnp = ap->a_cnp;
+	register struct nfsv2_sattr *sp;
+	register u_long *tl;
+	register caddr_t cp;
+	register long t2;
+	caddr_t bpos, dpos;
+	int slen, error = 0, isnq;
+	struct mbuf *mreq, *mrep, *md, *mb, *mb2;
+
+	nfsstats.rpccnt[NFSPROC_SYMLINK]++;
+	slen = strlen(ap->a_target);
+	isnq = (VFSTONFS(dvp->v_mount)->nm_flag & NFSMNT_NQNFS);
+	nfsm_reqhead(dvp, NFSPROC_SYMLINK, NFSX_FH+2*NFSX_UNSIGNED+
+	    nfsm_rndup(cnp->cn_namelen)+nfsm_rndup(slen)+NFSX_SATTR(isnq));
+	nfsm_fhtom(dvp);
+	nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN);
+	nfsm_strtom(ap->a_target, slen, NFS_MAXPATHLEN);
+	nfsm_build(sp, struct nfsv2_sattr *, NFSX_SATTR(isnq));
+	sp->sa_mode = vtonfs_mode(VLNK, vap->va_mode);
+	sp->sa_uid = txdr_unsigned(cnp->cn_cred->cr_uid);
+	sp->sa_gid = txdr_unsigned(cnp->cn_cred->cr_gid);
+	if (isnq) {
+		quad_t qval = -1;
+
+		/*
+		 * NOTE(review): unlike nfs_create(), sa_nqrdev is not
+		 * assigned here -- confirm that is intentional.
+		 */
+		txdr_hyper(&qval, &sp->sa_nqsize);
+		sp->sa_nqflags = 0;
+		txdr_nqtime(&vap->va_atime, &sp->sa_nqatime);
+		txdr_nqtime(&vap->va_mtime, &sp->sa_nqmtime);
+	} else {
+		sp->sa_nfssize = -1;
+		txdr_nfstime(&vap->va_atime, &sp->sa_nfsatime);
+		txdr_nfstime(&vap->va_mtime, &sp->sa_nfsmtime);
+	}
+	nfsm_request(dvp, NFSPROC_SYMLINK, cnp->cn_proc, cnp->cn_cred);
+	nfsm_reqdone;
+	FREE(cnp->cn_pnbuf, M_NAMEI);
+	VTONFS(dvp)->n_flag |= NMODIFIED;
+	VTONFS(dvp)->n_attrstamp = 0;
+	vrele(dvp);
+	/*
+	 * Kludge: Map EEXIST => 0 assuming that it is a reply to a retry.
+	 */
+	if (error == EEXIST)
+		error = 0;
+	return (error);
+}
+
+/*
+ * nfs make dir call
+ *
+ * Reads the attributes of the parent directory (its gid is used for the
+ * new directory), then sends an NFSPROC_MKDIR rpc with the new name and
+ * an attribute set: mode from a_vap with type VDIR, uid from the caller's
+ * credential, gid from the parent and size -1.  The new vnode is
+ * returned through *ap->a_vpp.
+ * If the rpc fails with EEXIST, one NFSPROC_LOOKUP of the same name is
+ * tried; the error is cleared only if that finds a directory.
+ * Returns 0 or an error number.
+ */
+int
+nfs_mkdir(ap)
+	struct vop_mkdir_args /* {
+		struct vnode *a_dvp;
+		struct vnode **a_vpp;
+		struct componentname *a_cnp;
+		struct vattr *a_vap;
+	} */ *ap;
+{
+	register struct vnode *dvp = ap->a_dvp;
+	register struct vattr *vap = ap->a_vap;
+	register struct componentname *cnp = ap->a_cnp;
+	register struct vnode **vpp = ap->a_vpp;
+	register struct nfsv2_sattr *sp;
+	register u_long *tl;
+	register caddr_t cp;
+	register long t1, t2;
+	register int len;
+	caddr_t bpos, dpos, cp2;
+	int error = 0, firsttry = 1, isnq;
+	struct mbuf *mreq, *mrep, *md, *mb, *mb2;
+	struct vattr vattr;
+
+	/* The parent's attributes are needed below; give up if unavailable. */
+	if (error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred, cnp->cn_proc)) {
+		VOP_ABORTOP(dvp, cnp);
+		vput(dvp);
+		return (error);
+	}
+	len = cnp->cn_namelen;
+	isnq = (VFSTONFS(dvp->v_mount)->nm_flag & NFSMNT_NQNFS);
+	nfsstats.rpccnt[NFSPROC_MKDIR]++;
+	nfsm_reqhead(dvp, NFSPROC_MKDIR,
+	  NFSX_FH+NFSX_UNSIGNED+nfsm_rndup(len)+NFSX_SATTR(isnq));
+	nfsm_fhtom(dvp);
+	nfsm_strtom(cnp->cn_nameptr, len, NFS_MAXNAMLEN);
+	nfsm_build(sp, struct nfsv2_sattr *, NFSX_SATTR(isnq));
+	sp->sa_mode = vtonfs_mode(VDIR, vap->va_mode);
+	sp->sa_uid = txdr_unsigned(cnp->cn_cred->cr_uid);
+	sp->sa_gid = txdr_unsigned(vattr.va_gid);
+	if (isnq) {
+		quad_t qval = -1;
+
+		/*
+		 * NOTE(review): unlike nfs_create(), sa_nqrdev is not
+		 * assigned here -- confirm that is intentional.
+		 */
+		txdr_hyper(&qval, &sp->sa_nqsize);
+		sp->sa_nqflags = 0;
+		txdr_nqtime(&vap->va_atime, &sp->sa_nqatime);
+		txdr_nqtime(&vap->va_mtime, &sp->sa_nqmtime);
+	} else {
+		sp->sa_nfssize = -1;
+		txdr_nfstime(&vap->va_atime, &sp->sa_nfsatime);
+		txdr_nfstime(&vap->va_mtime, &sp->sa_nfsmtime);
+	}
+	nfsm_request(dvp, NFSPROC_MKDIR, cnp->cn_proc, cnp->cn_cred);
+	nfsm_mtofh(dvp, *vpp);
+	nfsm_reqdone;
+	VTONFS(dvp)->n_flag |= NMODIFIED;
+	VTONFS(dvp)->n_attrstamp = 0;
+	/*
+	 * Kludge: Map EEXIST => 0 assuming that you have a reply to a retry
+	 * if we can succeed in looking up the directory.
+	 * "firsttry" is necessary since the macros may "goto nfsmout" which
+	 * is above the if on errors. (Ugh)
+	 */
+	if (error == EEXIST && firsttry) {
+		firsttry = 0;
+		error = 0;
+		nfsstats.rpccnt[NFSPROC_LOOKUP]++;
+		*vpp = NULL;
+		nfsm_reqhead(dvp, NFSPROC_LOOKUP,
+		    NFSX_FH+NFSX_UNSIGNED+nfsm_rndup(len));
+		nfsm_fhtom(dvp);
+		nfsm_strtom(cnp->cn_nameptr, len, NFS_MAXNAMLEN);
+		nfsm_request(dvp, NFSPROC_LOOKUP, cnp->cn_proc, cnp->cn_cred);
+		nfsm_mtofh(dvp, *vpp);
+		/* Something other than a directory has that name. */
+		if ((*vpp)->v_type != VDIR) {
+			vput(*vpp);
+			error = EEXIST;
+		}
+		m_freem(mrep);
+	}
+	FREE(cnp->cn_pnbuf, M_NAMEI);
+	vrele(dvp);
+	return (error);
+}
+
+/*
+ * nfs remove directory call
+ *
+ * Returns EINVAL when asked to remove a directory from itself
+ * (dvp == vp).  Otherwise sends an NFSPROC_RMDIR rpc for the name in
+ * cnp, flags dvp NMODIFIED, invalidates its attribute cache and purges
+ * the name cache entries of both vnodes.  The pathname buffer is freed
+ * and both vnode references are dropped on every path.
+ * Returns 0 or an error number; ENOENT is mapped to 0.
+ */
+int
+nfs_rmdir(ap)
+	struct vop_rmdir_args /* {
+		struct vnode *a_dvp;
+		struct vnode *a_vp;
+		struct componentname *a_cnp;
+	} */ *ap;
+{
+	register struct vnode *vp = ap->a_vp;
+	register struct vnode *dvp = ap->a_dvp;
+	register struct componentname *cnp = ap->a_cnp;
+	register u_long *tl;
+	register caddr_t cp;
+	register long t2;
+	caddr_t bpos, dpos;
+	int error = 0;
+	struct mbuf *mreq, *mrep, *md, *mb, *mb2;
+
+	if (dvp == vp) {
+		/* Same vnode passed twice: drop both references to it. */
+		vrele(dvp);
+		vrele(dvp);
+		FREE(cnp->cn_pnbuf, M_NAMEI);
+		return (EINVAL);
+	}
+	nfsstats.rpccnt[NFSPROC_RMDIR]++;
+	nfsm_reqhead(dvp, NFSPROC_RMDIR,
+		NFSX_FH+NFSX_UNSIGNED+nfsm_rndup(cnp->cn_namelen));
+	nfsm_fhtom(dvp);
+	nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN);
+	nfsm_request(dvp, NFSPROC_RMDIR, cnp->cn_proc, cnp->cn_cred);
+	nfsm_reqdone;
+	FREE(cnp->cn_pnbuf, M_NAMEI);
+	VTONFS(dvp)->n_flag |= NMODIFIED;
+	VTONFS(dvp)->n_attrstamp = 0;
+	cache_purge(dvp);
+	cache_purge(vp);
+	vrele(vp);
+	vrele(dvp);
+	/*
+	 * Kludge: Map ENOENT => 0 assuming that you have a reply to a retry.
+	 */
+	if (error == ENOENT)
+		error = 0;
+	return (error);
+}
+
+/*
+ * nfs readdir call
+ * The cookie is defined as opaque, but it is translated to/from net byte
+ * order anyway so that it looks more sensible; this appears consistent
+ * with the Ultrix implementation of NFS.
+ *
+ * Returns EPERM for a vnode that is not a directory.  A read positioned
+ * at the cached end-of-directory offset of an unmodified directory
+ * returns 0 without transferring data, provided the cached state is
+ * still valid.  Everything else is handed to nfs_bioread().
+ */
+int
+nfs_readdir(ap)
+	struct vop_readdir_args /* {
+		struct vnode *a_vp;
+		struct uio *a_uio;
+		struct ucred *a_cred;
+	} */ *ap;
+{
+	register struct vnode *vp = ap->a_vp;
+	register struct nfsnode *np = VTONFS(vp);
+	register struct uio *uio = ap->a_uio;
+	int tresid, error;
+	int eofhit = 0;
+	struct vattr vattr;
+
+	if (vp->v_type != VDIR)
+		return (EPERM);
+
+	/*
+	 * EOF offset cache: only a non-zero offset that equals the
+	 * remembered end-of-directory offset of an unmodified directory
+	 * qualifies.  Validity is decided by the lease for NQNFS mounts
+	 * and by comparing modification times otherwise.
+	 */
+	if (uio->uio_offset != 0 && uio->uio_offset == np->n_direofoffset &&
+	    (np->n_flag & NMODIFIED) == 0) {
+		if (VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NQNFS) {
+			if (NQNFS_CKCACHABLE(vp, NQL_READ))
+				eofhit = 1;
+		} else {
+			if (VOP_GETATTR(vp, &vattr, ap->a_cred,
+			    uio->uio_procp) == 0 &&
+			    np->n_mtime == vattr.va_mtime.ts_sec)
+				eofhit = 1;
+		}
+		if (eofhit) {
+			nfsstats.direofcache_hits++;
+			return (0);
+		}
+	}
+
+	/*
+	 * Let nfs_bioread() do the real work; a successful read that
+	 * moved no data is counted as an EOF cache miss.
+	 */
+	tresid = uio->uio_resid;
+	error = nfs_bioread(vp, uio, 0, ap->a_cred);
+	if (error == 0 && tresid == uio->uio_resid)
+		nfsstats.direofcache_misses++;
+	return (error);
+}
+
+/*
+ * Readdir rpc call.
+ * Called from below the buffer cache by nfs_doio().
+ *
+ * Issues NFSPROC_READDIR rpcs starting at uiop->uio_offset until the
+ * server reports end of directory or fewer than NFS_DIRBLKSIZ bytes of
+ * room remain in uiop.  The entries of each reply are rewritten in place
+ * in the reply mbufs into struct dirent form and then copied out with
+ * nfsm_mtouio().  Finally the last record copied is grown so that the
+ * remaining uio_resid is a multiple of NFS_DIRBLKSIZ.
+ * Returns 0 or an error number (EBADRPC for an entry whose name length
+ * is out of range).
+ */
+int
+nfs_readdirrpc(vp, uiop, cred)
+	register struct vnode *vp;
+	struct uio *uiop;
+	struct ucred *cred;
+{
+	register long len;
+	register struct dirent *dp;
+	register u_long *tl;
+	register caddr_t cp;
+	register long t1;
+	long tlen, lastlen;
+	caddr_t bpos, dpos, cp2;
+	int error = 0;
+	struct mbuf *mreq, *mrep, *md, *mb, *mb2;
+	struct mbuf *md2;
+	caddr_t dpos2;
+	int siz;
+	int more_dirs = 1;
+	u_long off, savoff;
+	struct dirent *savdp;
+	struct nfsmount *nmp;
+	struct nfsnode *np = VTONFS(vp);
+	long tresid;
+
+	nmp = VFSTONFS(vp->v_mount);
+	tresid = uiop->uio_resid;
+	/*
+	 * Loop around doing readdir rpc's of size uio_resid or nm_rsize,
+	 * whichever is smaller, truncated to a multiple of NFS_DIRBLKSIZ.
+	 * The stopping criteria is EOF or buffer full.
+	 */
+	while (more_dirs && uiop->uio_resid >= NFS_DIRBLKSIZ) {
+		nfsstats.rpccnt[NFSPROC_READDIR]++;
+		nfsm_reqhead(vp, NFSPROC_READDIR,
+			NFSX_FH + 2 * NFSX_UNSIGNED);
+		nfsm_fhtom(vp);
+		/* Arguments: starting cookie and byte count. */
+		nfsm_build(tl, u_long *, 2 * NFSX_UNSIGNED);
+		off = (u_long)uiop->uio_offset;
+		*tl++ = txdr_unsigned(off);
+		*tl = txdr_unsigned(((uiop->uio_resid > nmp->nm_rsize) ?
+			nmp->nm_rsize : uiop->uio_resid) & ~(NFS_DIRBLKSIZ-1));
+		nfsm_request(vp, NFSPROC_READDIR, uiop->uio_procp, cred);
+		siz = 0;
+		nfsm_dissect(tl, u_long *, NFSX_UNSIGNED);
+		more_dirs = fxdr_unsigned(int, *tl);
+	
+		/* Save the position so that we can do nfsm_mtouio() later */
+		dpos2 = dpos;
+		md2 = md;
+	
+		/* loop thru the dir entries, doctoring them to 4bsd form */
+#ifdef lint
+		dp = (struct dirent *)0;
+#endif /* lint */
+		while (more_dirs && siz < uiop->uio_resid) {
+			savoff = off;		/* Hold onto offset and dp */
+			savdp = dp;
+			/* File number and name length head each entry. */
+			nfsm_dissect(tl, u_long *, 2 * NFSX_UNSIGNED);
+			dp = (struct dirent *)tl;
+			dp->d_fileno = fxdr_unsigned(u_long, *tl++);
+			len = fxdr_unsigned(int, *tl);
+			if (len <= 0 || len > NFS_MAXNAMLEN) {
+				error = EBADRPC;
+				m_freem(mrep);
+				goto nfsmout;
+			}
+			dp->d_namlen = (u_char)len;
+			dp->d_type = DT_UNKNOWN;
+			nfsm_adv(len);		/* Point past name */
+			tlen = nfsm_rndup(len);
+			/*
+			 * This should not be necessary, but some servers have
+			 * broken XDR such that these bytes are not null filled.
+			 */
+			if (tlen != len) {
+				*dpos = '\0';	/* Null-terminate */
+				nfsm_adv(tlen - len);
+				len = tlen;
+			}
+			/* Cookie of this entry, then the "another entry" flag. */
+			nfsm_dissect(tl, u_long *, 2 * NFSX_UNSIGNED);
+			off = fxdr_unsigned(u_long, *tl);
+			*tl++ = 0;	/* Ensures null termination of name */
+			more_dirs = fxdr_unsigned(int, *tl);
+			dp->d_reclen = len + 4 * NFSX_UNSIGNED;
+			siz += dp->d_reclen;
+		}
+		/*
+		 * If at end of rpc data, get the eof boolean
+		 */
+		if (!more_dirs) {
+			nfsm_dissect(tl, u_long *, NFSX_UNSIGNED);
+			more_dirs = (fxdr_unsigned(int, *tl) == 0);
+
+			/*
+			 * If at EOF, cache directory offset
+			 */
+			if (!more_dirs)
+				np->n_direofoffset = off;
+		}
+		/*
+		 * If there is too much to fit in the data buffer, use savoff and
+		 * savdp to trim off the last record.
+		 * --> we are not at eof
+		 */
+		if (siz > uiop->uio_resid) {
+			off = savoff;
+			siz -= dp->d_reclen;
+			dp = savdp;
+			more_dirs = 0;	/* Paranoia */
+		}
+		if (siz > 0) {
+			/* Remember the last record's length for the padding below. */
+			lastlen = dp->d_reclen;
+			md = md2;
+			dpos = dpos2;
+			nfsm_mtouio(uiop, siz);
+			uiop->uio_offset = (off_t)off;
+		} else
+			more_dirs = 0;	/* Ugh, never happens, but in case.. */
+		m_freem(mrep);
+	}
+	/*
+	 * Fill last record, iff any, out to a multiple of NFS_DIRBLKSIZ
+	 * by increasing d_reclen for the last record.
+	 */
+	if (uiop->uio_resid < tresid) {
+		len = uiop->uio_resid & (NFS_DIRBLKSIZ - 1);
+		if (len > 0) {
+			/* The last record ends where the iovec currently points. */
+			dp = (struct dirent *)
+				(uiop->uio_iov->iov_base - lastlen);
+			dp->d_reclen += len;
+			uiop->uio_iov->iov_base += len;
+			uiop->uio_iov->iov_len -= len;
+			uiop->uio_resid -= len;
+		}
+	}
+nfsmout:
+	return (error);
+}
+
+/*
+ * Nqnfs readdir_and_lookup RPC. Used in place of nfs_readdirrpc().
+ *
+ * Issues NQNFSPROC_READDIRLOOK rpcs starting at uiop->uio_offset until
+ * the server reports end of directory or fewer than NFS_DIRBLKSIZ bytes
+ * of room remain.  Each reply entry carries lease words, a file handle,
+ * attributes, a file number and a name.  For every entry that fits, a
+ * struct dirent is built directly in the caller's buffer (uiop must
+ * describe a single iovec, otherwise this panics), the attributes are
+ * loaded into the entry's nfsnode and the name is entered in the name
+ * cache.  Finally the last record is grown so that the remaining
+ * uio_resid is a multiple of NFS_DIRBLKSIZ.
+ * Returns 0 or an error number (EBADRPC for an entry whose name length
+ * is out of range).
+ */
+int
+nfs_readdirlookrpc(vp, uiop, cred)
+	struct vnode *vp;
+	register struct uio *uiop;
+	struct ucred *cred;
+{
+	register int len;
+	register struct dirent *dp;
+	register u_long *tl;
+	register caddr_t cp;
+	register long t1;
+	caddr_t bpos, dpos, cp2;
+	struct mbuf *mreq, *mrep, *md, *mb, *mb2;
+	struct nameidata nami, *ndp = &nami;
+	struct componentname *cnp = &ndp->ni_cnd;
+	u_long off, endoff, fileno;
+	time_t reqtime, ltime;
+	struct nfsmount *nmp;
+	struct nfsnode *np;
+	struct vnode *newvp;
+	nfsv2fh_t *fhp;
+	u_quad_t frev;
+	int error = 0, tlen, more_dirs = 1, tresid, doit, bigenough, i;
+	int cachable;
+
+	if (uiop->uio_iovcnt != 1)
+		panic("nfs rdirlook");
+	nmp = VFSTONFS(vp->v_mount);
+	tresid = uiop->uio_resid;
+	ndp->ni_dvp = vp;
+	newvp = NULLVP;
+	/*
+	 * Loop around doing readdir rpc's of size uio_resid or nm_rsize,
+	 * whichever is smaller, truncated to a multiple of NFS_DIRBLKSIZ.
+	 * The stopping criteria is EOF or buffer full.
+	 */
+	while (more_dirs && uiop->uio_resid >= NFS_DIRBLKSIZ) {
+		nfsstats.rpccnt[NQNFSPROC_READDIRLOOK]++;
+		nfsm_reqhead(vp, NQNFSPROC_READDIRLOOK,
+			NFSX_FH + 3 * NFSX_UNSIGNED);
+		nfsm_fhtom(vp);
+		/* Arguments: starting cookie, byte count, lease duration. */
+ 		nfsm_build(tl, u_long *, 3 * NFSX_UNSIGNED);
+		off = (u_long)uiop->uio_offset;
+		*tl++ = txdr_unsigned(off);
+		*tl++ = txdr_unsigned(((uiop->uio_resid > nmp->nm_rsize) ?
+			nmp->nm_rsize : uiop->uio_resid) & ~(NFS_DIRBLKSIZ-1));
+		if (nmp->nm_flag & NFSMNT_NQLOOKLEASE)
+			*tl = txdr_unsigned(nmp->nm_leaseterm);
+		else
+			*tl = 0;
+		/* Lease expiry is computed relative to the request time. */
+		reqtime = time.tv_sec;
+		nfsm_request(vp, NQNFSPROC_READDIRLOOK, uiop->uio_procp, cred);
+		nfsm_dissect(tl, u_long *, NFSX_UNSIGNED);
+		more_dirs = fxdr_unsigned(int, *tl);
+	
+		/* loop thru the dir entries, doctoring them to 4bsd form */
+		bigenough = 1;
+		while (more_dirs && bigenough) {
+			doit = 1;
+			nfsm_dissect(tl, u_long *, 4 * NFSX_UNSIGNED);
+			if (nmp->nm_flag & NFSMNT_NQLOOKLEASE) {
+				cachable = fxdr_unsigned(int, *tl++);
+				ltime = reqtime + fxdr_unsigned(int, *tl++);
+				fxdr_hyper(tl, &frev);
+			}
+			nfsm_dissect(fhp, nfsv2fh_t *, NFSX_FH);
+			/* An entry whose handle equals the directory's own. */
+			if (!bcmp(VTONFS(vp)->n_fh.fh_bytes, (caddr_t)fhp, NFSX_FH)) {
+				VREF(vp);
+				newvp = vp;
+				np = VTONFS(vp);
+			} else {
+				/*
+				 * NOTE(review): when nfs_nget() fails, np is
+				 * not assigned by anything visible here, yet
+				 * NFSTOV(np) is still evaluated -- confirm.
+				 */
+				if (error = nfs_nget(vp->v_mount, fhp, &np))
+					doit = 0;
+				newvp = NFSTOV(np);
+			}
+			if (error = nfs_loadattrcache(&newvp, &md, &dpos,
+				(struct vattr *)0))
+				doit = 0;
+			nfsm_dissect(tl, u_long *, 2 * NFSX_UNSIGNED);
+			fileno = fxdr_unsigned(u_long, *tl++);
+			len = fxdr_unsigned(int, *tl);
+			if (len <= 0 || len > NFS_MAXNAMLEN) {
+				error = EBADRPC;
+				m_freem(mrep);
+				goto nfsmout;
+			}
+			/* Room for the name plus at least one NUL, 4-byte aligned. */
+			tlen = (len + 4) & ~0x3;
+			if ((tlen + DIRHDSIZ) > uiop->uio_resid)
+				bigenough = 0;
+			if (bigenough && doit) {
+				/* Build the dirent header in the caller's buffer. */
+				dp = (struct dirent *)uiop->uio_iov->iov_base;
+				dp->d_fileno = fileno;
+				dp->d_namlen = len;
+				dp->d_reclen = tlen + DIRHDSIZ;
+				dp->d_type =
+				    IFTODT(VTTOIF(np->n_vattr.va_type));
+				uiop->uio_resid -= DIRHDSIZ;
+				uiop->uio_iov->iov_base += DIRHDSIZ;
+				uiop->uio_iov->iov_len -= DIRHDSIZ;
+				cnp->cn_nameptr = uiop->uio_iov->iov_base;
+				cnp->cn_namelen = len;
+				ndp->ni_vp = newvp;
+				/* Copy the name, then NUL-fill up to tlen. */
+				nfsm_mtouio(uiop, len);
+				cp = uiop->uio_iov->iov_base;
+				tlen -= len;
+				for (i = 0; i < tlen; i++)
+					*cp++ = '\0';
+				uiop->uio_iov->iov_base += tlen;
+				uiop->uio_iov->iov_len -= tlen;
+				uiop->uio_resid -= tlen;
+				/* Name hash: sum of each byte times its 1-based position. */
+				cnp->cn_hash = 0;
+				for (cp = cnp->cn_nameptr, i = 1; i <= len; i++, cp++)
+					cnp->cn_hash += (unsigned char)*cp * i;
+				if ((nmp->nm_flag & NFSMNT_NQLOOKLEASE) &&
+					ltime > time.tv_sec)
+					nqnfs_clientlease(nmp, np, NQL_READ,
+						cachable, ltime, frev);
+				if (cnp->cn_namelen <= NCHNAMLEN)
+				    cache_enter(ndp->ni_dvp, ndp->ni_vp, cnp);
+			} else {
+				/* Entry not used: skip over its name. */
+				nfsm_adv(nfsm_rndup(len));
+			}
+			if (newvp != NULLVP) {
+				vrele(newvp);
+				newvp = NULLVP;
+			}
+			nfsm_dissect(tl, u_long *, 2 * NFSX_UNSIGNED);
+			/* off only advances past entries that were accepted. */
+			if (bigenough)
+				endoff = off = fxdr_unsigned(u_long, *tl++);
+			else
+				endoff = fxdr_unsigned(u_long, *tl++);
+			more_dirs = fxdr_unsigned(int, *tl);
+		}
+		/*
+		 * If at end of rpc data, get the eof boolean
+		 */
+		if (!more_dirs) {
+			nfsm_dissect(tl, u_long *, NFSX_UNSIGNED);
+			more_dirs = (fxdr_unsigned(int, *tl) == 0);
+
+			/*
+			 * If at EOF, cache directory offset
+			 */
+			if (!more_dirs)
+				VTONFS(vp)->n_direofoffset = endoff;
+		}
+		/* Stop if nothing at all has been transferred so far. */
+		if (uiop->uio_resid < tresid)
+			uiop->uio_offset = (off_t)off;
+		else
+			more_dirs = 0;
+		m_freem(mrep);
+	}
+	/*
+	 * Fill last record, iff any, out to a multiple of NFS_DIRBLKSIZ
+	 * by increasing d_reclen for the last record.
+	 */
+	if (uiop->uio_resid < tresid) {
+		len = uiop->uio_resid & (NFS_DIRBLKSIZ - 1);
+		if (len > 0) {
+			dp->d_reclen += len;
+			uiop->uio_iov->iov_base += len;
+			uiop->uio_iov->iov_len -= len;
+			uiop->uio_resid -= len;
+		}
+	}
+nfsmout:
+	if (newvp != NULLVP)
+		vrele(newvp);
+	return (error);
+}
+/* Digits used to spell the process id in a silly-rename file name. */
+static char hextoasc[] = "0123456789abcdef";
+
+/*
+ * Silly rename. NFS is stateless, so to look a little more like "ufs" a
+ * remove of an active vnode is turned into a rename to a funny looking
+ * file name, which nfs_inactive later removes for the nfsnode. Another
+ * process on a different client could still create the same funny name
+ * between the failing nfs_lookitup() and the completion of the rename,
+ * but...
+ *
+ * On success the sillyrename record is attached to the nfsnode of vp and
+ * 0 is returned.  On failure the directory reference and the duplicated
+ * credential are released again and an error number is returned (EINVAL
+ * when no unused name could be found).
+ */
+int
+nfs_sillyrename(dvp, vp, cnp)
+	struct vnode *dvp, *vp;
+	struct componentname *cnp;
+{
+	register struct nfsnode *np;
+	register struct sillyrename *sp;
+	register int i;
+	int error;
+	short pid;
+
+	cache_purge(dvp);
+	np = VTONFS(vp);
+#ifdef SILLYSEPARATE
+	MALLOC(sp, struct sillyrename *, sizeof (struct sillyrename),
+		M_NFSREQ, M_WAITOK);
+#else
+	sp = &np->n_silly;
+#endif
+	/* Keep our own credential and a reference on the directory. */
+	sp->s_cred = crdup(cnp->cn_cred);
+	sp->s_dvp = dvp;
+	VREF(dvp);
+
+	/*
+	 * Build the name from a template: the four x's become the low
+	 * 16 bits of the pid in hex, most significant digit first.
+	 */
+	pid = cnp->cn_proc->p_pid;
+	bcopy(".nfsAxxxx4.4", sp->s_name, 13);
+	sp->s_namlen = 12;
+	for (i = 0; i < 4; i++)
+		sp->s_name[8 - i] = hextoasc[(pid >> (4 * i)) & 0xf];
+
+	/*
+	 * Probe until a name is found that does not exist, stepping the
+	 * character after ".nfs" and giving up once it passes 'z'.
+	 */
+	for (;;) {
+		if (nfs_lookitup(sp, (nfsv2fh_t *)0, cnp->cn_proc) != 0)
+			break;
+		if (++sp->s_name[4] > 'z') {
+			error = EINVAL;
+			goto bad;
+		}
+	}
+	error = nfs_renameit(dvp, cnp, sp);
+	if (error)
+		goto bad;
+	/* Pick up the file handle under the new name; result not checked. */
+	nfs_lookitup(sp, &np->n_fh, cnp->cn_proc);
+	np->n_sillyrename = sp;
+	return (0);
+bad:
+	vrele(sp->s_dvp);
+	crfree(sp->s_cred);
+#ifdef SILLYSEPARATE
+	free((caddr_t)sp, M_NFSREQ);
+#endif
+	return (error);
+}
+
+/*
+ * Look up a file name for silly rename stuff.
+ * Just like nfs_lookup() except that it doesn't load returned values
+ * into the nfsnode table.
+ * If fhp != NULL it copies the returned file handle out
+ *
+ * The name looked up is sp->s_name in directory sp->s_dvp, using the
+ * credential sp->s_cred on behalf of procp.
+ * Returns 0 if the lookup rpc succeeded, otherwise an error number.
+ */
+int
+nfs_lookitup(sp, fhp, procp)
+	register struct sillyrename *sp;
+	nfsv2fh_t *fhp;
+	struct proc *procp;
+{
+	register struct vnode *vp = sp->s_dvp;
+	register u_long *tl;
+	register caddr_t cp;
+	register long t1, t2;
+	caddr_t bpos, dpos, cp2;
+	int error = 0, isnq;
+	struct mbuf *mreq, *mrep, *md, *mb, *mb2;
+	long len;
+
+	isnq = (VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NQNFS);
+	nfsstats.rpccnt[NFSPROC_LOOKUP]++;
+	len = sp->s_namlen;
+	nfsm_reqhead(vp, NFSPROC_LOOKUP, NFSX_FH+NFSX_UNSIGNED+nfsm_rndup(len));
+	/* NQNFS mounts send one extra leading word, set to 0 here. */
+	if (isnq) {
+		nfsm_build(tl, u_long *, NFSX_UNSIGNED);
+		*tl = 0;
+	}
+	nfsm_fhtom(vp);
+	nfsm_strtom(sp->s_name, len, NFS_MAXNAMLEN);
+	nfsm_request(vp, NFSPROC_LOOKUP, procp, sp->s_cred);
+	if (fhp != NULL) {
+		/* For NQNFS, step over one word before the file handle. */
+		if (isnq)
+			nfsm_dissect(tl, u_long *, NFSX_UNSIGNED);
+		nfsm_dissect(cp, caddr_t, NFSX_FH);
+		bcopy(cp, (caddr_t)fhp, NFSX_FH);
+	}
+	nfsm_reqdone;
+	return (error);
+}
+
+/*
+ * Kludge City..
+ * - nfs_bmap() is essentially a no-op that performs no translation
+ * - nfs_strategy() fakes physical I/O with nfs_readrpc/nfs_writerpc
+ *   after mapping the physical addresses into Kernel Virtual space in
+ *   the nfsiobuf area.
+ *   (The process's page mapping might have been usable instead, but
+ *    Kernel Write might not be enabled, copyout() would do a lot more
+ *    work than bcopy(), and it currently happens in the context of the
+ *    swapper process (2).)
+ *
+ * When requested, the vnode itself is returned through a_vpp and the
+ * logical block number scaled by btodb() of the mount's I/O size is
+ * returned through a_bnp.  a_runp is not touched.  Always returns 0.
+ */
+int
+nfs_bmap(ap)
+	struct vop_bmap_args /* {
+		struct vnode *a_vp;
+		daddr_t  a_bn;
+		struct vnode **a_vpp;
+		daddr_t *a_bnp;
+		int *a_runp;
+	} */ *ap;
+{
+	register struct vnode *vp = ap->a_vp;
+
+	/* Both outputs are optional. */
+	if (ap->a_bnp != NULL)
+		*ap->a_bnp = ap->a_bn * btodb(vp->v_mount->mnt_stat.f_iosize);
+	if (ap->a_vpp != NULL)
+		*ap->a_vpp = vp;
+	return (0);
+}
+
+/*
+ * Strategy routine.
+ * For async requests when nfsiod(s) are running, queue the request by
+ * calling nfs_asyncio(), otherwise just call nfs_doio() to do the
+ * request.
+ *
+ * Panics on a B_PHYS buffer.  Returns 0 when the request was queued,
+ * otherwise whatever nfs_doio() returns.
+ */
+int
+nfs_strategy(ap)
+	struct vop_strategy_args *ap;
+{
+	register struct buf *bp = ap->a_bp;
+	struct ucred *cr;
+	struct proc *p;
+
+	if (bp->b_flags & B_PHYS)
+		panic("nfs physio");
+	/* Asynchronous I/O is not tied to the current process. */
+	p = (bp->b_flags & B_ASYNC) ? (struct proc *)0 : curproc; /* XXX */
+	/* Use the credential that matches the direction of the transfer. */
+	cr = (bp->b_flags & B_READ) ? bp->b_rcred : bp->b_wcred;
+	/*
+	 * An asynchronous request is first offered to the i/o daemons;
+	 * if one takes it there is nothing more to do here.  Synchronous
+	 * requests, and asynchronous ones that could not be queued, are
+	 * done by ourselves.
+	 */
+	if ((bp->b_flags & B_ASYNC) != 0 && !nfs_asyncio(bp, NOCRED))
+		return (0);
+	return (nfs_doio(bp, cr, p));
+}
+
+/*
+ * Mmap a file
+ *
+ * NB Currently unsupported: the arguments are ignored and EINVAL is
+ * always returned.
+ */
+/* ARGSUSED */
+int
+nfs_mmap(ap)
+	struct vop_mmap_args /* {
+		struct vnode *a_vp;
+		int  a_fflags;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+
+	return (EINVAL);
+}
+
+/*
+ * Flush all the blocks associated with a vnode.
+ * 	Walk through the buffer pool and push any dirty pages
+ *	associated with the vnode.
+ *
+ * Every buffer on the vnode's dirty list is written asynchronously.
+ * With a_waitfor == MNT_WAIT busy buffers are waited for, and the
+ * routine also waits until v_numoutput drains and the dirty list is
+ * empty.  Sleeps are interruptible on NFSMNT_INT mounts; EINTR is
+ * returned if nfs_sigintr() reports a signal.  A write error recorded on
+ * the nfsnode (NWRITEERR) is returned and cleared.
+ */
+/* ARGSUSED */
+int
+nfs_fsync(ap)
+	struct vop_fsync_args /* {
+		struct vnodeop_desc *a_desc;
+		struct vnode * a_vp;
+		struct ucred * a_cred;
+		int  a_waitfor;
+		struct proc * a_p;
+	} */ *ap;
+{
+	register struct vnode *vp = ap->a_vp;
+	register struct nfsnode *np = VTONFS(vp);
+	register struct buf *bp;
+	struct buf *nbp;
+	struct nfsmount *nmp;
+	int s, error = 0, slptimeo = 0, slpflag = 0;
+
+	nmp = VFSTONFS(vp->v_mount);
+	if (nmp->nm_flag & NFSMNT_INT)
+		slpflag = PCATCH;
+loop:
+	s = splbio();
+	for (bp = vp->v_dirtyblkhd.lh_first; bp; bp = nbp) {
+		nbp = bp->b_vnbufs.le_next;
+		if (bp->b_flags & B_BUSY) {
+			/* Busy buffers are skipped unless the caller waits. */
+			if (ap->a_waitfor != MNT_WAIT)
+				continue;
+			bp->b_flags |= B_WANTED;
+			error = tsleep((caddr_t)bp, slpflag | (PRIBIO + 1),
+				"nfsfsync", slptimeo);
+			splx(s);
+			if (error) {
+			    if (nfs_sigintr(nmp, (struct nfsreq *)0, ap->a_p))
+				return (EINTR);
+			    /*
+			     * Not a signal we act on: stop catching signals
+			     * and poll with a two second timeout instead.
+			     */
+			    if (slpflag == PCATCH) {
+				slpflag = 0;
+				slptimeo = 2 * hz;
+			    }
+			}
+			/* The list may have changed while asleep; rescan. */
+			goto loop;
+		}
+		if ((bp->b_flags & B_DELWRI) == 0)
+			panic("nfs_fsync: not dirty");
+		bremfree(bp);
+		bp->b_flags |= B_BUSY;
+		splx(s);
+		/* Start the write asynchronously, then rescan from the top. */
+		bp->b_flags |= B_ASYNC;
+		VOP_BWRITE(bp);
+		goto loop;
+	}
+	splx(s);
+	if (ap->a_waitfor == MNT_WAIT) {
+		/* Wait for all writes in progress on the vnode to finish. */
+		while (vp->v_numoutput) {
+			vp->v_flag |= VBWAIT;
+			error = tsleep((caddr_t)&vp->v_numoutput,
+				slpflag | (PRIBIO + 1), "nfsfsync", slptimeo);
+			if (error) {
+			    if (nfs_sigintr(nmp, (struct nfsreq *)0, ap->a_p))
+				return (EINTR);
+			    if (slpflag == PCATCH) {
+				slpflag = 0;
+				slptimeo = 2 * hz;
+			    }
+			}
+		}
+		/* Buffers dirtied in the meantime are pushed as well. */
+		if (vp->v_dirtyblkhd.lh_first) {
+#ifdef DIAGNOSTIC
+			vprint("nfs_fsync: dirty", vp);
+#endif
+			goto loop;
+		}
+	}
+	/* Report, once, a write error recorded on the nfsnode. */
+	if (np->n_flag & NWRITEERR) {
+		error = np->n_error;
+		np->n_flag &= ~NWRITEERR;
+	}
+	return (error);
+}
+
+/*
+ * Return POSIX pathconf information applicable to nfs.
+ *
+ * Currently the NFS protocol does not support getting such
+ * information from the remote server, so the arguments are ignored and
+ * EINVAL is always returned.
+ */
+/* ARGSUSED */
+int
+nfs_pathconf(ap)
+	struct vop_pathconf_args /* {
+		struct vnode *a_vp;
+		int a_name;
+		int *a_retval;
+	} */ *ap;
+{
+
+	return (EINVAL);
+}
+
+/*
+ * NFS advisory byte-level locks.
+ * Currently unsupported: the arguments are ignored and EOPNOTSUPP is
+ * always returned.
+ */
+int
+nfs_advlock(ap)
+	struct vop_advlock_args /* {
+		struct vnode *a_vp;
+		caddr_t  a_id;
+		int  a_op;
+		struct flock *a_fl;
+		int  a_flags;
+	} */ *ap;
+{
+
+	return (EOPNOTSUPP);
+}
+
+/*
+ * Print out the contents of an nfsnode.
+ *
+ * Prints the file id and file system id from the cached attributes of
+ * the vnode, followed by fifo information for a VFIFO vnode when FIFO is
+ * configured.  Always returns 0.
+ */
+int
+nfs_print(ap)
+	struct vop_print_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+	register struct vnode *vp = ap->a_vp;
+	register struct nfsnode *np = VTONFS(vp);
+
+	printf("tag VT_NFS, fileid %d fsid 0x%x",
+		np->n_vattr.va_fileid, np->n_vattr.va_fsid);
+#ifdef FIFO
+	if (vp->v_type == VFIFO)
+		fifo_printinfo(vp);
+#endif /* FIFO */
+	printf("\n");
+	/* The function is declared int, so hand back a defined value. */
+	return (0);
+}
+
+/*
+ * NFS directory offset lookup.
+ * Currently unsupported: the arguments are ignored and EOPNOTSUPP is
+ * always returned.
+ */
+int
+nfs_blkatoff(ap)
+	struct vop_blkatoff_args /* {
+		struct vnode *a_vp;
+		off_t a_offset;
+		char **a_res;
+		struct buf **a_bpp;
+	} */ *ap;
+{
+
+	return (EOPNOTSUPP);
+}
+
+/*
+ * NFS flat namespace allocation.
+ * Currently unsupported: the arguments are ignored and EOPNOTSUPP is
+ * always returned.
+ */
+int
+nfs_valloc(ap)
+	struct vop_valloc_args /* {
+		struct vnode *a_pvp;
+		int a_mode;
+		struct ucred *a_cred;
+		struct vnode **a_vpp;
+	} */ *ap;
+{
+
+	return (EOPNOTSUPP);
+}
+
+/*
+ * NFS flat namespace free.
+ * Currently unsupported: the arguments are ignored and EOPNOTSUPP is
+ * always returned.
+ */
+int
+nfs_vfree(ap)
+	struct vop_vfree_args /* {
+		struct vnode *a_pvp;
+		ino_t a_ino;
+		int a_mode;
+	} */ *ap;
+{
+
+	return (EOPNOTSUPP);
+}
+
+/*
+ * NFS file truncation.
+ *
+ * Not implemented: prints a reminder on the console and returns
+ * EOPNOTSUPP without looking at its arguments.
+ */
+int
+nfs_truncate(ap)
+	struct vop_truncate_args /* {
+		struct vnode *a_vp;
+		off_t a_length;
+		int a_flags;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+
+	/* Use nfs_setattr */
+	printf("nfs_truncate: need to implement!!");
+	return (EOPNOTSUPP);
+}
+
+/*
+ * NFS update.
+ *
+ * Not implemented: prints a reminder on the console and returns
+ * EOPNOTSUPP without looking at its arguments.
+ */
+int
+nfs_update(ap)
+	struct vop_update_args /* {
+		struct vnode *a_vp;
+		struct timeval *a_ta;
+		struct timeval *a_tm;
+		int a_waitfor;
+	} */ *ap;
+{
+
+	/* Use nfs_setattr */
+	printf("nfs_update: need to implement!!");
+	return (EOPNOTSUPP);
+}
+
+/*
+ * nfs special file access vnode op.
+ * The device is local to the client, so the attributes are fetched and
+ * the permission bits tested here, in the manner of iaccess().
+ */
+int
+nfsspec_access(ap)
+	struct vop_access_args /* {
+		struct vnode *a_vp;
+		int  a_mode;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	register struct ucred *cred = ap->a_cred;
+	mode_t wanted = ap->a_mode;
+	struct vattr vattr;
+	register int n;
+	int error;
+
+	/* Uid 0 is granted access before any attributes are fetched. */
+	if (cred->cr_uid == 0)
+		return (0);
+
+	error = VOP_GETATTR(ap->a_vp, &vattr, cred, ap->a_p);
+	if (error)
+		return (error);
+
+	/*
+	 * Only one of the owner, group and public classes is consulted.
+	 * The requested bits are tested as given for the owner, shifted
+	 * right by 3 for a member of the file's group, and shifted by a
+	 * further 3 for everyone else.
+	 */
+	if (cred->cr_uid != vattr.va_uid) {
+		wanted >>= 3;
+		for (n = 0; n < cred->cr_ngroups; n++)
+			if (vattr.va_gid == cred->cr_groups[n])
+				break;
+		/* Ran off the end of the group list: not a member. */
+		if (n >= cred->cr_ngroups)
+			wanted >>= 3;
+	}
+
+	if ((vattr.va_mode & wanted) != wanted)
+		return (EACCES);
+	return (0);
+}
+
+/*
+ * Read wrapper for special devices: note the access on the nfsnode, then
+ * hand the read to the spec vnode ops.
+ */
+int
+nfsspec_read(ap)
+	struct vop_read_args /* {
+		struct vnode *a_vp;
+		struct uio *a_uio;
+		int  a_ioflag;
+		struct ucred *a_cred;
+	} */ *ap;
+{
+	register struct nfsnode *np;
+
+	/* Flag the node as accessed and stamp the access time. */
+	np = VTONFS(ap->a_vp);
+	np->n_flag |= NACC;
+	np->n_atim = time;
+	return (VOCALL(spec_vnodeop_p, VOFFSET(vop_read), ap));
+}
+
+/*
+ * Write wrapper for special devices: note the update on the nfsnode, then
+ * hand the write to the spec vnode ops.
+ */
+int
+nfsspec_write(ap)
+	struct vop_write_args /* {
+		struct vnode *a_vp;
+		struct uio *a_uio;
+		int  a_ioflag;
+		struct ucred *a_cred;
+	} */ *ap;
+{
+	register struct nfsnode *np;
+
+	/* Flag the node as updated and stamp the modify time. */
+	np = VTONFS(ap->a_vp);
+	np->n_flag |= NUPD;
+	np->n_mtim = time;
+	return (VOCALL(spec_vnodeop_p, VOFFSET(vop_write), ap));
+}
+
+/*
+ * Close wrapper for special devices.
+ *
+ * Hand the times recorded on the nfsnode to VOP_SETATTR, then do device close.
+ */
+int
+nfsspec_close(ap)
+	struct vop_close_args /* {
+		struct vnode *a_vp;
+		int  a_fflag;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	register struct vnode *vp = ap->a_vp;
+	register struct nfsnode *np = VTONFS(vp);
+	struct vattr vattr;
+
+	if ((np->n_flag & (NACC | NUPD)) == 0)
+		goto devclose;
+	np->n_flag |= NCHG;
+	/* Times are set only if v_usecount is 1 and the mount is not read-only. */
+	if (vp->v_usecount != 1 || (vp->v_mount->mnt_flag & MNT_RDONLY) != 0)
+		goto devclose;
+	VATTR_NULL(&vattr);
+	if (np->n_flag & NACC) {
+		vattr.va_atime.ts_sec = np->n_atim.tv_sec;
+		vattr.va_atime.ts_nsec = np->n_atim.tv_usec * 1000;
+	}
+	if (np->n_flag & NUPD) {
+		vattr.va_mtime.ts_sec = np->n_mtim.tv_sec;
+		vattr.va_mtime.ts_nsec = np->n_mtim.tv_usec * 1000;
+	}
+	/* The VOP_SETATTR result is ignored; the device close runs regardless. */
+	(void)VOP_SETATTR(vp, &vattr, ap->a_cred, ap->a_p);
+devclose:
+	return (VOCALL(spec_vnodeop_p, VOFFSET(vop_close), ap));
+}
+
+#ifdef FIFO
+/*
+ * Read wrapper for fifos: note the access on the nfsnode, then hand the
+ * read to the fifo vnode ops.
+ */
+int
+nfsfifo_read(ap)
+	struct vop_read_args /* {
+		struct vnode *a_vp;
+		struct uio *a_uio;
+		int  a_ioflag;
+		struct ucred *a_cred;
+	} */ *ap;
+{
+	extern int (**fifo_vnodeop_p)();
+	register struct nfsnode *np;
+
+	/* Flag the node as accessed and stamp the access time. */
+	np = VTONFS(ap->a_vp);
+	np->n_flag |= NACC;
+	np->n_atim = time;
+	return (VOCALL(fifo_vnodeop_p, VOFFSET(vop_read), ap));
+}
+
+/*
+ * Write wrapper for fifos: note the update on the nfsnode, then hand the
+ * write to the fifo vnode ops.
+ */
+int
+nfsfifo_write(ap)
+	struct vop_write_args /* {
+		struct vnode *a_vp;
+		struct uio *a_uio;
+		int  a_ioflag;
+		struct ucred *a_cred;
+	} */ *ap;
+{
+	extern int (**fifo_vnodeop_p)();
+	register struct nfsnode *np;
+
+	/* Flag the node as updated and stamp the modify time. */
+	np = VTONFS(ap->a_vp);
+	np->n_flag |= NUPD;
+	np->n_mtim = time;
+	return (VOCALL(fifo_vnodeop_p, VOFFSET(vop_write), ap));
+}
+
+/*
+ * Close wrapper for fifos.
+ *
+ * Restamp the pending times on the nfsnode, hand them on, then do fifo close.
+ */
+int
+nfsfifo_close(ap)
+	struct vop_close_args /* {
+		struct vnode *a_vp;
+		int  a_fflag;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	register struct vnode *vp = ap->a_vp;
+	register struct nfsnode *np = VTONFS(vp);
+	struct vattr vattr;
+	extern int (**fifo_vnodeop_p)();
+
+	if ((np->n_flag & (NACC | NUPD)) == 0)
+		goto fifoclose;
+	if (np->n_flag & NACC)
+		np->n_atim = time;
+	if (np->n_flag & NUPD)
+		np->n_mtim = time;
+	np->n_flag |= NCHG;
+	/* Times are set only if v_usecount is 1 and the mount is not read-only. */
+	if (vp->v_usecount != 1 || (vp->v_mount->mnt_flag & MNT_RDONLY) != 0)
+		goto fifoclose;
+	VATTR_NULL(&vattr);
+	if (np->n_flag & NACC) {
+		vattr.va_atime.ts_sec = np->n_atim.tv_sec;
+		vattr.va_atime.ts_nsec = np->n_atim.tv_usec * 1000;
+	}
+	if (np->n_flag & NUPD) {
+		vattr.va_mtime.ts_sec = np->n_mtim.tv_sec;
+		vattr.va_mtime.ts_nsec = np->n_mtim.tv_usec * 1000;
+	}
+	/* The VOP_SETATTR result is ignored; the fifo close runs regardless. */
+	(void)VOP_SETATTR(vp, &vattr, ap->a_cred, ap->a_p);
+fifoclose:
+	return (VOCALL(fifo_vnodeop_p, VOFFSET(vop_close), ap));
+}
+#endif /* FIFO */
diff --git a/sys/nfsclient/nfsargs.h b/sys/nfsclient/nfsargs.h
new file mode 100644
index 00000000000..261fd42657a
--- /dev/null
+++ b/sys/nfsclient/nfsargs.h
@@ -0,0 +1,297 @@
+/*
+ * Copyright (c) 1989, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Rick Macklem at The University of Guelph.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)nfs.h	8.1 (Berkeley) 6/10/93
+ */
+
+/*
+ * Tunable constants for nfs
+ */
+
+#define	NFS_MAXIOVEC	34
+#define NFS_HZ		25		/* Ticks per second for NFS timeouts */
+#define	NFS_TIMEO	(1*NFS_HZ)	/* Default timeout = 1 second */
+#define	NFS_MINTIMEO	(1*NFS_HZ)	/* Min timeout to use */
+#define	NFS_MAXTIMEO	(60*NFS_HZ)	/* Max timeout to backoff to */
+#define	NFS_MINIDEMTIMEO (5*NFS_HZ)	/* Min timeout for non-idempotent ops*/
+#define	NFS_MAXREXMIT	100		/* Stop counting after this many */
+#define	NFS_MAXWINDOW	1024		/* Max number of outstanding requests */
+#define	NFS_RETRANS	10		/* Num of retrans for soft mounts */
+#define	NFS_MAXGRPS	16		/* Max. size of groups list */
+#define	NFS_MINATTRTIMO 5		/* Attribute cache timeout in sec */
+#define	NFS_MAXATTRTIMO 60		/* Upper clamp applied by NFS_ATTRTIMEO */
+#define	NFS_WSIZE	8192		/* Def. write data size <= 8192 */
+#define	NFS_RSIZE	8192		/* Def. read data size <= 8192 */
+#define	NFS_DEFRAHEAD	1		/* Def. read ahead # blocks */
+#define	NFS_MAXRAHEAD	4		/* Max. read ahead # blocks */
+#define	NFS_MAXREADDIR	NFS_MAXDATA	/* Max. size of directory read */
+#define	NFS_MAXUIDHASH	64		/* Max. # of hashed uid entries/mp */
+#define	NFS_MAXASYNCDAEMON 20	/* Max. number async_daemons runnable */
+#define	NFS_DIRBLKSIZ	1024		/* Size of an NFS directory block */
+#define	NMOD(a)		((a) % nfs_asyncdaemons)
+/*
+ * Attribute timeout: (time.tv_sec - n_mtime) / 10 clamped to the range
+ * NFS_MINATTRTIMO..NFS_MAXATTRTIMO; NFS_MINATTRTIMO whenever NMODIFIED is set.
+ */
+#define	NFS_ATTRTIMEO(np) \
+	((((np)->n_flag & NMODIFIED) || \
+	 (time.tv_sec - (np)->n_mtime) / 10 < NFS_MINATTRTIMO) ? NFS_MINATTRTIMO : \
+	 ((time.tv_sec - (np)->n_mtime) / 10 > NFS_MAXATTRTIMO ? NFS_MAXATTRTIMO : \
+	  (time.tv_sec - (np)->n_mtime) / 10))
+
+/*
+ * Structures for the nfssvc(2) syscall. Not that anyone but nfsd and mount_nfs
+ * should ever try and use it.
+ */
+struct nfsd_args {
+	int	sock;		/* Socket to serve */
+	caddr_t	name;		/* Client address for connection based sockets */
+	int	namelen;	/* Length of name */
+};
+
+struct nfsd_srvargs {
+	struct nfsd	*nsd_nfsd;	/* Pointer to in kernel nfsd struct */
+	uid_t		nsd_uid;	/* Effective uid mapped to cred */
+	u_long		nsd_haddr;	/* Ip address of client */
+	struct ucred	nsd_cr;		/* Cred. uid maps to */
+	int		nsd_authlen;	/* Length of auth string (ret) */
+	char		*nsd_authstr;	/* Auth string (ret) */
+};
+
+struct nfsd_cargs {
+	char		*ncd_dirp;	/* Mount dir path */
+	uid_t		ncd_authuid;	/* Effective uid */
+	int		ncd_authtype;	/* Type of authenticator */
+	int		ncd_authlen;	/* Length of authenticator string */
+	char		*ncd_authstr;	/* Authenticator string */
+};
+
+/*
+ * Stats structure
+ */
+struct nfsstats {
+	int	attrcache_hits;
+	int	attrcache_misses;
+	int	lookupcache_hits;
+	int	lookupcache_misses;
+	int	direofcache_hits;
+	int	direofcache_misses;
+	int	biocache_reads;
+	int	read_bios;
+	int	read_physios;
+	int	biocache_writes;
+	int	write_bios;
+	int	write_physios;
+	int	biocache_readlinks;
+	int	readlink_bios;
+	int	biocache_readdirs;
+	int	readdir_bios;
+	int	rpccnt[NFS_NPROCS];
+	int	rpcretries;
+	int	srvrpccnt[NFS_NPROCS];
+	int	srvrpc_errs;
+	int	srv_errs;
+	int	rpcrequests;
+	int	rpctimeouts;
+	int	rpcunexpected;
+	int	rpcinvalid;
+	int	srvcache_inproghits;
+	int	srvcache_idemdonehits;
+	int	srvcache_nonidemdonehits;
+	int	srvcache_misses;
+	int	srvnqnfs_leases;
+	int	srvnqnfs_maxleases;
+	int	srvnqnfs_getleases;
+};
+
+/*
+ * Flags for nfssvc() system call.
+ */
+#define	NFSSVC_BIOD	0x002
+#define	NFSSVC_NFSD	0x004
+#define	NFSSVC_ADDSOCK	0x008
+#define	NFSSVC_AUTHIN	0x010
+#define	NFSSVC_GOTAUTH	0x040
+#define	NFSSVC_AUTHINFAIL 0x080
+#define	NFSSVC_MNTD	0x100
+
+/*
+ * The set of signals that interrupt an I/O in progress for NFSMNT_INT mounts.
+ * What should be in this set is open to debate, but I believe that since
+ * I/O system calls on ufs are never interrupted by signals the set should
+ * be minimal. My reasoning is that many current programs that use signals
+ * such as SIGALRM will not expect file I/O system calls to be interrupted
+ * by them and break.
+ */
+#ifdef KERNEL
+#define	NFSINT_SIGMASK	(sigmask(SIGINT)|sigmask(SIGTERM)|sigmask(SIGKILL)| \
+			 sigmask(SIGHUP)|sigmask(SIGQUIT))
+
+/*
+ * Socket errors ignored for connectionless sockets?? For now: everything
+ * but EINTR, ERESTART and EWOULDBLOCK, when (s) lacks PR_CONNREQUIRED.
+ */
+#define	NFSIGNORE_SOERROR(s, e) \
+		((e) != EINTR && (e) != ERESTART && (e) != EWOULDBLOCK && \
+		((s) & PR_CONNREQUIRED) == 0)
+
+/*
+ * Nfs outstanding request list element
+ */
+struct nfsreq {
+	struct nfsreq	*r_next;
+	struct nfsreq	*r_prev;
+	struct mbuf	*r_mreq;
+	struct mbuf	*r_mrep;
+	struct mbuf	*r_md;
+	caddr_t		r_dpos;
+	struct nfsmount *r_nmp;
+	struct vnode	*r_vp;
+	u_long		r_xid;
+	int		r_flags;	/* flags on request, see below */
+	int		r_retry;	/* max retransmission count */
+	int		r_rexmit;	/* current retrans count */
+	int		r_timer;	/* tick counter on reply */
+	int		r_procnum;	/* NFS procedure number */
+	int		r_rtt;		/* RTT for rpc */
+	struct proc	*r_procp;	/* Proc that did I/O system call */
+};
+
+/* Flag values for r_flags */
+#define R_TIMING	0x01		/* timing request (in mntp) */
+#define R_SENT		0x02		/* request has been sent */
+#define	R_SOFTTERM	0x04		/* soft mnt, too many retries */
+#define	R_INTR		0x08		/* intr mnt, signal pending */
+#define	R_SOCKERR	0x10		/* Fatal error on socket */
+#define	R_TPRINTFMSG	0x20		/* Did a tprintf msg. */
+#define	R_MUSTRESEND	0x40		/* Must resend request */
+#define	R_GETONEREP	0x80		/* Probe for one reply only */
+
+struct nfsstats nfsstats;
+
+/*
+ * A list of nfssvc_sock structures is maintained with all the sockets
+ * that require service by the nfsd.
+ * The nfsuid structs hang off of the nfssvc_sock structs in both lru
+ * and uid hash lists.
+ */
+#define	NUIDHASHSIZ	32		/* Power of 2: NUIDHASH() masks with it */
+#define	NUIDHASH(uid)	((uid) & (NUIDHASHSIZ - 1))	/* Low bits of uid */
+
+/*
+ * Network address hash list element
+ */
+union nethostaddr {
+	u_long had_inetaddr;
+	struct mbuf *had_nam;
+};
+
+struct nfsuid {
+	struct nfsuid	*nu_lrunext;	/* MUST be first */
+	struct nfsuid	*nu_lruprev;
+	struct nfsuid	*nu_hnext;
+	struct nfsuid	*nu_hprev;
+	int		nu_flag;	/* Flags */
+	uid_t		nu_uid;		/* Uid mapped by this entry */
+	union nethostaddr nu_haddr;	/* Host addr. for dgram sockets */
+	struct ucred	nu_cr;		/* Cred uid mapped to */
+};
+
+#define	nu_inetaddr	nu_haddr.had_inetaddr
+#define	nu_nam		nu_haddr.had_nam
+/* Bits for nu_flag */
+#define	NU_INETADDR	0x1
+
+struct nfssvc_sock {
+	struct nfsuid	*ns_lrunext;	/* MUST be first */
+	struct nfsuid	*ns_lruprev;
+	struct nfssvc_sock *ns_next;
+	struct nfssvc_sock *ns_prev;
+	int		ns_flag;
+	u_long		ns_sref;
+	struct file	*ns_fp;
+	struct socket	*ns_so;
+	int		ns_solock;
+	struct mbuf	*ns_nam;
+	int		ns_cc;
+	struct mbuf	*ns_raw;
+	struct mbuf	*ns_rawend;
+	int		ns_reclen;
+	struct mbuf	*ns_rec;
+	struct mbuf	*ns_recend;
+	int		ns_numuids;
+	struct nfsuid	*ns_uidh[NUIDHASHSIZ];
+};
+
+/* Bits for "ns_flag" */
+#define	SLP_VALID	0x01
+#define	SLP_DOREC	0x02
+#define	SLP_NEEDQ	0x04
+#define	SLP_DISCONN	0x08
+#define	SLP_GETSTREAM	0x10
+#define	SLP_INIT	0x20
+#define	SLP_WANTINIT	0x40
+
+#define SLP_ALLFLAGS	0xff
+
+/*
+ * One of these structures is allocated for each nfsd.
+ */
+struct nfsd {
+	struct nfsd	*nd_next;	/* Must be first */
+	struct nfsd	*nd_prev;
+	int		nd_flag;	/* NFSD_ flags */
+	struct nfssvc_sock *nd_slp;	/* Current socket */
+	struct mbuf	*nd_nam;	/* Client addr for datagram req. */
+	struct mbuf	*nd_mrep;	/* Req. mbuf list */
+	struct mbuf	*nd_md;
+	caddr_t		nd_dpos;	/* Position in list */
+	int		nd_procnum;	/* RPC procedure number */
+	u_long		nd_retxid;	/* RPC xid */
+	int		nd_repstat;	/* Reply status value */
+	struct ucred	nd_cr;		/* Credentials for req. */
+	int		nd_nqlflag;	/* Leasing flag */
+	int		nd_duration;	/* Lease duration */
+	int		nd_authlen;	/* Authenticator len */
+	u_char		nd_authstr[RPCAUTH_MAXSIZ]; /* Authenticator data */
+	struct proc	*nd_procp;	/* Proc ptr */
+};
+
+#define	NFSD_WAITING	0x01
+#define	NFSD_CHECKSLP	0x02
+#define	NFSD_REQINPROG	0x04
+#define	NFSD_NEEDAUTH	0x08
+#define	NFSD_AUTHFAIL	0x10
+#endif	/* KERNEL */
diff --git a/sys/nfsclient/nfsdiskless.h b/sys/nfsclient/nfsdiskless.h
new file mode 100644
index 00000000000..74e6b7bca43
--- /dev/null
+++ b/sys/nfsclient/nfsdiskless.h
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Rick Macklem at The University of Guelph.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)nfsdiskless.h	8.1 (Berkeley) 6/10/93
+ */
+
+/*
+ * Structure that must be initialized for a diskless nfs client.
+ * This structure is used by nfs_mountroot() to set up the root and swap
+ * vnodes plus do a partial ifconfig(8) and route(8) so that the critical net
+ * interface can communicate with the server.
+ * The primary bootstrap is expected to fill in the appropriate fields before
+ * starting vmunix. Whether or not the swap area is nfs mounted is determined
+ * by the value in swdevt[0]. (equal to NODEV --> swap over nfs)
+ * Currently only works for AF_INET protocols.
+ * NB: All fields are stored in net byte order to avoid hassles with
+ * client/server byte ordering differences.
+ */
+struct nfs_diskless {
+	struct ifaliasreq myif;			/* Default interface */
+	struct sockaddr_in mygateway;		/* Default gateway */
+	struct nfs_args	swap_args;		/* Mount args for swap file */
+	u_char		swap_fh[NFS_FHSIZE];	/* Swap file's file handle */
+	struct sockaddr_in swap_saddr;		/* Address of swap server */
+	char		swap_hostnam[MNAMELEN];	/* Host name for mount pt */
+	int		swap_nblks;		/* Size of server swap file */
+	struct ucred	swap_ucred;		/* Swap credentials */
+	struct nfs_args	root_args;		/* Mount args for root fs */
+	u_char		root_fh[NFS_FHSIZE];	/* File handle of root dir */
+	struct sockaddr_in root_saddr;		/* Address of root server */
+	char		root_hostnam[MNAMELEN];	/* Host name for mount pt */
+	long		root_time;		/* Timestamp of root fs */
+	char		my_hostnam[MAXHOSTNAMELEN]; /* Client host name */
+};
diff --git a/sys/nfsclient/nfsm_subs.h b/sys/nfsclient/nfsm_subs.h
new file mode 100644
index 00000000000..879db360057
--- /dev/null
+++ b/sys/nfsclient/nfsm_subs.h
@@ -0,0 +1,269 @@
+/*
+ * Copyright (c) 1989, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Rick Macklem at The University of Guelph.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)nfsm_subs.h	8.1 (Berkeley) 6/16/93
+ */
+
+/*
+ * These macros do strange and peculiar things to mbuf chains for
+ * the assistance of the nfs code. To attempt to use them for any
+ * other purpose will be dangerous. (they make weird assumptions)
+ */
+
+/*
+ * First define what the actual subs. return
+ */
+extern struct mbuf *nfsm_reqh();
+/* mbuf helpers: M_EXT test, m_data reset to buffer start, advance, and size. */
+#define	M_HASCL(m)	((m)->m_flags & M_EXT)
+#define	NFSMINOFF(m) \
+		if (M_HASCL(m)) \
+			(m)->m_data = (m)->m_ext.ext_buf; \
+		else if ((m)->m_flags & M_PKTHDR) \
+			(m)->m_data = (m)->m_pktdat; \
+		else \
+			(m)->m_data = (m)->m_dat
+#define	NFSMADV(m, s)	(m)->m_data += (s)
+#define	NFSMSIZ(m)	((M_HASCL(m))?MCLBYTES: \
+				(((m)->m_flags & M_PKTHDR)?MHLEN:MLEN))
+
+/*
+ * Now for the macros that do the simple stuff and call the functions
+ * for the hard stuff.
+ * These macros use several vars. declared in nfsm_reqhead and these
+ * vars. must not be used elsewhere unless you are careful not to corrupt
+ * them. The vars. starting with pN and tN (N=1,2,3,..) are temporaries
+ * that may be used so long as the value is not expected to be retained
+ * after a macro.
+ * I know, this is kind of dorkey, but it makes the actual op functions
+ * fairly clean and deals with the mess caused by the xdr discriminating
+ * unions.
+ */
+/* Claim (s) bytes at bpos as (a), chaining a new mbuf when mb lacks room. */
+#define	nfsm_build(a,c,s) \
+		{ if ((s) > M_TRAILINGSPACE(mb)) { \
+			MGET(mb2, M_WAIT, MT_DATA); \
+			if ((s) > MLEN) \
+				panic("build > MLEN"); \
+			mb->m_next = mb2; \
+			mb = mb2; \
+			mb->m_len = 0; \
+			bpos = mtod(mb, caddr_t); \
+		} \
+		(a) = (c)(bpos); \
+		mb->m_len += (s); \
+		bpos += (s); }
+/* Point (a) at the next (s) bytes at dpos; nfsm_disct() covers a short mbuf. */
+#define	nfsm_dissect(a,c,s) \
+		{ t1 = mtod(md, caddr_t)+md->m_len-dpos; \
+		if (t1 >= (s)) { \
+			(a) = (c)(dpos); \
+			dpos += (s); \
+		} else if (error = nfsm_disct(&md, &dpos, (s), t1, &cp2)) { \
+			m_freem(mrep); \
+			goto nfsmout; \
+		} else { \
+			(a) = (c)cp2; \
+		} }
+/* Append the NFSX_FH-byte n_fh of vnode (v)'s nfsnode to the mbuf being built. */
+#define nfsm_fhtom(v) \
+		nfsm_build(cp,caddr_t,NFSX_FH); \
+		bcopy((caddr_t)&(VTONFS(v)->n_fh), cp, NFSX_FH)
+/* Append the NFSX_FH bytes found at (f) to the mbuf being built. */
+#define nfsm_srvfhtom(f) \
+		nfsm_build(cp,caddr_t,NFSX_FH); \
+		bcopy((caddr_t)(f), cp, NFSX_FH)
+/* Dissect a file handle, nfs_nget() it on (d)'s mount, set (v), load attrs. */
+#define nfsm_mtofh(d,v) \
+		{ struct nfsnode *np; nfsv2fh_t *fhp; \
+		nfsm_dissect(fhp,nfsv2fh_t *,NFSX_FH); \
+		if (error = nfs_nget((d)->v_mount, fhp, &np)) { \
+			m_freem(mrep); \
+			goto nfsmout; \
+		} \
+		(v) = NFSTOV(np); \
+		nfsm_loadattr(v, (struct vattr *)0); \
+		}
+/* Call nfs_loadattrcache() for (v) and (a); store the vnode it leaves in (v). */
+#define	nfsm_loadattr(v,a) \
+		{ struct vnode *tvp = (v); \
+		if (error = nfs_loadattrcache(&tvp, &md, &dpos, (a))) { \
+			m_freem(mrep); \
+			goto nfsmout; \
+		} \
+		(v) = tvp; }
+/* Dissect a length word into (s); EBADRPC and out via nfsmout if (s) > (m). */
+#define	nfsm_strsiz(s,m) \
+		{ nfsm_dissect(tl,u_long *,NFSX_UNSIGNED); \
+		if (((s) = fxdr_unsigned(long,*tl)) > (m)) { \
+			m_freem(mrep); \
+			error = EBADRPC; \
+			goto nfsmout; \
+		} }
+/* Dissect a length word into (s); EBADRPC via nfsm_reply(0) unless 0<(s)<=(m). */
+#define	nfsm_srvstrsiz(s,m) \
+		{ nfsm_dissect(tl,u_long *,NFSX_UNSIGNED); \
+		if (((s) = fxdr_unsigned(long,*tl)) > (m) || (s) <= 0) { \
+			error = EBADRPC; \
+			nfsm_reply(0); \
+		} }
+/* When (s) > 0, move (s) bytes at md/dpos into uio (p) via nfsm_mbuftouio(). */
+#define nfsm_mtouio(p,s) \
+		if ((s) > 0 && \
+		   (error = nfsm_mbuftouio(&md,(p),(s),&dpos))) { \
+			m_freem(mrep); \
+			goto nfsmout; \
+		}
+/* Move (s) bytes from uio (p) to mb/bpos via nfsm_uiotombuf(); frees mreq on error. */
+#define nfsm_uiotom(p,s) \
+		if (error = nfsm_uiotombuf((p),&mb,(s),&bpos)) { \
+			m_freem(mreq); \
+			goto nfsmout; \
+		}
+/* Set mb and mreq to the mbuf nfsm_reqh() returns; bpos is passed by address. */
+#define	nfsm_reqhead(v,a,s) \
+		mb = mreq = nfsm_reqh((v),(a),(s),&bpos)
+
+#define nfsm_reqdone	m_freem(mrep); \
+		nfsmout: 
+/* Round (a) up to the next multiple of 4. */
+#define nfsm_rndup(a)	(((a)+3)&(~0x3))
+/* Issue mreq through nfs_request(), jumping to nfsmout if it returns an error. */
+#define	nfsm_request(v, t, p, c)	\
+		if (error = nfs_request((v), mreq, (t), (p), \
+		   (c), &mrep, &md, &dpos)) \
+			goto nfsmout
+/* Append length word + (s) bytes of (a), zero padded; ENAMETOOLONG if (s)>(m). */
+#define	nfsm_strtom(a,s,m) \
+		if ((s) > (m)) { \
+			m_freem(mreq); \
+			error = ENAMETOOLONG; \
+			goto nfsmout; \
+		} \
+		t2 = nfsm_rndup(s)+NFSX_UNSIGNED; \
+		if (t2 <= M_TRAILINGSPACE(mb)) { \
+			nfsm_build(tl,u_long *,t2); \
+			*tl++ = txdr_unsigned(s); \
+			*(tl+((t2>>2)-2)) = 0; \
+			bcopy((caddr_t)(a), (caddr_t)tl, (s)); \
+		} else if (error = nfsm_strtmbuf(&mb, &bpos, (a), (s))) { \
+			m_freem(mreq); \
+			goto nfsmout; \
+		}
+/* Plants the nfsmout label and returns error from the enclosing function. */
+#define	nfsm_srvdone \
+		nfsmout: \
+		return(error)
+/* Save error in nd_repstat, call nfs_rephead(), free mrep; return 0 on error. */
+#define	nfsm_reply(s) \
+		{ \
+		nfsd->nd_repstat = error; \
+		if (error) \
+		   (void) nfs_rephead(0, nfsd, error, cache, &frev, \
+			mrq, &mb, &bpos); \
+		else \
+		   (void) nfs_rephead((s), nfsd, error, cache, &frev, \
+			mrq, &mb, &bpos); \
+		m_freem(mrep); \
+		mreq = *mrq; \
+		if (error) \
+			return(0); \
+		}
+/* Skip (s) bytes at dpos; nfs_adv() is used when the current mbuf is too short. */
+#define	nfsm_adv(s) \
+		t1 = mtod(md, caddr_t)+md->m_len-dpos; \
+		if (t1 >= (s)) { \
+			dpos += (s); \
+		} else if (error = nfs_adv(&md, &dpos, (s), t1)) { \
+			m_freem(mrep); \
+			goto nfsmout; \
+		}
+/* Dissect NFSX_FH bytes from the mbuf chain and copy them to (f). */
+#define nfsm_srvmtofh(f) \
+		nfsm_dissect(tl, u_long *, NFSX_FH); \
+		bcopy((caddr_t)tl, (caddr_t)(f), NFSX_FH)
+/* Once bp reaches be, link a new cluster mbuf after mp2, reset bp/be; tl = bp. */
+#define	nfsm_clget \
+		if (bp >= be) { \
+			if (mp == mb) \
+				mp->m_len += bp-bpos; \
+			MGET(mp, M_WAIT, MT_DATA); \
+			MCLGET(mp, M_WAIT); \
+			mp->m_len = NFSMSIZ(mp); \
+			mp2->m_next = mp; \
+			mp2 = mp; \
+			bp = mtod(mp, caddr_t); \
+			be = bp+mp->m_len; \
+		} \
+		tl = (u_long *)bp
+/* Fill *fp from *vap; the field set used depends on nd_nqlflag == NQL_NOVAL. */
+#define	nfsm_srvfillattr \
+	fp->fa_type = vtonfs_type(vap->va_type); \
+	fp->fa_mode = vtonfs_mode(vap->va_type, vap->va_mode); \
+	fp->fa_nlink = txdr_unsigned(vap->va_nlink); \
+	fp->fa_uid = txdr_unsigned(vap->va_uid); \
+	fp->fa_gid = txdr_unsigned(vap->va_gid); \
+	if (nfsd->nd_nqlflag == NQL_NOVAL) { \
+		fp->fa_nfsblocksize = txdr_unsigned(vap->va_blocksize); \
+		if (vap->va_type == VFIFO) \
+			fp->fa_nfsrdev = 0xffffffff; \
+		else \
+			fp->fa_nfsrdev = txdr_unsigned(vap->va_rdev); \
+		fp->fa_nfsfsid = txdr_unsigned(vap->va_fsid); \
+		fp->fa_nfsfileid = txdr_unsigned(vap->va_fileid); \
+		fp->fa_nfssize = txdr_unsigned(vap->va_size); \
+		fp->fa_nfsblocks = txdr_unsigned(vap->va_bytes / NFS_FABLKSIZE); \
+		txdr_nfstime(&vap->va_atime, &fp->fa_nfsatime); \
+		txdr_nfstime(&vap->va_mtime, &fp->fa_nfsmtime); \
+		fp->fa_nfsctime.nfs_sec = txdr_unsigned(vap->va_ctime.ts_sec); \
+		fp->fa_nfsctime.nfs_usec = txdr_unsigned(vap->va_gen); \
+	} else { \
+		fp->fa_nqblocksize = txdr_unsigned(vap->va_blocksize); \
+		if (vap->va_type == VFIFO) \
+			fp->fa_nqrdev = 0xffffffff; \
+		else \
+			fp->fa_nqrdev = txdr_unsigned(vap->va_rdev); \
+		fp->fa_nqfsid = txdr_unsigned(vap->va_fsid); \
+		fp->fa_nqfileid = txdr_unsigned(vap->va_fileid); \
+		txdr_hyper(&vap->va_size, &fp->fa_nqsize); \
+		txdr_hyper(&vap->va_bytes, &fp->fa_nqbytes); \
+		txdr_nqtime(&vap->va_atime, &fp->fa_nqatime); \
+		txdr_nqtime(&vap->va_mtime, &fp->fa_nqmtime); \
+		txdr_nqtime(&vap->va_ctime, &fp->fa_nqctime); \
+		fp->fa_nqflags = txdr_unsigned(vap->va_flags); \
+		fp->fa_nqgen = txdr_unsigned(vap->va_gen); \
+		txdr_hyper(&vap->va_filerev, &fp->fa_nqfilerev); \
+	}
+
diff --git a/sys/nfsclient/nfsmount.h b/sys/nfsclient/nfsmount.h
new file mode 100644
index 00000000000..4d74acb38a5
--- /dev/null
+++ b/sys/nfsclient/nfsmount.h
@@ -0,0 +1,127 @@
+/*
+ * Copyright (c) 1989, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Rick Macklem at The University of Guelph.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)nfsmount.h	8.1 (Berkeley) 6/10/93
+ */
+
+/*
+ * Mount structure.
+ * One allocated on every NFS mount.
+ * Holds NFS specific information for mount.
+ */
+struct	nfsmount {
+	int	nm_flag;		/* Flags for soft/hard... */
+	struct	mount *nm_mountp;	/* Vfs structure for this filesystem */
+	int	nm_numgrps;		/* Max. size of groupslist */
+	nfsv2fh_t nm_fh;		/* File handle of root dir */
+	struct	socket *nm_so;		/* Rpc socket */
+	int	nm_sotype;		/* Type of socket */
+	int	nm_soproto;		/* and protocol */
+	int	nm_soflags;		/* pr_flags for socket protocol */
+	struct	mbuf *nm_nam;		/* Addr of server */
+	int	nm_timeo;		/* Init timer for NFSMNT_DUMBTIMR */
+	int	nm_retry;		/* Max retries */
+	int	nm_srtt[4];		/* Timers for rpcs */
+	int	nm_sdrtt[4];
+	int	nm_sent;		/* Request send count */
+	int	nm_cwnd;		/* Request send window */
+	int	nm_timeouts;		/* Request timeouts */
+	int	nm_deadthresh;		/* Threshold of timeouts-->dead server*/
+	int	nm_rsize;		/* Max size of read rpc */
+	int	nm_wsize;		/* Max size of write rpc */
+	int	nm_readahead;		/* Num. of blocks to readahead */
+	int	nm_leaseterm;		/* Term (sec) for NQNFS lease */
+	struct nfsnode *nm_tnext;	/* Head of lease timer queue */
+	struct nfsnode *nm_tprev;
+	struct vnode *nm_inprog;	/* Vnode in prog by nqnfs_clientd() */
+	uid_t	nm_authuid;		/* Uid for authenticator */
+	int	nm_authtype;		/* Authenticator type */
+	int	nm_authlen;		/* and length */
+	char	*nm_authstr;		/* Authenticator string */
+};
+
+#ifdef KERNEL
+/*
+ * Convert mount ptr to nfsmount ptr.
+ */
+#define VFSTONFS(mp)	((struct nfsmount *)((mp)->mnt_data))
+#endif /* KERNEL */
+
+/*
+ * Prototypes for NFS mount operations
+ */
+int	nfs_mount __P((
+		struct mount *mp,
+		char *path,
+		caddr_t data,
+		struct nameidata *ndp,
+		struct proc *p));
+int	nfs_start __P((
+		struct mount *mp,
+		int flags,
+		struct proc *p));
+int	nfs_unmount __P((
+		struct mount *mp,
+		int mntflags,
+		struct proc *p));
+int	nfs_root __P((
+		struct mount *mp,
+		struct vnode **vpp));
+int	nfs_quotactl __P((
+		struct mount *mp,
+		int cmds,
+		uid_t uid,
+		caddr_t arg,
+		struct proc *p));
+int	nfs_statfs __P((
+		struct mount *mp,
+		struct statfs *sbp,
+		struct proc *p));
+int	nfs_sync __P((
+		struct mount *mp,
+		int waitfor,
+		struct ucred *cred,
+		struct proc *p));
+int	nfs_fhtovp __P((
+		struct mount *mp,
+		struct fid *fhp,
+		struct mbuf *nam,
+		struct vnode **vpp,
+		int *exflagsp,
+		struct ucred **credanonp));
+int	nfs_vptofh __P((
+		struct vnode *vp,
+		struct fid *fhp));
+int	nfs_init __P((void));	/* takes no arguments */
diff --git a/sys/nfsclient/nfsnode.h b/sys/nfsclient/nfsnode.h
new file mode 100644
index 00000000000..f5fee5bf2f3
--- /dev/null
+++ b/sys/nfsclient/nfsnode.h
@@ -0,0 +1,166 @@
+/*
+ * Copyright (c) 1989, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Rick Macklem at The University of Guelph.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)nfsnode.h	8.4 (Berkeley) 2/13/94
+ */
+
+/*
+ * Silly rename structure that hangs off the nfsnode until the name
+ * can be removed by nfs_inactive()
+ */
+struct sillyrename {
+	struct	ucred *s_cred;		/* presumably creds for the removal - confirm */
+	struct	vnode *s_dvp;		/* presumably directory holding the name - confirm */
+	long	s_namlen;		/* presumably length of name in s_name - confirm */
+	char	s_name[20];		/* name buffer, fixed at 20 bytes */
+};
+
+/*
+ * The nfsnode is the nfs equivalent to ufs's inode. Any similarity
+ * is purely coincidental.
+ * There is a unique nfsnode allocated for each active file,
+ * each current directory, each mounted-on file, text file, and the root.
+ * An nfsnode is 'named' by its file handle. (nget/nfs_node.c)
+ */
+
+struct nfsnode {
+	struct	nfsnode *n_forw;	/* hash, forward */
+	struct	nfsnode **n_back;	/* hash, backward */
+	nfsv2fh_t n_fh;			/* NFS File Handle */
+	long	n_flag;			/* Flag bits, see "Flags for n_flag" */
+	struct	vnode *n_vnode;		/* vnode associated with this node */
+	struct	vattr n_vattr;		/* Vnode attribute cache */
+	time_t	n_attrstamp;		/* Time stamp for cached attributes */
+	struct	sillyrename *n_sillyrename; /* Ptr to silly rename struct */
+	u_quad_t n_size;		/* Current size of file */
+	int	n_error;		/* Save write error value */
+	u_long	n_direofoffset;		/* Dir. EOF offset cache */
+	time_t	n_mtime;		 /* Prev modify time. */
+	time_t	n_ctime;		 /* Prev create time. */
+	u_quad_t n_brev;		 /* Modify rev when cached */
+	u_quad_t n_lrev;		 /* Modify rev for lease */
+	time_t	n_expiry;		 /* Lease expiry time */
+	struct	nfsnode *n_tnext;	 /* Nqnfs timer chain */
+	struct	nfsnode *n_tprev;		/* presumably back link of that chain - confirm */
+	long	spare1;			/* To 8 byte boundary */
+	struct	sillyrename n_silly;	/* Silly rename struct */
+	struct	timeval n_atim;		/* Special file times */
+	struct	timeval n_mtim;		/* (second of the two special file times) */
+};
+
+/*
+ * Flags for n_flag (0x0010 is not assigned in this list)
+ */
+#define	NFLUSHWANT	0x0001	/* Want wakeup from a flush in prog. */
+#define	NFLUSHINPROG	0x0002	/* Avoid multiple calls to vinvalbuf() */
+#define	NMODIFIED	0x0004	/* Might have a modified buffer in bio */
+#define	NWRITEERR	0x0008	/* Flag write errors so close will know */
+#define	NQNFSNONCACHE	0x0020	/* Non-cachable lease */
+#define	NQNFSWRITE	0x0040	/* Write lease */
+#define	NQNFSEVICTED	0x0080	/* Has been evicted */
+#define	NACC		0x0100	/* Special file accessed */
+#define	NUPD		0x0200	/* Special file updated */
+#define	NCHG		0x0400	/* Special file times changed */
+
+/*
+ * Convert between nfsnode pointers and vnode pointers
+ */
+#define VTONFS(vp)	((struct nfsnode *)(vp)->v_data)	/* vnode -> nfsnode, via v_data */
+#define NFSTOV(np)	((struct vnode *)(np)->n_vnode)	/* nfsnode -> vnode, via n_vnode */
+
+/*
+ * Queue head for nfsiod's (declares struct nfsbufs, a tail queue of struct buf)
+ */
+TAILQ_HEAD(nfsbufs, buf) nfs_bufq;	/* NOTE(review): no "extern" - an object definition in every includer */
+
+#ifdef KERNEL
+/*
+ * Prototypes for NFS vnode operations; the #defines alias generic routines.
+ */
+int	nfs_lookup __P((struct vop_lookup_args *));
+int	nfs_create __P((struct vop_create_args *));
+int	nfs_mknod __P((struct vop_mknod_args *));
+int	nfs_open __P((struct vop_open_args *));
+int	nfs_close __P((struct vop_close_args *));
+int	nfsspec_close __P((struct vop_close_args *));
+#ifdef FIFO
+int	nfsfifo_close __P((struct vop_close_args *));
+#endif
+int	nfs_access __P((struct vop_access_args *));
+int	nfsspec_access __P((struct vop_access_args *));
+int	nfs_getattr __P((struct vop_getattr_args *));
+int	nfs_setattr __P((struct vop_setattr_args *));
+int	nfs_read __P((struct vop_read_args *));
+int	nfs_write __P((struct vop_write_args *));
+int	nfsspec_read __P((struct vop_read_args *));
+int	nfsspec_write __P((struct vop_write_args *));
+#ifdef FIFO
+int	nfsfifo_read __P((struct vop_read_args *));
+int	nfsfifo_write __P((struct vop_write_args *));
+#endif
+#define nfs_ioctl ((int (*) __P((struct  vop_ioctl_args *)))enoioctl)
+#define nfs_select ((int (*) __P((struct  vop_select_args *)))seltrue)
+int	nfs_mmap __P((struct vop_mmap_args *));
+int	nfs_fsync __P((struct vop_fsync_args *));
+#define nfs_seek ((int (*) __P((struct  vop_seek_args *)))nullop)
+int	nfs_remove __P((struct vop_remove_args *));
+int	nfs_link __P((struct vop_link_args *));
+int	nfs_rename __P((struct vop_rename_args *));
+int	nfs_mkdir __P((struct vop_mkdir_args *));
+int	nfs_rmdir __P((struct vop_rmdir_args *));
+int	nfs_symlink __P((struct vop_symlink_args *));
+int	nfs_readdir __P((struct vop_readdir_args *));
+int	nfs_readlink __P((struct vop_readlink_args *));
+int	nfs_abortop __P((struct vop_abortop_args *));
+int	nfs_inactive __P((struct vop_inactive_args *));
+int	nfs_reclaim __P((struct vop_reclaim_args *));
+int	nfs_lock __P((struct vop_lock_args *));
+int	nfs_unlock __P((struct vop_unlock_args *));
+int	nfs_bmap __P((struct vop_bmap_args *));
+int	nfs_strategy __P((struct vop_strategy_args *));
+int	nfs_print __P((struct vop_print_args *));
+int	nfs_islocked __P((struct vop_islocked_args *));
+int	nfs_pathconf __P((struct vop_pathconf_args *));
+int	nfs_advlock __P((struct vop_advlock_args *));
+int	nfs_blkatoff __P((struct vop_blkatoff_args *));
+int	nfs_vget __P((struct mount *, ino_t, struct vnode **));	/* the only one here not taking a vop_*_args */
+int	nfs_valloc __P((struct vop_valloc_args *));
+#define nfs_reallocblks \
+	((int (*) __P((struct  vop_reallocblks_args *)))eopnotsupp)
+int	nfs_vfree __P((struct vop_vfree_args *));
+int	nfs_truncate __P((struct vop_truncate_args *));
+int	nfs_update __P((struct vop_update_args *));
+int	nfs_bwrite __P((struct vop_bwrite_args *));
+#endif /* KERNEL */
diff --git a/sys/nfsclient/nfsstats.h b/sys/nfsclient/nfsstats.h
new file mode 100644
index 00000000000..261fd42657a
--- /dev/null
+++ b/sys/nfsclient/nfsstats.h
@@ -0,0 +1,297 @@
+/*
+ * Copyright (c) 1989, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Rick Macklem at The University of Guelph.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)nfs.h	8.1 (Berkeley) 6/10/93
+ */
+
+/*
+ * Tunable constants for nfs
+ */
+
+#define	NFS_MAXIOVEC	34
+#define NFS_HZ		25		/* Ticks per second for NFS timeouts */
+#define	NFS_TIMEO	(1*NFS_HZ)	/* Default timeout = 1 second */
+#define	NFS_MINTIMEO	(1*NFS_HZ)	/* Min timeout to use */
+#define	NFS_MAXTIMEO	(60*NFS_HZ)	/* Max timeout to backoff to */
+#define	NFS_MINIDEMTIMEO (5*NFS_HZ)	/* Min timeout for non-idempotent ops*/
+#define	NFS_MAXREXMIT	100		/* Stop counting after this many */
+#define	NFS_MAXWINDOW	1024		/* Max number of outstanding requests */
+#define	NFS_RETRANS	10		/* Num of retrans for soft mounts */
+#define	NFS_MAXGRPS	16		/* Max. size of groups list */
+#define	NFS_MINATTRTIMO 5		/* Min attribute cache timeout in sec */
+#define	NFS_MAXATTRTIMO 60		/* Max attribute cache timeout in sec */
+#define	NFS_WSIZE	8192		/* Def. write data size <= 8192 */
+#define	NFS_RSIZE	8192		/* Def. read data size <= 8192 */
+#define	NFS_DEFRAHEAD	1		/* Def. read ahead # blocks */
+#define	NFS_MAXRAHEAD	4		/* Max. read ahead # blocks */
+#define	NFS_MAXREADDIR	NFS_MAXDATA	/* Max. size of directory read */
+#define	NFS_MAXUIDHASH	64		/* Max. # of hashed uid entries/mp */
+#define	NFS_MAXASYNCDAEMON 20	/* Max. number async_daemons runnable */
+#define	NFS_DIRBLKSIZ	1024		/* Size of an NFS directory block */
+#define	NMOD(a)		((a) % nfs_asyncdaemons)
+
+/*
+ * Attribute timeout: (seconds since n_mtime)/10, clamped to MIN..MAXATTRTIMO.
+ */
+#define	NFS_ATTRTIMEO(np) \
+	((((np)->n_flag & NMODIFIED) || \
+	 (time.tv_sec - (np)->n_mtime) / 10 < NFS_MINATTRTIMO) ? NFS_MINATTRTIMO : \
+	 ((time.tv_sec - (np)->n_mtime) / 10 > NFS_MAXATTRTIMO ? NFS_MAXATTRTIMO : \
+	  (time.tv_sec - (np)->n_mtime) / 10))	/* NMODIFIED forces the minimum; np is evaluated several times */
+
+/*
+ * Argument structures for the nfssvc(2) syscall. Nothing other than nfsd
+ * and mount_nfs is expected to use them.
+ */
+struct nfsd_args {
+	int	sock;		/* Socket to serve */
+	caddr_t	name;		/* Client address for connection based sockets */
+	int	namelen;	/* Length of name */
+};
+
+struct nfsd_srvargs {
+	struct nfsd	*nsd_nfsd;	/* Pointer to in kernel nfsd struct */
+	uid_t		nsd_uid;	/* Effective uid mapped to cred */
+	u_long		nsd_haddr;	/* Ip address of client */
+	struct ucred	nsd_cr;		/* Cred. uid maps to */
+	int		nsd_authlen;	/* Length of auth string (ret) */
+	char		*nsd_authstr;	/* Auth string (ret) */
+};
+
+struct nfsd_cargs {
+	char		*ncd_dirp;	/* Mount dir path */
+	uid_t		ncd_authuid;	/* Effective uid */
+	int		ncd_authtype;	/* Type of authenticator */
+	int		ncd_authlen;	/* Length of authenticator string */
+	char		*ncd_authstr;	/* Authenticator string */
+};
+
+/*
+ * Stats structure: int counters, plus two arrays of NFS_NPROCS ints each
+ */
+struct nfsstats {
+	int	attrcache_hits;
+	int	attrcache_misses;
+	int	lookupcache_hits;
+	int	lookupcache_misses;
+	int	direofcache_hits;
+	int	direofcache_misses;
+	int	biocache_reads;
+	int	read_bios;
+	int	read_physios;
+	int	biocache_writes;
+	int	write_bios;
+	int	write_physios;
+	int	biocache_readlinks;
+	int	readlink_bios;
+	int	biocache_readdirs;
+	int	readdir_bios;
+	int	rpccnt[NFS_NPROCS];	/* presumably one slot per procedure number - confirm */
+	int	rpcretries;
+	int	srvrpccnt[NFS_NPROCS];	/* presumably the server-side equivalent - confirm */
+	int	srvrpc_errs;
+	int	srv_errs;
+	int	rpcrequests;
+	int	rpctimeouts;
+	int	rpcunexpected;
+	int	rpcinvalid;
+	int	srvcache_inproghits;
+	int	srvcache_idemdonehits;
+	int	srvcache_nonidemdonehits;
+	int	srvcache_misses;
+	int	srvnqnfs_leases;
+	int	srvnqnfs_maxleases;
+	int	srvnqnfs_getleases;
+};
+
+/*
+ * Flags for nfssvc() system call (0x001 and 0x020 are not assigned here).
+ */
+#define	NFSSVC_BIOD	0x002
+#define	NFSSVC_NFSD	0x004
+#define	NFSSVC_ADDSOCK	0x008
+#define	NFSSVC_AUTHIN	0x010
+#define	NFSSVC_GOTAUTH	0x040
+#define	NFSSVC_AUTHINFAIL 0x080
+#define	NFSSVC_MNTD	0x100
+
+/*
+ * The set of signals that interrupt an I/O in progress for NFSMNT_INT mounts.
+ * What should be in this set is open to debate, but I believe that since
+ * I/O system calls on ufs are never interrupted by signals the set should
+ * be minimal. My reasoning is that many current programs that use signals
+ * such as SIGALRM will not expect file I/O system calls to be interrupted
+ * by them and break.
+ */
+#ifdef KERNEL
+#define	NFSINT_SIGMASK	(sigmask(SIGINT)|sigmask(SIGTERM)|sigmask(SIGKILL)| \
+			 sigmask(SIGHUP)|sigmask(SIGQUIT))
+
+/*
+ * Socket errors ignored for connectionless sockets??
+ * For now, ignore them all except EINTR, ERESTART and EWOULDBLOCK
+ */
+#define	NFSIGNORE_SOERROR(s, e) \
+		((e) != EINTR && (e) != ERESTART && (e) != EWOULDBLOCK && \
+		((s) & PR_CONNREQUIRED) == 0)	/* true only when PR_CONNREQUIRED is clear in s */
+
+/*
+ * Nfs outstanding request list element
+ */
+struct nfsreq {
+	struct nfsreq	*r_next;	/* request list links */
+	struct nfsreq	*r_prev;
+	struct mbuf	*r_mreq;	/* presumably the request mbufs - confirm */
+	struct mbuf	*r_mrep;	/* presumably the reply mbufs - confirm */
+	struct mbuf	*r_md;
+	caddr_t		r_dpos;
+	struct nfsmount *r_nmp;		/* an nfsmount; presumably the request's mount */
+	struct vnode	*r_vp;
+	u_long		r_xid;		/* presumably the RPC xid - confirm */
+	int		r_flags;	/* flags on request, see below */
+	int		r_retry;	/* max retransmission count */
+	int		r_rexmit;	/* current retrans count */
+	int		r_timer;	/* tick counter on reply */
+	int		r_procnum;	/* NFS procedure number */
+	int		r_rtt;		/* RTT for rpc */
+	struct proc	*r_procp;	/* Proc that did I/O system call */
+};
+
+/* Flag values for r_flags (all eight bits 0x01..0x80 are assigned) */
+#define R_TIMING	0x01		/* timing request (in mntp) */
+#define R_SENT		0x02		/* request has been sent */
+#define	R_SOFTTERM	0x04		/* soft mnt, too many retries */
+#define	R_INTR		0x08		/* intr mnt, signal pending */
+#define	R_SOCKERR	0x10		/* Fatal error on socket */
+#define	R_TPRINTFMSG	0x20		/* Did a tprintf msg. */
+#define	R_MUSTRESEND	0x40		/* Must resend request */
+#define	R_GETONEREP	0x80		/* Probe for one reply only */
+
+struct nfsstats nfsstats;	/* NOTE(review): no "extern" - a tentative definition in every includer */
+
+/*
+ * A list of nfssvc_sock structures is maintained with all the sockets
+ * that require service by the nfsd.
+ * The nfsuid structs hang off of the nfssvc_sock structs in both lru
+ * and uid hash lists.
+ */
+#define	NUIDHASHSIZ	32	/* keep a power of 2: NUIDHASH masks with SIZ-1 */
+#define	NUIDHASH(uid)	((uid) & (NUIDHASHSIZ - 1))	/* low bits of uid, 0..NUIDHASHSIZ-1 */
+
+/*
+ * Network address hash list element
+ */
+union nethostaddr {
+	u_long had_inetaddr;		/* address held as a u_long */
+	struct mbuf *had_nam;		/* address held in an mbuf */
+};
+
+struct nfsuid {
+	struct nfsuid	*nu_lrunext;	/* MUST be first */
+	struct nfsuid	*nu_lruprev;	/* lru list, other link */
+	struct nfsuid	*nu_hnext;	/* uid hash list links */
+	struct nfsuid	*nu_hprev;
+	int		nu_flag;	/* Flags (NU_INETADDR) */
+	uid_t		nu_uid;		/* Uid mapped by this entry */
+	union nethostaddr nu_haddr;	/* Host addr. for dgram sockets */
+	struct ucred	nu_cr;		/* Cred uid mapped to */
+};
+
+#define	nu_inetaddr	nu_haddr.had_inetaddr
+#define	nu_nam		nu_haddr.had_nam
+/* Bits for nu_flag */
+#define	NU_INETADDR	0x1	/* presumably: nu_haddr holds had_inetaddr - confirm */
+
+struct nfssvc_sock {
+	struct nfsuid	*ns_lrunext;	/* MUST be first */
+	struct nfsuid	*ns_lruprev;	/* (these two point at nfsuid structs) */
+	struct nfssvc_sock *ns_next;	/* links to other nfssvc_sock structs */
+	struct nfssvc_sock *ns_prev;
+	int		ns_flag;	/* SLP_* bits, see below */
+	u_long		ns_sref;
+	struct file	*ns_fp;
+	struct socket	*ns_so;
+	int		ns_solock;
+	struct mbuf	*ns_nam;
+	int		ns_cc;
+	struct mbuf	*ns_raw;
+	struct mbuf	*ns_rawend;
+	int		ns_reclen;
+	struct mbuf	*ns_rec;
+	struct mbuf	*ns_recend;
+	int		ns_numuids;
+	struct nfsuid	*ns_uidh[NUIDHASHSIZ];	/* presumably uid hash chain heads - confirm */
+};
+
+/* Bits for "ns_flag" */
+#define	SLP_VALID	0x01
+#define	SLP_DOREC	0x02
+#define	SLP_NEEDQ	0x04
+#define	SLP_DISCONN	0x08
+#define	SLP_GETSTREAM	0x10
+#define	SLP_INIT	0x20
+#define	SLP_WANTINIT	0x40
+
+#define SLP_ALLFLAGS	0xff	/* all eight low bits, including the unassigned 0x80 */
+
+/*
+ * One of these structures is allocated for each nfsd.
+ */
+struct nfsd {
+	struct nfsd	*nd_next;	/* Must be first */
+	struct nfsd	*nd_prev;	/* other link of the nfsd list */
+	int		nd_flag;	/* NFSD_ flags */
+	struct nfssvc_sock *nd_slp;	/* Current socket */
+	struct mbuf	*nd_nam;	/* Client addr for datagram req. */
+	struct mbuf	*nd_mrep;	/* Req. mbuf list */
+	struct mbuf	*nd_md;		/* presumably current mbuf in that list - confirm */
+	caddr_t		nd_dpos;	/* Position in list */
+	int		nd_procnum;	/* RPC procedure number */
+	u_long		nd_retxid;	/* RPC xid */
+	int		nd_repstat;	/* Reply status value */
+	struct ucred	nd_cr;		/* Credentials for req. */
+	int		nd_nqlflag;	/* Leasing flag */
+	int		nd_duration;	/* Lease duration */
+	int		nd_authlen;	/* Authenticator len */
+	u_char		nd_authstr[RPCAUTH_MAXSIZ]; /* Authenticator data */
+	struct proc	*nd_procp;	/* Proc ptr */
+};
+
+#define	NFSD_WAITING	0x01	/* NFSD_* are the bits kept in nd_flag */
+#define	NFSD_CHECKSLP	0x02
+#define	NFSD_REQINPROG	0x04
+#define	NFSD_NEEDAUTH	0x08
+#define	NFSD_AUTHFAIL	0x10
+#endif	/* KERNEL */
diff --git a/sys/nfsserver/nfs.h b/sys/nfsserver/nfs.h
new file mode 100644
index 00000000000..261fd42657a
--- /dev/null
+++ b/sys/nfsserver/nfs.h
@@ -0,0 +1,297 @@
+/*
+ * Copyright (c) 1989, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Rick Macklem at The University of Guelph.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)nfs.h	8.1 (Berkeley) 6/10/93
+ */
+
+/*
+ * Tunable constants for nfs
+ */
+
+#define	NFS_MAXIOVEC	34
+#define NFS_HZ		25		/* Ticks per second for NFS timeouts */
+#define	NFS_TIMEO	(1*NFS_HZ)	/* Default timeout = 1 second */
+#define	NFS_MINTIMEO	(1*NFS_HZ)	/* Min timeout to use */
+#define	NFS_MAXTIMEO	(60*NFS_HZ)	/* Max timeout to backoff to */
+#define	NFS_MINIDEMTIMEO (5*NFS_HZ)	/* Min timeout for non-idempotent ops*/
+#define	NFS_MAXREXMIT	100		/* Stop counting after this many */
+#define	NFS_MAXWINDOW	1024		/* Max number of outstanding requests */
+#define	NFS_RETRANS	10		/* Num of retrans for soft mounts */
+#define	NFS_MAXGRPS	16		/* Max. size of groups list */
+#define	NFS_MINATTRTIMO 5		/* Min attribute cache timeout in sec */
+#define	NFS_MAXATTRTIMO 60		/* Max attribute cache timeout in sec */
+#define	NFS_WSIZE	8192		/* Def. write data size <= 8192 */
+#define	NFS_RSIZE	8192		/* Def. read data size <= 8192 */
+#define	NFS_DEFRAHEAD	1		/* Def. read ahead # blocks */
+#define	NFS_MAXRAHEAD	4		/* Max. read ahead # blocks */
+#define	NFS_MAXREADDIR	NFS_MAXDATA	/* Max. size of directory read */
+#define	NFS_MAXUIDHASH	64		/* Max. # of hashed uid entries/mp */
+#define	NFS_MAXASYNCDAEMON 20	/* Max. number async_daemons runnable */
+#define	NFS_DIRBLKSIZ	1024		/* Size of an NFS directory block */
+#define	NMOD(a)		((a) % nfs_asyncdaemons)
+
+/*
+ * Attribute timeout: (seconds since n_mtime)/10, clamped to MIN..MAXATTRTIMO.
+ */
+#define	NFS_ATTRTIMEO(np) \
+	((((np)->n_flag & NMODIFIED) || \
+	 (time.tv_sec - (np)->n_mtime) / 10 < NFS_MINATTRTIMO) ? NFS_MINATTRTIMO : \
+	 ((time.tv_sec - (np)->n_mtime) / 10 > NFS_MAXATTRTIMO ? NFS_MAXATTRTIMO : \
+	  (time.tv_sec - (np)->n_mtime) / 10))	/* NMODIFIED forces the minimum; np is evaluated several times */
+
+/*
+ * Argument structures for the nfssvc(2) syscall. Nothing other than nfsd
+ * and mount_nfs is expected to use them.
+ */
+struct nfsd_args {
+	int	sock;		/* Socket to serve */
+	caddr_t	name;		/* Client address for connection based sockets */
+	int	namelen;	/* Length of name */
+};
+
+struct nfsd_srvargs {
+	struct nfsd	*nsd_nfsd;	/* Pointer to in kernel nfsd struct */
+	uid_t		nsd_uid;	/* Effective uid mapped to cred */
+	u_long		nsd_haddr;	/* Ip address of client */
+	struct ucred	nsd_cr;		/* Cred. uid maps to */
+	int		nsd_authlen;	/* Length of auth string (ret) */
+	char		*nsd_authstr;	/* Auth string (ret) */
+};
+
+struct nfsd_cargs {
+	char		*ncd_dirp;	/* Mount dir path */
+	uid_t		ncd_authuid;	/* Effective uid */
+	int		ncd_authtype;	/* Type of authenticator */
+	int		ncd_authlen;	/* Length of authenticator string */
+	char		*ncd_authstr;	/* Authenticator string */
+};
+
+/*
+ * Stats structure: int counters, plus two arrays of NFS_NPROCS ints each
+ */
+struct nfsstats {
+	int	attrcache_hits;
+	int	attrcache_misses;
+	int	lookupcache_hits;
+	int	lookupcache_misses;
+	int	direofcache_hits;
+	int	direofcache_misses;
+	int	biocache_reads;
+	int	read_bios;
+	int	read_physios;
+	int	biocache_writes;
+	int	write_bios;
+	int	write_physios;
+	int	biocache_readlinks;
+	int	readlink_bios;
+	int	biocache_readdirs;
+	int	readdir_bios;
+	int	rpccnt[NFS_NPROCS];	/* presumably one slot per procedure number - confirm */
+	int	rpcretries;
+	int	srvrpccnt[NFS_NPROCS];	/* presumably the server-side equivalent - confirm */
+	int	srvrpc_errs;
+	int	srv_errs;
+	int	rpcrequests;
+	int	rpctimeouts;
+	int	rpcunexpected;
+	int	rpcinvalid;
+	int	srvcache_inproghits;
+	int	srvcache_idemdonehits;
+	int	srvcache_nonidemdonehits;
+	int	srvcache_misses;
+	int	srvnqnfs_leases;
+	int	srvnqnfs_maxleases;
+	int	srvnqnfs_getleases;
+};
+
+/*
+ * Flags for nfssvc() system call (0x001 and 0x020 are not assigned here).
+ */
+#define	NFSSVC_BIOD	0x002
+#define	NFSSVC_NFSD	0x004
+#define	NFSSVC_ADDSOCK	0x008
+#define	NFSSVC_AUTHIN	0x010
+#define	NFSSVC_GOTAUTH	0x040
+#define	NFSSVC_AUTHINFAIL 0x080
+#define	NFSSVC_MNTD	0x100
+
+/*
+ * The set of signals that interrupt an I/O in progress for NFSMNT_INT mounts.
+ * What should be in this set is open to debate, but I believe that since
+ * I/O system calls on ufs are never interrupted by signals the set should
+ * be minimal. My reasoning is that many current programs that use signals
+ * such as SIGALRM will not expect file I/O system calls to be interrupted
+ * by them and break.
+ */
+#ifdef KERNEL
+#define	NFSINT_SIGMASK	(sigmask(SIGINT)|sigmask(SIGTERM)|sigmask(SIGKILL)| \
+			 sigmask(SIGHUP)|sigmask(SIGQUIT))
+
+/*
+ * Socket errors ignored for connectionless sockets??
+ * For now, ignore them all except EINTR, ERESTART and EWOULDBLOCK
+ */
+#define	NFSIGNORE_SOERROR(s, e) \
+		((e) != EINTR && (e) != ERESTART && (e) != EWOULDBLOCK && \
+		((s) & PR_CONNREQUIRED) == 0)	/* true only when PR_CONNREQUIRED is clear in s */
+
+/*
+ * Nfs outstanding request list element
+ */
+struct nfsreq {
+	struct nfsreq	*r_next;	/* request list links */
+	struct nfsreq	*r_prev;
+	struct mbuf	*r_mreq;	/* presumably the request mbufs - confirm */
+	struct mbuf	*r_mrep;	/* presumably the reply mbufs - confirm */
+	struct mbuf	*r_md;
+	caddr_t		r_dpos;
+	struct nfsmount *r_nmp;		/* an nfsmount; presumably the request's mount */
+	struct vnode	*r_vp;
+	u_long		r_xid;		/* presumably the RPC xid - confirm */
+	int		r_flags;	/* flags on request, see below */
+	int		r_retry;	/* max retransmission count */
+	int		r_rexmit;	/* current retrans count */
+	int		r_timer;	/* tick counter on reply */
+	int		r_procnum;	/* NFS procedure number */
+	int		r_rtt;		/* RTT for rpc */
+	struct proc	*r_procp;	/* Proc that did I/O system call */
+};
+
+/* Flag values for r_flags (all eight bits 0x01..0x80 are assigned) */
+#define R_TIMING	0x01		/* timing request (in mntp) */
+#define R_SENT		0x02		/* request has been sent */
+#define	R_SOFTTERM	0x04		/* soft mnt, too many retries */
+#define	R_INTR		0x08		/* intr mnt, signal pending */
+#define	R_SOCKERR	0x10		/* Fatal error on socket */
+#define	R_TPRINTFMSG	0x20		/* Did a tprintf msg. */
+#define	R_MUSTRESEND	0x40		/* Must resend request */
+#define	R_GETONEREP	0x80		/* Probe for one reply only */
+
+struct nfsstats nfsstats;	/* NOTE(review): no "extern" - a tentative definition in every includer */
+
+/*
+ * A list of nfssvc_sock structures is maintained with all the sockets
+ * that require service by the nfsd.
+ * The nfsuid structs hang off of the nfssvc_sock structs in both lru
+ * and uid hash lists.
+ */
+#define	NUIDHASHSIZ	32	/* keep a power of 2: NUIDHASH masks with SIZ-1 */
+#define	NUIDHASH(uid)	((uid) & (NUIDHASHSIZ - 1))	/* low bits of uid, 0..NUIDHASHSIZ-1 */
+
+/*
+ * Network address hash list element
+ */
+union nethostaddr {
+	u_long had_inetaddr;		/* address held as a u_long */
+	struct mbuf *had_nam;		/* address held in an mbuf */
+};
+
+struct nfsuid {
+	struct nfsuid	*nu_lrunext;	/* MUST be first */
+	struct nfsuid	*nu_lruprev;	/* lru list, other link */
+	struct nfsuid	*nu_hnext;	/* uid hash list links */
+	struct nfsuid	*nu_hprev;
+	int		nu_flag;	/* Flags (NU_INETADDR) */
+	uid_t		nu_uid;		/* Uid mapped by this entry */
+	union nethostaddr nu_haddr;	/* Host addr. for dgram sockets */
+	struct ucred	nu_cr;		/* Cred uid mapped to */
+};
+
+#define	nu_inetaddr	nu_haddr.had_inetaddr
+#define	nu_nam		nu_haddr.had_nam
+/* Bits for nu_flag */
+#define	NU_INETADDR	0x1	/* presumably: nu_haddr holds had_inetaddr - confirm */
+
+struct nfssvc_sock {
+	struct nfsuid	*ns_lrunext;	/* MUST be first */
+	struct nfsuid	*ns_lruprev;	/* (these two point at nfsuid structs) */
+	struct nfssvc_sock *ns_next;	/* links to other nfssvc_sock structs */
+	struct nfssvc_sock *ns_prev;
+	int		ns_flag;	/* SLP_* bits, see below */
+	u_long		ns_sref;
+	struct file	*ns_fp;
+	struct socket	*ns_so;
+	int		ns_solock;
+	struct mbuf	*ns_nam;
+	int		ns_cc;
+	struct mbuf	*ns_raw;
+	struct mbuf	*ns_rawend;
+	int		ns_reclen;
+	struct mbuf	*ns_rec;
+	struct mbuf	*ns_recend;
+	int		ns_numuids;
+	struct nfsuid	*ns_uidh[NUIDHASHSIZ];	/* presumably uid hash chain heads - confirm */
+};
+
+/* Bits for "ns_flag" */
+#define	SLP_VALID	0x01
+#define	SLP_DOREC	0x02
+#define	SLP_NEEDQ	0x04
+#define	SLP_DISCONN	0x08
+#define	SLP_GETSTREAM	0x10
+#define	SLP_INIT	0x20
+#define	SLP_WANTINIT	0x40
+
+#define SLP_ALLFLAGS	0xff	/* all eight low bits, including the unassigned 0x80 */
+
+/*
+ * One of these structures is allocated for each nfsd.
+ */
+struct nfsd {
+	struct nfsd	*nd_next;	/* Must be first */
+	struct nfsd	*nd_prev;	/* other link of the nfsd list */
+	int		nd_flag;	/* NFSD_ flags */
+	struct nfssvc_sock *nd_slp;	/* Current socket */
+	struct mbuf	*nd_nam;	/* Client addr for datagram req. */
+	struct mbuf	*nd_mrep;	/* Req. mbuf list */
+	struct mbuf	*nd_md;		/* presumably current mbuf in that list - confirm */
+	caddr_t		nd_dpos;	/* Position in list */
+	int		nd_procnum;	/* RPC procedure number */
+	u_long		nd_retxid;	/* RPC xid */
+	int		nd_repstat;	/* Reply status value */
+	struct ucred	nd_cr;		/* Credentials for req. */
+	int		nd_nqlflag;	/* Leasing flag */
+	int		nd_duration;	/* Lease duration */
+	int		nd_authlen;	/* Authenticator len */
+	u_char		nd_authstr[RPCAUTH_MAXSIZ]; /* Authenticator data */
+	struct proc	*nd_procp;	/* Proc ptr */
+};
+
+#define	NFSD_WAITING	0x01	/* NFSD_* are the bits kept in nd_flag */
+#define	NFSD_CHECKSLP	0x02
+#define	NFSD_REQINPROG	0x04
+#define	NFSD_NEEDAUTH	0x08
+#define	NFSD_AUTHFAIL	0x10
+#endif	/* KERNEL */
diff --git a/sys/nfsserver/nfs_serv.c b/sys/nfsserver/nfs_serv.c
new file mode 100644
index 00000000000..f31b96e02ed
--- /dev/null
+++ b/sys/nfsserver/nfs_serv.c
@@ -0,0 +1,1908 @@
+/*
+ * Copyright (c) 1989, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Rick Macklem at The University of Guelph.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)nfs_serv.c	8.3 (Berkeley) 1/12/94
+ */
+
+/*
+ * nfs version 2 server calls to vnode ops
+ * - these routines generally have 3 phases
+ *   1 - break down and validate rpc request in mbuf list
+ *   2 - do the vnode ops for the request
+ *       (surprisingly ?? many are very similar to syscalls in vfs_syscalls.c)
+ *   3 - build the rpc reply in an mbuf list
+ *   nb:
+ *	- do not mix the phases, since the nfsm_?? macros can return failures
+ *	  on a bad rpc or similar and do not do any vrele() or vput()'s
+ *
+ *      - the nfsm_reply() macro generates an nfs rpc reply with the nfs
+ *	error number iff error != 0 whereas
+ *	returning an error from the server function implies a fatal error
+ *	such as a badly constructed rpc request that should be dropped without
+ *	a reply.
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/file.h>
+#include <sys/namei.h>
+#include <sys/vnode.h>
+#include <sys/mount.h>
+#include <sys/mbuf.h>
+#include <sys/dirent.h>
+#include <sys/stat.h>
+
+#include <vm/vm.h>
+
+#include <nfs/nfsv2.h>
+#include <nfs/rpcv2.h>
+#include <nfs/nfs.h>
+#include <nfs/xdr_subs.h>
+#include <nfs/nfsm_subs.h>
+#include <nfs/nqnfs.h>
+
+/* Defs */
+#define	TRUE	1	/* TRUE/FALSE: used as the 2nd argument of nfsrv_fhtovp() */
+#define	FALSE	0
+
+/* Global vars */
+extern u_long nfs_procids[NFS_NPROCS];
+extern u_long nfs_xdrneg1;	/* request fields equal to this are skipped, see nfsrv_setattr() */
+extern u_long nfs_false, nfs_true;	/* compared against boolean words taken from a request */
+nfstype nfs_type[9] = { NFNON, NFREG, NFDIR, NFBLK, NFCHR, NFLNK, NFNON,
+		      NFCHR, NFNON };	/* presumably indexed by vnode type -- confirm */
+
+/*
+ * nqnfs access service: check the access modes asked for on a file handle
+ */
+nqnfsrv_access(nfsd, mrep, md, dpos, cred, nam, mrq)
+	struct nfsd *nfsd;		/* state of the nfsd handling this request */
+	struct mbuf *mrep, *md;		/* request mbufs; presumably used by nfsm_ macros */
+	caddr_t dpos;			/* presumably the parse position for nfsm_ macros */
+	struct ucred *cred;		/* credentials the check is made with */
+	struct mbuf *nam, **mrq;	/* nam: handed to nfsrv_fhtovp(); mrq: not used directly */
+{
+	struct vnode *vp;
+	nfsv2fh_t nfh;
+	fhandle_t *fhp;
+	register u_long *tl;
+	register long t1;
+	caddr_t bpos;
+	int error = 0, rdonly, cache, mode = 0;
+	char *cp2;
+	struct mbuf *mb, *mreq;
+	u_quad_t frev;
+
+	fhp = &nfh.fh_generic;
+	nfsm_srvmtofh(fhp);
+	nfsm_dissect(tl, u_long *, 3 * NFSX_UNSIGNED);	/* three request words */
+	if (error = nfsrv_fhtovp(fhp, TRUE, &vp, cred, nfsd->nd_slp, nam, &rdonly))
+		nfsm_reply(0);
+	if (*tl++ == nfs_true)		/* word 1 selects VREAD */
+		mode |= VREAD;
+	if (*tl++ == nfs_true)		/* word 2 selects VWRITE */
+		mode |= VWRITE;
+	if (*tl == nfs_true)		/* word 3 selects VEXEC */
+		mode |= VEXEC;
+	error = nfsrv_access(vp, mode, cred, rdonly, nfsd->nd_procp);
+	vput(vp);
+	nfsm_reply(0);			/* nothing is built after the reply header */
+	nfsm_srvdone;
+}
+
+/*
+ * nfs getattr service: reply with the attributes of the file a handle names
+ */
+nfsrv_getattr(nfsd, mrep, md, dpos, cred, nam, mrq)
+	struct nfsd *nfsd;		/* state of the nfsd handling this request */
+	struct mbuf *mrep, *md;		/* request mbufs; presumably used by nfsm_ macros */
+	caddr_t dpos;			/* presumably the parse position for nfsm_ macros */
+	struct ucred *cred;		/* credentials handed to the vnode ops */
+	struct mbuf *nam, **mrq;	/* nam: handed to nfsrv_fhtovp(); mrq: not used directly */
+{
+	register struct nfsv2_fattr *fp;
+	struct vattr va;
+	register struct vattr *vap = &va;
+	struct vnode *vp;
+	nfsv2fh_t nfh;
+	fhandle_t *fhp;
+	register u_long *tl;
+	register long t1;
+	caddr_t bpos;
+	int error = 0, rdonly, cache;
+	char *cp2;
+	struct mbuf *mb, *mb2, *mreq;	/* not named below; presumably macro scratch */
+	u_quad_t frev;
+
+	fhp = &nfh.fh_generic;
+	nfsm_srvmtofh(fhp);
+	if (error = nfsrv_fhtovp(fhp, TRUE, &vp, cred, nfsd->nd_slp, nam, &rdonly))
+		nfsm_reply(0);
+	nqsrv_getl(vp, NQL_READ);
+	error = VOP_GETATTR(vp, vap, cred, nfsd->nd_procp);
+	vput(vp);			/* vnode is released before the reply is built */
+	nfsm_reply(NFSX_FATTR(nfsd->nd_nqlflag != NQL_NOVAL));
+	nfsm_build(fp, struct nfsv2_fattr *, NFSX_FATTR(nfsd->nd_nqlflag != NQL_NOVAL));
+	nfsm_srvfillattr;		/* presumably fills *fp from *vap -- confirm */
+	nfsm_srvdone;
+}
+
+/*
+ * nfs setattr service: set the requested attributes, reply with the new ones
+ */
+nfsrv_setattr(nfsd, mrep, md, dpos, cred, nam, mrq)
+	struct nfsd *nfsd;		/* state of the nfsd handling this request */
+	struct mbuf *mrep, *md;		/* request mbufs; presumably used by nfsm_ macros */
+	caddr_t dpos;			/* presumably the parse position for nfsm_ macros */
+	struct ucred *cred;		/* credentials handed to the vnode ops */
+	struct mbuf *nam, **mrq;	/* nam: handed to nfsrv_fhtovp(); mrq: not used directly */
+{
+	struct vattr va;
+	register struct vattr *vap = &va;
+	register struct nfsv2_sattr *sp;
+	register struct nfsv2_fattr *fp;
+	struct vnode *vp;
+	nfsv2fh_t nfh;
+	fhandle_t *fhp;
+	register u_long *tl;
+	register long t1;
+	caddr_t bpos;
+	int error = 0, rdonly, cache;
+	char *cp2;
+	struct mbuf *mb, *mb2, *mreq;
+	u_quad_t frev;
+
+	fhp = &nfh.fh_generic;
+	nfsm_srvmtofh(fhp);
+	nfsm_dissect(sp, struct nfsv2_sattr *, NFSX_SATTR(nfsd->nd_nqlflag != NQL_NOVAL));
+	if (error = nfsrv_fhtovp(fhp, TRUE, &vp, cred, nfsd->nd_slp, nam, &rdonly))
+		nfsm_reply(0);
+	nqsrv_getl(vp, NQL_WRITE);
+	VATTR_NULL(vap);		/* start from "nothing to change" */
+	/*
+	 * Nah nah nah nah na nah
+	 * There is a bug in the Sun client that puts 0xffff in the mode
+	 * field of sattr when it should put in 0xffffffff. The u_short
+	 * doesn't sign extend.
+	 * --> check the low order 2 bytes for 0xffff
+	 */
+	if ((fxdr_unsigned(int, sp->sa_mode) & 0xffff) != 0xffff)
+		vap->va_mode = nfstov_mode(sp->sa_mode);
+	if (sp->sa_uid != nfs_xdrneg1)	/* nfs_xdrneg1 marks a field to leave alone */
+		vap->va_uid = fxdr_unsigned(uid_t, sp->sa_uid);
+	if (sp->sa_gid != nfs_xdrneg1)
+		vap->va_gid = fxdr_unsigned(gid_t, sp->sa_gid);
+	if (nfsd->nd_nqlflag == NQL_NOVAL) {
+		if (sp->sa_nfssize != nfs_xdrneg1)
+			vap->va_size = fxdr_unsigned(u_quad_t, sp->sa_nfssize);
+		if (sp->sa_nfsatime.nfs_sec != nfs_xdrneg1) {
+#ifdef notyet
+			fxdr_nfstime(&sp->sa_nfsatime, &vap->va_atime);
+#else
+			vap->va_atime.ts_sec =
+				fxdr_unsigned(long, sp->sa_nfsatime.nfs_sec);
+			vap->va_atime.ts_nsec = 0;
+#endif
+		}
+		if (sp->sa_nfsmtime.nfs_sec != nfs_xdrneg1)
+			fxdr_nfstime(&sp->sa_nfsmtime, &vap->va_mtime);
+	} else {
+		fxdr_hyper(&sp->sa_nqsize, &vap->va_size);
+		fxdr_nqtime(&sp->sa_nqatime, &vap->va_atime);
+		fxdr_nqtime(&sp->sa_nqmtime, &vap->va_mtime);
+		vap->va_flags = fxdr_unsigned(u_long, sp->sa_nqflags);
+	}
+
+	/*
+	 * If the size is being changed write access is required, otherwise
+	 * just check for a read only file system.
+	 */
+	if (vap->va_size == ((u_quad_t)((quad_t) -1))) {
+		if (rdonly || (vp->v_mount->mnt_flag & MNT_RDONLY)) {
+			error = EROFS;
+			goto out;
+		}
+	} else {
+		if (vp->v_type == VDIR) {
+			error = EISDIR;
+			goto out;
+		} else if (error = nfsrv_access(vp, VWRITE, cred, rdonly,
+			nfsd->nd_procp))
+			goto out;
+	}
+	if (error = VOP_SETATTR(vp, vap, cred, nfsd->nd_procp)) {
+		vput(vp);
+		nfsm_reply(0);
+	}
+	error = VOP_GETATTR(vp, vap, cred, nfsd->nd_procp);	/* reread for the reply */
+out:
+	vput(vp);
+	nfsm_reply(NFSX_FATTR(nfsd->nd_nqlflag != NQL_NOVAL) + 2*NFSX_UNSIGNED);
+	nfsm_build(fp, struct nfsv2_fattr *, NFSX_FATTR(nfsd->nd_nqlflag != NQL_NOVAL));
+	nfsm_srvfillattr;
+	if (nfsd->nd_nqlflag != NQL_NOVAL) {
+		nfsm_build(tl, u_long *, 2*NFSX_UNSIGNED);
+		txdr_hyper(&vap->va_filerev, tl);	/* revision from the VOP_GETATTR above, as in nfsrv_write() */
+	}
+	nfsm_srvdone;
+}
+
+/*
+ * nfs lookup rpc: look a name up in a directory, reply with its handle and attributes
+ */
+nfsrv_lookup(nfsd, mrep, md, dpos, cred, nam, mrq)
+	struct nfsd *nfsd;		/* state of the nfsd handling this request */
+	struct mbuf *mrep, *md;		/* request mbufs; &md is also handed to nfs_namei() */
+	caddr_t dpos;			/* parse position; &dpos is also handed to nfs_namei() */
+	struct ucred *cred;		/* credentials handed to the lookup and vnode ops */
+	struct mbuf *nam, **mrq;	/* nam: handed to nfs_namei(); mrq: not used directly */
+{
+	register struct nfsv2_fattr *fp;
+	struct nameidata nd;
+	struct vnode *vp;
+	nfsv2fh_t nfh;
+	fhandle_t *fhp;
+	register caddr_t cp;
+	register u_long *tl;
+	register long t1;
+	caddr_t bpos;
+	int error = 0, cache, duration2, cache2, len;
+	char *cp2;
+	struct mbuf *mb, *mb2, *mreq;
+	struct vattr va, *vap = &va;
+	u_quad_t frev, frev2;
+
+	fhp = &nfh.fh_generic;
+	duration2 = 0;
+	if (nfsd->nd_nqlflag != NQL_NOVAL) {	/* nqnfs request leads with one extra word */
+		nfsm_dissect(tl, u_long *, NFSX_UNSIGNED);
+		duration2 = fxdr_unsigned(int, *tl);
+	}
+	nfsm_srvmtofh(fhp);
+	nfsm_srvstrsiz(len, NFS_MAXNAMLEN);
+	nd.ni_cnd.cn_cred = cred;
+	nd.ni_cnd.cn_nameiop = LOOKUP;
+	nd.ni_cnd.cn_flags = LOCKLEAF | SAVESTART;
+	if (error = nfs_namei(&nd, fhp, len, nfsd->nd_slp, nam, &md, &dpos,
+	    nfsd->nd_procp))
+		nfsm_reply(0);
+	nqsrv_getl(nd.ni_startdir, NQL_READ);
+	vrele(nd.ni_startdir);			/* start directory is not needed further */
+	FREE(nd.ni_cnd.cn_pnbuf, M_NAMEI);	/* nor is the pathname buffer */
+	vp = nd.ni_vp;
+	bzero((caddr_t)fhp, sizeof(nfh));	/* nfh is reused to build the reply handle */
+	fhp->fh_fsid = vp->v_mount->mnt_stat.f_fsid;
+	if (error = VFS_VPTOFH(vp, &fhp->fh_fid)) {
+		vput(vp);
+		nfsm_reply(0);
+	}
+	if (duration2)		/* a lease is requested only for a nonzero duration */
+		(void) nqsrv_getlease(vp, &duration2, NQL_READ, nfsd,
+			nam, &cache2, &frev2, cred);
+	error = VOP_GETATTR(vp, vap, cred, nfsd->nd_procp);
+	vput(vp);
+	nfsm_reply(NFSX_FH + NFSX_FATTR(nfsd->nd_nqlflag != NQL_NOVAL) + 5*NFSX_UNSIGNED);
+	if (nfsd->nd_nqlflag != NQL_NOVAL) {
+		if (duration2) {
+			nfsm_build(tl, u_long *, 5*NFSX_UNSIGNED);
+			*tl++ = txdr_unsigned(NQL_READ);
+			*tl++ = txdr_unsigned(cache2);
+			*tl++ = txdr_unsigned(duration2);
+			txdr_hyper(&frev2, tl);
+		} else {
+			nfsm_build(tl, u_long *, NFSX_UNSIGNED);
+			*tl = 0;		/* single zero word when no lease was asked for */
+		}
+	}
+	nfsm_srvfhtom(fhp);
+	nfsm_build(fp, struct nfsv2_fattr *, NFSX_FATTR(nfsd->nd_nqlflag != NQL_NOVAL));
+	nfsm_srvfillattr;
+	nfsm_srvdone;
+}
+
+/*
+ * nfs readlink service: read a symbolic link straight into reply mbufs
+ */
+nfsrv_readlink(nfsd, mrep, md, dpos, cred, nam, mrq)
+	struct nfsd *nfsd;		/* state of the nfsd handling this request */
+	struct mbuf *mrep, *md;		/* request mbufs; presumably used by nfsm_ macros */
+	caddr_t dpos;			/* presumably the parse position for nfsm_ macros */
+	struct ucred *cred;		/* credentials handed to the vnode ops */
+	struct mbuf *nam, **mrq;	/* nam: handed to nfsrv_fhtovp(); mrq: not used directly */
+{
+	struct iovec iv[(NFS_MAXPATHLEN+MLEN-1)/MLEN];
+	register struct iovec *ivp = iv;
+	register struct mbuf *mp;
+	register u_long *tl;
+	register long t1;
+	caddr_t bpos;
+	int error = 0, rdonly, cache, i, tlen, len;
+	char *cp2;
+	struct mbuf *mb, *mb2, *mp2, *mp3, *mreq;	/* mp3: head, mp2: tail of the data chain */
+	struct vnode *vp;
+	nfsv2fh_t nfh;
+	fhandle_t *fhp;
+	struct uio io, *uiop = &io;
+	u_quad_t frev;
+
+	fhp = &nfh.fh_generic;
+	nfsm_srvmtofh(fhp);
+	len = 0;
+	i = 0;
+	while (len < NFS_MAXPATHLEN) {	/* set up NFS_MAXPATHLEN bytes of mbuf space */
+		MGET(mp, M_WAIT, MT_DATA);
+		MCLGET(mp, M_WAIT);
+		mp->m_len = NFSMSIZ(mp);
+		if (len == 0)
+			mp3 = mp2 = mp;
+		else {
+			mp2->m_next = mp;
+			mp2 = mp;
+		}
+		if ((len+mp->m_len) > NFS_MAXPATHLEN) {	/* last mbuf: trim to the limit */
+			mp->m_len = NFS_MAXPATHLEN-len;
+			len = NFS_MAXPATHLEN;
+		} else
+			len += mp->m_len;
+		ivp->iov_base = mtod(mp, caddr_t);	/* one iovec per mbuf */
+		ivp->iov_len = mp->m_len;
+		i++;
+		ivp++;
+	}
+	uiop->uio_iov = iv;
+	uiop->uio_iovcnt = i;
+	uiop->uio_offset = 0;
+	uiop->uio_resid = len;
+	uiop->uio_rw = UIO_READ;
+	uiop->uio_segflg = UIO_SYSSPACE;
+	uiop->uio_procp = (struct proc *)0;
+	if (error = nfsrv_fhtovp(fhp, TRUE, &vp, cred, nfsd->nd_slp, nam, &rdonly)) {
+		m_freem(mp3);
+		nfsm_reply(0);
+	}
+	if (vp->v_type != VLNK) {
+		error = EINVAL;
+		goto out;
+	}
+	nqsrv_getl(vp, NQL_READ);
+	error = VOP_READLINK(vp, uiop, cred);
+out:
+	vput(vp);
+	if (error)
+		m_freem(mp3);		/* data chain is not sent on failure */
+	nfsm_reply(NFSX_UNSIGNED);
+	if (uiop->uio_resid > 0) {	/* link shorter than the space set up */
+		len -= uiop->uio_resid;
+		tlen = nfsm_rndup(len);
+		nfsm_adj(mp3, NFS_MAXPATHLEN-tlen, tlen-len);
+	}
+	nfsm_build(tl, u_long *, NFSX_UNSIGNED);
+	*tl = txdr_unsigned(len);	/* length word precedes the link text */
+	mb->m_next = mp3;		/* hang the data chain onto the reply */
+	nfsm_srvdone;
+}
+
+/*
+ * nfs read service: read file data directly into the reply mbuf chain
+ */
+nfsrv_read(nfsd, mrep, md, dpos, cred, nam, mrq)
+	struct nfsd *nfsd;		/* state of the nfsd handling this request */
+	struct mbuf *mrep, *md;		/* request mbufs; presumably used by nfsm_ macros */
+	caddr_t dpos;			/* presumably the parse position for nfsm_ macros */
+	struct ucred *cred;		/* credentials handed to the vnode ops */
+	struct mbuf *nam, **mrq;	/* nam: handed to nfsrv_fhtovp(); mrq: not used directly */
+{
+	register struct iovec *iv;
+	struct iovec *iv2;		/* start of the iovec array, kept for FREE() */
+	register struct mbuf *m;
+	register struct nfsv2_fattr *fp;
+	register u_long *tl;
+	register long t1;
+	caddr_t bpos;
+	int error = 0, rdonly, cache, i, cnt, len, left, siz, tlen;
+	char *cp2;
+	struct mbuf *mb, *mb2, *mreq;
+	struct mbuf *m2;
+	struct vnode *vp;
+	nfsv2fh_t nfh;
+	fhandle_t *fhp;
+	struct uio io, *uiop = &io;
+	struct vattr va, *vap = &va;
+	off_t off;
+	u_quad_t frev;
+
+	fhp = &nfh.fh_generic;
+	nfsm_srvmtofh(fhp);
+	if (nfsd->nd_nqlflag == NQL_NOVAL) {	/* offset is one word here ... */
+		nfsm_dissect(tl, u_long *, NFSX_UNSIGNED);
+		off = (off_t)fxdr_unsigned(u_long, *tl);
+	} else {				/* ... and a two word hyper for nqnfs */
+		nfsm_dissect(tl, u_long *, 2 * NFSX_UNSIGNED);
+		fxdr_hyper(tl, &off);
+	}
+	nfsm_srvstrsiz(cnt, NFS_MAXDATA);
+	if (error = nfsrv_fhtovp(fhp, TRUE, &vp, cred, nfsd->nd_slp, nam, &rdonly))
+		nfsm_reply(0);
+	if (vp->v_type != VREG) {
+		error = (vp->v_type == VDIR) ? EISDIR : EACCES;
+		vput(vp);
+		nfsm_reply(0);
+	}
+	nqsrv_getl(vp, NQL_READ);
+	if ((error = nfsrv_access(vp, VREAD, cred, rdonly, nfsd->nd_procp)) &&	/* VEXEC is tried if VREAD fails */
+	    (error = nfsrv_access(vp, VEXEC, cred, rdonly, nfsd->nd_procp))) {
+		vput(vp);
+		nfsm_reply(0);
+	}
+	if (error = VOP_GETATTR(vp, vap, cred, nfsd->nd_procp)) {
+		vput(vp);
+		nfsm_reply(0);
+	}
+	if (off >= vap->va_size)		/* clamp the count to the file size */
+		cnt = 0;
+	else if ((off + cnt) > vap->va_size)
+		cnt = nfsm_rndup(vap->va_size - off);
+	nfsm_reply(NFSX_FATTR(nfsd->nd_nqlflag != NQL_NOVAL)+NFSX_UNSIGNED+nfsm_rndup(cnt));
+	nfsm_build(fp, struct nfsv2_fattr *, NFSX_FATTR(nfsd->nd_nqlflag != NQL_NOVAL));
+	nfsm_build(tl, u_long *, NFSX_UNSIGNED);	/* length word, filled in at the end */
+	len = left = cnt;
+	if (cnt > 0) {
+		/*
+		 * Generate the mbuf list with the uio_iov ref. to it.
+		 */
+		i = 0;
+		m = m2 = mb;
+		MALLOC(iv, struct iovec *,
+		       ((NFS_MAXDATA+MLEN-1)/MLEN) * sizeof (struct iovec),
+		       M_TEMP, M_WAITOK);
+		iv2 = iv;
+		while (left > 0) {
+			siz = min(M_TRAILINGSPACE(m), left);	/* use what is left in this mbuf */
+			if (siz > 0) {
+				m->m_len += siz;
+				iv->iov_base = bpos;
+				iv->iov_len = siz;
+				iv++;
+				i++;
+				left -= siz;
+			}
+			if (left > 0) {		/* chain a fresh cluster for the rest */
+				MGET(m, M_WAIT, MT_DATA);
+				MCLGET(m, M_WAIT);
+				m->m_len = 0;
+				m2->m_next = m;
+				m2 = m;
+				bpos = mtod(m, caddr_t);
+			}
+		}
+		uiop->uio_iov = iv2;
+		uiop->uio_iovcnt = i;
+		uiop->uio_offset = off;
+		uiop->uio_resid = cnt;
+		uiop->uio_rw = UIO_READ;
+		uiop->uio_segflg = UIO_SYSSPACE;	/* NOTE(review): uio_procp is never set in this function */
+		error = VOP_READ(vp, uiop, IO_NODELOCKED, cred);
+		off = uiop->uio_offset;
+		FREE((caddr_t)iv2, M_TEMP);
+		if (error || (error = VOP_GETATTR(vp, vap, cred, nfsd->nd_procp))) {
+			m_freem(mreq);		/* discard the reply built so far */
+			vput(vp);
+			nfsm_reply(0);
+		}
+	} else
+		uiop->uio_resid = 0;
+	vput(vp);
+	nfsm_srvfillattr;
+	len -= uiop->uio_resid;			/* bytes actually read */
+	tlen = nfsm_rndup(len);
+	if (cnt != tlen || tlen != len)
+		nfsm_adj(mb, cnt-tlen, tlen-len);
+	*tl = txdr_unsigned(len);
+	nfsm_srvdone;
+}
+
+/*
+ * nfs write service: write the data mbufs of the request to the file (IO_SYNC)
+ */
+nfsrv_write(nfsd, mrep, md, dpos, cred, nam, mrq)
+	struct nfsd *nfsd;		/* state of the nfsd handling this request */
+	struct mbuf *mrep, *md;		/* request mbufs; the data is taken from md onwards */
+	caddr_t dpos;			/* parse position inside md */
+	struct ucred *cred;		/* credentials handed to the vnode ops */
+	struct mbuf *nam, **mrq;	/* nam: handed to nfsrv_fhtovp(); mrq: not used directly */
+{
+	register struct iovec *ivp;
+	register struct mbuf *mp;
+	register struct nfsv2_fattr *fp;
+	struct iovec iv[NFS_MAXIOVEC];
+	struct vattr va;
+	register struct vattr *vap = &va;
+	register u_long *tl;
+	register long t1;
+	caddr_t bpos;
+	int error = 0, rdonly, cache, siz, len, xfer;
+	int ioflags = IO_SYNC | IO_NODELOCKED;
+	char *cp2;
+	struct mbuf *mb, *mb2, *mreq;
+	struct vnode *vp;
+	nfsv2fh_t nfh;
+	fhandle_t *fhp;
+	struct uio io, *uiop = &io;
+	off_t off;
+	u_quad_t frev;
+
+	fhp = &nfh.fh_generic;
+	nfsm_srvmtofh(fhp);
+	nfsm_dissect(tl, u_long *, 4 * NFSX_UNSIGNED);
+	if (nfsd->nd_nqlflag == NQL_NOVAL) {
+		off = (off_t)fxdr_unsigned(u_long, *++tl);	/* offset is the 2nd word */
+		tl += 2;
+	} else {
+		fxdr_hyper(tl, &off);				/* offset is words 1-2 */
+		tl += 2;
+		if (fxdr_unsigned(u_long, *tl++))		/* nonzero 3rd word: append */
+			ioflags |= IO_APPEND;
+	}
+	len = fxdr_unsigned(long, *tl);				/* 4th word: byte count */
+	if (len > NFS_MAXDATA || len <= 0) {
+		error = EBADRPC;
+		nfsm_reply(0);
+	}
+	if (dpos == (mtod(md, caddr_t)+md->m_len)) {	/* md used up: data starts in the next mbuf */
+		mp = md->m_next;
+		if (mp == NULL) {
+			error = EBADRPC;
+			nfsm_reply(0);
+		}
+	} else {				/* data starts part way into md */
+		mp = md;
+		siz = dpos-mtod(mp, caddr_t);
+		mp->m_len -= siz;
+		NFSMADV(mp, siz);
+	}
+	if (error = nfsrv_fhtovp(fhp, TRUE, &vp, cred, nfsd->nd_slp, nam, &rdonly))
+		nfsm_reply(0);
+	if (vp->v_type != VREG) {
+		error = (vp->v_type == VDIR) ? EISDIR : EACCES;
+		vput(vp);
+		nfsm_reply(0);
+	}
+	nqsrv_getl(vp, NQL_WRITE);
+	if (error = nfsrv_access(vp, VWRITE, cred, rdonly, nfsd->nd_procp)) {
+		vput(vp);
+		nfsm_reply(0);
+	}
+	uiop->uio_resid = 0;
+	uiop->uio_rw = UIO_WRITE;
+	uiop->uio_segflg = UIO_SYSSPACE;
+	uiop->uio_procp = (struct proc *)0;
+	/*
+	 * Do up to NFS_MAXIOVEC mbufs of write each iteration of the
+	 * loop until done.
+	 */
+	while (len > 0 && uiop->uio_resid == 0) {
+		ivp = iv;
+		siz = 0;
+		uiop->uio_iov = ivp;
+		uiop->uio_iovcnt = 0;
+		uiop->uio_offset = off;
+		while (len > 0 && uiop->uio_iovcnt < NFS_MAXIOVEC && mp != NULL) {
+			ivp->iov_base = mtod(mp, caddr_t);
+			if (len < mp->m_len)	/* never take more than len bytes */
+				ivp->iov_len = xfer = len;
+			else
+				ivp->iov_len = xfer = mp->m_len;
+#ifdef notdef
+			/* Not Yet .. */
+			if (M_HASCL(mp) && (((u_long)ivp->iov_base) & CLOFSET) == 0)
+				ivp->iov_op = NULL;	/* what should it be ?? */
+			else
+				ivp->iov_op = NULL;
+#endif
+			uiop->uio_iovcnt++;
+			ivp++;
+			len -= xfer;
+			siz += xfer;
+			mp = mp->m_next;
+		}
+		if (len > 0 && mp == NULL) {	/* request carries less data than it claims */
+			error = EBADRPC;
+			vput(vp);
+			nfsm_reply(0);
+		}
+		uiop->uio_resid = siz;
+		if (error = VOP_WRITE(vp, uiop, ioflags, cred)) {
+			vput(vp);
+			nfsm_reply(0);
+		}
+		off = uiop->uio_offset;		/* continue where this batch ended */
+	}
+	error = VOP_GETATTR(vp, vap, cred, nfsd->nd_procp);
+	vput(vp);
+	nfsm_reply(NFSX_FATTR(nfsd->nd_nqlflag != NQL_NOVAL));
+	nfsm_build(fp, struct nfsv2_fattr *, NFSX_FATTR(nfsd->nd_nqlflag != NQL_NOVAL));
+	nfsm_srvfillattr;
+	if (nfsd->nd_nqlflag != NQL_NOVAL) {
+		nfsm_build(tl, u_long *, 2*NFSX_UNSIGNED);
+		txdr_hyper(&vap->va_filerev, tl);	/* nqnfs reply also carries the file revision */
+	}
+	nfsm_srvdone;
+}
+
+/*
+ * nfs create service
+ * now does a truncate to 0 length via. setattr if it already exists
+ */
+nfsrv_create(nfsd, mrep, md, dpos, cred, nam, mrq)
+	struct nfsd *nfsd;		/* state of the nfsd handling this request */
+	struct mbuf *mrep, *md;		/* request mbufs; &md is also handed to nfs_namei() */
+	caddr_t dpos;			/* parse position; &dpos is also handed to nfs_namei() */
+	struct ucred *cred;		/* credentials handed to the lookup and vnode ops */
+	struct mbuf *nam, **mrq;	/* nam: handed to nfs_namei(); mrq: not used directly */
+{
+	register struct nfsv2_fattr *fp;
+	struct vattr va;
+	register struct vattr *vap = &va;
+	register struct nfsv2_sattr *sp;
+	register u_long *tl;
+	struct nameidata nd;
+	register caddr_t cp;
+	register long t1;
+	caddr_t bpos;
+	int error = 0, rdev, cache, len, tsize;
+	char *cp2;
+	struct mbuf *mb, *mb2, *mreq;
+	struct vnode *vp;
+	nfsv2fh_t nfh;
+	fhandle_t *fhp;
+	u_quad_t frev;
+
+	nd.ni_cnd.cn_nameiop = nd.ni_cnd.cn_flags = 0;	/* nfsmout tests both fields */
+	fhp = &nfh.fh_generic;
+	nfsm_srvmtofh(fhp);
+	nfsm_srvstrsiz(len, NFS_MAXNAMLEN);
+	nd.ni_cnd.cn_cred = cred;
+	nd.ni_cnd.cn_nameiop = CREATE;
+	nd.ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF | SAVESTART;
+	if (error = nfs_namei(&nd, fhp, len, nfsd->nd_slp, nam, &md, &dpos,
+	    nfsd->nd_procp))
+		nfsm_reply(0);
+	VATTR_NULL(vap);
+	nfsm_dissect(sp, struct nfsv2_sattr *, NFSX_SATTR(nfsd->nd_nqlflag != NQL_NOVAL));
+	/*
+	 * Iff doesn't exist, create it; otherwise just truncate to 0 length
+	 *   should I set the mode too ??
+	 */
+	if (nd.ni_vp == NULL) {
+		vap->va_type = IFTOVT(fxdr_unsigned(u_long, sp->sa_mode));
+		if (vap->va_type == VNON)
+			vap->va_type = VREG;
+		vap->va_mode = nfstov_mode(sp->sa_mode);
+		if (nfsd->nd_nqlflag == NQL_NOVAL)	/* device number comes in the size field here */
+			rdev = fxdr_unsigned(long, sp->sa_nfssize);
+		else
+			rdev = fxdr_unsigned(long, sp->sa_nqrdev);
+		if (vap->va_type == VREG || vap->va_type == VSOCK) {
+			vrele(nd.ni_startdir);
+			nqsrv_getl(nd.ni_dvp, NQL_WRITE);
+			if (error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, vap))
+				nfsm_reply(0);
+			FREE(nd.ni_cnd.cn_pnbuf, M_NAMEI);
+		} else if (vap->va_type == VCHR || vap->va_type == VBLK ||
+			vap->va_type == VFIFO) {
+			if (vap->va_type == VCHR && rdev == 0xffffffff)	/* VCHR with rdev -1 means a fifo */
+				vap->va_type = VFIFO;
+			if (vap->va_type == VFIFO) {
+#ifndef FIFO
+				VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
+				vput(nd.ni_dvp);
+				error = ENXIO;
+				goto out;
+#endif /* FIFO */
+			} else if (error = suser(cred, (u_short *)0)) {	/* devices need suser() */
+				VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
+				vput(nd.ni_dvp);
+				goto out;
+			} else
+				vap->va_rdev = (dev_t)rdev;
+			nqsrv_getl(nd.ni_dvp, NQL_WRITE);
+			if (error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, vap)) {
+				vrele(nd.ni_startdir);
+				nfsm_reply(0);
+			}
+			nd.ni_cnd.cn_nameiop = LOOKUP;	/* look the new node up again */
+			nd.ni_cnd.cn_flags &= ~(LOCKPARENT | SAVESTART);
+			nd.ni_cnd.cn_proc = nfsd->nd_procp;
+			nd.ni_cnd.cn_cred = nfsd->nd_procp->p_ucred;
+			if (error = lookup(&nd)) {
+				free(nd.ni_cnd.cn_pnbuf, M_NAMEI);
+				nfsm_reply(0);
+			}
+			FREE(nd.ni_cnd.cn_pnbuf, M_NAMEI);
+			if (nd.ni_cnd.cn_flags & ISSYMLINK) {
+				vrele(nd.ni_dvp);
+				vput(nd.ni_vp);
+				VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);	/* NOTE(review): runs after ni_dvp was released */
+				error = EINVAL;
+				nfsm_reply(0);
+			}
+		} else {
+			VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
+			vput(nd.ni_dvp);
+			error = ENXIO;
+			goto out;
+		}
+		vp = nd.ni_vp;
+	} else {
+		vrele(nd.ni_startdir);
+		free(nd.ni_cnd.cn_pnbuf, M_NAMEI);
+		vp = nd.ni_vp;
+		if (nd.ni_dvp == vp)
+			vrele(nd.ni_dvp);
+		else
+			vput(nd.ni_dvp);
+		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);	/* NOTE(review): runs after ni_dvp was released */
+		if (nfsd->nd_nqlflag == NQL_NOVAL) {
+			tsize = fxdr_unsigned(long, sp->sa_nfssize);
+			if (tsize != -1)
+				vap->va_size = (u_quad_t)tsize;
+			else
+				vap->va_size = -1;
+		} else
+			fxdr_hyper(&sp->sa_nqsize, &vap->va_size);
+		if (vap->va_size != -1) {	/* a size was supplied: set it on the existing file */
+			if (error = nfsrv_access(vp, VWRITE, cred,
+			    (nd.ni_cnd.cn_flags & RDONLY), nfsd->nd_procp)) {
+				vput(vp);
+				nfsm_reply(0);
+			}
+			nqsrv_getl(vp, NQL_WRITE);
+			if (error = VOP_SETATTR(vp, vap, cred, nfsd->nd_procp)) {
+				vput(vp);
+				nfsm_reply(0);
+			}
+		}
+	}
+	bzero((caddr_t)fhp, sizeof(nfh));	/* nfh is reused to build the reply handle */
+	fhp->fh_fsid = vp->v_mount->mnt_stat.f_fsid;
+	if (error = VFS_VPTOFH(vp, &fhp->fh_fid)) {
+		vput(vp);
+		nfsm_reply(0);
+	}
+	error = VOP_GETATTR(vp, vap, cred, nfsd->nd_procp);
+	vput(vp);
+	nfsm_reply(NFSX_FH+NFSX_FATTR(nfsd->nd_nqlflag != NQL_NOVAL));
+	nfsm_srvfhtom(fhp);
+	nfsm_build(fp, struct nfsv2_fattr *, NFSX_FATTR(nfsd->nd_nqlflag != NQL_NOVAL));
+	nfsm_srvfillattr;
+	return (error);
+nfsmout:
+	if (nd.ni_cnd.cn_nameiop == 0 && nd.ni_cnd.cn_flags == 0)
+		return (error);		/* left before nfs_namei(): nd holds nothing */
+	vrele(nd.ni_startdir);
+	VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
+	if (nd.ni_dvp == nd.ni_vp)
+		vrele(nd.ni_dvp);
+	else
+		vput(nd.ni_dvp);
+	if (nd.ni_vp)
+		vput(nd.ni_vp);
+	return (error);
+
+out:
+	vrele(nd.ni_startdir);
+	free(nd.ni_cnd.cn_pnbuf, M_NAMEI);
+	nfsm_reply(0);
+}
+
+/*
+ * nfs remove service: remove the directory entry named in the request
+ */
+nfsrv_remove(nfsd, mrep, md, dpos, cred, nam, mrq)
+	struct nfsd *nfsd;		/* state of the nfsd handling this request */
+	struct mbuf *mrep, *md;		/* request mbufs; &md is also handed to nfs_namei() */
+	caddr_t dpos;			/* parse position; &dpos is also handed to nfs_namei() */
+	struct ucred *cred;		/* credentials handed to the lookup */
+	struct mbuf *nam, **mrq;	/* nam: handed to nfs_namei(); mrq: not used directly */
+{
+	struct nameidata nd;
+	register u_long *tl;
+	register long t1;
+	caddr_t bpos;
+	int error = 0, cache, len;
+	char *cp2;
+	struct mbuf *mb, *mreq;
+	struct vnode *vp;
+	nfsv2fh_t nfh;
+	fhandle_t *fhp;
+	u_quad_t frev;
+
+	fhp = &nfh.fh_generic;
+	nfsm_srvmtofh(fhp);
+	nfsm_srvstrsiz(len, NFS_MAXNAMLEN);
+	nd.ni_cnd.cn_cred = cred;
+	nd.ni_cnd.cn_nameiop = DELETE;
+	nd.ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF;
+	if (error = nfs_namei(&nd, fhp, len, nfsd->nd_slp, nam, &md, &dpos,
+	    nfsd->nd_procp))
+		nfsm_reply(0);
+	vp = nd.ni_vp;
+	if (vp->v_type == VDIR &&		/* directories only if suser() allows it */
+		(error = suser(cred, (u_short *)0)))
+		goto out;
+	/*
+	 * The root of a mounted filesystem cannot be deleted.
+	 */
+	if (vp->v_flag & VROOT) {
+		error = EBUSY;
+		goto out;
+	}
+	if (vp->v_flag & VTEXT)
+		(void) vnode_pager_uncache(vp);
+out:
+	if (!error) {
+		nqsrv_getl(nd.ni_dvp, NQL_WRITE);
+		nqsrv_getl(vp, NQL_WRITE);
+		error = VOP_REMOVE(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
+	} else {			/* refused: abort, then release both vnodes here */
+		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
+		if (nd.ni_dvp == vp)
+			vrele(nd.ni_dvp);
+		else
+			vput(nd.ni_dvp);
+		vput(vp);
+	}
+	nfsm_reply(0);
+	nfsm_srvdone;
+}
+
+/*
+ * nfs rename service: rename the "from" name to the "to" name of the request
+ */
+nfsrv_rename(nfsd, mrep, md, dpos, cred, nam, mrq)
+	struct nfsd *nfsd;		/* state of the nfsd handling this request */
+	struct mbuf *mrep, *md;		/* request mbufs; &md is also handed to nfs_namei() */
+	caddr_t dpos;			/* parse position; &dpos is also handed to nfs_namei() */
+	struct ucred *cred;		/* credentials for both lookups; cr_uid is rewritten */
+	struct mbuf *nam, **mrq;	/* nam: handed to nfs_namei(); mrq: not used directly */
+{
+	register u_long *tl;
+	register long t1;
+	caddr_t bpos;
+	int error = 0, cache, len, len2;
+	char *cp2;
+	struct mbuf *mb, *mreq;
+	struct nameidata fromnd, tond;
+	struct vnode *fvp, *tvp, *tdvp;
+	nfsv2fh_t fnfh, tnfh;
+	fhandle_t *ffhp, *tfhp;
+	u_quad_t frev;
+	uid_t saved_uid;
+
+	ffhp = &fnfh.fh_generic;
+	tfhp = &tnfh.fh_generic;
+	fromnd.ni_cnd.cn_nameiop = fromnd.ni_cnd.cn_flags = 0;	/* nfsmout tests both */
+	tond.ni_cnd.cn_nameiop = tond.ni_cnd.cn_flags = 0;
+	nfsm_srvmtofh(ffhp);
+	nfsm_srvstrsiz(len, NFS_MAXNAMLEN);
+	/* Saved so cr_uid can be reset before the 2nd nfs_namei() if remapped. */
+	saved_uid = cred->cr_uid;
+	fromnd.ni_cnd.cn_cred = cred;
+	fromnd.ni_cnd.cn_nameiop = DELETE;
+	fromnd.ni_cnd.cn_flags = WANTPARENT | SAVESTART;
+	if (error = nfs_namei(&fromnd, ffhp, len, nfsd->nd_slp, nam, &md,
+	    &dpos, nfsd->nd_procp))
+		nfsm_reply(0);
+	fvp = fromnd.ni_vp;
+	nfsm_srvmtofh(tfhp);
+	nfsm_strsiz(len2, NFS_MAXNAMLEN);
+	cred->cr_uid = saved_uid;
+	tond.ni_cnd.cn_cred = cred;
+	tond.ni_cnd.cn_nameiop = RENAME;
+	tond.ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART;
+	if (error = nfs_namei(&tond, tfhp, len2, nfsd->nd_slp, nam, &md,
+	    &dpos, nfsd->nd_procp)) {
+		VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd);
+		vrele(fromnd.ni_dvp);
+		vrele(fvp);
+		goto out1;
+	}
+	tdvp = tond.ni_dvp;
+	tvp = tond.ni_vp;
+	if (tvp != NULL) {		/* target exists: both must be of the same kind */
+		if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
+			error = EISDIR;
+			goto out;
+		} else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
+			error = ENOTDIR;
+			goto out;
+		}
+		if (tvp->v_type == VDIR && tvp->v_mountedhere) {
+			error = EXDEV;
+			goto out;
+		}
+	}
+	if (fvp->v_type == VDIR && fvp->v_mountedhere) {
+		error = EBUSY;
+		goto out;
+	}
+	if (fvp->v_mount != tdvp->v_mount) {	/* no rename across mounts */
+		error = EXDEV;
+		goto out;
+	}
+	if (fvp == tdvp)
+		error = EINVAL;
+	/*
+	 * If source is the same as the destination (that is the
+	 * same vnode with the same name in the same directory),
+	 * then there is nothing to do.
+	 */
+	if (fvp == tvp && fromnd.ni_dvp == tdvp &&
+	    fromnd.ni_cnd.cn_namelen == tond.ni_cnd.cn_namelen &&
+	    !bcmp(fromnd.ni_cnd.cn_nameptr, tond.ni_cnd.cn_nameptr,
+	      fromnd.ni_cnd.cn_namelen))
+		error = -1;
+out:
+	if (!error) {
+		nqsrv_getl(fromnd.ni_dvp, NQL_WRITE);
+		nqsrv_getl(tdvp, NQL_WRITE);
+		if (tvp)
+			nqsrv_getl(tvp, NQL_WRITE);
+		error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd,
+				   tond.ni_dvp, tond.ni_vp, &tond.ni_cnd);
+	} else {
+		VOP_ABORTOP(tond.ni_dvp, &tond.ni_cnd);
+		if (tdvp == tvp)
+			vrele(tdvp);
+		else
+			vput(tdvp);
+		if (tvp)
+			vput(tvp);
+		VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd);
+		vrele(fromnd.ni_dvp);
+		vrele(fvp);
+		/* -1 only means "nothing to do": answer with success. */
+		if (error == -1)
+			error = 0;
+	}
+	vrele(tond.ni_startdir);
+	FREE(tond.ni_cnd.cn_pnbuf, M_NAMEI);
+out1:
+	vrele(fromnd.ni_startdir);
+	FREE(fromnd.ni_cnd.cn_pnbuf, M_NAMEI);
+	nfsm_reply(0);
+	return (error);
+
+nfsmout:
+	if (tond.ni_cnd.cn_nameiop || tond.ni_cnd.cn_flags) {	/* "to" lookup was set up */
+		vrele(tond.ni_startdir);
+		FREE(tond.ni_cnd.cn_pnbuf, M_NAMEI);
+	}
+	if (fromnd.ni_cnd.cn_nameiop || fromnd.ni_cnd.cn_flags) {	/* "from" lookup was set up */
+		vrele(fromnd.ni_startdir);
+		FREE(fromnd.ni_cnd.cn_pnbuf, M_NAMEI);
+		VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd);
+		vrele(fromnd.ni_dvp);
+		vrele(fvp);
+	}
+	return (error);
+}
+
+/*
+ * nfs link service: make a new name for the file a handle refers to
+ */
+nfsrv_link(nfsd, mrep, md, dpos, cred, nam, mrq)
+	struct nfsd *nfsd;		/* state of the nfsd handling this request */
+	struct mbuf *mrep, *md;		/* request mbufs; &md is also handed to nfs_namei() */
+	caddr_t dpos;			/* parse position; &dpos is also handed to nfs_namei() */
+	struct ucred *cred;		/* credentials handed to the lookups */
+	struct mbuf *nam, **mrq;	/* nam: handed on to the lookups; mrq: not used directly */
+{
+	struct nameidata nd;
+	register u_long *tl;
+	register long t1;
+	caddr_t bpos;
+	int error = 0, rdonly, cache, len;
+	char *cp2;
+	struct mbuf *mb, *mreq;
+	struct vnode *vp, *xp;
+	nfsv2fh_t nfh, dnfh;
+	fhandle_t *fhp, *dfhp;
+	u_quad_t frev;
+
+	fhp = &nfh.fh_generic;
+	dfhp = &dnfh.fh_generic;
+	nfsm_srvmtofh(fhp);		/* handle of the existing file */
+	nfsm_srvmtofh(dfhp);		/* handle used for the new name's lookup */
+	nfsm_srvstrsiz(len, NFS_MAXNAMLEN);
+	if (error = nfsrv_fhtovp(fhp, FALSE, &vp, cred, nfsd->nd_slp, nam, &rdonly))
+		nfsm_reply(0);
+	if (vp->v_type == VDIR && (error = suser(cred, (u_short *)0)))
+		goto out1;
+	nd.ni_cnd.cn_cred = cred;
+	nd.ni_cnd.cn_nameiop = CREATE;
+	nd.ni_cnd.cn_flags = LOCKPARENT;
+	if (error = nfs_namei(&nd, dfhp, len, nfsd->nd_slp, nam, &md, &dpos,
+	    nfsd->nd_procp))
+		goto out1;
+	xp = nd.ni_vp;
+	if (xp != NULL) {		/* the new name already exists */
+		error = EEXIST;
+		goto out;
+	}
+	xp = nd.ni_dvp;
+	if (vp->v_mount != xp->v_mount)	/* no links across mounts */
+		error = EXDEV;
+out:
+	if (!error) {
+		nqsrv_getl(vp, NQL_WRITE);
+		nqsrv_getl(xp, NQL_WRITE);
+		error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
+	} else {
+		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
+		if (nd.ni_dvp == nd.ni_vp)
+			vrele(nd.ni_dvp);
+		else
+			vput(nd.ni_dvp);
+		if (nd.ni_vp)
+			vrele(nd.ni_vp);
+	}
+out1:
+	vrele(vp);			/* vp is dropped with vrele(), not vput(), here */
+	nfsm_reply(0);
+	nfsm_srvdone;
+}
+
+/*
+ * nfs symbolic link service: create a symlink whose target text is in the request
+ */
+nfsrv_symlink(nfsd, mrep, md, dpos, cred, nam, mrq)
+	struct nfsd *nfsd;		/* state of the nfsd handling this request */
+	struct mbuf *mrep, *md;		/* request mbufs; &md is also handed to nfs_namei() */
+	caddr_t dpos;			/* parse position; &dpos is also handed to nfs_namei() */
+	struct ucred *cred;		/* credentials handed to the lookup */
+	struct mbuf *nam, **mrq;	/* nam: handed to nfs_namei(); mrq: not used directly */
+{
+	struct vattr va;
+	struct nameidata nd;
+	register struct vattr *vap = &va;
+	register u_long *tl;
+	register long t1;
+	struct nfsv2_sattr *sp;
+	caddr_t bpos;
+	struct uio io;
+	struct iovec iv;
+	int error = 0, cache, len, len2;
+	char *pathcp, *cp2;
+	struct mbuf *mb, *mreq;
+	nfsv2fh_t nfh;
+	fhandle_t *fhp;
+	u_quad_t frev;
+
+	pathcp = (char *)0;		/* exit paths free it only when non-null */
+	fhp = &nfh.fh_generic;
+	nfsm_srvmtofh(fhp);
+	nfsm_srvstrsiz(len, NFS_MAXNAMLEN);
+	nd.ni_cnd.cn_cred = cred;
+	nd.ni_cnd.cn_nameiop = CREATE;
+	nd.ni_cnd.cn_flags = LOCKPARENT;
+	if (error = nfs_namei(&nd, fhp, len, nfsd->nd_slp, nam, &md, &dpos,
+	    nfsd->nd_procp))
+		goto out;
+	nfsm_strsiz(len2, NFS_MAXPATHLEN);	/* NOTE(review): confirm this macro rejects len2 <= 0 */
+	MALLOC(pathcp, caddr_t, len2 + 1, M_TEMP, M_WAITOK);	/* + 1 for the '\0' stored below */
+	iv.iov_base = pathcp;
+	iv.iov_len = len2;
+	io.uio_resid = len2;
+	io.uio_offset = 0;
+	io.uio_iov = &iv;
+	io.uio_iovcnt = 1;
+	io.uio_segflg = UIO_SYSSPACE;
+	io.uio_rw = UIO_READ;
+	io.uio_procp = (struct proc *)0;
+	nfsm_mtouio(&io, len2);		/* presumably copies len2 request bytes into pathcp */
+	nfsm_dissect(sp, struct nfsv2_sattr *, NFSX_SATTR(nfsd->nd_nqlflag != NQL_NOVAL));
+	*(pathcp + len2) = '\0';
+	if (nd.ni_vp) {			/* the name already exists */
+		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
+		if (nd.ni_dvp == nd.ni_vp)
+			vrele(nd.ni_dvp);
+		else
+			vput(nd.ni_dvp);
+		vrele(nd.ni_vp);
+		error = EEXIST;
+		goto out;
+	}
+	VATTR_NULL(vap);
+	vap->va_mode = fxdr_unsigned(u_short, sp->sa_mode);
+	nqsrv_getl(nd.ni_dvp, NQL_WRITE);
+	error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, vap, pathcp);
+out:
+	if (pathcp)
+		FREE(pathcp, M_TEMP);
+	nfsm_reply(0);
+	return (error);
+nfsmout:
+	VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);	/* NOTE(review): nd is unset if reached before nfs_namei() -- confirm */
+	if (nd.ni_dvp == nd.ni_vp)
+		vrele(nd.ni_dvp);
+	else
+		vput(nd.ni_dvp);
+	if (nd.ni_vp)
+		vrele(nd.ni_vp);
+	if (pathcp)
+		FREE(pathcp, M_TEMP);
+	return (error);
+}
+
+/*
+ * nfs mkdir service: create a directory, reply with its handle and attributes
+ */
+nfsrv_mkdir(nfsd, mrep, md, dpos, cred, nam, mrq)
+	struct nfsd *nfsd;		/* state of the nfsd handling this request */
+	struct mbuf *mrep, *md;		/* request mbufs; &md is also handed to nfs_namei() */
+	caddr_t dpos;			/* parse position; &dpos is also handed to nfs_namei() */
+	struct ucred *cred;		/* credentials handed to the lookup and vnode ops */
+	struct mbuf *nam, **mrq;	/* nam: handed to nfs_namei(); mrq: not used directly */
+{
+	struct vattr va;
+	register struct vattr *vap = &va;
+	register struct nfsv2_fattr *fp;
+	struct nameidata nd;
+	register caddr_t cp;
+	register u_long *tl;
+	register long t1;
+	caddr_t bpos;
+	int error = 0, cache, len;
+	char *cp2;
+	struct mbuf *mb, *mb2, *mreq;
+	struct vnode *vp;
+	nfsv2fh_t nfh;
+	fhandle_t *fhp;
+	u_quad_t frev;
+
+	fhp = &nfh.fh_generic;
+	nfsm_srvmtofh(fhp);
+	nfsm_srvstrsiz(len, NFS_MAXNAMLEN);
+	nd.ni_cnd.cn_cred = cred;
+	nd.ni_cnd.cn_nameiop = CREATE;
+	nd.ni_cnd.cn_flags = LOCKPARENT;
+	if (error = nfs_namei(&nd, fhp, len, nfsd->nd_slp, nam, &md, &dpos,
+	    nfsd->nd_procp))
+		nfsm_reply(0);
+	nfsm_dissect(tl, u_long *, NFSX_UNSIGNED);	/* only one word (the mode) is read */
+	VATTR_NULL(vap);
+	vap->va_type = VDIR;
+	vap->va_mode = nfstov_mode(*tl++);
+	vp = nd.ni_vp;
+	if (vp != NULL) {		/* the name already exists */
+		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
+		if (nd.ni_dvp == vp)
+			vrele(nd.ni_dvp);
+		else
+			vput(nd.ni_dvp);
+		vrele(vp);
+		error = EEXIST;
+		nfsm_reply(0);
+	}
+	nqsrv_getl(nd.ni_dvp, NQL_WRITE);
+	if (error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, vap))
+		nfsm_reply(0);
+	vp = nd.ni_vp;
+	bzero((caddr_t)fhp, sizeof(nfh));	/* nfh is reused to build the reply handle */
+	fhp->fh_fsid = vp->v_mount->mnt_stat.f_fsid;
+	if (error = VFS_VPTOFH(vp, &fhp->fh_fid)) {
+		vput(vp);
+		nfsm_reply(0);
+	}
+	error = VOP_GETATTR(vp, vap, cred, nfsd->nd_procp);
+	vput(vp);
+	nfsm_reply(NFSX_FH+NFSX_FATTR(nfsd->nd_nqlflag != NQL_NOVAL));
+	nfsm_srvfhtom(fhp);
+	nfsm_build(fp, struct nfsv2_fattr *, NFSX_FATTR(nfsd->nd_nqlflag != NQL_NOVAL));
+	nfsm_srvfillattr;
+	return (error);
+nfsmout:
+	VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);	/* NOTE(review): nd is unset if reached before nfs_namei() -- confirm */
+	if (nd.ni_dvp == nd.ni_vp)
+		vrele(nd.ni_dvp);
+	else
+		vput(nd.ni_dvp);
+	if (nd.ni_vp)
+		vrele(nd.ni_vp);
+	return (error);
+}
+
+/*
+ * nfs rmdir service
+ *
+ * Looks the name up with nfs_namei() (DELETE, parent and leaf locked) and
+ * removes it with VOP_RMDIR().  The removal is refused with ENOTDIR when
+ * the target is not a directory, with EINVAL when it is the parent itself
+ * (".") and with EBUSY when it is the root of a mounted filesystem; in
+ * those cases the operation is aborted and both vnodes are released here.
+ * The reply is built with nfsm_reply(0), i.e. no data beyond the header.
+ */
+nfsrv_rmdir(nfsd, mrep, md, dpos, cred, nam, mrq)
+	struct nfsd *nfsd;
+	struct mbuf *mrep, *md;
+	caddr_t dpos;
+	struct ucred *cred;
+	struct mbuf *nam, **mrq;
+{
+	register u_long *tl;
+	register long t1;
+	caddr_t bpos;
+	int error = 0, cache, len;
+	char *cp2;
+	struct mbuf *mb, *mreq;
+	struct vnode *vp;
+	nfsv2fh_t nfh;
+	fhandle_t *fhp;
+	struct nameidata nd;
+	u_quad_t frev;
+
+	fhp = &nfh.fh_generic;
+	nfsm_srvmtofh(fhp);
+	nfsm_srvstrsiz(len, NFS_MAXNAMLEN);
+	nd.ni_cnd.cn_cred = cred;
+	nd.ni_cnd.cn_nameiop = DELETE;
+	nd.ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF;
+	if (error = nfs_namei(&nd, fhp, len, nfsd->nd_slp, nam, &md, &dpos,
+	    nfsd->nd_procp))
+		nfsm_reply(0);
+	vp = nd.ni_vp;
+	if (vp->v_type != VDIR) {
+		error = ENOTDIR;
+		goto out;
+	}
+	/*
+	 * No rmdir "." please.
+	 */
+	if (nd.ni_dvp == vp) {
+		error = EINVAL;
+		goto out;
+	}
+	/*
+	 * The root of a mounted filesystem cannot be deleted.
+	 */
+	if (vp->v_flag & VROOT)
+		error = EBUSY;
+out:
+	if (!error) {
+		nqsrv_getl(nd.ni_dvp, NQL_WRITE);
+		nqsrv_getl(vp, NQL_WRITE);
+		error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
+	} else {	/* a check above failed: back out of the lookup */
+		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
+		if (nd.ni_dvp == nd.ni_vp)
+			vrele(nd.ni_dvp);
+		else
+			vput(nd.ni_dvp);
+		vput(vp);
+	}
+	nfsm_reply(0);
+	nfsm_srvdone;
+}
+
+/*
+ * nfs readdir service
+ * - mallocs what it thinks is enough to read
+ *	count rounded up to a multiple of NFS_DIRBLKSIZ <= NFS_MAXREADDIR
+ * - calls VOP_READDIR()
+ * - loops around building the reply
+ *	if the output generated exceeds count break out of loop
+ *	The nfsm_clget macro is used here so that the reply will be packed
+ *	tightly in mbuf clusters.
+ * - it only knows that it has encountered eof when the VOP_READDIR()
+ *	reads nothing
+ * - as such, the readdir rpc that returns the last entries still reports
+ *	eof false, and only the next one reports eof
+ * - it trims out records with d_fileno == 0
+ *	these do not matter for Unix clients, but they might confuse
+ *	clients running on other operating systems.
+ * NB: It is tempting to set eof to true if the VOP_READDIR() reads less
+ *	than requested, but this may not hold for all filesystems. For
+ *	example, it does not hold for client NFS (although that is never
+ *	remote mounted anyhow).
+ *     The alternate call nqnfsrv_readdirlook() does lookups as well.
+ * PS: The NFS protocol spec. does not clarify what the "count" byte
+ *	argument is a count of.. just name strings and file id's or the
+ *	entire reply rpc or ...
+ *	I tried just file name and id sizes and it confused the Sun client,
+ *	so I am using the full rpc size now. The "paranoia.." comment refers
+ *	to including the status longwords that are not a part of the dir.
+ *	"entry" structures, but are in the rpc.
+ */
+struct flrep {	/* per-entry lookup data that nqnfsrv_readdirlook() copies into its reply */
+	u_long fl_cachable;	/* cache2 from nqsrv_getlease(), in XDR form */
+	u_long fl_duration;	/* lease duration in XDR form; 0 when no lease was asked for */
+	u_long fl_frev[2];	/* frev2 as written by txdr_hyper() */
+	nfsv2fh_t fl_nfh;	/* file handle of the directory entry */
+	u_long fl_fattr[NFSX_NQFATTR / sizeof (u_long)];	/* attributes, filled in via nfsm_srvfillattr */
+};
+
+nfsrv_readdir(nfsd, mrep, md, dpos, cred, nam, mrq)	/* nfs readdir service */
+	struct nfsd *nfsd;
+	struct mbuf *mrep, *md;
+	caddr_t dpos;
+	struct ucred *cred;
+	struct mbuf *nam, **mrq;
+{
+	register char *bp, *be;
+	register struct mbuf *mp;
+	register struct dirent *dp;
+	register caddr_t cp;
+	register u_long *tl;
+	register long t1;
+	caddr_t bpos;
+	struct mbuf *mb, *mb2, *mreq, *mp2;
+	char *cpos, *cend, *cp2, *rbuf;
+	struct vnode *vp;
+	nfsv2fh_t nfh;
+	fhandle_t *fhp;
+	struct uio io;
+	struct iovec iv;
+	int len, nlen, rem, xfer, tsiz, i, error = 0;
+	int siz, cnt, fullsiz, eofflag, rdonly, cache;
+	u_quad_t frev;
+	u_long on, off, toff;
+
+	fhp = &nfh.fh_generic;
+	nfsm_srvmtofh(fhp);
+	nfsm_dissect(tl, u_long *, 2*NFSX_UNSIGNED);
+	toff = fxdr_unsigned(u_long, *tl++);	/* directory offset asked for by the client */
+	off = (toff & ~(NFS_DIRBLKSIZ-1));	/* start of the NFS_DIRBLKSIZ block holding toff */
+	on = (toff & (NFS_DIRBLKSIZ-1));	/* position of toff inside that block */
+	cnt = fxdr_unsigned(int, *tl);	/* NOTE(review): client-supplied, not checked for <= 0 before it sizes rbuf */
+	siz = ((cnt+NFS_DIRBLKSIZ-1) & ~(NFS_DIRBLKSIZ-1));
+	if (cnt > NFS_MAXREADDIR)	/* cap the read buffer size */
+		siz = NFS_MAXREADDIR;
+	fullsiz = siz;
+	if (error = nfsrv_fhtovp(fhp, TRUE, &vp, cred, nfsd->nd_slp, nam, &rdonly))
+		nfsm_reply(0);
+	nqsrv_getl(vp, NQL_READ);
+	if (error = nfsrv_access(vp, VEXEC, cred, rdonly, nfsd->nd_procp)) {
+		vput(vp);
+		nfsm_reply(0);
+	}
+	VOP_UNLOCK(vp);	/* from here on vp is released with vrele(), not vput() */
+	MALLOC(rbuf, caddr_t, siz, M_TEMP, M_WAITOK);
+again:
+	iv.iov_base = rbuf;
+	iv.iov_len = fullsiz;
+	io.uio_iov = &iv;
+	io.uio_iovcnt = 1;
+	io.uio_offset = (off_t)off;
+	io.uio_resid = fullsiz;
+	io.uio_segflg = UIO_SYSSPACE;
+	io.uio_rw = UIO_READ;
+	io.uio_procp = (struct proc *)0;
+	error = VOP_READDIR(vp, &io, cred);
+	off = (off_t)io.uio_offset;
+	if (error) {
+		vrele(vp);
+		free((caddr_t)rbuf, M_TEMP);
+		nfsm_reply(0);
+	}
+	if (io.uio_resid < fullsiz)	/* something was read, so eof is not known yet */
+		eofflag = 0;
+	else
+		eofflag = 1;
+	if (io.uio_resid) {
+		siz -= io.uio_resid;	/* siz becomes the number of bytes actually read */
+
+		/*
+		 * If nothing read, return eof
+		 * rpc reply
+		 */
+		if (siz == 0) {
+			vrele(vp);
+			nfsm_reply(2*NFSX_UNSIGNED);
+			nfsm_build(tl, u_long *, 2*NFSX_UNSIGNED);
+			*tl++ = nfs_false;
+			*tl = nfs_true;
+			FREE((caddr_t)rbuf, M_TEMP);
+			return (0);
+		}
+	}
+
+	/*
+	 * Check for degenerate cases of nothing useful read.
+	 * If so go try again
+	 */
+	cpos = rbuf + on;
+	cend = rbuf + siz;
+	dp = (struct dirent *)cpos;
+	while (cpos < cend && dp->d_fileno == 0) {
+		cpos += dp->d_reclen;
+		dp = (struct dirent *)cpos;
+	}
+	if (cpos >= cend) {	/* only unused slots here: read on from the new offset */
+		toff = off;
+		siz = fullsiz;
+		on = 0;
+		goto again;
+	}
+
+	cpos = rbuf + on;
+	cend = rbuf + siz;
+	dp = (struct dirent *)cpos;
+	len = 3*NFSX_UNSIGNED;	/* paranoia, probably can be 0 */
+	nfsm_reply(siz);
+	mp = mp2 = mb;
+	bp = bpos;
+	be = bp + M_TRAILINGSPACE(mp);
+
+	/* Loop through the records and build reply */
+	while (cpos < cend) {
+		if (dp->d_fileno != 0) {
+			nlen = dp->d_namlen;
+			rem = nfsm_rndup(nlen)-nlen;	/* pad bytes needed after the name */
+			len += (4*NFSX_UNSIGNED + nlen + rem);
+			if (len > cnt) {	/* this entry would exceed the client's count */
+				eofflag = 0;
+				break;
+			}
+			/*
+			 * Build the directory record xdr from
+			 * the dirent entry.
+			 */
+			nfsm_clget;
+			*tl = nfs_true;	/* marks that an entry follows */
+			bp += NFSX_UNSIGNED;
+			nfsm_clget;
+			*tl = txdr_unsigned(dp->d_fileno);
+			bp += NFSX_UNSIGNED;
+			nfsm_clget;
+			*tl = txdr_unsigned(nlen);
+			bp += NFSX_UNSIGNED;
+	
+			/* And loop around copying the name */
+			xfer = nlen;
+			cp = dp->d_name;
+			while (xfer > 0) {
+				nfsm_clget;
+				if ((bp+xfer) > be)	/* name does not fit: copy what fits, then loop */
+					tsiz = be-bp;
+				else
+					tsiz = xfer;
+				bcopy(cp, bp, tsiz);
+				bp += tsiz;
+				xfer -= tsiz;
+				if (xfer > 0)
+					cp += tsiz;
+			}
+			/* And null pad to a long boundary */
+			for (i = 0; i < rem; i++)
+				*bp++ = '\0';
+			nfsm_clget;
+	
+			/* Finish off the record */
+			toff += dp->d_reclen;
+			*tl = txdr_unsigned(toff);	/* offset the client passes back to continue */
+			bp += NFSX_UNSIGNED;
+		} else
+			toff += dp->d_reclen;
+		cpos += dp->d_reclen;
+		dp = (struct dirent *)cpos;
+	}
+	vrele(vp);
+	nfsm_clget;
+	*tl = nfs_false;	/* no further entry in this reply */
+	bp += NFSX_UNSIGNED;
+	nfsm_clget;
+	if (eofflag)
+		*tl = nfs_true;
+	else
+		*tl = nfs_false;
+	bp += NFSX_UNSIGNED;
+	if (mp != mb) {	/* set the length of the last mbuf written to */
+		if (bp < be)
+			mp->m_len = bp - mtod(mp, caddr_t);
+	} else
+		mp->m_len += bp - bpos;
+	FREE(rbuf, M_TEMP);
+	nfsm_srvdone;
+}
+
+nqnfsrv_readdirlook(nfsd, mrep, md, dpos, cred, nam, mrq)	/* readdir that also returns a struct flrep per entry */
+	struct nfsd *nfsd;
+	struct mbuf *mrep, *md;
+	caddr_t dpos;
+	struct ucred *cred;
+	struct mbuf *nam, **mrq;
+{
+	register char *bp, *be;
+	register struct mbuf *mp;
+	register struct dirent *dp;
+	register caddr_t cp;
+	register u_long *tl;
+	register long t1;
+	caddr_t bpos;
+	struct mbuf *mb, *mb2, *mreq, *mp2;
+	char *cpos, *cend, *cp2, *rbuf;
+	struct vnode *vp, *nvp;
+	struct flrep fl;
+	nfsv2fh_t nfh;
+	fhandle_t *fhp;
+	struct uio io;
+	struct iovec iv;
+	struct vattr va, *vap = &va;
+	struct nfsv2_fattr *fp;
+	int len, nlen, rem, xfer, tsiz, i, error = 0, duration2, cache2;
+	int siz, cnt, fullsiz, eofflag, rdonly, cache;
+	u_quad_t frev, frev2;
+	u_long on, off, toff;
+
+	fhp = &nfh.fh_generic;
+	nfsm_srvmtofh(fhp);
+	nfsm_dissect(tl, u_long *, 3*NFSX_UNSIGNED);
+	toff = fxdr_unsigned(u_long, *tl++);	/* directory offset asked for by the client */
+	off = (toff & ~(NFS_DIRBLKSIZ-1));	/* start of the NFS_DIRBLKSIZ block holding toff */
+	on = (toff & (NFS_DIRBLKSIZ-1));	/* position of toff inside that block */
+	cnt = fxdr_unsigned(int, *tl++);	/* NOTE(review): client-supplied, not checked for <= 0 before it sizes rbuf */
+	duration2 = fxdr_unsigned(int, *tl);	/* non-zero: get a lease for every entry */
+	siz = ((cnt+NFS_DIRBLKSIZ-1) & ~(NFS_DIRBLKSIZ-1));
+	if (cnt > NFS_MAXREADDIR)	/* cap the read buffer size */
+		siz = NFS_MAXREADDIR;
+	fullsiz = siz;
+	if (error = nfsrv_fhtovp(fhp, TRUE, &vp, cred, nfsd->nd_slp, nam, &rdonly))
+		nfsm_reply(0);
+	nqsrv_getl(vp, NQL_READ);
+	if (error = nfsrv_access(vp, VEXEC, cred, rdonly, nfsd->nd_procp)) {
+		vput(vp);
+		nfsm_reply(0);
+	}
+	VOP_UNLOCK(vp);	/* from here on vp is released with vrele(), not vput() */
+	MALLOC(rbuf, caddr_t, siz, M_TEMP, M_WAITOK);
+again:
+	iv.iov_base = rbuf;
+	iv.iov_len = fullsiz;
+	io.uio_iov = &iv;
+	io.uio_iovcnt = 1;
+	io.uio_offset = (off_t)off;
+	io.uio_resid = fullsiz;
+	io.uio_segflg = UIO_SYSSPACE;
+	io.uio_rw = UIO_READ;
+	io.uio_procp = (struct proc *)0;
+	error = VOP_READDIR(vp, &io, cred);
+	off = (u_long)io.uio_offset;
+	if (error) {
+		vrele(vp);
+		free((caddr_t)rbuf, M_TEMP);
+		nfsm_reply(0);
+	}
+	if (io.uio_resid < fullsiz)	/* something was read, so eof is not known yet */
+		eofflag = 0;
+	else
+		eofflag = 1;
+	if (io.uio_resid) {
+		siz -= io.uio_resid;	/* siz becomes the number of bytes actually read */
+
+		/*
+		 * If nothing read, return eof
+		 * rpc reply
+		 */
+		if (siz == 0) {
+			vrele(vp);
+			nfsm_reply(2 * NFSX_UNSIGNED);
+			nfsm_build(tl, u_long *, 2 * NFSX_UNSIGNED);
+			*tl++ = nfs_false;
+			*tl = nfs_true;
+			FREE((caddr_t)rbuf, M_TEMP);
+			return (0);
+		}
+	}
+
+	/*
+	 * Check for degenerate cases of nothing useful read.
+	 * If so go try again
+	 */
+	cpos = rbuf + on;
+	cend = rbuf + siz;
+	dp = (struct dirent *)cpos;
+	while (cpos < cend && dp->d_fileno == 0) {
+		cpos += dp->d_reclen;
+		dp = (struct dirent *)cpos;
+	}
+	if (cpos >= cend) {	/* only unused slots here: read on from the new offset */
+		toff = off;
+		siz = fullsiz;
+		on = 0;
+		goto again;
+	}
+
+	cpos = rbuf + on;
+	cend = rbuf + siz;
+	dp = (struct dirent *)cpos;
+	len = 3 * NFSX_UNSIGNED;	/* paranoia, probably can be 0 */
+	nfsm_reply(siz);
+	mp = mp2 = mb;
+	bp = bpos;
+	be = bp + M_TRAILINGSPACE(mp);
+
+	/* Loop through the records and build reply */
+	while (cpos < cend) {
+		if (dp->d_fileno != 0) {
+			nlen = dp->d_namlen;
+			rem = nfsm_rndup(nlen)-nlen;	/* pad bytes needed after the name */
+	
+			/*
+			 * For readdir_and_lookup get the vnode using
+			 * the file number.
+			 */
+			if (VFS_VGET(vp->v_mount, dp->d_fileno, &nvp))	/* no vnode: leave the entry out */
+				goto invalid;
+			bzero((caddr_t)&fl.fl_nfh, sizeof (nfsv2fh_t));
+			fl.fl_nfh.fh_generic.fh_fsid =
+				nvp->v_mount->mnt_stat.f_fsid;
+			if (VFS_VPTOFH(nvp, &fl.fl_nfh.fh_generic.fh_fid)) {
+				vput(nvp);
+				goto invalid;
+			}
+			if (duration2) {
+				(void) nqsrv_getlease(nvp, &duration2, NQL_READ,
+					nfsd, nam, &cache2, &frev2, cred);
+				fl.fl_duration = txdr_unsigned(duration2);
+				fl.fl_cachable = txdr_unsigned(cache2);
+				txdr_hyper(&frev2, fl.fl_frev);
+			} else
+				fl.fl_duration = 0;
+			if (VOP_GETATTR(nvp, vap, cred, nfsd->nd_procp)) {
+				vput(nvp);
+				goto invalid;
+			}
+			vput(nvp);
+			fp = (struct nfsv2_fattr *)&fl.fl_fattr;	/* nfsm_srvfillattr presumably fills *fp from vap -- confirm */
+			nfsm_srvfillattr;
+			len += (4*NFSX_UNSIGNED + nlen + rem + NFSX_FH
+				+ NFSX_NQFATTR);
+			if (len > cnt) {	/* this entry would exceed the client's count */
+				eofflag = 0;
+				break;
+			}
+			/*
+			 * Build the directory record xdr from
+			 * the dirent entry.
+			 */
+			nfsm_clget;
+			*tl = nfs_true;	/* marks that an entry follows */
+			bp += NFSX_UNSIGNED;
+
+			/*
+			 * For readdir_and_lookup copy the stuff out.
+			 */
+			xfer = sizeof (struct flrep);
+			cp = (caddr_t)&fl;
+			while (xfer > 0) {
+				nfsm_clget;
+				if ((bp+xfer) > be)	/* does not fit: copy what fits, then loop */
+					tsiz = be-bp;
+				else
+					tsiz = xfer;
+				bcopy(cp, bp, tsiz);
+				bp += tsiz;
+				xfer -= tsiz;
+				if (xfer > 0)
+					cp += tsiz;
+			}
+			nfsm_clget;
+			*tl = txdr_unsigned(dp->d_fileno);
+			bp += NFSX_UNSIGNED;
+			nfsm_clget;
+			*tl = txdr_unsigned(nlen);
+			bp += NFSX_UNSIGNED;
+	
+			/* And loop around copying the name */
+			xfer = nlen;
+			cp = dp->d_name;
+			while (xfer > 0) {
+				nfsm_clget;
+				if ((bp+xfer) > be)	/* name does not fit: copy what fits, then loop */
+					tsiz = be-bp;
+				else
+					tsiz = xfer;
+				bcopy(cp, bp, tsiz);
+				bp += tsiz;
+				xfer -= tsiz;
+				if (xfer > 0)
+					cp += tsiz;
+			}
+			/* And null pad to a long boundary */
+			for (i = 0; i < rem; i++)
+				*bp++ = '\0';
+			nfsm_clget;
+	
+			/* Finish off the record */
+			toff += dp->d_reclen;
+			*tl = txdr_unsigned(toff);	/* offset the client passes back to continue */
+			bp += NFSX_UNSIGNED;
+		} else
+invalid:	/* jumped to from inside the if-arm above when an entry cannot be looked up */
+			toff += dp->d_reclen;
+		cpos += dp->d_reclen;
+		dp = (struct dirent *)cpos;
+	}
+	vrele(vp);
+	nfsm_clget;
+	*tl = nfs_false;	/* no further entry in this reply */
+	bp += NFSX_UNSIGNED;
+	nfsm_clget;
+	if (eofflag)
+		*tl = nfs_true;
+	else
+		*tl = nfs_false;
+	bp += NFSX_UNSIGNED;
+	if (mp != mb) {	/* set the length of the last mbuf written to */
+		if (bp < be)
+			mp->m_len = bp - mtod(mp, caddr_t);
+	} else
+		mp->m_len += bp - bpos;
+	FREE(rbuf, M_TEMP);
+	nfsm_srvdone;
+}
+
+/*
+ * nfs statfs service
+ *
+ * Maps the file handle in the request to a vnode with nfsrv_fhtovp(),
+ * calls VFS_STATFS() on that vnode's mount and releases the vnode.  The
+ * reply carries the transfer size (NFS_MAXDGRAMDATA), block size and the
+ * total/free/available block counts; NQNFS requests
+ * (nd_nqlflag != NQL_NOVAL) additionally get the total and free file
+ * counts.
+ */
+nfsrv_statfs(nfsd, mrep, md, dpos, cred, nam, mrq)
+	struct nfsd *nfsd;
+	struct mbuf *mrep, *md;
+	caddr_t dpos;
+	struct ucred *cred;
+	struct mbuf *nam, **mrq;
+{
+	register struct statfs *sf;
+	register struct nfsv2_statfs *sfp;
+	register u_long *tl;
+	register long t1;
+	caddr_t bpos;
+	int error = 0, rdonly, cache, isnq;
+	char *cp2;
+	struct mbuf *mb, *mb2, *mreq;
+	struct vnode *vp;
+	nfsv2fh_t nfh;
+	fhandle_t *fhp;
+	struct statfs statfs;
+	u_quad_t frev;
+
+	fhp = &nfh.fh_generic;
+	isnq = (nfsd->nd_nqlflag != NQL_NOVAL);	/* selects the longer NQNFS reply layout */
+	nfsm_srvmtofh(fhp);
+	if (error = nfsrv_fhtovp(fhp, TRUE, &vp, cred, nfsd->nd_slp, nam, &rdonly))
+		nfsm_reply(0);
+	sf = &statfs;
+	error = VFS_STATFS(vp->v_mount, sf, nfsd->nd_procp);
+	vput(vp);
+	nfsm_reply(NFSX_STATFS(isnq));
+	nfsm_build(sfp, struct nfsv2_statfs *, NFSX_STATFS(isnq));
+	sfp->sf_tsize = txdr_unsigned(NFS_MAXDGRAMDATA);
+	sfp->sf_bsize = txdr_unsigned(sf->f_bsize);
+	sfp->sf_blocks = txdr_unsigned(sf->f_blocks);
+	sfp->sf_bfree = txdr_unsigned(sf->f_bfree);
+	sfp->sf_bavail = txdr_unsigned(sf->f_bavail);
+	if (isnq) {
+		sfp->sf_files = txdr_unsigned(sf->f_files);
+		sfp->sf_ffree = txdr_unsigned(sf->f_ffree);
+	}
+	nfsm_srvdone;
+}
+
+/*
+ * Null operation, used by clients to ping server
+ *
+ * Does no work of its own: it only has nfsm_reply(0) build the reply.
+ * The locals that look unused (bpos, cache, mb, mreq, frev) are presumably
+ * referenced by name inside the nfsm_reply() macro -- confirm against its
+ * definition before removing any of them.
+ * NOTE(review): error starts out as VNOVAL rather than 0; how the reply
+ * code treats that value is not visible in this file.
+ */
+/* ARGSUSED */
+nfsrv_null(nfsd, mrep, md, dpos, cred, nam, mrq)
+	struct nfsd *nfsd;
+	struct mbuf *mrep, *md;
+	caddr_t dpos;
+	struct ucred *cred;
+	struct mbuf *nam, **mrq;
+{
+	caddr_t bpos;
+	int error = VNOVAL, cache;
+	struct mbuf *mb, *mreq;
+	u_quad_t frev;
+
+	nfsm_reply(0);
+	return (error);
+}
+
+/*
+ * Handler for obsolete procedures: performs no work and only sends back
+ * a status.  A status already recorded in nd_repstat is used as is;
+ * otherwise the status is EPROCUNAVAIL.
+ */
+/* ARGSUSED */
+nfsrv_noop(nfsd, mrep, md, dpos, cred, nam, mrq)
+	struct nfsd *nfsd;
+	struct mbuf *mrep, *md;
+	caddr_t dpos;
+	struct ucred *cred;
+	struct mbuf *nam, **mrq;
+{
+	struct mbuf *mb, *mreq;
+	u_quad_t frev;
+	caddr_t bpos;
+	int cache;
+	int error;
+
+	/* Keep a status somebody else already chose, else "unavailable". */
+	error = nfsd->nd_repstat ? nfsd->nd_repstat : EPROCUNAVAIL;
+	nfsm_reply(0);
+	return (error);
+}
+
+/*
+ * Access check for vnodes that were obtained from a file handle and may
+ * therefore refer to files a Unix client already has open.  Plain
+ * vn_writechk() plus VOP_ACCESS() is not enough, because:
+ * 1 - for writes, a read-only export must be honoured in addition to
+ *     MNT_RDONLY on the mount
+ * 2 - the owner is let through whatever the mode bits say, so that a
+ *     process which chmods a file after opening it keeps working.  This
+ *     is a security hole, but a small one next to the barn door the nfs
+ *     server opens anyhow.
+ *
+ * Returns 0 when access is granted, otherwise an errno value.
+ */
+nfsrv_access(vp, flags, cred, rdonly, p)
+	register struct vnode *vp;
+	int flags;
+	register struct ucred *cred;
+	int rdonly;
+	struct proc *p;
+{
+	struct vattr va;
+	int rv;
+
+	if (flags & VWRITE) {
+		/*
+		 * vn_writechk() with the export's rdonly flag folded in:
+		 * on a read-only export or mount, regular files,
+		 * directories and symbolic links may not be written.
+		 * Other vnode types are not refused here.
+		 */
+		if ((rdonly || (vp->v_mount->mnt_flag & MNT_RDONLY)) &&
+		    (vp->v_type == VREG || vp->v_type == VDIR ||
+		    vp->v_type == VLNK))
+			return (EROFS);
+		/*
+		 * A vnode with shared text attached gets one attempt at
+		 * being uncached; if that fails it cannot be written.
+		 */
+		if ((vp->v_flag & VTEXT) && !vnode_pager_uncache(vp))
+			return (ETXTBSY);
+	}
+	rv = VOP_GETATTR(vp, &va, cred, p);
+	if (rv)
+		return (rv);
+	/* A refusal from VOP_ACCESS() only counts for non-owners. */
+	rv = VOP_ACCESS(vp, flags, cred, p);
+	if (rv == 0 || cred->cr_uid == va.va_uid)
+		return (0);
+	return (rv);
+}
diff --git a/sys/nfsserver/nfs_srvcache.c b/sys/nfsserver/nfs_srvcache.c
new file mode 100644
index 00000000000..63d8bb72d82
--- /dev/null
+++ b/sys/nfsserver/nfs_srvcache.c
@@ -0,0 +1,348 @@
+/*
+ * Copyright (c) 1989, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Rick Macklem at The University of Guelph.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)nfs_srvcache.c	8.1 (Berkeley) 6/10/93
+ */
+
+/*
+ * Reference: Chet Juszczak, "Improving the Performance and Correctness
+ *		of an NFS Server", in Proc. Winter 1989 USENIX Conference,
+ *		pages 53-63. San Diego, February 1989.
+ */
+#include <sys/param.h>
+#include <sys/vnode.h>
+#include <sys/mount.h>
+#include <sys/kernel.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/mbuf.h>
+#include <sys/malloc.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+
+#include <netinet/in.h>
+#ifdef ISO
+#include <netiso/iso.h>
+#endif
+#include <nfs/nfsm_subs.h>
+#include <nfs/rpcv2.h>
+#include <nfs/nfsv2.h>
+#include <nfs/nfs.h>
+#include <nfs/nfsrvcache.h>
+#include <nfs/nqnfs.h>
+
+long numnfsrvcache, desirednfsrvcache = NFSRVCACHESIZ;	/* entries allocated so far / limit before entries get recycled */
+
+#define	NFSRCHASH(xid)		(((xid) + ((xid) >> 24)) & rheadhash)	/* index into rheadhtbl for an xid */
+static struct nfsrvcache *nfsrvlruhead, **nfsrvlrutail = &nfsrvlruhead;	/* LRU list: recycled from the head, appended at the tail */
+static struct nfsrvcache **rheadhtbl;	/* hash table, allocated by hashinit() */
+static u_long rheadhash;	/* set by hashinit(); used as the index mask */
+
+#define TRUE	1
+#define	FALSE	0
+
+#define	NETFAMILY(rp) \
+		(((rp)->rc_flag & RC_INETADDR) ? AF_INET : AF_ISO)	/* address family to match a cache entry with */
+
+/*
+ * Static array that defines which nfs rpc's are nonidempotent
+ *
+ * Indexed by rpc procedure number (nd_procnum).  nfsrv_updatecache() only
+ * saves a reply in the cache for procedures marked TRUE here.
+ */
+int nonidempotent[NFS_NPROCS] = {
+	FALSE,
+	FALSE,
+	TRUE,
+	FALSE,
+	FALSE,
+	FALSE,
+	FALSE,
+	FALSE,
+	TRUE,
+	TRUE,
+	TRUE,
+	TRUE,
+	TRUE,
+	TRUE,
+	TRUE,
+	TRUE,
+	FALSE,
+	FALSE,
+	FALSE,
+	FALSE,
+	FALSE,
+	FALSE,
+	FALSE,
+};
+
+/*
+ * True iff the rpc reply is an nfs status ONLY!
+ *
+ * Indexed by rpc procedure number (nd_procnum).  For procedures marked
+ * TRUE, nfsrv_updatecache() records just nd_repstat (RC_REPSTATUS) instead
+ * of keeping a copy of the reply mbufs (RC_REPMBUF).
+ */
+static int repliesstatus[NFS_NPROCS] = {
+	FALSE,
+	FALSE,
+	FALSE,
+	FALSE,
+	FALSE,
+	FALSE,
+	FALSE,
+	FALSE,
+	FALSE,
+	FALSE,
+	TRUE,
+	TRUE,
+	TRUE,
+	TRUE,
+	FALSE,
+	TRUE,
+	FALSE,
+	FALSE,
+	FALSE,
+	FALSE,
+	FALSE,
+	FALSE,
+	TRUE,
+};
+
+/*
+ * Initialize the server request cache list
+ *
+ * Allocates the hash table with hashinit(), sized from desirednfsrvcache;
+ * hashinit() also fills in rheadhash, which NFSRCHASH() uses as its mask.
+ */
+nfsrv_initcache()
+{
+
+	rheadhtbl = hashinit(desirednfsrvcache, M_NFSD, &rheadhash);
+}
+
+/*
+ * Look for the request in the cache
+ * If found then
+ *    return action and optionally reply
+ * else
+ *    insert it in the cache
+ *
+ * The rules are as follows:
+ * - if in progress, return DROP request
+ * - if completed within DELAY of the current time, return DROP it
+ * - if completed a longer time ago return REPLY if the reply was cached or
+ *   return DOIT
+ * Update/add new request at end of lru list
+ *
+ * Entries are matched on xid, procedure number and client address.
+ * Returns RC_DOIT, RC_DROPIT or RC_REPLY; for RC_REPLY the reply is handed
+ * back through repp.  NQNFS requests (nd_nqlflag != NQL_NOVAL) bypass the
+ * cache and always get RC_DOIT.
+ *
+ * NOTE(review): no completion-time (DELAY) test is visible in the code
+ * below; a completed entry is answered from the cache or redone at once.
+ * NOTE(review): when the LRU head is recycled, rc_next is dereferenced
+ * without a NULL check, i.e. the list is assumed to hold more than one
+ * entry at that point -- confirm.
+ */
+nfsrv_getcache(nam, nd, repp)
+	struct mbuf *nam;
+	register struct nfsd *nd;
+	struct mbuf **repp;
+{
+	register struct nfsrvcache *rp, *rq, **rpp;
+	struct mbuf *mb;
+	struct sockaddr_in *saddr;
+	caddr_t bpos;
+	int ret;
+
+	if (nd->nd_nqlflag != NQL_NOVAL)
+		return (RC_DOIT);
+	rpp = &rheadhtbl[NFSRCHASH(nd->nd_retxid)];
+loop:
+	for (rp = *rpp; rp; rp = rp->rc_forw) {
+	    if (nd->nd_retxid == rp->rc_xid && nd->nd_procnum == rp->rc_proc &&
+		netaddr_match(NETFAMILY(rp), &rp->rc_haddr, nam)) {
+			if ((rp->rc_flag & RC_LOCKED) != 0) {	/* entry busy: sleep, then rescan the chain */
+				rp->rc_flag |= RC_WANTED;
+				(void) tsleep((caddr_t)rp, PZERO-1, "nfsrc", 0);
+				goto loop;
+			}
+			rp->rc_flag |= RC_LOCKED;
+			/* If not at end of LRU chain, move it there */
+			if (rp->rc_next) {
+				/* remove from LRU chain */
+				*rp->rc_prev = rp->rc_next;
+				rp->rc_next->rc_prev = rp->rc_prev;
+				/* and replace at end of it */
+				rp->rc_next = NULL;
+				rp->rc_prev = nfsrvlrutail;
+				*nfsrvlrutail = rp;
+				nfsrvlrutail = &rp->rc_next;
+			}
+			if (rp->rc_state == RC_UNUSED)
+				panic("nfsrv cache");
+			if (rp->rc_state == RC_INPROG) {
+				nfsstats.srvcache_inproghits++;
+				ret = RC_DROPIT;
+			} else if (rp->rc_flag & RC_REPSTATUS) {	/* only a status was saved: rebuild the reply head */
+				nfsstats.srvcache_nonidemdonehits++;
+				nfs_rephead(0, nd, rp->rc_status,
+				   0, (u_quad_t *)0, repp, &mb, &bpos);
+				ret = RC_REPLY;
+			} else if (rp->rc_flag & RC_REPMBUF) {	/* whole reply was saved: hand out a copy */
+				nfsstats.srvcache_nonidemdonehits++;
+				*repp = m_copym(rp->rc_reply, 0, M_COPYALL,
+						M_WAIT);
+				ret = RC_REPLY;
+			} else {	/* nothing saved: the request is done again */
+				nfsstats.srvcache_idemdonehits++;
+				rp->rc_state = RC_INPROG;
+				ret = RC_DOIT;
+			}
+			rp->rc_flag &= ~RC_LOCKED;
+			if (rp->rc_flag & RC_WANTED) {
+				rp->rc_flag &= ~RC_WANTED;
+				wakeup((caddr_t)rp);
+			}
+			return (ret);
+		}
+	}
+	nfsstats.srvcache_misses++;
+	if (numnfsrvcache < desirednfsrvcache) {	/* still below the limit: allocate a fresh entry */
+		rp = (struct nfsrvcache *)malloc((u_long)sizeof *rp,
+		    M_NFSD, M_WAITOK);
+		bzero((char *)rp, sizeof *rp);
+		numnfsrvcache++;
+		rp->rc_flag = RC_LOCKED;
+	} else {	/* at the limit: recycle the entry at the LRU head */
+		rp = nfsrvlruhead;
+		while ((rp->rc_flag & RC_LOCKED) != 0) {
+			rp->rc_flag |= RC_WANTED;
+			(void) tsleep((caddr_t)rp, PZERO-1, "nfsrc", 0);
+			rp = nfsrvlruhead;
+		}
+		rp->rc_flag |= RC_LOCKED;
+		/* remove from hash chain */
+		if (rq = rp->rc_forw)
+			rq->rc_back = rp->rc_back;
+		*rp->rc_back = rq;
+		/* remove from LRU chain */
+		*rp->rc_prev = rp->rc_next;
+		rp->rc_next->rc_prev = rp->rc_prev;
+		if (rp->rc_flag & RC_REPMBUF)
+			m_freem(rp->rc_reply);
+		if (rp->rc_flag & RC_NAM)
+			MFREE(rp->rc_nam, mb);
+		rp->rc_flag &= (RC_LOCKED | RC_WANTED);	/* drop every flag of the old request */
+	}
+	/* place at end of LRU list */
+	rp->rc_next = NULL;
+	rp->rc_prev = nfsrvlrutail;
+	*nfsrvlrutail = rp;
+	nfsrvlrutail = &rp->rc_next;
+	rp->rc_state = RC_INPROG;
+	rp->rc_xid = nd->nd_retxid;
+	saddr = mtod(nam, struct sockaddr_in *);
+	switch (saddr->sin_family) {
+	case AF_INET:	/* internet address is stored in the entry itself */
+		rp->rc_flag |= RC_INETADDR;
+		rp->rc_inetaddr = saddr->sin_addr.s_addr;
+		break;
+	case AF_ISO:
+	default:	/* anything else: keep a copy of the address mbuf */
+		rp->rc_flag |= RC_NAM;
+		rp->rc_nam = m_copym(nam, 0, M_COPYALL, M_WAIT);
+		break;
+	};
+	rp->rc_proc = nd->nd_procnum;
+	/* insert into hash chain */
+	if (rq = *rpp)
+		rq->rc_back = &rp->rc_forw;
+	rp->rc_forw = rq;
+	rp->rc_back = rpp;
+	*rpp = rp;
+	rp->rc_flag &= ~RC_LOCKED;
+	if (rp->rc_flag & RC_WANTED) {
+		rp->rc_flag &= ~RC_WANTED;
+		wakeup((caddr_t)rp);
+	}
+	return (RC_DOIT);
+}
+
+/*
+ * Update a request cache entry after the rpc has been done
+ *
+ * Finds the entry that matches the request (xid, procedure number and
+ * client address) and marks it RC_DONE.  When repvalid is set and the
+ * procedure is nonidempotent, the outcome is saved as well: just
+ * nd_repstat for status-only procedures, otherwise a copy of repmbuf.
+ * Nothing happens for NQNFS requests (nd_nqlflag != NQL_NOVAL) or when no
+ * entry matches.
+ */
+void
+nfsrv_updatecache(nam, nd, repvalid, repmbuf)
+	struct mbuf *nam;
+	register struct nfsd *nd;
+	int repvalid;
+	struct mbuf *repmbuf;
+{
+	register struct nfsrvcache *rp;
+
+	if (nd->nd_nqlflag != NQL_NOVAL)
+		return;
+loop:
+	for (rp = rheadhtbl[NFSRCHASH(nd->nd_retxid)]; rp; rp = rp->rc_forw) {
+	    if (nd->nd_retxid == rp->rc_xid && nd->nd_procnum == rp->rc_proc &&
+		netaddr_match(NETFAMILY(rp), &rp->rc_haddr, nam)) {
+			if ((rp->rc_flag & RC_LOCKED) != 0) {	/* entry busy: sleep, then rescan the chain */
+				rp->rc_flag |= RC_WANTED;
+				(void) tsleep((caddr_t)rp, PZERO-1, "nfsrc", 0);
+				goto loop;
+			}
+			rp->rc_flag |= RC_LOCKED;
+			rp->rc_state = RC_DONE;
+			/*
+			 * If we have a valid reply update status and save
+			 * the reply for non-idempotent rpc's.
+			 */
+			if (repvalid && nonidempotent[nd->nd_procnum]) {
+				if (repliesstatus[nd->nd_procnum]) {
+					rp->rc_status = nd->nd_repstat;
+					rp->rc_flag |= RC_REPSTATUS;
+				} else {
+					rp->rc_reply = m_copym(repmbuf,
+						0, M_COPYALL, M_WAIT);
+					rp->rc_flag |= RC_REPMBUF;
+				}
+			}
+			rp->rc_flag &= ~RC_LOCKED;
+			if (rp->rc_flag & RC_WANTED) {
+				rp->rc_flag &= ~RC_WANTED;
+				wakeup((caddr_t)rp);
+			}
+			return;
+		}
+	}
+}
+
+/*
+ * Clean out the cache. Called when the last nfsd terminates.
+ *
+ * Every entry on the LRU list is released together with the mbufs it
+ * owns: the saved reply chain (RC_REPMBUF) and the copied client address
+ * (RC_NAM).  Without those two frees the mbufs would be leaked on every
+ * teardown, since nothing else refers to them once the entry is gone.
+ * Afterwards the hash table is emptied and the LRU list and the entry
+ * count are reset, so the cache can be filled again from scratch.
+ */
+void
+nfsrv_cleancache()
+{
+	register struct nfsrvcache *rp, *nextrp;
+
+	for (rp = nfsrvlruhead; rp; rp = nextrp) {
+		/* Fetch the successor first; rp is gone after free(). */
+		nextrp = rp->rc_next;
+		if (rp->rc_flag & RC_REPMBUF)
+			m_freem(rp->rc_reply);
+		/* rc_nam comes from m_copym(), so release the whole chain. */
+		if (rp->rc_flag & RC_NAM)
+			m_freem(rp->rc_nam);
+		free(rp, M_NFSD);
+	}
+	/* rheadhash is the table mask, hence rheadhash + 1 buckets. */
+	bzero((char *)rheadhtbl, (rheadhash + 1) * sizeof(void *));
+	nfsrvlruhead = NULL;
+	nfsrvlrutail = &nfsrvlruhead;
+	numnfsrvcache = 0;
+}
diff --git a/sys/nfsserver/nfs_srvsock.c b/sys/nfsserver/nfs_srvsock.c
new file mode 100644
index 00000000000..cf88ed33d92
--- /dev/null
+++ b/sys/nfsserver/nfs_srvsock.c
@@ -0,0 +1,1990 @@
+/*
+ * Copyright (c) 1989, 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Rick Macklem at The University of Guelph.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)nfs_socket.c	8.3 (Berkeley) 1/12/94
+ */
+
+/*
+ * Socket operations for use by nfs
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/mount.h>
+#include <sys/kernel.h>
+#include <sys/mbuf.h>
+#include <sys/vnode.h>
+#include <sys/domain.h>
+#include <sys/protosw.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/syslog.h>
+#include <sys/tprintf.h>
+
+#include <netinet/in.h>
+#include <netinet/tcp.h>
+#include <nfs/rpcv2.h>
+#include <nfs/nfsv2.h>
+#include <nfs/nfs.h>
+#include <nfs/xdr_subs.h>
+#include <nfs/nfsm_subs.h>
+#include <nfs/nfsmount.h>
+#include <nfs/nfsnode.h>
+#include <nfs/nfsrtt.h>
+#include <nfs/nqnfs.h>
+
+#define	TRUE	1
+#define	FALSE	0
+
+/*
+ * Estimate rto for an nfs rpc sent via an unreliable datagram.
+ * Use the mean and mean deviation of rtt for the appropriate type of rpc
+ * for the frequent rpcs and a default for the others.
+ * The justification for doing "other" this way is that these rpcs
+ * happen so infrequently that timer est. would probably be stale.
+ * Also, since many of these rpcs are
+ * non-idempotent, a conservative timeout is desired.
+ * getattr, lookup - A+2D
+ * read, write     - A+4D
+ * other           - nm_timeo
+ */
+/*
+ * NFS_RTO(n, t): retransmit timeout for mount point n and timer class t.
+ * Class 0 uses the mount's fixed nm_timeo; classes 1 and 2 use A+2D and
+ * the remaining classes A+4D, with the estimators for class t kept at
+ * index t - 1.  Every use of the arguments is parenthesized so that an
+ * expression may safely be passed for t.
+ */
+#define	NFS_RTO(n, t) \
+	((t) == 0 ? (n)->nm_timeo : \
+	 ((t) < 3 ? \
+	  (((((n)->nm_srtt[(t) - 1] + 3) >> 2) + (n)->nm_sdrtt[(t) - 1] + 1) >> 1) : \
+	  ((((n)->nm_srtt[(t) - 1] + 7) >> 3) + (n)->nm_sdrtt[(t) - 1] + 1)))
+/*
+ * Estimator slots (usable as lvalues) for the timer class of request r.
+ * Only meaningful when proct[(r)->r_procnum] is non-zero.
+ */
+#define	NFS_SRTT(r)	((r)->r_nmp->nm_srtt[proct[(r)->r_procnum] - 1])
+#define	NFS_SDRTT(r)	((r)->r_nmp->nm_sdrtt[proct[(r)->r_procnum] - 1])
+/*
+ * External data, mostly RPC constants in XDR form
+ */
+extern u_long rpc_reply, rpc_msgdenied, rpc_mismatch, rpc_vers, rpc_auth_unix,
+	rpc_msgaccepted, rpc_call, rpc_autherr, rpc_rejectedcred,
+	rpc_auth_kerb;
+extern u_long nfs_prog, nfs_vers, nqnfs_prog, nqnfs_vers;
+extern time_t nqnfsstarttime;
+extern int nonidempotent[NFS_NPROCS];
+
+/*
+ * Maps errno values to nfs error numbers.
+ * Use NFSERR_IO as the catch all for ones not specifically defined in
+ * RFC 1094.
+ * The table is indexed by (errno - 1), see its use in nfs_rephead(); the
+ * range noted beside each row is the errno values that row covers.
+ */
+static int nfsrv_errmap[ELAST] = {
+  NFSERR_PERM,	NFSERR_NOENT,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	/*  1 -  5 */
+  NFSERR_NXIO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	/*  6 - 10 */
+  NFSERR_IO,	NFSERR_IO,	NFSERR_ACCES,	NFSERR_IO,	NFSERR_IO,	/* 11 - 15 */
+  NFSERR_IO,	NFSERR_EXIST,	NFSERR_IO,	NFSERR_NODEV,	NFSERR_NOTDIR,	/* 16 - 20 */
+  NFSERR_ISDIR,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	/* 21 - 25 */
+  NFSERR_IO,	NFSERR_FBIG,	NFSERR_NOSPC,	NFSERR_IO,	NFSERR_ROFS,	/* 26 - 30 */
+  NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	/* 31 - 35 */
+  NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	/* 36 - 40 */
+  NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	/* 41 - 45 */
+  NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	/* 46 - 50 */
+  NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	/* 51 - 55 */
+  NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	/* 56 - 60 */
+  NFSERR_IO,	NFSERR_IO,	NFSERR_NAMETOL,	NFSERR_IO,	NFSERR_IO,	/* 61 - 65 */
+  NFSERR_NOTEMPTY, NFSERR_IO,	NFSERR_IO,	NFSERR_DQUOT,	NFSERR_STALE,	/* 66 - 70 */
+  NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	/* 71 - 75 */
+  NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	NFSERR_IO,	/* 76 - 80 */
+  NFSERR_IO,									/* 81 */
+};
+
+/*
+ * Defines which timer to use for the procnum.
+ * 0 - default
+ * 1 - getattr
+ * 2 - lookup
+ * 3 - read
+ * 4 - write
+ *
+ * The table is indexed by rpc procedure number.  A non-zero entry t
+ * selects the estimator pair nm_srtt[t - 1] / nm_sdrtt[t - 1] (see
+ * NFS_SRTT() and NFS_SDRTT()); nfs_request() only sets R_TIMING for
+ * procedures whose entry is non-zero.
+ */
+static int proct[NFS_NPROCS] = {
+	0, 1, 0, 0, 2, 3, 3, 0, 4, 0, 0, 0, 0, 0, 0, 0, 3, 0, 3, 0, 0, 0, 0,
+};
+
+/*
+ * There is a congestion window for outstanding rpcs maintained per mount
+ * point. The cwnd size is adjusted in roughly the way that:
+ * Van Jacobson, Congestion avoidance and Control, In "Proceedings of
+ * SIGCOMM '88". ACM, August 1988.
+ * describes for TCP. The cwnd size is chopped in half on a retransmit timeout
+ * and incremented by 1/cwnd when each rpc reply is received and a full cwnd
+ * of rpcs is in progress.
+ * (The sent count and cwnd are scaled for integer arith.)
+ * Variants of "slow start" were tried and were found to be too much of a
+ * performance hit (ave. rtt 3 times larger),
+ * I suspect due to the large rtt that nfs rpcs have.
+ */
+/* One outstanding rpc counts as NFS_CWNDSCALE units of nm_sent / nm_cwnd. */
+#define	NFS_CWNDSCALE	256
+/* Upper bound on the scaled congestion window: 32 rpcs in flight. */
+#define	NFS_MAXCWND	(NFS_CWNDSCALE * 32)
+/*
+ * Multipliers applied in turn to the nqnfs "try later" delay each time
+ * nfs_request() is told to try again.
+ */
+static int nfs_backoff[8] = { 2, 4, 8, 16, 32, 64, 128, 256, };
+/* Routines used before (or instead of) being defined in this file. */
+int	nfs_sbwait();
+void	nfs_disconnect(), nfs_realign(), nfsrv_wakenfsd(), nfs_sndunlock();
+void	nfs_rcvunlock(), nqnfs_serverd(), nqnfs_clientlease();
+struct mbuf *nfsm_rpchead();
+/* When non-zero, nfs_reply() appends a record to nfsrtt for every reply. */
+int nfsrtton = 0;
+/* Circular log of rtt samples, written at nfsrtt.pos by nfs_reply(). */
+struct nfsrtt nfsrtt;
+struct nfsd nfsd_head;
+
+/*
+ * Server side rpc handlers, declared here K&R style so that the
+ * table below can take their addresses.
+ */
+int	nfsrv_null(),
+	nfsrv_getattr(),
+	nfsrv_setattr(),
+	nfsrv_lookup(),
+	nfsrv_readlink(),
+	nfsrv_read(),
+	nfsrv_write(),
+	nfsrv_create(),
+	nfsrv_remove(),
+	nfsrv_rename(),
+	nfsrv_link(),
+	nfsrv_symlink(),
+	nfsrv_mkdir(),
+	nfsrv_rmdir(),
+	nfsrv_readdir(),
+	nfsrv_statfs(),
+	nfsrv_noop(),
+	nqnfsrv_readdirlook(),
+	nqnfsrv_getlease(),
+	nqnfsrv_vacated(),
+	nqnfsrv_access();
+
+/*
+ * Table of server handlers, one slot per procedure (NFS_NPROCS entries).
+ * NOTE(review): presumably indexed by rpc procedure number, with
+ * nfsrv_noop filling the numbers that have no handler -- the dispatch
+ * site is not in this part of the file, confirm against the caller.
+ */
+int (*nfsrv_procs[NFS_NPROCS])() = {
+	nfsrv_null,
+	nfsrv_getattr,
+	nfsrv_setattr,
+	nfsrv_noop,
+	nfsrv_lookup,
+	nfsrv_readlink,
+	nfsrv_read,
+	nfsrv_noop,
+	nfsrv_write,
+	nfsrv_create,
+	nfsrv_remove,
+	nfsrv_rename,
+	nfsrv_link,
+	nfsrv_symlink,
+	nfsrv_mkdir,
+	nfsrv_rmdir,
+	nfsrv_readdir,
+	nfsrv_statfs,
+	nqnfsrv_readdirlook,
+	nqnfsrv_getlease,
+	nqnfsrv_vacated,
+	nfsrv_noop,
+	nqnfsrv_access,
+};
+
+/*
+ * Head of the circular, doubly linked list of outstanding client
+ * requests.  The head itself is the sentinel: walks stop when they get
+ * back to &nfsreqh and nfs_request() appends new entries just before it.
+ */
+struct nfsreq nfsreqh;
+
+/*
+ * Initialize sockets and congestion for a new NFS connection.
+ * We do not free the sockaddr if error.
+ *
+ * nmp - mount point; nm_nam holds the server address and nm_so receives
+ *       the new socket
+ * rep - request on whose behalf we connect, or NULL; when given, the
+ *       wait for the connection is abandoned if nfs_sigintr() says so
+ *
+ * Returns 0 on success.  On any failure the socket is torn down again
+ * through nfs_disconnect() and the errno value is returned.
+ */
+nfs_connect(nmp, rep)
+	register struct nfsmount *nmp;
+	struct nfsreq *rep;
+{
+	register struct socket *so;
+	int s, error, rcvreserve, sndreserve;
+	struct sockaddr *saddr;
+	struct sockaddr_in *sin;
+	struct mbuf *m;
+	u_short tport;
+
+	nmp->nm_so = (struct socket *)0;
+	saddr = mtod(nmp->nm_nam, struct sockaddr *);
+	if (error = socreate(saddr->sa_family,
+		&nmp->nm_so, nmp->nm_sotype, nmp->nm_soproto))
+		goto bad;
+	so = nmp->nm_so;
+	nmp->nm_soflags = so->so_proto->pr_flags;
+
+	/*
+	 * Some servers require that the client port be a reserved port number.
+	 * Start just below IPPORT_RESERVED and walk downwards for as long as
+	 * the port is in use, giving up at IPPORT_RESERVED / 2.
+	 */
+	if (saddr->sa_family == AF_INET && (nmp->nm_flag & NFSMNT_RESVPORT)) {
+		MGET(m, M_WAIT, MT_SONAME);
+		sin = mtod(m, struct sockaddr_in *);
+		sin->sin_len = m->m_len = sizeof (struct sockaddr_in);
+		sin->sin_family = AF_INET;
+		sin->sin_addr.s_addr = INADDR_ANY;
+		tport = IPPORT_RESERVED - 1;
+		sin->sin_port = htons(tport);
+		while ((error = sobind(so, m)) == EADDRINUSE &&
+		       --tport > IPPORT_RESERVED / 2)
+			sin->sin_port = htons(tport);
+		m_freem(m);
+		if (error)
+			goto bad;
+	}
+
+	/*
+	 * Protocols that do not require connections may be optionally left
+	 * unconnected for servers that reply from a port other than NFS_PORT.
+	 */
+	if (nmp->nm_flag & NFSMNT_NOCONN) {
+		if (nmp->nm_soflags & PR_CONNREQUIRED) {
+			error = ENOTCONN;
+			goto bad;
+		}
+	} else {
+		if (error = soconnect(so, nmp->nm_nam))
+			goto bad;
+
+		/*
+		 * Wait for the connection to complete. Cribbed from the
+		 * connect system call but with the wait timing out so
+		 * that interruptible mounts don't hang here for a long time.
+		 */
+		s = splnet();
+		while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
+			(void) tsleep((caddr_t)&so->so_timeo, PSOCK,
+				"nfscon", 2 * hz);
+			if ((so->so_state & SS_ISCONNECTING) &&
+			    so->so_error == 0 && rep &&
+			    (error = nfs_sigintr(nmp, rep, rep->r_procp))) {
+				so->so_state &= ~SS_ISCONNECTING;
+				splx(s);
+				goto bad;
+			}
+		}
+		if (so->so_error) {
+			error = so->so_error;
+			so->so_error = 0;
+			splx(s);
+			goto bad;
+		}
+		splx(s);
+	}
+
+	/* Soft and interruptible mounts get a bounded wait in the sockbufs. */
+	if (nmp->nm_flag & (NFSMNT_SOFT | NFSMNT_INT)) {
+		so->so_rcv.sb_timeo = (5 * hz);
+		so->so_snd.sb_timeo = (5 * hz);
+	} else {
+		so->so_rcv.sb_timeo = 0;
+		so->so_snd.sb_timeo = 0;
+	}
+
+	/*
+	 * Size the socket buffers from the mount's read/write sizes: room
+	 * for one maximal rpc on datagram sockets, two otherwise (plus the
+	 * record mark on stream sockets).
+	 */
+	if (nmp->nm_sotype == SOCK_DGRAM) {
+		sndreserve = nmp->nm_wsize + NFS_MAXPKTHDR;
+		rcvreserve = nmp->nm_rsize + NFS_MAXPKTHDR;
+	} else if (nmp->nm_sotype == SOCK_SEQPACKET) {
+		sndreserve = (nmp->nm_wsize + NFS_MAXPKTHDR) * 2;
+		rcvreserve = (nmp->nm_rsize + NFS_MAXPKTHDR) * 2;
+	} else {
+		if (nmp->nm_sotype != SOCK_STREAM)
+			panic("nfscon sotype");
+		/* sosetopt() takes over the option mbuf in both cases below */
+		if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
+			MGET(m, M_WAIT, MT_SOOPTS);
+			*mtod(m, int *) = 1;
+			m->m_len = sizeof(int);
+			sosetopt(so, SOL_SOCKET, SO_KEEPALIVE, m);
+		}
+		if (so->so_proto->pr_protocol == IPPROTO_TCP) {
+			MGET(m, M_WAIT, MT_SOOPTS);
+			*mtod(m, int *) = 1;
+			m->m_len = sizeof(int);
+			sosetopt(so, IPPROTO_TCP, TCP_NODELAY, m);
+		}
+		sndreserve = (nmp->nm_wsize + NFS_MAXPKTHDR + sizeof (u_long))
+				* 2;
+		rcvreserve = (nmp->nm_rsize + NFS_MAXPKTHDR + sizeof (u_long))
+				* 2;
+	}
+	if (error = soreserve(so, sndreserve, rcvreserve))
+		goto bad;
+	so->so_rcv.sb_flags |= SB_NOINTR;
+	so->so_snd.sb_flags |= SB_NOINTR;
+
+	/*
+	 * Initialize other non-zero congestion variables.
+	 * There are four timer classes (proct[] values 1-4) whose
+	 * estimators live at indices 0-3; nothing ever reads a fifth slot,
+	 * so none is written.
+	 * NOTE(review): nm_srtt/nm_sdrtt are declared outside this file;
+	 * confirm they hold four entries each.
+	 */
+	nmp->nm_srtt[0] = nmp->nm_srtt[1] = nmp->nm_srtt[2] =
+		nmp->nm_srtt[3] = (NFS_TIMEO << 3);
+	nmp->nm_sdrtt[0] = nmp->nm_sdrtt[1] = nmp->nm_sdrtt[2] =
+		nmp->nm_sdrtt[3] = 0;
+	nmp->nm_cwnd = NFS_MAXCWND / 2;	    /* Initial send window */
+	nmp->nm_sent = 0;
+	nmp->nm_timeouts = 0;
+	return (0);
+
+bad:
+	nfs_disconnect(nmp);
+	return (error);
+}
+
+/*
+ * Reconnect routine:
+ * Called when the connection of a reliable protocol has broken.
+ * - throw the old socket away
+ * - keep calling nfs_connect(), pausing on lbolt between attempts,
+ *   until it succeeds or is interrupted
+ * - flag every outstanding request on this mount point R_MUSTRESEND
+ * Returns 0 once reconnected, EINTR if the attempt was interrupted.
+ * nb: Must be called with the nfs_sndlock() set on the mount point.
+ */
+nfs_reconnect(rep)
+	register struct nfsreq *rep;
+{
+	register struct nfsreq *rp;
+	register struct nfsmount *nmp = rep->r_nmp;
+	int error;
+
+	nfs_disconnect(nmp);
+	for (;;) {
+		error = nfs_connect(nmp, rep);
+		if (error == 0)
+			break;
+		if (error == EINTR || error == ERESTART)
+			return (EINTR);
+		(void) tsleep((caddr_t)&lbolt, PSOCK, "nfscon", 0);
+	}
+
+	/*
+	 * Whatever was in flight on the old socket is gone, so every
+	 * request queued for this mount point has to go out again.
+	 */
+	for (rp = nfsreqh.r_next; rp != &nfsreqh; rp = rp->r_next) {
+		if (rp->r_nmp == nmp)
+			rp->r_flags |= R_MUSTRESEND;
+	}
+	return (0);
+}
+
+/*
+ * NFS disconnect. Detach the socket from the mount point, then shut it
+ * down in both directions and close it.  A mount point without a socket
+ * is left alone.
+ */
+void
+nfs_disconnect(nmp)
+	register struct nfsmount *nmp;
+{
+	register struct socket *so = nmp->nm_so;
+
+	if (so == NULL)
+		return;
+	/* clear nm_so first so nobody picks up a socket that is going away */
+	nmp->nm_so = (struct socket *)0;
+	soshutdown(so, 2);
+	soclose(so);
+}
+
+/*
+ * The nfs send routine. For connection based socket types the caller
+ * must hold an nfs_sndlock() on the socket.
+ *
+ * so  - socket to send on (server side; the client side takes the
+ *       socket from rep->r_nmp instead)
+ * nam - destination address, used only on unconnected sockets
+ * top - mbuf chain to transmit; always consumed
+ * rep - the client request, or NULL when called from a server
+ *
+ * Client side:
+ * - returns EINTR if the RPC has been terminated, 0 otherwise
+ * - R_MUSTRESEND is set whenever the send did not go out
+ * Server side:
+ * - returns EINTR or ERESTART if interrupted by a signal
+ * - returns EPIPE if a connection based socket lost its connection
+ * Either side: any other socket error is logged and then dropped (???)
+ */
+nfs_send(so, nam, top, rep)
+	register struct socket *so;
+	struct mbuf *nam;
+	register struct mbuf *top;
+	struct nfsreq *rep;
+{
+	struct mbuf *sendnam;
+	int error, soflags, flags;
+
+	if (rep == NULL) {
+		soflags = so->so_proto->pr_flags;
+	} else {
+		if (rep->r_flags & R_SOFTTERM) {
+			m_freem(top);
+			return (EINTR);
+		}
+		so = rep->r_nmp->nm_so;
+		if (so == NULL) {
+			/* no socket right now, leave it to a later resend */
+			rep->r_flags |= R_MUSTRESEND;
+			m_freem(top);
+			return (0);
+		}
+		rep->r_flags &= ~R_MUSTRESEND;
+		soflags = rep->r_nmp->nm_soflags;
+	}
+
+	/* An address is only handed down for unconnected sockets. */
+	sendnam = ((soflags & PR_CONNREQUIRED) ||
+	    (so->so_state & SS_ISCONNECTED)) ? (struct mbuf *)0 : nam;
+	flags = (so->so_type == SOCK_SEQPACKET) ? MSG_EOR : 0;
+
+	error = sosend(so, sendnam, (struct uio *)0, top,
+		(struct mbuf *)0, flags);
+	if (error == 0)
+		return (0);
+
+	if (rep == NULL) {
+		log(LOG_INFO, "nfsd send error %d\n", error);
+	} else {
+		log(LOG_INFO, "nfs send error %d for server %s\n",error,
+		    rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
+		/*
+		 * Client side: a terminated request reports EINTR,
+		 * anything else is queued for retransmission.
+		 */
+		if (rep->r_flags & R_SOFTTERM)
+			error = EINTR;
+		else
+			rep->r_flags |= R_MUSTRESEND;
+	}
+
+	/*
+	 * Only these four are passed back; every other error is treated
+	 * as recoverable (soft) and swallowed here. (???)
+	 */
+	if (!(error == EINTR || error == ERESTART ||
+	    error == EWOULDBLOCK || error == EPIPE))
+		error = 0;
+	return (error);
+}
+
+/*
+ * Receive a Sun RPC Request/Reply. For SOCK_DGRAM, the work is all
+ * done by soreceive(), but for SOCK_STREAM we must deal with the Record
+ * Mark and consolidate the data into a new mbuf list.
+ * nb: Sometimes TCP passes the data up to soreceive() in long lists of
+ *     small mbufs.
+ * For SOCK_STREAM we must be very careful to read an entire record once
+ * we have read any of it, even if the system call has been interrupted.
+ *
+ * rep   - the request we are receiving for; supplies the mount point
+ * aname - out: sender address mbuf (only filled in for an unconnected
+ *         datagram socket), else NULL
+ * mp    - out: the received message as an mbuf chain, NULL on error
+ *
+ * Returns 0 or an errno value.  For non-datagram sockets a receive
+ * failure other than EINTR/ERESTART triggers a reconnect and another
+ * try before anything is returned to the caller.
+ */
+nfs_receive(rep, aname, mp)
+	register struct nfsreq *rep;
+	struct mbuf **aname;
+	struct mbuf **mp;
+{
+	register struct socket *so;
+	struct uio auio;
+	struct iovec aio;
+	register struct mbuf *m;
+	struct mbuf *control;
+	u_long len;
+	struct mbuf **getnam;
+	int error, sotype, rcvflg;
+	struct proc *p = curproc;	/* XXX */
+
+	/*
+	 * Set up arguments for soreceive()
+	 */
+	*mp = (struct mbuf *)0;
+	*aname = (struct mbuf *)0;
+	sotype = rep->r_nmp->nm_sotype;
+
+	/*
+	 * For reliable protocols, lock against other senders/receivers
+	 * in case a reconnect is necessary.
+	 * For SOCK_STREAM, first get the Record Mark to find out how much
+	 * more there is to get.
+	 * We must lock the socket against other receivers
+	 * until we have an entire rpc request/reply.
+	 */
+	if (sotype != SOCK_DGRAM) {
+		if (error = nfs_sndlock(&rep->r_nmp->nm_flag, rep))
+			return (error);
+tryagain:
+		/*
+		 * Check for fatal errors and resending request.
+		 * (Every path that jumps to tryagain holds the send lock.)
+		 */
+		/*
+		 * Ugh: If a reconnect attempt just happened, nm_so
+		 * would have changed. NULL indicates a failed
+		 * attempt that has essentially shut down this
+		 * mount point.
+		 */
+		if (rep->r_mrep || (rep->r_flags & R_SOFTTERM)) {
+			nfs_sndunlock(&rep->r_nmp->nm_flag);
+			return (EINTR);
+		}
+		if ((so = rep->r_nmp->nm_so) == NULL) {
+			if (error = nfs_reconnect(rep)) {
+				nfs_sndunlock(&rep->r_nmp->nm_flag);
+				return (error);
+			}
+			goto tryagain;
+		}
+		/*
+		 * Retransmit for as long as the request is flagged;
+		 * nfs_send() clears R_MUSTRESEND when it gets to the send
+		 * and sets it again should the send fail.
+		 */
+		while (rep->r_flags & R_MUSTRESEND) {
+			m = m_copym(rep->r_mreq, 0, M_COPYALL, M_WAIT);
+			nfsstats.rpcretries++;
+			if (error = nfs_send(so, rep->r_nmp->nm_nam, m, rep)) {
+				if (error == EINTR || error == ERESTART ||
+				    (error = nfs_reconnect(rep))) {
+					nfs_sndunlock(&rep->r_nmp->nm_flag);
+					return (error);
+				}
+				goto tryagain;
+			}
+		}
+		nfs_sndunlock(&rep->r_nmp->nm_flag);
+		if (sotype == SOCK_STREAM) {
+			/* First the 4 byte record mark, read straight into len. */
+			aio.iov_base = (caddr_t) &len;
+			aio.iov_len = sizeof(u_long);
+			auio.uio_iov = &aio;
+			auio.uio_iovcnt = 1;
+			auio.uio_segflg = UIO_SYSSPACE;
+			auio.uio_rw = UIO_READ;
+			auio.uio_offset = 0;
+			auio.uio_resid = sizeof(u_long);
+			auio.uio_procp = p;
+			do {
+			   rcvflg = MSG_WAITALL;
+			   error = soreceive(so, (struct mbuf **)0, &auio,
+				(struct mbuf **)0, (struct mbuf **)0, &rcvflg);
+			   if (error == EWOULDBLOCK && rep) {
+				if (rep->r_flags & R_SOFTTERM)
+					return (EINTR);
+			   }
+			} while (error == EWOULDBLOCK);
+			if (!error && auio.uio_resid > 0) {
+			    log(LOG_INFO,
+				 "short receive (%d/%d) from nfs server %s\n",
+				 sizeof(u_long) - auio.uio_resid,
+				 sizeof(u_long),
+				 rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
+			    error = EPIPE;
+			}
+			if (error)
+				goto errout;
+			/* the top bit is the last-fragment flag, not length */
+			len = ntohl(len) & ~0x80000000;
+			/*
+			 * This is SERIOUS! We are out of sync with the sender
+			 * and forcing a disconnect/reconnect is all I can do.
+			 */
+			if (len > NFS_MAXPACKET) {
+			    log(LOG_ERR, "%s (%d) from nfs server %s\n",
+				"impossible packet length",
+				len,
+				rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
+			    error = EFBIG;
+			    goto errout;
+			}
+			/*
+			 * Now the record body. Interruptions are retried
+			 * here: once part of a record has been read the
+			 * rest must follow.
+			 */
+			auio.uio_resid = len;
+			do {
+			    rcvflg = MSG_WAITALL;
+			    error =  soreceive(so, (struct mbuf **)0,
+				&auio, mp, (struct mbuf **)0, &rcvflg);
+			} while (error == EWOULDBLOCK || error == EINTR ||
+				 error == ERESTART);
+			if (!error && auio.uio_resid > 0) {
+			    log(LOG_INFO,
+				"short receive (%d/%d) from nfs server %s\n",
+				len - auio.uio_resid, len,
+				rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
+			    error = EPIPE;
+			}
+		} else {
+			/*
+			 * NB: Since uio_resid is big, MSG_WAITALL is ignored
+			 * and soreceive() will return when it has either a
+			 * control msg or a data msg.
+			 * We have no use for control msg., but must grab them
+			 * and then throw them away so we know what is going
+			 * on.
+			 */
+			auio.uio_resid = len = 100000000; /* Anything Big */
+			auio.uio_procp = p;
+			do {
+			    rcvflg = 0;
+			    error =  soreceive(so, (struct mbuf **)0,
+				&auio, mp, &control, &rcvflg);
+			    if (control)
+				m_freem(control);
+			    if (error == EWOULDBLOCK && rep) {
+				if (rep->r_flags & R_SOFTTERM)
+					return (EINTR);
+			    }
+			} while (error == EWOULDBLOCK ||
+				 (!error && *mp == NULL && control));
+			if ((rcvflg & MSG_EOR) == 0)
+				printf("Egad!!\n");
+			if (!error && *mp == NULL)
+				error = EPIPE;
+			/* len becomes the number of bytes actually received */
+			len -= auio.uio_resid;
+		}
+errout:
+		/*
+		 * Anything but an interruption means the connection can no
+		 * longer be trusted: drop what was read, retake the send
+		 * lock, reconnect and start over.
+		 */
+		if (error && error != EINTR && error != ERESTART) {
+			m_freem(*mp);
+			*mp = (struct mbuf *)0;
+			if (error != EPIPE)
+				log(LOG_INFO,
+				    "receive error %d from nfs server %s\n",
+				    error,
+				 rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
+			error = nfs_sndlock(&rep->r_nmp->nm_flag, rep);
+			if (!error)
+				error = nfs_reconnect(rep);
+			if (!error)
+				goto tryagain;
+		}
+	} else {
+		/* Datagram socket: a single soreceive() yields a whole message. */
+		if ((so = rep->r_nmp->nm_so) == NULL)
+			return (EACCES);
+		/* only ask for the sender's address on an unconnected socket */
+		if (so->so_state & SS_ISCONNECTED)
+			getnam = (struct mbuf **)0;
+		else
+			getnam = aname;
+		auio.uio_resid = len = 1000000;
+		auio.uio_procp = p;
+		do {
+			rcvflg = 0;
+			error =  soreceive(so, getnam, &auio, mp,
+				(struct mbuf **)0, &rcvflg);
+			if (error == EWOULDBLOCK &&
+			    (rep->r_flags & R_SOFTTERM))
+				return (EINTR);
+		} while (error == EWOULDBLOCK);
+		len -= auio.uio_resid;
+	}
+	if (error) {
+		m_freem(*mp);
+		*mp = (struct mbuf *)0;
+	}
+	/*
+	 * Search for any mbufs that are not a multiple of 4 bytes long
+	 * or with m_data not longword aligned.
+	 * These could cause pointer alignment problems, so copy them to
+	 * well aligned mbufs.
+	 */
+	nfs_realign(*mp, 5 * NFSX_UNSIGNED);
+	return (error);
+}
+
+/*
+ * Implement receipt of reply on a socket.
+ * We must search through the list of received datagrams matching them
+ * with outstanding requests using the xid, until ours is found.
+ *
+ * myrep - the request whose reply the caller is waiting for
+ *
+ * Returns 0 once myrep->r_mrep has been filled in (by us or by another
+ * receiver), 0 after a single receive when R_GETONEREP is set, or the
+ * errno value of a failed lock or receive.  Replies belonging to other
+ * outstanding requests are attached to those requests on the way.
+ */
+/* ARGSUSED */
+nfs_reply(myrep)
+	struct nfsreq *myrep;
+{
+	register struct nfsreq *rep;
+	register struct nfsmount *nmp = myrep->r_nmp;
+	register long t1;
+	struct mbuf *mrep, *nam, *md;
+	u_long rxid, *tl;
+	caddr_t dpos, cp2;
+	int error;
+
+	/*
+	 * Loop around until we get our own reply
+	 */
+	for (;;) {
+		/*
+		 * Lock against other receivers so that I don't get stuck in
+		 * sbwait() after someone else has received my reply for me.
+		 * Also necessary for connection based protocols to avoid
+		 * race conditions during a reconnect.
+		 */
+		if (error = nfs_rcvlock(myrep))
+			return (error);
+		/* Already received, bye bye */
+		if (myrep->r_mrep != NULL) {
+			nfs_rcvunlock(&nmp->nm_flag);
+			return (0);
+		}
+		/*
+		 * Get the next Rpc reply off the socket
+		 */
+		error = nfs_receive(myrep, &nam, &mrep);
+		nfs_rcvunlock(&nmp->nm_flag);
+		if (error) {
+
+			/*
+			 * Ignore routing errors on connectionless protocols??
+			 */
+			if (NFSIGNORE_SOERROR(nmp->nm_soflags, error)) {
+				nmp->nm_so->so_error = 0;
+				if (myrep->r_flags & R_GETONEREP)
+					return (0);
+				continue;
+			}
+			return (error);
+		}
+		if (nam)
+			m_freem(nam);
+
+		/*
+		 * Get the xid and check that it is an rpc reply
+		 */
+		md = mrep;
+		dpos = mtod(md, caddr_t);
+		nfsm_dissect(tl, u_long *, 2*NFSX_UNSIGNED);
+		rxid = *tl++;
+		if (*tl != rpc_reply) {
+			/* not a reply: for nqnfs it may be a server callback */
+			if (nmp->nm_flag & NFSMNT_NQNFS) {
+				if (nqnfs_callback(nmp, mrep, md, dpos))
+					nfsstats.rpcinvalid++;
+			} else {
+				nfsstats.rpcinvalid++;
+				m_freem(mrep);
+			}
+nfsmout:
+			if (myrep->r_flags & R_GETONEREP)
+				return (0);
+			continue;
+		}
+
+		/*
+		 * Loop through the request list to match up the reply
+		 * Iff no match, just drop the datagram
+		 */
+		rep = nfsreqh.r_next;
+		while (rep != &nfsreqh) {
+			if (rep->r_mrep == NULL && rxid == rep->r_xid) {
+				/* Found it.. */
+				rep->r_mrep = mrep;
+				rep->r_md = md;
+				rep->r_dpos = dpos;
+				if (nfsrtton) {
+					struct rttl *rt;
+					int timer = proct[rep->r_procnum];
+
+					rt = &nfsrtt.rttl[nfsrtt.pos];
+					rt->proc = rep->r_procnum;
+					rt->rto = NFS_RTO(nmp, timer);
+					rt->sent = nmp->nm_sent;
+					rt->cwnd = nmp->nm_cwnd;
+					/*
+					 * Untimed procedures (timer 0) have
+					 * no estimator slot; log zeros rather
+					 * than reading index -1.
+					 */
+					rt->srtt = timer ?
+					    nmp->nm_srtt[timer - 1] : 0;
+					rt->sdrtt = timer ?
+					    nmp->nm_sdrtt[timer - 1] : 0;
+					rt->fsid = nmp->nm_mountp->mnt_stat.f_fsid;
+					rt->tstamp = time;
+					if (rep->r_flags & R_TIMING)
+						rt->rtt = rep->r_rtt;
+					else
+						rt->rtt = 1000000;
+					nfsrtt.pos = (nfsrtt.pos + 1) % NFSRTTLOGSIZ;
+				}
+				/*
+				 * Update congestion window.
+				 * Do the additive increase of
+				 * one rpc/rtt.
+				 */
+				if (nmp->nm_cwnd <= nmp->nm_sent) {
+					nmp->nm_cwnd +=
+					   (NFS_CWNDSCALE * NFS_CWNDSCALE +
+					   (nmp->nm_cwnd >> 1)) / nmp->nm_cwnd;
+					if (nmp->nm_cwnd > NFS_MAXCWND)
+						nmp->nm_cwnd = NFS_MAXCWND;
+				}
+				/*
+				 * Only give back window space that was
+				 * actually charged: a request that went out
+				 * through the resend path never had R_SENT
+				 * set nor nm_sent bumped (same test as in
+				 * nfs_request()).
+				 */
+				if (rep->r_flags & R_SENT) {
+					rep->r_flags &= ~R_SENT;
+					nmp->nm_sent -= NFS_CWNDSCALE;
+				}
+				/*
+				 * Update rtt using a gain of 0.125 on the mean
+				 * and a gain of 0.25 on the deviation.
+				 */
+				if (rep->r_flags & R_TIMING) {
+					/*
+					 * Since the timer resolution of
+					 * NFS_HZ is so coarse, it can often
+					 * result in r_rtt == 0. Since
+					 * r_rtt == N means that the actual
+					 * rtt is between N+dt and N+2-dt ticks,
+					 * add 1.
+					 */
+					t1 = rep->r_rtt + 1;
+					t1 -= (NFS_SRTT(rep) >> 3);
+					NFS_SRTT(rep) += t1;
+					if (t1 < 0)
+						t1 = -t1;
+					t1 -= (NFS_SDRTT(rep) >> 2);
+					NFS_SDRTT(rep) += t1;
+				}
+				nmp->nm_timeouts = 0;
+				break;
+			}
+			rep = rep->r_next;
+		}
+		/*
+		 * If not matched to a request, drop it.
+		 * If it's mine, get out.
+		 */
+		if (rep == &nfsreqh) {
+			nfsstats.rpcunexpected++;
+			m_freem(mrep);
+		} else if (rep == myrep) {
+			if (rep->r_mrep == NULL)
+				panic("nfsreply nil");
+			return (0);
+		}
+		if (myrep->r_flags & R_GETONEREP)
+			return (0);
+	}
+}
+
+/*
+ * nfs_request - goes something like this
+ *	- fill in request struct
+ *	- links it into list
+ *	- calls nfs_send() for first transmit
+ *	- calls nfs_receive() to get reply
+ *	- break down rpc header and return with nfs reply pointed to
+ *	  by mrep or error
+ * nb: always frees up mreq mbuf list
+ *
+ * vp      - vnode the rpc is for; its mount supplies the nfsmount
+ * mrest   - procedure arguments, already built as an mbuf chain
+ * procnum - rpc procedure number
+ * procp   - process doing the rpc
+ * cred    - credentials used to build the rpc header
+ * mrp, mdp, dposp - out: reply chain, current mbuf and parse position,
+ *           set only when 0 is returned
+ *
+ * Returns 0 or an errno value; the request structure and the request
+ * mbuf chain are released on every return path.
+ */
+nfs_request(vp, mrest, procnum, procp, cred, mrp, mdp, dposp)
+	struct vnode *vp;
+	struct mbuf *mrest;
+	int procnum;
+	struct proc *procp;
+	struct ucred *cred;
+	struct mbuf **mrp;
+	struct mbuf **mdp;
+	caddr_t *dposp;
+{
+	register struct mbuf *m, *mrep;
+	register struct nfsreq *rep;
+	register u_long *tl;
+	register int i;
+	struct nfsmount *nmp;
+	struct mbuf *md, *mheadend;
+	struct nfsreq *reph;
+	struct nfsnode *np;
+	time_t reqtime, waituntil;
+	caddr_t dpos, cp2;
+	int t1, nqlflag, cachable, s, error = 0, mrest_len, auth_len, auth_type;
+	int trylater_delay = NQ_TRYLATERDEL, trylater_cnt = 0, failed_auth = 0;
+	u_long xid;
+	u_quad_t frev;
+	char *auth_str;
+
+	nmp = VFSTONFS(vp->v_mount);
+	MALLOC(rep, struct nfsreq *, sizeof(struct nfsreq), M_NFSREQ, M_WAITOK);
+	rep->r_nmp = nmp;
+	rep->r_vp = vp;
+	rep->r_procp = procp;
+	rep->r_procnum = procnum;
+	/* total length of the argument chain, needed for the rpc header */
+	i = 0;
+	m = mrest;
+	while (m) {
+		i += m->m_len;
+		m = m->m_next;
+	}
+	mrest_len = i;
+
+	/*
+	 * Get the RPC header with authorization.
+	 */
+kerbauth:
+	auth_str = (char *)0;
+	if (nmp->nm_flag & NFSMNT_KERB) {
+		if (failed_auth) {
+			error = nfs_getauth(nmp, rep, cred, &auth_type,
+				&auth_str, &auth_len);
+			if (error) {
+				free((caddr_t)rep, M_NFSREQ);
+				m_freem(mrest);
+				return (error);
+			}
+		} else {
+			auth_type = RPCAUTH_UNIX;
+			auth_len = 5 * NFSX_UNSIGNED;
+		}
+	} else {
+		auth_type = RPCAUTH_UNIX;
+		if (cred->cr_ngroups < 1)
+			panic("nfsreq nogrps");
+		auth_len = ((((cred->cr_ngroups - 1) > nmp->nm_numgrps) ?
+			nmp->nm_numgrps : (cred->cr_ngroups - 1)) << 2) +
+			5 * NFSX_UNSIGNED;
+	}
+	m = nfsm_rpchead(cred, (nmp->nm_flag & NFSMNT_NQNFS), procnum,
+	     auth_type, auth_len, auth_str, mrest, mrest_len, &mheadend, &xid);
+	if (auth_str)
+		free(auth_str, M_TEMP);
+
+	/*
+	 * For stream protocols, insert a Sun RPC Record Mark.
+	 */
+	if (nmp->nm_sotype == SOCK_STREAM) {
+		M_PREPEND(m, NFSX_UNSIGNED, M_WAIT);
+		*mtod(m, u_long *) = htonl(0x80000000 |
+			 (m->m_pkthdr.len - NFSX_UNSIGNED));
+	}
+	rep->r_mreq = m;
+	rep->r_xid = xid;
+tryagain:
+	if (nmp->nm_flag & NFSMNT_SOFT)
+		rep->r_retry = nmp->nm_retry;
+	else
+		rep->r_retry = NFS_MAXREXMIT + 1;	/* past clip limit */
+	rep->r_rtt = rep->r_rexmit = 0;
+	if (proct[procnum] > 0)
+		rep->r_flags = R_TIMING;
+	else
+		rep->r_flags = 0;
+	rep->r_mrep = NULL;
+
+	/*
+	 * Do the client side RPC.
+	 */
+	nfsstats.rpcrequests++;
+	/*
+	 * Chain request into list of outstanding requests. Be sure
+	 * to put it LAST so timer finds oldest requests first.
+	 */
+	s = splsoftclock();
+	reph = &nfsreqh;
+	reph->r_prev->r_next = rep;
+	rep->r_prev = reph->r_prev;
+	reph->r_prev = rep;
+	rep->r_next = reph;
+
+	/* Get send time for nqnfs */
+	reqtime = time.tv_sec;
+
+	/*
+	 * If backing off another request or avoiding congestion, don't
+	 * send this one now but let timer do it. If not timing a request,
+	 * do it now.
+	 */
+	if (nmp->nm_so && (nmp->nm_sotype != SOCK_DGRAM ||
+		(nmp->nm_flag & NFSMNT_DUMBTIMR) ||
+		nmp->nm_sent < nmp->nm_cwnd)) {
+		splx(s);
+		if (nmp->nm_soflags & PR_CONNREQUIRED)
+			error = nfs_sndlock(&nmp->nm_flag, rep);
+		if (!error) {
+			/*
+			 * Always copy from the saved request: nfs_send()
+			 * consumes the chain it is given, so after a
+			 * "goto tryagain" m no longer points at anything
+			 * we own.
+			 */
+			m = m_copym(rep->r_mreq, 0, M_COPYALL, M_WAIT);
+			error = nfs_send(nmp->nm_so, nmp->nm_nam, m, rep);
+			if (nmp->nm_soflags & PR_CONNREQUIRED)
+				nfs_sndunlock(&nmp->nm_flag);
+		}
+		if (!error && (rep->r_flags & R_MUSTRESEND) == 0) {
+			nmp->nm_sent += NFS_CWNDSCALE;
+			rep->r_flags |= R_SENT;
+		}
+	} else {
+		splx(s);
+		rep->r_rtt = -1;
+	}
+
+	/*
+	 * Wait for the reply from our send or the timer's.
+	 */
+	if (!error || error == EPIPE)
+		error = nfs_reply(rep);
+
+	/*
+	 * RPC done, unlink the request.
+	 */
+	s = splsoftclock();
+	rep->r_prev->r_next = rep->r_next;
+	rep->r_next->r_prev = rep->r_prev;
+	splx(s);
+
+	/*
+	 * Decrement the outstanding request count.
+	 */
+	if (rep->r_flags & R_SENT) {
+		rep->r_flags &= ~R_SENT;	/* paranoia */
+		nmp->nm_sent -= NFS_CWNDSCALE;
+	}
+
+	/*
+	 * If there was a successful reply and a tprintf msg.
+	 * tprintf a response.
+	 */
+	if (!error && (rep->r_flags & R_TPRINTFMSG))
+		nfs_msg(rep->r_procp, nmp->nm_mountp->mnt_stat.f_mntfromname,
+		    "is alive again");
+	mrep = rep->r_mrep;
+	md = rep->r_md;
+	dpos = rep->r_dpos;
+	if (error) {
+		m_freem(rep->r_mreq);
+		free((caddr_t)rep, M_NFSREQ);
+		return (error);
+	}
+
+	/*
+	 * break down the rpc header and check if ok
+	 */
+	nfsm_dissect(tl, u_long *, 3*NFSX_UNSIGNED);
+	if (*tl++ == rpc_msgdenied) {
+		if (*tl == rpc_mismatch)
+			error = EOPNOTSUPP;
+		else if ((nmp->nm_flag & NFSMNT_KERB) && *tl++ == rpc_autherr) {
+			if (*tl == rpc_rejectedcred && failed_auth == 0) {
+				/*
+				 * First rejection on a kerberos mount: cut
+				 * the arguments loose from the old header,
+				 * toss the header and rebuild it with a
+				 * fresh authenticator.
+				 */
+				failed_auth++;
+				mheadend->m_next = (struct mbuf *)0;
+				m_freem(mrep);
+				m_freem(rep->r_mreq);
+				goto kerbauth;
+			} else
+				error = EAUTH;
+		} else
+			error = EACCES;
+		m_freem(mrep);
+		m_freem(rep->r_mreq);
+		free((caddr_t)rep, M_NFSREQ);
+		return (error);
+	}
+
+	/*
+	 * skip over the auth_verf, someday we may want to cache auth_short's
+	 * for nfs_reqhead(), but for now just dump it
+	 */
+	if (*++tl != 0) {
+		i = nfsm_rndup(fxdr_unsigned(long, *tl));
+		nfsm_adv(i);
+	}
+	nfsm_dissect(tl, u_long *, NFSX_UNSIGNED);
+	/* 0 == ok */
+	if (*tl == 0) {
+		nfsm_dissect(tl, u_long *, NFSX_UNSIGNED);
+		if (*tl != 0) {
+			error = fxdr_unsigned(int, *tl);
+			m_freem(mrep);
+			if ((nmp->nm_flag & NFSMNT_NQNFS) &&
+			    error == NQNFS_TRYLATER) {
+				/*
+				 * Server is busy: sleep out the current
+				 * delay, stretch it for next time and
+				 * send the same request again.
+				 */
+				error = 0;
+				waituntil = time.tv_sec + trylater_delay;
+				while (time.tv_sec < waituntil)
+					(void) tsleep((caddr_t)&lbolt,
+						PSOCK, "nqnfstry", 0);
+				trylater_delay *= nfs_backoff[trylater_cnt];
+				if (trylater_cnt < 7)
+					trylater_cnt++;
+				goto tryagain;
+			}
+
+			/*
+			 * If the File Handle was stale, invalidate the
+			 * lookup cache, just in case.
+			 */
+			if (error == ESTALE)
+				cache_purge(vp);
+			m_freem(rep->r_mreq);
+			free((caddr_t)rep, M_NFSREQ);
+			return (error);
+		}
+
+		/*
+		 * For nqnfs, get any lease in reply
+		 */
+		if (nmp->nm_flag & NFSMNT_NQNFS) {
+			nfsm_dissect(tl, u_long *, NFSX_UNSIGNED);
+			if (*tl) {
+				np = VTONFS(vp);
+				nqlflag = fxdr_unsigned(int, *tl);
+				nfsm_dissect(tl, u_long *, 4*NFSX_UNSIGNED);
+				cachable = fxdr_unsigned(int, *tl++);
+				reqtime += fxdr_unsigned(int, *tl++);
+				if (reqtime > time.tv_sec) {
+				    fxdr_hyper(tl, &frev);
+				    nqnfs_clientlease(nmp, np, nqlflag,
+					cachable, reqtime, frev);
+				}
+			}
+		}
+		*mrp = mrep;
+		*mdp = md;
+		*dposp = dpos;
+		m_freem(rep->r_mreq);
+		FREE((caddr_t)rep, M_NFSREQ);
+		return (0);
+	}
+	m_freem(mrep);
+	error = EPROTONOSUPPORT;
+	/*
+	 * Common exit, also the target of the nfsm_dissect()/nfsm_adv()
+	 * macros when the reply turns out to be too short (they have
+	 * disposed of mrep by then -- see nfs/nfsm_subs.h).  The request
+	 * and its mbuf chain are still ours at this point and must go.
+	 */
+nfsmout:
+	m_freem(rep->r_mreq);
+	free((caddr_t)rep, M_NFSREQ);
+	return (error);
+}
+
+/*
+ * Generate the rpc reply header.
+ *
+ * siz		size of the rest of the reply; only used to decide whether
+ *		attaching a cluster to the first mbuf is worthwhile
+ * nd		request descriptor: supplies the xid to echo (nd_retxid) and
+ *		the nqnfs lease request (nd_nqlflag, nd_duration)
+ * err		0, VNOVAL, or an error number selecting the form of the reply
+ * cache, frev	lease "cachable" value and file revision; only written when
+ *		a lease is piggybacked on the reply
+ * mrq, mbp, bposp
+ *		out: head of the reply chain, the mbuf currently being built
+ *		and the build position inside it
+ *
+ * Always returns 0.  nfsstats.srvrpc_errs is bumped when err is neither 0
+ * nor VNOVAL.
+ */
+nfs_rephead(siz, nd, err, cache, frev, mrq, mbp, bposp)
+	int siz;
+	struct nfsd *nd;
+	int err;
+	int cache;
+	u_quad_t *frev;
+	struct mbuf **mrq;
+	struct mbuf **mbp;
+	caddr_t *bposp;
+{
+	register u_long *tl;
+	register struct mbuf *mreq;
+	caddr_t bpos;
+	struct mbuf *mb, *mb2;	/* mb, mb2 and bpos are used by nfsm_build() */
+
+	MGETHDR(mreq, M_WAIT, MT_DATA);
+	mb = mreq;
+	/*
+	 * If this is a big reply, use a cluster else
+	 * try and leave leading space for the lower level headers.
+	 */
+	siz += RPC_REPLYSIZ;
+	if (siz >= MINCLSIZE) {
+		MCLGET(mreq, M_WAIT);
+	} else
+		mreq->m_data += max_hdr;
+	tl = mtod(mreq, u_long *);
+	/* Six words are reserved up front; the auth-error reply uses five. */
+	mreq->m_len = 6*NFSX_UNSIGNED;
+	bpos = ((caddr_t)tl)+mreq->m_len;
+	*tl++ = nd->nd_retxid;
+	*tl++ = rpc_reply;
+	if (err == ERPCMISMATCH || err == NQNFS_AUTHERR) {
+		/* The call is rejected at the rpc level. */
+		*tl++ = rpc_msgdenied;
+		if (err == NQNFS_AUTHERR) {
+			*tl++ = rpc_autherr;
+			*tl = rpc_rejectedcred;
+			/* Only five words were written: give one back. */
+			mreq->m_len -= NFSX_UNSIGNED;
+			bpos -= NFSX_UNSIGNED;
+		} else {
+			*tl++ = rpc_mismatch;
+			*tl++ = txdr_unsigned(2);
+			*tl = txdr_unsigned(2);
+		}
+	} else {
+		/* Accepted call: two zero words, then the accept status. */
+		*tl++ = rpc_msgaccepted;
+		*tl++ = 0;
+		*tl++ = 0;
+		switch (err) {
+		case EPROGUNAVAIL:
+			*tl = txdr_unsigned(RPC_PROGUNAVAIL);
+			break;
+		case EPROGMISMATCH:
+			*tl = txdr_unsigned(RPC_PROGMISMATCH);
+			nfsm_build(tl, u_long *, 2*NFSX_UNSIGNED);
+			*tl++ = txdr_unsigned(2);
+			*tl = txdr_unsigned(2);	/* someday 3 */
+			break;
+		case EPROCUNAVAIL:
+			*tl = txdr_unsigned(RPC_PROCUNAVAIL);
+			break;
+		default:
+			*tl = 0;
+			/*
+			 * Unless the caller passed VNOVAL, append a status
+			 * word: 0 for success, otherwise err mapped through
+			 * nfsrv_errmap[].
+			 * NOTE(review): err - 1 is not range checked against
+			 * the size of nfsrv_errmap[] here -- confirm that all
+			 * callers pass an errno the table covers.
+			 */
+			if (err != VNOVAL) {
+				nfsm_build(tl, u_long *, NFSX_UNSIGNED);
+				if (err)
+					*tl = txdr_unsigned(nfsrv_errmap[err - 1]);
+				else
+					*tl = 0;
+			}
+			break;
+		}
+	}
+
+	/*
+	 * For nqnfs, piggyback lease as requested.
+	 */
+	if (nd->nd_nqlflag != NQL_NOVAL && err == 0) {
+		if (nd->nd_nqlflag) {
+			nfsm_build(tl, u_long *, 5*NFSX_UNSIGNED);
+			*tl++ = txdr_unsigned(nd->nd_nqlflag);
+			*tl++ = txdr_unsigned(cache);
+			*tl++ = txdr_unsigned(nd->nd_duration);
+			txdr_hyper(frev, tl);
+		} else {
+			/* No lease: a single zero word. */
+			nfsm_build(tl, u_long *, NFSX_UNSIGNED);
+			*tl = 0;
+		}
+	}
+	*mrq = mreq;
+	*mbp = mb;
+	*bposp = bpos;
+	if (err != 0 && err != VNOVAL)
+		nfsstats.srvrpc_errs++;
+	return (0);
+}
+
+/*
+ * Nfs timer routine
+ * Scan the nfsreq list and retransmit any requests that have timed out
+ * To avoid retransmission attempts on STREAM sockets (in the future) make
+ * sure to set the r_retry field to 0 (implies nm_retry == 0).
+ *
+ * The whole scan runs at splnet().  Once per second (judged by time.tv_sec)
+ * nqnfs_serverd() is called as well, and the routine re-arms itself through
+ * timeout() with a delay of hz / NFS_HZ.  The "arg" parameter is not used.
+ */
+void
+nfs_timer(arg)
+	void *arg;
+{
+	register struct nfsreq *rep;
+	register struct mbuf *m;
+	register struct socket *so;
+	register struct nfsmount *nmp;
+	register int timeo;
+	static long lasttime = 0;	/* tv_sec of the last nqnfs_serverd() call */
+	int s, error;
+
+	s = splnet();
+	for (rep = nfsreqh.r_next; rep != &nfsreqh; rep = rep->r_next) {
+		nmp = rep->r_nmp;
+		/* Nothing to do if r_mrep is set or the request is terminating. */
+		if (rep->r_mrep || (rep->r_flags & R_SOFTTERM))
+			continue;
+		if (nfs_sigintr(nmp, rep, rep->r_procp)) {
+			rep->r_flags |= R_SOFTTERM;
+			continue;
+		}
+		/*
+		 * r_rtt >= 0 means the request is being timed: count this
+		 * tick and skip the request until its (backed off) timeout
+		 * has been exceeded.  r_rtt < 0 falls straight through to a
+		 * send attempt.
+		 */
+		if (rep->r_rtt >= 0) {
+			rep->r_rtt++;
+			if (nmp->nm_flag & NFSMNT_DUMBTIMR)
+				timeo = nmp->nm_timeo;
+			else
+				timeo = NFS_RTO(nmp, proct[rep->r_procnum]);
+			if (nmp->nm_timeouts > 0)
+				timeo *= nfs_backoff[nmp->nm_timeouts - 1];
+			if (rep->r_rtt <= timeo)
+				continue;
+			if (nmp->nm_timeouts < 8)
+				nmp->nm_timeouts++;
+		}
+		/*
+		 * Check for server not responding
+		 */
+		if ((rep->r_flags & R_TPRINTFMSG) == 0 &&
+		     rep->r_rexmit > nmp->nm_deadthresh) {
+			nfs_msg(rep->r_procp,
+			    nmp->nm_mountp->mnt_stat.f_mntfromname,
+			    "not responding");
+			/* Remember that the message was printed. */
+			rep->r_flags |= R_TPRINTFMSG;
+		}
+		if (rep->r_rexmit >= rep->r_retry) {	/* too many */
+			nfsstats.rpctimeouts++;
+			rep->r_flags |= R_SOFTTERM;
+			continue;
+		}
+		/*
+		 * Non-datagram sockets are never resent from here; only the
+		 * retransmit count advances (capped at NFS_MAXREXMIT).
+		 */
+		if (nmp->nm_sotype != SOCK_DGRAM) {
+			if (++rep->r_rexmit > NFS_MAXREXMIT)
+				rep->r_rexmit = NFS_MAXREXMIT;
+			continue;
+		}
+		if ((so = nmp->nm_so) == NULL)
+			continue;
+
+		/*
+		 * If there is enough space and the window allows..
+		 *	Resend it
+		 * Set r_rtt to -1 in case we fail to send it now.
+		 */
+		rep->r_rtt = -1;
+		if (sbspace(&so->so_snd) >= rep->r_mreq->m_pkthdr.len &&
+		   ((nmp->nm_flag & NFSMNT_DUMBTIMR) ||
+		    (rep->r_flags & R_SENT) ||
+		    nmp->nm_sent < nmp->nm_cwnd) &&
+		   (m = m_copym(rep->r_mreq, 0, M_COPYALL, M_DONTWAIT))){
+			/* Pass the peer address only for NFSMNT_NOCONN mounts. */
+			if ((nmp->nm_flag & NFSMNT_NOCONN) == 0)
+			    error = (*so->so_proto->pr_usrreq)(so, PRU_SEND, m,
+			    (struct mbuf *)0, (struct mbuf *)0);
+			else
+			    error = (*so->so_proto->pr_usrreq)(so, PRU_SEND, m,
+			    nmp->nm_nam, (struct mbuf *)0);
+			if (error) {
+				if (NFSIGNORE_SOERROR(nmp->nm_soflags, error))
+					so->so_error = 0;
+			} else {
+				/*
+				 * Iff first send, start timing
+				 * else turn timing off, backoff timer
+				 * and divide congestion window by 2.
+				 */
+				if (rep->r_flags & R_SENT) {
+					rep->r_flags &= ~R_TIMING;
+					if (++rep->r_rexmit > NFS_MAXREXMIT)
+						rep->r_rexmit = NFS_MAXREXMIT;
+					nmp->nm_cwnd >>= 1;
+					if (nmp->nm_cwnd < NFS_CWNDSCALE)
+						nmp->nm_cwnd = NFS_CWNDSCALE;
+					nfsstats.rpcretries++;
+				} else {
+					rep->r_flags |= R_SENT;
+					nmp->nm_sent += NFS_CWNDSCALE;
+				}
+				rep->r_rtt = 0;
+			}
+		}
+	}
+
+	/*
+	 * Call the nqnfs server timer once a second to handle leases.
+	 */
+	if (lasttime != time.tv_sec) {
+		lasttime = time.tv_sec;
+		nqnfs_serverd();
+	}
+	splx(s);
+	timeout(nfs_timer, (void *)0, hz / NFS_HZ);
+}
+
+/*
+ * Decide whether an nfs operation should be abandoned.
+ * Returns EINTR when the request (if one is given) is already marked
+ * R_SOFTTERM, or when the mount is NFSMNT_INT and process p has a pending
+ * signal from NFSINT_SIGMASK that is neither masked nor ignored.
+ * Returns 0 otherwise.
+ */
+nfs_sigintr(nmp, rep, p)
+	struct nfsmount *nmp;
+	struct nfsreq *rep;
+	register struct proc *p;
+{
+
+	/* A soft-terminated request wins regardless of the mount flags. */
+	if (rep != NULL && (rep->r_flags & R_SOFTTERM) != 0)
+		return (EINTR);
+
+	/* Signals are only honoured on interruptible mounts. */
+	if ((nmp->nm_flag & NFSMNT_INT) == 0)
+		return (0);
+
+	/* Without a process, or with nothing pending, there is nothing to do. */
+	if (p == NULL || p->p_siglist == 0)
+		return (0);
+
+	/* Pending, not blocked, not ignored, and one of the nfs signals? */
+	if ((p->p_siglist & ~p->p_sigmask & ~p->p_sigignore & NFSINT_SIGMASK) != 0)
+		return (EINTR);
+	return (0);
+}
+
+/*
+ * Lock a socket against others.
+ * Necessary for STREAM sockets to ensure you get an entire rpc request/reply
+ * and also to avoid race conditions between the processes with nfs requests
+ * in progress when a reconnect is necessary.
+ *
+ * flagp	word holding the NFSMNT_SNDLOCK and NFSMNT_WANTSND bits; its
+ *		address is also the sleep channel
+ * rep		request on whose behalf the lock is taken, or NULL when there
+ *		is none; with no request the wait cannot be interrupted
+ *
+ * Returns 0 with NFSMNT_SNDLOCK set, or EINTR if nfs_sigintr() says the
+ * request should be given up.
+ */
+nfs_sndlock(flagp, rep)
+	register int *flagp;
+	struct nfsreq *rep;
+{
+	struct proc *p;
+	int slpflag = 0, slptimeo = 0;
+
+	if (rep) {
+		p = rep->r_procp;
+		if (rep->r_nmp->nm_flag & NFSMNT_INT)
+			slpflag = PCATCH;
+	} else
+		p = (struct proc *)0;
+	while (*flagp & NFSMNT_SNDLOCK) {
+		/*
+		 * Only consult nfs_sigintr() when there is a request: a NULL
+		 * rep has no mount to pass and must not be dereferenced.
+		 */
+		if (rep && nfs_sigintr(rep->r_nmp, rep, p))
+			return (EINTR);
+		*flagp |= NFSMNT_WANTSND;
+		(void) tsleep((caddr_t)flagp, slpflag | (PZERO - 1), "nfsndlck",
+			slptimeo);
+		/*
+		 * After the first signal-catching sleep, fall back to plain
+		 * sleeps with a two second timeout so that nfs_sigintr() is
+		 * re-evaluated periodically.
+		 */
+		if (slpflag == PCATCH) {
+			slpflag = 0;
+			slptimeo = 2 * hz;
+		}
+	}
+	*flagp |= NFSMNT_SNDLOCK;
+	return (0);
+}
+
+/*
+ * Release the send lock taken by nfs_sndlock() and wake any sleepers
+ * that registered interest with NFSMNT_WANTSND.
+ * Panics if the lock is not currently held.
+ */
+void
+nfs_sndunlock(flagp)
+	register int *flagp;
+{
+	int wanted;
+
+	if (!(*flagp & NFSMNT_SNDLOCK))
+		panic("nfs sndunlock");
+
+	/* Note whether anyone is waiting, then drop both bits together. */
+	wanted = *flagp & NFSMNT_WANTSND;
+	*flagp &= ~(NFSMNT_SNDLOCK | NFSMNT_WANTSND);
+	if (wanted)
+		wakeup((caddr_t)flagp);
+}
+
+/*
+ * Take the receive lock (NFSMNT_RCVLOCK) kept in the nm_flag word of the
+ * mount that "rep" belongs to, sleeping while somebody else holds it.
+ * Returns 0 with the lock held, or EINTR if nfs_sigintr() reports that
+ * the request should be abandoned.
+ */
+nfs_rcvlock(rep)
+	register struct nfsreq *rep;
+{
+	register int *lockp = &rep->r_nmp->nm_flag;
+	int slpcatch = (*lockp & NFSMNT_INT) ? PCATCH : 0;
+	int slptimo = 0;
+
+	for (;;) {
+		if ((*lockp & NFSMNT_RCVLOCK) == 0)
+			break;
+		if (nfs_sigintr(rep->r_nmp, rep, rep->r_procp))
+			return (EINTR);
+		*lockp |= NFSMNT_WANTRCV;
+		(void) tsleep((caddr_t)lockp, slpcatch | (PZERO - 1), "nfsrcvlk",
+			slptimo);
+		/*
+		 * Only the first sleep catches signals; later ones time out
+		 * every two seconds so nfs_sigintr() is checked again.
+		 */
+		if (slpcatch == PCATCH) {
+			slpcatch = 0;
+			slptimo = 2 * hz;
+		}
+	}
+	*lockp |= NFSMNT_RCVLOCK;
+	return (0);
+}
+
+/*
+ * Release the receive lock taken by nfs_rcvlock() and wake any sleepers
+ * that registered interest with NFSMNT_WANTRCV.
+ * Panics if the lock is not currently held.
+ */
+void
+nfs_rcvunlock(flagp)
+	register int *flagp;
+{
+	int wanted;
+
+	if (!(*flagp & NFSMNT_RCVLOCK))
+		panic("nfs rcvunlock");
+
+	/* Note whether anyone is waiting, then drop both bits together. */
+	wanted = *flagp & NFSMNT_WANTRCV;
+	*flagp &= ~(NFSMNT_RCVLOCK | NFSMNT_WANTRCV);
+	if (wanted)
+		wakeup((caddr_t)flagp);
+}
+
+/*
+ * Check for badly aligned mbuf data areas and
+ * realign data in an mbuf list by copying the data areas up, as required.
+ *
+ * m	head of the mbuf chain to check
+ * hsiz	size of the invariant part of the RPC header; when the first
+ *	offending mbuf holds no more than hsiz bytes, at most hsiz bytes are
+ *	packed into it
+ *
+ * The chain is walked until the first mbuf whose length or data address is
+ * not a multiple of 4.  From that mbuf on, all data is compacted towards
+ * the front of the chain, reusing the existing mbufs, and the routine
+ * returns.  Mbufs that end up drained stay on the chain with m_len == 0.
+ */
+void
+nfs_realign(m, hsiz)
+	register struct mbuf *m;
+	int hsiz;
+{
+	register struct mbuf *m2;
+	register int siz, mlen, olen;
+	register caddr_t tcp, fcp;	/* "to" and "from" copy pointers */
+	struct mbuf *mnew;
+
+	while (m) {
+	    /*
+	     * This never happens for UDP, rarely happens for TCP
+	     * but frequently happens for iso transport.
+	     */
+	    if ((m->m_len & 0x3) || (mtod(m, int) & 0x3)) {
+		olen = m->m_len;
+		fcp = mtod(m, caddr_t);
+		/*
+		 * Misaligned start address: point m_data at an aligned
+		 * place inside the same buffer before copying.
+		 */
+		if ((int)fcp & 0x3) {
+			m->m_flags &= ~M_PKTHDR;
+			if (m->m_flags & M_EXT)
+				m->m_data = m->m_ext.ext_buf +
+					((m->m_ext.ext_size - olen) & ~0x3);
+			else
+				m->m_data = m->m_dat;
+		}
+		m->m_len = 0;
+		tcp = mtod(m, caddr_t);
+		mnew = m;		/* mbuf currently being filled */
+		m2 = m->m_next;		/* next mbuf to fill */
+	
+		/*
+		 * If possible, only put the first invariant part
+		 * of the RPC header in the first mbuf.
+		 */
+		mlen = M_TRAILINGSPACE(m);
+		if (olen <= hsiz && mlen > hsiz)
+			mlen = hsiz;
+	
+		/*
+		 * Loop through the mbuf list consolidating data.
+		 */
+		while (m) {
+			/* Drain the olen bytes at fcp into the fill mbuf(s). */
+			while (olen > 0) {
+				if (mlen == 0) {
+					/* Fill mbuf is full: reset and use the next. */
+					m2->m_flags &= ~M_PKTHDR;
+					if (m2->m_flags & M_EXT)
+						m2->m_data = m2->m_ext.ext_buf;
+					else
+						m2->m_data = m2->m_dat;
+					m2->m_len = 0;
+					mlen = M_TRAILINGSPACE(m2);
+					tcp = mtod(m2, caddr_t);
+					mnew = m2;
+					m2 = m2->m_next;
+				}
+				siz = min(mlen, olen);
+				/* Source and destination may be the same spot. */
+				if (tcp != fcp)
+					bcopy(fcp, tcp, siz);
+				mnew->m_len += siz;
+				mlen -= siz;
+				olen -= siz;
+				tcp += siz;
+				fcp += siz;
+			}
+			m = m->m_next;
+			if (m) {
+				olen = m->m_len;
+				fcp = mtod(m, caddr_t);
+			}
+		}
+	
+		/*
+		 * Finally, set m_len == 0 for any trailing mbufs that have
+		 * been copied out of.
+		 */
+		while (m2) {
+			m2->m_len = 0;
+			m2 = m2->m_next;
+		}
+		return;
+	    }
+	    m = m->m_next;
+	}
+}
+
+/*
+ * Socket upcall routine for the nfsd sockets.
+ * The caddr_t arg is a pointer to the "struct nfssvc_sock".
+ * Essentially do as much as possible non-blocking, else punt and it will
+ * be called with M_WAIT from an nfsd.
+ *
+ * so		socket to receive from
+ * arg		the struct nfssvc_sock for that socket
+ * waitflag	M_DONTWAIT or M_WAIT; passed on to nfsrv_getstream() and,
+ *		when M_DONTWAIT, an nfsd is woken at the end if there is
+ *		work queued or a flag (SLP_NEEDQ / SLP_DISCONN) to act on
+ *
+ * Stream sockets: raw data is appended to ns_raw/ns_rawend and handed to
+ * nfsrv_getstream() to be cut into records.  Other sockets: each datagram
+ * is realigned and queued on ns_rec/ns_recend, preceded by the sender's
+ * address mbuf when soreceive() returned one.
+ */
+void
+nfsrv_rcv(so, arg, waitflag)
+	struct socket *so;
+	caddr_t arg;
+	int waitflag;
+{
+	register struct nfssvc_sock *slp = (struct nfssvc_sock *)arg;
+	register struct mbuf *m;
+	struct mbuf *mp, *nam;
+	struct uio auio;
+	int flags, error;
+
+	if ((slp->ns_flag & SLP_VALID) == 0)
+		return;
+#ifdef notdef
+	/*
+	 * Define this to test for nfsds handling this under heavy load.
+	 */
+	if (waitflag == M_DONTWAIT) {
+		slp->ns_flag |= SLP_NEEDQ; goto dorecs;
+	}
+#endif
+	auio.uio_procp = NULL;
+	if (so->so_type == SOCK_STREAM) {
+		/*
+		 * If there are already records on the queue, defer soreceive()
+		 * to an nfsd so that there is feedback to the TCP layer that
+		 * the nfs servers are heavily loaded.
+		 */
+		if (slp->ns_rec && waitflag == M_DONTWAIT) {
+			slp->ns_flag |= SLP_NEEDQ;
+			goto dorecs;
+		}
+
+		/*
+		 * Do soreceive().
+		 */
+		auio.uio_resid = 1000000000;	/* i.e. take whatever is there */
+		flags = MSG_DONTWAIT;
+		error = soreceive(so, &nam, &auio, &mp, (struct mbuf **)0, &flags);
+		if (error || mp == (struct mbuf *)0) {
+			/* Anything but "try again later" means disconnect. */
+			if (error == EWOULDBLOCK)
+				slp->ns_flag |= SLP_NEEDQ;
+			else
+				slp->ns_flag |= SLP_DISCONN;
+			goto dorecs;
+		}
+		/*
+		 * Append the new data to the raw chain; the byte count
+		 * received is the amount by which uio_resid went down.
+		 */
+		m = mp;
+		if (slp->ns_rawend) {
+			slp->ns_rawend->m_next = m;
+			slp->ns_cc += 1000000000 - auio.uio_resid;
+		} else {
+			slp->ns_raw = m;
+			slp->ns_cc = 1000000000 - auio.uio_resid;
+		}
+		while (m->m_next)
+			m = m->m_next;
+		slp->ns_rawend = m;
+
+		/*
+		 * Now try and parse record(s) out of the raw stream data.
+		 */
+		if (error = nfsrv_getstream(slp, waitflag)) {
+			if (error == EPERM)
+				slp->ns_flag |= SLP_DISCONN;
+			else
+				slp->ns_flag |= SLP_NEEDQ;
+		}
+	} else {
+		/* Keep receiving until soreceive() hands back no data. */
+		do {
+			auio.uio_resid = 1000000000;
+			flags = MSG_DONTWAIT;
+			error = soreceive(so, &nam, &auio, &mp,
+						(struct mbuf **)0, &flags);
+			if (mp) {
+				nfs_realign(mp, 10 * NFSX_UNSIGNED);
+				/* Chain the data behind the address, if any. */
+				if (nam) {
+					m = nam;
+					m->m_next = mp;
+				} else
+					m = mp;
+				if (slp->ns_recend)
+					slp->ns_recend->m_nextpkt = m;
+				else
+					slp->ns_rec = m;
+				slp->ns_recend = m;
+				m->m_nextpkt = (struct mbuf *)0;
+			}
+			if (error) {
+				if ((so->so_proto->pr_flags & PR_CONNREQUIRED)
+					&& error != EWOULDBLOCK) {
+					slp->ns_flag |= SLP_DISCONN;
+					goto dorecs;
+				}
+			}
+		} while (mp);
+	}
+
+	/*
+	 * Now try and process the request records, non-blocking.
+	 */
+dorecs:
+	if (waitflag == M_DONTWAIT &&
+		(slp->ns_rec || (slp->ns_flag & (SLP_NEEDQ | SLP_DISCONN))))
+		nfsrv_wakenfsd(slp);
+}
+
+/*
+ * Try and extract an RPC request from the mbuf data list received on a
+ * stream socket. The "waitflag" argument indicates whether or not it
+ * can sleep.
+ *
+ * Works on the raw chain ns_raw (ns_cc bytes).  Each record starts with a
+ * 4 byte record mark whose low 31 bits give the record length (kept in
+ * ns_reclen while the record is incomplete).  Complete records are
+ * realigned and appended to the ns_rec/ns_recend queue.
+ *
+ * Returns 0 when no further complete record is available, EPERM when a
+ * record length is outside NFS_MINPACKET..NFS_MAXPACKET, and EWOULDBLOCK
+ * when m_copym() could not supply an mbuf for splitting.  SLP_GETSTREAM
+ * is held in ns_flag while running; entering with it set panics.
+ */
+nfsrv_getstream(slp, waitflag)
+	register struct nfssvc_sock *slp;
+	int waitflag;
+{
+	register struct mbuf *m;
+	register char *cp1, *cp2;
+	register int len;
+	struct mbuf *om, *m2, *recm;
+	u_long recmark;
+
+	if (slp->ns_flag & SLP_GETSTREAM)
+		panic("nfs getstream");
+	slp->ns_flag |= SLP_GETSTREAM;
+	for (;;) {
+	    /* ns_reclen == 0: the next thing in the stream is a record mark. */
+	    if (slp->ns_reclen == 0) {
+		if (slp->ns_cc < NFSX_UNSIGNED) {
+			slp->ns_flag &= ~SLP_GETSTREAM;
+			return (0);
+		}
+		m = slp->ns_raw;
+		if (m->m_len >= NFSX_UNSIGNED) {
+			/* Mark lies within the first mbuf. */
+			bcopy(mtod(m, caddr_t), (caddr_t)&recmark, NFSX_UNSIGNED);
+			m->m_data += NFSX_UNSIGNED;
+			m->m_len -= NFSX_UNSIGNED;
+		} else {
+			/* Mark straddles mbufs: gather it a byte at a time. */
+			cp1 = (caddr_t)&recmark;
+			cp2 = mtod(m, caddr_t);
+			while (cp1 < ((caddr_t)&recmark) + NFSX_UNSIGNED) {
+				while (m->m_len == 0) {
+					m = m->m_next;
+					cp2 = mtod(m, caddr_t);
+				}
+				*cp1++ = *cp2++;
+				m->m_data++;
+				m->m_len--;
+			}
+		}
+		slp->ns_cc -= NFSX_UNSIGNED;
+		/* Strip the top bit of the mark; the rest is the length. */
+		slp->ns_reclen = ntohl(recmark) & ~0x80000000;
+		if (slp->ns_reclen < NFS_MINPACKET || slp->ns_reclen > NFS_MAXPACKET) {
+			slp->ns_flag &= ~SLP_GETSTREAM;
+			return (EPERM);
+		}
+	    }
+
+	    /*
+	     * Now get the record part.
+	     */
+	    if (slp->ns_cc == slp->ns_reclen) {
+		/* The raw chain is exactly one record: take all of it. */
+		recm = slp->ns_raw;
+		slp->ns_raw = slp->ns_rawend = (struct mbuf *)0;
+		slp->ns_cc = slp->ns_reclen = 0;
+	    } else if (slp->ns_cc > slp->ns_reclen) {
+		/* More than one record's worth: split ns_reclen bytes off. */
+		len = 0;
+		m = slp->ns_raw;
+		om = (struct mbuf *)0;
+		while (len < slp->ns_reclen) {
+			if ((len + m->m_len) > slp->ns_reclen) {
+				/*
+				 * Record ends inside this mbuf: copy the
+				 * leading part for the record and trim it
+				 * from the front of the original.
+				 */
+				m2 = m_copym(m, 0, slp->ns_reclen - len,
+					waitflag);
+				if (m2) {
+					if (om) {
+						om->m_next = m2;
+						recm = slp->ns_raw;
+					} else
+						recm = m2;
+					m->m_data += slp->ns_reclen - len;
+					m->m_len -= slp->ns_reclen - len;
+					len = slp->ns_reclen;
+				} else {
+					slp->ns_flag &= ~SLP_GETSTREAM;
+					return (EWOULDBLOCK);
+				}
+			} else if ((len + m->m_len) == slp->ns_reclen) {
+				/* Record ends on an mbuf boundary: cut the chain. */
+				om = m;
+				len += m->m_len;
+				m = m->m_next;
+				recm = slp->ns_raw;
+				om->m_next = (struct mbuf *)0;
+			} else {
+				om = m;
+				len += m->m_len;
+				m = m->m_next;
+			}
+		}
+		slp->ns_raw = m;
+		slp->ns_cc -= len;
+		slp->ns_reclen = 0;
+	    } else {
+		/* Record not complete yet; ns_reclen is kept for next time. */
+		slp->ns_flag &= ~SLP_GETSTREAM;
+		return (0);
+	    }
+	    /* Queue the finished record. */
+	    nfs_realign(recm, 10 * NFSX_UNSIGNED);
+	    if (slp->ns_recend)
+		slp->ns_recend->m_nextpkt = recm;
+	    else
+		slp->ns_rec = recm;
+	    slp->ns_recend = recm;
+	}
+}
+
+/*
+ * Take the first queued record off the socket's ns_rec list, set up the
+ * nfsd's message pointers (nd_nam, nd_mrep, nd_md, nd_dpos) for it and
+ * parse its RPC header with nfs_getreq().
+ * Returns ENOBUFS if the socket is not valid or nothing is queued,
+ * otherwise whatever nfs_getreq() returns; on a parse error the address
+ * mbuf is released.
+ */
+nfsrv_dorec(slp, nd)
+	register struct nfssvc_sock *slp;
+	register struct nfsd *nd;
+{
+	register struct mbuf *rec;
+	int error;
+
+	if ((slp->ns_flag & SLP_VALID) == 0)
+		return (ENOBUFS);
+	rec = slp->ns_rec;
+	if (rec == (struct mbuf *)0)
+		return (ENOBUFS);
+
+	/* Unlink the record; an emptied queue has no tail either. */
+	slp->ns_rec = rec->m_nextpkt;
+	if (slp->ns_rec != (struct mbuf *)0)
+		rec->m_nextpkt = (struct mbuf *)0;
+	else
+		slp->ns_recend = (struct mbuf *)0;
+
+	/* A leading MT_SONAME mbuf is the address; the data follows it. */
+	if (rec->m_type == MT_SONAME) {
+		nd->nd_nam = rec;
+		nd->nd_mrep = rec->m_next;
+		rec->m_next = (struct mbuf *)0;
+	} else {
+		nd->nd_nam = (struct mbuf *)0;
+		nd->nd_mrep = rec;
+	}
+	nd->nd_md = nd->nd_mrep;
+	nd->nd_dpos = mtod(nd->nd_md, caddr_t);
+
+	error = nfs_getreq(nd, TRUE);
+	if (error != 0)
+		m_freem(nd->nd_nam);
+	return (error);
+}
+
+/*
+ * Parse an RPC request
+ * - verify it
+ * - fill in the cred struct.
+ *
+ * nd		request descriptor; nd_mrep/nd_md/nd_dpos give the message
+ *		and the parse position, and the results (nd_retxid,
+ *		nd_procnum, nd_repstat, nd_cr, nd_nqlflag, nd_duration, ...)
+ *		are stored back into it
+ * has_header	non-zero when the message still starts with the xid and
+ *		call/reply words
+ *
+ * Returns 0 when the header could be parsed.  Version, program and
+ * procedure rejections are reported that way too: nd_repstat holds the
+ * error and nd_procnum is set to NFSPROC_NOOP.  A non-zero return
+ * (EBADRPC, or the error left by a failed nfsm_*() macro) means the
+ * request was malformed; the explicit EBADRPC paths free mrep first.
+ */
+nfs_getreq(nd, has_header)
+	register struct nfsd *nd;
+	int has_header;
+{
+	register int len, i;
+	register u_long *tl;
+	register long t1;
+	struct uio uio;
+	struct iovec iov;
+	caddr_t dpos, cp2;
+	u_long nfsvers, auth_type;
+	int error = 0, nqnfs = 0;
+	struct mbuf *mrep, *md;
+
+	mrep = nd->nd_mrep;
+	md = nd->nd_md;
+	dpos = nd->nd_dpos;
+	/*
+	 * Get the fixed part of the call contiguous.  Both variants leave
+	 * tl at the rpc version word with seven more words after it.
+	 */
+	if (has_header) {
+		nfsm_dissect(tl, u_long *, 10*NFSX_UNSIGNED);
+		nd->nd_retxid = *tl++;
+		if (*tl++ != rpc_call) {
+			m_freem(mrep);
+			return (EBADRPC);
+		}
+	} else {
+		nfsm_dissect(tl, u_long *, 8*NFSX_UNSIGNED);
+	}
+	nd->nd_repstat = 0;
+	if (*tl++ != rpc_vers) {
+		nd->nd_repstat = ERPCMISMATCH;
+		nd->nd_procnum = NFSPROC_NOOP;
+		return (0);
+	}
+	/* The program number selects plain nfs or nqnfs. */
+	nfsvers = nfs_vers;
+	if (*tl != nfs_prog) {
+		if (*tl == nqnfs_prog) {
+			nqnfs++;
+			nfsvers = nqnfs_vers;
+		} else {
+			nd->nd_repstat = EPROGUNAVAIL;
+			nd->nd_procnum = NFSPROC_NOOP;
+			return (0);
+		}
+	}
+	tl++;
+	if (*tl++ != nfsvers) {
+		nd->nd_repstat = EPROGMISMATCH;
+		nd->nd_procnum = NFSPROC_NOOP;
+		return (0);
+	}
+	nd->nd_procnum = fxdr_unsigned(u_long, *tl++);
+	/* The null procedure needs no credentials. */
+	if (nd->nd_procnum == NFSPROC_NULL)
+		return (0);
+	/*
+	 * Out of range procedure numbers and unknown authentication
+	 * flavors are both answered with EPROCUNAVAIL.
+	 */
+	if (nd->nd_procnum >= NFS_NPROCS ||
+		(!nqnfs && nd->nd_procnum > NFSPROC_STATFS) ||
+		(*tl != rpc_auth_unix && *tl != rpc_auth_kerb)) {
+		nd->nd_repstat = EPROCUNAVAIL;
+		nd->nd_procnum = NFSPROC_NOOP;
+		return (0);
+	}
+	auth_type = *tl++;
+	len = fxdr_unsigned(int, *tl++);	/* length of the credential body */
+	if (len < 0 || len > RPCAUTH_MAXSIZ) {
+		m_freem(mrep);
+		return (EBADRPC);
+	}
+
+	/*
+	 * Handle auth_unix or auth_kerb.
+	 */
+	if (auth_type == rpc_auth_unix) {
+		/* Step over the first word; the next is the name length. */
+		len = fxdr_unsigned(int, *++tl);
+		if (len < 0 || len > NFS_MAXNAMLEN) {
+			m_freem(mrep);
+			return (EBADRPC);
+		}
+		nfsm_adv(nfsm_rndup(len));
+		nfsm_dissect(tl, u_long *, 3*NFSX_UNSIGNED);
+		nd->nd_cr.cr_uid = fxdr_unsigned(uid_t, *tl++);
+		nd->nd_cr.cr_gid = fxdr_unsigned(gid_t, *tl++);
+		len = fxdr_unsigned(int, *tl);	/* number of group ids */
+		if (len < 0 || len > RPCAUTH_UNIXGIDS) {
+			m_freem(mrep);
+			return (EBADRPC);
+		}
+		/* The groups plus the two words that start the verifier. */
+		nfsm_dissect(tl, u_long *, (len + 2)*NFSX_UNSIGNED);
+		/* Groups go in cr_groups[1..]; extras are skipped. */
+		for (i = 1; i <= len; i++)
+			if (i < NGROUPS)
+				nd->nd_cr.cr_groups[i] = fxdr_unsigned(gid_t, *tl++);
+			else
+				tl++;
+		nd->nd_cr.cr_ngroups = (len >= NGROUPS) ? NGROUPS : (len + 1);
+	} else if (auth_type == rpc_auth_kerb) {
+		nd->nd_cr.cr_uid = fxdr_unsigned(uid_t, *tl++);
+		nd->nd_authlen = fxdr_unsigned(int, *tl);
+		uio.uio_resid = nfsm_rndup(nd->nd_authlen);
+		/*
+		 * The string must fit in what is left of the credential.
+		 * NOTE(review): a negative nd_authlen is not rejected by
+		 * this test -- confirm whether later users rely on it
+		 * being non-negative.
+		 */
+		if (uio.uio_resid > (len - 2 * NFSX_UNSIGNED)) {
+			m_freem(mrep);
+			return (EBADRPC);
+		}
+		/* Copy the authentication string into nd_authstr. */
+		uio.uio_offset = 0;
+		uio.uio_iov = &iov;
+		uio.uio_iovcnt = 1;
+		uio.uio_segflg = UIO_SYSSPACE;
+		iov.iov_base = (caddr_t)nd->nd_authstr;
+		iov.iov_len = RPCAUTH_MAXSIZ;
+		nfsm_mtouio(&uio, uio.uio_resid);
+		nfsm_dissect(tl, u_long *, 2 * NFSX_UNSIGNED);
+		nd->nd_flag |= NFSD_NEEDAUTH;
+	}
+
+	/*
+	 * Do we have any use for the verifier.
+	 * According to the "Remote Procedure Call Protocol Spec." it
+	 * should be AUTH_NULL, but some clients make it AUTH_UNIX?
+	 * For now, just skip over it
+	 */
+	len = fxdr_unsigned(int, *++tl);
+	if (len < 0 || len > RPCAUTH_MAXSIZ) {
+		m_freem(mrep);
+		return (EBADRPC);
+	}
+	if (len > 0) {
+		nfsm_adv(nfsm_rndup(len));
+	}
+
+	/*
+	 * For nqnfs, get piggybacked lease request.
+	 */
+	if (nqnfs && nd->nd_procnum != NQNFSPROC_EVICTED) {
+		nfsm_dissect(tl, u_long *, NFSX_UNSIGNED);
+		nd->nd_nqlflag = fxdr_unsigned(int, *tl);
+		if (nd->nd_nqlflag) {
+			nfsm_dissect(tl, u_long *, NFSX_UNSIGNED);
+			nd->nd_duration = fxdr_unsigned(int, *tl);
+		} else
+			nd->nd_duration = NQ_MINLEASE;
+	} else {
+		nd->nd_nqlflag = NQL_NOVAL;
+		nd->nd_duration = NQ_MINLEASE;
+	}
+	/* Hand the advanced parse position back to the caller. */
+	nd->nd_md = md;
+	nd->nd_dpos = dpos;
+	return (0);
+nfsmout:
+	/* Reached by goto from inside the nfsm_*() macros on failure. */
+	return (error);
+}
+
+/*
+ * Hand the socket "slp" to the first nfsd on the nfsd_head list that is
+ * marked NFSD_WAITING and wake that nfsd up.
+ * SIDE EFFECT: when no nfsd is waiting, SLP_DOREC is set on the socket and
+ * NFSD_CHECKSLP on nfsd_head, so that one of the running nfsds will go
+ * look for the work in the nfssvc_sock list.
+ * Does nothing for a socket that is not SLP_VALID.
+ */
+void
+nfsrv_wakenfsd(slp)
+	struct nfssvc_sock *slp;
+{
+	register struct nfsd *nd;
+
+	if (!(slp->ns_flag & SLP_VALID))
+		return;
+
+	for (nd = nfsd_head.nd_next; nd != (struct nfsd *)&nfsd_head;
+	    nd = nd->nd_next) {
+		if ((nd->nd_flag & NFSD_WAITING) == 0)
+			continue;
+
+		/* Found an idle nfsd: it must not already own a socket. */
+		nd->nd_flag &= ~NFSD_WAITING;
+		if (nd->nd_slp)
+			panic("nfsd wakeup");
+		slp->ns_sref++;
+		nd->nd_slp = slp;
+		wakeup((caddr_t)nd);
+		return;
+	}
+
+	/* Nobody idle: leave a note for the running nfsds. */
+	slp->ns_flag |= SLP_DOREC;
+	nfsd_head.nd_flag |= NFSD_CHECKSLP;
+}
+
+/*
+ * Print "nfs server <server>: <msg>" through tprintf().
+ * A tprintf session is opened for process p when one is given; with no
+ * process a NULL session is passed to tprintf().
+ */
+nfs_msg(p, server, msg)
+	struct proc *p;
+	char *server, *msg;
+{
+	tpr_t tpr = NULL;
+
+	if (p != NULL)
+		tpr = tprintf_open(p);
+	tprintf(tpr, "nfs server %s: %s\n", server, msg);
+	tprintf_close(tpr);
+}
diff --git a/sys/nfsserver/nfs_srvsubs.c b/sys/nfsserver/nfs_srvsubs.c
new file mode 100644
index 00000000000..5778f7d7f01
--- /dev/null
+++ b/sys/nfsserver/nfs_srvsubs.c
@@ -0,0 +1,1130 @@
+/*
+ * Copyright (c) 1989, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Rick Macklem at The University of Guelph.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)nfs_subs.c	8.3 (Berkeley) 1/4/94
+ */
+
+/*
+ * These functions support the macros and help fiddle mbuf chains for
+ * the nfs op functions. They do things like create the rpc header and
+ * copy data between mbuf chains and uio lists.
+ */
+#include <sys/param.h>
+#include <sys/proc.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/mount.h>
+#include <sys/vnode.h>
+#include <sys/namei.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+
+#include <nfs/rpcv2.h>
+#include <nfs/nfsv2.h>
+#include <nfs/nfsnode.h>
+#include <nfs/nfs.h>
+#include <nfs/xdr_subs.h>
+#include <nfs/nfsm_subs.h>
+#include <nfs/nfsmount.h>
+#include <nfs/nqnfs.h>
+#include <nfs/nfsrtt.h>
+
+#include <miscfs/specfs/specdev.h>
+
+#include <netinet/in.h>
+#ifdef ISO
+#include <netiso/iso.h>
+#endif
+
+#define TRUE	1
+#define	FALSE	0
+
+/*
+ * Data items converted to xdr at startup, since they are constant
+ * This is kinda hokey, but may save a little time doing byte swaps
+ */
+u_long nfs_procids[NFS_NPROCS];
+u_long nfs_xdrneg1;
+u_long rpc_call, rpc_vers, rpc_reply, rpc_msgdenied, rpc_autherr,
+	rpc_mismatch, rpc_auth_unix, rpc_msgaccepted, rpc_rejectedcred,
+	rpc_auth_kerb;
+u_long nfs_vers, nfs_prog, nfs_true, nfs_false;
+
+/* And other global data */
+/* Last transaction id used; nfsm_rpchead() pre-increments it and skips 0. */
+static u_long nfs_xid = 0;
+/* Vnode type table; presumably indexed by the NFS file type -- confirm. */
+enum vtype ntov_type[7] = { VNON, VREG, VDIR, VBLK, VCHR, VLNK, VNON };
+/* The remaining items are defined in other files. */
+extern struct proc *nfs_iodwant[NFS_MAXASYNCDAEMON];
+extern struct nfsreq nfsreqh;
+extern int nqnfs_piggy[NFS_NPROCS];
+extern struct nfsrtt nfsrtt;
+extern time_t nqnfsstarttime;
+extern u_long nqnfs_prog, nqnfs_vers;
+extern int nqsrv_clockskew;
+extern int nqsrv_writeslack;
+extern int nqsrv_maxlease;
+
+/*
+ * Create the header for an rpc request packet
+ * The hsiz is the size of the rest of the nfs request header.
+ * (just used to decide if a cluster is a good idea)
+ *
+ * vp		vnode the request is for, or NULL; when it lives on an
+ *		NFSMNT_NQNFS mount a lease request is put at the front
+ * procid	procedure number, passed to NQNFS_NEEDLEASE()
+ * bposp	out: position at which the caller continues building
+ *
+ * Returns the new mbuf.
+ */
+struct mbuf *
+nfsm_reqh(vp, procid, hsiz, bposp)
+	struct vnode *vp;
+	u_long procid;
+	int hsiz;
+	caddr_t *bposp;
+{
+	register struct mbuf *mb;
+	register u_long *tl;
+	register caddr_t bpos;
+	struct mbuf *mb2;	/* mb, mb2 and bpos are used by nfsm_build() */
+	struct nfsmount *nmp;
+	int nqflag;
+
+	MGET(mb, M_WAIT, MT_DATA);
+	if (hsiz >= MINCLSIZE)
+		MCLGET(mb, M_WAIT);
+	mb->m_len = 0;
+	bpos = mtod(mb, caddr_t);
+	
+	/*
+	 * For NQNFS, add lease request.
+	 */
+	if (vp) {
+		nmp = VFSTONFS(vp->v_mount);
+		if (nmp->nm_flag & NFSMNT_NQNFS) {
+			nqflag = NQNFS_NEEDLEASE(vp, procid);
+			if (nqflag) {
+				/* Lease wanted: flag word plus lease term. */
+				nfsm_build(tl, u_long *, 2*NFSX_UNSIGNED);
+				*tl++ = txdr_unsigned(nqflag);
+				*tl = txdr_unsigned(nmp->nm_leaseterm);
+			} else {
+				/* No lease: a single zero word. */
+				nfsm_build(tl, u_long *, NFSX_UNSIGNED);
+				*tl = 0;
+			}
+		}
+	}
+	/* Finally, return values */
+	*bposp = bpos;
+	return (mb);
+}
+
+/*
+ * Build the RPC header and fill in the authorization info.
+ * The authorization string argument is only used when the credentials
+ * come from outside of the kernel.
+ * Returns the head of the mbuf list.
+ *
+ * cr		credentials supplying the uid and group list
+ * nqnfs	non-zero to use the NQNFS program/version numbers rather
+ *		than the NFS ones
+ * procid	procedure number
+ * auth_type	RPCAUTH_UNIX or RPCAUTH_NQNFS; any other value gets no
+ *		credential body
+ * auth_len	for RPCAUTH_UNIX the byte length of the credential body,
+ *		for RPCAUTH_NQNFS the length of auth_str
+ * auth_str	authentication string (RPCAUTH_NQNFS only)
+ * mrest, mrest_len
+ *		rest of the request, linked on after the header, and its
+ *		length, which is added into m_pkthdr.len
+ * mbp		out: last mbuf of the header
+ * xidp		out: the transaction id used, in xdr form
+ */
+struct mbuf *
+nfsm_rpchead(cr, nqnfs, procid, auth_type, auth_len, auth_str, mrest,
+	mrest_len, mbp, xidp)
+	register struct ucred *cr;
+	int nqnfs;
+	int procid;
+	int auth_type;
+	int auth_len;
+	char *auth_str;
+	struct mbuf *mrest;
+	int mrest_len;
+	struct mbuf **mbp;
+	u_long *xidp;
+{
+	register struct mbuf *mb;
+	register u_long *tl;
+	register caddr_t bpos;
+	register int i;
+	struct mbuf *mreq, *mb2;
+	int siz, grpsiz, authsiz;
+
+	/* Size of the credential body as it goes on the wire. */
+	authsiz = nfsm_rndup(auth_len);
+	if (auth_type == RPCAUTH_NQNFS)
+		authsiz += 2 * NFSX_UNSIGNED;
+	MGETHDR(mb, M_WAIT, MT_DATA);
+	/*
+	 * Use a cluster for a big header; otherwise position the data in
+	 * the header mbuf with MH_ALIGN().
+	 */
+	if ((authsiz + 10*NFSX_UNSIGNED) >= MINCLSIZE) {
+		MCLGET(mb, M_WAIT);
+	} else if ((authsiz + 10*NFSX_UNSIGNED) < MHLEN) {
+		MH_ALIGN(mb, authsiz + 10*NFSX_UNSIGNED);
+	} else {
+		MH_ALIGN(mb, 8*NFSX_UNSIGNED);
+	}
+	mb->m_len = 0;
+	mreq = mb;
+	bpos = mtod(mb, caddr_t);
+
+	/*
+	 * First the RPC header.
+	 */
+	nfsm_build(tl, u_long *, 8*NFSX_UNSIGNED);
+	/* Next transaction id; 0 is never used. */
+	if (++nfs_xid == 0)
+		nfs_xid++;
+	*tl++ = *xidp = txdr_unsigned(nfs_xid);
+	*tl++ = rpc_call;
+	*tl++ = rpc_vers;
+	if (nqnfs) {
+		*tl++ = txdr_unsigned(NQNFS_PROG);
+		*tl++ = txdr_unsigned(NQNFS_VER1);
+	} else {
+		*tl++ = txdr_unsigned(NFS_PROG);
+		*tl++ = txdr_unsigned(NFS_VER2);
+	}
+	*tl++ = txdr_unsigned(procid);
+
+	/*
+	 * And then the authorization cred.
+	 */
+	*tl++ = txdr_unsigned(auth_type);
+	*tl = txdr_unsigned(authsiz);
+	switch (auth_type) {
+	case RPCAUTH_UNIX:
+		nfsm_build(tl, u_long *, auth_len);
+		*tl++ = 0;		/* stamp ?? */
+		*tl++ = 0;		/* NULL hostname */
+		*tl++ = txdr_unsigned(cr->cr_uid);
+		*tl++ = txdr_unsigned(cr->cr_groups[0]);
+		/* Words left after the five fixed ones are the group ids. */
+		grpsiz = (auth_len >> 2) - 5;
+		*tl++ = txdr_unsigned(grpsiz);
+		for (i = 1; i <= grpsiz; i++)
+			*tl++ = txdr_unsigned(cr->cr_groups[i]);
+		break;
+	case RPCAUTH_NQNFS:
+		nfsm_build(tl, u_long *, 2*NFSX_UNSIGNED);
+		*tl++ = txdr_unsigned(cr->cr_uid);
+		*tl = txdr_unsigned(auth_len);
+		/* Copy the string, adding mbufs as the current one fills. */
+		siz = auth_len;
+		while (siz > 0) {
+			if (M_TRAILINGSPACE(mb) == 0) {
+				MGET(mb2, M_WAIT, MT_DATA);
+				if (siz >= MINCLSIZE)
+					MCLGET(mb2, M_WAIT);
+				mb->m_next = mb2;
+				mb = mb2;
+				mb->m_len = 0;
+				bpos = mtod(mb, caddr_t);
+			}
+			i = min(siz, M_TRAILINGSPACE(mb));
+			bcopy(auth_str, bpos, i);
+			mb->m_len += i;
+			auth_str += i;
+			bpos += i;
+			siz -= i;
+		}
+		/* Zero fill up to the rounded length. */
+		if ((siz = (nfsm_rndup(auth_len) - auth_len)) > 0) {
+			for (i = 0; i < siz; i++)
+				*bpos++ = '\0';
+			mb->m_len += siz;
+		}
+		break;
+	};
+	/* The verifier: RPCAUTH_NULL with zero length. */
+	nfsm_build(tl, u_long *, 2*NFSX_UNSIGNED);
+	*tl++ = txdr_unsigned(RPCAUTH_NULL);
+	*tl = 0;
+	mb->m_next = mrest;
+	mreq->m_pkthdr.len = authsiz + 10*NFSX_UNSIGNED + mrest_len;
+	mreq->m_pkthdr.rcvif = (struct ifnet *)0;
+	*mbp = mb;
+	return (mreq);
+}
+
+/*
+ * copies mbuf chain to the uio scatter/gather list
+ *
+ * mrep		in/out: mbuf currently being read
+ * uiop		destination; its iovecs, uio_offset and uio_resid are
+ *		advanced as data is copied
+ * siz		number of bytes to copy
+ * dpos		in/out: read position within *mrep
+ *
+ * After the data, the bytes that round siz up (nfsm_rndup) are skipped.
+ * Returns 0, EFBIG when the uio runs out of iovecs, EBADRPC when the
+ * mbuf chain runs out of data, or the result of nfs_adv().
+ */
+nfsm_mbuftouio(mrep, uiop, siz, dpos)
+	struct mbuf **mrep;
+	register struct uio *uiop;
+	int siz;
+	caddr_t *dpos;
+{
+	register char *mbufcp, *uiocp;
+	register int xfer, left, len;
+	register struct mbuf *mp;
+	long uiosiz, rem;
+	int error = 0;
+
+	mp = *mrep;
+	mbufcp = *dpos;
+	len = mtod(mp, caddr_t)+mp->m_len-mbufcp;	/* bytes left in this mbuf */
+	rem = nfsm_rndup(siz)-siz;			/* padding after the data */
+	while (siz > 0) {
+		if (uiop->uio_iovcnt <= 0 || uiop->uio_iov == NULL)
+			return (EFBIG);
+		left = uiop->uio_iov->iov_len;
+		uiocp = uiop->uio_iov->iov_base;
+		if (left > siz)
+			left = siz;
+		uiosiz = left;		/* amount going into this iovec */
+		while (left > 0) {
+			/* Move on to the next mbuf that holds data. */
+			while (len == 0) {
+				mp = mp->m_next;
+				if (mp == NULL)
+					return (EBADRPC);
+				mbufcp = mtod(mp, caddr_t);
+				len = mp->m_len;
+			}
+			xfer = (left > len) ? len : left;
+#ifdef notdef
+			/* Not Yet.. */
+			if (uiop->uio_iov->iov_op != NULL)
+				(*(uiop->uio_iov->iov_op))
+				(mbufcp, uiocp, xfer);
+			else
+#endif
+			/*
+			 * NOTE(review): the result of copyout() is ignored,
+			 * so a failed copy is not reported -- confirm that
+			 * this is intended.
+			 */
+			if (uiop->uio_segflg == UIO_SYSSPACE)
+				bcopy(mbufcp, uiocp, xfer);
+			else
+				copyout(mbufcp, uiocp, xfer);
+			left -= xfer;
+			len -= xfer;
+			mbufcp += xfer;
+			uiocp += xfer;
+			uiop->uio_offset += xfer;
+			uiop->uio_resid -= xfer;
+		}
+		/* Iovec used up: go to the next one, else advance within it. */
+		if (uiop->uio_iov->iov_len <= siz) {
+			uiop->uio_iovcnt--;
+			uiop->uio_iov++;
+		} else {
+			uiop->uio_iov->iov_base += uiosiz;
+			uiop->uio_iov->iov_len -= uiosiz;
+		}
+		siz -= uiosiz;
+	}
+	*dpos = mbufcp;
+	*mrep = mp;
+	/* Skip the padding, crossing into later mbufs if necessary. */
+	if (rem > 0) {
+		if (len < rem)
+			error = nfs_adv(mrep, dpos, rem, len);
+		else
+			*dpos += rem;
+	}
+	return (error);
+}
+
+/*
+ * copies a uio scatter/gather list to an mbuf chain...
+ *
+ * uiop		source; its iovecs, uio_offset and uio_resid are advanced
+ *		as data is copied
+ * mq		in/out: mbuf to append to; set to the last mbuf on return
+ * siz		number of bytes to copy
+ * bpos		out: position just past the copied data and padding
+ *
+ * The data is zero padded to the rounded length (nfsm_rndup).  New mbufs
+ * are allocated as needed, with clusters when siz > MLEN.
+ * Returns 0, or EINVAL when the uio runs out of iovecs.
+ */
+nfsm_uiotombuf(uiop, mq, siz, bpos)
+	register struct uio *uiop;
+	struct mbuf **mq;
+	int siz;
+	caddr_t *bpos;
+{
+	register char *uiocp;
+	register struct mbuf *mp, *mp2;
+	register int xfer, left, mlen;
+	int uiosiz, clflg, rem;
+	char *cp;
+
+	if (siz > MLEN)		/* or should it >= MCLBYTES ?? */
+		clflg = 1;
+	else
+		clflg = 0;
+	rem = nfsm_rndup(siz)-siz;	/* padding needed after the data */
+	mp = mp2 = *mq;
+	while (siz > 0) {
+		if (uiop->uio_iovcnt <= 0 || uiop->uio_iov == NULL)
+			return (EINVAL);
+		left = uiop->uio_iov->iov_len;
+		uiocp = uiop->uio_iov->iov_base;
+		if (left > siz)
+			left = siz;
+		uiosiz = left;		/* amount taken from this iovec */
+		while (left > 0) {
+			mlen = M_TRAILINGSPACE(mp);
+			if (mlen == 0) {
+				/* Current mbuf is full: append a new one. */
+				MGET(mp, M_WAIT, MT_DATA);
+				if (clflg)
+					MCLGET(mp, M_WAIT);
+				mp->m_len = 0;
+				mp2->m_next = mp;
+				mp2 = mp;
+				mlen = M_TRAILINGSPACE(mp);
+			}
+			xfer = (left > mlen) ? mlen : left;
+#ifdef notdef
+			/* Not Yet.. */
+			if (uiop->uio_iov->iov_op != NULL)
+				(*(uiop->uio_iov->iov_op))
+				(uiocp, mtod(mp, caddr_t)+mp->m_len, xfer);
+			else
+#endif
+			/*
+			 * NOTE(review): the result of copyin() is ignored,
+			 * so a failed copy is not reported -- confirm that
+			 * this is intended.
+			 */
+			if (uiop->uio_segflg == UIO_SYSSPACE)
+				bcopy(uiocp, mtod(mp, caddr_t)+mp->m_len, xfer);
+			else
+				copyin(uiocp, mtod(mp, caddr_t)+mp->m_len, xfer);
+			mp->m_len += xfer;
+			left -= xfer;
+			uiocp += xfer;
+			uiop->uio_offset += xfer;
+			uiop->uio_resid -= xfer;
+		}
+		/* Iovec used up: go to the next one, else advance within it. */
+		if (uiop->uio_iov->iov_len <= siz) {
+			uiop->uio_iovcnt--;
+			uiop->uio_iov++;
+		} else {
+			uiop->uio_iov->iov_base += uiosiz;
+			uiop->uio_iov->iov_len -= uiosiz;
+		}
+		siz -= uiosiz;
+	}
+	if (rem > 0) {
+		/* Get a fresh mbuf if the padding does not fit. */
+		if (rem > M_TRAILINGSPACE(mp)) {
+			MGET(mp, M_WAIT, MT_DATA);
+			mp->m_len = 0;
+			mp2->m_next = mp;
+		}
+		cp = mtod(mp, caddr_t)+mp->m_len;
+		for (left = 0; left < rem; left++)
+			*cp++ = '\0';
+		mp->m_len += rem;
+		*bpos = cp;
+	} else
+		*bpos = mtod(mp, caddr_t)+mp->m_len;
+	*mq = mp;
+	return (0);
+}
+
+/*
+ * Help break down an mbuf chain by making the next siz bytes contiguous;
+ * a pointer to them is handed back through *cp2.
+ * This is used by the macros nfsm_dissect and nfsm_dissecton for tough
+ * cases. (The macros use the vars. dpos and dpos2)
+ *
+ *	mdp	in/out: mbuf currently being parsed
+ *	dposp	in/out: parse position within that mbuf
+ *	siz	number of contiguous bytes wanted
+ *	left	bytes still unread in *mdp, starting at *dposp
+ *	cp2	out: start of the siz contiguous bytes
+ *
+ * Returns 0 on success, or EBADRPC if the chain ends before siz bytes
+ * have been found.  Panics if the bytes must be gathered into a new mbuf
+ * and siz is larger than MHLEN.
+ */
+nfsm_disct(mdp, dposp, siz, left, cp2)
+	struct mbuf **mdp;
+	caddr_t *dposp;
+	int siz;
+	int left;
+	caddr_t *cp2;
+{
+	register struct mbuf *mp, *mp2;
+	register int siz2, xfer;
+	register caddr_t p;
+
+	mp = *mdp;
+	/* Nothing left in this mbuf: step to the next one holding data. */
+	while (left == 0) {
+		*mdp = mp = mp->m_next;
+		if (mp == NULL)
+			return (EBADRPC);
+		left = mp->m_len;
+		*dposp = mtod(mp, caddr_t);
+	}
+	if (left >= siz) {
+		/* Easy case: the bytes are already contiguous here. */
+		*cp2 = *dposp;
+		*dposp += siz;
+	} else if (mp->m_next == NULL) {
+		/* Not enough data and no further mbuf to draw from. */
+		return (EBADRPC);
+	} else if (siz > MHLEN) {
+		panic("nfs S too big");
+	} else {
+		/*
+		 * The bytes straddle mbufs.  Link a new mbuf in directly
+		 * after the current one and gather the siz bytes into it.
+		 */
+		MGET(mp2, M_WAIT, MT_DATA);
+		mp2->m_next = mp->m_next;
+		mp->m_next = mp2;
+		/* The unread tail of the old mbuf moves to the new one. */
+		mp->m_len -= left;
+		mp = mp2;
+		*cp2 = p = mtod(mp, caddr_t);
+		bcopy(*dposp, p, left);		/* Copy what was left */
+		siz2 = siz-left;
+		p += left;
+		mp2 = mp->m_next;
+		/* Loop around copying up the siz2 bytes */
+		while (siz2 > 0) {
+			if (mp2 == NULL)
+				return (EBADRPC);
+			xfer = (siz2 > mp2->m_len) ? mp2->m_len : siz2;
+			if (xfer > 0) {
+				bcopy(mtod(mp2, caddr_t), p, xfer);
+				/*
+				 * NFSMADV is defined elsewhere; presumably it
+				 * moves mp2's data pointer past the bytes just
+				 * taken -- confirm against its definition.
+				 */
+				NFSMADV(mp2, xfer);
+				mp2->m_len -= xfer;
+				p += xfer;
+				siz2 -= xfer;
+			}
+			if (siz2 > 0)
+				mp2 = mp2->m_next;
+		}
+		mp->m_len = siz;
+		/* Parsing resumes in the last mbuf that data was taken from. */
+		*mdp = mp2;
+		*dposp = mtod(mp2, caddr_t);
+	}
+	return (0);
+}
+
+/*
+ * Move the parse position forward by offs bytes, stepping across mbuf
+ * boundaries as required.  "left" is the number of bytes still available
+ * in the mbuf at *mdp.
+ * On success *mdp is the mbuf reached and *dposp is offset (by whatever
+ * part of offs remains) from the start of that mbuf's data; 0 is
+ * returned.  EBADRPC is returned if the chain ends first.
+ */
+nfs_adv(mdp, dposp, offs, left)
+	struct mbuf **mdp;
+	caddr_t *dposp;
+	int offs;
+	int left;
+{
+	register struct mbuf *cur = *mdp;
+	register int avail;
+
+	/* Use up whole mbufs until the remaining offset fits in one. */
+	for (avail = left; offs > avail; avail = cur->m_len) {
+		offs -= avail;
+		if ((cur = cur->m_next) == NULL)
+			return (EBADRPC);
+	}
+	*mdp = cur;
+	*dposp = mtod(cur, caddr_t) + offs;
+	return (0);
+}
+
+/*
+ * Copy a string into mbufs for the hard cases...
+ * Stores the length word txdr_unsigned(siz) followed by the siz bytes at
+ * cp, starting in the trailing space of *mb (if any) and continuing in
+ * newly allocated mbufs chained after it.
+ *
+ *	mb	in: mbuf to append to; out: last mbuf allocated
+ *	bpos	in: current build position in *mb; out: position just past
+ *		the data in the last mbuf
+ *	cp	the string bytes
+ *	siz	number of bytes at cp
+ *
+ * Always returns 0.
+ */
+nfsm_strtmbuf(mb, bpos, cp, siz)
+	struct mbuf **mb;
+	char **bpos;
+	char *cp;
+	long siz;
+{
+	register struct mbuf *m1, *m2;
+	long left, xfer, len, tlen;
+	u_long *tl;
+	int putsize;
+
+	/* putsize stays 1 until the length word has been written. */
+	putsize = 1;
+	m2 = *mb;
+	left = M_TRAILINGSPACE(m2);
+	if (left > 0) {
+		/*
+		 * Use up the current mbuf first: the length word, then as
+		 * much of the string as the remaining space holds.
+		 * NOTE(review): assumes left is at least NFSX_UNSIGNED and
+		 * that the remaining space does not exceed siz -- confirm
+		 * against the callers.
+		 */
+		tl = ((u_long *)(*bpos));
+		*tl++ = txdr_unsigned(siz);
+		putsize = 0;
+		left -= NFSX_UNSIGNED;
+		m2->m_len += NFSX_UNSIGNED;
+		if (left > 0) {
+			bcopy(cp, (caddr_t) tl, left);
+			siz -= left;
+			cp += left;
+			m2->m_len += left;
+			left = 0;
+		}
+	}
+	/* Loop around adding mbufs */
+	while (siz > 0) {
+		MGET(m1, M_WAIT, MT_DATA);
+		if (siz > MLEN)
+			MCLGET(m1, M_WAIT);
+		/* Start with m_len at the mbuf's capacity as given by NFSMSIZ. */
+		m1->m_len = NFSMSIZ(m1);
+		m2->m_next = m1;
+		m2 = m1;
+		tl = mtod(m1, u_long *);
+		tlen = 0;
+		if (putsize) {
+			/* Length word not written yet: it goes first here. */
+			*tl++ = txdr_unsigned(siz);
+			m1->m_len -= NFSX_UNSIGNED;
+			tlen = NFSX_UNSIGNED;
+			putsize = 0;
+		}
+		if (siz < m1->m_len) {
+			/*
+			 * Final piece: round its length up and, when padding
+			 * is needed, zero the last word before the copy so
+			 * the pad bytes end up as nulls.
+			 */
+			len = nfsm_rndup(siz);
+			xfer = siz;
+			if (xfer < len)
+				*(tl+(xfer>>2)) = 0;
+		} else {
+			/*
+			 * Fill the whole mbuf.
+			 * NOTE(review): when siz equals m1->m_len this is also
+			 * the last piece and len is not rounded up; presumably
+			 * m1->m_len is a multiple of 4 here -- confirm.
+			 */
+			xfer = len = m1->m_len;
+		}
+		bcopy(cp, (caddr_t) tl, xfer);
+		m1->m_len = len+tlen;
+		siz -= xfer;
+		cp += xfer;
+	}
+	/*
+	 * NOTE(review): m1 is assigned only inside the loop above, so this
+	 * relies on at least one new mbuf having been needed -- confirm that
+	 * callers use this routine only when the string does not fit.
+	 */
+	*mb = m1;
+	*bpos = mtod(m1, caddr_t)+m1->m_len;
+	return (0);
+}
+
+/*
+ * Called once to initialize data structures...
+ * Stores the txdr_unsigned() forms of the RPC/NFS constants in globals,
+ * clears the async daemon table, initializes the nfsnode table and the
+ * server structures, sets up the nqnfs server state (only while
+ * nqnfsstarttime is still 0), and finally initializes the request list
+ * and calls nfs_timer().
+ */
+nfs_init()
+{
+	register int i;
+
+	nfsrtt.pos = 0;
+	/* Constants converted once with txdr_unsigned() and kept in globals. */
+	rpc_vers = txdr_unsigned(RPC_VER2);
+	rpc_call = txdr_unsigned(RPC_CALL);
+	rpc_reply = txdr_unsigned(RPC_REPLY);
+	rpc_msgdenied = txdr_unsigned(RPC_MSGDENIED);
+	rpc_msgaccepted = txdr_unsigned(RPC_MSGACCEPTED);
+	rpc_mismatch = txdr_unsigned(RPC_MISMATCH);
+	rpc_autherr = txdr_unsigned(RPC_AUTHERR);
+	rpc_rejectedcred = txdr_unsigned(AUTH_REJECTCRED);
+	rpc_auth_unix = txdr_unsigned(RPCAUTH_UNIX);
+	rpc_auth_kerb = txdr_unsigned(RPCAUTH_NQNFS);
+	nfs_vers = txdr_unsigned(NFS_VER2);
+	nfs_prog = txdr_unsigned(NFS_PROG);
+	nfs_true = txdr_unsigned(TRUE);
+	nfs_false = txdr_unsigned(FALSE);
+	/* Loop thru nfs procids */
+	for (i = 0; i < NFS_NPROCS; i++)
+		nfs_procids[i] = txdr_unsigned(i);
+	/* Ensure async daemons disabled */
+	for (i = 0; i < NFS_MAXASYNCDAEMON; i++)
+		nfs_iodwant[i] = (struct proc *)0;
+	TAILQ_INIT(&nfs_bufq);
+	nfs_xdrneg1 = txdr_unsigned(-1);
+	nfs_nhinit();			/* Init the nfsnode table */
+	nfsrv_init(0);			/* Init server data structures */
+	nfsrv_initcache();		/* Init the server request cache */
+
+	/*
+	 * Initialize the nqnfs server stuff.
+	 * Skipped when nqnfsstarttime has already been given a value.
+	 */
+	if (nqnfsstarttime == 0) {
+		nqnfsstarttime = boottime.tv_sec + nqsrv_maxlease
+			+ nqsrv_clockskew + nqsrv_writeslack;
+		NQLOADNOVRAM(nqnfsstarttime);
+		nqnfs_prog = txdr_unsigned(NQNFS_PROG);
+		nqnfs_vers = txdr_unsigned(NQNFS_VER1);
+		/* Both th_head links of nqthead point back at nqthead itself. */
+		nqthead.th_head[0] = &nqthead;
+		nqthead.th_head[1] = &nqthead;
+		nqfhead = hashinit(NQLCHSZ, M_NQLEASE, &nqfheadhash);
+	}
+
+	/*
+	 * Initialize reply list and start timer
+	 * (the list head nfsreqh is linked to itself in both directions).
+	 */
+	nfsreqh.r_prev = nfsreqh.r_next = &nfsreqh;
+	nfs_timer();
+}
+
+/*
+ * Attribute cache routines.
+ * nfs_loadattrcache() - loads or updates the cache contents from attributes
+ *	that are on the mbuf list
+ * nfs_getattrcache() - returns valid attributes if found in cache, returns
+ *	error otherwise
+ */
+
+/*
+ * Load the attribute cache (that lives in the nfsnode entry) with
+ * the values on the mbuf list and
+ * Iff vaper not NULL
+ *    copy the attributes to *vaper
+ *
+ *	vpp	in/out: the vnode; replaced by the aliased vnode when
+ *		checkalias() finds one for a new device node
+ *	mdp	in/out: mbuf currently being parsed
+ *	dposp	in/out: parse position within that mbuf
+ *	vaper	optional out: copy of the resulting attributes
+ *
+ * Returns 0 on success, the error from nfsm_disct() if the attributes
+ * cannot be extracted, or EOPNOTSUPP for a new fifo node when FIFO is
+ * not configured.
+ */
+nfs_loadattrcache(vpp, mdp, dposp, vaper)
+	struct vnode **vpp;
+	struct mbuf **mdp;
+	caddr_t *dposp;
+	struct vattr *vaper;
+{
+	register struct vnode *vp = *vpp;
+	register struct vattr *vap;
+	register struct nfsv2_fattr *fp;
+	extern int (**spec_nfsv2nodeop_p)();
+	register struct nfsnode *np, *nq, **nhpp;
+	register long t1;
+	caddr_t dpos, cp2;
+	int error = 0, isnq;
+	struct mbuf *md;
+	enum vtype vtyp;
+	u_short vmode;
+	long rdev;
+	struct timespec mtime;
+	struct vnode *nvp;
+
+	md = *mdp;
+	dpos = *dposp;
+	/* Bytes left in the current mbuf from the parse position. */
+	t1 = (mtod(md, caddr_t) + md->m_len) - dpos;
+	isnq = (VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NQNFS);
+	/* Get the attribute structure as one contiguous piece. */
+	if (error = nfsm_disct(&md, &dpos, NFSX_FATTR(isnq), t1, &cp2))
+		return (error);
+	fp = (struct nfsv2_fattr *)cp2;
+	vtyp = nfstov_type(fp->fa_type);
+	vmode = fxdr_unsigned(u_short, fp->fa_mode);
+	/* For VNON and VREG the type is taken from the mode bits instead. */
+	if (vtyp == VNON || vtyp == VREG)
+		vtyp = IFTOVT(vmode);
+	/* The rdev and mtime fields differ between the nqnfs and nfs layouts. */
+	if (isnq) {
+		rdev = fxdr_unsigned(long, fp->fa_nqrdev);
+		fxdr_nqtime(&fp->fa_nqmtime, &mtime);
+	} else {
+		rdev = fxdr_unsigned(long, fp->fa_nfsrdev);
+		fxdr_nfstime(&fp->fa_nfsmtime, &mtime);
+	}
+	/*
+	 * If v_type == VNON it is a new node, so fill in the v_type,
+	 * n_mtime fields. Check to see if it represents a special
+	 * device, and if so, check for a possible alias. Once the
+	 * correct vnode has been obtained, fill in the rest of the
+	 * information.
+	 */
+	np = VTONFS(vp);
+	if (vp->v_type == VNON) {
+		/* A character device with rdev 0xffffffff is treated as a fifo. */
+		if (vtyp == VCHR && rdev == 0xffffffff)
+			vp->v_type = vtyp = VFIFO;
+		else
+			vp->v_type = vtyp;
+		if (vp->v_type == VFIFO) {
+#ifdef FIFO
+			extern int (**fifo_nfsv2nodeop_p)();
+			vp->v_op = fifo_nfsv2nodeop_p;
+#else
+			return (EOPNOTSUPP);
+#endif /* FIFO */
+		}
+		if (vp->v_type == VCHR || vp->v_type == VBLK) {
+			vp->v_op = spec_nfsv2nodeop_p;
+			if (nvp = checkalias(vp, (dev_t)rdev, vp->v_mount)) {
+				/*
+				 * Discard unneeded vnode, but save its nfsnode.
+				 * The nfsnode is first unlinked from its hash
+				 * chain and handed over to the alias vnode.
+				 */
+				if (nq = np->n_forw)
+					nq->n_back = np->n_back;
+				*np->n_back = nq;
+				nvp->v_data = vp->v_data;
+				vp->v_data = NULL;
+				vp->v_op = spec_vnodeop_p;
+				vrele(vp);
+				vgone(vp);
+				/*
+				 * Reinitialize aliased node.
+				 * The nfsnode goes back at the head of the
+				 * hash chain for its file handle.
+				 */
+				np->n_vnode = nvp;
+				nhpp = (struct nfsnode **)nfs_hash(&np->n_fh);
+				if (nq = *nhpp)
+					nq->n_back = &np->n_forw;
+				np->n_forw = nq;
+				np->n_back = nhpp;
+				*nhpp = np;
+				*vpp = vp = nvp;
+			}
+		}
+		np->n_mtime = mtime.ts_sec;
+	}
+	/* Fill in the cached vattr kept in the nfsnode. */
+	vap = &np->n_vattr;
+	vap->va_type = vtyp;
+	vap->va_mode = (vmode & 07777);
+	vap->va_nlink = fxdr_unsigned(u_short, fp->fa_nlink);
+	vap->va_uid = fxdr_unsigned(uid_t, fp->fa_uid);
+	vap->va_gid = fxdr_unsigned(gid_t, fp->fa_gid);
+	vap->va_rdev = (dev_t)rdev;
+	vap->va_mtime = mtime;
+	vap->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0];
+	if (isnq) {
+		fxdr_hyper(&fp->fa_nqsize, &vap->va_size);
+		vap->va_blocksize = fxdr_unsigned(long, fp->fa_nqblocksize);
+		fxdr_hyper(&fp->fa_nqbytes, &vap->va_bytes);
+		vap->va_fileid = fxdr_unsigned(long, fp->fa_nqfileid);
+		fxdr_nqtime(&fp->fa_nqatime, &vap->va_atime);
+		vap->va_flags = fxdr_unsigned(u_long, fp->fa_nqflags);
+		fxdr_nqtime(&fp->fa_nqctime, &vap->va_ctime);
+		vap->va_gen = fxdr_unsigned(u_long, fp->fa_nqgen);
+		fxdr_hyper(&fp->fa_nqfilerev, &vap->va_filerev);
+	} else {
+		vap->va_size = fxdr_unsigned(u_long, fp->fa_nfssize);
+		vap->va_blocksize = fxdr_unsigned(long, fp->fa_nfsblocksize);
+		vap->va_bytes = fxdr_unsigned(long, fp->fa_nfsblocks) * NFS_FABLKSIZE;
+		vap->va_fileid = fxdr_unsigned(long, fp->fa_nfsfileid);
+		fxdr_nfstime(&fp->fa_nfsatime, &vap->va_atime);
+		vap->va_flags = 0;
+		/*
+		 * In the nfs layout only the seconds of ctime are used as a
+		 * time; the nfs_usec field of fa_nfsctime supplies va_gen.
+		 */
+		vap->va_ctime.ts_sec = fxdr_unsigned(long, fp->fa_nfsctime.nfs_sec);
+		vap->va_ctime.ts_nsec = 0;
+		vap->va_gen = fxdr_unsigned(u_long, fp->fa_nfsctime.nfs_usec);
+		vap->va_filerev = 0;
+	}
+	/*
+	 * Reconcile the size just received with n_size.  For a regular file
+	 * marked NMODIFIED the larger of the two is kept in both places;
+	 * otherwise the received size replaces n_size.
+	 */
+	if (vap->va_size != np->n_size) {
+		if (vap->va_type == VREG) {
+			if (np->n_flag & NMODIFIED) {
+				if (vap->va_size < np->n_size)
+					vap->va_size = np->n_size;
+				else
+					np->n_size = vap->va_size;
+			} else
+				np->n_size = vap->va_size;
+			vnode_pager_setsize(vp, (u_long)np->n_size);
+		} else
+			np->n_size = vap->va_size;
+	}
+	/* Stamp the cache entry with the current time. */
+	np->n_attrstamp = time.tv_sec;
+	*dposp = dpos;
+	*mdp = md;
+	if (vaper != NULL) {
+		bcopy((caddr_t)vap, (caddr_t)vaper, sizeof(*vap));
+#ifdef notdef
+		if ((np->n_flag & NMODIFIED) && np->n_size > vap->va_size)
+		if (np->n_size > vap->va_size)
+			vaper->va_size = np->n_size;
+#endif
+		/*
+		 * With NCHG set, the times kept in the nfsnode override the
+		 * ones just received in the caller's copy (NACC: access
+		 * time, NUPD: modify time).
+		 */
+		if (np->n_flag & NCHG) {
+			if (np->n_flag & NACC) {
+				vaper->va_atime.ts_sec = np->n_atim.tv_sec;
+				vaper->va_atime.ts_nsec =
+				    np->n_atim.tv_usec * 1000;
+			}
+			if (np->n_flag & NUPD) {
+				vaper->va_mtime.ts_sec = np->n_mtim.tv_sec;
+				vaper->va_mtime.ts_nsec =
+				    np->n_mtim.tv_usec * 1000;
+			}
+		}
+	}
+	return (0);
+}
+
+/*
+ * Fetch attributes from the per-node cache.
+ * On a mount with NFSMNT_NQLOOKLEASE the entry is usable only if
+ * NQNFS_CKCACHABLE() accepts it for reading and n_attrstamp is non-zero;
+ * on other mounts it is usable while younger than NFS_ATTRTIMEO().
+ * On a hit the cached attributes are copied to *vaper and 0 is returned;
+ * on a miss ENOENT is returned.
+ */
+nfs_getattrcache(vp, vaper)
+	register struct vnode *vp;
+	struct vattr *vaper;
+{
+	register struct nfsnode *np = VTONFS(vp);
+	register struct vattr *vap = &np->n_vattr;
+	int stale;
+
+	if (VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NQLOOKLEASE)
+		stale = (!NQNFS_CKCACHABLE(vp, NQL_READ) ||
+		    np->n_attrstamp == 0);
+	else
+		stale = ((time.tv_sec - np->n_attrstamp) >= NFS_ATTRTIMEO(np));
+	if (stale) {
+		nfsstats.attrcache_misses++;
+		return (ENOENT);
+	}
+	nfsstats.attrcache_hits++;
+
+	/*
+	 * Bring the cached size and n_size into agreement.  A regular file
+	 * marked NMODIFIED keeps the larger value; in every other case the
+	 * cached size wins.
+	 */
+	if (vap->va_size != np->n_size) {
+		if (vap->va_type != VREG)
+			np->n_size = vap->va_size;
+		else {
+			if ((np->n_flag & NMODIFIED) &&
+			    vap->va_size < np->n_size)
+				vap->va_size = np->n_size;
+			else
+				np->n_size = vap->va_size;
+			vnode_pager_setsize(vp, (u_long)np->n_size);
+		}
+	}
+	bcopy((caddr_t)vap, (caddr_t)vaper, sizeof(struct vattr));
+#ifdef notdef
+	if ((np->n_flag & NMODIFIED) == 0) {
+		np->n_size = vaper->va_size;
+		vnode_pager_setsize(vp, (u_long)np->n_size);
+	} else if (np->n_size > vaper->va_size)
+	if (np->n_size > vaper->va_size)
+		vaper->va_size = np->n_size;
+#endif
+	/* Without NCHG there are no node-held times to overlay. */
+	if ((np->n_flag & NCHG) == 0)
+		return (0);
+	if (np->n_flag & NACC) {
+		vaper->va_atime.ts_sec = np->n_atim.tv_sec;
+		vaper->va_atime.ts_nsec = np->n_atim.tv_usec * 1000;
+	}
+	if (np->n_flag & NUPD) {
+		vaper->va_mtime.ts_sec = np->n_mtim.tv_sec;
+		vaper->va_mtime.ts_nsec = np->n_mtim.tv_usec * 1000;
+	}
+	return (0);
+}
+
+/*
+ * Set up nameidata for a lookup() call and do it
+ *
+ *	ndp	nameidata to fill in; its ni_cnd component name is used
+ *	fhp	file handle of the directory to start from
+ *	len	length of the name held in the mbuf list
+ *	slp, nam  passed through to nfsrv_fhtovp()
+ *	mdp	in/out: mbuf currently being parsed
+ *	dposp	in/out: parse position within that mbuf
+ *	p	process recorded in cn_proc for the lookup
+ *
+ * Returns 0 on success.  Errors: EBADRPC if the mbuf list ends inside the
+ * name, EINVAL if the name contains a null or '/' or the lookup ends on a
+ * symbolic link, ENOTDIR if the file handle is not a directory, or
+ * whatever nfs_adv(), nfsrv_fhtovp() or lookup() return.
+ * The name buffer is freed before returning unless the lookup succeeded
+ * and SAVENAME or SAVESTART was requested, in which case HASBUF is set
+ * and the buffer is left in cn_pnbuf.
+ */
+nfs_namei(ndp, fhp, len, slp, nam, mdp, dposp, p)
+	register struct nameidata *ndp;
+	fhandle_t *fhp;
+	int len;
+	struct nfssvc_sock *slp;
+	struct mbuf *nam;
+	struct mbuf **mdp;
+	caddr_t *dposp;
+	struct proc *p;
+{
+	register int i, rem;
+	register struct mbuf *md;
+	register char *fromcp, *tocp;
+	struct vnode *dp;
+	int error, rdonly;
+	struct componentname *cnp = &ndp->ni_cnd;
+
+	MALLOC(cnp->cn_pnbuf, char *, len + 1, M_NAMEI, M_WAITOK);
+	/*
+	 * Copy the name from the mbuf list to cnp->cn_pnbuf
+	 * and set the various ndp fields appropriately.
+	 */
+	fromcp = *dposp;
+	tocp = cnp->cn_pnbuf;
+	md = *mdp;
+	/* Bytes left in the current mbuf from the parse position. */
+	rem = mtod(md, caddr_t) + md->m_len - fromcp;
+	cnp->cn_hash = 0;
+	for (i = 0; i < len; i++) {
+		/* Move on to the next mbuf that holds data. */
+		while (rem == 0) {
+			md = md->m_next;
+			if (md == NULL) {
+				error = EBADRPC;
+				goto out;
+			}
+			fromcp = mtod(md, caddr_t);
+			rem = md->m_len;
+		}
+		/* The name must be a single component with no embedded null. */
+		if (*fromcp == '\0' || *fromcp == '/') {
+			error = EINVAL;
+			goto out;
+		}
+		/* cn_hash is the sum of the name's bytes. */
+		cnp->cn_hash += (unsigned char)*fromcp;
+		*tocp++ = *fromcp++;
+		rem--;
+	}
+	*tocp = '\0';
+	*mdp = md;
+	*dposp = fromcp;
+	/* Skip the bytes that pad the name out to an nfsm_rndup() boundary. */
+	len = nfsm_rndup(len)-len;
+	if (len > 0) {
+		if (rem >= len)
+			*dposp += len;
+		else if (error = nfs_adv(mdp, dposp, len, rem))
+			goto out;
+	}
+	ndp->ni_pathlen = tocp - cnp->cn_pnbuf;
+	cnp->cn_nameptr = cnp->cn_pnbuf;
+	/*
+	 * Extract and set starting directory.
+	 */
+	if (error = nfsrv_fhtovp(fhp, FALSE, &dp, ndp->ni_cnd.cn_cred, slp,
+	    nam, &rdonly))
+		goto out;
+	if (dp->v_type != VDIR) {
+		vrele(dp);
+		error = ENOTDIR;
+		goto out;
+	}
+	ndp->ni_startdir = dp;
+	/* NOCROSSMOUNT is always set; RDONLY as well when nfsrv_fhtovp() says so. */
+	if (rdonly)
+		cnp->cn_flags |= (NOCROSSMOUNT | RDONLY);
+	else
+		cnp->cn_flags |= NOCROSSMOUNT;
+	/*
+	 * And call lookup() to do the real work
+	 */
+	cnp->cn_proc = p;
+	if (error = lookup(ndp))
+		goto out;
+	/*
+	 * Check for encountering a symbolic link
+	 * (the vnodes lookup() returned are released and EINVAL returned).
+	 */
+	if (cnp->cn_flags & ISSYMLINK) {
+		if ((cnp->cn_flags & LOCKPARENT) && ndp->ni_pathlen == 1)
+			vput(ndp->ni_dvp);
+		else
+			vrele(ndp->ni_dvp);
+		vput(ndp->ni_vp);
+		ndp->ni_vp = NULL;
+		error = EINVAL;
+		goto out;
+	}
+	/*
+	 * Check for saved name request
+	 */
+	if (cnp->cn_flags & (SAVENAME | SAVESTART)) {
+		cnp->cn_flags |= HASBUF;
+		return (0);
+	}
+	/* Success without a saved name also falls through to free the buffer. */
+out:
+	FREE(cnp->cn_pnbuf, M_NAMEI);
+	return (error);
+}
+
+/*
+ * A variant of m_adj() that trims len bytes from the tail of the chain
+ * only, and then overwrites the last nul bytes of what remains with
+ * nulls.  Mbufs that end up past the new end of data are kept on the
+ * chain with their length set to zero.
+ */
+void
+nfsm_adj(mp, len, nul)
+	struct mbuf *mp;
+	register int len;
+	int nul;
+{
+	register struct mbuf *m;
+	register int count, i;
+	register char *cp;
+
+	/* Total up the chain, leaving m on its final mbuf. */
+	count = mp->m_len;
+	for (m = mp; m->m_next != (struct mbuf *)0; ) {
+		m = m->m_next;
+		count += m->m_len;
+	}
+
+	if (m->m_len > len) {
+		/* The whole trim comes out of the final mbuf. */
+		m->m_len -= len;
+	} else {
+		/*
+		 * The chain should hold "count" bytes afterwards.  Walk it
+		 * from the front to the mbuf where that many bytes end and
+		 * cut that mbuf short.
+		 */
+		count -= len;
+		if (count < 0)
+			count = 0;
+		for (m = mp; m; m = m->m_next) {
+			if (m->m_len >= count) {
+				m->m_len = count;
+				break;
+			}
+			count -= m->m_len;
+		}
+	}
+
+	/* Null out the last nul bytes of the mbuf now holding the end. */
+	if (nul > 0) {
+		cp = mtod(m, caddr_t) + m->m_len;
+		for (i = nul; i > 0; i--)
+			*--cp = '\0';
+	}
+
+	/* Everything after that mbuf no longer carries data. */
+	for (m = m->m_next; m; m = m->m_next)
+		m->m_len = 0;
+}
+
+/*
+ * nfsrv_fhtovp() - convert a fh to a vnode ptr (optionally locked)
+ * 	- look up fsid in mount list (if not found ret error)
+ *	- get vp and export rights by calling VFS_FHTOVP()
+ *	- if the export has MNT_EXKERB, replace the uid and groups in cred
+ *	  with the mapping registered on slp for cred->cr_uid
+ *	- else if cred->cr_uid == 0 or MNT_EXPORTANON set it to credanon
+ *	- if not lockflag unlock it with VOP_UNLOCK()
+ *
+ *	fhp	file handle to convert
+ *	lockflag  non-zero to return the vnode still locked
+ *	vpp	out: the vnode, or NULL on error
+ *	cred	in/out: credential of the request, rewritten as above
+ *	slp	server socket whose uid hash is searched for MNT_EXKERB
+ *	nam	client address, passed to VFS_FHTOVP()
+ *	rdonlyp	out: 1 if the export has MNT_EXRDONLY, else 0
+ *
+ * Returns 0 on success, ESTALE if the file system is not mounted, the
+ * error from VFS_FHTOVP(), or NQNFS_AUTHERR if MNT_EXKERB is set and no
+ * mapping exists for the uid.
+ *
+ * Whenever a group list is copied into cred, cr_ngroups is set to the
+ * number of groups copied so that the count always describes the list.
+ */
+nfsrv_fhtovp(fhp, lockflag, vpp, cred, slp, nam, rdonlyp)
+	fhandle_t *fhp;
+	int lockflag;
+	struct vnode **vpp;
+	struct ucred *cred;
+	struct nfssvc_sock *slp;
+	struct mbuf *nam;
+	int *rdonlyp;
+{
+	register struct mount *mp;
+	register struct nfsuid *uidp;
+	register int i;
+	struct ucred *credanon;
+	int error, exflags;
+
+	*vpp = (struct vnode *)0;
+	if ((mp = getvfs(&fhp->fh_fsid)) == NULL)
+		return (ESTALE);
+	if (error = VFS_FHTOVP(mp, &fhp->fh_fid, nam, vpp, &exflags, &credanon))
+		return (error);
+	/*
+	 * Check/setup credentials.
+	 */
+	if (exflags & MNT_EXKERB) {
+		/* Look for a mapping registered for this uid on the socket. */
+		uidp = slp->ns_uidh[NUIDHASH(cred->cr_uid)];
+		while (uidp) {
+			if (uidp->nu_uid == cred->cr_uid)
+				break;
+			uidp = uidp->nu_hnext;
+		}
+		if (uidp) {
+			cred->cr_uid = uidp->nu_cr.cr_uid;
+			for (i = 0; i < uidp->nu_cr.cr_ngroups && i < NGROUPS;
+			    i++)
+				cred->cr_groups[i] = uidp->nu_cr.cr_groups[i];
+			/* Keep the count in step with the copied list. */
+			cred->cr_ngroups = i;
+		} else {
+			vput(*vpp);
+			return (NQNFS_AUTHERR);
+		}
+	} else if (cred->cr_uid == 0 || (exflags & MNT_EXPORTANON)) {
+		cred->cr_uid = credanon->cr_uid;
+		for (i = 0; i < credanon->cr_ngroups && i < NGROUPS; i++)
+			cred->cr_groups[i] = credanon->cr_groups[i];
+		/* Keep the count in step with the copied list. */
+		cred->cr_ngroups = i;
+	}
+	if (exflags & MNT_EXRDONLY)
+		*rdonlyp = 1;
+	else
+		*rdonlyp = 0;
+	if (!lockflag)
+		VOP_UNLOCK(*vpp);
+	return (0);
+}
+
+/*
+ * Decide whether the address in mbuf nam names the same host as haddr,
+ * comparing according to the given address family.
+ * Returns 1 for a match and 0 otherwise, including for any family that
+ * is not handled here.
+ * AF_INET is special-cased: haddr carries the address itself
+ * (had_inetaddr) rather than an mbuf, since a "struct in_addr" is only
+ * 4 bytes.
+ */
+netaddr_match(family, haddr, nam)
+	int family;
+	union nethostaddr *haddr;
+	struct mbuf *nam;
+{
+	if (family == AF_INET) {
+		register struct sockaddr_in *sin;
+
+		sin = mtod(nam, struct sockaddr_in *);
+		return (sin->sin_family == AF_INET &&
+		    sin->sin_addr.s_addr == haddr->had_inetaddr);
+	}
+#ifdef ISO
+	if (family == AF_ISO) {
+		register struct sockaddr_iso *theirs, *ours;
+
+		theirs = mtod(nam, struct sockaddr_iso *);
+		ours = mtod(haddr->had_nam, struct sockaddr_iso *);
+		return (theirs->siso_family == AF_ISO &&
+		    theirs->siso_nlen > 0 &&
+		    theirs->siso_nlen == ours->siso_nlen &&
+		    SAME_ISOADDR(theirs, ours));
+	}
+#endif	/* ISO */
+	return (0);
+}
diff --git a/sys/nfsserver/nfs_syscalls.c b/sys/nfsserver/nfs_syscalls.c
new file mode 100644
index 00000000000..5d86b42ee20
--- /dev/null
+++ b/sys/nfsserver/nfs_syscalls.c
@@ -0,0 +1,874 @@
+/*
+ * Copyright (c) 1989, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Rick Macklem at The University of Guelph.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)nfs_syscalls.c	8.3 (Berkeley) 1/4/94
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/file.h>
+#include <sys/stat.h>
+#include <sys/vnode.h>
+#include <sys/mount.h>
+#include <sys/proc.h>
+#include <sys/uio.h>
+#include <sys/malloc.h>
+#include <sys/buf.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/domain.h>
+#include <sys/protosw.h>
+#include <sys/namei.h>
+#include <sys/syslog.h>
+
+#include <netinet/in.h>
+#include <netinet/tcp.h>
+#ifdef ISO
+#include <netiso/iso.h>
+#endif
+#include <nfs/rpcv2.h>
+#include <nfs/nfsv2.h>
+#include <nfs/nfs.h>
+#include <nfs/nfsrvcache.h>
+#include <nfs/nfsmount.h>
+#include <nfs/nfsnode.h>
+#include <nfs/nqnfs.h>
+#include <nfs/nfsrtt.h>
+
+/* Global defs. */
+extern u_long nfs_prog, nfs_vers;
+extern int (*nfsrv_procs[NFS_NPROCS])();
+extern struct proc *nfs_iodwant[NFS_MAXASYNCDAEMON];
+extern int nfs_numasync;
+extern time_t nqnfsstarttime;
+extern struct nfsrv_req nsrvq_head;
+extern struct nfsd nfsd_head;
+extern int nqsrv_writeslack;
+extern int nfsrtton;
+/* Server socket slots reused by nfssvc_addsock() for UDP and (ISO) CLTP. */
+struct nfssvc_sock *nfs_udpsock, *nfs_cltpsock;
+/* Limit on ns_numuids checked in nfssvc() before allocating an nfsuid. */
+int nuidhash_max = NFS_MAXUIDHASH;
+/* Incremented each time nfssvc_nfsd() allocates a struct nfsd. */
+static int nfs_numnfsd = 0;
+/* Number of nfsds currently asleep in nfssvc_nfsd() waiting for work. */
+int nfsd_waiting = 0;
+/*
+ * NQNFS start-up state used by nfssvc_nfsd(): notstarted is cleared once
+ * nqnfsstarttime has passed; modify_flag notes that a write request was
+ * seen before then, which pushes nqnfsstarttime out by nqsrv_writeslack.
+ */
+static int notstarted = 1;
+static int modify_flag = 0;
+static struct nfsdrt nfsdrt;
+void nfsrv_cleancache(), nfsrv_rcv(), nfsrv_wakenfsd(), nfs_sndunlock();
+static void nfsd_rt();
+void nfsrv_slpderef(), nfsrv_init();
+
+#define	TRUE	1
+#define	FALSE	0
+
+static int nfs_asyncdaemon[NFS_MAXASYNCDAEMON];
+/*
+ * NFS server system calls
+ * getfh() lives here too, but maybe should move to kern/vfs_syscalls.c
+ */
+
+/*
+ * Get file handle system call
+ * Looks up the path uap->fname and copies the file handle for it (the
+ * file system id plus the file id from VFS_VPTOFH()) out to uap->fhp.
+ * Restricted to the super user.  Returns 0 or an error from suser(),
+ * namei(), VFS_VPTOFH() or copyout().
+ */
+struct getfh_args {
+	char	*fname;
+	fhandle_t *fhp;
+};
+getfh(p, uap, retval)
+	struct proc *p;
+	register struct getfh_args *uap;
+	int *retval;
+{
+	register struct vnode *vp;
+	fhandle_t fh;
+	int error;
+	struct nameidata nd;
+
+	/*
+	 * Must be super user
+	 */
+	error = suser(p->p_ucred, &p->p_acflag);
+	if (error)
+		return (error);
+
+	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, uap->fname, p);
+	error = namei(&nd);
+	if (error)
+		return (error);
+	vp = nd.ni_vp;
+
+	/* Build the handle from a zeroed structure, then drop the vnode. */
+	bzero((caddr_t)&fh, sizeof(fh));
+	fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
+	error = VFS_VPTOFH(vp, &fh.fh_fid);
+	vput(vp);
+
+	if (error == 0)
+		error = copyout((caddr_t)&fh, (caddr_t)uap->fhp, sizeof (fh));
+	return (error);
+}
+
+/*
+ * Head of the circular list of server sockets built by nfssvc_addsock().
+ * Its ns_flag also carries the SLP_INIT/SLP_WANTINIT bits that nfssvc()
+ * waits on.
+ */
+static struct nfssvc_sock nfssvc_sockhead;
+
+/*
+ * Nfs server pseudo system call for the nfsd's
+ * Based on the flag value it either:
+ * - adds a socket to the selection list
+ * - remains in the kernel as an nfsd
+ * - remains in the kernel as an nfsiod
+ * (NFSSVC_MNTD additionally hands an nqnfs mount to nqnfs_clientd().)
+ *
+ * Restricted to the super user.  Returns 0 or an errno value; EINTR and
+ * ERESTART from the service routines are reported as 0.
+ */
+struct nfssvc_args {
+	int flag;
+	caddr_t argp;
+};
+nfssvc(p, uap, retval)
+	struct proc *p;
+	register struct nfssvc_args *uap;
+	int *retval;
+{
+	struct nameidata nd;
+	struct file *fp;
+	struct mbuf *nam;
+	struct nfsd_args nfsdarg;
+	struct nfsd_srvargs nfsd_srvargs, *nsd = &nfsd_srvargs;
+	struct nfsd_cargs ncd;
+	struct nfsd *nfsd;
+	struct nfssvc_sock *slp;
+	struct nfsuid *nuidp, **nuh;
+	struct nfsmount *nmp;
+	int error;
+
+	/*
+	 * Must be super user
+	 */
+	if (error = suser(p->p_ucred, &p->p_acflag))
+		return (error);
+	/* Wait while SLP_INIT is set on the socket list head. */
+	while (nfssvc_sockhead.ns_flag & SLP_INIT) {
+		nfssvc_sockhead.ns_flag |= SLP_WANTINIT;
+		(void) tsleep((caddr_t)&nfssvc_sockhead, PSOCK, "nfsd init", 0);
+	}
+	if (uap->flag & NFSSVC_BIOD)
+		error = nfssvc_iod(p);
+	else if (uap->flag & NFSSVC_MNTD) {
+		if (error = copyin(uap->argp, (caddr_t)&ncd, sizeof (ncd)))
+			return (error);
+		NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
+			ncd.ncd_dirp, p);
+		if (error = namei(&nd))
+			return (error);
+		/* The path must name the root of a mounted file system. */
+		if ((nd.ni_vp->v_flag & VROOT) == 0)
+			error = EINVAL;
+		nmp = VFSTONFS(nd.ni_vp->v_mount);
+		vput(nd.ni_vp);
+		if (error)
+			return (error);
+		/* Already marked NFSMNT_MNTD and no authentication reply. */
+		if ((nmp->nm_flag & NFSMNT_MNTD) &&
+			(uap->flag & NFSSVC_GOTAUTH) == 0)
+			return (0);
+		nmp->nm_flag |= NFSMNT_MNTD;
+		error = nqnfs_clientd(nmp, p->p_ucred, &ncd, uap->flag,
+			uap->argp, p);
+	} else if (uap->flag & NFSSVC_ADDSOCK) {
+		if (error = copyin(uap->argp, (caddr_t)&nfsdarg,
+		    sizeof(nfsdarg)))
+			return (error);
+		if (error = getsock(p->p_fd, nfsdarg.sock, &fp))
+			return (error);
+		/*
+		 * Get the client address for connected sockets.
+		 */
+		if (nfsdarg.name == NULL || nfsdarg.namelen == 0)
+			nam = (struct mbuf *)0;
+		else if (error = sockargs(&nam, nfsdarg.name, nfsdarg.namelen,
+			MT_SONAME))
+			return (error);
+		error = nfssvc_addsock(fp, nam);
+	} else {
+		if (error = copyin(uap->argp, (caddr_t)nsd, sizeof (*nsd)))
+			return (error);
+		if ((uap->flag & NFSSVC_AUTHIN) && (nfsd = nsd->nsd_nfsd) &&
+			(nfsd->nd_slp->ns_flag & SLP_VALID)) {
+			slp = nfsd->nd_slp;
+
+			/*
+			 * First check to see if another nfsd has already
+			 * added this credential.
+			 */
+			nuidp = slp->ns_uidh[NUIDHASH(nsd->nsd_uid)];
+			while (nuidp) {
+				if (nuidp->nu_uid == nsd->nsd_uid)
+					break;
+				nuidp = nuidp->nu_hnext;
+			}
+			if (!nuidp) {
+			    /*
+			     * Nope, so we will.
+			     */
+			    if (slp->ns_numuids < nuidhash_max) {
+				slp->ns_numuids++;
+				nuidp = (struct nfsuid *)
+				   malloc(sizeof (struct nfsuid), M_NFSUID,
+					M_WAITOK);
+			    } else
+				nuidp = (struct nfsuid *)0;
+			    /* The socket may have gone invalid in malloc(). */
+			    if ((slp->ns_flag & SLP_VALID) == 0) {
+				if (nuidp)
+				    free((caddr_t)nuidp, M_NFSUID);
+			    } else {
+				if (nuidp == (struct nfsuid *)0) {
+				    /*
+				     * At the limit: recycle the entry at
+				     * the tail of the LRU list.  Unlink
+				     * it from its hash chain too; when it
+				     * is the first entry on the chain
+				     * (nu_hprev is null) the bucket head
+				     * has to be moved on, or the bucket
+				     * would keep pointing at the recycled
+				     * entry.
+				     */
+				    nuidp = slp->ns_lruprev;
+				    remque(nuidp);
+				    if (nuidp->nu_hprev)
+					nuidp->nu_hprev->nu_hnext =
+					    nuidp->nu_hnext;
+				    else
+					slp->ns_uidh[NUIDHASH(nuidp->nu_uid)] =
+					    nuidp->nu_hnext;
+				    if (nuidp->nu_hnext)
+					nuidp->nu_hnext->nu_hprev =
+					    nuidp->nu_hprev;
+				}
+				nuidp->nu_cr = nsd->nsd_cr;
+				if (nuidp->nu_cr.cr_ngroups > NGROUPS)
+					nuidp->nu_cr.cr_ngroups = NGROUPS;
+				nuidp->nu_cr.cr_ref = 1;
+				nuidp->nu_uid = nsd->nsd_uid;
+				/* Front of the LRU list, head of its bucket. */
+				insque(nuidp, (struct nfsuid *)slp);
+				nuh = &slp->ns_uidh[NUIDHASH(nsd->nsd_uid)];
+				if (nuidp->nu_hnext = *nuh)
+				    nuidp->nu_hnext->nu_hprev = nuidp;
+				nuidp->nu_hprev = (struct nfsuid *)0;
+				*nuh = nuidp;
+			    }
+			}
+		}
+		if ((uap->flag & NFSSVC_AUTHINFAIL) && (nfsd = nsd->nsd_nfsd))
+			nfsd->nd_flag |= NFSD_AUTHFAIL;
+		error = nfssvc_nfsd(nsd, uap->argp, p);
+	}
+	if (error == EINTR || error == ERESTART)
+		error = 0;
+	return (error);
+}
+
+/*
+ * Adds a socket to the list for servicing by nfsds.
+ *
+ *	fp	file for the socket; its f_count is incremented on success
+ *	mynam	mbuf holding the client address, or null; freed here on
+ *		every error return, otherwise kept in ns_nam
+ *
+ * Returns 0 on success, EPERM if the UDP (or, with ISO, CLTP) slot is
+ * already valid, or the error from soreserve().
+ */
+nfssvc_addsock(fp, mynam)
+	struct file *fp;
+	struct mbuf *mynam;
+{
+	register struct mbuf *m;
+	register int siz;
+	register struct nfssvc_sock *slp;
+	register struct socket *so;
+	struct nfssvc_sock *tslp;
+	int error, s;
+
+	so = (struct socket *)fp->f_data;
+	tslp = (struct nfssvc_sock *)0;
+	/*
+	 * Add it to the list, as required.
+	 * UDP and CLTP sockets use the existing nfs_udpsock/nfs_cltpsock
+	 * structures, each of which may be valid only once.
+	 */
+	if (so->so_proto->pr_protocol == IPPROTO_UDP) {
+		tslp = nfs_udpsock;
+		if (tslp->ns_flag & SLP_VALID) {
+			m_freem(mynam);
+			return (EPERM);
+		}
+#ifdef ISO
+	} else if (so->so_proto->pr_protocol == ISOPROTO_CLTP) {
+		tslp = nfs_cltpsock;
+		if (tslp->ns_flag & SLP_VALID) {
+			m_freem(mynam);
+			return (EPERM);
+		}
+#endif /* ISO */
+	}
+	/* Stream sockets reserve an extra u_long on top of NFS_MAXPACKET. */
+	if (so->so_type == SOCK_STREAM)
+		siz = NFS_MAXPACKET + sizeof (u_long);
+	else
+		siz = NFS_MAXPACKET;
+	if (error = soreserve(so, siz, siz)) {
+		m_freem(mynam);
+		return (error);
+	}
+
+	/*
+	 * Set protocol specific options { for now TCP only } and
+	 * reserve some space. For datagram sockets, this can get called
+	 * repeatedly for the same socket, but that isn't harmful.
+	 */
+	if (so->so_type == SOCK_STREAM) {
+		MGET(m, M_WAIT, MT_SOOPTS);
+		*mtod(m, int *) = 1;
+		m->m_len = sizeof(int);
+		sosetopt(so, SOL_SOCKET, SO_KEEPALIVE, m);
+	}
+	if (so->so_proto->pr_domain->dom_family == AF_INET &&
+	    so->so_proto->pr_protocol == IPPROTO_TCP) {
+		MGET(m, M_WAIT, MT_SOOPTS);
+		*mtod(m, int *) = 1;
+		m->m_len = sizeof(int);
+		sosetopt(so, IPPROTO_TCP, TCP_NODELAY, m);
+	}
+	/* Clear SB_NOINTR and any timeout on both socket buffers. */
+	so->so_rcv.sb_flags &= ~SB_NOINTR;
+	so->so_rcv.sb_timeo = 0;
+	so->so_snd.sb_flags &= ~SB_NOINTR;
+	so->so_snd.sb_timeo = 0;
+	if (tslp)
+		slp = tslp;
+	else {
+		/*
+		 * Any other socket gets a new, zeroed structure, linked in
+		 * just before nfssvc_sockhead (the tail of the list), with
+		 * its uid LRU links pointing back at itself.
+		 */
+		slp = (struct nfssvc_sock *)
+			malloc(sizeof (struct nfssvc_sock), M_NFSSVC, M_WAITOK);
+		bzero((caddr_t)slp, sizeof (struct nfssvc_sock));
+		slp->ns_prev = nfssvc_sockhead.ns_prev;
+		slp->ns_prev->ns_next = slp;
+		slp->ns_next = &nfssvc_sockhead;
+		nfssvc_sockhead.ns_prev = slp;
+		slp->ns_lrunext = slp->ns_lruprev = (struct nfsuid *)slp;
+	}
+	slp->ns_so = so;
+	slp->ns_nam = mynam;
+	fp->f_count++;
+	slp->ns_fp = fp;
+	/*
+	 * At splnet: install nfsrv_rcv as the socket's upcall, mark the
+	 * socket valid with SLP_NEEDQ set, and wake an nfsd for it.
+	 */
+	s = splnet();
+	so->so_upcallarg = (caddr_t)slp;
+	so->so_upcall = nfsrv_rcv;
+	slp->ns_flag = (SLP_VALID | SLP_NEEDQ);
+	nfsrv_wakenfsd(slp);
+	splx(s);
+	return (0);
+}
+
+/*
+ * Called by nfssvc() for nfsds. Just loops around servicing rpc requests
+ * until it is killed by a signal.
+ */
+nfssvc_nfsd(nsd, argp, p)
+	struct nfsd_srvargs *nsd;
+	caddr_t argp;
+	struct proc *p;
+{
+	register struct mbuf *m, *nam2;
+	register int siz;
+	register struct nfssvc_sock *slp;
+	register struct socket *so;
+	register int *solockp;
+	struct nfsd *nd = nsd->nsd_nfsd;
+	struct mbuf *mreq, *nam;
+	struct timeval starttime;
+	struct nfsuid *uidp;
+	int error, cacherep, s;
+	int sotype;
+
+	s = splnet();
+	if (nd == (struct nfsd *)0) {
+		nsd->nsd_nfsd = nd = (struct nfsd *)
+			malloc(sizeof (struct nfsd), M_NFSD, M_WAITOK);
+		bzero((caddr_t)nd, sizeof (struct nfsd));
+		nd->nd_procp = p;
+		nd->nd_cr.cr_ref = 1;
+		insque(nd, &nfsd_head);
+		nd->nd_nqlflag = NQL_NOVAL;
+		nfs_numnfsd++;
+	}
+	/*
+	 * Loop getting rpc requests until SIGKILL.
+	 */
+	for (;;) {
+		if ((nd->nd_flag & NFSD_REQINPROG) == 0) {
+			while (nd->nd_slp == (struct nfssvc_sock *)0 &&
+				 (nfsd_head.nd_flag & NFSD_CHECKSLP) == 0) {
+				nd->nd_flag |= NFSD_WAITING;
+				nfsd_waiting++;
+				error = tsleep((caddr_t)nd, PSOCK | PCATCH, "nfsd", 0);
+				nfsd_waiting--;
+				if (error)
+					goto done;
+			}
+			if (nd->nd_slp == (struct nfssvc_sock *)0 &&
+				(nfsd_head.nd_flag & NFSD_CHECKSLP)) {
+				slp = nfssvc_sockhead.ns_next;
+				while (slp != &nfssvc_sockhead) {
+				    if ((slp->ns_flag & (SLP_VALID | SLP_DOREC))
+					== (SLP_VALID | SLP_DOREC)) {
+					    slp->ns_flag &= ~SLP_DOREC;
+					    slp->ns_sref++;
+					    nd->nd_slp = slp;
+					    break;
+				    }
+				    slp = slp->ns_next;
+				}
+				if (slp == &nfssvc_sockhead)
+					nfsd_head.nd_flag &= ~NFSD_CHECKSLP;
+			}
+			if ((slp = nd->nd_slp) == (struct nfssvc_sock *)0)
+				continue;
+			if (slp->ns_flag & SLP_VALID) {
+				if (slp->ns_flag & SLP_DISCONN)
+					nfsrv_zapsock(slp);
+				else if (slp->ns_flag & SLP_NEEDQ) {
+					slp->ns_flag &= ~SLP_NEEDQ;
+					(void) nfs_sndlock(&slp->ns_solock,
+						(struct nfsreq *)0);
+					nfsrv_rcv(slp->ns_so, (caddr_t)slp,
+						M_WAIT);
+					nfs_sndunlock(&slp->ns_solock);
+				}
+				error = nfsrv_dorec(slp, nd);
+				nd->nd_flag |= NFSD_REQINPROG;
+			}
+		} else {
+			error = 0;
+			slp = nd->nd_slp;
+		}
+		if (error || (slp->ns_flag & SLP_VALID) == 0) {
+			nd->nd_slp = (struct nfssvc_sock *)0;
+			nd->nd_flag &= ~NFSD_REQINPROG;
+			nfsrv_slpderef(slp);
+			continue;
+		}
+		splx(s);
+		so = slp->ns_so;
+		sotype = so->so_type;
+		starttime = time;
+		if (so->so_proto->pr_flags & PR_CONNREQUIRED)
+			solockp = &slp->ns_solock;
+		else
+			solockp = (int *)0;
+		/*
+		 * nam == nam2 for connectionless protocols such as UDP
+		 * nam2 == NULL for connection based protocols to disable
+		 *    recent request caching.
+		 */
+		if (nam2 = nd->nd_nam) {
+			nam = nam2;
+			cacherep = RC_CHECKIT;
+		} else {
+			nam = slp->ns_nam;
+			cacherep = RC_DOIT;
+		}
+
+		/*
+		 * Check to see if authorization is needed.
+		 */
+		if (nd->nd_flag & NFSD_NEEDAUTH) {
+			static int logauth = 0;
+
+			nd->nd_flag &= ~NFSD_NEEDAUTH;
+			/*
+			 * Check for a mapping already installed.
+			 */
+			uidp = slp->ns_uidh[NUIDHASH(nd->nd_cr.cr_uid)];
+			while (uidp) {
+				if (uidp->nu_uid == nd->nd_cr.cr_uid)
+					break;
+				uidp = uidp->nu_hnext;
+			}
+			if (!uidp) {
+			    nsd->nsd_uid = nd->nd_cr.cr_uid;
+			    if (nam2 && logauth++ == 0)
+				log(LOG_WARNING, "Kerberized NFS using UDP\n");
+			    nsd->nsd_haddr =
+			      mtod(nam, struct sockaddr_in *)->sin_addr.s_addr;
+			    nsd->nsd_authlen = nd->nd_authlen;
+			    if (copyout(nd->nd_authstr, nsd->nsd_authstr,
+				nd->nd_authlen) == 0 &&
+				copyout((caddr_t)nsd, argp, sizeof (*nsd)) == 0)
+				return (ENEEDAUTH);
+			    cacherep = RC_DROPIT;
+			}
+		}
+		if (cacherep == RC_CHECKIT)
+			cacherep = nfsrv_getcache(nam2, nd, &mreq);
+
+		/*
+		 * Check for just starting up for NQNFS and send
+		 * fake "try again later" replies to the NQNFS clients.
+		 */
+		if (notstarted && nqnfsstarttime <= time.tv_sec) {
+			if (modify_flag) {
+				nqnfsstarttime = time.tv_sec + nqsrv_writeslack;
+				modify_flag = 0;
+			} else
+				notstarted = 0;
+		}
+		if (notstarted) {
+			if (nd->nd_nqlflag == NQL_NOVAL)
+				cacherep = RC_DROPIT;
+			else if (nd->nd_procnum != NFSPROC_WRITE) {
+				nd->nd_procnum = NFSPROC_NOOP;
+				nd->nd_repstat = NQNFS_TRYLATER;
+				cacherep = RC_DOIT;
+			} else
+				modify_flag = 1;
+		} else if (nd->nd_flag & NFSD_AUTHFAIL) {
+			nd->nd_flag &= ~NFSD_AUTHFAIL;
+			nd->nd_procnum = NFSPROC_NOOP;
+			nd->nd_repstat = NQNFS_AUTHERR;
+			cacherep = RC_DOIT;
+		}
+
+		switch (cacherep) {
+		case RC_DOIT:
+			error = (*(nfsrv_procs[nd->nd_procnum]))(nd,
+				nd->nd_mrep, nd->nd_md, nd->nd_dpos, &nd->nd_cr,
+				nam, &mreq);
+			if (nd->nd_cr.cr_ref != 1) {
+				printf("nfssvc cref=%d\n", nd->nd_cr.cr_ref);
+				panic("nfssvc cref");
+			}
+			if (error) {
+				if (nd->nd_procnum != NQNFSPROC_VACATED)
+					nfsstats.srv_errs++;
+				if (nam2) {
+					nfsrv_updatecache(nam2, nd, FALSE, mreq);
+					m_freem(nam2);
+				}
+				break;
+			}
+			nfsstats.srvrpccnt[nd->nd_procnum]++;
+			if (nam2)
+				nfsrv_updatecache(nam2, nd, TRUE, mreq);
+			nd->nd_mrep = (struct mbuf *)0;
+		case RC_REPLY:
+			m = mreq;
+			siz = 0;
+			while (m) {
+				siz += m->m_len;
+				m = m->m_next;
+			}
+			if (siz <= 0 || siz > NFS_MAXPACKET) {
+				printf("mbuf siz=%d\n",siz);
+				panic("Bad nfs svc reply");
+			}
+			m = mreq;
+			m->m_pkthdr.len = siz;
+			m->m_pkthdr.rcvif = (struct ifnet *)0;
+			/*
+			 * For stream protocols, prepend a Sun RPC
+			 * Record Mark.
+			 */
+			if (sotype == SOCK_STREAM) {
+				M_PREPEND(m, NFSX_UNSIGNED, M_WAIT);
+				*mtod(m, u_long *) = htonl(0x80000000 | siz);
+			}
+			if (solockp)
+				(void) nfs_sndlock(solockp, (struct nfsreq *)0);
+			if (slp->ns_flag & SLP_VALID)
+			    error = nfs_send(so, nam2, m, (struct nfsreq *)0);
+			else {
+			    error = EPIPE;
+			    m_freem(m);
+			}
+			if (nfsrtton)
+				nfsd_rt(&starttime, sotype, nd, nam, cacherep);
+			if (nam2)
+				MFREE(nam2, m);
+			if (nd->nd_mrep)
+				m_freem(nd->nd_mrep);
+			if (error == EPIPE)
+				nfsrv_zapsock(slp);
+			if (solockp)
+				nfs_sndunlock(solockp);
+			if (error == EINTR || error == ERESTART) {
+				nfsrv_slpderef(slp);
+				s = splnet();
+				goto done;
+			}
+			break;
+		case RC_DROPIT:
+			if (nfsrtton)
+				nfsd_rt(&starttime, sotype, nd, nam, cacherep);
+			m_freem(nd->nd_mrep);
+			m_freem(nam2);
+			break;
+		};
+		s = splnet();
+		if (nfsrv_dorec(slp, nd)) {
+			nd->nd_flag &= ~NFSD_REQINPROG;
+			nd->nd_slp = (struct nfssvc_sock *)0;
+			nfsrv_slpderef(slp);
+		}
+	}
+done:
+	remque(nd);
+	splx(s);
+	free((caddr_t)nd, M_NFSD);
+	nsd->nsd_nfsd = (struct nfsd *)0;
+	if (--nfs_numnfsd == 0)
+		nfsrv_init(TRUE);	/* Reinitialize everything */
+	return (error);
+}
+
+/*
+ * Asynchronous I/O daemons for client nfs.
+ * They do read-ahead and write-behind operations on the block I/O cache.
+ * Never returns unless it fails or gets killed.
+ *
+ * p is the calling process; it is published in nfs_iodwant[] while the
+ * daemon is idle.  Returns EBUSY when every one of the NFS_MAXASYNCDAEMON
+ * slots is already taken, otherwise the error that tsleep() handed back.
+ */
+nfssvc_iod(p)
+	struct proc *p;
+{
+	register struct buf *bp;
+	register int i, myiod;
+	int error = 0;
+
+	/*
+	 * Assign my position or return error if too many already running
+	 */
+	myiod = -1;
+	for (i = 0; i < NFS_MAXASYNCDAEMON; i++)
+		if (nfs_asyncdaemon[i] == 0) {
+			nfs_asyncdaemon[i]++;
+			myiod = i;
+			break;
+		}
+	if (myiod == -1)
+		return (EBUSY);
+	nfs_numasync++;
+	/*
+	 * Just loop around doin our stuff until SIGKILL
+	 */
+	for (;;) {
+		/* Idle: advertise this slot and sleep until woken or signalled. */
+		while (nfs_bufq.tqh_first == NULL && error == 0) {
+			nfs_iodwant[myiod] = p;
+			error = tsleep((caddr_t)&nfs_iodwant[myiod],
+				PWAIT | PCATCH, "nfsidl", 0);
+		}
+		/* Drain the queue even when a signal is pending. */
+		while ((bp = nfs_bufq.tqh_first) != NULL) {
+			/* Take one off the front of the list */
+			TAILQ_REMOVE(&nfs_bufq, bp, b_freelist);
+			if (bp->b_flags & B_READ)
+			    (void) nfs_doio(bp, bp->b_rcred, (struct proc *)0);
+			else
+			    (void) nfs_doio(bp, bp->b_wcred, (struct proc *)0);
+		}
+		if (error) {
+			/*
+			 * An interrupted tsleep() leaves nfs_iodwant[myiod]
+			 * still set to p.  Clear it before giving the slot
+			 * back so that nothing can mistake the entry for an
+			 * idle daemon once this one has gone away.
+			 */
+			nfs_iodwant[myiod] = (struct proc *)0;
+			nfs_asyncdaemon[myiod] = 0;
+			nfs_numasync--;
+			return (error);
+		}
+	}
+}
+
+/*
+ * Shut down the socket attached to an nfssvc_sock structure and release
+ * everything hanging off it.
+ * Should be called with the send lock set, if required.
+ * Every ns_flag bit covered by SLP_ALLFLAGS is cleared up front.  If a
+ * file is still attached, the socket upcall is removed, the socket is
+ * shut down (how == 2) and the file closed, the peer name mbuf and any
+ * queued raw/record mbufs are freed, and all uid mappings are discarded
+ * (LRU list emptied, hash heads zeroed).  The nfssvc_sock itself is not
+ * freed here.
+ * NOTE(review): older commentary spoke of bumping ns_sref on entry;
+ * nothing in this routine touches ns_sref -- confirm what callers expect.
+ */
+nfsrv_zapsock(slp)
+	register struct nfssvc_sock *slp;
+{
+	register struct nfsuid *cur, *following;
+	register int bucket;
+	struct socket *sock;
+	struct file *fp;
+	struct mbuf *scratch;
+
+	slp->ns_flag &= ~SLP_ALLFLAGS;
+	fp = slp->ns_fp;
+	if (fp != (struct file *)0) {
+		slp->ns_fp = (struct file *)0;
+		sock = slp->ns_so;
+		sock->so_upcall = NULL;
+		soshutdown(sock, 2);
+		closef(fp, (struct proc *)0);
+		if (slp->ns_nam)
+			MFREE(slp->ns_nam, scratch);
+		m_freem(slp->ns_raw);
+		m_freem(slp->ns_rec);
+
+		/* The LRU ring is terminated by the nfssvc_sock itself. */
+		for (cur = slp->ns_lrunext; cur != (struct nfsuid *)slp;
+		    cur = following) {
+			following = cur->nu_lrunext;
+			free((caddr_t)cur, M_NFSUID);
+		}
+		slp->ns_lrunext = slp->ns_lruprev = (struct nfsuid *)slp;
+
+		bucket = 0;
+		while (bucket < NUIDHASHSIZ) {
+			slp->ns_uidh[bucket] = (struct nfsuid *)0;
+			bucket++;
+		}
+	}
+}
+
+/*
+ * Get an authorization string for the uid by having the mount_nfs sitting
+ * on this mount point pop out of the kernel and do it.
+ *
+ * Waits (re-checking for signals via nfs_sigintr() every 2 * hz) until
+ * the mount has NFSMNT_WAITAUTH set, hands it a freshly allocated
+ * RPCAUTH_MAXSIZ buffer through nm_authstr / *auth_str together with
+ * cred->cr_uid, and then waits for NFSMNT_HASAUTH.
+ * Returns 0 with *auth_type and *auth_len filled in from the mount, in
+ * which case the buffer is not freed here; returns EAUTH when
+ * NFSMNT_AUTHERR was raised, or the nfs_sigintr() error.  On any error
+ * after the allocation the buffer is freed before returning.
+ */
+nfs_getauth(nmp, rep, cred, auth_type, auth_str, auth_len)
+	register struct nfsmount *nmp;
+	struct nfsreq *rep;
+	struct ucred *cred;
+	int *auth_type;
+	char **auth_str;
+	int *auth_len;
+{
+	int error = 0;
+
+	/* Queue up behind any other request using the authenticator. */
+	while (!(nmp->nm_flag & NFSMNT_WAITAUTH)) {
+		nmp->nm_flag |= NFSMNT_WANTAUTH;
+		(void) tsleep((caddr_t)&nmp->nm_authtype, PSOCK,
+			"nfsauth1", 2 * hz);
+		error = nfs_sigintr(nmp, rep, rep->r_procp);
+		if (error != 0) {
+			nmp->nm_flag &= ~NFSMNT_WANTAUTH;
+			return (error);
+		}
+	}
+
+	/* Our turn: publish the request and wake whoever sleeps on it. */
+	nmp->nm_flag &= ~(NFSMNT_WAITAUTH | NFSMNT_WANTAUTH);
+	*auth_str = (char *)malloc(RPCAUTH_MAXSIZ, M_TEMP, M_WAITOK);
+	nmp->nm_authstr = *auth_str;
+	nmp->nm_authuid = cred->cr_uid;
+	wakeup((caddr_t)&nmp->nm_authstr);
+
+	/*
+	 * And wait for mount_nfs to do its stuff.
+	 */
+	while (error == 0 && !(nmp->nm_flag & NFSMNT_HASAUTH)) {
+		(void) tsleep((caddr_t)&nmp->nm_authlen, PSOCK,
+			"nfsauth2", 2 * hz);
+		error = nfs_sigintr(nmp, rep, rep->r_procp);
+	}
+	if (nmp->nm_flag & NFSMNT_AUTHERR) {
+		nmp->nm_flag &= ~NFSMNT_AUTHERR;
+		error = EAUTH;
+	}
+	if (error == 0) {
+		*auth_type = nmp->nm_authtype;
+		*auth_len = nmp->nm_authlen;
+	} else
+		free((caddr_t)*auth_str, M_TEMP);
+
+	/* Hand the authenticator slot back and wake the next waiter. */
+	nmp->nm_flag = (nmp->nm_flag & ~NFSMNT_HASAUTH) | NFSMNT_WAITAUTH;
+	if (nmp->nm_flag & NFSMNT_WANTAUTH) {
+		nmp->nm_flag &= ~NFSMNT_WANTAUTH;
+		wakeup((caddr_t)&nmp->nm_authtype);
+	}
+	return (error);
+}
+
+/*
+ * Dereference a server socket structure.  The reference count is always
+ * decremented; only when it reaches zero and the structure is no longer
+ * marked SLP_VALID is it unlinked from its list and freed.
+ */
+void
+nfsrv_slpderef(slp)
+	register struct nfssvc_sock *slp;
+{
+	slp->ns_sref--;
+	if (slp->ns_sref != 0)
+		return;			/* still referenced */
+	if (slp->ns_flag & SLP_VALID)
+		return;			/* idle but still in service */
+
+	slp->ns_prev->ns_next = slp->ns_next;
+	slp->ns_next->ns_prev = slp->ns_prev;
+	free((caddr_t)slp, M_NFSSVC);
+}
+
+/*
+ * Initialize the data structures for the server.
+ * SLP_INIT on nfssvc_sockhead is held for the duration (panicking if it
+ * is already set) and anyone who flagged SLP_WANTINIT is woken at the
+ * end, so that new nfsds starting up cannot see a half-built list.
+ *
+ * When terminating is non-zero every existing nfssvc_sock is zapped (if
+ * still valid), unlinked and freed, and the server request cache is
+ * cleaned, before the fresh nfs_udpsock / nfs_cltpsock entries are
+ * allocated and the nfsd list head is reset.
+ */
+void
+nfsrv_init(terminating)
+	int terminating;
+{
+	register struct nfssvc_sock *slp;
+	struct nfssvc_sock *doomed;
+
+	if (nfssvc_sockhead.ns_flag & SLP_INIT)
+		panic("nfsd init");
+	nfssvc_sockhead.ns_flag |= SLP_INIT;
+
+	if (terminating) {
+		for (slp = nfssvc_sockhead.ns_next; slp != &nfssvc_sockhead; ) {
+			doomed = slp;
+			if (doomed->ns_flag & SLP_VALID)
+				nfsrv_zapsock(doomed);
+			doomed->ns_next->ns_prev = doomed->ns_prev;
+			doomed->ns_prev->ns_next = doomed->ns_next;
+			/* Step past the entry before its memory goes away. */
+			slp = doomed->ns_next;
+			free((caddr_t)doomed, M_NFSSVC);
+		}
+		nfsrv_cleancache();	/* And clear out server cache */
+	}
+
+	/* Two zeroed placeholder entries. */
+	nfs_udpsock = (struct nfssvc_sock *)
+	    malloc(sizeof (*nfs_udpsock), M_NFSSVC, M_WAITOK);
+	bzero((caddr_t)nfs_udpsock, sizeof (*nfs_udpsock));
+	nfs_cltpsock = (struct nfssvc_sock *)
+	    malloc(sizeof (*nfs_cltpsock), M_NFSSVC, M_WAITOK);
+	bzero((caddr_t)nfs_cltpsock, sizeof (*nfs_cltpsock));
+
+	/* Ring: head -> udp -> cltp -> head, and the reverse. */
+	nfssvc_sockhead.ns_next = nfs_udpsock;
+	nfssvc_sockhead.ns_prev = nfs_cltpsock;
+	nfs_udpsock->ns_next = nfs_cltpsock;
+	nfs_udpsock->ns_prev = &nfssvc_sockhead;
+	nfs_cltpsock->ns_next = &nfssvc_sockhead;
+	nfs_cltpsock->ns_prev = nfs_udpsock;
+
+	/* Empty uid LRU rings point back at their owning structure. */
+	nfs_udpsock->ns_lrunext = (struct nfsuid *)nfs_udpsock;
+	nfs_udpsock->ns_lruprev = (struct nfsuid *)nfs_udpsock;
+	nfs_cltpsock->ns_lrunext = (struct nfsuid *)nfs_cltpsock;
+	nfs_cltpsock->ns_lruprev = (struct nfsuid *)nfs_cltpsock;
+
+	/* Empty nfsd list. */
+	nfsd_head.nd_next = &nfsd_head;
+	nfsd_head.nd_prev = &nfsd_head;
+	nfsd_head.nd_flag = 0;
+
+	nfssvc_sockhead.ns_flag &= ~SLP_INIT;
+	if (nfssvc_sockhead.ns_flag & SLP_WANTINIT) {
+		nfssvc_sockhead.ns_flag &= ~SLP_WANTINIT;
+		wakeup((caddr_t)&nfssvc_sockhead);
+	}
+}
+
+/*
+ * Add an entry to the server monitor log.
+ * Fills the slot at nfsdrt.pos with flags describing how the request was
+ * handled (cache disposition, stream socket, NQNFS), the procedure
+ * number, the client's IPv4 address (INADDR_ANY for any other address
+ * family), the elapsed time since *startp in microseconds and the
+ * current time, then advances nfsdrt.pos modulo NFSRTTLOGSIZ.
+ */
+static void
+nfsd_rt(startp, sotype, nd, nam, cacherep)
+	struct timeval *startp;
+	int sotype;
+	register struct nfsd *nd;
+	struct mbuf *nam;
+	int cacherep;
+{
+	register struct drt *ent;
+
+	ent = &nfsdrt.drt[nfsdrt.pos];
+
+	switch (cacherep) {
+	case RC_DOIT:
+		ent->flag = 0;
+		break;
+	case RC_REPLY:
+		ent->flag = DRT_CACHEREPLY;
+		break;
+	default:
+		ent->flag = DRT_CACHEDROP;
+		break;
+	}
+	if (sotype == SOCK_STREAM)
+		ent->flag |= DRT_TCP;
+	if (nd->nd_nqlflag != NQL_NOVAL)
+		ent->flag |= DRT_NQNFS;
+
+	ent->proc = nd->nd_procnum;
+
+	if (mtod(nam, struct sockaddr *)->sa_family != AF_INET)
+		ent->ipadr = INADDR_ANY;
+	else
+		ent->ipadr = mtod(nam, struct sockaddr_in *)->sin_addr.s_addr;
+
+	ent->resptime = (time.tv_sec - startp->tv_sec) * 1000000 +
+		(time.tv_usec - startp->tv_usec);
+	ent->tstamp = time;
+
+	nfsdrt.pos = (nfsdrt.pos + 1) % NFSRTTLOGSIZ;
+}
diff --git a/sys/nfsserver/nfsm_subs.h b/sys/nfsserver/nfsm_subs.h
new file mode 100644
index 00000000000..879db360057
--- /dev/null
+++ b/sys/nfsserver/nfsm_subs.h
@@ -0,0 +1,269 @@
+/*
+ * Copyright (c) 1989, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Rick Macklem at The University of Guelph.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)nfsm_subs.h	8.1 (Berkeley) 6/16/93
+ */
+
+/*
+ * These macros do strange and peculiar things to mbuf chains for
+ * the assistance of the nfs code. To attempt to use them for any
+ * other purpose will be dangerous. (they make weird assumptions)
+ */
+
+/*
+ * First define what the actual subs. return
+ */
+extern struct mbuf *nfsm_reqh();
+
+/* Non-zero when mbuf m has M_EXT set in m_flags (external storage). */
+#define	M_HASCL(m)	((m)->m_flags & M_EXT)
+/*
+ * Reset m_data to the start of m's storage: ext_buf when M_EXT is set,
+ * m_pktdat when M_PKTHDR is set, otherwise m_dat.
+ * Expands to a bare if/else-if/else chain with no enclosing braces, so
+ * take care when using it as the body of another unbraced if.
+ */
+#define	NFSMINOFF(m) \
+		if (M_HASCL(m)) \
+			(m)->m_data = (m)->m_ext.ext_buf; \
+		else if ((m)->m_flags & M_PKTHDR) \
+			(m)->m_data = (m)->m_pktdat; \
+		else \
+			(m)->m_data = (m)->m_dat
+/* Advance m_data by s bytes; m_len is left alone. */
+#define	NFSMADV(m, s)	(m)->m_data += (s)
+/*
+ * Storage size matching the same flag tests as NFSMINOFF: MCLBYTES,
+ * MHLEN or MLEN.
+ */
+#define	NFSMSIZ(m)	((M_HASCL(m))?MCLBYTES: \
+				(((m)->m_flags & M_PKTHDR)?MHLEN:MLEN))
+
+/*
+ * Now for the macros that do the simple stuff and call the functions
+ * for the hard stuff.
+ * These macros use several vars. declared in nfsm_reqhead and these
+ * vars. must not be used elsewhere unless you are careful not to corrupt
+ * them. The vars. starting with pN and tN (N=1,2,3,..) are temporaries
+ * that may be used so long as the value is not expected to be retained
+ * after a macro.
+ * I know, this is kind of dorkey, but it makes the actual op functions
+ * fairly clean and deals with the mess caused by the xdr discriminating
+ * unions.
+ */
+
+/*
+ * nfsm_build(a,c,s): reserve s bytes at the build position and set a to
+ * their start, cast with c.
+ * Uses mb, mb2 and bpos from the enclosing scope.  When s does not fit in
+ * M_TRAILINGSPACE(mb) a new mbuf is obtained with MGET(M_WAIT), linked
+ * after mb and made current; the macro panics if s > MLEN.  mb->m_len and
+ * bpos are advanced by s.
+ */
+#define	nfsm_build(a,c,s) \
+		{ if ((s) > M_TRAILINGSPACE(mb)) { \
+			MGET(mb2, M_WAIT, MT_DATA); \
+			if ((s) > MLEN) \
+				panic("build > MLEN"); \
+			mb->m_next = mb2; \
+			mb = mb2; \
+			mb->m_len = 0; \
+			bpos = mtod(mb, caddr_t); \
+		} \
+		(a) = (c)(bpos); \
+		mb->m_len += (s); \
+		bpos += (s); }
+
+/*
+ * nfsm_dissect(a,c,s): set a (cast with c) to the next s bytes of the
+ * message being parsed.
+ * Uses md, dpos, t1, cp2, error and mrep from the enclosing scope.  t1 is
+ * set to the bytes left in md after dpos; if that covers s, a points at
+ * dpos and dpos advances.  Otherwise nfsm_disct() is asked to supply the
+ * data via cp2; if it fails, mrep is freed and control jumps to the
+ * nfsmout label.
+ */
+#define	nfsm_dissect(a,c,s) \
+		{ t1 = mtod(md, caddr_t)+md->m_len-dpos; \
+		if (t1 >= (s)) { \
+			(a) = (c)(dpos); \
+			dpos += (s); \
+		} else if (error = nfsm_disct(&md, &dpos, (s), t1, &cp2)) { \
+			m_freem(mrep); \
+			goto nfsmout; \
+		} else { \
+			(a) = (c)cp2; \
+		} }
+
+/*
+ * nfsm_fhtom(v): append the NFSX_FH-byte file handle of vnode v
+ * (VTONFS(v)->n_fh) to the message being built.  Uses cp as scratch.
+ * Expands to two statements without braces.
+ */
+#define nfsm_fhtom(v) \
+		nfsm_build(cp,caddr_t,NFSX_FH); \
+		bcopy((caddr_t)&(VTONFS(v)->n_fh), cp, NFSX_FH)
+
+/*
+ * nfsm_srvfhtom(f): append the NFSX_FH bytes at f to the message being
+ * built.  Uses cp as scratch.  Expands to two statements without braces.
+ */
+#define nfsm_srvfhtom(f) \
+		nfsm_build(cp,caddr_t,NFSX_FH); \
+		bcopy((caddr_t)(f), cp, NFSX_FH)
+
+/*
+ * nfsm_mtofh(d,v): parse a file handle from the reply, look up its
+ * nfsnode on d's mount with nfs_nget(), store the corresponding vnode in
+ * v and then load the attributes that follow via nfsm_loadattr.
+ * If nfs_nget() fails, mrep is freed and control jumps to nfsmout.
+ */
+#define nfsm_mtofh(d,v) \
+		{ struct nfsnode *np; nfsv2fh_t *fhp; \
+		nfsm_dissect(fhp,nfsv2fh_t *,NFSX_FH); \
+		if (error = nfs_nget((d)->v_mount, fhp, &np)) { \
+			m_freem(mrep); \
+			goto nfsmout; \
+		} \
+		(v) = NFSTOV(np); \
+		nfsm_loadattr(v, (struct vattr *)0); \
+		}
+
+/*
+ * nfsm_loadattr(v,a): call nfs_loadattrcache() on the reply at md/dpos
+ * for vnode v, passing a through.  v is written back afterwards because
+ * the call receives a pointer to a temporary copy of it.
+ * On error, mrep is freed and control jumps to nfsmout.
+ */
+#define	nfsm_loadattr(v,a) \
+		{ struct vnode *tvp = (v); \
+		if (error = nfs_loadattrcache(&tvp, &md, &dpos, (a))) { \
+			m_freem(mrep); \
+			goto nfsmout; \
+		} \
+		(v) = tvp; }
+
+/*
+ * nfsm_strsiz(s,m): parse an XDR length word into s; if it exceeds m,
+ * free mrep, set error to EBADRPC and jump to nfsmout.
+ * NOTE(review): unlike nfsm_srvstrsiz below, there is no lower-bound
+ * check here.
+ */
+#define	nfsm_strsiz(s,m) \
+		{ nfsm_dissect(tl,u_long *,NFSX_UNSIGNED); \
+		if (((s) = fxdr_unsigned(long,*tl)) > (m)) { \
+			m_freem(mrep); \
+			error = EBADRPC; \
+			goto nfsmout; \
+		} }
+
+/*
+ * nfsm_srvstrsiz(s,m): server-side variant.  Lengths greater than m, or
+ * less than or equal to zero, set error to EBADRPC and go through
+ * nfsm_reply(0), which returns from the enclosing function when error is
+ * set.
+ */
+#define	nfsm_srvstrsiz(s,m) \
+		{ nfsm_dissect(tl,u_long *,NFSX_UNSIGNED); \
+		if (((s) = fxdr_unsigned(long,*tl)) > (m) || (s) <= 0) { \
+			error = EBADRPC; \
+			nfsm_reply(0); \
+		} }
+
+/*
+ * nfsm_mtouio(p,s): when s > 0, hand s bytes of the reply at md/dpos to
+ * nfsm_mbuftouio() with p; on error free mrep and jump to nfsmout.
+ * Expands to a bare if statement.
+ */
+#define nfsm_mtouio(p,s) \
+		if ((s) > 0 && \
+		   (error = nfsm_mbuftouio(&md,(p),(s),&dpos))) { \
+			m_freem(mrep); \
+			goto nfsmout; \
+		}
+
+/*
+ * nfsm_uiotom(p,s): call nfsm_uiotombuf() with p, s and the build
+ * position mb/bpos; on error free mreq (the request, not the reply) and
+ * jump to nfsmout.  Expands to a bare if statement.
+ */
+#define nfsm_uiotom(p,s) \
+		if (error = nfsm_uiotombuf((p),&mb,(s),&bpos)) { \
+			m_freem(mreq); \
+			goto nfsmout; \
+		}
+
+/*
+ * nfsm_reqhead(v,a,s): start a request by calling nfsm_reqh(); the
+ * returned mbuf becomes both mreq and the current build mbuf mb, and
+ * bpos is set through the last argument.
+ */
+#define	nfsm_reqhead(v,a,s) \
+		mb = mreq = nfsm_reqh((v),(a),(s),&bpos)
+
+/*
+ * nfsm_reqdone: free the reply mbufs and define the nfsmout label that
+ * the error paths of the other macros jump to (those paths skip the
+ * m_freem here).
+ */
+#define nfsm_reqdone	m_freem(mrep); \
+		nfsmout: 
+
+/* Round a up to the next multiple of 4. */
+#define nfsm_rndup(a)	(((a)+3)&(~0x3))
+
+/*
+ * nfsm_request(v, t, p, c): pass mreq to nfs_request(), which fills in
+ * mrep, md and dpos; on error jump to nfsmout.  Nothing is freed here on
+ * the error path.
+ */
+#define	nfsm_request(v, t, p, c)	\
+		if (error = nfs_request((v), mreq, (t), (p), \
+		   (c), &mrep, &md, &dpos)) \
+			goto nfsmout
+
+/*
+ * nfsm_strtom(a,s,m): append the s bytes at a as an XDR string (length
+ * word, data, padding to a 4-byte boundary).
+ * If s > m the request is freed and error is set to ENAMETOOLONG.  When
+ * the whole thing fits in the current mbuf it is built in place, with
+ * the last word zeroed before the copy so the padding is clean;
+ * otherwise nfsm_strtmbuf() does the work.  Any failure frees mreq and
+ * jumps to nfsmout.  Uses t2 and tl as scratch.
+ */
+#define	nfsm_strtom(a,s,m) \
+		if ((s) > (m)) { \
+			m_freem(mreq); \
+			error = ENAMETOOLONG; \
+			goto nfsmout; \
+		} \
+		t2 = nfsm_rndup(s)+NFSX_UNSIGNED; \
+		if (t2 <= M_TRAILINGSPACE(mb)) { \
+			nfsm_build(tl,u_long *,t2); \
+			*tl++ = txdr_unsigned(s); \
+			*(tl+((t2>>2)-2)) = 0; \
+			bcopy((caddr_t)(a), (caddr_t)tl, (s)); \
+		} else if (error = nfsm_strtmbuf(&mb, &bpos, (a), (s))) { \
+			m_freem(mreq); \
+			goto nfsmout; \
+		}
+
+/*
+ * nfsm_srvdone: define the nfsmout label and return error from the
+ * enclosing function.
+ */
+#define	nfsm_srvdone \
+		nfsmout: \
+		return(error)
+
+/*
+ * nfsm_reply(s): record error in nfsd->nd_repstat and build the reply
+ * header with nfs_rephead(), asking for 0 bytes when error is set and s
+ * bytes otherwise.  The request mbufs (mrep) are then freed and mreq is
+ * loaded from *mrq.  When error is set the enclosing function returns 0.
+ * Uses nfsd, cache, frev, mrq, mb and bpos from the enclosing scope.
+ */
+#define	nfsm_reply(s) \
+		{ \
+		nfsd->nd_repstat = error; \
+		if (error) \
+		   (void) nfs_rephead(0, nfsd, error, cache, &frev, \
+			mrq, &mb, &bpos); \
+		else \
+		   (void) nfs_rephead((s), nfsd, error, cache, &frev, \
+			mrq, &mb, &bpos); \
+		m_freem(mrep); \
+		mreq = *mrq; \
+		if (error) \
+			return(0); \
+		}
+
+/*
+ * nfsm_adv(s): skip s bytes of the message being parsed.  If the current
+ * mbuf holds at least s more bytes only dpos moves; otherwise nfs_adv()
+ * is called, and on error mrep is freed and control jumps to nfsmout.
+ * Expands to an assignment followed by a bare if statement.
+ */
+#define	nfsm_adv(s) \
+		t1 = mtod(md, caddr_t)+md->m_len-dpos; \
+		if (t1 >= (s)) { \
+			dpos += (s); \
+		} else if (error = nfs_adv(&md, &dpos, (s), t1)) { \
+			m_freem(mrep); \
+			goto nfsmout; \
+		}
+
+/*
+ * nfsm_srvmtofh(f): parse NFSX_FH bytes from the request and copy them
+ * to f.  Expands to two statements without braces.
+ */
+#define nfsm_srvmtofh(f) \
+		nfsm_dissect(tl, u_long *, NFSX_FH); \
+		bcopy((caddr_t)tl, (caddr_t)f, NFSX_FH)
+
+/*
+ * nfsm_clget: make sure there is room at bp, then point tl at it.
+ * When bp has reached be, a new mbuf is obtained with MGET followed by
+ * MCLGET, its m_len is set to its full NFSMSIZ, it is linked after mp2
+ * and becomes both mp and mp2, and bp/be are reset to its bounds.  If
+ * the mbuf being left is still mb, its m_len is first extended by
+ * bp - bpos.
+ */
+#define	nfsm_clget \
+		if (bp >= be) { \
+			if (mp == mb) \
+				mp->m_len += bp-bpos; \
+			MGET(mp, M_WAIT, MT_DATA); \
+			MCLGET(mp, M_WAIT); \
+			mp->m_len = NFSMSIZ(mp); \
+			mp2->m_next = mp; \
+			mp2 = mp; \
+			bp = mtod(mp, caddr_t); \
+			be = bp+mp->m_len; \
+		} \
+		tl = (u_long *)bp
+
+/*
+ * nfsm_srvfillattr: fill the wire attribute structure *fp from the
+ * vattr *vap.  Uses fp, vap and nfsd from the enclosing scope.
+ * Type, mode, link count, uid and gid are common.  When
+ * nfsd->nd_nqlflag == NQL_NOVAL the fa_nfs* members are filled: size is
+ * sent through txdr_unsigned, blocks as va_bytes / NFS_FABLKSIZE, and
+ * the usec half of ctime carries va_gen.  Otherwise the fa_nq* members
+ * are filled, with size, bytes and filerev converted by txdr_hyper and
+ * flags and gen sent as fields of their own.
+ * In both layouts a VFIFO reports rdev as 0xffffffff.
+ */
+#define	nfsm_srvfillattr \
+	fp->fa_type = vtonfs_type(vap->va_type); \
+	fp->fa_mode = vtonfs_mode(vap->va_type, vap->va_mode); \
+	fp->fa_nlink = txdr_unsigned(vap->va_nlink); \
+	fp->fa_uid = txdr_unsigned(vap->va_uid); \
+	fp->fa_gid = txdr_unsigned(vap->va_gid); \
+	if (nfsd->nd_nqlflag == NQL_NOVAL) { \
+		fp->fa_nfsblocksize = txdr_unsigned(vap->va_blocksize); \
+		if (vap->va_type == VFIFO) \
+			fp->fa_nfsrdev = 0xffffffff; \
+		else \
+			fp->fa_nfsrdev = txdr_unsigned(vap->va_rdev); \
+		fp->fa_nfsfsid = txdr_unsigned(vap->va_fsid); \
+		fp->fa_nfsfileid = txdr_unsigned(vap->va_fileid); \
+		fp->fa_nfssize = txdr_unsigned(vap->va_size); \
+		fp->fa_nfsblocks = txdr_unsigned(vap->va_bytes / NFS_FABLKSIZE); \
+		txdr_nfstime(&vap->va_atime, &fp->fa_nfsatime); \
+		txdr_nfstime(&vap->va_mtime, &fp->fa_nfsmtime); \
+		fp->fa_nfsctime.nfs_sec = txdr_unsigned(vap->va_ctime.ts_sec); \
+		fp->fa_nfsctime.nfs_usec = txdr_unsigned(vap->va_gen); \
+	} else { \
+		fp->fa_nqblocksize = txdr_unsigned(vap->va_blocksize); \
+		if (vap->va_type == VFIFO) \
+			fp->fa_nqrdev = 0xffffffff; \
+		else \
+			fp->fa_nqrdev = txdr_unsigned(vap->va_rdev); \
+		fp->fa_nqfsid = txdr_unsigned(vap->va_fsid); \
+		fp->fa_nqfileid = txdr_unsigned(vap->va_fileid); \
+		txdr_hyper(&vap->va_size, &fp->fa_nqsize); \
+		txdr_hyper(&vap->va_bytes, &fp->fa_nqbytes); \
+		txdr_nqtime(&vap->va_atime, &fp->fa_nqatime); \
+		txdr_nqtime(&vap->va_mtime, &fp->fa_nqmtime); \
+		txdr_nqtime(&vap->va_ctime, &fp->fa_nqctime); \
+		fp->fa_nqflags = txdr_unsigned(vap->va_flags); \
+		fp->fa_nqgen = txdr_unsigned(vap->va_gen); \
+		txdr_hyper(&vap->va_filerev, &fp->fa_nqfilerev); \
+	}
+
diff --git a/sys/nfsserver/nfsrvcache.h b/sys/nfsserver/nfsrvcache.h
new file mode 100644
index 00000000000..26da2c275df
--- /dev/null
+++ b/sys/nfsserver/nfsrvcache.h
@@ -0,0 +1,84 @@
+/*
+ * Copyright (c) 1989, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Rick Macklem at The University of Guelph.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)nfsrvcache.h	8.1 (Berkeley) 6/10/93
+ */
+
+/*
+ * Definitions for the server recent request cache
+ */
+
+/* Presumably the number of entries in the cache -- confirm at the user. */
+#define	NFSRVCACHESIZ	256
+
+/*
+ * One entry in the server's recent request cache.
+ * rc_un holds either a saved reply mbuf chain or just a reply status;
+ * rc_haddr holds either an inet address or an address mbuf.  The
+ * RC_REPSTATUS/RC_REPMBUF and RC_INETADDR/RC_NAM flag bits below
+ * presumably record which arm of each union is live -- confirm against
+ * the cache implementation.
+ */
+struct nfsrvcache {
+	struct	nfsrvcache *rc_forw;		/* Hash chain links */
+	struct	nfsrvcache **rc_back;		/* Hash chain links */
+	struct	nfsrvcache *rc_next;		/* Lru list */
+	struct	nfsrvcache **rc_prev;		/* Lru list */
+	u_long	rc_xid;				/* rpc id number */
+	union {
+		struct mbuf *ru_repmb;		/* Reply mbuf list OR */
+		int ru_repstat;			/* Reply status */
+	} rc_un;
+	union nethostaddr rc_haddr;		/* Host address */
+	short	rc_proc;			/* rpc proc number */
+	u_char	rc_state;		/* Current state of request */
+	u_char	rc_flag;		/* Flag bits */
+};
+
+/* Shorthand for the union members above. */
+#define	rc_reply	rc_un.ru_repmb
+#define	rc_status	rc_un.ru_repstat
+#define	rc_inetaddr	rc_haddr.had_inetaddr
+#define	rc_nam		rc_haddr.had_nam
+
+/* Cache entry states */
+#define	RC_UNUSED	0
+#define	RC_INPROG	1
+#define	RC_DONE		2
+
+/*
+ * Return values
+ * As consumed by nfssvc_nfsd(): RC_CHECKIT means "consult the cache via
+ * nfsrv_getcache()", RC_DOIT means run the procedure, RC_REPLY means send
+ * the reply already in hand, RC_DROPIT means free the request and send
+ * nothing.
+ */
+#define	RC_DROPIT	0
+#define	RC_REPLY	1
+#define	RC_DOIT		2
+#define	RC_CHECKIT	3
+
+/* Flag bits */
+#define	RC_LOCKED	0x01
+#define	RC_WANTED	0x02
+#define	RC_REPSTATUS	0x04
+#define	RC_REPMBUF	0x08
+#define	RC_NQNFS	0x10
+#define	RC_INETADDR	0x20
+#define	RC_NAM		0x40
diff --git a/sys/nfsserver/nfsrvstats.h b/sys/nfsserver/nfsrvstats.h
new file mode 100644
index 00000000000..261fd42657a
--- /dev/null
+++ b/sys/nfsserver/nfsrvstats.h
@@ -0,0 +1,297 @@
+/*
+ * Copyright (c) 1989, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Rick Macklem at The University of Guelph.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)nfs.h	8.1 (Berkeley) 6/10/93
+ */
+
+/*
+ * Tunable constants for nfs
+ */
+
+#define	NFS_MAXIOVEC	34
+#define NFS_HZ		25		/* Ticks per second for NFS timeouts */
+#define	NFS_TIMEO	(1*NFS_HZ)	/* Default timeout = 1 second */
+#define	NFS_MINTIMEO	(1*NFS_HZ)	/* Min timeout to use */
+#define	NFS_MAXTIMEO	(60*NFS_HZ)	/* Max timeout to backoff to */
+#define	NFS_MINIDEMTIMEO (5*NFS_HZ)	/* Min timeout for non-idempotent ops*/
+#define	NFS_MAXREXMIT	100		/* Stop counting after this many */
+#define	NFS_MAXWINDOW	1024		/* Max number of outstanding requests */
+#define	NFS_RETRANS	10		/* Num of retrans for soft mounts */
+#define	NFS_MAXGRPS	16		/* Max. size of groups list */
+#define	NFS_MINATTRTIMO 5		/* Attribute cache timeout in sec */
+/* Upper clamp used by NFS_ATTRTIMEO below, in seconds. */
+#define	NFS_MAXATTRTIMO 60
+#define	NFS_WSIZE	8192		/* Def. write data size <= 8192 */
+#define	NFS_RSIZE	8192		/* Def. read data size <= 8192 */
+#define	NFS_DEFRAHEAD	1		/* Def. read ahead # blocks */
+#define	NFS_MAXRAHEAD	4		/* Max. read ahead # blocks */
+#define	NFS_MAXREADDIR	NFS_MAXDATA	/* Max. size of directory read */
+#define	NFS_MAXUIDHASH	64		/* Max. # of hashed uid entries/mp */
+#define	NFS_MAXASYNCDAEMON 20	/* Max. number async_daemons runable */
+#define	NFS_DIRBLKSIZ	1024		/* Size of an NFS directory block */
+/*
+ * NOTE(review): NMOD depends on an identifier nfs_asyncdaemons being in
+ * scope wherever it is expanded; it is not defined in this header.
+ */
+#define	NMOD(a)		((a) % nfs_asyncdaemons)
+
+/*
+ * Set the attribute timeout based on how recently the file has been modified.
+ * The value is a tenth of the seconds elapsed since n_mtime, clamped to
+ * the range NFS_MINATTRTIMO..NFS_MAXATTRTIMO; a node with NMODIFIED set
+ * always gets NFS_MINATTRTIMO.  np is evaluated more than once.
+ */
+#define	NFS_ATTRTIMEO(np) \
+	((((np)->n_flag & NMODIFIED) || \
+	 (time.tv_sec - (np)->n_mtime) / 10 < NFS_MINATTRTIMO) ? NFS_MINATTRTIMO : \
+	 ((time.tv_sec - (np)->n_mtime) / 10 > NFS_MAXATTRTIMO ? NFS_MAXATTRTIMO : \
+	  (time.tv_sec - (np)->n_mtime) / 10))
+
+/*
+ * Structures for the nfssvc(2) syscall. Not that anyone but nfsd and mount_nfs
+ * should ever try and use it.
+ */
+/* Argument block describing a socket to be served. */
+struct nfsd_args {
+	int	sock;		/* Socket to serve */
+	caddr_t	name;		/* Client address for connection based sockets */
+	int	namelen;	/* Length of name */
+};
+
+/*
+ * Argument block shared between the kernel and a server daemon.
+ * When a request needs authentication, nfssvc_nfsd() fills in nsd_uid,
+ * nsd_haddr and nsd_authlen, copies the authenticator out to
+ * nsd_authstr and the whole structure out to the user's argument, and
+ * returns ENEEDAUTH.  On exit it resets nsd_nfsd to null.
+ */
+struct nfsd_srvargs {
+	struct nfsd	*nsd_nfsd;	/* Pointer to in kernel nfsd struct */
+	uid_t		nsd_uid;	/* Effective uid mapped to cred */
+	u_long		nsd_haddr;	/* Ip address of client */
+	struct ucred	nsd_cr;		/* Cred. uid maps to */
+	int		nsd_authlen;	/* Length of auth string (ret) */
+	char		*nsd_authstr;	/* Auth string (ret) */
+};
+
+/*
+ * Argument block carrying an authenticator for a mount point;
+ * presumably the client-side (mount_nfs) counterpart of nfsd_srvargs --
+ * confirm against the nfssvc() implementation.
+ */
+struct nfsd_cargs {
+	char		*ncd_dirp;	/* Mount dir path */
+	uid_t		ncd_authuid;	/* Effective uid */
+	int		ncd_authtype;	/* Type of authenticator */
+	int		ncd_authlen;	/* Length of authenticator string */
+	char		*ncd_authstr;	/* Authenticator string */
+};
+
+/*
+ * Stats structure
+ */
+/*
+ * Event counters for NFS.
+ * Two members are maintained by nfssvc_nfsd(): srvrpccnt[] is bumped,
+ * indexed by procedure number, each time a server procedure returns
+ * without error, and srv_errs is bumped when one returns an error
+ * (except for NQNFSPROC_VACATED).  The remaining members are updated
+ * elsewhere; going by their names they count client cache hits and
+ * misses, buffer I/O, client RPC traffic, server request cache results
+ * and NQNFS lease activity -- confirm at the point of use.
+ */
+struct nfsstats {
+	int	attrcache_hits;
+	int	attrcache_misses;
+	int	lookupcache_hits;
+	int	lookupcache_misses;
+	int	direofcache_hits;
+	int	direofcache_misses;
+	int	biocache_reads;
+	int	read_bios;
+	int	read_physios;
+	int	biocache_writes;
+	int	write_bios;
+	int	write_physios;
+	int	biocache_readlinks;
+	int	readlink_bios;
+	int	biocache_readdirs;
+	int	readdir_bios;
+	int	rpccnt[NFS_NPROCS];	/* one slot per procedure number */
+	int	rpcretries;
+	int	srvrpccnt[NFS_NPROCS];	/* one slot per procedure number */
+	int	srvrpc_errs;
+	int	srv_errs;
+	int	rpcrequests;
+	int	rpctimeouts;
+	int	rpcunexpected;
+	int	rpcinvalid;
+	int	srvcache_inproghits;
+	int	srvcache_idemdonehits;
+	int	srvcache_nonidemdonehits;
+	int	srvcache_misses;
+	int	srvnqnfs_leases;
+	int	srvnqnfs_maxleases;
+	int	srvnqnfs_getleases;
+};
+
+/*
+ * Flags for nfssvc() system call.
+ */
+/*
+ * Each value is a distinct bit.  The names suggest which service or
+ * hand-off the caller is requesting (async I/O daemon, server daemon,
+ * adding a socket, authentication exchange, mount daemon) -- confirm
+ * against the nfssvc() implementation.  0x001 and 0x020 are not assigned
+ * here.
+ */
+#define	NFSSVC_BIOD	0x002
+#define	NFSSVC_NFSD	0x004
+#define	NFSSVC_ADDSOCK	0x008
+#define	NFSSVC_AUTHIN	0x010
+#define	NFSSVC_GOTAUTH	0x040
+#define	NFSSVC_AUTHINFAIL 0x080
+#define	NFSSVC_MNTD	0x100
+
+/*
+ * The set of signals that interrupt an I/O in progress for NFSMNT_INT mounts.
+ * What should be in this set is open to debate, but I believe that since
+ * I/O system calls on ufs are never interrupted by signals the set should
+ * be minimal. My reasoning is that many current programs that use signals
+ * such as SIGALRM will not expect file I/O system calls to be interrupted
+ * by them and break.
+ */
+#ifdef KERNEL
+/* Mask of SIGINT, SIGTERM, SIGKILL, SIGHUP and SIGQUIT. */
+#define	NFSINT_SIGMASK	(sigmask(SIGINT)|sigmask(SIGTERM)|sigmask(SIGKILL)| \
+			 sigmask(SIGHUP)|sigmask(SIGQUIT))
+
+/*
+ * Socket errors ignored for connectionless sockets??
+ * For now, ignore them all
+ *
+ * s is a set of protocol flags and e an error number: true when
+ * PR_CONNREQUIRED is clear in s and e is none of EINTR, ERESTART or
+ * EWOULDBLOCK.
+ */
+#define	NFSIGNORE_SOERROR(s, e) \
+		((e) != EINTR && (e) != ERESTART && (e) != EWOULDBLOCK && \
+		((s) & PR_CONNREQUIRED) == 0)
+
+/*
+ * Nfs outstanding request list element
+ */
+/*
+ * One outstanding client request, linked through r_next/r_prev.
+ * nfs_getauth() passes r_procp to nfs_sigintr() when checking whether
+ * the request should be abandoned.  The mbuf members presumably mirror
+ * the mreq/mrep/md/dpos variables used by the nfsm_* macros -- confirm
+ * in nfs_request().
+ */
+struct nfsreq {
+	struct nfsreq	*r_next;
+	struct nfsreq	*r_prev;
+	struct mbuf	*r_mreq;
+	struct mbuf	*r_mrep;
+	struct mbuf	*r_md;
+	caddr_t		r_dpos;
+	struct nfsmount *r_nmp;
+	struct vnode	*r_vp;
+	u_long		r_xid;
+	int		r_flags;	/* flags on request, see below */
+	int		r_retry;	/* max retransmission count */
+	int		r_rexmit;	/* current retrans count */
+	int		r_timer;	/* tick counter on reply */
+	int		r_procnum;	/* NFS procedure number */
+	int		r_rtt;		/* RTT for rpc */
+	struct proc	*r_procp;	/* Proc that did I/O system call */
+};
+
+/* Flag values for r_flags */
+#define R_TIMING	0x01		/* timing request (in mntp) */
+#define R_SENT		0x02		/* request has been sent */
+#define	R_SOFTTERM	0x04		/* soft mnt, too many retries */
+#define	R_INTR		0x08		/* intr mnt, signal pending */
+#define	R_SOCKERR	0x10		/* Fatal error on socket */
+#define	R_TPRINTFMSG	0x20		/* Did a tprintf msg. */
+#define	R_MUSTRESEND	0x40		/* Must resend request */
+#define	R_GETONEREP	0x80		/* Probe for one reply only */
+
+/*
+ * The global statistics block.
+ * NOTE(review): this is a definition, not an extern declaration, so
+ * every source file that includes this header with KERNEL defined gets
+ * its own tentative definition of nfsstats -- confirm that is intended.
+ */
+struct nfsstats nfsstats;
+
+/*
+ * A list of nfssvc_sock structures is maintained with all the sockets
+ * that require service by the nfsd.
+ * The nfsuid structs hang off of the nfssvc_sock structs in both lru
+ * and uid hash lists.
+ */
+/* Number of uid hash buckets per nfssvc_sock. */
+#define	NUIDHASHSIZ	32
+/* Bucket index for a uid; the mask relies on NUIDHASHSIZ being 2^n. */
+#define	NUIDHASH(uid)	((uid) & (NUIDHASHSIZ - 1))
+
+/*
+ * Network address hash list element
+ * Either an inet address held by value or a pointer to an mbuf.
+ */
+union nethostaddr {
+	u_long had_inetaddr;
+	struct mbuf *had_nam;
+};
+
+/*
+ * A uid mapping hanging off an nfssvc_sock.
+ * The LRU links must come first because the owning nfssvc_sock is cast
+ * to a struct nfsuid * and used as the terminator of the LRU ring (see
+ * nfsrv_zapsock() and nfsrv_init()).  nfssvc_nfsd() walks the hash chain
+ * through nu_hnext comparing nu_uid.
+ */
+struct nfsuid {
+	struct nfsuid	*nu_lrunext;	/* MUST be first */
+	struct nfsuid	*nu_lruprev;
+	struct nfsuid	*nu_hnext;
+	struct nfsuid	*nu_hprev;
+	int		nu_flag;	/* Flags */
+	uid_t		nu_uid;		/* Uid mapped by this entry */
+	union nethostaddr nu_haddr;	/* Host addr. for dgram sockets */
+	struct ucred	nu_cr;		/* Cred uid mapped to */
+};
+
+/* Shorthand for the members of nu_haddr. */
+#define	nu_inetaddr	nu_haddr.had_inetaddr
+#define	nu_nam		nu_haddr.had_nam
+/* Bits for nu_flag */
+#define	NU_INETADDR	0x1
+
+/*
+ * Per-socket server state; entries form a ring headed by
+ * nfssvc_sockhead.
+ * ns_lrunext/ns_lruprev head the ring of nfsuid mappings and must be
+ * first so that the structure can stand in for a struct nfsuid as the
+ * ring terminator.  ns_sref is the reference count dropped by
+ * nfsrv_slpderef().  ns_solock is the lock word handed to
+ * nfs_sndlock()/nfs_sndunlock().  ns_nam, ns_raw and ns_rec are mbufs
+ * released by nfsrv_zapsock().
+ */
+struct nfssvc_sock {
+	struct nfsuid	*ns_lrunext;	/* MUST be first */
+	struct nfsuid	*ns_lruprev;
+	struct nfssvc_sock *ns_next;
+	struct nfssvc_sock *ns_prev;
+	int		ns_flag;
+	u_long		ns_sref;
+	struct file	*ns_fp;
+	struct socket	*ns_so;
+	int		ns_solock;
+	struct mbuf	*ns_nam;
+	int		ns_cc;
+	struct mbuf	*ns_raw;
+	struct mbuf	*ns_rawend;
+	int		ns_reclen;
+	struct mbuf	*ns_rec;
+	struct mbuf	*ns_recend;
+	int		ns_numuids;
+	struct nfsuid	*ns_uidh[NUIDHASHSIZ];
+};
+
+/*
+ * Bits for "ns_flag"
+ * As used by nfssvc_nfsd() and nfsrv_init():
+ *   SLP_VALID    socket is in service; checked before receiving/sending
+ *   SLP_DOREC    a daemon should pick this socket up; cleared on pickup
+ *   SLP_NEEDQ    nfsrv_rcv() must be called before taking a record
+ *   SLP_DISCONN  socket is to be torn down with nfsrv_zapsock()
+ *   SLP_INIT     (on nfssvc_sockhead) nfsrv_init() is in progress
+ *   SLP_WANTINIT (on nfssvc_sockhead) someone is waiting for it to end
+ * SLP_GETSTREAM is not used in the code visible here.
+ */
+#define	SLP_VALID	0x01
+#define	SLP_DOREC	0x02
+#define	SLP_NEEDQ	0x04
+#define	SLP_DISCONN	0x08
+#define	SLP_GETSTREAM	0x10
+#define	SLP_INIT	0x20
+#define	SLP_WANTINIT	0x40
+
+/* Mask cleared in one go by nfsrv_zapsock(). */
+#define SLP_ALLFLAGS	0xff
+
+/*
+ * One of these structures is allocated for each nfsd.
+ */
+/*
+ * Per-daemon state, linked into the ring headed by nfsd_head and freed
+ * by nfssvc_nfsd() on exit.
+ */
+struct nfsd {
+	struct nfsd	*nd_next;	/* Must be first */
+	struct nfsd	*nd_prev;
+	int		nd_flag;	/* NFSD_ flags */
+	struct nfssvc_sock *nd_slp;	/* Current socket */
+	struct mbuf	*nd_nam;	/* Client addr for datagram req. */
+	struct mbuf	*nd_mrep;	/* Req. mbuf list */
+	struct mbuf	*nd_md;
+	caddr_t		nd_dpos;	/* Position in list */
+	int		nd_procnum;	/* RPC procedure number */
+	u_long		nd_retxid;	/* RPC xid */
+	int		nd_repstat;	/* Reply status value */
+	struct ucred	nd_cr;		/* Credentials for req. */
+	int		nd_nqlflag;	/* Leasing flag */
+	int		nd_duration;	/* Lease duration */
+	int		nd_authlen;	/* Authenticator len */
+	u_char		nd_authstr[RPCAUTH_MAXSIZ]; /* Authenticator data */
+	struct proc	*nd_procp;	/* Proc ptr */
+};
+
+/*
+ * Bits for nd_flag, as used by nfssvc_nfsd():
+ *   NFSD_WAITING   set before the daemon sleeps waiting for a socket
+ *   NFSD_CHECKSLP  (on nfsd_head) the socket list should be scanned
+ *   NFSD_REQINPROG set once nfsrv_dorec() has been called for a socket
+ *   NFSD_NEEDAUTH  the request needs a uid mapping; may return ENEEDAUTH
+ *   NFSD_AUTHFAIL  reply with NQNFS_AUTHERR instead of running the call
+ */
+#define	NFSD_WAITING	0x01
+#define	NFSD_CHECKSLP	0x02
+#define	NFSD_REQINPROG	0x04
+#define	NFSD_NEEDAUTH	0x08
+#define	NFSD_AUTHFAIL	0x10
+#endif	/* KERNEL */
diff --git a/sys/sys/_sigset.h b/sys/sys/_sigset.h
new file mode 100644
index 00000000000..8ccded41c3b
--- /dev/null
+++ b/sys/sys/_sigset.h
@@ -0,0 +1,194 @@
+/*
+ * Copyright (c) 1982, 1986, 1989, 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)signal.h	8.2 (Berkeley) 1/21/94
+ */
+
+#ifndef	_SYS_SIGNAL_H_
+#define	_SYS_SIGNAL_H_
+
+#define NSIG	32		/* counting 0; could be 33 (mask is 1-32) */
+
+#ifndef _ANSI_SOURCE
+#include <machine/signal.h>	/* sigcontext; codes for SIGILL, SIGFPE */
+#endif
+
+#define	SIGHUP	1	/* hangup */
+#define	SIGINT	2	/* interrupt */
+#define	SIGQUIT	3	/* quit */
+#define	SIGILL	4	/* illegal instruction (not reset when caught) */
+#ifndef _POSIX_SOURCE
+#define	SIGTRAP	5	/* trace trap (not reset when caught) */
+#endif
+#define	SIGABRT	6	/* abort() */
+#ifndef _POSIX_SOURCE
+#define	SIGIOT	SIGABRT	/* compatibility */
+#define	SIGEMT	7	/* EMT instruction */
+#endif
+#define	SIGFPE	8	/* floating point exception */
+#define	SIGKILL	9	/* kill (cannot be caught or ignored) */
+#ifndef _POSIX_SOURCE
+#define	SIGBUS	10	/* bus error */
+#endif
+#define	SIGSEGV	11	/* segmentation violation */
+#ifndef _POSIX_SOURCE
+#define	SIGSYS	12	/* bad argument to system call */
+#endif
+#define	SIGPIPE	13	/* write on a pipe with no one to read it */
+#define	SIGALRM	14	/* alarm clock */
+#define	SIGTERM	15	/* software termination signal from kill */
+#ifndef _POSIX_SOURCE
+#define	SIGURG	16	/* urgent condition on IO channel */
+#endif
+#define	SIGSTOP	17	/* sendable stop signal not from tty */
+#define	SIGTSTP	18	/* stop signal from tty */
+#define	SIGCONT	19	/* continue a stopped process */
+#define	SIGCHLD	20	/* to parent on child stop or exit */
+#define	SIGTTIN	21	/* to readers pgrp upon background tty read */
+#define	SIGTTOU	22	/* like TTIN for output if (tp->t_local&LTOSTOP) */
+#ifndef _POSIX_SOURCE
+#define	SIGIO	23	/* input/output possible signal */
+#define	SIGXCPU	24	/* exceeded CPU time limit */
+#define	SIGXFSZ	25	/* exceeded file size limit */
+#define	SIGVTALRM 26	/* virtual time alarm */
+#define	SIGPROF	27	/* profiling time alarm */
+#define SIGWINCH 28	/* window size changes */
+#define SIGINFO	29	/* information request */
+#endif
+#define SIGUSR1 30	/* user defined signal 1 */
+#define SIGUSR2 31	/* user defined signal 2 */
+
+#if defined(_ANSI_SOURCE) || defined(__cplusplus)
+/*
+ * Language spec sez we must list exactly one parameter, even though we
+ * actually supply three.  Ugh!
+ */
+#define	SIG_DFL		(void (*)(int))0
+#define	SIG_IGN		(void (*)(int))1
+#define	SIG_ERR		(void (*)(int))-1
+#else
+#define	SIG_DFL		(void (*)())0
+#define	SIG_IGN		(void (*)())1
+#define	SIG_ERR		(void (*)())-1
+#endif
+
+#ifndef _ANSI_SOURCE
+typedef unsigned int sigset_t;
+
+/*
+ * Signal vector "template" used in sigaction call.
+ */
+struct	sigaction {
+	void	(*sa_handler)();	/* signal handler */
+	sigset_t sa_mask;		/* signal mask to apply */
+	int	sa_flags;		/* see signal options below */
+};
+#ifndef _POSIX_SOURCE
+#define SA_ONSTACK	0x0001	/* take signal on signal stack */
+#define SA_RESTART	0x0002	/* restart system call on signal return */
+#define	SA_DISABLE	0x0004	/* disable taking signals on alternate stack */
+#ifdef COMPAT_SUNOS
+#define	SA_USERTRAMP	0x0100	/* do not bounce off kernel's sigtramp */
+#endif
+#endif
+#define SA_NOCLDSTOP	0x0008	/* do not generate SIGCHLD on child stop */
+
+/*
+ * Flags for sigprocmask:
+ */
+#define	SIG_BLOCK	1	/* block specified signal set */
+#define	SIG_UNBLOCK	2	/* unblock specified signal set */
+#define	SIG_SETMASK	3	/* set specified signal set */
+
+#ifndef _POSIX_SOURCE
+#ifndef KERNEL
+#include <sys/cdefs.h>
+#endif
+typedef	void (*sig_t) __P((int));	/* type of signal function */
+
+/*
+ * Structure used in sigaltstack call.
+ */
+struct	sigaltstack {
+	char	*ss_base;		/* signal stack base */
+	int	ss_size;		/* signal stack length */
+	int	ss_flags;		/* SA_DISABLE and/or SA_ONSTACK */
+};
+#define	MINSIGSTKSZ	8192			/* minimum allowable stack */
+#define	SIGSTKSZ	(MINSIGSTKSZ + 32768)	/* recommended stack size */
+
+/*
+ * 4.3 compatibility:
+ * Signal vector "template" used in sigvec call.
+ */
+struct	sigvec {
+	void	(*sv_handler)();	/* signal handler */
+	int	sv_mask;		/* signal mask to apply */
+	int	sv_flags;		/* see signal options below */
+};
+
+#define SV_ONSTACK	SA_ONSTACK
+#define SV_INTERRUPT	SA_RESTART	/* same bit, opposite sense */
+#define sv_onstack sv_flags	/* isn't compatibility wonderful! */
+
+/*
+ * Structure used in sigstack call.
+ */
+struct	sigstack {
+	char	*ss_sp;			/* signal stack pointer */
+	int	ss_onstack;		/* current status */
+};
+
+/*
+ * Macro for converting signal number to a mask suitable for
+ * sigblock(): signal m is represented by bit (m - 1) of an int.
+ */
+#define sigmask(m)	(1 << ((m)-1))
+
+#define	BADSIG		SIG_ERR
+
+#endif	/* !_POSIX_SOURCE */
+#endif	/* !_ANSI_SOURCE */
+
+/*
+ * For historical reasons; programs expect signal's return value to be
+ * defined by <sys/signal.h>.
+ */
+__BEGIN_DECLS
+void	(*signal __P((int, void (*) __P((int))))) __P((int));
+__END_DECLS
+#endif	/* !_SYS_SIGNAL_H_ */
diff --git a/sys/sys/acct.h b/sys/sys/acct.h
new file mode 100644
index 00000000000..edc5bdbd563
--- /dev/null
+++ b/sys/sys/acct.h
@@ -0,0 +1,75 @@
+/*-
+ * Copyright (c) 1990, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)acct.h	8.2 (Berkeley) 1/21/94
+ */
+
+/*
+ * Accounting structures; these use a comp_t type which is a 3-bit base-8
+ * exponent, 13-bit fraction ``floating point'' number.  Units are 1/AHZ
+ * seconds.
+ */
+typedef u_short comp_t;
+
+struct acct {
+	char	ac_comm[10];	/* command name */
+	comp_t	ac_utime;	/* user time */
+	comp_t	ac_stime;	/* system time */
+	comp_t	ac_etime;	/* elapsed time */
+	time_t	ac_btime;	/* starting time */
+	uid_t	ac_uid;		/* user id */
+	gid_t	ac_gid;		/* group id */
+	short	ac_mem;		/* average memory usage */
+	comp_t	ac_io;		/* count of IO blocks */
+	dev_t	ac_tty;		/* controlling tty */
+#define	AFORK	0x01			/* forked but not execed */
+#define	ASU	0x02			/* used super-user permissions */
+#define	ACOMPAT	0x04			/* used compatibility mode */
+#define	ACORE	0x08			/* dumped core */
+#define	AXSIG	0x10			/* killed by a signal */
+	char	ac_flag;	/* accounting flags */
+};
+
+/*
+ * 1/AHZ is the granularity of the data encoded in the comp_t fields.
+ * This is not necessarily equal to hz.
+ */
+#define	AHZ	64
+
+#ifdef KERNEL
+struct vnode	*acctp;
+#endif
diff --git a/sys/sys/bio.h b/sys/sys/bio.h
new file mode 100644
index 00000000000..e6c329f239d
--- /dev/null
+++ b/sys/sys/bio.h
@@ -0,0 +1,178 @@
+/*
+ * Copyright (c) 1982, 1986, 1989, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)buf.h	8.7 (Berkeley) 1/21/94
+ */
+
+#ifndef _SYS_BUF_H_
+#define	_SYS_BUF_H_
+#include <sys/queue.h>
+
+#define NOLIST ((struct buf *)0x87654321)
+
+/*
+ * The buffer header describes an I/O operation in the kernel.
+ */
+struct buf {
+	LIST_ENTRY(buf) b_hash;		/* Hash chain. */
+	LIST_ENTRY(buf) b_vnbufs;	/* Buffer's associated vnode. */
+	TAILQ_ENTRY(buf) b_freelist;	/* Free list position if not active. */
+	struct	buf *b_actf, **b_actb;	/* Device driver queue when active. */
+	struct  proc *b_proc;		/* Associated proc; NULL if kernel. */
+	volatile long	b_flags;	/* B_* flags. */
+	int	b_error;		/* Errno value. */
+	long	b_bufsize;		/* Allocated buffer size. */
+	long	b_bcount;		/* Valid bytes in buffer. */
+	long	b_resid;		/* Remaining I/O. */
+	dev_t	b_dev;			/* Device associated with buffer. */
+	struct {
+		caddr_t	b_addr;		/* Memory, superblocks, indirect etc. */
+	} b_un;
+	void	*b_saveaddr;		/* Original b_addr for physio. */
+	daddr_t	b_lblkno;		/* Logical block number. */
+	daddr_t	b_blkno;		/* Underlying physical block number. */
+					/* Function to call upon completion. */
+	void	(*b_iodone) __P((struct buf *));
+	struct	vnode *b_vp;		/* Device vnode. */
+	int	b_pfcent;		/* Center page when swapping cluster. */
+	int	b_dirtyoff;		/* Offset in buffer of dirty region. */
+	int	b_dirtyend;		/* Offset of end of dirty region. */
+	struct	ucred *b_rcred;		/* Read credentials reference. */
+	struct	ucred *b_wcred;		/* Write credentials reference. */
+	int	b_validoff;		/* Offset in buffer of valid region. */
+	int	b_validend;		/* Offset of end of valid region. */
+};
+
+/* Device driver compatibility definitions. */
+#define	b_active b_bcount		/* Driver queue head: drive active. */
+#define	b_data	 b_un.b_addr		/* b_un.b_addr is not changeable. */
+#define	b_errcnt b_resid		/* Retry count while I/O in progress. */
+#define	iodone	 biodone		/* Old name for biodone. */
+#define	iowait	 biowait		/* Old name for biowait. */
+
+/*
+ * These flags are kept in b_flags.
+ */
+#define	B_AGE		0x00000001	/* Move to age queue when I/O done. */
+#define	B_APPENDWRITE	0x00000002	/* Append-write in progress. */
+#define	B_ASYNC		0x00000004	/* Start I/O, do not wait. */
+#define	B_BAD		0x00000008	/* Bad block revectoring in progress. */
+#define	B_BUSY		0x00000010	/* I/O in progress. */
+#define	B_CACHE		0x00000020	/* Bread found us in the cache. */
+#define	B_CALL		0x00000040	/* Call b_iodone from biodone. */
+#define	B_DELWRI	0x00000080	/* Delay I/O until buffer reused. */
+#define	B_DIRTY		0x00000100	/* Dirty page to be pushed out async. */
+#define	B_DONE		0x00000200	/* I/O completed. */
+#define	B_EINTR		0x00000400	/* I/O was interrupted */
+#define	B_ERROR		0x00000800	/* I/O error occurred. */
+#define	B_GATHERED	0x00001000	/* LFS: already in a segment. */
+#define	B_INVAL		0x00002000	/* Does not contain valid info. */
+#define	B_LOCKED	0x00004000	/* Locked in core (not reusable). */
+#define	B_NOCACHE	0x00008000	/* Do not cache block after use. */
+#define	B_PAGET		0x00010000	/* Page in/out of page table space. */
+#define	B_PGIN		0x00020000	/* Pagein op, so swap() can count it. */
+#define	B_PHYS		0x00040000	/* I/O to user memory. */
+#define	B_RAW		0x00080000	/* Set by physio for raw transfers. */
+#define	B_READ		0x00100000	/* Read buffer. */
+#define	B_TAPE		0x00200000	/* Magnetic tape I/O. */
+#define	B_UAREA		0x00400000	/* Buffer describes Uarea I/O. */
+#define	B_WANTED	0x00800000	/* Process wants this buffer. */
+#define	B_WRITE		0x00000000	/* Write buffer (pseudo flag). */
+#define	B_WRITEINPROG	0x01000000	/* Write in progress. */
+#define	B_XXX		0x02000000	/* Debugging flag. */
+
+/*
+ * This structure describes a clustered I/O.  It is stored in the b_saveaddr
+ * field of the buffer on which I/O is done.  At I/O completion, the cluster
+ * callback uses the structure to parcel the I/O out to individual buffers,
+ * and then frees this structure.
+ */
+struct cluster_save {
+	long	bs_bcount;		/* Saved b_bcount. */
+	long	bs_bufsize;		/* Saved b_bufsize. */
+	void	*bs_saveaddr;		/* Saved b_addr. */
+	int	bs_nchildren;		/* Number of associated buffers. */
+	struct buf **bs_children;	/* List of associated buffers. */
+};
+
+/*
+ * Zero out the buffer's data area (b_bcount bytes) and reset b_resid.
+ */
+#define	clrbuf(bp) {							\
+	blkclr((bp)->b_data, (u_int)(bp)->b_bcount);			\
+	(bp)->b_resid = 0;						\
+}
+
+/* Flags to low-level allocation routines. */
+#define B_CLRBUF	0x01	/* Request allocated buffer be cleared. */
+#define B_SYNC		0x02	/* Do all allocations synchronously. */
+
+#ifdef KERNEL
+int	nbuf;			/* The number of buffer headers */
+struct	buf *buf;		/* The buffer headers. */
+char	*buffers;		/* The buffer contents. */
+int	bufpages;		/* Number of memory pages in the buffer pool. */
+struct	buf *swbuf;		/* Swap I/O buffer headers. */
+int	nswbuf;			/* Number of swap I/O buffer headers. */
+struct	buf bswlist;		/* Head of swap I/O buffer headers free list. */
+struct	buf *bclnlist;		/* Head of cleaned page list. */
+
+__BEGIN_DECLS
+int	allocbuf __P((struct buf *, int));
+int	bawrite __P((struct buf *));
+int	bdwrite __P((struct buf *));
+void	biodone __P((struct buf *));
+int	biowait __P((struct buf *));
+int	bread __P((struct vnode *, daddr_t, int,
+	    struct ucred *, struct buf **));
+int	breadn __P((struct vnode *, daddr_t, int, daddr_t *, int *, int,
+	    struct ucred *, struct buf **));
+int	brelse __P((struct buf *));
+void	bufinit __P((void));
+int	bwrite __P((struct buf *));
+void	cluster_callback __P((struct buf *));
+int	cluster_read __P((struct vnode *, u_quad_t, daddr_t, long,
+	    struct ucred *, struct buf **));
+void	cluster_write __P((struct buf *, u_quad_t));
+struct buf *getblk __P((struct vnode *, daddr_t, int, int, int));
+struct buf *geteblk __P((int));
+struct buf *getnewbuf __P((int slpflag, int slptimeo));
+struct buf *incore __P((struct vnode *, daddr_t));
+u_int	minphys __P((struct buf *bp));
+__END_DECLS
+#endif
+#endif /* !_SYS_BUF_H_ */
diff --git a/sys/sys/buf.h b/sys/sys/buf.h
new file mode 100644
index 00000000000..e6c329f239d
--- /dev/null
+++ b/sys/sys/buf.h
@@ -0,0 +1,178 @@
+/*
+ * Copyright (c) 1982, 1986, 1989, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)buf.h	8.7 (Berkeley) 1/21/94
+ */
+
+#ifndef _SYS_BUF_H_
+#define	_SYS_BUF_H_
+#include <sys/queue.h>
+
+#define NOLIST ((struct buf *)0x87654321)
+
+/*
+ * The buffer header describes an I/O operation in the kernel.
+ */
+struct buf {
+	LIST_ENTRY(buf) b_hash;		/* Hash chain. */
+	LIST_ENTRY(buf) b_vnbufs;	/* Buffer's associated vnode. */
+	TAILQ_ENTRY(buf) b_freelist;	/* Free list position if not active. */
+	struct	buf *b_actf, **b_actb;	/* Device driver queue when active. */
+	struct  proc *b_proc;		/* Associated proc; NULL if kernel. */
+	volatile long	b_flags;	/* B_* flags. */
+	int	b_error;		/* Errno value. */
+	long	b_bufsize;		/* Allocated buffer size. */
+	long	b_bcount;		/* Valid bytes in buffer. */
+	long	b_resid;		/* Remaining I/O. */
+	dev_t	b_dev;			/* Device associated with buffer. */
+	struct {
+		caddr_t	b_addr;		/* Memory, superblocks, indirect etc. */
+	} b_un;
+	void	*b_saveaddr;		/* Original b_addr for physio. */
+	daddr_t	b_lblkno;		/* Logical block number. */
+	daddr_t	b_blkno;		/* Underlying physical block number. */
+					/* Function to call upon completion. */
+	void	(*b_iodone) __P((struct buf *));
+	struct	vnode *b_vp;		/* Device vnode. */
+	int	b_pfcent;		/* Center page when swapping cluster. */
+	int	b_dirtyoff;		/* Offset in buffer of dirty region. */
+	int	b_dirtyend;		/* Offset of end of dirty region. */
+	struct	ucred *b_rcred;		/* Read credentials reference. */
+	struct	ucred *b_wcred;		/* Write credentials reference. */
+	int	b_validoff;		/* Offset in buffer of valid region. */
+	int	b_validend;		/* Offset of end of valid region. */
+};
+
+/* Device driver compatibility definitions. */
+#define	b_active b_bcount		/* Driver queue head: drive active. */
+#define	b_data	 b_un.b_addr		/* b_un.b_addr is not changeable. */
+#define	b_errcnt b_resid		/* Retry count while I/O in progress. */
+#define	iodone	 biodone		/* Old name for biodone. */
+#define	iowait	 biowait		/* Old name for biowait. */
+
+/*
+ * These flags are kept in b_flags.
+ */
+#define	B_AGE		0x00000001	/* Move to age queue when I/O done. */
+#define	B_APPENDWRITE	0x00000002	/* Append-write in progress. */
+#define	B_ASYNC		0x00000004	/* Start I/O, do not wait. */
+#define	B_BAD		0x00000008	/* Bad block revectoring in progress. */
+#define	B_BUSY		0x00000010	/* I/O in progress. */
+#define	B_CACHE		0x00000020	/* Bread found us in the cache. */
+#define	B_CALL		0x00000040	/* Call b_iodone from biodone. */
+#define	B_DELWRI	0x00000080	/* Delay I/O until buffer reused. */
+#define	B_DIRTY		0x00000100	/* Dirty page to be pushed out async. */
+#define	B_DONE		0x00000200	/* I/O completed. */
+#define	B_EINTR		0x00000400	/* I/O was interrupted */
+#define	B_ERROR		0x00000800	/* I/O error occurred. */
+#define	B_GATHERED	0x00001000	/* LFS: already in a segment. */
+#define	B_INVAL		0x00002000	/* Does not contain valid info. */
+#define	B_LOCKED	0x00004000	/* Locked in core (not reusable). */
+#define	B_NOCACHE	0x00008000	/* Do not cache block after use. */
+#define	B_PAGET		0x00010000	/* Page in/out of page table space. */
+#define	B_PGIN		0x00020000	/* Pagein op, so swap() can count it. */
+#define	B_PHYS		0x00040000	/* I/O to user memory. */
+#define	B_RAW		0x00080000	/* Set by physio for raw transfers. */
+#define	B_READ		0x00100000	/* Read buffer. */
+#define	B_TAPE		0x00200000	/* Magnetic tape I/O. */
+#define	B_UAREA		0x00400000	/* Buffer describes Uarea I/O. */
+#define	B_WANTED	0x00800000	/* Process wants this buffer. */
+#define	B_WRITE		0x00000000	/* Write buffer (pseudo flag). */
+#define	B_WRITEINPROG	0x01000000	/* Write in progress. */
+#define	B_XXX		0x02000000	/* Debugging flag. */
+
+/*
+ * This structure describes a clustered I/O.  It is stored in the b_saveaddr
+ * field of the buffer on which I/O is done.  At I/O completion, the cluster
+ * callback uses the structure to parcel the I/O out to individual buffers,
+ * and then frees this structure.
+ */
+struct cluster_save {
+	long	bs_bcount;		/* Saved b_bcount. */
+	long	bs_bufsize;		/* Saved b_bufsize. */
+	void	*bs_saveaddr;		/* Saved b_addr. */
+	int	bs_nchildren;		/* Number of associated buffers. */
+	struct buf **bs_children;	/* List of associated buffers. */
+};
+
+/*
+ * Zero out the buffer's data area (b_bcount bytes) and reset b_resid.
+ */
+#define	clrbuf(bp) {							\
+	blkclr((bp)->b_data, (u_int)(bp)->b_bcount);			\
+	(bp)->b_resid = 0;						\
+}
+
+/* Flags to low-level allocation routines. */
+#define B_CLRBUF	0x01	/* Request allocated buffer be cleared. */
+#define B_SYNC		0x02	/* Do all allocations synchronously. */
+
+#ifdef KERNEL
+int	nbuf;			/* The number of buffer headers */
+struct	buf *buf;		/* The buffer headers. */
+char	*buffers;		/* The buffer contents. */
+int	bufpages;		/* Number of memory pages in the buffer pool. */
+struct	buf *swbuf;		/* Swap I/O buffer headers. */
+int	nswbuf;			/* Number of swap I/O buffer headers. */
+struct	buf bswlist;		/* Head of swap I/O buffer headers free list. */
+struct	buf *bclnlist;		/* Head of cleaned page list. */
+
+__BEGIN_DECLS
+int	allocbuf __P((struct buf *, int));
+int	bawrite __P((struct buf *));
+int	bdwrite __P((struct buf *));
+void	biodone __P((struct buf *));
+int	biowait __P((struct buf *));
+int	bread __P((struct vnode *, daddr_t, int,
+	    struct ucred *, struct buf **));
+int	breadn __P((struct vnode *, daddr_t, int, daddr_t *, int *, int,
+	    struct ucred *, struct buf **));
+int	brelse __P((struct buf *));
+void	bufinit __P((void));
+int	bwrite __P((struct buf *));
+void	cluster_callback __P((struct buf *));
+int	cluster_read __P((struct vnode *, u_quad_t, daddr_t, long,
+	    struct ucred *, struct buf **));
+void	cluster_write __P((struct buf *, u_quad_t));
+struct buf *getblk __P((struct vnode *, daddr_t, int, int, int));
+struct buf *geteblk __P((int));
+struct buf *getnewbuf __P((int slpflag, int slptimeo));
+struct buf *incore __P((struct vnode *, daddr_t));
+u_int	minphys __P((struct buf *bp));
+__END_DECLS
+#endif
+#endif /* !_SYS_BUF_H_ */
diff --git a/sys/sys/callout.h b/sys/sys/callout.h
new file mode 100644
index 00000000000..d685e56d3f0
--- /dev/null
+++ b/sys/sys/callout.h
@@ -0,0 +1,51 @@
+/*-
+ * Copyright (c) 1990, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)callout.h	8.2 (Berkeley) 1/21/94
+ */
+
+struct callout {
+	struct	callout *c_next;		/* next callout in queue */
+	void	*c_arg;				/* function argument */
+	void	(*c_func) __P((void *));	/* function to call */
+	int	c_time;				/* ticks to the event */
+};
+
+#ifdef KERNEL
+struct	callout *callfree, *callout, calltodo;
+int	ncallout;
+#endif
diff --git a/sys/sys/cdefs.h b/sys/sys/cdefs.h
new file mode 100644
index 00000000000..c104b9e964d
--- /dev/null
+++ b/sys/sys/cdefs.h
@@ -0,0 +1,122 @@
+/*
+ * Copyright (c) 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Berkeley Software Design, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)cdefs.h	8.7 (Berkeley) 1/21/94
+ */
+
+#ifndef	_CDEFS_H_
+#define	_CDEFS_H_
+
+#if defined(__cplusplus)		/* C++ includers need C linkage */
+#define	__BEGIN_DECLS	extern "C" {
+#define	__END_DECLS	}		/* no ';' -- it is an empty declaration */
+#else
+#define	__BEGIN_DECLS			/* plain C: nothing to wrap */
+#define	__END_DECLS
+#endif
+
+/*
+ * __P wraps prototype argument lists so that they vanish for traditional C.
+ * The __CONCAT macro is used to concatenate parts of symbol names, e.g.
+ * with "#define OLD(foo) __CONCAT(old,foo)", OLD(foo) produces oldfoo; do
+ * not put spaces in between its arguments.  __CONCAT can also concatenate
+ * double-quoted strings produced by __STRING, but only with ANSI C.
+ */
+#if defined(__STDC__) || defined(__cplusplus)
+#define	__P(protos)	protos		/* full-blown ANSI C */
+#define	__CONCAT(x,y)	x ## y
+#define	__STRING(x)	#x
+
+#define	__const		const		/* define reserved names to standard */
+#define	__signed	signed
+#define	__volatile	volatile
+#if defined(__cplusplus)
+#define	__inline	inline		/* convert to C++ keyword */
+#else
+#ifndef __GNUC__
+#define	__inline			/* delete GCC keyword */
+#endif /* !__GNUC__ */
+#endif /* !__cplusplus */
+
+#else	/* !(__STDC__ || __cplusplus) */
+#define	__P(protos)	()		/* traditional C preprocessor */
+#define	__CONCAT(x,y)	x/**/y
+#define	__STRING(x)	"x"
+
+#ifndef __GNUC__
+#define	__const				/* delete pseudo-ANSI C keywords */
+#define	__inline
+#define	__signed
+#define	__volatile
+/*
+ * In non-ANSI C environments, new programs will want ANSI-only C keywords
+ * deleted from the program and old programs will want them left alone.
+ * When using a compiler other than gcc, programs using the ANSI C keywords
+ * const, inline etc. as normal identifiers should define -DNO_ANSI_KEYWORDS.
+ * When using "gcc -traditional", we assume that this is the intent; if
+ * __GNUC__ is defined but __STDC__ is not, we leave the new keywords alone.
+ */
+#ifndef	NO_ANSI_KEYWORDS
+#define	const				/* delete ANSI C keywords */
+#define	inline
+#define	signed
+#define	volatile
+#endif	/* !NO_ANSI_KEYWORDS */
+#endif	/* !__GNUC__ */
+#endif	/* !(__STDC__ || __cplusplus) */
+
+/*
+ * GCC1 and some versions of GCC2 declare dead (non-returning) and pure
+ * (no side effects) functions using "volatile" and "const"; unfortunately,
+ * these then cause warnings under "-ansi -pedantic".  GCC 2.5 and later use
+ * a new, peculiar __attribute__((attrs)) style; the minor number is tested
+ * only for major version 2.  All of these work for GNU C++ (modulo a slight
+ * glitch in the C++ grammar in the distribution version of 2.5.5).
+ */
+#if !defined(__GNUC__) || __GNUC__ < 2 || (__GNUC__ == 2 && __GNUC_MINOR__ < 5)
+#define	__attribute__(x)	/* delete __attribute__ if non-gcc or gcc < 2.5 */
+#if defined(__GNUC__) && !defined(__STRICT_ANSI__)
+#define	__dead		__volatile
+#define	__pure		__const
+#endif
+#endif
+
+/* Delete pseudo-keywords wherever they are not available or needed. */
+#ifndef __dead		/* __pure is only ever defined together with __dead */
+#define	__dead
+#define	__pure
+#endif	/* !__dead */
+
+#endif /* !_CDEFS_H_ */
diff --git a/sys/sys/clist.h b/sys/sys/clist.h
new file mode 100644
index 00000000000..bad26477015
--- /dev/null
+++ b/sys/sys/clist.h
@@ -0,0 +1,45 @@
+/*-
+ * Copyright (c) 1990, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)clist.h	8.1 (Berkeley) 6/4/93
+ */
+
+struct cblock {			/* CBQSIZE, CBSIZE: not defined in this header */
+	struct cblock *c_next;		/* next cblock in queue */
+	char c_quote[CBQSIZE];		/* quoted characters */
+	char c_info[CBSIZE];		/* characters */
+};
+
+#ifdef KERNEL
+extern	struct cblock *cfree, *cfreelist;	/* presumably pool, free list */
+extern	int cfreecount, nclist;
+#endif /* KERNEL */
diff --git a/sys/sys/conf.h b/sys/sys/conf.h
new file mode 100644
index 00000000000..58cb6fa8339
--- /dev/null
+++ b/sys/sys/conf.h
@@ -0,0 +1,123 @@
+/*-
+ * Copyright (c) 1990, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)conf.h	8.3 (Berkeley) 1/21/94
+ */
+
+/*
+ * Definitions of device driver entry switches
+ */
+
+struct buf;
+struct proc;
+struct tty;
+struct uio;
+struct vnode;
+
+struct bdevsw {		/* driver entry switch (presumably block devices) */
+	int	(*d_open)	__P((dev_t dev, int oflags, int devtype,
+				     struct proc *p));
+	int	(*d_close)	__P((dev_t dev, int fflag, int devtype,
+				     struct proc *p));
+	int	(*d_strategy)	__P((struct buf *bp));
+	int	(*d_ioctl)	__P((dev_t dev, int cmd, caddr_t data,
+				     int fflag, struct proc *p));
+	int	(*d_dump)	();	/* parameters vary by architecture */
+	int	(*d_psize)	__P((dev_t dev));
+	int	d_flags;	/* NOTE(review): flag values not defined here */
+};
+
+#ifdef KERNEL
+extern struct bdevsw bdevsw[];	/* array size is not declared here */
+#endif /* KERNEL */
+
+struct cdevsw {		/* driver entry switch (presumably char devices) */
+	int	(*d_open)	__P((dev_t dev, int oflags, int devtype,
+				     struct proc *p));
+	int	(*d_close)	__P((dev_t dev, int fflag, int devtype,
+				     struct proc *p));
+	int	(*d_read)	__P((dev_t dev, struct uio *uio, int ioflag));
+	int	(*d_write)	__P((dev_t dev, struct uio *uio, int ioflag));
+	int	(*d_ioctl)	__P((dev_t dev, int cmd, caddr_t data,
+				     int fflag, struct proc *p));
+	int	(*d_stop)	__P((struct tty *tp, int rw));
+	int	(*d_reset)	__P((int uban));	/* XXX */
+	struct	tty *d_ttys;
+	int	(*d_select)	__P((dev_t dev, int which, struct proc *p));
+	int	(*d_mmap)	__P(());	/* arguments left unspecified */
+	int	(*d_strategy)	__P((struct buf *bp));
+};
+
+#ifdef KERNEL
+extern struct cdevsw cdevsw[];	/* array size is not declared here */
+
+/* symbolic sleep message strings */
+extern char devopn[], devio[], devwait[], devin[], devout[];
+extern char devioc[], devcls[];
+#endif /* KERNEL */
+
+struct linesw {		/* tty entry switch (presumably line disciplines) */
+	int	(*l_open)	__P((dev_t dev, struct tty *tp));
+	int	(*l_close)	__P((struct tty *tp, int flag));
+	int	(*l_read)	__P((struct tty *tp, struct uio *uio,
+				     int flag));
+	int	(*l_write)	__P((struct tty *tp, struct uio *uio,
+				     int flag));
+	int	(*l_ioctl)	__P((struct tty *tp, int cmd, caddr_t data,
+				     int flag, struct proc *p));
+	int	(*l_rint)	__P((int c, struct tty *tp));
+	int	(*l_start)	__P((struct tty *tp));
+	int	(*l_modem)	__P((struct tty *tp, int flag));
+};
+
+#ifdef KERNEL
+extern struct linesw linesw[];	/* array size is not declared here */
+#endif /* KERNEL */
+
+struct swdevt {
+	dev_t	sw_dev;
+	int	sw_flags;	/* presumably SW_* bits below -- confirm */
+	int	sw_nblks;
+	struct	vnode *sw_vp;
+};
+#define	SW_FREED	0x01
+#define	SW_SEQUENTIAL	0x02
+#define sw_freed	sw_flags	/* XXX compat: old name aliases sw_flags */
+
+#ifdef KERNEL
+extern struct swdevt swdevt[];	/* array size is not declared here */
+#endif /* KERNEL */
diff --git a/sys/sys/device.h b/sys/sys/device.h
new file mode 100644
index 00000000000..0a233ed3e5c
--- /dev/null
+++ b/sys/sys/device.h
@@ -0,0 +1,143 @@
+/*
+ * Copyright (c) 1992, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This software was developed by the Computer Systems Engineering group
+ * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
+ * contributed to Berkeley.
+ *
+ * All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Lawrence Berkeley Laboratory.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)device.h	8.2 (Berkeley) 2/17/94
+ */
+
+#ifndef _SYS_DEVICE_H_
+#define	_SYS_DEVICE_H_
+
+/*
+ * Minimal device structures.
+ * Note that all ``system'' device types are listed here.
+ */
+enum devclass {			/* type of dv_class and cd_class below */
+	DV_DULL,		/* generic, no special info */
+	DV_CPU,			/* CPU (carries resource utilization) */
+	DV_DISK,		/* disk drive (label, etc) */
+	DV_IFNET,		/* network interface */
+	DV_TAPE,		/* tape device */
+	DV_TTY			/* serial line interface (???) */
+};
+
+struct device {
+	enum	devclass dv_class;	/* this device's classification */
+	struct	device *dv_next;	/* next in list of all (see alldevs) */
+	struct	cfdata *dv_cfdata;	/* config data that found us */
+	int	dv_unit;		/* device unit number */
+	char	dv_xname[16];		/* external name (name + unit) */
+	struct	device *dv_parent;	/* pointer to parent device */
+};
+
+/* `event' counters (use zero or more per device instance, as needed) */
+struct evcnt {
+	struct	evcnt *ev_next;		/* linked list (see allevents) */
+	struct	device *ev_dev;		/* associated device */
+	int	ev_count;		/* how many have occurred */
+	char	ev_name[8];		/* what to call them (systat display) */
+};
+
+/*
+ * Configuration data (i.e., data placed in ioconf.c).
+ */
+struct cfdata {
+	struct	cfdriver *cf_driver;	/* config driver */
+	short	cf_unit;		/* unit number */
+	short	cf_fstate;		/* finding state (FSTATE_* below) */
+	int	*cf_loc;		/* locators (machine dependent) */
+	int	cf_flags;		/* flags from config */
+	short	*cf_parents;		/* potential parents */
+	void	(**cf_ivstubs)();	/* config-generated vectors, if any */
+};
+#define FSTATE_NOTFOUND	0	/* has not been found */
+#define	FSTATE_FOUND	1	/* has been found */
+#define	FSTATE_STAR	2	/* duplicable */
+
+typedef int (*cfmatch_t) __P((struct device *, struct cfdata *, void *));
+
+/*
+ * `configuration' driver (what the machine-independent autoconf uses).
+ * As devices are found, they are applied against all the potential matches.
+ * The one with the best match is taken, and a device structure (plus any
+ * other data desired) is allocated.  Pointers to these are placed into
+ * an array of pointers.  The array itself must be dynamic since devices
+ * can be found long after the machine is up and running.
+ */
+struct cfdriver {
+	void	**cd_devs;		/* devices found (cd_ndevs slots) */
+	char	*cd_name;		/* device name */
+	cfmatch_t cd_match;		/* returns a match level */
+	void	(*cd_attach) __P((struct device *, struct device *, void *));
+	enum	devclass cd_class;	/* device classification */
+	size_t	cd_devsize;		/* size of dev data (for malloc) */
+	void	*cd_aux;		/* additional driver, if any */
+	int	cd_ndevs;		/* size of cd_devs array */
+};
+
+/*
+ * Configuration printing functions (cfprint_t), and their return codes.
+ * The second argument is NULL if the device was configured; otherwise it is
+ * the name of the parent device.  The return value is ignored if the device
+ * was configured, so most functions can return UNCONF unconditionally.
+ */
+typedef int (*cfprint_t) __P((void *, char *));
+#define	QUIET	0		/* print nothing */
+#define	UNCONF	1		/* print " not configured\n" */
+#define	UNSUPP	2		/* print " not supported\n" */
+
+/*
+ * Pseudo-device attach information (function + number of pseudo-devs).
+ */
+struct pdevinit {
+	void	(*pdev_attach) __P((int));	/* attach function */
+	int	pdev_count;			/* number of pseudo-devs */
+};
+
+struct	device *alldevs;	/* head of list of all devices (XXX not extern) */
+struct	evcnt *allevents;	/* head of list of all events (XXX not extern) */
+
+struct cfdata *config_search __P((cfmatch_t, struct device *, void *));
+struct cfdata *config_rootsearch __P((cfmatch_t, char *, void *));
+int config_found __P((struct device *, void *, cfprint_t));
+int config_rootfound __P((char *, void *));
+void config_attach __P((struct device *, struct cfdata *, void *, cfprint_t));
+void evcnt_attach __P((struct device *, const char *, struct evcnt *));
+#endif /* !_SYS_DEVICE_H_ */
diff --git a/sys/sys/dir.h b/sys/sys/dir.h
new file mode 100644
index 00000000000..0c4cd679cee
--- /dev/null
+++ b/sys/sys/dir.h
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 1982, 1986, 1989, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)dir.h	8.2 (Berkeley) 1/4/94
+ */
+
+/*
+ * The information in this file should be obtained from <dirent.h>
+ * and is provided solely (and temporarily) for backward compatibility.
+ */
+
+#ifndef _SYS_DIR_H_
+#define	_SYS_DIR_H_
+
+#include <dirent.h>
+
+/*
+ * Backwards compatibility: `struct direct' is spelled `struct dirent'.
+ */
+#define direct dirent
+
+/*
+ * DIRSIZ(dp) gives the minimum record length which will hold the directory
+ * entry *dp: the size of struct direct without the d_name field, plus the
+ * name with its terminating null byte (dp->d_namlen+1) rounded up to a
+ * 4 byte boundary.  dp is evaluated once; MAXNAMLEN must be in scope.
+ */
+#undef DIRSIZ
+#define DIRSIZ(dp) \
+    ((sizeof (struct direct) - (MAXNAMLEN+1)) + (((dp)->d_namlen+1 + 3) &~ 3))
+
+#endif /* !_SYS_DIR_H_ */
diff --git a/sys/sys/dirent.h b/sys/sys/dirent.h
new file mode 100644
index 00000000000..1c4b96aa29c
--- /dev/null
+++ b/sys/sys/dirent.h
@@ -0,0 +1,76 @@
+/*-
+ * Copyright (c) 1989, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)dirent.h	8.1 (Berkeley) 6/2/93
+ */
+
+/*
+ * The dirent structure defines the format of directory entries returned by
+ * the getdirentries(2) system call.
+ *
+ * A directory entry has a struct dirent at the front of it, containing its
+ * inode number, the length of the entry, the file type, and the length of
+ * the name contained in the entry.  These are followed by the name padded
+ * to a 4 byte boundary with null bytes.  All names are guaranteed null
+ * terminated.  The maximum length of a name in a directory is MAXNAMLEN.
+ */
+
+struct dirent {
+	unsigned long	d_fileno;	/* file number of entry */
+	unsigned short	d_reclen;	/* length of this record */
+	unsigned char	d_type; 	/* file type, see DT_* below */
+	unsigned char	d_namlen;	/* length of string in d_name */
+#ifdef _POSIX_SOURCE	/* MAXNAMLEN is not defined in this case */
+	char	d_name[255 + 1];	/* name must be no longer than this */
+#else
+#define	MAXNAMLEN	255
+	char	d_name[MAXNAMLEN + 1];	/* name must be no longer than this */
+#endif	/* _POSIX_SOURCE */
+};
+
+/*
+ * File types (values of d_type)
+ */
+#define	DT_UNKNOWN	 0
+#define	DT_FIFO		 1
+#define	DT_CHR		 2
+#define	DT_DIR		 4
+#define	DT_BLK		 6
+#define	DT_REG		 8
+#define	DT_LNK		10
+#define	DT_SOCK		12
+
+/*
+ * Convert between stat structure types and directory types (DT_* above).
+ */
+#define	IFTODT(mode)	(((mode) & 0170000) >> 12)
+#define	DTTOIF(dirtype)	((dirtype) << 12)
diff --git a/sys/sys/disk.h b/sys/sys/disk.h
new file mode 100644
index 00000000000..352ecf00274
--- /dev/null
+++ b/sys/sys/disk.h
@@ -0,0 +1,112 @@
+/*
+ * Copyright (c) 1992, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This software was developed by the Computer Systems Engineering group
+ * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
+ * contributed to Berkeley.
+ *
+ * All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Lawrence Berkeley Laboratory.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)disk.h	8.1 (Berkeley) 6/2/93
+ *
+ * from: $Header: disk.h,v 1.5 92/11/19 04:33:03 torek Exp $ (LBL)
+ */
+
+/*
+ * Disk device structures.
+ *
+ * Note that this is only a preliminary outline.  The final disk structures
+ * may be somewhat different.
+ */
+struct buf;
+
+struct dkdevice {
+	struct	device dk_dev;		/* base device (embedded by value) */
+	struct	dkdevice *dk_next;	/* list of disks; not yet used */
+	int	dk_bps;			/* xfer rate: bytes per second */
+	int	dk_bopenmask;		/* block devices open */
+	int	dk_copenmask;		/* character devices open */
+	int	dk_openmask;		/* composite (bopen|copen) */
+	int	dk_state;		/* label state (DK_* below?)   ### */
+	int	dk_blkshift;		/* shift to convert DEV_BSIZE to blks */
+	int	dk_byteshift;		/* shift to convert bytes to blks */
+	struct	dkdriver *dk_driver;	/* pointer to driver */
+	daddr_t	dk_labelsector;		/* sector containing label */
+	struct	disklabel dk_label;	/* label (embedded by value) */
+};
+
+struct dkdriver {
+	void	(*d_strategy) __P((struct buf *));	/* sole member unless notyet */
+#ifdef notyet
+	int	(*d_open) __P((dev_t dev, int ifmt, int, struct proc *));
+	int	(*d_close) __P((dev_t dev, int, int ifmt, struct proc *));
+	int	(*d_ioctl) __P((dev_t dev, int cmd, caddr_t data, int fflag,
+				struct proc *));
+	int	(*d_dump) __P((dev_t));
+	void	(*d_start) __P((struct buf *, daddr_t));
+	int	(*d_mklabel) __P((struct dkdevice *));
+#endif	/* notyet */
+};
+
+/* states (presumably the values of dk_state -- confirm) */
+#define	DK_CLOSED	0		/* drive is closed */
+#define	DK_WANTOPEN	1		/* drive being opened */
+#define	DK_WANTOPENRAW	2		/* drive being opened (raw?) */
+#define	DK_RDLABEL	3		/* label being read */
+#define	DK_OPEN		4		/* label read, drive open */
+#define	DK_OPENRAW	5		/* open without label */
+
+#ifdef DISKSORT_STATS
+/*
+ * Stats from disksort(); only declared when DISKSORT_STATS is defined.
+ */
+struct disksort_stats {
+	long	ds_newhead;		/* # new queue heads created */
+	long	ds_newtail;		/* # new queue tails created */
+	long	ds_midfirst;		/* # insertions into sort list */
+	long	ds_endfirst;		/* # insertions at end of sort list */
+	long	ds_newsecond;		/* # inversions (2nd lists) created */
+	long	ds_midsecond;		/* # insertions into 2nd list */
+	long	ds_endsecond;		/* # insertions at end of 2nd list */
+};
+#endif	/* DISKSORT_STATS */
+
+#ifdef KERNEL
+void	disksort __P((struct buf *, struct buf *));
+char	*readdisklabel __P((struct dkdevice *, int));
+int	setdisklabel __P((struct dkdevice *, struct disklabel *));
+int	writedisklabel __P((struct dkdevice *, int));
+int	diskerr __P((struct dkdevice *, struct buf *, char *, int, int));
+#endif	/* KERNEL */
diff --git a/sys/sys/disklabel.h b/sys/sys/disklabel.h
new file mode 100644
index 00000000000..a25ee29e363
--- /dev/null
+++ b/sys/sys/disklabel.h
@@ -0,0 +1,332 @@
+/*
+ * Copyright (c) 1987, 1988, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)disklabel.h	8.1 (Berkeley) 6/2/93
+ */
+
+/*
+ * Disk description table, see disktab(5)
+ */
+#define	_PATH_DISKTAB	"/etc/disktab"
+#define	DISKTAB		"/etc/disktab"		/* deprecated */
+
+/*
+ * Each disk has a label which includes information about the hardware
+ * disk geometry, filesystem partitions, and drive specific information.
+ * The label is in block 0 or 1, possibly offset from the beginning
+ * to leave room for a bootstrap, etc.
+ */
+
+/* XXX these should be defined per controller (or drive) elsewhere, not here! */
+#ifdef i386
+#define LABELSECTOR	1			/* sector containing label */
+#define LABELOFFSET	0			/* offset of label in sector */
+#endif /* i386 */
+
+#ifndef	LABELSECTOR
+#define LABELSECTOR	0			/* default sector containing label */
+#endif
+
+#ifndef	LABELOFFSET
+#define LABELOFFSET	64			/* default offset of label in sector */
+#endif
+
+#define DISKMAGIC	((u_long) 0x82564557)	/* The disk magic number */
+#ifndef MAXPARTITIONS
+#define	MAXPARTITIONS	8			/* default length of d_partitions[] */
+#endif
+
+
+#ifndef LOCORE
+struct disklabel {
+	u_long	d_magic;		/* the magic number */
+	short	d_type;			/* drive type */
+	short	d_subtype;		/* controller/d_type specific */
+	char	d_typename[16];		/* type name, e.g. "eagle" */
+	/* 
+	 * d_packname contains the pack identifier and is returned when
+	 * the disklabel is read off the disk or in-core copy.
+	 * d_boot0 and d_boot1 are the (optional) names of the
+	 * primary (block 0) and secondary (block 1-15) bootstraps
+	 * as found in /usr/mdec.  These are returned when using
+	 * getdiskbyname(3) to retrieve the values from /etc/disktab.
+	 */
+#if defined(KERNEL) || defined(STANDALONE)
+	char	d_packname[16];			/* pack identifier */ 
+#else	/* neither: pack name shares storage with the bootstrap names */
+	union {
+		char	un_d_packname[16];	/* pack identifier */ 
+		struct {
+			char *un_d_boot0;	/* primary bootstrap name */
+			char *un_d_boot1;	/* secondary bootstrap name */
+		} un_b; 
+	} d_un; 
+#define d_packname	d_un.un_d_packname
+#define d_boot0		d_un.un_b.un_d_boot0
+#define d_boot1		d_un.un_b.un_d_boot1
+#endif	/* KERNEL || STANDALONE */
+			/* disk geometry: */
+	u_long	d_secsize;		/* # of bytes per sector */
+	u_long	d_nsectors;		/* # of data sectors per track */
+	u_long	d_ntracks;		/* # of tracks per cylinder */
+	u_long	d_ncylinders;		/* # of data cylinders per unit */
+	u_long	d_secpercyl;		/* # of data sectors per cylinder */
+	u_long	d_secperunit;		/* # of data sectors per unit */
+	/*
+	 * Spares (bad sector replacements) below
+	 * are not counted in d_nsectors or d_secpercyl.
+	 * Spare sectors are assumed to be physical sectors
+	 * which occupy space at the end of each track and/or cylinder.
+	 */
+	u_short	d_sparespertrack;	/* # of spare sectors per track */
+	u_short	d_sparespercyl;		/* # of spare sectors per cylinder */
+	/*
+	 * Alternate cylinders include maintenance, replacement,
+	 * configuration description areas, etc.
+	 */
+	u_long	d_acylinders;		/* # of alt. cylinders per unit */
+
+			/* hardware characteristics: */
+	/*
+	 * d_interleave, d_trackskew and d_cylskew describe perturbations
+	 * in the media format used to compensate for a slow controller.
+	 * Interleave is physical sector interleave, set up by the formatter
+	 * or controller when formatting.  When interleaving is in use,
+	 * logically adjacent sectors are not physically contiguous,
+	 * but instead are separated by some number of sectors.
+	 * It is specified as the ratio of physical sectors traversed
+	 * per logical sector.  Thus an interleave of 1:1 implies contiguous
+	 * layout, while 2:1 implies that logical sector 0 is separated
+	 * by one sector from logical sector 1.
+	 * d_trackskew is the offset of sector 0 on track N
+	 * relative to sector 0 on track N-1 on the same cylinder.
+	 * Finally, d_cylskew is the offset of sector 0 on cylinder N
+	 * relative to sector 0 on cylinder N-1.
+	 */
+	u_short	d_rpm;			/* rotational speed */
+	u_short	d_interleave;		/* hardware sector interleave */
+	u_short	d_trackskew;		/* sector 0 skew, per track */
+	u_short	d_cylskew;		/* sector 0 skew, per cylinder */
+	u_long	d_headswitch;		/* head switch time, usec */
+	u_long	d_trkseek;		/* track-to-track seek, usec */
+	u_long	d_flags;		/* generic flags */
+#define NDDATA 5			/* length of d_drivedata[] */
+	u_long	d_drivedata[NDDATA];	/* drive-type specific information */
+#define NSPARE 5			/* length of d_spare[] */
+	u_long	d_spare[NSPARE];	/* reserved for future use */
+	u_long	d_magic2;		/* the magic number (again) */
+	u_short	d_checksum;		/* xor of data incl. partitions */
+
+			/* filesystem and partition information: */
+	u_short	d_npartitions;		/* number of partitions in following */
+	u_long	d_bbsize;		/* size of boot area at sn0, bytes */
+	u_long	d_sbsize;		/* max size of fs superblock, bytes */
+	struct	partition {		/* the partition table */
+		u_long	p_size;		/* number of sectors in partition */
+		u_long	p_offset;	/* starting sector */
+		u_long	p_fsize;	/* filesystem basic fragment size */
+		u_char	p_fstype;	/* filesystem type, see below */
+		u_char	p_frag;		/* filesystem fragments per block */
+		union {
+			u_short	cpg;	/* UFS: FS cylinders per group */
+			u_short	sgs;	/* LFS: FS segment shift */
+		} __partition_u1;
+#define	p_cpg	__partition_u1.cpg
+#define	p_sgs	__partition_u1.sgs
+	} d_partitions[MAXPARTITIONS];	/* actually may be more */
+};
+#else /* LOCORE */
+	/*
+	 * offsets for asm boot files; keep in sync with struct disklabel.
+	 */
+	.set	d_secsize,40		/* 4+2+2+16+16, with 4-byte u_long */
+	.set	d_nsectors,44
+	.set	d_ntracks,48
+	.set	d_ncylinders,52
+	.set	d_secpercyl,56
+	.set	d_secperunit,60
+	.set	d_end_,276		/* size of disk label (8 partitions) */
+#endif /* LOCORE */
+
+/* d_type values: */
+#define	DTYPE_SMD		1		/* SMD, XSMD; VAX hp/up */
+#define	DTYPE_MSCP		2		/* MSCP */
+#define	DTYPE_DEC		3		/* other DEC (rk, rl) */
+#define	DTYPE_SCSI		4		/* SCSI */
+#define	DTYPE_ESDI		5		/* ESDI interface */
+#define	DTYPE_ST506		6		/* ST506 etc. */
+#define	DTYPE_HPIB		7		/* CS/80 on HP-IB */
+#define	DTYPE_HPFL		8		/* HP Fiber-link */
+#define	DTYPE_FLOPPY		10		/* floppy */
+
+#ifdef DKTYPENAMES
+static char *dktypenames[] = {		/* names in d_type value order */
+	"unknown",
+	"SMD",
+	"MSCP",
+	"old DEC",
+	"SCSI",
+	"ESDI",
+	"ST506",
+	"HP-IB",
+	"HP-FL",
+	"type 9",			/* no DTYPE_ value 9 is defined above */
+	"floppy",
+	0				/* end marker, not counted below */
+};
+#define DKMAXTYPES	(sizeof(dktypenames) / sizeof(dktypenames[0]) - 1)
+#endif /* DKTYPENAMES */
+
+/*
+ * Filesystem type and version (see p_fstype).
+ * Used to interpret other filesystem-specific
+ * per-partition information.
+ */
+#define	FS_UNUSED	0		/* unused */
+#define	FS_SWAP		1		/* swap */
+#define	FS_V6		2		/* Sixth Edition */
+#define	FS_V7		3		/* Seventh Edition */
+#define	FS_SYSV		4		/* System V */
+#define	FS_V71K		5		/* V7 with 1K blocks (4.1, 2.9) */
+#define	FS_V8		6		/* Eighth Edition, 4K blocks */
+#define	FS_BSDFFS	7		/* 4.2BSD fast file system */
+#define	FS_MSDOS	8		/* MSDOS file system */
+#define	FS_BSDLFS	9		/* 4.4BSD log-structured file system */
+#define	FS_OTHER	10		/* in use, but unknown/unsupported */
+#define	FS_HPFS		11		/* OS/2 high-performance file system */
+#define	FS_ISO9660	12		/* ISO 9660, normally CD-ROM */
+#define	FS_BOOT		13		/* partition contains bootstrap */
+
+#ifdef	DKTYPENAMES
+static char *fstypenames[] = {		/* names in FS_ value order */
+	"unused",
+	"swap",
+	"Version 6",
+	"Version 7",
+	"System V",
+	"4.1BSD",			/* FS_V71K */
+	"Eighth Edition",
+	"4.2BSD",
+	"MSDOS",
+	"4.4LFS",
+	"unknown",			/* FS_OTHER */
+	"HPFS",
+	"ISO9660",
+	"boot",
+	0				/* end marker, not counted below */
+};
+#define FSMAXTYPES	(sizeof(fstypenames) / sizeof(fstypenames[0]) - 1)
+#endif /* DKTYPENAMES */
+
+/*
+ * flags shared by various drives (presumably bits of d_flags; confirm):
+ */
+#define		D_REMOVABLE	0x01		/* removable media */
+#define		D_ECC		0x02		/* supports ECC */
+#define		D_BADSECT	0x04		/* supports bad sector forw. */
+#define		D_RAMDISK	0x08		/* disk emulator */
+#define		D_CHAIN		0x10		/* can do back-back transfers */
+
+/*
+ * Drive data for SMD (overlays d_drivedata[0] through [3]).
+ */
+#define	d_smdflags	d_drivedata[0]
+#define		D_SSE		0x1		/* supports skip sectoring */
+#define	d_mindist	d_drivedata[1]
+#define	d_maxdist	d_drivedata[2]
+#define	d_sdist		d_drivedata[3]
+
+/*
+ * Drive data for ST506 (overlays d_drivedata[0] and [1]).
+ */
+#define d_precompcyl	d_drivedata[0]
+#define d_gap3		d_drivedata[1]		/* used only when formatting */
+
+/*
+ * Drive data for SCSI (overlays d_drivedata[0]).
+ */
+#define	d_blind		d_drivedata[0]
+
+#ifndef LOCORE
+/*
+ * Structure used to perform a format
+ * or other raw operation, returning data
+ * and/or register values; the argument of
+ * DIOCRFORMAT and DIOCWFORMAT.  Register identification
+ * and format are device- and driver-dependent.
+ */
+struct format_op {
+	char	*df_buf;
+	int	df_count;		/* value-result */
+	daddr_t	df_startblk;
+	int	df_reg[8];		/* result */
+};
+
+/*
+ * Structure used internally to retrieve information
+ * about a partition on a disk; the argument of DIOCGPART.
+ */
+struct partinfo {
+	struct disklabel *disklab;
+	struct partition *part;
+};
+
+/*
+ * Disk-specific ioctls.
+ */
+		/* get and set disklabel; DIOCGPART used internally */
+#define DIOCGDINFO	_IOR('d', 101, struct disklabel)/* get */
+#define DIOCSDINFO	_IOW('d', 102, struct disklabel)/* set */
+#define DIOCWDINFO	_IOW('d', 103, struct disklabel)/* set, update disk */
+#define DIOCGPART	_IOW('d', 104, struct partinfo)	/* get partition (_IOW; confirm) */
+
+/* do format operation, read or write */
+#define DIOCRFORMAT	_IOWR('d', 105, struct format_op)
+#define DIOCWFORMAT	_IOWR('d', 106, struct format_op)
+
+#define DIOCSSTEP	_IOW('d', 107, int)	/* set step rate */
+#define DIOCSRETRIES	_IOW('d', 108, int)	/* set # of retries */
+#define DIOCWLABEL	_IOW('d', 109, int)	/* write en/disable label */
+
+#define DIOCSBAD	_IOW('d', 110, struct dkbad)	/* set kernel dkbad; struct dkbad not declared here */
+
+#endif /* LOCORE */
+
+#if !defined(KERNEL) && !defined(LOCORE)	/* non-kernel C code only */
+
+#include <sys/cdefs.h>
+
+__BEGIN_DECLS
+struct disklabel *getdiskbyname __P((const char *));
+__END_DECLS
+
+#endif /* !KERNEL && !LOCORE */
diff --git a/sys/sys/diskmbr.h b/sys/sys/diskmbr.h
new file mode 100644
index 00000000000..a25ee29e363
--- /dev/null
+++ b/sys/sys/diskmbr.h
@@ -0,0 +1,332 @@
+/*
+ * Copyright (c) 1987, 1988, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)disklabel.h	8.1 (Berkeley) 6/2/93
+ */
+
+/*
+ * Disk description table, see disktab(5)
+ */
+#define	_PATH_DISKTAB	"/etc/disktab"
+#define	DISKTAB		"/etc/disktab"		/* deprecated */
+
+/*
+ * Each disk has a label which includes information about the hardware
+ * disk geometry, filesystem partitions, and drive specific information.
+ * The label is in block 0 or 1, possibly offset from the beginning
+ * to leave room for a bootstrap, etc.
+ */
+
+/* XXX these should be defined per controller (or drive) elsewhere, not here! */
+#ifdef i386
+#define LABELSECTOR	1			/* sector containing label */
+#define LABELOFFSET	0			/* offset of label in sector */
+#endif /* i386 */
+
+#ifndef	LABELSECTOR
+#define LABELSECTOR	0			/* default sector containing label */
+#endif
+
+#ifndef	LABELOFFSET
+#define LABELOFFSET	64			/* default offset of label in sector */
+#endif
+
+#define DISKMAGIC	((u_long) 0x82564557)	/* The disk magic number */
+#ifndef MAXPARTITIONS
+#define	MAXPARTITIONS	8			/* default length of d_partitions[] */
+#endif
+
+
+#ifndef LOCORE
+struct disklabel {
+	u_long	d_magic;		/* the magic number */
+	short	d_type;			/* drive type */
+	short	d_subtype;		/* controller/d_type specific */
+	char	d_typename[16];		/* type name, e.g. "eagle" */
+	/* 
+	 * d_packname contains the pack identifier and is returned when
+	 * the disklabel is read off the disk or in-core copy.
+	 * d_boot0 and d_boot1 are the (optional) names of the
+	 * primary (block 0) and secondary (block 1-15) bootstraps
+	 * as found in /usr/mdec.  These are returned when using
+	 * getdiskbyname(3) to retrieve the values from /etc/disktab.
+	 */
+#if defined(KERNEL) || defined(STANDALONE)
+	char	d_packname[16];			/* pack identifier */ 
+#else	/* neither: pack name shares storage with the bootstrap names */
+	union {
+		char	un_d_packname[16];	/* pack identifier */ 
+		struct {
+			char *un_d_boot0;	/* primary bootstrap name */
+			char *un_d_boot1;	/* secondary bootstrap name */
+		} un_b; 
+	} d_un; 
+#define d_packname	d_un.un_d_packname
+#define d_boot0		d_un.un_b.un_d_boot0
+#define d_boot1		d_un.un_b.un_d_boot1
+#endif	/* KERNEL || STANDALONE */
+			/* disk geometry: */
+	u_long	d_secsize;		/* # of bytes per sector */
+	u_long	d_nsectors;		/* # of data sectors per track */
+	u_long	d_ntracks;		/* # of tracks per cylinder */
+	u_long	d_ncylinders;		/* # of data cylinders per unit */
+	u_long	d_secpercyl;		/* # of data sectors per cylinder */
+	u_long	d_secperunit;		/* # of data sectors per unit */
+	/*
+	 * Spares (bad sector replacements) below
+	 * are not counted in d_nsectors or d_secpercyl.
+	 * Spare sectors are assumed to be physical sectors
+	 * which occupy space at the end of each track and/or cylinder.
+	 */
+	u_short	d_sparespertrack;	/* # of spare sectors per track */
+	u_short	d_sparespercyl;		/* # of spare sectors per cylinder */
+	/*
+	 * Alternate cylinders include maintenance, replacement,
+	 * configuration description areas, etc.
+	 */
+	u_long	d_acylinders;		/* # of alt. cylinders per unit */
+
+			/* hardware characteristics: */
+	/*
+	 * d_interleave, d_trackskew and d_cylskew describe perturbations
+	 * in the media format used to compensate for a slow controller.
+	 * Interleave is physical sector interleave, set up by the formatter
+	 * or controller when formatting.  When interleaving is in use,
+	 * logically adjacent sectors are not physically contiguous,
+	 * but instead are separated by some number of sectors.
+	 * It is specified as the ratio of physical sectors traversed
+	 * per logical sector.  Thus an interleave of 1:1 implies contiguous
+	 * layout, while 2:1 implies that logical sector 0 is separated
+	 * by one sector from logical sector 1.
+	 * d_trackskew is the offset of sector 0 on track N
+	 * relative to sector 0 on track N-1 on the same cylinder.
+	 * Finally, d_cylskew is the offset of sector 0 on cylinder N
+	 * relative to sector 0 on cylinder N-1.
+	 */
+	u_short	d_rpm;			/* rotational speed */
+	u_short	d_interleave;		/* hardware sector interleave */
+	u_short	d_trackskew;		/* sector 0 skew, per track */
+	u_short	d_cylskew;		/* sector 0 skew, per cylinder */
+	u_long	d_headswitch;		/* head switch time, usec */
+	u_long	d_trkseek;		/* track-to-track seek, usec */
+	u_long	d_flags;		/* generic flags */
+#define NDDATA 5			/* length of d_drivedata[] */
+	u_long	d_drivedata[NDDATA];	/* drive-type specific information */
+#define NSPARE 5			/* length of d_spare[] */
+	u_long	d_spare[NSPARE];	/* reserved for future use */
+	u_long	d_magic2;		/* the magic number (again) */
+	u_short	d_checksum;		/* xor of data incl. partitions */
+
+			/* filesystem and partition information: */
+	u_short	d_npartitions;		/* number of partitions in following */
+	u_long	d_bbsize;		/* size of boot area at sn0, bytes */
+	u_long	d_sbsize;		/* max size of fs superblock, bytes */
+	struct	partition {		/* the partition table */
+		u_long	p_size;		/* number of sectors in partition */
+		u_long	p_offset;	/* starting sector */
+		u_long	p_fsize;	/* filesystem basic fragment size */
+		u_char	p_fstype;	/* filesystem type, see below */
+		u_char	p_frag;		/* filesystem fragments per block */
+		union {
+			u_short	cpg;	/* UFS: FS cylinders per group */
+			u_short	sgs;	/* LFS: FS segment shift */
+		} __partition_u1;
+#define	p_cpg	__partition_u1.cpg
+#define	p_sgs	__partition_u1.sgs
+	} d_partitions[MAXPARTITIONS];	/* actually may be more */
+};
+#else /* LOCORE */
+	/*
+	 * offsets for asm boot files; keep in sync with struct disklabel.
+	 */
+	.set	d_secsize,40		/* 4+2+2+16+16, with 4-byte u_long */
+	.set	d_nsectors,44
+	.set	d_ntracks,48
+	.set	d_ncylinders,52
+	.set	d_secpercyl,56
+	.set	d_secperunit,60
+	.set	d_end_,276		/* size of disk label (8 partitions) */
+#endif /* LOCORE */
+
+/* d_type values: */
+#define	DTYPE_SMD		1		/* SMD, XSMD; VAX hp/up */
+#define	DTYPE_MSCP		2		/* MSCP */
+#define	DTYPE_DEC		3		/* other DEC (rk, rl) */
+#define	DTYPE_SCSI		4		/* SCSI */
+#define	DTYPE_ESDI		5		/* ESDI interface */
+#define	DTYPE_ST506		6		/* ST506 etc. */
+#define	DTYPE_HPIB		7		/* CS/80 on HP-IB */
+#define	DTYPE_HPFL		8		/* HP Fiber-link */
+#define	DTYPE_FLOPPY		10		/* floppy */
+
+#ifdef DKTYPENAMES
+static char *dktypenames[] = {		/* names in d_type value order */
+	"unknown",
+	"SMD",
+	"MSCP",
+	"old DEC",
+	"SCSI",
+	"ESDI",
+	"ST506",
+	"HP-IB",
+	"HP-FL",
+	"type 9",			/* no DTYPE_ value 9 is defined above */
+	"floppy",
+	0				/* end marker, not counted below */
+};
+#define DKMAXTYPES	(sizeof(dktypenames) / sizeof(dktypenames[0]) - 1)
+#endif /* DKTYPENAMES */
+
+/*
+ * Filesystem type and version (see p_fstype).
+ * Used to interpret other filesystem-specific
+ * per-partition information.
+ */
+#define	FS_UNUSED	0		/* unused */
+#define	FS_SWAP		1		/* swap */
+#define	FS_V6		2		/* Sixth Edition */
+#define	FS_V7		3		/* Seventh Edition */
+#define	FS_SYSV		4		/* System V */
+#define	FS_V71K		5		/* V7 with 1K blocks (4.1, 2.9) */
+#define	FS_V8		6		/* Eighth Edition, 4K blocks */
+#define	FS_BSDFFS	7		/* 4.2BSD fast file system */
+#define	FS_MSDOS	8		/* MSDOS file system */
+#define	FS_BSDLFS	9		/* 4.4BSD log-structured file system */
+#define	FS_OTHER	10		/* in use, but unknown/unsupported */
+#define	FS_HPFS		11		/* OS/2 high-performance file system */
+#define	FS_ISO9660	12		/* ISO 9660, normally CD-ROM */
+#define	FS_BOOT		13		/* partition contains bootstrap */
+
+#ifdef	DKTYPENAMES
+static char *fstypenames[] = {		/* names in FS_ value order */
+	"unused",
+	"swap",
+	"Version 6",
+	"Version 7",
+	"System V",
+	"4.1BSD",			/* FS_V71K */
+	"Eighth Edition",
+	"4.2BSD",
+	"MSDOS",
+	"4.4LFS",
+	"unknown",			/* FS_OTHER */
+	"HPFS",
+	"ISO9660",
+	"boot",
+	0				/* end marker, not counted below */
+};
+#define FSMAXTYPES	(sizeof(fstypenames) / sizeof(fstypenames[0]) - 1)
+#endif /* DKTYPENAMES */
+
+/*
+ * flags shared by various drives (presumably bits of d_flags; confirm):
+ */
+#define		D_REMOVABLE	0x01		/* removable media */
+#define		D_ECC		0x02		/* supports ECC */
+#define		D_BADSECT	0x04		/* supports bad sector forw. */
+#define		D_RAMDISK	0x08		/* disk emulator */
+#define		D_CHAIN		0x10		/* can do back-back transfers */
+
+/*
+ * Drive data for SMD (overlays d_drivedata[0] through [3]).
+ */
+#define	d_smdflags	d_drivedata[0]
+#define		D_SSE		0x1		/* supports skip sectoring */
+#define	d_mindist	d_drivedata[1]
+#define	d_maxdist	d_drivedata[2]
+#define	d_sdist		d_drivedata[3]
+
+/*
+ * Drive data for ST506 (overlays d_drivedata[0] and [1]).
+ */
+#define d_precompcyl	d_drivedata[0]
+#define d_gap3		d_drivedata[1]		/* used only when formatting */
+
+/*
+ * Drive data for SCSI (overlays d_drivedata[0]).
+ */
+#define	d_blind		d_drivedata[0]
+
+#ifndef LOCORE
+/*
+ * Structure used to perform a format
+ * or other raw operation, returning data
+ * and/or register values; the argument of
+ * DIOCRFORMAT and DIOCWFORMAT.  Register identification
+ * and format are device- and driver-dependent.
+ */
+struct format_op {
+	char	*df_buf;
+	int	df_count;		/* value-result */
+	daddr_t	df_startblk;
+	int	df_reg[8];		/* result */
+};
+
+/*
+ * Structure used internally to retrieve information
+ * about a partition on a disk; the argument of DIOCGPART.
+ */
+struct partinfo {
+	struct disklabel *disklab;
+	struct partition *part;
+};
+
+/*
+ * Disk-specific ioctls.
+ */
+		/* get and set disklabel; DIOCGPART used internally */
+#define DIOCGDINFO	_IOR('d', 101, struct disklabel)/* get */
+#define DIOCSDINFO	_IOW('d', 102, struct disklabel)/* set */
+#define DIOCWDINFO	_IOW('d', 103, struct disklabel)/* set, update disk */
+#define DIOCGPART	_IOW('d', 104, struct partinfo)	/* get partition (_IOW; confirm) */
+
+/* do format operation, read or write */
+#define DIOCRFORMAT	_IOWR('d', 105, struct format_op)
+#define DIOCWFORMAT	_IOWR('d', 106, struct format_op)
+
+#define DIOCSSTEP	_IOW('d', 107, int)	/* set step rate */
+#define DIOCSRETRIES	_IOW('d', 108, int)	/* set # of retries */
+#define DIOCWLABEL	_IOW('d', 109, int)	/* write en/disable label */
+
+#define DIOCSBAD	_IOW('d', 110, struct dkbad)	/* set kernel dkbad; struct dkbad not declared here */
+
+#endif /* LOCORE */
+
+#if !defined(KERNEL) && !defined(LOCORE)	/* non-kernel C code only */
+
+#include <sys/cdefs.h>
+
+__BEGIN_DECLS
+struct disklabel *getdiskbyname __P((const char *));
+__END_DECLS
+
+#endif /* !KERNEL && !LOCORE */
diff --git a/sys/sys/diskpc98.h b/sys/sys/diskpc98.h
new file mode 100644
index 00000000000..a25ee29e363
--- /dev/null
+++ b/sys/sys/diskpc98.h
@@ -0,0 +1,332 @@
+/*
+ * Copyright (c) 1987, 1988, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)disklabel.h	8.1 (Berkeley) 6/2/93
+ */
+
+/*
+ * Disk description table, see disktab(5)
+ */
+#define	_PATH_DISKTAB	"/etc/disktab"
+#define	DISKTAB		"/etc/disktab"		/* deprecated */
+
+/*
+ * Each disk has a label which includes information about the hardware
+ * disk geometry, filesystem partitions, and drive specific information.
+ * The label is in block 0 or 1, possibly offset from the beginning
+ * to leave room for a bootstrap, etc.
+ */
+
+/* XXX these should be defined per controller (or drive) elsewhere, not here! */
+#ifdef i386
+#define LABELSECTOR	1			/* sector containing label */
+#define LABELOFFSET	0			/* offset of label in sector */
+#endif /* i386 */
+
+#ifndef	LABELSECTOR
+#define LABELSECTOR	0			/* default sector containing label */
+#endif
+
+#ifndef	LABELOFFSET
+#define LABELOFFSET	64			/* default offset of label in sector */
+#endif
+
+#define DISKMAGIC	((u_long) 0x82564557)	/* The disk magic number */
+#ifndef MAXPARTITIONS
+#define	MAXPARTITIONS	8			/* default length of d_partitions[] */
+#endif
+
+
+#ifndef LOCORE
+struct disklabel {
+	u_long	d_magic;		/* the magic number */
+	short	d_type;			/* drive type */
+	short	d_subtype;		/* controller/d_type specific */
+	char	d_typename[16];		/* type name, e.g. "eagle" */
+	/* 
+	 * d_packname contains the pack identifier and is returned when
+	 * the disklabel is read off the disk or in-core copy.
+	 * d_boot0 and d_boot1 are the (optional) names of the
+	 * primary (block 0) and secondary (block 1-15) bootstraps
+	 * as found in /usr/mdec.  These are returned when using
+	 * getdiskbyname(3) to retrieve the values from /etc/disktab.
+	 */
+#if defined(KERNEL) || defined(STANDALONE)
+	char	d_packname[16];			/* pack identifier */ 
+#else	/* neither: pack name shares storage with the bootstrap names */
+	union {
+		char	un_d_packname[16];	/* pack identifier */ 
+		struct {
+			char *un_d_boot0;	/* primary bootstrap name */
+			char *un_d_boot1;	/* secondary bootstrap name */
+		} un_b; 
+	} d_un; 
+#define d_packname	d_un.un_d_packname
+#define d_boot0		d_un.un_b.un_d_boot0
+#define d_boot1		d_un.un_b.un_d_boot1
+#endif	/* KERNEL || STANDALONE */
+			/* disk geometry: */
+	u_long	d_secsize;		/* # of bytes per sector */
+	u_long	d_nsectors;		/* # of data sectors per track */
+	u_long	d_ntracks;		/* # of tracks per cylinder */
+	u_long	d_ncylinders;		/* # of data cylinders per unit */
+	u_long	d_secpercyl;		/* # of data sectors per cylinder */
+	u_long	d_secperunit;		/* # of data sectors per unit */
+	/*
+	 * Spares (bad sector replacements) below
+	 * are not counted in d_nsectors or d_secpercyl.
+	 * Spare sectors are assumed to be physical sectors
+	 * which occupy space at the end of each track and/or cylinder.
+	 */
+	u_short	d_sparespertrack;	/* # of spare sectors per track */
+	u_short	d_sparespercyl;		/* # of spare sectors per cylinder */
+	/*
+	 * Alternate cylinders include maintenance, replacement,
+	 * configuration description areas, etc.
+	 */
+	u_long	d_acylinders;		/* # of alt. cylinders per unit */
+
+			/* hardware characteristics: */
+	/*
+	 * d_interleave, d_trackskew and d_cylskew describe perturbations
+	 * in the media format used to compensate for a slow controller.
+	 * Interleave is physical sector interleave, set up by the formatter
+	 * or controller when formatting.  When interleaving is in use,
+	 * logically adjacent sectors are not physically contiguous,
+	 * but instead are separated by some number of sectors.
+	 * It is specified as the ratio of physical sectors traversed
+	 * per logical sector.  Thus an interleave of 1:1 implies contiguous
+	 * layout, while 2:1 implies that logical sector 0 is separated
+	 * by one sector from logical sector 1.
+	 * d_trackskew is the offset of sector 0 on track N
+	 * relative to sector 0 on track N-1 on the same cylinder.
+	 * Finally, d_cylskew is the offset of sector 0 on cylinder N
+	 * relative to sector 0 on cylinder N-1.
+	 */
+	u_short	d_rpm;			/* rotational speed */
+	u_short	d_interleave;		/* hardware sector interleave */
+	u_short	d_trackskew;		/* sector 0 skew, per track */
+	u_short	d_cylskew;		/* sector 0 skew, per cylinder */
+	u_long	d_headswitch;		/* head switch time, usec */
+	u_long	d_trkseek;		/* track-to-track seek, usec */
+	u_long	d_flags;		/* generic flags */
+#define NDDATA 5			/* length of d_drivedata[] */
+	u_long	d_drivedata[NDDATA];	/* drive-type specific information */
+#define NSPARE 5			/* length of d_spare[] */
+	u_long	d_spare[NSPARE];	/* reserved for future use */
+	u_long	d_magic2;		/* the magic number (again) */
+	u_short	d_checksum;		/* xor of data incl. partitions */
+
+			/* filesystem and partition information: */
+	u_short	d_npartitions;		/* number of partitions in following */
+	u_long	d_bbsize;		/* size of boot area at sn0, bytes */
+	u_long	d_sbsize;		/* max size of fs superblock, bytes */
+	struct	partition {		/* the partition table */
+		u_long	p_size;		/* number of sectors in partition */
+		u_long	p_offset;	/* starting sector */
+		u_long	p_fsize;	/* filesystem basic fragment size */
+		u_char	p_fstype;	/* filesystem type, see below */
+		u_char	p_frag;		/* filesystem fragments per block */
+		union {
+			u_short	cpg;	/* UFS: FS cylinders per group */
+			u_short	sgs;	/* LFS: FS segment shift */
+		} __partition_u1;
+#define	p_cpg	__partition_u1.cpg
+#define	p_sgs	__partition_u1.sgs
+	} d_partitions[MAXPARTITIONS];	/* actually may be more */
+};
+#else /* LOCORE */
+	/*
+	 * offsets for asm boot files; keep in sync with struct disklabel.
+	 */
+	.set	d_secsize,40		/* 4+2+2+16+16, with 4-byte u_long */
+	.set	d_nsectors,44
+	.set	d_ntracks,48
+	.set	d_ncylinders,52
+	.set	d_secpercyl,56
+	.set	d_secperunit,60
+	.set	d_end_,276		/* size of disk label (8 partitions) */
+#endif /* LOCORE */
+
+/* d_type values: */
+#define	DTYPE_SMD		1		/* SMD, XSMD; VAX hp/up */
+#define	DTYPE_MSCP		2		/* MSCP */
+#define	DTYPE_DEC		3		/* other DEC (rk, rl) */
+#define	DTYPE_SCSI		4		/* SCSI */
+#define	DTYPE_ESDI		5		/* ESDI interface */
+#define	DTYPE_ST506		6		/* ST506 etc. */
+#define	DTYPE_HPIB		7		/* CS/80 on HP-IB */
+#define	DTYPE_HPFL		8		/* HP Fiber-link */
+#define	DTYPE_FLOPPY		10		/* floppy */
+
+#ifdef DKTYPENAMES
+static char *dktypenames[] = {		/* names in d_type value order */
+	"unknown",
+	"SMD",
+	"MSCP",
+	"old DEC",
+	"SCSI",
+	"ESDI",
+	"ST506",
+	"HP-IB",
+	"HP-FL",
+	"type 9",			/* no DTYPE_ value 9 is defined above */
+	"floppy",
+	0				/* end marker, not counted below */
+};
+#define DKMAXTYPES	(sizeof(dktypenames) / sizeof(dktypenames[0]) - 1)
+#endif /* DKTYPENAMES */
+
+/*
+ * Filesystem type and version (see p_fstype).
+ * Used to interpret other filesystem-specific
+ * per-partition information.
+ */
+#define	FS_UNUSED	0		/* unused */
+#define	FS_SWAP		1		/* swap */
+#define	FS_V6		2		/* Sixth Edition */
+#define	FS_V7		3		/* Seventh Edition */
+#define	FS_SYSV		4		/* System V */
+#define	FS_V71K		5		/* V7 with 1K blocks (4.1, 2.9) */
+#define	FS_V8		6		/* Eighth Edition, 4K blocks */
+#define	FS_BSDFFS	7		/* 4.2BSD fast file system */
+#define	FS_MSDOS	8		/* MSDOS file system */
+#define	FS_BSDLFS	9		/* 4.4BSD log-structured file system */
+#define	FS_OTHER	10		/* in use, but unknown/unsupported */
+#define	FS_HPFS		11		/* OS/2 high-performance file system */
+#define	FS_ISO9660	12		/* ISO 9660, normally CD-ROM */
+#define	FS_BOOT		13		/* partition contains bootstrap */
+
+#ifdef	DKTYPENAMES
+static char *fstypenames[] = {		/* names in FS_ value order */
+	"unused",
+	"swap",
+	"Version 6",
+	"Version 7",
+	"System V",
+	"4.1BSD",			/* FS_V71K */
+	"Eighth Edition",
+	"4.2BSD",
+	"MSDOS",
+	"4.4LFS",
+	"unknown",			/* FS_OTHER */
+	"HPFS",
+	"ISO9660",
+	"boot",
+	0				/* end marker, not counted below */
+};
+#define FSMAXTYPES	(sizeof(fstypenames) / sizeof(fstypenames[0]) - 1)
+#endif /* DKTYPENAMES */
+
+/*
+ * flags shared by various drives (presumably bits of d_flags; confirm):
+ */
+#define		D_REMOVABLE	0x01		/* removable media */
+#define		D_ECC		0x02		/* supports ECC */
+#define		D_BADSECT	0x04		/* supports bad sector forw. */
+#define		D_RAMDISK	0x08		/* disk emulator */
+#define		D_CHAIN		0x10		/* can do back-back transfers */
+
+/*
+ * Drive data for SMD (overlays d_drivedata[0] through [3]).
+ */
+#define	d_smdflags	d_drivedata[0]
+#define		D_SSE		0x1		/* supports skip sectoring */
+#define	d_mindist	d_drivedata[1]
+#define	d_maxdist	d_drivedata[2]
+#define	d_sdist		d_drivedata[3]
+
+/*
+ * Drive data for ST506 (overlays d_drivedata[0] and [1]).
+ */
+#define d_precompcyl	d_drivedata[0]
+#define d_gap3		d_drivedata[1]		/* used only when formatting */
+
+/*
+ * Drive data for SCSI (overlays d_drivedata[0]).
+ */
+#define	d_blind		d_drivedata[0]
+
+#ifndef LOCORE
+/*
+ * Structure used to perform a format
+ * or other raw operation, returning data
+ * and/or register values; the argument of
+ * DIOCRFORMAT and DIOCWFORMAT.  Register identification
+ * and format are device- and driver-dependent.
+ */
+struct format_op {
+	char	*df_buf;
+	int	df_count;		/* value-result */
+	daddr_t	df_startblk;
+	int	df_reg[8];		/* result */
+};
+
+/*
+ * Structure used internally to retrieve information
+ * about a partition on a disk; the argument of DIOCGPART.
+ */
+struct partinfo {
+	struct disklabel *disklab;
+	struct partition *part;
+};
+
+/*
+ * Disk-specific ioctls.
+ */
+		/* get and set disklabel; DIOCGPART used internally */
+#define DIOCGDINFO	_IOR('d', 101, struct disklabel)/* get */
+#define DIOCSDINFO	_IOW('d', 102, struct disklabel)/* set */
+#define DIOCWDINFO	_IOW('d', 103, struct disklabel)/* set, update disk */
+#define DIOCGPART	_IOW('d', 104, struct partinfo)	/* get partition (_IOW; confirm) */
+
+/* do format operation, read or write */
+#define DIOCRFORMAT	_IOWR('d', 105, struct format_op)
+#define DIOCWFORMAT	_IOWR('d', 106, struct format_op)
+
+#define DIOCSSTEP	_IOW('d', 107, int)	/* set step rate */
+#define DIOCSRETRIES	_IOW('d', 108, int)	/* set # of retries */
+#define DIOCWLABEL	_IOW('d', 109, int)	/* write en/disable label */
+
+#define DIOCSBAD	_IOW('d', 110, struct dkbad)	/* set kernel dkbad; struct dkbad not declared here */
+
+#endif /* LOCORE */
+
+#if !defined(KERNEL) && !defined(LOCORE)	/* non-kernel C code only */
+
+#include <sys/cdefs.h>
+
+__BEGIN_DECLS
+struct disklabel *getdiskbyname __P((const char *));
+__END_DECLS
+
+#endif /* !KERNEL && !LOCORE */
diff --git a/sys/sys/dkbad.h b/sys/sys/dkbad.h
new file mode 100644
index 00000000000..c574000aaf8
--- /dev/null
+++ b/sys/sys/dkbad.h
@@ -0,0 +1,68 @@
+/*-
+ * Copyright (c) 1982, 1986, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)dkbad.h	8.1 (Berkeley) 6/2/93
+ */
+
+/*
+ * Definitions needed to perform bad sector revectoring ala DEC STD 144.
+ *
+ * The bad sector information is located in the first 5 even numbered
+ * sectors of the last track of the disk pack.  There are five identical
+ * copies of the information, described by the dkbad structure.
+ *
+ * Replacement sectors are allocated starting with the first sector before
+ * the bad sector information and working backwards towards the beginning of
+ * the disk.  A maximum of 126 bad sectors are supported.  The position of
+ * the bad sector in the bad sector table determines which replacement sector
+ * it corresponds to.
+ *
+ * The bad sector information and replacement sectors are conventionally
+ * only accessible through the 'c' file system partition of the disk.  If
+ * that partition is used for a file system, the user is responsible for
+ * making sure that it does not overlap the bad sector information or any
+ * replacement sectors.
+ */
+struct dkbad {
+	long bt_csn;			/* cartridge serial number */
+	u_short bt_mbz;			/* unused; should be 0 */
+	u_short bt_flag;		/* -1 => alignment cartridge */
+	struct bt_bad {
+		u_short bt_cyl;		/* cylinder number of bad sector */
+		u_short bt_trksec;	/* track and sector number */
+	} bt_bad[126];			/* bad sector table; slot picks replacement */
+};
+
+#define	ECC	0
+#define	SSE	1
+#define	BSE	2
+#define	CONT	3
diff --git a/sys/sys/dkstat.h b/sys/sys/dkstat.h
new file mode 100644
index 00000000000..f7f5f1594a2
--- /dev/null
+++ b/sys/sys/dkstat.h
@@ -0,0 +1,64 @@
+/*-
+ * Copyright (c) 1990, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)dkstat.h	8.2 (Berkeley) 1/21/94
+ */
+
+#define	CP_USER		0
+#define	CP_NICE		1
+#define	CP_SYS		2
+#define	CP_INTR		3
+#define	CP_IDLE		4
+#define	CPUSTATES	5		/* number of CP_* states above */
+
+#define	DK_NDRIVE	8		/* size of the per-drive dk_* arrays */
+#ifdef KERNEL	/* NOTE(review): definitions, not extern declarations */
+long cp_time[CPUSTATES];	/* one slot per CP_* state */
+long dk_seek[DK_NDRIVE];	/* per drive; presumably seeks -- confirm */
+long dk_time[DK_NDRIVE];	/* per drive; presumably busy time -- confirm */
+long dk_wds[DK_NDRIVE];		/* per drive; presumably words moved -- confirm */
+long dk_wpms[DK_NDRIVE];	/* per drive; presumably words/ms -- confirm */
+long dk_xfer[DK_NDRIVE];	/* per drive; presumably transfers -- confirm */
+
+int dk_busy;			/* presumably a busy-drive mask -- confirm */
+int dk_ndrive;			/* presumably drives in use -- confirm */
+
+long tk_cancc;			/* tk_*: presumably tty char counts -- confirm */
+long tk_nin;
+long tk_nout;
+long tk_rawcc;
+#endif
diff --git a/sys/sys/dmap.h b/sys/sys/dmap.h
new file mode 100644
index 00000000000..2a6f538259e
--- /dev/null
+++ b/sys/sys/dmap.h
@@ -0,0 +1,60 @@
+/*-
+ * Copyright (c) 1982, 1986, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)dmap.h	8.2 (Berkeley) 1/4/94
+ */
+
+#ifndef _SYS_DMAP_H_
+#define	_SYS_DMAP_H_
+
+/*
+ * Definitions for the mapping of virtual swap space to the physical swap
+ * area - the disk map.
+ */
+#define	NDMAP	38		/* size of the swap area map */
+
+struct dmap {
+	swblk_t dm_size;	/* current size used by process */
+	swblk_t dm_alloc;	/* amount of physical swap space allocated */
+	swblk_t dm_map[NDMAP];	/* first disk block number in each chunk */
+};
+#ifdef KERNEL
+struct dmap zdmap;
+int dmmin, dmmax, dmtext;
+#endif
+
+/* The following structure is that ``returned'' from a call to vstodb(). */
+struct dblock {
+	swblk_t db_base;	/* base of physical contig drum block */
+	swblk_t db_size;	/* size of block */
+};
+#endif	/* !_SYS_DMAP_H_ */
diff --git a/sys/sys/domain.h b/sys/sys/domain.h
new file mode 100644
index 00000000000..b056347d539
--- /dev/null
+++ b/sys/sys/domain.h
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 1982, 1986, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)domain.h	8.1 (Berkeley) 6/2/93
+ */
+
+/*
+ * Structure per communications domain.
+ */
+
+/*
+ * Forward structure declarations for function prototypes [sic].
+ */
+struct	mbuf;
+
+struct	domain {
+	int	dom_family;		/* AF_xxx */
+	char	*dom_name;		/* name string (assumed from name) */
+	void	(*dom_init)		/* initialize domain data structures */
+		__P((void));
+	int	(*dom_externalize)	/* externalize access rights */
+		__P((struct mbuf *));
+	int	(*dom_dispose)		/* dispose of internalized rights */
+		__P((struct mbuf *));
+	struct	protosw *dom_protosw, *dom_protoswNPROTOSW; /* presumably table bounds */
+	struct	domain *dom_next;	/* presumably next domain in a list */
+	int	(*dom_rtattach)		/* initialize routing table */
+		__P((void **, int));
+	int	dom_rtoffset;		/* an arg to rtattach, in bits */
+	int	dom_maxrtkey;		/* for routing layer */
+};
+
+#ifdef KERNEL
+struct	domain *domains;
+#endif
diff --git a/sys/sys/errno.h b/sys/sys/errno.h
new file mode 100644
index 00000000000..a4e4ea6eb69
--- /dev/null
+++ b/sys/sys/errno.h
@@ -0,0 +1,163 @@
+/*
+ * Copyright (c) 1982, 1986, 1989, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)errno.h	8.5 (Berkeley) 1/21/94
+ */
+
+#ifndef KERNEL
+extern int errno;			/* global error number */
+#endif
+
+#define	EPERM		1		/* Operation not permitted */
+#define	ENOENT		2		/* No such file or directory */
+#define	ESRCH		3		/* No such process */
+#define	EINTR		4		/* Interrupted system call */
+#define	EIO		5		/* Input/output error */
+#define	ENXIO		6		/* Device not configured */
+#define	E2BIG		7		/* Argument list too long */
+#define	ENOEXEC		8		/* Exec format error */
+#define	EBADF		9		/* Bad file descriptor */
+#define	ECHILD		10		/* No child processes */
+#define	EDEADLK		11		/* Resource deadlock avoided */
+					/* 11 was EAGAIN */
+#define	ENOMEM		12		/* Cannot allocate memory */
+#define	EACCES		13		/* Permission denied */
+#define	EFAULT		14		/* Bad address */
+#ifndef _POSIX_SOURCE
+#define	ENOTBLK		15		/* Block device required */
+#endif
+#define	EBUSY		16		/* Device busy */
+#define	EEXIST		17		/* File exists */
+#define	EXDEV		18		/* Cross-device link */
+#define	ENODEV		19		/* Operation not supported by device */
+#define	ENOTDIR		20		/* Not a directory */
+#define	EISDIR		21		/* Is a directory */
+#define	EINVAL		22		/* Invalid argument */
+#define	ENFILE		23		/* Too many open files in system */
+#define	EMFILE		24		/* Too many open files */
+#define	ENOTTY		25		/* Inappropriate ioctl for device */
+#ifndef _POSIX_SOURCE
+#define	ETXTBSY		26		/* Text file busy */
+#endif
+#define	EFBIG		27		/* File too large */
+#define	ENOSPC		28		/* No space left on device */
+#define	ESPIPE		29		/* Illegal seek */
+#define	EROFS		30		/* Read-only file system */
+#define	EMLINK		31		/* Too many links */
+#define	EPIPE		32		/* Broken pipe */
+
+/* math software */
+#define	EDOM		33		/* Numerical argument out of domain */
+#define	ERANGE		34		/* Result too large */
+
+/* non-blocking and interrupt i/o */
+#define	EAGAIN		35		/* Resource temporarily unavailable */
+#ifndef _POSIX_SOURCE
+#define	EWOULDBLOCK	EAGAIN		/* Operation would block */
+#define	EINPROGRESS	36		/* Operation now in progress */
+#define	EALREADY	37		/* Operation already in progress */
+
+/* ipc/network software -- argument errors */
+#define	ENOTSOCK	38		/* Socket operation on non-socket */
+#define	EDESTADDRREQ	39		/* Destination address required */
+#define	EMSGSIZE	40		/* Message too long */
+#define	EPROTOTYPE	41		/* Protocol wrong type for socket */
+#define	ENOPROTOOPT	42		/* Protocol not available */
+#define	EPROTONOSUPPORT	43		/* Protocol not supported */
+#define	ESOCKTNOSUPPORT	44		/* Socket type not supported */
+#define	EOPNOTSUPP	45		/* Operation not supported */
+#define	EPFNOSUPPORT	46		/* Protocol family not supported */
+#define	EAFNOSUPPORT	47		/* Address family not supported by protocol family */
+#define	EADDRINUSE	48		/* Address already in use */
+#define	EADDRNOTAVAIL	49		/* Can't assign requested address */
+
+/* ipc/network software -- operational errors */
+#define	ENETDOWN	50		/* Network is down */
+#define	ENETUNREACH	51		/* Network is unreachable */
+#define	ENETRESET	52		/* Network dropped connection on reset */
+#define	ECONNABORTED	53		/* Software caused connection abort */
+#define	ECONNRESET	54		/* Connection reset by peer */
+#define	ENOBUFS		55		/* No buffer space available */
+#define	EISCONN		56		/* Socket is already connected */
+#define	ENOTCONN	57		/* Socket is not connected */
+#define	ESHUTDOWN	58		/* Can't send after socket shutdown */
+#define	ETOOMANYREFS	59		/* Too many references: can't splice */
+#define	ETIMEDOUT	60		/* Operation timed out */
+#define	ECONNREFUSED	61		/* Connection refused */
+
+#define	ELOOP		62		/* Too many levels of symbolic links */
+#endif /* _POSIX_SOURCE */
+#define	ENAMETOOLONG	63		/* File name too long */
+
+/* should be rearranged */
+#ifndef _POSIX_SOURCE
+#define	EHOSTDOWN	64		/* Host is down */
+#define	EHOSTUNREACH	65		/* No route to host */
+#endif /* _POSIX_SOURCE */
+#define	ENOTEMPTY	66		/* Directory not empty */
+
+/* quotas & mush */
+#ifndef _POSIX_SOURCE
+#define	EPROCLIM	67		/* Too many processes */
+#define	EUSERS		68		/* Too many users */
+#define	EDQUOT		69		/* Disc quota exceeded */
+
+/* Network File System */
+#define	ESTALE		70		/* Stale NFS file handle */
+#define	EREMOTE		71		/* Too many levels of remote in path */
+#define	EBADRPC		72		/* RPC struct is bad */
+#define	ERPCMISMATCH	73		/* RPC version wrong */
+#define	EPROGUNAVAIL	74		/* RPC prog. not avail */
+#define	EPROGMISMATCH	75		/* Program version wrong */
+#define	EPROCUNAVAIL	76		/* Bad procedure for program */
+#endif /* _POSIX_SOURCE */
+
+#define	ENOLCK		77		/* No locks available */
+#define	ENOSYS		78		/* Function not implemented */
+
+#ifndef _POSIX_SOURCE
+#define	EFTYPE		79		/* Inappropriate file type or format */
+#define	EAUTH		80		/* Authentication error */
+#define	ENEEDAUTH	81		/* Need authenticator */
+#define	ELAST		81		/* Must equal the largest errno */
+#endif /* _POSIX_SOURCE */
+
+#ifdef KERNEL
+/* pseudo-errors returned inside kernel to modify return to process */
+#define	ERESTART	-1		/* restart syscall */
+#define	EJUSTRETURN	-2		/* don't modify regs, just return */
+#endif
diff --git a/sys/sys/exec.h b/sys/sys/exec.h
new file mode 100644
index 00000000000..443e1443414
--- /dev/null
+++ b/sys/sys/exec.h
@@ -0,0 +1,71 @@
+/*-
+ * Copyright (c) 1992, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)exec.h	8.3 (Berkeley) 1/21/94
+ */
+
+#include <machine/exec.h>
+
+/*
+ * The following structure is found at the top of the user stack of each
+ * user process. The ps program uses it to locate argv and environment
+ * strings. Programs that wish ps to display other information may modify
+ * it; normally ps_argvstr points to the text for argv[0], and ps_nargvstr
+ * is the same as the program's argc. The fields ps_envstr and ps_nenvstr
+ * are the equivalent for the environment.
+ */
+struct ps_strings {
+	char	*ps_argvstr;	/* first of 0 or more argument strings */
+	int	ps_nargvstr;	/* the number of argument strings */
+	char	*ps_envstr;	/* first of 0 or more environment strings */
+	int	ps_nenvstr;	/* the number of environment strings */
+};
+
+/*
+ * Address of ps_strings structure (in user space).
+ */
+#define	PS_STRINGS \
+	((struct ps_strings *)(USRSTACK - sizeof(struct ps_strings)))
+
+/*
+ * Arguments to the exec system call.
+ */
+struct execve_args {
+	char	*fname;		/* presumably path of the file to exec -- confirm */
+	char	**argp;		/* presumably the argument vector -- confirm */
+	char	**envp;		/* presumably the environment vector -- confirm */
+};
diff --git a/sys/sys/fbio.h b/sys/sys/fbio.h
new file mode 100644
index 00000000000..63371b77ed8
--- /dev/null
+++ b/sys/sys/fbio.h
@@ -0,0 +1,186 @@
+/*
+ * Copyright (c) 1992, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software developed by the Computer Systems
+ * Engineering group at Lawrence Berkeley Laboratory under DARPA
+ * contract BG 91-66 and contributed to Berkeley.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)fbio.h	8.2 (Berkeley) 10/30/93
+ *
+ * from: $Header: fbio.h,v 1.6 93/10/31 06:01:56 torek Exp $ (LBL)
+ */
+
+/*
+ * Frame buffer ioctls (from Sprite, trimmed to essentials for X11).
+ */
+
+/*
+ * Frame buffer type codes.
+ */
+#define	FBTYPE_SUN1BW		0	/* multibus mono */
+#define	FBTYPE_SUN1COLOR	1	/* multibus color */
+#define	FBTYPE_SUN2BW		2	/* memory mono */
+#define	FBTYPE_SUN2COLOR	3	/* color w/rasterop chips */
+#define	FBTYPE_SUN2GP		4	/* GP1/GP2 */
+#define	FBTYPE_SUN5COLOR	5	/* RoadRunner accelerator */
+#define	FBTYPE_SUN3COLOR	6	/* memory color */
+#define	FBTYPE_MEMCOLOR		7	/* memory 24-bit */
+#define	FBTYPE_SUN4COLOR	8	/* memory color w/overlay */
+
+#define	FBTYPE_NOTSUN1		9	/* reserved for customer */
+#define	FBTYPE_NOTSUN2		10	/* reserved for customer */
+#define	FBTYPE_NOTSUN3		11	/* reserved for customer */
+
+#define	FBTYPE_SUNFAST_COLOR	12	/* accelerated 8bit */
+#define	FBTYPE_SUNROP_COLOR	13	/* MEMCOLOR with rop h/w */
+#define	FBTYPE_SUNFB_VIDEO	14	/* Simple video mixing */
+#define	FBTYPE_RESERVED5	15	/* reserved, do not use */
+#define	FBTYPE_RESERVED4	16	/* reserved, do not use */
+#define	FBTYPE_RESERVED3	17	/* reserved, do not use */
+#define	FBTYPE_RESERVED2	18	/* reserved, do not use */
+#define	FBTYPE_RESERVED1	19	/* reserved, do not use */
+
+#define	FBTYPE_LASTPLUSONE	20	/* max number of fbs (change as add) */
+
+/*
+ * Frame buffer descriptor as returned by FBIOGTYPE.
+ */
+struct fbtype {
+	int	fb_type;	/* as defined above */
+	int	fb_height;	/* in pixels */
+	int	fb_width;	/* in pixels */
+	int	fb_depth;	/* bits per pixel */
+	int	fb_cmsize;	/* size of color map (entries) */
+	int	fb_size;	/* total size in bytes */
+};
+#define	FBIOGTYPE	_IOR('F', 0, struct fbtype)
+
+#ifdef notdef
+/*
+ * General purpose structure for passing info in and out of frame buffers
+ * (used for gp1) -- unsupported.
+ */
+struct fbinfo {
+	int	fb_physaddr;	/* physical frame buffer address */
+	int	fb_hwwidth;	/* fb board width */
+	int	fb_hwheight;	/* fb board height */
+	int	fb_addrdelta;	/* phys addr diff between boards */
+	u_char	*fb_ropaddr;	/* fb virtual addr */
+	int	fb_unit;	/* minor devnum of fb */
+};
+#define	FBIOGINFO	_IOR('F', 2, struct fbinfo)
+#endif
+
+/*
+ * Color map I/O.
+ */
+struct fbcmap {
+	int	index;		/* first element (0 origin) */
+	int	count;		/* number of elements */
+	u_char	*red;		/* red color map elements */
+	u_char	*green;		/* green color map elements */
+	u_char	*blue;		/* blue color map elements */
+};
+#define	FBIOPUTCMAP	_IOW('F', 3, struct fbcmap)
+#define	FBIOGETCMAP	_IOW('F', 4, struct fbcmap)
+
+/*
+ * Set/get attributes.
+ */
+#define	FB_ATTR_NDEVSPECIFIC	8	/* no. of device specific values */
+#define	FB_ATTR_NEMUTYPES	4	/* no. of emulation types */
+
+struct fbsattr {
+	int	flags;			/* flags; see below */
+	int	emu_type;		/* emulation type (-1 if unused) */
+	int	dev_specific[FB_ATTR_NDEVSPECIFIC];	/* catchall */
+};
+#define	FB_ATTR_AUTOINIT	1	/* emulation auto init flag */
+#define	FB_ATTR_DEVSPECIFIC	2	/* dev. specific stuff valid flag */
+
+struct fbgattr {
+	int	real_type;		/* real device type */
+	int	owner;			/* PID of owner, 0 if myself */
+	struct	fbtype fbtype;		/* fbtype info for real device */
+	struct	fbsattr sattr;		/* see above */
+	int	emu_types[FB_ATTR_NEMUTYPES];	/* possible emulations */
+						/* (-1 if unused) */
+};
+/*	FBIOSATTR	_IOW('F', 5, struct fbsattr) -- unsupported */
+#define	FBIOGATTR	_IOR('F', 6, struct fbgattr)
+
+/*
+ * Video control.
+ */
+#define	FBVIDEO_OFF		0
+#define	FBVIDEO_ON		1
+
+#define	FBIOSVIDEO	_IOW('F', 7, int)
+#define	FBIOGVIDEO	_IOR('F', 8, int)
+
+/*
+ * Hardware cursor control (for, e.g., CG6).  A rather complex and icky
+ * interface that smells like VMS, but there it is....
+ */
+struct fbcurpos {
+	short	x;		/* x part of a position, hot-spot or size */
+	short	y;		/* y part; see pos/hot/size in struct fbcursor */
+};
+
+struct fbcursor {
+	short	set;		/* flags; see below */
+	short	enable;		/* nonzero => cursor on, 0 => cursor off */
+	struct	fbcurpos pos;	/* position on display */
+	struct	fbcurpos hot;	/* hot-spot within cursor */
+	struct	fbcmap cmap;	/* cursor color map */
+	struct	fbcurpos size;	/* number of valid bits in image & mask */
+	caddr_t	image;		/* cursor image bits */
+	caddr_t	mask;		/* cursor mask bits */
+};
+#define	FB_CUR_SETCUR	0x01	/* set on/off (i.e., obey fbcursor.enable) */
+#define	FB_CUR_SETPOS	0x02	/* set position */
+#define	FB_CUR_SETHOT	0x04	/* set hot-spot */
+#define	FB_CUR_SETCMAP	0x08	/* set cursor color map */
+#define	FB_CUR_SETSHAPE	0x10	/* set size & bits */
+#define	FB_CUR_SETALL	(FB_CUR_SETCUR | FB_CUR_SETPOS | FB_CUR_SETHOT | \
+			 FB_CUR_SETCMAP | FB_CUR_SETSHAPE)
+
+/* controls for cursor attributes & shape (including position) */
+#define	FBIOSCURSOR	_IOW('F', 24, struct fbcursor)
+#define	FBIOGCURSOR	_IOWR('F', 25, struct fbcursor)
+
+/* controls for cursor position only; NOTE(review): the get is _IOW, not _IOR -- confirm */
+#define	FBIOSCURPOS	_IOW('F', 26, struct fbcurpos)
+#define	FBIOGCURPOS	_IOW('F', 27, struct fbcurpos)
+
+/* get maximum cursor size */
+#define	FBIOGCURMAX	_IOR('F', 28, struct fbcurpos)
diff --git a/sys/sys/fcntl.h b/sys/sys/fcntl.h
new file mode 100644
index 00000000000..62762f3498a
--- /dev/null
+++ b/sys/sys/fcntl.h
@@ -0,0 +1,190 @@
+/*-
+ * Copyright (c) 1983, 1990, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)fcntl.h	8.3 (Berkeley) 1/21/94
+ */
+
+#ifndef _SYS_FCNTL_H_
+#define	_SYS_FCNTL_H_
+
+/*
+ * This file includes the definitions for open and fcntl
+ * described by POSIX for <fcntl.h>; it also includes
+ * related kernel definitions.
+ */
+
+#ifndef KERNEL
+#include <sys/types.h>
+#endif
+
+/*
+ * File status flags: these are used by open(2), fcntl(2).
+ * They are also used (indirectly) in the kernel file structure f_flags,
+ * which is a superset of the open/fcntl flags.  Open flags and f_flags
+ * are inter-convertible using OFLAGS(fflags) and FFLAGS(oflags).
+ * Open/fcntl flags begin with O_; kernel-internal flags begin with F.
+ */
+/* open-only flags */
+#define	O_RDONLY	0x0000		/* open for reading only */
+#define	O_WRONLY	0x0001		/* open for writing only */
+#define	O_RDWR		0x0002		/* open for reading and writing */
+#define	O_ACCMODE	0x0003		/* mask for above modes */
+
+/*
+ * Kernel encoding of open mode; separate read and write bits that are
+ * independently testable: 1 greater than the above.
+ *
+ * XXX
+ * FREAD and FWRITE are excluded from the #ifdef KERNEL so that TIOCFLUSH,
+ * which was documented to use FREAD/FWRITE, continues to work.
+ */
+#ifndef _POSIX_SOURCE
+#define	FREAD		0x0001
+#define	FWRITE		0x0002
+#endif
+#define	O_NONBLOCK	0x0004		/* no delay */
+#define	O_APPEND	0x0008		/* set append mode */
+#ifndef _POSIX_SOURCE
+#define	O_SHLOCK	0x0010		/* open with shared file lock */
+#define	O_EXLOCK	0x0020		/* open with exclusive file lock */
+#define	O_ASYNC		0x0040		/* signal pgrp when data ready */
+#define	O_FSYNC		0x0080		/* synchronous writes */
+#endif
+#define	O_CREAT		0x0200		/* create if nonexistent */
+#define	O_TRUNC		0x0400		/* truncate to zero length */
+#define	O_EXCL		0x0800		/* error if already exists */
+#ifdef KERNEL
+#define	FMARK		0x1000		/* mark during gc() */
+#define	FDEFER		0x2000		/* defer for next gc pass */
+#define	FHASLOCK	0x4000		/* descriptor holds advisory lock */
+#endif
+
+/* defined by POSIX 1003.1; BSD default, so no bit required */
+#define	O_NOCTTY	0		/* don't assign controlling terminal */
+
+#ifdef KERNEL
+/* convert from open() flags to/from fflags; convert O_RD/WR to FREAD/FWRITE */
+#define	FFLAGS(oflags)	((oflags) + 1)
+#define	OFLAGS(fflags)	((fflags) - 1)
+
+/* bits to save after open */
+#define	FMASK		(FREAD|FWRITE|FAPPEND|FASYNC|FFSYNC|FNONBLOCK)
+/* bits settable by fcntl(F_SETFL, ...) */
+#define	FCNTLFLAGS	(FAPPEND|FASYNC|FFSYNC|FNONBLOCK)
+#endif
+
+/*
+ * The O_* flags used to have only F* names, which were used in the kernel
+ * and by fcntl.  We retain the F* names for the kernel f_flags field
+ * and for backward compatibility for fcntl.
+ */
+#ifndef _POSIX_SOURCE
+#define	FAPPEND		O_APPEND	/* kernel/compat */
+#define	FASYNC		O_ASYNC		/* kernel/compat */
+#define	FFSYNC		O_FSYNC		/* kernel */
+#define	FNONBLOCK	O_NONBLOCK	/* kernel */
+#define	FNDELAY		O_NONBLOCK	/* compat */
+#define	O_NDELAY	O_NONBLOCK	/* compat */
+#endif
+
+/*
+ * Constants used for fcntl(2)
+ */
+
+/* command values */
+#define	F_DUPFD		0		/* duplicate file descriptor */
+#define	F_GETFD		1		/* get file descriptor flags */
+#define	F_SETFD		2		/* set file descriptor flags */
+#define	F_GETFL		3		/* get file status flags */
+#define	F_SETFL		4		/* set file status flags */
+#ifndef _POSIX_SOURCE
+#define	F_GETOWN	5		/* get SIGIO/SIGURG proc/pgrp */
+#define F_SETOWN	6		/* set SIGIO/SIGURG proc/pgrp */
+#endif
+#define	F_GETLK		7		/* get record locking information */
+#define	F_SETLK		8		/* set record locking information */
+#define	F_SETLKW	9		/* F_SETLK; wait if blocked */
+
+/* file descriptor flags (F_GETFD, F_SETFD) */
+#define	FD_CLOEXEC	1		/* close-on-exec flag */
+
+/* record locking flags (F_GETLK, F_SETLK, F_SETLKW) */
+#define	F_RDLCK		1		/* shared or read lock */
+#define	F_UNLCK		2		/* unlock */
+#define	F_WRLCK		3		/* exclusive or write lock */
+#ifdef KERNEL
+#define	F_WAIT		0x010		/* Wait until lock is granted */
+#define	F_FLOCK		0x020	 	/* Use flock(2) semantics for lock */
+#define	F_POSIX		0x040	 	/* Use POSIX semantics for lock */
+#endif
+
+/*
+ * Advisory file segment locking data type -
+ * information passed to system by user
+ */
+struct flock {
+	off_t	l_start;	/* starting offset */
+	off_t	l_len;		/* len = 0 means until end of file */
+	pid_t	l_pid;		/* lock owner */
+	short	l_type;		/* lock type: read/write, etc. */
+	short	l_whence;	/* type of l_start */
+};
+
+
+#ifndef _POSIX_SOURCE
+/* lock operations for flock(2) */
+#define	LOCK_SH		0x01		/* shared file lock */
+#define	LOCK_EX		0x02		/* exclusive file lock */
+#define	LOCK_NB		0x04		/* don't block when locking */
+#define	LOCK_UN		0x08		/* unlock file */
+#endif
+
+
+#ifndef KERNEL
+#include <sys/cdefs.h>
+
+__BEGIN_DECLS
+int	open __P((const char *, int, ...));
+int	creat __P((const char *, mode_t));
+int	fcntl __P((int, int, ...));
+#ifndef _POSIX_SOURCE
+int	flock __P((int, int));
+#endif /* !_POSIX_SOURCE */
+__END_DECLS
+#endif
+
+#endif /* !_SYS_FCNTL_H_ */
diff --git a/sys/sys/file.h b/sys/sys/file.h
new file mode 100644
index 00000000000..3d82190669a
--- /dev/null
+++ b/sys/sys/file.h
@@ -0,0 +1,76 @@
+/*
+ * Copyright (c) 1982, 1986, 1989, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)file.h	8.1 (Berkeley) 6/2/93
+ */
+
+#include <sys/fcntl.h>
+#include <sys/unistd.h>
+
+#ifdef KERNEL
+struct proc;
+struct uio;
+
+/*
+ * Kernel descriptor table.
+ * One entry for each open kernel vnode and socket.
+ */
+struct file {
+	struct	file *f_filef;	/* list of active files */
+	struct	file **f_fileb;	/* list of active files */
+	short	f_flag;		/* see fcntl.h */
+#define	DTYPE_VNODE	1	/* file */
+#define	DTYPE_SOCKET	2	/* communications endpoint */
+	short	f_type;		/* descriptor type */
+	short	f_count;	/* reference count */
+	short	f_msgcount;	/* references from message queue */
+	struct	ucred *f_cred;	/* credentials associated with descriptor */
+	struct	fileops {
+		int	(*fo_read)	__P((struct file *fp, struct uio *uio,
+					    struct ucred *cred));
+		int	(*fo_write)	__P((struct file *fp, struct uio *uio,
+					    struct ucred *cred));
+		int	(*fo_ioctl)	__P((struct file *fp, int com,
+					    caddr_t data, struct proc *p));
+		int	(*fo_select)	__P((struct file *fp, int which,
+					    struct proc *p));
+		int	(*fo_close)	__P((struct file *fp, struct proc *p));
+	} *f_ops;
+	off_t	f_offset;
+	caddr_t	f_data;		/* vnode or socket */
+};
+
+extern struct file *filehead;	/* head of list of open files */
+extern int maxfiles;		/* kernel limit on number of open files */
+extern int nfiles;		/* actual number of open files */
+
+#endif /* KERNEL */
diff --git a/sys/sys/filedesc.h b/sys/sys/filedesc.h
new file mode 100644
index 00000000000..1071bc10597
--- /dev/null
+++ b/sys/sys/filedesc.h
@@ -0,0 +1,99 @@
+/*
+ * Copyright (c) 1990, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)filedesc.h	8.1 (Berkeley) 6/2/93
+ */
+
+/*
+ * This structure is used for the management of descriptors.  It may be
+ * shared by multiple processes.
+ *
+ * A process is initially started out with NDFILE descriptors stored within
+ * this structure, selected to be enough for typical applications based on
+ * the historical limit of 20 open files (and the usage of descriptors by
+ * shells).  If these descriptors are exhausted, a larger descriptor table
+ * may be allocated, up to a process' resource limit; the internal arrays
+ * are then unused.  The initial expansion is set to NDEXTENT; each time
+ * it runs out, it is doubled until the resource limit is reached. NDEXTENT
+ * should be selected to be the biggest multiple of OFILESIZE (see below)
+ * that will fit in a power-of-two sized piece of memory.
+ */
+#define NDFILE		20
+#define NDEXTENT	50		/* 250 bytes in 256-byte alloc. */ 
+
+struct filedesc {
+	struct	file **fd_ofiles;	/* file structures for open files */
+	char	*fd_ofileflags;		/* per-process open file flags */
+	struct	vnode *fd_cdir;		/* current directory */
+	struct	vnode *fd_rdir;		/* root directory */
+	int	fd_nfiles;		/* number of open files allocated */
+	u_short	fd_lastfile;		/* high-water mark of fd_ofiles */
+	u_short	fd_freefile;		/* approx. next free file */
+	u_short	fd_cmask;		/* mask for file creation */
+	u_short	fd_refcnt;		/* reference count */
+};
+
+/*
+ * Basic allocation of descriptors:
+ * one of the above, plus arrays for NDFILE descriptors.
+ */
+struct filedesc0 {
+	struct	filedesc fd_fd;
+	/*
+	 * These arrays are used when the number of open files is
+	 * <= NDFILE, and are then pointed to by the pointers above.
+	 */
+	struct	file *fd_dfiles[NDFILE];
+	char	fd_dfileflags[NDFILE];
+};
+
+/*
+ * Per-process open flags.
+ */
+#define	UF_EXCLOSE 	0x01		/* auto-close on exec */
+#define	UF_MAPPED 	0x02		/* mapped from device */
+
+/*
+ * Storage required per open file descriptor.
+ */
+#define OFILESIZE (sizeof(struct file *) + sizeof(char))
+
+#ifdef KERNEL
+/*
+ * Kernel global variables and routines.
+ */
+int	fdalloc __P((struct proc *p, int want, int *result));
+int	fdavail __P((struct proc *p, int n));
+int	falloc __P((struct proc *p, struct file **resultfp, int *resultfd));
+struct	filedesc *fdcopy __P((struct proc *p));
+void	fdfree __P((struct proc *p));
+#endif
diff --git a/sys/sys/filio.h b/sys/sys/filio.h
new file mode 100644
index 00000000000..5c8789b882b
--- /dev/null
+++ b/sys/sys/filio.h
@@ -0,0 +1,55 @@
+/*-
+ * Copyright (c) 1982, 1986, 1990, 1993, 1994
+ *	The Regents of the University of California.  All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)filio.h	8.1 (Berkeley) 3/28/94
+ */
+
+#ifndef	_SYS_FILIO_H_
+#define	_SYS_FILIO_H_
+
+#include <sys/ioccom.h>
+
+/* Generic file-descriptor ioctl's. */
+#define	FIOCLEX		 _IO('f', 1)		/* set close on exec on fd */
+#define	FIONCLEX	 _IO('f', 2)		/* remove close on exec */
+#define	FIONREAD	_IOR('f', 127, int)	/* get # bytes to read */
+#define	FIONBIO		_IOW('f', 126, int)	/* set/clear non-blocking i/o */
+#define	FIOASYNC	_IOW('f', 125, int)	/* set/clear async i/o */
+#define	FIOSETOWN	_IOW('f', 124, int)	/* set owner */
+#define	FIOGETOWN	_IOR('f', 123, int)	/* get owner */
+
+#endif /* !_SYS_FILIO_H_ */
diff --git a/sys/sys/gmon.h b/sys/sys/gmon.h
new file mode 100644
index 00000000000..b103df80a8a
--- /dev/null
+++ b/sys/sys/gmon.h
@@ -0,0 +1,159 @@
+/*-
+ * Copyright (c) 1982, 1986, 1992, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)gmon.h	8.2 (Berkeley) 1/4/94
+ */
+
+#ifndef _SYS_GMON_H_
+#define _SYS_GMON_H_
+
+#include <machine/profile.h>
+
+/*
+ * Structure prepended to gmon.out profiling data file.
+ */
+struct gmonhdr {
+	u_long	lpc;		/* base pc address of sample buffer */
+	u_long	hpc;		/* max pc address of sampled buffer */
+	int	ncnt;		/* size of sample buffer (plus this header) */
+	int	version;	/* version number */
+	int	profrate;	/* profiling clock rate */
+	int	spare[3];	/* reserved */
+};
+#define GMONVERSION	0x00051879
+
+/*
+ * histogram counters are unsigned shorts (according to the kernel).
+ */
+#define	HISTCOUNTER	unsigned short
+
+/*
+ * fraction of text space to allocate for histogram counters here, 1/2
+ */
+#define	HISTFRACTION	2
+
+/*
+ * Fraction of text space to allocate for from hash buckets.
+ * The value of HASHFRACTION is based on the minimum number of bytes
+ * of separation between two subroutine call points in the object code.
+ * Given MIN_SUBR_SEPARATION bytes of separation the value of
+ * HASHFRACTION is calculated as:
+ *
+ *	HASHFRACTION = MIN_SUBR_SEPARATION / (2 * sizeof(short) - 1);
+ *
+ * For example, on the VAX, the shortest two call sequence is:
+ *
+ *	calls	$0,(r0)
+ *	calls	$0,(r0)
+ *
+ * which is separated by only three bytes, thus HASHFRACTION is 
+ * calculated as:
+ *
+ *	HASHFRACTION = 3 / (2 * 2 - 1) = 1
+ *
+ * Note that the division above rounds down, thus if MIN_SUBR_SEPARATION
+ * is less than three, this algorithm will not work!
+ *
+ * In practice, however, call instructions are rarely at a minimal 
+ * distance.  Hence, we will define HASHFRACTION to be 2 across all
+ * architectures.  This saves a reasonable amount of space for 
+ * profiling data structures without (in practice) sacrificing
+ * any granularity.
+ */
+#define	HASHFRACTION	2
+
+/*
+ * percent of text space to allocate for tostructs with a minimum.
+ */
+#define ARCDENSITY	2
+#define MINARCS		50
+#define MAXARCS		((1 << (8 * sizeof(HISTCOUNTER))) - 2)
+
+struct tostruct {
+	u_long	selfpc;
+	long	count;
+	u_short	link;
+	u_short pad;
+};
+
+/*
+ * a raw arc, with pointers to the calling site and 
+ * the called site and a count.
+ */
+struct rawarc {
+	u_long	raw_frompc;
+	u_long	raw_selfpc;
+	long	raw_count;
+};
+
+/*
+ * general rounding functions.
+ */
+#define ROUNDDOWN(x,y)	(((x)/(y))*(y))
+#define ROUNDUP(x,y)	((((x)+(y)-1)/(y))*(y))
+
+/*
+ * The profiling data structures are housed in this structure.
+ */
+struct gmonparam {
+	int		state;
+	u_short		*kcount;
+	u_long		kcountsize;
+	u_short		*froms;
+	u_long		fromssize;
+	struct tostruct	*tos;
+	u_long		tossize;
+	long		tolimit;
+	u_long		lowpc;
+	u_long		highpc;
+	u_long		textsize;
+	u_long		hashfraction;
+};
+extern struct gmonparam _gmonparam;
+
+/*
+ * Possible states of profiling.
+ */
+#define	GMON_PROF_ON	0
+#define	GMON_PROF_BUSY	1
+#define	GMON_PROF_ERROR	2
+#define	GMON_PROF_OFF	3
+
+/*
+ * Sysctl definitions for extracting profiling information from the kernel.
+ */
+#define	GPROF_STATE	0	/* int: profiling enabling variable */
+#define	GPROF_COUNT	1	/* struct: profile tick count buffer */
+#define	GPROF_FROMS	2	/* struct: from location hash bucket */
+#define	GPROF_TOS	3	/* struct: destination/count structure */
+#define	GPROF_GMONPARAM	4	/* struct: profiling parameters (see above) */
+#endif /* !_SYS_GMON_H_ */
diff --git a/sys/sys/ioccom.h b/sys/sys/ioccom.h
new file mode 100644
index 00000000000..5bc11b328bd
--- /dev/null
+++ b/sys/sys/ioccom.h
@@ -0,0 +1,64 @@
+/*-
+ * Copyright (c) 1982, 1986, 1990, 1993, 1994
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)ioccom.h	8.2 (Berkeley) 3/28/94
+ */
+
+#ifndef	_SYS_IOCCOM_H_
+#define	_SYS_IOCCOM_H_
+
+/*
+ * Ioctl's have the command encoded in the lower word, and the size of
+ * any in or out parameters in the upper word.  The high 3 bits of the
+ * upper word are used to encode the in/out status of the parameter.
+ */
+#define	IOCPARM_MASK	0x1fff		/* parameter length, at most 13 bits */
+#define	IOCPARM_LEN(x)	(((x) >> 16) & IOCPARM_MASK)
+#define	IOCBASECMD(x)	((x) & ~(IOCPARM_MASK << 16))
+#define	IOCGROUP(x)	(((x) >> 8) & 0xff)
+
+#define	IOCPARM_MAX	NBPG		/* max size of ioctl, mult. of NBPG */
+#define	IOC_VOID	0x20000000	/* no parameters */
+#define	IOC_OUT		0x40000000	/* copy out parameters */
+#define	IOC_IN		0x80000000	/* copy in parameters */
+#define	IOC_INOUT	(IOC_IN|IOC_OUT)
+#define	IOC_DIRMASK	0xe0000000	/* mask for IN/OUT/VOID */
+
+#define	_IOC(inout,group,num,len) \
+	((inout) | (((len) & IOCPARM_MASK) << 16) | ((group) << 8) | (num))
+#define	_IO(g,n)	_IOC(IOC_VOID,	(g), (n), 0)	/* no parameters */
+#define	_IOR(g,n,t)	_IOC(IOC_OUT,	(g), (n), sizeof(t))	/* copy out */
+#define	_IOW(g,n,t)	_IOC(IOC_IN,	(g), (n), sizeof(t))	/* copy in */
+/* copy in and out; this should be _IORW, but stdio got there first */
+#define	_IOWR(g,n,t)	_IOC(IOC_INOUT,	(g), (n), sizeof(t))
+
+#endif /* !_SYS_IOCCOM_H_ */
diff --git a/sys/sys/ioctl.h b/sys/sys/ioctl.h
new file mode 100644
index 00000000000..d04394fd181
--- /dev/null
+++ b/sys/sys/ioctl.h
@@ -0,0 +1,84 @@
+/*-
+ * Copyright (c) 1982, 1986, 1990, 1993, 1994
+ *	The Regents of the University of California.  All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)ioctl.h	8.6 (Berkeley) 3/28/94
+ */
+
+#ifndef	_SYS_IOCTL_H_
+#define	_SYS_IOCTL_H_
+
+#include <sys/ttycom.h>
+
+/*
+ * Pun for SunOS prior to 3.2.  SunOS 3.2 and later support TIOCGWINSZ
+ * and TIOCSWINSZ (yes, even 3.2-3.5, the fact that it wasn't documented
+ * notwithstanding).
+ */
+struct ttysize {
+	unsigned short	ts_lines;
+	unsigned short	ts_cols;
+	unsigned short	ts_xxx;
+	unsigned short	ts_yyy;
+};
+#define	TIOCGSIZE	TIOCGWINSZ
+#define	TIOCSSIZE	TIOCSWINSZ
+
+#include <sys/ioccom.h>
+
+#include <sys/filio.h>
+#include <sys/sockio.h>
+
+#ifndef KERNEL
+
+#include <sys/cdefs.h>
+
+__BEGIN_DECLS
+int	ioctl __P((int, unsigned long, ...));
+__END_DECLS
+#endif /* !KERNEL */
+#endif /* !_SYS_IOCTL_H_ */
+
+/*
+ * Keep outside _SYS_IOCTL_H_
+ * Compatibility with old terminal driver
+ *
+ * Source level -> #define USE_OLD_TTY
+ * Kernel level -> options COMPAT_43 or COMPAT_SUNOS
+ */
+#if defined(USE_OLD_TTY) || defined(COMPAT_43) || defined(COMPAT_SUNOS)
+#include <sys/ioctl_compat.h>
+#endif
diff --git a/sys/sys/ioctl_compat.h b/sys/sys/ioctl_compat.h
new file mode 100644
index 00000000000..fd87b514cf8
--- /dev/null
+++ b/sys/sys/ioctl_compat.h
@@ -0,0 +1,167 @@
+/*
+ * Copyright (c) 1990, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)ioctl_compat.h	8.4 (Berkeley) 1/21/94
+ */
+
+#ifndef _SYS_IOCTL_COMPAT_H_
+#define	_SYS_IOCTL_COMPAT_H_
+
+#include <sys/ttychars.h>
+#include <sys/ttydev.h>
+
+struct tchars {			/* argument type of TIOCSETC/TIOCGETC */
+	char	t_intrc;	/* interrupt */
+	char	t_quitc;	/* quit */
+	char	t_startc;	/* start output */
+	char	t_stopc;	/* stop output */
+	char	t_eofc;		/* end-of-file */
+	char	t_brkc;		/* input delimiter (like nl) */
+};
+
+struct ltchars {		/* argument type of TIOCSLTC/TIOCGLTC */
+	char	t_suspc;	/* stop process signal */
+	char	t_dsuspc;	/* delayed stop process signal */
+	char	t_rprntc;	/* reprint line */
+	char	t_flushc;	/* flush output (toggles) */
+	char	t_werasc;	/* word erase */
+	char	t_lnextc;	/* literal next character */
+};
+
+/*
+ * Structure for TIOCGETP and TIOCSETP ioctls.
+ */
+#ifndef _SGTTYB_		/* skip if the struct was already declared */
+#define	_SGTTYB_
+struct sgttyb {			/* also the argument type of TIOCSETN */
+	char	sg_ispeed;		/* input speed */
+	char	sg_ospeed;		/* output speed */
+	char	sg_erase;		/* erase character */
+	char	sg_kill;		/* kill character */
+	short	sg_flags;		/* mode flags */
+};
+#endif /* !_SGTTYB_ */
+
+#ifdef USE_OLD_TTY
+# undef  TIOCGETD		/* replace any earlier definition */
+# define TIOCGETD	_IOR('t', 0, int)	/* get line discipline */
+# undef  TIOCSETD		/* replace any earlier definition */
+# define TIOCSETD	_IOW('t', 1, int)	/* set line discipline */
+#else /* !USE_OLD_TTY */
+# define OTIOCGETD	_IOR('t', 0, int)	/* get line discipline */
+# define OTIOCSETD	_IOW('t', 1, int)	/* set line discipline */
+#endif /* USE_OLD_TTY */
+#define	TIOCHPCL	_IO('t', 2)		/* hang up on last close */
+#define	TIOCGETP	_IOR('t', 8,struct sgttyb)/* get parameters -- gtty */
+#define	TIOCSETP	_IOW('t', 9,struct sgttyb)/* set parameters -- stty */
+#define	TIOCSETN	_IOW('t',10,struct sgttyb)/* as above, but no flushtty*/
+#define	TIOCSETC	_IOW('t',17,struct tchars)/* set special characters */
+#define	TIOCGETC	_IOR('t',18,struct tchars)/* get special characters */
+#define		TANDEM		0x00000001	/* send stopc on out q full */
+#define		CBREAK		0x00000002	/* half-cooked mode */
+#define		LCASE		0x00000004	/* simulate lower case */
+#define		ECHO		0x00000008	/* echo input */
+#define		CRMOD		0x00000010	/* map \r to \r\n on output */
+#define		RAW		0x00000020	/* no i/o processing */
+#define		ODDP		0x00000040	/* get/send odd parity */
+#define		EVENP		0x00000080	/* get/send even parity */
+#define		ANYP		0x000000c0	/* get any parity/send none */
+#define		NLDELAY		0x00000300	/* \n delay */
+#define			NL0	0x00000000
+#define			NL1	0x00000100	/* tty 37 */
+#define			NL2	0x00000200	/* vt05 */
+#define			NL3	0x00000300
+#define		TBDELAY		0x00000c00	/* horizontal tab delay */
+#define			TAB0	0x00000000
+#define			TAB1	0x00000400	/* tty 37 */
+#define			TAB2	0x00000800
+#define		XTABS		0x00000c00	/* expand tabs on output */
+#define		CRDELAY		0x00003000	/* \r delay */
+#define			CR0	0x00000000
+#define			CR1	0x00001000	/* tn 300 */
+#define			CR2	0x00002000	/* tty 37 */
+#define			CR3	0x00003000	/* concept 100 */
+#define		VTDELAY		0x00004000	/* vertical tab delay */
+#define			FF0	0x00000000
+#define			FF1	0x00004000	/* tty 37 */
+#define		BSDELAY		0x00008000	/* \b delay */
+#define			BS0	0x00000000
+#define			BS1	0x00008000
+#define		ALLDELAY	(NLDELAY|TBDELAY|CRDELAY|VTDELAY|BSDELAY)
+#define		CRTBS		0x00010000	/* do backspacing for crt */
+#define		PRTERA		0x00020000	/* \ ... / erase */
+#define		CRTERA		0x00040000	/* " \b " to wipe out char */
+#define		TILDE		0x00080000	/* hazeltine tilde kludge */
+#define		MDMBUF		0x00100000	/*start/stop output on carrier*/
+#define		LITOUT		0x00200000	/* literal output */
+#define		TOSTOP		0x00400000	/*SIGTTOU on background output*/
+#define		FLUSHO		0x00800000	/* flush output to terminal */
+#define		NOHANG		0x01000000	/* (no-op) was no SIGHUP on carrier drop */
+#define		L001000		0x02000000
+#define		CRTKIL		0x04000000	/* kill line with " \b " */
+#define		PASS8		0x08000000
+#define		CTLECH		0x10000000	/* echo control chars as ^X */
+#define		PENDIN		0x20000000	/* tp->t_rawq needs reread */
+#define		DECCTQ		0x40000000	/* only ^Q starts after ^S */
+#define		NOFLSH		0x80000000	/* no output flush on signal */
+#define	TIOCLBIS	_IOW('t', 127, int)	/* bis local mode bits */
+#define	TIOCLBIC	_IOW('t', 126, int)	/* bic local mode bits */
+#define	TIOCLSET	_IOW('t', 125, int)	/* set entire local mode word */
+#define	TIOCLGET	_IOR('t', 124, int)	/* get local modes */
+#define		LCRTBS		(CRTBS>>16)	/* each L* below is its flag >> 16 */
+#define		LPRTERA		(PRTERA>>16)
+#define		LCRTERA		(CRTERA>>16)
+#define		LTILDE		(TILDE>>16)
+#define		LMDMBUF		(MDMBUF>>16)
+#define		LLITOUT		(LITOUT>>16)
+#define		LTOSTOP		(TOSTOP>>16)
+#define		LFLUSHO		(FLUSHO>>16)
+#define		LNOHANG		(NOHANG>>16)
+#define		LCRTKIL		(CRTKIL>>16)
+#define		LPASS8		(PASS8>>16)
+#define		LCTLECH		(CTLECH>>16)
+#define		LPENDIN		(PENDIN>>16)
+#define		LDECCTQ		(DECCTQ>>16)
+#define		LNOFLSH		(NOFLSH>>16)
+#define	TIOCSLTC	_IOW('t',117,struct ltchars)/* set local special chars*/
+#define	TIOCGLTC	_IOR('t',116,struct ltchars)/* get local special chars*/
+#define OTIOCCONS	_IO('t', 98)	/* for hp300 -- sans int arg */
+#define	OTTYDISC	0
+#define	NETLDISC	1
+#define	NTTYDISC	2
+
+#endif /* !_SYS_IOCTL_COMPAT_H_ */
diff --git a/sys/sys/ipc.h b/sys/sys/ipc.h
new file mode 100644
index 00000000000..cc036a8e83b
--- /dev/null
+++ b/sys/sys/ipc.h
@@ -0,0 +1,79 @@
+/*
+ * Copyright (c) 1988 University of Utah.
+ * Copyright (c) 1990, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * the Systems Programming Group of the University of Utah Computer
+ * Science Department.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)ipc.h	8.3 (Berkeley) 1/21/94
+ */
+
+/*
+ * SVID compatible ipc.h file
+ */
+#ifndef _SYS_IPC_H_
+#define _SYS_IPC_H_
+
+typedef	long	key_t;	/* XXX should be in types.h */
+
+struct ipc_perm {
+	ushort	cuid;	/* creator user id */
+	ushort	cgid;	/* creator group id */
+	ushort	uid;	/* user id */
+	ushort	gid;	/* group id */
+	ushort	mode;	/* r/w permission */
+	ushort	seq;	/* sequence # (to generate unique msg/sem/shm id) */
+	key_t	key;	/* user specified msg/sem/shm key */
+};
+
+/* common mode bits */
+#define	IPC_R		00400	/* read permission */
+#define	IPC_W		00200	/* write/alter permission */
+
+/* SVID required constants (same values as system 5) */
+#define	IPC_CREAT	01000	/* create entry if key does not exist */
+#define	IPC_EXCL	02000	/* fail if key exists */
+#define	IPC_NOWAIT	04000	/* error if request must wait */
+
+#define	IPC_PRIVATE	((key_t)0) /* private key; cast kept as one unit */
+
+#define	IPC_RMID	0	/* remove identifier */
+#define	IPC_SET		1	/* set options */
+#define	IPC_STAT	2	/* get options */
+
+#endif /* !_SYS_IPC_H_ */
diff --git a/sys/sys/kernel.h b/sys/sys/kernel.h
new file mode 100644
index 00000000000..682e6c8c194
--- /dev/null
+++ b/sys/sys/kernel.h
@@ -0,0 +1,59 @@
+/*-
+ * Copyright (c) 1990, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)kernel.h	8.3 (Berkeley) 1/21/94
+ */
+
+/* Global variables for the kernel. */
+
+/* 1.1 */
+extern long hostid;
+extern char hostname[MAXHOSTNAMELEN];	/* MAXHOSTNAMELEN not defined here */
+extern int hostnamelen;
+
+/* 1.2 */
+extern volatile struct timeval mono_time; /* struct timeval not declared here */
+extern struct timeval boottime;
+extern struct timeval runtime;
+extern volatile struct timeval time;
+extern struct timezone tz;			/* XXX */
+
+extern int tick;			/* usec per tick (1000000 / hz) */
+extern int hz;				/* system clock's frequency */
+extern int stathz;			/* statistics clock's frequency */
+extern int profhz;			/* profiling clock's frequency */
+extern int lbolt;			/* once a second sleep address */
diff --git a/sys/sys/ktrace.h b/sys/sys/ktrace.h
new file mode 100644
index 00000000000..1623c3562fe
--- /dev/null
+++ b/sys/sys/ktrace.h
@@ -0,0 +1,156 @@
+/*
+ * Copyright (c) 1988, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)ktrace.h	8.1 (Berkeley) 6/2/93
+ */
+
+/*
+ * operations to ktrace system call  (KTROP(op))
+ */
+#define KTROP_SET		0	/* set trace points */
+#define KTROP_CLEAR		1	/* clear trace points */
+#define KTROP_CLEARFILE		2	/* stop all tracing to file */
+#define	KTROP(o)		((o)&3)	/* macro to extract operation */
+/*
+ * flags (ORed in with operation)
+ */
+#define KTRFLAG_DESCEND		4	/* perform op on all children too */
+
+/*
+ * ktrace record header
+ */
+struct ktr_header {
+	int	ktr_len;		/* length of buf */
+	short	ktr_type;		/* trace record type */
+	pid_t	ktr_pid;		/* process id */
+	char	ktr_comm[MAXCOMLEN+1];	/* command name */
+	struct	timeval ktr_time;	/* timestamp */
+	caddr_t	ktr_buf;		/* the buf whose length is ktr_len */
+};
+
+/*
+ * Test for kernel trace point: type's bit set and KTRFAC_ACTIVE clear
+ */
+#define KTRPOINT(p, type)	\
+	(((p)->p_traceflag & ((1<<(type))|KTRFAC_ACTIVE)) == (1<<(type)))
+
+/*
+ * ktrace record types
+ */
+
+/*
+ * KTR_SYSCALL - system call record
+ */
+#define KTR_SYSCALL	1
+struct ktr_syscall {
+	short	ktr_code;		/* syscall number */
+	short	ktr_narg;		/* number of arguments */
+	/*
+	 * followed by ktr_narg ints
+	 */
+};
+
+/*
+ * KTR_SYSRET - return from system call record
+ */
+#define KTR_SYSRET	2
+struct ktr_sysret {
+	short	ktr_code;	/* presumably syscall number, as in ktr_syscall */
+	short	ktr_eosys;	/* NOTE(review): meaning not shown here */
+	int	ktr_error;	/* presumably the call's error code -- confirm */
+	int	ktr_retval;	/* presumably the call's return value -- confirm */
+};
+
+/*
+ * KTR_NAMEI - namei record
+ */
+#define KTR_NAMEI	3
+	/* record contains pathname */
+
+/*
+ * KTR_GENIO - trace generic process i/o
+ */
+#define KTR_GENIO	4
+struct ktr_genio {
+	int	ktr_fd;		/* presumably the file descriptor -- confirm */
+	enum	uio_rw ktr_rw;	/* enum uio_rw is not declared in this header */
+	/*
+	 * followed by data successfully read/written
+	 */
+};
+
+/*
+ * KTR_PSIG - trace processed signal
+ */
+#define	KTR_PSIG	5
+struct ktr_psig {
+	int	signo;		/* presumably the signal number -- confirm */
+	sig_t	action;		/* sig_t is not declared in this header */
+	int	mask;		/* NOTE(review): meaning not shown here */
+	int	code;		/* NOTE(review): meaning not shown here */
+};
+
+/*
+ * KTR_CSW - trace context switches
+ */
+#define KTR_CSW		6
+struct ktr_csw {
+	int	out;	/* 1 if switch out, 0 if switch in */
+	int	user;	/* 1 if usermode (ivcsw), 0 if kernel (vcsw) */
+};
+
+/*
+ * kernel trace points (in p_traceflag)
+ */
+#define KTRFAC_MASK	0x00ffffff
+#define KTRFAC_SYSCALL	(1<<KTR_SYSCALL)
+#define KTRFAC_SYSRET	(1<<KTR_SYSRET)
+#define KTRFAC_NAMEI	(1<<KTR_NAMEI)
+#define KTRFAC_GENIO	(1<<KTR_GENIO)
+#define	KTRFAC_PSIG	(1<<KTR_PSIG)
+#define KTRFAC_CSW	(1<<KTR_CSW)
+/*
+ * trace flags (also in p_traceflag)
+ */
+#define KTRFAC_ROOT	0x80000000	/* root set this trace */
+#define KTRFAC_INHERIT	0x40000000	/* pass trace flags to children */
+#define KTRFAC_ACTIVE	0x20000000	/* ktrace logging in progress, ignore */
+
+#ifndef	KERNEL
+
+#include <sys/cdefs.h>
+
+__BEGIN_DECLS
+int	ktrace __P((const char *, int, int, pid_t));	/* non-KERNEL builds only */
+__END_DECLS
+
+#endif	/* !KERNEL */
diff --git a/sys/sys/libkern.h b/sys/sys/libkern.h
new file mode 100644
index 00000000000..0e465e03dfd
--- /dev/null
+++ b/sys/sys/libkern.h
@@ -0,0 +1,113 @@
+/*-
+ * Copyright (c) 1992, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)libkern.h	8.1 (Berkeley) 6/10/93
+ */
+
+#ifndef _SYS_LIBKERN_H_
+#define	_SYS_LIBKERN_H_
+
+#include <sys/types.h>
+
+/*
+ * Two-argument maximum/minimum helpers, one pair per integer type:
+ * imax/imin (int), lmax/lmin (long), max/min (u_int), ulmax/ulmin (u_long).
+ * Each returns whichever argument compares greater (or smaller).
+ *
+ * These are definitions, not just declarations, so the include guard
+ * above is required: a second inclusion would redefine them.  The
+ * old-style parameter lists provide no prototype, so callers must pass
+ * arguments that already have the declared type.
+ */
+static inline int
+imax(a, b)
+	int a, b;
+{
+	return (a > b ? a : b);
+}
+static inline int
+imin(a, b)
+	int a, b;
+{
+	return (a < b ? a : b);
+}
+static inline long
+lmax(a, b)
+	long a, b;
+{
+	return (a > b ? a : b);
+}
+static inline long
+lmin(a, b)
+	long a, b;
+{
+	return (a < b ? a : b);
+}
+static inline u_int
+max(a, b)
+	u_int a, b;
+{
+	return (a > b ? a : b);
+}
+static inline u_int
+min(a, b)
+	u_int a, b;
+{
+	return (a < b ? a : b);
+}
+static inline u_long
+ulmax(a, b)
+	u_long a, b;
+{
+	return (a > b ? a : b);
+}
+static inline u_long
+ulmin(a, b)
+	u_long a, b;
+{
+	return (a < b ? a : b);
+}
+
+/* Prototypes for non-quad routines. */
+int	 bcmp __P((const void *, const void *, size_t));
+int	 ffs __P((int));
+int	 locc __P((int, char *, u_int));
+u_long	 random __P((void));
+char	*rindex __P((const char *, int));
+int	 scanc __P((u_int, u_char *, u_char *, int));
+int	 skpc __P((int, int, char *));
+char	*strcat __P((char *, const char *));
+char	*strcpy __P((char *, const char *));
+size_t	 strlen __P((const char *));
+char	*strncpy __P((char *, const char *, size_t));
+
+#endif /* !_SYS_LIBKERN_H_ */
diff --git a/sys/sys/linedisc.h b/sys/sys/linedisc.h
new file mode 100644
index 00000000000..58cb6fa8339
--- /dev/null
+++ b/sys/sys/linedisc.h
@@ -0,0 +1,123 @@
+/*-
+ * Copyright (c) 1990, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)conf.h	8.3 (Berkeley) 1/21/94
+ */
+
+/*
+ * Definitions of device driver entry switches
+ */
+
+struct buf;
+struct proc;
+struct tty;
+struct uio;
+struct vnode;
+
+struct bdevsw {			/* element type of the bdevsw[] table */
+	int	(*d_open)	__P((dev_t dev, int oflags, int devtype,
+				     struct proc *p));
+	int	(*d_close)	__P((dev_t dev, int fflag, int devtype,
+				     struct proc *p));
+	int	(*d_strategy)	__P((struct buf *bp));
+	int	(*d_ioctl)	__P((dev_t dev, int cmd, caddr_t data,
+				     int fflag, struct proc *p));
+	int	(*d_dump)	();	/* parameters vary by architecture */
+	int	(*d_psize)	__P((dev_t dev));
+	int	d_flags;	/* NOTE(review): flag bits not defined in this header */
+};
+
+#ifdef KERNEL
+extern struct bdevsw bdevsw[];
+#endif
+
+struct cdevsw {			/* element type of the cdevsw[] table */
+	int	(*d_open)	__P((dev_t dev, int oflags, int devtype,
+				     struct proc *p));
+	int	(*d_close)	__P((dev_t dev, int fflag, int devtype,
+				     struct proc *p));
+	int	(*d_read)	__P((dev_t dev, struct uio *uio, int ioflag));
+	int	(*d_write)	__P((dev_t dev, struct uio *uio, int ioflag));
+	int	(*d_ioctl)	__P((dev_t dev, int cmd, caddr_t data,
+				     int fflag, struct proc *p));
+	int	(*d_stop)	__P((struct tty *tp, int rw));
+	int	(*d_reset)	__P((int uban));	/* XXX */
+	struct	tty *d_ttys;	/* NOTE(review): usage not shown in this header */
+	int	(*d_select)	__P((dev_t dev, int which, struct proc *p));
+	int	(*d_mmap)	__P(());	/* no parameter list declared */
+	int	(*d_strategy)	__P((struct buf *bp));
+};
+
+#ifdef KERNEL
+extern struct cdevsw cdevsw[];
+
+/* symbolic sleep message strings */
+extern char devopn[], devio[], devwait[], devin[], devout[];
+extern char devioc[], devcls[];
+#endif
+
+struct linesw {			/* element type of the linesw[] table */
+	int	(*l_open)	__P((dev_t dev, struct tty *tp));
+	int	(*l_close)	__P((struct tty *tp, int flag));
+	int	(*l_read)	__P((struct tty *tp, struct uio *uio,
+				     int flag));
+	int	(*l_write)	__P((struct tty *tp, struct uio *uio,
+				     int flag));
+	int	(*l_ioctl)	__P((struct tty *tp, int cmd, caddr_t data,
+				     int flag, struct proc *p));
+	int	(*l_rint)	__P((int c, struct tty *tp));
+	int	(*l_start)	__P((struct tty *tp));
+	int	(*l_modem)	__P((struct tty *tp, int flag));
+};
+
+#ifdef KERNEL
+extern struct linesw linesw[];
+#endif
+
+struct swdevt {			/* element type of the swdevt[] table */
+	dev_t	sw_dev;
+	int	sw_flags;	/* presumably SW_* bits below -- confirm */
+	int	sw_nblks;
+	struct	vnode *sw_vp;
+};
+#define	SW_FREED	0x01
+#define	SW_SEQUENTIAL	0x02
+#define sw_freed	sw_flags	/* XXX compat: old name for sw_flags */
+
+#ifdef KERNEL
+extern struct swdevt swdevt[];
+#endif
diff --git a/sys/sys/malloc.h b/sys/sys/malloc.h
new file mode 100644
index 00000000000..ba67bda1f5a
--- /dev/null
+++ b/sys/sys/malloc.h
@@ -0,0 +1,306 @@
+/*
+ * Copyright (c) 1987, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)malloc.h	8.3 (Berkeley) 1/12/94
+ */
+
+#ifndef _SYS_MALLOC_H_
+#define	_SYS_MALLOC_H_
+
+#define KMEMSTATS	/* selects the function-call MALLOC()/FREE() variants below */
+
+/*
+ * flags to malloc
+ */
+#define	M_WAITOK	0x0000
+#define	M_NOWAIT	0x0001
+
+/*
+ * Types of memory to be allocated
+ */
+#define	M_FREE		0	/* should be on free list */
+#define	M_MBUF		1	/* mbuf */
+#define	M_DEVBUF	2	/* device driver memory */
+#define	M_SOCKET	3	/* socket structure */
+#define	M_PCB		4	/* protocol control block */
+#define	M_RTABLE	5	/* routing tables */
+#define	M_HTABLE	6	/* IMP host tables */
+#define	M_FTABLE	7	/* fragment reassembly header */
+#define	M_ZOMBIE	8	/* zombie proc status */
+#define	M_IFADDR	9	/* interface address */
+#define	M_SOOPTS	10	/* socket options */
+#define	M_SONAME	11	/* socket name */
+#define	M_NAMEI		12	/* namei path name buffer */
+#define	M_GPROF		13	/* kernel profiling buffer */
+#define	M_IOCTLOPS	14	/* ioctl data buffer */
+#define	M_MAPMEM	15	/* mapped memory descriptors */
+#define	M_CRED		16	/* credentials */
+#define	M_PGRP		17	/* process group header */
+#define	M_SESSION	18	/* session header */
+#define	M_IOV		19	/* large iov's */
+#define	M_MOUNT		20	/* vfs mount struct */
+#define	M_FHANDLE	21	/* network file handle */
+#define	M_NFSREQ	22	/* NFS request header */
+#define	M_NFSMNT	23	/* NFS mount structure */
+#define	M_NFSNODE	24	/* NFS vnode private part */
+#define	M_VNODE		25	/* Dynamically allocated vnodes */
+#define	M_CACHE		26	/* Dynamically allocated cache entries */
+#define	M_DQUOT		27	/* UFS quota entries */
+#define	M_UFSMNT	28	/* UFS mount structure */
+#define	M_SHM		29	/* SVID compatible shared memory segments */
+#define	M_VMMAP		30	/* VM map structures */
+#define	M_VMMAPENT	31	/* VM map entry structures */
+#define	M_VMOBJ		32	/* VM object structure */
+#define	M_VMOBJHASH	33	/* VM object hash structure */
+#define	M_VMPMAP	34	/* VM pmap */
+#define	M_VMPVENT	35	/* VM phys-virt mapping entry */
+#define	M_VMPAGER	36	/* XXX: VM pager struct */
+#define	M_VMPGDATA	37	/* XXX: VM pager private data */
+#define	M_FILE		38	/* Open file structure */
+#define	M_FILEDESC	39	/* Open file descriptor table */
+#define	M_LOCKF		40	/* Byte-range locking structures */
+#define	M_PROC		41	/* Proc structures */
+#define	M_SUBPROC	42	/* Proc sub-structures */
+#define	M_SEGMENT	43	/* Segment for LFS */
+#define	M_LFSNODE	44	/* LFS vnode private part */
+#define	M_FFSNODE	45	/* FFS vnode private part */
+#define	M_MFSNODE	46	/* MFS vnode private part */
+#define	M_NQLEASE	47	/* Nqnfs lease */
+#define	M_NQMHOST	48	/* Nqnfs host address table */
+#define	M_NETADDR	49	/* Export host address structure */
+#define	M_NFSSVC	50	/* Nfs server structure */
+#define	M_NFSUID	51	/* Nfs uid mapping structure */
+#define	M_NFSD		52	/* Nfs server daemon structure */
+#define	M_IPMOPTS	53	/* internet multicast options */
+#define	M_IPMADDR	54	/* internet multicast address */
+#define	M_IFMADDR	55	/* link-level multicast address */
+#define	M_MRTABLE	56	/* multicast routing tables */
+#define M_ISOFSMNT	57	/* ISOFS mount structure */
+#define M_ISOFSNODE	58	/* ISOFS vnode private part */
+#define	M_TEMP		74	/* misc temporary data buffers */
+#define	M_LAST		75	/* Must be last type + 1 */
+
+#define INITKMEMNAMES { \
+	"free",		/* 0 M_FREE */ \
+	"mbuf",		/* 1 M_MBUF */ \
+	"devbuf",	/* 2 M_DEVBUF */ \
+	"socket",	/* 3 M_SOCKET */ \
+	"pcb",		/* 4 M_PCB */ \
+	"routetbl",	/* 5 M_RTABLE */ \
+	"hosttbl",	/* 6 M_HTABLE */ \
+	"fragtbl",	/* 7 M_FTABLE */ \
+	"zombie",	/* 8 M_ZOMBIE */ \
+	"ifaddr",	/* 9 M_IFADDR */ \
+	"soopts",	/* 10 M_SOOPTS */ \
+	"soname",	/* 11 M_SONAME */ \
+	"namei",	/* 12 M_NAMEI */ \
+	"gprof",	/* 13 M_GPROF */ \
+	"ioctlops",	/* 14 M_IOCTLOPS */ \
+	"mapmem",	/* 15 M_MAPMEM */ \
+	"cred",		/* 16 M_CRED */ \
+	"pgrp",		/* 17 M_PGRP */ \
+	"session",	/* 18 M_SESSION */ \
+	"iov",		/* 19 M_IOV */ \
+	"mount",	/* 20 M_MOUNT */ \
+	"fhandle",	/* 21 M_FHANDLE */ \
+	"NFS req",	/* 22 M_NFSREQ */ \
+	"NFS mount",	/* 23 M_NFSMNT */ \
+	"NFS node",	/* 24 M_NFSNODE */ \
+	"vnodes",	/* 25 M_VNODE */ \
+	"namecache",	/* 26 M_CACHE */ \
+	"UFS quota",	/* 27 M_DQUOT */ \
+	"UFS mount",	/* 28 M_UFSMNT */ \
+	"shm",		/* 29 M_SHM */ \
+	"VM map",	/* 30 M_VMMAP */ \
+	"VM mapent",	/* 31 M_VMMAPENT */ \
+	"VM object",	/* 32 M_VMOBJ */ \
+	"VM objhash",	/* 33 M_VMOBJHASH */ \
+	"VM pmap",	/* 34 M_VMPMAP */ \
+	"VM pvmap",	/* 35 M_VMPVENT */ \
+	"VM pager",	/* 36 M_VMPAGER */ \
+	"VM pgdata",	/* 37 M_VMPGDATA */ \
+	"file",		/* 38 M_FILE */ \
+	"file desc",	/* 39 M_FILEDESC */ \
+	"lockf",	/* 40 M_LOCKF */ \
+	"proc",		/* 41 M_PROC */ \
+	"subproc",	/* 42 M_SUBPROC */ \
+	"LFS segment",	/* 43 M_SEGMENT */ \
+	"LFS node",	/* 44 M_LFSNODE */ \
+	"FFS node",	/* 45 M_FFSNODE */ \
+	"MFS node",	/* 46 M_MFSNODE */ \
+	"NQNFS Lease",	/* 47 M_NQLEASE */ \
+	"NQNFS Host",	/* 48 M_NQMHOST */ \
+	"Export Host",	/* 49 M_NETADDR */ \
+	"NFS srvsock",	/* 50 M_NFSSVC */ \
+	"NFS uid",	/* 51 M_NFSUID */ \
+	"NFS daemon",	/* 52 M_NFSD */ \
+	"ip_moptions",	/* 53 M_IPMOPTS */ \
+	"in_multi",	/* 54 M_IPMADDR */ \
+	"ether_multi",	/* 55 M_IFMADDR */ \
+	"mrt",		/* 56 M_MRTABLE */ \
+	"ISOFS mount",	/* 57 M_ISOFSMNT */ \
+	"ISOFS node",	/* 58 M_ISOFSNODE */ \
+	NULL, NULL, NULL, NULL, NULL, \
+	NULL, NULL, NULL, NULL, NULL, \
+	NULL, NULL, NULL, NULL, NULL, \
+	"temp",		/* 74 M_TEMP */ \
+}
+
+struct kmemstats {
+	long	ks_inuse;	/* # of packets of this type currently in use */
+	long	ks_calls;	/* total packets of this type ever allocated */
+	long 	ks_memuse;	/* total memory held in bytes */
+	u_short	ks_limblocks;	/* number of times blocked for hitting limit */
+	u_short	ks_mapblocks;	/* number of times blocked for kernel map */
+	long	ks_maxused;	/* maximum number ever used */
+	long	ks_limit;	/* most that are allowed to exist */
+	long	ks_size;	/* sizes of this thing that are allocated */
+	long	ks_spare;
+};
+
+/*
+ * Array of descriptors that describe the contents of each page
+ */
+struct kmemusage {
+	short ku_indx;		/* bucket index */
+	union {
+		u_short freecnt;/* for small allocations, free pieces in page */
+		u_short pagecnt;/* for large allocations, pages alloced */
+	} ku_un;
+};
+#define ku_freecnt ku_un.freecnt
+#define ku_pagecnt ku_un.pagecnt
+
+/*
+ * Set of buckets for each size of memory block that is retained
+ */
+struct kmembuckets {
+	caddr_t kb_next;	/* list of free blocks */
+	caddr_t kb_last;	/* last free block */
+	long	kb_calls;	/* total calls to allocate this size */
+	long	kb_total;	/* total number of blocks allocated */
+	long	kb_totalfree;	/* # of free elements in this bucket */
+	long	kb_elmpercl;	/* # of elements in this sized allocation */
+	long	kb_highwat;	/* high water mark */
+	long	kb_couldfree;	/* over high water mark and could free */
+};
+
+#ifdef KERNEL
+#define	MINALLOCSIZE	(1 << MINBUCKET)
+#define BUCKETINDX(size) /* min i with (1 << i) >= size; max MINBUCKET + 15 */ \
+	((size) <= (MINALLOCSIZE * 128) \
+		? (size) <= (MINALLOCSIZE * 8) \
+			? (size) <= (MINALLOCSIZE * 2) \
+				? (size) <= (MINALLOCSIZE * 1) \
+					? (MINBUCKET + 0) \
+					: (MINBUCKET + 1) \
+				: (size) <= (MINALLOCSIZE * 4) \
+					? (MINBUCKET + 2) \
+					: (MINBUCKET + 3) \
+			: (size) <= (MINALLOCSIZE * 32) \
+				? (size) <= (MINALLOCSIZE * 16) \
+					? (MINBUCKET + 4) \
+					: (MINBUCKET + 5) \
+				: (size) <= (MINALLOCSIZE * 64) \
+					? (MINBUCKET + 6) \
+					: (MINBUCKET + 7) \
+		: (size) <= (MINALLOCSIZE * 2048) \
+			? (size) <= (MINALLOCSIZE * 512) \
+				? (size) <= (MINALLOCSIZE * 256) \
+					? (MINBUCKET + 8) \
+					: (MINBUCKET + 9) \
+				: (size) <= (MINALLOCSIZE * 1024) \
+					? (MINBUCKET + 10) \
+					: (MINBUCKET + 11) \
+			: (size) <= (MINALLOCSIZE * 8192) \
+				? (size) <= (MINALLOCSIZE * 4096) \
+					? (MINBUCKET + 12) \
+					: (MINBUCKET + 13) \
+				: (size) <= (MINALLOCSIZE * 16384) \
+					? (MINBUCKET + 14) \
+					: (MINBUCKET + 15))
+
+/*
+ * Turn virtual addresses into kmem map indices
+ */
+#define kmemxtob(alloc)	(kmembase + (alloc) * NBPG)
+#define btokmemx(addr)	(((caddr_t)(addr) - kmembase) / NBPG)
+#define btokup(addr)	(&kmemusage[((caddr_t)(addr) - kmembase) >> CLSHIFT])
+
+/*
+ * Macro versions for the usual cases of malloc/free
+ */
+#if defined(KMEMSTATS) || defined(DIAGNOSTIC)
+#define	MALLOC(space, cast, size, type, flags) \
+	(space) = (cast)malloc((u_long)(size), type, flags)
+#define FREE(addr, type) free((caddr_t)(addr), type)
+
+#else /* do not collect statistics (unused while KMEMSTATS is defined above) */
+#define	MALLOC(space, cast, size, type, flags) { \
+	register struct kmembuckets *kbp = &bucket[BUCKETINDX(size)]; \
+	long s = splimp(); \
+	if (kbp->kb_next == NULL) { /* bucket free list empty: call malloc() */ \
+		(space) = (cast)malloc((u_long)(size), type, flags); \
+	} else { /* pop the head block; its first word links to the next one */ \
+		(space) = (cast)kbp->kb_next; \
+		kbp->kb_next = *(caddr_t *)(space); \
+	} \
+	splx(s); \
+}
+
+#define FREE(addr, type) { \
+	register struct kmembuckets *kbp; \
+	register struct kmemusage *kup = btokup(addr); \
+	long s = splimp(); \
+	if (1 << kup->ku_indx > MAXALLOCSAVE) { /* above MAXALLOCSAVE: use free() */ \
+		free((caddr_t)(addr), type); \
+	} else { /* append the block to the tail of its bucket's free list */ \
+		kbp = &bucket[kup->ku_indx]; \
+		if (kbp->kb_next == NULL) \
+			kbp->kb_next = (caddr_t)(addr); \
+		else \
+			*(caddr_t *)(kbp->kb_last) = (caddr_t)(addr); \
+		*(caddr_t *)(addr) = NULL; \
+		kbp->kb_last = (caddr_t)(addr); \
+	} \
+	splx(s); \
+}
+#endif /* do not collect statistics */
+
+extern struct kmemstats kmemstats[];
+extern struct kmemusage *kmemusage;
+extern char *kmembase;
+extern struct kmembuckets bucket[];
+extern void *malloc __P((unsigned long size, int type, int flags));
+extern void free __P((void *addr, int type));
+#endif /* KERNEL */
+#endif /* !_SYS_MALLOC_H_ */
diff --git a/sys/sys/map.h b/sys/sys/map.h
new file mode 100644
index 00000000000..6cec4b55653
--- /dev/null
+++ b/sys/sys/map.h
@@ -0,0 +1,82 @@
+/*-
+ * Copyright (c) 1982, 1986, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)map.h	8.3 (Berkeley) 1/26/94
+ */
+
+/*
+ * Resource allocation maps.
+ *
+ * Associated routines manage sub-allocation of an address space using
+ * an array of segment descriptors.  The first element of this array
+ * is a map structure, describing the array's extent and the name
+ * of the controlled object.  Each additional structure represents
+ * a free segment of the address space.
+ *
+ * A call to rminit initializes a resource map and may also be used
+ * to free some address space for the map.  Subsequent calls to rmalloc
+ * and rmfree allocate and free space in the resource map.  If the resource
+ * map becomes too fragmented to be described in the available space,
+ * then some of the resource is discarded.  This may lead to critical
+ * shortages, but is better than not checking (as the previous versions
+ * of these routines did) or giving up and calling panic().  The routines
+ * could use linked lists and call a memory allocator when they run
+ * out of space, but that would not solve the out of space problem when
+ * called at interrupt time.
+ *
+ * N.B.: The address 0 in the resource address space is not available
+ * as it is used internally by the resource map routines.
+ */
+struct map {
+	struct	mapent *m_limit;	/* address of last slot in map */
+	char	*m_name;		/* name of resource, for messages */
+};
+
+struct mapent {
+	long	m_size;			/* size of this segment of the map */
+	long	m_addr;			/* start of segment */
+};
+
+#ifdef KERNEL
+#define	ARGMAPSIZE	16
+struct	map *kmemmap, *mbmap, *swapmap;
+int	nswapmap;
+
+long	rmalloc __P((struct map *, long));
+void	rmfree __P((struct map *, long, long));
+void	rminit __P((struct map *, long, long, char *, int));
+#endif
diff --git a/sys/sys/mbuf.h b/sys/sys/mbuf.h
new file mode 100644
index 00000000000..f3ea7edefe6
--- /dev/null
+++ b/sys/sys/mbuf.h
@@ -0,0 +1,402 @@
+/*
+ * Copyright (c) 1982, 1986, 1988, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)mbuf.h	8.3 (Berkeley) 1/21/94
+ */
+
+#ifndef M_WAITOK
+#include <sys/malloc.h>
+#endif
+
+/*
+ * Mbufs are of a single size, MSIZE (machine/machparam.h), which
+ * includes overhead.  An mbuf may add a single "mbuf cluster" of size
+ * MCLBYTES (also in machine/machparam.h), which has no additional overhead
+ * and is used instead of the internal data area; this is done when
+ * at least MINCLSIZE of data must be stored.
+ */
+
+#define	MLEN		(MSIZE - sizeof(struct m_hdr))	/* normal data len */
+#define	MHLEN		(MLEN - sizeof(struct pkthdr))	/* data len w/pkthdr */
+
+#define	MINCLSIZE	(MHLEN + MLEN)	/* smallest amount to put in cluster */
+#define	M_MAXCOMPRESS	(MHLEN / 2)	/* max amount to copy for compression */
+
+/*
+ * Macros for type conversion (pointers are cast via u_long, never int):
+ * mtod(m,t) -	convert mbuf pointer to data pointer of correct type
+ * dtom(x) -	convert data pointer within mbuf to mbuf pointer (XXX: masks)
+ * mtocl(x) -	convert pointer within cluster to cluster index #
+ * cltom(x) -	convert cluster # to ptr to beginning of cluster
+ */
+#define mtod(m,t)	((t)((m)->m_data))
+#define	dtom(x)		((struct mbuf *)((u_long)(x) & ~(u_long)(MSIZE-1)))
+#define	mtocl(x)	(((u_long)(x) - (u_long)mbutl) >> MCLSHIFT)
+#define	cltom(x)	((caddr_t)((u_long)mbutl + ((u_long)(x) << MCLSHIFT)))
+
+/* header at beginning of each mbuf: */
+struct m_hdr {
+	struct	mbuf *mh_next;		/* next buffer in chain */
+	struct	mbuf *mh_nextpkt;	/* next chain in queue/record */
+	int	mh_len;			/* amount of data in this mbuf */
+	caddr_t	mh_data;		/* location of data */
+	short	mh_type;		/* type of data in this mbuf */
+	short	mh_flags;		/* flags; see below */
+};
+
+/* record/packet header in first mbuf of chain; valid if M_PKTHDR set */
+struct	pkthdr {
+	int	len;		/* total packet length */
+	struct	ifnet *rcvif;	/* rcv interface */
+};
+
+/* description of external storage mapped into mbuf, valid if M_EXT set */
+struct m_ext {
+	caddr_t	ext_buf;		/* start of buffer */
+	void	(*ext_free)();		/* free routine if not the usual */
+	u_int	ext_size;		/* size of buffer, for ext_free */
+};
+
+struct mbuf {
+	struct	m_hdr m_hdr;
+	union {
+		struct {
+			struct	pkthdr MH_pkthdr;	/* M_PKTHDR set */
+			union {
+				struct	m_ext MH_ext;	/* M_EXT set */
+				char	MH_databuf[MHLEN];
+			} MH_dat;
+		} MH;
+		char	M_databuf[MLEN];		/* !M_PKTHDR, !M_EXT */
+	} M_dat;
+};
+#define	m_next		m_hdr.mh_next
+#define	m_len		m_hdr.mh_len
+#define	m_data		m_hdr.mh_data
+#define	m_type		m_hdr.mh_type
+#define	m_flags		m_hdr.mh_flags
+#define	m_nextpkt	m_hdr.mh_nextpkt
+#define	m_act		m_nextpkt
+#define	m_pkthdr	M_dat.MH.MH_pkthdr
+#define	m_ext		M_dat.MH.MH_dat.MH_ext
+#define	m_pktdat	M_dat.MH.MH_dat.MH_databuf
+#define	m_dat		M_dat.M_databuf
+
+/* mbuf flags */
+#define	M_EXT		0x0001	/* has associated external storage */
+#define	M_PKTHDR	0x0002	/* start of record */
+#define	M_EOR		0x0004	/* end of record */
+
+/* mbuf pkthdr flags, also in m_flags */
+#define	M_BCAST		0x0100	/* send/received as link-level broadcast */
+#define	M_MCAST		0x0200	/* send/received as link-level multicast */
+
+/* flags copied when copying m_pkthdr */
+#define	M_COPYFLAGS	(M_PKTHDR|M_EOR|M_BCAST|M_MCAST)
+
+/* mbuf types */
+#define	MT_FREE		0	/* should be on free list */
+#define	MT_DATA		1	/* dynamic (data) allocation */
+#define	MT_HEADER	2	/* packet header */
+#define	MT_SOCKET	3	/* socket structure */
+#define	MT_PCB		4	/* protocol control block */
+#define	MT_RTABLE	5	/* routing tables */
+#define	MT_HTABLE	6	/* IMP host tables */
+#define	MT_ATABLE	7	/* address resolution tables */
+#define	MT_SONAME	8	/* socket name */
+#define	MT_SOOPTS	10	/* socket options */
+#define	MT_FTABLE	11	/* fragment reassembly header */
+#define	MT_RIGHTS	12	/* access rights */
+#define	MT_IFADDR	13	/* interface address */
+#define MT_CONTROL	14	/* extra-data protocol message */
+#define MT_OOBDATA	15	/* expedited data  */
+
+/* flags to m_get/MGET */
+#define	M_DONTWAIT	M_NOWAIT
+#define	M_WAIT		M_WAITOK
+
+/*
+ * mbuf utility macros:
+ *
+ *	MBUFLOCK(code)
+ * prevents a section of code from being interrupted by network
+ * drivers.
+ */
+#define	MBUFLOCK(code) \
+	{ int ms = splimp(); \
+	  { code } \
+	  splx(ms); \
+	}
+
+/*
+ * mbuf allocation/deallocation macros:
+ *
+ *	MGET(struct mbuf *m, int how, int type)
+ * allocates an mbuf and initializes it to contain internal data.
+ *
+ *	MGETHDR(struct mbuf *m, int how, int type)
+ * allocates an mbuf and initializes it to contain a packet header
+ * and internal data.
+ */
+#define	MGET(m, how, type) { \
+	MALLOC((m), struct mbuf *, MSIZE, mbtypes[type], (how)); \
+	if (m) { \
+		(m)->m_type = (type); \
+		MBUFLOCK(mbstat.m_mtypes[type]++;) \
+		(m)->m_next = (struct mbuf *)NULL; \
+		(m)->m_nextpkt = (struct mbuf *)NULL; \
+		(m)->m_data = (m)->m_dat; \
+		(m)->m_flags = 0; \
+	} else \
+		(m) = m_retry((how), (type)); \
+}
+
+#define	MGETHDR(m, how, type) { \
+	MALLOC((m), struct mbuf *, MSIZE, mbtypes[type], (how)); \
+	if (m) { \
+		(m)->m_type = (type); \
+		MBUFLOCK(mbstat.m_mtypes[type]++;) \
+		(m)->m_next = (struct mbuf *)NULL; \
+		(m)->m_nextpkt = (struct mbuf *)NULL; \
+		(m)->m_data = (m)->m_pktdat; \
+		(m)->m_flags = M_PKTHDR; \
+	} else \
+		(m) = m_retryhdr((how), (type)); \
+}
+
+/*
+ * Mbuf cluster macros.
+ * MCLALLOC(caddr_t p, int how) allocates an mbuf cluster.
+ * MCLGET adds such clusters to a normal mbuf;
+ * the flag M_EXT is set upon success.
+ * MCLFREE releases a reference to a cluster allocated by MCLALLOC,
+ * freeing the cluster if the reference count has reached 0.
+ *
+ * Normal mbuf clusters are normally treated as character arrays
+ * after allocation, but use the first word of the buffer as a free list
+ * pointer while on the free list.
+ */
+union mcluster {
+	union	mcluster *mcl_next;
+	char	mcl_buf[MCLBYTES];
+};
+
+#define	MCLALLOC(p, how) \
+	MBUFLOCK( \
+	  if (mclfree == 0) \
+		(void)m_clalloc(1, (how)); \
+	  if ((p) = (caddr_t)mclfree) { \
+		++mclrefcnt[mtocl(p)]; \
+		mbstat.m_clfree--; \
+		mclfree = ((union mcluster *)(p))->mcl_next; \
+	  } \
+	)
+
+#define	MCLGET(m, how) \
+	{ MCLALLOC((m)->m_ext.ext_buf, (how)); \
+	  if ((m)->m_ext.ext_buf != NULL) { \
+		(m)->m_data = (m)->m_ext.ext_buf; \
+		(m)->m_flags |= M_EXT; \
+		(m)->m_ext.ext_size = MCLBYTES;  \
+	  } \
+	}
+
+#define	MCLFREE(p) \
+	MBUFLOCK ( \
+	  if (--mclrefcnt[mtocl(p)] == 0) { \
+		((union mcluster *)(p))->mcl_next = mclfree; \
+		mclfree = (union mcluster *)(p); \
+		mbstat.m_clfree++; \
+	  } \
+	)
+
+/*
+ * MFREE(struct mbuf *m, struct mbuf *n)
+ * Free a single mbuf and associated external storage.
+ * Place the successor, if any, in n.
+ */
+#ifdef notyet
+#define	MFREE(m, n) \
+	{ MBUFLOCK(mbstat.m_mtypes[(m)->m_type]--;) \
+	  if ((m)->m_flags & M_EXT) { \
+		if ((m)->m_ext.ext_free) \
+			(*((m)->m_ext.ext_free))((m)->m_ext.ext_buf, \
+			    (m)->m_ext.ext_size); \
+		else \
+			MCLFREE((m)->m_ext.ext_buf); \
+	  } \
+	  (n) = (m)->m_next; \
+	  FREE((m), mbtypes[(m)->m_type]); \
+	}
+#else /* notyet */
+#define	MFREE(m, nn) \
+	{ MBUFLOCK(mbstat.m_mtypes[(m)->m_type]--;) \
+	  if ((m)->m_flags & M_EXT) { \
+		MCLFREE((m)->m_ext.ext_buf); \
+	  } \
+	  (nn) = (m)->m_next; \
+	  FREE((m), mbtypes[(m)->m_type]); \
+	}
+#endif
+
+/*
+ * Copy the mbuf pkthdr from mbuf "from" to mbuf "to".
+ * "from" must have M_PKTHDR set, and "to" must be empty.
+ */
+#define	M_COPY_PKTHDR(to, from) { \
+	(to)->m_pkthdr = (from)->m_pkthdr; \
+	(to)->m_flags = (from)->m_flags & M_COPYFLAGS; \
+	(to)->m_data = (to)->m_pktdat; \
+}
+
+/*
+ * Set the m_data pointer of a newly-allocated mbuf (m_get/MGET) to place
+ * an object of the specified size at the end of the mbuf, longword aligned.
+ */
+#define	M_ALIGN(m, len) \
+	{ (m)->m_data += (MLEN - (len)) &~ (sizeof(long) - 1); }
+/*
+ * As above, for mbufs allocated with m_gethdr/MGETHDR
+ * or initialized by M_COPY_PKTHDR.
+ */
+#define	MH_ALIGN(m, len) \
+	{ (m)->m_data += (MHLEN - (len)) &~ (sizeof(long) - 1); }
+
+/*
+ * Compute the amount of space available
+ * before the current start of data in an mbuf.
+ */
+#define	M_LEADINGSPACE(m) \
+	((m)->m_flags & M_EXT ? /* (m)->m_data - (m)->m_ext.ext_buf */ 0 : \
+	    (m)->m_flags & M_PKTHDR ? (m)->m_data - (m)->m_pktdat : \
+	    (m)->m_data - (m)->m_dat)
+
+/*
+ * Compute the amount of space available
+ * after the end of data in an mbuf.
+ */
+#define	M_TRAILINGSPACE(m) \
+	((m)->m_flags & M_EXT ? (m)->m_ext.ext_buf + (m)->m_ext.ext_size - \
+	    ((m)->m_data + (m)->m_len) : \
+	    &(m)->m_dat[MLEN] - ((m)->m_data + (m)->m_len))
+
+/*
+ * Arrange to prepend space of size plen to mbuf m.
+ * If a new mbuf must be allocated, how specifies whether to wait.
+ * If how is M_DONTWAIT and allocation fails, the original mbuf chain
+ * is freed and m is set to NULL.
+ */
+#define	M_PREPEND(m, plen, how) { \
+	if (M_LEADINGSPACE(m) >= (plen)) { \
+		(m)->m_data -= (plen); \
+		(m)->m_len += (plen); \
+	} else \
+		(m) = m_prepend((m), (plen), (how)); \
+	if ((m) && (m)->m_flags & M_PKTHDR) \
+		(m)->m_pkthdr.len += (plen); \
+}
+
+/* change mbuf m to type t, moving its count between mbstat.m_mtypes[] slots */
+#define MCHTYPE(m, t) { \
+	MBUFLOCK(mbstat.m_mtypes[(m)->m_type]--; mbstat.m_mtypes[t]++;) \
+	(m)->m_type = t;\
+}
+
+/* length to m_copy to copy all */
+#define	M_COPYALL	1000000000
+
+/* compatibility with 4.3 */
+#define  m_copy(m, o, l)	m_copym((m), (o), (l), M_DONTWAIT)
+
+/*
+ * Mbuf statistics.
+ */
+struct mbstat {
+	u_long	m_mbufs;	/* mbufs obtained from page pool */
+	u_long	m_clusters;	/* clusters obtained from page pool */
+	u_long	m_spare;	/* spare field */
+	u_long	m_clfree;	/* free clusters */
+	u_long	m_drops;	/* times failed to find space */
+	u_long	m_wait;		/* times waited for space */
+	u_long	m_drain;	/* times drained protocols for space */
+	u_short	m_mtypes[256];	/* type specific mbuf allocations */
+};
+
+#ifdef	KERNEL
+extern	struct mbuf *mbutl;		/* virtual address of mclusters */
+extern	char *mclrefcnt;		/* cluster reference counts */
+struct	mbstat mbstat;
+extern	int nmbclusters;
+union	mcluster *mclfree;
+int	max_linkhdr;			/* largest link-level header */
+int	max_protohdr;			/* largest protocol header */
+int	max_hdr;			/* largest link+protocol header */
+int	max_datalen;			/* MHLEN - max_hdr */
+extern	int mbtypes[];			/* XXX */
+
+struct	mbuf *m_copym __P((struct mbuf *, int, int, int));
+struct	mbuf *m_free __P((struct mbuf *));
+struct	mbuf *m_get __P((int, int));
+struct	mbuf *m_getclr __P((int, int));
+struct	mbuf *m_gethdr __P((int, int));
+struct	mbuf *m_prepend __P((struct mbuf *, int, int));
+struct	mbuf *m_pullup __P((struct mbuf *, int));
+struct	mbuf *m_retry __P((int, int));
+struct	mbuf *m_retryhdr __P((int, int));
+int	m_clalloc __P((int, int));
+void	m_copyback __P((struct mbuf *, int, int, caddr_t));
+void	m_freem __P((struct mbuf *));
+
+#ifdef MBTYPES
+int mbtypes[] = {				/* XXX */
+	M_FREE,		/* MT_FREE	0	   should be on free list */
+	M_MBUF,		/* MT_DATA	1	   dynamic (data) allocation */
+	M_MBUF,		/* MT_HEADER	2	   packet header */
+	M_SOCKET,	/* MT_SOCKET	3	   socket structure */
+	M_PCB,		/* MT_PCB	4	   protocol control block */
+	M_RTABLE,	/* MT_RTABLE	5	   routing tables */
+	M_HTABLE,	/* MT_HTABLE	6	   IMP host tables */
+	0,		/* MT_ATABLE	7	   address resolution tables */
+	M_MBUF,		/* MT_SONAME	8	   socket name */
+	0,		/* 		9 */
+	M_SOOPTS,	/* MT_SOOPTS	10	   socket options */
+	M_FTABLE,	/* MT_FTABLE	11	   fragment reassembly header */
+	M_MBUF,		/* MT_RIGHTS	12	   access rights */
+	M_IFADDR,	/* MT_IFADDR	13	   interface address */
+	M_MBUF,		/* MT_CONTROL	14	   extra-data protocol message */
+	M_MBUF,		/* MT_OOBDATA	15	   expedited data  */
+#ifdef DATAKIT
+	25, 26, 27, 28, 29, 30, 31, 32		/* datakit ugliness */
+#endif
+};
+#endif
+#endif
diff --git a/sys/sys/mman.h b/sys/sys/mman.h
new file mode 100644
index 00000000000..b3951c202cb
--- /dev/null
+++ b/sys/sys/mman.h
@@ -0,0 +1,89 @@
+/*-
+ * Copyright (c) 1982, 1986, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)mman.h	8.1 (Berkeley) 6/2/93
+ */
+
+/*
+ * Protections are chosen from these bits, or-ed together
+ */
+#define	PROT_READ	0x01	/* pages can be read */
+#define	PROT_WRITE	0x02	/* pages can be written */
+#define	PROT_EXEC	0x04	/* pages can be executed */
+
+/*
+ * Flags contain sharing type and options.
+ * Sharing types; choose one.
+ */
+#define	MAP_SHARED	0x0001	/* share changes */
+#define	MAP_PRIVATE	0x0002	/* changes are private */
+#define	MAP_COPY	0x0004	/* "copy" region at mmap time */
+
+/*
+ * Other flags
+ */
+#define	MAP_FIXED	 0x0010	/* map addr must be exactly as requested */
+#define	MAP_RENAME	 0x0020	/* Sun: rename private pages to file */
+#define	MAP_NORESERVE	 0x0040	/* Sun: don't reserve needed swap area */
+#define	MAP_INHERIT	 0x0080	/* region is retained after exec */
+#define	MAP_NOEXTEND	 0x0100	/* for MAP_FILE, don't change file size */
+#define	MAP_HASSEMAPHORE 0x0200	/* region may contain semaphores */
+
+/*
+ * Mapping type; default is map from file.
+ */
+#define	MAP_ANON	0x1000	/* allocated from memory, swap space */
+
+/*
+ * Advice to madvise
+ */
+#define	MADV_NORMAL	0	/* no further special treatment */
+#define	MADV_RANDOM	1	/* expect random page references */
+#define	MADV_SEQUENTIAL	2	/* expect sequential page references */
+#define	MADV_WILLNEED	3	/* will need these pages */
+#define	MADV_DONTNEED	4	/* don't need these pages */
+
+#ifndef KERNEL
+
+#include <sys/cdefs.h>
+
+__BEGIN_DECLS
+/* Some of these int's should probably be size_t's */
+caddr_t	mmap __P((caddr_t, size_t, int, int, int, off_t));
+int	mprotect __P((caddr_t, size_t, int));
+int	munmap __P((caddr_t, size_t));
+int	msync __P((caddr_t, size_t));
+int	mlock __P((caddr_t, size_t));
+int	munlock __P((caddr_t, size_t));
+__END_DECLS
+
+#endif /* !KERNEL */
diff --git a/sys/sys/mount.h b/sys/sys/mount.h
new file mode 100644
index 00000000000..4561675ef0d
--- /dev/null
+++ b/sys/sys/mount.h
@@ -0,0 +1,418 @@
+/*
+ * Copyright (c) 1989, 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)mount.h	8.13 (Berkeley) 3/27/94
+ */
+
+#ifndef KERNEL
+#include <sys/ucred.h>
+#endif
+#include <sys/queue.h>
+
+typedef struct { long val[2]; } fsid_t;		/* file system id type */
+
+/*
+ * File identifier.
+ * These are unique per filesystem on a single machine.
+ */
+#define	MAXFIDSZ	16	/* size of fid_data[], in bytes */
+
+struct fid {
+	u_short		fid_len;		/* length of data in bytes */
+	u_short		fid_reserved;		/* force longword alignment */
+	char		fid_data[MAXFIDSZ];	/* data (up to MAXFIDSZ bytes) */
+};
+
+/*
+ * File system statistics (the buffer type taken by statfs() and fstatfs()).
+ */
+
+#define MNAMELEN 90	/* length of buffer for returned name */
+
+struct statfs {
+	short	f_type;			/* type of filesystem (MOUNT_* below) */
+	short	f_flags;		/* copy of mount flags */
+	long	f_bsize;		/* fundamental file system block size */
+	long	f_iosize;		/* optimal transfer block size */
+	long	f_blocks;		/* total data blocks in file system */
+	long	f_bfree;		/* free blocks in fs */
+	long	f_bavail;		/* free blocks avail to non-superuser */
+	long	f_files;		/* total file nodes in file system */
+	long	f_ffree;		/* free file nodes in fs */
+	fsid_t	f_fsid;			/* file system id */
+	long	f_spare[9];		/* spare for later */
+	char	f_mntonname[MNAMELEN];	/* directory on which mounted */
+	char	f_mntfromname[MNAMELEN];/* mounted filesystem */
+};
+
+/*
+ * File system types; each value is also the index of its name in INITMOUNTNAMES.
+ */
+#define	MOUNT_NONE	0
+#define	MOUNT_UFS	1	/* Fast Filesystem */
+#define	MOUNT_NFS	2	/* Sun-compatible Network Filesystem */
+#define	MOUNT_MFS	3	/* Memory-based Filesystem */
+#define	MOUNT_MSDOS	4	/* MS/DOS Filesystem */
+#define	MOUNT_LFS	5	/* Log-based Filesystem */
+#define	MOUNT_LOFS	6	/* Loopback Filesystem */
+#define	MOUNT_FDESC	7	/* File Descriptor Filesystem */
+#define	MOUNT_PORTAL	8	/* Portal Filesystem */
+#define MOUNT_NULL	9	/* Minimal Filesystem Layer */
+#define MOUNT_UMAP	10	/* User/Group Identifier Remapping Filesystem */
+#define MOUNT_KERNFS	11	/* Kernel Information Filesystem */
+#define MOUNT_PROCFS	12	/* /proc Filesystem */
+#define MOUNT_AFS	13	/* Andrew Filesystem */
+#define MOUNT_CD9660	14	/* ISO9660 (aka CDROM) Filesystem */
+#define MOUNT_UNION	15	/* Union (translucent) Filesystem */
+#define	MOUNT_MAXTYPE	15	/* highest type number defined above */
+
+#define INITMOUNTNAMES { \
+	"none",		/*  0 MOUNT_NONE */ \
+	"ufs",		/*  1 MOUNT_UFS */ \
+	"nfs",		/*  2 MOUNT_NFS */ \
+	"mfs",		/*  3 MOUNT_MFS */ \
+	"msdos",	/*  4 MOUNT_MSDOS */ \
+	"lfs",		/*  5 MOUNT_LFS */ \
+	"lofs",		/*  6 MOUNT_LOFS */ \
+	"fdesc",	/*  7 MOUNT_FDESC */ \
+	"portal",	/*  8 MOUNT_PORTAL */ \
+	"null",		/*  9 MOUNT_NULL */ \
+	"umap",		/* 10 MOUNT_UMAP */ \
+	"kernfs",	/* 11 MOUNT_KERNFS */ \
+	"procfs",	/* 12 MOUNT_PROCFS */ \
+	"afs",		/* 13 MOUNT_AFS */ \
+	"iso9660fs",	/* 14 MOUNT_CD9660 */ \
+	"union",	/* 15 MOUNT_UNION */ \
+	0,		/* 16 MOUNT_SPARE (null terminator slot) */ \
+}
+
+/*
+ * Structure per mounted file system.  Each mounted file system has an
+ * array of operations and an instance record.  The file systems are
+ * put on a doubly linked tail queue, linked through mnt_list.
+ */
+LIST_HEAD(vnodelst, vnode);			/* head type for mnt_vnodelist */
+
+struct mount {
+	TAILQ_ENTRY(mount) mnt_list;		/* mount list */
+	struct vfsops	*mnt_op;		/* operations on fs */
+	struct vnode	*mnt_vnodecovered;	/* vnode we mounted on */
+	struct vnodelst	mnt_vnodelist;		/* list of vnodes on this mount */
+	int		mnt_flag;		/* flags (MNT_* below) */
+	int		mnt_maxsymlinklen;	/* max size of short symlink */
+	struct statfs	mnt_stat;		/* cache of filesystem stats */
+	qaddr_t		mnt_data;		/* private data */
+};
+
+/*
+ * Mount flags.
+ *
+ * Unmount uses MNT_FORCE flag.
+ */
+#define	MNT_RDONLY	0x00000001	/* read only filesystem */
+#define	MNT_SYNCHRONOUS	0x00000002	/* file system written synchronously */
+#define	MNT_NOEXEC	0x00000004	/* can't exec from filesystem */
+#define	MNT_NOSUID	0x00000008	/* don't honor setuid bits on fs */
+#define	MNT_NODEV	0x00000010	/* don't interpret special files */
+#define	MNT_UNION	0x00000020	/* union with underlying filesystem */
+#define	MNT_ASYNC	0x00000040	/* file system written asynchronously */
+
+/*
+ * Exported mount flags.
+ */
+#define	MNT_EXRDONLY	0x00000080	/* exported read only */
+#define	MNT_EXPORTED	0x00000100	/* file system is exported */
+#define	MNT_DEFEXPORTED	0x00000200	/* exported to the world */
+#define	MNT_EXPORTANON	0x00000400	/* use anon uid mapping for everyone */
+#define	MNT_EXKERB	0x00000800	/* exported with Kerberos uid mapping */
+
+/*
+ * Flags set by internal operations.
+ */
+#define	MNT_LOCAL	0x00001000	/* filesystem is stored locally */
+#define	MNT_QUOTA	0x00002000	/* quotas are enabled on filesystem */
+#define	MNT_ROOTFS	0x00004000	/* identifies the root filesystem */
+#define	MNT_USER	0x00008000	/* mounted by a user */
+
+/*
+ * Mask of flags that are visible to statfs() (every bit defined above).
+ */
+#define	MNT_VISFLAGMASK	0x0000ffff
+
+/*
+ * Filesystem control flags.
+ *
+ * MNT_MLOCK locks the mount entry so that name lookup cannot proceed
+ * past the mount point.  This keeps the subtree stable during mounts
+ * and unmounts.
+ */
+#define	MNT_UPDATE	0x00010000	/* not a real mount, just an update */
+#define	MNT_DELEXPORT	0x00020000	/* delete export host lists */
+#define	MNT_RELOAD	0x00040000	/* reload filesystem data */
+#define	MNT_FORCE	0x00080000	/* force unmount or readonly change */
+#define	MNT_MLOCK	0x00100000	/* lock so that subtree is stable */
+#define	MNT_MWAIT	0x00200000	/* someone is waiting for lock */
+#define MNT_MPBUSY	0x00400000	/* scan of mount point in progress */
+#define MNT_MPWANT	0x00800000	/* waiting for mount point */
+#define MNT_UNMOUNT	0x01000000	/* unmount in progress */
+#define MNT_WANTRDWR	0x02000000	/* want upgrade to read/write */
+
+/*
+ * Operations supported on a mounted file system; VFS_*() call via mnt_op.
+ */
+#ifdef KERNEL
+#ifdef __STDC__
+struct nameidata;	/* forward declarations for the prototypes below */
+struct mbuf;
+#endif
+
+struct vfsops {
+	int	(*vfs_mount)	__P((struct mount *mp, char *path, caddr_t data,
+				    struct nameidata *ndp, struct proc *p));
+	int	(*vfs_start)	__P((struct mount *mp, int flags,
+				    struct proc *p));
+	int	(*vfs_unmount)	__P((struct mount *mp, int mntflags,
+				    struct proc *p));
+	int	(*vfs_root)	__P((struct mount *mp, struct vnode **vpp));
+	int	(*vfs_quotactl)	__P((struct mount *mp, int cmds, uid_t uid,
+				    caddr_t arg, struct proc *p));
+	int	(*vfs_statfs)	__P((struct mount *mp, struct statfs *sbp,
+				    struct proc *p));
+	int	(*vfs_sync)	__P((struct mount *mp, int waitfor,
+				    struct ucred *cred, struct proc *p));
+	int	(*vfs_vget)	__P((struct mount *mp, ino_t ino,
+				    struct vnode **vpp));
+	int	(*vfs_fhtovp)	__P((struct mount *mp, struct fid *fhp,
+				    struct mbuf *nam, struct vnode **vpp,
+				    int *exflagsp, struct ucred **credanonp));
+	int	(*vfs_vptofh)	__P((struct vnode *vp, struct fid *fhp));
+	int	(*vfs_init)	__P((void));
+};
+
+#define VFS_MOUNT(MP, PATH, DATA, NDP, P) \
+	(*(MP)->mnt_op->vfs_mount)(MP, PATH, DATA, NDP, P)
+#define VFS_START(MP, FLAGS, P)	  (*(MP)->mnt_op->vfs_start)(MP, FLAGS, P)
+#define VFS_UNMOUNT(MP, FORCE, P) (*(MP)->mnt_op->vfs_unmount)(MP, FORCE, P)
+#define VFS_ROOT(MP, VPP)	  (*(MP)->mnt_op->vfs_root)(MP, VPP)
+#define VFS_QUOTACTL(MP,C,U,A,P)  (*(MP)->mnt_op->vfs_quotactl)(MP, C, U, A, P)
+#define VFS_STATFS(MP, SBP, P)	  (*(MP)->mnt_op->vfs_statfs)(MP, SBP, P)
+#define VFS_SYNC(MP, WAIT, C, P)  (*(MP)->mnt_op->vfs_sync)(MP, WAIT, C, P)
+#define VFS_VGET(MP, INO, VPP)	  (*(MP)->mnt_op->vfs_vget)(MP, INO, VPP)
+#define VFS_FHTOVP(MP, FIDP, NAM, VPP, EXFLG, CRED) \
+	(*(MP)->mnt_op->vfs_fhtovp)(MP, FIDP, NAM, VPP, EXFLG, CRED)
+#define	VFS_VPTOFH(VP, FIDP)	  (*(VP)->v_mount->mnt_op->vfs_vptofh)(VP, FIDP)
+#endif /* KERNEL */
+
+/*
+ * Flags for various system call interfaces.
+ *
+ * waitfor flags to vfs_sync() and getfsstat()
+ */
+#define MNT_WAIT	1
+#define MNT_NOWAIT	2
+
+/*
+ * Generic file handle: a file system id plus a per-filesystem file id.
+ */
+struct fhandle {
+	fsid_t	fh_fsid;	/* File system id of mount point */
+	struct	fid fh_fid;	/* File system specific id */
+};
+typedef struct fhandle	fhandle_t;
+
+#ifdef KERNEL
+#include <net/radix.h>
+#include <sys/socket.h>		/* XXX for AF_MAX */
+
+/*
+ * Network address lookup element
+ */
+struct netcred {
+	struct	radix_node netc_rnodes[2];	/* radix tree nodes */
+	int	netc_exflags;	/* presumably MNT_EX* export flags -- confirm */
+	struct	ucred netc_anon;	/* presumably the anon mapping -- confirm */
+};
+
+/*
+ * Network export information
+ */
+struct netexport {
+	struct	netcred ne_defexported;		      /* Default export */
+	struct	radix_node_head *ne_rtable[AF_MAX+1]; /* Individual exports */
+};
+#endif /* KERNEL */
+
+/*
+ * Export arguments for local filesystem mount calls.
+ */
+struct export_args {
+	int	ex_flags;		/* export related flags */
+	uid_t	ex_root;		/* mapping for root uid */
+	struct	ucred ex_anon;		/* mapping for anonymous user */
+	struct	sockaddr *ex_addr;	/* net address to which exported */
+	int	ex_addrlen;		/* and the net address length */
+	struct	sockaddr *ex_mask;	/* mask of valid bits in ex_addr */
+	int	ex_masklen;		/* and the ex_mask length */
+};
+
+/*
+ * Arguments to mount UFS-based filesystems
+ */
+struct ufs_args {
+	char	*fspec;			/* block special device to mount */
+	struct	export_args export;	/* network export information */
+};
+
+#ifdef MFS
+/*
+ * Arguments to mount MFS (the memory-based filesystem)
+ */
+struct mfs_args {
+	char	*fspec;			/* name to export for statfs */
+	struct	export_args export;	/* if exported MFSes are supported */
+	caddr_t	base;			/* base of file system in memory */
+	u_long size;			/* size of file system */
+};
+#endif /* MFS */
+
+#ifdef CD9660
+/*
+ * Arguments to mount ISO 9660 filesystems.
+ */
+struct iso_args {
+	char *fspec;			/* block special device to mount */
+	struct	export_args export;	/* network export info */
+	int flags;			/* mounting flags (ISOFSMNT_* below) */
+
+};
+#define ISOFSMNT_NORRIP		0x00000001 /* disable Rock Ridge Ext. */
+#define ISOFSMNT_GENS		0x00000002 /* enable generation numbers */
+#define ISOFSMNT_EXTATT		0x00000004 /* enable extended attributes */
+#endif /* CD9660 */
+
+#ifdef NFS
+/*
+ * File Handle (32 bytes for version 2), variable up to 1024 for version 3
+ */
+union nfsv2fh {
+	fhandle_t	fh_generic;	/* viewed as a generic file handle */
+	u_char		fh_bytes[32];	/* viewed as 32 raw bytes */
+};
+typedef union nfsv2fh nfsv2fh_t;
+
+/*
+ * Arguments to mount NFS
+ */
+struct nfs_args {
+	struct sockaddr	*addr;		/* file server address */
+	int		addrlen;	/* length of address */
+	int		sotype;		/* Socket type */
+	int		proto;		/* and Protocol */
+	nfsv2fh_t	*fh;		/* File handle to be mounted */
+	int		flags;		/* flags (NFSMNT_* below) */
+	int		wsize;		/* write size in bytes */
+	int		rsize;		/* read size in bytes */
+	int		timeo;		/* initial timeout in .1 secs */
+	int		retrans;	/* times to retry send */
+	int		maxgrouplist;	/* Max. size of group list */
+	int		readahead;	/* # of blocks to readahead */
+	int		leaseterm;	/* Term (sec) of lease */
+	int		deadthresh;	/* Retrans threshold */
+	char		*hostname;	/* server's name */
+};
+
+
+/*
+ * NFS mount option flags
+ */
+#define	NFSMNT_SOFT		0x00000001  /* soft mount (hard is default) */
+#define	NFSMNT_WSIZE		0x00000002  /* set write size */
+#define	NFSMNT_RSIZE		0x00000004  /* set read size */
+#define	NFSMNT_TIMEO		0x00000008  /* set initial timeout */
+#define	NFSMNT_RETRANS		0x00000010  /* set number of request retries */
+#define	NFSMNT_MAXGRPS		0x00000020  /* set maximum grouplist size */
+#define	NFSMNT_INT		0x00000040  /* allow interrupts on hard mount */
+#define	NFSMNT_NOCONN		0x00000080  /* Don't Connect the socket */
+#define	NFSMNT_NQNFS		0x00000100  /* Use Nqnfs protocol */
+#define	NFSMNT_MYWRITE		0x00000200  /* Assume writes were mine */
+#define	NFSMNT_KERB		0x00000400  /* Use Kerberos authentication */
+#define	NFSMNT_DUMBTIMR		0x00000800  /* Don't estimate rtt dynamically */
+#define	NFSMNT_RDIRALOOK	0x00001000  /* Do lookup with readdir (nqnfs) */
+#define	NFSMNT_LEASETERM	0x00002000  /* set lease term (nqnfs) */
+#define	NFSMNT_READAHEAD	0x00004000  /* set read ahead */
+#define	NFSMNT_DEADTHRESH	0x00008000  /* set dead server retry thresh */
+#define	NFSMNT_NQLOOKLEASE	0x00010000  /* Get lease for lookup */
+#define	NFSMNT_RESVPORT		0x00020000  /* Allocate a reserved port */
+#define	NFSMNT_INTERNAL		0xffe00000  /* Mask of bits set internally */
+#define	NFSMNT_MNTD		0x00200000  /* Mnt server for mnt point */
+#define	NFSMNT_DISMINPROG	0x00400000  /* Dismount in progress */
+#define	NFSMNT_DISMNT		0x00800000  /* Dismounted */
+#define	NFSMNT_SNDLOCK		0x01000000  /* Send socket lock */
+#define	NFSMNT_WANTSND		0x02000000  /* Want the send socket lock */
+#define	NFSMNT_RCVLOCK		0x04000000  /* Rcv socket lock */
+#define	NFSMNT_WANTRCV		0x08000000  /* Want the rcv socket lock */
+#define	NFSMNT_WAITAUTH		0x10000000  /* Wait for authentication */
+#define	NFSMNT_HASAUTH		0x20000000  /* Has authenticator */
+#define	NFSMNT_WANTAUTH		0x40000000  /* Wants an authenticator */
+#define	NFSMNT_AUTHERR		0x80000000  /* Authentication error */
+#endif /* NFS */
+
+#ifdef KERNEL
+/*
+ * Exported vfs (mount-level) functions and tables.
+ */
+struct	mount *getvfs __P((fsid_t *));      /* return vfs given fsid */
+int	vfs_export			    /* process mount export info */
+	  __P((struct mount *, struct netexport *, struct export_args *));
+struct	netcred *vfs_export_lookup	    /* lookup host in fs export list */
+	  __P((struct mount *, struct netexport *, struct mbuf *));
+int	vfs_lock __P((struct mount *));     /* lock a vfs */
+int	vfs_mountedon __P((struct vnode *));/* is a vfs mounted on vp */
+void	vfs_unlock __P((struct mount *));   /* unlock a vfs */
+extern	TAILQ_HEAD(mntlist, mount) mountlist;	/* mounted filesystem list */
+extern	struct vfsops *vfssw[];			/* filesystem type table */
+
+#else /* KERNEL */
+
+#include <sys/cdefs.h>
+
+__BEGIN_DECLS
+int	fstatfs __P((int, struct statfs *));
+int	getfh __P((const char *, fhandle_t *));
+int	getfsstat __P((struct statfs *, long, int));
+int	getmntinfo __P((struct statfs **, int));
+int	mount __P((int, const char *, int, void *));
+int	statfs __P((const char *, struct statfs *));
+int	unmount __P((const char *, int));
+__END_DECLS
+
+#endif /* KERNEL */
diff --git a/sys/sys/msgbuf.h b/sys/sys/msgbuf.h
new file mode 100644
index 00000000000..57ee0b6f30a
--- /dev/null
+++ b/sys/sys/msgbuf.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 1981, 1984, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)msgbuf.h	8.1 (Berkeley) 6/2/93
+ */
+
+#define	MSG_BSIZE	(4096 - 3 * sizeof(long))	/* 4096 less the 3 long fields */
+struct	msgbuf {
+#define	MSG_MAGIC	0x063061
+	long	msg_magic;		/* presumably MSG_MAGIC when valid -- confirm */
+	long	msg_bufx;		/* write pointer */
+	long	msg_bufr;		/* read pointer */
+	char	msg_bufc[MSG_BSIZE];	/* buffer */
+};
+#ifdef KERNEL
+struct	msgbuf *msgbufp;	/* NOTE(review): a definition, not an extern declaration */
+#endif
diff --git a/sys/sys/mtio.h b/sys/sys/mtio.h
new file mode 100644
index 00000000000..7b4ef0c017c
--- /dev/null
+++ b/sys/sys/mtio.h
@@ -0,0 +1,120 @@
+/*
+ * Copyright (c) 1982, 1986, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)mtio.h	8.1 (Berkeley) 6/2/93
+ */
+
+/*
+ * Structures and definitions for mag tape io control commands
+ */
+
+/* structure for MTIOCTOP - mag tape op command */
+struct mtop {
+	short	mt_op;		/* operations defined below */
+	daddr_t	mt_count;	/* how many of them */
+};
+
+/* operations (values for mt_op) */
+#define MTWEOF		0	/* write an end-of-file record */
+#define MTFSF		1	/* forward space file */
+#define MTBSF		2	/* backward space file */
+#define MTFSR		3	/* forward space record */
+#define MTBSR		4	/* backward space record */
+#define MTREW		5	/* rewind */
+#define MTOFFL		6	/* rewind and put the drive offline */
+#define MTNOP		7	/* no operation, sets status only */
+#define MTCACHE		8	/* enable controller cache */
+#define MTNOCACHE	9	/* disable controller cache */
+
+/* structure for MTIOCGET - mag tape get status command */
+
+struct mtget {
+	short	mt_type;	/* type of magtape device (MT_IS* below) */
+/* the following two registers are grossly device dependent */
+	short	mt_dsreg;	/* ``drive status'' register */
+	short	mt_erreg;	/* ``error'' register */
+/* end device-dependent registers */
+	short	mt_resid;	/* residual count */
+/* the following two are not yet implemented */
+	daddr_t	mt_fileno;	/* file number of current position */
+	daddr_t	mt_blkno;	/* block number of current position */
+/* end not yet implemented */
+};
+
+/*
+ * Constants for the mt_type field.  These are the same
+ * for controllers compatible with the types listed.
+ */
+#define	MT_ISTS		0x01		/* TS-11 */
+#define	MT_ISHT		0x02		/* TM03 Massbus: TE16, TU45, TU77 */
+#define	MT_ISTM		0x03		/* TM11/TE10 Unibus */
+#define	MT_ISMT		0x04		/* TM78/TU78 Massbus */
+#define	MT_ISUT		0x05		/* SI TU-45 emulation on Unibus */
+#define	MT_ISCPC	0x06		/* SUN */
+#define	MT_ISAR		0x07		/* SUN */
+#define	MT_ISTMSCP	0x08		/* DEC TMSCP protocol (TU81, TK50) */
+#define MT_ISCY		0x09		/* CCI Cipher */
+#define MT_ISCT		0x0a		/* HP 1/4 tape */
+#define MT_ISFHP	0x0b		/* HP 7980 1/2 tape */
+#define MT_ISEXABYTE	0x0c		/* Exabyte */
+#define MT_ISEXA8200	0x0c		/* Exabyte EXB-8200 (same value as MT_ISEXABYTE) */
+#define MT_ISEXA8500	0x0d		/* Exabyte EXB-8500 */
+#define MT_ISVIPER1	0x0e		/* Archive Viper-150 */
+#define MT_ISPYTHON	0x0f		/* Archive Python (DAT) */
+#define MT_ISHPDAT	0x10		/* HP 35450A DAT drive */
+#define MT_ISMFOUR	0x11		/* M4 Data 1/2 9track drive */
+#define MT_ISTK50	0x12		/* DEC SCSI TK50 */
+#define MT_ISMT02	0x13		/* Emulex MT02 SCSI tape controller */
+
+/* mag tape io control commands */
+#define	MTIOCTOP	_IOW('m', 1, struct mtop)	/* do a mag tape op */
+#define	MTIOCGET	_IOR('m', 2, struct mtget)	/* get tape status */
+#define MTIOCIEOT	_IO('m', 3)			/* ignore EOT error */
+#define MTIOCEEOT	_IO('m', 4)			/* enable EOT error */
+
+#ifndef KERNEL
+#define	DEFTAPE	"/dev/rmt12"	/* defined for non-kernel builds only */
+#endif
+
+#ifdef	KERNEL
+/*
+ * Bit fields of the minor device number (values are octal).
+ */
+
+#define	T_UNIT		003		/* unit selection */
+#define	T_NOREWIND	004		/* no rewind on close */
+#define	T_DENSEL	030		/* density select */
+#define	T_800BPI	000		/* select  800 bpi */
+#define	T_1600BPI	010		/* select 1600 bpi */
+#define	T_6250BPI	020		/* select 6250 bpi */
+#define	T_BADBPI	030		/* undefined selection */
+#endif
diff --git a/sys/sys/namei.h b/sys/sys/namei.h
new file mode 100644
index 00000000000..74ff3602c26
--- /dev/null
+++ b/sys/sys/namei.h
@@ -0,0 +1,188 @@
+/*
+ * Copyright (c) 1985, 1989, 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)namei.h	8.2 (Berkeley) 1/4/94
+ */
+
+#ifndef _SYS_NAMEI_H_
+#define	_SYS_NAMEI_H_
+
+/*
+ * Encapsulation of namei parameters and lookup results.
+ */
+struct nameidata {
+	/*
+	 * Arguments to namei/lookup.
+	 */
+	caddr_t	ni_dirp;		/* pathname pointer */
+	enum	uio_seg ni_segflg;	/* location of pathname */
+     /* u_long	ni_nameiop;		   see ni_cnd.cn_nameiop */
+     /* u_long	ni_flags;		   see ni_cnd.cn_flags */
+     /* struct	proc *ni_proc;		   see ni_cnd.cn_proc */
+	/*
+	 * Arguments to lookup.
+	 */
+     /* struct	ucred *ni_cred;		   see ni_cnd.cn_cred */
+	struct	vnode *ni_startdir;	/* starting directory */
+	struct	vnode *ni_rootdir;	/* logical root directory */
+	/*
+	 * Results: returned from/manipulated by lookup
+	 */
+	struct	vnode *ni_vp;		/* vnode of result */
+	struct	vnode *ni_dvp;		/* vnode of intermediate directory */
+	/*
+	 * Shared between namei and lookup/commit routines.
+	 */
+	long	ni_pathlen;		/* remaining chars in path */
+	char	*ni_next;		/* next location in pathname */
+	u_long	ni_loopcnt;		/* count of symlinks encountered */
+	/*
+	 * Lookup parameters: this structure describes the subset of
+	 * information from the nameidata structure that is passed
+	 * through the VOP interface.
+	 */
+	struct componentname {
+		/*
+		 * Arguments to lookup.
+		 */
+		u_long	cn_nameiop;	/* namei operation */
+		u_long	cn_flags;	/* flags to namei */
+		struct	proc *cn_proc;	/* process requesting lookup */
+		struct	ucred *cn_cred;	/* credentials */
+		/*
+		 * Shared between lookup and commit routines.
+		 */
+		char	*cn_pnbuf;	/* pathname buffer */
+		char	*cn_nameptr;	/* pointer to looked up name */
+		long	cn_namelen;	/* length of looked up component */
+		u_long	cn_hash;	/* hash value of looked up name */
+		long	cn_consume;	/* chars to consume in lookup() */
+	} ni_cnd;
+};
+
+#ifdef KERNEL
+/*
+ * namei operations
+ */
+#define	LOOKUP		0	/* perform name lookup only */
+#define	CREATE		1	/* setup for file creation */
+#define	DELETE		2	/* setup for file deletion */
+#define	RENAME		3	/* setup for file renaming */
+#define	OPMASK		3	/* mask for operation */
+/*
+ * namei operational modifier flags, stored in ni_cnd.cn_flags
+ */
+#define	LOCKLEAF	0x0004	/* lock inode on return */
+#define	LOCKPARENT	0x0008	/* want parent vnode returned locked */
+#define	WANTPARENT	0x0010	/* want parent vnode returned unlocked */
+#define	NOCACHE		0x0020	/* name must not be left in cache */
+#define	FOLLOW		0x0040	/* follow symbolic links */
+#define	NOFOLLOW	0x0000	/* do not follow symbolic links (pseudo) */
+#define	MODMASK		0x00fc	/* mask of operational modifiers */
+/*
+ * Namei parameter descriptors.
+ *
+ * SAVENAME may be set by either the callers of namei or by VOP_LOOKUP.
+ * If the caller of namei sets the flag (for example execve wants to
+ * know the name of the program that is being executed), then it must
+ * free the buffer. If VOP_LOOKUP sets the flag, then the buffer must
+ * be freed by either the commit routine or the VOP_ABORT routine.
+ * SAVESTART is set only by the callers of namei. It implies SAVENAME
+ * plus the addition of saving the parent directory that contains the
+ * name in ni_startdir. It allows repeated calls to lookup for the
+ * name being sought. The caller is responsible for releasing the
+ * buffer and for vrele'ing ni_startdir.
+ */
+#define	NOCROSSMOUNT	0x00100	/* do not cross mount points */
+#define	RDONLY		0x00200	/* lookup with read-only semantics */
+#define	HASBUF		0x00400	/* has allocated pathname buffer */
+#define	SAVENAME	0x00800	/* save pathname buffer */
+#define	SAVESTART	0x01000	/* save starting directory */
+#define ISDOTDOT	0x02000	/* current component name is .. */
+#define MAKEENTRY	0x04000	/* entry is to be added to name cache */
+#define ISLASTCN	0x08000	/* this is last component of pathname */
+#define ISSYMLINK	0x10000	/* symlink needs interpretation */
+#define PARAMASK	0xfff00	/* mask of parameter descriptors */
+/*
+ * Initialization of an nameidata structure.
+ */
+#define NDINIT(ndp, op, flags, segflg, namep, p) { \
+	(ndp)->ni_cnd.cn_nameiop = op; \
+	(ndp)->ni_cnd.cn_flags = flags; \
+	(ndp)->ni_segflg = segflg; \
+	(ndp)->ni_dirp = namep; \
+	(ndp)->ni_cnd.cn_proc = p; \
+}
+#endif
+
+/*
+ * This structure describes the elements in the cache of recent
+ * names looked up by namei. NCHNAMLEN is sized to make the structure
+ * size a power of two to optimize malloc. Minimum reasonable
+ * size is 15.
+ */
+
+#define	NCHNAMLEN	31	/* maximum name segment length we bother with */
+
+struct	namecache {
+	struct	namecache *nc_forw;	/* hash chain */
+	struct	namecache **nc_back;	/* hash chain */
+	struct	namecache *nc_nxt;	/* LRU chain */
+	struct	namecache **nc_prev;	/* LRU chain */
+	struct	vnode *nc_dvp;		/* vnode of parent of name */
+	u_long	nc_dvpid;		/* capability number of nc_dvp */
+	struct	vnode *nc_vp;		/* vnode the name refers to */
+	u_long	nc_vpid;		/* capability number of nc_vp */
+	char	nc_nlen;		/* length of name */
+	char	nc_name[NCHNAMLEN];	/* segment name */
+};
+
+#ifdef KERNEL
+u_long	nextvnodeid;	/* NOTE(review): a definition, not an extern declaration */
+int	namei __P((struct nameidata *ndp));
+int	lookup __P((struct nameidata *ndp));
+#endif
+
+/*
+ * Statistics on the usefulness of the namei caches.
+ */
+struct	nchstats {
+	long	ncs_goodhits;		/* hits that we can really use */
+	long	ncs_neghits;		/* negative hits that we can use */
+	long	ncs_badhits;		/* hits we must drop */
+	long	ncs_falsehits;		/* hits with id mismatch */
+	long	ncs_miss;		/* misses */
+	long	ncs_long;		/* long names that ignore cache */
+	long	ncs_pass2;		/* names found with passes == 2 */
+	long	ncs_2passes;		/* number of times we attempt it */
+};
+#endif /* !_SYS_NAMEI_H_ */
diff --git a/sys/sys/param.h b/sys/sys/param.h
new file mode 100644
index 00000000000..91bdfd8facc
--- /dev/null
+++ b/sys/sys/param.h
@@ -0,0 +1,216 @@
+/*-
+ * Copyright (c) 1982, 1986, 1989, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)param.h	8.2 (Berkeley) 1/21/94
+ */
+
+#define	BSD	199306		/* System version (year & month). */
+#define BSD4_3	1
+#define BSD4_4	1
+
+#ifndef NULL
+#define	NULL	0
+#endif
+
+#ifndef LOCORE
+#include <sys/types.h>
+#endif
+
+/*
+ * Machine-independent constants (some used in following include files).
+ * Redefined constants are from POSIX 1003.1 limits file.
+ *
+ * MAXCOMLEN should be >= sizeof(ac_comm) (see <acct.h>)
+ * MAXLOGNAME should be >= UT_NAMESIZE (see <utmp.h>)
+ */
+#include <sys/syslimits.h>
+
+#define	MAXCOMLEN	16		/* max command name remembered */
+#define	MAXINTERP	32		/* max interpreter file name length */
+#define	MAXLOGNAME	12		/* max login name length */
+#define	MAXUPRC		CHILD_MAX	/* max simultaneous processes */
+#define	NCARGS		ARG_MAX		/* max bytes for an exec function */
+#define	NGROUPS		NGROUPS_MAX	/* max number groups */
+#define	NOFILE		OPEN_MAX	/* max open files per process */
+#define	NOGROUP		65535		/* marker for empty group set member */
+#define MAXHOSTNAMELEN	256		/* max hostname size */
+
+/* More types and definitions used throughout the kernel. */
+#ifdef KERNEL
+#include <sys/cdefs.h>
+#include <sys/errno.h>
+#include <sys/time.h>
+#include <sys/resource.h>
+#include <sys/ucred.h>
+#include <sys/uio.h>
+#endif
+
+/* Signals. */
+#include <sys/signal.h>
+
+/* Machine type dependent parameters. */
+#include <machine/param.h>
+#include <machine/limits.h>
+
+/*
+ * Priorities.  Note that with 32 run queues, differences less than 4 are
+ * insignificant.
+ */
+#define	PSWP	0
+#define	PVM	4
+#define	PINOD	8
+#define	PRIBIO	16
+#define	PVFS	20
+#define	PZERO	22		/* No longer magic, shouldn't be here.  XXX */
+#define	PSOCK	24
+#define	PWAIT	32
+#define	PLOCK	36
+#define	PPAUSE	40
+#define	PUSER	50
+#define	MAXPRI	127		/* Priorities range from 0 through MAXPRI. */
+
+#define	PRIMASK	0x0ff
+#define	PCATCH	0x100		/* OR'd with pri for tsleep to check signals */
+
+#define	NZERO	0		/* default "nice" */
+
+#define	NBPW	sizeof(int)	/* number of bytes per word (integer) */
+
+#define	CMASK	022		/* default file mask: S_IWGRP|S_IWOTH */
+#define	NODEV	(dev_t)(-1)	/* non-existent device */
+
+/*
+ * Clustering of hardware pages on machines with ridiculously small
+ * page sizes is done here.  The paging subsystem deals with units of
+ * CLSIZE pte's describing NBPG (from machine/param.h) pages each.
+ */
+#define	CLBYTES		(CLSIZE*NBPG)
+#define	CLOFSET		(CLSIZE*NBPG-1)	/* for clusters, like PGOFSET */
+#define	claligned(x)	((((int)(x))&CLOFSET)==0)
+#define	CLOFF		CLOFSET
+#define	CLSHIFT		(PGSHIFT+CLSIZELOG2)
+
+#if CLSIZE==1
+#define	clbase(i)	(i)
+#define	clrnd(i)	(i)
+#else
+/* Give the base virtual address (first of CLSIZE). */
+#define	clbase(i)	((i) &~ (CLSIZE-1))
+/* Round a number of clicks up to a whole cluster. */
+#define	clrnd(i)	(((i) + (CLSIZE-1)) &~ (CLSIZE-1))
+#endif
+
+#define	CBLOCK	64		/* Clist block size, must be a power of 2. */
+#define CBQSIZE	(CBLOCK/NBBY)	/* Quote bytes/cblock - can do better. */
+				/* Data chars/clist. */
+#define	CBSIZE	(CBLOCK - sizeof(struct cblock *) - CBQSIZE)
+#define	CROUND	(CBLOCK - 1)	/* Clist rounding. */
+
+/*
+ * File system parameters and macros.
+ *
+ * The file system is made out of blocks of at most MAXBSIZE units, with
+ * smaller units (fragments) only in the last direct block.  MAXBSIZE
+ * primarily determines the size of buffers in the buffer pool.  It may be
+ * made larger without any effect on existing file systems; however making
+ * it smaller may make some file systems unmountable.
+ */
+#define	MAXBSIZE	MAXPHYS
+#define MAXFRAG 	8
+
+/*
+ * MAXPATHLEN defines the longest permissible path length after expanding
+ * symbolic links. It is used to allocate a temporary buffer from the buffer
+ * pool in which to do the name expansion, hence should be a power of two,
+ * and must be less than or equal to MAXBSIZE.  MAXSYMLINKS defines the
+ * maximum number of symbolic links that may be expanded in a path name.
+ * It should be set high enough to allow all legitimate uses, but halt
+ * infinite loops reasonably quickly.
+ */
+#define	MAXPATHLEN	PATH_MAX
+#define MAXSYMLINKS	8
+
+/* Bit map related macros; a is indexed in NBBY-bit units, i is a bit number. */
+#define	setbit(a,i)	((a)[(i)/NBBY] |= 1<<((i)%NBBY))	/* set bit i */
+#define	clrbit(a,i)	((a)[(i)/NBBY] &= ~(1<<((i)%NBBY)))	/* clear bit i */
+#define	isset(a,i)	((a)[(i)/NBBY] & (1<<((i)%NBBY)))	/* nonzero if bit i is set */
+#define	isclr(a,i)	(((a)[(i)/NBBY] & (1<<((i)%NBBY))) == 0)	/* 1 if bit i is clear */
+
+/* Macros for counting and rounding; arguments are evaluated more than once. */
+#ifndef howmany
+#define	howmany(x, y)	(((x)+((y)-1))/(y))	/* number of y-sized units needed to hold x */
+#endif
+#define	roundup(x, y)	((((x)+((y)-1))/(y))*(y))	/* x rounded up to a multiple of y */
+#define powerof2(x)	((((x)-1)&(x))==0)	/* note: also true for x == 0 */
+
+/* Macros for min/max (only when KERNEL is not defined); arguments may be evaluated twice. */
+#ifndef KERNEL
+#define	MIN(a,b) (((a)<(b))?(a):(b))
+#define	MAX(a,b) (((a)>(b))?(a):(b))
+#endif
+
+/*
+ * Constants for setting the parameters of the kernel memory allocator.
+ *
+ * 2 ** MINBUCKET is the smallest unit of memory that will be
+ * allocated. It must be at least large enough to hold a pointer.
+ *
+ * Units of memory less or equal to MAXALLOCSAVE will permanently
+ * allocate physical memory; requests for these size pieces of
+ * memory are quite fast. Allocations greater than MAXALLOCSAVE must
+ * always allocate and free physical memory; requests for these
+ * size allocations should be done infrequently as they will be slow.
+ *
+ * Constraints: CLBYTES <= MAXALLOCSAVE <= 2 ** (MINBUCKET + 14), and
+ * MAXALLOCSAVE must be a power of two.
+ */
+#define MINBUCKET	4		/* 4 => min allocation of 16 bytes */
+#define MAXALLOCSAVE	(2 * CLBYTES)
+
+/*
+ * Scale factor for scaled integers used to count %cpu time and load avgs.
+ *
+ * The number of CPU `tick's that map to a unique `%age' can be expressed
+ * by the formula (1 / (2 ^ (FSHIFT - 11))).  The maximum load average that
+ * can be calculated (assuming 32 bits) can be closely approximated using
+ * the formula (2 ^ (2 * (16 - FSHIFT))) for (FSHIFT < 15).
+ *
+ * For the scheduler to maintain a 1:1 mapping of CPU `tick' to `%age',
+ * FSHIFT must be at least 11; this gives us a maximum load avg of ~1024.
+ */
+#define	FSHIFT	11		/* bits to right of fixed binary point */
+#define FSCALE	(1<<FSHIFT)
diff --git a/sys/sys/proc.h b/sys/sys/proc.h
new file mode 100644
index 00000000000..bbe60cddcac
--- /dev/null
+++ b/sys/sys/proc.h
@@ -0,0 +1,263 @@
+/*-
+ * Copyright (c) 1986, 1989, 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)proc.h	8.8 (Berkeley) 1/21/94
+ */
+
+#ifndef _SYS_PROC_H_
+#define	_SYS_PROC_H_
+
+#include <machine/proc.h>		/* Machine-dependent proc substruct. */
+#include <sys/select.h>			/* For struct selinfo. */
+
+/*
+ * One structure allocated per session.
+ */
+struct	session {
+	int	s_count;		/* Ref cnt; pgrps in session. */
+	struct	proc *s_leader;		/* Session leader. */
+	struct	vnode *s_ttyvp;		/* Vnode of controlling terminal. */
+	struct	tty *s_ttyp;		/* Controlling terminal. */
+	char	s_login[MAXLOGNAME];	/* Setlogin() name. */
+};
+
+/*
+ * One structure allocated per process group.
+ */
+struct	pgrp {
+	struct	pgrp *pg_hforw;		/* Forward link in hash bucket. */
+	struct	proc *pg_mem;		/* Pointer to pgrp members. */
+	struct	session *pg_session;	/* Pointer to session. */
+	pid_t	pg_id;			/* Pgrp id. */
+	int	pg_jobc;	/* # procs qualifying pgrp for job control */
+};
+
+/*
+ * Description of a process.
+ *
+ * This structure contains the information needed to manage a thread of
+ * control, known in UN*X as a process; it has references to substructures
+ * containing descriptions of things that the process uses, but may share
+ * with related processes.  The process structure and the substructures
+ * are always addressable except for those marked "(PROC ONLY)" below,
+ * which might be addressable only on a processor on which the process
+ * is running.
+ */
+struct	proc {
+	struct	proc *p_forw;		/* Doubly-linked run/sleep queue. */
+	struct	proc *p_back;
+	struct	proc *p_next;		/* Linked list of active procs */
+	struct	proc **p_prev;		/*    and zombies. */
+
+	/* substructures: */
+	struct	pcred *p_cred;		/* Process owner's identity. */
+	struct	filedesc *p_fd;		/* Ptr to open files structure. */
+	struct	pstats *p_stats;	/* Accounting/statistics (PROC ONLY). */
+	struct	plimit *p_limit;	/* Process limits. */
+	struct	vmspace *p_vmspace;	/* Address space. */
+	struct	sigacts *p_sigacts;	/* Signal actions, state (PROC ONLY). */
+
+#define	p_ucred		p_cred->pc_ucred
+#define	p_rlimit	p_limit->pl_rlimit
+
+	int	p_flag;			/* P_* flags. */
+	char	p_stat;			/* S* process status. */
+	char	p_pad1[3];		/* Presumably padding after p_stat; confirm. */
+
+	pid_t	p_pid;			/* Process identifier. */
+	struct	proc *p_hash;	 /* Hashed based on p_pid for kill+exit+... */
+	struct	proc *p_pgrpnxt; /* Pointer to next process in process group. */
+	struct	proc *p_pptr;	 /* Pointer to process structure of parent. */
+	struct	proc *p_osptr;	 /* Pointer to older sibling processes. */
+
+/* The following fields are all zeroed upon creation in fork. */
+#define	p_startzero	p_ysptr
+	struct	proc *p_ysptr;	 /* Pointer to younger siblings. */
+	struct	proc *p_cptr;	 /* Pointer to youngest living child. */
+	pid_t	p_oppid;	 /* Save parent pid during ptrace. XXX */
+	int	p_dupfd;	 /* Sideways return value from fdopen. XXX */
+
+	/* scheduling */
+	u_int	p_estcpu;	 /* Time averaged value of p_cpticks. */
+	int	p_cpticks;	 /* Ticks of cpu time. */
+	fixpt_t	p_pctcpu;	 /* %cpu for this process during p_swtime */
+	void	*p_wchan;	 /* Sleep address. */
+	char	*p_wmesg;	 /* Reason for sleep. */
+	u_int	p_swtime;	 /* Time swapped in or out. */
+	u_int	p_slptime;	 /* Time since last blocked. */
+
+	struct	itimerval p_realtimer;	/* Alarm timer. */
+	struct	timeval p_rtime;	/* Real time. */
+	u_quad_t p_uticks;		/* Statclock hits in user mode. */
+	u_quad_t p_sticks;		/* Statclock hits in system mode. */
+	u_quad_t p_iticks;		/* Statclock hits processing intr. */
+
+	int	p_traceflag;		/* Kernel trace points. */
+	struct	vnode *p_tracep;	/* Trace to vnode. */
+
+	int	p_siglist;		/* Signals arrived but not delivered. */
+
+	struct	vnode *p_textvp;	/* Vnode of executable. */
+
+	long	p_spare[5];		/* pad to 256, avoid shifting eproc. */
+
+/* End area that is zeroed on creation. */
+#define	p_endzero	p_startcopy
+
+/* The following fields are all copied upon creation in fork. */
+#define	p_startcopy	p_sigmask
+
+	sigset_t p_sigmask;	/* Current signal mask. */
+	sigset_t p_sigignore;	/* Signals being ignored. */
+	sigset_t p_sigcatch;	/* Signals being caught by user. */
+
+	u_char	p_priority;	/* Process priority. */
+	u_char	p_usrpri;	/* User-priority based on p_estcpu and p_nice. */
+	char	p_nice;		/* Process "nice" value. */
+	char	p_comm[MAXCOMLEN+1];	/* Presumably the command name; MAXCOMLEN bytes plus one. */
+
+	struct 	pgrp *p_pgrp;	/* Pointer to process group. */
+
+/* End area that is copied on creation. */
+#define	p_endcopy	p_thread
+	int	p_thread;	/* Id for this "thread"; Mach glue. XXX */
+	struct	user *p_addr;	/* Kernel virtual addr of u-area (PROC ONLY). */
+	struct	mdproc p_md;	/* Any machine-dependent fields. */
+
+	u_short	p_xstat;	/* Exit status for wait; also stop signal. */
+	u_short	p_acflag;	/* Accounting flags. */
+	struct	rusage *p_ru;	/* Exit information. XXX */
+
+};
+
+#define	p_session	p_pgrp->pg_session
+#define	p_pgid		p_pgrp->pg_id
+
+/* Status values. */
+#define	SIDL	1		/* Process being created by fork. */
+#define	SRUN	2		/* Currently runnable. */
+#define	SSLEEP	3		/* Sleeping on an address. */
+#define	SSTOP	4		/* Process debugging or suspension. */
+#define	SZOMB	5		/* Awaiting collection by parent. */
+
+/* These flags are kept in p_flag. */
+#define	P_ADVLOCK	0x00001	/* Process may hold a POSIX advisory lock. */
+#define	P_CONTROLT	0x00002	/* Has a controlling terminal. */
+#define	P_INMEM		0x00004	/* Loaded into memory. */
+#define	P_NOCLDSTOP	0x00008	/* No SIGCHLD when children stop. */
+#define	P_PPWAIT	0x00010	/* Parent is waiting for child to exec/exit. */
+#define	P_PROFIL	0x00020	/* Has started profiling. */
+#define	P_SELECT	0x00040	/* Selecting; wakeup/waiting danger. */
+#define	P_SINTR		0x00080	/* Sleep is interruptible. */
+#define	P_SUGID		0x00100	/* Had set id privileges since last exec. */
+#define	P_SYSTEM	0x00200	/* System proc: no sigs, stats or swapping. */
+#define	P_TIMEOUT	0x00400	/* Timing out during sleep. */
+#define	P_TRACED	0x00800	/* Debugged process being traced. */
+#define	P_WAITED	0x01000	/* Debugging process has waited for child. */
+#define	P_WEXIT		0x02000	/* Working on exiting. */
+#define P_EXEC		0x04000	/* Process called exec. */
+
+/* Should probably be changed into a hold count. */
+#define	P_NOSWAP	0x08000	/* Another flag to prevent swap out. */
+#define	P_PHYSIO	0x10000	/* Doing physical I/O. */
+
+/* Should be moved to machine-dependent areas. */
+#define	P_OWEUPC	0x20000	/* Owe process an addupc() call at next ast. */
+
+/*
+ * MOVE TO ucred.h?
+ *
+ * Shareable process credentials (always resident).  This includes a reference
+ * to the current user credentials as well as real and saved ids that may be
+ * used to change ids.
+ */
+struct	pcred {
+	struct	ucred *pc_ucred;	/* Current credentials. */
+	uid_t	p_ruid;			/* Real user id. */
+	uid_t	p_svuid;		/* Saved effective user id. */
+	gid_t	p_rgid;			/* Real group id. */
+	gid_t	p_svgid;		/* Saved effective group id. */
+	int	p_refcnt;		/* Number of references. */
+};
+
+#ifdef KERNEL
+/*
+ * We use process IDs <= PID_MAX; PID_MAX + 1 must also fit in a pid_t,
+ * as it is used to represent "no process group".
+ */
+#define	PID_MAX		30000
+#define	NO_PID		30001
+#define	PIDHASH(pid)	((pid) & pidhashmask)
+
+#define SESS_LEADER(p)	((p)->p_session->s_leader == (p))
+#define	SESSHOLD(s)	((s)->s_count++)
+#define	SESSRELE(s) {							\
+	if (--(s)->s_count == 0)					\
+		FREE(s, M_SESSION);					\
+}
+
+extern struct proc *pidhash[];		/* In param.c. */
+extern struct pgrp *pgrphash[];		/* In param.c. */
+extern struct proc *curproc;		/* Current running proc. */
+extern struct proc proc0;		/* Process slot for swapper. */
+extern int nprocs, maxproc;		/* Current and max number of procs. */
+extern int pidhashmask;			/* In param.c. */
+
+volatile struct proc *allproc; 		/* List of active procs. */
+struct proc *zombproc;			/* List of zombie procs. */
+struct proc *initproc, *pageproc;	/* Process slots for init, pager. */
+
+#define	NQS	32			/* 32 run queues. */
+int	whichqs;			/* Bit mask summary of non-empty Q's. */
+struct	prochd {
+	struct	proc *ph_link;		/* Linked list of running processes. */
+	struct	proc *ph_rlink;
+} qs[NQS];
+
+struct proc *pfind __P((pid_t));	/* Find process by id. */
+struct pgrp *pgfind __P((pid_t));	/* Find process group by id. */
+
+void	mi_switch __P((void));
+void	resetpriority __P((struct proc *));
+void	setrunnable __P((struct proc *));
+void	setrunqueue __P((struct proc *));
+void	sleep __P((void *chan, int pri));
+int	tsleep __P((void *chan, int pri, char *wmesg, int timo));
+void	unsleep __P((struct proc *));
+void	wakeup __P((void *chan));
+#endif	/* KERNEL */
+#endif	/* !_SYS_PROC_H_ */
diff --git a/sys/sys/protosw.h b/sys/sys/protosw.h
new file mode 100644
index 00000000000..76ac720d85f
--- /dev/null
+++ b/sys/sys/protosw.h
@@ -0,0 +1,210 @@
+/*-
+ * Copyright (c) 1982, 1986, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)protosw.h	8.1 (Berkeley) 6/2/93
+ */
+
+/*
+ * Protocol switch table.
+ *
+ * Each protocol has a handle initializing one of these structures,
+ * which is used for protocol-protocol and system-protocol communication.
+ *
+ * A protocol is called through the pr_init entry before any other.
+ * Thereafter it is called every 200ms through the pr_fasttimo entry and
+ * every 500ms through the pr_slowtimo for timer based actions.
+ * The system will call the pr_drain entry if it is low on space and
+ * this should throw away any non-critical data.
+ *
+ * Protocols pass data between themselves as chains of mbufs using
+ * the pr_input and pr_output hooks.  Pr_input passes data up (towards
+ * UNIX) and pr_output passes it down (towards the imps); control
+ * information passes up and down on pr_ctlinput and pr_ctloutput.
+ * The protocol is responsible for the space occupied by any of the
+ * arguments to these entries and must dispose of it.
+ *
+ * The userreq routine interfaces protocols to the system and is
+ * described below.
+ */
+struct protosw {
+	short	pr_type;		/* socket type used for */
+	struct	domain *pr_domain;	/* domain protocol a member of */
+	short	pr_protocol;		/* protocol number */
+	short	pr_flags;		/* see below */
+/* protocol-protocol hooks */
+	void	(*pr_input)();		/* input to protocol (from below) */
+	int	(*pr_output)();		/* output to protocol (from above) */
+	void	(*pr_ctlinput)();	/* control input (from below) */
+	int	(*pr_ctloutput)();	/* control output (from above) */
+/* user-protocol hook */
+	int	(*pr_usrreq)();		/* user request: see list below */
+/* utility hooks */
+	void	(*pr_init)();		/* initialization hook */
+	void	(*pr_fasttimo)();	/* fast timeout (200ms) */
+	void	(*pr_slowtimo)();	/* slow timeout (500ms) */
+	void	(*pr_drain)();		/* flush any excess space possible */
+	int	(*pr_sysctl)();		/* sysctl for protocol */
+};
+
+#define	PR_SLOWHZ	2		/* 2 slow timeouts per second */
+#define	PR_FASTHZ	5		/* 5 fast timeouts per second */
+
+/*
+ * Values for pr_flags.
+ * PR_ADDR requires PR_ATOMIC;
+ * PR_ADDR and PR_CONNREQUIRED are mutually exclusive.
+ */
+#define	PR_ATOMIC	0x01		/* exchange atomic messages only */
+#define	PR_ADDR		0x02		/* addresses given with messages */
+#define	PR_CONNREQUIRED	0x04		/* connection required by protocol */
+#define	PR_WANTRCVD	0x08		/* want PRU_RCVD calls */
+#define	PR_RIGHTS	0x10		/* passes capabilities */
+
+/*
+ * The arguments to usrreq are:
+ *	(*protosw[].pr_usrreq)(up, req, m, nam, opt);
+ * where up is a (struct socket *), req is one of these requests,
+ * m is an optional mbuf chain containing a message,
+ * nam is an optional mbuf chain containing an address,
+ * and opt is a pointer to a socketopt structure or nil.
+ * The protocol is responsible for disposal of the mbuf chain m,
+ * the caller is responsible for any space held by nam and opt.
+ * A non-zero return from usrreq gives a
+ * UNIX error number which should be passed to higher level software.
+ */
+#define	PRU_ATTACH		0	/* attach protocol to up */
+#define	PRU_DETACH		1	/* detach protocol from up */
+#define	PRU_BIND		2	/* bind socket to address */
+#define	PRU_LISTEN		3	/* listen for connection */
+#define	PRU_CONNECT		4	/* establish connection to peer */
+#define	PRU_ACCEPT		5	/* accept connection from peer */
+#define	PRU_DISCONNECT		6	/* disconnect from peer */
+#define	PRU_SHUTDOWN		7	/* won't send any more data */
+#define	PRU_RCVD		8	/* have taken data; more room now */
+#define	PRU_SEND		9	/* send this data */
+#define	PRU_ABORT		10	/* abort (fast DISCONNECT, DETACH) */
+#define	PRU_CONTROL		11	/* control operations on protocol */
+#define	PRU_SENSE		12	/* return status into m */
+#define	PRU_RCVOOB		13	/* retrieve out of band data */
+#define	PRU_SENDOOB		14	/* send out of band data */
+#define	PRU_SOCKADDR		15	/* fetch socket's address */
+#define	PRU_PEERADDR		16	/* fetch peer's address */
+#define	PRU_CONNECT2		17	/* connect two sockets */
+/* begin for protocols internal use */
+#define	PRU_FASTTIMO		18	/* 200ms timeout */
+#define	PRU_SLOWTIMO		19	/* 500ms timeout */
+#define	PRU_PROTORCV		20	/* receive from below */
+#define	PRU_PROTOSEND		21	/* send to below */
+
+#define	PRU_NREQ		21	/* NOTE(review): equals the highest request (PRU_PROTOSEND), not the count of 22 -- confirm users */
+
+#ifdef PRUREQUESTS
+char *prurequests[] = {
+	"ATTACH",	"DETACH",	"BIND",		"LISTEN",
+	"CONNECT",	"ACCEPT",	"DISCONNECT",	"SHUTDOWN",
+	"RCVD",		"SEND",		"ABORT",	"CONTROL",
+	"SENSE",	"RCVOOB",	"SENDOOB",	"SOCKADDR",
+	"PEERADDR",	"CONNECT2",	"FASTTIMO",	"SLOWTIMO",
+	"PROTORCV",	"PROTOSEND",
+};
+#endif
+
+/*
+ * The arguments to the ctlinput routine are
+ *	(*protosw[].pr_ctlinput)(cmd, sa, arg);
+ * where cmd is one of the commands below, sa is a pointer to a sockaddr,
+ * and arg is an optional caddr_t argument used within a protocol family.
+ */
+#define	PRC_IFDOWN		0	/* interface transition */
+#define	PRC_ROUTEDEAD		1	/* select new route if possible ??? */
+#define	PRC_QUENCH2		3	/* DEC congestion bit says slow down */
+#define	PRC_QUENCH		4	/* some one said to slow down */
+#define	PRC_MSGSIZE		5	/* message size forced drop */
+#define	PRC_HOSTDEAD		6	/* host appears to be down */
+#define	PRC_HOSTUNREACH		7	/* deprecated (use PRC_UNREACH_HOST) */
+#define	PRC_UNREACH_NET		8	/* no route to network */
+#define	PRC_UNREACH_HOST	9	/* no route to host */
+#define	PRC_UNREACH_PROTOCOL	10	/* dst says bad protocol */
+#define	PRC_UNREACH_PORT	11	/* bad port # */
+/* was	PRC_UNREACH_NEEDFRAG	12	   (use PRC_MSGSIZE) */
+#define	PRC_UNREACH_SRCFAIL	13	/* source route failed */
+#define	PRC_REDIRECT_NET	14	/* net routing redirect */
+#define	PRC_REDIRECT_HOST	15	/* host routing redirect */
+#define	PRC_REDIRECT_TOSNET	16	/* redirect for type of service & net */
+#define	PRC_REDIRECT_TOSHOST	17	/* redirect for tos & host */
+#define	PRC_TIMXCEED_INTRANS	18	/* packet lifetime expired in transit */
+#define	PRC_TIMXCEED_REASS	19	/* lifetime expired on reass q */
+#define	PRC_PARAMPROB		20	/* header incorrect */
+
+#define	PRC_NCMDS		21
+
+#define	PRC_IS_REDIRECT(cmd)	\
+	((cmd) >= PRC_REDIRECT_NET && (cmd) <= PRC_REDIRECT_TOSHOST)
+
+#ifdef PRCREQUESTS
+char	*prcrequests[] = {
+	"IFDOWN", "ROUTEDEAD", "#2", "DEC-BIT-QUENCH2",
+	"QUENCH", "MSGSIZE", "HOSTDEAD", "#7",
+	"NET-UNREACH", "HOST-UNREACH", "PROTO-UNREACH", "PORT-UNREACH",
+	"#12", "SRCFAIL-UNREACH", "NET-REDIRECT", "HOST-REDIRECT",
+	"TOSNET-REDIRECT", "TOSHOST-REDIRECT", "TX-INTRANS", "TX-REASS",
+	"PARAMPROB"
+};
+#endif
+
+/*
+ * The arguments to ctloutput are:
+ *	(*protosw[].pr_ctloutput)(req, so, level, optname, optval);
+ * req is one of the actions listed below, so is a (struct socket *),
+ * level is an indication of which protocol layer the option is intended.
+ * optname is a protocol dependent socket option request,
+ * optval is a pointer to a mbuf-chain pointer, for value-return results.
+ * The protocol is responsible for disposal of the mbuf chain *optval
+ * if supplied,
+ * the caller is responsible for any space held by *optval, when returned.
+ * A non-zero return from ctloutput gives a
+ * UNIX error number which should be passed to higher level software.
+ */
+#define	PRCO_GETOPT	0
+#define	PRCO_SETOPT	1
+
+#define	PRCO_NCMDS	2
+
+#ifdef PRCOREQUESTS
+char	*prcorequests[] = {
+	"GETOPT", "SETOPT",
+};
+#endif
+
+#ifdef KERNEL
+extern	struct protosw *pffindproto(), *pffindtype();
+#endif
diff --git a/sys/sys/ptrace.h b/sys/sys/ptrace.h
new file mode 100644
index 00000000000..f7f99d474a4
--- /dev/null
+++ b/sys/sys/ptrace.h
@@ -0,0 +1,67 @@
+/*-
+ * Copyright (c) 1984, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)ptrace.h	8.2 (Berkeley) 1/4/94
+ */
+
+#ifndef	_SYS_PTRACE_H_
+#define	_SYS_PTRACE_H_
+
+#define	PT_TRACE_ME	0	/* child declares it's being traced */
+#define	PT_READ_I	1	/* read word in child's I space */
+#define	PT_READ_D	2	/* read word in child's D space */
+#define	PT_READ_U	3	/* read word in child's user structure */
+#define	PT_WRITE_I	4	/* write word in child's I space */
+#define	PT_WRITE_D	5	/* write word in child's D space */
+#define	PT_WRITE_U	6	/* write word in child's user structure */
+#define	PT_CONTINUE	7	/* continue the child */
+#define	PT_KILL		8	/* kill the child process */
+#define	PT_STEP		9	/* single step the child */
+#define	PT_ATTACH	10	/* trace some running process */
+#define	PT_DETACH	11	/* stop tracing a process */
+
+#define	PT_FIRSTMACH	32	/* for machine-specific requests */
+#include <machine/ptrace.h>	/* machine-specific requests, if any */
+
+#ifdef KERNEL
+void	proc_reparent __P((struct proc *child, struct proc *newparent));
+#else /* !KERNEL */
+
+#include <sys/cdefs.h>
+
+__BEGIN_DECLS
+int	ptrace __P((int _request, pid_t _pid, caddr_t _addr, int _data));
+__END_DECLS
+
+#endif /* !KERNEL */
+
+#endif	/* !_SYS_PTRACE_H_ */
diff --git a/sys/sys/queue.h b/sys/sys/queue.h
new file mode 100644
index 00000000000..c200c9f4ccf
--- /dev/null
+++ b/sys/sys/queue.h
@@ -0,0 +1,245 @@
+/* 
+ * Copyright (c) 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)queue.h	8.4 (Berkeley) 1/4/94
+ */
+
+#ifndef	_SYS_QUEUE_H_
+#define	_SYS_QUEUE_H_
+
+/*
+ * This file defines three types of data structures: lists, tail queues,
+ * and circular queues.
+ *
+ * A list is headed by a single forward pointer (or an array of forward
+ * pointers for a hash table header). The elements are doubly linked
+ * so that an arbitrary element can be removed without a need to
+ * traverse the list. New elements can be added to the list after
+ * an existing element or at the head of the list. A list may only be
+ * traversed in the forward direction.
+ *
+ * A tail queue is headed by a pair of pointers, one to the head of the
+ * list and the other to the tail of the list. The elements are doubly
+ * linked so that an arbitrary element can be removed without a need to
+ * traverse the list. New elements can be added to the list after
+ * an existing element, at the head of the list, or at the end of the
+ * list. A tail queue may only be traversed in the forward direction.
+ *
+ * A circular queue is headed by a pair of pointers, one to the head of the
+ * list and the other to the tail of the list. The elements are doubly
+ * linked so that an arbitrary element can be removed without a need to
+ * traverse the list. New elements can be added to the list before or after
+ * an existing element, at the head of the list, or at the end of the list.
+ * A circular queue may be traversed in either direction, but has a more
+ * complex end of list detection.
+ *
+ * For details on the use of these macros, see the queue(3) manual page.
+ */
+
+/*
+ * List definitions: LIST_HEAD declares a head type, LIST_ENTRY the links.
+ */
+#define LIST_HEAD(name, type)						\
+struct name {								\
+	struct type *lh_first;	/* first element */			\
+}
+/* Element linkage; le_prev addresses the le_next or lh_first pointing here. */
+#define LIST_ENTRY(type)						\
+struct {								\
+	struct type *le_next;	/* next element */			\
+	struct type **le_prev;	/* address of previous next element */	\
+}
+
+/* List functions.  Each use is a single statement that must end in ';';
+ * macro arguments may be evaluated more than once.
+ */
+#define	LIST_INIT(head) do {						\
+	(head)->lh_first = NULL;					\
+} while (0)
+/* Link elm into the list immediately after listelm. */
+#define LIST_INSERT_AFTER(listelm, elm, field) do {			\
+	if (((elm)->field.le_next = (listelm)->field.le_next) != NULL)	\
+		(listelm)->field.le_next->field.le_prev =		\
+		    &(elm)->field.le_next;				\
+	(listelm)->field.le_next = (elm);				\
+	(elm)->field.le_prev = &(listelm)->field.le_next;		\
+} while (0)
+/* Link elm in as the new first element of the list. */
+#define LIST_INSERT_HEAD(head, elm, field) do {				\
+	if (((elm)->field.le_next = (head)->lh_first) != NULL)		\
+		(head)->lh_first->field.le_prev = &(elm)->field.le_next;\
+	(head)->lh_first = (elm);					\
+	(elm)->field.le_prev = &(head)->lh_first;			\
+} while (0)
+/* Unlink elm; no head argument is needed because le_prev is followed. */
+#define LIST_REMOVE(elm, field) do {					\
+	if ((elm)->field.le_next != NULL)				\
+		(elm)->field.le_next->field.le_prev = 			\
+		    (elm)->field.le_prev;				\
+	*(elm)->field.le_prev = (elm)->field.le_next;			\
+} while (0)
+
+/*
+ * Tail queue definitions: head type (TAILQ_HEAD) and links (TAILQ_ENTRY).
+ */
+#define TAILQ_HEAD(name, type)						\
+struct name {								\
+	struct type *tqh_first;	/* first element */			\
+	struct type **tqh_last;	/* addr of last next element */		\
+}
+/* Element linkage; tqe_prev addresses the pointer that points here. */
+#define TAILQ_ENTRY(type)						\
+struct {								\
+	struct type *tqe_next;	/* next element */			\
+	struct type **tqe_prev;	/* address of previous next element */	\
+}
+
+/* Tail queue functions.  Each use is a single statement that must end in
+ * ';'; macro arguments may be evaluated more than once.
+ */
+#define	TAILQ_INIT(head) do {						\
+	(head)->tqh_first = NULL;					\
+	(head)->tqh_last = &(head)->tqh_first;				\
+} while (0)
+/* Insert elm at the front of the queue. */
+#define TAILQ_INSERT_HEAD(head, elm, field) do {			\
+	if (((elm)->field.tqe_next = (head)->tqh_first) != NULL)	\
+		(elm)->field.tqe_next->field.tqe_prev =			\
+		    &(elm)->field.tqe_next;				\
+	else								\
+		(head)->tqh_last = &(elm)->field.tqe_next;		\
+	(head)->tqh_first = (elm);					\
+	(elm)->field.tqe_prev = &(head)->tqh_first;			\
+} while (0)
+/* Append elm at the end of the queue. */
+#define TAILQ_INSERT_TAIL(head, elm, field) do {			\
+	(elm)->field.tqe_next = NULL;					\
+	(elm)->field.tqe_prev = (head)->tqh_last;			\
+	*(head)->tqh_last = (elm);					\
+	(head)->tqh_last = &(elm)->field.tqe_next;			\
+} while (0)
+/* Insert elm immediately after listelm. */
+#define TAILQ_INSERT_AFTER(head, listelm, elm, field) do {		\
+	if (((elm)->field.tqe_next = (listelm)->field.tqe_next) != NULL)\
+		(elm)->field.tqe_next->field.tqe_prev = 		\
+		    &(elm)->field.tqe_next;				\
+	else								\
+		(head)->tqh_last = &(elm)->field.tqe_next;		\
+	(listelm)->field.tqe_next = (elm);				\
+	(elm)->field.tqe_prev = &(listelm)->field.tqe_next;		\
+} while (0)
+/* Unlink elm; tqh_last is moved back when elm was the last element. */
+#define TAILQ_REMOVE(head, elm, field) do {				\
+	if (((elm)->field.tqe_next) != NULL)				\
+		(elm)->field.tqe_next->field.tqe_prev = 		\
+		    (elm)->field.tqe_prev;				\
+	else								\
+		(head)->tqh_last = (elm)->field.tqe_prev;		\
+	*(elm)->field.tqe_prev = (elm)->field.tqe_next;			\
+} while (0)
+
+/*
+ * Circular queue definitions: head type (CIRCLEQ_HEAD), links (CIRCLEQ_ENTRY).
+ */
+#define CIRCLEQ_HEAD(name, type)					\
+struct name {								\
+	struct type *cqh_first;		/* first element */		\
+	struct type *cqh_last;		/* last element */		\
+}
+/* Element linkage; the head address, cast, marks both ends of the queue. */
+#define CIRCLEQ_ENTRY(type)						\
+struct {								\
+	struct type *cqe_next;		/* next element */		\
+	struct type *cqe_prev;		/* previous element */		\
+}
+
+/* Circular queue functions.  Each use is a single statement that must end
+ * in ';'; args may be evaluated more than once.  (void *)(head) ends the queue.
+ */
+#define	CIRCLEQ_INIT(head) do {						\
+	(head)->cqh_first = (void *)(head);				\
+	(head)->cqh_last = (void *)(head);				\
+} while (0)
+/* Insert elm immediately after listelm. */
+#define CIRCLEQ_INSERT_AFTER(head, listelm, elm, field) do {		\
+	(elm)->field.cqe_next = (listelm)->field.cqe_next;		\
+	(elm)->field.cqe_prev = (listelm);				\
+	if ((listelm)->field.cqe_next == (void *)(head))		\
+		(head)->cqh_last = (elm);				\
+	else								\
+		(listelm)->field.cqe_next->field.cqe_prev = (elm);	\
+	(listelm)->field.cqe_next = (elm);				\
+} while (0)
+/* Insert elm immediately before listelm. */
+#define CIRCLEQ_INSERT_BEFORE(head, listelm, elm, field) do {		\
+	(elm)->field.cqe_next = (listelm);				\
+	(elm)->field.cqe_prev = (listelm)->field.cqe_prev;		\
+	if ((listelm)->field.cqe_prev == (void *)(head))		\
+		(head)->cqh_first = (elm);				\
+	else								\
+		(listelm)->field.cqe_prev->field.cqe_next = (elm);	\
+	(listelm)->field.cqe_prev = (elm);				\
+} while (0)
+/* Insert elm at the front of the queue. */
+#define CIRCLEQ_INSERT_HEAD(head, elm, field) do {			\
+	(elm)->field.cqe_next = (head)->cqh_first;			\
+	(elm)->field.cqe_prev = (void *)(head);				\
+	if ((head)->cqh_last == (void *)(head))				\
+		(head)->cqh_last = (elm);				\
+	else								\
+		(head)->cqh_first->field.cqe_prev = (elm);		\
+	(head)->cqh_first = (elm);					\
+} while (0)
+/* Append elm at the end of the queue. */
+#define CIRCLEQ_INSERT_TAIL(head, elm, field) do {			\
+	(elm)->field.cqe_next = (void *)(head);				\
+	(elm)->field.cqe_prev = (head)->cqh_last;			\
+	if ((head)->cqh_first == (void *)(head))			\
+		(head)->cqh_first = (elm);				\
+	else								\
+		(head)->cqh_last->field.cqe_next = (elm);		\
+	(head)->cqh_last = (elm);					\
+} while (0)
+/* Unlink elm, fixing cqh_first/cqh_last when elm was at either end. */
+#define	CIRCLEQ_REMOVE(head, elm, field) do {				\
+	if ((elm)->field.cqe_next == (void *)(head))			\
+		(head)->cqh_last = (elm)->field.cqe_prev;		\
+	else								\
+		(elm)->field.cqe_next->field.cqe_prev =			\
+		    (elm)->field.cqe_prev;				\
+	if ((elm)->field.cqe_prev == (void *)(head))			\
+		(head)->cqh_first = (elm)->field.cqe_next;		\
+	else								\
+		(elm)->field.cqe_prev->field.cqe_next =			\
+		    (elm)->field.cqe_next;				\
+} while (0)
+#endif	/* !_SYS_QUEUE_H_ */
diff --git a/sys/sys/reboot.h b/sys/sys/reboot.h
new file mode 100644
index 00000000000..c3c957e17ee
--- /dev/null
+++ b/sys/sys/reboot.h
@@ -0,0 +1,88 @@
+/*
+ * Copyright (c) 1982, 1986, 1988, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)reboot.h	8.1 (Berkeley) 6/2/93
+ */
+
+/*
+ * Arguments to reboot system call.
+ * These are passed to boot program in r11,
+ * and on to init.
+ */
+#define	RB_AUTOBOOT	0	/* flags for system auto-booting itself */
+/* Each flag below occupies its own bit. */
+#define	RB_ASKNAME	0x01	/* ask for file name to reboot from */
+#define	RB_SINGLE	0x02	/* reboot to single user only */
+#define	RB_NOSYNC	0x04	/* don't sync before reboot */
+#define	RB_HALT		0x08	/* don't reboot, just halt */
+#define	RB_INITNAME	0x10	/* name given for /etc/init (unused) */
+#define	RB_DFLTROOT	0x20	/* use compiled-in rootdev */
+#define	RB_KDB		0x40	/* give control to kernel debugger */
+#define	RB_RDONLY	0x80	/* mount root fs read-only */
+#define	RB_DUMP		0x100	/* dump kernel memory before reboot */
+#define	RB_MINIROOT	0x200	/* mini-root present in memory at boot time */
+
+/*
+ * Constants for converting boot-style device number to type,
+ * adaptor (uba, mba, etc), unit number and partition number.
+ * Type (== major device number) is in the low byte
+ * for backward compatibility.  Except for that of the "magic
+ * number", each mask applies to the shifted value.
+ * Format:
+ *	 (4) (4) (4) (4)  (8)     (8)
+ *	--------------------------------
+ *	|MA | AD| CT| UN| PART  | TYPE |
+ *	--------------------------------
+ */
+#define	B_ADAPTORSHIFT		24
+#define	B_ADAPTORMASK		0x0f
+#define	B_ADAPTOR(val)		(((val) >> B_ADAPTORSHIFT) & B_ADAPTORMASK)
+#define B_CONTROLLERSHIFT	20
+#define B_CONTROLLERMASK	0xf
+#define	B_CONTROLLER(val)	(((val)>>B_CONTROLLERSHIFT) & B_CONTROLLERMASK)
+#define B_UNITSHIFT		16
+#define B_UNITMASK		0xf
+#define	B_UNIT(val)		(((val) >> B_UNITSHIFT) & B_UNITMASK)
+#define B_PARTITIONSHIFT	8
+#define B_PARTITIONMASK		0xff
+#define	B_PARTITION(val)	(((val) >> B_PARTITIONSHIFT) & B_PARTITIONMASK)
+#define	B_TYPESHIFT		0
+#define	B_TYPEMASK		0xff
+#define	B_TYPE(val)		(((val) >> B_TYPESHIFT) & B_TYPEMASK)
+/* Unlike the masks above, B_MAGICMASK applies to the unshifted value. */
+#define	B_MAGICMASK	((u_long)0xf0000000)
+#define	B_DEVMAGIC	((u_long)0xa0000000)
+/* Pack the fields into one value; arguments are shifted but not masked. */
+#define MAKEBOOTDEV(type, adaptor, controller, unit, partition) \
+	(((type) << B_TYPESHIFT) | ((adaptor) << B_ADAPTORSHIFT) | \
+	((controller) << B_CONTROLLERSHIFT) | ((unit) << B_UNITSHIFT) | \
+	((partition) << B_PARTITIONSHIFT) | B_DEVMAGIC)
diff --git a/sys/sys/resource.h b/sys/sys/resource.h
new file mode 100644
index 00000000000..559f1ac6c37
--- /dev/null
+++ b/sys/sys/resource.h
@@ -0,0 +1,125 @@
+/*
+ * Copyright (c) 1982, 1986, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)resource.h	8.2 (Berkeley) 1/4/94
+ */
+
+#ifndef _SYS_RESOURCE_H_
+#define	_SYS_RESOURCE_H_
+
+/*
+ * Process priority specifications to get/setpriority.
+ */
+#define	PRIO_MIN	(-20)	/* parenthesized so it expands as one operand */
+#define	PRIO_MAX	20
+/* Selector values; by their names: process, process group, user. */
+#define	PRIO_PROCESS	0
+#define	PRIO_PGRP	1
+#define	PRIO_USER	2
+
+/*
+ * Resource utilization information.
+ */
+/* Selector values; the negative one is parenthesized to stay one operand. */
+#define	RUSAGE_SELF	0
+#define	RUSAGE_CHILDREN	(-1)
+
+struct	rusage {
+	struct timeval ru_utime;	/* user time used */
+	struct timeval ru_stime;	/* system time used */
+	long	ru_maxrss;		/* max resident set size */
+#define	ru_first	ru_ixrss	/* alias for the member declared next */
+	long	ru_ixrss;		/* integral shared memory size */
+	long	ru_idrss;		/* integral unshared data size */
+	long	ru_isrss;		/* integral unshared stack size */
+	long	ru_minflt;		/* page reclaims */
+	long	ru_majflt;		/* page faults */
+	long	ru_nswap;		/* swaps */
+	long	ru_inblock;		/* block input operations */
+	long	ru_oublock;		/* block output operations */
+	long	ru_msgsnd;		/* messages sent */
+	long	ru_msgrcv;		/* messages received */
+	long	ru_nsignals;		/* signals received */
+	long	ru_nvcsw;		/* voluntary context switches */
+	long	ru_nivcsw;		/* involuntary context switches */
+#define	ru_last		ru_nivcsw	/* alias for the final member */
+};
+
+/*
+ * Resource limits; RLIM_NLIMITS is one more than the highest index below.
+ */
+#define	RLIMIT_CPU	0		/* cpu time in seconds (getrlimit(2)) */
+#define	RLIMIT_FSIZE	1		/* maximum file size */
+#define	RLIMIT_DATA	2		/* data size */
+#define	RLIMIT_STACK	3		/* stack size */
+#define	RLIMIT_CORE	4		/* core file size */
+#define	RLIMIT_RSS	5		/* resident set size */
+#define	RLIMIT_MEMLOCK	6		/* locked-in-memory address space */
+#define	RLIMIT_NPROC	7		/* number of processes */
+#define	RLIMIT_NOFILE	8		/* number of open files */
+
+#define	RLIM_NLIMITS	9		/* number of resource limits */
+
+#define	RLIM_INFINITY	(((u_quad_t)1 << 63) - 1)	/* 2^63 - 1 */
+/* Limit pair with long fields; struct rlimit below uses quad_t instead. */
+struct orlimit {
+	long	rlim_cur;		/* current (soft) limit */
+	long	rlim_max;		/* maximum value for rlim_cur */
+};
+
+struct rlimit {
+	quad_t	rlim_cur;		/* current (soft) limit */
+	quad_t	rlim_max;		/* maximum value for rlim_cur */
+};
+
+/* Load average structure. */
+struct loadavg {
+	fixpt_t ldavg[3];	/* three load average values */
+	long fscale;		/* presumably the scale of ldavg -- confirm */
+};
+
+#ifdef KERNEL
+extern struct loadavg averunnable;
+
+#else /* !KERNEL */
+#include <sys/cdefs.h>
+
+__BEGIN_DECLS
+int	getpriority __P((int, int));
+int	getrlimit __P((int, struct rlimit *));
+int	getrusage __P((int, struct rusage *));
+int	setpriority __P((int, int, int));
+int	setrlimit __P((int, const struct rlimit *));
+__END_DECLS
+
+#endif	/* !KERNEL */
+#endif	/* !_SYS_RESOURCE_H_ */
diff --git a/sys/sys/resourcevar.h b/sys/sys/resourcevar.h
new file mode 100644
index 00000000000..0f8d5e30eed
--- /dev/null
+++ b/sys/sys/resourcevar.h
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)resourcevar.h	8.3 (Berkeley) 2/22/94
+ */
+
+#ifndef	_SYS_RESOURCEVAR_H_
+#define	_SYS_RESOURCEVAR_H_
+
+/*
+ * Kernel per-process accounting / statistics
+ * (not necessarily resident except when running).
+ */
+struct pstats {
+#define	pstat_startzero	p_ru
+	struct	rusage p_ru;		/* stats for this proc */
+	struct	rusage p_cru;		/* sum of stats for reaped children */
+#define	pstat_endzero	pstat_startcopy
+	/* Ranges: "zero" spans p_ru..p_cru, "copy" spans p_timer..p_prof. */
+#define	pstat_startcopy	p_timer
+	struct	itimerval p_timer[3];	/* virtual-time timers */
+
+	struct uprof {			/* profile arguments */
+		caddr_t	pr_base;	/* buffer base */
+		u_long	pr_size;	/* buffer size */
+		u_long	pr_off;		/* pc offset */
+		u_long	pr_scale;	/* pc scaling */
+		u_long	pr_addr;	/* temp storage for addr until AST */
+		u_long	pr_ticks;	/* temp storage for ticks until AST */
+	} p_prof;
+#define	pstat_endcopy	p_start
+	struct	timeval p_start;	/* starting time */
+};
+
+/*
+ * Kernel shareable process resource limits.  Because this structure
+ * is moderately large but changes infrequently, it is normally
+ * shared copy-on-write after forks.  If a group of processes
+ * ("threads") share modifications, the PL_SHAREMOD flag is set,
+ * and a copy must be made for the child of a new fork that isn't
+ * sharing modifications to the limits.
+ */
+struct plimit {
+	struct	rlimit pl_rlimit[RLIM_NLIMITS];	/* one entry per limit */
+#define	PL_SHAREMOD	0x01		/* modifications are shared */
+	int	p_lflags;		/* flags: PL_SHAREMOD */
+	int	p_refcnt;		/* number of references */
+};
+
+/* From AST: pass p's saved pr_addr and pr_ticks to addupc_task(). */
+#define	ADDUPROF(p)							\
+	addupc_task(p,							\
+	    (p)->p_stats->p_prof.pr_addr, (p)->p_stats->p_prof.pr_ticks)
+
+#ifdef KERNEL	/* prototypes visible to kernel code only */
+void	 addupc_intr __P((struct proc *p, u_long pc, u_int ticks));
+void	 addupc_task __P((struct proc *p, u_long pc, u_int ticks));
+struct plimit
+	*limcopy __P((struct plimit *lim));
+#endif /* KERNEL */
+#endif	/* !_SYS_RESOURCEVAR_H_ */
diff --git a/sys/sys/select.h b/sys/sys/select.h
new file mode 100644
index 00000000000..a279c592fbe
--- /dev/null
+++ b/sys/sys/select.h
@@ -0,0 +1,56 @@
+/*-
+ * Copyright (c) 1992, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)select.h	8.2 (Berkeley) 1/4/94
+ */
+
+#ifndef _SYS_SELECT_H_
+#define	_SYS_SELECT_H_
+
+/*
+ * Used to maintain information about processes that wish to be
+ * notified when I/O becomes possible.
+ */
+struct selinfo {
+	pid_t	si_pid;		/* process to be notified */
+	short	si_flags;	/* SI_COLL, see below */
+};
+#define	SI_COLL	0x0001		/* collision occurred */
+
+#ifdef KERNEL
+struct proc;			/* forward declaration for selrecord() */
+
+void	selrecord __P((struct proc *selector, struct selinfo *));
+void	selwakeup __P((struct selinfo *));
+#endif /* KERNEL */
+
+#endif /* !_SYS_SELECT_H_ */
diff --git a/sys/sys/selinfo.h b/sys/sys/selinfo.h
new file mode 100644
index 00000000000..a279c592fbe
--- /dev/null
+++ b/sys/sys/selinfo.h
@@ -0,0 +1,56 @@
+/*-
+ * Copyright (c) 1992, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)select.h	8.2 (Berkeley) 1/4/94
+ */
+
+#ifndef _SYS_SELECT_H_
+#define	_SYS_SELECT_H_
+/* Guard shared with <sys/select.h>, which defines the same struct selinfo. */
+/*
+ * Used to maintain information about processes that wish to be
+ * notified when I/O becomes possible.
+ */
+struct selinfo {
+	pid_t	si_pid;		/* process to be notified */
+	short	si_flags;	/* SI_COLL, see below */
+};
+#define	SI_COLL	0x0001		/* collision occurred */
+
+#ifdef KERNEL
+struct proc;			/* forward declaration for selrecord() */
+
+void	selrecord __P((struct proc *selector, struct selinfo *));
+void	selwakeup __P((struct selinfo *));
+#endif /* KERNEL */
+
+#endif /* !_SYS_SELECT_H_ */
diff --git a/sys/sys/signal.h b/sys/sys/signal.h
new file mode 100644
index 00000000000..8ccded41c3b
--- /dev/null
+++ b/sys/sys/signal.h
@@ -0,0 +1,194 @@
+/*
+ * Copyright (c) 1982, 1986, 1989, 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)signal.h	8.2 (Berkeley) 1/21/94
+ */
+
+#ifndef	_SYS_SIGNAL_H_
+#define	_SYS_SIGNAL_H_
+
+#define NSIG	32		/* counting 0; could be 33 (mask is 1-32) */
+
+#ifndef _ANSI_SOURCE
+#include <machine/signal.h>	/* sigcontext; codes for SIGILL, SIGFPE */
+#endif
+
+#define	SIGHUP	1	/* hangup */
+#define	SIGINT	2	/* interrupt */
+#define	SIGQUIT	3	/* quit */
+#define	SIGILL	4	/* illegal instruction (not reset when caught) */
+#ifndef _POSIX_SOURCE
+#define	SIGTRAP	5	/* trace trap (not reset when caught) */
+#endif
+#define	SIGABRT	6	/* abort() */
+#ifndef _POSIX_SOURCE
+#define	SIGIOT	SIGABRT	/* compatibility name for SIGABRT */
+#define	SIGEMT	7	/* EMT instruction */
+#endif
+#define	SIGFPE	8	/* floating point exception */
+#define	SIGKILL	9	/* kill (cannot be caught or ignored) */
+#ifndef _POSIX_SOURCE
+#define	SIGBUS	10	/* bus error */
+#endif
+#define	SIGSEGV	11	/* segmentation violation */
+#ifndef _POSIX_SOURCE
+#define	SIGSYS	12	/* bad argument to system call */
+#endif
+#define	SIGPIPE	13	/* write on a pipe with no one to read it */
+#define	SIGALRM	14	/* alarm clock */
+#define	SIGTERM	15	/* software termination signal from kill */
+#ifndef _POSIX_SOURCE
+#define	SIGURG	16	/* urgent condition on IO channel */
+#endif
+#define	SIGSTOP	17	/* sendable stop signal not from tty */
+#define	SIGTSTP	18	/* stop signal from tty */
+#define	SIGCONT	19	/* continue a stopped process */
+#define	SIGCHLD	20	/* to parent on child stop or exit */
+#define	SIGTTIN	21	/* to reader's pgrp upon background tty read */
+#define	SIGTTOU	22	/* like TTIN for output if (tp->t_local&LTOSTOP) */
+#ifndef _POSIX_SOURCE
+#define	SIGIO	23	/* input/output possible signal */
+#define	SIGXCPU	24	/* exceeded CPU time limit */
+#define	SIGXFSZ	25	/* exceeded file size limit */
+#define	SIGVTALRM 26	/* virtual time alarm */
+#define	SIGPROF	27	/* profiling time alarm */
+#define SIGWINCH 28	/* window size changes */
+#define SIGINFO	29	/* information request */
+#endif
+#define SIGUSR1 30	/* user defined signal 1 */
+#define SIGUSR2 31	/* user defined signal 2 */
+
+#if defined(_ANSI_SOURCE) || defined(__cplusplus)
+/*
+ * Language spec sez we must list exactly one parameter, even though we
+ * actually supply three.  Ugh!  (Each value is parenthesized as one operand.)
+ */
+#define	SIG_DFL		((void (*)(int))0)
+#define	SIG_IGN		((void (*)(int))1)
+#define	SIG_ERR		((void (*)(int))-1)
+#else
+#define	SIG_DFL		((void (*)())0)
+#define	SIG_IGN		((void (*)())1)
+#define	SIG_ERR		((void (*)())-1)
+#endif
+
+#ifndef _ANSI_SOURCE
+typedef unsigned int sigset_t;
+
+/*
+ * Signal vector "template" used in sigaction call.
+ */
+struct	sigaction {
+	void	(*sa_handler)();	/* signal handler */
+	sigset_t sa_mask;		/* signal mask to apply */
+	int	sa_flags;		/* see signal options below */
+};
+#ifndef _POSIX_SOURCE
+#define SA_ONSTACK	0x0001	/* take signal on signal stack */
+#define SA_RESTART	0x0002	/* restart system call on signal return */
+#define	SA_DISABLE	0x0004	/* disable taking signals on alternate stack */
+#ifdef COMPAT_SUNOS
+#define	SA_USERTRAMP	0x0100	/* do not bounce off kernel's sigtramp */
+#endif
+#endif
+#define SA_NOCLDSTOP	0x0008	/* do not generate SIGCHLD on child stop */
+
+/*
+ * Flags for sigprocmask:
+ */
+#define	SIG_BLOCK	1	/* block specified signal set */
+#define	SIG_UNBLOCK	2	/* unblock specified signal set */
+#define	SIG_SETMASK	3	/* set specified signal set */
+
+#ifndef _POSIX_SOURCE
+#ifndef KERNEL
+#include <sys/cdefs.h>
+#endif
+typedef	void (*sig_t) __P((int));	/* pointer to a signal function */
+
+/*
+ * Structure used in sigaltstack call.
+ */
+struct	sigaltstack {
+	char	*ss_base;		/* signal stack base */
+	int	ss_size;		/* signal stack length */
+	int	ss_flags;		/* SA_DISABLE and/or SA_ONSTACK */
+};
+#define	MINSIGSTKSZ	8192			/* minimum allowable stack size */
+#define	SIGSTKSZ	(MINSIGSTKSZ + 32768)	/* recommended stack size */
+
+/*
+ * 4.3 compatibility:
+ * Signal vector "template" used in sigvec call.
+ */
+struct	sigvec {
+	void	(*sv_handler)();	/* signal handler */
+	int	sv_mask;		/* signal mask to apply */
+	int	sv_flags;		/* see signal options below */
+};
+
+#define SV_ONSTACK	SA_ONSTACK
+#define SV_INTERRUPT	SA_RESTART	/* same bit, opposite sense */
+#define sv_onstack sv_flags	/* isn't compatibility wonderful! */
+
+/*
+ * Structure used in sigstack call.
+ */
+struct	sigstack {
+	char	*ss_sp;			/* signal stack pointer */
+	int	ss_onstack;		/* current status */
+};
+
+/*
+ * Macro for converting signal number to a mask suitable for
+ * sigblock(): signal m is represented by bit (m - 1).
+ */
+#define sigmask(m)	(1 << ((m)-1))
+
+#define	BADSIG		SIG_ERR		/* alias for SIG_ERR */
+
+#endif	/* !_POSIX_SOURCE */
+#endif	/* !_ANSI_SOURCE */
+
+/*
+ * For historical reasons; programs expect signal's return value to be
+ * defined by <sys/signal.h>.  signal() takes and returns a handler pointer.
+ */
+__BEGIN_DECLS
+void	(*signal __P((int, void (*) __P((int))))) __P((int));
+__END_DECLS
+#endif	/* !_SYS_SIGNAL_H_ */
diff --git a/sys/sys/signalvar.h b/sys/sys/signalvar.h
new file mode 100644
index 00000000000..3d7e68bc530
--- /dev/null
+++ b/sys/sys/signalvar.h
@@ -0,0 +1,167 @@
+/*
+ * Copyright (c) 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)signalvar.h	8.3 (Berkeley) 1/4/94
+ */
+
+#ifndef	_SYS_SIGNALVAR_H_		/* tmp for user.h */
+#define	_SYS_SIGNALVAR_H_
+
+/*
+ * Kernel signal definitions and data structures,
+ * not exported to user programs.
+ */
+
+/*
+ * Process signal actions and state, needed only within the process
+ * (not necessarily resident).
+ */
+struct	sigacts {
+	sig_t	ps_sigact[NSIG];	/* disposition of signals */
+	sigset_t ps_catchmask[NSIG];	/* signals to be blocked */
+	sigset_t ps_sigonstack;		/* signals to take on sigstack */
+	sigset_t ps_sigintr;		/* signals that interrupt syscalls */
+	sigset_t ps_oldmask;		/* saved mask from before sigpause */
+	int	ps_flags;		/* signal flags, below */
+	struct	sigaltstack ps_sigstk;	/* sp & on stack state variable */
+	int	ps_sig;			/* for core dump/debugger XXX */
+	int	ps_code;		/* for core dump/debugger XXX */
+	int	ps_addr;		/* for core dump/debugger XXX */
+	sigset_t ps_usertramp;		/* SunOS compat; libc sigtramp XXX */
+};
+
+/* signal flags */
+#define	SAS_OLDMASK	0x01		/* need to restore mask before pause */
+#define	SAS_ALTSTACK	0x02		/* have alternate signal stack */
+
+/* additional sig_t action values, used only temporarily/internally (fully parenthesized) */
+#define	SIG_CATCH	((void (*)())2)
+#define	SIG_HOLD	((void (*)())3)
+
+/*
+ * signal action (ps_sigact[] slot) of process p for signal sig; current process only
+ */
+#define SIGACTION(p, sig)	((p)->p_sigacts->ps_sigact[(sig)])
+
+/*
+ * Determine signal that should be delivered to process p, the current
+ * process, 0 if none.  Yields 0 without calling issignal() when nothing is
+ * pending, or when p is untraced and every pending signal is masked.
+ */
+#define	CURSIG(p)							\
+	(((p)->p_siglist == 0 ||					\
+	    (((p)->p_flag & P_TRACED) == 0 &&				\
+	    ((p)->p_siglist & ~(p)->p_sigmask) == 0)) ?			\
+	    0 : issignal(p))
+
+/*
+ * Clear pending signal sig from process p's p_siglist (a { } block, not an expression).
+ */
+#define	CLRSIG(p, sig)	{ (p)->p_siglist &= ~sigmask(sig); }
+
+/*
+ * Signal properties and actions.
+ * The array below categorizes the signals and their default actions
+ * according to the following properties:
+ */
+#define	SA_KILL		0x01		/* terminates process by default */
+#define	SA_CORE		0x02		/* ditto and coredumps */
+#define	SA_STOP		0x04		/* suspend process */
+#define	SA_TTYSTOP	0x08		/* ditto, from tty */
+#define	SA_IGNORE	0x10		/* ignore by default */
+#define	SA_CONT		0x20		/* continue if suspended */
+#define	SA_CANTMASK	0x40		/* non-maskable, catchable */
+
+#ifdef	SIGPROP
+int sigprop[NSIG + 1] = {
+	0,			/* unused */
+	SA_KILL,		/* SIGHUP */
+	SA_KILL,		/* SIGINT */
+	SA_KILL|SA_CORE,	/* SIGQUIT */
+	SA_KILL|SA_CORE,	/* SIGILL */
+	SA_KILL|SA_CORE,	/* SIGTRAP */
+	SA_KILL|SA_CORE,	/* SIGABRT */
+	SA_KILL|SA_CORE,	/* SIGEMT */
+	SA_KILL|SA_CORE,	/* SIGFPE */
+	SA_KILL,		/* SIGKILL */
+	SA_KILL|SA_CORE,	/* SIGBUS */
+	SA_KILL|SA_CORE,	/* SIGSEGV */
+	SA_KILL|SA_CORE,	/* SIGSYS */
+	SA_KILL,		/* SIGPIPE */
+	SA_KILL,		/* SIGALRM */
+	SA_KILL,		/* SIGTERM */
+	SA_IGNORE,		/* SIGURG */
+	SA_STOP,		/* SIGSTOP */
+	SA_STOP|SA_TTYSTOP,	/* SIGTSTP */
+	SA_IGNORE|SA_CONT,	/* SIGCONT */
+	SA_IGNORE,		/* SIGCHLD */
+	SA_STOP|SA_TTYSTOP,	/* SIGTTIN */
+	SA_STOP|SA_TTYSTOP,	/* SIGTTOU */
+	SA_IGNORE,		/* SIGIO */
+	SA_KILL,		/* SIGXCPU */
+	SA_KILL,		/* SIGXFSZ */
+	SA_KILL,		/* SIGVTALRM */
+	SA_KILL,		/* SIGPROF */
+	SA_IGNORE,		/* SIGWINCH  */
+	SA_IGNORE,		/* SIGINFO */
+	SA_KILL,		/* SIGUSR1 */
+	SA_KILL,		/* SIGUSR2 */
+};
+
+#define	contsigmask	(sigmask(SIGCONT))
+#define	stopsigmask	(sigmask(SIGSTOP) | sigmask(SIGTSTP) | \
+			    sigmask(SIGTTIN) | sigmask(SIGTTOU))
+
+#endif /* SIGPROP */
+
+#define	sigcantmask	(sigmask(SIGKILL) | sigmask(SIGSTOP))
+
+#ifdef KERNEL
+/*
+ * Machine-independent functions:
+ */
+int	coredump __P((struct proc *p));
+void	execsigs __P((struct proc *p));
+void	gsignal __P((int pgid, int sig));
+int	issignal __P((struct proc *p));	/* the routine CURSIG() expands to */
+void	pgsignal __P((struct pgrp *pgrp, int sig, int checkctty));
+void	postsig __P((int sig));
+void	psignal __P((struct proc *p, int sig));
+void	siginit __P((struct proc *p));
+void	trapsignal __P((struct proc *p, int sig, unsigned code));
+
+/*
+ * Machine-dependent functions:
+ */
+void	sendsig __P((sig_t action, int sig, int returnmask, unsigned code));
+#endif	/* KERNEL */
+#endif	/* !_SYS_SIGNALVAR_H_ */
diff --git a/sys/sys/socket.h b/sys/sys/socket.h
new file mode 100644
index 00000000000..f6728e98854
--- /dev/null
+++ b/sys/sys/socket.h
@@ -0,0 +1,339 @@
+/*
+ * Copyright (c) 1982, 1985, 1986, 1988, 1993, 1994
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)socket.h	8.4 (Berkeley) 2/21/94
+ */
+
+#ifndef _SYS_SOCKET_H_
+#define	_SYS_SOCKET_H_
+
+/*
+ * Definitions related to sockets: types, address families, options.
+ */
+
+/*
+ * Types
+ */
+#define	SOCK_STREAM	1		/* stream socket */
+#define	SOCK_DGRAM	2		/* datagram socket */
+#define	SOCK_RAW	3		/* raw-protocol interface */
+#define	SOCK_RDM	4		/* reliably-delivered message */
+#define	SOCK_SEQPACKET	5		/* sequenced packet stream */
+
+/*
+ * Option flags per-socket.
+ */
+#define	SO_DEBUG	0x0001		/* turn on debugging info recording */
+#define	SO_ACCEPTCONN	0x0002		/* socket has had listen() */
+#define	SO_REUSEADDR	0x0004		/* allow local address reuse */
+#define	SO_KEEPALIVE	0x0008		/* keep connections alive */
+#define	SO_DONTROUTE	0x0010		/* just use interface addresses */
+#define	SO_BROADCAST	0x0020		/* permit sending of broadcast msgs */
+#define	SO_USELOOPBACK	0x0040		/* bypass hardware when possible */
+#define	SO_LINGER	0x0080		/* linger on close if data present */
+#define	SO_OOBINLINE	0x0100		/* leave received OOB data in line */
+#define	SO_REUSEPORT	0x0200		/* allow local address & port reuse */
+
+/*
+ * Additional options, not kept in so_options.
+ */
+#define SO_SNDBUF	0x1001		/* send buffer size */
+#define SO_RCVBUF	0x1002		/* receive buffer size */
+#define SO_SNDLOWAT	0x1003		/* send low-water mark */
+#define SO_RCVLOWAT	0x1004		/* receive low-water mark */
+#define SO_SNDTIMEO	0x1005		/* send timeout */
+#define SO_RCVTIMEO	0x1006		/* receive timeout */
+#define	SO_ERROR	0x1007		/* get error status and clear */
+#define	SO_TYPE		0x1008		/* get socket type */
+
+/*
+ * Structure used for manipulating linger option.
+ */
+struct	linger {
+	int	l_onoff;		/* option on/off */
+	int	l_linger;		/* linger time */
+};
+
+/*
+ * Level number for (get/set)sockopt() to apply to socket itself.
+ */
+#define	SOL_SOCKET	0xffff		/* options for socket level */
+
+/*
+ * Address families.
+ */
+#define	AF_UNSPEC	0		/* unspecified */
+#define	AF_LOCAL	1		/* local to host (pipes, portals) */
+#define	AF_UNIX		AF_LOCAL	/* backward compatibility */
+#define	AF_INET		2		/* internetwork: UDP, TCP, etc. */
+#define	AF_IMPLINK	3		/* arpanet imp addresses */
+#define	AF_PUP		4		/* pup protocols: e.g. BSP */
+#define	AF_CHAOS	5		/* mit CHAOS protocols */
+#define	AF_NS		6		/* XEROX NS protocols */
+#define	AF_ISO		7		/* ISO protocols */
+#define	AF_OSI		AF_ISO
+#define	AF_ECMA		8		/* european computer manufacturers */
+#define	AF_DATAKIT	9		/* datakit protocols */
+#define	AF_CCITT	10		/* CCITT protocols, X.25 etc */
+#define	AF_SNA		11		/* IBM SNA */
+#define AF_DECnet	12		/* DECnet */
+#define AF_DLI		13		/* DEC Direct data link interface */
+#define AF_LAT		14		/* LAT */
+#define	AF_HYLINK	15		/* NSC Hyperchannel */
+#define	AF_APPLETALK	16		/* Apple Talk */
+#define	AF_ROUTE	17		/* Internal Routing Protocol */
+#define	AF_LINK		18		/* Link layer interface */
+#define	pseudo_AF_XTP	19		/* eXpress Transfer Protocol (no AF) */
+#define	AF_COIP		20		/* connection-oriented IP, aka ST II */
+#define	AF_CNT		21		/* Computer Network Technology */
+#define pseudo_AF_RTIP	22		/* Help Identify RTIP packets */
+#define	AF_IPX		23		/* Novell Internet Protocol */
+#define	AF_SIP		24		/* Simple Internet Protocol */
+#define pseudo_AF_PIP	25		/* Help Identify PIP packets */
+
+#define	AF_MAX		26
+
+/*
+ * Structure used by kernel to store most
+ * addresses.
+ */
+struct sockaddr {
+	u_char	sa_len;			/* total length */
+	u_char	sa_family;		/* address family */
+	char	sa_data[14];		/* actually longer; address value */
+};
+
+/*
+ * Structure used by kernel to pass protocol
+ * information in raw sockets.
+ */
+struct sockproto {
+	u_short	sp_family;		/* address family */
+	u_short	sp_protocol;		/* protocol */
+};
+
+/*
+ * Protocol families, same as address families for now.
+ */
+#define	PF_UNSPEC	AF_UNSPEC
+#define	PF_LOCAL	AF_LOCAL
+#define	PF_UNIX		PF_LOCAL	/* backward compatibility */
+#define	PF_INET		AF_INET
+#define	PF_IMPLINK	AF_IMPLINK
+#define	PF_PUP		AF_PUP
+#define	PF_CHAOS	AF_CHAOS
+#define	PF_NS		AF_NS
+#define	PF_ISO		AF_ISO
+#define	PF_OSI		AF_ISO
+#define	PF_ECMA		AF_ECMA
+#define	PF_DATAKIT	AF_DATAKIT
+#define	PF_CCITT	AF_CCITT
+#define	PF_SNA		AF_SNA
+#define PF_DECnet	AF_DECnet
+#define PF_DLI		AF_DLI
+#define PF_LAT		AF_LAT
+#define	PF_HYLINK	AF_HYLINK
+#define	PF_APPLETALK	AF_APPLETALK
+#define	PF_ROUTE	AF_ROUTE
+#define	PF_LINK		AF_LINK
+#define	PF_XTP		pseudo_AF_XTP	/* really just proto family, no AF */
+#define	PF_COIP		AF_COIP
+#define	PF_CNT		AF_CNT
+#define	PF_SIP		AF_SIP
+#define	PF_IPX		AF_IPX		/* same format as AF_NS */
+#define PF_RTIP		pseudo_AF_RTIP	/* same format as AF_INET */
+#define PF_PIP		pseudo_AF_PIP
+
+#define	PF_MAX		AF_MAX
+
+/*
+ * Definitions for network related sysctl, CTL_NET.
+ *
+ * Second level is protocol family.
+ * Third level is protocol number.
+ *
+ * Further levels are defined by the individual families below.
+ */
+#define NET_MAXID	AF_MAX
+
+#define CTL_NET_NAMES { \
+	{ 0, 0 }, \
+	{ "unix", CTLTYPE_NODE }, \
+	{ "inet", CTLTYPE_NODE }, \
+	{ "implink", CTLTYPE_NODE }, \
+	{ "pup", CTLTYPE_NODE }, \
+	{ "chaos", CTLTYPE_NODE }, \
+	{ "xerox_ns", CTLTYPE_NODE }, \
+	{ "iso", CTLTYPE_NODE }, \
+	{ "ecma", CTLTYPE_NODE }, /* slot 8 == AF_ECMA */ \
+	{ "datakit", CTLTYPE_NODE }, \
+	{ "ccitt", CTLTYPE_NODE }, \
+	{ "ibm_sna", CTLTYPE_NODE }, \
+	{ "decnet", CTLTYPE_NODE }, \
+	{ "dec_dli", CTLTYPE_NODE }, \
+	{ "lat", CTLTYPE_NODE }, \
+	{ "hylink", CTLTYPE_NODE }, \
+	{ "appletalk", CTLTYPE_NODE }, \
+	{ "route", CTLTYPE_NODE }, \
+	{ "link_layer", CTLTYPE_NODE }, \
+	{ "xtp", CTLTYPE_NODE }, \
+	{ "coip", CTLTYPE_NODE }, \
+	{ "cnt", CTLTYPE_NODE }, \
+	{ "rtip", CTLTYPE_NODE }, \
+	{ "ipx", CTLTYPE_NODE }, \
+	{ "sip", CTLTYPE_NODE }, \
+	{ "pip", CTLTYPE_NODE }, \
+}
+
+/*
+ * PF_ROUTE - Routing table
+ *
+ * Three additional levels are defined:
+ *	Fourth: address family, 0 is wildcard
+ *	Fifth: type of info, defined below
+ *	Sixth: flag(s) to mask with for NET_RT_FLAGS
+ */
+#define NET_RT_DUMP	1		/* dump; may limit to a.f. */
+#define NET_RT_FLAGS	2		/* by flags, e.g. RESOLVING */
+#define NET_RT_IFLIST	3		/* survey interface list */
+#define	NET_RT_MAXID	4
+
+#define CTL_NET_RT_NAMES { \
+	{ 0, 0 }, \
+	{ "dump", CTLTYPE_STRUCT }, \
+	{ "flags", CTLTYPE_STRUCT }, \
+	{ "iflist", CTLTYPE_STRUCT }, \
+}
+
+/*
+ * Maximum queue length specifiable by listen.
+ */
+#define	SOMAXCONN	5
+
+/*
+ * Message header for recvmsg and sendmsg calls.
+ * Used value-result for recvmsg, value only for sendmsg.
+ */
+struct msghdr {
+	caddr_t	msg_name;		/* optional address */
+	u_int	msg_namelen;		/* size of address */
+	struct	iovec *msg_iov;		/* scatter/gather array */
+	u_int	msg_iovlen;		/* # elements in msg_iov */
+	caddr_t	msg_control;		/* ancillary data, see below */
+	u_int	msg_controllen;		/* ancillary data buffer len */
+	int	msg_flags;		/* flags on received message */
+};
+
+#define	MSG_OOB		0x1		/* process out-of-band data */
+#define	MSG_PEEK	0x2		/* peek at incoming message */
+#define	MSG_DONTROUTE	0x4		/* send without using routing tables */
+#define	MSG_EOR		0x8		/* data completes record */
+#define	MSG_TRUNC	0x10		/* data discarded before delivery */
+#define	MSG_CTRUNC	0x20		/* control data lost before delivery */
+#define	MSG_WAITALL	0x40		/* wait for full request or error */
+#define	MSG_DONTWAIT	0x80		/* this message should be nonblocking */
+
+/*
+ * Header for ancillary data objects in msg_control buffer.
+ * Used for additional information with/about a datagram
+ * not expressible by flags.  The format is a sequence
+ * of message elements headed by cmsghdr structures.
+ */
+struct cmsghdr {
+	u_int	cmsg_len;		/* data byte count, including hdr */
+	int	cmsg_level;		/* originating protocol */
+	int	cmsg_type;		/* protocol-specific type */
+/* followed by	u_char  cmsg_data[]; */
+};
+
+/* given pointer to struct cmsghdr, return pointer to data */
+#define	CMSG_DATA(cmsg)		((u_char *)((cmsg) + 1))
+
+/* given pointer to struct cmsghdr, return next cmsghdr, or NULL if it would overrun msg_control */
+#define	CMSG_NXTHDR(mhdr, cmsg)	\
+	(((caddr_t)(cmsg) + ALIGN((cmsg)->cmsg_len) + sizeof(struct cmsghdr) > \
+	    (mhdr)->msg_control + (mhdr)->msg_controllen) ? \
+	    (struct cmsghdr *)NULL : \
+	    (struct cmsghdr *)((caddr_t)(cmsg) + ALIGN((cmsg)->cmsg_len)))
+
+#define	CMSG_FIRSTHDR(mhdr)	((struct cmsghdr *)(mhdr)->msg_control)
+
+/* "Socket"-level control message types: */
+#define	SCM_RIGHTS	0x01		/* access rights (array of int) */
+
+/*
+ * 4.3 compat sockaddr, move to compat file later
+ */
+struct osockaddr {
+	u_short	sa_family;		/* address family */
+	char	sa_data[14];		/* up to 14 bytes of direct address */
+};
+
+/*
+ * 4.3-compat message header (move to compat file later).
+ */
+struct omsghdr {
+	caddr_t	msg_name;		/* optional address */
+	int	msg_namelen;		/* size of address */
+	struct	iovec *msg_iov;		/* scatter/gather array */
+	int	msg_iovlen;		/* # elements in msg_iov */
+	caddr_t	msg_accrights;		/* access rights sent/received */
+	int	msg_accrightslen;
+};
+
+#ifndef	KERNEL
+
+#include <sys/cdefs.h>
+
+__BEGIN_DECLS
+int	accept __P((int, struct sockaddr *, int *));
+int	bind __P((int, const struct sockaddr *, int));
+int	connect __P((int, const struct sockaddr *, int));
+int	getpeername __P((int, struct sockaddr *, int *));
+int	getsockname __P((int, struct sockaddr *, int *));
+int	getsockopt __P((int, int, int, void *, int *));
+int	listen __P((int, int));
+ssize_t	recv __P((int, void *, size_t, int));
+ssize_t	recvfrom __P((int, void *, size_t, int, struct sockaddr *, int *));
+ssize_t	recvmsg __P((int, struct msghdr *, int));
+ssize_t	send __P((int, const void *, size_t, int));
+ssize_t	sendto __P((int, const void *,
+	    size_t, int, const struct sockaddr *, int));
+ssize_t	sendmsg __P((int, const struct msghdr *, int));
+int	setsockopt __P((int, int, int, const void *, int));
+int	shutdown __P((int, int));
+int	socket __P((int, int, int));
+int	socketpair __P((int, int, int, int *));
+__END_DECLS
+
+#endif /* !KERNEL */
+#endif /* !_SYS_SOCKET_H_ */
diff --git a/sys/sys/socketvar.h b/sys/sys/socketvar.h
new file mode 100644
index 00000000000..ff104046c7c
--- /dev/null
+++ b/sys/sys/socketvar.h
@@ -0,0 +1,207 @@
+/*-
+ * Copyright (c) 1982, 1986, 1990, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)socketvar.h	8.1 (Berkeley) 6/2/93
+ */
+
+#include <sys/select.h>			/* for struct selinfo */
+
+/*
+ * Kernel structure per socket.
+ * Contains send and receive buffer queues,
+ * handle on protocol and pointer to protocol
+ * private data and error information.
+ */
+struct socket {
+	short	so_type;		/* generic type, see socket.h */
+	short	so_options;		/* from socket call, see socket.h */
+	short	so_linger;		/* time to linger while closing */
+	short	so_state;		/* internal state flags SS_*, below */
+	caddr_t	so_pcb;			/* protocol control block */
+	struct	protosw *so_proto;	/* protocol handle */
+/*
+ * Variables for connection queueing.
+ * Socket where accepts occur is so_head in all subsidiary sockets.
+ * If so_head is 0, socket is not related to an accept.
+ * For head socket so_q0 queues partially completed connections,
+ * while so_q is a queue of connections ready to be accepted.
+ * If a connection is aborted and it has so_head set, then
+ * it has to be pulled out of either so_q0 or so_q.
+ * We allow connections to queue up based on current queue lengths
+ * and limit on number of queued connections for this socket.
+ */
+	struct	socket *so_head;	/* back pointer to accept socket */
+	struct	socket *so_q0;		/* queue of partial connections */
+	struct	socket *so_q;		/* queue of incoming connections */
+	short	so_q0len;		/* partials on so_q0 */
+	short	so_qlen;		/* number of connections on so_q */
+	short	so_qlimit;		/* max number queued connections */
+	short	so_timeo;		/* connection timeout */
+	u_short	so_error;		/* error affecting connection */
+	pid_t	so_pgid;		/* pgid for signals */
+	u_long	so_oobmark;		/* chars to oob mark */
+/*
+ * Variables for socket buffering.
+ */
+	struct	sockbuf {
+		u_long	sb_cc;		/* actual chars in buffer */
+		u_long	sb_hiwat;	/* max actual char count */
+		u_long	sb_mbcnt;	/* chars of mbufs used */
+		u_long	sb_mbmax;	/* max chars of mbufs to use */
+		long	sb_lowat;	/* low water mark */
+		struct	mbuf *sb_mb;	/* the mbuf chain */
+		struct	selinfo sb_sel;	/* process selecting read/write */
+		short	sb_flags;	/* flags, see below */
+		short	sb_timeo;	/* timeout for read/write */
+	} so_rcv, so_snd;
+#define	SB_MAX		(256*1024)	/* default for max chars in sockbuf */
+#define	SB_LOCK		0x01		/* lock on data queue */
+#define	SB_WANT		0x02		/* someone is waiting to lock */
+#define	SB_WAIT		0x04		/* someone is waiting for data/space */
+#define	SB_SEL		0x08		/* someone is selecting */
+#define	SB_ASYNC	0x10		/* ASYNC I/O, need signals */
+#define	SB_NOTIFY	(SB_WAIT|SB_SEL|SB_ASYNC)
+#define	SB_NOINTR	0x40		/* operations not interruptible */
+
+	caddr_t	so_tpcb;		/* Wisc. protocol control block XXX */
+	void	(*so_upcall) __P((struct socket *so, caddr_t arg, int waitf));
+	caddr_t	so_upcallarg;		/* Arg for above */
+};
+
+/*
+ * Socket state bits.
+ */
+#define	SS_NOFDREF		0x001	/* no file table ref any more */
+#define	SS_ISCONNECTED		0x002	/* socket connected to a peer */
+#define	SS_ISCONNECTING		0x004	/* in process of connecting to peer */
+#define	SS_ISDISCONNECTING	0x008	/* in process of disconnecting */
+#define	SS_CANTSENDMORE		0x010	/* can't send more data to peer */
+#define	SS_CANTRCVMORE		0x020	/* can't receive more data from peer */
+#define	SS_RCVATMARK		0x040	/* at mark on input */
+
+#define	SS_PRIV			0x080	/* privileged for broadcast, raw... */
+#define	SS_NBIO			0x100	/* non-blocking ops */
+#define	SS_ASYNC		0x200	/* async i/o notify */
+#define	SS_ISCONFIRMING		0x400	/* deciding to accept connection req */
+
+
+/*
+ * Macros for sockets and socket buffering.
+ */
+
+/*
+ * How much space is there in a socket buffer (so->so_snd or so->so_rcv)?
+ * This is problematical if the fields are unsigned, as the space might
+ * still be negative (cc > hiwat or mbcnt > mbmax).  Should detect
+ * overflow and return 0.  Should use "lmin" but it doesn't exist now.
+ */
+#define	sbspace(sb) \
+    ((long) imin((int)((sb)->sb_hiwat - (sb)->sb_cc), \
+	 (int)((sb)->sb_mbmax - (sb)->sb_mbcnt)))
+
+/* do we have to send all at once on a socket? */
+#define	sosendallatonce(so) \
+    ((so)->so_proto->pr_flags & PR_ATOMIC)
+
+/* can we read something from so? */
+#define	soreadable(so) \
+    ((so)->so_rcv.sb_cc >= (so)->so_rcv.sb_lowat || \
+	((so)->so_state & SS_CANTRCVMORE) || \
+	(so)->so_qlen || (so)->so_error)
+
+/* can we write something to so? */
+#define	sowriteable(so) \
+    (sbspace(&(so)->so_snd) >= (so)->so_snd.sb_lowat && \
+	(((so)->so_state&SS_ISCONNECTED) || \
+	  ((so)->so_proto->pr_flags&PR_CONNREQUIRED)==0) || \
+     ((so)->so_state & SS_CANTSENDMORE) || \
+     (so)->so_error)
+
+/* charge mbuf m to sb: add m_len to sb_cc, and MSIZE (+ ext_size if M_EXT) to sb_mbcnt */
+#define	sballoc(sb, m) { \
+	(sb)->sb_cc += (m)->m_len; \
+	(sb)->sb_mbcnt += MSIZE; \
+	if ((m)->m_flags & M_EXT) \
+		(sb)->sb_mbcnt += (m)->m_ext.ext_size; \
+}
+
+/* uncharge mbuf m from sb: subtract m_len from sb_cc, and MSIZE (+ ext_size if M_EXT) from sb_mbcnt */
+#define	sbfree(sb, m) { \
+	(sb)->sb_cc -= (m)->m_len; \
+	(sb)->sb_mbcnt -= MSIZE; \
+	if ((m)->m_flags & M_EXT) \
+		(sb)->sb_mbcnt -= (m)->m_ext.ext_size; \
+}
+
+/*
+ * Set lock on sockbuf sb; if already held, sleep via sb_lock() when wf is
+ * M_WAITOK (interruptible unless SB_NOINTR is set), else yield EWOULDBLOCK.
+ * Value: 0 if locked here, otherwise sb_lock()'s result or EWOULDBLOCK.
+ */
+#define sblock(sb, wf) ((sb)->sb_flags & SB_LOCK ? \
+		(((wf) == M_WAITOK) ? sb_lock(sb) : EWOULDBLOCK) : \
+		(((sb)->sb_flags |= SB_LOCK), 0))
+
+/* release lock on sockbuf sb; if SB_WANT was set, clear it and wakeup() on &sb_flags */
+#define	sbunlock(sb) { \
+	(sb)->sb_flags &= ~SB_LOCK; \
+	if ((sb)->sb_flags & SB_WANT) { \
+		(sb)->sb_flags &= ~SB_WANT; \
+		wakeup((caddr_t)&(sb)->sb_flags); \
+	} \
+}
+
+#define	sorwakeup(so)	{ sowakeup((so), &(so)->so_rcv); \
+			  if ((so)->so_upcall) \
+			    (*((so)->so_upcall))((so), (so)->so_upcallarg, M_DONTWAIT); \
+			}
+
+#define	sowwakeup(so)	sowakeup((so), &(so)->so_snd)
+
+#ifdef KERNEL
+extern	u_long sb_max;		/* declaration only; needs one definition in a .c file */
+/* to catch callers missing new second argument to sonewconn: */
+#define	sonewconn(head, connstatus)	sonewconn1((head), (connstatus))
+struct	socket *sonewconn1 __P((struct socket *head, int connstatus));
+
+/* strings for sleep message: */
+extern	char netio[], netcon[], netcls[];
+
+/*
+ * File operations on sockets.
+ */
+int	soo_read __P((struct file *fp, struct uio *uio, struct ucred *cred));
+int	soo_write __P((struct file *fp, struct uio *uio, struct ucred *cred));
+int	soo_ioctl __P((struct file *fp, int com, caddr_t data, struct proc *p));
+int	soo_select __P((struct file *fp, int which, struct proc *p));
+int 	soo_close __P((struct file *fp, struct proc *p));
+#endif
diff --git a/sys/sys/sockio.h b/sys/sys/sockio.h
new file mode 100644
index 00000000000..eb5a44a598d
--- /dev/null
+++ b/sys/sys/sockio.h
@@ -0,0 +1,77 @@
+/*-
+ * Copyright (c) 1982, 1986, 1990, 1993, 1994
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)sockio.h	8.1 (Berkeley) 3/28/94
+ */
+
+#ifndef	_SYS_SOCKIO_H_
+#define	_SYS_SOCKIO_H_
+
+#include <sys/ioccom.h>
+
+/* Socket ioctl's. */
+#define	SIOCSHIWAT	 _IOW('s',  0, int)		/* set high watermark */
+#define	SIOCGHIWAT	 _IOR('s',  1, int)		/* get high watermark */
+#define	SIOCSLOWAT	 _IOW('s',  2, int)		/* set low watermark */
+#define	SIOCGLOWAT	 _IOR('s',  3, int)		/* get low watermark */
+#define	SIOCATMARK	 _IOR('s',  7, int)		/* at oob mark? */
+#define	SIOCSPGRP	 _IOW('s',  8, int)		/* set process group */
+#define	SIOCGPGRP	 _IOR('s',  9, int)		/* get process group */
+
+#define	SIOCADDRT	 _IOW('r', 10, struct ortentry)	/* add route */
+#define	SIOCDELRT	 _IOW('r', 11, struct ortentry)	/* delete route */
+
+#define	SIOCSIFADDR	 _IOW('i', 12, struct ifreq)	/* set ifnet address */
+#define	OSIOCGIFADDR	_IOWR('i', 13, struct ifreq)	/* get ifnet address */
+#define	SIOCGIFADDR	_IOWR('i', 33, struct ifreq)	/* get ifnet address */
+#define	SIOCSIFDSTADDR	 _IOW('i', 14, struct ifreq)	/* set p-p address */
+#define	OSIOCGIFDSTADDR	_IOWR('i', 15, struct ifreq)	/* get p-p address */
+#define	SIOCGIFDSTADDR	_IOWR('i', 34, struct ifreq)	/* get p-p address */
+#define	SIOCSIFFLAGS	 _IOW('i', 16, struct ifreq)	/* set ifnet flags */
+#define	SIOCGIFFLAGS	_IOWR('i', 17, struct ifreq)	/* get ifnet flags */
+#define	OSIOCGIFBRDADDR	_IOWR('i', 18, struct ifreq)	/* get broadcast addr */
+#define	SIOCGIFBRDADDR	_IOWR('i', 35, struct ifreq)	/* get broadcast addr */
+#define	SIOCSIFBRDADDR	 _IOW('i', 19, struct ifreq)	/* set broadcast addr */
+#define	OSIOCGIFCONF	_IOWR('i', 20, struct ifconf)	/* get ifnet list */
+#define	SIOCGIFCONF	_IOWR('i', 36, struct ifconf)	/* get ifnet list */
+#define	OSIOCGIFNETMASK	_IOWR('i', 21, struct ifreq)	/* get net addr mask */
+#define	SIOCGIFNETMASK	_IOWR('i', 37, struct ifreq)	/* get net addr mask */
+#define	SIOCSIFNETMASK	 _IOW('i', 22, struct ifreq)	/* set net addr mask */
+#define	SIOCGIFMETRIC	_IOWR('i', 23, struct ifreq)	/* get IF metric */
+#define	SIOCSIFMETRIC	 _IOW('i', 24, struct ifreq)	/* set IF metric */
+#define	SIOCDIFADDR	 _IOW('i', 25, struct ifreq)	/* delete IF addr */
+#define	SIOCAIFADDR	 _IOW('i', 26, struct ifaliasreq)/* add/chg IF alias */
+
+#define	SIOCADDMULTI	 _IOW('i', 49, struct ifreq)	/* add m'cast addr */
+#define	SIOCDELMULTI	 _IOW('i', 50, struct ifreq)	/* del m'cast addr */
+
+#endif /* !_SYS_SOCKIO_H_ */
diff --git a/sys/sys/stat.h b/sys/sys/stat.h
new file mode 100644
index 00000000000..07020c36770
--- /dev/null
+++ b/sys/sys/stat.h
@@ -0,0 +1,193 @@
+/*-
+ * Copyright (c) 1982, 1986, 1989, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)stat.h	8.6 (Berkeley) 3/8/94
+ */
+
+#ifndef _SYS_STAT_H_
+#define	_SYS_STAT_H_
+
+#include <sys/time.h>
+
+#ifndef _POSIX_SOURCE
+struct ostat {		/* plain short/long fields; cf. struct stat's typedefs */
+	unsigned short	st_dev;		/* inode's device */
+	ino_t	st_ino;			/* inode's number */
+	mode_t	st_mode;		/* inode protection mode */
+	nlink_t	st_nlink;		/* number of hard links */
+	unsigned short	st_uid;		/* user ID of the file's owner */
+	unsigned short	st_gid;		/* group ID of the file's group */
+	unsigned short	st_rdev;	/* device type */
+	long	st_size;		/* file size, in bytes */
+	struct	timespec st_atimespec;	/* time of last access */
+	struct	timespec st_mtimespec;	/* time of last data modification */
+	struct	timespec st_ctimespec;	/* time of last file status change */
+	long	st_blksize;		/* optimal blocksize for I/O */
+	long	st_blocks;		/* blocks allocated for file */
+	unsigned long	st_flags;	/* user defined flags for file */
+	unsigned long	st_gen;		/* file generation number */
+};
+#endif /* !_POSIX_SOURCE */
+
+struct stat {
+	dev_t	st_dev;			/* inode's device */
+	ino_t	st_ino;			/* inode's number */
+	mode_t	st_mode;		/* inode protection mode */
+	nlink_t	st_nlink;		/* number of hard links */
+	uid_t	st_uid;			/* user ID of the file's owner */
+	gid_t	st_gid;			/* group ID of the file's group */
+	dev_t	st_rdev;		/* device type */
+	struct	timespec st_atimespec;	/* time of last access */
+	struct	timespec st_mtimespec;	/* time of last data modification */
+	struct	timespec st_ctimespec;	/* time of last file status change */
+	off_t	st_size;		/* file size, in bytes */
+	quad_t	st_blocks;		/* blocks allocated for file */
+	unsigned long	st_blksize;	/* optimal blocksize for I/O */
+	unsigned long	st_flags;	/* user defined flags for file */
+	unsigned long	st_gen;		/* file generation number */
+	long	st_lspare;		/* spare; no other use in this header */
+	quad_t	st_qspare[2];		/* spare; no other use in this header */
+};
+#define st_atime st_atimespec.ts_sec	/* shorthand for the ts_sec member */
+#define st_mtime st_mtimespec.ts_sec	/* shorthand for the ts_sec member */
+#define st_ctime st_ctimespec.ts_sec	/* shorthand for the ts_sec member */
+
+#define	S_ISUID	0004000			/* set user id on execution */
+#define	S_ISGID	0002000			/* set group id on execution */
+#ifndef _POSIX_SOURCE
+#define	S_ISTXT	0001000			/* sticky bit; same value as S_ISVTX */
+#endif /* !_POSIX_SOURCE */
+
+#define	S_IRWXU	0000700			/* RWX mask for owner */
+#define	S_IRUSR	0000400			/* R for owner */
+#define	S_IWUSR	0000200			/* W for owner */
+#define	S_IXUSR	0000100			/* X for owner */
+
+#ifndef _POSIX_SOURCE
+#define	S_IREAD		S_IRUSR		/* alternate name for S_IRUSR */
+#define	S_IWRITE	S_IWUSR		/* alternate name for S_IWUSR */
+#define	S_IEXEC		S_IXUSR		/* alternate name for S_IXUSR */
+#endif /* !_POSIX_SOURCE */
+
+#define	S_IRWXG	0000070			/* RWX mask for group */
+#define	S_IRGRP	0000040			/* R for group */
+#define	S_IWGRP	0000020			/* W for group */
+#define	S_IXGRP	0000010			/* X for group */
+
+#define	S_IRWXO	0000007			/* RWX mask for other */
+#define	S_IROTH	0000004			/* R for other */
+#define	S_IWOTH	0000002			/* W for other */
+#define	S_IXOTH	0000001			/* X for other */
+
+#ifndef _POSIX_SOURCE
+#define	S_IFMT	 0170000		/* type of file mask */
+#define	S_IFIFO	 0010000		/* named pipe (fifo) */
+#define	S_IFCHR	 0020000		/* character special */
+#define	S_IFDIR	 0040000		/* directory */
+#define	S_IFBLK	 0060000		/* block special */
+#define	S_IFREG	 0100000		/* regular */
+#define	S_IFLNK	 0120000		/* symbolic link */
+#define	S_IFSOCK 0140000		/* socket */
+#define	S_ISVTX	 0001000		/* save swapped text even after use */
+#endif /* !_POSIX_SOURCE */
+
+#define	S_ISDIR(m)	(((m) & 0170000) == 0040000)	/* directory */
+#define	S_ISCHR(m)	(((m) & 0170000) == 0020000)	/* char special */
+#define	S_ISBLK(m)	(((m) & 0170000) == 0060000)	/* block special */
+#define	S_ISREG(m)	(((m) & 0170000) == 0100000)	/* regular file */
+#define	S_ISFIFO(m)	(((m) & 0170000) == 0010000 || /* 0010000: fifo */ \
+			 ((m) & 0170000) == 0140000)	/* fifo or socket */
+#ifndef _POSIX_SOURCE
+#define	S_ISLNK(m)	(((m) & 0170000) == 0120000)	/* symbolic link */
+#define	S_ISSOCK(m)	(((m) & 0170000) == 0010000 || /* 0010000: fifo */ \
+			 ((m) & 0170000) == 0140000)	/* fifo or socket */
+#endif /* !_POSIX_SOURCE; note S_ISFIFO/S_ISSOCK evaluate m twice */
+
+#ifndef _POSIX_SOURCE
+#define	ACCESSPERMS	(S_IRWXU|S_IRWXG|S_IRWXO)	/* 0777 */
+							/* 07777 */
+#define	ALLPERMS	(S_ISUID|S_ISGID|S_ISTXT|S_IRWXU|S_IRWXG|S_IRWXO)
+							/* 0666 */
+#define	DEFFILEMODE	(S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH)
+
+#define S_BLKSIZE	512		/* block size used in the stat struct */
+
+/*
+ * Definitions of flags stored in file flags word.
+ *
+ * Super-user and owner changeable flags.
+ */
+#define	UF_SETTABLE	0x0000ffff	/* mask of owner changeable flags */
+#define	UF_NODUMP	0x00000001	/* do not dump file */
+#define	UF_IMMUTABLE	0x00000002	/* file may not be changed */
+#define	UF_APPEND	0x00000004	/* writes to file may only append */
+/*
+ * Super-user changeable flags.
+ */
+#define	SF_SETTABLE	0xffff0000	/* mask of superuser changeable flags */
+#define	SF_ARCHIVED	0x00010000	/* file is archived */
+#define	SF_IMMUTABLE	0x00020000	/* file may not be changed */
+#define	SF_APPEND	0x00040000	/* writes to file may only append */
+
+#ifdef KERNEL
+/*
+ * Shorthand abbreviations of above.
+ */
+#define	APPEND		(UF_APPEND | SF_APPEND)
+#define	IMMUTABLE	(UF_IMMUTABLE | SF_IMMUTABLE)
+#endif /* KERNEL */
+#endif /* !_POSIX_SOURCE */
+
+#ifndef KERNEL
+#include <sys/cdefs.h>
+
+__BEGIN_DECLS
+int	chmod __P((const char *, mode_t));
+int	fstat __P((int, struct stat *));
+int	mkdir __P((const char *, mode_t));
+int	mkfifo __P((const char *, mode_t));
+int	stat __P((const char *, struct stat *));
+mode_t	umask __P((mode_t));
+#ifndef _POSIX_SOURCE
+int	chflags __P((const char *, u_long));
+int	fchflags __P((int, u_long));
+int	fchmod __P((int, mode_t));
+int	lstat __P((const char *, struct stat *));
+#endif /* !_POSIX_SOURCE */
+__END_DECLS
+#endif /* !KERNEL */
+#endif /* !_SYS_STAT_H_ */
diff --git a/sys/sys/syscall.h b/sys/sys/syscall.h
new file mode 100644
index 00000000000..8df8eb4fc51
--- /dev/null
+++ b/sys/sys/syscall.h
@@ -0,0 +1,186 @@
+/*
+ * System call numbers.
+ *
+ * DO NOT EDIT-- this file is automatically generated.
+ * created from	@(#)syscalls.master	8.2 (Berkeley) 1/13/94
+ */
+
+#define	SYS_syscall	0
+#define	SYS_exit	1
+#define	SYS_fork	2
+#define	SYS_read	3
+#define	SYS_write	4
+#define	SYS_open	5
+#define	SYS_close	6
+#define	SYS_wait4	7
+				/* 8 is old creat */
+#define	SYS_link	9
+#define	SYS_unlink	10
+				/* 11 is obsolete execv */
+#define	SYS_chdir	12
+#define	SYS_fchdir	13
+#define	SYS_mknod	14
+#define	SYS_chmod	15
+#define	SYS_chown	16
+#define	SYS_break	17
+#define	SYS_getfsstat	18
+				/* 19 is old lseek */
+#define	SYS_getpid	20
+#define	SYS_mount	21
+#define	SYS_unmount	22
+#define	SYS_setuid	23
+#define	SYS_getuid	24
+#define	SYS_geteuid	25
+#define	SYS_ptrace	26
+#define	SYS_recvmsg	27
+#define	SYS_sendmsg	28
+#define	SYS_recvfrom	29
+#define	SYS_accept	30
+#define	SYS_getpeername	31
+#define	SYS_getsockname	32
+#define	SYS_access	33
+#define	SYS_chflags	34
+#define	SYS_fchflags	35
+#define	SYS_sync	36
+#define	SYS_kill	37
+				/* 38 is old stat */
+#define	SYS_getppid	39
+				/* 40 is old lstat */
+#define	SYS_dup	41
+#define	SYS_pipe	42
+#define	SYS_getegid	43
+#define	SYS_profil	44
+#define	SYS_ktrace	45
+#define	SYS_sigaction	46
+#define	SYS_getgid	47
+#define	SYS_sigprocmask	48
+#define	SYS_getlogin	49
+#define	SYS_setlogin	50
+#define	SYS_acct	51
+#define	SYS_sigpending	52
+#define	SYS_sigaltstack	53
+#define	SYS_ioctl	54
+#define	SYS_reboot	55
+#define	SYS_revoke	56
+#define	SYS_symlink	57
+#define	SYS_readlink	58
+#define	SYS_execve	59
+#define	SYS_umask	60
+#define	SYS_chroot	61
+				/* 62 is old fstat */
+				/* 63 is old getkerninfo */
+				/* 64 is old getpagesize */
+#define	SYS_msync	65
+#define	SYS_vfork	66
+				/* 67 is obsolete vread */
+				/* 68 is obsolete vwrite */
+#define	SYS_sbrk	69
+#define	SYS_sstk	70
+				/* 71 is old mmap */
+#define	SYS_vadvise	72
+#define	SYS_munmap	73
+#define	SYS_mprotect	74
+#define	SYS_madvise	75
+				/* 76 is obsolete vhangup */
+				/* 77 is obsolete vlimit */
+#define	SYS_mincore	78
+#define	SYS_getgroups	79
+#define	SYS_setgroups	80
+#define	SYS_getpgrp	81
+#define	SYS_setpgid	82
+#define	SYS_setitimer	83
+				/* 84 is old wait */
+#define	SYS_swapon	85
+#define	SYS_getitimer	86
+				/* 87 is old gethostname */
+				/* 88 is old sethostname */
+#define	SYS_getdtablesize	89
+#define	SYS_dup2	90
+#define	SYS_fcntl	92
+#define	SYS_select	93
+#define	SYS_fsync	95
+#define	SYS_setpriority	96
+#define	SYS_socket	97
+#define	SYS_connect	98
+				/* 99 is old accept */
+#define	SYS_getpriority	100
+				/* 101 is old send */
+				/* 102 is old recv */
+#define	SYS_sigreturn	103
+#define	SYS_bind	104
+#define	SYS_setsockopt	105
+#define	SYS_listen	106
+				/* 107 is obsolete vtimes */
+				/* 108 is old sigvec */
+				/* 109 is old sigblock */
+				/* 110 is old sigsetmask */
+#define	SYS_sigsuspend	111
+				/* 112 is old sigstack */
+				/* 113 is old recvmsg */
+				/* 114 is old sendmsg */
+#define	SYS_vtrace	115
+				/* 115 is obsolete vtrace (NOTE(review): yet defined above) */
+#define	SYS_gettimeofday	116
+#define	SYS_getrusage	117
+#define	SYS_getsockopt	118
+#define	SYS_resuba	119
+#define	SYS_readv	120
+#define	SYS_writev	121
+#define	SYS_settimeofday	122
+#define	SYS_fchown	123
+#define	SYS_fchmod	124
+				/* 125 is old recvfrom */
+				/* 126 is old setreuid */
+				/* 127 is old setregid */
+#define	SYS_rename	128
+				/* 129 is old truncate */
+				/* 130 is old ftruncate */
+#define	SYS_flock	131
+#define	SYS_mkfifo	132
+#define	SYS_sendto	133
+#define	SYS_shutdown	134
+#define	SYS_socketpair	135
+#define	SYS_mkdir	136
+#define	SYS_rmdir	137
+#define	SYS_utimes	138
+				/* 139 is obsolete 4.2 sigreturn */
+#define	SYS_adjtime	140
+				/* 141 is old getpeername */
+				/* 142 is old gethostid */
+				/* 143 is old sethostid */
+				/* 144 is old getrlimit */
+				/* 145 is old setrlimit */
+				/* 146 is old killpg */
+#define	SYS_setsid	147
+#define	SYS_quotactl	148
+				/* 149 is old quota */
+				/* 150 is old getsockname */
+#define	SYS_nfssvc	155
+				/* 156 is old getdirentries */
+#define	SYS_statfs	157
+#define	SYS_fstatfs	158
+#define	SYS_getfh	161
+#define	SYS_shmsys	171
+#define	SYS_setgid	181
+#define	SYS_setegid	182
+#define	SYS_seteuid	183
+#define	SYS_lfs_bmapv	184
+#define	SYS_lfs_markv	185
+#define	SYS_lfs_segclean	186
+#define	SYS_lfs_segwait	187
+#define	SYS_stat	188
+#define	SYS_fstat	189
+#define	SYS_lstat	190
+#define	SYS_pathconf	191
+#define	SYS_fpathconf	192
+#define	SYS_getrlimit	194
+#define	SYS_setrlimit	195
+#define	SYS_getdirentries	196
+#define	SYS_mmap	197
+#define	SYS___syscall	198
+#define	SYS_lseek	199
+#define	SYS_truncate	200
+#define	SYS_ftruncate	201
+#define	SYS___sysctl	202
+#define	SYS_mlock	203
+#define	SYS_munlock	204
diff --git a/sys/sys/sysctl.h b/sys/sys/sysctl.h
new file mode 100644
index 00000000000..4ad83a74542
--- /dev/null
+++ b/sys/sys/sysctl.h
@@ -0,0 +1,344 @@
+/*
+ * Copyright (c) 1989, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Mike Karels at Berkeley Software Design, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)sysctl.h	8.1 (Berkeley) 6/2/93
+ */
+
+#ifndef _SYS_SYSCTL_H_
+#define	_SYS_SYSCTL_H_
+
+/*
+ * These are for the eproc structure defined below.
+ */
+#ifndef KERNEL
+#include <sys/time.h>
+#include <sys/ucred.h>
+#include <sys/proc.h>
+#include <vm/vm.h>
+#endif
+
+/*
+ * Definitions for sysctl call.  The sysctl call uses a hierarchical name
+ * for objects that can be examined or modified.  The name is expressed as
+ * a sequence of integers.  Like a file path name, the meaning of each
+ * component depends on its place in the hierarchy.  The top-level and kern
+ * identifiers are defined here, and other identifiers are defined in the
+ * respective subsystem header files.
+ */
+
+#define CTL_MAXNAME	12	/* largest number of components supported */
+
+/*
+ * Each subsystem defined by sysctl defines a list of variables
+ * for that subsystem. Each name is either a node with further
+ * levels defined below it, or it is a leaf of some particular
+ * type given below. Each sysctl level defines a set of name/type
+ * pairs to be used by sysctl(1) in manipulating the subsystem.
+ */
+struct ctlname {
+	char	*ctl_name;	/* subsystem name */
+	int	ctl_type;	/* type of name: a CTLTYPE_* value */
+};
+#define	CTLTYPE_NODE	1	/* name is a node */
+#define	CTLTYPE_INT	2	/* name describes an integer */
+#define	CTLTYPE_STRING	3	/* name describes a string */
+#define	CTLTYPE_QUAD	4	/* name describes a 64-bit number */
+#define	CTLTYPE_STRUCT	5	/* name describes a structure */
+
+/*
+ * Top-level identifiers
+ */
+#define	CTL_UNSPEC	0		/* unused */
+#define	CTL_KERN	1		/* "high kernel": proc, limits */
+#define	CTL_VM		2		/* virtual memory */
+#define	CTL_FS		3		/* file system, mount type is next */
+#define	CTL_NET		4		/* network, see socket.h */
+#define	CTL_DEBUG	5		/* debugging parameters */
+#define	CTL_HW		6		/* generic cpu/io */
+#define	CTL_MACHDEP	7		/* machine dependent */
+#define	CTL_USER	8		/* user-level */
+#define	CTL_MAXID	9		/* number of valid top-level ids */
+
+#define CTL_NAMES { /* one entry per CTL_* id, in id order */ \
+	{ 0, 0 },	/* CTL_UNSPEC */ \
+	{ "kern", CTLTYPE_NODE }, \
+	{ "vm", CTLTYPE_NODE }, \
+	{ "fs", CTLTYPE_NODE }, \
+	{ "net", CTLTYPE_NODE }, \
+	{ "debug", CTLTYPE_NODE }, \
+	{ "hw", CTLTYPE_NODE }, \
+	{ "machdep", CTLTYPE_NODE }, \
+	{ "user", CTLTYPE_NODE }, \
+}
+
+/*
+ * CTL_KERN identifiers
+ */
+#define	KERN_OSTYPE	 	 1	/* string: system version */
+#define	KERN_OSRELEASE	 	 2	/* string: system release */
+#define	KERN_OSREV	 	 3	/* int: system revision */
+#define	KERN_VERSION	 	 4	/* string: compile time info */
+#define	KERN_MAXVNODES	 	 5	/* int: max vnodes */
+#define	KERN_MAXPROC	 	 6	/* int: max processes */
+#define	KERN_MAXFILES	 	 7	/* int: max open files */
+#define	KERN_ARGMAX	 	 8	/* int: max arguments to exec */
+#define	KERN_SECURELVL	 	 9	/* int: system security level */
+#define	KERN_HOSTNAME		10	/* string: hostname */
+#define	KERN_HOSTID		11	/* int: host identifier */
+#define	KERN_CLOCKRATE		12	/* struct: struct clockrate */
+#define	KERN_VNODE		13	/* struct: vnode structures */
+#define	KERN_PROC		14	/* struct: process entries */
+#define	KERN_FILE		15	/* struct: file entries */
+#define	KERN_PROF		16	/* node: kernel profiling info */
+#define	KERN_POSIX1		17	/* int: POSIX.1 version */
+#define	KERN_NGROUPS		18	/* int: # of supplemental group ids */
+#define	KERN_JOB_CONTROL	19	/* int: is job control available */
+#define	KERN_SAVED_IDS		20	/* int: saved set-user/group-ID */
+#define	KERN_BOOTTIME		21	/* struct: time kernel was booted */
+#define	KERN_MAXID		22	/* number of valid kern ids */
+
+#define CTL_KERN_NAMES { /* one entry per KERN_* id, in id order */ \
+	{ 0, 0 },	/* no KERN_* id 0 is defined */ \
+	{ "ostype", CTLTYPE_STRING }, \
+	{ "osrelease", CTLTYPE_STRING }, \
+	{ "osrevision", CTLTYPE_INT }, \
+	{ "version", CTLTYPE_STRING }, \
+	{ "maxvnodes", CTLTYPE_INT }, \
+	{ "maxproc", CTLTYPE_INT }, \
+	{ "maxfiles", CTLTYPE_INT }, \
+	{ "argmax", CTLTYPE_INT }, \
+	{ "securelevel", CTLTYPE_INT }, \
+	{ "hostname", CTLTYPE_STRING }, \
+	{ "hostid", CTLTYPE_INT }, \
+	{ "clockrate", CTLTYPE_STRUCT }, \
+	{ "vnode", CTLTYPE_STRUCT }, \
+	{ "proc", CTLTYPE_STRUCT }, \
+	{ "file", CTLTYPE_STRUCT }, \
+	{ "profiling", CTLTYPE_NODE }, \
+	{ "posix1version", CTLTYPE_INT }, \
+	{ "ngroups", CTLTYPE_INT }, \
+	{ "job_control", CTLTYPE_INT }, \
+	{ "saved_ids", CTLTYPE_INT }, \
+	{ "boottime", CTLTYPE_STRUCT }, \
+}
+
+/* 
+ * KERN_PROC subtypes
+ */
+#define KERN_PROC_ALL		0	/* everything */
+#define	KERN_PROC_PID		1	/* by process id */
+#define	KERN_PROC_PGRP		2	/* by process group id */
+#define	KERN_PROC_SESSION	3	/* by session of pid */
+#define	KERN_PROC_TTY		4	/* by controlling tty */
+#define	KERN_PROC_UID		5	/* by effective uid */
+#define	KERN_PROC_RUID		6	/* by real uid */
+
+/*
+ * KERN_PROC subtype ops return arrays of augmented proc structures:
+ */
+struct kinfo_proc {
+	struct	proc kp_proc;			/* proc structure */
+	struct	eproc {
+		struct	proc *e_paddr;		/* address of proc */
+		struct	session *e_sess;	/* session pointer */
+		struct	pcred e_pcred;		/* process credentials */
+		struct	ucred e_ucred;		/* current credentials */
+#ifdef sparc
+		struct {
+			segsz_t	vm_rssize;	/* resident set size */
+			segsz_t	vm_tsize;	/* text size */
+			segsz_t	vm_dsize;	/* data size */
+			segsz_t	vm_ssize;	/* stack size */
+		} e_vm;
+#else
+		struct	vmspace e_vm;		/* address space */
+#endif
+		pid_t	e_ppid;			/* parent process id */
+		pid_t	e_pgid;			/* process group id */
+		short	e_jobc;			/* job control counter */
+		dev_t	e_tdev;			/* controlling tty dev */
+		pid_t	e_tpgid;		/* tty process group id */
+		struct	session *e_tsess;	/* tty session pointer */
+#define	WMESGLEN	7
+		char	e_wmesg[WMESGLEN+1];	/* wchan message */
+		segsz_t e_xsize;		/* text size */
+		short	e_xrssize;		/* text rss */
+		short	e_xccount;		/* text references */
+		short	e_xswrss;		/* NOTE(review): undocumented */
+		long	e_flag;			/* presumably EPROC_* bits below */
+#define	EPROC_CTTY	0x01	/* controlling tty vnode active */
+#define	EPROC_SLEADER	0x02	/* session leader */
+		char	e_login[MAXLOGNAME];	/* setlogin() name */
+		long	e_spare[4];		/* spare; unused in this header */
+	} kp_eproc;
+};
+
+/*
+ * CTL_HW identifiers
+ */
+#define	HW_MACHINE	 1		/* string: machine class */
+#define	HW_MODEL	 2		/* string: specific machine model */
+#define	HW_NCPU		 3		/* int: number of cpus */
+#define	HW_BYTEORDER	 4		/* int: machine byte order */
+#define	HW_PHYSMEM	 5		/* int: total memory */
+#define	HW_USERMEM	 6		/* int: non-kernel memory */
+#define	HW_PAGESIZE	 7		/* int: software page size */
+#define	HW_DISKNAMES	 8		/* strings: disk drive names */
+#define	HW_DISKSTATS	 9		/* struct: diskstats[] */
+#define	HW_MAXID	10		/* number of valid hw ids */
+
+#define CTL_HW_NAMES { /* one entry per HW_* id, in id order */ \
+	{ 0, 0 },	/* no HW_* id 0 is defined */ \
+	{ "machine", CTLTYPE_STRING }, \
+	{ "model", CTLTYPE_STRING }, \
+	{ "ncpu", CTLTYPE_INT }, \
+	{ "byteorder", CTLTYPE_INT }, \
+	{ "physmem", CTLTYPE_INT }, \
+	{ "usermem", CTLTYPE_INT }, \
+	{ "pagesize", CTLTYPE_INT }, \
+	{ "disknames", CTLTYPE_STRUCT }, /* NOTE(review): id says "strings" */ \
+	{ "diskstats", CTLTYPE_STRUCT }, \
+}
+
+/*
+ * CTL_USER definitions
+ */
+#define	USER_CS_PATH		 1	/* string: _CS_PATH */
+#define	USER_BC_BASE_MAX	 2	/* int: BC_BASE_MAX */
+#define	USER_BC_DIM_MAX		 3	/* int: BC_DIM_MAX */
+#define	USER_BC_SCALE_MAX	 4	/* int: BC_SCALE_MAX */
+#define	USER_BC_STRING_MAX	 5	/* int: BC_STRING_MAX */
+#define	USER_COLL_WEIGHTS_MAX	 6	/* int: COLL_WEIGHTS_MAX */
+#define	USER_EXPR_NEST_MAX	 7	/* int: EXPR_NEST_MAX */
+#define	USER_LINE_MAX		 8	/* int: LINE_MAX */
+#define	USER_RE_DUP_MAX		 9	/* int: RE_DUP_MAX */
+#define	USER_POSIX2_VERSION	10	/* int: POSIX2_VERSION */
+#define	USER_POSIX2_C_BIND	11	/* int: POSIX2_C_BIND */
+#define	USER_POSIX2_C_DEV	12	/* int: POSIX2_C_DEV */
+#define	USER_POSIX2_CHAR_TERM	13	/* int: POSIX2_CHAR_TERM */
+#define	USER_POSIX2_FORT_DEV	14	/* int: POSIX2_FORT_DEV */
+#define	USER_POSIX2_FORT_RUN	15	/* int: POSIX2_FORT_RUN */
+#define	USER_POSIX2_LOCALEDEF	16	/* int: POSIX2_LOCALEDEF */
+#define	USER_POSIX2_SW_DEV	17	/* int: POSIX2_SW_DEV */
+#define	USER_POSIX2_UPE		18	/* int: POSIX2_UPE */
+#define	USER_STREAM_MAX		19	/* int: POSIX2_STREAM_MAX */
+#define	USER_TZNAME_MAX		20	/* int: POSIX2_TZNAME_MAX */
+#define	USER_MAXID		21	/* number of valid user ids */
+
+#define	CTL_USER_NAMES { /* one entry per USER_* id, in id order */ \
+	{ 0, 0 },	/* no USER_* id 0 is defined */ \
+	{ "cs_path", CTLTYPE_STRING }, \
+	{ "bc_base_max", CTLTYPE_INT }, \
+	{ "bc_dim_max", CTLTYPE_INT }, \
+	{ "bc_scale_max", CTLTYPE_INT }, \
+	{ "bc_string_max", CTLTYPE_INT }, \
+	{ "coll_weights_max", CTLTYPE_INT }, \
+	{ "expr_nest_max", CTLTYPE_INT }, \
+	{ "line_max", CTLTYPE_INT }, \
+	{ "re_dup_max", CTLTYPE_INT }, \
+	{ "posix2_version", CTLTYPE_INT }, \
+	{ "posix2_c_bind", CTLTYPE_INT }, \
+	{ "posix2_c_dev", CTLTYPE_INT }, \
+	{ "posix2_char_term", CTLTYPE_INT }, \
+	{ "posix2_fort_dev", CTLTYPE_INT }, \
+	{ "posix2_fort_run", CTLTYPE_INT }, \
+	{ "posix2_localedef", CTLTYPE_INT }, \
+	{ "posix2_sw_dev", CTLTYPE_INT }, \
+	{ "posix2_upe", CTLTYPE_INT }, \
+	{ "stream_max", CTLTYPE_INT }, \
+	{ "tzname_max", CTLTYPE_INT }, \
+}
+
+/*
+ * CTL_DEBUG definitions
+ *
+ * Second level identifier specifies which debug variable.
+ * Third level identifier specifies which structure component.
+ */
+#define	CTL_DEBUG_NAME		0	/* string: variable name */
+#define	CTL_DEBUG_VALUE		1	/* int: variable value */
+#define	CTL_DEBUG_MAXID		20	/* cf. debug0..debug19 externs */
+
+#ifdef	KERNEL
+#ifdef	DEBUG
+/*
+ * CTL_DEBUG variables.
+ *
+ * These are declared as separate variables so that they can be
+ * individually initialized at the location of their associated
+ * variable. The loader prevents multiple use by issuing errors
+ * if a variable is initialized in more than one place. They are
+ * aggregated into an array in debug_sysctl(), so that it can
+ * conveniently locate them when queried. If more debugging
+ * variables are added, they must also be declared here and also
+ * entered into the array.
+ */
+struct ctldebug {
+	char	*debugname;	/* name of debugging variable */
+	int	*debugvar;	/* pointer to debugging variable */
+};
+extern struct ctldebug debug0, debug1, debug2, debug3, debug4;
+extern struct ctldebug debug5, debug6, debug7, debug8, debug9;
+extern struct ctldebug debug10, debug11, debug12, debug13, debug14;
+extern struct ctldebug debug15, debug16, debug17, debug18, debug19;
+#endif	/* DEBUG */
+
+/*
+ * Internal sysctl function calling convention:
+ *
+ *	(*sysctlfn)(name, namelen, oldval, oldlenp, newval, newlen, p);
+ *
+ * The name parameter points at the next component of the name to be
+ * interpreted.  The namelen parameter is the number of integers in
+ * the name.  The final argument, p, is a struct proc pointer.
+ */
+typedef int (sysctlfn)
+    __P((int *, u_int, void *, size_t *, void *, size_t, struct proc *));
+
+int sysctl_int __P((void *, size_t *, void *, size_t, int *));
+int sysctl_rdint __P((void *, size_t *, void *, int));
+int sysctl_string __P((void *, size_t *, void *, size_t, char *, int));
+int sysctl_rdstring __P((void *, size_t *, void *, char *));
+int sysctl_rdstruct __P((void *, size_t *, void *, void *, int));
+void fill_eproc __P((struct proc *, struct eproc *));
+
+#else	/* !KERNEL */
+#include <sys/cdefs.h>
+
+__BEGIN_DECLS
+int	sysctl __P((int *, u_int, void *, size_t *, void *, size_t));
+__END_DECLS
+#endif	/* KERNEL */
+#endif	/* !_SYS_SYSCTL_H_ */
diff --git a/sys/sys/syslimits.h b/sys/sys/syslimits.h
new file mode 100644
index 00000000000..550000c6503
--- /dev/null
+++ b/sys/sys/syslimits.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 1988, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)syslimits.h	8.1 (Berkeley) 6/2/93
+ */
+
+#define	ARG_MAX			20480	/* max bytes for an exec function */
+#define	CHILD_MAX		   40	/* max simultaneous processes */
+#define	LINK_MAX		32767	/* max file link count */
+#define	MAX_CANON		  255	/* max bytes in term canon input line */
+#define	MAX_INPUT		  255	/* max bytes in terminal input */
+#define	NAME_MAX		  255	/* max bytes in a file name */
+#define	NGROUPS_MAX		   16	/* max supplemental group id's */
+#define	OPEN_MAX		   64	/* max open files per process */
+#define	PATH_MAX		 1024	/* max bytes in pathname */
+#define	PIPE_BUF		  512	/* max bytes for atomic pipe writes */
+
+#define	BC_BASE_MAX		   99	/* max ibase/obase values in bc(1) */
+#define	BC_DIM_MAX		 2048	/* max array elements in bc(1) */
+#define	BC_SCALE_MAX		   99	/* max scale value in bc(1) */
+#define	BC_STRING_MAX		 1000	/* max const string length in bc(1) */
+#define	COLL_WEIGHTS_MAX	    0	/* max weights for order keyword */
+#define	EXPR_NEST_MAX		   32	/* max expressions nested in expr(1) */
+#define	LINE_MAX		 2048	/* max bytes in an input line */
+#define	RE_DUP_MAX		  255	/* max repeats of an RE in interval notation */
diff --git a/sys/sys/syslog.h b/sys/sys/syslog.h
new file mode 100644
index 00000000000..935db2d4484
--- /dev/null
+++ b/sys/sys/syslog.h
@@ -0,0 +1,187 @@
+/*
+ * Copyright (c) 1982, 1986, 1988, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)syslog.h	8.1 (Berkeley) 6/2/93
+ */
+
+#define	_PATH_LOG	"/dev/log"
+
+/*
+ * priorities/facilities are encoded into a single 32-bit quantity, where the
+ * bottom 3 bits are the priority (0-7) and the top 28 bits are the facility
+ * (0-big number).  Both the priorities and the facilities map roughly
+ * one-to-one to strings in the syslogd(8) source code.  This mapping is
+ * included in this file.
+ *
+ * priorities (these are ordered)
+ */
+#define	LOG_EMERG	0	/* system is unusable */
+#define	LOG_ALERT	1	/* action must be taken immediately */
+#define	LOG_CRIT	2	/* critical conditions */
+#define	LOG_ERR		3	/* error conditions */
+#define	LOG_WARNING	4	/* warning conditions */
+#define	LOG_NOTICE	5	/* normal but significant condition */
+#define	LOG_INFO	6	/* informational */
+#define	LOG_DEBUG	7	/* debug-level messages */
+
+#define	LOG_PRIMASK	0x07	/* mask to extract priority part (internal) */
+				/* LOG_PRI keeps the low 3 bits; LOG_MAKEPRI shifts fac by 3 */
+#define	LOG_PRI(p)	((p) & LOG_PRIMASK)
+#define	LOG_MAKEPRI(fac, pri)	(((fac) << 3) | (pri))	/* fac: unshifted number, not a pre-shifted LOG_* facility code */
+
+#ifdef SYSLOG_NAMES
+#define	INTERNAL_NOPRI	0x10	/* pseudo-priority meaning "no priority" */
+				/* the "mark" pseudo-facility */
+#define	INTERNAL_MARK	LOG_MAKEPRI(LOG_NFACILITIES, 0)
+typedef struct _code {
+	char	*c_name;	/* symbolic name; NULL in the last table row */
+	int	c_val;		/* numeric code; -1 in the last table row */
+} CODE;
+
+CODE prioritynames[] = {	/* priority names, one { name, value } per row */
+	{ "alert",	LOG_ALERT },
+	{ "crit",	LOG_CRIT },
+	{ "debug",	LOG_DEBUG },
+	{ "emerg",	LOG_EMERG },
+	{ "err",	LOG_ERR },
+	{ "error",	LOG_ERR },		/* DEPRECATED */
+	{ "info",	LOG_INFO },
+	{ "none",	INTERNAL_NOPRI },	/* INTERNAL */
+	{ "notice",	LOG_NOTICE },
+	{ "panic",	LOG_EMERG },		/* DEPRECATED */
+	{ "warn",	LOG_WARNING },		/* DEPRECATED */
+	{ "warning",	LOG_WARNING },
+	{ NULL,		-1 },
+};
+#endif
+
+/* facility codes */
+#define	LOG_KERN	(0<<3)	/* kernel messages */
+#define	LOG_USER	(1<<3)	/* random user-level messages */
+#define	LOG_MAIL	(2<<3)	/* mail system */
+#define	LOG_DAEMON	(3<<3)	/* system daemons */
+#define	LOG_AUTH	(4<<3)	/* security/authorization messages */
+#define	LOG_SYSLOG	(5<<3)	/* messages generated internally by syslogd */
+#define	LOG_LPR		(6<<3)	/* line printer subsystem */
+#define	LOG_NEWS	(7<<3)	/* network news subsystem */
+#define	LOG_UUCP	(8<<3)	/* UUCP subsystem */
+#define	LOG_CRON	(9<<3)	/* clock daemon */
+#define	LOG_AUTHPRIV	(10<<3)	/* security/authorization messages (private) */
+#define	LOG_FTP		(11<<3)	/* ftp daemon */
+
+	/* other codes through 15 reserved for system use */
+#define	LOG_LOCAL0	(16<<3)	/* reserved for local use */
+#define	LOG_LOCAL1	(17<<3)	/* reserved for local use */
+#define	LOG_LOCAL2	(18<<3)	/* reserved for local use */
+#define	LOG_LOCAL3	(19<<3)	/* reserved for local use */
+#define	LOG_LOCAL4	(20<<3)	/* reserved for local use */
+#define	LOG_LOCAL5	(21<<3)	/* reserved for local use */
+#define	LOG_LOCAL6	(22<<3)	/* reserved for local use */
+#define	LOG_LOCAL7	(23<<3)	/* reserved for local use */
+
+#define	LOG_NFACILITIES	24	/* current number of facilities (unshifted count) */
+#define	LOG_FACMASK	0x03f8	/* mask to extract facility part (bits 3-9) */
+				/* unshifted facility number of pri */
+#define	LOG_FAC(p)	(((p) & LOG_FACMASK) >> 3)
+
+#ifdef SYSLOG_NAMES
+CODE facilitynames[] = {	/* facility names, one { name, value } per row */
+	{ "auth",	LOG_AUTH },
+	{ "authpriv",	LOG_AUTHPRIV },
+	{ "cron",	LOG_CRON },
+	{ "daemon",	LOG_DAEMON },
+	{ "ftp",	LOG_FTP },
+	{ "kern",	LOG_KERN },
+	{ "lpr",	LOG_LPR },
+	{ "mail",	LOG_MAIL },
+	{ "mark",	INTERNAL_MARK },	/* INTERNAL */
+	{ "news",	LOG_NEWS },
+	{ "security",	LOG_AUTH },		/* DEPRECATED */
+	{ "syslog",	LOG_SYSLOG },
+	{ "user",	LOG_USER },
+	{ "uucp",	LOG_UUCP },
+	{ "local0",	LOG_LOCAL0 },
+	{ "local1",	LOG_LOCAL1 },
+	{ "local2",	LOG_LOCAL2 },
+	{ "local3",	LOG_LOCAL3 },
+	{ "local4",	LOG_LOCAL4 },
+	{ "local5",	LOG_LOCAL5 },
+	{ "local6",	LOG_LOCAL6 },
+	{ "local7",	LOG_LOCAL7 },
+	{ NULL,		-1 },			/* last row of the table */
+};
+#endif
+
+#ifdef KERNEL
+#define	LOG_PRINTF	(-1)	/* pseudo-priority to indicate use of printf */
+#endif
+
+/*
+ * Arguments to setlogmask: bit n of the mask stands for priority n.
+ */
+#define	LOG_MASK(pri)	(1 << (pri))		/* mask for one priority */
+#define	LOG_UPTO(pri)	((1 << ((pri)+1)) - 1)	/* all priorities through pri */
+
+/*
+ * Option flags for openlog.
+ *
+ * LOG_ODELAY no longer does anything.
+ * LOG_NDELAY is the inverse of what it used to be.
+ */
+#define	LOG_PID		0x01	/* log the pid with each message */
+#define	LOG_CONS	0x02	/* log on the console if errors in sending */
+#define	LOG_ODELAY	0x04	/* delay open until first syslog() (default) */
+#define	LOG_NDELAY	0x08	/* don't delay open */
+#define	LOG_NOWAIT	0x10	/* don't wait for console forks: DEPRECATED */
+#define	LOG_PERROR	0x20	/* log to stderr as well */
+
+#ifndef KERNEL
+
+/*
+ * Don't use va_list in the vsyslog() prototype.   Va_list is typedef'd in two
+ * places (<machine/varargs.h> and <machine/stdarg.h>), so if we include one
+ * of them here we may collide with the utility's includes.  It's unreasonable
+ * for utilities to have to include one of them to include syslog.h, so we get
+ * _BSD_VA_LIST_ from <machine/ansi.h> and use it.
+ */
+#include <machine/ansi.h>
+#include <sys/cdefs.h>
+
+__BEGIN_DECLS
+void	closelog __P((void));
+void	openlog __P((const char *, int, int));
+int	setlogmask __P((int));
+void	syslog __P((int, const char *, ...));
+void	vsyslog __P((int, const char *, _BSD_VA_LIST_));
+__END_DECLS
+
+#endif /* !KERNEL */
diff --git a/sys/sys/systm.h b/sys/sys/systm.h
new file mode 100644
index 00000000000..91cb64bd5fa
--- /dev/null
+++ b/sys/sys/systm.h
@@ -0,0 +1,165 @@
+/*-
+ * Copyright (c) 1982, 1988, 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)systm.h	8.4 (Berkeley) 2/23/94
+ */
+
+/*
+ * The `securelevel' variable controls the security level of the system.
+ * It can only be decreased by process 1 (/sbin/init).
+ *
+ * Security levels are as follows:
+ *   -1	permanently insecure mode - always run system in level 0 mode.
+ *    0	insecure mode - immutable and append-only flags may be turned off.
+ *	All devices may be read or written subject to permission modes.
+ *    1	secure mode - immutable and append-only flags may not be changed;
+ *	raw disks of mounted filesystems, /dev/mem, and /dev/kmem are
+ *	read-only.
+ *    2	highly secure mode - same as (1) plus raw disks are always
+ *	read-only whether mounted or not. This level precludes tampering 
+ *	with filesystems by unmounting them, but also inhibits running
+ *	newfs while the system is secured.
+ *
+ * In normal operation, the system runs in level 0 mode while single user
+ * and in level 1 mode while multiuser. If level 2 mode is desired while
+ * running multiuser, it can be set in the multiuser startup script
+ * (/etc/rc.local) using sysctl(1). If it is desired to run the system
+ * in level 0 mode while multiuser, initialize the variable securelevel
+ * in /sys/kern/kern_sysctl.c to -1. Note that it is NOT initialized to
+ * zero as that would allow the vmunix binary to be patched to -1.
+ * Without initialization, securelevel loads in the BSS area which only
+ * comes into existence when the kernel is loaded and hence cannot be
+ * patched by a stalking hacker.
+ */
+extern int securelevel;		/* system security level */
+extern const char *panicstr;	/* panic message */
+extern char version[];		/* system version */
+extern char copyright[];	/* system copyright */
+
+extern int nblkdev;		/* number of entries in bdevsw */
+extern int nchrdev;		/* number of entries in cdevsw */
+extern int nswdev;		/* number of swap devices */
+extern int nswap;		/* size of swap space */
+
+extern int selwait;		/* select timeout address */
+
+extern u_char curpriority;	/* priority of current process */
+
+extern int maxmem;		/* max memory per process */
+extern int physmem;		/* physical memory */
+
+extern dev_t dumpdev;		/* dump device */
+extern long dumplo;		/* offset into dumpdev */
+
+extern dev_t rootdev;		/* root device */
+extern struct vnode *rootvp;	/* vnode equivalent to above */
+
+extern dev_t swapdev;		/* swapping device */
+extern struct vnode *swapdev_vp;/* vnode equivalent to above */
+
+extern struct sysent {		/* system call table; one entry per call */
+	int	sy_narg;	/* number of arguments */
+	int	(*sy_call)();	/* implementing function (declared without a prototype) */
+} sysent[];
+
+extern int boothowto;		/* reboot flags, from console subsystem */
+
+/* casts to keep lint happy; each argument is parenthesized before the cast */
+#define	insque(q,p)	_insque((caddr_t)(q),(caddr_t)(p))
+#define	remque(q)	_remque((caddr_t)(q))
+
+/*
+ * General function declarations.
+ */
+int	nullop __P((void));
+int	enodev __P((void));
+int	enoioctl __P((void));
+int	enxio __P((void));
+int	eopnotsupp __P((void));
+int	seltrue __P((dev_t dev, int which, struct proc *p));
+void	*hashinit __P((int count, int type, u_long *hashmask));
+
+#ifdef __GNUC__	/* NOTE(review): "volatile void" is presumably the old GCC "does not return" spelling - confirm */
+volatile void	panic __P((const char *, ...));
+#else
+void	panic __P((const char *, ...));
+#endif
+void	tablefull __P((const char *));
+void	addlog __P((const char *, ...));
+void	log __P((int, const char *, ...));
+void	printf __P((const char *, ...));
+int	sprintf __P((char *buf, const char *, ...));
+void	ttyprintf __P((struct tty *, const char *, ...));
+
+void	bcopy __P((const void *from, void *to, u_int len));
+void	ovbcopy __P((const void *from, void *to, u_int len));
+void	bzero __P((void *buf, u_int len));
+
+int	copystr __P((void *kfaddr, void *kdaddr, u_int len, u_int *done));
+int	copyinstr __P((void *udaddr, void *kaddr, u_int len, u_int *done));
+int	copyoutstr __P((void *kaddr, void *udaddr, u_int len, u_int *done));
+int	copyin __P((void *udaddr, void *kaddr, u_int len));
+int	copyout __P((void *kaddr, void *udaddr, u_int len));
+
+int	fubyte __P((void *base));
+#ifdef notdef
+int	fuibyte __P((void *base));
+#endif
+int	subyte __P((void *base, int byte));
+int	suibyte __P((void *base, int byte));
+int	fuword __P((void *base));
+int	fuiword __P((void *base));
+int	suword __P((void *base, int word));
+int	suiword __P((void *base, int word));
+
+int	hzto __P((struct timeval *tv));
+void	timeout __P((void (*func)(void *), void *arg, int ticks));
+void	untimeout __P((void (*func)(void *), void *arg));
+void	realitexpire __P((void *));
+
+struct clockframe;
+void	hardclock __P((struct clockframe *frame));
+void	softclock __P((void));
+void	statclock __P((struct clockframe *frame));
+
+void	initclocks __P((void));
+
+void	startprofclock __P((struct proc *));
+void	stopprofclock __P((struct proc *));
+void	setstatclockrate __P((int hzrate));
+
+#include <libkern/libkern.h>
diff --git a/sys/sys/tablet.h b/sys/sys/tablet.h
new file mode 100644
index 00000000000..cbb3f23d006
--- /dev/null
+++ b/sys/sys/tablet.h
@@ -0,0 +1,94 @@
+/*-
+ * Copyright (c) 1985, 1986, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)tablet.h	8.3 (Berkeley) 1/4/94
+ */
+
+#ifndef _SYS_TABLET_H_
+#define	_SYS_TABLET_H_
+
+/*
+ * Tablet line discipline.
+ */
+#include <sys/ioctl.h>
+
+/*
+ * Reads on the tablet return one of the following structures, depending on
+ * the underlying tablet type.  The first two are defined such that a read of
+ * sizeof (gtcopos) on a non-gtco tablet will return meaningful info.  The
+ * in-proximity bit is simulated where the tablet does not directly provide
+ * the information.
+ */
+struct	tbpos {
+	int	xpos, ypos;	/* raw x-y coordinates */
+	short	status;		/* buttons/pen down */
+#define	TBINPROX	0100000		/* status bit (0x8000): pen in proximity of tablet */
+	short	scount;		/* sample count */
+};
+
+struct	gtcopos {		/* first four members match struct tbpos */
+	int	xpos, ypos;	/* raw x-y coordinates */
+	short	status;		/* as above */
+	short	scount;		/* sample count */
+	short	xtilt, ytilt;	/* raw tilt */
+	short	pressure;	/* NOTE(review): presumably raw pen pressure - confirm */
+	short	pad;		/* pad to longword boundary */
+};
+
+struct	polpos {
+	short	p_x, p_y, p_z;	/* raw 3-space coordinates */
+	short	p_azi, p_pit, p_rol;	/* azimuth, pitch, and roll */
+	short	p_stat;		/* status, as above */
+	char	p_key;		/* calculator input keyboard */
+};
+
+#define BIOSMODE	_IOW('b', 1, int)	/* set mode bit(s) */
+#define BIOGMODE	_IOR('b', 2, int)	/* get mode bit(s) */
+#define	TBMODE		0xfff0		/* mode bits: */
+#define		TBPOINT		0x0010		/* single point */
+#define		TBRUN		0x0000		/* runs continuously (value 0: no bit set) */
+#define		TBSTOP		0x0020		/* shut-up */
+#define		TBGO		0x0000		/* ~TBSTOP (value 0: no bit set) */
+#define	TBTYPE		0x000f		/* tablet type: */
+#define		TBUNUSED	0x0
+#define		TBHITACHI	0x1		/* hitachi tablet */
+#define		TBTIGER		0x2		/* hitachi tiger */
+#define		TBGTCO		0x3		/* gtco */
+#define		TBPOL		0x4		/* polhemus 3space */
+#define		TBHDG		0x5		/* hdg-1111b, low res */
+#define		TBHDGHIRES	0x6		/* hdg-1111b, high res */
+#define		TBDIGI		0x7		/* gtco digi-pad, low res */
+#define		TBDIGIHIRES	0x8		/* gtco digi-pad, high res */
+#define BIOSTYPE	_IOW('b', 3, int)	/* set tablet type */
+#define BIOGTYPE	_IOR('b', 4, int)	/* get tablet type */
+
+#endif /* !_SYS_TABLET_H_ */
diff --git a/sys/sys/termios.h b/sys/sys/termios.h
new file mode 100644
index 00000000000..4ad04a10fb1
--- /dev/null
+++ b/sys/sys/termios.h
@@ -0,0 +1,278 @@
+/*
+ * Copyright (c) 1988, 1989, 1993, 1994
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)termios.h	8.3 (Berkeley) 3/28/94
+ */
+
+#ifndef _SYS_TERMIOS_H_
+#define _SYS_TERMIOS_H_
+
+/* 
+ * Special Control Characters 
+ *
+ * Index into c_cc[] character array.
+ *
+ *	Name	     Subscript	Enabled by 
+ */
+#define	VEOF		0	/* ICANON */
+#define	VEOL		1	/* ICANON */
+#ifndef _POSIX_SOURCE
+#define	VEOL2		2	/* ICANON */
+#endif
+#define	VERASE		3	/* ICANON */
+#ifndef _POSIX_SOURCE
+#define VWERASE 	4	/* ICANON */
+#endif 
+#define VKILL		5	/* ICANON */
+#ifndef _POSIX_SOURCE
+#define	VREPRINT 	6	/* ICANON */
+#endif
+/*			7	   spare 1 */
+#define VINTR		8	/* ISIG */
+#define VQUIT		9	/* ISIG */
+#define VSUSP		10	/* ISIG */
+#ifndef _POSIX_SOURCE
+#define VDSUSP		11	/* ISIG */
+#endif
+#define VSTART		12	/* IXON, IXOFF */
+#define VSTOP		13	/* IXON, IXOFF */
+#ifndef _POSIX_SOURCE
+#define	VLNEXT		14	/* IEXTEN */
+#define	VDISCARD	15	/* IEXTEN */
+#endif
+#define VMIN		16	/* !ICANON */
+#define VTIME		17	/* !ICANON */
+#ifndef _POSIX_SOURCE
+#define VSTATUS		18	/* ICANON */
+/*			19	   spare 2 */
+#endif
+#define	NCCS		20
+
+#define _POSIX_VDISABLE	((unsigned char)'\377')	/* c_cc value CCEQ never matches */
+
+#ifndef _POSIX_SOURCE	/* CCEQ: nonzero iff c equals val and val is not _POSIX_VDISABLE */
+#define CCEQ(val, c)	((c) == (val) ? (val) != _POSIX_VDISABLE : 0)
+#endif
+
+/*
+ * Input flags - software input processing
+ */
+#define	IGNBRK		0x00000001	/* ignore BREAK condition */
+#define	BRKINT		0x00000002	/* map BREAK to SIGINT */
+#define	IGNPAR		0x00000004	/* ignore (discard) parity errors */
+#define	PARMRK		0x00000008	/* mark parity and framing errors */
+#define	INPCK		0x00000010	/* enable checking of parity errors */
+#define	ISTRIP		0x00000020	/* strip 8th bit off chars */
+#define	INLCR		0x00000040	/* map NL into CR */
+#define	IGNCR		0x00000080	/* ignore CR */
+#define	ICRNL		0x00000100	/* map CR to NL (ala CRMOD) */
+#define	IXON		0x00000200	/* enable output flow control */
+#define	IXOFF		0x00000400	/* enable input flow control */
+#ifndef _POSIX_SOURCE
+#define	IXANY		0x00000800	/* any char will restart after stop */
+#define IMAXBEL		0x00002000	/* ring bell on input queue full */
+#endif  /* !_POSIX_SOURCE */
+
+/*
+ * Output flags - software output processing
+ */
+#define	OPOST		0x00000001	/* enable following output processing */
+#ifndef _POSIX_SOURCE
+#define ONLCR		0x00000002	/* map NL to CR-NL (ala CRMOD) */
+#define OXTABS		0x00000004	/* expand tabs to spaces */
+#define ONOEOT		0x00000008	/* discard EOT's (^D) on output */
+#endif  /* !_POSIX_SOURCE */
+
+/*
+ * Control flags - hardware control of terminal
+ */
+#ifndef _POSIX_SOURCE
+#define	CIGNORE		0x00000001	/* ignore control flags */
+#endif
+#define CSIZE		0x00000300	/* character size mask */
+#define     CS5		    0x00000000	    /* 5 bits (pseudo) */
+#define     CS6		    0x00000100	    /* 6 bits */
+#define     CS7		    0x00000200	    /* 7 bits */
+#define     CS8		    0x00000300	    /* 8 bits */
+#define CSTOPB		0x00000400	/* send 2 stop bits */
+#define CREAD		0x00000800	/* enable receiver */
+#define PARENB		0x00001000	/* parity enable */
+#define PARODD		0x00002000	/* odd parity, else even */
+#define HUPCL		0x00004000	/* hang up on last close */
+#define CLOCAL		0x00008000	/* ignore modem status lines */
+#ifndef _POSIX_SOURCE
+#define CCTS_OFLOW	0x00010000	/* CTS flow control of output */
+#define CRTSCTS		CCTS_OFLOW	/* alias: same bit as CCTS_OFLOW */
+#define CRTS_IFLOW	0x00020000	/* RTS flow control of input */
+#define	MDMBUF		0x00100000	/* flow control output via Carrier */
+#endif
+
+
+/* 
+ * "Local" flags - dumping ground for other state
+ *
+ * Warning: some flags in this structure begin with
+ * the letter "I" and look like they belong in the
+ * input flag.
+ */
+
+#ifndef _POSIX_SOURCE
+#define	ECHOKE		0x00000001	/* visual erase for line kill */
+#endif  /*_POSIX_SOURCE */
+#define	ECHOE		0x00000002	/* visually erase chars */
+#define	ECHOK		0x00000004	/* echo NL after line kill */
+#define ECHO		0x00000008	/* enable echoing */
+#define	ECHONL		0x00000010	/* echo NL even if ECHO is off */
+#ifndef _POSIX_SOURCE
+#define	ECHOPRT		0x00000020	/* visual erase mode for hardcopy */
+#define ECHOCTL  	0x00000040	/* echo control chars as ^(Char) */
+#endif  /*_POSIX_SOURCE */
+#define	ISIG		0x00000080	/* enable signals INTR, QUIT, [D]SUSP */
+#define	ICANON		0x00000100	/* canonicalize input lines */
+#ifndef _POSIX_SOURCE
+#define ALTWERASE	0x00000200	/* use alternate WERASE algorithm */
+#endif  /*_POSIX_SOURCE */
+#define	IEXTEN		0x00000400	/* enable DISCARD and LNEXT */
+#define EXTPROC         0x00000800      /* external processing */
+#define TOSTOP		0x00400000	/* stop background jobs from output */
+#ifndef _POSIX_SOURCE
+#define FLUSHO		0x00800000	/* output being flushed (state) */
+#define	NOKERNINFO	0x02000000	/* no kernel output from VSTATUS */
+#define PENDIN		0x20000000	/* XXX retype pending input (state) */
+#endif  /*_POSIX_SOURCE */
+#define	NOFLSH		0x80000000	/* don't flush after interrupt */
+
+typedef unsigned long	tcflag_t;	/* type of the four c_*flag words */
+typedef unsigned char	cc_t;		/* type of one c_cc[] entry */
+typedef long		speed_t;	/* type of a line speed value */
+
+struct termios {
+	tcflag_t	c_iflag;	/* input flags */
+	tcflag_t	c_oflag;	/* output flags */
+	tcflag_t	c_cflag;	/* control flags */
+	tcflag_t	c_lflag;	/* local flags */
+	cc_t		c_cc[NCCS];	/* control chars */
+	speed_t		c_ispeed;	/* input speed (speed_t is long, as before) */
+	speed_t		c_ospeed;	/* output speed (speed_t is long, as before) */
+};
+
+/* 
+ * Commands passed to tcsetattr() for setting the termios structure.
+ */
+#define	TCSANOW		0		/* make change immediate */
+#define	TCSADRAIN	1		/* drain output, then change */
+#define	TCSAFLUSH	2		/* drain output, flush input */
+#ifndef _POSIX_SOURCE
+#define TCSASOFT	0x10		/* flag - don't alter h.w. state */
+#endif
+
+/*
+ * Standard speeds
+ */
+#define B0	0
+#define B50	50
+#define B75	75
+#define B110	110
+#define B134	134
+#define B150	150
+#define B200	200
+#define B300	300
+#define B600	600
+#define B1200	1200
+#define	B1800	1800
+#define B2400	2400
+#define B4800	4800
+#define B9600	9600
+#define B19200	19200
+#define B38400	38400
+#ifndef _POSIX_SOURCE
+#define B7200	7200
+#define B14400	14400
+#define B28800	28800
+#define B57600	57600
+#define B76800	76800
+#define B115200	115200
+#define B230400	230400
+#define EXTA	19200
+#define EXTB	38400
+#endif  /* !_POSIX_SOURCE */
+
+#ifndef KERNEL
+
+#define	TCIFLUSH	1
+#define	TCOFLUSH	2
+#define TCIOFLUSH	3
+#define	TCOOFF		1
+#define	TCOON		2
+#define TCIOFF		3
+#define TCION		4
+
+#include <sys/cdefs.h>
+
+__BEGIN_DECLS
+speed_t	cfgetispeed __P((const struct termios *));
+speed_t	cfgetospeed __P((const struct termios *));
+int	cfsetispeed __P((struct termios *, speed_t));
+int	cfsetospeed __P((struct termios *, speed_t));
+int	tcgetattr __P((int, struct termios *));
+int	tcsetattr __P((int, int, const struct termios *));
+int	tcdrain __P((int));
+int	tcflow __P((int, int));
+int	tcflush __P((int, int));
+int	tcsendbreak __P((int, int));
+
+#ifndef _POSIX_SOURCE
+void	cfmakeraw __P((struct termios *));
+int	cfsetspeed __P((struct termios *, speed_t));
+#endif /* !_POSIX_SOURCE */
+__END_DECLS
+
+#endif /* !KERNEL */
+
+#ifndef _POSIX_SOURCE
+
+/*
+ * Include tty ioctl's that aren't just for backwards compatibility
+ * with the old tty driver.  These ioctl definitions were previously
+ * in <sys/ioctl.h>.
+ */
+#include <sys/ttycom.h>
+#endif
+
+/*
+ * END OF PROTECTED INCLUDE.
+ */
+#endif /* !_SYS_TERMIOS_H_ */
+
+#ifndef _POSIX_SOURCE
+#include <sys/ttydefaults.h>
+#endif
diff --git a/sys/sys/time.h b/sys/sys/time.h
new file mode 100644
index 00000000000..53227712a3b
--- /dev/null
+++ b/sys/sys/time.h
@@ -0,0 +1,126 @@
+/*
+ * Copyright (c) 1982, 1986, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)time.h	8.1 (Berkeley) 6/2/93
+ */
+
+#ifndef _SYS_TIME_H_
+#define _SYS_TIME_H_
+
+/*
+ * Structure returned by gettimeofday(2) system call,
+ * and used in other calls.
+ */
+struct timeval {
+	long	tv_sec;		/* seconds */
+	long	tv_usec;	/* and microseconds */
+};
+
+/*
+ * Structure defined by POSIX.4 to be like a timeval.
+ */
+struct timespec {
+	long	ts_sec;		/* seconds */
+	long	ts_nsec;	/* and nanoseconds */
+};
+
+#define	TIMEVAL_TO_TIMESPEC(tv, ts) do { /* *ts = *tv, usec -> nsec */	\
+	(ts)->ts_sec = (tv)->tv_sec;					\
+	(ts)->ts_nsec = (tv)->tv_usec * 1000;				\
+} while (0)	/* one statement: the caller supplies the ';' */
+#define	TIMESPEC_TO_TIMEVAL(tv, ts) do { /* *tv = *ts, nsec -> usec */	\
+	(tv)->tv_sec = (ts)->ts_sec;					\
+	(tv)->tv_usec = (ts)->ts_nsec / 1000;				\
+} while (0)	/* one statement: the caller supplies the ';' */
+
+struct timezone {
+	int	tz_minuteswest;	/* minutes west of Greenwich */
+	int	tz_dsttime;	/* type of dst correction */
+};
+#define	DST_NONE	0	/* not on dst */
+#define	DST_USA		1	/* USA style dst */
+#define	DST_AUST	2	/* Australian style dst */
+#define	DST_WET		3	/* Western European dst */
+#define	DST_MET		4	/* Middle European dst */
+#define	DST_EET		5	/* Eastern European dst */
+#define	DST_CAN		6	/* Canada */
+
+/* Operations on timevals.  timercmp's cmp is a comparison operator token, e.g. <. */
+#define	timerclear(tvp)		((tvp)->tv_sec = (tvp)->tv_usec = 0)
+#define	timerisset(tvp)		((tvp)->tv_sec || (tvp)->tv_usec)
+#define	timercmp(tvp, uvp, cmp)						\
+	(((tvp)->tv_sec == (uvp)->tv_sec) ?				\
+	    ((tvp)->tv_usec cmp (uvp)->tv_usec) :			\
+	    ((tvp)->tv_sec cmp (uvp)->tv_sec))
+
+/*
+ * Names of the interval timers, and structure
+ * defining a timer setting.
+ */
+#define	ITIMER_REAL	0
+#define	ITIMER_VIRTUAL	1
+#define	ITIMER_PROF	2
+
+struct	itimerval {
+	struct	timeval it_interval;	/* timer interval */
+	struct	timeval it_value;	/* current value */
+};
+
+/*
+ * Getkerninfo clock information structure
+ */
+struct clockinfo {
+	int	hz;		/* clock frequency */
+	int	tick;		/* micro-seconds per hz tick */
+	int	stathz;		/* statistics clock frequency */
+	int	profhz;		/* profiling clock frequency */
+};
+
+#ifndef KERNEL
+#include <time.h>
+
+#ifndef _POSIX_SOURCE
+#include <sys/cdefs.h>
+
+__BEGIN_DECLS
+int	adjtime __P((const struct timeval *, struct timeval *));
+int	getitimer __P((int, struct itimerval *));
+int	gettimeofday __P((struct timeval *, struct timezone *));
+int	setitimer __P((int, const struct itimerval *, struct itimerval *));
+int	settimeofday __P((const struct timeval *, const struct timezone *));
+int	utimes __P((const char *, const struct timeval *));
+__END_DECLS
+#endif /* !_POSIX_SOURCE */
+
+#endif /* !KERNEL */
+
+#endif /* !_SYS_TIME_H_ */
diff --git a/sys/sys/timeb.h b/sys/sys/timeb.h
new file mode 100644
index 00000000000..2ab010514b6
--- /dev/null
+++ b/sys/sys/timeb.h
@@ -0,0 +1,47 @@
+/*-
+ * Copyright (c) 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)timeb.h	8.2 (Berkeley) 1/21/94
+ */
+
+/* The ftime(2) structure -- deprecated.  time_t is not declared here. */
+struct timeb {
+	time_t	time;			/* seconds since the Epoch */
+	unsigned short millitm;		/* + milliseconds since the Epoch */
+	short	timezone;		/* minutes west of UTC */
+	short	dstflag;		/* non-zero means DST */
+};
diff --git a/sys/sys/times.h b/sys/sys/times.h
new file mode 100644
index 00000000000..23a15008291
--- /dev/null
+++ b/sys/sys/times.h
@@ -0,0 +1,65 @@
+/*-
+ * Copyright (c) 1990, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)times.h	8.4 (Berkeley) 1/21/94
+ */
+
+#ifndef	_SYS_TIMES_H_
+#define	_SYS_TIMES_H_
+
+#include <machine/ansi.h>
+
+#ifdef	_BSD_CLOCK_T_
+typedef	_BSD_CLOCK_T_	clock_t;
+#undef	_BSD_CLOCK_T_
+#endif
+
+struct tms {
+	clock_t tms_utime;	/* User CPU time */
+	clock_t tms_stime;	/* System CPU time */
+	clock_t tms_cutime;	/* User CPU time of terminated child procs */
+	clock_t tms_cstime;	/* System CPU time of terminated child procs */
+};
+
+#ifndef KERNEL
+#include <sys/cdefs.h>
+
+__BEGIN_DECLS
+clock_t	times __P((struct tms *));	/* declared only when !KERNEL */
+__END_DECLS
+#endif /* !KERNEL */
+#endif /* !_SYS_TIMES_H_ */
diff --git a/sys/sys/timetc.h b/sys/sys/timetc.h
new file mode 100644
index 00000000000..53227712a3b
--- /dev/null
+++ b/sys/sys/timetc.h
@@ -0,0 +1,126 @@
+/*
+ * Copyright (c) 1982, 1986, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)time.h	8.1 (Berkeley) 6/2/93
+ */
+
+#ifndef _SYS_TIME_H_
+#define _SYS_TIME_H_
+
+/*
+ * Structure returned by gettimeofday(2) system call,
+ * and used in other calls.
+ */
+struct timeval {
+	long	tv_sec;		/* seconds */
+	long	tv_usec;	/* and microseconds */
+};
+
+/*
+ * Structure defined by POSIX.4 to be like a timeval.
+ */
+struct timespec {
+	long	ts_sec;		/* seconds */
+	long	ts_nsec;	/* and nanoseconds */
+};
+
+#define	TIMEVAL_TO_TIMESPEC(tv, ts) do {	/* one stmt: usec -> nsec */ \
+	(ts)->ts_sec = (tv)->tv_sec;					\
+	(ts)->ts_nsec = (tv)->tv_usec * 1000;				\
+} while (0)
+#define	TIMESPEC_TO_TIMEVAL(tv, ts) do {	/* nsec -> usec, truncating */ \
+	(tv)->tv_sec = (ts)->ts_sec;					\
+	(tv)->tv_usec = (ts)->ts_nsec / 1000;				\
+} while (0)
+
+struct timezone {
+	int	tz_minuteswest;	/* minutes west of Greenwich */
+	int	tz_dsttime;	/* type of dst correction */
+};
+#define	DST_NONE	0	/* not on dst */
+#define	DST_USA		1	/* USA style dst */
+#define	DST_AUST	2	/* Australian style dst */
+#define	DST_WET		3	/* Western European dst */
+#define	DST_MET		4	/* Middle European dst */
+#define	DST_EET		5	/* Eastern European dst */
+#define	DST_CAN		6	/* Canada */
+
+/* Operations on timevals; each is a single parenthesized expression. */
+#define	timerclear(tvp)		((tvp)->tv_sec = (tvp)->tv_usec = 0)
+#define	timerisset(tvp)		((tvp)->tv_sec || (tvp)->tv_usec)
+#define	timercmp(tvp, uvp, cmp)						\
+	(((tvp)->tv_sec == (uvp)->tv_sec) ?				\
+	    ((tvp)->tv_usec cmp (uvp)->tv_usec) :			\
+	    ((tvp)->tv_sec cmp (uvp)->tv_sec))
+
+/*
+ * Names of the interval timers, and structure
+ * defining a timer setting.
+ */
+#define	ITIMER_REAL	0
+#define	ITIMER_VIRTUAL	1
+#define	ITIMER_PROF	2
+
+struct	itimerval {
+	struct	timeval it_interval;	/* timer interval */
+	struct	timeval it_value;	/* current value */
+};
+
+/*
+ * Getkerninfo clock information structure
+ */
+struct clockinfo {
+	int	hz;		/* clock frequency */
+	int	tick;		/* micro-seconds per hz tick */
+	int	stathz;		/* statistics clock frequency */
+	int	profhz;		/* profiling clock frequency */
+};
+
+#ifndef KERNEL
+#include <time.h>
+
+#ifndef _POSIX_SOURCE
+#include <sys/cdefs.h>
+
+__BEGIN_DECLS
+int	adjtime __P((const struct timeval *, struct timeval *));
+int	getitimer __P((int, struct itimerval *));
+int	gettimeofday __P((struct timeval *, struct timezone *));
+int	setitimer __P((int, const struct itimerval *, struct itimerval *));
+int	settimeofday __P((const struct timeval *, const struct timezone *));
+int	utimes __P((const char *, const struct timeval *));
+__END_DECLS
+#endif /* !_POSIX_SOURCE */
+
+#endif /* !KERNEL */
+
+#endif /* !_SYS_TIME_H_ */
diff --git a/sys/sys/tprintf.h b/sys/sys/tprintf.h
new file mode 100644
index 00000000000..5b83aaec029
--- /dev/null
+++ b/sys/sys/tprintf.h
@@ -0,0 +1,41 @@
+/*-
+ * Copyright (c) 1990, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)tprintf.h	8.1 (Berkeley) 6/2/93
+ */
+
+typedef struct session *tpr_t;
+/* tpr_t is a struct session pointer; open yields one from a struct proc *. */
+tpr_t	tprintf_open __P((struct proc *));
+void	tprintf_close __P((tpr_t));
+/* Takes a tpr_t, a format string and variable arguments. */
+void	tprintf __P((tpr_t, const char *fmt, ...));
diff --git a/sys/sys/trace.h b/sys/sys/trace.h
new file mode 100644
index 00000000000..d401f1459d7
--- /dev/null
+++ b/sys/sys/trace.h
@@ -0,0 +1,116 @@
+/*-
+ * Copyright (c) 1982, 1986, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)trace.h	8.1 (Berkeley) 6/2/93
+ */
+
+/*
+ * File system buffer tracing points; all trace <pack(dev, size), bn>
+ */
+#define	TR_BREADHIT	0	/* buffer read found in cache */
+#define	TR_BREADMISS	1	/* buffer read not in cache */
+#define	TR_BWRITE	2	/* buffer written */
+#define	TR_BREADHITRA	3	/* buffer read-ahead found in cache */
+#define	TR_BREADMISSRA	4	/* buffer read-ahead not in cache */
+#define	TR_XFODMISS	5	/* exe fod read */
+#define	TR_XFODHIT	6	/* exe fod read */
+#define	TR_BRELSE	7	/* brelse */
+#define	TR_BREALLOC	8	/* expand/contract a buffer */
+
+/*
+ * Memory allocator trace points; all trace the amount of memory involved
+ */
+#define	TR_MALL		10	/* memory allocated */
+
+/*
+ * Paging trace points: all are <vaddr, pid>
+ */
+#define	TR_INTRANS	20	/* page intransit block */
+#define	TR_EINTRANS	21	/* page intransit wait done */
+#define	TR_FRECLAIM	22	/* reclaim from free list */
+#define	TR_RECLAIM	23	/* reclaim from loop */
+#define	TR_XSFREC	24	/* reclaim from free list instead of drum */
+#define	TR_XIFREC	25	/* reclaim from free list instead of fsys */
+#define	TR_WAITMEM	26	/* wait for memory in pagein */
+#define	TR_EWAITMEM	27	/* end memory wait in pagein */
+#define	TR_ZFOD		28	/* zfod page fault */
+#define	TR_EXFOD	29	/* exec fod page fault */
+#define	TR_VRFOD	30	/* vread fod page fault */
+#define	TR_CACHEFOD	31	/* fod in file system cache */
+#define	TR_SWAPIN	32	/* drum page fault */
+#define	TR_PGINDONE	33	/* page in done */
+#define	TR_SWAPIO	34	/* swap i/o request arrives */
+
+/*
+ * System call trace points.
+ */
+#define	TR_VADVISE	40	/* vadvise occurred with <arg, pid> */
+
+/*
+ * Miscellaneous
+ */
+#define	TR_STAMP	45	/* user said vtrace(VTR_STAMP, value); */
+
+/*
+ * This defines the size of the trace flags array.
+ */
+#define	TR_NFLAGS	100	/* generous */
+
+#define	TRCSIZ		4096
+
+/*
+ * Specifications of the vtrace() system call, which takes one argument.
+ */
+#define	VTRACE		(64+51)		/* = 115; parenthesized for safe use */
+
+#define	VTR_DISABLE	0		/* set a trace flag to 0 */
+#define	VTR_ENABLE	1		/* set a trace flag to 1 */
+#define	VTR_VALUE	2		/* return value of a trace flag */
+#define	VTR_UALARM	3		/* set alarm to go off (sig 16) */
+					/* in specified number of hz */
+#define	VTR_STAMP	4		/* user specified stamp */
+
+#ifdef KERNEL
+#ifdef TRACE
+struct	proc *traceproc;
+int	tracewhich, tracebuf[TRCSIZ];
+u_int	tracex;
+char	traceflags[TR_NFLAGS];
+/* pack(): (fsid val[0] of v's mount << 16) | b, as one parenthesized value. */
+#define	pack(v,b)	((((v)->v_mount->mnt_stat.f_fsid.val[0])<<16)|(b))
+#define	trace(a,b,c) do {	/* one statement; safe before `else' */	\
+	if (traceflags[a]) trace1(a,b,c);				\
+} while (0)
+#else
+#define	trace(a,b,c)
+#endif
+#endif
diff --git a/sys/sys/tty.h b/sys/sys/tty.h
new file mode 100644
index 00000000000..4a89b0382ad
--- /dev/null
+++ b/sys/sys/tty.h
@@ -0,0 +1,217 @@
+/*-
+ * Copyright (c) 1982, 1986, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)tty.h	8.6 (Berkeley) 1/21/94
+ */
+
+#include <sys/termios.h>
+#include <sys/select.h>		/* For struct selinfo. */
+
+/*
+ * Clists are character lists, which is a variable length linked list
+ * of cblocks, with a count of the number of characters in the list.
+ */
+struct clist {
+	int	c_cc;		/* Number of characters in the clist. */
+	char	*c_cf;		/* Pointer to the first cblock. */
+	char	*c_cl;		/* Pointer to the last cblock. */
+};
+
+/*
+ * Per-tty structure.
+ *
+ * Should be split in two, into device and tty drivers.
+ * Glue could be masks of what to echo and circular buffer
+ * (low, high, timeout).
+ */
+struct tty {
+	struct	clist t_rawq;		/* Device raw input queue. */
+	long	t_rawcc;		/* Raw input queue statistics. */
+	struct	clist t_canq;		/* Device canonical queue. */
+	long	t_cancc;		/* Canonical queue statistics. */
+	struct	clist t_outq;		/* Device output queue. */
+	long	t_outcc;		/* Output queue statistics. */
+	char	t_line;			/* Interface to device drivers. */
+	dev_t	t_dev;			/* Device. */
+	int	t_state;		/* Device and driver (TS*) state. */
+	int	t_flags;		/* Tty flags. */
+	struct	pgrp *t_pgrp;		/* Foreground process group. */
+	struct	session *t_session;	/* Enclosing session. */
+	struct	selinfo t_rsel;		/* Tty read/oob select. */
+	struct	selinfo t_wsel;		/* Tty write select. */
+	struct	termios t_termios;	/* Termios state. */
+	struct	winsize t_winsize;	/* Window size. */
+					/* Start output. */
+	void	(*t_oproc) __P((struct tty *));
+					/* Stop output. */
+	void	(*t_stop) __P((struct tty *, int));
+					/* Set hardware state. */
+	int	(*t_param) __P((struct tty *, struct termios *));
+	void	*t_sc;			/* XXX: net/if_sl.c:sl_softc. */
+	short	t_column;		/* Tty output column. */
+	short	t_rocount, t_rocol;	/* Tty. */
+	short	t_hiwat;		/* High water mark. */
+	short	t_lowat;		/* Low water mark. */
+	short	t_gen;			/* Generation number. */
+};
+
+#define	t_cc		t_termios.c_cc
+#define	t_cflag		t_termios.c_cflag
+#define	t_iflag		t_termios.c_iflag
+#define	t_ispeed	t_termios.c_ispeed
+#define	t_lflag		t_termios.c_lflag
+#define	t_min		t_termios.c_min
+#define	t_oflag		t_termios.c_oflag
+#define	t_ospeed	t_termios.c_ospeed
+#define	t_time		t_termios.c_time
+
+#define	TTIPRI	25			/* Sleep priority for tty reads. */
+#define	TTOPRI	26			/* Sleep priority for tty writes. */
+
+#define	TTMASK	15
+#define	OBUFSIZ	100
+#define	TTYHOG	1024
+
+#ifdef KERNEL
+#define	TTMAXHIWAT	roundup(2048, CBSIZE)
+#define	TTMINHIWAT	roundup(100, CBSIZE)
+#define	TTMAXLOWAT	256
+#define	TTMINLOWAT	32
+#endif
+
+/* These flags are kept in t_state. */
+#define	TS_ASLEEP	0x00001		/* Process waiting for tty. */
+#define	TS_ASYNC	0x00002		/* Tty in async I/O mode. */
+#define	TS_BUSY		0x00004		/* Draining output. */
+#define	TS_CARR_ON	0x00008		/* Carrier is present. */
+#define	TS_FLUSH	0x00010		/* Outq has been flushed during DMA. */
+#define	TS_ISOPEN	0x00020		/* Open has completed. */
+#define	TS_TBLOCK	0x00040		/* Further input blocked. */
+#define	TS_TIMEOUT	0x00080		/* Wait for output char processing. */
+#define	TS_TTSTOP	0x00100		/* Output paused. */
+#define	TS_WOPEN	0x00200		/* Open in progress. */
+#define	TS_XCLUDE	0x00400		/* Tty requires exclusivity. */
+
+/* State for intra-line fancy editing work. */
+#define	TS_BKSL		0x00800		/* State for lowercase \ work. */
+#define	TS_CNTTB	0x01000		/* Counting tab width, ignore FLUSHO. */
+#define	TS_ERASE	0x02000		/* Within a \.../ for PRTRUB. */
+#define	TS_LNCH		0x04000		/* Next character is literal. */
+#define	TS_TYPEN	0x08000		/* Retyping suspended input (PENDIN). */
+#define	TS_LOCAL	(TS_BKSL | TS_CNTTB | TS_ERASE | TS_LNCH | TS_TYPEN)
+
+/* Character type information. */
+#define	ORDINARY	0
+#define	CONTROL		1
+#define	BACKSPACE	2
+#define	NEWLINE		3
+#define	TAB		4
+#define	VTAB		5
+#define	RETURN		6
+
+struct speedtab {
+	int sp_speed;			/* Speed. */
+	int sp_code;			/* Code. */
+};
+
+/* Modem control commands (driver). */
+#define	DMSET		0
+#define	DMBIS		1
+#define	DMBIC		2
+#define	DMGET		3
+
+/* Flags on a character passed to ttyinput. */
+#define	TTY_CHARMASK	0x000000ff	/* Character mask */
+#define	TTY_QUOTE	0x00000100	/* Character quoted */
+#define	TTY_ERRORMASK	0xff000000	/* Error mask */
+#define	TTY_FE		0x01000000	/* Framing error or BREAK condition */
+#define	TTY_PE		0x02000000	/* Parity error */
+
+/* Is tp the controlling terminal for p (same session, P_CONTROLT set)? */
+#define	isctty(p, tp)							\
+	((p)->p_session == (tp)->t_session && ((p)->p_flag & P_CONTROLT))
+
+/* Is p in the background of tp (tp controls p, but pgrps differ)? */
+#define	isbackground(p, tp)						\
+	(isctty(p, tp) && (p)->p_pgrp != (tp)->t_pgrp)
+
+#ifdef KERNEL
+extern	struct ttychars ttydefaults;
+
+/* Symbolic sleep message strings. */
+extern	 char ttyin[], ttyout[], ttopen[], ttclos[], ttybg[], ttybuf[];
+
+int	 b_to_q __P((char *cp, int cc, struct clist *q));
+void	 catq __P((struct clist *from, struct clist *to));
+void	 clist_init __P((void));
+int	 getc __P((struct clist *q));
+void	 ndflush __P((struct clist *q, int cc));
+int	 ndqb __P((struct clist *q, int flag));
+char	*nextc __P((struct clist *q, char *cp, int *c));
+int	 putc __P((int c, struct clist *q));
+int	 q_to_b __P((struct clist *q, char *cp, int cc));
+int	 unputc __P((struct clist *q));
+
+int	 nullmodem __P((struct tty *tp, int flag));
+int	 tputchar __P((int c, struct tty *tp));
+int	 ttioctl __P((struct tty *tp, int com, void *data, int flag));
+int	 ttread __P((struct tty *tp, struct uio *uio, int flag));
+void	 ttrstrt __P((void *tp));
+int	 ttselect __P((dev_t device, int rw, struct proc *p));
+void	 ttsetwater __P((struct tty *tp));
+int	 ttspeedtab __P((int speed, struct speedtab *table));
+int	 ttstart __P((struct tty *tp));
+void	 ttwakeup __P((struct tty *tp));
+int	 ttwrite __P((struct tty *tp, struct uio *uio, int flag));
+void	 ttychars __P((struct tty *tp));
+int	 ttycheckoutq __P((struct tty *tp, int wait));
+int	 ttyclose __P((struct tty *tp));
+void	 ttyflush __P((struct tty *tp, int rw));
+void	 ttyinfo __P((struct tty *tp));
+int	 ttyinput __P((int c, struct tty *tp));
+int	 ttylclose __P((struct tty *tp, int flag));
+int	 ttymodem __P((struct tty *tp, int flag));
+int	 ttyopen __P((dev_t device, struct tty *tp));
+int	 ttyoutput __P((int c, struct tty *tp));
+void	 ttypend __P((struct tty *tp));
+void	 ttyretype __P((struct tty *tp));
+void	 ttyrub __P((int c, struct tty *tp));
+int	 ttysleep __P((struct tty *tp,
+	    void *chan, int pri, char *wmesg, int timeout));
+int	 ttywait __P((struct tty *tp));
+int	 ttywflush __P((struct tty *tp));
+#endif
diff --git a/sys/sys/ttychars.h b/sys/sys/ttychars.h
new file mode 100644
index 00000000000..1a23aa77091
--- /dev/null
+++ b/sys/sys/ttychars.h
@@ -0,0 +1,63 @@
+/*-
+ * Copyright (c) 1982, 1986, 1990, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)ttychars.h	8.2 (Berkeley) 1/4/94
+ */
+
+/*
+ * 4.3 COMPATIBILITY FILE
+ *
+ * User visible structures and constants related to terminal handling.
+ */
+#ifndef _SYS_TTYCHARS_H_
+#define	_SYS_TTYCHARS_H_
+
+struct ttychars {
+	char	tc_erase;	/* erase last character */
+	char	tc_kill;	/* erase entire line */
+	char	tc_intrc;	/* interrupt */
+	char	tc_quitc;	/* quit */
+	char	tc_startc;	/* start output */
+	char	tc_stopc;	/* stop output */
+	char	tc_eofc;	/* end-of-file */
+	char	tc_brkc;	/* input delimiter (like nl) */
+	char	tc_suspc;	/* stop process signal */
+	char	tc_dsuspc;	/* delayed stop process signal */
+	char	tc_rprntc;	/* reprint line */
+	char	tc_flushc;	/* flush output (toggles) */
+	char	tc_werasc;	/* word erase */
+	char	tc_lnextc;	/* literal next character */
+};
+#ifdef USE_OLD_TTY
+#include <sys/ttydefaults.h>	/* to pick up character defaults */
+#endif
+#endif /* !_SYS_TTYCHARS_H_ */
diff --git a/sys/sys/ttycom.h b/sys/sys/ttycom.h
new file mode 100644
index 00000000000..a12d8d00354
--- /dev/null
+++ b/sys/sys/ttycom.h
@@ -0,0 +1,128 @@
+/*-
+ * Copyright (c) 1982, 1986, 1990, 1993, 1994
+ *	The Regents of the University of California.  All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)ttycom.h	8.1 (Berkeley) 3/28/94
+ */
+
+#ifndef	_SYS_TTYCOM_H_
+#define	_SYS_TTYCOM_H_
+
+#include <sys/ioccom.h>
+
+/*
+ * Tty ioctl's except for those supported only for backwards compatibility
+ * with the old tty driver.
+ */
+
+/*
+ * Window/terminal size structure.  This information is stored by the kernel
+ * in order to provide a consistent interface, but is not used by the kernel.
+ */
+struct winsize {
+	unsigned short	ws_row;		/* rows, in characters */
+	unsigned short	ws_col;		/* columns, in characters */
+	unsigned short	ws_xpixel;	/* horizontal size, pixels */
+	unsigned short	ws_ypixel;	/* vertical size, pixels */
+};
+
+#define	TIOCMODG	_IOR('t', 3, int)	/* get modem control state */
+#define	TIOCMODS	_IOW('t', 4, int)	/* set modem control state */
+#define		TIOCM_LE	0001		/* line enable */
+#define		TIOCM_DTR	0002		/* data terminal ready */
+#define		TIOCM_RTS	0004		/* request to send */
+#define		TIOCM_ST	0010		/* secondary transmit */
+#define		TIOCM_SR	0020		/* secondary receive */
+#define		TIOCM_CTS	0040		/* clear to send */
+#define		TIOCM_CAR	0100		/* carrier detect */
+#define		TIOCM_CD	TIOCM_CAR
+#define		TIOCM_RNG	0200		/* ring */
+#define		TIOCM_RI	TIOCM_RNG
+#define		TIOCM_DSR	0400		/* data set ready */
+						/* 8-10 compat */
+#define	TIOCEXCL	 _IO('t', 13)		/* set exclusive use of tty */
+#define	TIOCNXCL	 _IO('t', 14)		/* reset exclusive use of tty */
+						/* 15 unused */
+#define	TIOCFLUSH	_IOW('t', 16, int)	/* flush buffers */
+						/* 17-18 compat */
+#define	TIOCGETA	_IOR('t', 19, struct termios) /* get termios struct */
+#define	TIOCSETA	_IOW('t', 20, struct termios) /* set termios struct */
+#define	TIOCSETAW	_IOW('t', 21, struct termios) /* drain output, set */
+#define	TIOCSETAF	_IOW('t', 22, struct termios) /* drn out, fls in, set */
+#define	TIOCGETD	_IOR('t', 26, int)	/* get line discipline */
+#define	TIOCSETD	_IOW('t', 27, int)	/* set line discipline */
+						/* 127-124 compat */
+#define	TIOCSBRK	 _IO('t', 123)		/* set break bit */
+#define	TIOCCBRK	 _IO('t', 122)		/* clear break bit */
+#define	TIOCSDTR	 _IO('t', 121)		/* set data terminal ready */
+#define	TIOCCDTR	 _IO('t', 120)		/* clear data terminal ready */
+#define	TIOCGPGRP	_IOR('t', 119, int)	/* get pgrp of tty */
+#define	TIOCSPGRP	_IOW('t', 118, int)	/* set pgrp of tty */
+						/* 117-116 compat */
+#define	TIOCOUTQ	_IOR('t', 115, int)	/* output queue size */
+#define	TIOCSTI		_IOW('t', 114, char)	/* simulate terminal input */
+#define	TIOCNOTTY	 _IO('t', 113)		/* void tty association */
+#define	TIOCPKT		_IOW('t', 112, int)	/* pty: set/clear packet mode */
+#define		TIOCPKT_DATA		0x00	/* data packet */
+#define		TIOCPKT_FLUSHREAD	0x01	/* flush packet */
+#define		TIOCPKT_FLUSHWRITE	0x02	/* flush packet */
+#define		TIOCPKT_STOP		0x04	/* stop output */
+#define		TIOCPKT_START		0x08	/* start output */
+#define		TIOCPKT_NOSTOP		0x10	/* no more ^S, ^Q */
+#define		TIOCPKT_DOSTOP		0x20	/* now do ^S ^Q */
+#define		TIOCPKT_IOCTL		0x40	/* state change of pty driver */
+#define	TIOCSTOP	 _IO('t', 111)		/* stop output, like ^S */
+#define	TIOCSTART	 _IO('t', 110)		/* start output, like ^Q */
+#define	TIOCMSET	_IOW('t', 109, int)	/* set all modem bits */
+#define	TIOCMBIS	_IOW('t', 108, int)	/* bis modem bits */
+#define	TIOCMBIC	_IOW('t', 107, int)	/* bic modem bits */
+#define	TIOCMGET	_IOR('t', 106, int)	/* get all modem bits */
+#define	TIOCREMOTE	_IOW('t', 105, int)	/* remote input editing */
+#define	TIOCGWINSZ	_IOR('t', 104, struct winsize)	/* get window size */
+#define	TIOCSWINSZ	_IOW('t', 103, struct winsize)	/* set window size */
+#define	TIOCUCNTL	_IOW('t', 102, int)	/* pty: set/clr usr cntl mode */
+#define		UIOCCMD(n)	_IO('u', n)	/* usr cntl op "n" */
+#define	TIOCCONS	_IOW('t', 98, int)	/* become virtual console */
+#define	TIOCSCTTY	 _IO('t', 97)		/* become controlling tty */
+#define	TIOCEXT		_IOW('t', 96, int)	/* pty: external processing */
+#define	TIOCSIG		 _IO('t', 95)		/* pty: generate signal */
+#define	TIOCDRAIN	 _IO('t', 94)		/* wait till output drained */
+
+#define	TTYDISC		0		/* termios tty line discipline */
+#define	TABLDISC	3		/* tablet discipline */
+#define	SLIPDISC	4		/* serial IP discipline */
+
+#endif /* !_SYS_TTYCOM_H_ */
diff --git a/sys/sys/ttydefaults.h b/sys/sys/ttydefaults.h
new file mode 100644
index 00000000000..1a8aaa5bd00
--- /dev/null
+++ b/sys/sys/ttydefaults.h
@@ -0,0 +1,96 @@
+/*-
+ * Copyright (c) 1982, 1986, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)ttydefaults.h	8.4 (Berkeley) 1/21/94
+ */
+
+/*
+ * System wide defaults for terminal state.
+ */
+#ifndef _SYS_TTYDEFAULTS_H_
+#define	_SYS_TTYDEFAULTS_H_
+
+/*
+ * Defaults on "first" open.
+ */
+#define	TTYDEF_IFLAG	(BRKINT | ISTRIP | ICRNL | IMAXBEL | IXON | IXANY)
+#define TTYDEF_OFLAG	(OPOST | ONLCR | OXTABS)
+#define TTYDEF_LFLAG	(ECHO | ICANON | ISIG | IEXTEN | ECHOE|ECHOKE|ECHOCTL)
+#define TTYDEF_CFLAG	(CREAD | CS7 | PARENB | HUPCL)
+#define TTYDEF_SPEED	(B9600)
+
+/*
+ * Control Character Defaults
+ */
+#define CTRL(x)	((x) & 037)	/* control code: low 5 bits of x */
+#define	CEOF		CTRL('d')
+#define	CEOL		((unsigned char)'\377')	/* XXX avoid _POSIX_VDISABLE */
+#define	CERASE		0177
+#define	CINTR		CTRL('c')
+#define	CSTATUS		((unsigned char)'\377')	/* XXX avoid _POSIX_VDISABLE */
+#define	CKILL		CTRL('u')
+#define	CMIN		1
+#define	CQUIT		034		/* FS, ^\ */
+#define	CSUSP		CTRL('z')
+#define	CTIME		0
+#define	CDSUSP		CTRL('y')
+#define	CSTART		CTRL('q')
+#define	CSTOP		CTRL('s')
+#define	CLNEXT		CTRL('v')
+#define	CDISCARD 	CTRL('o')
+#define	CWERASE 	CTRL('w')
+#define	CREPRINT 	CTRL('r')
+#define	CEOT		CEOF
+/* compat */
+#define	CBRK		CEOL
+#define CRPRNT		CREPRINT
+#define	CFLUSH		CDISCARD
+
+/* PROTECTED INCLUSION ENDS HERE */
+#endif /* !_SYS_TTYDEFAULTS_H_ */
+
+/*
+ * #define TTYDEFCHARS to include an array of default control characters.
+ */
+#ifdef TTYDEFCHARS
+cc_t	ttydefchars[NCCS] = {
+	CEOF,	CEOL,	CEOL,	CERASE, CWERASE, CKILL, CREPRINT, 
+	_POSIX_VDISABLE, CINTR,	CQUIT,	CSUSP,	CDSUSP,	CSTART,	CSTOP,	CLNEXT,
+	CDISCARD, CMIN,	CTIME,  CSTATUS, _POSIX_VDISABLE
+};
+#undef TTYDEFCHARS
+#endif
diff --git a/sys/sys/ttydev.h b/sys/sys/ttydev.h
new file mode 100644
index 00000000000..c52a2136926
--- /dev/null
+++ b/sys/sys/ttydev.h
@@ -0,0 +1,60 @@
+/*-
+ * Copyright (c) 1982, 1986, 1990, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)ttydev.h	8.2 (Berkeley) 1/4/94
+ */
+
+/* COMPATIBILITY HEADER FILE */
+
+#ifndef _SYS_TTYDEV_H_
+#define	_SYS_TTYDEV_H_
+
+#ifdef USE_OLD_TTY
+#define B0	0
+#define B50	1
+#define B75	2
+#define B110	3
+#define B134	4
+#define B150	5
+#define B200	6
+#define B300	7
+#define B600	8
+#define B1200	9
+#define	B1800	10
+#define B2400	11
+#define B4800	12
+#define B9600	13
+#define EXTA	14
+#define EXTB	15
+#endif /* USE_OLD_TTY */
+
+#endif /* !_SYS_TTYDEV_H_ */
diff --git a/sys/sys/types.h b/sys/sys/types.h
new file mode 100644
index 00000000000..76d2975d31b
--- /dev/null
+++ b/sys/sys/types.h
@@ -0,0 +1,162 @@
+/*-
+ * Copyright (c) 1982, 1986, 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)types.h	8.4 (Berkeley) 1/21/94
+ */
+
+#ifndef _SYS_TYPES_H_
+#define	_SYS_TYPES_H_
+
+/* Machine type dependent parameters. */
+#include <machine/endian.h>
+
+#ifndef _POSIX_SOURCE
+typedef	unsigned char	u_char;
+typedef	unsigned short	u_short;
+typedef	unsigned int	u_int;
+typedef	unsigned long	u_long;
+typedef	unsigned short	ushort;		/* Sys V compatibility */
+typedef	unsigned int	uint;		/* Sys V compatibility */
+#endif
+
+typedef	unsigned long long u_quad_t;	/* quads */
+typedef	long long	quad_t;
+typedef	quad_t *	qaddr_t;
+
+typedef	char *		caddr_t;	/* core address */
+typedef	long		daddr_t;	/* disk address */
+typedef	unsigned long	dev_t;		/* device number */
+typedef unsigned long	fixpt_t;	/* fixed point number */
+typedef	unsigned long	gid_t;		/* group id */
+typedef	unsigned long	ino_t;		/* inode number */
+typedef	unsigned short	mode_t;		/* permissions */
+typedef	unsigned short	nlink_t;	/* link count */
+typedef	quad_t		off_t;		/* file offset */
+typedef	long		pid_t;		/* process id */
+typedef	long		segsz_t;	/* segment size */
+typedef	long		swblk_t;	/* swap offset */
+typedef	unsigned long	uid_t;		/* user id */
+
+/*
+ * This belongs in unistd.h, but is placed here to ensure that programs
+ * casting the second parameter of lseek to off_t will get the correct
+ * version of lseek.
+ */
+#ifndef KERNEL
+#include <sys/cdefs.h>
+__BEGIN_DECLS
+off_t	 lseek __P((int, off_t, int));
+__END_DECLS
+#endif
+
+#ifndef _POSIX_SOURCE
+#define	major(x)	((int)(((u_int)(x) >> 8)&0xff))	/* major number */
+#define	minor(x)	((int)((x)&0xff))		/* minor number */
+#define	makedev(x,y)	((dev_t)(((x)<<8) | (y)))	/* create dev_t */
+#endif
+
+#include <machine/ansi.h>
+#include <machine/types.h>
+
+#ifdef	_BSD_CLOCK_T_
+typedef	_BSD_CLOCK_T_	clock_t;
+#undef	_BSD_CLOCK_T_
+#endif
+
+#ifdef	_BSD_SIZE_T_
+typedef	_BSD_SIZE_T_	size_t;
+#undef	_BSD_SIZE_T_
+#endif
+
+#ifdef	_BSD_SSIZE_T_
+typedef	_BSD_SSIZE_T_	ssize_t;
+#undef	_BSD_SSIZE_T_
+#endif
+
+#ifdef	_BSD_TIME_T_
+typedef	_BSD_TIME_T_	time_t;
+#undef	_BSD_TIME_T_
+#endif
+
+#ifndef _POSIX_SOURCE
+#define	NBBY	8		/* number of bits in a byte */
+
+/*
+ * Select uses bit masks of file descriptors in longs.  These macros
+ * manipulate such bit fields (the filesystem macros use chars); each bit
+ * is built from (fd_mask)1 so the shift is done at the width of fd_mask.
+ * FD_SETSIZE may be defined by the user; the default should do for most uses.
+ */
+#ifndef	FD_SETSIZE
+#define	FD_SETSIZE	256
+#endif
+
+typedef long	fd_mask;
+#define NFDBITS	(sizeof(fd_mask) * NBBY)	/* bits per mask */
+
+#ifndef howmany
+#define	howmany(x, y)	(((x)+((y)-1))/(y))
+#endif
+
+typedef	struct fd_set {
+	fd_mask	fds_bits[howmany(FD_SETSIZE, NFDBITS)];
+} fd_set;
+
+#define	FD_SET(n, p)	((p)->fds_bits[(n)/NFDBITS] |= ((fd_mask)1 << ((n) % NFDBITS)))
+#define	FD_CLR(n, p)	((p)->fds_bits[(n)/NFDBITS] &= ~((fd_mask)1 << ((n) % NFDBITS)))
+#define	FD_ISSET(n, p)	((p)->fds_bits[(n)/NFDBITS] & ((fd_mask)1 << ((n) % NFDBITS)))
+#define	FD_COPY(f, t)	bcopy(f, t, sizeof(*(f)))
+#define	FD_ZERO(p)	bzero(p, sizeof(*(p)))
+
+#if defined(__STDC__) && defined(KERNEL)
+/*
+ * Forward structure declarations for function prototypes.  We include the
+ * common structures that cross subsystem boundaries here; others are mostly
+ * used in the same place that the structure is defined.
+ */
+struct	proc;
+struct	pgrp;
+struct	ucred;
+struct	rusage;
+struct	file;
+struct	buf;
+struct	tty;
+struct	uio;
+#endif
+
+#endif /* !_POSIX_SOURCE */
+#endif /* !_SYS_TYPES_H_ */
diff --git a/sys/sys/ucred.h b/sys/sys/ucred.h
new file mode 100644
index 00000000000..d3ee02dbde3
--- /dev/null
+++ b/sys/sys/ucred.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 1989, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)ucred.h	8.2 (Berkeley) 1/4/94
+ */
+
+#ifndef _SYS_UCRED_H_
+#define	_SYS_UCRED_H_
+
+/*
+ * Credentials.
+ */
+struct ucred {
+	u_short	cr_ref;			/* reference count */
+	uid_t	cr_uid;			/* effective user id */
+	short	cr_ngroups;		/* number of groups */
+	gid_t	cr_groups[NGROUPS];	/* groups */
+};
+#define cr_gid cr_groups[0]
+#define NOCRED ((struct ucred *)-1)	/* no credential available */
+#define FSCRED ((struct ucred *)-2)	/* filesystem credential */
+
+#ifdef KERNEL
+#define	crhold(cr)	(cr)->cr_ref++
+struct ucred *crget();
+struct ucred *crcopy();
+struct ucred *crdup();
+#endif /* KERNEL */
+
+#endif /* !_SYS_UCRED_H_ */
diff --git a/sys/sys/uio.h b/sys/sys/uio.h
new file mode 100644
index 00000000000..3356ebfee89
--- /dev/null
+++ b/sys/sys/uio.h
@@ -0,0 +1,83 @@
+/*
+ * Copyright (c) 1982, 1986, 1993, 1994
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)uio.h	8.5 (Berkeley) 2/22/94
+ */
+
+#ifndef _SYS_UIO_H_
+#define	_SYS_UIO_H_
+
+/*
+ * XXX
+ * iov_base should be a void *.
+ */
+struct iovec {
+	char	*iov_base;	/* Base address. */
+	size_t	 iov_len;	/* Length. */
+};
+
+enum	uio_rw { UIO_READ, UIO_WRITE };
+
+/* Segment flag values. */
+enum uio_seg {
+	UIO_USERSPACE,		/* from user data space */
+	UIO_SYSSPACE,		/* from system space */
+	UIO_USERISPACE		/* from user I space */
+};
+
+#ifdef KERNEL
+struct uio {				/* kernel-only: inside #ifdef KERNEL */
+	struct	iovec *uio_iov;		/* presumably uio_iovcnt iovecs; confirm */
+	int	uio_iovcnt;		/* NOTE(review): entry count of uio_iov? */
+	off_t	uio_offset;		/* presumably transfer offset; confirm */
+	int	uio_resid;		/* presumably bytes remaining; confirm */
+	enum	uio_seg uio_segflg;	/* segment flag value (enum uio_seg) */
+	enum	uio_rw uio_rw;		/* UIO_READ or UIO_WRITE */
+	struct	proc *uio_procp;	/* presumably requesting proc; confirm */
+};
+
+/*
+ * Limits
+ */
+#define UIO_MAXIOV	1024		/* max 1K of iov's */
+#define UIO_SMALLIOV	8		/* 8 on stack, else malloc */
+#endif /* KERNEL */
+
+#ifndef	KERNEL
+#include <sys/cdefs.h>
+
+__BEGIN_DECLS
+ssize_t	readv __P((int, const struct iovec *, int));
+ssize_t	writev __P((int, const struct iovec *, int));
+__END_DECLS
+#endif /* !KERNEL */
+#endif /* !_SYS_UIO_H_ */
diff --git a/sys/sys/un.h b/sys/sys/un.h
new file mode 100644
index 00000000000..3e214a26bb5
--- /dev/null
+++ b/sys/sys/un.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 1982, 1986, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)un.h	8.1 (Berkeley) 6/2/93
+ */
+
+/*
+ * Definitions for UNIX IPC domain.
+ */
+struct	sockaddr_un {
+	u_char	sun_len;		/* sockaddr len including null */
+	u_char	sun_family;		/* AF_UNIX */
+	char	sun_path[104];		/* path name (gag) */
+};
+
+#ifdef KERNEL
+int	unp_discard();
+#else
+
+/* actual length of an initialized sockaddr_un */
+#define SUN_LEN(su) \
+	(sizeof(*(su)) - sizeof((su)->sun_path) + strlen((su)->sun_path))
+#endif
diff --git a/sys/sys/unistd.h b/sys/sys/unistd.h
new file mode 100644
index 00000000000..e086f6f6e39
--- /dev/null
+++ b/sys/sys/unistd.h
@@ -0,0 +1,124 @@
+/*
+ * Copyright (c) 1989, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)unistd.h	8.2 (Berkeley) 1/7/94
+ */
+
+#ifndef _SYS_UNISTD_H_
+#define	_SYS_UNISTD_H_
+
+/* compile-time symbolic constants */
+#define	_POSIX_JOB_CONTROL	/* implementation supports job control */
+
+/*
+ * Although we have saved user/group IDs, we do not use them in setuid
+ * as described in POSIX 1003.1, because the feature does not work for
+ * root.  We use the saved IDs in seteuid/setegid, which are not currently
+ * part of the POSIX 1003.1 specification.
+ */
+#ifdef	_NOT_AVAILABLE
+#define	_POSIX_SAVED_IDS	/* saved set-user-ID and set-group-ID */
+#endif
+
+#define	_POSIX_VERSION		198808L
+#define	_POSIX2_VERSION		199212L
+
+/* execution-time symbolic constants */
+				/* chown requires appropriate privileges */
+#define	_POSIX_CHOWN_RESTRICTED	1
+				/* too-long path components generate errors */
+#define	_POSIX_NO_TRUNC		1
+				/* may disable terminal special characters */
+#define	_POSIX_VDISABLE		((unsigned char)'\377')
+
+/* access function */
+#define	F_OK		0	/* test for existence of file */
+#define	X_OK		0x01	/* test for execute or search permission */
+#define	W_OK		0x02	/* test for write permission */
+#define	R_OK		0x04	/* test for read permission */
+
+/* whence values for lseek(2) */
+#define	SEEK_SET	0	/* set file offset to offset */
+#define	SEEK_CUR	1	/* set file offset to current plus offset */
+#define	SEEK_END	2	/* set file offset to EOF plus offset */
+
+#ifndef _POSIX_SOURCE
+/* whence values for lseek(2); renamed by POSIX 1003.1 */
+#define	L_SET		SEEK_SET
+#define	L_INCR		SEEK_CUR
+#define	L_XTND		SEEK_END
+#endif
+
+/* configurable pathname variables */
+#define	_PC_LINK_MAX		 1
+#define	_PC_MAX_CANON		 2
+#define	_PC_MAX_INPUT		 3
+#define	_PC_NAME_MAX		 4
+#define	_PC_PATH_MAX		 5
+#define	_PC_PIPE_BUF		 6
+#define	_PC_CHOWN_RESTRICTED	 7
+#define	_PC_NO_TRUNC		 8
+#define	_PC_VDISABLE		 9
+
+/* configurable system variables */
+#define	_SC_ARG_MAX		 1
+#define	_SC_CHILD_MAX		 2
+#define	_SC_CLK_TCK		 3
+#define	_SC_NGROUPS_MAX		 4
+#define	_SC_OPEN_MAX		 5
+#define	_SC_JOB_CONTROL		 6
+#define	_SC_SAVED_IDS		 7
+#define	_SC_VERSION		 8
+#define	_SC_BC_BASE_MAX		 9
+#define	_SC_BC_DIM_MAX		10
+#define	_SC_BC_SCALE_MAX	11
+#define	_SC_BC_STRING_MAX	12
+#define	_SC_COLL_WEIGHTS_MAX	13
+#define	_SC_EXPR_NEST_MAX	14
+#define	_SC_LINE_MAX		15
+#define	_SC_RE_DUP_MAX		16
+#define	_SC_2_VERSION		17
+#define	_SC_2_C_BIND		18
+#define	_SC_2_C_DEV		19
+#define	_SC_2_CHAR_TERM		20
+#define	_SC_2_FORT_DEV		21
+#define	_SC_2_FORT_RUN		22
+#define	_SC_2_LOCALEDEF		23
+#define	_SC_2_SW_DEV		24
+#define	_SC_2_UPE		25
+#define	_SC_STREAM_MAX		26
+#define	_SC_TZNAME_MAX		27
+
+/* configurable system strings */
+#define	_CS_PATH		 1
+
+#endif /* !_SYS_UNISTD_H_ */
diff --git a/sys/sys/unpcb.h b/sys/sys/unpcb.h
new file mode 100644
index 00000000000..efcfd0e23c1
--- /dev/null
+++ b/sys/sys/unpcb.h
@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) 1982, 1986, 1989, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)unpcb.h	8.1 (Berkeley) 6/2/93
+ */
+
+/*
+ * Protocol control block for an active
+ * instance of a UNIX internal protocol.
+ *
+ * A socket may be associated with a vnode in the
+ * file system.  If so, the unp_vnode pointer holds
+ * a reference count to this vnode, which should be irele'd
+ * when the socket goes away.
+ *
+ * A socket may be connected to another socket, in which
+ * case the control block of the socket to which it is connected
+ * is given by unp_conn.
+ *
+ * A socket may be referenced by a number of sockets (e.g. several
+ * sockets may be connected to a datagram socket.)  These sockets
+ * are in a linked list starting with unp_refs, linked through
+ * unp_nextref and null-terminated.  Note that a socket may be referenced
+ * by a number of other sockets and may also reference a socket (not
+ * necessarily one which is referencing it).  This generates
+ * the need for unp_refs and unp_nextref to be separate fields.
+ *
+ * Stream sockets keep copies of receive sockbuf sb_cc and sb_mbcnt
+ * so that changes in the sockbuf may be computed to modify
+ * back pressure on the sender accordingly.
+ */
+struct	unpcb {
+	struct	socket *unp_socket;	/* pointer back to socket */
+	struct	vnode *unp_vnode;	/* if associated with file */
+	ino_t	unp_ino;		/* fake inode number */
+	struct	unpcb *unp_conn;	/* control block of connected socket */
+	struct	unpcb *unp_refs;	/* referencing socket linked list */
+	struct 	unpcb *unp_nextref;	/* link in unp_refs list */
+	struct	mbuf *unp_addr;		/* bound address of socket */
+	int	unp_cc;			/* copy of rcv.sb_cc */
+	int	unp_mbcnt;		/* copy of rcv.sb_mbcnt */
+};
+
+#define	sotounpcb(so)	((struct unpcb *)((so)->so_pcb))
diff --git a/sys/sys/user.h b/sys/sys/user.h
new file mode 100644
index 00000000000..85fdd130c2d
--- /dev/null
+++ b/sys/sys/user.h
@@ -0,0 +1,91 @@
+/*
+ * Copyright (c) 1982, 1986, 1989, 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)user.h	8.2 (Berkeley) 9/23/93
+ */
+
+#include <machine/pcb.h>
+#ifndef KERNEL
+/* stuff that *used* to be included by user.h, or is now needed */
+#include <errno.h>
+#include <sys/time.h>
+#include <sys/resource.h>
+#include <sys/ucred.h>
+#include <sys/uio.h>
+#endif
+#include <sys/resourcevar.h>
+#include <sys/signalvar.h>
+#include <vm/vm.h>		/* XXX */
+#include <sys/sysctl.h>
+
+
+/*
+ * Per process structure containing data that isn't needed in core
+ * when the process isn't running (esp. when swapped out).
+ * This structure may or may not be at the same kernel address
+ * in all processes.
+ */
+ 
+struct	user {
+	struct	pcb u_pcb;
+
+	struct	sigacts u_sigacts;	/* p_sigacts points here (use it!) */
+	struct	pstats u_stats;		/* p_stats points here (use it!) */
+
+	/*
+	 * Remaining fields only for core dump and/or ptrace--
+	 * not valid at other times!
+	 */
+	struct	kinfo_proc u_kproc;	/* proc + eproc */
+	struct	md_coredump u_md;	/* machine dependent glop */
+};
+
+/*
+ * Redefinitions to make the debuggers happy for now...  This subterfuge
+ * brought to you by coredump() and trace_req().  These fields are *only*
+ * valid at those times!
+ */
+#define	U_ar0	u_kproc.kp_proc.p_md.md_regs /* copy of curproc->p_md.md_regs */
+#define	U_tsize	u_kproc.kp_eproc.e_vm.vm_tsize
+#define	U_dsize	u_kproc.kp_eproc.e_vm.vm_dsize
+#define	U_ssize	u_kproc.kp_eproc.e_vm.vm_ssize
+#define	U_sig	u_sigacts.ps_sig
+#define	U_code	u_sigacts.ps_code
+
+#ifndef KERNEL
+#define	u_ar0	U_ar0
+#define	u_tsize	U_tsize
+#define	u_dsize	U_dsize
+#define	u_ssize	U_ssize
+#define	u_sig	U_sig
+#define	u_code	U_code
+#endif /* !KERNEL */
diff --git a/sys/sys/utsname.h b/sys/sys/utsname.h
new file mode 100644
index 00000000000..aa0f2c75ab6
--- /dev/null
+++ b/sys/sys/utsname.h
@@ -0,0 +1,56 @@
+/*-
+ * Copyright (c) 1994
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Chuck Karish of Mindcraft, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)utsname.h	8.1 (Berkeley) 1/4/94
+ */
+
+#ifndef	_SYS_UTSNAME_H
+#define	_SYS_UTSNAME_H
+
+struct utsname {
+	char	sysname[256];	/* Name of this OS. */
+	char	nodename[256];	/* Name of this network node. */
+	char	release[256];	/* Release level. */
+	char	version[256];	/* Version level. */
+	char	machine[256];	/* Hardware type. */
+};
+
+#include <sys/cdefs.h>
+
+__BEGIN_DECLS
+int	uname __P((struct utsname *));
+__END_DECLS
+
+#endif	/* !_SYS_UTSNAME_H */
diff --git a/sys/sys/vadvise.h b/sys/sys/vadvise.h
new file mode 100644
index 00000000000..be793e8e721
--- /dev/null
+++ b/sys/sys/vadvise.h
@@ -0,0 +1,49 @@
+/*-
+ * Copyright (c) 1982, 1986, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)vadvise.h	8.1 (Berkeley) 6/2/93
+ */
+
+/*
+ * Parameters to vadvise() to tell system of particular paging
+ * behaviour:
+ *	VA_NORM		Normal strategy
+ *	VA_ANOM		Sampling page behaviour is not a win, don't bother
+ *			Suitable during GCs in LISP, or sequential or random
+ *			page referencing.
+ *	VA_SEQL		Sequential behaviour expected.
+ *	VA_FLUSH	Invalidate all page table entries.
+ */
+#define	VA_NORM		0
+#define	VA_ANOM		1
+#define	VA_SEQL		2
+#define	VA_FLUSH 	3
diff --git a/sys/sys/vcmd.h b/sys/sys/vcmd.h
new file mode 100644
index 00000000000..de27ec1b0af
--- /dev/null
+++ b/sys/sys/vcmd.h
@@ -0,0 +1,43 @@
+/*-
+ * Copyright (c) 1982, 1986, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)vcmd.h	8.1 (Berkeley) 6/2/93
+ */
+
+#include <sys/ioctl.h>
+
+#define	VPRINT		0100
+#define	VPLOT		0200
+#define	VPRINTPLOT	0400
+
+#define	VGETSTATE	_IOR('v', 0, int)
+#define	VSETSTATE	_IOW('v', 1, int)
diff --git a/sys/sys/vlimit.h b/sys/sys/vlimit.h
new file mode 100644
index 00000000000..b6457e64ddf
--- /dev/null
+++ b/sys/sys/vlimit.h
@@ -0,0 +1,49 @@
+/*-
+ * Copyright (c) 1982, 1986, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)vlimit.h	8.1 (Berkeley) 6/2/93
+ */
+
+/*
+ * Limits for u.u_limit[i], per process, inherited.
+ */
+#define	LIM_NORAISE	0	/* if nonzero, can't raise limits */
+#define	LIM_CPU		1	/* max secs cpu time */
+#define	LIM_FSIZE	2	/* max size of file created */
+#define	LIM_DATA	3	/* max growth of data space */
+#define	LIM_STACK	4	/* max growth of stack */
+#define	LIM_CORE	5	/* max size of ``core'' file */
+#define	LIM_MAXRSS	6	/* max desired data+stack core usage */
+
+#define	NLIMITS		6	/* presumably counts LIM_CPU..LIM_MAXRSS */
+
+#define	INFINITY	0x7fffffff	/* 2^31 - 1 */
diff --git a/sys/sys/vmmeter.h b/sys/sys/vmmeter.h
new file mode 100644
index 00000000000..f0b3d57f336
--- /dev/null
+++ b/sys/sys/vmmeter.h
@@ -0,0 +1,147 @@
+/*-
+ * Copyright (c) 1982, 1986, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)vmmeter.h	8.1 (Berkeley) 6/2/93
+ */
+
+/*
+ * System wide statistics counters.
+ */
+struct vmmeter {
+	/*
+	 * General system activity.
+	 */
+	unsigned v_swtch;	/* context switches */
+	unsigned v_trap;	/* calls to trap */
+	unsigned v_syscall;	/* calls to syscall() */
+	unsigned v_intr;	/* device interrupts */
+	unsigned v_soft;	/* software interrupts */
+	unsigned v_faults;	/* total faults taken */
+	/*
+	 * Virtual memory activity.
+	 */
+	unsigned v_lookups;	/* object cache lookups */
+	unsigned v_hits;	/* object cache hits */
+	unsigned v_vm_faults;	/* number of address memory faults */
+	unsigned v_cow_faults;	/* number of copy-on-writes */
+	unsigned v_swpin;	/* swapins */
+	unsigned v_swpout;	/* swapouts */
+	unsigned v_pswpin;	/* pages swapped in */
+	unsigned v_pswpout;	/* pages swapped out */
+	unsigned v_pageins;	/* number of pageins */
+	unsigned v_pageouts;	/* number of pageouts */
+	unsigned v_pgpgin;	/* pages paged in */
+	unsigned v_pgpgout;	/* pages paged out */
+	unsigned v_intrans;	/* intransit blocking page faults */
+	unsigned v_reactivated;	/* number of pages reactivated from free list */
+	unsigned v_rev;		/* revolutions of the hand */
+	unsigned v_scan;	/* scans in page out daemon */
+	unsigned v_dfree;	/* pages freed by daemon */
+	unsigned v_pfree;	/* pages freed by exiting processes */
+	unsigned v_zfod;	/* pages zero filled on demand */
+	unsigned v_nzfod;	/* number of zfod's created */
+	/*
+	 * Distribution of page usages.
+	 */
+	unsigned v_page_size;	/* page size in bytes */
+	unsigned v_kernel_pages;/* number of pages in use by kernel */
+	unsigned v_free_target;	/* number of pages desired free */
+	unsigned v_free_min;	/* minimum number of pages desired free */
+	unsigned v_free_count;	/* number of pages free */
+	unsigned v_wire_count;	/* number of pages wired down */
+	unsigned v_active_count;/* number of pages active */
+	unsigned v_inactive_target; /* number of pages desired inactive */
+	unsigned v_inactive_count;  /* number of pages inactive */
+};
+#ifdef KERNEL
+struct	vmmeter cnt;
+#endif
+
+/* systemwide totals computed every five seconds */
+struct vmtotal
+{
+	short	t_rq;		/* length of the run queue */
+	short	t_dw;		/* jobs in ``disk wait'' (neg priority) */
+	short	t_pw;		/* jobs in page wait */
+	short	t_sl;		/* jobs sleeping in core */
+	short	t_sw;		/* swapped out runnable/short block jobs */
+	long	t_vm;		/* total virtual memory */
+	long	t_avm;		/* active virtual memory */
+	long	t_rm;		/* total real memory in use */
+	long	t_arm;		/* active real memory */
+	long	t_vmshr;	/* shared virtual memory */
+	long	t_avmshr;	/* active shared virtual memory */
+	long	t_rmshr;	/* shared real memory */
+	long	t_armshr;	/* active shared real memory */
+	long	t_free;		/* free memory pages */
+};
+#ifdef KERNEL
+struct	vmtotal total;
+#endif
+
+/*
+ * Optional instrumentation.
+ */
+#ifdef PGINPROF
+
+#define	NDMON	128
+#define	NSMON	128
+
+#define	DRES	20
+#define	SRES	5
+
+#define	PMONMIN	20
+#define	PRES	50
+#define	NPMON	64
+
+#define	RMONMIN	130
+#define	RRES	5
+#define	NRMON	64
+
+/* data and stack size distribution counters */
+unsigned int	dmon[NDMON+1];
+unsigned int	smon[NSMON+1];
+
+/* page in time distribution counters */
+unsigned int	pmon[NPMON+2];
+
+/* reclaim time distribution counters */
+unsigned int	rmon[NRMON+2];
+
+int	pmonmin;
+int	pres;
+int	rmonmin;
+int	rres;
+
+unsigned rectime;		/* accumulator for reclaim times */
+unsigned pgintime;		/* accumulator for page in times */
+#endif
diff --git a/sys/sys/vnode.h b/sys/sys/vnode.h
new file mode 100644
index 00000000000..fa51d994a21
--- /dev/null
+++ b/sys/sys/vnode.h
@@ -0,0 +1,397 @@
+/*
+ * Copyright (c) 1989, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)vnode.h	8.7 (Berkeley) 2/4/94
+ */
+
+#include <sys/queue.h>
+
+/*
+ * The vnode is the focus of all file activity in UNIX.  There is a
+ * unique vnode allocated for each active file, each current directory,
+ * each mounted-on file, text file, and the root.
+ */
+
+/*
+ * Vnode types.  VNON means no type.
+ */
+enum vtype	{ VNON, VREG, VDIR, VBLK, VCHR, VLNK, VSOCK, VFIFO, VBAD };
+
+/*
+ * Vnode tag types.
+ * These are for the benefit of external programs only (e.g., pstat)
+ * and should NEVER be inspected by the kernel.
+ */
+enum vtagtype	{
+	VT_NON, VT_UFS, VT_NFS, VT_MFS, VT_PC, VT_LFS, VT_LOFS, VT_FDESC,
+	VT_PORTAL, VT_NULL, VT_UMAP, VT_KERNFS, VT_PROCFS, VT_AFS, VT_ISOFS,
+	VT_UNION
+};
+
+/*
+ * Each underlying filesystem allocates its own private area and hangs
+ * it from v_data.  If non-null, this area is freed in getnewvnode().
+ */
+LIST_HEAD(buflists, buf);
+
+struct vnode {
+	u_long	v_flag;				/* vnode flags (see below) */
+	short	v_usecount;			/* reference count of users */
+	short	v_writecount;			/* reference count of writers */
+	long	v_holdcnt;			/* page & buffer references */
+	daddr_t	v_lastr;			/* last read (read-ahead) */
+	u_long	v_id;				/* capability identifier */
+	struct	mount *v_mount;			/* ptr to vfs we are in */
+	int 	(**v_op)();			/* vnode operations vector */
+	TAILQ_ENTRY(vnode) v_freelist;		/* vnode freelist */
+	LIST_ENTRY(vnode) v_mntvnodes;		/* vnodes for mount point */
+	struct	buflists v_cleanblkhd;		/* clean blocklist head */
+	struct	buflists v_dirtyblkhd;		/* dirty blocklist head */
+	long	v_numoutput;			/* num of writes in progress */
+	enum	vtype v_type;			/* vnode type */
+	union {
+		struct mount	*vu_mountedhere;/* ptr to mounted vfs (VDIR) */
+		struct socket	*vu_socket;	/* unix ipc (VSOCK) */
+		caddr_t		vu_vmdata;	/* private data for vm (VREG) */
+		struct specinfo	*vu_specinfo;	/* device (VCHR, VBLK) */
+		struct fifoinfo	*vu_fifoinfo;	/* fifo (VFIFO) */
+	} v_un;
+	struct	nqlease *v_lease;		/* Soft reference to lease */
+	daddr_t	v_lastw;			/* last write (write cluster) */
+	daddr_t	v_cstart;			/* start block of cluster */
+	daddr_t	v_lasta;			/* last allocation */
+	int	v_clen;				/* length of current cluster */
+	int	v_ralen;			/* Read-ahead length */
+	daddr_t	v_maxra;			/* last readahead block */
+	long	v_spare[7];			/* round to 128 bytes */
+	enum	vtagtype v_tag;			/* type of underlying data */
+	void 	*v_data;			/* private data for fs */
+};
+#define	v_mountedhere	v_un.vu_mountedhere
+#define	v_socket	v_un.vu_socket
+#define	v_vmdata	v_un.vu_vmdata
+#define	v_specinfo	v_un.vu_specinfo
+#define	v_fifoinfo	v_un.vu_fifoinfo
+
+/*
+ * Vnode flags.
+ */
+#define	VROOT		0x0001	/* root of its file system */
+#define	VTEXT		0x0002	/* vnode is a pure text prototype */
+#define	VSYSTEM		0x0004	/* vnode being used by kernel */
+#define	VXLOCK		0x0100	/* vnode is locked to change underlying type */
+#define	VXWANT		0x0200	/* process is waiting for vnode */
+#define	VBWAIT		0x0400	/* waiting for output to complete */
+#define	VALIASED	0x0800	/* vnode has an alias */
+#define	VDIROP		0x1000	/* LFS: vnode is involved in a directory op */
+
+/*
+ * Vnode attributes.  A field value of VNOVAL represents a field whose value
+ * is unavailable (getattr) or which is not to be changed (setattr).
+ */
+struct vattr {
+	enum vtype	va_type;	/* vnode type (for create) */
+	u_short		va_mode;	/* file's access mode and type */
+	short		va_nlink;	/* number of references to file */
+	uid_t		va_uid;		/* owner user id */
+	gid_t		va_gid;		/* owner group id */
+	long		va_fsid;	/* file system id (dev for now) */
+	long		va_fileid;	/* file id */
+	u_quad_t	va_size;	/* file size in bytes */
+	long		va_blocksize;	/* blocksize preferred for i/o */
+	struct timespec	va_atime;	/* time of last access */
+	struct timespec	va_mtime;	/* time of last modification */
+	struct timespec	va_ctime;	/* time file changed */
+	u_long		va_gen;		/* generation number of file */
+	u_long		va_flags;	/* flags defined for file */
+	dev_t		va_rdev;	/* device the special file represents */
+	u_quad_t	va_bytes;	/* bytes of disk space held by file */
+	u_quad_t	va_filerev;	/* file modification number */
+	u_int		va_vaflags;	/* operations flags, see below */
+	long		va_spare;	/* remain quad aligned */
+};
+
+/*
+ * Flags for va_vaflags.
+ */
+#define	VA_UTIMES_NULL	0x01		/* utimes argument was NULL */
+
+/*
+ * Flags for ioflag.
+ */
+#define	IO_UNIT		0x01		/* do I/O as atomic unit */
+#define	IO_APPEND	0x02		/* append write to end */
+#define	IO_SYNC		0x04		/* do I/O synchronously */
+#define	IO_NODELOCKED	0x08		/* underlying node already locked */
+#define	IO_NDELAY	0x10		/* FNDELAY flag set in file table */
+
+/*
+ *  Modes.  Some values same as Ixxx entries from inode.h for now.
+ */
+#define	VSUID	04000		/* set user id on execution */
+#define	VSGID	02000		/* set group id on execution */
+#define	VSVTX	01000		/* save swapped text even after use */
+#define	VREAD	00400		/* read, write, execute permissions */
+#define	VWRITE	00200
+#define	VEXEC	00100
+
+/*
+ * Token indicating no attribute value yet assigned.
+ */
+#define	VNOVAL	(-1)
+
+#ifdef KERNEL
+/*
+ * Convert between vnode types and inode formats (since POSIX.1
+ * defines mode word of stat structure in terms of inode formats).
+ */
+extern enum vtype	iftovt_tab[];
+extern int		vttoif_tab[];
+#define IFTOVT(mode)	(iftovt_tab[((mode) & S_IFMT) >> 12])
+#define VTTOIF(indx)	(vttoif_tab[(int)(indx)])
+#define MAKEIMODE(indx, mode)	(int)(VTTOIF(indx) | (mode))
+
+/*
+ * Flags to various vnode functions.
+ */
+#define	SKIPSYSTEM	0x0001		/* vflush: skip vnodes marked VSYSTEM */
+#define	FORCECLOSE	0x0002		/* vflush: force file closure */
+#define	WRITECLOSE	0x0004		/* vflush: only close writeable files */
+#define	DOCLOSE		0x0008		/* vclean: close active files */
+#define	V_SAVE		0x0001		/* vinvalbuf: sync file first */
+#define	V_SAVEMETA	0x0002		/* vinvalbuf: leave indirect blocks */
+
+#ifdef DIAGNOSTIC
+#define	HOLDRELE(vp)	holdrele(vp)
+#define	VATTR_NULL(vap)	vattr_null(vap)
+#define	VHOLD(vp)	vhold(vp)
+#define	VREF(vp)	vref(vp)
+
+void	holdrele __P((struct vnode *));
+void	vattr_null __P((struct vattr *));
+void	vhold __P((struct vnode *));
+void	vref __P((struct vnode *));
+#else
+#define	HOLDRELE(vp)	(vp)->v_holdcnt--	/* decrease buf or page ref */
+#define	VATTR_NULL(vap)	(*(vap) = va_null)	/* initialize a vattr */
+#define	VHOLD(vp)	(vp)->v_holdcnt++	/* increase buf or page ref */
+#define	VREF(vp)	(vp)->v_usecount++	/* increase reference */
+#endif
+
+#define	NULLVP	((struct vnode *)NULL)
+
+/*
+ * Global vnode data.
+ */
+extern	struct vnode *rootvnode;	/* root (i.e. "/") vnode */
+extern	int desiredvnodes;		/* number of vnodes desired */
+extern	struct vattr va_null;		/* predefined null vattr structure */
+
+/*
+ * Macro/function to check for client cache inconsistency w.r.t. leasing.
+ */
+#define	LEASE_READ	0x1		/* Check lease for readers */
+#define	LEASE_WRITE	0x2		/* Check lease for modifiers */
+
+#ifdef NFS
+void	lease_check __P((struct vnode *vp, struct proc *p,
+	    struct ucred *ucred, int flag));
+void	lease_updatetime __P((int deltat));
+#define	LEASE_CHECK(vp, p, cred, flag)	lease_check((vp), (p), (cred), (flag))
+#define	LEASE_UPDATETIME(dt)		lease_updatetime(dt)
+#else
+#define	LEASE_CHECK(vp, p, cred, flag)	/* expands to nothing without NFS */
+#define	LEASE_UPDATETIME(dt)		/* expands to nothing without NFS */
+#endif /* NFS */
+#endif /* KERNEL */
+
+
+/*
+ * Mods for extensibility.
+ */
+
+/*
+ * Flags for vdesc_flags:
+ */
+#define VDESC_MAX_VPS		16
+/* Low order 16 flag bits are reserved for willrele flags for vp arguments. */
+#define VDESC_VP0_WILLRELE	0x0001
+#define VDESC_VP1_WILLRELE	0x0002
+#define VDESC_VP2_WILLRELE	0x0004
+#define VDESC_VP3_WILLRELE	0x0008
+#define VDESC_NOMAP_VPP		0x0100
+#define VDESC_VPP_WILLRELE	0x0200
+
+/*
+ * VDESC_NO_OFFSET is used to identify the end of the offset list
+ * and in places where no such field exists.
+ */
+#define VDESC_NO_OFFSET -1
+
+/*
+ * This structure describes the vnode operation taking place.
+ */
+struct vnodeop_desc {
+	int	vdesc_offset;		/* offset in vector--first for speed */
+	char    *vdesc_name;		/* a readable name for debugging */
+	int	vdesc_flags;		/* VDESC_* flags */
+
+	/*
+	 * These ops are used by bypass routines to map and locate arguments.
+	 * Creds and procs are not needed in bypass routines, but sometimes
+	 * they are useful to (for example) transport layers.
+	 * Nameidata is useful because it has a cred in it.
+	 */
+	int	*vdesc_vp_offsets;	/* list ended by VDESC_NO_OFFSET */
+	int	vdesc_vpp_offset;	/* return vpp location */
+	int	vdesc_cred_offset;	/* cred location, if any */
+	int	vdesc_proc_offset;	/* proc location, if any */
+	int	vdesc_componentname_offset; /* if any */
+	/*
+	 * Finally, we've got a list of private data (about each operation)
+	 * for each transport layer.  (Support to manage this list is not
+	 * yet part of BSD.)
+	 */
+	caddr_t	*vdesc_transports;
+};
+
+#ifdef KERNEL
+/*
+ * A list of all the operation descs.
+ */
+extern struct vnodeop_desc *vnodeop_descs[];
+
+
+/*
+ * This macro is very helpful in defining those offsets in the vdesc struct.
+ *
+ * This is stolen from X11R4.  I ignored all the fancy stuff for
+ * Crays, so if you decide to port this to such a serious machine,
+ * you might want to consult Intrinsic.h's XtOffset{,Of,To}.
+ */
+#define VOPARG_OFFSET(p_type,field) \
+        ((int) (((char *) (&(((p_type)NULL)->field))) - ((char *) NULL)))
+#define VOPARG_OFFSETOF(s_type,field) \
+	VOPARG_OFFSET(s_type*,field)
+#define VOPARG_OFFSETTO(S_TYPE,S_OFFSET,STRUCT_P) \
+	((S_TYPE)(((char*)(STRUCT_P))+(S_OFFSET)))
+
+
+/*
+ * This structure is used to configure the new vnodeops vector.
+ */
+struct vnodeopv_entry_desc {
+	struct vnodeop_desc *opve_op;   /* which operation this is */
+	int (*opve_impl)();		/* code implementing this operation */
+};
+struct vnodeopv_desc {
+			/* ptr to the ptr to the vector where op should go */
+	int (***opv_desc_vector_p)();
+	struct vnodeopv_entry_desc *opv_desc_ops;   /* null terminated list */
+};
+
+/*
+ * A default routine which just returns an error.
+ */
+int vn_default_error __P((void));
+
+/*
+ * A generic structure.
+ * This can be used by bypass routines to identify generic arguments.
+ */
+struct vop_generic_args {
+	struct vnodeop_desc *a_desc;
+	/* other random data follows, presumably */
+};
+
+/*
+ * VOCALL calls an op given an ops vector.  We break it out because BSD's
+ * vclean changes the ops vector and then wants to call ops with the old
+ * vector.
+ */
+#define VOCALL(OPSV,OFF,AP) (( *((OPSV)[(OFF)])) (AP))
+
+/*
+ * This call works for vnodes in the kernel.
+ */
+#define VCALL(VP,OFF,AP) VOCALL((VP)->v_op,(OFF),(AP))
+#define VDESC(OP) (& __CONCAT(OP,_desc))
+#define VOFFSET(OP) (VDESC(OP)->vdesc_offset)
+
+/*
+ * Finally, include the default set of vnode operations.
+ */
+#include <vnode_if.h>
+
+/*
+ * Public vnode manipulation functions.
+ */
+struct file;
+struct mount;
+struct nameidata;
+struct proc;
+struct stat;
+struct ucred;
+struct uio;
+struct vattr;
+struct vnode;
+struct vop_bwrite_args;
+
+int 	bdevvp __P((dev_t dev, struct vnode **vpp));
+int 	getnewvnode __P((enum vtagtype tag,
+	    struct mount *mp, int (**vops)(), struct vnode **vpp));
+int	vinvalbuf __P((struct vnode *vp, int save, struct ucred *cred,
+	    struct proc *p, int slpflag, int slptimeo));
+void 	vattr_null __P((struct vattr *vap));
+int 	vcount __P((struct vnode *vp));
+int 	vget __P((struct vnode *vp, int lockflag));
+void 	vgone __P((struct vnode *vp));
+void 	vgoneall __P((struct vnode *vp));
+int	vn_bwrite __P((struct vop_bwrite_args *ap));
+int 	vn_close __P((struct vnode *vp,
+	    int flags, struct ucred *cred, struct proc *p));
+int 	vn_closefile __P((struct file *fp, struct proc *p));
+int	vn_ioctl __P((struct file *fp, int com, caddr_t data, struct proc *p));
+int 	vn_open __P((struct nameidata *ndp, int fmode, int cmode));
+int 	vn_rdwr __P((enum uio_rw rw, struct vnode *vp, caddr_t base,
+	    int len, off_t offset, enum uio_seg segflg, int ioflg,
+	    struct ucred *cred, int *aresid, struct proc *p));
+int	vn_read __P((struct file *fp, struct uio *uio, struct ucred *cred));
+int	vn_select __P((struct file *fp, int which, struct proc *p));
+int	vn_stat __P((struct vnode *vp, struct stat *sb, struct proc *p));
+int	vn_write __P((struct file *fp, struct uio *uio, struct ucred *cred));
+struct vnode *
+	checkalias __P((struct vnode *vp, dev_t nvp_rdev, struct mount *mp));
+void 	vput __P((struct vnode *vp));
+void 	vref __P((struct vnode *vp));
+void 	vrele __P((struct vnode *vp));
+#endif /* KERNEL */
diff --git a/sys/sys/vsio.h b/sys/sys/vsio.h
new file mode 100644
index 00000000000..d84218cc238
--- /dev/null
+++ b/sys/sys/vsio.h
@@ -0,0 +1,153 @@
+/*-
+ * Copyright (c) 1987, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)vsio.h	8.1 (Berkeley) 6/2/93
+ */
+
+ /****************************************************************************
+ *									    *
+ *  Copyright (c) 1983, 1984 by						    *
+ *  DIGITAL EQUIPMENT CORPORATION, Maynard, Massachusetts.		    *
+ *  All rights reserved.						    *
+ * 									    *
+ *  This software is furnished on an as-is basis and may be used and copied *
+ *  only with inclusion of the above copyright notice. This software or any *
+ *  other copies thereof may be provided or otherwise made available to     *
+ *  others only for non-commercial purposes.  No title to or ownership of   *
+ *  the software is hereby transferred.					    *
+ * 									    *
+ *  The information in this software is  subject to change without notice   *
+ *  and  should  not  be  construed as  a commitment by DIGITAL EQUIPMENT   *
+ *  CORPORATION.							    *
+ * 									    *
+ *  DIGITAL assumes no responsibility for the use  or  reliability of its   *
+ *  software on equipment which is not supplied by DIGITAL.		    *
+ * 									    *
+ *									    *
+ ****************************************************************************/
+/* 
+ * vsio.h - VS100 I/O command definitions
+ * 
+ * Author:	Christopher A. Kent
+ *		Digital Equipment Corporation
+ *		Western Research Lab
+ * Date:	Tue Jun 21 1983
+ */
+
+/*
+ * Possible ioctl calls.
+ *
+ * Every request is in ioctl group 'V'.  Request numbers 5, 7, 12 and 14
+ * are not defined in this header.
+ */
+
+#define	VSIOINIT	_IO('V', 0)		/* init the device */
+#define	VSIOSTART	_IOW('V', 1, int)	/* start microcode */
+#define	VSIOABORT	_IO('V', 2)		/* abort a command chain */
+#define	VSIOPWRUP	_IO('V', 3)		/* power-up reset */
+#define	VSIOGETVER	_IOR('V', 4, int)	/* get rom version */
+#define	VSIOSYNC	_IO('V', 6)		/* synch with device */
+#define	VSIOBBACTL	_IOW('V', 8, int)	/* control the BBA */
+#define	VSIOFIBCTL	_IOW('V', 9, int)	/* lamp on/off */
+#define	VSIOFIBRETRY	_IOW('V',10, int)	/* fiber retries */
+#define	VSIOGETSTATS	_IOR('V',11, vsStats)	/* get statistics */
+#define	VSIOGETIOA	_IOR('V',13, vsIoAddrAddr)/* get ioreg address */
+#define	VSIOUSERWAIT	_IO('V', 15)	/* wait for user I/O completion */
+#define VSIOWAITGO	_IOW('V', 16, caddr_t)	/* wait then go */
+
+
+#define	VSIO_OFF	0		/* option off */
+#define	VSIO_ON		1		/* option on */
+
+#define	VS_FIB_FINITE	1		/* finite retries */
+#define	VS_FIB_INFINITE	2		/* infinite retries */
+
+/* 
+ * Event queue entries
+ */
+
+/*
+ * One event queue entry (vsIoAddr.ibuff points at these).
+ * Fields marked "button only" are meaningful only for button events.
+ * NOTE(review): vse_type presumably holds a VSE_BUTTON/VSE_MMOTION/
+ * VSE_TMOTION value, vse_direction VSE_KBTUP/VSE_KBTDOWN and vse_device
+ * one of VSE_MOUSE..VSE_CONSOLE -- confirm against the driver.
+ */
+typedef struct	_vs_event{
+	u_short	vse_x;		/* x position */
+	u_short	vse_y;		/* y position */
+	u_short	vse_time;	/* 10 millisecond units (button only) */
+	char	vse_type;	/* button or motion? */
+	u_char	vse_key;	/* the key (button only) */
+	char	vse_direction;	/* which direction (button only) */
+	char	vse_device;	/* which device (button only) */
+}vsEvent;
+
+#define	VSE_BUTTON	0		/* button moved */
+#define	VSE_MMOTION	1		/* mouse moved */
+#define	VSE_TMOTION	2		/* tablet moved */
+
+#define	VSE_KBTUP	0		/* up */
+#define	VSE_KBTDOWN	1		/* down */
+
+#define	VSE_MOUSE	1		/* mouse */
+#define	VSE_DKB		2		/* main keyboard */
+#define	VSE_TABLET	3		/* graphics tablet */
+#define	VSE_AUX		4		/* auxiliary */
+#define	VSE_CONSOLE	5		/* console */
+
+/*
+ * Event and error counters; this is the argument type of the
+ * VSIOGETSTATS ioctl.
+ */
+typedef struct _vsStats{
+	int	errors;			/* count errors */
+	int	unsolIntr;		/* count unsolicited interrupts */
+	int	overruns;		/* event queue overruns */
+	int	flashes;		/* flashes on fiber link */
+	int	ignites;		/* times turned on */
+	int	douses;			/* times turned off */
+	int	linkErrors;		/* link errors */
+}vsStats;
+
+/*
+ * An x/y pair; vsIoAddr uses it for its "mouse" member.
+ */
+typedef struct _vs_cursor{
+	short x;
+	short y;
+}vsCursor;
+
+/*
+ * A rectangle described by its four edges; vsIoAddr uses it for its
+ * "mbox" member.
+ */
+typedef struct _vs_box {
+	short bottom;
+	short right;
+	short left;
+	short top;
+}vsBox;
+
+/*
+ * I/O addresses and event queue state.  The VSIOGETIOA ioctl takes a
+ * vsIoAddrAddr, i.e. a pointer to one of these.
+ * NOTE(review): obuff/obufflen look like an output buffer and its length,
+ * and ibuff/iqsize/ihead/itail like the event queue with its head and
+ * tail indices -- confirm against the driver before relying on this.
+ */
+typedef struct _vsIoAddr {
+	short	 *ioreg;
+	short	 status;
+	caddr_t  obuff;
+	int	 obufflen;
+	int	 reloc;
+	vsEvent  *ibuff;
+	int	 iqsize;		/* may assume power of 2 */
+	int	 ihead;			/* atomic write */
+	int	 itail;			/* atomic read */
+	vsCursor mouse;			/* atomic read/write */
+	vsBox	 mbox;			/* atomic read/write */
+} vsIoAddr;
+typedef vsIoAddr *vsIoAddrAddr;	/* argument type of VSIOGETIOA */
diff --git a/sys/sys/wait.h b/sys/sys/wait.h
new file mode 100644
index 00000000000..33a68d9f33f
--- /dev/null
+++ b/sys/sys/wait.h
@@ -0,0 +1,156 @@
+/*
+ * Copyright (c) 1982, 1986, 1989, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)wait.h	8.1 (Berkeley) 6/2/93
+ */
+
+/*
+ * This file holds definitions relevant to the wait4 system call
+ * and the alternate interfaces that use it (wait, wait3, waitpid).
+ */
+
+/*
+ * Macros to test the exit status returned by wait
+ * and extract the relevant values.
+ */
+/*
+ * Status word layout, as decoded by the macros below: the low seven bits
+ * (0177) hold the termination signal, or _WSTOPPED when the process is
+ * stopped; bit 0200 is the core-dump flag; the bits from 8 upward hold
+ * the exit status or the stopping signal.
+ *
+ * Without _POSIX_SOURCE, _W_INT takes the address of its argument, so the
+ * argument must be an lvalue (an int or a union wait object).
+ */
+#ifdef _POSIX_SOURCE
+#define	_W_INT(i)	(i)
+#else
+#define	_W_INT(w)	(*(int *)&(w))	/* convert union wait to int */
+#define	WCOREFLAG	0200
+#endif
+
+#define	_WSTATUS(x)	(_W_INT(x) & 0177)
+#define	_WSTOPPED	0177		/* _WSTATUS if process is stopped */
+/* True if the low seven bits equal _WSTOPPED. */
+#define WIFSTOPPED(x)	(_WSTATUS(x) == _WSTOPPED)
+/* Everything above the low byte; use only when WIFSTOPPED(x). */
+#define WSTOPSIG(x)	(_W_INT(x) >> 8)
+/* True if the low seven bits are neither _WSTOPPED nor zero. */
+#define WIFSIGNALED(x)	(_WSTATUS(x) != _WSTOPPED && _WSTATUS(x) != 0)
+/* The low seven bits; use only when WIFSIGNALED(x). */
+#define WTERMSIG(x)	(_WSTATUS(x))
+/* True if the low seven bits are zero. */
+#define WIFEXITED(x)	(_WSTATUS(x) == 0)
+/* Everything above the low byte; use only when WIFEXITED(x). */
+#define WEXITSTATUS(x)	(_W_INT(x) >> 8)
+#ifndef _POSIX_SOURCE
+/* Nonzero if the WCOREFLAG bit is set. */
+#define WCOREDUMP(x)	(_W_INT(x) & WCOREFLAG)
+
+/* Build a status word from an exit code and signal, or from a stop signal. */
+#define	W_EXITCODE(ret, sig)	((ret) << 8 | (sig))
+#define	W_STOPCODE(sig)		((sig) << 8 | _WSTOPPED)
+#endif
+
+/*
+ * Option bits for the third argument of wait4.  WNOHANG causes the
+ * wait to not hang if there are no stopped or terminated processes, rather
+ * returning an error indication in this case (pid==0).  WUNTRACED
+ * indicates that the caller should receive status about untraced children
+ * which stop due to signals.  If children are stopped and a wait without
+ * this option is done, it is as though they were still running... nothing
+ * about them is returned.
+ */
+#define WNOHANG		1	/* dont hang in wait */
+#define WUNTRACED	2	/* tell about stopped, untraced children */
+
+#ifndef _POSIX_SOURCE
+/* POSIX extensions and 4.2/4.3 compatibility: */
+
+/*
+ * Tokens for special values of the "pid" parameter to wait4.
+ */
+#define	WAIT_ANY	(-1)	/* any process */
+#define	WAIT_MYPGRP	0	/* any process in my process group */
+
+#include <machine/endian.h>
+
+/*
+ * Deprecated:
+ * Structure of the information in the status word returned by wait4.
+ * If w_stopval==WSTOPPED, then the second structure describes
+ * the information returned, else the first.
+ */
+/*
+ * Three views of one status word: the plain int, the "terminated"
+ * bit-fields (w_T) and the "stopped" bit-fields (w_S).  The bit-fields
+ * are listed in opposite order for the two byte orders, presumably so
+ * that they cover the same bits of w_status either way (bit-field
+ * placement is compiler-defined -- confirm for new ports).
+ */
+union wait {
+	int	w_status;		/* used in syscall */
+	/*
+	 * Terminated process status.
+	 */
+	struct {
+#if BYTE_ORDER == LITTLE_ENDIAN 
+		unsigned int	w_Termsig:7,	/* termination signal */
+				w_Coredump:1,	/* core dump indicator */
+				w_Retcode:8,	/* exit code if w_termsig==0 */
+				w_Filler:16;	/* upper bits filler */
+#endif
+#if BYTE_ORDER == BIG_ENDIAN 
+		unsigned int	w_Filler:16,	/* upper bits filler */
+				w_Retcode:8,	/* exit code if w_termsig==0 */
+				w_Coredump:1,	/* core dump indicator */
+				w_Termsig:7;	/* termination signal */
+#endif
+	} w_T;
+	/*
+	 * Stopped process status.  Returned
+	 * only for traced children unless requested
+	 * with the WUNTRACED option bit.
+	 */
+	struct {
+#if BYTE_ORDER == LITTLE_ENDIAN 
+		unsigned int	w_Stopval:8,	/* == WSTOPPED if stopped */
+				w_Stopsig:8,	/* signal that stopped us */
+				w_Filler:16;	/* upper bits filler */
+#endif
+#if BYTE_ORDER == BIG_ENDIAN 
+		unsigned int	w_Filler:16,	/* upper bits filler */
+				w_Stopsig:8,	/* signal that stopped us */
+				w_Stopval:8;	/* == WSTOPPED if stopped */
+#endif
+	} w_S;
+};
+#define	w_termsig	w_T.w_Termsig
+#define w_coredump	w_T.w_Coredump
+#define w_retcode	w_T.w_Retcode
+#define w_stopval	w_S.w_Stopval
+#define w_stopsig	w_S.w_Stopsig
+
+#define	WSTOPPED	_WSTOPPED
+#endif /* _POSIX_SOURCE */
+
+#ifndef KERNEL
+#include <sys/types.h>
+#include <sys/cdefs.h>
+
+__BEGIN_DECLS
+struct rusage;	/* forward declaration */
+
+pid_t	wait __P((int *));
+pid_t	waitpid __P((pid_t, int *, int));
+#ifndef _POSIX_SOURCE
+pid_t	wait3 __P((int *, int, struct rusage *));
+pid_t	wait4 __P((pid_t, int *, int, struct rusage *));
+#endif
+__END_DECLS
+#endif
diff --git a/sys/tools/vnode_if.awk b/sys/tools/vnode_if.awk
new file mode 100644
index 00000000000..e190fa04836
--- /dev/null
+++ b/sys/tools/vnode_if.awk
@@ -0,0 +1,433 @@
+#!/bin/sh -
+#
+# Copyright (c) 1992, 1993
+#	The Regents of the University of California.  All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# 1. Redistributions of source code must retain the above copyright
+#    notice, this list of conditions and the following disclaimer.
+# 2. Redistributions in binary form must reproduce the above copyright
+#    notice, this list of conditions and the following disclaimer in the
+#    documentation and/or other materials provided with the distribution.
+# 3. All advertising materials mentioning features or use of this software
+#    must display the following acknowledgement:
+#	This product includes software developed by the University of
+#	California, Berkeley and its contributors.
+# 4. Neither the name of the University nor the names of its contributors
+#    may be used to endorse or promote products derived from this software
+#    without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+# SUCH DAMAGE.
+#
+#	@(#)vnode_if.sh	8.1 (Berkeley) 6/10/93
+#
+
+# Script to produce VFS front-end sugar.
+#
+# usage: vnode_if.sh srcfile
+#	(where srcfile is currently /sys/kern/vnode_if.src)
+#
+# These awk scripts are not particularly well written, specifically they
+# don't use arrays well and figure out the same information repeatedly.
+# Please rewrite them if you actually understand how to use awk.  Note,
+# they use nawk extensions and gawk's toupper.
+
+if [ $# -ne 1 ] ; then
+	echo 'usage: vnode_if.sh srcfile'
+	exit 1
+fi
+
+# Name of the source file.
+SRC=$1
+
+# Names of the created files.
+CFILE=vnode_if.c
+HEADER=vnode_if.h
+
+# Awk program (must support nawk extensions and gawk's "toupper")
+# Use "awk" at Berkeley, "gawk" elsewhere.
+AWK=awk
+
+# Print out header information for vnode_if.h.
+cat << END_OF_LEADING_COMMENT > $HEADER
+/*
+ * This file is produced automatically.
+ * Do not modify anything in here by hand.
+ *
+ * Created from @(#)vnode_if.sh	8.1 (Berkeley) 6/10/93
+ */
+
+extern struct vnodeop_desc vop_default_desc;
+END_OF_LEADING_COMMENT
+
+# Awk script to take vnode_if.src and turn it into vnode_if.h.
+$AWK '
+	NF == 0 || $0 ~ "^#" {
+		next;
+	}
+	{
+		# Get the function name.
+		name = $1;
+		uname = toupper(name);
+
+		# Get the function arguments.
+		for (c1 = 0;; ++c1) {
+			if (getline <= 0)
+				exit
+			if ($0 ~ "^};")
+				break;
+			a[c1] = $0;
+		}
+
+		# Print out the vop_F_args structure.
+		printf("struct %s_args {\n\tstruct vnodeop_desc *a_desc;\n",
+		    name);
+		for (c2 = 0; c2 < c1; ++c2) {
+			c3 = split(a[c2], t);
+			printf("\t");
+			if (t[2] ~ "WILLRELE")
+				c4 = 3;
+			else 
+				c4 = 2;
+			for (; c4 < c3; ++c4)
+				printf("%s ", t[c4]);
+			beg = match(t[c3], "[^*]");
+			printf("%sa_%s\n",
+			    substr(t[c4], 0, beg - 1), substr(t[c4], beg));
+		}
+		printf("};\n");
+
+		# Print out extern declaration.
+		printf("extern struct vnodeop_desc %s_desc;\n", name);
+
+		# Print out inline struct.
+		printf("static inline int %s(", uname);
+		sep = ", ";
+		for (c2 = 0; c2 < c1; ++c2) {
+			if (c2 == c1 - 1)
+				sep = ")\n";
+			c3 = split(a[c2], t);
+			beg = match(t[c3], "[^*]");
+			end = match(t[c3], ";");
+			printf("%s%s", substr(t[c3], beg, end - beg), sep);
+		}
+		for (c2 = 0; c2 < c1; ++c2) {
+			c3 = split(a[c2], t);
+			printf("\t");
+			if (t[2] ~ "WILLRELE")
+				c4 = 3;
+			else
+				c4 = 2;
+			for (; c4 < c3; ++c4)
+				printf("%s ", t[c4]);
+			beg = match(t[c3], "[^*]");
+			printf("%s%s\n",
+			    substr(t[c4], 0, beg - 1), substr(t[c4], beg));
+		}
+		printf("{\n\tstruct %s_args a;\n\n", name);
+		printf("\ta.a_desc = VDESC(%s);\n", name);
+		for (c2 = 0; c2 < c1; ++c2) {
+			c3 = split(a[c2], t);
+			printf("\t");
+			beg = match(t[c3], "[^*]");
+			end = match(t[c3], ";");
+			printf("a.a_%s = %s\n",
+			    substr(t[c3], beg, end - beg), substr(t[c3], beg));
+		}
+		c1 = split(a[0], t);
+		beg = match(t[c1], "[^*]");
+		end = match(t[c1], ";");
+		printf("\treturn (VCALL(%s, VOFFSET(%s), &a));\n}\n",
+		    substr(t[c1], beg, end - beg), name);
+	}' < $SRC >> $HEADER
+
+# Print out header information for vnode_if.c.
+cat << END_OF_LEADING_COMMENT > $CFILE
+/*
+ * This file is produced automatically.
+ * Do not modify anything in here by hand.
+ *
+ * Created from @(#)vnode_if.sh	8.1 (Berkeley) 6/10/93
+ */
+
+#include <sys/param.h>
+#include <sys/mount.h>
+#include <sys/vnode.h>
+
+struct vnodeop_desc vop_default_desc = {
+	0,
+	"default",
+	0,
+	NULL,
+	VDESC_NO_OFFSET,
+	VDESC_NO_OFFSET,
+	VDESC_NO_OFFSET,
+	VDESC_NO_OFFSET,
+	NULL,
+};
+
+END_OF_LEADING_COMMENT
+
+# Awk script to take vnode_if.src and turn it into vnode_if.c.
+$AWK 'function kill_surrounding_ws (s) {
+		sub (/^[ \t]*/, "", s);
+		sub (/[ \t]*$/, "", s);
+		return s;
+	}
+
+	# Report a fatal error in the source file on stderr and stop awk
+	# with a failing exit status.  read_args calls this on malformed
+	# argument lines.
+	function bail(msg) {
+		print "vnode_if.sh: " msg | "cat 1>&2";
+		exit 1;
+	}
+
+	# Read the argument lines of the current operation, up to the line
+	# containing "}".  For argument k (1..numargs) this fills in the
+	# globals dirs[k] (IN, OUT or INOUT), reles[k] (WILLRELE or
+	# WONTRELE), types[k] (whitespace-condensed type) and args[k]
+	# (variable name).
+	function read_args() {
+		numargs = 0;
+		# Compare against 0 explicitly: getline returns -1 on a read
+		# error, which a bare truth test would treat as success and
+		# loop on forever.
+		while ((getline ln) > 0) {
+			if (ln ~ /}/) {
+				break;
+			};
+
+			# Delete comments, if any.
+			gsub (/\/\*.*\*\//, "", ln);
+
+			# Delete leading/trailing space.
+			ln = kill_surrounding_ws(ln);
+
+			# Pick off direction.  INOUT is tried first because
+			# the IN pattern would otherwise never see it whole.
+			if (1 == sub(/^INOUT[ \t]+/, "", ln))
+				dir = "INOUT";
+			else if (1 == sub(/^IN[ \t]+/, "", ln))
+				dir = "IN";
+			else if (1 == sub(/^OUT[ \t]+/, "", ln))
+				dir = "OUT";
+			else
+				bail("No IN/OUT direction for \"" ln "\".");
+
+			# check for "WILLRELE"
+			if (1 == sub(/^WILLRELE[ \t]+/, "", ln)) {
+				rele = "WILLRELE";
+			} else {
+				rele = "WONTRELE";
+			};
+
+			# kill trailing ;
+			if (1 != sub (/;$/, "", ln)) {
+				bail("Missing end-of-line ; in \"" ln "\".");
+			};
+
+			# pick off variable name
+			if (!(i = match(ln, /[A-Za-z0-9_]+$/))) {
+				bail("Missing var name \"a_foo\" in \"" ln "\".");
+			};
+			arg = substr (ln, i);
+			# Want to <<substr(ln, i) = "";>>, but nawk cannot.
+			# Hack around this.
+			ln = substr(ln, 1, i-1);
+
+			# what is left must be type
+			# (but clean it up some)
+			type = ln;
+			gsub (/[ \t]+/, " ", type);   # condense whitespace
+			type = kill_surrounding_ws(type);
+
+			# (boy this was easier in Perl)
+
+			numargs++;
+			dirs[numargs] = dir;
+			reles[numargs] = rele;
+			types[numargs] = type;
+			args[numargs] = arg;
+		};
+	}
+
+	# Emit the <name>_vp_offsets[] array: one VOPARG_OFFSETOF entry for
+	# each argument whose type is exactly "struct vnode *", followed by
+	# the VDESC_NO_OFFSET terminator.
+	#
+	# Reads the globals name, numargs, types[], args[] and reles[].
+	# Sets the global releflags, which generate_operation_desc reads,
+	# so this must run first for each operation.
+	function generate_operation_vp_offsets() {
+		printf ("int %s_vp_offsets[] = {\n", name);
+		# as a side effect, figure out the releflags
+		releflags = "";
+		vpnum = 0;
+		for (i=1; i<=numargs; i++) {
+			if (types[i] == "struct vnode *") {
+				printf ("\tVOPARG_OFFSETOF(struct %s_args,a_%s),\n",
+					name, args[i]);
+				if (reles[i] == "WILLRELE") {
+					releflags = releflags "|VDESC_VP" vpnum "_WILLRELE";
+				};
+				# vpnum counts vnode arguments only
+				vpnum++;
+			};
+		};
+		# drop the leading "|" left by the concatenation above
+		sub (/^\|/, "", releflags);
+		print "\tVDESC_NO_OFFSET";
+		print "};";
+	}
+	
+	# Return the VOPARG_OFFSETOF expression for the first argument of
+	# the current operation whose type string equals the given type,
+	# or "VDESC_NO_OFFSET" when no argument has that type.
+	function find_arg_with_type (type) {
+		i = 1;
+		while (i <= numargs) {
+			if (types[i] == type)
+				return "VOPARG_OFFSETOF(struct " name "_args,a_" args[i] ")";
+			i++;
+		}
+		return "VDESC_NO_OFFSET";
+	}
+	
+	# Emit the struct vnodeop_desc initializer for the current operation.
+	# The members are printed in this order: offset, printable name,
+	# flags, vp offsets array, then the vpp, cred, proc and
+	# componentname offsets, then a NULL transport-layer pointer.
+	#
+	# Reads the globals name, numargs, types[], reles[] and releflags.
+	# releflags is set by generate_operation_vp_offsets, so that
+	# function must already have run for this operation.
+	function generate_operation_desc() {
+		printf ("struct vnodeop_desc %s_desc = {\n", name);
+		# offset
+		printf ("\t0,\n");
+		# printable name
+		printf ("\t\"%s\",\n", name);
+		# flags
+		vppwillrele = "";
+		for (i=1; i<=numargs; i++) {
+			if (types[i] == "struct vnode **" &&
+				(reles[i] == "WILLRELE")) {
+				vppwillrele = "|VDESC_VPP_WILLRELE";
+			};
+		};
+		# a literal 0 keeps the expression valid when no vp flag is set
+		if (releflags == "") {
+			printf ("\t0%s,\n", vppwillrele);
+		} else {
+			printf ("\t%s%s,\n", releflags, vppwillrele);
+		};
+		# vp offsets
+		printf ("\t%s_vp_offsets,\n", name);
+		# vpp (if any)
+		printf ("\t%s,\n", find_arg_with_type("struct vnode **"));
+		# cred (if any)
+		printf ("\t%s,\n", find_arg_with_type("struct ucred *"));
+		# proc (if any)
+		printf ("\t%s,\n", find_arg_with_type("struct proc *"));
+		# componentname
+		printf ("\t%s,\n", find_arg_with_type("struct componentname *"));
+		# transport layer information
+		printf ("\tNULL,\n};\n");
+	}
+
+	NF == 0 || $0 ~ "^#" {
+		next;
+	}
+	{
+		# get the function name
+		name = $1;
+
+		# get the function arguments
+		read_args();
+
+		# Print out the vop_F_vp_offsets structure.  This all depends
+		# on naming conventions and nothing else.
+		generate_operation_vp_offsets();
+
+		# Print out the vnodeop_desc structure.
+		generate_operation_desc();
+
+		printf "\n";
+
+	}' < $SRC >> $CFILE
+# THINGS THAT DON'T WORK RIGHT YET.
+# 
+# Two existing BSD vnodeops (bwrite and strategy) don't take any vnodes as
+# arguments.  This means that these operations can't function successfully
+# through a bypass routine.
+#
+# Bwrite and strategy will be replaced when the VM page/buffer cache
+# integration happens.
+#
+# To get around this problem for now we handle these ops as special cases.
+
+cat << END_OF_SPECIAL_CASES >> $HEADER
+#include <sys/buf.h>
+struct vop_strategy_args {
+	struct vnodeop_desc *a_desc;
+	struct buf *a_bp;
+};
+extern struct vnodeop_desc vop_strategy_desc;
+static inline int VOP_STRATEGY(bp)
+	struct buf *bp;
+{
+	struct vop_strategy_args a;
+
+	a.a_desc = VDESC(vop_strategy);
+	a.a_bp = bp;
+	return (VCALL((bp)->b_vp, VOFFSET(vop_strategy), &a));
+}
+
+struct vop_bwrite_args {
+	struct vnodeop_desc *a_desc;
+	struct buf *a_bp;
+};
+extern struct vnodeop_desc vop_bwrite_desc;
+static inline int VOP_BWRITE(bp)
+	struct buf *bp;
+{
+	struct vop_bwrite_args a;
+
+	a.a_desc = VDESC(vop_bwrite);
+	a.a_bp = bp;
+	return (VCALL((bp)->b_vp, VOFFSET(vop_bwrite), &a));
+}
+END_OF_SPECIAL_CASES
+
+cat << END_OF_SPECIAL_CASES >> $CFILE
+int vop_strategy_vp_offsets[] = {
+	VDESC_NO_OFFSET
+};
+struct vnodeop_desc vop_strategy_desc = {
+	0,
+	"vop_strategy",
+	0,
+	vop_strategy_vp_offsets,
+	VDESC_NO_OFFSET,
+	VDESC_NO_OFFSET,
+	VDESC_NO_OFFSET,
+	VDESC_NO_OFFSET,
+	NULL,
+};
+int vop_bwrite_vp_offsets[] = {
+	VDESC_NO_OFFSET
+};
+struct vnodeop_desc vop_bwrite_desc = {
+	0,
+	"vop_bwrite",
+	0,
+	vop_bwrite_vp_offsets,
+	VDESC_NO_OFFSET,
+	VDESC_NO_OFFSET,
+	VDESC_NO_OFFSET,
+	VDESC_NO_OFFSET,
+	NULL,
+};
+END_OF_SPECIAL_CASES
+
+# Add the vfs_op_descs array to the C file.
+$AWK '
+	# Open the table; the default and special-case descriptors lead it.
+	BEGIN {
+		printf("\nstruct vnodeop_desc *vfs_op_descs[] = {\n");
+		printf("\t&vop_default_desc,	/* MUST BE FIRST */\n");
+		printf("\t&vop_strategy_desc,	/* XXX: SPECIAL CASE */\n");
+		printf("\t&vop_bwrite_desc,	/* XXX: SPECIAL CASE */\n");
+	}
+
+	# Blank lines and comment lines carry no operation.
+	NF == 0 || $0 ~ "^#" {
+		next;
+	}
+
+	# First line of an operation: emit a pointer to its descriptor,
+	# then consume the argument lines through the closing "};".  If
+	# the input runs out first, the loop ends and END still runs.
+	{
+		printf("\t&%s_desc,\n", $1);
+		while ((getline) > 0) {
+			if ($0 ~ "^};")
+				break;
+		}
+	}
+
+	# Terminate the table.
+	END {
+		printf("\tNULL\n};\n");
+	}' < $SRC >> $CFILE
+
diff --git a/sys/ufs/ffs/ffs_alloc.c b/sys/ufs/ffs/ffs_alloc.c
new file mode 100644
index 00000000000..cdd2e4b2b35
--- /dev/null
+++ b/sys/ufs/ffs/ffs_alloc.c
@@ -0,0 +1,1474 @@
+/*
+ * Copyright (c) 1982, 1986, 1989, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)ffs_alloc.c	8.8 (Berkeley) 2/21/94
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/buf.h>
+#include <sys/proc.h>
+#include <sys/vnode.h>
+#include <sys/mount.h>
+#include <sys/kernel.h>
+#include <sys/syslog.h>
+
+#include <vm/vm.h>
+
+#include <ufs/ufs/quota.h>
+#include <ufs/ufs/inode.h>
+
+#include <ufs/ffs/fs.h>
+#include <ufs/ffs/ffs_extern.h>
+
+extern u_long nextgennumber;
+
+static daddr_t	ffs_alloccg __P((struct inode *, int, daddr_t, int));
+static daddr_t	ffs_alloccgblk __P((struct fs *, struct cg *, daddr_t));
+static daddr_t	ffs_clusteralloc __P((struct inode *, int, daddr_t, int));
+static ino_t	ffs_dirpref __P((struct fs *));
+static daddr_t	ffs_fragextend __P((struct inode *, int, long, int, int));
+static void	ffs_fserr __P((struct fs *, u_int, char *));
+static u_long	ffs_hashalloc
+		    __P((struct inode *, int, long, int, u_long (*)()));
+static ino_t	ffs_nodealloccg __P((struct inode *, int, daddr_t, int));
+static daddr_t	ffs_mapsearch __P((struct fs *, struct cg *, daddr_t, int));
+
+/*
+ * Allocate a block in the file system.
+ * 
+ * The size of the requested block is given, which must be some
+ * multiple of fs_fsize and <= fs_bsize.
+ * A preference may be optionally specified. If a preference is given
+ * the following hierarchy is used to allocate a block:
+ *   1) allocate the requested block.
+ *   2) allocate a rotationally optimal block in the same cylinder.
+ *   3) allocate a block in the same cylinder group.
+ *   4) quadratically rehash into other cylinder groups, until an
+ *      available block is located.
+ * If no block preference is given the following hierarchy is used
+ * to allocate a block:
+ *   1) allocate a block in the cylinder group that contains the
+ *      inode for the file.
+ *   2) quadratically rehash into other cylinder groups, until an
+ *      available block is located.
+ */
+/*
+ * Returns 0 with *bnp set to the allocated block, the error from chkdq()
+ * when the quota check fails (QUOTA kernels only), or ENOSPC when no
+ * space could be found.  *bnp is 0 on every failure return.
+ */
+int
+ffs_alloc(ip, lbn, bpref, size, cred, bnp)
+	register struct inode *ip;	/* inode the block is allocated for */
+	daddr_t lbn, bpref;		/* lbn is not used in this function;
+					   bpref is the preferred block or 0 */
+	int size;			/* number of bytes wanted */
+	struct ucred *cred;		/* credential for reserve/quota checks */
+	daddr_t *bnp;			/* out: allocated block number */
+{
+	register struct fs *fs;
+	daddr_t bno;
+	int cg, error;
+
+	*bnp = 0;
+	fs = ip->i_fs;
+#ifdef DIAGNOSTIC
+	if ((u_int)size > fs->fs_bsize || fragoff(fs, size) != 0) {
+		printf("dev = 0x%x, bsize = %d, size = %d, fs = %s\n",
+		    ip->i_dev, fs->fs_bsize, size, fs->fs_fsmnt);
+		panic("ffs_alloc: bad size");
+	}
+	if (cred == NOCRED)
+		panic("ffs_alloc: missing credential\n");
+#endif /* DIAGNOSTIC */
+	/* A full block was asked for and there are no free blocks at all. */
+	if (size == fs->fs_bsize && fs->fs_cstotal.cs_nbfree == 0)
+		goto nospace;
+	/* Only uid 0 may allocate once the minfree reserve is reached. */
+	if (cred->cr_uid != 0 && freespace(fs, fs->fs_minfree) <= 0)
+		goto nospace;
+#ifdef QUOTA
+	if ((error = chkdq(ip, (long)btodb(size), cred, 0)) != 0)
+		return (error);
+#endif
+	/* An out-of-range preference is treated as no preference. */
+	if (bpref >= fs->fs_size)
+		bpref = 0;
+	if (bpref == 0)
+		cg = ino_to_cg(fs, ip->i_number);
+	else
+		cg = dtog(fs, bpref);
+	bno = (daddr_t)ffs_hashalloc(ip, cg, (long)bpref, size,
+	    (u_long (*)())ffs_alloccg);
+	if (bno > 0) {
+		ip->i_blocks += btodb(size);
+		ip->i_flag |= IN_CHANGE | IN_UPDATE;
+		*bnp = bno;
+		return (0);
+	}
+#ifdef QUOTA
+	/*
+	 * Restore user's disk quota because allocation failed.
+	 * Widen to long before negating: if btodb() yields an unsigned
+	 * value, negating first would give a large positive number
+	 * wherever long is wider than unsigned.
+	 */
+	(void) chkdq(ip, -(long)btodb(size), cred, FORCE);
+#endif
+nospace:
+	ffs_fserr(fs, cred->cr_uid, "file system full");
+	uprintf("\n%s: write failed, file system is full\n", fs->fs_fsmnt);
+	return (ENOSPC);
+}
+
+/*
+ * Reallocate a fragment to a bigger size
+ *
+ * The number and size of the old block is given, and a preference
+ * and new size is also specified. The allocator attempts to extend
+ * the original block. Failing that, the regular block allocator is
+ * invoked to get an appropriate block.
+ */
+/*
+ * Returns 0 with *bpp set to the buffer for the grown block, the error
+ * from bread() or chkdq(), or ENOSPC when no space could be found.
+ * *bpp is 0 on every failure return.
+ */
+int
+ffs_realloccg(ip, lbprev, bpref, osize, nsize, cred, bpp)
+	register struct inode *ip;	/* inode owning the fragment */
+	daddr_t lbprev;			/* index into ip->i_db of the block */
+	daddr_t bpref;			/* preferred new location or 0 */
+	int osize, nsize;		/* old and new sizes in bytes */
+	struct ucred *cred;		/* credential for reserve/quota checks */
+	struct buf **bpp;		/* out: buffer holding the block */
+{
+	register struct fs *fs;
+	struct buf *bp;
+	int cg, request, error;
+	daddr_t bprev, bno;
+
+	*bpp = 0;
+	fs = ip->i_fs;
+#ifdef DIAGNOSTIC
+	if ((u_int)osize > fs->fs_bsize || fragoff(fs, osize) != 0 ||
+	    (u_int)nsize > fs->fs_bsize || fragoff(fs, nsize) != 0) {
+		printf(
+		    "dev = 0x%x, bsize = %d, osize = %d, nsize = %d, fs = %s\n",
+		    ip->i_dev, fs->fs_bsize, osize, nsize, fs->fs_fsmnt);
+		panic("ffs_realloccg: bad size");
+	}
+	if (cred == NOCRED)
+		panic("ffs_realloccg: missing credential\n");
+#endif /* DIAGNOSTIC */
+	/* Only uid 0 may allocate once the minfree reserve is reached. */
+	if (cred->cr_uid != 0 && freespace(fs, fs->fs_minfree) <= 0)
+		goto nospace;
+	if ((bprev = ip->i_db[lbprev]) == 0) {
+		printf("dev = 0x%x, bsize = %d, bprev = %d, fs = %s\n",
+		    ip->i_dev, fs->fs_bsize, bprev, fs->fs_fsmnt);
+		panic("ffs_realloccg: bad bprev");
+	}
+	/*
+	 * Allocate the extra space in the buffer.
+	 */
+	if ((error = bread(ITOV(ip), lbprev, osize, NOCRED, &bp)) != 0) {
+		brelse(bp);
+		return (error);
+	}
+#ifdef QUOTA
+	if ((error = chkdq(ip, (long)btodb(nsize - osize), cred, 0)) != 0) {
+		brelse(bp);
+		return (error);
+	}
+#endif
+	/*
+	 * Check for extension in the existing location.
+	 */
+	cg = dtog(fs, bprev);
+	if ((bno = ffs_fragextend(ip, cg, (long)bprev, osize, nsize)) != 0) {
+		if (bp->b_blkno != fsbtodb(fs, bno))
+			panic("bad blockno");
+		ip->i_blocks += btodb(nsize - osize);
+		ip->i_flag |= IN_CHANGE | IN_UPDATE;
+		allocbuf(bp, nsize);
+		bp->b_flags |= B_DONE;
+		/* Zero the part of the buffer beyond the old contents. */
+		bzero((char *)bp->b_data + osize, (u_int)nsize - osize);
+		*bpp = bp;
+		return (0);
+	}
+	/*
+	 * Allocate a new disk location.
+	 */
+	if (bpref >= fs->fs_size)
+		bpref = 0;
+	switch ((int)fs->fs_optim) {
+	case FS_OPTSPACE:
+		/*
+		 * Allocate an exact sized fragment. Although this makes 
+		 * best use of space, we will waste time relocating it if 
+		 * the file continues to grow. If the fragmentation is
+		 * less than half of the minimum free reserve, we choose
+		 * to begin optimizing for time.
+		 */
+		request = nsize;
+		if (fs->fs_minfree < 5 ||
+		    fs->fs_cstotal.cs_nffree >
+		    fs->fs_dsize * fs->fs_minfree / (2 * 100))
+			break;
+		log(LOG_NOTICE, "%s: optimization changed from SPACE to TIME\n",
+			fs->fs_fsmnt);
+		fs->fs_optim = FS_OPTTIME;
+		break;
+	case FS_OPTTIME:
+		/*
+		 * At this point we have discovered a file that is trying to
+		 * grow a small fragment to a larger fragment. To save time,
+		 * we allocate a full sized block, then free the unused portion.
+		 * If the file continues to grow, the `ffs_fragextend' call
+		 * above will be able to grow it in place without further
+		 * copying. If aberrant programs cause disk fragmentation to
+		 * grow within 2% of the free reserve, we choose to begin
+		 * optimizing for space.
+		 */
+		request = fs->fs_bsize;
+		if (fs->fs_cstotal.cs_nffree <
+		    fs->fs_dsize * (fs->fs_minfree - 2) / 100)
+			break;
+		log(LOG_NOTICE, "%s: optimization changed from TIME to SPACE\n",
+			fs->fs_fsmnt);
+		fs->fs_optim = FS_OPTSPACE;
+		break;
+	default:
+		printf("dev = 0x%x, optim = %d, fs = %s\n",
+		    ip->i_dev, fs->fs_optim, fs->fs_fsmnt);
+		panic("ffs_realloccg: bad optim");
+		/* NOTREACHED */
+	}
+	bno = (daddr_t)ffs_hashalloc(ip, cg, (long)bpref, request,
+	    (u_long (*)())ffs_alloccg);
+	if (bno > 0) {
+		/* Point the buffer at the new location, free the old one. */
+		bp->b_blkno = fsbtodb(fs, bno);
+		(void) vnode_pager_uncache(ITOV(ip));
+		ffs_blkfree(ip, bprev, (long)osize);
+		/* Give back whatever was requested beyond nsize. */
+		if (nsize < request)
+			ffs_blkfree(ip, bno + numfrags(fs, nsize),
+			    (long)(request - nsize));
+		ip->i_blocks += btodb(nsize - osize);
+		ip->i_flag |= IN_CHANGE | IN_UPDATE;
+		allocbuf(bp, nsize);
+		bp->b_flags |= B_DONE;
+		bzero((char *)bp->b_data + osize, (u_int)nsize - osize);
+		*bpp = bp;
+		return (0);
+	}
+#ifdef QUOTA
+	/*
+	 * Restore user's disk quota because allocation failed.
+	 * Widen to long before negating: if btodb() yields an unsigned
+	 * value, negating first would give a large positive number
+	 * wherever long is wider than unsigned.
+	 */
+	(void) chkdq(ip, -(long)btodb(nsize - osize), cred, FORCE);
+#endif
+	brelse(bp);
+nospace:
+	/*
+	 * no space available
+	 */
+	ffs_fserr(fs, cred->cr_uid, "file system full");
+	uprintf("\n%s: write failed, file system is full\n", fs->fs_fsmnt);
+	return (ENOSPC);
+}
+
+/*
+ * Reallocate a sequence of blocks into a contiguous sequence of blocks.
+ *
+ * The vnode and an array of buffer pointers for a range of sequential
+ * logical blocks to be made contiguous is given. The allocator attempts
+ * to find a range of sequential blocks starting as close as possible to
+ * an fs_rotdelay offset from the end of the allocation for the logical
+ * block immediately preceding the current range. If successful, the
+ * physical block numbers in the buffer pointers and in the inode are
+ * changed to reflect the new allocation. If unsuccessful, the allocation
+ * is left unchanged. The success in doing the reallocation is returned.
+ * Note that the error return is not reflected back to the user. Rather
+ * the previous block allocation will be used.
+ *
+ * Arguments (in *ap):
+ *	a_vp		vnode of the file whose blocks are being moved
+ *	a_buflist	the buffers of the cluster: bs_nchildren entries in
+ *			bs_children[], expected to cover consecutive
+ *			logical blocks (checked under DIAGNOSTIC)
+ *
+ * Returns 0 once the cluster has been moved. ENOSPC is returned when
+ * fs_contigsumsize is not positive, when the first and last buffers lie
+ * in different cylinder groups, when ufs_getlbns() or a block map read
+ * fails, or when ffs_hashalloc() finds no cluster of the needed length.
+ */
+#include <sys/sysctl.h>
+/*
+ * doasyncfree selects how ffs_reallocblks() writes the block maps it has
+ * modified: non-zero queues indirect blocks with bdwrite() and only flags
+ * the inode as changed; zero uses bwrite() and VOP_UPDATE(..., MNT_WAIT).
+ * debug14 pairs the variable with the name "doasyncfree" (presumably so
+ * that it can be tuned through the debug sysctl interface -- confirm
+ * against <sys/sysctl.h>).
+ */
+int doasyncfree = 1;
+struct ctldebug debug14 = { "doasyncfree", &doasyncfree };
+int
+ffs_reallocblks(ap)
+	struct vop_reallocblks_args /* {
+		struct vnode *a_vp;
+		struct cluster_save *a_buflist;
+	} */ *ap;
+{
+	struct fs *fs;
+	struct inode *ip;
+	struct vnode *vp;
+	struct buf *sbp, *ebp;
+	daddr_t *bap, *sbap, *ebap;
+	struct cluster_save *buflist;
+	daddr_t start_lbn, end_lbn, soff, eoff, newblk, blkno;
+	struct indir start_ap[NIADDR + 1], end_ap[NIADDR + 1], *idp;
+	int i, len, start_lvl, end_lvl, pref, ssize;
+
+	vp = ap->a_vp;
+	ip = VTOI(vp);
+	fs = ip->i_fs;
+	/* Give up at once when fs_contigsumsize is not positive. */
+	if (fs->fs_contigsumsize <= 0)
+		return (ENOSPC);
+	buflist = ap->a_buflist;
+	len = buflist->bs_nchildren;
+	start_lbn = buflist->bs_children[0]->b_lblkno;
+	end_lbn = start_lbn + len - 1;
+#ifdef DIAGNOSTIC
+	/* Every buffer must hold the logical block following its predecessor. */
+	for (i = 1; i < len; i++)
+		if (buflist->bs_children[i]->b_lblkno != start_lbn + i)
+			panic("ffs_reallocblks: non-cluster");
+#endif
+	/*
+	 * If the latest allocation is in a new cylinder group, assume that
+	 * the filesystem has decided to move and do not force it back to
+	 * the previous cylinder group.
+	 */
+	if (dtog(fs, dbtofsb(fs, buflist->bs_children[0]->b_blkno)) !=
+	    dtog(fs, dbtofsb(fs, buflist->bs_children[len - 1]->b_blkno)))
+		return (ENOSPC);
+	/*
+	 * Fill in start_ap/start_lvl and end_ap/end_lvl for the first and
+	 * last logical block; a non-zero return from either call abandons
+	 * the reallocation.
+	 */
+	if (ufs_getlbns(vp, start_lbn, start_ap, &start_lvl) ||
+	    ufs_getlbns(vp, end_lbn, end_ap, &end_lvl))
+		return (ENOSPC);
+	/*
+	 * Get the starting offset and block map for the first block.
+	 * A level of zero means the pointers live in the inode's i_db[]
+	 * array; otherwise the block map named by the last start_ap entry
+	 * is read into sbp.
+	 */
+	if (start_lvl == 0) {
+		sbap = &ip->i_db[0];
+		soff = start_lbn;
+	} else {
+		idp = &start_ap[start_lvl - 1];
+		if (bread(vp, idp->in_lbn, (int)fs->fs_bsize, NOCRED, &sbp)) {
+			brelse(sbp);
+			return (ENOSPC);
+		}
+		sbap = (daddr_t *)sbp->b_data;
+		soff = idp->in_off;
+	}
+	/*
+	 * Find the preferred location for the cluster.
+	 */
+	pref = ffs_blkpref(ip, start_lbn, soff, sbap);
+	/*
+	 * If the block range spans two block maps, get the second map.
+	 * ssize is the number of pointers that are rewritten through the
+	 * first map; it equals len when one map covers the whole range.
+	 */
+	if (end_lvl == 0 || (idp = &end_ap[end_lvl - 1])->in_off + 1 >= len) {
+		ssize = len;
+	} else {
+#ifdef DIAGNOSTIC
+		if (start_ap[start_lvl-1].in_lbn == idp->in_lbn)
+			panic("ffs_reallocblk: start == end");
+#endif
+		ssize = len - (idp->in_off + 1);
+		if (bread(vp, idp->in_lbn, (int)fs->fs_bsize, NOCRED, &ebp))
+			goto fail;
+		ebap = (daddr_t *)ebp->b_data;
+	}
+	/*
+	 * Search the block map looking for an allocation of the desired size.
+	 */
+	if ((newblk = (daddr_t)ffs_hashalloc(ip, dtog(fs, pref), (long)pref,
+	    len, (u_long (*)())ffs_clusteralloc)) == 0)
+		goto fail;
+	/*
+	 * We have found a new contiguous block.
+	 *
+	 * First we have to replace the old block pointers with the new
+	 * block pointers in the inode and indirect blocks associated
+	 * with the file.
+	 */
+	blkno = newblk;
+	for (bap = &sbap[soff], i = 0; i < len; i++, blkno += fs->fs_frag) {
+		/* After ssize entries, continue at the start of the second map. */
+		if (i == ssize)
+			bap = ebap;
+#ifdef DIAGNOSTIC
+		if (buflist->bs_children[i]->b_blkno != fsbtodb(fs, *bap))
+			panic("ffs_reallocblks: alloc mismatch");
+#endif
+		*bap++ = blkno;
+	}
+	/*
+	 * Next we must write out the modified inode and indirect blocks.
+	 * For strict correctness, the writes should be synchronous since
+	 * the old block values may have been written to disk. In practice
+	 * they are almost never written, but if we are concerned about
+	 * strict correctness, the `doasyncfree' flag should be set to zero.
+	 *
+	 * The test on `doasyncfree' should be changed to test a flag
+	 * that shows whether the associated buffers and inodes have
+	 * been written. The flag should be set when the cluster is
+	 * started and cleared whenever the buffer or inode is flushed.
+	 * We can then check below to see if it is set, and do the
+	 * synchronous write only when it has been cleared.
+	 */
+	if (sbap != &ip->i_db[0]) {
+		if (doasyncfree)
+			bdwrite(sbp);
+		else
+			bwrite(sbp);
+	} else {
+		ip->i_flag |= IN_CHANGE | IN_UPDATE;
+		if (!doasyncfree)
+			VOP_UPDATE(vp, &time, &time, MNT_WAIT);
+	}
+	/*
+	 * The second map exists only when ssize < len. Note that the
+	 * `else' below pairs with the inner `if (doasyncfree)'.
+	 */
+	if (ssize < len)
+		if (doasyncfree)
+			bdwrite(ebp);
+		else
+			bwrite(ebp);
+	/*
+	 * Last, free the old blocks and assign the new blocks to the buffers.
+	 */
+	for (blkno = newblk, i = 0; i < len; i++, blkno += fs->fs_frag) {
+		ffs_blkfree(ip, dbtofsb(fs, buflist->bs_children[i]->b_blkno),
+		    fs->fs_bsize);
+		buflist->bs_children[i]->b_blkno = fsbtodb(fs, blkno);
+	}
+	return (0);
+
+fail:
+	/* Release whichever block map buffers were obtained above. */
+	if (ssize < len)
+		brelse(ebp);
+	if (sbap != &ip->i_db[0])
+		brelse(sbp);
+	return (ENOSPC);
+}
+
+/*
+ * Allocate an inode in the file system.
+ *
+ * The search is carried out by ffs_hashalloc() with ffs_nodealloccg()
+ * as the per-group allocator, starting from a preferred inode:
+ *   - for a new directory, the inode chosen by ffs_dirpref();
+ *   - for anything else, the inode number of the parent (a_pvp).
+ * A preference at or beyond fs_ncg * fs_ipg is replaced by 0.
+ *
+ * On success the new vnode is stored through a_vpp, the inode's flags
+ * are cleared, a new generation number is assigned and 0 is returned.
+ * If VFS_VGET() fails, the inode is given back with VOP_VFREE() and that
+ * error is returned. When no inode is available the failure is logged,
+ * reported to the user and ENOSPC is returned; *a_vpp is then NULL.
+ */
+ffs_valloc(ap)
+	struct vop_valloc_args /* {
+		struct vnode *a_pvp;
+		int a_mode;
+		struct ucred *a_cred;
+		struct vnode **a_vpp;
+	} */ *ap;
+{
+	register struct vnode *dvp = ap->a_pvp;
+	register struct inode *dip;
+	register struct fs *fs;
+	register struct inode *nip;
+	mode_t mode = ap->a_mode;
+	ino_t ino, ipref;
+	int prefcg, error;
+
+	*ap->a_vpp = NULL;
+	dip = VTOI(dvp);
+	fs = dip->i_fs;
+	if (fs->fs_cstotal.cs_nifree == 0)
+		goto noinodes;
+
+	/*
+	 * Choose where the search starts and which cylinder group
+	 * that inode belongs to.
+	 */
+	ipref = ((mode & IFMT) == IFDIR) ? ffs_dirpref(fs) : dip->i_number;
+	if (ipref >= fs->fs_ncg * fs->fs_ipg)
+		ipref = 0;
+	prefcg = ino_to_cg(fs, ipref);
+	ino = (ino_t)ffs_hashalloc(dip, prefcg, (long)ipref, mode,
+	    ffs_nodealloccg);
+	if (ino == 0)
+		goto noinodes;
+	if ((error = VFS_VGET(dvp->v_mount, ino, ap->a_vpp)) != 0) {
+		VOP_VFREE(dvp, ino, mode);
+		return (error);
+	}
+	nip = VTOI(*ap->a_vpp);
+	/*
+	 * A free inode must have a zero mode and should own no blocks.
+	 */
+	if (nip->i_mode != 0) {
+		printf("mode = 0%o, inum = %d, fs = %s\n",
+		    nip->i_mode, nip->i_number, fs->fs_fsmnt);
+		panic("ffs_valloc: dup alloc");
+	}
+	if (nip->i_blocks != 0) {			/* XXX */
+		printf("free inode %s/%d had %d blocks\n",
+		    fs->fs_fsmnt, ino, nip->i_blocks);
+		nip->i_blocks = 0;
+	}
+	nip->i_flags = 0;
+	/*
+	 * Set up a new generation number for this inode: the counter is
+	 * advanced, and pulled up to the current time when it lags behind.
+	 */
+	nextgennumber++;
+	if (nextgennumber < (u_long)time.tv_sec)
+		nextgennumber = time.tv_sec;
+	nip->i_gen = nextgennumber;
+	return (0);
+
+noinodes:
+	ffs_fserr(fs, ap->a_cred->cr_uid, "out of inodes");
+	uprintf("\n%s: create/symlink failed, no inodes free\n", fs->fs_fsmnt);
+	return (ENOSPC);
+}
+
+/*
+ * Find a cylinder group in which to place a directory.
+ *
+ * Among the cylinder groups whose free inode count is at least the
+ * file system average, pick the one holding the fewest directories
+ * (the lowest numbered group wins a tie). The value returned is the
+ * number of the first inode in that group; group 0 is used when no
+ * group qualifies.
+ */
+static ino_t
+ffs_dirpref(fs)
+	register struct fs *fs;
+{
+	int cg, fewest, bestcg, avgifree;
+
+	avgifree = fs->fs_cstotal.cs_nifree / fs->fs_ncg;
+	bestcg = 0;
+	fewest = fs->fs_ipg;
+	for (cg = 0; cg < fs->fs_ncg; cg++) {
+		/* Skip groups that are short of free inodes. */
+		if (fs->fs_cs(fs, cg).cs_nifree < avgifree)
+			continue;
+		/* Skip groups that do not improve on the best so far. */
+		if (fs->fs_cs(fs, cg).cs_ndir >= fewest)
+			continue;
+		bestcg = cg;
+		fewest = fs->fs_cs(fs, cg).cs_ndir;
+	}
+	return ((ino_t)(fs->fs_ipg * bestcg));
+}
+
+/*
+ * Select the desired position for the next block in a file.
+ *
+ * A file is treated as a series of sections: the direct blocks form the
+ * first one and every later section holds fs_maxbpg blocks.
+ *
+ * Starting a section (indx is a multiple of fs_maxbpg, or the previous
+ * entry of the block map is unallocated):
+ *   - for a direct block (lbn < NDADDR), ask for the cylinder group of
+ *     the inode that describes the file;
+ *   - otherwise sweep the cylinder groups for one whose free block
+ *     count is at least the average. The sweep starts just after the
+ *     group of the previous block when that block is known; when it is
+ *     not (e.g. the first entry of an indirect block) a best guess is
+ *     made from the inode's group and the logical block number. The
+ *     group chosen is recorded in fs_cgrotor. NULL is returned when no
+ *     group qualifies.
+ *
+ * Continuing a section: ask for the block that follows the previous
+ * one. Once fs_maxcontig blocks have been laid out contiguously and
+ * fs_rotdelay is non-zero, the request is pushed out by fs_rotdelay
+ * milliseconds' worth of fragments, rounded up to a whole block, to
+ * allow time for the processor to schedule another I/O transfer.
+ *
+ *	ip	inode of the file being extended
+ *	lbn	logical block number being allocated
+ *	indx	index of that block within bap
+ *	bap	block map (array of block pointers) containing the entry
+ */
+daddr_t
+ffs_blkpref(ip, lbn, indx, bap)
+	struct inode *ip;
+	daddr_t lbn;
+	int indx;
+	daddr_t *bap;
+{
+	register struct fs *fs;
+	register int cg;
+	int avgbfree, startcg;
+	daddr_t nextblk;
+
+	fs = ip->i_fs;
+	if (indx % fs->fs_maxbpg != 0 && bap[indx - 1] != 0) {
+		/*
+		 * One or more previous blocks have been laid out. If
+		 * less than fs_maxcontig previous blocks are contiguous,
+		 * the next block is requested contiguously, otherwise it
+		 * is requested rotationally delayed by fs_rotdelay
+		 * milliseconds.
+		 */
+		nextblk = bap[indx - 1] + fs->fs_frag;
+		if (indx < fs->fs_maxcontig)
+			return (nextblk);
+		if (bap[indx - fs->fs_maxcontig] +
+		    blkstofrags(fs, fs->fs_maxcontig) != nextblk)
+			return (nextblk);
+		if (fs->fs_rotdelay == 0)
+			return (nextblk);
+		/*
+		 * Here we convert ms of delay to frags as:
+		 * (frags) = (ms) * (rev/sec) * (sect/rev) /
+		 *	((sect/frag) * (ms/sec))
+		 * then round up to the next block.
+		 */
+		nextblk += roundup(fs->fs_rotdelay * fs->fs_rps * fs->fs_nsect /
+		    (NSPF(fs) * 1000), fs->fs_frag);
+		return (nextblk);
+	}
+	/*
+	 * A new section is being started.
+	 */
+	if (lbn < NDADDR) {
+		cg = ino_to_cg(fs, ip->i_number);
+		return (fs->fs_fpg * cg + fs->fs_frag);
+	}
+	/*
+	 * Find a cylinder group with at least the average number of
+	 * unused data blocks, sweeping forward from startcg and then
+	 * wrapping around to group 0.
+	 */
+	if (indx != 0 && bap[indx - 1] != 0)
+		startcg = dtog(fs, bap[indx - 1]) + 1;
+	else
+		startcg = ino_to_cg(fs, ip->i_number) + lbn / fs->fs_maxbpg;
+	startcg %= fs->fs_ncg;
+	avgbfree = fs->fs_cstotal.cs_nbfree / fs->fs_ncg;
+	for (cg = startcg; cg < fs->fs_ncg; cg++) {
+		if (fs->fs_cs(fs, cg).cs_nbfree < avgbfree)
+			continue;
+		fs->fs_cgrotor = cg;
+		return (fs->fs_fpg * cg + fs->fs_frag);
+	}
+	for (cg = 0; cg <= startcg; cg++) {
+		if (fs->fs_cs(fs, cg).cs_nbfree < avgbfree)
+			continue;
+		fs->fs_cgrotor = cg;
+		return (fs->fs_fpg * cg + fs->fs_frag);
+	}
+	return (NULL);
+}
+
+/*
+ * Implement the cylinder overflow algorithm.
+ *
+ * The allocator is tried on cylinder groups in the following order,
+ * stopping at the first non-zero result:
+ *   1) the requested cylinder group, with the caller's preference;
+ *   2) the groups reached by a quadratic rehash on the group number;
+ *   3) a brute force pass over the groups starting two past the
+ *      requested one.
+ * Steps 2 and 3 pass a preference of 0 to the allocator. NULL is
+ * returned when every attempt fails.
+ */
+/*VARARGS5*/
+static u_long
+ffs_hashalloc(ip, cg, pref, size, allocator)
+	struct inode *ip;
+	int cg;
+	long pref;
+	int size;	/* size for data blocks, mode for inodes */
+	u_long (*allocator)();
+{
+	register struct fs *fs;
+	long found;
+	int n, homecg = cg;
+
+	fs = ip->i_fs;
+	/*
+	 * 1: preferred cylinder group
+	 */
+	if ((found = (*allocator)(ip, cg, pref, size)) != 0)
+		return (found);
+	/*
+	 * 2: quadratic rehash
+	 */
+	n = 1;
+	while (n < fs->fs_ncg) {
+		cg += n;
+		if (cg >= fs->fs_ncg)
+			cg -= fs->fs_ncg;
+		if ((found = (*allocator)(ip, cg, 0, size)) != 0)
+			return (found);
+		n *= 2;
+	}
+	/*
+	 * 3: brute force search
+	 * Note that we start at n == 2, since 0 was checked initially,
+	 * and 1 is always checked in the quadratic rehash.
+	 */
+	cg = (homecg + 2) % fs->fs_ncg;
+	for (n = 2; n < fs->fs_ncg; n++) {
+		if ((found = (*allocator)(ip, cg, 0, size)) != 0)
+			return (found);
+		if (++cg == fs->fs_ncg)
+			cg = 0;
+	}
+	return (NULL);
+}
+
+/*
+ * Determine whether a fragment can be extended.
+ *
+ * If the fragments that directly follow the existing allocation are
+ * free, claim them so that the allocation grows in place from osize to
+ * nsize bytes.
+ *
+ *	ip	inode owning the fragment
+ *	cg	cylinder group holding bprev
+ *	bprev	address of the existing fragment
+ *	osize	current size in bytes
+ *	nsize	requested size in bytes
+ *
+ * Returns bprev when the extension succeeded and NULL when it did not:
+ * too few free fragments in the group, the larger piece would cross a
+ * block boundary, the cylinder group could not be read or has a bad
+ * magic number, or one of the needed fragments is in use.
+ */
+static daddr_t
+ffs_fragextend(ip, cg, bprev, osize, nsize)
+	struct inode *ip;
+	int cg;
+	long bprev;
+	int osize, nsize;
+{
+	register struct fs *fs;
+	register struct cg *cgp;
+	struct buf *bp;
+	long bno;
+	int frags, bbase;
+	int i;
+
+	fs = ip->i_fs;
+	if (fs->fs_cs(fs, cg).cs_nffree < numfrags(fs, nsize - osize))
+		return (NULL);
+	frags = numfrags(fs, nsize);
+	bbase = fragnum(fs, bprev);
+	/* cannot extend across a block boundary */
+	if (fragnum(fs, (bprev + frags - 1)) < bbase)
+		return (NULL);
+	if (bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
+	    (int)fs->fs_cgsize, NOCRED, &bp))
+		goto cannot;
+	cgp = (struct cg *)bp->b_data;
+	if (!cg_chkmagic(cgp))
+		goto cannot;
+	cgp->cg_time = time.tv_sec;
+	bno = dtogd(fs, bprev);
+	/* Every fragment between the old and the new end must be free. */
+	for (i = numfrags(fs, osize); i < frags; i++)
+		if (isclr(cg_blksfree(cgp), bno + i))
+			goto cannot;
+	/*
+	 * The current fragment can be extended. Find where the free run
+	 * that follows it ends, then:
+	 *   deduct the count on the free piece being extended into,
+	 *   increase the count on the remaining free piece (if any),
+	 *   allocate the extended piece.
+	 */
+	i = frags;
+	while (i < fs->fs_frag - bbase && !isclr(cg_blksfree(cgp), bno + i))
+		i++;
+	cgp->cg_frsum[i - numfrags(fs, osize)]--;
+	if (i != frags)
+		cgp->cg_frsum[i - frags]++;
+	for (i = numfrags(fs, osize); i < frags; i++) {
+		clrbit(cg_blksfree(cgp), bno + i);
+		fs->fs_cs(fs, cg).cs_nffree--;
+		fs->fs_cstotal.cs_nffree--;
+		cgp->cg_cs.cs_nffree--;
+	}
+	fs->fs_fmod = 1;
+	bdwrite(bp);
+	return (bprev);
+
+cannot:
+	brelse(bp);
+	return (NULL);
+}
+
+/*
+ * Determine whether a block can be allocated.
+ *
+ * Try to allocate `size' bytes (a full block or a run of fragments) in
+ * cylinder group `cg', preferably at `bpref'.
+ *
+ * A full block request is handed to ffs_alloccgblk(). A fragment
+ * request is satisfied from an existing free piece of at least the
+ * needed length when the fragment summary shows one; otherwise a whole
+ * block is allocated and its unused tail is returned to the free map.
+ *
+ * Returns the address of the allocation, or NULL when the group cannot
+ * satisfy the request, cannot be read, or has a bad magic number.
+ */
+static daddr_t
+ffs_alloccg(ip, cg, bpref, size)
+	struct inode *ip;
+	int cg;
+	daddr_t bpref;
+	int size;
+{
+	register struct fs *fs;
+	register struct cg *cgp;
+	struct buf *bp;
+	register int i;
+	int bno, frags, allocsiz;
+	daddr_t base;
+
+	fs = ip->i_fs;
+	if (size == fs->fs_bsize && fs->fs_cs(fs, cg).cs_nbfree == 0)
+		return (NULL);
+	if (bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
+	    (int)fs->fs_cgsize, NOCRED, &bp))
+		goto fail;
+	cgp = (struct cg *)bp->b_data;
+	if (!cg_chkmagic(cgp))
+		goto fail;
+	if (size == fs->fs_bsize && cgp->cg_cs.cs_nbfree == 0)
+		goto fail;
+	cgp->cg_time = time.tv_sec;
+	if (size == fs->fs_bsize) {
+		/* A whole block is wanted. */
+		bno = ffs_alloccgblk(fs, cgp, bpref);
+		bdwrite(bp);
+		return (bno);
+	}
+	/*
+	 * check to see if any fragments are already available
+	 * allocsiz is the size which will be allocated, hacking
+	 * it down to a smaller size if necessary
+	 */
+	frags = numfrags(fs, size);
+	allocsiz = frags;
+	while (allocsiz < fs->fs_frag && cgp->cg_frsum[allocsiz] == 0)
+		allocsiz++;
+	if (allocsiz == fs->fs_frag) {
+		/*
+		 * no fragments were available, so a block will be
+		 * allocated, and hacked up
+		 */
+		if (cgp->cg_cs.cs_nbfree == 0)
+			goto fail;
+		bno = ffs_alloccgblk(fs, cgp, bpref);
+		base = dtogd(fs, bno);
+		/* Hand the unused tail of the block back as free fragments. */
+		for (i = frags; i < fs->fs_frag; i++)
+			setbit(cg_blksfree(cgp), base + i);
+		i = fs->fs_frag - frags;
+		fs->fs_cs(fs, cg).cs_nffree += i;
+		fs->fs_cstotal.cs_nffree += i;
+		cgp->cg_cs.cs_nffree += i;
+		cgp->cg_frsum[i]++;
+		fs->fs_fmod = 1;
+		bdwrite(bp);
+		return (bno);
+	}
+	/*
+	 * A free piece of allocsiz fragments exists; locate it and take
+	 * the first `frags' fragments of it.
+	 */
+	bno = ffs_mapsearch(fs, cgp, bpref, allocsiz);
+	if (bno < 0)
+		goto fail;
+	for (i = 0; i < frags; i++)
+		clrbit(cg_blksfree(cgp), bno + i);
+	fs->fs_cs(fs, cg).cs_nffree -= frags;
+	fs->fs_cstotal.cs_nffree -= frags;
+	cgp->cg_cs.cs_nffree -= frags;
+	/* One piece of allocsiz is gone; any remainder is a new piece. */
+	cgp->cg_frsum[allocsiz]--;
+	if (frags != allocsiz)
+		cgp->cg_frsum[allocsiz - frags]++;
+	fs->fs_fmod = 1;
+	bdwrite(bp);
+	return (cg * fs->fs_fpg + bno);
+
+fail:
+	brelse(bp);
+	return (NULL);
+}
+
+/*
+ * Allocate a block in a cylinder group.
+ *
+ * This algorithm implements the following policy:
+ *   1) allocate the requested block.
+ *   2) allocate a rotationally optimal block in the same cylinder.
+ *   3) allocate the next available block on the block rotor for the
+ *      specified cylinder group.
+ * Note that this routine only allocates fs_bsize blocks; these
+ * blocks may be fragmented by the routine that allocates them.
+ *
+ *	fs	file system being allocated from
+ *	cgp	cylinder group to allocate in; its free map and counts
+ *		are updated in place, the caller writes the buffer
+ *	bpref	preferred block, or 0 for no preference
+ *
+ * Returns cgp->cg_cgx * fs->fs_fpg plus the group-relative address of
+ * the block taken, or NULL when ffs_mapsearch() reports failure.
+ */
+static daddr_t
+ffs_alloccgblk(fs, cgp, bpref)
+	register struct fs *fs;
+	register struct cg *cgp;
+	daddr_t bpref;
+{
+	daddr_t bno, blkno;
+	int cylno, pos, delta;
+	short *cylbp;
+	register int i;
+
+	/*
+	 * With no preference, or one that lies in another cylinder
+	 * group, start the search from this group's rotor.
+	 */
+	if (bpref == 0 || dtog(fs, bpref) != cgp->cg_cgx) {
+		bpref = cgp->cg_rotor;
+		goto norot;
+	}
+	/*
+	 * Normalize the preference with blknum() and dtogd() (presumably
+	 * rounding down to a block boundary and making the address
+	 * relative to the cylinder group -- confirm against fs.h).
+	 */
+	bpref = blknum(fs, bpref);
+	bpref = dtogd(fs, bpref);
+	/*
+	 * if the requested block is available, use it
+	 */
+	if (ffs_isblock(fs, cg_blksfree(cgp), fragstoblks(fs, bpref))) {
+		bno = bpref;
+		goto gotit;
+	}
+	/*
+	 * check for a block available on the same cylinder
+	 */
+	cylno = cbtocylno(fs, bpref);
+	if (cg_blktot(cgp)[cylno] == 0)
+		goto norot;
+	if (fs->fs_cpc == 0) {
+		/*
+		 * Block layout information is not available.
+		 * Leaving bpref unchanged means we take the
+		 * next available free block following the one
+		 * we just allocated. Hopefully this will at
+		 * least hit a track cache on drives of unknown
+		 * geometry (e.g. SCSI).
+		 */
+		goto norot;
+	}
+	/*
+	 * check the summary information to see if a block is
+	 * available in the requested cylinder starting at the
+	 * requested rotational position and proceeding around.
+	 */
+	cylbp = cg_blks(fs, cgp, cylno);
+	pos = cbtorpos(fs, bpref);
+	for (i = pos; i < fs->fs_nrpos; i++)
+		if (cylbp[i] > 0)
+			break;
+	/* Nothing from pos to the end: wrap and scan positions below pos. */
+	if (i == fs->fs_nrpos)
+		for (i = 0; i < pos; i++)
+			if (cylbp[i] > 0)
+				break;
+	if (cylbp[i] > 0) {
+		/*
+		 * found a rotational position, now find the actual
+		 * block. A panic if none is actually there.
+		 */
+		pos = cylno % fs->fs_cpc;
+		bno = (cylno - pos) * fs->fs_spc / NSPB(fs);
+		if (fs_postbl(fs, pos)[i] == -1) {
+			printf("pos = %d, i = %d, fs = %s\n",
+			    pos, i, fs->fs_fsmnt);
+			panic("ffs_alloccgblk: cyl groups corrupted");
+		}
+		/*
+		 * Follow the chain that starts at fs_postbl(fs, pos)[i];
+		 * each fs_rotbl() entry is the step to the next candidate.
+		 * The walk ends on a non-positive step or when the step
+		 * would pass fragstoblks(fs, fs->fs_fpg).
+		 */
+		for (i = fs_postbl(fs, pos)[i];; ) {
+			if (ffs_isblock(fs, cg_blksfree(cgp), bno + i)) {
+				bno = blkstofrags(fs, (bno + i));
+				goto gotit;
+			}
+			delta = fs_rotbl(fs)[i];
+			if (delta <= 0 ||
+			    delta + i > fragstoblks(fs, fs->fs_fpg))
+				break;
+			i += delta;
+		}
+		printf("pos = %d, i = %d, fs = %s\n", pos, i, fs->fs_fsmnt);
+		panic("ffs_alloccgblk: can't find blk in cyl");
+	}
+norot:
+	/*
+	 * no blocks in the requested cylinder, so take next
+	 * available one in this cylinder group.
+	 */
+	bno = ffs_mapsearch(fs, cgp, bpref, (int)fs->fs_frag);
+	if (bno < 0)
+		return (NULL);
+	cgp->cg_rotor = bno;
+gotit:
+	/*
+	 * Mark the block allocated in the free map and the cluster map,
+	 * and take it out of the cylinder group, file system and
+	 * per-cylinder rotational position counts.
+	 */
+	blkno = fragstoblks(fs, bno);
+	ffs_clrblock(fs, cg_blksfree(cgp), (long)blkno);
+	ffs_clusteracct(fs, cgp, blkno, -1);
+	cgp->cg_cs.cs_nbfree--;
+	fs->fs_cstotal.cs_nbfree--;
+	fs->fs_cs(fs, cgp->cg_cgx).cs_nbfree--;
+	cylno = cbtocylno(fs, bno);
+	cg_blks(fs, cgp, cylno)[cbtorpos(fs, bno)]--;
+	cg_blktot(cgp)[cylno]--;
+	fs->fs_fmod = 1;
+	return (cgp->cg_cgx * fs->fs_fpg + bno);
+}
+
+/*
+ * Determine whether a cluster can be allocated.
+ *
+ * We do not currently check for optimal rotational layout if there
+ * are multiple choices in the same cylinder group. Instead we just
+ * take the first one that we find following bpref.
+ *
+ *	ip	inode on whose behalf the cluster is allocated
+ *	cg	cylinder group to search
+ *	bpref	preferred position; the search starts at the beginning
+ *		of the group when bpref lies in another group
+ *	len	length of the cluster wanted, in blocks
+ *
+ * Returns the address of the first block of the cluster, or zero when
+ * this cylinder group cannot supply one (too few free blocks, the group
+ * cannot be read or has a bad magic number, or no large enough run of
+ * free blocks exists at or after bpref).
+ */
+static daddr_t
+ffs_clusteralloc(ip, cg, bpref, len)
+	struct inode *ip;
+	int cg;
+	daddr_t bpref;
+	int len;
+{
+	register struct fs *fs;
+	register struct cg *cgp;
+	struct buf *bp;
+	int i, run, bno, bit, map;
+	u_char *mapp;
+
+	fs = ip->i_fs;
+	if (fs->fs_cs(fs, cg).cs_nbfree < len)
+		return (NULL);
+	if (bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)), (int)fs->fs_cgsize,
+	    NOCRED, &bp))
+		goto fail;
+	cgp = (struct cg *)bp->b_data;
+	if (!cg_chkmagic(cgp))
+		goto fail;
+	/*
+	 * Check to see if a cluster of the needed size (or bigger) is
+	 * available in this cylinder group.
+	 */
+	for (i = len; i <= fs->fs_contigsumsize; i++)
+		if (cg_clustersum(cgp)[i] > 0)
+			break;
+	if (i > fs->fs_contigsumsize)
+		goto fail;
+	/*
+	 * Search the cluster map to find a big enough cluster.
+	 * We take the first one that we find, even if it is larger
+	 * than we need as we prefer to get one close to the previous
+	 * block allocation. We do not search before the current
+	 * preference point as we do not want to allocate a block
+	 * that is allocated before the previous one (as we will
+	 * then have to wait for another pass of the elevator
+	 * algorithm before it will be read). We prefer to fail and
+	 * be recalled to try an allocation in the next cylinder group.
+	 */
+	if (dtog(fs, bpref) != cg)
+		bpref = 0;
+	else
+		bpref = fragstoblks(fs, dtogd(fs, blknum(fs, bpref)));
+	/*
+	 * Walk the map one bit per block starting at bpref, counting the
+	 * current run of set (free) bits; stop when the run reaches len.
+	 */
+	mapp = &cg_clustersfree(cgp)[bpref / NBBY];
+	map = *mapp++;
+	bit = 1 << (bpref % NBBY);
+	for (run = 0, i = bpref; i < cgp->cg_nclusterblks; i++) {
+		if ((map & bit) == 0) {
+			run = 0;
+		} else {
+			run++;
+			if (run == len)
+				break;
+		}
+		if ((i & (NBBY - 1)) != (NBBY - 1)) {
+			bit <<= 1;
+		} else {
+			map = *mapp++;
+			bit = 1;
+		}
+	}
+	if (i == cgp->cg_nclusterblks)
+		goto fail;
+	/*
+	 * Allocate the cluster that we have found. i is the last block
+	 * of the run, so the cluster starts at i - run + 1.
+	 */
+	bno = cg * fs->fs_fpg + blkstofrags(fs, i - run + 1);
+	len = blkstofrags(fs, len);
+	for (i = 0; i < len; i += fs->fs_frag)
+		if (ffs_alloccgblk(fs, cgp, bno + i) != bno + i)
+			panic("ffs_clusteralloc: lost block");
+	/*
+	 * ffs_alloccgblk() has changed the free map and the counts of the
+	 * cylinder group held in bp. The buffer must therefore be queued
+	 * for writing, as ffs_alloccg() does; merely releasing it would
+	 * allow the changes to be discarded without reaching the disk.
+	 */
+	bdwrite(bp);
+	return (bno);
+
+fail:
+	/* Nothing was modified on this path, so the buffer is just released. */
+	brelse(bp);
+	return (0);
+}
+
+/*
+ * Determine whether an inode can be allocated.
+ *
+ * Check to see if an inode is available, and if it is,
+ * allocate it using the following policy:
+ *   1) allocate the requested inode.
+ *   2) allocate the next available inode after the requested
+ *      inode in the specified cylinder group.
+ *
+ *	ip	an inode of the file system (used for i_fs and i_devvp)
+ *	cg	cylinder group to allocate in
+ *	ipref	preferred inode number, or 0 for no preference
+ *	mode	mode of the new inode; IFDIR also bumps directory counts
+ *
+ * Returns cg * fs->fs_ipg plus the index of the inode taken, or NULL
+ * when the group has no free inode, cannot be read, or has a bad magic
+ * number.
+ */
+static ino_t
+ffs_nodealloccg(ip, cg, ipref, mode)
+	struct inode *ip;
+	int cg;
+	daddr_t ipref;
+	int mode;
+{
+	register struct fs *fs;
+	register struct cg *cgp;
+	struct buf *bp;
+	int error, start, len, loc, map, i;
+
+	fs = ip->i_fs;
+	/* Cheap test against the in-core summary before reading the group. */
+	if (fs->fs_cs(fs, cg).cs_nifree == 0)
+		return (NULL);
+	error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
+		(int)fs->fs_cgsize, NOCRED, &bp);
+	if (error) {
+		brelse(bp);
+		return (NULL);
+	}
+	cgp = (struct cg *)bp->b_data;
+	if (!cg_chkmagic(cgp) || cgp->cg_cs.cs_nifree == 0) {
+		brelse(bp);
+		return (NULL);
+	}
+	cgp->cg_time = time.tv_sec;
+	/*
+	 * Reduce the preference to an index within this group and take
+	 * that inode directly if its bit in the in-use map is clear.
+	 */
+	if (ipref) {
+		ipref %= fs->fs_ipg;
+		if (isclr(cg_inosused(cgp), ipref))
+			goto gotit;
+	}
+	/*
+	 * Scan the in-use map from the byte holding cg_irotor to the end,
+	 * then from the start of the map through that byte. skpc() is
+	 * presumably skipping bytes equal to 0xff and returning how many
+	 * bytes remained when it stopped (0 if none differed) -- that
+	 * reading is what makes `start + len - loc' below the index of
+	 * the byte found; confirm against its definition.
+	 */
+	start = cgp->cg_irotor / NBBY;
+	len = howmany(fs->fs_ipg - cgp->cg_irotor, NBBY);
+	loc = skpc(0xff, len, &cg_inosused(cgp)[start]);
+	if (loc == 0) {
+		len = start + 1;
+		start = 0;
+		loc = skpc(0xff, len, &cg_inosused(cgp)[0]);
+		if (loc == 0) {
+			printf("cg = %d, irotor = %d, fs = %s\n",
+			    cg, cgp->cg_irotor, fs->fs_fsmnt);
+			panic("ffs_nodealloccg: map corrupted");
+			/* NOTREACHED */
+		}
+	}
+	/* Pick the lowest clear bit of that byte and move the rotor to it. */
+	i = start + len - loc;
+	map = cg_inosused(cgp)[i];
+	ipref = i * NBBY;
+	for (i = 1; i < (1 << NBBY); i <<= 1, ipref++) {
+		if ((map & i) == 0) {
+			cgp->cg_irotor = ipref;
+			goto gotit;
+		}
+	}
+	printf("fs = %s\n", fs->fs_fsmnt);
+	panic("ffs_nodealloccg: block not in map");
+	/* NOTREACHED */
+gotit:
+	/*
+	 * Mark the inode in use and take it out of the free inode counts
+	 * of the cylinder group and of the file system; a directory is
+	 * also added to the directory counts.
+	 */
+	setbit(cg_inosused(cgp), ipref);
+	cgp->cg_cs.cs_nifree--;
+	fs->fs_cstotal.cs_nifree--;
+	fs->fs_cs(fs, cg).cs_nifree--;
+	fs->fs_fmod = 1;
+	if ((mode & IFMT) == IFDIR) {
+		cgp->cg_cs.cs_ndir++;
+		fs->fs_cstotal.cs_ndir++;
+		fs->fs_cs(fs, cg).cs_ndir++;
+	}
+	bdwrite(bp);
+	return (cg * fs->fs_ipg + ipref);
+}
+
+/*
+ * Free a block or fragment.
+ *
+ * The specified block or fragment is placed back in the
+ * free map. If a fragment is deallocated, a possible
+ * block reassembly is checked.
+ *
+ *	ip	inode that owned the space
+ *	bno	address of the block or fragment being freed
+ *	size	number of bytes being freed; must be a multiple of the
+ *		fragment size and no larger than fs_bsize
+ *
+ * A bad size, or freeing space that is already free, is a panic. A
+ * block number at or beyond fs_size is reported and ignored. If the
+ * cylinder group cannot be read or has a bad magic number the routine
+ * returns without freeing anything. No value is returned.
+ */
+ffs_blkfree(ip, bno, size)
+	register struct inode *ip;
+	daddr_t bno;
+	long size;
+{
+	register struct fs *fs;
+	register struct cg *cgp;
+	struct buf *bp;
+	daddr_t blkno;
+	int i, error, cg, blk, frags, bbase;
+
+	fs = ip->i_fs;
+	if ((u_int)size > fs->fs_bsize || fragoff(fs, size) != 0) {
+		printf("dev = 0x%x, bsize = %d, size = %d, fs = %s\n",
+		    ip->i_dev, fs->fs_bsize, size, fs->fs_fsmnt);
+		panic("blkfree: bad size");
+	}
+	cg = dtog(fs, bno);
+	if ((u_int)bno >= fs->fs_size) {
+		printf("bad block %d, ino %d\n", bno, ip->i_number);
+		ffs_fserr(fs, ip->i_uid, "bad block");
+		return;
+	}
+	error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
+		(int)fs->fs_cgsize, NOCRED, &bp);
+	if (error) {
+		brelse(bp);
+		return;
+	}
+	cgp = (struct cg *)bp->b_data;
+	if (!cg_chkmagic(cgp)) {
+		brelse(bp);
+		return;
+	}
+	cgp->cg_time = time.tv_sec;
+	/* From here on bno is the result of dtogd(), not the caller's value. */
+	bno = dtogd(fs, bno);
+	if (size == fs->fs_bsize) {
+		/*
+		 * A full block: mark it free in the block and cluster
+		 * maps and add it to the free block counts.
+		 */
+		blkno = fragstoblks(fs, bno);
+		if (ffs_isblock(fs, cg_blksfree(cgp), blkno)) {
+			printf("dev = 0x%x, block = %d, fs = %s\n",
+			    ip->i_dev, bno, fs->fs_fsmnt);
+			panic("blkfree: freeing free block");
+		}
+		ffs_setblock(fs, cg_blksfree(cgp), blkno);
+		ffs_clusteracct(fs, cgp, blkno, 1);
+		cgp->cg_cs.cs_nbfree++;
+		fs->fs_cstotal.cs_nbfree++;
+		fs->fs_cs(fs, cg).cs_nbfree++;
+		i = cbtocylno(fs, bno);
+		cg_blks(fs, cgp, i)[cbtorpos(fs, bno)]++;
+		cg_blktot(cgp)[i]++;
+	} else {
+		/* bbase is the first fragment of the block containing bno. */
+		bbase = bno - fragnum(fs, bno);
+		/*
+		 * decrement the counts associated with the old frags
+		 */
+		blk = blkmap(fs, cg_blksfree(cgp), bbase);
+		ffs_fragacct(fs, blk, cgp->cg_frsum, -1);
+		/*
+		 * deallocate the fragment
+		 */
+		frags = numfrags(fs, size);
+		for (i = 0; i < frags; i++) {
+			if (isset(cg_blksfree(cgp), bno + i)) {
+				printf("dev = 0x%x, block = %d, fs = %s\n",
+				    ip->i_dev, bno + i, fs->fs_fsmnt);
+				panic("blkfree: freeing free frag");
+			}
+			setbit(cg_blksfree(cgp), bno + i);
+		}
+		/* The loop leaves i == frags, the number of fragments freed. */
+		cgp->cg_cs.cs_nffree += i;
+		fs->fs_cstotal.cs_nffree += i;
+		fs->fs_cs(fs, cg).cs_nffree += i;
+		/*
+		 * add back in counts associated with the new frags
+		 */
+		blk = blkmap(fs, cg_blksfree(cgp), bbase);
+		ffs_fragacct(fs, blk, cgp->cg_frsum, 1);
+		/*
+		 * if a complete block has been reassembled, account for it:
+		 * its fs_frag fragments leave the free fragment counts and
+		 * one block joins the free block counts.
+		 */
+		blkno = fragstoblks(fs, bbase);
+		if (ffs_isblock(fs, cg_blksfree(cgp), blkno)) {
+			cgp->cg_cs.cs_nffree -= fs->fs_frag;
+			fs->fs_cstotal.cs_nffree -= fs->fs_frag;
+			fs->fs_cs(fs, cg).cs_nffree -= fs->fs_frag;
+			ffs_clusteracct(fs, cgp, blkno, 1);
+			cgp->cg_cs.cs_nbfree++;
+			fs->fs_cstotal.cs_nbfree++;
+			fs->fs_cs(fs, cg).cs_nbfree++;
+			i = cbtocylno(fs, bbase);
+			cg_blks(fs, cgp, i)[cbtorpos(fs, bbase)]++;
+			cg_blktot(cgp)[i]++;
+		}
+	}
+	fs->fs_fmod = 1;
+	bdwrite(bp);
+}
+
+/*
+ * Free an inode.
+ *
+ * The inode a_ino is cleared in the in-use map of its cylinder group
+ * and added back to the free inode counts; for a directory (a_mode)
+ * the directory counts are reduced as well. The group's inode rotor is
+ * pulled back when the freed inode lies before it.
+ *
+ * An inode number outside the file system is a panic, as is freeing an
+ * inode that is already free unless the file system is read-only.
+ * Always returns 0, including when the cylinder group cannot be read
+ * or has a bad magic number (nothing is freed in that case).
+ */
+int
+ffs_vfree(ap)
+	struct vop_vfree_args /* {
+		struct vnode *a_pvp;
+		ino_t a_ino;
+		int a_mode;
+	} */ *ap;
+{
+	register struct fs *fs;
+	register struct cg *cgp;
+	register struct inode *dip;
+	ino_t ino = ap->a_ino;
+	struct buf *bp;
+	int cg;
+
+	dip = VTOI(ap->a_pvp);
+	fs = dip->i_fs;
+	if ((u_int)ino >= fs->fs_ipg * fs->fs_ncg)
+		panic("ifree: range: dev = 0x%x, ino = %d, fs = %s\n",
+		    dip->i_dev, ino, fs->fs_fsmnt);
+	cg = ino_to_cg(fs, ino);
+	if (bread(dip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
+	    (int)fs->fs_cgsize, NOCRED, &bp))
+		goto bad;
+	cgp = (struct cg *)bp->b_data;
+	if (!cg_chkmagic(cgp))
+		goto bad;
+	cgp->cg_time = time.tv_sec;
+	/* From here on ino is the index of the inode within its group. */
+	ino %= fs->fs_ipg;
+	if (isclr(cg_inosused(cgp), ino)) {
+		printf("dev = 0x%x, ino = %d, fs = %s\n",
+		    dip->i_dev, ino, fs->fs_fsmnt);
+		if (!fs->fs_ronly)
+			panic("ifree: freeing free inode");
+	}
+	clrbit(cg_inosused(cgp), ino);
+	if (ino < cgp->cg_irotor)
+		cgp->cg_irotor = ino;
+	fs->fs_cs(fs, cg).cs_nifree++;
+	fs->fs_cstotal.cs_nifree++;
+	cgp->cg_cs.cs_nifree++;
+	if ((ap->a_mode & IFMT) == IFDIR) {
+		fs->fs_cs(fs, cg).cs_ndir--;
+		fs->fs_cstotal.cs_ndir--;
+		cgp->cg_cs.cs_ndir--;
+	}
+	fs->fs_fmod = 1;
+	bdwrite(bp);
+	return (0);
+
+bad:
+	brelse(bp);
+	return (0);
+}
+
+/*
+ * Find a block of the specified size in the specified cylinder group.
+ *
+ * It is a panic if a request is made to find a block if none are
+ * available.
+ *
+ *	fs		file system being searched
+ *	cgp		cylinder group whose free map is searched; its
+ *			cg_frotor is updated to the byte that was found
+ *	bpref		preferred position, or 0 to start at cg_frotor
+ *	allocsiz	length of the free piece wanted, in fragments
+ *
+ * Returns the group-relative address of the piece found. The trailing
+ * `return (-1)' follows a panic. Note that both panic messages below
+ * name ffs_alloccg although they are raised from this routine.
+ */
+static daddr_t
+ffs_mapsearch(fs, cgp, bpref, allocsiz)
+	register struct fs *fs;
+	register struct cg *cgp;
+	daddr_t bpref;
+	int allocsiz;
+{
+	daddr_t bno;
+	int start, len, loc, i;
+	int blk, field, subfield, pos;
+
+	/*
+	 * find the fragment by searching through the free block
+	 * map for an appropriate bit pattern
+	 *
+	 * The search covers the map from byte `start' to its end and then
+	 * wraps to bytes 0 through `start'. scanc() is presumably
+	 * returning the number of bytes left when a byte whose fragtbl
+	 * entry has the wanted bit set is met, and 0 when none is --
+	 * confirm against its definition.
+	 */
+	if (bpref)
+		start = dtogd(fs, bpref) / NBBY;
+	else
+		start = cgp->cg_frotor / NBBY;
+	len = howmany(fs->fs_fpg, NBBY) - start;
+	loc = scanc((u_int)len, (u_char *)&cg_blksfree(cgp)[start],
+		(u_char *)fragtbl[fs->fs_frag],
+		(u_char)(1 << (allocsiz - 1 + (fs->fs_frag % NBBY))));
+	if (loc == 0) {
+		len = start + 1;
+		start = 0;
+		loc = scanc((u_int)len, (u_char *)&cg_blksfree(cgp)[0],
+			(u_char *)fragtbl[fs->fs_frag],
+			(u_char)(1 << (allocsiz - 1 + (fs->fs_frag % NBBY))));
+		if (loc == 0) {
+			printf("start = %d, len = %d, fs = %s\n",
+			    start, len, fs->fs_fsmnt);
+			panic("ffs_alloccg: map corrupted");
+			/* NOTREACHED */
+		}
+	}
+	/* First fragment covered by the map byte that matched. */
+	bno = (start + len - loc) * NBBY;
+	cgp->cg_frotor = bno;
+	/*
+	 * found the byte in the map
+	 * sift through the bits to find the selected frag
+	 *
+	 * Each block covered by that byte is examined in turn: its map
+	 * bits are shifted left by one and the around[]/inside[] patterns
+	 * for allocsiz are slid across them one position at a time until
+	 * they match.
+	 */
+	for (i = bno + NBBY; bno < i; bno += fs->fs_frag) {
+		blk = blkmap(fs, cg_blksfree(cgp), bno);
+		blk <<= 1;
+		field = around[allocsiz];
+		subfield = inside[allocsiz];
+		for (pos = 0; pos <= fs->fs_frag - allocsiz; pos++) {
+			if ((blk & field) == subfield)
+				return (bno + pos);
+			field <<= 1;
+			subfield <<= 1;
+		}
+	}
+	printf("bno = %d, fs = %s\n", bno, fs->fs_fsmnt);
+	panic("ffs_alloccg: block not in map");
+	return (-1);
+}
+
+/*
+ * Update the cluster map because of an allocation or free.
+ *
+ * Cnt == 1 means free; cnt == -1 means allocating.
+ *
+ *	fs	file system; nothing is done unless fs_contigsumsize > 0
+ *	cgp	cylinder group whose cluster map and summary are updated
+ *	blkno	block within the group whose state is changing
+ *	cnt	1 or -1, as described above
+ *
+ * The bit for blkno is set or cleared in the cluster map, and the
+ * cluster summary is adjusted using the lengths of the free runs that
+ * lie directly after (forw) and directly before (back) the block. No
+ * value is returned.
+ */
+ffs_clusteracct(fs, cgp, blkno, cnt)
+	struct fs *fs;
+	struct cg *cgp;
+	daddr_t blkno;
+	int cnt;
+{
+	long *sump;
+	u_char *freemapp, *mapp;
+	int i, start, end, forw, back, map, bit;
+
+	if (fs->fs_contigsumsize <= 0)
+		return;
+	freemapp = cg_clustersfree(cgp);
+	sump = cg_clustersum(cgp);
+	/*
+	 * Allocate or clear the actual block.
+	 */
+	if (cnt > 0)
+		setbit(freemapp, blkno);
+	else
+		clrbit(freemapp, blkno);
+	/*
+	 * Find the size of the cluster going forward. At most
+	 * fs_contigsumsize blocks are examined, and never beyond
+	 * cg_nclusterblks.
+	 */
+	start = blkno + 1;
+	end = start + fs->fs_contigsumsize;
+	if (end >= cgp->cg_nclusterblks)
+		end = cgp->cg_nclusterblks;
+	mapp = &freemapp[start / NBBY];
+	map = *mapp++;
+	bit = 1 << (start % NBBY);
+	for (i = start; i < end; i++) {
+		if ((map & bit) == 0)
+			break;
+		/* Next bit of this byte, or the first bit of the next byte. */
+		if ((i & (NBBY - 1)) != (NBBY - 1)) {
+			bit <<= 1;
+		} else {
+			map = *mapp++;
+			bit = 1;
+		}
+	}
+	forw = i - start;
+	/*
+	 * Find the size of the cluster going backward. At most
+	 * fs_contigsumsize blocks are examined, and never before block 0.
+	 */
+	start = blkno - 1;
+	end = start - fs->fs_contigsumsize;
+	if (end < 0)
+		end = -1;
+	mapp = &freemapp[start / NBBY];
+	map = *mapp--;
+	bit = 1 << (start % NBBY);
+	for (i = start; i > end; i--) {
+		if ((map & bit) == 0)
+			break;
+		/* Previous bit of this byte, or the top bit of the previous byte. */
+		if ((i & (NBBY - 1)) != 0) {
+			bit >>= 1;
+		} else {
+			map = *mapp--;
+			bit = 1 << (NBBY - 1);
+		}
+	}
+	back = start - i;
+	/*
+	 * Account for old cluster and the possibly new forward and
+	 * back clusters. The combined length back + forw + 1 is capped
+	 * at fs_contigsumsize before it is used as an index into sump[].
+	 */
+	i = back + forw + 1;
+	if (i > fs->fs_contigsumsize)
+		i = fs->fs_contigsumsize;
+	sump[i] += cnt;
+	if (back > 0)
+		sump[back] -= cnt;
+	if (forw > 0)
+		sump[forw] -= cnt;
+}
+
+/*
+ * Fserr logs an error diagnostic against a file system.
+ *
+ * The message is logged at LOG_ERR in the form:
+ *	uid <uid> on <mount point>: <error message>
+ *
+ *	fs	file system the error refers to (fs_fsmnt names it)
+ *	uid	user on whose behalf the failing operation ran
+ *	cp	text of the error message
+ */
+static void
+ffs_fserr(fs, uid, cp)
+	struct fs *fs;
+	u_int uid;
+	char *cp;
+{
+
+	log(LOG_ERR,
+	    "uid %d on %s: %s\n",
+	    uid,
+	    fs->fs_fsmnt,
+	    cp);
+}
diff --git a/sys/ufs/ffs/ffs_balloc.c b/sys/ufs/ffs/ffs_balloc.c
new file mode 100644
index 00000000000..752feec9947
--- /dev/null
+++ b/sys/ufs/ffs/ffs_balloc.c
@@ -0,0 +1,282 @@
+/*
+ * Copyright (c) 1982, 1986, 1989, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)ffs_balloc.c	8.4 (Berkeley) 9/23/93
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/buf.h>
+#include <sys/proc.h>
+#include <sys/file.h>
+#include <sys/vnode.h>
+
+#include <vm/vm.h>
+
+#include <ufs/ufs/quota.h>
+#include <ufs/ufs/inode.h>
+#include <ufs/ufs/ufs_extern.h>
+
+#include <ufs/ffs/fs.h>
+#include <ufs/ffs/ffs_extern.h>
+
+/*
+ * Balloc defines the structure of file system storage by allocating the
+ * physical blocks on a device given the inode and the logical block number
+ * in a file.  On success 0 is returned and *bpp holds the buffer for block
+ * bn; on failure an error number is returned and *bpp is left NULL.
+ */
+int
+ffs_balloc(ip, bn, size, cred, bpp, flags)
+	register struct inode *ip;
+	register daddr_t bn;
+	int size;
+	struct ucred *cred;
+	struct buf **bpp;
+	int flags;
+{
+	register struct fs *fs;
+	register daddr_t nb;
+	struct buf *bp, *nbp;
+	struct vnode *vp = ITOV(ip);
+	struct indir indirs[NIADDR + 2];
+	daddr_t newb, lbn, *bap, pref;
+	int osize, nsize, num, i, error;
+
+	*bpp = NULL;
+	if (bn < 0)
+		return (EFBIG);
+	fs = ip->i_fs;
+	lbn = bn;
+
+	/*
+	 * If the next write will extend the file into a new block,
+	 * and the file is currently composed of a fragment
+	 * this fragment has to be extended to be a full block.
+	 */
+	nb = lblkno(fs, ip->i_size);
+	if (nb < NDADDR && nb < bn) {
+		osize = blksize(fs, ip, nb);
+		if (osize < fs->fs_bsize && osize > 0) {
+			error = ffs_realloccg(ip, nb,
+				ffs_blkpref(ip, nb, (int)nb, &ip->i_db[0]),
+				osize, (int)fs->fs_bsize, cred, &bp);
+			if (error)
+				return (error);
+			ip->i_size = (nb + 1) * fs->fs_bsize;
+			vnode_pager_setsize(vp, (u_long)ip->i_size);
+			ip->i_db[nb] = dbtofsb(fs, bp->b_blkno);
+			ip->i_flag |= IN_CHANGE | IN_UPDATE;
+			if (flags & B_SYNC)
+				bwrite(bp);
+			else
+				bawrite(bp);
+		}
+	}
+	/*
+	 * The first NDADDR blocks are direct blocks
+	 */
+	if (bn < NDADDR) {
+		nb = ip->i_db[bn];
+		if (nb != 0 && ip->i_size >= (bn + 1) * fs->fs_bsize) {
+			error = bread(vp, bn, fs->fs_bsize, NOCRED, &bp);
+			if (error) {
+				brelse(bp);
+				return (error);
+			}
+			*bpp = bp;
+			return (0);
+		}
+		if (nb != 0) {
+			/*
+			 * Consider need to reallocate a fragment.
+			 */
+			osize = fragroundup(fs, blkoff(fs, ip->i_size));
+			nsize = fragroundup(fs, size);
+			if (nsize <= osize) {
+				error = bread(vp, bn, osize, NOCRED, &bp);
+				if (error) {
+					brelse(bp);
+					return (error);
+				}
+			} else {
+				error = ffs_realloccg(ip, bn,
+				    ffs_blkpref(ip, bn, (int)bn, &ip->i_db[0]),
+				    osize, nsize, cred, &bp);
+				if (error)
+					return (error);
+			}
+		} else {
+			if (ip->i_size < (bn + 1) * fs->fs_bsize)
+				nsize = fragroundup(fs, size);
+			else
+				nsize = fs->fs_bsize;
+			error = ffs_alloc(ip, bn,
+			    ffs_blkpref(ip, bn, (int)bn, &ip->i_db[0]),
+			    nsize, cred, &newb);
+			if (error)
+				return (error);
+			bp = getblk(vp, bn, nsize, 0, 0);
+			bp->b_blkno = fsbtodb(fs, newb);
+			if (flags & B_CLRBUF)
+				clrbuf(bp);
+		}
+		ip->i_db[bn] = dbtofsb(fs, bp->b_blkno);
+		ip->i_flag |= IN_CHANGE | IN_UPDATE;
+		*bpp = bp;
+		return (0);
+	}
+	/*
+	 * Determine the number of levels of indirection.
+	 */
+	pref = 0;
+	if (error = ufs_getlbns(vp, bn, indirs, &num))
+		return(error);
+#ifdef DIAGNOSTIC
+	if (num < 1)
+		panic ("ffs_balloc: ufs_bmaparray returned indirect block\n");
+#endif
+	/*
+	 * Fetch the first indirect block allocating if necessary.
+	 */
+	--num;
+	nb = ip->i_ib[indirs[0].in_off];
+	if (nb == 0) {
+		pref = ffs_blkpref(ip, lbn, 0, (daddr_t *)0);
+	        if (error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
+		    cred, &newb))
+			return (error);
+		nb = newb;
+		bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0);
+		bp->b_blkno = fsbtodb(fs, newb);
+		clrbuf(bp);
+		/*
+		 * Write synchronously so that indirect blocks
+		 * never point at garbage.
+		 */
+		if (error = bwrite(bp)) {
+			ffs_blkfree(ip, nb, fs->fs_bsize);
+			return (error);
+		}
+		ip->i_ib[indirs[0].in_off] = newb;
+		ip->i_flag |= IN_CHANGE | IN_UPDATE;
+	}
+	/*
+	 * Fetch through the indirect blocks, allocating as necessary.
+	 */
+	for (i = 1;;) {
+		error = bread(vp,
+		    indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, &bp);
+		if (error) {
+			brelse(bp);
+			return (error);
+		}
+		bap = (daddr_t *)bp->b_data;
+		nb = bap[indirs[i].in_off];
+		if (i == num)
+			break;
+		i += 1;
+		if (nb != 0) {
+			brelse(bp);
+			continue;
+		}
+		if (pref == 0)
+			pref = ffs_blkpref(ip, lbn, 0, (daddr_t *)0);
+		if (error =
+		    ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred, &newb)) {
+			brelse(bp);
+			return (error);
+		}
+		nb = newb;
+		nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0);
+		nbp->b_blkno = fsbtodb(fs, nb);
+		clrbuf(nbp);
+		/*
+		 * Write synchronously so that indirect blocks
+		 * never point at garbage.
+		 */
+		if (error = bwrite(nbp)) {
+			ffs_blkfree(ip, nb, fs->fs_bsize);
+			brelse(bp);
+			return (error);
+		}
+		bap[indirs[i - 1].in_off] = nb;
+		/*
+		 * If required, write synchronously, else use delayed write.
+		 */
+		if (flags & B_SYNC) {
+			bwrite(bp);
+		} else {
+			bdwrite(bp);
+		}
+	}
+	/*
+	 * Get the data block, allocating if necessary.
+	 */
+	if (nb == 0) {
+		pref = ffs_blkpref(ip, lbn, indirs[i].in_off, &bap[0]);
+		if (error = ffs_alloc(ip,
+		    lbn, pref, (int)fs->fs_bsize, cred, &newb)) {
+			brelse(bp);
+			return (error);
+		}
+		nb = newb;
+		nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0);
+		nbp->b_blkno = fsbtodb(fs, nb);
+		if (flags & B_CLRBUF)
+			clrbuf(nbp);
+		bap[indirs[i].in_off] = nb;
+		/*
+		 * If required, write synchronously, else use delayed write.
+		 */
+		if (flags & B_SYNC) {
+			bwrite(bp);
+		} else {
+			bdwrite(bp);
+		}
+		*bpp = nbp;
+		return (0);
+	}
+	brelse(bp);
+	if (flags & B_CLRBUF) {
+		error = bread(vp, lbn, (int)fs->fs_bsize, NOCRED, &nbp);
+		if (error) {
+			brelse(nbp);
+			return (error);
+		}
+	} else {
+		nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0);
+		nbp->b_blkno = fsbtodb(fs, nb);
+	}
+	*bpp = nbp;
+	return (0);
+}
diff --git a/sys/ufs/ffs/ffs_extern.h b/sys/ufs/ffs/ffs_extern.h
new file mode 100644
index 00000000000..ab467a272a9
--- /dev/null
+++ b/sys/ufs/ffs/ffs_extern.h
@@ -0,0 +1,101 @@
+/*-
+ * Copyright (c) 1991, 1993, 1994
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)ffs_extern.h	8.3 (Berkeley) 4/16/94
+ */
+
+struct buf;
+struct fid;
+struct fs;
+struct inode;
+struct mount;
+struct nameidata;
+struct proc;
+struct statfs;
+struct timeval;
+struct ucred;
+struct uio;
+struct vnode;
+struct mbuf;
+
+__BEGIN_DECLS
+int	ffs_alloc __P((struct inode *,
+	    daddr_t, daddr_t, int, struct ucred *, daddr_t *));
+int	ffs_balloc __P((struct inode *,
+	    daddr_t, int, struct ucred *, struct buf **, int));
+int	ffs_blkatoff __P((struct vop_blkatoff_args *));
+int	ffs_blkfree __P((struct inode *, daddr_t, long));
+daddr_t	ffs_blkpref __P((struct inode *, daddr_t, int, daddr_t *));
+int	ffs_bmap __P((struct vop_bmap_args *));
+void	ffs_clrblock __P((struct fs *, u_char *, daddr_t));
+int	ffs_fhtovp __P((struct mount *, struct fid *, struct mbuf *,
+	    struct vnode **, int *, struct ucred **));
+void	ffs_fragacct __P((struct fs *, int, long [], int));
+int	ffs_fsync __P((struct vop_fsync_args *));
+int	ffs_init __P((void));
+int	ffs_isblock __P((struct fs *, u_char *, daddr_t));
+int	ffs_mount __P((struct mount *,
+	    char *, caddr_t, struct nameidata *, struct proc *));
+int	ffs_mountfs __P((struct vnode *, struct mount *, struct proc *));
+int	ffs_mountroot __P((void));
+int	ffs_read __P((struct vop_read_args *));
+int	ffs_reallocblks __P((struct vop_reallocblks_args *));
+int	ffs_realloccg __P((struct inode *,
+	    daddr_t, daddr_t, int, int, struct ucred *, struct buf **));
+int	ffs_reclaim __P((struct vop_reclaim_args *));
+void	ffs_setblock __P((struct fs *, u_char *, daddr_t));
+int	ffs_statfs __P((struct mount *, struct statfs *, struct proc *));
+int	ffs_sync __P((struct mount *, int, struct ucred *, struct proc *));
+int	ffs_truncate __P((struct vop_truncate_args *));
+int	ffs_unmount __P((struct mount *, int, struct proc *));
+int	ffs_update __P((struct vop_update_args *));
+int	ffs_valloc __P((struct vop_valloc_args *));
+int	ffs_vfree __P((struct vop_vfree_args *));
+int	ffs_vget __P((struct mount *, ino_t, struct vnode **));
+int	ffs_vptofh __P((struct vnode *, struct fid *));
+int	ffs_write __P((struct vop_write_args *));
+
+int	bwrite();		/* FFS needs a bwrite routine.  XXX */
+
+#ifdef DIAGNOSTIC
+void	ffs_checkoverlap __P((struct buf *, struct inode *));
+#endif
+__END_DECLS
+
+extern int (**ffs_vnodeop_p)();
+extern int (**ffs_specop_p)();
+#ifdef FIFO
+extern int (**ffs_fifoop_p)();
+#define FFS_FIFOOPS ffs_fifoop_p
+#else
+#define FFS_FIFOOPS NULL
+#endif
diff --git a/sys/ufs/ffs/ffs_inode.c b/sys/ufs/ffs/ffs_inode.c
new file mode 100644
index 00000000000..b45aee53552
--- /dev/null
+++ b/sys/ufs/ffs/ffs_inode.c
@@ -0,0 +1,488 @@
+/*
+ * Copyright (c) 1982, 1986, 1989, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)ffs_inode.c	8.5 (Berkeley) 12/30/93
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/mount.h>
+#include <sys/proc.h>
+#include <sys/file.h>
+#include <sys/buf.h>
+#include <sys/vnode.h>
+#include <sys/kernel.h>
+#include <sys/malloc.h>
+#include <sys/trace.h>
+#include <sys/resourcevar.h>
+
+#include <vm/vm.h>
+
+#include <ufs/ufs/quota.h>
+#include <ufs/ufs/inode.h>
+#include <ufs/ufs/ufsmount.h>
+#include <ufs/ufs/ufs_extern.h>
+
+#include <ufs/ffs/fs.h>
+#include <ufs/ffs/ffs_extern.h>
+
+static int ffs_indirtrunc __P((struct inode *, daddr_t, daddr_t, daddr_t, int,
+	    long *));
+
+int
+ffs_init()
+{
+	return (ufs_init());	/* no FFS-specific setup; defer to ufs_init */
+}
+
+/*
+ * Update the access, modified, and inode change times as specified by the
+ * IN_ACCESS, IN_UPDATE, and IN_CHANGE flags respectively. The IN_MODIFIED
+ * flag specifies that the inode needs to be written but that the times have
+ * already been set. The access and modified times are taken from a_access
+ * and a_modify; the inode change time is always taken from the current
+ * time. If a_waitfor is set, wait for the disk write of the inode to
+ * complete. On a read-only mount the flags are simply cleared.
+ */
+int
+ffs_update(ap)
+	struct vop_update_args /* {
+		struct vnode *a_vp;
+		struct timeval *a_access;
+		struct timeval *a_modify;
+		int a_waitfor;
+	} */ *ap;
+{
+	register struct fs *fs;
+	struct buf *bp;
+	struct inode *ip;
+	int error;
+
+	ip = VTOI(ap->a_vp);
+	if (ap->a_vp->v_mount->mnt_flag & MNT_RDONLY) {
+		ip->i_flag &=
+		    ~(IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE);
+		return (0);
+	}
+	if ((ip->i_flag &
+	    (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0)
+		return (0);
+	if (ip->i_flag & IN_ACCESS)
+		ip->i_atime.ts_sec = ap->a_access->tv_sec;
+	if (ip->i_flag & IN_UPDATE) {
+		ip->i_mtime.ts_sec = ap->a_modify->tv_sec;
+		ip->i_modrev++;
+	}
+	if (ip->i_flag & IN_CHANGE)
+		ip->i_ctime.ts_sec = time.tv_sec;
+	ip->i_flag &= ~(IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE);
+	fs = ip->i_fs;
+	/*
+	 * Ensure that uid and gid are correct. This is a temporary
+	 * fix until fsck has been changed to do the update.
+	 */
+	if (fs->fs_inodefmt < FS_44INODEFMT) {		/* XXX */
+		ip->i_din.di_ouid = ip->i_uid;		/* XXX */
+		ip->i_din.di_ogid = ip->i_gid;		/* XXX */
+	}						/* XXX */
+	if (error = bread(ip->i_devvp,
+	    fsbtodb(fs, ino_to_fsba(fs, ip->i_number)),
+		(int)fs->fs_bsize, NOCRED, &bp)) {
+		brelse(bp);
+		return (error);
+	}
+	*((struct dinode *)bp->b_data +
+	    ino_to_fsbo(fs, ip->i_number)) = ip->i_din;
+	if (ap->a_waitfor)
+		return (bwrite(bp));
+	else {
+		bdwrite(bp);
+		return (0);
+	}
+}
+
+#define	SINGLE	0	/* index of single indirect block */
+#define	DOUBLE	1	/* index of double indirect block */
+#define	TRIPLE	2	/* index of triple indirect block */
+/*
+ * Truncate (or extend) the inode of a_vp to exactly a_length bytes,
+ * freeing any disk blocks that lie beyond the new length.  IO_SYNC in
+ * a_flags requests synchronous writes of the data blocks touched here.
+ * Returns 0 or an error number.
+ */
+int
+ffs_truncate(ap)
+	struct vop_truncate_args /* {
+		struct vnode *a_vp;
+		off_t a_length;
+		int a_flags;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	register struct vnode *ovp = ap->a_vp;
+	register daddr_t lastblock;
+	register struct inode *oip;
+	daddr_t bn, lbn, lastiblock[NIADDR], indir_lbn[NIADDR];
+	daddr_t oldblks[NDADDR + NIADDR], newblks[NDADDR + NIADDR];
+	off_t length = ap->a_length;
+	register struct fs *fs;
+	struct buf *bp;
+	int offset, size, level;
+	long count, nblocks, vflags, blocksreleased = 0;
+	struct timeval tv;
+	register int i;
+	int aflags, error, allerror;
+	off_t osize;
+
+	oip = VTOI(ovp);
+	tv = time;
+	if (ovp->v_type == VLNK &&
+	    oip->i_size < ovp->v_mount->mnt_maxsymlinklen) {
+#ifdef DIAGNOSTIC
+		if (length != 0)
+			panic("ffs_truncate: partial truncate of symlink");
+#endif
+		bzero((char *)&oip->i_shortlink, (u_int)oip->i_size);
+		oip->i_size = 0;
+		oip->i_flag |= IN_CHANGE | IN_UPDATE;
+		return (VOP_UPDATE(ovp, &tv, &tv, 1));
+	}
+	if (oip->i_size == length) {
+		oip->i_flag |= IN_CHANGE | IN_UPDATE;
+		return (VOP_UPDATE(ovp, &tv, &tv, 0));
+	}
+#ifdef QUOTA
+	if (error = getinoquota(oip))
+		return (error);
+#endif
+	vnode_pager_setsize(ovp, (u_long)length);
+	fs = oip->i_fs;
+	osize = oip->i_size;
+	/*
+	 * Lengthen the size of the file. We must ensure that the
+	 * last byte of the file is allocated. Since the smallest
+	 * value of osize is 0, length will be at least 1.
+	 */
+	if (osize < length) {
+		offset = blkoff(fs, length - 1);
+		lbn = lblkno(fs, length - 1);
+		aflags = B_CLRBUF;
+		if (ap->a_flags & IO_SYNC)
+			aflags |= B_SYNC;
+		if (error = ffs_balloc(oip, lbn, offset + 1, ap->a_cred, &bp,
+		    aflags))
+			return (error);
+		oip->i_size = length;
+		(void) vnode_pager_uncache(ovp);
+		if (aflags & B_SYNC)	/* aflags holds B_* bits, not IO_* */
+			bwrite(bp);
+		else
+			bawrite(bp);
+		oip->i_flag |= IN_CHANGE | IN_UPDATE;
+		return (VOP_UPDATE(ovp, &tv, &tv, 1));
+	}
+	/*
+	 * Shorten the size of the file. If the file is not being truncated
+	 * to a block boundary, the contents of the partial block following
+	 * the end of the file must be zeroed in case it ever becomes
+	 * accessible again because of subsequent file growth.
+	 */
+	offset = blkoff(fs, length);
+	if (offset == 0) {
+		oip->i_size = length;
+	} else {
+		lbn = lblkno(fs, length);
+		aflags = B_CLRBUF;
+		if (ap->a_flags & IO_SYNC)
+			aflags |= B_SYNC;
+		if (error = ffs_balloc(oip, lbn, offset, ap->a_cred, &bp,
+		    aflags))
+			return (error);
+		oip->i_size = length;
+		size = blksize(fs, oip, lbn);
+		(void) vnode_pager_uncache(ovp);
+		bzero((char *)bp->b_data + offset, (u_int)(size - offset));
+		allocbuf(bp, size);
+		if (aflags & B_SYNC)	/* aflags holds B_* bits, not IO_* */
+			bwrite(bp);
+		else
+			bawrite(bp);
+	}
+	/*
+	 * Calculate index into inode's block list of last direct and
+	 * indirect blocks (if any) which we want to keep.  Lastblock is
+	 * -1 when the file is truncated to 0.
+	 */
+	lastblock = lblkno(fs, length + fs->fs_bsize - 1) - 1;
+	lastiblock[SINGLE] = lastblock - NDADDR;
+	lastiblock[DOUBLE] = lastiblock[SINGLE] - NINDIR(fs);
+	lastiblock[TRIPLE] = lastiblock[DOUBLE] - NINDIR(fs) * NINDIR(fs);
+	nblocks = btodb(fs->fs_bsize);
+	/*
+	 * Update file and block pointers on disk before we start freeing
+	 * blocks.  If we crash before free'ing blocks below, the blocks
+	 * will be returned to the free list.  lastiblock values are also
+	 * normalized to -1 for calls to ffs_indirtrunc below.
+	 */
+	bcopy((caddr_t)&oip->i_db[0], (caddr_t)oldblks, sizeof oldblks);
+	for (level = TRIPLE; level >= SINGLE; level--)
+		if (lastiblock[level] < 0) {
+			oip->i_ib[level] = 0;
+			lastiblock[level] = -1;
+		}
+	for (i = NDADDR - 1; i > lastblock; i--)
+		oip->i_db[i] = 0;
+	oip->i_flag |= IN_CHANGE | IN_UPDATE;
+	allerror = VOP_UPDATE(ovp, &tv, &tv, MNT_WAIT);
+	/*
+	 * Having written the new inode to disk, save its new configuration
+	 * and put back the old block pointers long enough to process them.
+	 * Note that we save the new block configuration so we can check it
+	 * when we are done.
+	 */
+	bcopy((caddr_t)&oip->i_db[0], (caddr_t)newblks, sizeof newblks);
+	bcopy((caddr_t)oldblks, (caddr_t)&oip->i_db[0], sizeof oldblks);
+	oip->i_size = osize;
+	vflags = ((length > 0) ? V_SAVE : 0) | V_SAVEMETA;
+	error = vinvalbuf(ovp, vflags, ap->a_cred, ap->a_p, 0, 0);
+	if (error && allerror == 0)	/* keep the inode update error */
+		allerror = error;
+
+	/*
+	 * Indirect blocks first.
+	 */
+	indir_lbn[SINGLE] = -NDADDR;
+	indir_lbn[DOUBLE] = indir_lbn[SINGLE] - NINDIR(fs) - 1;
+	indir_lbn[TRIPLE] = indir_lbn[DOUBLE] - NINDIR(fs) * NINDIR(fs) - 1;
+	for (level = TRIPLE; level >= SINGLE; level--) {
+		bn = oip->i_ib[level];
+		if (bn != 0) {
+			error = ffs_indirtrunc(oip, indir_lbn[level],
+			    fsbtodb(fs, bn), lastiblock[level], level, &count);
+			if (error)
+				allerror = error;
+			blocksreleased += count;
+			if (lastiblock[level] < 0) {
+				oip->i_ib[level] = 0;
+				ffs_blkfree(oip, bn, fs->fs_bsize);
+				blocksreleased += nblocks;
+			}
+		}
+		if (lastiblock[level] >= 0)
+			goto done;
+	}
+
+	/*
+	 * All whole direct blocks or frags.
+	 */
+	for (i = NDADDR - 1; i > lastblock; i--) {
+		register long bsize;
+
+		bn = oip->i_db[i];
+		if (bn == 0)
+			continue;
+		oip->i_db[i] = 0;
+		bsize = blksize(fs, oip, i);
+		ffs_blkfree(oip, bn, bsize);
+		blocksreleased += btodb(bsize);
+	}
+	if (lastblock < 0)
+		goto done;
+
+	/*
+	 * Finally, look for a change in size of the
+	 * last direct block; release any frags.
+	 */
+	bn = oip->i_db[lastblock];
+	if (bn != 0) {
+		long oldspace, newspace;
+
+		/*
+		 * Calculate amount of space we're giving
+		 * back as old block size minus new block size.
+		 */
+		oldspace = blksize(fs, oip, lastblock);
+		oip->i_size = length;
+		newspace = blksize(fs, oip, lastblock);
+		if (newspace == 0)
+			panic("itrunc: newspace");
+		if (oldspace - newspace > 0) {
+			/*
+			 * Block number of space to be free'd is
+			 * the old block # plus the number of frags
+			 * required for the storage we're keeping.
+			 */
+			bn += numfrags(fs, newspace);
+			ffs_blkfree(oip, bn, oldspace - newspace);
+			blocksreleased += btodb(oldspace - newspace);
+		}
+	}
+done:
+#ifdef DIAGNOSTIC
+	for (level = SINGLE; level <= TRIPLE; level++)
+		if (newblks[NDADDR + level] != oip->i_ib[level])
+			panic("itrunc1");
+	for (i = 0; i < NDADDR; i++)
+		if (newblks[i] != oip->i_db[i])
+			panic("itrunc2");
+	if (length == 0 &&
+	    (ovp->v_dirtyblkhd.lh_first || ovp->v_cleanblkhd.lh_first))
+		panic("itrunc3");
+#endif /* DIAGNOSTIC */
+	/* Put back the real size. */
+	oip->i_size = length;
+	oip->i_blocks -= blocksreleased;
+	if (oip->i_blocks < 0)			/* sanity */
+		oip->i_blocks = 0;
+	oip->i_flag |= IN_CHANGE;
+#ifdef QUOTA
+	(void) chkdq(oip, -blocksreleased, NOCRED, 0);
+#endif
+	return (allerror);
+}
+
+/*
+ * Release blocks associated with the inode ip and stored in the indirect
+ * block at disk address dbn (logical block lbn).  Blocks are free'd in LIFO
+ * order up to (but not including) lastbn.  If level is greater than SINGLE,
+ * the entries are themselves indirect blocks and recursive calls to
+ * indirtrunc are used to cleanse them.  *countp receives the number of
+ * blocks released, in btodb() units; the last error seen is returned.
+ * NB: triple indirect blocks are untested.
+ */
+static int
+ffs_indirtrunc(ip, lbn, dbn, lastbn, level, countp)
+	register struct inode *ip;
+	daddr_t lbn, lastbn;
+	daddr_t dbn;
+	int level;
+	long *countp;
+{
+	register int i;
+	struct buf *bp;
+	register struct fs *fs = ip->i_fs;
+	register daddr_t *bap;
+	struct vnode *vp;
+	daddr_t *copy, nb, nlbn, last;
+	long blkcount, factor;
+	int nblocks, blocksreleased = 0;
+	int error = 0, allerror = 0;
+
+	/*
+	 * Calculate index in current block of last
+	 * block to be kept.  -1 indicates the entire
+	 * block so we need not calculate the index.
+	 */
+	factor = 1;
+	for (i = SINGLE; i < level; i++)
+		factor *= NINDIR(fs);
+	last = lastbn;
+	if (lastbn > 0)
+		last /= factor;
+	nblocks = btodb(fs->fs_bsize);
+	/*
+	 * Get buffer of block pointers, zero those entries corresponding
+	 * to blocks to be free'd, and update the on-disk copy first.  Since
+	 * double(triple) indirect blocks are handled before single(double)
+	 * ones, calls to bmap on these blocks will fail.  However, we
+	 * already have the on-disk address, so we set the b_blkno field
+	 * explicitly instead of letting bread do everything for us.
+	 */
+	vp = ITOV(ip);
+	bp = getblk(vp, lbn, (int)fs->fs_bsize, 0, 0);
+	if (bp->b_flags & (B_DONE | B_DELWRI)) {
+		/* Braces must be here in case trace evaluates to nothing. */
+		trace(TR_BREADHIT, pack(vp, fs->fs_bsize), lbn);
+	} else {
+		trace(TR_BREADMISS, pack(vp, fs->fs_bsize), lbn);
+		curproc->p_stats->p_ru.ru_inblock++;	/* pay for read */
+		bp->b_flags |= B_READ;
+		if (bp->b_bcount > bp->b_bufsize)
+			panic("ffs_indirtrunc: bad buffer size");
+		bp->b_blkno = dbn;
+		VOP_STRATEGY(bp);
+		error = biowait(bp);
+	}
+	if (error) {
+		brelse(bp);
+		*countp = 0;
+		return (error);
+	}
+
+	bap = (daddr_t *)bp->b_data;
+	MALLOC(copy, daddr_t *, fs->fs_bsize, M_TEMP, M_WAITOK);
+	bcopy((caddr_t)bap, (caddr_t)copy, (u_int)fs->fs_bsize);
+	bzero((caddr_t)&bap[last + 1],
+	  (u_int)(NINDIR(fs) - (last + 1)) * sizeof (daddr_t));
+	if (last == -1)
+		bp->b_flags |= B_INVAL;
+	error = bwrite(bp);
+	if (error)
+		allerror = error;
+	bap = copy;	/* walk the saved pointers; bp went to bwrite */
+
+	/*
+	 * Recursively free totally unused blocks.
+	 */
+	for (i = NINDIR(fs) - 1, nlbn = lbn + 1 - i * factor; i > last;
+	    i--, nlbn += factor) {
+		nb = bap[i];
+		if (nb == 0)
+			continue;
+		if (level > SINGLE) {
+			if (error = ffs_indirtrunc(ip, nlbn,
+			    fsbtodb(fs, nb), (daddr_t)-1, level - 1, &blkcount))
+				allerror = error;
+			blocksreleased += blkcount;
+		}
+		ffs_blkfree(ip, nb, fs->fs_bsize);
+		blocksreleased += nblocks;
+	}
+
+	/*
+	 * Recursively free last partial block.
+	 */
+	if (level > SINGLE && lastbn >= 0) {
+		last = lastbn % factor;
+		nb = bap[i];	/* i and nlbn carry over from the loop above */
+		if (nb != 0) {
+			if (error = ffs_indirtrunc(ip, nlbn, fsbtodb(fs, nb),
+			    last, level - 1, &blkcount))
+				allerror = error;
+			blocksreleased += blkcount;
+		}
+	}
+	FREE(copy, M_TEMP);
+	*countp = blocksreleased;
+	return (allerror);
+}
diff --git a/sys/ufs/ffs/ffs_subr.c b/sys/ufs/ffs/ffs_subr.c
new file mode 100644
index 00000000000..c251b16e697
--- /dev/null
+++ b/sys/ufs/ffs/ffs_subr.c
@@ -0,0 +1,238 @@
+/*
+ * Copyright (c) 1982, 1986, 1989, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)ffs_subr.c	8.2 (Berkeley) 9/21/93
+ */
+
+#include <sys/param.h>
+#include <ufs/ffs/fs.h>
+
+#ifdef KERNEL
+#include <sys/systm.h>
+#include <sys/vnode.h>
+#include <ufs/ffs/ffs_extern.h>
+#include <sys/buf.h>
+#include <ufs/ufs/quota.h>
+#include <ufs/ufs/inode.h>
+
+/*
+ * Read the block containing byte a_offset of the file behind a_vp and
+ * return its buffer in *a_bpp (NULL on error).  If a_res is non-zero, set
+ * *a_res to point at the byte for a_offset inside that buffer's data.
+ */
+int
+ffs_blkatoff(ap)
+	struct vop_blkatoff_args /* {
+		struct vnode *a_vp;
+		off_t a_offset;
+		char **a_res;
+		struct buf **a_bpp;
+	} */ *ap;
+{
+	struct inode *ip;
+	register struct fs *fs;
+	struct buf *bp;
+	daddr_t lbn;
+	int bsize, error;
+
+	ip = VTOI(ap->a_vp);
+	fs = ip->i_fs;
+	lbn = lblkno(fs, ap->a_offset);
+	bsize = blksize(fs, ip, lbn);
+
+	*ap->a_bpp = NULL;
+	if (error = bread(ap->a_vp, lbn, bsize, NOCRED, &bp)) {
+		brelse(bp);
+		return (error);
+	}
+	if (ap->a_res)
+		*ap->a_res = (char *)bp->b_data + blkoff(fs, ap->a_offset);
+	*ap->a_bpp = bp;
+	return (0);
+}
+#endif
+
+/*
+ * Update the frsum fields to reflect addition or deletion of some frags.
+ * fraglist[siz] gets cnt added per around[siz]/inside[siz] match in fragmap.
+ */
+void
+ffs_fragacct(fs, fragmap, fraglist, cnt)
+	struct fs *fs;
+	int fragmap;
+	long fraglist[];
+	int cnt;
+{
+	int inblk;
+	register int field, subfield;
+	register int siz, pos;
+
+	inblk = (int)(fragtbl[fs->fs_frag][fragmap]) << 1;
+	fragmap <<= 1;
+	for (siz = 1; siz < fs->fs_frag; siz++) {
+		if ((inblk & (1 << (siz + (fs->fs_frag % NBBY)))) == 0)
+			continue;
+		field = around[siz];
+		subfield = inside[siz];
+		for (pos = siz; pos <= fs->fs_frag; pos++) {
+			if ((fragmap & field) == subfield) {
+				fraglist[siz] += cnt;
+				pos += siz;	/* step past the matched pattern */
+				field <<= siz;
+				subfield <<= siz;
+			}
+			field <<= 1;
+			subfield <<= 1;
+		}
+	}
+}
+
+#if defined(KERNEL) && defined(DIAGNOSTIC)
+void
+ffs_checkoverlap(bp, ip)
+	struct buf *bp;		/* buffer whose disk block range is checked */
+	struct inode *ip;	/* only buffers mapping to ip->i_devvp count */
+{
+	register struct buf *ebp, *ep;
+	register daddr_t start, last;
+	struct vnode *vp;
+
+	ebp = &buf[nbuf];	/* end of the buf[] scan below */
+	start = bp->b_blkno;
+	last = start + btodb(bp->b_bcount) - 1;	/* inclusive last block */
+	for (ep = buf; ep < ebp; ep++) {
+		if (ep == bp || (ep->b_flags & B_INVAL) ||
+		    ep->b_vp == NULLVP)
+			continue;
+		if (VOP_BMAP(ep->b_vp, (daddr_t)0, &vp, (daddr_t)0, NULL))
+			continue;
+		if (vp != ip->i_devvp)
+			continue;
+		/* look for overlap */
+		if (ep->b_bcount == 0 || ep->b_blkno > last ||
+		    ep->b_blkno + btodb(ep->b_bcount) <= start)
+			continue;
+		vprint("Disk overlap", vp);
+		(void)printf("\tstart %d, end %d overlap start %d, end %d\n",
+			start, last, ep->b_blkno,
+			ep->b_blkno + btodb(ep->b_bcount) - 1);
+		panic("Disk buffer overlap");
+	}
+}
+#endif /* KERNEL && DIAGNOSTIC */
+
+/*
+ * block operations
+ *
+ * check if a block is available: all fs_frag bits of block h set in cp
+ */
+int
+ffs_isblock(fs, cp, h)
+	struct fs *fs;
+	unsigned char *cp;
+	daddr_t h;
+{
+	unsigned char want;	/* bits that must all be set */
+
+	switch ((int)fs->fs_frag) {
+	case 1:
+		want = 0x01 << (h & 0x7);
+		return ((cp[h >> 3] & want) == want);
+	case 2:
+		want = 0x03 << ((h & 0x3) << 1);
+		return ((cp[h >> 2] & want) == want);
+	case 4:
+		want = 0x0f << ((h & 0x1) << 2);
+		return ((cp[h >> 1] & want) == want);
+	case 8:
+		return (cp[h] == 0xff);
+	default:
+		panic("ffs_isblock");
+	}
+}
+
+/*
+ * take a block out of the map: clear all fs_frag bits of block h in cp
+ */
+void
+ffs_clrblock(fs, cp, h)
+	struct fs *fs;
+	u_char *cp;
+	daddr_t h;
+{
+
+	switch ((int)fs->fs_frag) {
+	case 1:
+		cp[h >> 3] &= ~(0x01 << (h & 0x7));
+		break;
+	case 2:
+		cp[h >> 2] &= ~(0x03 << ((h & 0x3) << 1));
+		break;
+	case 4:
+		cp[h >> 1] &= ~(0x0f << ((h & 0x1) << 2));
+		break;
+	case 8:
+		cp[h] = 0;
+		break;
+	default:
+		panic("ffs_clrblock");
+	}
+}
+
+/*
+ * put a block into the map: set all fs_frag bits of block h in cp
+ */
+void
+ffs_setblock(fs, cp, h)
+	struct fs *fs;
+	unsigned char *cp;
+	daddr_t h;
+{
+
+	switch ((int)fs->fs_frag) {
+
+	case 1:
+		cp[h >> 3] |= (0x01 << (h & 0x7));
+		break;
+	case 2:
+		cp[h >> 2] |= (0x03 << ((h & 0x3) << 1));
+		break;
+	case 4:
+		cp[h >> 1] |= (0x0f << ((h & 0x1) << 2));
+		break;
+	case 8:
+		cp[h] = 0xff;
+		break;
+	default:
+		panic("ffs_setblock");
+	}
+}
diff --git a/sys/ufs/ffs/ffs_tables.c b/sys/ufs/ffs/ffs_tables.c
new file mode 100644
index 00000000000..8cf46b0150a
--- /dev/null
+++ b/sys/ufs/ffs/ffs_tables.c
@@ -0,0 +1,136 @@
+/*
+ * Copyright (c) 1982, 1986, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)ffs_tables.c	8.1 (Berkeley) 6/11/93
+ */
+
+#include <sys/param.h>
+
+/*
+ * Bit patterns for identifying fragments in the block map
+ * used as ((map & around) == inside)
+ */
+/*
+ * around[n] has bits 0 through n+1 set: the window of the map that is
+ * compared.  (How callers choose n is not visible in this file.)
+ */
+int around[9] = {
+	0x3, 0x7, 0xf, 0x1f, 0x3f, 0x7f, 0xff, 0x1ff, 0x3ff
+};
+/*
+ * inside[n] has bits 1 through n set: around[n] with its lowest and
+ * highest bits cleared, so inside[0] is empty.
+ */
+int inside[9] = {
+	0x0, 0x2, 0x6, 0xe, 0x1e, 0x3e, 0x7e, 0xfe, 0x1fe
+};
+
+/*
+ * Given a block map bit pattern, the frag tables tell whether a
+ * particular size fragment is available.
+ *
+ * used as:
+ * if ((1 << (size - 1)) & fragtbl[fs->fs_frag][map]) {
+ *	at least one fragment of the indicated size is available
+ * }
+ *
+ * These tables are used by the scanc instruction on the VAX to
+ * quickly find an appropriate fragment.
+ *
+ * fragtbl124 has one entry for each possible value of a map byte and
+ * is the table that fragtbl[] below selects for 1, 2 and 4 fragments
+ * per block.
+ */
+u_char fragtbl124[256] = {
+	0x00, 0x16, 0x16, 0x2a, 0x16, 0x16, 0x26, 0x4e,
+	0x16, 0x16, 0x16, 0x3e, 0x2a, 0x3e, 0x4e, 0x8a,
+	0x16, 0x16, 0x16, 0x3e, 0x16, 0x16, 0x36, 0x5e,
+	0x16, 0x16, 0x16, 0x3e, 0x3e, 0x3e, 0x5e, 0x9e,
+	0x16, 0x16, 0x16, 0x3e, 0x16, 0x16, 0x36, 0x5e,
+	0x16, 0x16, 0x16, 0x3e, 0x3e, 0x3e, 0x5e, 0x9e,
+	0x2a, 0x3e, 0x3e, 0x2a, 0x3e, 0x3e, 0x2e, 0x6e,
+	0x3e, 0x3e, 0x3e, 0x3e, 0x2a, 0x3e, 0x6e, 0xaa,
+	0x16, 0x16, 0x16, 0x3e, 0x16, 0x16, 0x36, 0x5e,
+	0x16, 0x16, 0x16, 0x3e, 0x3e, 0x3e, 0x5e, 0x9e,
+	0x16, 0x16, 0x16, 0x3e, 0x16, 0x16, 0x36, 0x5e,
+	0x16, 0x16, 0x16, 0x3e, 0x3e, 0x3e, 0x5e, 0x9e,
+	0x26, 0x36, 0x36, 0x2e, 0x36, 0x36, 0x26, 0x6e,
+	0x36, 0x36, 0x36, 0x3e, 0x2e, 0x3e, 0x6e, 0xae,
+	0x4e, 0x5e, 0x5e, 0x6e, 0x5e, 0x5e, 0x6e, 0x4e,
+	0x5e, 0x5e, 0x5e, 0x7e, 0x6e, 0x7e, 0x4e, 0xce,
+	0x16, 0x16, 0x16, 0x3e, 0x16, 0x16, 0x36, 0x5e,
+	0x16, 0x16, 0x16, 0x3e, 0x3e, 0x3e, 0x5e, 0x9e,
+	0x16, 0x16, 0x16, 0x3e, 0x16, 0x16, 0x36, 0x5e,
+	0x16, 0x16, 0x16, 0x3e, 0x3e, 0x3e, 0x5e, 0x9e,
+	0x16, 0x16, 0x16, 0x3e, 0x16, 0x16, 0x36, 0x5e,
+	0x16, 0x16, 0x16, 0x3e, 0x3e, 0x3e, 0x5e, 0x9e,
+	0x3e, 0x3e, 0x3e, 0x3e, 0x3e, 0x3e, 0x3e, 0x7e,
+	0x3e, 0x3e, 0x3e, 0x3e, 0x3e, 0x3e, 0x7e, 0xbe,
+	0x2a, 0x3e, 0x3e, 0x2a, 0x3e, 0x3e, 0x2e, 0x6e,
+	0x3e, 0x3e, 0x3e, 0x3e, 0x2a, 0x3e, 0x6e, 0xaa,
+	0x3e, 0x3e, 0x3e, 0x3e, 0x3e, 0x3e, 0x3e, 0x7e,
+	0x3e, 0x3e, 0x3e, 0x3e, 0x3e, 0x3e, 0x7e, 0xbe,
+	0x4e, 0x5e, 0x5e, 0x6e, 0x5e, 0x5e, 0x6e, 0x4e,
+	0x5e, 0x5e, 0x5e, 0x7e, 0x6e, 0x7e, 0x4e, 0xce,
+	0x8a, 0x9e, 0x9e, 0xaa, 0x9e, 0x9e, 0xae, 0xce,
+	0x9e, 0x9e, 0x9e, 0xbe, 0xaa, 0xbe, 0xce, 0x8a,
+};
+
+/*
+ * Table selected by fragtbl[] below for 8 fragments per block; one
+ * entry for each possible value of a map byte.
+ */
+u_char fragtbl8[256] = {
+	0x00, 0x01, 0x01, 0x02, 0x01, 0x01, 0x02, 0x04,
+	0x01, 0x01, 0x01, 0x03, 0x02, 0x03, 0x04, 0x08,
+	0x01, 0x01, 0x01, 0x03, 0x01, 0x01, 0x03, 0x05,
+	0x02, 0x03, 0x03, 0x02, 0x04, 0x05, 0x08, 0x10,
+	0x01, 0x01, 0x01, 0x03, 0x01, 0x01, 0x03, 0x05,
+	0x01, 0x01, 0x01, 0x03, 0x03, 0x03, 0x05, 0x09,
+	0x02, 0x03, 0x03, 0x02, 0x03, 0x03, 0x02, 0x06,
+	0x04, 0x05, 0x05, 0x06, 0x08, 0x09, 0x10, 0x20,
+	0x01, 0x01, 0x01, 0x03, 0x01, 0x01, 0x03, 0x05,
+	0x01, 0x01, 0x01, 0x03, 0x03, 0x03, 0x05, 0x09,
+	0x01, 0x01, 0x01, 0x03, 0x01, 0x01, 0x03, 0x05,
+	0x03, 0x03, 0x03, 0x03, 0x05, 0x05, 0x09, 0x11,
+	0x02, 0x03, 0x03, 0x02, 0x03, 0x03, 0x02, 0x06,
+	0x03, 0x03, 0x03, 0x03, 0x02, 0x03, 0x06, 0x0a,
+	0x04, 0x05, 0x05, 0x06, 0x05, 0x05, 0x06, 0x04,
+	0x08, 0x09, 0x09, 0x0a, 0x10, 0x11, 0x20, 0x40,
+	0x01, 0x01, 0x01, 0x03, 0x01, 0x01, 0x03, 0x05,
+	0x01, 0x01, 0x01, 0x03, 0x03, 0x03, 0x05, 0x09,
+	0x01, 0x01, 0x01, 0x03, 0x01, 0x01, 0x03, 0x05,
+	0x03, 0x03, 0x03, 0x03, 0x05, 0x05, 0x09, 0x11,
+	0x01, 0x01, 0x01, 0x03, 0x01, 0x01, 0x03, 0x05,
+	0x01, 0x01, 0x01, 0x03, 0x03, 0x03, 0x05, 0x09,
+	0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x07,
+	0x05, 0x05, 0x05, 0x07, 0x09, 0x09, 0x11, 0x21,
+	0x02, 0x03, 0x03, 0x02, 0x03, 0x03, 0x02, 0x06,
+	0x03, 0x03, 0x03, 0x03, 0x02, 0x03, 0x06, 0x0a,
+	0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x07,
+	0x02, 0x03, 0x03, 0x02, 0x06, 0x07, 0x0a, 0x12,
+	0x04, 0x05, 0x05, 0x06, 0x05, 0x05, 0x06, 0x04,
+	0x05, 0x05, 0x05, 0x07, 0x06, 0x07, 0x04, 0x0c,
+	0x08, 0x09, 0x09, 0x0a, 0x09, 0x09, 0x0a, 0x0c,
+	0x10, 0x11, 0x11, 0x12, 0x20, 0x21, 0x40, 0x80,
+};
+
+/*
+ * The actual fragtbl array.
+ *
+ * Indexed by the number of fragments per block (fs->fs_frag).  Only
+ * indices 1, 2, 4 and 8 carry a table; every other slot is a null
+ * pointer.  Nine initializers are given, so MAXFRAG is presumably 8
+ * (defined outside this file -- confirm).
+ */
+u_char *fragtbl[MAXFRAG + 1] = {
+	0, fragtbl124, fragtbl124, 0, fragtbl124, 0, 0, 0, fragtbl8,
+};
diff --git a/sys/ufs/ffs/ffs_vfsops.c b/sys/ufs/ffs/ffs_vfsops.c
new file mode 100644
index 00000000000..505dd5db8cb
--- /dev/null
+++ b/sys/ufs/ffs/ffs_vfsops.c
@@ -0,0 +1,843 @@
+/*
+ * Copyright (c) 1989, 1991, 1993, 1994
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)ffs_vfsops.c	8.8 (Berkeley) 4/18/94
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/namei.h>
+#include <sys/proc.h>
+#include <sys/kernel.h>
+#include <sys/vnode.h>
+#include <sys/socket.h>
+#include <sys/mount.h>
+#include <sys/buf.h>
+#include <sys/mbuf.h>
+#include <sys/file.h>
+#include <sys/disklabel.h>
+#include <sys/ioctl.h>
+#include <sys/errno.h>
+#include <sys/malloc.h>
+
+#include <miscfs/specfs/specdev.h>
+
+#include <ufs/ufs/quota.h>
+#include <ufs/ufs/ufsmount.h>
+#include <ufs/ufs/inode.h>
+#include <ufs/ufs/ufs_extern.h>
+
+#include <ufs/ffs/fs.h>
+#include <ufs/ffs/ffs_extern.h>
+
+int ffs_sbupdate __P((struct ufsmount *, int));
+
+/*
+ * VFS operations vector for this file system.  The initializers are
+ * positional, so their order has to follow the member order of
+ * struct vfsops (declared outside this file -- check there before
+ * reordering).  ufs_start, ufs_root and ufs_quotactl fill three of the
+ * slots; the remaining entries are ffs_* routines.
+ */
+struct vfsops ufs_vfsops = {
+	ffs_mount,
+	ufs_start,
+	ffs_unmount,
+	ufs_root,
+	ufs_quotactl,
+	ffs_statfs,
+	ffs_sync,
+	ffs_vget,
+	ffs_fhtovp,
+	ffs_vptofh,
+	ffs_init,
+};
+
+extern u_long nextgennumber;
+
+/*
+ * Called by main() when ufs is going to be mounted as root.
+ *
+ * Name is updated by mount(8) after booting.
+ */
+#define ROOTNAME	"root_device"
+
+ffs_mountroot()
+{
+	extern struct vnode *rootvp;
+	struct proc *p = curproc;	/* XXX */
+	register struct mount *mp;
+	register struct fs *rootfs;
+	u_int namelen;
+	int error;
+
+	/*
+	 * Both the swap device and the root device need block-device
+	 * vnodes before anything can be mounted.
+	 */
+	if (bdevvp(swapdev, &swapdev_vp))
+		panic("ffs_mountroot: can't setup bdevvp's");
+	else if (bdevvp(rootdev, &rootvp))
+		panic("ffs_mountroot: can't setup bdevvp's");
+
+	/* Build a zeroed mount structure; the root starts out read-only. */
+	mp = malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK);
+	bzero((char *)mp, (u_long)sizeof(struct mount));
+	mp->mnt_flag = MNT_RDONLY;
+	mp->mnt_op = &ufs_vfsops;
+
+	error = ffs_mountfs(rootvp, mp, p);
+	if (error != 0) {
+		free(mp, M_MOUNT);
+		return (error);
+	}
+	error = vfs_lock(mp);
+	if (error != 0) {
+		/* Undo the mount before giving the structure back. */
+		(void)ffs_unmount(mp, 0, p);
+		free(mp, M_MOUNT);
+		return (error);
+	}
+
+	/* Publish the mount and mark it as the root file system. */
+	TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list);
+	mp->mnt_flag |= MNT_ROOTFS;
+	mp->mnt_vnodecovered = NULLVP;
+
+	/* The mounted-on name is "/", zero padded. */
+	rootfs = VFSTOUFS(mp)->um_fs;
+	bzero(rootfs->fs_fsmnt, sizeof(rootfs->fs_fsmnt));
+	rootfs->fs_fsmnt[0] = '/';
+	bcopy((caddr_t)rootfs->fs_fsmnt, (caddr_t)mp->mnt_stat.f_mntonname,
+	    MNAMELEN);
+
+	/* The mounted-from name is the ROOTNAME placeholder, zero padded. */
+	(void) copystr(ROOTNAME, mp->mnt_stat.f_mntfromname, MNAMELEN - 1,
+	    &namelen);
+	bzero(mp->mnt_stat.f_mntfromname + namelen, MNAMELEN - namelen);
+
+	(void)ffs_statfs(mp, &mp->mnt_stat, p);
+	vfs_unlock(mp);
+	/* Hand the superblock time stamp to inittodr(). */
+	inittodr(rootfs->fs_time);
+	return (0);
+}
+
+/*
+ * VFS Operations.
+ *
+ * mount system call
+ */
+/*
+ * ffs_mount: mount a file system, or update an existing mount.
+ *
+ *	mp	mount structure; MNT_UPDATE in mnt_flag selects update mode
+ *	path	user-space address of the mount point name
+ *	data	user-space address of a struct ufs_args
+ *	ndp	nameidata used to look up the device named by args.fspec
+ *	p	calling process
+ *
+ * Returns 0 on success.  Failures are the error from copyin(),
+ * ffs_flushfiles(), ffs_reload(), vfs_export(), namei() or
+ * ffs_mountfs(), or EBUSY, ENOTBLK, ENXIO or EINVAL as noted below.
+ */
+int
+ffs_mount(mp, path, data, ndp, p)
+	register struct mount *mp;
+	char *path;
+	caddr_t data;
+	struct nameidata *ndp;
+	struct proc *p;
+{
+	struct vnode *devvp;
+	struct ufs_args args;
+	struct ufsmount *ump;
+	register struct fs *fs;
+	u_int size;
+	int error, flags;
+
+	/* Fetch the caller's arguments from user space. */
+	if (error = copyin(data, (caddr_t)&args, sizeof (struct ufs_args)))
+		return (error);
+	/*
+	 * If updating, check whether changing from read-only to
+	 * read/write; if there is no device name, that's all we do.
+	 */
+	if (mp->mnt_flag & MNT_UPDATE) {
+		ump = VFSTOUFS(mp);
+		fs = ump->um_fs;
+		error = 0;
+		/*
+		 * Currently read/write and MNT_RDONLY requested: flush
+		 * the files open for writing (all of them when forced).
+		 */
+		if (fs->fs_ronly == 0 && (mp->mnt_flag & MNT_RDONLY)) {
+			flags = WRITECLOSE;
+			if (mp->mnt_flag & MNT_FORCE)
+				flags |= FORCECLOSE;
+			if (vfs_busy(mp))
+				return (EBUSY);
+			error = ffs_flushfiles(mp, flags, p);
+			vfs_unbusy(mp);
+		}
+		/* A reload is attempted only if the flush did not fail. */
+		if (!error && (mp->mnt_flag & MNT_RELOAD))
+			error = ffs_reload(mp, ndp->ni_cnd.cn_cred, p);
+		if (error)
+			return (error);
+		/* Upgrade from read-only to read/write. */
+		if (fs->fs_ronly && (mp->mnt_flag & MNT_WANTRDWR))
+			fs->fs_ronly = 0;
+		if (args.fspec == 0) {
+			/*
+			 * Process export requests.
+			 */
+			return (vfs_export(mp, &ump->um_export, &args.export));
+		}
+	}
+	/*
+	 * Not an update, or updating the name: look up the name
+	 * and verify that it refers to a sensible block device.
+	 */
+	NDINIT(ndp, LOOKUP, FOLLOW, UIO_USERSPACE, args.fspec, p);
+	if (error = namei(ndp))
+		return (error);
+	devvp = ndp->ni_vp;
+
+	/* Must be a block device ... */
+	if (devvp->v_type != VBLK) {
+		vrele(devvp);
+		return (ENOTBLK);
+	}
+	/* ... whose major number is below nblkdev. */
+	if (major(devvp->v_rdev) >= nblkdev) {
+		vrele(devvp);
+		return (ENXIO);
+	}
+	if ((mp->mnt_flag & MNT_UPDATE) == 0)
+		error = ffs_mountfs(devvp, mp, p);
+	else {
+		/*
+		 * On update the named device has to be the one already
+		 * mounted; the reference obtained by namei() is then
+		 * dropped here, otherwise by the error path below.
+		 */
+		if (devvp != ump->um_devvp)
+			error = EINVAL;	/* needs translation */
+		else
+			vrele(devvp);
+	}
+	if (error) {
+		vrele(devvp);
+		return (error);
+	}
+	ump = VFSTOUFS(mp);
+	fs = ump->um_fs;
+	/*
+	 * Record the mount point name in the superblock copy and in
+	 * mnt_stat, zero filling the remainder.  The copyinstr() results
+	 * are deliberately ignored.
+	 */
+	(void) copyinstr(path, fs->fs_fsmnt, sizeof(fs->fs_fsmnt) - 1, &size);
+	bzero(fs->fs_fsmnt + size, sizeof(fs->fs_fsmnt) - size);
+	bcopy((caddr_t)fs->fs_fsmnt, (caddr_t)mp->mnt_stat.f_mntonname,
+	    MNAMELEN);
+	/* Likewise for the device name. */
+	(void) copyinstr(args.fspec, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, 
+	    &size);
+	bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size);
+	/* Fill in the remaining mnt_stat fields. */
+	(void)ffs_statfs(mp, &mp->mnt_stat, p);
+	return (0);
+}
+
+/*
+ * Reload all incore data for a filesystem (used after running fsck on
+ * the root filesystem and finding things to fix). The filesystem must
+ * be mounted read-only.
+ *
+ * Things to do to update the mount:
+ *	1) invalidate all cached meta-data.
+ *	2) re-read superblock from disk.
+ *	3) re-read summary information from disk.
+ *	4) invalidate all inactive vnodes.
+ *	5) invalidate all cached file data.
+ *	6) re-read inode data for all active vnodes.
+ */
+/*
+ * ffs_reload: refresh the in-core state of a read-only mount from disk.
+ *
+ *	mountp	mount to reload; must have MNT_RDONLY set
+ *	cred, p	handed to vinvalbuf()
+ *
+ * Returns 0 on success, EINVAL if the mount is not read-only, EIO if
+ * the superblock read from disk fails its sanity checks, or the error
+ * from bread().  A buffer handed back by a failed bread() is released
+ * before returning, as ffs_vget() and ffs_mountfs() do.
+ */
+ffs_reload(mountp, cred, p)
+	register struct mount *mountp;
+	struct ucred *cred;
+	struct proc *p;
+{
+	register struct vnode *vp, *nvp, *devvp;
+	struct inode *ip;
+	struct buf *bp;
+	struct fs *fs;
+	int i, blks, size, error;
+
+	if ((mountp->mnt_flag & MNT_RDONLY) == 0)
+		return (EINVAL);
+	/*
+	 * Step 1: invalidate all cached meta-data.
+	 */
+	devvp = VFSTOUFS(mountp)->um_devvp;
+	if (vinvalbuf(devvp, 0, cred, p, 0, 0))
+		panic("ffs_reload: dirty1");
+	/*
+	 * Step 2: re-read superblock from disk.
+	 */
+	if (error = bread(devvp, SBLOCK, SBSIZE, NOCRED, &bp)) {
+		brelse(bp);
+		return (error);
+	}
+	fs = (struct fs *)bp->b_data;
+	if (fs->fs_magic != FS_MAGIC || fs->fs_bsize > MAXBSIZE ||
+	    fs->fs_bsize < sizeof(struct fs)) {
+		brelse(bp);
+		return (EIO);		/* XXX needs translation */
+	}
+	/*
+	 * Carry the in-core summary pointers over into the freshly read
+	 * superblock, then overwrite the in-core copy with it.
+	 */
+	fs = VFSTOUFS(mountp)->um_fs;
+	bcopy(&fs->fs_csp[0], &((struct fs *)bp->b_data)->fs_csp[0],
+	    sizeof(fs->fs_csp));
+	bcopy(bp->b_data, fs, (u_int)fs->fs_sbsize);
+	if (fs->fs_sbsize < SBSIZE)
+		bp->b_flags |= B_INVAL;
+	brelse(bp);
+	ffs_oldfscompat(fs);
+	/*
+	 * Step 3: re-read summary information from disk.
+	 */
+	blks = howmany(fs->fs_cssize, fs->fs_fsize);
+	for (i = 0; i < blks; i += fs->fs_frag) {
+		/* Full blocks, except possibly a short final piece. */
+		size = fs->fs_bsize;
+		if (i + fs->fs_frag > blks)
+			size = (blks - i) * fs->fs_fsize;
+		if (error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), size,
+		    NOCRED, &bp)) {
+			brelse(bp);
+			return (error);
+		}
+		bcopy(bp->b_data, fs->fs_csp[fragstoblks(fs, i)], (u_int)size);
+		brelse(bp);
+	}
+loop:
+	for (vp = mountp->mnt_vnodelist.lh_first; vp != NULL; vp = nvp) {
+		/* Fetch the successor first: vgone() may unlink vp. */
+		nvp = vp->v_mntvnodes.le_next;
+		/*
+		 * Step 4: invalidate all inactive vnodes.
+		 */
+		if (vp->v_usecount == 0) {
+			vgone(vp);
+			continue;
+		}
+		/*
+		 * Step 5: invalidate all cached file data.
+		 */
+		if (vget(vp, 1))
+			goto loop;
+		if (vinvalbuf(vp, 0, cred, p, 0, 0))
+			panic("ffs_reload: dirty2");
+		/*
+		 * Step 6: re-read inode data for all active vnodes.
+		 */
+		ip = VTOI(vp);
+		if (error =
+		    bread(devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)),
+		    (int)fs->fs_bsize, NOCRED, &bp)) {
+			brelse(bp);
+			vput(vp);
+			return (error);
+		}
+		ip->i_din = *((struct dinode *)bp->b_data +
+		    ino_to_fsbo(fs, ip->i_number));
+		brelse(bp);
+		vput(vp);
+		/* Start over if the vnode left this mount meanwhile. */
+		if (vp->v_mount != mountp)
+			goto loop;
+	}
+	return (0);
+}
+
+/*
+ * Common code for mount and mountroot
+ */
+/*
+ * ffs_mountfs: open the device, read the superblock and the cylinder
+ * group summary area, and attach a new ufsmount to mp.
+ *
+ *	devvp	vnode of the block device to mount
+ *	mp	mount structure to fill in; MNT_RDONLY selects read-only
+ *	p	calling process
+ *
+ * Returns 0 on success.  Otherwise returns the error from
+ * vfs_mountedon(), vinvalbuf(), VOP_OPEN() or bread(), EBUSY if the
+ * device is in use, or EINVAL if the superblock fails its checks.
+ */
+int
+ffs_mountfs(devvp, mp, p)
+	register struct vnode *devvp;
+	struct mount *mp;
+	struct proc *p;
+{
+	register struct ufsmount *ump;
+	struct buf *bp;
+	register struct fs *fs;
+	dev_t dev = devvp->v_rdev;
+	struct partinfo dpart;
+	caddr_t base, space;
+	int havepart = 0, blks;
+	int error, i, size;
+	int ronly;
+	extern struct vnode *rootvp;
+
+	/*
+	 * Disallow multiple mounts of the same device.
+	 * Disallow mounting of a device that is currently in use
+	 * (except for root, which might share swap device for miniroot).
+	 * Flush out any old buffers remaining from a previous use.
+	 */
+	if (error = vfs_mountedon(devvp))
+		return (error);
+	if (vcount(devvp) > 1 && devvp != rootvp)
+		return (EBUSY);
+	if (error = vinvalbuf(devvp, V_SAVE, p->p_ucred, p, 0, 0))
+		return (error);
+
+	/* Open the device in the mode that matches the mount. */
+	ronly = (mp->mnt_flag & MNT_RDONLY) != 0;
+	if (error = VOP_OPEN(devvp, ronly ? FREAD : FREAD|FWRITE, FSCRED, p))
+		return (error);
+	/*
+	 * Ask the driver for partition information.  Note that size as
+	 * set here is overwritten in the summary loop below before it is
+	 * read, and havepart is not read anywhere in this function.
+	 */
+	if (VOP_IOCTL(devvp, DIOCGPART, (caddr_t)&dpart, FREAD, NOCRED, p) != 0)
+		size = DEV_BSIZE;
+	else {
+		havepart = 1;
+		size = dpart.disklab->d_secsize;
+	}
+
+	/* bp and ump tell the cleanup code at `out' what to undo. */
+	bp = NULL;
+	ump = NULL;
+	if (error = bread(devvp, SBLOCK, SBSIZE, NOCRED, &bp))
+		goto out;
+	fs = (struct fs *)bp->b_data;
+	if (fs->fs_magic != FS_MAGIC || fs->fs_bsize > MAXBSIZE ||
+	    fs->fs_bsize < sizeof(struct fs)) {
+		error = EINVAL;		/* XXX needs translation */
+		goto out;
+	}
+	/* Allocate the ufsmount and a private copy of the superblock. */
+	ump = malloc(sizeof *ump, M_UFSMNT, M_WAITOK);
+	bzero((caddr_t)ump, sizeof *ump);
+	ump->um_fs = malloc((u_long)fs->fs_sbsize, M_UFSMNT,
+	    M_WAITOK);
+	bcopy(bp->b_data, ump->um_fs, (u_int)fs->fs_sbsize);
+	if (fs->fs_sbsize < SBSIZE)
+		bp->b_flags |= B_INVAL;
+	brelse(bp);
+	bp = NULL;
+	/* From here on fs refers to the private copy. */
+	fs = ump->um_fs;
+	fs->fs_ronly = ronly;
+	if (ronly == 0)
+		fs->fs_fmod = 1;
+	/*
+	 * Read the summary area into one allocation and point each
+	 * fs_csp[] slot at its block-sized piece of it.
+	 */
+	blks = howmany(fs->fs_cssize, fs->fs_fsize);
+	base = space = malloc((u_long)fs->fs_cssize, M_UFSMNT,
+	    M_WAITOK);
+	for (i = 0; i < blks; i += fs->fs_frag) {
+		/* Full blocks, except possibly a short final piece. */
+		size = fs->fs_bsize;
+		if (i + fs->fs_frag > blks)
+			size = (blks - i) * fs->fs_fsize;
+		error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), size,
+			NOCRED, &bp);
+		if (error) {
+			free(base, M_UFSMNT);
+			goto out;
+		}
+		bcopy(bp->b_data, space, (u_int)size);
+		fs->fs_csp[fragstoblks(fs, i)] = (struct csum *)space;
+		space += size;
+		brelse(bp);
+		bp = NULL;
+	}
+	/* Everything was read: wire the mount structures together. */
+	mp->mnt_data = (qaddr_t)ump;
+	mp->mnt_stat.f_fsid.val[0] = (long)dev;
+	mp->mnt_stat.f_fsid.val[1] = MOUNT_UFS;
+	mp->mnt_maxsymlinklen = fs->fs_maxsymlinklen;
+	mp->mnt_flag |= MNT_LOCAL;
+	ump->um_mountp = mp;
+	ump->um_dev = dev;
+	ump->um_devvp = devvp;
+	ump->um_nindir = fs->fs_nindir;
+	ump->um_bptrtodb = fs->fs_fsbtodb;
+	ump->um_seqinc = fs->fs_frag;
+	for (i = 0; i < MAXQUOTAS; i++)
+		ump->um_quotas[i] = NULLVP;
+	devvp->v_specflags |= SI_MOUNTEDON;
+	ffs_oldfscompat(fs);
+	return (0);
+out:
+	/*
+	 * Failure: release a buffer still held, close the device, and
+	 * free whatever had been allocated.
+	 */
+	if (bp)
+		brelse(bp);
+	(void)VOP_CLOSE(devvp, ronly ? FREAD : FREAD|FWRITE, NOCRED, p);
+	if (ump) {
+		free(ump->um_fs, M_UFSMNT);
+		free(ump, M_UFSMNT);
+		mp->mnt_data = (qaddr_t)0;
+	}
+	return (error);
+}
+
+/*
+ * Sanity checks for old file systems.
+ *
+ * XXX - goes away some day.
+ */
+ffs_oldfscompat(fs)
+	struct fs *fs;
+{
+	quad_t span;
+	int level;
+
+	/* Fields that old superblocks may carry with too small a value. */
+	fs->fs_npsect = max(fs->fs_npsect, fs->fs_nsect);	/* XXX */
+	fs->fs_interleave = max(fs->fs_interleave, 1);		/* XXX */
+	if (fs->fs_postblformat == FS_42POSTBLFMT)		/* XXX */
+		fs->fs_nrpos = 8;				/* XXX */
+
+	/* Nothing more to derive for the current inode format. */
+	if (fs->fs_inodefmt >= FS_44INODEFMT)			/* XXX */
+		return (0);
+
+	/*
+	 * Older inode format: compute the maximum file size from the
+	 * direct blocks plus every level of indirection, and derive the
+	 * two quad masks from the existing block and fragment masks.
+	 */
+	fs->fs_qbmask = ~fs->fs_bmask;				/* XXX */
+	fs->fs_qfmask = ~fs->fs_fmask;				/* XXX */
+	fs->fs_maxfilesize = fs->fs_bsize * NDADDR - 1;		/* XXX */
+	span = fs->fs_bsize;					/* XXX */
+	level = 0;
+	while (level < NIADDR) {				/* XXX */
+		span *= NINDIR(fs);				/* XXX */
+		fs->fs_maxfilesize += span;			/* XXX */
+		level++;
+	}
+	return (0);
+}
+
+/*
+ * unmount system call
+ */
+/*
+ * ffs_unmount: flush the file system, close its device and free the
+ * per-mount structures.
+ *
+ *	mp		mount being removed
+ *	mntflags	MNT_FORCE requests a forced unmount (refused with
+ *			EINVAL for the root file system)
+ *	p		calling process
+ *
+ * Returns the error from ffs_flushfiles() without tearing anything
+ * down, otherwise the result of closing the device.
+ */
+int
+ffs_unmount(mp, mntflags, p)
+	struct mount *mp;
+	int mntflags;
+	struct proc *p;
+{
+	register struct ufsmount *ump;
+	register struct fs *fs;
+	int error, flags;
+
+	flags = 0;
+	if (mntflags & MNT_FORCE) {
+		if (mp->mnt_flag & MNT_ROOTFS)
+			return (EINVAL);
+		flags |= FORCECLOSE;
+	}
+	if (error = ffs_flushfiles(mp, flags, p))
+		return (error);
+	ump = VFSTOUFS(mp);
+	fs = ump->um_fs;
+	ump->um_devvp->v_specflags &= ~SI_MOUNTEDON;
+	/*
+	 * Close the device with the mode that ffs_mountfs() opened it
+	 * with: FREAD for a read-only file system, FREAD|FWRITE
+	 * otherwise.
+	 */
+	error = VOP_CLOSE(ump->um_devvp, fs->fs_ronly ? FREAD : FREAD|FWRITE,
+		NOCRED, p);
+	vrele(ump->um_devvp);
+	/* fs_csp[0] is the base of the single summary allocation. */
+	free(fs->fs_csp[0], M_UFSMNT);
+	free(fs, M_UFSMNT);
+	free(ump, M_UFSMNT);
+	mp->mnt_data = (qaddr_t)0;
+	mp->mnt_flag &= ~MNT_LOCAL;
+	return (error);
+}
+
+/*
+ * Flush out all the files in a filesystem.
+ */
+ffs_flushfiles(mp, flags, p)
+	register struct mount *mp;
+	int flags;
+	struct proc *p;
+{
+	extern int doforce;
+	register struct ufsmount *ump;
+	int i, error;
+
+	ump = VFSTOUFS(mp);
+	/* FORCECLOSE is honoured only while doforce is non-zero. */
+	if (doforce == 0)
+		flags &= ~FORCECLOSE;
+#ifdef QUOTA
+	if ((mp->mnt_flag & MNT_QUOTA) != 0) {
+		/* First pass leaves the system vnodes alone. */
+		error = vflush(mp, NULLVP, SKIPSYSTEM|flags);
+		if (error != 0)
+			return (error);
+		/* Turn off every quota type that has a vnode attached. */
+		i = 0;
+		while (i < MAXQUOTAS) {
+			if (ump->um_quotas[i] != NULLVP)
+				quotaoff(p, mp, i);
+			i++;
+		}
+		/*
+		 * The vflush below then runs a second time so that the
+		 * system vnodes are disposed of as well.
+		 */
+	}
+#endif
+	error = vflush(mp, NULLVP, flags);
+	return (error);
+}
+
+/*
+ * Get file system statistics.
+ */
+int
+ffs_statfs(mp, sbp, p)
+	struct mount *mp;
+	register struct statfs *sbp;
+	struct proc *p;
+{
+	register struct fs *fs = VFSTOUFS(mp)->um_fs;
+
+	/* An in-core superblock without the magic number is fatal. */
+	if (fs->fs_magic != FS_MAGIC)
+		panic("ffs_statfs");
+
+	sbp->f_type = MOUNT_UFS;
+	/* Sizes are counted in fragments; I/O is done in full blocks. */
+	sbp->f_iosize = fs->fs_bsize;
+	sbp->f_bsize = fs->fs_fsize;
+	sbp->f_blocks = fs->fs_dsize;
+	sbp->f_bfree = fs->fs_cstotal.cs_nbfree * fs->fs_frag +
+		fs->fs_cstotal.cs_nffree;
+	/* Available space leaves out the fs_minfree percent reserve. */
+	sbp->f_bavail = (fs->fs_dsize * (100 - fs->fs_minfree) / 100) -
+		(fs->fs_dsize - sbp->f_bfree);
+	sbp->f_ffree = fs->fs_cstotal.cs_nifree;
+	sbp->f_files =  fs->fs_ncg * fs->fs_ipg - ROOTINO;
+
+	/* The names are already in place when sbp is the mount's own. */
+	if (sbp == &mp->mnt_stat)
+		return (0);
+	bcopy((caddr_t)mp->mnt_stat.f_mntonname,
+		(caddr_t)&sbp->f_mntonname[0], MNAMELEN);
+	bcopy((caddr_t)mp->mnt_stat.f_mntfromname,
+		(caddr_t)&sbp->f_mntfromname[0], MNAMELEN);
+	return (0);
+}
+
+/*
+ * Go through the disk queues to initiate sandbagged IO;
+ * go through the inodes to write those that have been modified;
+ * initiate the writing of the super block if it has been modified.
+ *
+ * Note: we are always called with the filesystem marked `MPBUSY'.
+ */
+/*
+ * ffs_sync: write back the superblock if it is marked modified, then
+ * every vnode on the mount that has pending inode flags or dirty
+ * buffers, then the device vnode.
+ *
+ *	mp	mount to synchronize
+ *	waitfor	passed on to ffs_sbupdate() and VOP_FSYNC()
+ *	cred, p	passed on to VOP_FSYNC()
+ *
+ * Returns 0 if nothing failed, otherwise the most recently recorded
+ * error; the walk continues after a failure.
+ */
+int
+ffs_sync(mp, waitfor, cred, p)
+	struct mount *mp;
+	int waitfor;
+	struct ucred *cred;
+	struct proc *p;
+{
+	register struct vnode *vp;
+	register struct inode *ip;
+	register struct ufsmount *ump = VFSTOUFS(mp);
+	register struct fs *fs;
+	int error, allerror = 0;
+
+	fs = ump->um_fs;
+	/*
+	 * Write back modified superblock.
+	 * Consistency check that the superblock
+	 * is still in the buffer cache.
+	 */
+	if (fs->fs_fmod != 0) {
+		/* A modified superblock on a read-only mount is fatal. */
+		if (fs->fs_ronly != 0) {		/* XXX */
+			printf("fs = %s\n", fs->fs_fsmnt);
+			panic("update: rofs mod");
+		}
+		fs->fs_fmod = 0;
+		fs->fs_time = time.tv_sec;
+		allerror = ffs_sbupdate(ump, waitfor);
+	}
+	/*
+	 * Write back each (modified) inode.
+	 */
+loop:
+	for (vp = mp->mnt_vnodelist.lh_first;
+	     vp != NULL;
+	     vp = vp->v_mntvnodes.le_next) {
+		/*
+		 * If the vnode that we are about to sync is no longer
+		 * associated with this mount point, start over.
+		 */
+		if (vp->v_mount != mp)
+			goto loop;
+		/* Locked vnodes are skipped rather than waited for. */
+		if (VOP_ISLOCKED(vp))
+			continue;
+		ip = VTOI(vp);
+		/* Nothing to do without inode flags or dirty buffers. */
+		if ((ip->i_flag &
+		    (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0 &&
+		    vp->v_dirtyblkhd.lh_first == NULL)
+			continue;
+		/* If the vnode cannot be obtained, rescan the list. */
+		if (vget(vp, 1))
+			goto loop;
+		if (error = VOP_FSYNC(vp, cred, waitfor, p))
+			allerror = error;
+		vput(vp);
+	}
+	/*
+	 * Force stale file system control information to be flushed.
+	 */
+	if (error = VOP_FSYNC(ump->um_devvp, cred, waitfor, p))
+		allerror = error;
+#ifdef QUOTA
+	qsync(mp);
+#endif
+	return (allerror);
+}
+
+/*
+ * Look up a FFS dinode number to find its incore vnode, otherwise read it
+ * in from disk.  If it is in core, wait for the lock bit to clear, then
+ * return the inode locked.  Detection and handling of mount points must be
+ * done by the calling routine.
+ */
+/*
+ * ffs_vget: return the vnode for inode number ino on mount mp, creating
+ * it and reading the inode from disk when it is not already hashed.
+ *
+ *	mp	mount the inode belongs to
+ *	ino	inode number
+ *	vpp	receives the vnode on success, NULL on failure
+ *
+ * Returns 0 on success, otherwise the error from getnewvnode(),
+ * bread() or ufs_vinit().
+ */
+int
+ffs_vget(mp, ino, vpp)
+	struct mount *mp;
+	ino_t ino;
+	struct vnode **vpp;
+{
+	register struct fs *fs;
+	register struct inode *ip;
+	struct ufsmount *ump;
+	struct buf *bp;
+	struct vnode *vp;
+	dev_t dev;
+	int i, type, error;
+
+	ump = VFSTOUFS(mp);
+	dev = ump->um_dev;
+	/* Already in core: the hash lookup supplies the vnode. */
+	if ((*vpp = ufs_ihashget(dev, ino)) != NULL)
+		return (0);
+
+	/* Allocate a new vnode/inode. */
+	if (error = getnewvnode(VT_UFS, mp, ffs_vnodeop_p, &vp)) {
+		*vpp = NULL;
+		return (error);
+	}
+	/* The malloc type depends on whether the device vnode is MFS. */
+	type = ump->um_devvp->v_tag == VT_MFS ? M_MFSNODE : M_FFSNODE; /* XXX */
+	MALLOC(ip, struct inode *, sizeof(struct inode), type, M_WAITOK);
+	bzero((caddr_t)ip, sizeof(struct inode));
+	vp->v_data = ip;
+	ip->i_vnode = vp;
+	ip->i_fs = fs = ump->um_fs;
+	ip->i_dev = dev;
+	ip->i_number = ino;
+#ifdef QUOTA
+	for (i = 0; i < MAXQUOTAS; i++)
+		ip->i_dquot[i] = NODQUOT;
+#endif
+	/*
+	 * Put it onto its hash chain and lock it so that other requests for
+	 * this inode will block if they arrive while we are sleeping waiting
+	 * for old data structures to be purged or for the contents of the
+	 * disk portion of this inode to be read.
+	 */
+	ufs_ihashins(ip);
+
+	/* Read in the disk contents for the inode, copy into the inode. */
+	if (error = bread(ump->um_devvp, fsbtodb(fs, ino_to_fsba(fs, ino)),
+	    (int)fs->fs_bsize, NOCRED, &bp)) {
+		/*
+		 * The inode does not contain anything useful, so it would
+		 * be misleading to leave it on its hash chain. With mode
+		 * still zero, it will be unlinked and returned to the free
+		 * list by vput().
+		 */
+		vput(vp);
+		brelse(bp);
+		*vpp = NULL;
+		return (error);
+	}
+	/* The block holds several dinodes; pick this inode's slot. */
+	ip->i_din = *((struct dinode *)bp->b_data + ino_to_fsbo(fs, ino));
+	brelse(bp);
+
+	/*
+	 * Initialize the vnode from the inode, check for aliases.
+	 * Note that the underlying vnode may have changed.
+	 */
+	if (error = ufs_vinit(mp, ffs_specop_p, FFS_FIFOOPS, &vp)) {
+		vput(vp);
+		*vpp = NULL;
+		return (error);
+	}
+	/*
+	 * Finish inode initialization now that aliasing has been resolved.
+	 */
+	ip->i_devvp = ump->um_devvp;
+	VREF(ip->i_devvp);
+	/*
+	 * Set up a generation number for this inode if it does not
+	 * already have one. This should only happen on old filesystems.
+	 */
+	if (ip->i_gen == 0) {
+		/* Keep the counter at or above the current time. */
+		if (++nextgennumber < (u_long)time.tv_sec)
+			nextgennumber = time.tv_sec;
+		ip->i_gen = nextgennumber;
+		/* Mark the inode modified only on a writable mount. */
+		if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0)
+			ip->i_flag |= IN_MODIFIED;
+	}
+	/*
+	 * Ensure that uid and gid are correct. This is a temporary
+	 * fix until fsck has been changed to do the update.
+	 */
+	if (fs->fs_inodefmt < FS_44INODEFMT) {		/* XXX */
+		ip->i_uid = ip->i_din.di_ouid;		/* XXX */
+		ip->i_gid = ip->i_din.di_ogid;		/* XXX */
+	}						/* XXX */
+
+	*vpp = vp;
+	return (0);
+}
+
+/*
+ * File handle to vnode
+ *
+ * Have to be really careful about stale file handles:
+ * - check that the inode number is valid
+ * - call ffs_vget() to get the locked inode
+ * - check for an unallocated inode (i_mode == 0)
+ * - check that the given client host has export rights and return
+ *   those rights via exflagsp and credanonp
+ */
+int
+ffs_fhtovp(mp, fhp, nam, vpp, exflagsp, credanonp)
+	register struct mount *mp;
+	struct fid *fhp;
+	struct mbuf *nam;
+	struct vnode **vpp;
+	int *exflagsp;
+	struct ucred **credanonp;
+{
+	struct fs *fs = VFSTOUFS(mp)->um_fs;
+	register struct ufid *ufhp = (struct ufid *)fhp;
+
+	/*
+	 * Only an inode number from ROOTINO up to, but not including,
+	 * the total inode count (groups * inodes per group) is passed
+	 * on to the export check; anything else is a stale handle.
+	 */
+	if (ufhp->ufid_ino >= ROOTINO &&
+	    ufhp->ufid_ino < fs->fs_ncg * fs->fs_ipg)
+		return (ufs_check_export(mp, ufhp, nam, vpp, exflagsp,
+		    credanonp));
+	return (ESTALE);
+}
+
+/*
+ * Vnode pointer to File handle
+ */
+/* ARGSUSED */
+ffs_vptofh(vp, fhp)
+	struct vnode *vp;
+	struct fid *fhp;
+{
+	register struct inode *node = VTOI(vp);
+	register struct ufid *handle = (struct ufid *)fhp;
+
+	/* The handle is the inode number plus its generation number. */
+	handle->ufid_gen = node->i_gen;
+	handle->ufid_ino = node->i_number;
+	handle->ufid_len = sizeof(struct ufid);
+	return (0);
+}
+
+/*
+ * Write a superblock and associated information back to disk.
+ */
+/*
+ * ffs_sbupdate: write the in-core superblock and the cylinder group
+ * summary area to the device.
+ *
+ *	mp	ufsmount (not a struct mount) of the file system
+ *	waitfor	MNT_WAIT makes every write synchronous; anything else
+ *		starts the writes asynchronously
+ *
+ * Returns 0, or for synchronous writes the most recent write error.
+ * A failure is remembered even when a later write succeeds.
+ */
+int
+ffs_sbupdate(mp, waitfor)
+	struct ufsmount *mp;
+	int waitfor;
+{
+	register struct fs *fs = mp->um_fs;
+	register struct buf *bp;
+	int blks;
+	caddr_t space;
+	int i, size, error, allerror = 0;
+
+	/* The superblock itself. */
+	bp = getblk(mp->um_devvp, SBLOCK, (int)fs->fs_sbsize, 0, 0);
+	bcopy((caddr_t)fs, bp->b_data, (u_int)fs->fs_sbsize);
+	/* Restore compatibility to old file systems.		   XXX */
+	if (fs->fs_postblformat == FS_42POSTBLFMT)		/* XXX */
+		((struct fs *)bp->b_data)->fs_nrpos = -1;	/* XXX */
+	if (waitfor != MNT_WAIT)
+		bawrite(bp);
+	else if (error = bwrite(bp))
+		allerror = error;
+	/*
+	 * The summary area, one block at a time; fs_csp[0] is the start
+	 * of the contiguous in-core copy.
+	 */
+	blks = howmany(fs->fs_cssize, fs->fs_fsize);
+	space = (caddr_t)fs->fs_csp[0];
+	for (i = 0; i < blks; i += fs->fs_frag) {
+		/* Full blocks, except possibly a short final piece. */
+		size = fs->fs_bsize;
+		if (i + fs->fs_frag > blks)
+			size = (blks - i) * fs->fs_fsize;
+		bp = getblk(mp->um_devvp, fsbtodb(fs, fs->fs_csaddr + i),
+		    size, 0, 0);
+		bcopy(space, bp->b_data, (u_int)size);
+		space += size;
+		if (waitfor != MNT_WAIT)
+			bawrite(bp);
+		else if (error = bwrite(bp))
+			allerror = error;
+	}
+	return (allerror);
+}
diff --git a/sys/ufs/ffs/ffs_vnops.c b/sys/ufs/ffs/ffs_vnops.c
new file mode 100644
index 00000000000..59814f2f378
--- /dev/null
+++ b/sys/ufs/ffs/ffs_vnops.c
@@ -0,0 +1,288 @@
+/*
+ * Copyright (c) 1982, 1986, 1989, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)ffs_vnops.c	8.7 (Berkeley) 2/3/94
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/resourcevar.h>
+#include <sys/kernel.h>
+#include <sys/file.h>
+#include <sys/stat.h>
+#include <sys/buf.h>
+#include <sys/proc.h>
+#include <sys/conf.h>
+#include <sys/mount.h>
+#include <sys/vnode.h>
+#include <sys/malloc.h>
+
+#include <vm/vm.h>
+
+#include <miscfs/specfs/specdev.h>
+#include <miscfs/fifofs/fifo.h>
+
+#include <ufs/ufs/lockf.h>
+#include <ufs/ufs/quota.h>
+#include <ufs/ufs/inode.h>
+#include <ufs/ufs/dir.h>
+#include <ufs/ufs/ufs_extern.h>
+
+#include <ufs/ffs/fs.h>
+#include <ufs/ffs/ffs_extern.h>
+
+/*
+ * Global vfs data structures for ufs.
+ *
+ * ffs_vnodeop_entries pairs each vnode operation descriptor with the
+ * routine used for it in this table: most operations go to the ufs_*
+ * routines, while read, write, fsync, blkatoff, valloc, reallocblks,
+ * vfree, truncate and update use the ffs_* versions.  vop_default_desc
+ * is bound to vn_default_error, bwrite to vn_bwrite, and a NULL pair
+ * ends the list.  ffs_vnodeop_opv_desc ties the table to ffs_vnodeop_p.
+ */
+int (**ffs_vnodeop_p)();
+struct vnodeopv_entry_desc ffs_vnodeop_entries[] = {
+	{ &vop_default_desc, vn_default_error },
+	{ &vop_lookup_desc, ufs_lookup },		/* lookup */
+	{ &vop_create_desc, ufs_create },		/* create */
+	{ &vop_mknod_desc, ufs_mknod },			/* mknod */
+	{ &vop_open_desc, ufs_open },			/* open */
+	{ &vop_close_desc, ufs_close },			/* close */
+	{ &vop_access_desc, ufs_access },		/* access */
+	{ &vop_getattr_desc, ufs_getattr },		/* getattr */
+	{ &vop_setattr_desc, ufs_setattr },		/* setattr */
+	{ &vop_read_desc, ffs_read },			/* read */
+	{ &vop_write_desc, ffs_write },			/* write */
+	{ &vop_ioctl_desc, ufs_ioctl },			/* ioctl */
+	{ &vop_select_desc, ufs_select },		/* select */
+	{ &vop_mmap_desc, ufs_mmap },			/* mmap */
+	{ &vop_fsync_desc, ffs_fsync },			/* fsync */
+	{ &vop_seek_desc, ufs_seek },			/* seek */
+	{ &vop_remove_desc, ufs_remove },		/* remove */
+	{ &vop_link_desc, ufs_link },			/* link */
+	{ &vop_rename_desc, ufs_rename },		/* rename */
+	{ &vop_mkdir_desc, ufs_mkdir },			/* mkdir */
+	{ &vop_rmdir_desc, ufs_rmdir },			/* rmdir */
+	{ &vop_symlink_desc, ufs_symlink },		/* symlink */
+	{ &vop_readdir_desc, ufs_readdir },		/* readdir */
+	{ &vop_readlink_desc, ufs_readlink },		/* readlink */
+	{ &vop_abortop_desc, ufs_abortop },		/* abortop */
+	{ &vop_inactive_desc, ufs_inactive },		/* inactive */
+	{ &vop_reclaim_desc, ufs_reclaim },		/* reclaim */
+	{ &vop_lock_desc, ufs_lock },			/* lock */
+	{ &vop_unlock_desc, ufs_unlock },		/* unlock */
+	{ &vop_bmap_desc, ufs_bmap },			/* bmap */
+	{ &vop_strategy_desc, ufs_strategy },		/* strategy */
+	{ &vop_print_desc, ufs_print },			/* print */
+	{ &vop_islocked_desc, ufs_islocked },		/* islocked */
+	{ &vop_pathconf_desc, ufs_pathconf },		/* pathconf */
+	{ &vop_advlock_desc, ufs_advlock },		/* advlock */
+	{ &vop_blkatoff_desc, ffs_blkatoff },		/* blkatoff */
+	{ &vop_valloc_desc, ffs_valloc },		/* valloc */
+	{ &vop_reallocblks_desc, ffs_reallocblks },	/* reallocblks */
+	{ &vop_vfree_desc, ffs_vfree },			/* vfree */
+	{ &vop_truncate_desc, ffs_truncate },		/* truncate */
+	{ &vop_update_desc, ffs_update },		/* update */
+	{ &vop_bwrite_desc, vn_bwrite },
+	{ (struct vnodeop_desc*)NULL, (int(*)())NULL }
+};
+struct vnodeopv_desc ffs_vnodeop_opv_desc =
+	{ &ffs_vnodeop_p, ffs_vnodeop_entries };
+/*
+ * Operation table built from the spec_* routines.  close, read and
+ * write use the ufsspec_* routines, and the following entries use
+ * ufs_* or ffs_* routines instead: access, getattr, setattr, fsync,
+ * inactive, reclaim, lock, unlock, print, islocked, vfree and update.
+ * The list ends with a NULL pair; ffs_specop_opv_desc ties the table
+ * to ffs_specop_p.
+ */
+int (**ffs_specop_p)();
+struct vnodeopv_entry_desc ffs_specop_entries[] = {
+	{ &vop_default_desc, vn_default_error },
+	{ &vop_lookup_desc, spec_lookup },		/* lookup */
+	{ &vop_create_desc, spec_create },		/* create */
+	{ &vop_mknod_desc, spec_mknod },		/* mknod */
+	{ &vop_open_desc, spec_open },			/* open */
+	{ &vop_close_desc, ufsspec_close },		/* close */
+	{ &vop_access_desc, ufs_access },		/* access */
+	{ &vop_getattr_desc, ufs_getattr },		/* getattr */
+	{ &vop_setattr_desc, ufs_setattr },		/* setattr */
+	{ &vop_read_desc, ufsspec_read },		/* read */
+	{ &vop_write_desc, ufsspec_write },		/* write */
+	{ &vop_ioctl_desc, spec_ioctl },		/* ioctl */
+	{ &vop_select_desc, spec_select },		/* select */
+	{ &vop_mmap_desc, spec_mmap },			/* mmap */
+	{ &vop_fsync_desc, ffs_fsync },			/* fsync */
+	{ &vop_seek_desc, spec_seek },			/* seek */
+	{ &vop_remove_desc, spec_remove },		/* remove */
+	{ &vop_link_desc, spec_link },			/* link */
+	{ &vop_rename_desc, spec_rename },		/* rename */
+	{ &vop_mkdir_desc, spec_mkdir },		/* mkdir */
+	{ &vop_rmdir_desc, spec_rmdir },		/* rmdir */
+	{ &vop_symlink_desc, spec_symlink },		/* symlink */
+	{ &vop_readdir_desc, spec_readdir },		/* readdir */
+	{ &vop_readlink_desc, spec_readlink },		/* readlink */
+	{ &vop_abortop_desc, spec_abortop },		/* abortop */
+	{ &vop_inactive_desc, ufs_inactive },		/* inactive */
+	{ &vop_reclaim_desc, ufs_reclaim },		/* reclaim */
+	{ &vop_lock_desc, ufs_lock },			/* lock */
+	{ &vop_unlock_desc, ufs_unlock },		/* unlock */
+	{ &vop_bmap_desc, spec_bmap },			/* bmap */
+	{ &vop_strategy_desc, spec_strategy },		/* strategy */
+	{ &vop_print_desc, ufs_print },			/* print */
+	{ &vop_islocked_desc, ufs_islocked },		/* islocked */
+	{ &vop_pathconf_desc, spec_pathconf },		/* pathconf */
+	{ &vop_advlock_desc, spec_advlock },		/* advlock */
+	{ &vop_blkatoff_desc, spec_blkatoff },		/* blkatoff */
+	{ &vop_valloc_desc, spec_valloc },		/* valloc */
+	{ &vop_reallocblks_desc, spec_reallocblks },	/* reallocblks */
+	{ &vop_vfree_desc, ffs_vfree },			/* vfree */
+	{ &vop_truncate_desc, spec_truncate },		/* truncate */
+	{ &vop_update_desc, ffs_update },		/* update */
+	{ &vop_bwrite_desc, vn_bwrite },
+	{ (struct vnodeop_desc*)NULL, (int(*)())NULL }
+};
+struct vnodeopv_desc ffs_specop_opv_desc =
+	{ &ffs_specop_p, ffs_specop_entries };
+/*
+ * Operation table built from the fifo_* routines; compiled only when
+ * FIFO is defined.  close, read and write use the ufsfifo_* routines,
+ * and the following entries use ufs_* or ffs_* routines instead:
+ * access, getattr, setattr, fsync, inactive, reclaim, lock, unlock,
+ * print, islocked, vfree and update.  The list ends with a NULL pair;
+ * ffs_fifoop_opv_desc ties the table to ffs_fifoop_p.
+ */
+#ifdef FIFO
+int (**ffs_fifoop_p)();
+struct vnodeopv_entry_desc ffs_fifoop_entries[] = {
+	{ &vop_default_desc, vn_default_error },
+	{ &vop_lookup_desc, fifo_lookup },		/* lookup */
+	{ &vop_create_desc, fifo_create },		/* create */
+	{ &vop_mknod_desc, fifo_mknod },		/* mknod */
+	{ &vop_open_desc, fifo_open },			/* open */
+	{ &vop_close_desc, ufsfifo_close },		/* close */
+	{ &vop_access_desc, ufs_access },		/* access */
+	{ &vop_getattr_desc, ufs_getattr },		/* getattr */
+	{ &vop_setattr_desc, ufs_setattr },		/* setattr */
+	{ &vop_read_desc, ufsfifo_read },		/* read */
+	{ &vop_write_desc, ufsfifo_write },		/* write */
+	{ &vop_ioctl_desc, fifo_ioctl },		/* ioctl */
+	{ &vop_select_desc, fifo_select },		/* select */
+	{ &vop_mmap_desc, fifo_mmap },			/* mmap */
+	{ &vop_fsync_desc, ffs_fsync },			/* fsync */
+	{ &vop_seek_desc, fifo_seek },			/* seek */
+	{ &vop_remove_desc, fifo_remove },		/* remove */
+	{ &vop_link_desc, fifo_link },			/* link */
+	{ &vop_rename_desc, fifo_rename },		/* rename */
+	{ &vop_mkdir_desc, fifo_mkdir },		/* mkdir */
+	{ &vop_rmdir_desc, fifo_rmdir },		/* rmdir */
+	{ &vop_symlink_desc, fifo_symlink },		/* symlink */
+	{ &vop_readdir_desc, fifo_readdir },		/* readdir */
+	{ &vop_readlink_desc, fifo_readlink },		/* readlink */
+	{ &vop_abortop_desc, fifo_abortop },		/* abortop */
+	{ &vop_inactive_desc, ufs_inactive },		/* inactive */
+	{ &vop_reclaim_desc, ufs_reclaim },		/* reclaim */
+	{ &vop_lock_desc, ufs_lock },			/* lock */
+	{ &vop_unlock_desc, ufs_unlock },		/* unlock */
+	{ &vop_bmap_desc, fifo_bmap },			/* bmap */
+	{ &vop_strategy_desc, fifo_strategy },		/* strategy */
+	{ &vop_print_desc, ufs_print },			/* print */
+	{ &vop_islocked_desc, ufs_islocked },		/* islocked */
+	{ &vop_pathconf_desc, fifo_pathconf },		/* pathconf */
+	{ &vop_advlock_desc, fifo_advlock },		/* advlock */
+	{ &vop_blkatoff_desc, fifo_blkatoff },		/* blkatoff */
+	{ &vop_valloc_desc, fifo_valloc },		/* valloc */
+	{ &vop_reallocblks_desc, fifo_reallocblks },	/* reallocblks */
+	{ &vop_vfree_desc, ffs_vfree },			/* vfree */
+	{ &vop_truncate_desc, fifo_truncate },		/* truncate */
+	{ &vop_update_desc, ffs_update },		/* update */
+	{ &vop_bwrite_desc, vn_bwrite },
+	{ (struct vnodeop_desc*)NULL, (int(*)())NULL }
+};
+struct vnodeopv_desc ffs_fifoop_opv_desc =
+	{ &ffs_fifoop_p, ffs_fifoop_entries };
+#endif /* FIFO */
+
+#ifdef DEBUG
+/*
+ * Enabling cluster read/write operations.
+ *
+ * With DEBUG defined, doclusterread and doclusterwrite are variables
+ * initialized to 1 and registered by name in the ctldebug entries
+ * debug11 and debug12.  Without DEBUG they are macros fixed at 1.
+ */
+#include <sys/sysctl.h>
+int doclusterread = 1;
+struct ctldebug debug11 = { "doclusterread", &doclusterread };
+int doclusterwrite = 1;
+struct ctldebug debug12 = { "doclusterwrite", &doclusterwrite };
+#else
+/*
+ * XXX for ufs_readwrite
+ * (presumably the ufs_readwrite.c included below refers to both
+ * names -- confirm against that file)
+ */
+#define doclusterread 1
+#define doclusterwrite 1
+#endif
+
+#include <ufs/ufs/ufs_readwrite.c>
+
+/*
+ * Synch an open file.
+ *
+ * Every dirty buffer on the vnode that is not already busy is marked
+ * busy and written, and the scan of the dirty list is restarted after
+ * each one.  If a_waitfor is MNT_WAIT the routine then sleeps until
+ * v_numoutput drops to zero.  Finally VOP_UPDATE is called with the
+ * current time for both time arguments and a wait flag that is set
+ * only when a_waitfor is MNT_WAIT; its result is returned.
+ *
+ * The dirty list is examined at splbio; the priority level saved in s
+ * is restored before each write and before returning.
+ */
+/* ARGSUSED */
+int
+ffs_fsync(ap)
+	struct vop_fsync_args /* {
+		struct vnode *a_vp;
+		struct ucred *a_cred;
+		int a_waitfor;
+		struct proc *a_p;
+	} */ *ap;
+{
+	register struct vnode *vp = ap->a_vp;
+	register struct buf *bp;
+	struct timeval tv;
+	struct buf *nbp;
+	int s;
+
+	/*
+	 * Flush all dirty buffers associated with a vnode.
+	 */
+loop:
+	s = splbio();
+	for (bp = vp->v_dirtyblkhd.lh_first; bp; bp = nbp) {
+		nbp = bp->b_vnbufs.le_next;
+		if ((bp->b_flags & B_BUSY))
+			continue;
+		if ((bp->b_flags & B_DELWRI) == 0)
+			panic("ffs_fsync: not dirty");
+		bremfree(bp);
+		bp->b_flags |= B_BUSY;
+		splx(s);
+		/*
+		 * Wait for I/O associated with indirect blocks to complete,
+		 * since there is no way to quickly wait for them below.
+		 */
+		if (bp->b_vp == vp || ap->a_waitfor == MNT_NOWAIT)
+			(void) bawrite(bp);
+		else
+			(void) bwrite(bp);
+		goto loop;
+	}
+	if (ap->a_waitfor == MNT_WAIT) {
+		while (vp->v_numoutput) {
+			vp->v_flag |= VBWAIT;
+			sleep((caddr_t)&vp->v_numoutput, PRIBIO + 1);
+		}
+#ifdef DIAGNOSTIC
+		if (vp->v_dirtyblkhd.lh_first) {
+			vprint("ffs_fsync: dirty", vp);
+			/*
+			 * Restore the saved priority level before starting
+			 * over.  The code at loop: calls splbio() again and
+			 * would otherwise store the already raised level in
+			 * s, so the final splx(s) would never lower it.
+			 */
+			splx(s);
+			goto loop;
+		}
+#endif
+	}
+	splx(s);
+	tv = time;
+	return (VOP_UPDATE(ap->a_vp, &tv, &tv, ap->a_waitfor == MNT_WAIT));
+}
diff --git a/sys/ufs/ffs/fs.h b/sys/ufs/ffs/fs.h
new file mode 100644
index 00000000000..bef052feef4
--- /dev/null
+++ b/sys/ufs/ffs/fs.h
@@ -0,0 +1,489 @@
+/*
+ * Copyright (c) 1982, 1986, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)fs.h	8.7 (Berkeley) 4/19/94
+ */
+
+/*
+ * Each disk drive contains some number of file systems.
+ * A file system consists of a number of cylinder groups.
+ * Each cylinder group has inodes and data.
+ *
+ * A file system is described by its super-block, which in turn
+ * describes the cylinder groups.  The super-block is critical
+ * data and is replicated in each cylinder group to protect against
+ * catastrophic loss.  This is done at `newfs' time and the critical
+ * super-block data does not change, so the copies need not be
+ * referenced further unless disaster strikes.
+ *
+ * For file system fs, the offsets of the various blocks of interest
+ * are given in the super block as:
+ *	[fs->fs_sblkno]		Super-block
+ *	[fs->fs_cblkno]		Cylinder group block
+ *	[fs->fs_iblkno]		Inode blocks
+ *	[fs->fs_dblkno]		Data blocks
+ * The beginning of cylinder group cg in fs, is given by
+ * the ``cgbase(fs, cg)'' macro.
+ *
+ * The first boot and super blocks are given in absolute disk addresses.
+ * The byte-offset forms are preferred, as they don't imply a sector size.
+ *
+ * BBOFF and SBOFF are the byte offsets; BBLOCK and SBLOCK are the same
+ * two positions expressed in units of DEV_BSIZE.  The super block
+ * position is the boot block position plus BBSIZE bytes.
+ */
+#define BBSIZE		8192
+#define SBSIZE		8192
+#define	BBOFF		((off_t)(0))
+#define	SBOFF		((off_t)(BBOFF + BBSIZE))
+#define	BBLOCK		((daddr_t)(0))
+#define	SBLOCK		((daddr_t)(BBLOCK + BBSIZE / DEV_BSIZE))
+
+/*
+ * Addresses stored in inodes are capable of addressing fragments
+ * of `blocks'. File system blocks of at most size MAXBSIZE can 
+ * be optionally broken into 2, 4, or 8 pieces, each of which is
+ * addressable; these pieces may be DEV_BSIZE, or some multiple of
+ * a DEV_BSIZE unit.
+ *
+ * Large files consist of exclusively large data blocks.  To avoid
+ * undue wasted disk space, the last data block of a small file may be
+ * allocated as only as many fragments of a large block as are
+ * necessary.  The file system format retains only a single pointer
+ * to such a fragment, which is a piece of a single large block that
+ * has been divided.  The size of such a fragment is determinable from
+ * information in the inode, using the ``blksize(fs, ip, lbn)'' macro.
+ *
+ * The file system records space availability at the fragment level;
+ * to determine block availability, aligned fragments are examined.
+ */
+
+/*
+ * MINBSIZE is the smallest allowable block size.
+ * In order to ensure that it is possible to create files of size
+ * 2^32 with only two levels of indirection, MINBSIZE is set to 4096.
+ * MINBSIZE must be big enough to hold a cylinder group block,
+ * thus changes to (struct cg) must keep its size within MINBSIZE.
+ * Note that super blocks are always of size SBSIZE,
+ * and that both SBSIZE and MAXBSIZE must be >= MINBSIZE.
+ */
+#define MINBSIZE	4096
+
+/*
+ * The path name on which the file system is mounted is maintained
+ * in fs_fsmnt. MAXMNTLEN defines the amount of space allocated in
+ * the super block for this name (it is the dimension of fs_fsmnt[]).
+ * The limit on the amount of summary information per file system
+ * is defined by MAXCSBUFS (the dimension of fs_csp[]). It is
+ * currently parameterized for a maximum of two million cylinders.
+ */
+#define MAXMNTLEN 512
+#define MAXCSBUFS 32
+
+/*
+ * A summary of contiguous blocks of various sizes is maintained
+ * in each cylinder group. Normally this is set by the initial
+ * value of fs_maxcontig. To conserve space, a maximum summary size
+ * is set by FS_MAXCONTIG.
+ */
+#define FS_MAXCONTIG	16
+
+/*
+ * MINFREE gives the minimum acceptable percentage of file system
+ * blocks which may be free. If the freelist drops below this level
+ * only the superuser may continue to allocate blocks. This may
+ * be set to 0 if no reserve of free blocks is deemed necessary,
+ * however throughput drops by fifty percent if the file system
+ * is run at between 95% and 100% full; thus the minimum default
+ * value of fs_minfree is 5%. However, to get good clustering
+ * performance, 10% is a better choice. Hence we use 10% as our
+ * default value. With 10% free space, fragmentation is not a
+ * problem, so we choose to optimize for time.
+ *
+ * NOTE(review): MINFREE is defined as 5 below, not the 10% that the
+ * text above calls the default -- confirm which value is intended.
+ * DEFAULTOPT selects FS_OPTTIME (minimize allocation time).
+ */
+#define MINFREE		5
+#define DEFAULTOPT	FS_OPTTIME
+
+/*
+ * Per cylinder group information; summarized in blocks allocated
+ * from first cylinder group data blocks.  These blocks have to be
+ * read in from fs_csaddr (size fs_cssize) in addition to the
+ * super block.
+ *
+ * The same structure is also embedded as fs_cstotal in the super
+ * block and as cg_cs in the cylinder group structures.
+ *
+ * N.B. sizeof(struct csum) must be a power of two in order for
+ * the ``fs_cs'' macro to work (see below).
+ */
+struct csum {
+	long	cs_ndir;	/* number of directories */
+	long	cs_nbfree;	/* number of free blocks */
+	long	cs_nifree;	/* number of free inodes */
+	long	cs_nffree;	/* number of free frags */
+};
+
+/*
+ * Super block for a file system.
+ *
+ * The comments between the members group them by how their values
+ * are obtained.  The structure is variable length: fs_space[] is
+ * declared with one element but is "actually longer", and fs_sbsize
+ * holds the actual size of the super block.
+ */
+struct fs {
+	struct	fs *fs_link;		/* linked list of file systems */
+	struct	fs *fs_rlink;		/*     used for incore super blocks */
+	daddr_t	fs_sblkno;		/* addr of super-block in filesys */
+	daddr_t	fs_cblkno;		/* offset of cyl-block in filesys */
+	daddr_t	fs_iblkno;		/* offset of inode-blocks in filesys */
+	daddr_t	fs_dblkno;		/* offset of first data after cg */
+	long	fs_cgoffset;		/* cylinder group offset in cylinder */
+	long	fs_cgmask;		/* used to calc mod fs_ntrak */
+	time_t 	fs_time;    		/* last time written */
+	long	fs_size;		/* number of blocks in fs */
+	long	fs_dsize;		/* number of data blocks in fs */
+	long	fs_ncg;			/* number of cylinder groups */
+	long	fs_bsize;		/* size of basic blocks in fs */
+	long	fs_fsize;		/* size of frag blocks in fs */
+	long	fs_frag;		/* number of frags in a block in fs */
+/* these are configuration parameters */
+	long	fs_minfree;		/* minimum percentage of free blocks */
+	long	fs_rotdelay;		/* num of ms for optimal next block */
+	long	fs_rps;			/* disk revolutions per second */
+/* these fields can be computed from the others */
+	long	fs_bmask;		/* ``blkoff'' calc of blk offsets */
+	long	fs_fmask;		/* ``fragoff'' calc of frag offsets */
+	long	fs_bshift;		/* ``lblkno'' calc of logical blkno */
+	long	fs_fshift;		/* ``numfrags'' calc number of frags */
+/* these are configuration parameters */
+	long	fs_maxcontig;		/* max number of contiguous blks */
+	long	fs_maxbpg;		/* max number of blks per cyl group */
+/* these fields can be computed from the others */
+	long	fs_fragshift;		/* block to frag shift */
+	long	fs_fsbtodb;		/* fsbtodb and dbtofsb shift constant */
+	long	fs_sbsize;		/* actual size of super block */
+	long	fs_csmask;		/* csum block offset */
+	long	fs_csshift;		/* csum block number */
+	long	fs_nindir;		/* value of NINDIR */
+	long	fs_inopb;		/* value of INOPB */
+	long	fs_nspf;		/* value of NSPF */
+/* yet another configuration parameter */
+	long	fs_optim;		/* optimization preference, see below */
+/* these fields are derived from the hardware */
+	long	fs_npsect;		/* # sectors/track including spares */
+	long	fs_interleave;		/* hardware sector interleave */
+	long	fs_trackskew;		/* sector 0 skew, per track */
+	long	fs_headswitch;		/* head switch time, usec */
+	long	fs_trkseek;		/* track-to-track seek, usec */
+/* sizes determined by number of cylinder groups and their sizes */
+	daddr_t fs_csaddr;		/* blk addr of cyl grp summary area */
+	long	fs_cssize;		/* size of cyl grp summary area */
+	long	fs_cgsize;		/* cylinder group size */
+/* these fields are derived from the hardware */
+	long	fs_ntrak;		/* tracks per cylinder */
+	long	fs_nsect;		/* sectors per track */
+	long  	fs_spc;   		/* sectors per cylinder */
+/* this comes from the disk driver partitioning */
+	long	fs_ncyl;   		/* cylinders in file system */
+/* these fields can be computed from the others */
+	long	fs_cpg;			/* cylinders per group */
+	long	fs_ipg;			/* inodes per group */
+	long	fs_fpg;			/* blocks per group * fs_frag */
+/* this data must be re-computed after crashes */
+	struct	csum fs_cstotal;	/* cylinder summary information */
+/* these fields are cleared at mount time */
+	char   	fs_fmod;    		/* super block modified flag */
+	char   	fs_clean;    		/* file system is clean flag */
+	char   	fs_ronly;   		/* mounted read-only flag */
+	char   	fs_flags;   		/* currently unused flag */
+	char	fs_fsmnt[MAXMNTLEN];	/* name mounted on */
+/* these fields retain the current block allocation info */
+	long	fs_cgrotor;		/* last cg searched */
+	struct	csum *fs_csp[MAXCSBUFS];/* list of fs_cs info buffers */
+	long	fs_cpc;			/* cyl per cycle in postbl */
+	short	fs_opostbl[16][8];	/* old rotation block list head */
+	long	fs_sparecon[50];	/* reserved for future constants */
+	long	fs_contigsumsize;	/* size of cluster summary array */
+	long	fs_maxsymlinklen;	/* max length of an internal symlink */
+	long	fs_inodefmt;		/* format of on-disk inodes */
+	u_quad_t fs_maxfilesize;	/* maximum representable file size */
+	quad_t	fs_qbmask;		/* ~fs_bmask - for use with quad size */
+	quad_t	fs_qfmask;		/* ~fs_fmask - for use with quad size */
+	long	fs_state;		/* validate fs_clean field */
+	long	fs_postblformat;	/* format of positional layout tables */
+	long	fs_nrpos;		/* number of rotational positions */
+	long	fs_postbloff;		/* (short) rotation block list head */
+	long	fs_rotbloff;		/* (u_char) blocks for each rotation */
+	long	fs_magic;		/* magic number */
+	u_char	fs_space[1];		/* list of blocks for each rotation */
+/* actually longer */
+};
+/*
+ * Filesystem identification
+ */
+#define	FS_MAGIC	0x011954	/* the fast filesystem magic number */
+#define	FS_OKAY		0x7c269d38	/* superblock checksum */
+#define FS_42INODEFMT	-1		/* 4.2BSD inode format */
+#define FS_44INODEFMT	2		/* 4.4BSD inode format */
+/*
+ * Preference for optimization.
+ * (The fs_optim member's comment points here; DEFAULTOPT is one of these.)
+ */
+#define FS_OPTTIME	0	/* minimize allocation time */
+#define FS_OPTSPACE	1	/* minimize disk fragmentation */
+
+/*
+ * Rotational layout table format types
+ * (fs_postblformat is compared against FS_42POSTBLFMT by the
+ * fs_postbl() and fs_rotbl() macros.)
+ */
+#define FS_42POSTBLFMT		-1	/* 4.2BSD rotational table format */
+#define FS_DYNAMICPOSTBLFMT	1	/* dynamic rotational table format */
+/*
+ * Macros for access to superblock array structures
+ *
+ * When fs_postblformat is FS_42POSTBLFMT the tables are the fixed
+ * members fs_opostbl[] and fs_space[].  Otherwise they are found at
+ * byte offsets fs_postbloff and fs_rotbloff from the start of the
+ * super block, and fs_postbl() steps fs_nrpos shorts per cylinder.
+ */
+#define fs_postbl(fs, cylno) \
+    (((fs)->fs_postblformat == FS_42POSTBLFMT) \
+    ? ((fs)->fs_opostbl[cylno]) \
+    : ((short *)((char *)(fs) + (fs)->fs_postbloff) + (cylno) * (fs)->fs_nrpos))
+#define fs_rotbl(fs) \
+    (((fs)->fs_postblformat == FS_42POSTBLFMT) \
+    ? ((fs)->fs_space) \
+    : ((u_char *)((char *)(fs) + (fs)->fs_rotbloff)))
+
+/*
+ * The size of a cylinder group is calculated by CGSIZE. The maximum size
+ * is limited by the fact that cylinder groups are at most one block.
+ * Its size is derived from the size of the maps maintained in the
+ * cylinder group and the (struct cg) size.
+ *
+ * The cluster summary and cluster map terms are counted only when
+ * fs_contigsumsize is greater than zero.
+ */
+#define CGSIZE(fs) \
+    /* base cg */	(sizeof(struct cg) + sizeof(long) + \
+    /* blktot size */	(fs)->fs_cpg * sizeof(long) + \
+    /* blks size */	(fs)->fs_cpg * (fs)->fs_nrpos * sizeof(short) + \
+    /* inode map */	howmany((fs)->fs_ipg, NBBY) + \
+    /* block map */	howmany((fs)->fs_cpg * (fs)->fs_spc / NSPF(fs), NBBY) +\
+    /* if present */	((fs)->fs_contigsumsize <= 0 ? 0 : \
+    /* cluster sum */	(fs)->fs_contigsumsize * sizeof(long) + \
+    /* cluster map */	howmany((fs)->fs_cpg * (fs)->fs_spc / NSPB(fs), NBBY)))
+
+/*
+ * Convert cylinder group to base address of its global summary info.
+ *
+ * The expansion begins with the member name fs_csp, so the macro has
+ * to follow a super block member access, as in fs->fs_cs(fs, cg).
+ * indx shifted right by fs_csshift picks the fs_csp[] buffer and
+ * indx masked with ~fs_csmask picks the entry within that buffer.
+ *
+ * N.B. This macro assumes that sizeof(struct csum) is a power of two.
+ */
+#define fs_cs(fs, indx) \
+	fs_csp[(indx) >> (fs)->fs_csshift][(indx) & ~(fs)->fs_csmask]
+
+/*
+ * Cylinder group block for a file system.
+ *
+ * The cg_*off members are byte offsets from the start of the
+ * structure to the variable-length arrays and maps; the accessor
+ * macros that follow the structure add them to the cg pointer.
+ * cg_space[] is declared with one element but is "actually longer".
+ */
+#define	CG_MAGIC	0x090255
+struct	cg {
+	struct	cg *cg_link;		/* linked list of cyl groups */
+	long	cg_magic;		/* magic number */
+	time_t	cg_time;		/* time last written */
+	long	cg_cgx;			/* we are the cgx'th cylinder group */
+	short	cg_ncyl;		/* number of cyl's this cg */
+	short	cg_niblk;		/* number of inode blocks this cg */
+	long	cg_ndblk;		/* number of data blocks this cg */
+	struct	csum cg_cs;		/* cylinder summary information */
+	long	cg_rotor;		/* position of last used block */
+	long	cg_frotor;		/* position of last used frag */
+	long	cg_irotor;		/* position of last used inode */
+	long	cg_frsum[MAXFRAG];	/* counts of available frags */
+	long	cg_btotoff;		/* (long) block totals per cylinder */
+	long	cg_boff;		/* (short) free block positions */
+	long	cg_iusedoff;		/* (char) used inode map */
+	long	cg_freeoff;		/* (u_char) free block map */
+	long	cg_nextfreeoff;		/* (u_char) next available space */
+	long	cg_clustersumoff;	/* (long) counts of avail clusters */
+	long	cg_clusteroff;		/* (char) free cluster map */
+	long	cg_nclusterblks;	/* number of clusters this cg */
+	long	cg_sparecon[13];	/* reserved for future use */
+	u_char	cg_space[1];		/* space for cylinder group maps */
+/* actually longer */
+};
+/*
+ * Macros for access to cylinder group array structures
+ *
+ * cg_blktot, cg_blks, cg_inosused and cg_blksfree first test cg_magic:
+ * if it is not CG_MAGIC the block is read as a struct ocg and the
+ * fixed arrays of that structure are used, otherwise the array is
+ * located through the matching cg_*off byte offset.  cg_chkmagic
+ * accepts CG_MAGIC at either structure's cg_magic position.
+ * cg_clustersfree and cg_clustersum have no struct ocg alternative
+ * and always use the offsets.
+ */
+#define cg_blktot(cgp) \
+    (((cgp)->cg_magic != CG_MAGIC) \
+    ? (((struct ocg *)(cgp))->cg_btot) \
+    : ((long *)((char *)(cgp) + (cgp)->cg_btotoff)))
+#define cg_blks(fs, cgp, cylno) \
+    (((cgp)->cg_magic != CG_MAGIC) \
+    ? (((struct ocg *)(cgp))->cg_b[cylno]) \
+    : ((short *)((char *)(cgp) + (cgp)->cg_boff) + (cylno) * (fs)->fs_nrpos))
+#define cg_inosused(cgp) \
+    (((cgp)->cg_magic != CG_MAGIC) \
+    ? (((struct ocg *)(cgp))->cg_iused) \
+    : ((char *)((char *)(cgp) + (cgp)->cg_iusedoff)))
+#define cg_blksfree(cgp) \
+    (((cgp)->cg_magic != CG_MAGIC) \
+    ? (((struct ocg *)(cgp))->cg_free) \
+    : ((u_char *)((char *)(cgp) + (cgp)->cg_freeoff)))
+#define cg_chkmagic(cgp) \
+    ((cgp)->cg_magic == CG_MAGIC || ((struct ocg *)(cgp))->cg_magic == CG_MAGIC)
+#define cg_clustersfree(cgp) \
+    ((u_char *)((char *)(cgp) + (cgp)->cg_clusteroff))
+#define cg_clustersum(cgp) \
+    ((long *)((char *)(cgp) + (cgp)->cg_clustersumoff))
+
+/*
+ * The following structure is defined
+ * for compatibility with old file systems.
+ *
+ * Unlike struct cg, the arrays here have fixed sizes and cg_magic
+ * comes after them instead of near the start of the structure.
+ * cg_free[] is declared with one element but is "actually longer".
+ */
+struct	ocg {
+	struct	ocg *cg_link;		/* linked list of cyl groups */
+	struct	ocg *cg_rlink;		/*     used for incore cyl groups */
+	time_t	cg_time;		/* time last written */
+	long	cg_cgx;			/* we are the cgx'th cylinder group */
+	short	cg_ncyl;		/* number of cyl's this cg */
+	short	cg_niblk;		/* number of inode blocks this cg */
+	long	cg_ndblk;		/* number of data blocks this cg */
+	struct	csum cg_cs;		/* cylinder summary information */
+	long	cg_rotor;		/* position of last used block */
+	long	cg_frotor;		/* position of last used frag */
+	long	cg_irotor;		/* position of last used inode */
+	long	cg_frsum[8];		/* counts of available frags */
+	long	cg_btot[32];		/* block totals per cylinder */
+	short	cg_b[32][8];		/* positions of free blocks */
+	char	cg_iused[256];		/* used inode map */
+	long	cg_magic;		/* magic number */
+	u_char	cg_free[1];		/* free block map */
+/* actually longer */
+};
+
+/*
+ * Turn file system block numbers into disk block addresses.
+ * This maps file system blocks to device size blocks.
+ * Both directions are plain shifts by fs_fsbtodb, so dbtofsb
+ * discards the low-order bits of b.
+ */
+#define fsbtodb(fs, b)	((b) << (fs)->fs_fsbtodb)
+#define	dbtofsb(fs, b)	((b) >> (fs)->fs_fsbtodb)
+
+/*
+ * Cylinder group macros to locate things in cylinder groups.
+ * They calc file system addresses of cylinder group data structures.
+ *
+ * cgbase() is fs_fpg times the group number.  The other macros are
+ * relative to cgstart(), which adds fs_cgoffset * (c & ~fs_cgmask)
+ * to cgbase().
+ */
+#define	cgbase(fs, c)	((daddr_t)((fs)->fs_fpg * (c)))
+#define	cgdmin(fs, c)	(cgstart(fs, c) + (fs)->fs_dblkno)	/* 1st data */
+#define	cgimin(fs, c)	(cgstart(fs, c) + (fs)->fs_iblkno)	/* inode blk */
+#define	cgsblock(fs, c)	(cgstart(fs, c) + (fs)->fs_sblkno)	/* super blk */
+#define	cgtod(fs, c)	(cgstart(fs, c) + (fs)->fs_cblkno)	/* cg block */
+#define cgstart(fs, c)							\
+	(cgbase(fs, c) + (fs)->fs_cgoffset * ((c) & ~((fs)->fs_cgmask)))
+
+/*
+ * Macros for handling inode numbers:
+ *     inode number to file system block offset.
+ *     inode number to cylinder group number.
+ *     inode number to file system block address.
+ *
+ * ino_to_fsba() starts from cgimin() of the inode's group and adds
+ * (index within the group / INOPB) blocks, converted to frags.
+ * ino_to_fsbo() is the inode's index within that block.
+ */
+#define	ino_to_cg(fs, x)	((x) / (fs)->fs_ipg)
+#define	ino_to_fsba(fs, x)						\
+	((daddr_t)(cgimin(fs, ino_to_cg(fs, x)) +			\
+	    (blkstofrags((fs), (((x) % (fs)->fs_ipg) / INOPB(fs))))))
+#define	ino_to_fsbo(fs, x)	((x) % INOPB(fs))
+
+/*
+ * Give cylinder group number for a file system block.
+ * Give cylinder group block number for a file system block.
+ * (Quotient and remainder of d divided by fs_fpg, respectively.)
+ */
+#define	dtog(fs, d)	((d) / (fs)->fs_fpg)
+#define	dtogd(fs, d)	((d) % (fs)->fs_fpg)
+
+/*
+ * Extract the bits for a block from a map.
+ * Compute the cylinder and rotational position of a cyl block addr.
+ *
+ * blkmap() shifts the map byte holding bit loc right by loc % NBBY
+ * and keeps the low fs_frag bits (the 0xff mask presumes NBBY is 8 --
+ * confirm).  cbtocylno() and cbtorpos() first convert bno to sectors
+ * by multiplying with NSPF(fs).
+ */
+#define blkmap(fs, map, loc) \
+    (((map)[(loc) / NBBY] >> ((loc) % NBBY)) & (0xff >> (NBBY - (fs)->fs_frag)))
+#define cbtocylno(fs, bno) \
+    ((bno) * NSPF(fs) / (fs)->fs_spc)
+#define cbtorpos(fs, bno) \
+    (((bno) * NSPF(fs) % (fs)->fs_spc / (fs)->fs_nsect * (fs)->fs_trackskew + \
+     (bno) * NSPF(fs) % (fs)->fs_spc % (fs)->fs_nsect * (fs)->fs_interleave) % \
+     (fs)->fs_nsect * (fs)->fs_nrpos / (fs)->fs_npsect)
+
+/*
+ * The following macros optimize certain frequently calculated
+ * quantities by using shifts and masks in place of divisions
+ * modulos and multiplications.
+ *
+ * blkoff, fragoff, blkroundup and fragroundup use the quad-sized
+ * masks fs_qbmask and fs_qfmask.  fragnum and blknum mask with
+ * fs_frag - 1, which matches the expressions in their comments
+ * only when fs_frag is a power of two.
+ */
+#define blkoff(fs, loc)		/* calculates (loc % fs->fs_bsize) */ \
+	((loc) & (fs)->fs_qbmask)
+#define fragoff(fs, loc)	/* calculates (loc % fs->fs_fsize) */ \
+	((loc) & (fs)->fs_qfmask)
+#define lblktosize(fs, blk)	/* calculates (blk * fs->fs_bsize) */ \
+	((blk) << (fs)->fs_bshift)
+#define lblkno(fs, loc)		/* calculates (loc / fs->fs_bsize) */ \
+	((loc) >> (fs)->fs_bshift)
+#define numfrags(fs, loc)	/* calculates (loc / fs->fs_fsize) */ \
+	((loc) >> (fs)->fs_fshift)
+#define blkroundup(fs, size)	/* calculates roundup(size, fs->fs_bsize) */ \
+	(((size) + (fs)->fs_qbmask) & (fs)->fs_bmask)
+#define fragroundup(fs, size)	/* calculates roundup(size, fs->fs_fsize) */ \
+	(((size) + (fs)->fs_qfmask) & (fs)->fs_fmask)
+#define fragstoblks(fs, frags)	/* calculates (frags / fs->fs_frag) */ \
+	((frags) >> (fs)->fs_fragshift)
+#define blkstofrags(fs, blks)	/* calculates (blks * fs->fs_frag) */ \
+	((blks) << (fs)->fs_fragshift)
+#define fragnum(fs, fsb)	/* calculates (fsb % fs->fs_frag) */ \
+	((fsb) & ((fs)->fs_frag - 1))
+#define blknum(fs, fsb)		/* calculates rounddown(fsb, fs->fs_frag) */ \
+	((fsb) &~ ((fs)->fs_frag - 1))
+
+/*
+ * Determine the number of available frags given a
+ * percentage to hold in reserve
+ *
+ * The result is the free blocks (as frags) plus the free frags from
+ * fs_cstotal, minus percentreserved percent of fs_dsize; it is
+ * negative when the free space is smaller than the reserve.
+ */
+#define freespace(fs, percentreserved) \
+	(blkstofrags((fs), (fs)->fs_cstotal.cs_nbfree) + \
+	(fs)->fs_cstotal.cs_nffree - ((fs)->fs_dsize * (percentreserved) / 100))
+
+/*
+ * Determining the size of a file block in the file system.
+ *
+ * A block whose number is NDADDR or more, or which lies wholly
+ * within the file size, is fs_bsize bytes.  Otherwise the size is
+ * the part of the file size that falls in its last block, rounded
+ * up to a whole number of frags.  blksize() reads the size from
+ * i_size, dblksize() from di_size.
+ */
+#define blksize(fs, ip, lbn) \
+	(((lbn) >= NDADDR || (ip)->i_size >= ((lbn) + 1) << (fs)->fs_bshift) \
+	    ? (fs)->fs_bsize \
+	    : (fragroundup(fs, blkoff(fs, (ip)->i_size))))
+#define dblksize(fs, dip, lbn) \
+	(((lbn) >= NDADDR || (dip)->di_size >= ((lbn) + 1) << (fs)->fs_bshift) \
+	    ? (fs)->fs_bsize \
+	    : (fragroundup(fs, blkoff(fs, (dip)->di_size))))
+
+/*
+ * Number of disk sectors per block; assumes DEV_BSIZE byte sector size.
+ * NSPF is the stored per-frag count fs_nspf; NSPB scales it up by
+ * fs_fragshift.
+ */
+#define	NSPB(fs)	((fs)->fs_nspf << (fs)->fs_fragshift)
+#define	NSPF(fs)	((fs)->fs_nspf)
+
+/*
+ * INOPB is the number of inodes in a secondary storage block.
+ * INOPF is the same count scaled down by fs_fragshift, i.e. per frag.
+ */
+#define	INOPB(fs)	((fs)->fs_inopb)
+#define	INOPF(fs)	((fs)->fs_inopb >> (fs)->fs_fragshift)
+
+/*
+ * NINDIR is the number of indirects in a file system block.
+ */
+#define	NINDIR(fs)	((fs)->fs_nindir)
+
+extern int inside[], around[];
+extern u_char *fragtbl[];
diff --git a/sys/ufs/lfs/README b/sys/ufs/lfs/README
new file mode 100644
index 00000000000..724b18fb9ea
--- /dev/null
+++ b/sys/ufs/lfs/README
@@ -0,0 +1,139 @@
+#	@(#)README	8.1 (Berkeley) 6/11/93
+
+The file system is reasonably stable, but incomplete.  There are
+places where cleaning performance can be improved dramatically (see
+comments in lfs_syscalls.c).  For details on the implementation,
+performance and why garbage collection always wins, see Dr. Margo
+Seltzer's thesis available for anonymous ftp from toe.cs.berkeley.edu,
+in the directory pub/personal/margo/thesis.ps.Z, or the January 1993
+USENIX paper.
+
+Missing Functionality:
+	Multiple block sizes and/or fragments are not yet implemented.
+
+----------
+The disk is laid out in segments.  The first segment starts 8K into the
+disk (the first 8K is used for boot information).  Each segment is composed
+of the following:
+
+	An optional super block
+	One or more groups of:
+		segment summary
+		0 or more data blocks
+		0 or more inode blocks
+
+The segment summary and inode/data blocks start after the super block (if
+present), and grow toward the end of the segment.
+
+	_______________________________________________
+	|         |            |         |            |
+	| summary | data/inode | summary | data/inode |
+	|  block  |   blocks   |  block  |   blocks   | ...
+	|_________|____________|_________|____________|
+
+The data/inode blocks following a summary block are described by the
+summary block.  In order to permit the segment to be written in any order
+and in a forward direction only, a checksum is calculated across the
+blocks described by the summary.  Additionally, the summary is checksummed
+and timestamped.  Both of these are intended for recovery; the former is
+to make it easy to determine that it *is* a summary block and the latter
+is to make it easy to determine when recovery is finished for partially
+written segments.  These checksums are also used by the cleaner.
+
+	Summary block (detail)
+	________________
+	| sum cksum    |
+	| data cksum   |
+	| next segment |
+	| timestamp    |
+	| FINFO count  |
+	| inode count  |
+	| flags        |
+	|______________|
+	|   FINFO-1    | 0 or more file info structures, identifying the
+	|     .        | blocks in the segment.
+	|     .        |
+	|     .        |
+	|   FINFO-N    |
+	|   inode-N    |
+	|     .        |
+	|     .        |
+	|     .        | 0 or more inode daddr_t's, identifying the inode
+	|   inode-1    | blocks in the segment.
+	|______________|
+
+Inode blocks are blocks of on-disk inodes in the same format as those in
+the FFS.  However, di_inumber contains the inode number of the inode so we
+can find a particular inode on a page.  They are packed page_size /
+sizeof(inode) to a block.  Data blocks are exactly as in the FFS.  Both
+inodes and data blocks move around the file system at will.
+
+The file system is described by a super-block which is replicated and
+occurs as the first block of the first and other segments.  (The maximum
+number of super-blocks is LFS_MAXNUMSB.)  Each super-block maintains a list
+of the disk addresses of all the super-blocks.  The super-block maintains
+a small amount of checkpoint information, essentially just enough to find
+the inode for the IFILE (fs->lfs_idaddr).
+
+The IFILE is visible in the file system, as inode number LFS_IFILE_INUM.  It
+contains information shared between the kernel and various user processes.
+
+	Ifile (detail)
+	________________
+	| cleaner info | Cleaner information per file system.  (Page
+	|              | granularity.)
+	|______________|
+	| segment      | Space available and last modified times per
+	| usage table  | segment.  (Page granularity.)
+	|______________|
+	|   IFILE-1    | Per inode status information: current version #,
+	|     .        | if currently allocated, last access time and
+	|     .        | current disk address of containing inode block.
+	|     .        | If current disk address is LFS_UNUSED_DADDR, the
+	|   IFILE-N    | inode is not in use, and it's on the free list.
+	|______________|
+
+
+First Segment at Creation Time:
+_____________________________________________________________
+|        |       |         |       |       |       |       |
+| 8K pad | Super | summary | inode | ifile | root  | l + f |
+|        | block |         | block |       | dir   | dir   |
+|________|_______|_________|_______|_______|_______|_______|
+	  ^
+           Segment starts here.
+
+Some differences from the Sprite LFS implementation.
+
+1. The Sprite LFS implementation placed the ifile metadata and the super block
+   at fixed locations.  This implementation replicates the super block
+   and puts each at a fixed location.  The checkpoint data is divided into
+   two parts -- just enough information to find the IFILE is stored in
+   two of the super blocks, although it is not toggled between them as in
+   the Sprite implementation.  (This was deliberate, to avoid a single
+   point of failure.)  The remaining checkpoint information is treated as
+   a regular file, which means that the cleaner info, the segment usage
+   table and the ifile meta-data are stored in normal log segments.
+   (Tastes great, less filling...)
+
+2. The segment layout is radically different in Sprite; this implementation
+   uses something a lot like network framing, where data/inode blocks are
+   written asynchronously, and a checksum is used to validate any set of
+   summary and data/inode blocks.  Sprite writes summary blocks synchronously
+   after the data/inode blocks have been written and the existence of the
+   summary block validates the data/inode blocks.  This permits us to write
+   everything contiguously, even partial segments and their summaries, whereas
+   Sprite is forced to seek (from the end of the data inode to the summary
+   which lives at the end of the segment).  Additionally, writing the summary
+   synchronously should cost about 1/2 a rotation per summary.
+
+3. Sprite LFS distinguishes between different types of blocks in the segment.
+   Other than inode blocks and data blocks, we don't.
+
+4. Sprite LFS traverses the IFILE looking for free inodes.  We maintain a
+   free list threaded through the IFILE entries.
+
+5. The cleaner runs in user space, as opposed to kernel space.  It shares
+   information with the kernel by reading/writing the IFILE and through
+   cleaner specific system calls.
+
diff --git a/sys/ufs/lfs/TODO b/sys/ufs/lfs/TODO
new file mode 100644
index 00000000000..ace8f5eaef6
--- /dev/null
+++ b/sys/ufs/lfs/TODO
@@ -0,0 +1,116 @@
+#	@(#)TODO	8.1 (Berkeley) 6/11/93
+
+NOTE: Changed the lookup on a page of inodes to search from the back
+in case the same inode gets written twice on the same page.
+
+Make sure that if you are writing a file, but not all the blocks
+make it into a single segment, that you do not write the inode in
+that segment.
+
+Keith:
+	Why not delete the lfs_bmapv call, just mark everything dirty
+		that isn't deleted/truncated?  Get some numbers about
+		what percentage of the stuff that the cleaner thinks
+		might be live is live.  If it's high, get rid of lfs_bmapv.
+
+	There is a nasty problem in that it may take *more* room to write
+	the data to clean a segment than is returned by the new segment
+	because of indirect blocks in segment 2 being dirtied by the data
+	being copied into the log from segment 1.  The suggested solution
+	at this point is to detect it when we have no space left on the
+	filesystem, write the extra data into the last segment (leaving
+	no clean ones), make it a checkpoint and shut down the file system
+	for fixing by a utility reading the raw partition.  Argument is
+	that this should never happen and is practically impossible to fix
+	since the cleaner would have to theoretically build a model of the
+	entire filesystem in memory to detect the condition occurring.
+	A file coalescing cleaner will help avoid the problem, and one
+	that reads/writes from the raw disk could fix it.
+
+DONE	Currently, inodes are being flushed to disk synchronously upon
+		creation -- see ufs_makeinode.  However, only the inode
+		is flushed, the directory "name" is written using VOP_BWRITE,
+		so it's not synchronous.  Possible solutions: 1: get some
+		ordering in the writes so that inode/directory entries get
+		stuffed into the same segment.  2: do both synchronously
+		3: add Mendel's information into the stream so we log
+		creation/deletion of inodes.  4: do some form of partial
+		segment when changing the inode (creation/deletion/rename).
+DONE	Fix i_block increment for indirect blocks.
+	If the file system is tar'd, extracted on top of another LFS, the
+		IFILE ain't worth diddly.  Is the cleaner writing the IFILE?
+		If not, let's make it read-only.
+DONE	Delete unnecessary source from utils in main-line source tree.
+DONE	Make sure that we're counting meta blocks in the inode i_block count.
+	Overlap the version and nextfree fields in the IFILE
+DONE	Vinvalbuf (Kirk):
+		Why writing blocks that are no longer useful?
+		Are the semantics of close such that blocks have to be flushed?
+		How specify in the buf chain the blocks that don't need
+		to be written?  (Different numbering of indirect blocks.)
+
+Margo:
+	Change so that only search one sector of inode block file for the
+		inode by using sector addresses in the ifile instead of
+		logical disk addresses.
+	Fix the use of the ifile version field to use the generation
+		number instead.
+DONE	Unmount; not doing a bgetvp (VHOLD) in lfs_newbuf call.
+DONE	Document in the README file where the checkpoint information is
+		on disk.
+	Variable block sizes (Margo/Keith).
+	Switch the byte accounting to sector accounting.
+DONE	Check lfs.h and make sure that the #defines/structures are all
+		actually needed.
+DONE	Add a check in lfs_segment.c so that if the segment is empty,
+		we don't write it.
+	Need to keep vnode v_numoutput up to date for pending writes?
+DONE	USENIX paper (Carl/Margo).
+
+
+Evelyn:
+	lfsck:	If a file that's being executed is deleted, the version
+		number isn't updated and lfsck must detect this; it's like an
+		inode no directory references, so reattach it in lost+found.
+	Recovery/fsck.
+
+Carl:
+	Investigate: clustering of reads (if blocks in the segment are ordered,
+		should read them all) and writes (McVoy paper).
+	Investigate: should the access time be part of the IFILE:
+		pro: theoretically, saves disk writes
+		con: caching inodes should obviate this advantage
+		     the IFILE is already humongous
+	Cleaner.
+	Port to OSF/1 (Carl/Keith).
+	Currently there's no notion of write error checking.
+		+ Failed data/inode writes should be rescheduled (kernel level
+		  bad blocking).
+		+ Failed superblock writes should cause selection of new
+		  superblock for checkpointing.
+
+FUTURE FANTASIES: ============
+
++ unrm, versioning
++ transactions
++ extended cleaner policies (hot/cold data, data placement)
+
+==============================
+Problem with the concept of multiple buffer headers referencing the segment:
+Positives:
+	Don't lock down 1 segment per file system of physical memory.
+	Don't copy from buffers to segment memory.
+	Don't tie down the bus to transfer 1M.
+	Works on controllers supporting less than large transfers.
+	Disk can start writing immediately instead of waiting 1/2 rotation
+	    and the full transfer.
+Negatives:
+	Have to do segment write then segment summary write, since the latter
+	is what verifies that the segment is okay.  (Is there another way
+	to do this?)
+==============================
+
+The algorithm for selecting the disk addresses of the super-blocks
+has to be available to the user program which checks the file system.
+
+(Currently in newfs, becomes a common subroutine.)
diff --git a/sys/ufs/lfs/lfs.h b/sys/ufs/lfs/lfs.h
new file mode 100644
index 00000000000..87b8c22ccc0
--- /dev/null
+++ b/sys/ufs/lfs/lfs.h
@@ -0,0 +1,353 @@
+/*-
+ * Copyright (c) 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)lfs.h	8.3 (Berkeley) 9/23/93
+ */
+
+#define	LFS_LABELPAD	8192		/* LFS label size */
+#define	LFS_SBPAD	8192		/* LFS superblock size */
+
+/*
+ * XXX
+ * This is a kluge and NEEDS to go away.
+ *
+ * Right now, ufs code handles most of the calls for directory operations
+ * such as create, mkdir, link, etc.  As a result VOP_UPDATE is being
+ * called with waitfor set (since ffs does these things synchronously).
+ * Since LFS does not want to do these synchronously, we treat the last
+ * argument to lfs_update as a set of flags.  If LFS_SYNC is set, then
+ * the update should be synchronous, if not, do it asynchronously.
+ * Unfortunately, this means that LFS won't work with NFS yet because
+ * NFS goes through paths that will make normal calls to ufs which will
+ * call lfs with a last argument of 1.
+ */
+#define	LFS_SYNC	0x02
+
+/* On-disk and in-memory checkpoint segment usage structure. */
+typedef struct segusage SEGUSE;
+struct segusage {
+	u_long	su_nbytes;		/* number of live bytes */
+	u_long	su_lastmod;		/* SEGUSE last modified timestamp */
+	u_short	su_nsums;		/* number of summaries in segment */
+	u_short	su_ninos;		/* number of inode blocks in seg */
+#define	SEGUSE_ACTIVE		0x1	/* segment is currently being written */
+#define	SEGUSE_DIRTY		0x2	/* segment has data in it */
+#define	SEGUSE_SUPERBLOCK	0x4	/* segment contains a superblock */
+	u_long	su_flags;		/* SEGUSE_* flags above */
+};
+
+#define	SEGUPB(fs)	(1 << (fs)->lfs_sushift)
+#define	SEGTABSIZE_SU(fs)	/* ceil(lfs_nseg / SEGUPB(fs)) */	\
+	(((fs)->lfs_nseg + SEGUPB(fs) - 1) >> (fs)->lfs_sushift)
+
+/* On-disk file information.  One per file with data blocks in the segment. */
+typedef struct finfo FINFO;
+struct finfo {
+	u_long	fi_nblocks;		/* number of blocks */
+	u_long	fi_version;		/* version number */
+	u_long	fi_ino;			/* inode number */
+	long	fi_blocks[1];		/* array of logical block numbers */
+};
+
+/* On-disk and in-memory super block. */
+struct lfs {
+#define	LFS_MAGIC	0x070162
+	u_long	lfs_magic;		/* magic number */
+#define	LFS_VERSION	1
+	u_long	lfs_version;		/* version number */
+
+	u_long	lfs_size;		/* number of blocks in fs */
+	u_long	lfs_ssize;		/* number of blocks per segment */
+	u_long	lfs_dsize;		/* number of disk blocks in fs */
+	u_long	lfs_bsize;		/* file system block size */
+	u_long	lfs_fsize;		/* size of frag blocks in fs */
+	u_long	lfs_frag;		/* number of frags in a block in fs */
+
+/* Checkpoint region. */
+	ino_t	lfs_free;		/* head of the free inode list */
+	u_long	lfs_bfree;		/* number of free disk blocks */
+	u_long	lfs_nfiles;		/* number of allocated inodes */
+	long	lfs_avail;		/* blocks available for writing */
+	u_long  lfs_uinodes;		/* inodes in cache not yet on disk */
+	daddr_t	lfs_idaddr;		/* inode file disk address */
+	ino_t	lfs_ifile;		/* inode file inode number */
+	daddr_t	lfs_lastseg;		/* address of last segment written */
+	daddr_t	lfs_nextseg;		/* address of next segment to write */
+	daddr_t	lfs_curseg;		/* current segment being written */
+	daddr_t	lfs_offset;		/* offset in curseg for next partial */
+	daddr_t	lfs_lastpseg;		/* address of last partial written */
+	u_long	lfs_tstamp;		/* time stamp */
+
+/* These are configuration parameters. */
+	u_long	lfs_minfree;		/* minimum percentage of free blocks */
+
+/* These fields can be computed from the others. */
+	u_quad_t lfs_maxfilesize;	/* maximum representable file size */
+	u_long	lfs_dbpseg;		/* disk blocks per segment */
+	u_long	lfs_inopb;		/* inodes per block */
+	u_long	lfs_ifpb;		/* IFILE entries per block */
+	u_long	lfs_sepb;		/* SEGUSE entries per block */
+	u_long	lfs_nindir;		/* indirect pointers per block */
+	u_long	lfs_nseg;		/* number of segments */
+	u_long	lfs_nspf;		/* number of sectors per fragment */
+	u_long	lfs_cleansz;		/* cleaner info size in blocks */
+	u_long	lfs_segtabsz;		/* segment table size in blocks */
+
+	u_long	lfs_segmask;		/* calculate offset within a segment */
+	u_long	lfs_segshift;		/* fast mult/div for segments */
+	u_long	lfs_bmask;		/* calc block offset from file offset */
+	u_long	lfs_bshift;		/* calc block number from file offset */
+	u_long	lfs_ffmask;		/* calc frag offset from file offset */
+	u_long	lfs_ffshift;		/* fast mult/div for frag from file */
+	u_long	lfs_fbmask;		/* calc frag offset from block offset */
+	u_long	lfs_fbshift;		/* fast mult/div for frag from block */
+	u_long	lfs_fsbtodb;		/* fsbtodb and dbtofsb shift constant */
+	u_long	lfs_sushift;		/* fast mult/div for segusage table */
+
+#define	LFS_MIN_SBINTERVAL	5	/* minimum superblock segment spacing */
+#define	LFS_MAXNUMSB		10	/* max superblock disk offsets */
+	daddr_t	lfs_sboffs[LFS_MAXNUMSB];
+
+/* These fields are set at mount time and are meaningless on disk. */
+	struct	segment *lfs_sp;	/* current segment being written */
+	struct	vnode *lfs_ivnode;	/* vnode for the ifile */
+	u_long	lfs_seglock;		/* single-thread the segment writer */
+	pid_t	lfs_lockpid;		/* pid of lock holder */
+	u_long	lfs_iocount;		/* number of ios pending */
+	u_long	lfs_writer;		/* don't allow any dirops to start */
+	u_long	lfs_dirops;		/* count of active directory ops */
+	u_long	lfs_doifile;		/* Write ifile blocks on next write */
+	u_long	lfs_nactive;		/* Number of segments since last ckp */
+	u_char	lfs_fmod;		/* super block modified flag */
+	u_char	lfs_clean;		/* file system is clean flag */
+	u_char	lfs_ronly;		/* mounted read-only flag */
+	u_char	lfs_flags;		/* currently unused flag */
+	u_char	lfs_fsmnt[MNAMELEN];	/* name mounted on */
+	u_char	pad[3];			/* long-align */
+
+/* Checksum; valid on disk. */
+	u_long	lfs_cksum;		/* checksum for superblock checking */
+};
+
+/*
+ * Inode 0 is the out-of-band inode number, inode 1 is the inode number for
+ * the IFILE, the root inode is 2 and the lost+found inode is 3.
+ */
+
+/* Fixed inode numbers. */
+#define	LFS_UNUSED_INUM	0		/* out of band inode number */
+#define	LFS_IFILE_INUM	1		/* IFILE inode number */
+#define	LOSTFOUNDINO	3		/* lost+found inode number */
+#define	LFS_FIRST_INUM	4		/* first free inode number */
+
+/* Negative logical block numbers naming the indirect blocks of an inode. */
+#define	S_INDIR(fs)	(-NDADDR)
+#define	D_INDIR(fs)	(S_INDIR(fs) - NINDIR(fs) - 1)
+#define	T_INDIR(fs)	(D_INDIR(fs) - NINDIR(fs) * NINDIR(fs) - 1)
+
+/* Unassigned disk address (parenthesized so it expands as one operand). */
+#define	UNASSIGNED	(-1)
+
+/* Unused logical block number */
+#define LFS_UNUSED_LBN	(-1)
+
+typedef struct ifile IFILE;
+struct ifile {
+	u_long	if_version;		/* inode version number */
+#define	LFS_UNUSED_DADDR	0	/* out-of-band daddr */
+	daddr_t	if_daddr;		/* inode daddr, or LFS_UNUSED_DADDR */
+	ino_t	if_nextfree;		/* next inode on the free list */
+};
+
+/*
+ * Cleaner information structure.  This resides in the ifile and is used
+ * to pass information between the cleaner and the kernel.
+ */
+typedef struct _cleanerinfo {
+	u_long	clean;			/* K: number of clean segments */
+	u_long	dirty;			/* K: number of dirty segments */
+} CLEANERINFO;
+
+#define	CLEANSIZE_SU(fs)						\
+	((sizeof(CLEANERINFO) + (fs)->lfs_bsize - 1) >> (fs)->lfs_bshift)
+
+/*
+ * All summary blocks are the same size, so we can always read a summary
+ * block easily from a segment.
+ */
+#define	LFS_SUMMARY_SIZE	512
+
+/* On-disk segment summary information */
+typedef struct segsum SEGSUM;
+struct segsum {
+	u_long	ss_sumsum;		/* check sum of summary block */
+	u_long	ss_datasum;		/* check sum of data */
+	daddr_t	ss_next;		/* next segment */
+	u_long	ss_create;		/* creation time stamp */
+	u_short	ss_nfinfo;		/* number of file info structures */
+	u_short	ss_ninos;		/* number of inodes in summary */
+#define	SS_DIROP	0x01		/* segment begins a dirop */
+#define	SS_CONT		0x02		/* more partials to finish this write*/
+	u_short	ss_flags;		/* used for directory operations */
+	u_short	ss_pad;			/* extra space */
+	/* FINFO's and inode daddr's... */
+};
+
+/* NINDIR is the number of indirects in a file system block. */
+#define	NINDIR(fs)	((fs)->lfs_nindir)
+
+/* INOPB is the number of inodes in a secondary storage block. */
+#define	INOPB(fs)	((fs)->lfs_inopb)
+
+#define	blksize(fs)		((fs)->lfs_bsize)
+#define	blkoff(fs, loc)		((loc) & (fs)->lfs_bmask)
+#define	fsbtodb(fs, b)		((b) << (fs)->lfs_fsbtodb)
+#define	dbtofsb(fs, b)		((b) >> (fs)->lfs_fsbtodb)
+#define	lblkno(fs, loc)		((loc) >> (fs)->lfs_bshift)
+#define	lblktosize(fs, blk)	((blk) << (fs)->lfs_bshift)
+#define numfrags(fs, loc)	/* block number of loc; LFS has no frags yet */ \
+	((loc) >> (fs)->lfs_bshift)
+
+#define	datosn(fs, daddr)	/* disk address to segment number */	\
+	(((daddr) - (fs)->lfs_sboffs[0]) / fsbtodb((fs), (fs)->lfs_ssize))
+#define sntoda(fs, sn) 		/* segment number to disk address */	\
+	((daddr_t)((sn) * ((fs)->lfs_ssize << (fs)->lfs_fsbtodb) +	\
+	    (fs)->lfs_sboffs[0]))
+
+/* Read ifile block 0 into BP and point CP at it; panics if bread fails. */
+#define LFS_CLEANERINFO(CP, F, BP) {					\
+	VTOI((F)->lfs_ivnode)->i_flag |= IN_ACCESS;			\
+	if (bread((F)->lfs_ivnode,					\
+	    (daddr_t)0, (F)->lfs_bsize, NOCRED, &(BP)))			\
+		panic("lfs: ifile read");				\
+	(CP) = (CLEANERINFO *)(BP)->b_data;				\
+}
+
+/* Read the ifile block with inode IN's entry into BP; IP points at the entry. */
+#define	LFS_IENTRY(IP, F, IN, BP) {					\
+	int _e;								\
+	VTOI((F)->lfs_ivnode)->i_flag |= IN_ACCESS;			\
+	if (_e = bread((F)->lfs_ivnode,					\
+	    (IN) / (F)->lfs_ifpb + (F)->lfs_cleansz + (F)->lfs_segtabsz,\
+	    (F)->lfs_bsize, NOCRED, &(BP)))				\
+		panic("lfs: ifile read %d", _e);			\
+	(IP) = (IFILE *)(BP)->b_data + (IN) % (F)->lfs_ifpb;		\
+}
+
+/* Read the ifile block with segment IN's usage entry into BP; SP points at it. */
+#define	LFS_SEGENTRY(SP, F, IN, BP) {					\
+	int _e;								\
+	VTOI((F)->lfs_ivnode)->i_flag |= IN_ACCESS;			\
+	if (_e = bread((F)->lfs_ivnode,					\
+	    ((IN) >> (F)->lfs_sushift) + (F)->lfs_cleansz,		\
+	    (F)->lfs_bsize, NOCRED, &(BP)))				\
+		panic("lfs: ifile read: %d", _e);			\
+	(SP) = (SEGUSE *)(BP)->b_data + ((IN) & (F)->lfs_sepb - 1);	\
+}
+
+/*
+ * Determine if there is enough room currently available to write db
+ * disk blocks.  We need enough blocks for the new blocks, the current
+ * inode blocks, a summary block, plus potentially the ifile inode and
+ * the segment usage table, plus an ifile page.
+ */
+#define LFS_FITS(fs, db)						\
+	((long)(((db) + ((fs)->lfs_uinodes + INOPB((fs))) / INOPB((fs)) + \
+	fsbtodb((fs), 1) + LFS_SUMMARY_SIZE / DEV_BSIZE +		\
+	(fs)->lfs_segtabsz)) < (fs)->lfs_avail)
+
+/* True if bp's vnode is the ifile; bp may be any buf-pointer expression. */
+#define IS_IFILE(bp)	(VTOI((bp)->b_vp)->i_number == LFS_IFILE_INUM)
+
+/*
+ * Structures used by lfs_bmapv and lfs_markv to communicate information
+ * about inodes and data blocks.
+ */
+typedef struct block_info {
+	ino_t	bi_inode;		/* inode # */
+	daddr_t	bi_lbn;			/* logical block w/in file */
+	daddr_t	bi_daddr;		/* disk address of block */
+	time_t	bi_segcreate;		/* origin segment create time */
+	int	bi_version;		/* file version number */
+	void	*bi_bp;			/* data buffer */
+} BLOCK_INFO;
+
+/* In-memory description of a segment about to be written. */
+struct segment {
+	struct lfs	*fs;		/* file system pointer */
+	struct buf	**bpp;		/* pointer to buffer array */
+	struct buf	**cbpp;		/* pointer to next available bp */
+	struct buf	**start_bpp;	/* pointer to first bp in this set */
+	struct buf	*ibp;		/* buffer pointer to inode page */
+	struct finfo	*fip;		/* current fileinfo pointer */
+	struct vnode	*vp;		/* vnode being gathered */
+	void	*segsum;		/* segment summary info */
+	u_long	ninodes;		/* number of inodes in this segment */
+	u_long	seg_bytes_left;		/* bytes left in segment */
+	u_long	sum_bytes_left;		/* bytes left in summary block */
+	u_long	seg_number;		/* number of this segment */
+	daddr_t *start_lbp;		/* beginning lbn for this set */
+#define	SEGM_CKP	0x01		/* doing a checkpoint */
+#define	SEGM_CLEAN	0x02		/* cleaner call; don't sort */
+#define	SEGM_SYNC	0x04		/* wait for segment */
+	u_long	seg_flags;		/* run-time flags for this segment */
+};
+
+#define ISSPACE(F, BB, C)	/* uid 0 may dip into the minfree reserve */ \
+	(((C)->cr_uid == 0 && (F)->lfs_bfree >= (BB)) ||		\
+	((C)->cr_uid != 0 && IS_FREESPACE(F, BB)))
+
+#define IS_FREESPACE(F, BB)	/* BB blocks fit above the minfree reserve */ \
+	((F)->lfs_bfree > ((F)->lfs_dsize * (F)->lfs_minfree / 100 + (BB)))
+
+#define ISSPACE_XXX(F, BB)	/* credential-free check, ignores minfree */ \
+	((F)->lfs_bfree >= (BB))
+
+#define DOSTATS
+#ifdef DOSTATS
+/* Statistics Counters */
+struct lfs_stats {
+	int	segsused;
+	int	psegwrites;
+	int	psyncwrites;
+	int	pcleanwrites;
+	int	blocktot;
+	int	cleanblocks;
+	int	ncheckpoints;
+	int	nwrites;
+	int	nsync_writes;
+	int	wait_exceeded;
+	int	write_exceeded;
+	int	flush_invoked;
+};
+extern struct lfs_stats lfs_stats;
+#endif
diff --git a/sys/ufs/lfs/lfs_alloc.c b/sys/ufs/lfs/lfs_alloc.c
new file mode 100644
index 00000000000..3f06c813930
--- /dev/null
+++ b/sys/ufs/lfs/lfs_alloc.c
@@ -0,0 +1,251 @@
+/*
+ * Copyright (c) 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)lfs_alloc.c	8.4 (Berkeley) 1/4/94
+ */
+
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/buf.h>
+#include <sys/vnode.h>
+#include <sys/syslog.h>
+#include <sys/mount.h>
+#include <sys/malloc.h>
+
+#include <vm/vm.h>
+
+#include <ufs/ufs/quota.h>
+#include <ufs/ufs/inode.h>
+#include <ufs/ufs/ufsmount.h>
+
+#include <ufs/lfs/lfs.h>
+#include <ufs/lfs/lfs_extern.h>
+
+extern u_long nextgennumber;
+
+/*
+ * Allocate a new inode from the head of the ifile free list, growing the
+ * ifile by one block if that empties the list; the vnode goes in *a_vpp.
+ */
+/* ARGSUSED */
+int
+lfs_valloc(ap)
+	struct vop_valloc_args /* {
+		struct vnode *a_pvp;
+		int a_mode;
+		struct ucred *a_cred;
+		struct vnode **a_vpp;
+	} */ *ap;
+{
+	struct lfs *fs;
+	struct buf *bp;
+	struct ifile *ifp;
+	struct inode *ip;
+	struct vnode *vp;
+	daddr_t blkno;
+	ino_t new_ino;
+	u_long i, max;
+	int error;
+
+	fs = VTOI(ap->a_pvp)->i_lfs;
+	new_ino = fs->lfs_free;
+#ifdef ALLOCPRINT
+	printf("lfs_ialloc: allocate inode %d\n", new_ino);
+#endif
+
+	/* Unlink the inode; the superblock holds the new free list head. */
+	LFS_IENTRY(ifp, fs, new_ino, bp);
+	if (ifp->if_daddr != LFS_UNUSED_DADDR)
+		panic("lfs_ialloc: inuse inode on the free list");
+	fs->lfs_free = ifp->if_nextfree;
+	brelse(bp);
+
+	/* Extend IFILE so that the next lfs_valloc will succeed. */
+	if (fs->lfs_free == LFS_UNUSED_INUM) {
+		vp = fs->lfs_ivnode;
+		ip = VTOI(vp);
+		blkno = lblkno(fs, ip->i_size);
+		if (error = lfs_balloc(vp, fs->lfs_bsize, blkno, &bp)) {
+			/* bp is stale here; relink new_ino and give up. */
+			fs->lfs_free = new_ino;
+			return (error);
+		}
+		ip->i_size += fs->lfs_bsize;
+		vnode_pager_setsize(vp, (u_long)ip->i_size);
+		vnode_pager_uncache(vp);
+
+		i = (blkno - fs->lfs_segtabsz - fs->lfs_cleansz) *
+		    fs->lfs_ifpb;
+		fs->lfs_free = i;
+		max = i + fs->lfs_ifpb;
+		for (ifp = (struct ifile *)bp->b_data; i < max; ++ifp) {
+			ifp->if_version = 1;
+			ifp->if_daddr = LFS_UNUSED_DADDR;
+			ifp->if_nextfree = ++i;
+		}
+		ifp--;
+		ifp->if_nextfree = LFS_UNUSED_INUM;
+		if (error = VOP_BWRITE(bp))
+			return (error);
+	}
+
+	/* Create a vnode to associate with the inode. */
+	if (error = lfs_vcreate(ap->a_pvp->v_mount, new_ino, &vp))
+		return (error);
+
+	/* Clear the whole on-disk inode image, then restore its number. */
+	ip = VTOI(vp);
+	bzero(&ip->i_din, sizeof(struct dinode));
+	ip->i_din.di_inumber = new_ino;
+
+	/* Set a new generation number for this inode. */
+	if (++nextgennumber < (u_long)time.tv_sec)
+		nextgennumber = time.tv_sec;
+	ip->i_gen = nextgennumber;
+
+	/* Insert into the inode hash table. */
+	ufs_ihashins(ip);
+	if (error = ufs_vinit(vp->v_mount, lfs_specop_p, LFS_FIFOOPS, &vp)) {
+		vput(vp);
+		*ap->a_vpp = NULL;
+		return (error);
+	}
+	*ap->a_vpp = vp;
+	vp->v_flag |= VDIROP;
+	VREF(ip->i_devvp);
+
+	/* Set superblock modified bit and increment file count. */
+	fs->lfs_fmod = 1;
+	++fs->lfs_nfiles;
+	return (0);
+}
+
+/* Build a new vnode/inode pair for ino and fill in the fields we can. */
+int
+lfs_vcreate(mp, ino, vpp)
+	struct mount *mp;
+	ino_t ino;
+	struct vnode **vpp;
+{
+	extern int (**lfs_vnodeop_p)();
+	struct ufsmount *ump;
+	struct inode *ip;
+	int i, rc;
+
+	/* Get the vnode; on failure the caller sees a NULL *vpp. */
+	rc = getnewvnode(VT_LFS, mp, lfs_vnodeop_p, vpp);
+	if (rc) {
+		*vpp = NULL;
+		return (rc);
+	}
+
+	/* Private mount structure, source of the device and lfs pointers. */
+	ump = VFSTOUFS(mp);
+	/* Allocate the in-core inode and cross-link it with the vnode. */
+	MALLOC(ip, struct inode *, sizeof(struct inode), M_LFSNODE, M_WAITOK);
+	ip->i_vnode = *vpp;
+	(*vpp)->v_data = ip;
+	ip->i_lfs = ump->um_lfs;
+	ip->i_dev = ump->um_dev;
+	ip->i_devvp = ump->um_devvp;
+	ip->i_flag = IN_MODIFIED;
+	ip->i_number = ip->i_din.di_inumber = ino;
+	ip->i_din.di_spare[0] = 0xdeadbeef;
+	ip->i_din.di_spare[1] = 0xdeadbeef;
+#ifdef QUOTA
+	for (i = 0; i < MAXQUOTAS; i++)
+		ip->i_dquot[i] = NODQUOT;
+#endif
+	ip->i_blocks = 0;
+	ip->i_size = 0;
+	ip->i_mode = 0;
+	ip->i_diroff = 0;
+	ip->i_lockf = 0;
+	++ump->um_lfs->lfs_uinodes;
+	return (0);
+}
+
+/* Free the inode behind a_pvp: mark it unused in the ifile and chain it. */
+/* ARGSUSED */
+int
+lfs_vfree(ap)
+	struct vop_vfree_args /* {
+		struct vnode *a_pvp;
+		ino_t a_ino;
+		int a_mode;
+	} */ *ap;
+{
+	struct inode *ip;
+	struct lfs *fs;
+	struct ifile *entry;
+	struct buf *bp;
+	SEGUSE *seg;
+	daddr_t prev_daddr;
+	ino_t ino;
+
+	/* The vnode's own inode supplies both the number and the lfs. */
+	ip = VTOI(ap->a_pvp);
+	ino = ip->i_number;
+	fs = ip->i_lfs;
+	if (ip->i_flag & IN_MODIFIED) {
+		ip->i_flag &=
+		    ~(IN_MODIFIED | IN_UPDATE | IN_CHANGE | IN_ACCESS);
+		--fs->lfs_uinodes;
+	}
+	/*
+	 * Mark the ifile entry unused, bump its version and push the inode
+	 * onto the front of the free list kept in the superblock.
+	 */
+	LFS_IENTRY(entry, fs, ino, bp);
+	prev_daddr = entry->if_daddr;
+	entry->if_nextfree = fs->lfs_free;
+	fs->lfs_free = ino;
+	entry->if_daddr = LFS_UNUSED_DADDR;
+	++entry->if_version;
+	(void) VOP_BWRITE(bp);
+
+	if (prev_daddr != LFS_UNUSED_DADDR) {
+		LFS_SEGENTRY(seg, fs, datosn(fs, prev_daddr), bp);
+#ifdef DIAGNOSTIC
+		if (seg->su_nbytes < sizeof(struct dinode))
+			panic("lfs_vfree: negative byte count (segment %d)\n",
+			    datosn(fs, prev_daddr));
+#endif
+		seg->su_nbytes -= sizeof(struct dinode);
+		(void) VOP_BWRITE(bp);
+	}
+
+	/* Set superblock modified bit and decrement file count. */
+	fs->lfs_fmod = 1;
+	--fs->lfs_nfiles;
+	return (0);
+}
diff --git a/sys/ufs/lfs/lfs_balloc.c b/sys/ufs/lfs/lfs_balloc.c
new file mode 100644
index 00000000000..b56bc9ec51b
--- /dev/null
+++ b/sys/ufs/lfs/lfs_balloc.c
@@ -0,0 +1,136 @@
+/*
+ * Copyright (c) 1989, 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)lfs_balloc.c	8.1 (Berkeley) 6/11/93
+ */
+#include <sys/param.h>
+#include <sys/buf.h>
+#include <sys/proc.h>
+#include <sys/vnode.h>
+#include <sys/mount.h>
+#include <sys/resourcevar.h>
+#include <sys/trace.h>
+
+#include <miscfs/specfs/specdev.h>
+
+#include <ufs/ufs/quota.h>
+#include <ufs/ufs/inode.h>
+#include <ufs/ufs/ufsmount.h>
+
+#include <ufs/lfs/lfs.h>
+#include <ufs/lfs/lfs_extern.h>
+
+/*
+ * Obtain the buffer for logical block lbn of vp in preparation for a write
+ * of iosize bytes, allocating space for the data block and for any missing
+ * indirect blocks along the way.
+ *
+ * On success 0 is returned and *bpp holds the (busy) buffer.  On any
+ * failure the buffer is released here, *bpp is set to NULL and an errno
+ * value is returned (ENOSPC when ISSPACE() fails, otherwise the error from
+ * ufs_bmaparray(), VOP_BWRITE() or biowait()).
+ */
+int
+lfs_balloc(vp, iosize, lbn, bpp)
+	struct vnode *vp;
+	u_long iosize;
+	daddr_t lbn;
+	struct buf **bpp;
+{
+	struct buf *ibp, *bp;
+	struct inode *ip;
+	struct lfs *fs;
+	struct indir indirs[NIADDR+2];
+	daddr_t daddr;
+	int bb, error, i, num;
+
+	ip = VTOI(vp);
+	fs = ip->i_lfs;
+
+	/*
+	 * Three cases: it's a block beyond the end of file, it's a block in
+	 * the file that may or may not have been assigned a disk address or
+	 * we're writing an entire block.  Note, if the daddr is unassigned,
+	 * the block might still have existed in the cache (if it was read
+	 * or written earlier).  If it did, make sure we don't count it as a
+	 * new block or zero out its contents.  If it did not, make sure
+	 * we allocate any necessary indirect blocks.
+	 */
+
+	*bpp = NULL;
+	error = ufs_bmaparray(vp, lbn, &daddr, &indirs[0], &num, NULL);
+	if (error)
+		return (error);
+
+	*bpp = bp = getblk(vp, lbn, fs->lfs_bsize, 0, 0);
+	bb = VFSTOUFS(vp->v_mount)->um_seqinc;
+	if (daddr == UNASSIGNED) {
+		/*
+		 * May need to allocate indirect blocks.  Stop at the first
+		 * failure so that a later successful write cannot overwrite
+		 * the error and no further space is charged to the inode.
+		 */
+		for (i = 1; i < num && error == 0; ++i) {
+			if (indirs[i].in_exists)
+				continue;
+			ibp = getblk(vp, indirs[i].in_lbn, fs->lfs_bsize,
+			    0, 0);
+			if (ibp->b_flags & (B_DONE | B_DELWRI))
+				panic ("Indirect block should not exist");
+			if (!ISSPACE(fs, bb, curproc->p_ucred)) {
+				ibp->b_flags |= B_INVAL;
+				brelse(ibp);
+				error = ENOSPC;
+			} else {
+				ip->i_blocks += bb;
+				ip->i_lfs->lfs_bfree -= bb;
+				clrbuf(ibp);
+				error = VOP_BWRITE(ibp);
+			}
+		}
+	}
+	if (error) {
+		if (bp)
+			brelse(bp);
+		/* Do not hand the caller a buffer we no longer own. */
+		*bpp = NULL;
+		return (error);
+	}
+
+	/* Now, we may need to allocate the data block */
+	if (!(bp->b_flags & (B_CACHE | B_DONE | B_DELWRI))) {
+		if (daddr == UNASSIGNED) {
+			if (!ISSPACE(fs, bb, curproc->p_ucred)) {
+				bp->b_flags |= B_INVAL;
+				brelse(bp);
+				*bpp = NULL;
+				return (ENOSPC);
+			}
+			ip->i_blocks += bb;
+			ip->i_lfs->lfs_bfree -= bb;
+			/* A partial write must start from zeroed contents. */
+			if (iosize != fs->lfs_bsize)
+				clrbuf(bp);
+		} else if (iosize == fs->lfs_bsize) {
+			bp->b_blkno = daddr;		/* Skip the I/O */
+		} else {
+			/* Partial overwrite of an on-disk block: read it. */
+			bp->b_blkno = daddr;
+			bp->b_flags |= B_READ;
+			VOP_STRATEGY(bp);
+			error = biowait(bp);
+			if (error) {
+				/*
+				 * Release on failure, as every other error
+				 * path in this routine does.
+				 */
+				brelse(bp);
+				*bpp = NULL;
+			}
+			return (error);
+		}
+	}
+	return (0);
+}
diff --git a/sys/ufs/lfs/lfs_bio.c b/sys/ufs/lfs/lfs_bio.c
new file mode 100644
index 00000000000..0f021f17208
--- /dev/null
+++ b/sys/ufs/lfs/lfs_bio.c
@@ -0,0 +1,187 @@
+/*
+ * Copyright (c) 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)lfs_bio.c	8.4 (Berkeley) 12/30/93
+ */
+
+#include <sys/param.h>
+#include <sys/proc.h>
+#include <sys/buf.h>
+#include <sys/vnode.h>
+#include <sys/resourcevar.h>
+#include <sys/mount.h>
+#include <sys/kernel.h>
+
+#include <ufs/ufs/quota.h>
+#include <ufs/ufs/inode.h>
+#include <ufs/ufs/ufsmount.h>
+
+#include <ufs/lfs/lfs.h>
+#include <ufs/lfs/lfs_extern.h>
+
+/*
+ * LFS block write function.
+ *
+ * XXX
+ * No write cost accounting is done.
+ * This is almost certainly wrong for synchronous operations and NFS.
+ */
+int	lfs_allclean_wakeup;		/* Cleaner wakeup address. */
+int	locked_queue_count;		/* XXX Count of locked-down buffers. */
+int	lfs_writing;			/* Set if already kicked off a writer
+					   because of buffer space */
+/*
+ * Earlier, lower threshold values, kept for reference only:
+ *
+#define WRITE_THRESHHOLD	((nbuf >> 2) - 10)
+#define WAIT_THRESHHOLD		((nbuf >> 1) - 10)
+*/
+/*
+ * Limits on locked_queue_count, expressed in terms of nbuf:
+ *	WAIT_THRESHHOLD  - three quarters of nbuf, less 10; lfs_check()
+ *			   sleeps while the count is above this.
+ *	WRITE_THRESHHOLD - half of nbuf, less 10; lfs_check() calls
+ *			   lfs_flush() when the count is above this.
+ * LFS_BUFWAIT is multiplied by hz to form the tsleep timeout used by
+ * lfs_check() while waiting.
+ */
+#define WAIT_THRESHHOLD         (nbuf - (nbuf >> 2) - 10)
+#define WRITE_THRESHHOLD        ((nbuf >> 1) - 10)
+#define LFS_BUFWAIT	2
+
+/*
+ * Write a buffer on an LFS file system.
+ *
+ * No I/O is started here.  A buffer that is not already B_LOCKED is marked
+ * B_DELWRI | B_LOCKED, charged against fs->lfs_avail, counted in
+ * locked_queue_count and moved to the vnode's dirty list; its inode is
+ * flagged modified.  In every case the buffer is then released.
+ *
+ * Returns 0, or the tsleep() error if the wait for free space was
+ * interrupted (the buffer is released in that case as well).
+ */
+int
+lfs_bwrite(ap)
+	struct vop_bwrite_args /* {
+		struct buf *a_bp;
+	} */ *ap;
+{
+	register struct buf *bp = ap->a_bp;
+	struct lfs *fs;
+	struct inode *ip;
+	int error, s;
+
+	/*
+	 * Set the delayed write flag and use reassignbuf to move the buffer
+	 * from the clean list to the dirty one.
+	 *
+	 * Set the B_LOCKED flag and unlock the buffer, causing brelse to move
+	 * the buffer onto the LOCKED free list.  This is necessary, otherwise
+	 * getnewbuf() would try to reclaim the buffers using bawrite, which
+	 * isn't going to work.
+	 *
+	 * XXX we don't let meta-data writes run out of space because they can
+	 * come from the segment writer.  We need to make sure that there is
+	 * enough space reserved so that there's room to write meta-data
+	 * blocks.
+	 */
+	if (!(bp->b_flags & B_LOCKED)) {
+		fs = VFSTOUFS(bp->b_vp->v_mount)->um_lfs;
+		while (!LFS_FITS(fs, fsbtodb(fs, 1)) && !IS_IFILE(bp) &&
+		    bp->b_lblkno > 0) {
+			/* Out of space, need cleaner to run */
+			wakeup(&lfs_allclean_wakeup);
+			/* The last argument is a tick count; 0 = no timeout. */
+			error = tsleep(&fs->lfs_avail, PCATCH | PUSER,
+			    "cleaner", 0);
+			if (error) {
+				brelse(bp);
+				return (error);
+			}
+		}
+		ip = VTOI((bp)->b_vp);
+		/* lfs_uinodes counts inodes that carry IN_MODIFIED. */
+		if (!(ip->i_flag & IN_MODIFIED))
+			++fs->lfs_uinodes;
+		ip->i_flag |= IN_CHANGE | IN_MODIFIED | IN_UPDATE;
+		fs->lfs_avail -= fsbtodb(fs, 1);
+		++locked_queue_count;
+		bp->b_flags |= B_DELWRI | B_LOCKED;
+		bp->b_flags &= ~(B_READ | B_ERROR);
+		s = splbio();
+		reassignbuf(bp, bp->b_vp);
+		splx(s);
+	}
+	brelse(bp);
+	return (0);
+}
+
+/*
+ * XXX
+ * Push buffers out of the B_LOCKED queue once LFS has locked down too many
+ * of them, by running the segment writer on every eligible LFS mount.
+ * Eventually the pageout daemon should simply call LFS when pages need to
+ * be reclaimed.
+ *
+ * There is a single static count of locked buffers, so more than one LFS
+ * file system cannot be handled properly; moving the count into the mount
+ * structure would fix that.
+ *
+ * lfs_writing guards against starting a second flush while one is already
+ * running.  locked_queue_count itself is not modified by this routine.
+ */
+void
+lfs_flush()
+{
+	register struct mount *mp;
+
+#ifdef DOSTATS
+	++lfs_stats.write_exceeded;
+#endif
+	if (lfs_writing)
+		return;
+	lfs_writing = 1;
+	for (mp = mountlist.tqh_first; mp != NULL; mp = mp->mnt_list.tqe_next) {
+		/* Only LFS mounts are of interest. */
+		if (mp->mnt_stat.f_type != MOUNT_LFS)
+			continue;
+		/* The lock check below is to avoid races with unmount. */
+		if ((mp->mnt_flag & (MNT_MLOCK|MNT_RDONLY|MNT_UNMOUNT)) != 0)
+			continue;
+		/* Skip file systems with directory operations outstanding. */
+		if (((struct ufsmount *)mp->mnt_data)->ufsmount_u.lfs->lfs_dirops)
+			continue;
+		/*
+		 * All the dirty buffers we currently hold are about to be
+		 * written.  Buffers that arrive while the segment is being
+		 * written may not be included in it.
+		 */
+#ifdef DOSTATS
+		++lfs_stats.flush_invoked;
+#endif
+		lfs_segwrite(mp, 0);
+	}
+	lfs_writing = 0;
+}
+
+/*
+ * Throttle a writer that is about to dirty block blkno of vp.
+ *
+ * If the block is already in core nothing is done.  Otherwise a flush is
+ * started when more than WRITE_THRESHHOLD buffers are locked down, and the
+ * caller sleeps while more than WAIT_THRESHHOLD remain locked.
+ *
+ * Returns 0 once the count has dropped, or the tsleep() error if the sleep
+ * was interrupted.  A plain timeout (EWOULDBLOCK) only causes the count to
+ * be re-examined and is never reported to the caller.
+ */
+int
+lfs_check(vp, blkno)
+	struct vnode *vp;
+	daddr_t blkno;
+{
+	int error;
+
+	error = 0;
+	if (incore(vp, blkno))
+		return (0);
+	if (locked_queue_count > WRITE_THRESHHOLD)
+		lfs_flush();
+
+	/* If out of buffers, wait on writer */
+	while (locked_queue_count > WAIT_THRESHHOLD) {
+#ifdef DOSTATS
+		++lfs_stats.wait_exceeded;
+#endif
+		error = tsleep(&locked_queue_count, PCATCH | PUSER, "buffers",
+		    hz * LFS_BUFWAIT);
+		if (error == EWOULDBLOCK) {
+			/* Timed out: not an error, just poll the count. */
+			error = 0;
+			continue;
+		}
+		/*
+		 * Interrupted by a signal (PCATCH).  Give up rather than
+		 * calling tsleep again, which would return at once and
+		 * turn this loop into a busy wait.
+		 */
+		if (error)
+			break;
+	}
+
+	return (error);
+}
diff --git a/sys/ufs/lfs/lfs_cksum.c b/sys/ufs/lfs/lfs_cksum.c
new file mode 100644
index 00000000000..77b011aa2c4
--- /dev/null
+++ b/sys/ufs/lfs/lfs_cksum.c
@@ -0,0 +1,58 @@
+/*-
+ * Copyright (c) 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)lfs_cksum.c	8.1 (Berkeley) 6/11/93
+ */
+
+#include <sys/types.h>
+
+/*
+ * Simple, general purpose, fast checksum.  Data must be short-aligned.
+ * Returns a u_long in case we ever want to do something more rigorous.
+ *
+ * The result is the exclusive-or of every u_short in the buffer.  len is
+ * a byte count; a trailing odd byte is ignored.  A length of zero yields
+ * a checksum of zero.
+ *
+ * XXX
+ * Use the TCP/IP checksum instead.
+ */
+u_long
+cksum(str, len)
+	register void *str;
+	register size_t len;
+{
+	register u_short *sp;
+	register u_long sum;
+
+	/* Round the length down to a whole number of u_shorts. */
+	len &= ~(sizeof(u_short) - 1);
+	/*
+	 * Walk the data through a typed pointer; incrementing the result
+	 * of a cast is not valid C because a cast is not an lvalue.
+	 */
+	sp = (u_short *)str;
+	for (sum = 0; len; len -= sizeof(u_short))
+		sum ^= *sp++;
+	return (sum);
+}
diff --git a/sys/ufs/lfs/lfs_debug.c b/sys/ufs/lfs/lfs_debug.c
new file mode 100644
index 00000000000..cc28d609023
--- /dev/null
+++ b/sys/ufs/lfs/lfs_debug.c
@@ -0,0 +1,137 @@
+/*
+ * Copyright (c) 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)lfs_debug.c	8.1 (Berkeley) 6/11/93
+ */
+
+#ifdef DEBUG
+#include <sys/param.h>
+#include <sys/namei.h>
+#include <sys/vnode.h>
+#include <sys/mount.h>
+
+#include <ufs/ufs/quota.h>
+#include <ufs/ufs/inode.h>
+#include <ufs/lfs/lfs.h>
+#include <ufs/lfs/lfs_extern.h>
+
+/*
+ * Debugging aid (compiled only under DEBUG): print the fields of the LFS
+ * superblock structure lfsp with printf, several labelled values per line.
+ * Each label is passed as a separate %s argument ahead of its value.
+ * NOTE(review): the conversion specifiers assume particular field widths;
+ * confirm them against the struct lfs declaration.
+ */
+void 
+lfs_dump_super(lfsp)
+	struct lfs *lfsp;
+{
+	int i;
+
+	/* Identification and overall geometry. */
+	(void)printf("%s%lx\t%s%lx\t%s%d\t%s%d\n",
+		"magic    ", lfsp->lfs_magic,
+		"version  ", lfsp->lfs_version,
+		"size     ", lfsp->lfs_size,
+		"ssize    ", lfsp->lfs_ssize);
+	(void)printf("%s%d\t%s%d\t%s%d\t%s%d\n",
+		"dsize    ", lfsp->lfs_dsize,
+		"bsize    ", lfsp->lfs_bsize,
+		"fsize    ", lfsp->lfs_fsize,
+		"frag     ", lfsp->lfs_frag);
+
+	(void)printf("%s%d\t%s%d\t%s%d\t%s%d\n",
+		"minfree  ", lfsp->lfs_minfree,
+		"inopb    ", lfsp->lfs_inopb,
+		"ifpb     ", lfsp->lfs_ifpb,
+		"nindir   ", lfsp->lfs_nindir);
+
+	(void)printf("%s%d\t%s%d\t%s%d\t%s%d\n",
+		"nseg     ", lfsp->lfs_nseg,
+		"nspf     ", lfsp->lfs_nspf,
+		"cleansz  ", lfsp->lfs_cleansz,
+		"segtabsz ", lfsp->lfs_segtabsz);
+
+	/* Masks and shift counts. */
+	(void)printf("%s%lx\t%s%d\t%s%lx\t%s%d\n",
+		"segmask  ", lfsp->lfs_segmask,
+		"segshift ", lfsp->lfs_segshift,
+		"bmask    ", lfsp->lfs_bmask,
+		"bshift   ", lfsp->lfs_bshift);
+
+	(void)printf("%s%lx\t%s%d\t%s%lx\t%s%d\n",
+		"ffmask   ", lfsp->lfs_ffmask,
+		"ffshift  ", lfsp->lfs_ffshift,
+		"fbmask   ", lfsp->lfs_fbmask,
+		"fbshift  ", lfsp->lfs_fbshift);
+
+	(void)printf("%s%d\t%s%d\t%s%lx\t%s%qx\n", 
+		"sushift  ", lfsp->lfs_sushift,
+		"fsbtodb  ", lfsp->lfs_fsbtodb,
+		"cksum    ", lfsp->lfs_cksum,
+		"maxfilesize ", lfsp->lfs_maxfilesize);
+
+	/* One address per superblock copy, all on a single line. */
+	(void)printf("Superblock disk addresses:");
+	for (i = 0; i < LFS_MAXNUMSB; i++)
+		(void)printf(" %lx", lfsp->lfs_sboffs[i]);
+	(void)printf("\n");
+
+	(void)printf("Checkpoint Info\n");
+	(void)printf("%s%d\t%s%lx\t%s%d\n",
+		"free     ", lfsp->lfs_free,
+		"idaddr   ", lfsp->lfs_idaddr,
+		"ifile    ", lfsp->lfs_ifile);
+	(void)printf("%s%lx\t%s%d\t%s%lx\t%s%lx\t%s%lx\t%s%lx\n",
+		"bfree    ", lfsp->lfs_bfree,
+		"nfiles   ", lfsp->lfs_nfiles,
+		"lastseg  ", lfsp->lfs_lastseg,
+		"nextseg  ", lfsp->lfs_nextseg,
+		"curseg   ", lfsp->lfs_curseg,
+		"offset   ", lfsp->lfs_offset);
+	(void)printf("tstamp   %lx\n", lfsp->lfs_tstamp);
+}
+
+/*
+ * Debugging aid (compiled only under DEBUG): print the on-disk inode dip,
+ * i.e. its mode, link count, owner, size and inode number, followed by
+ * the NDADDR direct and NIADDR indirect block addresses in hex.
+ * NOTE(review): di_size is printed with %lu and di_mode with %u; confirm
+ * that these match the field types declared in struct dinode.
+ */
+void
+lfs_dump_dinode(dip)
+	struct dinode *dip;
+{
+	int i;
+
+	(void)printf("%s%u\t%s%d\t%s%u\t%s%u\t%s%lu\n",
+		"mode  ", dip->di_mode,
+		"nlink ", dip->di_nlink,
+		"uid   ", dip->di_uid,
+		"gid   ", dip->di_gid,
+		"size  ", dip->di_size);
+	(void)printf("inum  %ld\n", dip->di_inumber);
+	(void)printf("Direct Addresses\n");
+	for (i = 0; i < NDADDR; i++) {
+		(void)printf("\t%lx", dip->di_db[i]);
+		/* Start a new output line after every sixth address. */
+		if ((i % 6) == 5)
+			(void)printf("\n");
+	}
+	/* The indirect addresses continue on the current output line. */
+	for (i = 0; i < NIADDR; i++)
+		(void)printf("\t%lx", dip->di_ib[i]);
+	(void)printf("\n");
+}
+#endif /* DEBUG */
diff --git a/sys/ufs/lfs/lfs_extern.h b/sys/ufs/lfs/lfs_extern.h
new file mode 100644
index 00000000000..c1157ade02a
--- /dev/null
+++ b/sys/ufs/lfs/lfs_extern.h
@@ -0,0 +1,106 @@
+/*-
+ * Copyright (c) 1991, 1993, 1994
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)lfs_extern.h	8.2 (Berkeley) 4/16/94
+ */
+
+/*
+ * Forward declarations of structure tags, so that pointers to them can be
+ * named without their full definitions.  Other types used below (struct
+ * vnode, struct buf, struct lfs, struct dinode, struct ucred and the
+ * vop_*_args structures) are not declared here and must already be
+ * visible where this header is included.
+ */
+struct fid;
+struct mount;
+struct nameidata;
+struct proc;
+struct statfs;
+struct timeval;
+struct inode;
+struct uio;
+struct mbuf;
+
+/* Prototypes for the LFS routines, in alphabetical order. */
+__BEGIN_DECLS
+u_long	 cksum __P((void *, size_t));				/* XXX */
+int	 lfs_balloc __P((struct vnode *, u_long, daddr_t, struct buf **));
+int	 lfs_blkatoff __P((struct vop_blkatoff_args *));
+int	 lfs_bwrite __P((struct vop_bwrite_args *));
+int	 lfs_check __P((struct vnode *, daddr_t));
+int	 lfs_close __P((struct vop_close_args *));
+int	 lfs_create __P((struct vop_create_args *));
+int	 lfs_fhtovp __P((struct mount *, struct fid *, struct mbuf *,
+	    struct vnode **, int *, struct ucred **));
+int	 lfs_fsync __P((struct vop_fsync_args *));
+int	 lfs_getattr __P((struct vop_getattr_args *));
+struct dinode *
+	 lfs_ifind __P((struct lfs *, ino_t, struct dinode *));
+int	 lfs_inactive __P((struct vop_inactive_args *));
+int	 lfs_init __P((void));
+int	 lfs_initseg __P((struct lfs *));
+int	 lfs_link __P((struct vop_link_args *));
+int	 lfs_makeinode __P((int, struct nameidata *, struct inode **));
+int	 lfs_mkdir __P((struct vop_mkdir_args *));
+int	 lfs_mknod __P((struct vop_mknod_args *));
+int	 lfs_mount __P((struct mount *,
+	    char *, caddr_t, struct nameidata *, struct proc *));
+int	 lfs_mountroot __P((void));
+struct buf *
+	 lfs_newbuf __P((struct vnode *, daddr_t, size_t));
+int	 lfs_read __P((struct vop_read_args *));
+int	 lfs_remove __P((struct vop_remove_args *));
+int	 lfs_rmdir __P((struct vop_rmdir_args *));
+int	 lfs_rename __P((struct vop_rename_args *));
+void	 lfs_seglock __P((struct lfs *, unsigned long flags));
+void	 lfs_segunlock __P((struct lfs *));
+int	 lfs_segwrite __P((struct mount *, int));
+int	 lfs_statfs __P((struct mount *, struct statfs *, struct proc *));
+int	 lfs_symlink __P((struct vop_symlink_args *));
+int	 lfs_sync __P((struct mount *, int, struct ucred *, struct proc *));
+int	 lfs_truncate __P((struct vop_truncate_args *));
+int	 lfs_unmount __P((struct mount *, int, struct proc *));
+int	 lfs_update __P((struct vop_update_args *));
+int	 lfs_valloc __P((struct vop_valloc_args *));
+int	 lfs_vcreate __P((struct mount *, ino_t, struct vnode **));
+int	 lfs_vfree __P((struct vop_vfree_args *));
+int	 lfs_vflush __P((struct vnode *));
+int	 lfs_vget __P((struct mount *, ino_t, struct vnode **));
+int	 lfs_vptofh __P((struct vnode *, struct fid *));
+int	 lfs_vref __P((struct vnode *));
+void	 lfs_vunref __P((struct vnode *));
+int	 lfs_write __P((struct vop_write_args *));
+/* Dump routines exist only in DEBUG kernels. */
+#ifdef DEBUG
+void	lfs_dump_dinode __P((struct dinode *));
+void	lfs_dump_super __P((struct lfs *));
+#endif
+__END_DECLS
+/* Vnode operation vectors for regular, special-device and FIFO vnodes. */
+extern int (**lfs_vnodeop_p)();
+extern int (**lfs_specop_p)();
+/* LFS_FIFOOPS is the FIFO vector when FIFO is configured, else NULL. */
+#ifdef FIFO
+extern int (**lfs_fifoop_p)();
+#define LFS_FIFOOPS lfs_fifoop_p
+#else
+#define LFS_FIFOOPS NULL
+#endif
diff --git a/sys/ufs/lfs/lfs_inode.c b/sys/ufs/lfs/lfs_inode.c
new file mode 100644
index 00000000000..1a06aa23ed8
--- /dev/null
+++ b/sys/ufs/lfs/lfs_inode.c
@@ -0,0 +1,359 @@
+/*
+ * Copyright (c) 1986, 1989, 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)lfs_inode.c	8.5 (Berkeley) 12/30/93
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/mount.h>
+#include <sys/proc.h>
+#include <sys/file.h>
+#include <sys/buf.h>
+#include <sys/vnode.h>
+#include <sys/kernel.h>
+#include <sys/malloc.h>
+
+#include <vm/vm.h>
+
+#include <ufs/ufs/quota.h>
+#include <ufs/ufs/inode.h>
+#include <ufs/ufs/ufsmount.h>
+#include <ufs/ufs/ufs_extern.h>
+
+#include <ufs/lfs/lfs.h>
+#include <ufs/lfs/lfs_extern.h>
+
+/*
+ * File system initialization entry point for LFS.  All of the work is
+ * delegated to ufs_init(), whose result is passed back unchanged.
+ */
+int
+lfs_init()
+{
+	int status;
+
+	status = ufs_init();
+	return (status);
+}
+
+/*
+ * Find the dinode numbered ino inside a block of INOPB(fs) dinodes that
+ * begins at dip.  The block is scanned from its last slot back toward the
+ * first, so when duplicates exist the highest-indexed match is returned.
+ * Panics if no slot carries the requested inode number.
+ */
+struct dinode *
+lfs_ifind(fs, ino, dip)
+	struct lfs *fs;
+	ino_t ino;
+	register struct dinode *dip;
+{
+	register int slot;
+
+	slot = INOPB(fs);
+	while (slot--) {
+		if (dip[slot].di_inumber == ino)
+			return (&dip[slot]);
+	}
+
+	panic("lfs_ifind: dinode %u not found", ino);
+	/* NOTREACHED */
+}
+
+/*
+ * Bring the in-core inode's timestamps up to date and mark it modified.
+ *
+ * Nothing is done on a read-only mount or when none of IN_ACCESS,
+ * IN_CHANGE, IN_MODIFIED and IN_UPDATE is set.  Otherwise the access and
+ * modify times are taken from the caller's timevals, the change time from
+ * the kernel's current time, and the inode is left flagged IN_MODIFIED
+ * (being counted in lfs_uinodes if it was not already).
+ *
+ * Returns 0, or the result of lfs_vflush() when a_waitfor has LFS_SYNC.
+ */
+int
+lfs_update(ap)
+	struct vop_update_args /* {
+		struct vnode *a_vp;
+		struct timeval *a_access;
+		struct timeval *a_modify;
+		int a_waitfor;
+	} */ *ap;
+{
+	struct vnode *vp;
+	struct inode *ip;
+
+	vp = ap->a_vp;
+	if (vp->v_mount->mnt_flag & MNT_RDONLY)
+		return (0);
+
+	ip = VTOI(vp);
+	if (!(ip->i_flag &
+	    (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)))
+		return (0);
+
+	/* Apply whichever timestamp updates are pending. */
+	if (ip->i_flag & IN_ACCESS)
+		ip->i_atime.ts_sec = ap->a_access->tv_sec;
+	if (ip->i_flag & IN_UPDATE) {
+		ip->i_mtime.ts_sec = ap->a_modify->tv_sec;
+		ip->i_modrev++;
+	}
+	if (ip->i_flag & IN_CHANGE)
+		ip->i_ctime.ts_sec = time.tv_sec;
+	ip->i_flag &= ~(IN_ACCESS | IN_CHANGE | IN_UPDATE);
+
+	/* An inode becoming modified is added to the dirty inode count. */
+	if ((ip->i_flag & IN_MODIFIED) == 0) {
+		++(VFSTOUFS(vp->v_mount)->um_lfs->lfs_uinodes);
+		ip->i_flag |= IN_MODIFIED;
+	}
+
+	/* If sync, push back the vnode and any dirty blocks it may have. */
+	if (ap->a_waitfor & LFS_SYNC)
+		return (lfs_vflush(vp));
+	return (0);
+}
+
+/*
+ * Update segment usage information when removing a block.
+ *
+ * Both macros are written for use inside lfs_truncate() and refer directly
+ * to its local variables rather than taking arguments.
+ *
+ * UPDATE_SEGUSE flushes the run accumulated so far: when lastseg is valid
+ * (not -1) it subtracts num blocks' worth of bytes from that segment's
+ * su_nbytes, writes the segment usage block back (result left in e1) and
+ * adds num to blocksreleased.  It uses fs, sup, sup_bp, lastseg, num, e1
+ * and blocksreleased.  It panics if the segment holds fewer bytes than
+ * are being removed.  The expansion is a bare if statement, so it must be
+ * used where a following else cannot attach to it.
+ */
+#define UPDATE_SEGUSE \
+	if (lastseg != -1) { \
+		LFS_SEGENTRY(sup, fs, lastseg, sup_bp); \
+		if ((num << fs->lfs_bshift) > sup->su_nbytes) \
+			panic("lfs_truncate: negative bytes in segment %d\n", \
+			    lastseg); \
+		sup->su_nbytes -= num << fs->lfs_bshift; \
+		e1 = VOP_BWRITE(sup_bp); \
+		blocksreleased += num; \
+	}
+
+/*
+ * SEGDEC accounts for one released block whose disk address is in daddr.
+ * A zero daddr is ignored.  Consecutive blocks in the same segment are
+ * batched by incrementing num; a block in a different segment first
+ * flushes the previous run with UPDATE_SEGUSE and then starts a new run
+ * of one.  It uses daddr, seg, lastseg and num in addition to everything
+ * UPDATE_SEGUSE needs.
+ */
+#define SEGDEC { \
+	if (daddr != 0) { \
+		if (lastseg != (seg = datosn(fs, daddr))) { \
+			UPDATE_SEGUSE; \
+			num = 1; \
+			lastseg = seg; \
+		} else \
+			++num; \
+	} \
+}
+
+/*
+ * Truncate the inode ip to at most length size.  Update segment usage
+ * table information.
+ *
+ * A request that would not shrink the file only updates the timestamps.
+ * Otherwise the partial last block (if any) is zeroed past the new end of
+ * file, every block beyond it is walked from the old end of file backwards
+ * so that segment byte counts, the inode's block count and the file
+ * system's free counts can be adjusted, and the vnode's buffers are
+ * invalidated.
+ *
+ * Returns 0 or an errno value from bread, VOP_BWRITE, getinoquota,
+ * vinvalbuf or VOP_UPDATE.
+ */
+/* ARGSUSED */
+int
+lfs_truncate(ap)
+	struct vop_truncate_args /* {
+		struct vnode *a_vp;
+		off_t a_length;
+		int a_flags;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	register struct indir *inp;
+	register int i;
+	register daddr_t *daddrp;
+	register struct vnode *vp = ap->a_vp;
+	off_t length = ap->a_length;
+	struct buf *bp, *sup_bp;
+	struct timeval tv;
+	struct ifile *ifp;
+	struct inode *ip;
+	struct lfs *fs;
+	struct indir a[NIADDR + 2], a_end[NIADDR + 2];
+	SEGUSE *sup;
+	daddr_t daddr, lastblock, lbn, olastblock;
+	long off, a_released, blocksreleased, i_released;
+	int e1, e2, depth, lastseg, num, offset, seg, size;
+
+	ip = VTOI(vp);
+	tv = time;
+	/*
+	 * A symbolic link on a mount with a non-zero mnt_maxsymlinklen has
+	 * its target cleared from i_shortlink in the inode; no blocks are
+	 * walked in that case.
+	 */
+	if (vp->v_type == VLNK && vp->v_mount->mnt_maxsymlinklen > 0) {
+#ifdef DIAGNOSTIC
+		if (length != 0)
+			panic("lfs_truncate: partial truncate of symlink");
+#endif
+		bzero((char *)&ip->i_shortlink, (u_int)ip->i_size);
+		ip->i_size = 0;
+		ip->i_flag |= IN_CHANGE | IN_UPDATE;
+		return (VOP_UPDATE(vp, &tv, &tv, 0));
+	}
+	vnode_pager_setsize(vp, (u_long)length);
+
+	fs = ip->i_lfs;
+
+	/* If length is larger than the file, just update the times. */
+	if (ip->i_size <= length) {
+		ip->i_flag |= IN_CHANGE | IN_UPDATE;
+		return (VOP_UPDATE(vp, &tv, &tv, 0));
+	}
+
+	/*
+	 * Calculate index into inode's block list of last direct and indirect
+	 * blocks (if any) which we want to keep.  Lastblock is 0 when the
+	 * file is truncated to 0.
+	 */
+	lastblock = lblkno(fs, length + fs->lfs_bsize - 1);
+	olastblock = lblkno(fs, ip->i_size + fs->lfs_bsize - 1) - 1;
+
+	/*
+	 * Update the size of the file. If the file is not being truncated to
+	 * a block boundary, the contents of the partial block following the
+	 * end of the file must be zeroed in case it ever becomes accessible
+	 * again because of subsequent file growth.
+	 */
+	offset = blkoff(fs, length);
+	if (offset == 0)
+		ip->i_size = length;
+	else {
+		lbn = lblkno(fs, length);
+#ifdef QUOTA
+		if (e1 = getinoquota(ip))
+			return (e1);
+#endif	
+		/*
+		 * NOTE(review): bp is not released when bread fails here;
+		 * confirm whether bread leaves a busy buffer behind on error.
+		 */
+		if (e1 = bread(vp, lbn, fs->lfs_bsize, NOCRED, &bp))
+			return (e1);
+		ip->i_size = length;
+		size = blksize(fs);
+		(void)vnode_pager_uncache(vp);
+		bzero((char *)bp->b_data + offset, (u_int)(size - offset));
+		allocbuf(bp, size);
+		if (e1 = VOP_BWRITE(bp))
+			return (e1);
+	}
+	/*
+	 * Modify sup->su_nbyte counters for each deleted block; keep track
+	 * of number of blocks removed for ip->i_blocks.
+	 */
+	blocksreleased = 0;
+	num = 0;
+	lastseg = -1;
+
+	/* Walk backwards from the old last block to the new one. */
+	for (lbn = olastblock; lbn >= lastblock;) {
+		/* XXX use run length from bmap array to make this faster */
+		/* The return value of ufs_bmaparray is not checked. */
+		ufs_bmaparray(vp, lbn, &daddr, a, &depth, NULL);
+		/* Remember the indirect path to the old end of file. */
+		if (lbn == olastblock)
+			for (i = NIADDR + 2; i--;)
+				a_end[i] = a[i];
+		switch (depth) {
+		case 0:				/* Direct block. */
+			daddr = ip->i_db[lbn];
+			SEGDEC;
+			ip->i_db[lbn] = 0;
+			--lbn;
+			break;
+#ifdef DIAGNOSTIC
+		case 1:				/* An indirect block. */
+			panic("lfs_truncate: ufs_bmaparray returned depth 1");
+			/* NOTREACHED */
+#endif
+		default:			/* Chain of indirect blocks. */
+			inp = a + --depth;
+			/*
+			 * Not at the first slot of the lowest indirect block:
+			 * step back to that slot, or to lastblock if that is
+			 * nearer, and look the block up again.
+			 */
+			if (inp->in_off > 0 && lbn != lastblock) {
+				lbn -= inp->in_off < lbn - lastblock ?
+				    inp->in_off : lbn - lastblock;
+				break;
+			}
+			/*
+			 * Account for the entries of each indirect block on
+			 * the path, from in_off up to the recorded end
+			 * offset, moving up one level per iteration.
+			 */
+			for (; depth && (inp->in_off == 0 || lbn == lastblock);
+			    --inp, --depth) {
+				if (bread(vp,
+				    inp->in_lbn, fs->lfs_bsize, NOCRED, &bp))
+					panic("lfs_truncate: bread bno %d",
+					    inp->in_lbn);
+				daddrp = (daddr_t *)bp->b_data + inp->in_off;
+				for (i = inp->in_off;
+				    i++ <= a_end[depth].in_off;) {
+					daddr = *daddrp++;
+					SEGDEC;
+				}
+				a_end[depth].in_off = NINDIR(fs) - 1;
+				/*
+				 * A block whose entries were counted from
+				 * slot 0 is just released; otherwise its
+				 * tail is zeroed and it is written back.
+				 */
+				if (inp->in_off == 0)
+					brelse (bp);
+				else {
+					bzero((daddr_t *)bp->b_data +
+					    inp->in_off, fs->lfs_bsize - 
+					    inp->in_off * sizeof(daddr_t));
+					if (e1 = VOP_BWRITE(bp)) 
+						return (e1);
+				}
+			}
+			/* The top-level indirect block itself is released. */
+			if (depth == 0 && a[1].in_off == 0) {
+				off = a[0].in_off;
+				daddr = ip->i_ib[off];
+				SEGDEC;
+				ip->i_ib[off] = 0;
+			}
+			if (lbn == lastblock || lbn <= NDADDR)
+				--lbn;
+			else {
+				lbn -= NINDIR(fs);
+				if (lbn < lastblock)
+					lbn = lastblock;
+			}
+		}
+	}
+	/* Flush the final run of blocks accumulated by SEGDEC. */
+	UPDATE_SEGUSE;
+
+	/* If truncating the file to 0, update the version number. */
+	if (length == 0) {
+		LFS_IENTRY(ifp, fs, ip->i_number, bp);
+		++ifp->if_version;
+		(void) VOP_BWRITE(bp);
+	}
+
+#ifdef DIAGNOSTIC
+	/*
+	 * NOTE(review): blocksreleased is clamped to ip->i_blocks here but
+	 * is passed through fsbtodb() again below, while the comparison
+	 * treats i_blocks as already being in fsbtodb units; this looks like
+	 * a unit mismatch -- confirm.
+	 */
+	if (ip->i_blocks < fsbtodb(fs, blocksreleased)) {
+		printf("lfs_truncate: block count < 0\n");
+		blocksreleased = ip->i_blocks;
+	}
+#endif
+	ip->i_blocks -= fsbtodb(fs, blocksreleased);
+	fs->lfs_bfree +=  fsbtodb(fs, blocksreleased);
+	ip->i_flag |= IN_CHANGE | IN_UPDATE;
+	/*
+	 * Traverse dirty block list counting number of dirty buffers
+	 * that are being deleted out of the cache, so that the lfs_avail
+	 * field can be updated.
+	 */
+	a_released = 0;
+	i_released = 0;
+	for (bp = vp->v_dirtyblkhd.lh_first; bp; bp = bp->b_vnbufs.le_next)
+		if (bp->b_flags & B_LOCKED) {
+			++a_released;
+			/*
+			 * XXX
+			 * When buffers are created in the cache, their block
+			 * number is set equal to their logical block number.
+			 * If that is still true, we are assuming that the
+			 * blocks are new (not yet on disk) and weren't
+			 * counted above.  However, there is a slight chance
+			 * that a block's disk address is equal to its logical
+			 * block number in which case, we'll get an overcounting
+			 * here.
+			 */
+			if (bp->b_blkno == bp->b_lblkno)
+				++i_released;
+		}
+	/* From here on blocksreleased is already in fsbtodb units. */
+	blocksreleased = fsbtodb(fs, i_released);
+#ifdef DIAGNOSTIC
+	if (blocksreleased > ip->i_blocks) {
+		printf("lfs_inode: Warning! %s\n",
+		    "more blocks released from inode than are in inode");
+		blocksreleased = ip->i_blocks;
+	}
+#endif
+	fs->lfs_bfree += blocksreleased;
+	ip->i_blocks -= blocksreleased;
+#ifdef DIAGNOSTIC
+	if (length == 0 && ip->i_blocks != 0)
+		printf("lfs_inode: Warning! %s%d%s\n",
+		    "Truncation to zero, but ", ip->i_blocks,
+		    " blocks left on inode");
+#endif
+	fs->lfs_avail += fsbtodb(fs, a_released);
+	/* Keep (V_SAVE) dirty buffers only when part of the file remains. */
+	e1 = vinvalbuf(vp, (length > 0) ? V_SAVE : 0, ap->a_cred, ap->a_p,
+	    0, 0); 
+	e2 = VOP_UPDATE(vp, &tv, &tv, 0);
+	/* Report the vinvalbuf error in preference to the update error. */
+	return (e1 ? e1 : e2 ? e2 : 0);
+}
diff --git a/sys/ufs/lfs/lfs_segment.c b/sys/ufs/lfs/lfs_segment.c
new file mode 100644
index 00000000000..249d59ddda5
--- /dev/null
+++ b/sys/ufs/lfs/lfs_segment.c
@@ -0,0 +1,1111 @@
+/*
+ * Copyright (c) 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)lfs_segment.c	8.5 (Berkeley) 1/4/94
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/namei.h>
+#include <sys/kernel.h>
+#include <sys/resourcevar.h>
+#include <sys/file.h>
+#include <sys/stat.h>
+#include <sys/buf.h>
+#include <sys/proc.h>
+#include <sys/conf.h>
+#include <sys/vnode.h>
+#include <sys/malloc.h>
+#include <sys/mount.h>
+
+#include <miscfs/specfs/specdev.h>
+#include <miscfs/fifofs/fifo.h>
+
+#include <ufs/ufs/quota.h>
+#include <ufs/ufs/inode.h>
+#include <ufs/ufs/dir.h>
+#include <ufs/ufs/ufsmount.h>
+#include <ufs/ufs/ufs_extern.h>
+
+#include <ufs/lfs/lfs.h>
+#include <ufs/lfs/lfs_extern.h>
+
+extern int count_lock_queue __P((void));	/* recounts locked buffers */
+
+#define MAX_ACTIVE	10	/* lfs_nactive above this forces a checkpoint */
+/*
+ * Determine if it's OK to start a partial in this segment, or if we need to
+ * go on to a new one: the disk blocks left must exceed 1 << lfs_fsbtodb.
+ */
+#define	LFS_PARTIAL_FITS(fs) \
+	((fs)->lfs_dbpseg - ((fs)->lfs_offset - (fs)->lfs_curseg) > \
+	1 << (fs)->lfs_fsbtodb)
+/* Routines defined in this file (lfs_iset has no definition here). */
+void	 lfs_callback __P((struct buf *));
+void	 lfs_gather __P((struct lfs *, struct segment *,
+	     struct vnode *, int (*) __P((struct lfs *, struct buf *))));
+int	 lfs_gatherblock __P((struct segment *, struct buf *, int *));
+void	 lfs_iset __P((struct inode *, daddr_t, time_t));
+int	 lfs_match_data __P((struct lfs *, struct buf *));
+int	 lfs_match_dindir __P((struct lfs *, struct buf *));
+int	 lfs_match_indir __P((struct lfs *, struct buf *));
+int	 lfs_match_tindir __P((struct lfs *, struct buf *));
+void	 lfs_newseg __P((struct lfs *));
+void	 lfs_shellsort __P((struct buf **, daddr_t *, register int));
+void	 lfs_supercallback __P((struct buf *));
+void	 lfs_updatemeta __P((struct segment *));
+int	 lfs_vref __P((struct vnode *));
+void	 lfs_vunref __P((struct vnode *));
+void	 lfs_writefile __P((struct lfs *, struct segment *, struct vnode *));
+int	 lfs_writeinode __P((struct lfs *, struct segment *, struct inode *));
+int	 lfs_writeseg __P((struct lfs *, struct segment *));
+void	 lfs_writesuper __P((struct lfs *));
+void	 lfs_writevnodes __P((struct lfs *fs, struct mount *mp,
+	    struct segment *sp, int dirops));
+
+int	lfs_allclean_wakeup;		/* Cleaner wakeup address. */
+
+/* Statistics counters, updated only where DOSTATS is defined. */
+#define DOSTATS
+struct lfs_stats lfs_stats;
+
+/* op values to lfs_writevnodes */
+#define	VN_REG	0		/* no op-specific filtering */
+#define	VN_DIROP	1	/* only referenced by disabled code */
+#define	VN_EMPTY	2	/* skip vnodes that have dirty buffers */
+
+/*
+ * Ifile and meta data blocks are not marked busy, so segment writes MUST be
+ * single threaded.  Currently, there are two paths into lfs_segwrite, sync()
+ * and getnewbuf().  They both mark the file system busy.  Lfs_vflush()
+ * explicitly marks the file system busy.  So lfs_segwrite is safe.  I think.
+ */
+
+/* Flush one vnode: write vp's dirty blocks and inode as a SEGM_SYNC write. */
+int
+lfs_vflush(vp)
+	struct vnode *vp;		/* vnode to flush */
+{
+	struct inode *ip;
+	struct lfs *fs;
+	struct segment *sp;
+
+	fs = VFSTOUFS(vp->v_mount)->um_lfs;
+	if (fs->lfs_nactive > MAX_ACTIVE)	/* defer to a full checkpoint */
+		return(lfs_segwrite(vp->v_mount, SEGM_SYNC|SEGM_CKP));
+	lfs_seglock(fs, SEGM_SYNC);
+	sp = fs->lfs_sp;
+
+	ip = VTOI(vp);
+	if (vp->v_dirtyblkhd.lh_first == NULL)	/* vp has no dirty buffers */
+		lfs_writevnodes(fs, vp->v_mount, sp, VN_EMPTY);
+
+	do {
+		do {
+			if (vp->v_dirtyblkhd.lh_first != NULL)
+				lfs_writefile(fs, sp, vp);
+		} while (lfs_writeinode(fs, sp, ip));
+		/* Only the ifile repeats, when lfs_writeseg returns nonzero. */
+	} while (lfs_writeseg(fs, sp) && ip->i_number == LFS_IFILE_INUM);
+
+#ifdef DOSTATS
+	++lfs_stats.nwrites;
+	if (sp->seg_flags & SEGM_SYNC)
+		++lfs_stats.nsync_writes;
+	if (sp->seg_flags & SEGM_CKP)
+		++lfs_stats.ncheckpoints;
+#endif
+	lfs_segunlock(fs);
+	return (0);
+}
+
+/* Walk mp's vnode list, writing dirty files and inodes into segment sp. */
+void
+lfs_writevnodes(fs, mp, sp, op)
+	struct lfs *fs;
+	struct mount *mp;		/* mount whose vnodes are scanned */
+	struct segment *sp;		/* segment being filled */
+	int op;				/* VN_REG, VN_DIROP or VN_EMPTY */
+{
+	struct inode *ip;
+	struct vnode *vp;
+
+loop:
+	for (vp = mp->mnt_vnodelist.lh_first;
+	     vp != NULL;
+	     vp = vp->v_mntvnodes.le_next) {
+		/*
+		 * If the vnode that we are about to sync is no longer
+		 * associated with this mount point, start over.
+		 */
+		if (vp->v_mount != mp)
+			goto loop;
+
+		/* XXX ignore dirops for now
+		if (op == VN_DIROP && !(vp->v_flag & VDIROP) ||
+		    op != VN_DIROP && (vp->v_flag & VDIROP))
+			continue;
+		*/
+		/* VN_EMPTY visits only vnodes that have no dirty buffers. */
+		if (op == VN_EMPTY && vp->v_dirtyblkhd.lh_first)
+			continue;
+		if (vp->v_type == VNON)
+			continue;
+
+		if (lfs_vref(vp))	/* VXLOCK set or vget failed */
+			continue;
+
+		/*
+		 * Write the file's dirty blocks and its inode, unless this
+		 * is the IFILE.
+		 */
+		ip = VTOI(vp);
+		if ((ip->i_flag &
+		    (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE) ||
+		    vp->v_dirtyblkhd.lh_first != NULL) &&
+		    ip->i_number != LFS_IFILE_INUM) {
+			if (vp->v_dirtyblkhd.lh_first != NULL)
+				lfs_writefile(fs, sp, vp);
+			(void) lfs_writeinode(fs, sp, ip);
+		}
+		vp->v_flag &= ~VDIROP;
+		lfs_vunref(vp);
+	}
+}
+
+/* Write mp's dirty vnodes to the log, checkpointing if SEGM_CKP is set or */
+/* lfs_nactive exceeds MAX_ACTIVE.  Returns 0, or the error from tsleep(). */
+int
+lfs_segwrite(mp, flags)
+	struct mount *mp;
+	int flags;			/* SEGM_* flags; SEGM_CKP: checkpoint */
+{
+	struct buf *bp;
+	struct inode *ip;
+	struct lfs *fs;
+	struct segment *sp;
+	struct vnode *vp;
+	SEGUSE *segusep;
+	daddr_t ibno;
+	CLEANERINFO *cip;
+	int clean, do_ckp, error, i;
+
+	fs = VFSTOUFS(mp)->um_lfs;
+
+	/*
+	 * While no more than 2 segments are clean, wake the cleaner and
+	 * sleep on lfs_avail; a tsleep error is returned to the caller.
+	 */
+	do {
+		LFS_CLEANERINFO(cip, fs, bp);
+		clean = cip->clean;
+		brelse(bp);
+		if (clean <= 2) {
+			printf ("segs clean: %d\n", clean);
+			wakeup(&lfs_allclean_wakeup);
+			if (error = tsleep(&fs->lfs_avail, PRIBIO + 1,
+			    "lfs writer", 0))
+				return (error);
+		}
+	} while (clean <= 2 );
+
+	/*
+	 * Allocate a segment structure and enough space to hold pointers to
+	 * the maximum possible number of buffers which can be described in a
+	 * single summary block.
+	 */
+	do_ckp = flags & SEGM_CKP || fs->lfs_nactive > MAX_ACTIVE;
+	lfs_seglock(fs, flags | (do_ckp ? SEGM_CKP : 0));
+	sp = fs->lfs_sp;
+
+	lfs_writevnodes(fs, mp, sp, VN_REG);
+
+	/* XXX ignore ordering of dirops for now */
+	/* XXX
+	fs->lfs_writer = 1;
+	if (fs->lfs_dirops && (error =
+	    tsleep(&fs->lfs_writer, PRIBIO + 1, "lfs writer", 0))) {
+		free(sp->bpp, M_SEGMENT);
+		free(sp, M_SEGMENT); 
+		fs->lfs_writer = 0;
+		return (error);
+	}
+
+	lfs_writevnodes(fs, mp, sp, VN_DIROP);
+	*/
+
+	/*
+	 * If we are doing a checkpoint, mark everything since the
+	 * last checkpoint as no longer ACTIVE.
+	 */
+	if (do_ckp)
+		for (ibno = fs->lfs_cleansz + fs->lfs_segtabsz;
+		     --ibno >= fs->lfs_cleansz; ) {
+			if (bread(fs->lfs_ivnode, ibno, fs->lfs_bsize,
+			    NOCRED, &bp))
+				panic("lfs: ifile read");
+			segusep = (SEGUSE *)bp->b_data;
+			for (i = fs->lfs_sepb; i--; segusep++)
+				segusep->su_flags &= ~SEGUSE_ACTIVE;
+			error = VOP_BWRITE(bp);	/* XXX result unused */
+		}
+
+	if (do_ckp || fs->lfs_doifile) {
+redo:
+		vp = fs->lfs_ivnode;
+		while (vget(vp, 1));	/* retry until vget succeeds */
+		ip = VTOI(vp);
+		if (vp->v_dirtyblkhd.lh_first != NULL)
+			lfs_writefile(fs, sp, vp);
+		(void)lfs_writeinode(fs, sp, ip);
+		vput(vp);
+		if (lfs_writeseg(fs, sp) && do_ckp)
+			goto redo;
+	} else
+		(void) lfs_writeseg(fs, sp);
+
+	/*
+	 * If the I/O count is non-zero, sleep until it reaches zero.  At the
+	 * moment, the user's process hangs around so we can sleep.
+	 */
+	/* XXX ignore dirops for now
+	fs->lfs_writer = 0;
+	fs->lfs_doifile = 0;
+	wakeup(&fs->lfs_dirops);
+	*/
+
+#ifdef DOSTATS
+	++lfs_stats.nwrites;
+	if (sp->seg_flags & SEGM_SYNC)
+		++lfs_stats.nsync_writes;
+	if (sp->seg_flags & SEGM_CKP)
+		++lfs_stats.ncheckpoints;
+#endif
+	lfs_segunlock(fs);
+	return (0);
+}
+
+/*
+ * Gather vp's dirty blocks into segment sp under a new FINFO entry.
+ */
+void
+lfs_writefile(fs, sp, vp)
+	struct lfs *fs;
+	struct segment *sp;
+	struct vnode *vp;
+{
+	struct buf *bp;
+	struct finfo *fip;
+	IFILE *ifp;
+
+	if (sp->seg_bytes_left < fs->lfs_bsize ||
+	    sp->sum_bytes_left < sizeof(struct finfo))
+		(void) lfs_writeseg(fs, sp);	/* no room: flush first */
+	/* Charge the summary for an FINFO less one block slot and count it. */
+	sp->sum_bytes_left -= sizeof(struct finfo) - sizeof(daddr_t);
+	++((SEGSUM *)(sp->segsum))->ss_nfinfo;
+
+	fip = sp->fip;
+	fip->fi_nblocks = 0;
+	fip->fi_ino = VTOI(vp)->i_number;
+	LFS_IENTRY(ifp, fs, fip->fi_ino, bp);
+	fip->fi_version = ifp->if_version;
+	brelse(bp);
+
+	/*
+	 * It may not be necessary to write the meta-data blocks at this point,
+	 * as the roll-forward recovery code should be able to reconstruct the
+	 * list.
+	 */
+	lfs_gather(fs, sp, vp, lfs_match_data);
+	lfs_gather(fs, sp, vp, lfs_match_indir);
+	lfs_gather(fs, sp, vp, lfs_match_dindir);
+#ifdef TRIPLE
+	lfs_gather(fs, sp, vp, lfs_match_tindir);
+#endif
+	/* Step past the FINFO if it got blocks; otherwise undo the charge. */
+	fip = sp->fip;
+	if (fip->fi_nblocks != 0) {
+		sp->fip =
+		    (struct finfo *)((caddr_t)fip + sizeof(struct finfo) +
+		    sizeof(daddr_t) * (fip->fi_nblocks - 1));
+		sp->start_lbp = &sp->fip->fi_blocks[0];
+	} else {
+		sp->sum_bytes_left += sizeof(struct finfo) - sizeof(daddr_t);
+		--((SEGSUM *)(sp->segsum))->ss_nfinfo;
+	}
+}
+
+/* Copy ip into sp's inode block; returns nonzero if the ifile needs a redo. */
+int
+lfs_writeinode(fs, sp, ip)
+	struct lfs *fs;
+	struct segment *sp;
+	struct inode *ip;
+{
+	struct buf *bp, *ibp;
+	IFILE *ifp;
+	SEGUSE *sup;
+	daddr_t daddr;
+	ino_t ino;
+	int error, i, ndx;
+	int redo_ifile = 0;
+
+	if (!(ip->i_flag & (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)))
+		return(0);
+
+	/* Allocate a new inode block if necessary. */
+	if (sp->ibp == NULL) {
+		/* Allocate a new segment if necessary. */
+		if (sp->seg_bytes_left < fs->lfs_bsize ||
+		    sp->sum_bytes_left < sizeof(daddr_t))
+			(void) lfs_writeseg(fs, sp);
+		/* Get next inode block. */
+		daddr = fs->lfs_offset;
+		fs->lfs_offset += fsbtodb(fs, 1);
+		sp->ibp = *sp->cbpp++ =
+		    lfs_newbuf(VTOI(fs->lfs_ivnode)->i_devvp, daddr,
+		    fs->lfs_bsize);
+		/* Zero out inode numbers */
+		for (i = 0; i < INOPB(fs); ++i)
+			((struct dinode *)sp->ibp->b_data)[i].di_inumber = 0;
+		++sp->start_bpp;
+		fs->lfs_avail -= fsbtodb(fs, 1);
+		/* Charge space; block address goes at the summary's tail. */
+		sp->seg_bytes_left -= fs->lfs_bsize;
+		sp->sum_bytes_left -= sizeof(daddr_t);
+		ndx = LFS_SUMMARY_SIZE / sizeof(daddr_t) -
+		    sp->ninodes / INOPB(fs) - 1;
+		((daddr_t *)(sp->segsum))[ndx] = daddr;
+	}
+
+	/* Update the inode times and copy the inode onto the inode page. */
+	if (ip->i_flag & IN_MODIFIED)
+		--fs->lfs_uinodes;
+	ITIMES(ip, &time, &time);
+	ip->i_flag &= ~(IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE);
+	bp = sp->ibp;
+	((struct dinode *)bp->b_data)[sp->ninodes % INOPB(fs)] = ip->i_din;
+	/* Increment inode count in segment summary block. */
+	++((SEGSUM *)(sp->segsum))->ss_ninos;
+
+	/* If this page is full, set flag to allocate a new page. */
+	if (++sp->ninodes % INOPB(fs) == 0)
+		sp->ibp = NULL;
+
+	/*
+	 * If updating the ifile, update the super-block.  Otherwise update
+	 * the disk address recorded for this inode in its ifile entry.
+	 */
+	ino = ip->i_number;
+	if (ino == LFS_IFILE_INUM) {
+		daddr = fs->lfs_idaddr;
+		fs->lfs_idaddr = bp->b_blkno;
+	} else {
+		LFS_IENTRY(ifp, fs, ino, ibp);
+		daddr = ifp->if_daddr;
+		ifp->if_daddr = bp->b_blkno;
+		error = VOP_BWRITE(ibp);
+	}
+
+	/*
+	 * No need to update segment usage if there was no former inode address
+	 * or if the last inode address is in the current partial segment.
+	 */
+	if (daddr != LFS_UNUSED_DADDR && 
+	    !(daddr >= fs->lfs_lastpseg && daddr <= bp->b_blkno)) {
+		LFS_SEGENTRY(sup, fs, datosn(fs, daddr), bp);	/* reuses bp */
+#ifdef DIAGNOSTIC
+		if (sup->su_nbytes < sizeof(struct dinode)) {
+			/* Name the offending segment, then panic. */
+			printf("lfs: negative bytes (segment %d)\n",
+			    datosn(fs, daddr));
+			panic("negative bytes");
+		}
+#endif
+		sup->su_nbytes -= sizeof(struct dinode);
+		redo_ifile =
+		    (ino == LFS_IFILE_INUM && !(bp->b_flags & B_GATHERED));
+		error = VOP_BWRITE(bp);
+	}
+	return (redo_ifile);
+}
+
+/* Queue bp in sp; returns 1 (bp NOT queued) if sp had to be written first. */
+int
+lfs_gatherblock(sp, bp, sptr)
+	struct segment *sp;
+	struct buf *bp;
+	int *sptr;			/* saved spl level, or NULL */
+{
+	struct lfs *fs;
+	int version;
+
+	/*
+	 * If full, finish this segment.  We may be doing I/O, so
+	 * release and reacquire the splbio().
+	 */
+#ifdef DIAGNOSTIC
+	if (sp->vp == NULL)
+		panic ("lfs_gatherblock: Null vp in segment");
+#endif
+	fs = sp->fs;
+	if (sp->sum_bytes_left < sizeof(daddr_t) ||
+	    sp->seg_bytes_left < fs->lfs_bsize) {
+		if (sptr)
+			splx(*sptr);
+		lfs_updatemeta(sp);
+		version = sp->fip->fi_version;
+		(void) lfs_writeseg(fs, sp);
+		/* Re-enter the current file's FINFO in the new summary. */
+		sp->fip->fi_version = version;
+		sp->fip->fi_ino = VTOI(sp->vp)->i_number;
+		/* Add the current file to the segment summary. */
+		++((SEGSUM *)(sp->segsum))->ss_nfinfo;
+		sp->sum_bytes_left -= 
+		    sizeof(struct finfo) - sizeof(daddr_t);
+
+		if (sptr)
+			*sptr = splbio();
+		return(1);
+	}
+
+	/* Insert into the buffer list, update the FINFO block. */
+	bp->b_flags |= B_GATHERED;
+	*sp->cbpp++ = bp;
+	sp->fip->fi_blocks[sp->fip->fi_nblocks++] = bp->b_lblkno;
+
+	sp->sum_bytes_left -= sizeof(daddr_t);
+	sp->seg_bytes_left -= fs->lfs_bsize;
+	return(0);
+}
+
+/* Gather vp's dirty buffers that match() accepts into the segment sp. */
+void
+lfs_gather(fs, sp, vp, match)
+	struct lfs *fs;
+	struct segment *sp;
+	struct vnode *vp;
+	int (*match) __P((struct lfs *, struct buf *));
+{
+	struct buf *bp;
+	int s;
+
+	sp->vp = vp;
+	s = splbio();
+loop:	for (bp = vp->v_dirtyblkhd.lh_first; bp; bp = bp->b_vnbufs.le_next) {
+		if (bp->b_flags & B_BUSY || !match(fs, bp) ||
+		    bp->b_flags & B_GATHERED)
+			continue;
+#ifdef DIAGNOSTIC
+		if (!(bp->b_flags & B_DELWRI))
+			panic("lfs_gather: bp not B_DELWRI");
+		if (!(bp->b_flags & B_LOCKED))
+			panic("lfs_gather: bp not B_LOCKED");
+#endif
+		if (lfs_gatherblock(sp, bp, &s))	/* sp flushed: rescan */
+			goto loop;
+	}
+	splx(s);
+	lfs_updatemeta(sp);
+	sp->vp = NULL;
+}
+
+/*
+ * Assign disk addresses to the blocks just gathered for sp->vp and fix up
+ * the inode, indirect blocks and segment usage that described the old ones.
+ */
+void
+lfs_updatemeta(sp)
+	struct segment *sp;
+{
+	SEGUSE *sup;
+	struct buf *bp;
+	struct lfs *fs;
+	struct vnode *vp;
+	struct indir a[NIADDR + 2], *ap;
+	struct inode *ip;
+	daddr_t daddr, lbn, off;
+	int db_per_fsb, error, i, nblocks, num;
+
+	vp = sp->vp;
+	nblocks = &sp->fip->fi_blocks[sp->fip->fi_nblocks] - sp->start_lbp;
+	if (vp == NULL || nblocks == 0) 
+		return;
+
+	/* Sort the blocks (skipped for SEGM_CLEAN writes). */
+	if (!(sp->seg_flags & SEGM_CLEAN))
+		lfs_shellsort(sp->start_bpp, sp->start_lbp, nblocks);
+
+	/*
+	 * Assign disk addresses, and update references to the logical
+	 * block and the segment usage information.
+	 */
+	fs = sp->fs;
+	db_per_fsb = fsbtodb(fs, 1);
+	for (i = nblocks; i--; ++sp->start_bpp) {
+		lbn = *sp->start_lbp++;
+		(*sp->start_bpp)->b_blkno = off = fs->lfs_offset;
+		fs->lfs_offset += db_per_fsb;
+		/* Get lbn's current address and the num-level path to it. */
+		if (error = ufs_bmaparray(vp, lbn, &daddr, a, &num, NULL))
+			panic("lfs_updatemeta: ufs_bmaparray %d", error);
+		ip = VTOI(vp);
+		switch (num) {
+		case 0:			/* pointer is in ip->i_db[] */
+			ip->i_db[lbn] = off;
+			break;
+		case 1:			/* pointer is in ip->i_ib[] */
+			ip->i_ib[a[0].in_off] = off;
+			break;
+		default:		/* pointer is in an indirect block */
+			ap = &a[num - 1];
+			if (bread(vp, ap->in_lbn, fs->lfs_bsize, NOCRED, &bp))
+				panic("lfs_updatemeta: bread bno %d",
+				    ap->in_lbn);
+			/*
+			 * Bread may create a new indirect block which needs
+			 * to get counted for the inode.
+			 */
+			if (bp->b_blkno == -1 && !(bp->b_flags & B_CACHE)) {
+printf ("Updatemeta allocating indirect block: shouldn't happen\n");
+				ip->i_blocks += btodb(fs->lfs_bsize);
+				fs->lfs_bfree -= btodb(fs->lfs_bsize);
+			}
+			((daddr_t *)bp->b_data)[ap->in_off] = off;
+			VOP_BWRITE(bp);
+		}
+
+		/* Uncharge the old copy from the segment that held it. */
+		if (daddr != UNASSIGNED &&
+		    !(daddr >= fs->lfs_lastpseg && daddr <= off)) {
+			LFS_SEGENTRY(sup, fs, datosn(fs, daddr), bp);
+#ifdef DIAGNOSTIC
+			if (sup->su_nbytes < fs->lfs_bsize) {
+				/* Name the offending segment, then panic. */
+				printf("lfs: negative bytes (segment %d)\n",
+				    datosn(fs, daddr));
+				panic ("Negative Bytes");
+			}
+#endif
+			sup->su_nbytes -= fs->lfs_bsize;
+			error = VOP_BWRITE(bp);
+		}
+	}
+}
+
+/*
+ * Start the next partial segment; returns 1 if it opens a new segment.
+ */
+int
+lfs_initseg(fs)
+	struct lfs *fs;
+{
+	struct segment *sp;
+	SEGUSE *sup;
+	SEGSUM *ssp;
+	struct buf *bp;
+	int repeat;
+
+	sp = fs->lfs_sp;
+
+	repeat = 0;			/* becomes the return value */
+	/* Advance to the next segment if no partial fits in this one. */
+	if (!LFS_PARTIAL_FITS(fs)) {
+		/* Wake up any cleaning procs waiting on this file system. */
+		wakeup(&lfs_allclean_wakeup);
+
+		lfs_newseg(fs);
+		repeat = 1;
+		fs->lfs_offset = fs->lfs_curseg;
+		sp->seg_number = datosn(fs, fs->lfs_curseg);
+		sp->seg_bytes_left = fs->lfs_dbpseg * DEV_BSIZE;
+
+		/*
+		 * If the segment contains a superblock, update the offset
+		 * and summary address to skip over it.
+		 */
+		LFS_SEGENTRY(sup, fs, sp->seg_number, bp);
+		if (sup->su_flags & SEGUSE_SUPERBLOCK) {
+			fs->lfs_offset += LFS_SBPAD / DEV_BSIZE;
+			sp->seg_bytes_left -= LFS_SBPAD;
+		}
+		brelse(bp);
+	} else {
+		sp->seg_number = datosn(fs, fs->lfs_curseg);
+		sp->seg_bytes_left = (fs->lfs_dbpseg -
+		    (fs->lfs_offset - fs->lfs_curseg)) * DEV_BSIZE;
+	}
+	fs->lfs_lastpseg = fs->lfs_offset;	/* start of this partial */
+
+	sp->fs = fs;
+	sp->ibp = NULL;
+	sp->ninodes = 0;
+
+	/* Get a new buffer for SEGSUM and enter it into the buffer list. */
+	sp->cbpp = sp->bpp;
+	*sp->cbpp = lfs_newbuf(VTOI(fs->lfs_ivnode)->i_devvp, fs->lfs_offset,
+	     LFS_SUMMARY_SIZE);
+	sp->segsum = (*sp->cbpp)->b_data;
+	bzero(sp->segsum, LFS_SUMMARY_SIZE);
+	sp->start_bpp = ++sp->cbpp;
+	fs->lfs_offset += LFS_SUMMARY_SIZE / DEV_BSIZE;
+
+	/* Set pointer to SEGSUM, initialize it. */
+	ssp = sp->segsum;
+	ssp->ss_next = fs->lfs_nextseg;
+	ssp->ss_nfinfo = ssp->ss_ninos = 0;
+
+	/* Set pointer to first FINFO, initialize it. */
+	sp->fip = (struct finfo *)(sp->segsum + sizeof(SEGSUM));
+	sp->fip->fi_nblocks = 0;
+	sp->start_lbp = &sp->fip->fi_blocks[0];
+	/* Deduct the summary block, and the SEGSUM header within it. */
+	sp->seg_bytes_left -= LFS_SUMMARY_SIZE;
+	sp->sum_bytes_left = LFS_SUMMARY_SIZE - sizeof(SEGSUM);
+
+	return(repeat);
+}
+
+/*
+ * Make lfs_nextseg the current segment, marking it dirty and active, then
+ * pick the next non-dirty segment (searching circularly) as lfs_nextseg.
+ */
+void
+lfs_newseg(fs)
+	struct lfs *fs;
+{
+	CLEANERINFO *cip;
+	SEGUSE *sup;
+	struct buf *bp;
+	int curseg, isdirty, sn;
+
+	LFS_SEGENTRY(sup, fs, datosn(fs, fs->lfs_nextseg), bp);
+	sup->su_flags |= SEGUSE_DIRTY | SEGUSE_ACTIVE;
+	sup->su_nbytes = 0;
+	sup->su_nsums = 0;
+	sup->su_ninos = 0;
+	(void) VOP_BWRITE(bp);
+	/* Move one segment from the clean count to the dirty count. */
+	LFS_CLEANERINFO(cip, fs, bp);
+	--cip->clean;
+	++cip->dirty;
+	(void) VOP_BWRITE(bp);
+
+	fs->lfs_lastseg = fs->lfs_curseg;
+	fs->lfs_curseg = fs->lfs_nextseg;
+	for (sn = curseg = datosn(fs, fs->lfs_curseg);;) {
+		sn = (sn + 1) % fs->lfs_nseg;
+		if (sn == curseg)	/* wrapped all the way around */
+			panic("lfs_newseg: no clean segments");
+		LFS_SEGENTRY(sup, fs, sn, bp);
+		isdirty = sup->su_flags & SEGUSE_DIRTY;
+		brelse(bp);
+		if (!isdirty)
+			break;
+	}
+	++fs->lfs_nactive;		/* compared with MAX_ACTIVE */
+	fs->lfs_nextseg = sntoda(fs, sn);
+#ifdef DOSTATS
+	++lfs_stats.segsused;
+#endif
+}
+
+/* Write out sp's gathered buffers and set up the next partial segment. */
+int
+lfs_writeseg(fs, sp)
+	struct lfs *fs;
+	struct segment *sp;
+{
+	extern int locked_queue_count;
+	struct buf **bpp, *bp, *cbp;
+	SEGUSE *sup;
+	SEGSUM *ssp;
+	dev_t i_dev;
+	size_t size;
+	u_long *datap, *dp;
+	int ch_per_blk, do_again, i, nblocks, num, s;
+	int (*strategy)__P((struct vop_strategy_args *));
+	struct vop_strategy_args vop_strategy_a;
+	u_short ninos;
+	char *p;
+
+	/*
+	 * If there are no buffers other than the segment summary to write,
+	 * don't do anything -- even on a checkpoint; writing the superblock
+	 * for a checkpoint is not done in this routine.
+	 */
+	if ((nblocks = sp->cbpp - sp->bpp) == 1)
+		return (0);
+
+	ssp = (SEGSUM *)sp->segsum;
+
+	/* Segment usage: ninos counts inode blocks; '-' binds before '<<'. */
+	LFS_SEGENTRY(sup, fs, sp->seg_number, bp);
+	ninos = (ssp->ss_ninos + INOPB(fs) - 1) / INOPB(fs);
+	sup->su_nbytes += nblocks - 1 - ninos << fs->lfs_bshift;
+	sup->su_nbytes += ssp->ss_ninos * sizeof(struct dinode);
+	sup->su_nbytes += LFS_SUMMARY_SIZE;
+	sup->su_lastmod = time.tv_sec;
+	sup->su_ninos += ninos;
+	++sup->su_nsums;
+	do_again = !(bp->b_flags & B_GATHERED);	/* usage buf not in sp yet */
+	(void)VOP_BWRITE(bp);
+	/*
+	 * Checksum the first word of each data block, then the summary; the
+	 * summary block itself is skipped in the first pass.  Set the create
+	 * time here so that it's guaranteed to be later than the inode mod times.
+	 *
+	 * XXX
+	 * Fix this to do it inline, instead of malloc/copy.
+	 */
+	datap = dp = malloc(nblocks * sizeof(u_long), M_SEGMENT, M_WAITOK);
+	for (bpp = sp->bpp, i = nblocks - 1; i--;) {
+		if ((*++bpp)->b_flags & B_INVAL) {
+			if (copyin((*bpp)->b_saveaddr, dp++, sizeof(u_long)))
+				panic("lfs_writeseg: copyin failed");
+		} else
+			*dp++ = ((u_long *)(*bpp)->b_data)[0];
+	}
+	ssp->ss_create = time.tv_sec;
+	ssp->ss_datasum = cksum(datap, (nblocks - 1) * sizeof(u_long));
+	ssp->ss_sumsum =
+	    cksum(&ssp->ss_datasum, LFS_SUMMARY_SIZE - sizeof(ssp->ss_sumsum));
+	free(datap, M_SEGMENT);
+#ifdef DIAGNOSTIC
+	if (fs->lfs_bfree < fsbtodb(fs, ninos) + LFS_SUMMARY_SIZE / DEV_BSIZE)
+		panic("lfs_writeseg: No diskspace for summary");
+#endif
+	fs->lfs_bfree -= (fsbtodb(fs, ninos) + LFS_SUMMARY_SIZE / DEV_BSIZE);
+
+	i_dev = VTOI(fs->lfs_ivnode)->i_dev;
+	strategy = VTOI(fs->lfs_ivnode)->i_devvp->v_op[VOFFSET(vop_strategy)];
+
+	/*
+	 * When we simply write the blocks we lose a rotation for every block
+	 * written.  To avoid this problem, we allocate memory in chunks, copy
+	 * the buffers into the chunk and write the chunk.  MAXPHYS is the
+	 * largest size I/O devices can handle.
+	 * When the data is copied to the chunk, turn off the B_LOCKED bit
+	 * and brelse the buffer (which will move them to the LRU list).  Add
+	 * the B_CALL flag to the buffer header so we can count I/O's for the
+	 * checkpoints and so we can release the allocated memory.
+	 *
+	 * XXX
+	 * This should be removed if the new virtual memory system allows us to
+	 * easily make the buffers contiguous in kernel memory and if that's
+	 * fast enough.
+	 */
+	ch_per_blk = MAXPHYS / fs->lfs_bsize;	/* blocks per chunk */
+	for (bpp = sp->bpp, i = nblocks; i;) {
+		num = ch_per_blk;
+		if (num > i)
+			num = i;
+		i -= num;
+		size = num * fs->lfs_bsize;
+		cbp = lfs_newbuf(VTOI(fs->lfs_ivnode)->i_devvp,
+		    (*bpp)->b_blkno, size);
+		cbp->b_dev = i_dev;
+		cbp->b_flags |= B_ASYNC | B_BUSY;
+
+		s = splbio();
+		++fs->lfs_iocount;	/* dropped again in lfs_callback */
+		for (p = cbp->b_data; num--;) {
+			bp = *bpp++;
+			/*
+			 * Fake buffers from the cleaner are marked as B_INVAL.
+			 * We need to copy the data from user space rather than
+			 * from the buffer indicated.
+			 * XXX == what do I do on an error?
+			 */
+			if (bp->b_flags & B_INVAL) {
+				if (copyin(bp->b_saveaddr, p, bp->b_bcount))
+					panic("lfs_writeseg: copyin failed");
+			} else
+				bcopy(bp->b_data, p, bp->b_bcount);
+			p += bp->b_bcount;
+			if (bp->b_flags & B_LOCKED)
+				--locked_queue_count;
+			bp->b_flags &= ~(B_ERROR | B_READ | B_DELWRI |
+			     B_LOCKED | B_GATHERED);
+			if (bp->b_flags & B_CALL) {
+				/* if B_CALL, it was created with lfs_newbuf */
+				brelvp(bp);
+				if (!(bp->b_flags & B_INVAL))
+					free(bp->b_data, M_SEGMENT);
+				free(bp, M_SEGMENT);
+			} else {
+				bremfree(bp);
+				bp->b_flags |= B_DONE;
+				reassignbuf(bp, bp->b_vp);
+				brelse(bp);
+			}
+		}
+		++cbp->b_vp->v_numoutput;
+		splx(s);
+		cbp->b_bcount = p - (char *)cbp->b_data;	/* bytes used */
+		/*
+		 * XXXX This is a gross and disgusting hack.  Since these
+		 * buffers are physically addressed, they hang off the
+		 * device vnode (devvp).  As a result, they have no way
+		 * of getting to the LFS superblock or lfs structure to
+		 * keep track of the number of I/O's pending.  So, I am
+		 * going to stuff the fs into the saveaddr field of
+		 * the buffer (yuk).
+		 */
+		cbp->b_saveaddr = (caddr_t)fs;
+		vop_strategy_a.a_desc = VDESC(vop_strategy);
+		vop_strategy_a.a_bp = cbp;
+		(strategy)(&vop_strategy_a);
+	}
+	/*
+	 * XXX
+	 * Vinvalbuf can move locked buffers off the locked queue
+	 * and we have no way of knowing about this.  So, after
+	 * doing a big write, we recalculate how many buffers are
+	 * really still left on the locked queue.
+	 */
+	locked_queue_count = count_lock_queue();
+	wakeup(&locked_queue_count);
+#ifdef DOSTATS
+	++lfs_stats.psegwrites;
+	lfs_stats.blocktot += nblocks - 1;
+	if (fs->lfs_sp->seg_flags & SEGM_SYNC)
+		++lfs_stats.psyncwrites;
+	if (fs->lfs_sp->seg_flags & SEGM_CLEAN) {
+		++lfs_stats.pcleanwrites;
+		lfs_stats.cleanblocks += nblocks - 1;
+	}
+#endif
+	return (lfs_initseg(fs) || do_again);	/* nonzero: ifile needs redo */
+}
+
+/* Checksum the in-core superblock and start an async write of a copy. */
+void
+lfs_writesuper(fs)
+	struct lfs *fs;
+{
+	struct buf *bp;
+	dev_t i_dev;
+	int (*strategy) __P((struct vop_strategy_args *));
+	int s;
+	struct vop_strategy_args vop_strategy_a;
+
+	i_dev = VTOI(fs->lfs_ivnode)->i_dev;
+	strategy = VTOI(fs->lfs_ivnode)->i_devvp->v_op[VOFFSET(vop_strategy)];
+	/* Checksum the superblock and copy it into a buffer. */
+	fs->lfs_cksum = cksum(fs, sizeof(struct lfs) - sizeof(fs->lfs_cksum));
+	bp = lfs_newbuf(VTOI(fs->lfs_ivnode)->i_devvp, fs->lfs_sboffs[0],
+	    LFS_SBPAD);
+	*(struct lfs *)bp->b_data = *fs;
+
+	/* XXX Toggle between first two superblocks; for now just write first */
+	bp->b_dev = i_dev;
+	bp->b_flags |= B_BUSY | B_CALL | B_ASYNC;
+	bp->b_flags &= ~(B_DONE | B_ERROR | B_READ | B_DELWRI);
+	bp->b_iodone = lfs_supercallback;	/* frees bp and its data */
+	vop_strategy_a.a_desc = VDESC(vop_strategy);
+	vop_strategy_a.a_bp = bp;
+	s = splbio();
+	++bp->b_vp->v_numoutput;
+	splx(s);
+	(strategy)(&vop_strategy_a);
+}
+
+/*
+ * Predicates on a buffer's logical block number, handed to lfs_gather() to
+ * pick buffers off the dirty chain.  This one accepts lbn >= 0.
+ */
+int
+lfs_match_data(fs, bp)
+	struct lfs *fs;
+	struct buf *bp;
+{
+	return (bp->b_lblkno < 0 ? 0 : 1);
+}
+
+int
+lfs_match_indir(fs, bp)
+	struct lfs *fs;
+	struct buf *bp;
+{
+	int lbn = bp->b_lblkno;
+
+	/* Negative lbn with (-lbn - NDADDR) a multiple of NINDIR(fs). */
+	return (lbn >= 0 ? 0 : (-lbn - NDADDR) % NINDIR(fs) == 0);
+}
+
+int
+lfs_match_dindir(fs, bp)
+	struct lfs *fs;
+	struct buf *bp;
+{
+	int lbn = bp->b_lblkno;
+
+	/* Negative lbn with (-lbn - NDADDR) % NINDIR(fs) equal to 1. */
+	return (lbn >= 0 ? 0 : (-lbn - NDADDR) % NINDIR(fs) == 1);
+}
+
+int
+lfs_match_tindir(fs, bp)
+	struct lfs *fs;
+	struct buf *bp;
+{
+	int lbn = bp->b_lblkno;
+
+	/* Negative lbn with (-lbn - NDADDR) % NINDIR(fs) equal to 2. */
+	return (lbn >= 0 ? 0 : (-lbn - NDADDR) % NINDIR(fs) == 2);
+}
+
+/*
+ * Allocate a buf header and data area from M_SEGMENT; bgetvp() it onto vp.
+ */
+struct buf *
+lfs_newbuf(vp, daddr, size)
+	struct vnode *vp;
+	daddr_t daddr;
+	size_t size;
+{
+	struct buf *nbp;
+	size_t rounded;
+
+	rounded = roundup(size, DEV_BSIZE);	/* data area size */
+	nbp = malloc(sizeof(struct buf), M_SEGMENT, M_WAITOK);
+	bzero(nbp, sizeof(struct buf));
+	if (rounded != 0)
+		nbp->b_data = malloc(rounded, M_SEGMENT, M_WAITOK);
+	bgetvp(vp, nbp);
+	nbp->b_blkno = daddr;
+	nbp->b_lblkno = daddr;
+	nbp->b_bcount = size;
+	nbp->b_bufsize = size;
+	nbp->b_resid = 0;
+	nbp->b_error = 0;
+	nbp->b_flags |= B_BUSY | B_CALL | B_NOCACHE;
+	nbp->b_iodone = lfs_callback;
+	return (nbp);
+}
+
+void
+lfs_callback(bp)
+	struct buf *bp;
+{
+	/* lfs_writeseg stored the owning file system in b_saveaddr. */
+	struct lfs *lfsp = (struct lfs *)bp->b_saveaddr;
+
+#ifdef DIAGNOSTIC
+	if (lfsp->lfs_iocount == 0)
+		panic("lfs_callback: zero iocount\n");
+#endif
+	lfsp->lfs_iocount -= 1;		/* one write fewer pending */
+	if (lfsp->lfs_iocount == 0)
+		wakeup(&lfsp->lfs_iocount);
+	brelvp(bp);
+	free(bp->b_data, M_SEGMENT);
+	free(bp, M_SEGMENT);
+}
+
+void
+lfs_supercallback(bp)
+	struct buf *bp;			/* buf set up by lfs_writesuper */
+{
+	brelvp(bp);
+	free(bp->b_data, M_SEGMENT);	/* data area from lfs_newbuf */
+	free(bp, M_SEGMENT);		/* then the header itself */
+}
+
+/*
+ * Shellsort (diminishing increment sort) from Data Structures and
+ * Algorithms, Aho, Hopcroft and Ullman, 1983 Edition, page 290;
+ * see also Knuth Vol. 3, page 84.  The increments are selected from
+ * formula (8), page 95.  Roughly O(N^3/2).
+ *
+ * This is our own private copy of shellsort because we want to sort
+ * two parallel arrays (the array of buffer pointers and the array of
+ * logical block numbers) simultaneously.  The logical block numbers are
+ * compared as unsigned values so that the negative block numbers (meta
+ * data blocks) sort AFTER the data blocks.
+ */
+void
+lfs_shellsort(bp_array, lb_array, nmemb)
+	struct buf **bp_array;
+	daddr_t *lb_array;
+	register int nmemb;
+{
+	static int __rsshell_increments[] = { 4, 1, 0 };
+	register int incr, *incrp, t1, t2;
+	struct buf *bp_temp;
+	daddr_t lb_temp;
+
+	for (incrp = __rsshell_increments; (incr = *incrp++) != 0;)
+		for (t1 = incr; t1 < nmemb; ++t1)
+			for (t2 = t1 - incr; t2 >= 0;)
+				if ((u_long)lb_array[t2] >
+				    (u_long)lb_array[t2 + incr]) {
+					lb_temp = lb_array[t2];
+					lb_array[t2] = lb_array[t2 + incr];
+					lb_array[t2 + incr] = lb_temp;
+					bp_temp = bp_array[t2];
+					bp_array[t2] = bp_array[t2 + incr];
+					bp_array[t2 + incr] = bp_temp;
+					t2 -= incr;
+				} else
+					break;
+}
+
+/*
+ * Return 1 if VXLOCK is set on vp; otherwise return the result of vget().
+ */
+int
+lfs_vref(vp)
+	register struct vnode *vp;
+{
+	if (vp->v_flag & VXLOCK)
+		return(1);
+	return (vget(vp, 0));
+}
+
+/* Release a reference to vp without letting lfs_inactive run. */
+void
+lfs_vunref(vp)
+	register struct vnode *vp;
+{
+	extern int lfs_no_inactive;
+	/*
+	 * This is vrele except that we do not want to VOP_INACTIVE
+	 * this vnode. Rather than inline vrele here, we use a global
+	 * flag to tell lfs_inactive not to run. Yes, it's gross.
+	 */
+	lfs_no_inactive = 1;
+	vrele(vp);
+	lfs_no_inactive = 0;
+}
diff --git a/sys/ufs/lfs/lfs_subr.c b/sys/ufs/lfs/lfs_subr.c
new file mode 100644
index 00000000000..afcd8c29b3f
--- /dev/null
+++ b/sys/ufs/lfs/lfs_subr.c
@@ -0,0 +1,184 @@
+/*
+ * Copyright (c) 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)lfs_subr.c	8.2 (Berkeley) 9/21/93
+ */
+
+#include <sys/param.h>
+#include <sys/namei.h>
+#include <sys/vnode.h>
+#include <sys/buf.h>
+#include <sys/mount.h>
+#include <sys/malloc.h>
+#include <sys/proc.h>
+
+#include <ufs/ufs/quota.h>
+#include <ufs/ufs/inode.h>
+#include <ufs/lfs/lfs.h>
+#include <ufs/lfs/lfs_extern.h>
+
+/*
+ * Read the block containing byte "a_offset" of the directory "a_vp" and
+ * return its buffer through "a_bpp".  If "a_res" is non-zero, it is
+ * pointed at the byte for "a_offset" inside that buffer's data.  On a
+ * read error the buffer is released, "*a_bpp" stays NULL and the error
+ * from bread is returned; otherwise the result is 0.
+ */
+int
+lfs_blkatoff(ap)
+	struct vop_blkatoff_args /* {
+		struct vnode *a_vp;
+		off_t a_offset;
+		char **a_res;
+		struct buf **a_bpp;
+	} */ *ap;
+{
+	register struct lfs *fs = VTOI(ap->a_vp)->i_lfs;
+	daddr_t lbn = lblkno(fs, ap->a_offset);
+	int bsize = blksize(fs);
+	struct buf *bp;
+	int error;
+
+	*ap->a_bpp = NULL;
+	error = bread(ap->a_vp, lbn, bsize, NOCRED, &bp);
+	if (error != 0) {
+		/* The buffer is released here even though the read failed. */
+		brelse(bp);
+		return (error);
+	}
+
+	if (ap->a_res != NULL)
+		*ap->a_res = (char *)bp->b_data + blkoff(fs, ap->a_offset);
+	*ap->a_bpp = bp;
+	return (0);
+}
+
+
+/*
+ * lfs_seglock --
+ *	Single thread the segment writer.  A process that already holds
+ *	the lock just nests one level deeper and merges in "flags"; any
+ *	other process sleeps until the lock count returns to zero.
+ */
+void
+lfs_seglock(fs, flags)
+	struct lfs *fs;
+	unsigned long flags;
+{
+	struct segment *seg;
+	int ipl;
+
+	if (fs->lfs_seglock) {
+		if (fs->lfs_lockpid == curproc->p_pid) {
+			++fs->lfs_seglock;
+			fs->lfs_sp->seg_flags |= flags;
+			return;
+		}
+		while (fs->lfs_seglock)
+			(void)tsleep(&fs->lfs_seglock, PRIBIO + 1,
+			    "lfs seglock", 0);
+	}
+
+	fs->lfs_seglock = 1;
+	fs->lfs_lockpid = curproc->p_pid;
+
+	seg = malloc(sizeof(struct segment), M_SEGMENT, M_WAITOK);
+	fs->lfs_sp = seg;
+	seg->bpp = malloc(((LFS_SUMMARY_SIZE - sizeof(SEGSUM)) /
+	    sizeof(daddr_t) + 1) * sizeof(struct buf *), M_SEGMENT, M_WAITOK);
+	seg->seg_flags = flags;
+	seg->vp = NULL;
+	(void) lfs_initseg(fs);
+
+	/* Hold the I/O count above zero until all writes are scheduled. */
+	ipl = splbio();
+	++fs->lfs_iocount;
+	splx(ipl);
+}
+/*
+ * lfs_segunlock --
+ *	Release one level of the segment lock taken by lfs_seglock.
+ */
+void
+lfs_segunlock(fs)
+	struct lfs *fs;
+{
+	struct segment *sp;
+	unsigned long sync, ckp;
+	int s;
+
+	if (fs->lfs_seglock == 1) {
+		/* Outermost unlock: dispose of the segment state. */
+		sp = fs->lfs_sp;
+		sync = sp->seg_flags & SEGM_SYNC;	/* sp is freed below */
+		ckp = sp->seg_flags & SEGM_CKP;
+		if (sp->bpp != sp->cbpp) {
+			/* Free allocated segment summary */
+			fs->lfs_offset -= LFS_SUMMARY_SIZE / DEV_BSIZE;
+			brelvp(*sp->bpp);
+			free((*sp->bpp)->b_data, M_SEGMENT);
+			free(*sp->bpp, M_SEGMENT);
+		} else
+			printf ("unlock to 0 with no summary");
+		free(sp->bpp, M_SEGMENT);
+		free(sp, M_SEGMENT);
+
+		/*
+		 * Remove the extra I/O count that lfs_seglock added.  For a
+		 * SEGM_SYNC lock with I/O still outstanding, sleep (once) on
+		 * the count; the user's process hangs around so we can sleep.
+		 */
+		s = splbio();
+		--fs->lfs_iocount;
+		/*
+		 * We let checkpoints happen asynchronously.  That means
+		 * that during recovery, we have to roll forward between
+		 * the two segments described by the first and second
+		 * superblocks to make sure that the checkpoint described
+		 * by a superblock completed.
+		 */
+		if (sync && fs->lfs_iocount)
+		    (void)tsleep(&fs->lfs_iocount, PRIBIO + 1, "lfs vflush", 0);
+		splx(s);
+		if (ckp) {
+			fs->lfs_nactive = 0;
+			lfs_writesuper(fs);
+		}
+		--fs->lfs_seglock;
+		fs->lfs_lockpid = 0;
+		wakeup(&fs->lfs_seglock);	/* lfs_seglock sleeps on this */
+	} else if (fs->lfs_seglock == 0) {
+		panic ("Seglock not held");
+	} else {
+		--fs->lfs_seglock;		/* nested hold: pop one level */
+	}
+}
diff --git a/sys/ufs/lfs/lfs_syscalls.c b/sys/ufs/lfs/lfs_syscalls.c
new file mode 100644
index 00000000000..666595e6b59
--- /dev/null
+++ b/sys/ufs/lfs/lfs_syscalls.c
@@ -0,0 +1,562 @@
+/*-
+ * Copyright (c) 1991, 1993, 1994
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)lfs_syscalls.c	8.5 (Berkeley) 4/20/94
+ */
+
+#include <sys/param.h>
+#include <sys/proc.h>
+#include <sys/buf.h>
+#include <sys/mount.h>
+#include <sys/vnode.h>
+#include <sys/malloc.h>
+#include <sys/kernel.h>
+
+#include <ufs/ufs/quota.h>
+#include <ufs/ufs/inode.h>
+#include <ufs/ufs/ufsmount.h>
+#include <ufs/ufs/ufs_extern.h>
+
+#include <ufs/lfs/lfs.h>
+#include <ufs/lfs/lfs_extern.h>
+#define BUMP_FIP(SP) \
+	(SP)->fip = (FINFO *) (&(SP)->fip->fi_blocks[(SP)->fip->fi_nblocks])
+/* BUMP_FIP steps fip past its fi_blocks[]; INC/DEC_FINFO adjust ss_nfinfo. */
+#define INC_FINFO(SP) ++((SEGSUM *)((SP)->segsum))->ss_nfinfo
+#define DEC_FINFO(SP) --((SEGSUM *)((SP)->segsum))->ss_nfinfo
+
+/*
+ * Before committing to add "s" bytes to a segment summary, write the segment
+ * out if fewer than "s" bytes remain.  Uses "sp" and "fs" from the caller.
+ */
+#define	CHECK_SEG(s)			\
+if (sp->sum_bytes_left < (s)) {		\
+	(void) lfs_writeseg(fs, sp);	\
+}
+struct buf *lfs_fakebuf __P((struct vnode *, int, size_t, caddr_t));
+
+/*
+ * lfs_markv:
+ *
+ * This will mark inodes and blocks dirty, so they are written into the log.
+ * It will block until all the blocks have been written.  A block is only
+ * rewritten if VOP_BMAP still maps it to the bi_daddr the caller supplied,
+ * which filters out blocks some process dirtied between the caller deciding
+ * they were live and this call.  Restricted to the super-user (suser).
+ *
+ *  0 on success
+ * errno value on error (suser, copyin and VOP_BWRITE failures; EINVAL when
+ * the fsid names no mounted file system).
+ */
+struct lfs_markv_args {
+	fsid_t *fsidp;		/* file system */
+	BLOCK_INFO *blkiov;	/* block array */
+	int blkcnt;		/* count of block array entries */
+};
+int
+lfs_markv(p, uap, retval)
+	struct proc *p;
+	struct lfs_markv_args *uap;
+	int *retval;
+{
+	struct segment *sp;
+	BLOCK_INFO *blkp;
+	IFILE *ifp;
+	struct buf *bp, **bpp;
+	struct inode *ip;
+	struct lfs *fs;
+	struct mount *mntp;
+	struct vnode *vp;
+	fsid_t fsid;
+	void *start;
+	ino_t lastino;
+	daddr_t b_daddr, v_daddr;
+	u_long bsize;
+	int cnt, error;
+
+	if (error = suser(p->p_ucred, &p->p_acflag))
+		return (error);
+
+	if (error = copyin(uap->fsidp, &fsid, sizeof(fsid_t)))
+		return (error);
+	if ((mntp = getvfs(&fsid)) == NULL)
+		return (EINVAL);
+
+	cnt = uap->blkcnt;		/* NOTE(review): not range-checked */
+	start = malloc(cnt * sizeof(BLOCK_INFO), M_SEGMENT, M_WAITOK);
+	if (error = copyin(uap->blkiov, start, cnt * sizeof(BLOCK_INFO)))
+		goto err1;
+
+	/* Mark blocks/inodes dirty.  */
+	fs = VFSTOUFS(mntp)->um_lfs;
+	bsize = fs->lfs_bsize;
+	error = 0;
+
+	lfs_seglock(fs, SEGM_SYNC | SEGM_CLEAN);
+	sp = fs->lfs_sp;
+	for (v_daddr = LFS_UNUSED_DADDR, lastino = LFS_UNUSED_INUM,
+	    blkp = start; cnt--; ++blkp) {
+		/*
+		 * Get the IFILE entry (only once) and see if the file still
+		 * exists.
+		 */
+		if (lastino != blkp->bi_inode) {
+			if (lastino != LFS_UNUSED_INUM) {
+				/* Finish up last file (uses vp and ip) */
+				if (sp->fip->fi_nblocks == 0) {
+					DEC_FINFO(sp);
+					sp->sum_bytes_left +=
+					    sizeof(FINFO) - sizeof(daddr_t);
+				} else {
+					lfs_updatemeta(sp);
+					BUMP_FIP(sp);
+				}
+
+				lfs_writeinode(fs, sp, ip);
+				lfs_vunref(vp);
+			}
+
+			/* Start a new file */
+			CHECK_SEG(sizeof(FINFO));
+			sp->sum_bytes_left -= sizeof(FINFO) - sizeof(daddr_t);
+			INC_FINFO(sp);
+			sp->start_lbp = &sp->fip->fi_blocks[0];
+			sp->vp = NULL;
+			sp->fip->fi_version = blkp->bi_version;
+			sp->fip->fi_nblocks = 0;
+			sp->fip->fi_ino = blkp->bi_inode;
+			lastino = blkp->bi_inode;
+			if (blkp->bi_inode == LFS_IFILE_INUM)
+				v_daddr = fs->lfs_idaddr;
+			else {
+				LFS_IENTRY(ifp, fs, blkp->bi_inode, bp);
+				v_daddr = ifp->if_daddr;
+				brelse(bp);
+			}
+			if (v_daddr == LFS_UNUSED_DADDR)
+				continue;
+			/* NOTE(review): skip above leaves vp/ip stale. */
+			/* Get the vnode/inode. */
+			if (lfs_fastvget(mntp, blkp->bi_inode, v_daddr, &vp,
+			    blkp->bi_lbn == LFS_UNUSED_LBN ? 
+			    blkp->bi_bp : NULL)) {
+#ifdef DIAGNOSTIC
+				printf("lfs_markv: VFS_VGET failed (%d)\n",
+				    blkp->bi_inode);
+#endif
+				lastino = LFS_UNUSED_INUM;
+				v_daddr = LFS_UNUSED_DADDR;
+				continue;
+			}
+			sp->vp = vp;
+			ip = VTOI(vp);
+		} else if (v_daddr == LFS_UNUSED_DADDR)
+			continue;
+
+		/* If this BLOCK_INFO didn't contain a block, keep going. */
+		if (blkp->bi_lbn == LFS_UNUSED_LBN)
+			continue;
+		if (VOP_BMAP(vp, blkp->bi_lbn, NULL, &b_daddr, NULL) ||
+		    b_daddr != blkp->bi_daddr)
+			continue;
+		/*
+		 * If we got to here, then we are keeping the block.  If it
+		 * is an indirect block, we want to actually put it in the
+		 * buffer cache so that it can be updated in the finish_meta
+		 * section.  If it's not, we need to allocate a fake buffer
+		 * so that writeseg can perform the copyin and write the buffer.
+		 */
+		if (blkp->bi_lbn >= 0)	/* Data Block */
+			bp = lfs_fakebuf(vp, blkp->bi_lbn, bsize,
+			    blkp->bi_bp);
+		else {
+			bp = getblk(vp, blkp->bi_lbn, bsize, 0, 0);
+			if (!(bp->b_flags & (B_DELWRI | B_DONE | B_CACHE)) &&
+			    (error = copyin(blkp->bi_bp, bp->b_data,
+			    bsize)))
+				goto err2;
+			if (error = VOP_BWRITE(bp))
+				goto err2;
+		}
+		while (lfs_gatherblock(sp, bp, NULL));
+	}
+	if (sp->vp) {
+		if (sp->fip->fi_nblocks == 0) {
+			DEC_FINFO(sp);
+			sp->sum_bytes_left +=
+			    sizeof(FINFO) - sizeof(daddr_t);
+		} else
+			lfs_updatemeta(sp);
+
+		lfs_writeinode(fs, sp, ip);
+		lfs_vunref(vp);
+	}
+	(void) lfs_writeseg(fs, sp);
+	lfs_segunlock(fs);
+	free(start, M_SEGMENT);
+	return (error);
+
+/*
+ * XXX
+ * If we come in to error 2, we might have indirect blocks that were
+ * updated and now have bad block pointers.  I don't know what to do
+ * about this.
+ */
+
+err2:	lfs_vunref(vp);
+	/* Free up fakebuffers */
+	for (bpp = --sp->cbpp; bpp >= sp->bpp; --bpp)
+		if ((*bpp)->b_flags & B_CALL) {
+			brelvp(*bpp);
+			free(*bpp, M_SEGMENT);
+		} else
+			brelse(*bpp);
+	lfs_segunlock(fs);
+err1:	
+	free(start, M_SEGMENT);
+	return (error);
+}
+
+/*
+ * lfs_bmapv:
+ *
+ * Fill in bi_daddr with the current disk address of each block in the
+ * array and copy the array back out.  Blocks that cannot be looked up get
+ * LFS_UNUSED_DADDR; LFS_UNUSED_LBN entries are skipped.  Super-user only.
+ * Returns 0 on success or an errno value (including a failed copyout).
+ */
+struct lfs_bmapv_args {
+	fsid_t *fsidp;		/* file system */
+	BLOCK_INFO *blkiov;	/* block array */
+	int blkcnt;		/* count of block array entries */
+};
+int
+lfs_bmapv(p, uap, retval)
+	struct proc *p;
+	struct lfs_bmapv_args *uap;
+	int *retval;
+{
+	BLOCK_INFO *blkp;
+	struct mount *mntp;
+	struct vnode *vp;
+	fsid_t fsid;
+	void *start;
+	daddr_t daddr;
+	int cnt, error, step;
+
+	if (error = suser(p->p_ucred, &p->p_acflag))
+		return (error);
+	if (error = copyin(uap->fsidp, &fsid, sizeof(fsid_t)))
+		return (error);
+	if ((mntp = getvfs(&fsid)) == NULL)
+		return (EINVAL);
+
+	cnt = uap->blkcnt;
+	start = blkp = malloc(cnt * sizeof(BLOCK_INFO), M_SEGMENT, M_WAITOK);
+	if (error = copyin(uap->blkiov, blkp, cnt * sizeof(BLOCK_INFO))) {
+		free(blkp, M_SEGMENT);
+		return (error);
+	}
+
+	for (step = cnt; step--; ++blkp) {
+		if (blkp->bi_lbn == LFS_UNUSED_LBN)
+			continue;
+		/* Could be a deadlock ? */
+		if (VFS_VGET(mntp, blkp->bi_inode, &vp))
+			daddr = LFS_UNUSED_DADDR;
+		else {
+			if (VOP_BMAP(vp, blkp->bi_lbn, NULL, &daddr, NULL))
+				daddr = LFS_UNUSED_DADDR;
+			vput(vp);
+		}
+		blkp->bi_daddr = daddr;
+	}
+	/* Report a failed copyout instead of claiming success. */
+	error = copyout(start, uap->blkiov, cnt * sizeof(BLOCK_INFO));
+	free(start, M_SEGMENT);
+	return (error);
+}
+
+/*
+ * lfs_segclean:
+ *
+ * Mark the segment clean: clear SEGUSE_DIRTY, credit its space back to
+ * lfs_avail/lfs_bfree, update the cleaner info counts and wake sleepers on
+ * lfs_avail.  Super-user only.  Returns 0, EBUSY for the current or an
+ * active segment, EINVAL for an unknown fsid, or a suser/copyin error.
+ */
+struct lfs_segclean_args {
+	fsid_t *fsidp;		/* file system */
+	u_long segment;		/* segment number */
+}; 
+int
+lfs_segclean(p, uap, retval)
+	struct proc *p;
+	struct lfs_segclean_args *uap;
+	int *retval;
+{
+	CLEANERINFO *cip;
+	SEGUSE *sup;
+	struct buf *bp;
+	struct mount *mntp;
+	struct lfs *fs;
+	fsid_t fsid;
+	int error;
+
+	if (error = suser(p->p_ucred, &p->p_acflag))
+		return (error);
+
+	if (error = copyin(uap->fsidp, &fsid, sizeof(fsid_t)))
+		return (error);
+	if ((mntp = getvfs(&fsid)) == NULL)
+		return (EINVAL);
+
+	fs = VFSTOUFS(mntp)->um_lfs;
+
+	if (datosn(fs, fs->lfs_curseg) == uap->segment)
+		return (EBUSY);
+	/* NOTE(review): uap->segment is used without a range check. */
+	LFS_SEGENTRY(sup, fs, uap->segment, bp);
+	if (sup->su_flags & SEGUSE_ACTIVE) {
+		brelse(bp);
+		return (EBUSY);
+	}
+	fs->lfs_avail += fsbtodb(fs, fs->lfs_ssize) - 1;
+	fs->lfs_bfree += (sup->su_nsums * LFS_SUMMARY_SIZE / DEV_BSIZE) +
+	    sup->su_ninos * btodb(fs->lfs_bsize);
+	sup->su_flags &= ~SEGUSE_DIRTY;
+	(void) VOP_BWRITE(bp);
+	/* Move one segment from the dirty count to the clean count. */
+	LFS_CLEANERINFO(cip, fs, bp);
+	++cip->clean;
+	--cip->dirty;
+	(void) VOP_BWRITE(bp);
+	wakeup(&fs->lfs_avail);
+	return (0);
+}
+
+/*
+ * lfs_segwait:
+ *
+ * Block until a segment in file system fsid is written (sleeps on its
+ * lfs_nextseg; on lfs_allclean_wakeup if fsid matches no mount).  The
+ * optional timeout is a struct timeval; a NULL tv means forever.
+ *
+ * Returns EINTR if tsleep returns ERESTART and 0 otherwise; suser, copyin
+ * and itimerfix failures are returned before sleeping.  NOTE(review): the
+ * "1 on timeout" result this comment used to promise is not implemented.
+ */
+struct lfs_segwait_args {
+	fsid_t *fsidp;		/* file system */
+	struct timeval *tv;	/* timeout */
+};
+int
+lfs_segwait(p, uap, retval)
+	struct proc *p;
+	struct lfs_segwait_args *uap;
+	int *retval;
+{
+	extern int lfs_allclean_wakeup;
+	struct mount *mntp;
+	struct timeval atv;
+	fsid_t fsid;
+	void *addr;
+	u_long timeout;
+	int error, s;
+
+	if (error = suser(p->p_ucred, &p->p_acflag)) {
+		return (error);
+}
+#ifdef WHEN_QUADS_WORK
+	if (error = copyin(uap->fsidp, &fsid, sizeof(fsid_t)))
+		return (error);
+	if (fsid == (fsid_t)-1)
+		addr = &lfs_allclean_wakeup;
+	else {
+		if ((mntp = getvfs(&fsid)) == NULL)
+			return (EINVAL);
+		addr = &VFSTOUFS(mntp)->um_lfs->lfs_nextseg;
+	}
+#else
+	if (error = copyin(uap->fsidp, &fsid, sizeof(fsid_t)))
+		return (error);
+	if ((mntp = getvfs(&fsid)) == NULL)
+		addr = &lfs_allclean_wakeup;
+	else
+		addr = &VFSTOUFS(mntp)->um_lfs->lfs_nextseg;
+#endif
+	/* Turn the optional timeout into the value handed to tsleep. */
+	if (uap->tv) {
+		if (error = copyin(uap->tv, &atv, sizeof(struct timeval)))
+			return (error);
+		if (itimerfix(&atv))
+			return (EINVAL);
+		s = splclock();
+		timevaladd(&atv, (struct timeval *)&time);
+		timeout = hzto(&atv);
+		splx(s);
+	} else
+		timeout = 0;
+
+	error = tsleep(addr, PCATCH | PUSER, "segment", timeout);
+	return (error == ERESTART ? EINTR : 0);
+}
+
+/*
+ * VFS_VGET call specialized for the cleaner.  The cleaner already knows the
+ * daddr from the ifile, so don't look it up again.  If the cleaner already
+ * has the ondisk inode (non-NULL dinp, e.g. from IINFO structures), copy it
+ * in from user space instead of reading the inode block at daddr.
+ */
+int
+lfs_fastvget(mp, ino, daddr, vpp, dinp)
+	struct mount *mp;
+	ino_t ino;
+	daddr_t daddr;
+	struct vnode **vpp;
+	struct dinode *dinp;
+{
+	register struct inode *ip;
+	struct vnode *vp;
+	struct ufsmount *ump;
+	struct buf *bp;
+	dev_t dev;
+	int error;
+
+	ump = VFSTOUFS(mp);
+	dev = ump->um_dev;
+	/*
+	 * This is playing fast and loose.  Someone may have the inode
+	 * locked, in which case they are going to be distinctly unhappy
+	 * if we trash something.
+	 */
+	if ((*vpp = ufs_ihashlookup(dev, ino)) != NULL) {
+		lfs_vref(*vpp);		/* NOTE(review): result ignored */
+		if ((*vpp)->v_flag & VXLOCK)
+			printf ("Cleaned vnode VXLOCKED\n");
+		ip = VTOI(*vpp);
+		if (ip->i_flag & IN_LOCKED)
+			printf("cleaned vnode locked\n");
+		if (!(ip->i_flag & IN_MODIFIED)) {
+			++ump->um_lfs->lfs_uinodes;
+			ip->i_flag |= IN_MODIFIED;
+		}
+		return (0);
+	}
+
+	/* Allocate new vnode/inode. */
+	if (error = lfs_vcreate(mp, ino, &vp)) {
+		*vpp = NULL;
+		return (error);
+	}
+
+	/*
+	 * Put it onto its hash chain and lock it so that other requests for
+	 * this inode will block if they arrive while we are sleeping waiting
+	 * for old data structures to be purged or for the contents of the
+	 * disk portion of this inode to be read.
+	 */
+	ip = VTOI(vp);
+	ufs_ihashins(ip);
+
+	/*
+	 * XXX
+	 * This may not need to be here, logically it should go down with
+	 * the i_devvp initialization.  Ask Kirk.
+	 */
+	ip->i_lfs = ump->um_lfs;
+
+	/* Read in the disk contents for the inode, copy into the inode. */
+	if (dinp) {
+		if (error = copyin(dinp, &ip->i_din, sizeof(struct dinode))) {
+			/*
+			 * The inode does not contain anything useful, so it
+			 * would be misleading to leave it on its hash chain.
+			 */
+			ufs_ihashrem(ip);
+			lfs_vunref(vp);
+			*vpp = NULL;
+			return (error);
+		}
+	} else {
+		if (error = bread(ump->um_devvp, daddr,
+		    (int)ump->um_lfs->lfs_bsize, NOCRED, &bp)) {
+			/* As above: unhash and discard the unneeded inode. */
+			ufs_ihashrem(ip);
+			lfs_vunref(vp);
+			brelse(bp);
+			*vpp = NULL;
+			return (error);
+		}
+		ip->i_din =
+		    *lfs_ifind(ump->um_lfs, ino, (struct dinode *)bp->b_data);
+		brelse(bp);
+	}
+
+	/* Inode was just read from user space or disk, make sure it's locked */
+	ip->i_flag |= IN_LOCKED;
+
+	/*
+	 * Initialize the vnode from the inode, check for aliases.  In all
+	 * cases re-init ip, the underlying vnode/inode may have changed.
+	 */
+	if (error = ufs_vinit(mp, lfs_specop_p, LFS_FIFOOPS, &vp)) {
+		lfs_vunref(vp);
+		*vpp = NULL;
+		return (error);
+	}
+	/*
+	 * Finish inode initialization now that aliasing has been resolved.
+	 */
+	ip->i_devvp = ump->um_devvp;
+	ip->i_flag |= IN_MODIFIED;
+	++ump->um_lfs->lfs_uinodes;
+	VREF(ip->i_devvp);
+	*vpp = vp;
+	return (0);
+}
+/* Build a B_INVAL buffer for (vp, lbn) that remembers user address uaddr. */
+struct buf *
+lfs_fakebuf(vp, lbn, size, uaddr)
+	struct vnode *vp;
+	int lbn;
+	size_t size;
+	caddr_t uaddr;
+{
+	struct buf *fake = lfs_newbuf(vp, lbn, 0);
+
+	fake->b_flags |= B_INVAL;
+	fake->b_bcount = size;
+	fake->b_bufsize = size;
+	fake->b_saveaddr = uaddr;
+	return (fake);
+}
diff --git a/sys/ufs/lfs/lfs_vfsops.c b/sys/ufs/lfs/lfs_vfsops.c
new file mode 100644
index 00000000000..0c8186e2322
--- /dev/null
+++ b/sys/ufs/lfs/lfs_vfsops.c
@@ -0,0 +1,573 @@
+/*
+ * Copyright (c) 1989, 1991, 1993, 1994
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)lfs_vfsops.c	8.7 (Berkeley) 4/16/94
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/namei.h>
+#include <sys/proc.h>
+#include <sys/kernel.h>
+#include <sys/vnode.h>
+#include <sys/mount.h>
+#include <sys/buf.h>
+#include <sys/mbuf.h>
+#include <sys/file.h>
+#include <sys/disklabel.h>
+#include <sys/ioctl.h>
+#include <sys/errno.h>
+#include <sys/malloc.h>
+#include <sys/socket.h>
+
+#include <miscfs/specfs/specdev.h>
+
+#include <ufs/ufs/quota.h>
+#include <ufs/ufs/inode.h>
+#include <ufs/ufs/ufsmount.h>
+#include <ufs/ufs/ufs_extern.h>
+
+#include <ufs/lfs/lfs.h>
+#include <ufs/lfs/lfs_extern.h>
+
+int lfs_mountfs __P((struct vnode *, struct mount *, struct proc *));
+/* VFS operations vector for LFS (three entries are ufs_* routines). */
+struct vfsops lfs_vfsops = {
+	lfs_mount,
+	ufs_start,
+	lfs_unmount,
+	ufs_root,
+	ufs_quotactl,
+	lfs_statfs,
+	lfs_sync,
+	lfs_vget,
+	lfs_fhtovp,
+	lfs_vptofh,
+	lfs_init,
+};
+
+int
+lfs_mountroot()
+{
+	panic("lfs_mountroot");		/* XXX -- not implemented; always panics */
+}
+
+/*
+ * VFS Operations.
+ * mount system call: mount, or update the mount of, the block device
+ * named by args.fspec.  Requests with MNT_EXPORTED set get EINVAL.
+ */
+lfs_mount(mp, path, data, ndp, p)
+	register struct mount *mp;
+	char *path;
+	caddr_t data;
+	struct nameidata *ndp;
+	struct proc *p;
+{
+	struct vnode *devvp;
+	struct ufs_args args;
+	struct ufsmount *ump;
+	register struct lfs *fs;				/* LFS */
+	u_int size;
+	int error;
+	/* "data" is a user-space struct ufs_args. */
+	if (error = copyin(data, (caddr_t)&args, sizeof (struct ufs_args)))
+		return (error);
+
+	/* Until LFS can do NFS right.		XXX */
+	if (args.export.ex_flags & MNT_EXPORTED)
+		return (EINVAL);
+
+	/*
+	 * If updating, check whether changing from read-only to
+	 * read/write; if there is no device name, that's all we do.
+	 */
+	if (mp->mnt_flag & MNT_UPDATE) {
+		ump = VFSTOUFS(mp);
+#ifdef NOTLFS							/* LFS */
+		fs = ump->um_fs;
+		if (fs->fs_ronly && (mp->mnt_flag & MNT_RDONLY) == 0)
+			fs->fs_ronly = 0;
+#else
+		fs = ump->um_lfs;
+		if (fs->lfs_ronly && (mp->mnt_flag & MNT_RDONLY) == 0)
+			fs->lfs_ronly = 0;
+#endif
+		if (args.fspec == 0) {
+			/*
+			 * Process export requests.
+			 */
+			return (vfs_export(mp, &ump->um_export, &args.export));
+		}
+	}
+	/*
+	 * Not an update, or updating the name: look up the name
+	 * and verify that it refers to a sensible block device.
+	 */
+	NDINIT(ndp, LOOKUP, FOLLOW, UIO_USERSPACE, args.fspec, p);
+	if (error = namei(ndp))
+		return (error);
+	devvp = ndp->ni_vp;
+	if (devvp->v_type != VBLK) {
+		vrele(devvp);
+		return (ENOTBLK);
+	}
+	if (major(devvp->v_rdev) >= nblkdev) {
+		vrele(devvp);
+		return (ENXIO);
+	}
+	if ((mp->mnt_flag & MNT_UPDATE) == 0)
+		error = lfs_mountfs(devvp, mp, p);		/* LFS */
+	else {
+		if (devvp != ump->um_devvp)
+			error = EINVAL;	/* needs translation */
+		else
+			vrele(devvp);
+	}
+	if (error) {
+		vrele(devvp);
+		return (error);
+	}
+	ump = VFSTOUFS(mp);
+	fs = ump->um_lfs;					/* LFS */
+#ifdef NOTLFS							/* LFS */
+	(void) copyinstr(path, fs->fs_fsmnt, sizeof(fs->fs_fsmnt) - 1, &size);
+	bzero(fs->fs_fsmnt + size, sizeof(fs->fs_fsmnt) - size);
+	bcopy((caddr_t)fs->fs_fsmnt, (caddr_t)mp->mnt_stat.f_mntonname,
+	    MNAMELEN);
+	(void) copyinstr(args.fspec, mp->mnt_stat.f_mntfromname, MNAMELEN - 1,
+	    &size);
+	bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size);
+	(void) ufs_statfs(mp, &mp->mnt_stat, p);
+#else
+	(void)copyinstr(path, fs->lfs_fsmnt, sizeof(fs->lfs_fsmnt) - 1, &size);
+	bzero(fs->lfs_fsmnt + size, sizeof(fs->lfs_fsmnt) - size);
+	bcopy((caddr_t)fs->lfs_fsmnt, (caddr_t)mp->mnt_stat.f_mntonname,
+	    MNAMELEN);
+	(void) copyinstr(args.fspec, mp->mnt_stat.f_mntfromname, MNAMELEN - 1,
+	    &size);
+	bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size);
+	(void) lfs_statfs(mp, &mp->mnt_stat, p);
+#endif
+	return (0);
+}
+
+/*
+ * Common code for mount and mountroot (LFS specific): open devvp, read
+ * and check the superblock, fill in mp/ump and grab the ifile vnode.
+ */
+int
+lfs_mountfs(devvp, mp, p)
+	register struct vnode *devvp;
+	struct mount *mp;
+	struct proc *p;
+{
+	extern struct vnode *rootvp;
+	register struct lfs *fs;
+	register struct ufsmount *ump;
+	struct vnode *vp;
+	struct buf *bp;
+	struct partinfo dpart;
+	dev_t dev;
+	int error, i, ronly, size;
+
+	/*
+	 * Disallow multiple mounts of the same device.
+	 * Disallow mounting of a device that is currently in use
+	 * (except for root, which might share swap device for miniroot).
+	 * Flush out any old buffers remaining from a previous use.
+	 */
+	if (error = vfs_mountedon(devvp))
+		return (error);
+	if (vcount(devvp) > 1 && devvp != rootvp)
+		return (EBUSY);
+	if (error = vinvalbuf(devvp, V_SAVE, p->p_ucred, p, 0, 0))
+		return (error);
+
+	ronly = (mp->mnt_flag & MNT_RDONLY) != 0;
+	if (error = VOP_OPEN(devvp, ronly ? FREAD : FREAD|FWRITE, FSCRED, p))
+		return (error);
+
+	if (VOP_IOCTL(devvp, DIOCGPART, (caddr_t)&dpart, FREAD, NOCRED, p) != 0)
+		size = DEV_BSIZE;
+	else {
+		size = dpart.disklab->d_secsize;
+#ifdef NEVER_USED
+		dpart.part->p_fstype = FS_LFS;
+		dpart.part->p_fsize = fs->lfs_fsize;	/* frag size */
+		dpart.part->p_frag = fs->lfs_frag;	/* frags per block */
+		dpart.part->p_cpg = fs->lfs_segshift;	/* segment shift */
+#endif
+	}
+
+	/* Don't free random space on error. */
+	bp = NULL;
+	ump = NULL;
+
+	/* Read in the superblock. */
+	if (error = bread(devvp, LFS_LABELPAD / size, LFS_SBPAD, NOCRED, &bp))
+		goto out;
+	fs = (struct lfs *)bp->b_data;
+
+	/* Check the basics. */
+	if (fs->lfs_magic != LFS_MAGIC || fs->lfs_bsize > MAXBSIZE ||
+	    fs->lfs_bsize < sizeof(struct lfs)) {
+		error = EINVAL;		/* XXX needs translation */
+		goto out;
+	}
+
+	/* Allocate the mount structure, copy the superblock into it. */
+	ump = (struct ufsmount *)malloc(sizeof *ump, M_UFSMNT, M_WAITOK);
+	fs = ump->um_lfs = malloc(sizeof(struct lfs), M_UFSMNT, M_WAITOK);
+	bcopy(bp->b_data, fs, sizeof(struct lfs));
+	if (sizeof(struct lfs) < LFS_SBPAD)			/* XXX why? */
+		bp->b_flags |= B_INVAL;
+	brelse(bp);
+	bp = NULL;
+
+	/* Set up the I/O information */
+	fs->lfs_iocount = 0;
+
+	/* Set up the ifile and lock aflags */
+	fs->lfs_doifile = 0;
+	fs->lfs_writer = 0;
+	fs->lfs_dirops = 0;
+	fs->lfs_seglock = 0;
+
+	/* Set the file system readonly/modify bits. */
+	fs->lfs_ronly = ronly;
+	if (ronly == 0)
+		fs->lfs_fmod = 1;
+
+	/* Initialize the mount structure. */
+	dev = devvp->v_rdev;
+	mp->mnt_data = (qaddr_t)ump;
+	mp->mnt_stat.f_fsid.val[0] = (long)dev;
+	mp->mnt_stat.f_fsid.val[1] = MOUNT_LFS;
+	mp->mnt_flag |= MNT_LOCAL;
+	ump->um_mountp = mp;
+	ump->um_dev = dev;
+	ump->um_devvp = devvp;
+	ump->um_bptrtodb = 0;
+	ump->um_seqinc = 1 << fs->lfs_fsbtodb;
+	ump->um_nindir = fs->lfs_nindir;
+	for (i = 0; i < MAXQUOTAS; i++)
+		ump->um_quotas[i] = NULLVP;
+	devvp->v_specflags |= SI_MOUNTEDON;
+	/* NOTE(review): not cleared again if the VFS_VGET below fails. */
+	/*
+	 * We use the ifile vnode for almost every operation.  Instead of
+	 * retrieving it from the hash table each time we retrieve it here,
+	 * artificially increment the reference count and keep a pointer
+	 * to it in the incore copy of the superblock.
+	 */
+	if (error = VFS_VGET(mp, LFS_IFILE_INUM, &vp))
+		goto out;
+	fs->lfs_ivnode = vp;
+	VREF(vp);
+	vput(vp);
+
+	return (0);
+out:
+	if (bp)
+		brelse(bp);
+	(void)VOP_CLOSE(devvp, ronly ? FREAD : FREAD|FWRITE, NOCRED, p);
+	if (ump) {
+		free(ump->um_lfs, M_UFSMNT);
+		free(ump, M_UFSMNT);
+		mp->mnt_data = (qaddr_t)0;
+	}
+	return (error);
+}
+
+/*
+ * unmount system call: flush, checkpoint as clean, close the device.
+ */
+lfs_unmount(mp, mntflags, p)
+	struct mount *mp;
+	int mntflags;
+	struct proc *p;
+{
+	extern int doforce;
+	register struct ufsmount *ump;
+	register struct lfs *fs;
+	int i, error, flags;
+
+	flags = 0;
+	if (mntflags & MNT_FORCE) {
+		if (!doforce || (mp->mnt_flag & MNT_ROOTFS))
+			return (EINVAL);
+		flags |= FORCECLOSE;
+	}
+
+	ump = VFSTOUFS(mp);
+	fs = ump->um_lfs;
+#ifdef QUOTA
+	if (mp->mnt_flag & MNT_QUOTA) {
+		if (error = vflush(mp, fs->lfs_ivnode, SKIPSYSTEM|flags))
+			return (error);
+		for (i = 0; i < MAXQUOTAS; i++) {
+			if (ump->um_quotas[i] == NULLVP)
+				continue;
+			quotaoff(p, mp, i);
+		}
+		/*
+		 * Here we fall through to vflush again to ensure
+		 * that we have gotten rid of all the system vnodes.
+		 */
+	}
+#endif
+	if (error = vflush(mp, fs->lfs_ivnode, flags))
+		return (error);
+	fs->lfs_clean = 1;
+	if (error = VFS_SYNC(mp, 1, p->p_ucred, p))
+		return (error);
+	if (fs->lfs_ivnode->v_dirtyblkhd.lh_first)
+		panic("lfs_unmount: still dirty blocks on ifile vnode\n");
+	vrele(fs->lfs_ivnode);
+	vgone(fs->lfs_ivnode);
+
+	ump->um_devvp->v_specflags &= ~SI_MOUNTEDON;
+	/* Close with the mode lfs_mountfs opened with: FREAD if read-only. */
+	error = VOP_CLOSE(ump->um_devvp,
+	    fs->lfs_ronly ? FREAD : FREAD|FWRITE, NOCRED, p);
+	vrele(ump->um_devvp);
+	free(fs, M_UFSMNT);
+	free(ump, M_UFSMNT);
+	mp->mnt_data = (qaddr_t)0;
+	mp->mnt_flag &= ~MNT_LOCAL;
+	return (error);
+}
+
+/*
+ * Get file system statistics: fill in sbp from the in-core superblock.
+ * The mount names are copied from mp->mnt_stat unless sbp is mnt_stat.
+ */
+lfs_statfs(mp, sbp, p)
+	struct mount *mp;
+	register struct statfs *sbp;
+	struct proc *p;
+{
+	register struct lfs *fs = VFSTOUFS(mp)->um_lfs;
+
+	if (fs->lfs_magic != LFS_MAGIC)
+		panic("lfs_statfs: magic");
+	sbp->f_type = MOUNT_LFS;
+	sbp->f_iosize = fs->lfs_bsize;
+	sbp->f_bsize = fs->lfs_bsize;
+	sbp->f_files = fs->lfs_nfiles;
+	sbp->f_blocks = dbtofsb(fs, fs->lfs_dsize);
+	sbp->f_bfree = dbtofsb(fs, fs->lfs_bfree);
+	sbp->f_ffree = sbp->f_bfree * INOPB(fs);
+	/* Usable space: total less the minfree reserve, less what is in use. */
+	sbp->f_bavail = (fs->lfs_dsize * (100 - fs->lfs_minfree) / 100) -
+	    (fs->lfs_dsize - fs->lfs_bfree);
+	sbp->f_bavail = dbtofsb(fs, sbp->f_bavail);
+
+	if (sbp == &mp->mnt_stat)
+		return (0);
+	bcopy((caddr_t)mp->mnt_stat.f_mntonname,
+	    (caddr_t)&sbp->f_mntonname[0], MNAMELEN);
+	bcopy((caddr_t)mp->mnt_stat.f_mntfromname,
+	    (caddr_t)&sbp->f_mntfromname[0], MNAMELEN);
+	return (0);
+}
+
+/*
+ * Sync the file system by writing a segment with lfs_segwrite().  Every
+ * sync is a checkpoint (SEGM_CKP); a non-zero waitfor adds SEGM_SYNC.
+ * With QUOTA configured, qsync() is called afterwards.
+ * Note: we are always called with the filesystem marked `MPBUSY'.
+ */
+lfs_sync(mp, waitfor, cred, p)
+	struct mount *mp;
+	int waitfor;
+	struct ucred *cred;
+	struct proc *p;
+{
+	int segflags, rv;
+
+	/* All syncs must be checkpoints until roll-forward is implemented. */
+	segflags = waitfor ? (SEGM_CKP | SEGM_SYNC) : SEGM_CKP;
+	rv = lfs_segwrite(mp, segflags);
+#ifdef QUOTA
+	qsync(mp);
+#endif
+	return (rv);
+}
+
+/*
+ * Look up LFS inode number ino on mount mp and return its in-core vnode in
+ * *vpp, reading the dinode from disk if it is not already in core.  Returns
+ * 0 with the inode locked, or an errno value with *vpp left NULL.  Mount
+ * point detection and handling must be done by the calling routine.
+ */
+int
+lfs_vget(mp, ino, vpp)
+	struct mount *mp;
+	ino_t ino;
+	struct vnode **vpp;
+{
+	register struct lfs *fs;
+	register struct inode *ip;
+	struct buf *bp;
+	struct ifile *ifp;
+	struct vnode *vp;
+	struct ufsmount *ump;
+	daddr_t daddr;
+	dev_t dev;
+	int error;
+
+	ump = VFSTOUFS(mp);
+	dev = ump->um_dev;
+	if ((*vpp = ufs_ihashget(dev, ino)) != NULL)
+		return (0);		/* found in the inode hash */
+
+	/* Translate the inode number to a disk address. */
+	fs = ump->um_lfs;
+	if (ino == LFS_IFILE_INUM)
+		daddr = fs->lfs_idaddr;	/* the ifile's address is in *fs */
+	else {
+		LFS_IENTRY(ifp, fs, ino, bp);
+		daddr = ifp->if_daddr;	/* others come from the ifile entry */
+		brelse(bp);
+		if (daddr == LFS_UNUSED_DADDR)
+			return (ENOENT);
+	}
+
+	/* Allocate new vnode/inode. */
+	if (error = lfs_vcreate(mp, ino, &vp)) {
+		*vpp = NULL;
+		return (error);
+	}
+
+	/*
+	 * Put it onto its hash chain and lock it so that other requests for
+	 * this inode block while we sleep waiting for old data structures to
+	 * be purged or for the disk portion of the inode to be read.
+	 */
+	ip = VTOI(vp);
+	ufs_ihashins(ip);
+
+	/*
+	 * XXX
+	 * This may not need to be here, logically it should go down with
+	 * the i_devvp initialization.  Ask Kirk.
+	 */
+	ip->i_lfs = ump->um_lfs;
+
+	/* Read in the disk contents for the inode, copy into the inode. */
+	if (error =
+	    bread(ump->um_devvp, daddr, (int)fs->lfs_bsize, NOCRED, &bp)) {
+		/*
+		 * The inode does not contain anything useful, so it would
+		 * be misleading to leave it on its hash chain. With mode
+		 * still zero, it will be unlinked and returned to the free
+		 * list by vput().
+		 */
+		vput(vp);
+		brelse(bp);
+		*vpp = NULL;
+		return (error);
+	}
+	ip->i_din = *lfs_ifind(fs, ino, (struct dinode *)bp->b_data);
+	brelse(bp);
+
+	/*
+	 * Initialize the vnode from the inode, check for aliases.
+	 * NOTE(review): the vnode/inode may have changed underneath us, yet
+	 * ip is not re-initialized from vp before it is used below -- confirm.
+	 */
+	if (error = ufs_vinit(mp, lfs_specop_p, LFS_FIFOOPS, &vp)) {
+		vput(vp);
+		*vpp = NULL;
+		return (error);
+	}
+	/*
+	 * Finish inode initialization now that aliasing has been resolved.
+	 */
+	ip->i_devvp = ump->um_devvp;
+	VREF(ip->i_devvp);
+	*vpp = vp;
+	return (0);
+}
+
+/*
+ * Translate a file handle into a vnode.
+ *
+ * A handle whose inode number is below ROOTINO is rejected with ESTALE
+ * here; everything else is left to ufs_check_export(), which is
+ * expected to take care of the remaining stale-handle checks:
+ * - call lfs_vget() to get the locked inode
+ * - check for an unallocated inode (i_mode == 0)
+ * - check the client's export rights, returned via exflagsp/credanonp
+ *
+ * XXX
+ * Use the ifile to see if the inode is allocated instead of reading it
+ * off disk.  What is the relationship between my generation number and
+ * the NFS generation number?
+ */
+int
+lfs_fhtovp(mp, fhp, nam, vpp, exflagsp, credanonp)
+	register struct mount *mp;
+	struct fid *fhp;
+	struct mbuf *nam;
+	struct vnode **vpp;
+	int *exflagsp;
+	struct ucred **credanonp;
+{
+	register struct ufid *ufp = (struct ufid *)fhp;
+
+	if (ufp->ufid_ino >= ROOTINO)
+		return (ufs_check_export(mp, ufp, nam, vpp, exflagsp,
+		    credanonp));
+	return (ESTALE);
+}
+
+/*
+ * Vnode pointer to file handle: fill *fhp, viewed as a struct ufid, with
+ * the handle length and the inode number and generation of vp's inode.
+ * Always returns 0.
+ */
+/* ARGSUSED */
+lfs_vptofh(vp, fhp)
+	struct vnode *vp;
+	struct fid *fhp;
+{
+	register struct inode *inop = VTOI(vp);
+	register struct ufid *handle = (struct ufid *)fhp;
+
+	handle->ufid_gen = inop->i_gen;
+	handle->ufid_ino = inop->i_number;
+	handle->ufid_len = sizeof(*handle);
+	return (0);
+}
diff --git a/sys/ufs/lfs/lfs_vnops.c b/sys/ufs/lfs/lfs_vnops.c
new file mode 100644
index 00000000000..fc6bd480d22
--- /dev/null
+++ b/sys/ufs/lfs/lfs_vnops.c
@@ -0,0 +1,487 @@
+/*
+ * Copyright (c) 1986, 1989, 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)lfs_vnops.c	8.5 (Berkeley) 12/30/93
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/namei.h>
+#include <sys/resourcevar.h>
+#include <sys/kernel.h>
+#include <sys/file.h>
+#include <sys/stat.h>
+#include <sys/buf.h>
+#include <sys/proc.h>
+#include <sys/conf.h>
+#include <sys/mount.h>
+#include <sys/vnode.h>
+#include <sys/malloc.h>
+
+#include <vm/vm.h>
+
+#include <miscfs/specfs/specdev.h>
+#include <miscfs/fifofs/fifo.h>
+
+#include <ufs/ufs/quota.h>
+#include <ufs/ufs/inode.h>
+#include <ufs/ufs/dir.h>
+#include <ufs/ufs/ufsmount.h>
+#include <ufs/ufs/ufs_extern.h>
+
+#include <ufs/lfs/lfs.h>
+#include <ufs/lfs/lfs_extern.h>
+
+/* LFS vnode ops.  NOTE(review): lfs_create() etc. are not wired in here. */
+int (**lfs_vnodeop_p)();
+struct vnodeopv_entry_desc lfs_vnodeop_entries[] = {
+	{ &vop_default_desc, vn_default_error },
+	{ &vop_lookup_desc, ufs_lookup },		/* lookup */
+	{ &vop_create_desc, ufs_create },		/* create; lfs_create unused */
+	{ &vop_mknod_desc, ufs_mknod },			/* mknod; lfs_mknod unused */
+	{ &vop_open_desc, ufs_open },			/* open */
+	{ &vop_close_desc, lfs_close },			/* close */
+	{ &vop_access_desc, ufs_access },		/* access */
+	{ &vop_getattr_desc, lfs_getattr },		/* getattr */
+	{ &vop_setattr_desc, ufs_setattr },		/* setattr */
+	{ &vop_read_desc, lfs_read },			/* read */
+	{ &vop_write_desc, lfs_write },			/* write */
+	{ &vop_ioctl_desc, ufs_ioctl },			/* ioctl */
+	{ &vop_select_desc, ufs_select },		/* select */
+	{ &vop_mmap_desc, ufs_mmap },			/* mmap */
+	{ &vop_fsync_desc, lfs_fsync },			/* fsync */
+	{ &vop_seek_desc, ufs_seek },			/* seek */
+	{ &vop_remove_desc, ufs_remove },		/* remove; lfs_remove unused */
+	{ &vop_link_desc, ufs_link },			/* link; lfs_link unused */
+	{ &vop_rename_desc, ufs_rename },		/* rename; lfs_rename unused */
+	{ &vop_mkdir_desc, ufs_mkdir },			/* mkdir; lfs_mkdir unused */
+	{ &vop_rmdir_desc, ufs_rmdir },			/* rmdir; lfs_rmdir unused */
+	{ &vop_symlink_desc, ufs_symlink },		/* symlink; lfs_symlink unused */
+	{ &vop_readdir_desc, ufs_readdir },		/* readdir */
+	{ &vop_readlink_desc, ufs_readlink },		/* readlink */
+	{ &vop_abortop_desc, ufs_abortop },		/* abortop */
+	{ &vop_inactive_desc, lfs_inactive },		/* inactive */
+	{ &vop_reclaim_desc, ufs_reclaim },		/* reclaim */
+	{ &vop_lock_desc, ufs_lock },			/* lock */
+	{ &vop_unlock_desc, ufs_unlock },		/* unlock */
+	{ &vop_bmap_desc, ufs_bmap },			/* bmap */
+	{ &vop_strategy_desc, ufs_strategy },		/* strategy */
+	{ &vop_print_desc, ufs_print },			/* print */
+	{ &vop_islocked_desc, ufs_islocked },		/* islocked */
+	{ &vop_pathconf_desc, ufs_pathconf },		/* pathconf */
+	{ &vop_advlock_desc, ufs_advlock },		/* advlock */
+	{ &vop_blkatoff_desc, lfs_blkatoff },		/* blkatoff */
+	{ &vop_valloc_desc, lfs_valloc },		/* valloc */
+	{ &vop_vfree_desc, lfs_vfree },			/* vfree */
+	{ &vop_truncate_desc, lfs_truncate },		/* truncate */
+	{ &vop_update_desc, lfs_update },		/* update */
+	{ &vop_bwrite_desc, lfs_bwrite },		/* bwrite */
+	{ (struct vnodeop_desc*)NULL, (int(*)())NULL }
+};
+struct vnodeopv_desc lfs_vnodeop_opv_desc =
+	{ &lfs_vnodeop_p, lfs_vnodeop_entries };
+
+int (**lfs_specop_p)();	/* handed to ufs_vinit() by lfs_vget() */
+struct vnodeopv_entry_desc lfs_specop_entries[] = {
+	{ &vop_default_desc, vn_default_error },
+	{ &vop_lookup_desc, spec_lookup },		/* lookup */
+	{ &vop_create_desc, spec_create },		/* create */
+	{ &vop_mknod_desc, spec_mknod },		/* mknod */
+	{ &vop_open_desc, spec_open },			/* open */
+	{ &vop_close_desc, ufsspec_close },		/* close */
+	{ &vop_access_desc, ufs_access },		/* access */
+	{ &vop_getattr_desc, lfs_getattr },		/* getattr */
+	{ &vop_setattr_desc, ufs_setattr },		/* setattr */
+	{ &vop_read_desc, ufsspec_read },		/* read */
+	{ &vop_write_desc, ufsspec_write },		/* write */
+	{ &vop_ioctl_desc, spec_ioctl },		/* ioctl */
+	{ &vop_select_desc, spec_select },		/* select */
+	{ &vop_mmap_desc, spec_mmap },			/* mmap */
+	{ &vop_fsync_desc, spec_fsync },		/* fsync */
+	{ &vop_seek_desc, spec_seek },			/* seek */
+	{ &vop_remove_desc, spec_remove },		/* remove */
+	{ &vop_link_desc, spec_link },			/* link */
+	{ &vop_rename_desc, spec_rename },		/* rename */
+	{ &vop_mkdir_desc, spec_mkdir },		/* mkdir */
+	{ &vop_rmdir_desc, spec_rmdir },		/* rmdir */
+	{ &vop_symlink_desc, spec_symlink },		/* symlink */
+	{ &vop_readdir_desc, spec_readdir },		/* readdir */
+	{ &vop_readlink_desc, spec_readlink },		/* readlink */
+	{ &vop_abortop_desc, spec_abortop },		/* abortop */
+	{ &vop_inactive_desc, lfs_inactive },		/* inactive */
+	{ &vop_reclaim_desc, ufs_reclaim },		/* reclaim */
+	{ &vop_lock_desc, ufs_lock },			/* lock */
+	{ &vop_unlock_desc, ufs_unlock },		/* unlock */
+	{ &vop_bmap_desc, spec_bmap },			/* bmap */
+	{ &vop_strategy_desc, spec_strategy },		/* strategy */
+	{ &vop_print_desc, ufs_print },			/* print */
+	{ &vop_islocked_desc, ufs_islocked },		/* islocked */
+	{ &vop_pathconf_desc, spec_pathconf },		/* pathconf */
+	{ &vop_advlock_desc, spec_advlock },		/* advlock */
+	{ &vop_blkatoff_desc, spec_blkatoff },		/* blkatoff */
+	{ &vop_valloc_desc, spec_valloc },		/* valloc */
+	{ &vop_vfree_desc, lfs_vfree },			/* vfree */
+	{ &vop_truncate_desc, spec_truncate },		/* truncate */
+	{ &vop_update_desc, lfs_update },		/* update */
+	{ &vop_bwrite_desc, lfs_bwrite },		/* bwrite */
+	{ (struct vnodeop_desc*)NULL, (int(*)())NULL }	/* end of table */
+};
+struct vnodeopv_desc lfs_specop_opv_desc =
+	{ &lfs_specop_p, lfs_specop_entries };	/* vector pointer, entries */
+
+#ifdef FIFO
+int (**lfs_fifoop_p)();	/* fifo op vector; exists only when FIFO is defined */
+struct vnodeopv_entry_desc lfs_fifoop_entries[] = {
+	{ &vop_default_desc, vn_default_error },
+	{ &vop_lookup_desc, fifo_lookup },		/* lookup */
+	{ &vop_create_desc, fifo_create },		/* create */
+	{ &vop_mknod_desc, fifo_mknod },		/* mknod */
+	{ &vop_open_desc, fifo_open },			/* open */
+	{ &vop_close_desc, ufsfifo_close },		/* close */
+	{ &vop_access_desc, ufs_access },		/* access */
+	{ &vop_getattr_desc, lfs_getattr },		/* getattr */
+	{ &vop_setattr_desc, ufs_setattr },		/* setattr */
+	{ &vop_read_desc, ufsfifo_read },		/* read */
+	{ &vop_write_desc, ufsfifo_write },		/* write */
+	{ &vop_ioctl_desc, fifo_ioctl },		/* ioctl */
+	{ &vop_select_desc, fifo_select },		/* select */
+	{ &vop_mmap_desc, fifo_mmap },			/* mmap */
+	{ &vop_fsync_desc, fifo_fsync },		/* fsync */
+	{ &vop_seek_desc, fifo_seek },			/* seek */
+	{ &vop_remove_desc, fifo_remove },		/* remove */
+	{ &vop_link_desc, fifo_link },			/* link */
+	{ &vop_rename_desc, fifo_rename },		/* rename */
+	{ &vop_mkdir_desc, fifo_mkdir },		/* mkdir */
+	{ &vop_rmdir_desc, fifo_rmdir },		/* rmdir */
+	{ &vop_symlink_desc, fifo_symlink },		/* symlink */
+	{ &vop_readdir_desc, fifo_readdir },		/* readdir */
+	{ &vop_readlink_desc, fifo_readlink },		/* readlink */
+	{ &vop_abortop_desc, fifo_abortop },		/* abortop */
+	{ &vop_inactive_desc, lfs_inactive },		/* inactive */
+	{ &vop_reclaim_desc, ufs_reclaim },		/* reclaim */
+	{ &vop_lock_desc, ufs_lock },			/* lock */
+	{ &vop_unlock_desc, ufs_unlock },		/* unlock */
+	{ &vop_bmap_desc, fifo_bmap },			/* bmap */
+	{ &vop_strategy_desc, fifo_strategy },		/* strategy */
+	{ &vop_print_desc, ufs_print },			/* print */
+	{ &vop_islocked_desc, ufs_islocked },		/* islocked */
+	{ &vop_pathconf_desc, fifo_pathconf },		/* pathconf */
+	{ &vop_advlock_desc, fifo_advlock },		/* advlock */
+	{ &vop_blkatoff_desc, fifo_blkatoff },		/* blkatoff */
+	{ &vop_valloc_desc, fifo_valloc },		/* valloc */
+	{ &vop_vfree_desc, lfs_vfree },			/* vfree */
+	{ &vop_truncate_desc, fifo_truncate },		/* truncate */
+	{ &vop_update_desc, lfs_update },		/* update */
+	{ &vop_bwrite_desc, lfs_bwrite },		/* bwrite */
+	{ (struct vnodeop_desc*)NULL, (int(*)())NULL }	/* end of table */
+};
+struct vnodeopv_desc lfs_fifoop_opv_desc =
+	{ &lfs_fifoop_p, lfs_fifoop_entries };	/* vector pointer, entries */
+#endif /* FIFO */
+
+#define	LFS_READWRITE
+#include <ufs/ufs/ufs_readwrite.c>
+#undef	LFS_READWRITE
+
+/*
+ * Synch an open file: hand the current time (twice) to VOP_UPDATE(),
+ * asking for LFS_SYNC when a_waitfor is MNT_WAIT and for 0 otherwise.
+ */
+/* ARGSUSED */
+lfs_fsync(ap)
+	struct vop_fsync_args /* {
+		struct vnode *a_vp;
+		struct ucred *a_cred;
+		int a_waitfor;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct timeval now = time;
+	int waitflag = (ap->a_waitfor == MNT_WAIT) ? LFS_SYNC : 0;
+
+	return (VOP_UPDATE(ap->a_vp, &now, &now, waitflag));
+}
+
+/*
+ * These macros bracket UFS directory ops so that all pages touched by a
+ * directory op can be identified, ordered and flushed atomically, and so
+ * recovered.  Each expands to one statement; (fs) is evaluated repeatedly.
+ * SET_DIROP sleeps once if lfs_writer is set, then counts the new dirop.
+ */
+#define	SET_DIROP(fs) do {						\
+	if ((fs)->lfs_writer)						\
+		tsleep(&(fs)->lfs_dirops, PRIBIO + 1, "lfs_dirop", 0);	\
+	++(fs)->lfs_dirops;						\
+	(fs)->lfs_doifile = 1;						\
+} while (0)
+/* End a dirop; when none remain, wake whoever sleeps on lfs_writer. */
+#define	SET_ENDOP(fs) do {						\
+	--(fs)->lfs_dirops;						\
+	if (!(fs)->lfs_dirops)						\
+		wakeup(&(fs)->lfs_writer);				\
+} while (0)
+#define	MARK_VNODE(dvp)	((dvp)->v_flag |= VDIROP)
+
+/* ufs_symlink() bracketed as an LFS directory operation (dirop). */
+int
+lfs_symlink(ap)
+	struct vop_symlink_args /* {
+		struct vnode *a_dvp, **a_vpp;
+		struct componentname *a_cnp;
+		struct vattr *a_vap;
+		char *a_target;
+	} */ *ap;
+{
+	int error;
+
+	SET_DIROP(VTOI(ap->a_dvp)->i_lfs);	/* enter dirop */
+	MARK_VNODE(ap->a_dvp);
+	error = ufs_symlink(ap);
+	SET_ENDOP(VTOI(ap->a_dvp)->i_lfs);	/* leave dirop */
+	return (error);
+}
+
+/* ufs_mknod() bracketed as an LFS directory operation (dirop). */
+int
+lfs_mknod(ap)
+	struct vop_mknod_args /* {
+		struct vnode *a_dvp, **a_vpp;
+		struct componentname *a_cnp;
+		struct vattr *a_vap;
+	} */ *ap;
+{
+	int error;
+
+	SET_DIROP(VTOI(ap->a_dvp)->i_lfs);	/* enter dirop */
+	MARK_VNODE(ap->a_dvp);
+	error = ufs_mknod(ap);
+	SET_ENDOP(VTOI(ap->a_dvp)->i_lfs);	/* leave dirop */
+	return (error);
+}
+
+/* ufs_create() bracketed as an LFS directory operation (dirop). */
+int
+lfs_create(ap)
+	struct vop_create_args /* {
+		struct vnode *a_dvp, **a_vpp;
+		struct componentname *a_cnp;
+		struct vattr *a_vap;
+	} */ *ap;
+{
+	int error;
+
+	SET_DIROP(VTOI(ap->a_dvp)->i_lfs);	/* enter dirop */
+	MARK_VNODE(ap->a_dvp);
+	error = ufs_create(ap);
+	SET_ENDOP(VTOI(ap->a_dvp)->i_lfs);	/* leave dirop */
+	return (error);
+}
+
+/* ufs_mkdir() bracketed as an LFS directory operation (dirop). */
+int
+lfs_mkdir(ap)
+	struct vop_mkdir_args /* {
+		struct vnode *a_dvp, **a_vpp;
+		struct componentname *a_cnp;
+		struct vattr *a_vap;
+	} */ *ap;
+{
+	int error;
+
+	SET_DIROP(VTOI(ap->a_dvp)->i_lfs);	/* enter dirop */
+	MARK_VNODE(ap->a_dvp);
+	error = ufs_mkdir(ap);
+	SET_ENDOP(VTOI(ap->a_dvp)->i_lfs);	/* leave dirop */
+	return (error);
+}
+
+/* ufs_remove() as an LFS dirop; both the directory and the file are marked. */
+int
+lfs_remove(ap)
+	struct vop_remove_args /* {
+		struct vnode *a_dvp, *a_vp;
+		struct componentname *a_cnp;
+	} */ *ap;
+{
+	int error;
+
+	SET_DIROP(VTOI(ap->a_dvp)->i_lfs);	/* enter dirop */
+	MARK_VNODE(ap->a_dvp);
+	MARK_VNODE(ap->a_vp);
+	error = ufs_remove(ap);
+	SET_ENDOP(VTOI(ap->a_dvp)->i_lfs);	/* leave dirop */
+	return (error);
+}
+
+/* ufs_rmdir() as an LFS dirop; both a_dvp and a_vp are marked VDIROP. */
+int
+lfs_rmdir(ap)
+	struct vop_rmdir_args /* {
+		struct vnodeop_desc *a_desc;
+		struct vnode *a_dvp, *a_vp;
+		struct componentname *a_cnp;
+	} */ *ap;
+{
+	int error;
+
+	SET_DIROP(VTOI(ap->a_dvp)->i_lfs);	/* enter dirop */
+	MARK_VNODE(ap->a_dvp);
+	MARK_VNODE(ap->a_vp);
+	error = ufs_rmdir(ap);
+	SET_ENDOP(VTOI(ap->a_dvp)->i_lfs);	/* leave dirop */
+	return (error);
+}
+
+/* ufs_link() as an LFS dirop; the file system is taken from a_vp. */
+int
+lfs_link(ap)
+	struct vop_link_args /* {
+		struct vnode *a_vp, *a_tdvp;
+		struct componentname *a_cnp;
+	} */ *ap;
+{
+	int error;
+
+	SET_DIROP(VTOI(ap->a_vp)->i_lfs);	/* enter dirop */
+	MARK_VNODE(ap->a_vp);
+	error = ufs_link(ap);
+	SET_ENDOP(VTOI(ap->a_vp)->i_lfs);	/* leave dirop */
+	return (error);
+}
+
+/* ufs_rename() as an LFS dirop; source and target directories are marked. */
+int
+lfs_rename(ap)
+	struct vop_rename_args  /* {
+		struct vnode *a_fdvp, *a_fvp;
+		struct componentname *a_fcnp;
+		struct vnode *a_tdvp;
+		struct vnode *a_tvp;
+		struct componentname *a_tcnp;
+	} */ *ap;
+{
+	int error;
+
+	SET_DIROP(VTOI(ap->a_fdvp)->i_lfs);	/* enter dirop */
+	MARK_VNODE(ap->a_fdvp);
+	MARK_VNODE(ap->a_tdvp);
+	error = ufs_rename(ap);
+	SET_ENDOP(VTOI(ap->a_fdvp)->i_lfs);	/* leave dirop */
+	return (error);
+}
+/*
+ * Fill in *a_vap straight from the in-core inode and vnode.
+ * XXX hack to avoid calling ITIMES in getattr
+ */
+int
+lfs_getattr(ap)
+	struct vop_getattr_args /* {
+		struct vnode *a_vp;
+		struct vattr *a_vap;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	register struct vnode *vnp = ap->a_vp;
+	register struct vattr *attr = ap->a_vap;
+	register struct inode *inop = VTOI(vnp);
+
+	/* Identity and type. */
+	attr->va_type = vnp->v_type;
+	attr->va_fsid = inop->i_dev;
+	attr->va_fileid = inop->i_number;
+	attr->va_gen = inop->i_gen;
+	attr->va_filerev = inop->i_modrev;
+	/* Mode, link count, owner, flags and device. */
+	attr->va_mode = inop->i_mode & ~IFMT;
+	attr->va_nlink = inop->i_nlink;
+	attr->va_uid = inop->i_uid;
+	attr->va_gid = inop->i_gid;
+	attr->va_flags = inop->i_flags;
+	attr->va_rdev = (dev_t)inop->i_rdev;
+	/* Sizes and time stamps. */
+	attr->va_size = inop->i_din.di_size;
+	attr->va_bytes = dbtob(inop->i_blocks);
+	attr->va_atime = inop->i_atime;
+	attr->va_mtime = inop->i_mtime;
+	attr->va_ctime = inop->i_ctime;
+	/* this doesn't belong here */
+	attr->va_blocksize = vnp->v_type == VBLK ? BLKDEV_IOSIZE :
+	    vnp->v_type == VCHR ? MAXBSIZE : vnp->v_mount->mnt_stat.f_iosize;
+	return (0);
+}
+/*
+ * Close called.  While other references to the vnode remain and the
+ * inode is not IN_LOCKED, run ITIMES() on the inode; if that is what
+ * first marks it IN_MODIFIED, count it in lfs_uinodes.
+ * XXX -- we were using ufs_close, but since it updates the times on
+ * the inode, we might need to bump the uinodes count.
+ */
+/* ARGSUSED */
+int
+lfs_close(ap)
+	struct vop_close_args /* {
+		struct vnode *a_vp;
+		int  a_fflag;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	register struct vnode *vnp = ap->a_vp;
+	register struct inode *inop = VTOI(vnp);
+	int was_modified;
+
+	if (vnp->v_usecount <= 1 || (inop->i_flag & IN_LOCKED))
+		return (0);
+	was_modified = inop->i_flag & IN_MODIFIED;
+	ITIMES(inop, &time, &time);
+	if (!was_modified && (inop->i_flag & IN_MODIFIED))
+		inop->i_lfs->lfs_uinodes++;
+	return (0);
+}
+
+/* When non-zero, lfs_inactive() returns without calling ufs_inactive(). */
+int lfs_no_inactive = 0;
+
+/*
+ * Stub inactive routine: skips ufs_inactive() and reports success
+ * whenever lfs_no_inactive is set.
+ */
+int
+lfs_inactive(ap)
+	struct vop_inactive_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+
+	return (lfs_no_inactive ? 0 : ufs_inactive(ap));
+}
diff --git a/sys/ufs/mfs/mfs_extern.h b/sys/ufs/mfs/mfs_extern.h
new file mode 100644
index 00000000000..e357faf6fa5
--- /dev/null
+++ b/sys/ufs/mfs/mfs_extern.h
@@ -0,0 +1,60 @@
+/*-
+ * Copyright (c) 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)mfs_extern.h	8.1 (Berkeley) 6/11/93
+ */
+
+struct buf;
+struct mount;
+struct nameidata;
+struct proc;
+struct statfs;
+struct ucred;
+struct vnode;
+
+__BEGIN_DECLS
+int	mfs_badop __P((void));
+int	mfs_bmap __P((struct vop_bmap_args *));
+int	mfs_close __P((struct vop_close_args *));
+void	mfs_doio __P((struct buf *bp, caddr_t base));
+int	mfs_inactive __P((struct vop_inactive_args *)); /* XXX */
+int	mfs_reclaim __P((struct vop_reclaim_args *)); /* XXX */
+int	mfs_init __P((void));
+int	mfs_ioctl __P((struct vop_ioctl_args *));
+int	mfs_mount __P((struct mount *mp,
+	    char *path, caddr_t data, struct nameidata *ndp, struct proc *p));
+int	mfs_open __P((struct vop_open_args *));
+int	mfs_print __P((struct vop_print_args *)); /* XXX */
+int	mfs_start __P((struct mount *mp, int flags, struct proc *p));
+int	mfs_statfs __P((struct mount *mp, struct statfs *sbp, struct proc *p));
+int	mfs_strategy __P((struct vop_strategy_args *)); /* XXX */
+__END_DECLS
diff --git a/sys/ufs/mfs/mfs_vfsops.c b/sys/ufs/mfs/mfs_vfsops.c
new file mode 100644
index 00000000000..3fcbdf37928
--- /dev/null
+++ b/sys/ufs/mfs/mfs_vfsops.c
@@ -0,0 +1,307 @@
+/*
+ * Copyright (c) 1989, 1990, 1993, 1994
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)mfs_vfsops.c	8.4 (Berkeley) 4/16/94
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/time.h>
+#include <sys/kernel.h>
+#include <sys/proc.h>
+#include <sys/buf.h>
+#include <sys/mount.h>
+#include <sys/signalvar.h>
+#include <sys/vnode.h>
+#include <sys/malloc.h>
+
+#include <ufs/ufs/quota.h>
+#include <ufs/ufs/inode.h>
+#include <ufs/ufs/ufsmount.h>
+#include <ufs/ufs/ufs_extern.h>
+
+#include <ufs/ffs/fs.h>
+#include <ufs/ffs/ffs_extern.h>
+
+#include <ufs/mfs/mfsnode.h>
+#include <ufs/mfs/mfs_extern.h>
+
+caddr_t	mfs_rootbase;	/* address of mini-root in kernel virtual memory */
+u_long	mfs_rootsize;	/* size of mini-root in bytes */
+
+static	int mfs_minor;	/* used for building internal dev_t */
+
+extern int (**mfs_vnodeop_p)();
+
+/*
+ * mfs vfs operations; the non-mfs_* entries are the ffs/ufs routines.
+ */
+struct vfsops mfs_vfsops = {
+	mfs_mount,
+	mfs_start,	/* I/O service loop, see mfs_start() */
+	ffs_unmount,
+	ufs_root,
+	ufs_quotactl,
+	mfs_statfs,	/* ffs_statfs() with f_type set to MOUNT_MFS */
+	ffs_sync,
+	ffs_vget,
+	ffs_fhtovp,
+	ffs_vptofh,
+	mfs_init,
+};
+
+/*
+ * Called by main() when mfs is going to be mounted as root.  Mounts the
+ * mini-root at mfs_rootbase read-only through ffs_mountfs(); returns 0 or
+ * an errno value.  Name is updated by mount(8) after booting.
+ */
+#define ROOTNAME	"mfs_root"
+
+mfs_mountroot()
+{
+	extern struct vnode *rootvp;
+	register struct fs *fs;
+	register struct mount *mp;
+	struct proc *p = curproc;	/* XXX */
+	struct ufsmount *ump;
+	struct mfsnode *mfsp;
+	u_int size;
+	int error;
+
+	/*
+	 * Get vnodes for swapdev and rootdev.
+	 */
+	if (bdevvp(swapdev, &swapdev_vp) || bdevvp(rootdev, &rootvp))
+		panic("mfs_mountroot: can't setup bdevvp's");
+
+	mp = malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK);
+	bzero((char *)mp, (u_long)sizeof(struct mount));
+	mp->mnt_op = &mfs_vfsops;
+	mp->mnt_flag = MNT_RDONLY;
+	mfsp = malloc(sizeof *mfsp, M_MFSNODE, M_WAITOK);
+	rootvp->v_data = mfsp;		/* turn rootvp into an mfs vnode */
+	rootvp->v_op = mfs_vnodeop_p;
+	rootvp->v_tag = VT_MFS;
+	mfsp->mfs_baseoff = mfs_rootbase;
+	mfsp->mfs_size = mfs_rootsize;
+	mfsp->mfs_vnode = rootvp;
+	mfsp->mfs_pid = p->p_pid;
+	mfsp->mfs_buflist = (struct buf *)0;
+	if (error = ffs_mountfs(rootvp, mp, p)) {
+		free(mp, M_MOUNT);
+		free(mfsp, M_MFSNODE);	/* NOTE(review): rootvp->v_data stale */
+		return (error);
+	}
+	if (error = vfs_lock(mp)) {
+		(void)ffs_unmount(mp, 0, p);
+		free(mp, M_MOUNT);
+		free(mfsp, M_MFSNODE);	/* NOTE(review): rootvp->v_data stale */
+		return (error);
+	}
+	TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list);
+	mp->mnt_flag |= MNT_ROOTFS;
+	mp->mnt_vnodecovered = NULLVP;
+	ump = VFSTOUFS(mp);
+	fs = ump->um_fs;
+	bzero(fs->fs_fsmnt, sizeof(fs->fs_fsmnt));
+	fs->fs_fsmnt[0] = '/';		/* mounted-on name is "/" */
+	bcopy((caddr_t)fs->fs_fsmnt, (caddr_t)mp->mnt_stat.f_mntonname,
+	    MNAMELEN);
+	(void) copystr(ROOTNAME, mp->mnt_stat.f_mntfromname, MNAMELEN - 1,
+	    &size);
+	bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size);
+	(void)ffs_statfs(mp, &mp->mnt_stat, p);
+	vfs_unlock(mp);
+	inittodr((time_t)0);
+	return (0);
+}
+
+/*
+ * Called early in boot: when a valid super block sits at base + SBOFF,
+ * record the mini-root's base and size, pick its device number and make
+ * mfs_mountroot() the root mount routine.  Returns the size, else 0.
+ */
+mfs_initminiroot(base)
+	caddr_t base;
+{
+	extern int (*mountroot)();
+	struct fs *sblk = (struct fs *)(base + SBOFF);
+
+	if (!(sblk->fs_magic == FS_MAGIC && sblk->fs_bsize <= MAXBSIZE &&
+	    sblk->fs_bsize >= sizeof(struct fs)))
+		return (0);
+	mfs_rootbase = base;
+	mfs_rootsize = sblk->fs_fsize * sblk->fs_size;
+	rootdev = makedev(255, mfs_minor++);
+	mountroot = mfs_mountroot;
+	return (mfs_rootsize);
+}
+
+/*
+ * VFS Operations.
+ * mount system call: for MNT_UPDATE only adjust the read-only state, else
+ * create a device vnode for the memory at args.base and mount ffs on it.
+ */
+/* ARGSUSED */
+int
+mfs_mount(mp, path, data, ndp, p)
+	register struct mount *mp;
+	char *path;
+	caddr_t data;
+	struct nameidata *ndp;
+	struct proc *p;
+{
+	struct vnode *devvp;
+	struct mfs_args args;
+	struct ufsmount *ump;
+	register struct fs *fs;
+	register struct mfsnode *mfsp;
+	u_int size;
+	int flags, error;
+
+	if (error = copyin(data, (caddr_t)&args, sizeof (struct mfs_args)))
+		return (error);
+
+	/*
+	 * If updating, handle a change between read/write and read-only
+	 * (flushing files first when going read-only); that's all we do.
+	 */
+	if (mp->mnt_flag & MNT_UPDATE) {
+		ump = VFSTOUFS(mp);
+		fs = ump->um_fs;
+		if (fs->fs_ronly == 0 && (mp->mnt_flag & MNT_RDONLY)) {
+			flags = WRITECLOSE;
+			if (mp->mnt_flag & MNT_FORCE)
+				flags |= FORCECLOSE;
+			if (vfs_busy(mp))
+				return (EBUSY);
+			error = ffs_flushfiles(mp, flags, p);
+			vfs_unbusy(mp);
+			if (error)
+				return (error);
+		}
+		if (fs->fs_ronly && (mp->mnt_flag & MNT_WANTRDWR))
+			fs->fs_ronly = 0;
+#ifdef EXPORTMFS
+		if (args.fspec == 0)
+			return (vfs_export(mp, &ump->um_export, &args.export));
+#endif
+		return (0);
+	}
+	error = getnewvnode(VT_MFS, (struct mount *)0, mfs_vnodeop_p, &devvp);
+	if (error)
+		return (error);
+	devvp->v_type = VBLK;
+	if (checkalias(devvp, makedev(255, mfs_minor++), (struct mount *)0))
+		panic("mfs_mount: dup dev");
+	mfsp = (struct mfsnode *)malloc(sizeof *mfsp, M_MFSNODE, M_WAITOK);
+	devvp->v_data = mfsp;
+	mfsp->mfs_baseoff = args.base;
+	mfsp->mfs_size = args.size;
+	mfsp->mfs_vnode = devvp;
+	mfsp->mfs_pid = p->p_pid;
+	mfsp->mfs_buflist = (struct buf *)0;
+	if (error = ffs_mountfs(devvp, mp, p)) {
+		mfsp->mfs_buflist = (struct buf *)-1;	/* mfs_start() exit value */
+		vrele(devvp);
+		return (error);
+	}
+	ump = VFSTOUFS(mp);
+	fs = ump->um_fs;
+	(void) copyinstr(path, fs->fs_fsmnt, sizeof(fs->fs_fsmnt) - 1, &size);
+	bzero(fs->fs_fsmnt + size, sizeof(fs->fs_fsmnt) - size);
+	bcopy((caddr_t)fs->fs_fsmnt, (caddr_t)mp->mnt_stat.f_mntonname,
+		MNAMELEN);
+	(void) copyinstr(args.fspec, mp->mnt_stat.f_mntfromname, MNAMELEN - 1,
+		&size);
+	bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size);
+	(void) mfs_statfs(mp, &mp->mnt_stat, p);
+	return (0);
+}
+
+int	mfs_pri = PWAIT | PCATCH;		/* XXX prob. temp */
+
+/*
+ * Used to grab the process and keep it in the kernel to service
+ * memory filesystem I/O requests.
+ *
+ * Each pass drains mfs_buflist, passing every buffer to mfs_doio() and
+ * waking whoever sleeps on it, then sleeps on the device vnode.  The
+ * loop ends when mfs_buflist is (struct buf *)-1.
+ */
+/* ARGSUSED */
+int
+mfs_start(mp, flags, p)
+	struct mount *mp;
+	int flags;
+	struct proc *p;
+{
+	register struct vnode *devvp = VFSTOUFS(mp)->um_devvp;
+	register struct mfsnode *node = VTOMFS(devvp);
+	register caddr_t membase = node->mfs_baseoff;
+	register struct buf *req;
+	int rv = 0;
+
+	while (node->mfs_buflist != (struct buf *)(-1)) {
+		/* Service every request queued so far. */
+		while ((req = node->mfs_buflist) != NULL) {
+			node->mfs_buflist = req->b_actf;
+			mfs_doio(req, membase);
+			wakeup((caddr_t)req);
+		}
+		/*
+		 * A non-zero tsleep() result means a non-ignored signal
+		 * arrived: try to unmount.  If that fails, clear the signal
+		 * (it has been "processed"), otherwise we would loop here,
+		 * as tsleep will always return EINTR/ERESTART.
+		 */
+		rv = tsleep((caddr_t)devvp, mfs_pri, "mfsidl", 0);
+		if (rv != 0 && dounmount(mp, 0, p) != 0)
+			CLRSIG(p, CURSIG(p));
+	}
+	return (rv);
+}
+
+/*
+ * Get file system statistics: ffs_statfs() with f_type forced to MOUNT_MFS.
+ */
+mfs_statfs(mp, sbp, p)
+	struct mount *mp;
+	struct statfs *sbp;
+	struct proc *p;
+{
+	int rv = ffs_statfs(mp, sbp, p);
+
+	/* Overwrite the type even if ffs_statfs() reported an error. */
+	sbp->f_type = MOUNT_MFS;
+	return (rv);
+}
diff --git a/sys/ufs/mfs/mfs_vnops.c b/sys/ufs/mfs/mfs_vnops.c
new file mode 100644
index 00000000000..71adf069b1d
--- /dev/null
+++ b/sys/ufs/mfs/mfs_vnops.c
@@ -0,0 +1,432 @@
+/*
+ * Copyright (c) 1989, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)mfs_vnops.c	8.3 (Berkeley) 9/21/93
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/time.h>
+#include <sys/kernel.h>
+#include <sys/proc.h>
+#include <sys/buf.h>
+#include <sys/map.h>
+#include <sys/vnode.h>
+#include <sys/malloc.h>
+
+#include <miscfs/specfs/specdev.h>
+
+#include <machine/vmparam.h>
+
+#include <ufs/mfs/mfsnode.h>
+#include <ufs/mfs/mfsiom.h>
+#include <ufs/mfs/mfs_extern.h>
+
+#if !defined(hp300) && !defined(i386) && !defined(mips) && !defined(sparc) && !defined(luna68k)
+static int mfsmap_want;		/* 1 => need kernel I/O resources */
+struct map mfsmap[MFS_MAPSIZE];
+extern char mfsiobuf[];
+#endif
+
+/*
+ * mfs vnode operations; most are mfs_badop/nullop aliases from mfsnode.h.
+ */
+int (**mfs_vnodeop_p)();
+struct vnodeopv_entry_desc mfs_vnodeop_entries[] = {
+	{ &vop_default_desc, vn_default_error },
+	{ &vop_lookup_desc, mfs_lookup },		/* lookup */
+	{ &vop_create_desc, mfs_create },		/* create */
+	{ &vop_mknod_desc, mfs_mknod },			/* mknod */
+	{ &vop_open_desc, mfs_open },			/* open */
+	{ &vop_close_desc, mfs_close },			/* close */
+	{ &vop_access_desc, mfs_access },		/* access */
+	{ &vop_getattr_desc, mfs_getattr },		/* getattr */
+	{ &vop_setattr_desc, mfs_setattr },		/* setattr */
+	{ &vop_read_desc, mfs_read },			/* read */
+	{ &vop_write_desc, mfs_write },			/* write */
+	{ &vop_ioctl_desc, mfs_ioctl },			/* ioctl */
+	{ &vop_select_desc, mfs_select },		/* select */
+	{ &vop_mmap_desc, mfs_mmap },			/* mmap */
+	{ &vop_fsync_desc, spec_fsync },		/* fsync */
+	{ &vop_seek_desc, mfs_seek },			/* seek */
+	{ &vop_remove_desc, mfs_remove },		/* remove */
+	{ &vop_link_desc, mfs_link },			/* link */
+	{ &vop_rename_desc, mfs_rename },		/* rename */
+	{ &vop_mkdir_desc, mfs_mkdir },			/* mkdir */
+	{ &vop_rmdir_desc, mfs_rmdir },			/* rmdir */
+	{ &vop_symlink_desc, mfs_symlink },		/* symlink */
+	{ &vop_readdir_desc, mfs_readdir },		/* readdir */
+	{ &vop_readlink_desc, mfs_readlink },		/* readlink */
+	{ &vop_abortop_desc, mfs_abortop },		/* abortop */
+	{ &vop_inactive_desc, mfs_inactive },		/* inactive */
+	{ &vop_reclaim_desc, mfs_reclaim },		/* reclaim */
+	{ &vop_lock_desc, mfs_lock },			/* lock */
+	{ &vop_unlock_desc, mfs_unlock },		/* unlock */
+	{ &vop_bmap_desc, mfs_bmap },			/* bmap */
+	{ &vop_strategy_desc, mfs_strategy },		/* strategy */
+	{ &vop_print_desc, mfs_print },			/* print */
+	{ &vop_islocked_desc, mfs_islocked },		/* islocked */
+	{ &vop_pathconf_desc, mfs_pathconf },		/* pathconf */
+	{ &vop_advlock_desc, mfs_advlock },		/* advlock */
+	{ &vop_blkatoff_desc, mfs_blkatoff },		/* blkatoff */
+	{ &vop_valloc_desc, mfs_valloc },		/* valloc */
+	{ &vop_vfree_desc, mfs_vfree },			/* vfree */
+	{ &vop_truncate_desc, mfs_truncate },		/* truncate */
+	{ &vop_update_desc, mfs_update },		/* update */
+	{ &vop_bwrite_desc, mfs_bwrite },		/* bwrite */
+	{ (struct vnodeop_desc*)NULL, (int(*)())NULL }
+};
+struct vnodeopv_desc mfs_vnodeop_opv_desc =
+	{ &mfs_vnodeop_p, mfs_vnodeop_entries };
+
+/*
+ * Vnode Operations.
+ *
+ * Open called to allow memory filesystem to initialize and
+ * validate before actual IO.  The only check made here is that
+ * the vnode is a block device; anything else is a panic.
+ */
+/* ARGSUSED */
+int
+mfs_open(ap)
+	struct vop_open_args /* {
+		struct vnode *a_vp;
+		int  a_mode;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+
+	if (ap->a_vp->v_type != VBLK) {
+		panic("mfs_open not VBLK");
+		/* NOTREACHED */
+	}
+	return (0);
+}
+
+/*
+ * Ioctl operation: no ioctls are supported, so always return ENOTTY.
+ */
+/* ARGSUSED */
+int
+mfs_ioctl(ap)
+	struct vop_ioctl_args /* {
+		struct vnode *a_vp;
+		int  a_command;
+		caddr_t  a_data;
+		int  a_fflag;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+
+	return (ENOTTY);
+}
+
+/*
+ * Pass I/O requests to the memory filesystem process, or do them in place.
+ */
+int
+mfs_strategy(ap)
+	struct vop_strategy_args /* {
+		struct buf *a_bp;
+	} */ *ap;
+{
+	register struct buf *bp = ap->a_bp;
+	register struct mfsnode *mfsp;
+	struct vnode *vp;
+	struct proc *p = curproc;		/* XXX */
+
+	if (!vfinddev(bp->b_dev, VBLK, &vp) || vp->v_usecount == 0)
+		panic("mfs_strategy: bad dev");
+	mfsp = VTOMFS(vp);
+	/* check for mini-root access (mfs_pid == 0): bcopy directly */
+	if (mfsp->mfs_pid == 0) {
+		caddr_t base;
+
+		base = mfsp->mfs_baseoff + (bp->b_blkno << DEV_BSHIFT);
+		if (bp->b_flags & B_READ)
+			bcopy(base, bp->b_data, bp->b_bcount);
+		else
+			bcopy(bp->b_data, base, bp->b_bcount);
+		biodone(bp);
+	} else if (mfsp->mfs_pid == p->p_pid) {	/* caller is the mfs process */
+		mfs_doio(bp, mfsp->mfs_baseoff);
+	} else {			/* queue at list head, wake sleeper on vp */
+		bp->b_actf = mfsp->mfs_buflist;
+		mfsp->mfs_buflist = bp;
+		wakeup((caddr_t)vp);
+	}
+	return (0);
+}
+
+#if defined(vax) || defined(tahoe)
+/*
+ * Memory file system I/O (vax/tahoe): copy bp's data between the kernel
+ * and offset b_blkno << DEV_BSHIFT from base in the memfs process,
+ * essentially playing ubasetup() and disk interrupt service routine.
+ * If doing physio (i.e. pagein), we must map the I/O through the kernel
+ * virtual address space.  A failed copy sets b_error and B_ERROR;
+ * biodone() is always called.
+ */
+void
+mfs_doio(bp, base)
+	register struct buf *bp;
+	caddr_t base;
+{
+	register struct pte *pte, *ppte;
+	register caddr_t vaddr;
+	int off, npf, npf2, reg;
+	caddr_t kernaddr, offset;
+
+	/*
+	 * For phys I/O, map the b_data into kernel virtual space using
+	 * the Mfsiomap pte's.
+	 */
+	if ((bp->b_flags & B_PHYS) == 0) {
+		kernaddr = bp->b_data;
+	} else {
+		if (bp->b_flags & (B_PAGET | B_UAREA | B_DIRTY))
+			panic("swap on memfs?");
+		off = (int)bp->b_data & PGOFSET;
+		npf = btoc(bp->b_bcount + off);
+		/*
+		 * Get some mapping page table entries, sleeping until free
+		 */
+		while ((reg = rmalloc(mfsmap, (long)npf)) == 0) {
+			mfsmap_want++;
+			sleep((caddr_t)&mfsmap_want, PZERO-1);
+		}
+		reg--;			/* map is 1-based; 0 means no space */
+		pte = vtopte(bp->b_proc, btop(bp->b_data));
+		/*
+		 * Do vmaccess() but with the Mfsiomap page table.
+		 */
+		ppte = &Mfsiomap[reg];
+		vaddr = &mfsiobuf[reg * NBPG];
+		kernaddr = vaddr + off;
+		for (npf2 = npf; npf2; npf2--) {
+			mapin(ppte, (u_int)vaddr, pte->pg_pfnum,
+				(int)(PG_V|PG_KW));
+#if defined(tahoe)
+			if ((bp->b_flags & B_READ) == 0)
+				mtpr(P1DC, vaddr);
+#endif
+			ppte++;
+			pte++;
+			vaddr += NBPG;
+		}
+	}
+	offset = base + (bp->b_blkno << DEV_BSHIFT);
+	if (bp->b_flags & B_READ)
+		bp->b_error = copyin(offset, kernaddr, bp->b_bcount);
+	else
+		bp->b_error = copyout(kernaddr, offset, bp->b_bcount);
+	if (bp->b_error)
+		bp->b_flags |= B_ERROR;
+	/*
+	 * Release pte's used by physical I/O (++reg undoes the reg-- above).
+	 */
+	if (bp->b_flags & B_PHYS) {
+		rmfree(mfsmap, (long)npf, (long)++reg);
+		if (mfsmap_want) {
+			mfsmap_want = 0;
+			wakeup((caddr_t)&mfsmap_want);
+		}
+	}
+	biodone(bp);
+}
+#endif	/* vax || tahoe */
+
+#if defined(hp300) || defined(i386) || defined(mips) || defined(sparc) || defined(luna68k)
+/*
+ * Memory file system I/O: copyin/copyout at base + (b_blkno << DEV_BSHIFT).
+ *
+ * Trivial on the HP since buffer has already been mapped into KVA space.
+ */
+void
+mfs_doio(bp, base)
+	register struct buf *bp;
+	caddr_t base;
+{
+
+	base += (bp->b_blkno << DEV_BSHIFT);
+	if (bp->b_flags & B_READ)
+		bp->b_error = copyin(base, bp->b_data, bp->b_bcount);
+	else
+		bp->b_error = copyout(bp->b_data, base, bp->b_bcount);
+	if (bp->b_error)
+		bp->b_flags |= B_ERROR;
+	biodone(bp);
+}
+#endif
+
+/* Identity mapping: hand back the given vnode and block; no run-ahead. */
+int
+mfs_bmap(ap)
+	struct vop_bmap_args /* {
+		struct vnode *a_vp;
+		daddr_t  a_bn;
+		struct vnode **a_vpp;
+		daddr_t *a_bnp;
+		int *a_runp;
+	} */ *ap;
+{
+
+	if (ap->a_vpp != NULL)
+		*ap->a_vpp = ap->a_vp;
+	if (ap->a_bnp != NULL)
+		*ap->a_bnp = ap->a_bn;
+	if (ap->a_runp != NULL)
+		*ap->a_runp = 0;	/* never leave the caller's run unset */
+	return (0);
+}
+
+/*
+ * Memory filesystem close routine: drain I/O and ask the server to exit.
+ */
+/* ARGSUSED */
+int
+mfs_close(ap)
+	struct vop_close_args /* {
+		struct vnode *a_vp;
+		int  a_fflag;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	register struct vnode *vp = ap->a_vp;
+	register struct mfsnode *mfsp = VTOMFS(vp);
+	register struct buf *bp;
+	int error;
+
+	/*
+	 * Finish any pending I/O requests.
+	 */
+	while (bp = mfsp->mfs_buflist) {
+		mfsp->mfs_buflist = bp->b_actf;
+		mfs_doio(bp, mfsp->mfs_baseoff);
+		wakeup((caddr_t)bp);
+	}
+	/*
+	 * On last close of a memory filesystem
+	 * we must invalidate any in core blocks, so that
+	 * we can free up its vnode.
+	 */
+	if (error = vinvalbuf(vp, 1, ap->a_cred, ap->a_p, 0, 0))
+		return (error);
+	/*
+	 * There should be no way to have any more uses of this
+	 * vnode, so if we find any other uses, it is a panic.
+	 */
+	if (vp->v_usecount > 1)
+		printf("mfs_close: ref count %d > 1\n", vp->v_usecount);
+	if (vp->v_usecount > 1 || mfsp->mfs_buflist)
+		panic("mfs_close");
+	/*
+	 * Send a request to the filesystem server to exit (see mfs_start).
+	 */
+	mfsp->mfs_buflist = (struct buf *)(-1);
+	wakeup((caddr_t)vp);
+	return (0);
+}
+
+/*
+ * Memory filesystem inactive routine: panic if I/O is still queued.
+ */
+/* ARGSUSED */
+int
+mfs_inactive(ap)
+	struct vop_inactive_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+	register struct mfsnode *mfsp = VTOMFS(ap->a_vp);
+
+	if (mfsp->mfs_buflist && mfsp->mfs_buflist != (struct buf *)(-1))
+		panic("mfs_inactive: not inactive (mfs_buflist %x)",
+			mfsp->mfs_buflist);
+	return (0);
+}
+
+/*
+ * Reclaim a memory filesystem devvp: free its mfsnode and clear v_data.
+ */
+int
+mfs_reclaim(ap)
+	struct vop_reclaim_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+
+	FREE(ap->a_vp->v_data, M_MFSNODE);
+	ap->a_vp->v_data = NULL;
+	return (0);
+}
+
+/*
+ * Print out the contents of an mfsnode (pid, base address in hex, size).
+ */
+int
+mfs_print(ap)
+	struct vop_print_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+	register struct mfsnode *mfsp = VTOMFS(ap->a_vp);
+
+	printf("tag VT_MFS, pid %d, base 0x%lx, size %ld\n", mfsp->mfs_pid,
+		(u_long)mfsp->mfs_baseoff, mfsp->mfs_size);
+	return (0);
+}
+
+/*
+ * Block device bad operation: target of the mfs_badop aliases; panics.
+ */
+int
+mfs_badop()
+{
+
+	panic("mfs_badop called\n");
+	/* NOTREACHED */
+}
+
+/* Memory based filesystem initialization: set up the pte map if needed. */
+int
+mfs_init()
+{
+
+#if !defined(hp300) && !defined(i386) && !defined(mips) && !defined(sparc) && !defined(luna68k)
+	rminit(mfsmap, (long)MFS_MAPREG, (long)1, "mfs mapreg", MFS_MAPSIZE);
+#endif
+	return (0);
+}
diff --git a/sys/ufs/mfs/mfsiom.h b/sys/ufs/mfs/mfsiom.h
new file mode 100644
index 00000000000..98aca855f6a
--- /dev/null
+++ b/sys/ufs/mfs/mfsiom.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 1989, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)mfsiom.h	8.1 (Berkeley) 6/11/93
+ */
+
+#define MFS_MAPREG	(MAXPHYS/NBPG + 2) /* # of kernel mapping pte's */
+#define MFS_MAPSIZE	10		   /* Size of mfsmap[] alloc map */
diff --git a/sys/ufs/mfs/mfsnode.h b/sys/ufs/mfs/mfsnode.h
new file mode 100644
index 00000000000..4480ab02407
--- /dev/null
+++ b/sys/ufs/mfs/mfsnode.h
@@ -0,0 +1,86 @@
+/*
+ * Copyright (c) 1989, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)mfsnode.h	8.2 (Berkeley) 8/11/93
+ */
+
+/*
+ * This structure defines the control data for the memory based file system.
+ */
+
+struct mfsnode {
+	struct	vnode *mfs_vnode;	/* vnode associated with this mfsnode */
+	caddr_t	mfs_baseoff;		/* base of file system in memory */
+	long	mfs_size;		/* size of memory file system */
+	pid_t	mfs_pid;		/* supporting pid; 0 for mini-root */
+	struct	buf *mfs_buflist;	/* I/O request list; -1 = exit req. */
+	long	mfs_spare[4];
+};
+
+/*
+ * Convert between mfsnode pointers and vnode pointers
+ */
+#define VTOMFS(vp)	((struct mfsnode *)(vp)->v_data)
+#define MFSTOV(mfsp)	((mfsp)->mfs_vnode)
+
+/* Vnode ops MFS does not implement: casts of mfs_badop, nullop, vn_bwrite. */
+#define mfs_lookup ((int (*) __P((struct  vop_lookup_args *)))mfs_badop)
+#define mfs_create ((int (*) __P((struct  vop_create_args *)))mfs_badop)
+#define mfs_mknod ((int (*) __P((struct  vop_mknod_args *)))mfs_badop)
+#define mfs_access ((int (*) __P((struct  vop_access_args *)))mfs_badop)
+#define mfs_getattr ((int (*) __P((struct  vop_getattr_args *)))mfs_badop)
+#define mfs_setattr ((int (*) __P((struct  vop_setattr_args *)))mfs_badop)
+#define mfs_read ((int (*) __P((struct  vop_read_args *)))mfs_badop)
+#define mfs_write ((int (*) __P((struct  vop_write_args *)))mfs_badop)
+#define mfs_select ((int (*) __P((struct  vop_select_args *)))mfs_badop)
+#define mfs_mmap ((int (*) __P((struct  vop_mmap_args *)))mfs_badop)
+#define mfs_seek ((int (*) __P((struct  vop_seek_args *)))mfs_badop)
+#define mfs_remove ((int (*) __P((struct  vop_remove_args *)))mfs_badop)
+#define mfs_link ((int (*) __P((struct  vop_link_args *)))mfs_badop)
+#define mfs_rename ((int (*) __P((struct  vop_rename_args *)))mfs_badop)
+#define mfs_mkdir ((int (*) __P((struct  vop_mkdir_args *)))mfs_badop)
+#define mfs_rmdir ((int (*) __P((struct  vop_rmdir_args *)))mfs_badop)
+#define mfs_symlink ((int (*) __P((struct  vop_symlink_args *)))mfs_badop)
+#define mfs_readdir ((int (*) __P((struct  vop_readdir_args *)))mfs_badop)
+#define mfs_readlink ((int (*) __P((struct  vop_readlink_args *)))mfs_badop)
+#define mfs_abortop ((int (*) __P((struct  vop_abortop_args *)))mfs_badop)
+#define mfs_lock ((int (*) __P((struct  vop_lock_args *)))nullop)
+#define mfs_unlock ((int (*) __P((struct  vop_unlock_args *)))nullop)
+#define mfs_islocked ((int (*) __P((struct  vop_islocked_args *)))nullop)
+#define mfs_pathconf ((int (*) __P((struct  vop_pathconf_args *)))mfs_badop)
+#define mfs_advlock ((int (*) __P((struct  vop_advlock_args *)))mfs_badop)
+#define mfs_blkatoff ((int (*) __P((struct  vop_blkatoff_args *)))mfs_badop)
+#define mfs_valloc ((int (*) __P((struct  vop_valloc_args *)))mfs_badop)
+#define mfs_vfree ((int (*) __P((struct  vop_vfree_args *)))mfs_badop)
+#define mfs_truncate ((int (*) __P((struct  vop_truncate_args *)))mfs_badop)
+#define mfs_update ((int (*) __P((struct  vop_update_args *)))mfs_badop)
+#define mfs_bwrite ((int (*) __P((struct  vop_bwrite_args *)))vn_bwrite)
diff --git a/sys/ufs/ufs/dinode.h b/sys/ufs/ufs/dinode.h
new file mode 100644
index 00000000000..5b9915d9cfd
--- /dev/null
+++ b/sys/ufs/ufs/dinode.h
@@ -0,0 +1,108 @@
+/*
+ * Copyright (c) 1982, 1989, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)dinode.h	8.3 (Berkeley) 1/21/94
+ */
+
+/*
+ * The root inode is the root of the file system.  Inode 0 can't be used for
+ * normal purposes and historically bad blocks were linked to inode 1, thus
+ * the root inode is 2.  (Inode 1 is no longer used for this purpose, however
+ * numerous dump tapes make this assumption, so we are stuck with it).
+ */
+#define	ROOTINO	((ino_t)2)
+
+/*
+ * A dinode contains all the meta-data associated with a UFS file.
+ * This structure defines the on-disk format of a dinode.
+ */
+
+#define	NDADDR	12			/* Direct addresses in inode. */
+#define	NIADDR	3			/* Indirect addresses in inode. */
+
+struct dinode {
+	u_short		di_mode;	/*   0: IFMT and permissions. */
+	short		di_nlink;	/*   2: File link count. */
+	union {
+		u_short	oldids[2];	/*   4: Ffs: old user and group ids. */
+		ino_t	inumber;	/*   4: Lfs: inode number. */
+	} di_u;
+	u_quad_t	di_size;	/*   8: File byte count. */
+	struct timespec	di_atime;	/*  16: Last access time. */
+	struct timespec	di_mtime;	/*  24: Last modified time. */
+	struct timespec	di_ctime;	/*  32: Last inode change time. */
+	daddr_t		di_db[NDADDR];	/*  40: Direct disk blocks. */
+	daddr_t		di_ib[NIADDR];	/*  88: Indirect disk blocks. */
+	u_long		di_flags;	/* 100: Status flags (chflags). */
+	long		di_blocks;	/* 104: Blocks actually held. */
+	long		di_gen;		/* 108: Generation number. */
+	u_long		di_uid;		/* 112: File owner. */
+	u_long		di_gid;		/* 116: File group. */
+	long		di_spare[2];	/* 120: Reserved; currently unused */
+};
+
+/*
+ * The di_db fields may be overlaid with other information for
+ * file types that do not have associated disk storage. Block
+ * and character devices overlay the first data block with their
+ * dev_t value. Short symbolic links place their path in the
+ * di_db area.
+ */
+#define	di_inumber	di_u.inumber
+#define	di_ogid		di_u.oldids[1]
+#define	di_ouid		di_u.oldids[0]
+#define	di_rdev		di_db[0]
+#define	di_shortlink	di_db
+#define	MAXSYMLINKLEN	((NDADDR + NIADDR) * sizeof(daddr_t))
+
+/* File modes. */
+#define	IEXEC		0000100		/* Executable. */
+#define	IWRITE		0000200		/* Writeable. */
+#define	IREAD		0000400		/* Readable. */
+#define	ISVTX		0001000		/* Sticky bit. */
+#define	ISGID		0002000		/* Set-gid. */
+#define	ISUID		0004000		/* Set-uid. */
+
+/* File types. */
+#define	IFMT		0170000		/* Mask of file type. */
+#define	IFIFO		0010000		/* Named pipe (fifo). */
+#define	IFCHR		0020000		/* Character device. */
+#define	IFDIR		0040000		/* Directory file. */
+#define	IFBLK		0060000		/* Block device. */
+#define	IFREG		0100000		/* Regular file. */
+#define	IFLNK		0120000		/* Symbolic link. */
+#define	IFSOCK		0140000		/* UNIX domain socket. */
diff --git a/sys/ufs/ufs/dir.h b/sys/ufs/ufs/dir.h
new file mode 100644
index 00000000000..c51bd1cf6e1
--- /dev/null
+++ b/sys/ufs/ufs/dir.h
@@ -0,0 +1,147 @@
+/*
+ * Copyright (c) 1982, 1986, 1989, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)dir.h	8.2 (Berkeley) 1/21/94
+ */
+
+#ifndef _DIR_H_
+#define	_DIR_H_
+
+/*
+ * A directory consists of some number of blocks of DIRBLKSIZ
+ * bytes, where DIRBLKSIZ is chosen such that it can be transferred
+ * to disk in a single atomic operation (e.g. 512 bytes on most machines).
+ *
+ * Each DIRBLKSIZ byte block contains some number of directory entry
+ * structures, which are of variable length.  Each directory entry has
+ * a struct direct at the front of it, containing its inode number,
+ * the length of the entry, and the length of the name contained in
+ * the entry.  These are followed by the name padded to a 4 byte boundary
+ * with null bytes.  All names are guaranteed null terminated.
+ * The maximum length of a name in a directory is MAXNAMLEN.
+ *
+ * The macro DIRSIZ(fmt, dp) gives the amount of space required to represent
+ * a directory entry.  Free space in a directory is represented by
+ * entries which have dp->d_reclen > DIRSIZ(fmt, dp).  All DIRBLKSIZ bytes
+ * in a directory block are claimed by the directory entries.  This
+ * usually results in the last entry in a directory having a large
+ * dp->d_reclen.  When entries are deleted from a directory, the
+ * space is returned to the previous entry in the same directory
+ * block by increasing its dp->d_reclen.  If the first entry of
+ * a directory block is free, then its dp->d_ino is set to 0.
+ * Entries other than the first in a directory do not normally have
+ * dp->d_ino set to 0.
+ */
+#define DIRBLKSIZ	DEV_BSIZE
+#define	MAXNAMLEN	255
+
+struct	direct {
+	u_long	d_ino;			/* inode number of entry */
+	u_short	d_reclen;		/* length of this record */
+	u_char	d_type; 		/* file type, see below */
+	u_char	d_namlen;		/* length of string in d_name */
+	char	d_name[MAXNAMLEN + 1];	/* name with length <= MAXNAMLEN */
+};
+
+/*
+ * File types
+ */
+#define	DT_UNKNOWN	 0
+#define	DT_FIFO		 1
+#define	DT_CHR		 2
+#define	DT_DIR		 4
+#define	DT_BLK		 6
+#define	DT_REG		 8
+#define	DT_LNK		10
+#define	DT_SOCK		12
+
+/*
+ * Convert between stat structure types and directory types.
+ */
+#define	IFTODT(mode)	(((mode) & 0170000) >> 12)
+#define	DTTOIF(dirtype)	((dirtype) << 12)
+
+/*
+ * The DIRSIZ macro gives the minimum record length which will hold the
+ * directory entry: struct direct without the d_name field, plus the name
+ * with a terminating null byte (d_namlen+1), rounded up to a 4 byte boundary.
+ * For oldfmt entries on little-endian machines the length is read from d_type.
+ */
+#if (BYTE_ORDER == LITTLE_ENDIAN)
+#define DIRSIZ(oldfmt, dp) \
+    ((oldfmt) ? \
+    ((sizeof (struct direct) - (MAXNAMLEN+1)) + (((dp)->d_type+1 + 3) &~ 3)) : \
+    ((sizeof (struct direct) - (MAXNAMLEN+1)) + (((dp)->d_namlen+1 + 3) &~ 3)))
+#else
+#define DIRSIZ(oldfmt, dp) \
+    ((sizeof (struct direct) - (MAXNAMLEN+1)) + (((dp)->d_namlen+1 + 3) &~ 3))
+#endif
+#define OLDDIRFMT	1
+#define NEWDIRFMT	0
+
+/*
+ * Template for manipulating directories.
+ * Should use struct direct's, but the name field
+ * is MAXNAMLEN + 1 bytes, and this just won't do.
+ */
+struct dirtemplate {
+	u_long	dot_ino;
+	short	dot_reclen;
+	u_char	dot_type;
+	u_char	dot_namlen;
+	char	dot_name[4];		/* must be multiple of 4 */
+	u_long	dotdot_ino;
+	short	dotdot_reclen;
+	u_char	dotdot_type;
+	u_char	dotdot_namlen;
+	char	dotdot_name[4];		/* ditto */
+};
+
+/*
+ * This is the old format of directories, sans type element.
+ */
+struct odirtemplate {
+	u_long	dot_ino;
+	short	dot_reclen;
+	u_short	dot_namlen;
+	char	dot_name[4];		/* must be multiple of 4 */
+	u_long	dotdot_ino;
+	short	dotdot_reclen;
+	u_short	dotdot_namlen;
+	char	dotdot_name[4];		/* ditto */
+};
+#endif /* !_DIR_H_ */
diff --git a/sys/ufs/ufs/inode.h b/sys/ufs/ufs/inode.h
new file mode 100644
index 00000000000..df155967a7d
--- /dev/null
+++ b/sys/ufs/ufs/inode.h
@@ -0,0 +1,162 @@
+/*
+ * Copyright (c) 1982, 1989, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)inode.h	8.4 (Berkeley) 1/21/94
+ */
+
+#include <ufs/ufs/dinode.h>
+
+/*
+ * Theoretically, directories can be more than 2Gb in length; however, in
+ * practice this seems unlikely. So, we define the type doff_t as a long
+ * to keep down the cost of doing lookup on a 32-bit machine. If you are
+ * porting to a 64-bit architecture, you should make doff_t the same as off_t.
+ */
+#define	doff_t	long
+
+/*
+ * The inode is used to describe each active (or recently active)
+ * file in the UFS filesystem. It is composed of two types of
+ * information. The first part is the information that is needed
+ * only while the file is active (such as the identity of the file
+ * and linkage to speed its lookup). The second part is the
+ * permanent meta-data associated with the file which is read
+ * in from the permanent dinode from long term storage when the
+ * file becomes active, and is put back when the file is no longer
+ * being used.
+ */
+struct inode {
+	struct	inode *i_next;	/* Hash chain forward. */
+	struct	inode **i_prev;	/* Hash chain back. */
+	struct	vnode *i_vnode;	/* Vnode associated with this inode. */
+	struct	vnode *i_devvp;	/* Vnode for block I/O. */
+	u_long	i_flag;		/* I* flags. */
+	dev_t	i_dev;		/* Device associated with the inode. */
+	ino_t	i_number;	/* The identity of the inode. */
+	union {			/* Associated filesystem. */
+		struct	fs *fs;		/* FFS */
+		struct	lfs *lfs;	/* LFS */
+	} inode_u;
+#define	i_fs	inode_u.fs
+#define	i_lfs	inode_u.lfs
+	struct	dquot *i_dquot[MAXQUOTAS];	/* Dquot structures. */
+	u_quad_t i_modrev;	/* Revision level for lease. */
+	struct	lockf *i_lockf;	/* Head of byte-level lock list. */
+	pid_t	i_lockholder;	/* DEBUG: holder of inode lock. */
+	pid_t	i_lockwaiter;	/* DEBUG: latest blocked for inode lock. */
+	/*
+	 * Side effects; used during directory lookup.
+	 */
+	long	i_count;	/* Size of free slot in directory. */
+	doff_t	i_endoff;	/* End of useful stuff in directory. */
+	doff_t	i_diroff;	/* Offset in dir, where we found last entry. */
+	doff_t	i_offset;	/* Offset of free space in directory. */
+	ino_t	i_ino;		/* Inode number of found directory. */
+	u_long	i_reclen;	/* Size of found directory entry. */
+	long	i_spare[11];	/* Spares to round up to 128 bytes. */
+	/*
+	 * The on-disk dinode itself.
+	 */
+	struct	dinode i_din;	/* 128 bytes of the on-disk dinode. */
+};
+
+#define	i_atime		i_din.di_atime
+#define	i_blocks	i_din.di_blocks
+#define	i_ctime		i_din.di_ctime
+#define	i_db		i_din.di_db
+#define	i_flags		i_din.di_flags
+#define	i_gen		i_din.di_gen
+#define	i_gid		i_din.di_gid
+#define	i_ib		i_din.di_ib
+#define	i_mode		i_din.di_mode
+#define	i_mtime		i_din.di_mtime
+#define	i_nlink		i_din.di_nlink
+#define	i_rdev		i_din.di_rdev
+#define	i_shortlink	i_din.di_shortlink
+#define	i_size		i_din.di_size
+#define	i_uid		i_din.di_uid
+
+/* These flags are kept in i_flag. */
+#define	IN_ACCESS	0x0001		/* Access time update request. */
+#define	IN_CHANGE	0x0002		/* Inode change time update request. */
+#define	IN_EXLOCK	0x0004		/* File has exclusive lock. */
+#define	IN_LOCKED	0x0008		/* Inode lock. */
+#define	IN_LWAIT	0x0010		/* Process waiting on file lock. */
+#define	IN_MODIFIED	0x0020		/* Inode has been modified. */
+#define	IN_RENAME	0x0040		/* Inode is being renamed. */
+#define	IN_SHLOCK	0x0080		/* File has shared lock. */
+#define	IN_UPDATE	0x0100		/* Modification time update request. */
+#define	IN_WANTED	0x0200		/* Inode is wanted by a process. */
+
+#ifdef KERNEL
+/*
+ * Structure used to pass around logical block paths generated by
+ * ufs_getlbns and used by truncate and bmap code.
+ */
+struct indir {
+	daddr_t	in_lbn;			/* Logical block number. */
+	int	in_off;			/* Index into i_ib or the block. */
+	int	in_exists;		/* Flag if the block exists. */
+};
+
+/* Convert between inode pointers and vnode pointers. */
+#define VTOI(vp)	((struct inode *)(vp)->v_data)
+#define ITOV(ip)	((ip)->i_vnode)
+
+#define	ITIMES(ip, t1, t2) {	/* fold pending time requests into ip */ \
+	if ((ip)->i_flag & (IN_ACCESS | IN_CHANGE | IN_UPDATE)) {	\
+		(ip)->i_flag |= IN_MODIFIED;				\
+		if ((ip)->i_flag & IN_ACCESS)	/* atime <- t1 */	\
+			(ip)->i_atime.ts_sec = (t1)->tv_sec;		\
+		if ((ip)->i_flag & IN_UPDATE) {	/* mtime <- t2 */	\
+			(ip)->i_mtime.ts_sec = (t2)->tv_sec;		\
+			(ip)->i_modrev++;				\
+		}							\
+		if ((ip)->i_flag & IN_CHANGE)	/* ctime <- time, not t1/t2 */ \
+			(ip)->i_ctime.ts_sec = time.tv_sec;		\
+		(ip)->i_flag &= ~(IN_ACCESS | IN_CHANGE | IN_UPDATE);	\
+	}								\
+}
+
+/* This overlays the fid structure (see mount.h). */
+struct ufid {
+	u_short	ufid_len;	/* Length of structure. */
+	u_short	ufid_pad;	/* Force long alignment. */
+	ino_t	ufid_ino;	/* File number (ino). */
+	long	ufid_gen;	/* Generation number. */
+};
+#endif /* KERNEL */
diff --git a/sys/ufs/ufs/lockf.h b/sys/ufs/ufs/lockf.h
new file mode 100644
index 00000000000..0ec61dbb0cf
--- /dev/null
+++ b/sys/ufs/ufs/lockf.h
@@ -0,0 +1,79 @@
+/*
+ * Copyright (c) 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Scooter Morris at Genentech Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)lockf.h	8.1 (Berkeley) 6/11/93
+ */
+
+/*
+ * The lockf structure is a kernel structure which contains the information
+ * associated with a byte range lock.  The lockf structures are linked into
+ * the inode structure. Locks are sorted by the starting byte of the lock for
+ * efficiency.
+ */
+struct lockf {
+	short	lf_flags;	 /* Lock semantics: F_POSIX, F_FLOCK, F_WAIT */
+	short	lf_type;	 /* Lock type: F_RDLCK, F_WRLCK */
+	off_t	lf_start;	 /* The byte # of the start of the lock */
+	off_t	lf_end;		 /* The byte # of the end of the lock (-1=EOF)*/
+	caddr_t	lf_id;		 /* The id of the resource holding the lock */
+	struct	inode *lf_inode; /* Back pointer to the inode */
+	struct	lockf *lf_next;	 /* A pointer to the next lock on this inode */
+	struct	lockf *lf_block; /* The list of blocked locks */
+};
+
+/* Maximum length of sleep chains to traverse to try and detect deadlock. */
+#define MAXDEPTH 50
+
+__BEGIN_DECLS
+void	 lf_addblock __P((struct lockf *, struct lockf *));
+int	 lf_clearlock __P((struct lockf *));
+int	 lf_findoverlap __P((struct lockf *,
+	    struct lockf *, int, struct lockf ***, struct lockf **));
+struct lockf *
+	 lf_getblock __P((struct lockf *));
+int	 lf_getlock __P((struct lockf *, struct flock *));
+int	 lf_setlock __P((struct lockf *));
+void	 lf_split __P((struct lockf *, struct lockf *));
+void	 lf_wakelock __P((struct lockf *));
+__END_DECLS
+
+#ifdef LOCKF_DEBUG
+extern int lockf_debug;
+
+__BEGIN_DECLS
+void	lf_print __P((char *, struct lockf *));
+void	lf_printlist __P((char *, struct lockf *));
+__END_DECLS
+#endif
diff --git a/sys/ufs/ufs/quota.h b/sys/ufs/ufs/quota.h
new file mode 100644
index 00000000000..11efb402c91
--- /dev/null
+++ b/sys/ufs/ufs/quota.h
@@ -0,0 +1,207 @@
+/*
+ * Copyright (c) 1982, 1986, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Robert Elz at The University of Melbourne.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)quota.h	8.1 (Berkeley) 6/11/93
+ */
+
+#ifndef _QUOTA_
+#define _QUOTA_
+
+/*
+ * Definitions for disk quotas imposed on the average user
+ * (big brother finally hits UNIX).
+ *
+ * The following constants define the amount of time given a user before the
+ * soft limits are treated as hard limits (usually resulting in an allocation
+ * failure). The timer is started when the user crosses their soft limit; it
+ * is reset when they go below their soft limit.
+ */
+#define	MAX_IQ_TIME	(7*24*60*60)	/* 1 week, in seconds */
+#define	MAX_DQ_TIME	(7*24*60*60)	/* 1 week, in seconds */
+
+/*
+ * The following constants define the usage of the quota file array in the
+ * ufsmount structure and dquot array in the inode structure.  The semantics
+ * of the elements of these arrays are defined in the routine getinoquota;
+ * the remainder of the quota code treats them generically and need not be
+ * inspected when changing the size of the array.
+ */
+#define	MAXQUOTAS	2
+#define	USRQUOTA	0	/* element used for user quotas */
+#define	GRPQUOTA	1	/* element used for group quotas */
+
+/*
+ * Definitions for the default names of the quota files.
+ */
+#define INITQFNAMES { \
+	"user",		/* USRQUOTA */ \
+	"group",	/* GRPQUOTA */ \
+	"undefined",	/* extra entry past MAXQUOTAS */ \
+};	/* note: the expansion itself ends in ';' */
+#define	QUOTAFILENAME	"quota"
+#define	QUOTAGROUP	"operator"
+
+/*
+ * Command definitions for the 'quotactl' system call.  The commands are
+ * broken into a main command defined below and a subcommand that is used
+ * to convey the type of quota that is being manipulated (see above).
+ */
+#define SUBCMDMASK	0x00ff
+#define SUBCMDSHIFT	8
+#define	QCMD(cmd, type)	(((cmd) << SUBCMDSHIFT) | ((type) & SUBCMDMASK))
+
+#define	Q_QUOTAON	0x0100	/* enable quotas */
+#define	Q_QUOTAOFF	0x0200	/* disable quotas */
+#define	Q_GETQUOTA	0x0300	/* get limits and usage */
+#define	Q_SETQUOTA	0x0400	/* set limits and usage */
+#define	Q_SETUSE	0x0500	/* set usage */
+#define	Q_SYNC		0x0600	/* sync disk copy of a filesystems quotas */
+
+/*
+ * The following structure defines the format of the disk quota file
+ * (as it appears on disk) - the file is an array of these structures
+ * indexed by user or group number.  The setquota system call establishes
+ * the vnode for each quota file (a pointer is retained in the ufsmount
+ * structure).
+ */
+struct	dqblk {
+	u_long	dqb_bhardlimit;	/* absolute limit on disk blks alloc */
+	u_long	dqb_bsoftlimit;	/* preferred limit on disk blks */
+	u_long	dqb_curblocks;	/* current block count */
+	u_long	dqb_ihardlimit;	/* maximum # allocated inodes + 1 */
+	u_long	dqb_isoftlimit;	/* preferred inode limit */
+	u_long	dqb_curinodes;	/* current # allocated inodes */
+	time_t	dqb_btime;	/* time limit for excessive disk use */
+	time_t	dqb_itime;	/* time limit for excessive files */
+};
+
+/*
+ * The following structure records disk usage for a user or group on a
+ * filesystem. There is one allocated for each quota that exists on any
+ * filesystem for the current user or group. A cache is kept of recently
+ * used entries.
+ */
+struct	dquot {
+	struct	dquot *dq_forw, **dq_back; /* hash list */
+	struct	dquot *dq_freef, **dq_freeb; /* free list */
+	short	dq_flags;		/* flags, see below */
+	short	dq_cnt;			/* count of active references */
+	short	dq_spare;		/* unused spare padding */
+	short	dq_type;		/* quota type of this dquot */
+	u_long	dq_id;			/* identifier this applies to */
+	struct	ufsmount *dq_ump;	/* filesystem that this is taken from */
+	struct	dqblk dq_dqb;		/* actual usage & quotas */
+};
+/*
+ * Flag values.
+ */
+#define	DQ_LOCK		0x01		/* this quota locked (no MODS) */
+#define	DQ_WANT		0x02		/* wakeup on unlock */
+#define	DQ_MOD		0x04		/* this quota modified since read */
+#define	DQ_FAKE		0x08		/* no limits here, just usage */
+#define	DQ_BLKS		0x10		/* has been warned about blk limit */
+#define	DQ_INODS	0x20		/* has been warned about inode limit */
+/*
+ * Shorthand notation.
+ */
+#define	dq_bhardlimit	dq_dqb.dqb_bhardlimit
+#define	dq_bsoftlimit	dq_dqb.dqb_bsoftlimit
+#define	dq_curblocks	dq_dqb.dqb_curblocks
+#define	dq_ihardlimit	dq_dqb.dqb_ihardlimit
+#define	dq_isoftlimit	dq_dqb.dqb_isoftlimit
+#define	dq_curinodes	dq_dqb.dqb_curinodes
+#define	dq_btime	dq_dqb.dqb_btime
+#define	dq_itime	dq_dqb.dqb_itime
+
+/*
+ * If the system has never checked for a quota for this file, then it is set
+ * to NODQUOT.  Once a write attempt is made the inode pointer is set to
+ * reference a dquot structure.
+ */
+#define	NODQUOT		((struct dquot *) 0)
+
+/*
+ * Flags to chkdq() and chkiq()
+ */
+#define	FORCE	0x01	/* force usage changes independent of limits */
+#define	CHOWN	0x02	/* (advisory) change initiated by chown */
+
+/*
+ * Macros to avoid subroutine calls to trivial functions.
+ */
+#ifdef DIAGNOSTIC
+#define	DQREF(dq)	dqref(dq)
+#else
+#define	DQREF(dq)	(dq)->dq_cnt++
+#endif
+
+#include <sys/cdefs.h>
+
+struct dquot;
+struct inode;
+struct mount;
+struct proc;
+struct ucred;
+struct ufsmount;
+struct vnode;
+__BEGIN_DECLS
+int	chkdq __P((struct inode *, long, struct ucred *, int));
+int	chkdqchg __P((struct inode *, long, struct ucred *, int));
+int	chkiq __P((struct inode *, long, struct ucred *, int));
+int	chkiqchg __P((struct inode *, long, struct ucred *, int));
+void	dqflush __P((struct vnode *));
+int	dqget __P((struct vnode *,
+	    u_long, struct ufsmount *, int, struct dquot **));
+void	dqinit __P((void));
+void	dqref __P((struct dquot *));
+void	dqrele __P((struct vnode *, struct dquot *));
+int	dqsync __P((struct vnode *, struct dquot *));
+int	getinoquota __P((struct inode *));
+int	getquota __P((struct mount *, u_long, int, caddr_t));
+int	qsync __P((struct mount *mp));
+int	quotaoff __P((struct proc *, struct mount *, int));
+int	quotaon __P((struct proc *, struct mount *, int, caddr_t));
+int	setquota __P((struct mount *, u_long, int, caddr_t));
+int	setuse __P((struct mount *, u_long, int, caddr_t));
+int	ufs_quotactl __P((struct mount *, int, uid_t, caddr_t, struct proc *));
+__END_DECLS
+
+#ifdef DIAGNOSTIC
+__BEGIN_DECLS
+void	chkdquot __P((struct inode *));
+__END_DECLS
+#endif
+
+#endif /* _QUOTA_ */
diff --git a/sys/ufs/ufs/ufs_bmap.c b/sys/ufs/ufs/ufs_bmap.c
new file mode 100644
index 00000000000..bcd838d036a
--- /dev/null
+++ b/sys/ufs/ufs/ufs_bmap.c
@@ -0,0 +1,294 @@
+/*
+ * Copyright (c) 1989, 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)ufs_bmap.c	8.6 (Berkeley) 1/21/94
+ */
+
+#include <sys/param.h>
+#include <sys/buf.h>
+#include <sys/proc.h>
+#include <sys/vnode.h>
+#include <sys/mount.h>
+#include <sys/resourcevar.h>
+#include <sys/trace.h>
+
+#include <miscfs/specfs/specdev.h>
+
+#include <ufs/ufs/quota.h>
+#include <ufs/ufs/inode.h>
+#include <ufs/ufs/ufsmount.h>
+#include <ufs/ufs/ufs_extern.h>
+
+/*
+ * Bmap converts the logical block number of a file to its physical block
+ * number on the disk. The conversion is done by using the logical block
+ * number to index into the array of block pointers described by the dinode.
+ */
+int
+ufs_bmap(ap)
+	struct vop_bmap_args /* {
+		struct vnode *a_vp;
+		daddr_t  a_bn;
+		struct vnode **a_vpp;
+		daddr_t *a_bnp;
+		int *a_runp;
+	} */ *ap;
+{
+	/*
+	 * Hand back the device vnode if the caller asked for it; when no
+	 * block number is wanted (a_bnp == NULL) there is nothing to map.
+	 */
+	if (ap->a_vpp != NULL)
+		*ap->a_vpp = VTOI(ap->a_vp)->i_devvp;
+	if (ap->a_bnp == NULL)
+		return (0);
+
+	return (ufs_bmaparray(ap->a_vp, ap->a_bn, ap->a_bnp, NULL, NULL,
+	    ap->a_runp));
+}
+
+/*
+ * Indirect blocks are now on the vnode for the file.  They are given negative
+ * logical block numbers.  Indirect blocks are addressed by the negative
+ * address of the first data block to which they point.  Double indirect blocks
+ * are addressed by one less than the address of the first indirect block to
+ * which they point.  Triple indirect blocks are addressed by one less than
+ * the address of the first double indirect block to which they point.
+ *
+ * ufs_bmaparray does the bmap conversion, storing the disk address in *bnp
+ * (-1 if none is assigned).  If ap and nump are given, it also returns the
+ * array of logical blocks which must be traversed to get to the block; if
+ * runp is given, *runp counts the sequential blocks that follow on disk.
+ */
+
+int
+ufs_bmaparray(vp, bn, bnp, ap, nump, runp)
+	struct vnode *vp;
+	register daddr_t bn;
+	daddr_t *bnp;
+	struct indir *ap;
+	int *nump;
+	int *runp;
+{
+	register struct inode *ip;
+	struct buf *bp;
+	struct ufsmount *ump;
+	struct mount *mp;
+	struct vnode *devvp;
+	struct indir a[NIADDR + 1], *xap;	/* i_ib entry + one per level */
+	daddr_t daddr;
+	long metalbn;
+	int error, maxrun, num;
+
+	ip = VTOI(vp);
+	mp = vp->v_mount;
+	ump = VFSTOUFS(mp);
+#ifdef DIAGNOSTIC
+	if (ap != NULL && nump == NULL || ap == NULL && nump != NULL)
+		panic("ufs_bmaparray: invalid arguments");
+#endif
+
+	if (runp) {
+		/*
+		 * XXX
+		 * If MAXBSIZE is the largest transfer the disks can handle,
+		 * we probably want maxrun to be 1 block less so that we
+		 * don't create a block larger than the device can handle.
+		 */
+		*runp = 0;
+		maxrun = MAXBSIZE / mp->mnt_stat.f_iosize - 1;
+	}
+
+	xap = ap == NULL ? a : ap;	/* use the local array if none given */
+	if (!nump)
+		nump = &num;
+	if (error = ufs_getlbns(vp, bn, xap, nump))
+		return (error);
+
+	num = *nump;
+	if (num == 0) {			/* direct block: look in i_db */
+		*bnp = blkptrtodb(ump, ip->i_db[bn]);
+		if (*bnp == 0)
+			*bnp = -1;
+		else if (runp)
+			for (++bn; bn < NDADDR && *runp < maxrun &&
+			    is_sequential(ump, ip->i_db[bn - 1], ip->i_db[bn]);
+			    ++bn, ++*runp);
+		return (0);
+	}
+
+
+	/* Get disk address out of indirect block array */
+	daddr = ip->i_ib[xap->in_off];
+
+	devvp = VFSTOUFS(vp->v_mount)->um_devvp;	/* not used below */
+	for (bp = NULL, ++xap; --num; ++xap) {
+		/*
+		 * Exit the loop if there is no disk address assigned yet and
+		 * the indirect block isn't in the cache, or if we were
+		 * looking for an indirect block and we've found it.
+		 */
+
+		metalbn = xap->in_lbn;
+		if (daddr == 0 && !incore(vp, metalbn) || metalbn == bn)
+			break;
+		/*
+		 * If we get here, we've either got the block in the cache
+		 * or we have a disk address for it, go fetch it.
+		 */
+		if (bp)
+			brelse(bp);
+
+		xap->in_exists = 1;
+		bp = getblk(vp, metalbn, mp->mnt_stat.f_iosize, 0, 0);
+		if (bp->b_flags & (B_DONE | B_DELWRI)) {
+			trace(TR_BREADHIT, pack(vp, size), metalbn);
+		}
+#ifdef DIAGNOSTIC
+		else if (!daddr)
+			panic("ufs_bmaparray: indirect block not in cache");
+#endif
+		else {
+			trace(TR_BREADMISS, pack(vp, size), metalbn);
+			bp->b_blkno = blkptrtodb(ump, daddr);
+			bp->b_flags |= B_READ;
+			VOP_STRATEGY(bp);
+			curproc->p_stats->p_ru.ru_inblock++;	/* XXX */
+			if (error = biowait(bp)) {
+				brelse(bp);
+				return (error);
+			}
+		}
+
+		daddr = ((daddr_t *)bp->b_data)[xap->in_off];
+		if (num == 1 && daddr && runp)	/* last level: count the run */
+			for (bn = xap->in_off + 1;
+			    bn < MNINDIR(ump) && *runp < maxrun &&
+			    is_sequential(ump, ((daddr_t *)bp->b_data)[bn - 1],
+			    ((daddr_t *)bp->b_data)[bn]);
+			    ++bn, ++*runp);
+	}
+	if (bp)
+		brelse(bp);
+
+	daddr = blkptrtodb(ump, daddr);
+	*bnp = daddr == 0 ? -1 : daddr;
+	return (0);
+}
+
+/*
+ * Create an array of logical block number/offset pairs which represent the
+ * path of indirect blocks required to access a data block.  The first "pair"
+ * contains the logical block number of the appropriate single, double or
+ * triple indirect block and the offset into the inode indirect block array.
+ * That block number appears twice in the array, once with the offset into
+ * i_ib and once with the offset into the block itself.  *nump receives the
+ * number of pairs stored (0 for a direct block); EFBIG means bn is too big.
+ */
+int
+ufs_getlbns(vp, bn, ap, nump)
+	struct vnode *vp;
+	register daddr_t bn;
+	struct indir *ap;
+	int *nump;
+{
+	long metalbn, realbn;
+	struct ufsmount *ump;
+	int blockcnt, i, numlevels, off;
+
+	ump = VFSTOUFS(vp->v_mount);
+	if (nump)
+		*nump = 0;
+	numlevels = 0;
+	realbn = bn;		/* keep the signed request; bn becomes |bn| */
+	if ((long)bn < 0)
+		bn = -(long)bn;
+
+	/* The first NDADDR blocks are direct blocks. */
+	if (bn < NDADDR)
+		return (0);
+
+	/*
+	 * Determine the number of levels of indirection.  After this loop
+	 * is done, blockcnt indicates the number of data blocks possible
+	 * at the given level of indirection, and NIADDR - i is the index
+	 * into i_ib of the indirect block that roots the path to the block.
+	 */
+	for (blockcnt = 1, i = NIADDR, bn -= NDADDR;; i--, bn -= blockcnt) {
+		if (i == 0)
+			return (EFBIG);
+		blockcnt *= MNINDIR(ump);
+		if (bn < blockcnt)
+			break;
+	}
+
+	/* Calculate the address of the first meta-block. */
+	if (realbn >= 0)
+		metalbn = -(realbn - bn + NIADDR - i);
+	else
+		metalbn = -(-realbn - bn + NIADDR - i);
+
+	/*
+	 * At each iteration, off is the offset into the bap array which is
+	 * an array of disk addresses at the current level of indirection.
+	 * The logical block number and the offset in that block are stored
+	 * into the argument array.
+	 */
+	ap->in_lbn = metalbn;
+	ap->in_off = off = NIADDR - i;	/* first pair: slot within i_ib */
+	ap->in_exists = 0;
+	ap++;
+	for (++numlevels; i <= NIADDR; i++) {
+		/* If searching for a meta-data block, quit when found. */
+		if (metalbn == realbn)
+			break;
+
+		blockcnt /= MNINDIR(ump);
+		off = (bn / blockcnt) % MNINDIR(ump);
+
+		++numlevels;
+		ap->in_lbn = metalbn;
+		ap->in_off = off;
+		ap->in_exists = 0;
+		++ap;
+
+		metalbn -= -1 + off * blockcnt;	/* lbn of block at next level */
+	}
+	if (nump)
+		*nump = numlevels;
+	return (0);
+}
diff --git a/sys/ufs/ufs/ufs_disksubr.c b/sys/ufs/ufs/ufs_disksubr.c
new file mode 100644
index 00000000000..78dede4da77
--- /dev/null
+++ b/sys/ufs/ufs/ufs_disksubr.c
@@ -0,0 +1,364 @@
+/*
+ * Copyright (c) 1982, 1986, 1988, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)ufs_disksubr.c	8.5 (Berkeley) 1/21/94
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/buf.h>
+#include <sys/disklabel.h>
+#include <sys/syslog.h>
+
+/*
+ * Seek sort for disks.  We depend on the driver which calls us using b_resid
+ * as the current cylinder number.
+ *
+ * The argument ap structure holds a b_actf activity chain pointer on which we
+ * keep two queues, sorted in ascending cylinder order.  The first queue holds
+ * those requests which are positioned after the current cylinder (in the first
+ * request); the second holds requests which came in after their cylinder number
+ * was passed.  Thus we implement a one way scan, retracting after reaching the
+ * end of the drive to the first request on the second queue, at which time it
+ * becomes the first queue.
+ *
+ * A one-way scan is natural because of the way UNIX read-ahead blocks are
+ * allocated.
+ */
+
+/*
+ * For portability with historic industry practice, the
+ * cylinder number has to be maintained in the `b_resid'
+ * field.  disksort() and readdisklabel() below use this alias.
+ */
+#define	b_cylinder	b_resid
+
+void
+disksort(ap, bp)
+	register struct buf *ap, *bp;	/* ap: queue head; bp: new request */
+{
+	register struct buf *bq;
+
+	/* If the queue is empty, then it's easy. */
+	if (ap->b_actf == NULL) {
+		bp->b_actf = NULL;
+		ap->b_actf = bp;
+		return;
+	}
+
+	/*
+	 * If we lie before the first (currently active) request, then we
+	 * must locate the second request list and add ourselves to it.
+	 */
+	bq = ap->b_actf;
+	if (bp->b_cylinder < bq->b_cylinder) {
+		while (bq->b_actf) {
+			/*
+			 * Check for an ``inversion'' in the normally ascending
+			 * cylinder numbers, indicating the start of the second
+			 * request list.
+			 */
+			if (bq->b_actf->b_cylinder < bq->b_cylinder) {
+				/*
+				 * Search the second request list for the first
+				 * request at a larger cylinder number.  We go
+				 * before that; if there is no such request, we
+				 * go at end.
+				 */
+				do {
+					if (bp->b_cylinder <
+					    bq->b_actf->b_cylinder)
+						goto insert;
+					if (bp->b_cylinder ==
+					    bq->b_actf->b_cylinder &&
+					    bp->b_blkno < bq->b_actf->b_blkno)
+						goto insert;
+					bq = bq->b_actf;
+				} while (bq->b_actf);
+				goto insert;		/* after last */
+			}
+			bq = bq->b_actf;
+		}
+		/*
+		 * No inversions... we will go after the last, and
+		 * be the first request in the second request list.
+		 */
+		goto insert;
+	}
+	/*
+	 * Request is at/after the current request...
+	 * sort in the first request list.
+	 */
+	while (bq->b_actf) {
+		/*
+		 * We want to go after the current request if there is an
+		 * inversion after it (i.e. it is the end of the first
+		 * request list), or if the next request is a larger cylinder
+		 * than our request (or the same cylinder, larger block).
+		 */
+		if (bq->b_actf->b_cylinder < bq->b_cylinder ||
+		    bp->b_cylinder < bq->b_actf->b_cylinder ||
+		    (bp->b_cylinder == bq->b_actf->b_cylinder &&
+		    bp->b_blkno < bq->b_actf->b_blkno))
+			goto insert;
+		bq = bq->b_actf;
+	}
+	/*
+	 * Neither a second list nor a larger request... we go at the end of
+	 * the first list, which is the same as the end of the whole shebang.
+	 */
+insert:	bp->b_actf = bq->b_actf;	/* link bp in right after bq */
+	bq->b_actf = bp;
+}
+
+/*
+ * Attempt to read a disk label from a device using the indicated strategy
+ * routine.  The label must be partly set up before this: secpercyl and
+ * anything required in the strategy routine (e.g., sector size) must be
+ * filled in before calling us.  The d_secsize bytes read are searched at
+ * each long-aligned offset.  Returns NULL on success, else an error string.
+ */
+char *
+readdisklabel(dev, strat, lp)
+	dev_t dev;
+	int (*strat)();
+	register struct disklabel *lp;
+{
+	register struct buf *bp;
+	struct disklabel *dlp;
+	char *cp, *end, *msg;
+
+	/* Provisional label: one partition at offset 0 with default sizes. */
+	if (lp->d_secperunit == 0)
+		lp->d_secperunit = 0x1fffffff;
+	lp->d_npartitions = 1;
+	if (lp->d_partitions[0].p_size == 0)
+		lp->d_partitions[0].p_size = 0x1fffffff;
+	lp->d_partitions[0].p_offset = 0;
+
+	bp = geteblk((int)lp->d_secsize);
+	bp->b_dev = dev;
+	bp->b_blkno = LABELSECTOR;
+	bp->b_bcount = lp->d_secsize;
+	bp->b_flags = B_BUSY | B_READ;
+	bp->b_cylinder = LABELSECTOR / lp->d_secpercyl;
+	(*strat)(bp);
+	/* Bound the scan by the buffer size; *lp is overwritten on success. */
+	end = (char *)bp->b_data + lp->d_secsize;
+	msg = "no disk label";
+	if (biowait(bp))
+		msg = "I/O error";
+	else for (cp = (char *)bp->b_data;
+	    (u_long)(end - cp) >= sizeof(*dlp); cp += sizeof(long)) {
+		dlp = (struct disklabel *)cp;
+		if (dlp->d_magic != DISKMAGIC || dlp->d_magic2 != DISKMAGIC)
+			continue;
+		if (dlp->d_npartitions <= MAXPARTITIONS && dkcksum(dlp) == 0) {
+			*lp = *dlp;
+			msg = NULL;
+			break;
+		}
+		msg = "disk label corrupted";
+	}
+	bp->b_flags = B_INVAL | B_AGE;
+	brelse(bp);
+	return (msg);
+}
+
+/*
+ * Check new disk label for sensibility before setting it.  Returns EINVAL for
+ * a malformed label, EBUSY if a partition set in openmask would be moved,
+ * shrunk or dropped, else 0 after copying the new label over the old one.
+ */
+int
+setdisklabel(olp, nlp, openmask)
+	register struct disklabel *olp, *nlp;
+	u_long openmask;
+{
+	register int i;
+	register struct partition *opp, *npp;
+
+	/* Bound d_npartitions before dkcksum() indexes d_partitions[] with it. */
+	if (nlp->d_magic != DISKMAGIC || nlp->d_magic2 != DISKMAGIC ||
+	    nlp->d_npartitions > MAXPARTITIONS || dkcksum(nlp) != 0)
+		return (EINVAL);
+	while ((i = ffs((long)openmask)) != 0) {
+		i--;
+		openmask &= ~((u_long)1 << i);
+		if (nlp->d_npartitions <= i)
+			return (EBUSY);
+		opp = &olp->d_partitions[i];
+		npp = &nlp->d_partitions[i];
+		if (npp->p_offset != opp->p_offset || npp->p_size < opp->p_size)
+			return (EBUSY);
+		/* Copy internally-set partition info the new label lacks.  XXX */
+		if (npp->p_fstype == FS_UNUSED && opp->p_fstype != FS_UNUSED) {
+			npp->p_fstype = opp->p_fstype;
+			npp->p_fsize = opp->p_fsize;
+			npp->p_frag = opp->p_frag;
+			npp->p_cpg = opp->p_cpg;
+		}
+	}
+	nlp->d_checksum = 0;
+	nlp->d_checksum = dkcksum(nlp);
+	*olp = *nlp;
+	return (0);
+}
+
+/* disk minor = (unit << 3) | partition; this should live elsewhere... */
+#define dkunit(dev)		(minor(dev) >> 3)
+#define dkpart(dev)		(minor(dev) & 07)
+#define dkminor(unit, part)	(((unit) << 3) | (part))
+
+/*
+ * Write disk label back to device after modification, replacing the label
+ * already in the label sector.  Returns 0, EXDEV, ESRCH or an I/O error.
+ */
+int
+writedisklabel(dev, strat, lp)
+	dev_t dev;
+	int (*strat)();
+	register struct disklabel *lp;
+{
+	struct buf *bp;
+	struct disklabel *dlp;
+	int labelpart, error;
+
+	labelpart = dkpart(dev);
+	if (lp->d_partitions[labelpart].p_offset != 0) {
+		if (lp->d_partitions[0].p_offset != 0)
+			return (EXDEV);			/* not quite right */
+		labelpart = 0;
+	}
+	bp = geteblk((int)lp->d_secsize);
+	bp->b_dev = makedev(major(dev), dkminor(dkunit(dev), labelpart));
+	bp->b_blkno = LABELSECTOR;
+	bp->b_bcount = lp->d_secsize;
+	bp->b_flags = B_BUSY | B_READ;	/* the geteblk() buffer stays busy */
+	(*strat)(bp);
+	if ((error = biowait(bp)) != 0)
+		goto done;
+	for (dlp = (struct disklabel *)bp->b_data;
+	    dlp <= (struct disklabel *)
+	      ((char *)bp->b_data + lp->d_secsize - sizeof(*dlp));
+	    dlp = (struct disklabel *)((char *)dlp + sizeof(long))) {
+		if (dlp->d_magic == DISKMAGIC && dlp->d_magic2 == DISKMAGIC &&
+		    dkcksum(dlp) == 0) {
+			*dlp = *lp;
+			bp->b_flags = B_BUSY | B_WRITE;
+			(*strat)(bp);
+			error = biowait(bp);
+			goto done;
+		}
+	}
+	error = ESRCH;			/* no valid label in the sector */
+done:	bp->b_flags |= B_INVAL | B_AGE;	/* release as readdisklabel() does */
+	brelse(bp);
+	return (error);
+}
+
+/*
+ * XOR together the 16-bit words of a disk label, from its start up to the
+ * end of its first d_npartitions partition entries, and return the result.
+ */
+dkcksum(lp)
+	register struct disklabel *lp;
+{
+	register u_short *wp, *limit;
+	register u_short cksum;
+
+	limit = (u_short *)&lp->d_partitions[lp->d_npartitions];
+	for (cksum = 0, wp = (u_short *)lp; wp < limit; wp++)
+		cksum ^= *wp;
+	return (cksum);
+}
+
+/*
+ * Disk error is the preface to plaintive error messages
+ * about failing disk transfers.  It prints messages of the form
+
+hp0g: hard error reading fsbn 12345 of 12344-12347 (hp0 bn %d; cn %d tn %d sn %d)
+
+ * if the offset of the error in the transfer and a disk label
+ * are both available.  blkdone should be -1 if the position of the error
+ * is unknown; the disklabel pointer may be null from drivers that have not
+ * been converted to use them.  The message is printed with printf
+ * if pri is LOG_PRINTF, otherwise it uses log at the specified priority.
+ * The message should be completed (with at least a newline) with printf
+ * or addlog, respectively.  There is no trailing space.
+ */
+void
+diskerr(bp, dname, what, pri, blkdone, lp)
+	register struct buf *bp;
+	char *dname, *what;
+	int pri, blkdone;
+	register struct disklabel *lp;
+{
+	int unit = dkunit(bp->b_dev), part = dkpart(bp->b_dev);
+	register void (*out) __P((const char *, ...));	/* printf or addlog */
+	char letter = 'a' + part;	/* partition letter for the message */
+	int blk;			/* block number being reported */
+
+	if (pri == LOG_PRINTF)
+		out = printf;
+	else {
+		log(pri, "");
+		out = addlog;
+	}
+	(*out)("%s%d%c: %s %sing fsbn ", dname, unit, letter, what,
+	    bp->b_flags & B_READ ? "read" : "writ");
+	blk = bp->b_blkno;
+	if (bp->b_bcount > DEV_BSIZE) {
+		if (blkdone >= 0) {
+			blk += blkdone;
+			(*out)("%d of ", blk);
+		}
+		(*out)("%d-%d", bp->b_blkno,
+		    bp->b_blkno + (bp->b_bcount - 1) / DEV_BSIZE);
+	} else
+		(*out)("%d", blk);
+	if (lp && (blkdone >= 0 || bp->b_bcount <= lp->d_secsize)) {
+#ifdef tahoe
+		blk *= DEV_BSIZE / lp->d_secsize;		/* XXX */
+#endif
+		blk += lp->d_partitions[part].p_offset;
+		(*out)(" (%s%d bn %d; cn %d", dname, unit, blk,
+		    blk / lp->d_secpercyl);
+		blk %= lp->d_secpercyl;
+		(*out)(" tn %d sn %d)", blk / lp->d_nsectors, blk % lp->d_nsectors);
+	}
+}
diff --git a/sys/ufs/ufs/ufs_extern.h b/sys/ufs/ufs/ufs_extern.h
new file mode 100644
index 00000000000..e25923e947d
--- /dev/null
+++ b/sys/ufs/ufs/ufs_extern.h
@@ -0,0 +1,125 @@
+/*-
+ * Copyright (c) 1991, 1993, 1994
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)ufs_extern.h	8.3 (Berkeley) 4/16/94
+ */
+
+struct buf;		/* incomplete types used by the prototypes below */
+struct direct;
+struct disklabel;
+struct fid;
+struct flock;
+struct inode;
+struct mbuf;
+struct mount;
+struct nameidata;
+struct proc;
+struct ucred;
+struct uio;
+struct vattr;
+struct vnode;
+struct ufs_args;
+
+__BEGIN_DECLS		/* the first six are defined in ufs_disksubr.c */
+void	 diskerr
+	    __P((struct buf *, char *, char *, int, int, struct disklabel *));
+void	 disksort __P((struct buf *, struct buf *));
+u_int	 dkcksum __P((struct disklabel *));	/* XXX defn. omits the type */
+char	*readdisklabel __P((dev_t, int (*)(), struct disklabel *));
+int	 setdisklabel __P((struct disklabel *, struct disklabel *, u_long));
+int	 writedisklabel __P((dev_t, int (*)(), struct disklabel *));
+
+int	 ufs_abortop __P((struct vop_abortop_args *));
+int	 ufs_access __P((struct vop_access_args *));
+int	 ufs_advlock __P((struct vop_advlock_args *));
+int	 ufs_bmap __P((struct vop_bmap_args *));
+int	 ufs_check_export __P((struct mount *, struct ufid *, struct mbuf *,
+		struct vnode **, int *exflagsp, struct ucred **));
+int	 ufs_checkpath __P((struct inode *, struct inode *, struct ucred *));
+int	 ufs_close __P((struct vop_close_args *));
+int	 ufs_create __P((struct vop_create_args *));
+void	 ufs_dirbad __P((struct inode *, doff_t, char *));
+int	 ufs_dirbadentry __P((struct vnode *, struct direct *, int));
+int	 ufs_dirempty __P((struct inode *, ino_t, struct ucred *));
+int	 ufs_direnter __P((struct inode *, struct vnode *,struct componentname *));
+int	 ufs_dirremove __P((struct vnode *, struct componentname*));
+int	 ufs_dirrewrite
+	    __P((struct inode *, struct inode *, struct componentname *));
+int	 ufs_getattr __P((struct vop_getattr_args *));
+int	 ufs_getlbns __P((struct vnode *, daddr_t, struct indir *, int *));
+struct vnode *
+	 ufs_ihashget __P((dev_t, ino_t));
+void	 ufs_ihashinit __P((void));
+void	 ufs_ihashins __P((struct inode *));
+struct vnode *
+	 ufs_ihashlookup __P((dev_t, ino_t));
+void	 ufs_ihashrem __P((struct inode *));
+int	 ufs_inactive __P((struct vop_inactive_args *));
+int	 ufs_init __P((void));
+int	 ufs_ioctl __P((struct vop_ioctl_args *));
+int	 ufs_islocked __P((struct vop_islocked_args *));
+int	 ufs_link __P((struct vop_link_args *));
+int	 ufs_lock __P((struct vop_lock_args *));
+int	 ufs_lookup __P((struct vop_lookup_args *));
+int	 ufs_makeinode __P((int mode, struct vnode *, struct vnode **, struct componentname *));
+int	 ufs_mkdir __P((struct vop_mkdir_args *));
+int	 ufs_mknod __P((struct vop_mknod_args *));
+int	 ufs_mmap __P((struct vop_mmap_args *));
+int	 ufs_open __P((struct vop_open_args *));
+int	 ufs_pathconf __P((struct vop_pathconf_args *));
+int	 ufs_print __P((struct vop_print_args *));
+int	 ufs_readdir __P((struct vop_readdir_args *));
+int	 ufs_readlink __P((struct vop_readlink_args *));
+int	 ufs_reclaim __P((struct vop_reclaim_args *));
+int	 ufs_remove __P((struct vop_remove_args *));
+int	 ufs_rename __P((struct vop_rename_args *));
+int	 ufs_rmdir __P((struct vop_rmdir_args *));
+int	 ufs_root __P((struct mount *, struct vnode **));
+int	 ufs_seek __P((struct vop_seek_args *));
+int	 ufs_select __P((struct vop_select_args *));
+int	 ufs_setattr __P((struct vop_setattr_args *));
+int	 ufs_start __P((struct mount *, int, struct proc *));
+int	 ufs_strategy __P((struct vop_strategy_args *));
+int	 ufs_symlink __P((struct vop_symlink_args *));
+int	 ufs_unlock __P((struct vop_unlock_args *));
+int	 ufs_vinit __P((struct mount *,
+	    int (**)(), int (**)(), struct vnode **));
+int	 ufsspec_close __P((struct vop_close_args *));
+int	 ufsspec_read __P((struct vop_read_args *));
+int	 ufsspec_write __P((struct vop_write_args *));
+
+#ifdef FIFO
+int	ufsfifo_read __P((struct vop_read_args *));
+int	ufsfifo_write __P((struct vop_write_args *));
+int	ufsfifo_close __P((struct vop_close_args *));
+#endif /* FIFO */
+__END_DECLS
diff --git a/sys/ufs/ufs/ufs_ihash.c b/sys/ufs/ufs/ufs_ihash.c
new file mode 100644
index 00000000000..4a37c907ef6
--- /dev/null
+++ b/sys/ufs/ufs/ufs_ihash.c
@@ -0,0 +1,154 @@
+/*
+ * Copyright (c) 1982, 1986, 1989, 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)ufs_ihash.c	8.4 (Berkeley) 12/30/93
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/vnode.h>
+#include <sys/malloc.h>
+#include <sys/proc.h>
+
+#include <ufs/ufs/quota.h>
+#include <ufs/ufs/inode.h>
+#include <ufs/ufs/ufs_extern.h>
+
+/*
+ * Structures associated with inode caching.
+ */
+struct inode **ihashtbl;	/* chain heads; set up by ufs_ihashinit() */
+u_long	ihash;		/* size of hash table - 1 */
+#define	INOHASH(device, inum)	(((device) + (inum)) & ihash)
+
+/*
+ * Initialize inode hash table: hashinit() supplies the table, and ihash
+ * receives what INOHASH() uses as its mask (presumably size - 1; confirm).
+ */
+void
+ufs_ihashinit()
+{
+	ihashtbl = hashinit(desiredvnodes, M_UFSMNT, &ihash);
+}
+
+/*
+ * Look up the incore inode for a device/inum pair and return its vnode,
+ * or NULL if the pair is not in the hash table.  Lock state is ignored.
+ */
+struct vnode *
+ufs_ihashlookup(device, inum)
+	dev_t device;
+	ino_t inum;
+{
+	register struct inode *node;
+
+	node = ihashtbl[INOHASH(device, inum)];
+	while (node != NULL) {
+		if (node->i_number == inum && node->i_dev == device)
+			return (ITOV(node));
+		node = node->i_next;
+	}
+	return (NULL);
+}
+
+/*
+ * Find the incore inode for a device/inum pair and return its vnode once
+ * vget() succeeds (retrying while it is locked or vget() fails), else NULL.
+ */
+struct vnode *
+ufs_ihashget(device, inum)
+	dev_t device;
+	ino_t inum;
+{
+	register struct inode *node;
+	struct vnode *vp;
+
+rescan:
+	for (node = ihashtbl[INOHASH(device, inum)]; node != NULL;
+	    node = node->i_next) {
+		if (node->i_number != inum || node->i_dev != device)
+			continue;
+		if (node->i_flag & IN_LOCKED) {
+			/* Flag our interest, sleep, then search again. */
+			node->i_flag |= IN_WANTED;
+			sleep(node, PINOD);
+			goto rescan;
+		}
+		vp = ITOV(node);
+		if (vget(vp, 1))
+			goto rescan;
+		return (vp);
+	}
+	return (NULL);
+}
+
+/*
+ * Insert the inode at the head of its hash chain and return it locked.
+ */
+void
+ufs_ihashins(ip)
+	struct inode *ip;
+{
+	struct inode **bucket, *head;
+
+	/* Link ip in ahead of the chain's current first entry, if any. */
+	bucket = &ihashtbl[INOHASH(ip->i_dev, ip->i_number)];
+	head = *bucket;
+	if (head != NULL)
+		head->i_prev = &ip->i_next;
+	ip->i_next = head;
+	ip->i_prev = bucket;
+	*bucket = ip;
+	if (ip->i_flag & IN_LOCKED)
+		panic("ufs_ihashins: already locked");
+	/* Lock it, recording the current process (-1 when there is none). */
+	ip->i_lockholder = curproc ? curproc->p_pid : -1;
+	ip->i_flag |= IN_LOCKED;
+}
+
+/*
+ * Unlink the inode from its hash chain.
+ */
+void
+ufs_ihashrem(ip)
+	register struct inode *ip;
+{
+	register struct inode *succ = ip->i_next;
+
+	if (succ != NULL)
+		succ->i_prev = ip->i_prev;	/* successor skips over ip */
+	*ip->i_prev = succ;
+#ifdef DIAGNOSTIC
+	ip->i_next = NULL;		/* clear the now-stale links */
+	ip->i_prev = NULL;
+#endif
+}
diff --git a/sys/ufs/ufs/ufs_inode.c b/sys/ufs/ufs/ufs_inode.c
new file mode 100644
index 00000000000..ac876f9d34d
--- /dev/null
+++ b/sys/ufs/ufs/ufs_inode.c
@@ -0,0 +1,188 @@
+/*
+ * Copyright (c) 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)ufs_inode.c	8.4 (Berkeley) 1/21/94
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/vnode.h>
+#include <sys/mount.h>
+#include <sys/kernel.h>
+#include <sys/malloc.h>
+
+#include <ufs/ufs/quota.h>
+#include <ufs/ufs/inode.h>
+#include <ufs/ufs/ufsmount.h>
+#include <ufs/ufs/ufs_extern.h>
+
+u_long	nextgennumber;		/* Next generation number to assign. */
+int	prtactive = 0;		/* 1 => vprint vnodes still in use at inactive/reclaim */
+
+int
+ufs_init()
+{
+	static int done = 0;
+
+	/* Only the first call does any work; later calls just return 0. */
+	if (done)
+		return (0);
+	done = 1;
+#ifdef DIAGNOSTIC
+	if (sizeof(struct inode) & (sizeof(struct inode) - 1))
+		printf("ufs_init: bad size %d\n", sizeof(struct inode));
+#endif
+	ufs_ihashinit();
+	dqinit();
+	return (0);
+}
+
+/*
+ * Last reference to an inode.  If necessary, write or delete it.
+ * Returns the VOP_TRUNCATE() status when an unlinked inode is freed, else 0.
+ */
+int
+ufs_inactive(ap)
+	struct vop_inactive_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+	register struct vnode *vp = ap->a_vp;
+	register struct inode *ip = VTOI(vp);
+	struct timeval tv;
+	int mode, error;
+
+	if (prtactive && vp->v_usecount != 0)
+		vprint("ufs_inactive: pushing active", vp);
+
+	/* Get rid of inodes related to stale file handles. */
+	if (ip->i_mode == 0) {
+		if ((vp->v_flag & VXLOCK) == 0)
+			vgone(vp);
+		return (0);
+	}
+
+	error = 0;
+#ifdef DIAGNOSTIC
+	if (VOP_ISLOCKED(vp))
+		panic("ufs_inactive: locked inode");
+	if (curproc)
+		ip->i_lockholder = curproc->p_pid;
+	else
+		ip->i_lockholder = -1;
+#endif
+	ip->i_flag |= IN_LOCKED;
+	if (ip->i_nlink <= 0 && (vp->v_mount->mnt_flag & MNT_RDONLY) == 0) {
+#ifdef QUOTA
+		if (!getinoquota(ip))
+			(void)chkiq(ip, -1, NOCRED, 0);
+#endif
+		error = VOP_TRUNCATE(vp, (off_t)0, 0, NOCRED, NULL);
+		ip->i_rdev = 0;
+		mode = ip->i_mode;	/* VOP_VFREE() gets the old mode */
+		ip->i_mode = 0;
+		ip->i_flag |= IN_CHANGE | IN_UPDATE;
+		VOP_VFREE(vp, ip->i_number, mode);
+	}
+	if (ip->i_flag & (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) {
+		tv = time;
+		VOP_UPDATE(vp, &tv, &tv, 0);
+	}
+	VOP_UNLOCK(vp);
+	/*
+	 * If we are done with the inode, reclaim it
+	 * so that it can be reused immediately.
+	 */
+	if (vp->v_usecount == 0 && ip->i_mode == 0)
+		vgone(vp);
+	return (error);
+}
+
+/*
+ * Detach the inode from a vnode that is being reclaimed and free it.
+ */
+int
+ufs_reclaim(ap)
+	struct vop_reclaim_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+	register struct vnode *vp = ap->a_vp;
+	register struct inode *ip = VTOI(vp);
+	int fstype, i, type;
+
+	if (prtactive && vp->v_usecount != 0)
+		vprint("ufs_reclaim: pushing active", vp);
+
+	/* Take the inode off its hash chain. */
+	ufs_ihashrem(ip);
+
+	/* Purge old data structures associated with the inode. */
+	cache_purge(vp);
+	if (ip->i_devvp != NULL) {
+		vrele(ip->i_devvp);
+		ip->i_devvp = NULL;
+	}
+
+#ifdef QUOTA
+	/* Release every quota reference the inode still holds. */
+	for (i = 0; i < MAXQUOTAS; i++) {
+		if (ip->i_dquot[i] == NODQUOT)
+			continue;
+		dqrele(vp, ip->i_dquot[i]);
+		ip->i_dquot[i] = NODQUOT;
+	}
+#endif
+
+	/*
+	 * Free the inode memory, using the malloc type that corresponds
+	 * to the type of the filesystem the vnode is mounted from.
+	 */
+	fstype = vp->v_mount->mnt_stat.f_type;
+	if (fstype == MOUNT_UFS)
+		type = M_FFSNODE;
+	else if (fstype == MOUNT_MFS)
+		type = M_MFSNODE;
+	else if (fstype == MOUNT_LFS)
+		type = M_LFSNODE;
+	else
+		panic("ufs_reclaim: not ufs file");
+	FREE(vp->v_data, type);
+	vp->v_data = NULL;
+	return (0);
+}
diff --git a/sys/ufs/ufs/ufs_lockf.c b/sys/ufs/ufs/ufs_lockf.c
new file mode 100644
index 00000000000..cb9a7375de1
--- /dev/null
+++ b/sys/ufs/ufs/ufs_lockf.c
@@ -0,0 +1,707 @@
+/*
+ * Copyright (c) 1982, 1986, 1989, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Scooter Morris at Genentech Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)ufs_lockf.c	8.3 (Berkeley) 1/6/94
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/file.h>
+#include <sys/proc.h>
+#include <sys/vnode.h>
+#include <sys/malloc.h>
+#include <sys/fcntl.h>
+
+#include <ufs/ufs/lockf.h>
+#include <ufs/ufs/quota.h>
+#include <ufs/ufs/inode.h>
+#include <ufs/ufs/ufs_extern.h>
+
+/*
+ * This variable controls the maximum number of processes that will
+ * be checked in doing deadlock detection.
+ */
+int maxlockdepth = MAXDEPTH;
+
+#ifdef LOCKF_DEBUG
+int	lockf_debug = 0;	/* & 1: trace set/clear/get; & 2: trace helpers */
+#endif
+
+#define NOLOCKF (struct lockf *)0	/* null lock pointer / end of list */
+#define SELF	0x1	/* lf_findoverlap: skip locks with another lf_id */
+#define OTHERS	0x2	/* lf_findoverlap: skip locks with our own lf_id */
+
+/*
+ * Set a byte-range lock, sleeping for it if F_WAIT is set in lf_flags.
+ * Returns 0 once the lock is held, EAGAIN if blocked without F_WAIT,
+ * EDEADLK if sleeping would deadlock, or the error from tsleep().
+ * The lock structure is consumed: it is linked onto the inode's list (or
+ * freed after merging into an existing lock) on success, freed on error.
+ */
+int
+lf_setlock(lock)
+	register struct lockf *lock;
+{
+	register struct lockf *block;
+	struct inode *ip = lock->lf_inode;
+	struct lockf **prev, *overlap, *ltmp;
+	static char lockstr[] = "lockf";
+	int ovcase, priority, needtolink, error;
+
+#ifdef LOCKF_DEBUG
+	if (lockf_debug & 1)
+		lf_print("lf_setlock", lock);
+#endif /* LOCKF_DEBUG */
+
+	/* Set the sleep priority; exclusive requests use PLOCK + 4. */
+	priority = PLOCK;
+	if (lock->lf_type == F_WRLCK)
+		priority += 4;
+	priority |= PCATCH;
+	/*
+	 * Scan lock list for this file looking for locks that would block us.
+	 */
+	while (block = lf_getblock(lock)) {
+		/* Free the structure and return if nonblocking. */
+		if ((lock->lf_flags & F_WAIT) == 0) {
+			FREE(lock, M_LOCKF);
+			return (EAGAIN);
+		}
+		/*
+		 * We are blocked. Since flock style locks cover
+		 * the whole file, there is no chance for deadlock.
+		 * For byte-range locks we must check for deadlock.
+		 *
+		 * Deadlock detection is done by looking through the
+		 * wait channels to see if there are any cycles that
+		 * involve us. MAXDEPTH is set just to make sure we
+		 * do not go off into neverland.
+		 */
+		if ((lock->lf_flags & F_POSIX) &&
+		    (block->lf_flags & F_POSIX)) {
+			register struct proc *wproc;
+			register struct lockf *waitblock;
+			int i = 0;
+
+			/* The block is waiting on something */
+			wproc = (struct proc *)block->lf_id;
+			while (wproc->p_wchan &&
+			       (wproc->p_wmesg == lockstr) &&
+			       (i++ < maxlockdepth)) {
+				waitblock = (struct lockf *)wproc->p_wchan;
+				/* Get the owner of the blocking lock */
+				waitblock = waitblock->lf_next;
+				if ((waitblock->lf_flags & F_POSIX) == 0)
+					break;
+				wproc = (struct proc *)waitblock->lf_id;
+				if (wproc == (struct proc *)lock->lf_id) {
+					free(lock, M_LOCKF);
+					return (EDEADLK);
+				}
+			}
+		}
+		/*
+		 * For flock type locks, we must first remove
+		 * any shared locks that we hold before we sleep
+		 * waiting for an exclusive lock.
+		 */
+		if ((lock->lf_flags & F_FLOCK) &&
+		    lock->lf_type == F_WRLCK) {
+			lock->lf_type = F_UNLCK;
+			(void) lf_clearlock(lock);
+			lock->lf_type = F_WRLCK;
+		}
+		/*
+		 * Add our lock to the blocked list and sleep until we're free.
+		 * Remember who blocked us (for deadlock detection).
+		 */
+		lock->lf_next = block;
+		lf_addblock(block, lock);
+#ifdef LOCKF_DEBUG
+		if (lockf_debug & 1) {
+			lf_print("lf_setlock: blocking on", block);
+			lf_printlist("lf_setlock", block);
+		}
+#endif /* LOCKF_DEBUG */
+		if (error = tsleep((caddr_t)lock, priority, lockstr, 0)) {
+			/* Delete ourselves from the waiting to lock list. */
+			for (block = lock->lf_next;
+			     block != NOLOCKF;
+			     block = block->lf_block) {
+				if (block->lf_block != lock)
+					continue;
+				block->lf_block = block->lf_block->lf_block;
+				break;
+			}
+			/*
+			 * If we did not find ourselves on the list, but
+			 * are still linked onto a lock list, then something
+			 * is very wrong.
+			 */
+			if (block == NOLOCKF && lock->lf_next != NOLOCKF)
+				panic("lf_setlock: lost lock");
+			free(lock, M_LOCKF);
+			return (error);
+		}
+	}
+	/*
+	 * No blocks!!  Add the lock.  Note that we will
+	 * downgrade or upgrade any overlapping locks this
+	 * process already owns.
+	 *
+	 * Skip over locks owned by other processes.
+	 * Handle any locks that overlap and are owned by ourselves.
+	 */
+	prev = &ip->i_lockf;
+	block = ip->i_lockf;
+	needtolink = 1;
+	for (;;) {
+		if (ovcase = lf_findoverlap(block, lock, SELF, &prev, &overlap))
+			block = overlap->lf_next;
+		/*
+		 * Six cases:
+		 *	0) no overlap
+		 *	1) overlap == lock
+		 *	2) overlap contains lock
+		 *	3) lock contains overlap
+		 *	4) overlap starts before lock
+		 *	5) overlap ends after lock
+		 */
+		switch (ovcase) {
+		case 0: /* no overlap */
+			if (needtolink) {
+				*prev = lock;
+				lock->lf_next = overlap;
+			}
+			break;
+
+		case 1: /* overlap == lock */
+			/*
+			 * If downgrading lock, others may be
+			 * able to acquire it.
+			 */
+			if (lock->lf_type == F_RDLCK &&
+			    overlap->lf_type == F_WRLCK)
+				lf_wakelock(overlap);
+			overlap->lf_type = lock->lf_type;
+			FREE(lock, M_LOCKF);
+			lock = overlap; /* for debug output below */
+			break;
+
+		case 2: /* overlap contains lock */
+			/*
+			 * Check for common starting point and different types.
+			 */
+			if (overlap->lf_type == lock->lf_type) {
+				free(lock, M_LOCKF);
+				lock = overlap; /* for debug output below */
+				break;
+			}
+			if (overlap->lf_start == lock->lf_start) {
+				*prev = lock;
+				lock->lf_next = overlap;
+				overlap->lf_start = lock->lf_end + 1;
+			} else
+				lf_split(overlap, lock);
+			lf_wakelock(overlap);
+			break;
+
+		case 3: /* lock contains overlap */
+			/*
+			 * If downgrading lock, others may be able to
+			 * acquire it, otherwise take the list.  Waiters taken
+			 * get lf_next repointed: overlap is freed below.
+			 */
+			if (lock->lf_type == F_RDLCK &&
+			    overlap->lf_type == F_WRLCK) {
+				lf_wakelock(overlap);
+			} else {
+				for (ltmp = overlap->lf_block;
+				     ltmp != NOLOCKF; ltmp = ltmp->lf_block)
+					ltmp->lf_next = lock;
+				ltmp = lock->lf_block;
+				lock->lf_block = overlap->lf_block;
+				lf_addblock(lock, ltmp);
+			}
+			/*
+			 * Add the new lock if necessary and delete the overlap.
+			 */
+			if (needtolink) {
+				*prev = lock;
+				lock->lf_next = overlap->lf_next;
+				prev = &lock->lf_next;
+				needtolink = 0;
+			} else
+				*prev = overlap->lf_next;
+			free(overlap, M_LOCKF);
+			continue;
+
+		case 4: /* overlap starts before lock */
+			/* Add lock after overlap on the list. */
+			lock->lf_next = overlap->lf_next;
+			overlap->lf_next = lock;
+			overlap->lf_end = lock->lf_start - 1;
+			prev = &lock->lf_next;
+			lf_wakelock(overlap);
+			needtolink = 0;
+			continue;
+
+		case 5: /* overlap ends after lock */
+			/*
+			 * Add the new lock before overlap.
+			 */
+			if (needtolink) {
+				*prev = lock;
+				lock->lf_next = overlap;
+			}
+			overlap->lf_start = lock->lf_end + 1;
+			lf_wakelock(overlap);
+			break;
+		}
+		break;
+	}
+#ifdef LOCKF_DEBUG
+	if (lockf_debug & 1) {
+		lf_print("lf_setlock: got the lock", lock);
+		lf_printlist("lf_setlock", lock);
+	}
+#endif /* LOCKF_DEBUG */
+	return (0);
+}
+
+/*
+ * Remove a byte-range lock on an inode.  Only locks whose lf_id matches
+ * unlock's are affected; `unlock' itself is not freed.  Always returns 0.
+ * Generally, find the lock (or an overlap to that lock)
+ * and remove it (or shrink it), then wakeup anyone we can.
+ */
+int
+lf_clearlock(unlock)
+	register struct lockf *unlock;
+{
+	struct inode *ip = unlock->lf_inode;
+	register struct lockf *lf = ip->i_lockf;
+	struct lockf *overlap, **prev;
+	int ovcase;
+
+	if (lf == NOLOCKF)		/* no locks on this inode */
+		return (0);
+#ifdef LOCKF_DEBUG
+	if (unlock->lf_type != F_UNLCK)
+		panic("lf_clearlock: bad type");
+	if (lockf_debug & 1)
+		lf_print("lf_clearlock", unlock);
+#endif /* LOCKF_DEBUG */
+	prev = &ip->i_lockf;
+	while (ovcase = lf_findoverlap(lf, unlock, SELF, &prev, &overlap)) {
+		/*
+		 * Wakeup the list of locks to be retried.
+		 */
+		lf_wakelock(overlap);
+
+		switch (ovcase) {
+
+		case 1: /* overlap == lock */
+			*prev = overlap->lf_next;
+			FREE(overlap, M_LOCKF);
+			break;
+
+		case 2: /* overlap contains lock: split it */
+			if (overlap->lf_start == unlock->lf_start) {
+				overlap->lf_start = unlock->lf_end + 1;
+				break;
+			}
+			lf_split(overlap, unlock);
+			/* lf_split chained unlock after overlap; unchain it */
+			overlap->lf_next = unlock->lf_next;
+			break;
+
+		case 3: /* lock contains overlap */
+			*prev = overlap->lf_next;
+			lf = overlap->lf_next;	/* resume after the freed lock */
+			free(overlap, M_LOCKF);
+			continue;
+
+		case 4: /* overlap starts before lock */
+			overlap->lf_end = unlock->lf_start - 1;
+			prev = &overlap->lf_next;
+			lf = overlap->lf_next;	/* later locks may overlap too */
+			continue;
+
+		case 5: /* overlap ends after lock */
+			overlap->lf_start = unlock->lf_end + 1;
+			break;
+		}
+		break;
+	}
+#ifdef LOCKF_DEBUG
+	if (lockf_debug & 1)
+		lf_printlist("lf_clearlock", unlock);
+#endif /* LOCKF_DEBUG */
+	return (0);
+}
+
+/*
+ * Report the first lock that would block `lock' by filling in `fl';
+ * fl->l_type is set to F_UNLCK when nothing blocks.  Always returns 0.
+ */
+int
+lf_getlock(lock, fl)
+	register struct lockf *lock;
+	register struct flock *fl;
+{
+	register struct lockf *holder;
+
+#ifdef LOCKF_DEBUG
+	if (lockf_debug & 1)
+		lf_print("lf_getlock", lock);
+#endif /* LOCKF_DEBUG */
+
+	holder = lf_getblock(lock);
+	if (holder == NOLOCKF) {
+		fl->l_type = F_UNLCK;
+		return (0);
+	}
+	/* Describe the blocker as a range measured from SEEK_SET. */
+	fl->l_type = holder->lf_type;
+	fl->l_whence = SEEK_SET;
+	fl->l_start = holder->lf_start;
+	/* An lf_end of -1 (open-ended range) is reported as length 0. */
+	fl->l_len = holder->lf_end == -1 ?
+	    0 : holder->lf_end - holder->lf_start + 1;
+	/* Only F_POSIX locks carry a process in lf_id; others report -1. */
+	fl->l_pid = (holder->lf_flags & F_POSIX) ?
+	    ((struct proc *)(holder->lf_id))->p_pid : -1;
+	return (0);
+}
+
+/*
+ * Find the first lock on lock's inode, held under a different lf_id,
+ * that overlaps `lock' and conflicts with it (either one is F_WRLCK).
+ * Returns that lock, or NOLOCKF when nothing blocks.
+ */
+struct lockf *
+lf_getblock(lock)
+	register struct lockf *lock;
+{
+	struct lockf **prev, *overlap, *cur;
+
+	prev = &lock->lf_inode->i_lockf;
+	cur = lock->lf_inode->i_lockf;
+	while (lf_findoverlap(cur, lock, OTHERS, &prev, &overlap) != 0) {
+		/*
+		 * An overlap blocks us only when at least one
+		 * of the two locks is exclusive.
+		 */
+		if (lock->lf_type == F_WRLCK)
+			return (overlap);
+		if (overlap->lf_type == F_WRLCK)
+			return (overlap);
+		cur = overlap->lf_next;
+	}
+	return (NOLOCKF);
+}
+
+/*
+ * Walk the list of locks for an inode to
+ * find an overlapping lock (if any).
+ * Returns the case number (0 = none, 1-5 as listed in the body).
+ * NOTE: this returns only the FIRST overlapping lock.  There
+ *	 may be more than one.
+ */
+int
+lf_findoverlap(lf, lock, type, prev, overlap)
+	register struct lockf *lf;	/* where in the list to start */
+	struct lockf *lock;		/* range and lf_id to test against */
+	int type;			/* SELF and/or OTHERS owner filter */
+	struct lockf ***prev;		/* out: address of link to *overlap */
+	struct lockf **overlap;		/* out: lock where the scan stopped */
+{
+	off_t start, end;
+
+	*overlap = lf;
+	if (lf == NOLOCKF)
+		return (0);
+#ifdef LOCKF_DEBUG
+	if (lockf_debug & 2)
+		lf_print("lf_findoverlap: looking for overlap in", lock);
+#endif /* LOCKF_DEBUG */
+	start = lock->lf_start;
+	end = lock->lf_end;	/* -1 is treated below as "no upper bound" */
+	while (lf != NOLOCKF) {
+		if (((type & SELF) && lf->lf_id != lock->lf_id) ||
+		    ((type & OTHERS) && lf->lf_id == lock->lf_id)) {
+			*prev = &lf->lf_next;	/* filtered out by owner */
+			*overlap = lf = lf->lf_next;
+			continue;
+		}
+#ifdef LOCKF_DEBUG
+		if (lockf_debug & 2)
+			lf_print("\tchecking", lf);
+#endif /* LOCKF_DEBUG */
+		/*
+		 * OK, check for overlap
+		 *
+		 * Six cases:
+		 *	0) no overlap
+		 *	1) overlap == lock
+		 *	2) overlap contains lock
+		 *	3) lock contains overlap
+		 *	4) overlap starts before lock
+		 *	5) overlap ends after lock
+		 */
+		if ((lf->lf_end != -1 && start > lf->lf_end) ||
+		    (end != -1 && lf->lf_start > end)) {
+			/* Case 0 */
+#ifdef LOCKF_DEBUG
+			if (lockf_debug & 2)
+				printf("no overlap\n");
+#endif /* LOCKF_DEBUG */
+			/* NOTE(review): presumably relies on a sorted list */
+			if ((type & SELF) && end != -1 && lf->lf_start > end)
+				return (0);
+			*prev = &lf->lf_next;
+			*overlap = lf = lf->lf_next;
+			continue;
+		}
+		if ((lf->lf_start == start) && (lf->lf_end == end)) {
+			/* Case 1 */
+#ifdef LOCKF_DEBUG
+			if (lockf_debug & 2)
+				printf("overlap == lock\n");
+#endif /* LOCKF_DEBUG */
+			return (1);
+		}
+		if ((lf->lf_start <= start) &&
+		    (end != -1) &&
+		    ((lf->lf_end >= end) || (lf->lf_end == -1))) {
+			/* Case 2 */
+#ifdef LOCKF_DEBUG
+			if (lockf_debug & 2)
+				printf("overlap contains lock\n");
+#endif /* LOCKF_DEBUG */
+			return (2);
+		}
+		if (start <= lf->lf_start &&
+		           (end == -1 ||
+			   (lf->lf_end != -1 && end >= lf->lf_end))) {
+			/* Case 3 */
+#ifdef LOCKF_DEBUG
+			if (lockf_debug & 2)
+				printf("lock contains overlap\n");
+#endif /* LOCKF_DEBUG */
+			return (3);
+		}
+		if ((lf->lf_start < start) &&
+			((lf->lf_end >= start) || (lf->lf_end == -1))) {
+			/* Case 4 */
+#ifdef LOCKF_DEBUG
+			if (lockf_debug & 2)
+				printf("overlap starts before lock\n");
+#endif /* LOCKF_DEBUG */
+			return (4);
+		}
+		if ((lf->lf_start > start) &&
+			(end != -1) &&
+			((lf->lf_end > end) || (lf->lf_end == -1))) {
+			/* Case 5 */
+#ifdef LOCKF_DEBUG
+			if (lockf_debug & 2)
+				printf("overlap ends after lock\n");
+#endif /* LOCKF_DEBUG */
+			return (5);
+		}
+		panic("lf_findoverlap: default");
+	}
+	return (0);
+}
+
+/*
+ * Append `blocked' (possibly the head of a chain) to the tail of the
+ * lf_block chain hanging off `lock'.  A NOLOCKF `blocked' is ignored.
+ */
+void
+lf_addblock(lock, blocked)
+	struct lockf *lock;
+	struct lockf *blocked;
+{
+	register struct lockf **tailp;
+
+	if (blocked != NOLOCKF) {
+#ifdef LOCKF_DEBUG
+		if (lockf_debug & 2) {
+			lf_print("addblock: adding", blocked);
+			lf_print("to blocked list of", lock);
+		}
+#endif /* LOCKF_DEBUG */
+		/*
+		 * Find the link that terminates the chain and store there.
+		 */
+		tailp = &lock->lf_block;
+		while (*tailp != NOLOCKF)
+			tailp = &(*tailp)->lf_block;
+		*tailp = blocked;
+	}
+}
+
+/*
+ * Split lock1 around the contained region lock2, shrinking lock1 and
+ * chaining lock2 next to it; a third lock is allocated if lock2 is interior.
+ */
+void
+lf_split(lock1, lock2)
+	register struct lockf *lock1;
+	register struct lockf *lock2;
+{
+	register struct lockf *splitlock;
+
+#ifdef LOCKF_DEBUG
+	if (lockf_debug & 2) {
+		lf_print("lf_split", lock1);
+		lf_print("splitting from", lock2);
+	}
+#endif /* LOCKF_DEBUG */
+	/*
+	 * Check to see if splitting into only two pieces.
+	 */
+	if (lock1->lf_start == lock2->lf_start) {
+		lock1->lf_start = lock2->lf_end + 1;
+		lock2->lf_next = lock1;		/* lock2 precedes lock1 */
+		return;
+	}
+	if (lock1->lf_end == lock2->lf_end) {
+		lock1->lf_end = lock2->lf_start - 1;
+		lock2->lf_next = lock1->lf_next;	/* lock2 follows lock1 */
+		lock1->lf_next = lock2;
+		return;
+	}
+	/*
+	 * Make a new lock consisting of the last part of
+	 * the encompassing lock
+	 */
+	MALLOC(splitlock, struct lockf *, sizeof *splitlock, M_LOCKF, M_WAITOK);
+	bcopy((caddr_t)lock1, (caddr_t)splitlock, sizeof *splitlock);
+	splitlock->lf_start = lock2->lf_end + 1;
+	splitlock->lf_block = NOLOCKF;	/* the copy starts with no waiters */
+	lock1->lf_end = lock2->lf_start - 1;
+	/*
+	 * OK, now link it in
+	 */
+	splitlock->lf_next = lock1->lf_next;	/* lock1, lock2, splitlock */
+	lock2->lf_next = splitlock;
+	lock1->lf_next = lock2;
+}
+
+/*
+ * Detach every lock blocked on `listhead' and wake up its sleeper.
+ */
+void
+lf_wakelock(listhead)
+	struct lockf *listhead;
+{
+	register struct lockf *waiter, *rest;
+
+	rest = listhead->lf_block;
+	listhead->lf_block = NOLOCKF;
+	for (waiter = rest; waiter != NOLOCKF; waiter = rest) {
+		rest = waiter->lf_block;	/* save before unlinking */
+		waiter->lf_block = NOLOCKF;
+		waiter->lf_next = NOLOCKF;
+#ifdef LOCKF_DEBUG
+		if (lockf_debug & 2)
+			lf_print("lf_wakelock: awakening", waiter);
+#endif /* LOCKF_DEBUG */
+		wakeup((caddr_t)waiter);
+	}
+}
+
+#ifdef LOCKF_DEBUG
+/*
+ * Print out a lock.  The range fields are cast to long to match %ld;
+ * lf_findoverlap handles them as off_t, which %d would misread if wider.
+ */
+void
+lf_print(tag, lock)
+	char *tag;
+	register struct lockf *lock;
+{
+	printf("%s: lock 0x%lx for ", tag, lock);
+	if (lock->lf_flags & F_POSIX)
+		printf("proc %d", ((struct proc *)(lock->lf_id))->p_pid);
+	else
+		printf("id 0x%x", lock->lf_id);
+	printf(" in ino %d on dev <%d, %d>, %s, start %ld, end %ld",
+		lock->lf_inode->i_number,
+		major(lock->lf_inode->i_dev),
+		minor(lock->lf_inode->i_dev),
+		lock->lf_type == F_RDLCK ? "shared" :
+		lock->lf_type == F_WRLCK ? "exclusive" :
+		lock->lf_type == F_UNLCK ? "unlock" :
+		"unknown", (long)lock->lf_start, (long)lock->lf_end);
+	if (lock->lf_block)
+		printf(" block 0x%x\n", lock->lf_block);
+	else
+		printf("\n");
+}
+
+/* Print every lock on lock's inode; ranges are cast to long for %ld. */
+void
+lf_printlist(tag, lock)
+	char *tag;
+	struct lockf *lock;
+{
+	register struct lockf *lf;
+
+	printf("%s: Lock list for ino %d on dev <%d, %d>:\n",
+		tag, lock->lf_inode->i_number,
+		major(lock->lf_inode->i_dev), minor(lock->lf_inode->i_dev));
+	for (lf = lock->lf_inode->i_lockf; lf; lf = lf->lf_next) {
+		printf("\tlock 0x%lx for ", lf);
+		if (lf->lf_flags & F_POSIX)
+			printf("proc %d", ((struct proc *)(lf->lf_id))->p_pid);
+		else
+			printf("id 0x%x", lf->lf_id);
+		printf(", %s, start %ld, end %ld",
+			lf->lf_type == F_RDLCK ? "shared" :
+			lf->lf_type == F_WRLCK ? "exclusive" :
+			lf->lf_type == F_UNLCK ? "unlock" :
+			"unknown", (long)lf->lf_start, (long)lf->lf_end);
+		if (lf->lf_block)
+			printf(" block 0x%x\n", lf->lf_block);
+		else
+			printf("\n");
+	}
+}
+#endif /* LOCKF_DEBUG */
diff --git a/sys/ufs/ufs/ufs_lookup.c b/sys/ufs/ufs/ufs_lookup.c
new file mode 100644
index 00000000000..87c6802c79f
--- /dev/null
+++ b/sys/ufs/ufs/ufs_lookup.c
@@ -0,0 +1,970 @@
+/*
+ * Copyright (c) 1989, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)ufs_lookup.c	8.6 (Berkeley) 4/1/94
+ */
+
+#include <sys/param.h>
+#include <sys/namei.h>
+#include <sys/buf.h>
+#include <sys/file.h>
+#include <sys/mount.h>
+#include <sys/vnode.h>
+
+#include <ufs/ufs/quota.h>
+#include <ufs/ufs/inode.h>
+#include <ufs/ufs/dir.h>
+#include <ufs/ufs/ufsmount.h>
+#include <ufs/ufs/ufs_extern.h>
+
+struct	nchstats nchstats;	/* ufs_lookup bumps ncs_2passes, ncs_pass2 */
+#ifdef DIAGNOSTIC
+int	dirchk = 1;	/* nonzero: ufs_lookup runs ufs_dirbadentry() per entry */
+#else
+int	dirchk = 0;
+#endif
+
+#define FSFMT(vp)	((vp)->v_mount->mnt_maxsymlinklen <= 0)	/* for DIRSIZ() */
+
+/*
+ * Convert a component of a pathname into a pointer to a locked inode.
+ * This is a very central and rather complicated routine.
+ * If the file system is not maintained in a strict tree hierarchy,
+ * this can result in a deadlock situation (see comments in code below).
+ *
+ * The cnp->cn_nameiop argument is LOOKUP, CREATE, RENAME, or DELETE depending
+ * on whether the name is to be looked up, created, renamed, or deleted.
+ * When CREATE, RENAME, or DELETE is specified, information usable in
+ * creating, renaming, or deleting a directory entry may be calculated.
+ * If flag has LOCKPARENT or'ed into it and the target of the pathname
+ * exists, lookup returns both the target and its parent directory locked.
+ * When creating or renaming and LOCKPARENT is specified, the target may
+ * not be ".".  When deleting and LOCKPARENT is specified, the target may
+ * be ".", but the caller must check to ensure it does a vrele and vput
+ * instead of two vputs.
+ *
+ * Overall outline of ufs_lookup:
+ *
+ *	check accessibility of directory
+ *	look for name in cache, if found, then if at end of path
+ *	  and deleting or creating, drop it, else return name
+ *	search for name in directory, to found or notfound
+ * notfound:
+ *	if creating, return locked directory, leaving info on available slots
+ *	else return error
+ * found:
+ *	if at end of path and deleting, return information to allow delete
+ *	if at end of path and rewriting (RENAME and LOCKPARENT), lock target
+ *	  inode and return info to allow rewrite
+ *	if not at end, add name to cache; if at end and neither creating
+ *	  nor deleting, add name to cache
+ */
+int
+ufs_lookup(ap)
+	struct vop_lookup_args /* {
+		struct vnode *a_dvp;
+		struct vnode **a_vpp;
+		struct componentname *a_cnp;
+	} */ *ap;
+{
+	register struct vnode *vdp;	/* vnode for directory being searched */
+	register struct inode *dp;	/* inode for directory being searched */
+	struct buf *bp;			/* a buffer of directory entries */
+	register struct direct *ep;	/* the current directory entry */
+	int entryoffsetinblock;		/* offset of ep in bp's buffer */
+	enum {NONE, COMPACT, FOUND} slotstatus;
+	doff_t slotoffset;		/* offset of area with free space */
+	int slotsize;			/* size of area at slotoffset */
+	int slotfreespace;		/* amount of space free in slot */
+	int slotneeded;			/* size of the entry we're seeking */
+	int numdirpasses;		/* strategy for directory search */
+	doff_t endsearch;		/* offset to end directory search */
+	doff_t prevoff;			/* prev entry dp->i_offset */
+	struct vnode *pdp;		/* saved dp during symlink work */
+	struct vnode *tdp;		/* returned by VFS_VGET */
+	doff_t enduseful;		/* pointer past last used dir slot */
+	u_long bmask;			/* block offset mask */
+	int lockparent;			/* 1 => lockparent flag is set */
+	int wantparent;			/* 1 => wantparent or lockparent flag */
+	int namlen, error;
+	struct vnode **vpp = ap->a_vpp;
+	struct componentname *cnp = ap->a_cnp;
+	struct ucred *cred = cnp->cn_cred;
+	int flags = cnp->cn_flags;
+	int nameiop = cnp->cn_nameiop;
+
+	bp = NULL;
+	slotoffset = -1;
+	*vpp = NULL;
+	vdp = ap->a_dvp;
+	dp = VTOI(vdp);
+	lockparent = flags & LOCKPARENT;
+	wantparent = flags & (LOCKPARENT|WANTPARENT);
+
+	/*
+	 * Check accessibility of directory.
+	 */
+	if ((dp->i_mode & IFMT) != IFDIR)
+		return (ENOTDIR);
+	if (error = VOP_ACCESS(vdp, VEXEC, cred, cnp->cn_proc))
+		return (error);
+
+	/*
+	 * We now have a segment name to search for, and a directory to search.
+	 *
+	 * Before tediously performing a linear scan of the directory,
+	 * check the name cache to see if the directory/name pair
+	 * we are looking for is known already.
+	 */
+	if (error = cache_lookup(vdp, vpp, cnp)) {
+		int vpid;	/* capability number of vnode */
+
+		if (error == ENOENT)
+			return (error);
+		/*
+		 * Get the next vnode in the path.
+		 * See comment below starting `Step through' for
+		 * an explanation of the locking protocol.
+		 */
+		pdp = vdp;
+		dp = VTOI(*vpp);
+		vdp = *vpp;
+		vpid = vdp->v_id;
+		if (pdp == vdp) {   /* lookup on "." */
+			VREF(vdp);
+			error = 0;
+		} else if (flags & ISDOTDOT) {
+			VOP_UNLOCK(pdp);
+			error = vget(vdp, 1);
+			if (!error && lockparent && (flags & ISLASTCN))
+				error = VOP_LOCK(pdp);
+		} else {
+			error = vget(vdp, 1);
+			if (!lockparent || error || !(flags & ISLASTCN))
+				VOP_UNLOCK(pdp);
+		}
+		/*
+		 * Check that the capability number did not change
+		 * while we were waiting for the lock.
+		 */
+		if (!error) {
+			if (vpid == vdp->v_id)
+				return (0);
+			vput(vdp);
+			if (lockparent && pdp != vdp && (flags & ISLASTCN))
+				VOP_UNLOCK(pdp);
+		}
+		if (error = VOP_LOCK(pdp))
+			return (error);
+		vdp = pdp;
+		dp = VTOI(pdp);
+		*vpp = NULL;
+	}
+
+	/*
+	 * Suppress search for slots unless creating
+	 * file and at end of pathname, in which case
+	 * we watch for a place to put the new file in
+	 * case it doesn't already exist.
+	 */
+	slotstatus = FOUND;
+	slotfreespace = slotsize = slotneeded = 0;
+	if ((nameiop == CREATE || nameiop == RENAME) &&
+	    (flags & ISLASTCN)) {
+		slotstatus = NONE;
+		slotneeded = (sizeof(struct direct) - MAXNAMLEN +
+			cnp->cn_namelen + 3) &~ 3;
+	}
+
+	/*
+	 * If there is cached information on a previous search of
+	 * this directory, pick up where we last left off.
+	 * We cache only lookups as these are the most common
+	 * and have the greatest payoff. Caching CREATE has little
+	 * benefit as it usually must search the entire directory
+	 * to determine that the entry does not exist. Caching the
+	 * location of the last DELETE or RENAME has not reduced
+	 * profiling time and hence has been removed in the interest
+	 * of simplicity.
+	 */
+	bmask = VFSTOUFS(vdp->v_mount)->um_mountp->mnt_stat.f_iosize - 1;
+	if (nameiop != LOOKUP || dp->i_diroff == 0 ||
+	    dp->i_diroff > dp->i_size) {
+		entryoffsetinblock = 0;
+		dp->i_offset = 0;
+		numdirpasses = 1;
+	} else {
+		dp->i_offset = dp->i_diroff;
+		if ((entryoffsetinblock = dp->i_offset & bmask) &&
+		    (error = VOP_BLKATOFF(vdp, (off_t)dp->i_offset, NULL, &bp)))
+			return (error);
+		numdirpasses = 2;
+		nchstats.ncs_2passes++;
+	}
+	prevoff = dp->i_offset;
+	endsearch = roundup(dp->i_size, DIRBLKSIZ);
+	enduseful = 0;
+
+searchloop:
+	while (dp->i_offset < endsearch) {
+		/*
+		 * If necessary, get the next directory block.
+		 */
+		if ((dp->i_offset & bmask) == 0) {
+			if (bp != NULL)
+				brelse(bp);
+			if (error =
+			    VOP_BLKATOFF(vdp, (off_t)dp->i_offset, NULL, &bp))
+				return (error);
+			entryoffsetinblock = 0;
+		}
+		/*
+		 * If still looking for a slot, and at a DIRBLKSIZE
+		 * boundary, have to start looking for free space again.
+		 */
+		if (slotstatus == NONE &&
+		    (entryoffsetinblock & (DIRBLKSIZ - 1)) == 0) {
+			slotoffset = -1;
+			slotfreespace = 0;
+		}
+		/*
+		 * Get pointer to next entry.
+		 * Full validation checks are slow, so we only check
+		 * enough to insure forward progress through the
+		 * directory. Complete checks can be run by patching
+		 * "dirchk" to be true.
+		 */
+		ep = (struct direct *)((char *)bp->b_data + entryoffsetinblock);
+		if (ep->d_reclen == 0 ||
+		    dirchk && ufs_dirbadentry(vdp, ep, entryoffsetinblock)) {
+			int i;
+
+			ufs_dirbad(dp, dp->i_offset, "mangled entry");
+			i = DIRBLKSIZ - (entryoffsetinblock & (DIRBLKSIZ - 1));
+			dp->i_offset += i;
+			entryoffsetinblock += i;
+			continue;
+		}
+
+		/*
+		 * If an appropriate sized slot has not yet been found,
+		 * check to see if one is available. Also accumulate space
+		 * in the current block so that we can determine if
+		 * compaction is viable.
+		 */
+		if (slotstatus != FOUND) {
+			int size = ep->d_reclen;
+
+			if (ep->d_ino != 0)
+				size -= DIRSIZ(FSFMT(vdp), ep);
+			if (size > 0) {
+				if (size >= slotneeded) {
+					slotstatus = FOUND;
+					slotoffset = dp->i_offset;
+					slotsize = ep->d_reclen;
+				} else if (slotstatus == NONE) {
+					slotfreespace += size;
+					if (slotoffset == -1)
+						slotoffset = dp->i_offset;
+					if (slotfreespace >= slotneeded) {
+						slotstatus = COMPACT;
+						slotsize = dp->i_offset +
+						      ep->d_reclen - slotoffset;
+					}
+				}
+			}
+		}
+
+		/*
+		 * Check for a name match.
+		 */
+		if (ep->d_ino) {
+#			if (BYTE_ORDER == LITTLE_ENDIAN)
+				if (vdp->v_mount->mnt_maxsymlinklen > 0)
+					namlen = ep->d_namlen;
+				else
+					namlen = ep->d_type;
+#			else
+				namlen = ep->d_namlen;
+#			endif
+			if (namlen == cnp->cn_namelen &&
+			    !bcmp(cnp->cn_nameptr, ep->d_name,
+				(unsigned)namlen)) {
+				/*
+				 * Save directory entry's inode number and
+				 * reclen in ndp->ni_ufs area, and release
+				 * directory buffer.
+				 */
+				dp->i_ino = ep->d_ino;
+				dp->i_reclen = ep->d_reclen;
+				brelse(bp);
+				goto found;
+			}
+		}
+		prevoff = dp->i_offset;
+		dp->i_offset += ep->d_reclen;
+		entryoffsetinblock += ep->d_reclen;
+		if (ep->d_ino)
+			enduseful = dp->i_offset;
+	}
+/* notfound: */
+	/*
+	 * If we started in the middle of the directory and failed
+	 * to find our target, we must check the beginning as well.
+	 */
+	if (numdirpasses == 2) {
+		numdirpasses--;
+		dp->i_offset = 0;
+		endsearch = dp->i_diroff;
+		goto searchloop;
+	}
+	if (bp != NULL)
+		brelse(bp);
+	/*
+	 * If creating, and at end of pathname and current
+	 * directory has not been removed, then can consider
+	 * allowing file to be created.
+	 */
+	if ((nameiop == CREATE || nameiop == RENAME) &&
+	    (flags & ISLASTCN) && dp->i_nlink != 0) {
+		/*
+		 * Access for write is interpreted as allowing
+		 * creation of files in the directory.
+		 */
+		if (error = VOP_ACCESS(vdp, VWRITE, cred, cnp->cn_proc))
+			return (error);
+		/*
+		 * Return an indication of where the new directory
+		 * entry should be put.  If we didn't find a slot,
+		 * then set dp->i_count to 0 indicating
+		 * that the new slot belongs at the end of the
+		 * directory. If we found a slot, then the new entry
+		 * can be put in the range from dp->i_offset to
+		 * dp->i_offset + dp->i_count.
+		 */
+		if (slotstatus == NONE) {
+			dp->i_offset = roundup(dp->i_size, DIRBLKSIZ);
+			dp->i_count = 0;
+			enduseful = dp->i_offset;
+		} else {
+			dp->i_offset = slotoffset;
+			dp->i_count = slotsize;
+			if (enduseful < slotoffset + slotsize)
+				enduseful = slotoffset + slotsize;
+		}
+		dp->i_endoff = roundup(enduseful, DIRBLKSIZ);
+		dp->i_flag |= IN_CHANGE | IN_UPDATE;
+		/*
+		 * We return with the directory locked, so that
+		 * the parameters we set up above will still be
+		 * valid if we actually decide to do a direnter().
+		 * We return ni_vp == NULL to indicate that the entry
+		 * does not currently exist; we leave a pointer to
+		 * the (locked) directory inode in ndp->ni_dvp.
+		 * The pathname buffer is saved so that the name
+		 * can be obtained later.
+		 *
+		 * NB - if the directory is unlocked, then this
+		 * information cannot be used.
+		 */
+		cnp->cn_flags |= SAVENAME;
+		if (!lockparent)
+			VOP_UNLOCK(vdp);
+		return (EJUSTRETURN);
+	}
+	/*
+	 * Insert name into cache (as non-existent) if appropriate.
+	 */
+	if ((cnp->cn_flags & MAKEENTRY) && nameiop != CREATE)
+		cache_enter(vdp, *vpp, cnp);
+	return (ENOENT);
+
+found:
+	if (numdirpasses == 2)
+		nchstats.ncs_pass2++;
+	/*
+	 * Check that directory length properly reflects presence
+	 * of this entry.
+	 */
+	if (entryoffsetinblock + DIRSIZ(FSFMT(vdp), ep) > dp->i_size) {
+		ufs_dirbad(dp, dp->i_offset, "i_size too small");
+		dp->i_size = entryoffsetinblock + DIRSIZ(FSFMT(vdp), ep);
+		dp->i_flag |= IN_CHANGE | IN_UPDATE;
+	}
+
+	/*
+	 * Found component in pathname.
+	 * If the final component of path name, save information
+	 * in the cache as to where the entry was found.
+	 */
+	if ((flags & ISLASTCN) && nameiop == LOOKUP)
+		dp->i_diroff = dp->i_offset &~ (DIRBLKSIZ - 1);
+
+	/*
+	 * If deleting, and at end of pathname, return
+	 * parameters which can be used to remove file.
+	 * If the wantparent flag isn't set, we return only
+	 * the directory (in ndp->ni_dvp), otherwise we go
+	 * on and lock the inode, being careful with ".".
+	 */
+	if (nameiop == DELETE && (flags & ISLASTCN)) {
+		/*
+		 * Write access to directory required to delete files.
+		 */
+		if (error = VOP_ACCESS(vdp, VWRITE, cred, cnp->cn_proc))
+			return (error);
+		/*
+		 * Return pointer to current entry in dp->i_offset,
+		 * and distance past previous entry (if there
+		 * is a previous entry in this block) in dp->i_count.
+		 * Save directory inode pointer in ndp->ni_dvp for dirremove().
+		 */
+		if ((dp->i_offset & (DIRBLKSIZ - 1)) == 0)
+			dp->i_count = 0;
+		else
+			dp->i_count = dp->i_offset - prevoff;
+		if (dp->i_number == dp->i_ino) {
+			VREF(vdp);
+			*vpp = vdp;
+			return (0);
+		}
+		if (error = VFS_VGET(vdp->v_mount, dp->i_ino, &tdp))
+			return (error);
+		/*
+		 * If directory is "sticky", then user must own
+		 * the directory, or the file in it, else she
+		 * may not delete it (unless she's root). This
+		 * implements append-only directories.
+		 */
+		if ((dp->i_mode & ISVTX) &&
+		    cred->cr_uid != 0 &&
+		    cred->cr_uid != dp->i_uid &&
+		    VTOI(tdp)->i_uid != cred->cr_uid) {
+			vput(tdp);
+			return (EPERM);
+		}
+		*vpp = tdp;
+		if (!lockparent)
+			VOP_UNLOCK(vdp);
+		return (0);
+	}
+
+	/*
+	 * If rewriting (RENAME), return the inode and the
+	 * information required to rewrite the present directory
+	 * Must get inode of directory entry to verify it's a
+	 * regular file, or empty directory.
+	 */
+	if (nameiop == RENAME && wantparent &&
+	    (flags & ISLASTCN)) {
+		if (error = VOP_ACCESS(vdp, VWRITE, cred, cnp->cn_proc))
+			return (error);
+		/*
+		 * Careful about locking second inode.
+		 * This can only occur if the target is ".".
+		 */
+		if (dp->i_number == dp->i_ino)
+			return (EISDIR);
+		if (error = VFS_VGET(vdp->v_mount, dp->i_ino, &tdp))
+			return (error);
+		*vpp = tdp;
+		cnp->cn_flags |= SAVENAME;
+		if (!lockparent)
+			VOP_UNLOCK(vdp);
+		return (0);
+	}
+
+	/*
+	 * Step through the translation in the name.  We do not `vput' the
+	 * directory because we may need it again if a symbolic link
+	 * is relative to the current directory.  Instead we save it
+	 * unlocked as "pdp".  We must get the target inode before unlocking
+	 * the directory to insure that the inode will not be removed
+	 * before we get it.  We prevent deadlock by always fetching
+	 * inodes from the root, moving down the directory tree. Thus
+	 * when following backward pointers ".." we must unlock the
+	 * parent directory before getting the requested directory.
+	 * There is a potential race condition here if both the current
+	 * and parent directories are removed before the VFS_VGET for the
+	 * inode associated with ".." returns.  We hope that this occurs
+	 * infrequently since we cannot avoid this race condition without
+	 * implementing a sophisticated deadlock detection algorithm.
+	 * Note also that this simple deadlock detection scheme will not
+	 * work if the file system has any hard links other than ".."
+	 * that point backwards in the directory structure.
+	 */
+	pdp = vdp;
+	if (flags & ISDOTDOT) {
+		VOP_UNLOCK(pdp);	/* race to get the inode */
+		if (error = VFS_VGET(vdp->v_mount, dp->i_ino, &tdp)) {
+			VOP_LOCK(pdp);
+			return (error);
+		}
+		if (lockparent && (flags & ISLASTCN) &&
+		    (error = VOP_LOCK(pdp))) {
+			vput(tdp);
+			return (error);
+		}
+		*vpp = tdp;
+	} else if (dp->i_number == dp->i_ino) {
+		VREF(vdp);	/* we want ourself, ie "." */
+		*vpp = vdp;
+	} else {
+		if (error = VFS_VGET(vdp->v_mount, dp->i_ino, &tdp))
+			return (error);
+		if (!lockparent || !(flags & ISLASTCN))
+			VOP_UNLOCK(pdp);
+		*vpp = tdp;
+	}
+
+	/*
+	 * Insert name into cache if appropriate.
+	 */
+	if (cnp->cn_flags & MAKEENTRY)
+		cache_enter(vdp, *vpp, cnp);
+	return (0);
+}
+
+void
+ufs_dirbad(ip, offset, how)
+	struct inode *ip;
+	doff_t offset;
+	char *how;
+{
+	struct mount *fsmount = ITOV(ip)->v_mount;
+
+	(void)printf("%s: bad dir ino %d at offset %d: %s\n",
+	    fsmount->mnt_stat.f_mntonname, ip->i_number, offset, how);
+	if (fsmount->mnt_stat.f_flags & MNT_RDONLY)
+		return;		/* read-only: report only */
+	panic("bad dir");
+}
+
+/*
+ * Sanity-check a single directory entry.  Returns 1 if the entry is
+ * bad and 0 if it passes every one of these tests:
+ *	record length is a multiple of 4
+ *	entry fits in the rest of its DIRBLKSIZ block
+ *	record is big enough for the entry; name no longer than MAXNAMLEN
+ *	name is as long as advertised, and null terminated
+ */
+int
+ufs_dirbadentry(dp, ep, entryoffsetinblock)
+	struct vnode *dp;
+	register struct direct *ep;
+	int entryoffsetinblock;
+{
+	register int pos;
+	int namlen;
+
+	/* Little-endian, mnt_maxsymlinklen <= 0: length is read from d_type. */
+#	if (BYTE_ORDER == LITTLE_ENDIAN)
+		namlen = (dp->v_mount->mnt_maxsymlinklen > 0) ?
+		    ep->d_namlen : ep->d_type;
+#	else
+		namlen = ep->d_namlen;
+#	endif
+
+	/* Record-length and name-length checks. */
+	if ((ep->d_reclen & 0x3) != 0 ||
+	    ep->d_reclen > DIRBLKSIZ - (entryoffsetinblock & (DIRBLKSIZ - 1)) ||
+	    ep->d_reclen < DIRSIZ(FSFMT(dp), ep) || namlen > MAXNAMLEN) {
+		printf("First bad\n");
+		return (1);
+	}
+	/* A NUL inside the advertised length means the name is too short. */
+	for (pos = 0; pos < namlen; pos++) {
+		if (ep->d_name[pos] != '\0')
+			continue;
+		printf("Second bad\n");
+		return (1);
+	}
+	/* The byte just past the name must be the terminator. */
+	if (ep->d_name[pos] != '\0')
+		return (1);
+	return (0);
+}
+
+/*
+ * Write a directory entry after a call to namei, using the parameters
+ * that it left in nameidata.  The argument ip is the inode which the new
+ * directory entry will refer to.  Dvp is a pointer to the directory to
+ * be written, which was left locked by namei.  The slot for the new entry
+ * is described by dp->i_offset and dp->i_count, dp being dvp's inode.
+ * Returns 0 on success, else the error from the block I/O or truncate.
+ */
+int
+ufs_direnter(ip, dvp, cnp)
+	struct inode *ip;
+	struct vnode *dvp;
+	register struct componentname *cnp;
+{
+	register struct direct *ep, *nep;
+	register struct inode *dp;
+	struct buf *bp;
+	struct direct newdir;
+	struct iovec aiov;
+	struct uio auio;
+	u_int dsize;
+	int error, loc, newentrysize, spacefree;
+	char *dirbuf;
+
+#ifdef DIAGNOSTIC
+	if ((cnp->cn_flags & SAVENAME) == 0)
+		panic("direnter: missing name");
+#endif
+	dp = VTOI(dvp);
+	newdir.d_ino = ip->i_number;
+	newdir.d_namlen = cnp->cn_namelen;
+	bcopy(cnp->cn_nameptr, newdir.d_name, (unsigned)cnp->cn_namelen + 1);
+	if (dvp->v_mount->mnt_maxsymlinklen > 0)
+		newdir.d_type = IFTODT(ip->i_mode);
+	else {
+		newdir.d_type = 0;
+#		if (BYTE_ORDER == LITTLE_ENDIAN)
+			{ u_char tmp = newdir.d_namlen;	/* swap d_namlen and d_type */
+			newdir.d_namlen = newdir.d_type;
+			newdir.d_type = tmp; }
+#		endif
+	}
+	newentrysize = DIRSIZ(FSFMT(dvp), &newdir);
+	if (dp->i_count == 0) {
+		/*
+		 * If dp->i_count is 0, then namei could find no
+		 * space in the directory. Here, dp->i_offset will
+		 * be on a directory block boundary and we will write the
+		 * new entry into a fresh block.
+		 */
+		if (dp->i_offset & (DIRBLKSIZ - 1))
+			panic("ufs_direnter: newblk");
+		auio.uio_offset = dp->i_offset;
+		newdir.d_reclen = DIRBLKSIZ;	/* entry spans the whole block */
+		auio.uio_resid = newentrysize;
+		aiov.iov_len = newentrysize;
+		aiov.iov_base = (caddr_t)&newdir;
+		auio.uio_iov = &aiov;
+		auio.uio_iovcnt = 1;
+		auio.uio_rw = UIO_WRITE;
+		auio.uio_segflg = UIO_SYSSPACE;
+		auio.uio_procp = (struct proc *)0;
+		error = VOP_WRITE(dvp, &auio, IO_SYNC, cnp->cn_cred);
+		if (DIRBLKSIZ >
+		    VFSTOUFS(dvp->v_mount)->um_mountp->mnt_stat.f_bsize)
+			/* XXX should grow with balloc() */
+			panic("ufs_direnter: frag size");
+		else if (!error) {
+			dp->i_size = roundup(dp->i_size, DIRBLKSIZ);
+			dp->i_flag |= IN_CHANGE;
+		}
+		return (error);
+	}
+
+	/*
+	 * If dp->i_count is non-zero, then namei found space
+	 * for the new entry in the range dp->i_offset to
+	 * dp->i_offset + dp->i_count in the directory.
+	 * To use this space, we may have to compact the entries located
+	 * there, by copying them together towards the beginning of the
+	 * block, leaving the free space in one usable chunk at the end.
+	 */
+
+	/*
+	 * Increase size of directory if entry eats into new space.
+	 * This should never push the size past a new multiple of
+	 * DIRBLKSIZE.
+	 *
+	 * N.B. - THIS IS AN ARTIFACT OF 4.2 AND SHOULD NEVER HAPPEN.
+	 */
+	if (dp->i_offset + dp->i_count > dp->i_size)
+		dp->i_size = dp->i_offset + dp->i_count;
+	/*
+	 * Get the block containing the space for the new directory entry.
+	 */
+	if (error = VOP_BLKATOFF(dvp, (off_t)dp->i_offset, &dirbuf, &bp))
+		return (error);
+	/*
+	 * Find space for the new entry. In the simple case, the entry at
+	 * offset base will have the space. If it does not, then namei
+	 * arranged that compacting the region dp->i_offset to
+	 * dp->i_offset + dp->i_count would yield the
+	 * space.
+	 */
+	ep = (struct direct *)dirbuf;
+	dsize = DIRSIZ(FSFMT(dvp), ep);
+	spacefree = ep->d_reclen - dsize;	/* slack after the first entry */
+	for (loc = ep->d_reclen; loc < dp->i_count; ) {
+		nep = (struct direct *)(dirbuf + loc);
+		if (ep->d_ino) {
+			/* trim the existing slot */
+			ep->d_reclen = dsize;
+			ep = (struct direct *)((char *)ep + dsize);
+		} else {
+			/* overwrite; nothing there; header is ours */
+			spacefree += dsize;
+		}
+		dsize = DIRSIZ(FSFMT(dvp), nep);
+		spacefree += nep->d_reclen - dsize;
+		loc += nep->d_reclen;
+		bcopy((caddr_t)nep, (caddr_t)ep, dsize);
+	}
+	/*
+	 * Update the pointer fields in the previous entry (if any),
+	 * copy in the new entry, and write out the block.
+	 */
+	if (ep->d_ino == 0) {
+		if (spacefree + dsize < newentrysize)
+			panic("ufs_direnter: compact1");
+		newdir.d_reclen = spacefree + dsize;
+	} else {
+		if (spacefree < newentrysize)
+			panic("ufs_direnter: compact2");
+		newdir.d_reclen = spacefree;
+		ep->d_reclen = dsize;
+		ep = (struct direct *)((char *)ep + dsize);
+	}
+	bcopy((caddr_t)&newdir, (caddr_t)ep, (u_int)newentrysize);
+	error = VOP_BWRITE(bp);
+	dp->i_flag |= IN_CHANGE | IN_UPDATE;
+	if (!error && dp->i_endoff && dp->i_endoff < dp->i_size)
+		error = VOP_TRUNCATE(dvp, (off_t)dp->i_endoff, IO_SYNC,
+		    cnp->cn_cred, cnp->cn_proc);
+	return (error);
+}
+
+/*
+ * Remove a directory entry after a call to namei, using the
+ * parameters which it left in the directory inode.  dp->i_offset
+ * is the offset into the directory of the entry to be eliminated,
+ * dp->i_count the distance back to the previous record and
+ * dp->i_reclen the length of the doomed record.  If dp->i_count
+ * is 0 the entry has no predecessor to absorb it, so we need only
+ * zero its inode number to mark it free.  Otherwise we reclaim
+ * the space of the now empty record by adding its record size
+ * to the size of the previous entry.  Returns 0 or the error
+ * from VOP_BLKATOFF or VOP_BWRITE.
+ */
+int
+ufs_dirremove(dvp, cnp)
+	struct vnode *dvp;
+	struct componentname *cnp;
+{
+	register struct inode *dp = VTOI(dvp);
+	struct direct *ep;
+	struct buf *bp;
+	int error, prevdist;
+
+	/*
+	 * Map the record to be edited.  With no predecessor
+	 * (prevdist == 0) that is the doomed entry itself;
+	 * otherwise it is the record just before it.
+	 */
+	prevdist = dp->i_count;
+	error = VOP_BLKATOFF(dvp, (off_t)(dp->i_offset - prevdist),
+	    (char **)&ep, &bp);
+	if (error)
+		return (error);
+
+	if (prevdist == 0) {
+		/* First entry in block: set d_ino to zero. */
+		ep->d_ino = 0;
+	} else {
+		/* Collapse new free space into previous entry. */
+		ep->d_reclen += dp->i_reclen;
+	}
+
+	/* Write the block back and flag the directory inode as modified. */
+	error = VOP_BWRITE(bp);
+	dp->i_flag |= IN_CHANGE | IN_UPDATE;
+	return (error);
+}
+
+/*
+ * Make the directory entry at dp->i_offset (set up by a call to namei)
+ * refer to inode ip.  Returns 0 or the error from the block read/write.
+ */
+int
+ufs_dirrewrite(dp, ip, cnp)
+	struct inode *dp, *ip;
+	struct componentname *cnp;
+{
+	struct vnode *dirvp = ITOV(dp);
+	struct direct *de;
+	struct buf *dbp;
+	int error;
+
+	error = VOP_BLKATOFF(dirvp, (off_t)dp->i_offset, (char **)&de, &dbp);
+	if (error != 0)
+		return (error);
+	de->d_ino = ip->i_number;
+	if (dirvp->v_mount->mnt_maxsymlinklen > 0)
+		de->d_type = IFTODT(ip->i_mode);	/* keep type in step */
+	error = VOP_BWRITE(dbp);
+	dp->i_flag |= IN_CHANGE | IN_UPDATE;
+	return (error);
+}
+
+/*
+ * Return 1 if the directory holds only "." and ".." (the latter with
+ * inode parentino), else 0; read errors also yield 0.  Inode must be locked.
+ *
+ * Using a struct dirtemplate here is not precisely
+ * what we want, but better than using a struct direct.
+ *
+ * NB: does not handle corrupted directories.
+ */
+int
+ufs_dirempty(ip, parentino, cred)
+	register struct inode *ip;
+	ino_t parentino;
+	struct ucred *cred;
+{
+	register off_t pos;
+	struct dirtemplate dbuf;
+	register struct direct *dp = (struct direct *)&dbuf;
+	int error, resid, namlen;
+#define	MINDIRSIZ (sizeof (struct dirtemplate) / 2)
+
+	pos = 0;
+	while (pos < ip->i_size) {
+		/*
+		 * Fetch just the leading MINDIRSIZ bytes of the entry;
+		 * a short read (non-zero residual) is treated as failure.
+		 */
+		error = vn_rdwr(UIO_READ, ITOV(ip), (caddr_t)dp, MINDIRSIZ, pos,
+		   UIO_SYSSPACE, IO_NODELOCKED, cred, &resid, (struct proc *)0);
+		if (error || resid != 0)
+			return (0);
+
+		/* a zero record length would never advance: bail out */
+		if (dp->d_reclen == 0)
+			return (0);
+
+		/* only entries that are in use need to be examined */
+		if (dp->d_ino != 0) {
+#			if (BYTE_ORDER == LITTLE_ENDIAN)
+				if (ITOV(ip)->v_mount->mnt_maxsymlinklen > 0)
+					namlen = dp->d_namlen;
+				else
+					namlen = dp->d_type;
+#			else
+				namlen = dp->d_namlen;
+#			endif
+			/* anything other than "." or ".." means not empty */
+			if (namlen > 2 || dp->d_name[0] != '.')
+				return (0);
+			/*
+			 * A one-character name is "."; otherwise the entry
+			 * must be ".." and must point at parentino.
+			 */
+			if (namlen != 1 &&
+			    !(dp->d_name[1] == '.' && dp->d_ino == parentino))
+				return (0);
+		}
+
+		pos += dp->d_reclen;
+	}
+	return (1);
+}
+
+/*
+ * Return EINVAL if source is an ancestor of target, EEXIST if they are the
+ * same inode, ENOTDIR/I-O errors on trouble, else 0.  Target is supplied
+ * locked, source unlocked; the target is always vput before returning.
+ */
+int
+ufs_checkpath(source, target, cred)
+	struct inode *source, *target;
+	struct ucred *cred;
+{
+	struct vnode *vp;
+	struct mount *mp;
+	int error = 0, rootino = ROOTINO, namlen;
+	struct dirtemplate dirbuf;
+
+	vp = ITOV(target);
+	if (target->i_number == source->i_number) {
+		error = EEXIST;
+		goto out;
+	}
+	if (target->i_number == rootino)
+		goto out;
+
+	for (;;) {		/* climb ".." from target toward the root */
+		if (vp->v_type != VDIR) {
+			error = ENOTDIR;
+			break;
+		}
+		error = vn_rdwr(UIO_READ, vp, (caddr_t)&dirbuf,
+			sizeof (struct dirtemplate), (off_t)0, UIO_SYSSPACE,
+			IO_NODELOCKED, cred, (int *)0, (struct proc *)0);
+		if (error != 0)
+			break;
+#		if (BYTE_ORDER == LITTLE_ENDIAN)
+			if (vp->v_mount->mnt_maxsymlinklen > 0)
+				namlen = dirbuf.dotdot_namlen;
+			else
+				namlen = dirbuf.dotdot_type;
+#		else
+			namlen = dirbuf.dotdot_namlen;
+#		endif
+		if (namlen != 2 ||
+		    dirbuf.dotdot_name[0] != '.' ||
+		    dirbuf.dotdot_name[1] != '.') {
+			error = ENOTDIR;
+			break;
+		}
+		if (dirbuf.dotdot_ino == source->i_number) {
+			error = EINVAL;
+			break;
+		}
+		if (dirbuf.dotdot_ino == rootino)
+			break;	/* reached the root without meeting source */
+		mp = vp->v_mount;	/* fetch before vput drops vp */
+		vput(vp);
+		if (error = VFS_VGET(mp, dirbuf.dotdot_ino, &vp)) {
+			vp = NULL;
+			break;
+		}
+	}
+
+out:
+	if (error == ENOTDIR)
+		printf("checkpath: .. not a directory\n");
+	if (vp != NULL)
+		vput(vp);
+	return (error);
+}
diff --git a/sys/ufs/ufs/ufs_quota.c b/sys/ufs/ufs/ufs_quota.c
new file mode 100644
index 00000000000..15cb1cfbb23
--- /dev/null
+++ b/sys/ufs/ufs/ufs_quota.c
@@ -0,0 +1,938 @@
+/*
+ * Copyright (c) 1982, 1986, 1990, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Robert Elz at The University of Melbourne.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)ufs_quota.c	8.2 (Berkeley) 12/30/93
+ */
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/systm.h>
+#include <sys/namei.h>
+#include <sys/malloc.h>
+#include <sys/file.h>
+#include <sys/proc.h>
+#include <sys/vnode.h>
+#include <sys/mount.h>
+
+#include <ufs/ufs/quota.h>
+#include <ufs/ufs/inode.h>
+#include <ufs/ufs/ufsmount.h>
+#include <ufs/ufs/ufs_extern.h>
+
+/*
+ * Quota type names, indexed by quota type; printed in the quota messages.
+ */
+static char *quotatypes[] = INITQFNAMES;
+
+/*
+ * Attach the user and group dquots to an inode that lacks them.
+ *
+ * This routine completely defines the semantics of quotas.
+ * If other criteria are to be used to establish quotas, the
+ * MAXQUOTAS value in quota.h should be increased, and the
+ * additional dquots set up here.  Returns 0 or a dqget() error.
+ */
+int
+getinoquota(ip)
+	register struct inode *ip;
+{
+	struct vnode *vp = ITOV(ip);
+	struct ufsmount *ump = VFSTOUFS(vp->v_mount);
+	int error;
+
+	/*
+	 * User quota, keyed by the file's uid.  EINVAL from dqget
+	 * means that quotas are not enabled, which is not an error here.
+	 */
+	if (ip->i_dquot[USRQUOTA] == NODQUOT) {
+		error = dqget(vp, ip->i_uid, ump, USRQUOTA,
+		    &ip->i_dquot[USRQUOTA]);
+		if (error != 0 && error != EINVAL)
+			return (error);
+	}
+	/*
+	 * Group quota, keyed by the file's gid; EINVAL is again ignored.
+	 */
+	if (ip->i_dquot[GRPQUOTA] == NODQUOT) {
+		error = dqget(vp, ip->i_gid, ump, GRPQUOTA,
+		    &ip->i_dquot[GRPQUOTA]);
+		if (error != 0 && error != EINVAL)
+			return (error);
+	}
+	return (0);
+}
+
+/*
+ * Apply a change in block usage to ip's dquots, checking limits on growth.
+ */
+int
+chkdq(ip, change, cred, flags)
+	register struct inode *ip;
+	long change;
+	struct ucred *cred;
+	int flags;
+{
+	register struct dquot *dq;
+	register int i;
+	int ncurblocks, error;
+
+#ifdef DIAGNOSTIC
+	if ((flags & CHOWN) == 0)
+		chkdquot(ip);
+#endif
+	if (change == 0)
+		return (0);
+	if (change < 0) {	/* usage is shrinking: no limit check */
+		for (i = 0; i < MAXQUOTAS; i++) {
+			if ((dq = ip->i_dquot[i]) == NODQUOT)
+				continue;
+			while (dq->dq_flags & DQ_LOCK) {
+				dq->dq_flags |= DQ_WANT;
+				sleep((caddr_t)dq, PINOD+1);
+			}
+			ncurblocks = dq->dq_curblocks + change;
+			if (ncurblocks >= 0)
+				dq->dq_curblocks = ncurblocks;
+			else
+				dq->dq_curblocks = 0;	/* never go negative */
+			dq->dq_flags &= ~DQ_BLKS;
+			dq->dq_flags |= DQ_MOD;
+		}
+		return (0);
+	}
+	if ((flags & FORCE) == 0 && cred->cr_uid != 0) {
+		for (i = 0; i < MAXQUOTAS; i++) {
+			if ((dq = ip->i_dquot[i]) == NODQUOT)
+				continue;
+			if (error = chkdqchg(ip, change, cred, i))
+				return (error);	/* nothing charged yet */
+		}
+	}
+	for (i = 0; i < MAXQUOTAS; i++) {
+		if ((dq = ip->i_dquot[i]) == NODQUOT)
+			continue;
+		while (dq->dq_flags & DQ_LOCK) {	/* wait while locked */
+			dq->dq_flags |= DQ_WANT;
+			sleep((caddr_t)dq, PINOD+1);
+		}
+		dq->dq_curblocks += change;
+		dq->dq_flags |= DQ_MOD;
+	}
+	return (0);
+}
+
+/*
+ * Decide whether a block allocation fits the quota of the given type:
+ * returns 0 if allowed, EDQUOT if not; messages go to a caller owning ip.
+ */
+int
+chkdqchg(ip, change, cred, type)
+	struct inode *ip;
+	long change;
+	struct ucred *cred;
+	int type;
+{
+	register struct dquot *dq = ip->i_dquot[type];
+	long ncurblocks = dq->dq_curblocks + change;
+	char *fsname = ITOV(ip)->v_mount->mnt_stat.f_mntonname;
+
+	/*
+	 * Hard limit: refuse outright, reporting it once to a caller
+	 * who owns the file (DQ_BLKS remembers that we did).
+	 */
+	if (dq->dq_bhardlimit && ncurblocks >= dq->dq_bhardlimit) {
+		if ((dq->dq_flags & DQ_BLKS) == 0 &&
+		    ip->i_uid == cred->cr_uid) {
+			uprintf("\n%s: write failed, %s disk limit reached\n",
+			    fsname, quotatypes[type]);
+			dq->dq_flags |= DQ_BLKS;
+		}
+		return (EDQUOT);
+	}
+
+	/* No soft limit, or still below it: the change is fine. */
+	if (dq->dq_bsoftlimit == 0 || ncurblocks < dq->dq_bsoftlimit)
+		return (0);
+
+	/*
+	 * Crossing the soft limit with this change: start the time
+	 * limit running and warn, but let the allocation proceed.
+	 */
+	if (dq->dq_curblocks < dq->dq_bsoftlimit) {
+		dq->dq_btime = time.tv_sec +
+		    VFSTOUFS(ITOV(ip)->v_mount)->um_btime[type];
+		if (ip->i_uid == cred->cr_uid)
+			uprintf("\n%s: warning, %s %s\n",
+			    fsname, quotatypes[type], "disk quota exceeded");
+		return (0);
+	}
+	/* Already over the soft limit: allowed until dq_btime passes. */
+	if (time.tv_sec <= dq->dq_btime)
+		return (0);
+	if ((dq->dq_flags & DQ_BLKS) == 0 && ip->i_uid == cred->cr_uid) {
+		uprintf("\n%s: write failed, %s %s\n", fsname,
+		    quotatypes[type], "disk quota exceeded for too long");
+		dq->dq_flags |= DQ_BLKS;
+	}
+	return (EDQUOT);
+}
+
+/*
+ * Apply a change in inode usage to ip's dquots, checking limits on growth.
+ */
+int
+chkiq(ip, change, cred, flags)
+	register struct inode *ip;
+	long change;
+	struct ucred *cred;
+	int flags;
+{
+	register struct dquot *dq;
+	register int i;
+	int ncurinodes, error;
+
+#ifdef DIAGNOSTIC
+	if ((flags & CHOWN) == 0)
+		chkdquot(ip);
+#endif
+	if (change == 0)
+		return (0);
+	if (change < 0) {	/* usage is shrinking: no limit check */
+		for (i = 0; i < MAXQUOTAS; i++) {
+			if ((dq = ip->i_dquot[i]) == NODQUOT)
+				continue;
+			while (dq->dq_flags & DQ_LOCK) {
+				dq->dq_flags |= DQ_WANT;
+				sleep((caddr_t)dq, PINOD+1);
+			}
+			ncurinodes = dq->dq_curinodes + change;
+			if (ncurinodes >= 0)
+				dq->dq_curinodes = ncurinodes;
+			else
+				dq->dq_curinodes = 0;	/* never go negative */
+			dq->dq_flags &= ~DQ_INODS;
+			dq->dq_flags |= DQ_MOD;
+		}
+		return (0);
+	}
+	if ((flags & FORCE) == 0 && cred->cr_uid != 0) {
+		for (i = 0; i < MAXQUOTAS; i++) {
+			if ((dq = ip->i_dquot[i]) == NODQUOT)
+				continue;
+			if (error = chkiqchg(ip, change, cred, i))
+				return (error);	/* nothing charged yet */
+		}
+	}
+	for (i = 0; i < MAXQUOTAS; i++) {
+		if ((dq = ip->i_dquot[i]) == NODQUOT)
+			continue;
+		while (dq->dq_flags & DQ_LOCK) {	/* wait while locked */
+			dq->dq_flags |= DQ_WANT;
+			sleep((caddr_t)dq, PINOD+1);
+		}
+		dq->dq_curinodes += change;
+		dq->dq_flags |= DQ_MOD;
+	}
+	return (0);
+}
+
+/*
+ * Decide whether an inode allocation fits the quota of the given type:
+ * returns 0 if allowed, EDQUOT if not; messages go to a caller owning ip.
+ */
+int
+chkiqchg(ip, change, cred, type)
+	struct inode *ip;
+	long change;
+	struct ucred *cred;
+	int type;
+{
+	register struct dquot *dq = ip->i_dquot[type];
+	long ncurinodes = dq->dq_curinodes + change;
+	char *fsname = ITOV(ip)->v_mount->mnt_stat.f_mntonname;
+
+	/*
+	 * Hard limit: refuse outright, reporting it once to a caller
+	 * who owns the file (DQ_INODS remembers that we did).
+	 */
+	if (dq->dq_ihardlimit && ncurinodes >= dq->dq_ihardlimit) {
+		if ((dq->dq_flags & DQ_INODS) == 0 &&
+		    ip->i_uid == cred->cr_uid) {
+			uprintf("\n%s: write failed, %s inode limit reached\n",
+			    fsname, quotatypes[type]);
+			dq->dq_flags |= DQ_INODS;
+		}
+		return (EDQUOT);
+	}
+
+	/* No soft limit, or still below it: the change is fine. */
+	if (dq->dq_isoftlimit == 0 || ncurinodes < dq->dq_isoftlimit)
+		return (0);
+
+	/*
+	 * Crossing the soft limit with this change: start the time
+	 * limit running and warn, but let the allocation proceed.
+	 */
+	if (dq->dq_curinodes < dq->dq_isoftlimit) {
+		dq->dq_itime = time.tv_sec +
+		    VFSTOUFS(ITOV(ip)->v_mount)->um_itime[type];
+		if (ip->i_uid == cred->cr_uid)
+			uprintf("\n%s: warning, %s %s\n",
+			    fsname, quotatypes[type], "inode quota exceeded");
+		return (0);
+	}
+	/* Already over the soft limit: allowed until dq_itime passes. */
+	if (time.tv_sec <= dq->dq_itime)
+		return (0);
+	if ((dq->dq_flags & DQ_INODS) == 0 && ip->i_uid == cred->cr_uid) {
+		uprintf("\n%s: write failed, %s %s\n", fsname,
+		    quotatypes[type], "inode quota exceeded for too long");
+		dq->dq_flags |= DQ_INODS;
+	}
+	return (EDQUOT);
+}
+
+#ifdef DIAGNOSTIC
+/*
+ * Panic if any quota type that is active on ip's filesystem (and not
+ * in the middle of being opened or closed) has no dquot attached to ip.
+ */
+void
+chkdquot(ip)
+	register struct inode *ip;
+{
+	struct ufsmount *ump = VFSTOUFS(ITOV(ip)->v_mount);
+	register int type;
+
+	for (type = 0; type < MAXQUOTAS; type++) {
+		/* consider only quota types that are fully enabled */
+		if (ump->um_quotas[type] != NULLVP &&
+		    (ump->um_qflags[type] & (QTF_OPENING|QTF_CLOSING)) == 0 &&
+		    ip->i_dquot[type] == NODQUOT) {
+			vprint("chkdquot: missing dquot", ITOV(ip));
+			panic("missing dquot");
+		}
+	}
+}
+#endif
+
+/*
+ * Code to process quotactl commands.
+ */
+
+/*
+ * Q_QUOTAON - open fname and make it the quota file of this type for mp.
+ */
+int
+quotaon(p, mp, type, fname)
+	struct proc *p;
+	struct mount *mp;
+	register int type;
+	caddr_t fname;
+{
+	register struct ufsmount *ump = VFSTOUFS(mp);
+	register struct vnode *vp, **vpp;
+	struct vnode *nextvp;
+	struct dquot *dq;
+	int error;
+	struct nameidata nd;
+
+	vpp = &ump->um_quotas[type];
+	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, fname, p);
+	if (error = vn_open(&nd, FREAD|FWRITE, 0))
+		return (error);
+	vp = nd.ni_vp;
+	VOP_UNLOCK(vp);
+	if (vp->v_type != VREG) {
+		(void) vn_close(vp, FREAD|FWRITE, p->p_ucred, p);
+		return (EACCES);	/* only a regular file will do */
+	}
+	if (vfs_busy(mp)) {
+		(void) vn_close(vp, FREAD|FWRITE, p->p_ucred, p);
+		return (EBUSY);
+	}
+	if (*vpp != vp)		/* not already the active quota file */
+		quotaoff(p, mp, type);
+	ump->um_qflags[type] |= QTF_OPENING;
+	mp->mnt_flag |= MNT_QUOTA;
+	vp->v_flag |= VSYSTEM;
+	*vpp = vp;
+	/*
+	 * Save the credential of the process that turned on quotas.
+	 * Set up the time limits for this quota.
+	 */
+	crhold(p->p_ucred);
+	ump->um_cred[type] = p->p_ucred;
+	ump->um_btime[type] = MAX_DQ_TIME;
+	ump->um_itime[type] = MAX_IQ_TIME;
+	if (dqget(NULLVP, 0, ump, type, &dq) == 0) {	/* id 0 overrides */
+		if (dq->dq_btime > 0)
+			ump->um_btime[type] = dq->dq_btime;
+		if (dq->dq_itime > 0)
+			ump->um_itime[type] = dq->dq_itime;
+		dqrele(NULLVP, dq);
+	}
+	/*
+	 * Search vnodes associated with this mount point,
+	 * adding references to quota file being opened.
+	 * NB: only need to add dquot's for inodes being modified.
+	 */
+again:
+	for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = nextvp) {
+		nextvp = vp->v_mntvnodes.le_next;
+		if (vp->v_writecount == 0)
+			continue;
+		if (vget(vp, 1))
+			goto again;	/* rescan from the list head */
+		if (error = getinoquota(VTOI(vp))) {
+			vput(vp);
+			break;
+		}
+		vput(vp);
+		if (vp->v_mntvnodes.le_next != nextvp || vp->v_mount != mp)
+			goto again;	/* list changed under us */
+	}
+	ump->um_qflags[type] &= ~QTF_OPENING;
+	if (error)
+		quotaoff(p, mp, type);	/* undo the partial setup */
+	vfs_unbusy(mp);
+	return (error);
+}
+
+/*
+ * Q_QUOTAOFF - turn off disk quotas of this type; mp must be marked busy.
+ */
+int
+quotaoff(p, mp, type)
+	struct proc *p;
+	struct mount *mp;
+	register int type;
+{
+	register struct vnode *vp;
+	struct vnode *qvp, *nextvp;
+	struct ufsmount *ump = VFSTOUFS(mp);
+	register struct dquot *dq;
+	register struct inode *ip;
+	int error;
+	
+	if ((mp->mnt_flag & MNT_MPBUSY) == 0)
+		panic("quotaoff: not busy");
+	if ((qvp = ump->um_quotas[type]) == NULLVP)
+		return (0);	/* no quota file of this type is open */
+	ump->um_qflags[type] |= QTF_CLOSING;
+	/*
+	 * Search vnodes associated with this mount point,
+	 * deleting any references to quota file being closed.
+	 */
+again:
+	for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = nextvp) {
+		nextvp = vp->v_mntvnodes.le_next;
+		if (vget(vp, 1))
+			goto again;	/* rescan from the list head */
+		ip = VTOI(vp);
+		dq = ip->i_dquot[type];
+		ip->i_dquot[type] = NODQUOT;
+		dqrele(vp, dq);
+		vput(vp);
+		if (vp->v_mntvnodes.le_next != nextvp || vp->v_mount != mp)
+			goto again;	/* list changed under us */
+	}
+	dqflush(qvp);
+	qvp->v_flag &= ~VSYSTEM;
+	error = vn_close(qvp, FREAD|FWRITE, p->p_ucred, p);
+	ump->um_quotas[type] = NULLVP;
+	crfree(ump->um_cred[type]);
+	ump->um_cred[type] = NOCRED;
+	ump->um_qflags[type] &= ~QTF_CLOSING;
+	for (type = 0; type < MAXQUOTAS; type++)
+		if (ump->um_quotas[type] != NULLVP)
+			break;
+	if (type == MAXQUOTAS)	/* no quota file of any type remains */
+		mp->mnt_flag &= ~MNT_QUOTA;
+	return (error);
+}
+
+/*
+ * Q_GETQUOTA - copy the current dqblk for (id, type) out to addr.
+ */
+int
+getquota(mp, id, type, addr)
+	struct mount *mp;
+	u_long id;
+	int type;
+	caddr_t addr;
+{
+	struct dquot *dq;
+	int error = dqget(NULLVP, id, VFSTOUFS(mp), type, &dq);
+
+	if (error == 0) {
+		error = copyout((caddr_t)&dq->dq_dqb, addr, sizeof (struct dqblk));
+		dqrele(NULLVP, dq);
+	}
+	return (error);
+}
+
+/*
+ * Q_SETQUOTA - assign the limits in a user-supplied dqblk; usage is kept.
+ */
+int
+setquota(mp, id, type, addr)
+	struct mount *mp;
+	u_long id;
+	int type;
+	caddr_t addr;
+{
+	register struct dquot *dq;
+	struct dquot *ndq;
+	struct ufsmount *ump = VFSTOUFS(mp);
+	struct dqblk newlim;
+	int error;
+
+	if (error = copyin(addr, (caddr_t)&newlim, sizeof (struct dqblk)))
+		return (error);
+	if (error = dqget(NULLVP, id, ump, type, &ndq))
+		return (error);
+	dq = ndq;
+	while (dq->dq_flags & DQ_LOCK) {	/* wait while locked */
+		dq->dq_flags |= DQ_WANT;
+		sleep((caddr_t)dq, PINOD+1);
+	}
+	/*
+	 * Copy all but the current values.
+	 * Reset time limit if previously had no soft limit or were
+	 * under it, but now have a soft limit and are over it.
+	 */
+	newlim.dqb_curblocks = dq->dq_curblocks;
+	newlim.dqb_curinodes = dq->dq_curinodes;
+	if (dq->dq_id != 0) {	/* only id 0 takes times from the caller */
+		newlim.dqb_btime = dq->dq_btime;
+		newlim.dqb_itime = dq->dq_itime;
+	}
+	if (newlim.dqb_bsoftlimit &&
+	    dq->dq_curblocks >= newlim.dqb_bsoftlimit &&
+	    (dq->dq_bsoftlimit == 0 || dq->dq_curblocks < dq->dq_bsoftlimit))
+		newlim.dqb_btime = time.tv_sec + ump->um_btime[type];
+	if (newlim.dqb_isoftlimit &&
+	    dq->dq_curinodes >= newlim.dqb_isoftlimit &&
+	    (dq->dq_isoftlimit == 0 || dq->dq_curinodes < dq->dq_isoftlimit))
+		newlim.dqb_itime = time.tv_sec + ump->um_itime[type];
+	dq->dq_dqb = newlim;	/* tests below see the new limits */
+	if (dq->dq_curblocks < dq->dq_bsoftlimit)
+		dq->dq_flags &= ~DQ_BLKS;
+	if (dq->dq_curinodes < dq->dq_isoftlimit)
+		dq->dq_flags &= ~DQ_INODS;
+	if (dq->dq_isoftlimit == 0 && dq->dq_bsoftlimit == 0 &&
+	    dq->dq_ihardlimit == 0 && dq->dq_bhardlimit == 0)
+		dq->dq_flags |= DQ_FAKE;	/* no limits set at all */
+	else
+		dq->dq_flags &= ~DQ_FAKE;
+	dq->dq_flags |= DQ_MOD;
+	dqrele(NULLVP, dq);
+	return (0);
+}
+
+/*
+ * Q_SETUSE - set current inode and block usage.
+ *
+ * Copies a dqblk in from addr and takes only its dqb_curblocks and
+ * dqb_curinodes fields as the new usage for (id, type) on mp; the
+ * limits stored in the dquot are left alone.
+ * Returns 0 on success, or the error from copyin() or dqget().
+ */
+int
+setuse(mp, id, type, addr)
+	struct mount *mp;
+	u_long id;
+	int type;
+	caddr_t addr;
+{
+	register struct dquot *dq;
+	struct ufsmount *ump = VFSTOUFS(mp);
+	struct dquot *ndq;
+	struct dqblk usage;
+	int error;
+
+	if (error = copyin(addr, (caddr_t)&usage, sizeof (struct dqblk)))
+		return (error);
+	if (error = dqget(NULLVP, id, ump, type, &ndq))
+		return (error);
+	dq = ndq;
+	/*
+	 * Wait until nobody has the dquot marked DQ_LOCK; setting DQ_WANT
+	 * asks the current holder to wakeup() sleepers on this dquot.
+	 */
+	while (dq->dq_flags & DQ_LOCK) {
+		dq->dq_flags |= DQ_WANT;
+		sleep((caddr_t)dq, PINOD+1);
+	}
+	/*
+	 * Reset time limit if have a soft limit and were
+	 * previously under it, but are now over it.
+	 */
+	if (dq->dq_bsoftlimit && dq->dq_curblocks < dq->dq_bsoftlimit &&
+	    usage.dqb_curblocks >= dq->dq_bsoftlimit)
+		dq->dq_btime = time.tv_sec + ump->um_btime[type];
+	if (dq->dq_isoftlimit && dq->dq_curinodes < dq->dq_isoftlimit &&
+	    usage.dqb_curinodes >= dq->dq_isoftlimit)
+		dq->dq_itime = time.tv_sec + ump->um_itime[type];
+	dq->dq_curblocks = usage.dqb_curblocks;
+	dq->dq_curinodes = usage.dqb_curinodes;
+	/* New usage below the soft limit: clear DQ_BLKS / DQ_INODS. */
+	if (dq->dq_curblocks < dq->dq_bsoftlimit)
+		dq->dq_flags &= ~DQ_BLKS;
+	if (dq->dq_curinodes < dq->dq_isoftlimit)
+		dq->dq_flags &= ~DQ_INODS;
+	/* Mark modified so that dqrele()/dqsync() write it to the quota file. */
+	dq->dq_flags |= DQ_MOD;
+	dqrele(NULLVP, dq);
+	return (0);
+}
+
+/*
+ * Q_SYNC - sync quota files to disk.
+ *
+ * Walks the vnodes of mp and calls dqsync() on every attached dquot
+ * that has DQ_MOD set.  Always returns 0; the result of the individual
+ * dqsync() calls is not propagated.
+ */
+int
+qsync(mp)
+	struct mount *mp;
+{
+	struct ufsmount *ump = VFSTOUFS(mp);
+	register struct vnode *vp, *nextvp;
+	register struct dquot *dq;
+	register int i;
+
+	/*
+	 * The mount point must have MNT_MPBUSY set on entry;
+	 * anything else is treated as a fatal caller error.
+	 */
+	if ((mp->mnt_flag & MNT_MPBUSY) == 0)
+		panic("qsync: not busy");
+	/*
+	 * Check if the mount point has any quotas.
+	 * If not, simply return.
+	 */
+	for (i = 0; i < MAXQUOTAS; i++)
+		if (ump->um_quotas[i] != NULLVP)
+			break;
+	if (i == MAXQUOTAS)
+		return (0);
+	/*
+	 * Search vnodes associated with this mount point,
+	 * synchronizing any modified dquot structures.
+	 */
+again:
+	for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = nextvp) {
+		/* Remember the successor before anything below can sleep. */
+		nextvp = vp->v_mntvnodes.le_next;
+		/* Vnodes that are currently locked are skipped. */
+		if (VOP_ISLOCKED(vp))
+			continue;
+		/* If the vnode cannot be obtained, rescan from the start. */
+		if (vget(vp, 1))
+			goto again;
+		for (i = 0; i < MAXQUOTAS; i++) {
+			dq = VTOI(vp)->i_dquot[i];
+			if (dq != NODQUOT && (dq->dq_flags & DQ_MOD))
+				dqsync(vp, dq);
+		}
+		vput(vp);
+		/*
+		 * If the list changed while we were working (the successor
+		 * differs or the vnode left this mount), rescan from the start.
+		 */
+		if (vp->v_mntvnodes.le_next != nextvp || vp->v_mount != mp)
+			goto again;
+	}
+	return (0);
+}
+
+/*
+ * Code pertaining to management of the in-core dquot data structures.
+ *
+ * dqhashtbl is the hash table of in-core dquots and dqhash the value
+ * used to mask a hash into a bucket index (it is also the index of the
+ * last bucket); both are set up by dqinit().
+ */
+struct dquot **dqhashtbl;
+u_long dqhash;
+
+/*
+ * Dquot free list.
+ *
+ * dqfreel is the head of the list and dqback points at the pointer slot
+ * in which the next appended dquot is stored (initially the head itself).
+ * numdquot counts the dquots allocated so far; desireddquot is the
+ * allocation target, which dqget() raises in steps of DQUOTINC.
+ */
+#define	DQUOTINC	5	/* minimum free dquots desired */
+struct dquot *dqfreel, **dqback = &dqfreel;
+long numdquot, desireddquot = DQUOTINC;
+
+/*
+ * Initialize the quota system.
+ *
+ * Creates the dquot hash table with hashinit(), sized from
+ * desiredvnodes, storing the table in dqhashtbl and the companion
+ * value returned through the last argument in dqhash.
+ */
+void
+dqinit()
+{
+
+	dqhashtbl = hashinit(desiredvnodes, M_DQUOT, &dqhash);
+}
+
+/*
+ * Obtain a dquot structure for the specified identifier and quota file
+ * reading the information from the file if necessary.
+ *
+ * vp is the vnode on whose behalf the lookup is made (or NULLVP); it is
+ * only compared against the quota file vnode to decide whether that
+ * file has to be locked here.  On success a referenced dquot is stored
+ * in *dqp and 0 is returned.  On failure *dqp is set to NODQUOT and the
+ * return value is EINVAL (quotas of this type not enabled or being
+ * closed), EUSERS (no dquot available) or the error from VOP_READ().
+ */
+int
+dqget(vp, id, ump, type, dqp)
+	struct vnode *vp;
+	u_long id;
+	register struct ufsmount *ump;
+	register int type;
+	struct dquot **dqp;
+{
+	register struct dquot *dq, *dp, **dpp;
+	register struct vnode *dqvp;
+	struct iovec aiov;
+	struct uio auio;
+	int error;
+
+	dqvp = ump->um_quotas[type];
+	if (dqvp == NULLVP || (ump->um_qflags[type] & QTF_CLOSING)) {
+		*dqp = NODQUOT;
+		return (EINVAL);
+	}
+	/*
+	 * Check the cache first.
+	 */
+	dpp = &dqhashtbl[((((int)(dqvp)) >> 8) + id) & dqhash];
+	for (dq = *dpp; dq; dq = dq->dq_forw) {
+		if (dq->dq_id != id ||
+		    dq->dq_ump->um_quotas[dq->dq_type] != dqvp)
+			continue;
+		/*
+		 * Cache hit with no references.  Take
+		 * the structure off the free list.
+		 */
+		if (dq->dq_cnt == 0) {
+			if ((dp = dq->dq_freef) != NODQUOT)
+				dp->dq_freeb = dq->dq_freeb;
+			else
+				dqback = dq->dq_freeb;
+			*dq->dq_freeb = dp;
+		}
+		DQREF(dq);
+		*dqp = dq;
+		return (0);
+	}
+	/*
+	 * Not in cache, allocate a new one.
+	 * The allocation target grows while the free list is empty and
+	 * the overall cap of MAXQUOTAS * desiredvnodes is not reached.
+	 */
+	if (dqfreel == NODQUOT && numdquot < MAXQUOTAS * desiredvnodes)
+		desireddquot += DQUOTINC;
+	if (numdquot < desireddquot) {
+		dq = (struct dquot *)malloc(sizeof *dq, M_DQUOT, M_WAITOK);
+		bzero((char *)dq, sizeof *dq);
+		numdquot++;
+	} else {
+		/* Recycle the dquot at the head of the free list. */
+		if ((dq = dqfreel) == NULL) {
+			tablefull("dquot");
+			*dqp = NODQUOT;
+			return (EUSERS);
+		}
+		if (dq->dq_cnt || (dq->dq_flags & DQ_MOD))
+			panic("free dquot isn't");
+		if ((dp = dq->dq_freef) != NODQUOT)
+			dp->dq_freeb = &dqfreel;
+		else
+			dqback = &dqfreel;
+		dqfreel = dp;
+		dq->dq_freef = NULL;
+		dq->dq_freeb = NULL;
+		/*
+		 * Unhook the recycled dquot from its old hash chain.  A
+		 * dquot that dqflush() or the read-error path below has
+		 * already unhashed has dq_back == NULL and must be left
+		 * alone; storing through dq_back would otherwise write
+		 * through a null pointer.
+		 */
+		if (dq->dq_back != NULL) {
+			if (dp = dq->dq_forw)
+				dp->dq_back = dq->dq_back;
+			*dq->dq_back = dp;
+		}
+	}
+	/*
+	 * Initialize the contents of the dquot structure.
+	 * The quota file is locked here unless it is the caller's own vnode.
+	 */
+	if (vp != dqvp)
+		VOP_LOCK(dqvp);
+	/* Insert at the head of the hash chain selected above. */
+	if (dp = *dpp)
+		dp->dq_back = &dq->dq_forw;
+	dq->dq_forw = dp;
+	dq->dq_back = dpp;
+	*dpp = dq;
+	DQREF(dq);
+	/* DQ_LOCK keeps other users away while the entry is being read. */
+	dq->dq_flags = DQ_LOCK;
+	dq->dq_id = id;
+	dq->dq_ump = ump;
+	dq->dq_type = type;
+	auio.uio_iov = &aiov;
+	auio.uio_iovcnt = 1;
+	aiov.iov_base = (caddr_t)&dq->dq_dqb;
+	aiov.iov_len = sizeof (struct dqblk);
+	auio.uio_resid = sizeof (struct dqblk);
+	/*
+	 * Widen id before multiplying so that the byte offset of a large
+	 * id is computed in off_t and cannot wrap in u_long arithmetic.
+	 */
+	auio.uio_offset = (off_t)id * sizeof (struct dqblk);
+	auio.uio_segflg = UIO_SYSSPACE;
+	auio.uio_rw = UIO_READ;
+	auio.uio_procp = (struct proc *)0;
+	error = VOP_READ(dqvp, &auio, 0, ump->um_cred[type]);
+	/* Nothing read and no error: no record for this id, use zeroes. */
+	if (auio.uio_resid == sizeof(struct dqblk) && error == 0)
+		bzero((caddr_t)&dq->dq_dqb, sizeof(struct dqblk));
+	if (vp != dqvp)
+		VOP_UNLOCK(dqvp);
+	if (dq->dq_flags & DQ_WANT)
+		wakeup((caddr_t)dq);
+	dq->dq_flags = 0;
+	/*
+	 * I/O error in reading quota file, release
+	 * quota structure and reflect problem to caller.
+	 */
+	if (error) {
+		if (dp = dq->dq_forw)
+			dp->dq_back = dq->dq_back;
+		*dq->dq_back = dp;
+		dq->dq_forw = NULL;
+		dq->dq_back = NULL;
+		dqrele(vp, dq);
+		*dqp = NODQUOT;
+		return (error);
+	}
+	/*
+	 * Check for no limit to enforce.
+	 * Initialize time values if necessary.
+	 */
+	if (dq->dq_isoftlimit == 0 && dq->dq_bsoftlimit == 0 &&
+	    dq->dq_ihardlimit == 0 && dq->dq_bhardlimit == 0)
+		dq->dq_flags |= DQ_FAKE;
+	if (dq->dq_id != 0) {
+		if (dq->dq_btime == 0)
+			dq->dq_btime = time.tv_sec + ump->um_btime[type];
+		if (dq->dq_itime == 0)
+			dq->dq_itime = time.tv_sec + ump->um_itime[type];
+	}
+	*dqp = dq;
+	return (0);
+}
+
+/*
+ * Take one more reference on a dquot by bumping its use count.
+ */
+void
+dqref(dq)
+	struct dquot *dq;
+{
+
+	dq->dq_cnt += 1;
+}
+
+/*
+ * Release a reference to a dquot.
+ *
+ * NODQUOT is accepted and ignored.  When the last reference goes away
+ * a modified dquot is first written back with dqsync() and the
+ * structure is then appended to the tail of the free list.  vp is
+ * passed through to dqsync() unchanged.
+ */
+void
+dqrele(vp, dq)
+	struct vnode *vp;
+	register struct dquot *dq;
+{
+
+	if (dq == NODQUOT)
+		return;
+	/* Not the last reference: just drop the count. */
+	if (dq->dq_cnt > 1) {
+		dq->dq_cnt--;
+		return;
+	}
+	if (dq->dq_flags & DQ_MOD)
+		(void) dqsync(vp, dq);
+	/*
+	 * The count is re-examined after dqsync() returns; if the dquot
+	 * is still referenced at this point it stays off the free list.
+	 */
+	if (--dq->dq_cnt > 0)
+		return;
+	/*
+	 * Append to the free list: dqback addresses the slot that ends
+	 * the list (the head pointer itself when the list is empty).
+	 */
+	if (dqfreel != NODQUOT) {
+		*dqback = dq;
+		dq->dq_freeb = dqback;
+	} else {
+		dqfreel = dq;
+		dq->dq_freeb = &dqfreel;
+	}
+	dq->dq_freef = NODQUOT;
+	dqback = &dq->dq_freef;
+}
+
+/*
+ * Update the disk quota in the quota file.
+ *
+ * Writes the dqblk of a modified dquot to its slot in the quota file.
+ * vp is the vnode on whose behalf the call is made (or NULLVP); the
+ * quota file is locked here unless it is that same vnode.
+ * Returns 0 if nothing had to be written or the write succeeded, the
+ * error from VOP_WRITE(), or EIO when the write was short.
+ */
+int
+dqsync(vp, dq)
+	struct vnode *vp;
+	register struct dquot *dq;
+{
+	struct vnode *dqvp;
+	struct iovec aiov;
+	struct uio auio;
+	int error;
+
+	if (dq == NODQUOT)
+		panic("dqsync: dquot");
+	if ((dq->dq_flags & DQ_MOD) == 0)
+		return (0);
+	if ((dqvp = dq->dq_ump->um_quotas[dq->dq_type]) == NULLVP)
+		panic("dqsync: file");
+	if (vp != dqvp)
+		VOP_LOCK(dqvp);
+	/*
+	 * Wait for the current DQ_LOCK holder.  If the dquot is no longer
+	 * modified once we wake up, somebody else has written it and
+	 * there is nothing left to do.
+	 */
+	while (dq->dq_flags & DQ_LOCK) {
+		dq->dq_flags |= DQ_WANT;
+		sleep((caddr_t)dq, PINOD+2);
+		if ((dq->dq_flags & DQ_MOD) == 0) {
+			if (vp != dqvp)
+				VOP_UNLOCK(dqvp);
+			return (0);
+		}
+	}
+	dq->dq_flags |= DQ_LOCK;
+	auio.uio_iov = &aiov;
+	auio.uio_iovcnt = 1;
+	aiov.iov_base = (caddr_t)&dq->dq_dqb;
+	aiov.iov_len = sizeof (struct dqblk);
+	auio.uio_resid = sizeof (struct dqblk);
+	/*
+	 * Widen the id before multiplying so that the byte offset of a
+	 * large id is computed in off_t and cannot wrap in u_long
+	 * arithmetic (which would overwrite another id's record).
+	 */
+	auio.uio_offset = (off_t)dq->dq_id * sizeof (struct dqblk);
+	auio.uio_segflg = UIO_SYSSPACE;
+	auio.uio_rw = UIO_WRITE;
+	auio.uio_procp = (struct proc *)0;
+	error = VOP_WRITE(dqvp, &auio, 0, dq->dq_ump->um_cred[dq->dq_type]);
+	/* A short write without an error code is reported as EIO. */
+	if (auio.uio_resid && error == 0)
+		error = EIO;
+	if (dq->dq_flags & DQ_WANT)
+		wakeup((caddr_t)dq);
+	/* DQ_MOD is cleared even when the write failed. */
+	dq->dq_flags &= ~(DQ_MOD|DQ_LOCK|DQ_WANT);
+	if (vp != dqvp)
+		VOP_UNLOCK(dqvp);
+	return (error);
+}
+
+/*
+ * Flush all entries from the cache for a particular vnode.
+ *
+ * vp is a quota file vnode.  Every cached dquot belonging to that
+ * quota file must be unreferenced; finding one with a non-zero count
+ * is a panic.
+ */
+void
+dqflush(vp)
+	register struct vnode *vp;
+{
+	register struct dquot *dq, *dp, **dpp, *nextdq;
+
+	/*
+	 * Move all dquot's that used to refer to this quota
+	 * file off their hash chains (they will eventually
+	 * fall off the head of the free list and be re-used).
+	 */
+	for (dpp = &dqhashtbl[dqhash]; dpp >= dqhashtbl; dpp--) {
+		for (dq = *dpp; dq; dq = nextdq) {
+			/* Saved first: dq_forw is cleared when unhashing. */
+			nextdq = dq->dq_forw;
+			if (dq->dq_ump->um_quotas[dq->dq_type] != vp)
+				continue;
+			if (dq->dq_cnt)
+				panic("dqflush: stray dquot");
+			if (dp = dq->dq_forw)
+				dp->dq_back = dq->dq_back;
+			*dq->dq_back = dp;
+			/* Null links and owner mark the dquot as unhashed. */
+			dq->dq_forw = NULL;
+			dq->dq_back = NULL;
+			dq->dq_ump = (struct ufsmount *)0;
+		}
+	}
+}
diff --git a/sys/ufs/ufs/ufs_readwrite.c b/sys/ufs/ufs/ufs_readwrite.c
new file mode 100644
index 00000000000..5ead2c1a9ad
--- /dev/null
+++ b/sys/ufs/ufs/ufs_readwrite.c
@@ -0,0 +1,295 @@
+/*-
+ * Copyright (c) 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)ufs_readwrite.c	8.7 (Berkeley) 1/21/94
+ */
+
+/*
+ * Name mapping for the shared read/write code below.  With
+ * LFS_READWRITE defined the READ/WRITE bodies are compiled as
+ * lfs_read/lfs_write operating on a struct lfs (reached through
+ * i_lfs, with the fs_* field names redirected to their lfs_*
+ * counterparts); otherwise they become ffs_read/ffs_write operating
+ * on a struct fs.  READ_S and WRITE_S are the matching function names
+ * for use in panic messages.
+ */
+#ifdef LFS_READWRITE
+#define	BLKSIZE(a, b, c)	blksize(a)
+#define	FS			struct lfs
+#define	I_FS			i_lfs
+#define	READ			lfs_read
+#define	READ_S			"lfs_read"
+#define	WRITE			lfs_write
+#define	WRITE_S			"lfs_write"
+#define	fs_bsize		lfs_bsize
+#define	fs_maxfilesize		lfs_maxfilesize
+#else
+#define	BLKSIZE(a, b, c)	blksize(a, b, c)
+#define	FS			struct fs
+#define	I_FS			i_fs
+#define	READ			ffs_read
+#define	READ_S			"ffs_read"
+#define	WRITE			ffs_write
+#define	WRITE_S			"ffs_write"
+#endif
+
+/*
+ * Vnode op for reading.
+ *
+ * Copies data from the file behind ap->a_vp into ap->a_uio, one
+ * filesystem block per loop iteration, until the request is satisfied,
+ * end of file is reached or an error occurs.  Returns EFBIG when the
+ * starting offset lies beyond fs_maxfilesize, otherwise the error from
+ * the buffer read or uiomove() (0 on success).  IN_ACCESS is set on the
+ * inode whenever the transfer loop has been entered.
+ */
+/* ARGSUSED */
+READ(ap)
+	struct vop_read_args /* {
+		struct vnode *a_vp;
+		struct uio *a_uio;
+		int a_ioflag;
+		struct ucred *a_cred;
+	} */ *ap;
+{
+	register struct vnode *vp;
+	register struct inode *ip;
+	register struct uio *uio;
+	register FS *fs;
+	struct buf *bp;
+	daddr_t lbn, nextlbn;
+	off_t bytesinfile;
+	long size, xfersize, blkoffset;
+	int error;
+	u_short mode;
+
+	vp = ap->a_vp;
+	ip = VTOI(vp);
+	mode = ip->i_mode;
+	uio = ap->a_uio;
+
+#ifdef DIAGNOSTIC
+	if (uio->uio_rw != UIO_READ)
+		panic("%s: mode", READ_S);
+
+	/*
+	 * Only symbolic links at least mnt_maxsymlinklen long, regular
+	 * files and directories are expected to be read through here.
+	 */
+	if (vp->v_type == VLNK) {
+		if ((int)ip->i_size < vp->v_mount->mnt_maxsymlinklen)
+			panic("%s: short symlink", READ_S);
+	} else if (vp->v_type != VREG && vp->v_type != VDIR)
+		panic("%s: type %d", READ_S, vp->v_type);
+#endif
+	fs = ip->I_FS;
+	if ((u_quad_t)uio->uio_offset > fs->fs_maxfilesize)
+		return (EFBIG);
+
+	/* bp is reset to NULL after every completed iteration. */
+	for (error = 0, bp = NULL; uio->uio_resid > 0; bp = NULL) {
+		/* Stop at (or beyond) end of file. */
+		if ((bytesinfile = ip->i_size - uio->uio_offset) <= 0)
+			break;
+		lbn = lblkno(fs, uio->uio_offset);
+		nextlbn = lbn + 1;
+		size = BLKSIZE(fs, ip, lbn);
+		blkoffset = blkoff(fs, uio->uio_offset);
+		/*
+		 * Transfer at most the rest of this block, clipped to what
+		 * the caller asked for and to what is left in the file.
+		 */
+		xfersize = fs->fs_bsize - blkoffset;
+		if (uio->uio_resid < xfersize)
+			xfersize = uio->uio_resid;
+		if (bytesinfile < xfersize)
+			xfersize = bytesinfile;
+
+#ifdef LFS_READWRITE
+		(void)lfs_check(vp, lbn);
+		error = cluster_read(vp, ip->i_size, lbn, size, NOCRED, &bp);
+#else
+		/*
+		 * Choose the read primitive: plain bread() when the next
+		 * block would start past the end of the file, cluster_read()
+		 * when doclusterread is set, breadn() with one block of
+		 * read-ahead when this block directly follows the last one
+		 * read (v_lastr), and plain bread() otherwise.
+		 */
+		if (lblktosize(fs, nextlbn) > ip->i_size)
+			error = bread(vp, lbn, size, NOCRED, &bp);
+		else if (doclusterread)
+			error = cluster_read(vp,
+			    ip->i_size, lbn, size, NOCRED, &bp);
+		else if (lbn - 1 == vp->v_lastr) {
+			int nextsize = BLKSIZE(fs, ip, nextlbn);
+			error = breadn(vp, lbn,
+			    size, &nextlbn, &nextsize, 1, NOCRED, &bp);
+		} else
+			error = bread(vp, lbn, size, NOCRED, &bp);
+#endif
+		if (error)
+			break;
+		vp->v_lastr = lbn;
+
+		/*
+		 * We should only get non-zero b_resid when an I/O error
+		 * has occurred, which should cause us to break above.
+		 * However, if the short read did not cause an error,
+		 * then we want to ensure that we do not uiomove bad
+		 * or uninitialized data.
+		 */
+		size -= bp->b_resid;
+		if (size < xfersize) {
+			if (size == 0)
+				break;
+			xfersize = size;
+		}
+		if (error =
+		    uiomove((char *)bp->b_data + blkoffset, (int)xfersize, uio))
+			break;
+
+		/*
+		 * For regular files, flag the buffer B_AGE once the transfer
+		 * reached the end of the block or the end of the file.
+		 */
+		if (S_ISREG(mode) && (xfersize + blkoffset == fs->fs_bsize ||
+		    uio->uio_offset == ip->i_size))
+			bp->b_flags |= B_AGE;
+		brelse(bp);
+	}
+	/* Release a buffer still held because the loop was left by break. */
+	if (bp != NULL)
+		brelse(bp);
+	ip->i_flag |= IN_ACCESS;
+	return (error);
+}
+
+/*
+ * Vnode op for writing.
+ *
+ * Copies data from ap->a_uio into the file behind ap->a_vp, one
+ * filesystem block per loop iteration, allocating blocks as needed.
+ * Returns EPERM for a non-appending write to an append-only file,
+ * EFBIG when the write would exceed fs_maxfilesize or the process
+ * file size limit (SIGXFSZ is posted in the latter case), otherwise
+ * the error from block allocation, uiomove() or VOP_UPDATE()
+ * (0 on success).
+ */
+WRITE(ap)
+	struct vop_write_args /* {
+		struct vnode *a_vp;
+		struct uio *a_uio;
+		int a_ioflag;
+		struct ucred *a_cred;
+	} */ *ap;
+{
+	register struct vnode *vp;
+	register struct uio *uio;
+	register struct inode *ip;
+	register FS *fs;
+	struct buf *bp;
+	struct proc *p;
+	daddr_t lbn;
+	off_t osize;
+	int blkoffset, error, flags, ioflag, resid, size, xfersize;
+
+	ioflag = ap->a_ioflag;
+	uio = ap->a_uio;
+	vp = ap->a_vp;
+	ip = VTOI(vp);
+
+#ifdef DIAGNOSTIC
+	if (uio->uio_rw != UIO_WRITE)
+		panic("%s: mode", WRITE_S);
+#endif
+
+	switch (vp->v_type) {
+	case VREG:
+		/* IO_APPEND forces the write to start at end of file. */
+		if (ioflag & IO_APPEND)
+			uio->uio_offset = ip->i_size;
+		/* Append-only files may only be written at their end. */
+		if ((ip->i_flags & APPEND) && uio->uio_offset != ip->i_size)
+			return (EPERM);
+		/* FALLTHROUGH */
+	case VLNK:
+		break;
+	case VDIR:
+		/* Directory writes are required to be synchronous. */
+		if ((ioflag & IO_SYNC) == 0)
+			panic("%s: nonsync dir write", WRITE_S);
+		break;
+	default:
+		panic("%s: type", WRITE_S);
+	}
+
+	fs = ip->I_FS;
+	if (uio->uio_offset < 0 ||
+	    (u_quad_t)uio->uio_offset + uio->uio_resid > fs->fs_maxfilesize)
+		return (EFBIG);
+	/*
+	 * Maybe this should be above the vnode op call, but so long as
+	 * file servers have no limits, I don't think it matters.
+	 */
+	p = uio->uio_procp;
+	if (vp->v_type == VREG && p &&
+	    uio->uio_offset + uio->uio_resid >
+	    p->p_rlimit[RLIMIT_FSIZE].rlim_cur) {
+		psignal(p, SIGXFSZ);
+		return (EFBIG);
+	}
+
+	/* Remember the starting state so a failed write can be undone. */
+	resid = uio->uio_resid;
+	osize = ip->i_size;
+	flags = ioflag & IO_SYNC ? B_SYNC : 0;
+
+	for (error = 0; uio->uio_resid > 0;) {
+		lbn = lblkno(fs, uio->uio_offset);
+		blkoffset = blkoff(fs, uio->uio_offset);
+		/* At most the rest of this block, clipped to the request. */
+		xfersize = fs->fs_bsize - blkoffset;
+		if (uio->uio_resid < xfersize)
+			xfersize = uio->uio_resid;
+#ifdef LFS_READWRITE
+		(void)lfs_check(vp, lbn);
+		error = lfs_balloc(vp, xfersize, lbn, &bp);
+#else
+		/*
+		 * B_CLRBUF is requested for partial-block writes and
+		 * dropped when a whole block is about to be overwritten.
+		 */
+		if (fs->fs_bsize > xfersize)
+			flags |= B_CLRBUF;
+		else
+			flags &= ~B_CLRBUF;
+
+		error = ffs_balloc(ip,
+		    lbn, blkoffset + xfersize, ap->a_cred, &bp, flags);
+#endif
+		if (error)
+			break;
+		/* Grow the file (and tell the pager) before copying data in. */
+		if (uio->uio_offset + xfersize > ip->i_size) {
+			ip->i_size = uio->uio_offset + xfersize;
+			vnode_pager_setsize(vp, (u_long)ip->i_size);
+		}
+		(void)vnode_pager_uncache(vp);
+
+		size = BLKSIZE(fs, ip, lbn) - bp->b_resid;
+		if (size < xfersize)
+			xfersize = size;
+
+		error =
+		    uiomove((char *)bp->b_data + blkoffset, (int)xfersize, uio);
+		/*
+		 * The buffer is handed back to the buffer cache even when
+		 * uiomove() failed; the error is examined afterwards.
+		 */
+#ifdef LFS_READWRITE
+		(void)VOP_BWRITE(bp);
+#else
+		/*
+		 * IO_SYNC: synchronous bwrite().  Block filled to its end:
+		 * cluster_write() if doclusterwrite is set, else an aged
+		 * bawrite().  Otherwise: delayed write with bdwrite().
+		 */
+		if (ioflag & IO_SYNC)
+			(void)bwrite(bp);
+		else if (xfersize + blkoffset == fs->fs_bsize)
+			if (doclusterwrite)
+				cluster_write(bp, ip->i_size);
+			else {
+				bp->b_flags |= B_AGE;
+				bawrite(bp);
+			}
+		else
+			bdwrite(bp);
+#endif
+		if (error || xfersize == 0)
+			break;
+		ip->i_flag |= IN_CHANGE | IN_UPDATE;
+	}
+	/*
+	 * If we successfully wrote any data, and we are not the superuser
+	 * we clear the setuid and setgid bits as a precaution against
+	 * tampering.
+	 */
+	if (resid > uio->uio_resid && ap->a_cred && ap->a_cred->cr_uid != 0)
+		ip->i_mode &= ~(ISUID | ISGID);
+	if (error) {
+		/*
+		 * With IO_UNIT a failed write is undone as a whole: the file
+		 * is truncated back to its old size and the uio is rewound
+		 * to its state on entry.
+		 */
+		if (ioflag & IO_UNIT) {
+			(void)VOP_TRUNCATE(vp, osize,
+			    ioflag & IO_SYNC, ap->a_cred, uio->uio_procp);
+			uio->uio_offset -= resid - uio->uio_resid;
+			uio->uio_resid = resid;
+		}
+	} else if (resid > uio->uio_resid && (ioflag & IO_SYNC))
+		/* Synchronous write that moved data: update the inode too. */
+		error = VOP_UPDATE(vp, &time, &time, 1);
+	return (error);
+}
diff --git a/sys/ufs/ufs/ufs_vfsops.c b/sys/ufs/ufs/ufs_vfsops.c
new file mode 100644
index 00000000000..f806e0b2a83
--- /dev/null
+++ b/sys/ufs/ufs/ufs_vfsops.c
@@ -0,0 +1,206 @@
+/*
+ * Copyright (c) 1991, 1993, 1994
+ *	The Regents of the University of California.  All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)ufs_vfsops.c	8.4 (Berkeley) 4/16/94
+ */
+
+#include <sys/param.h>
+#include <sys/mbuf.h>
+#include <sys/mount.h>
+#include <sys/proc.h>
+#include <sys/buf.h>
+#include <sys/vnode.h>
+#include <sys/malloc.h>
+
+#include <miscfs/specfs/specdev.h>
+
+#include <ufs/ufs/quota.h>
+#include <ufs/ufs/inode.h>
+#include <ufs/ufs/ufsmount.h>
+#include <ufs/ufs/ufs_extern.h>
+
+/*
+ * Flag to permit forcible unmounting.
+ * Initialized to 1 (permitted); it is not consulted in this file.
+ */
+int doforce = 1;
+
+/*
+ * Make a filesystem operational.
+ * Nothing to do at the moment.
+ *
+ * All three arguments are ignored and 0 is always returned.
+ */
+/* ARGSUSED */
+int
+ufs_start(mp, flags, p)
+	struct mount *mp;
+	int flags;
+	struct proc *p;
+{
+
+	return (0);
+}
+
+/*
+ * Return the root of a filesystem.
+ *
+ * Fetches the vnode for inode ROOTINO of mp through VFS_VGET().  On
+ * success it is stored in *vpp and 0 is returned; on failure the error
+ * from VFS_VGET() is returned and *vpp is left untouched.
+ */
+int
+ufs_root(mp, vpp)
+	struct mount *mp;
+	struct vnode **vpp;
+{
+	struct vnode *rootvp;
+	int error;
+
+	error = VFS_VGET(mp, (ino_t)ROOTINO, &rootvp);
+	if (error == 0)
+		*vpp = rootvp;
+	return (error);
+}
+
+/*
+ * Do operations associated with quotas
+ *
+ * cmds packs two values: the quota command in the bits above
+ * SUBCMDSHIFT and the quota type in the low SUBCMDMASK bits.  uid
+ * names the id to operate on; -1 selects the real uid of the calling
+ * process p.  arg is the command specific argument.
+ *
+ * Q_GETQUOTA and Q_SYNC on the caller's own real uid need no special
+ * privilege; everything else requires superuser.  Returns EOPNOTSUPP
+ * when the kernel is built without QUOTA, EINVAL for an unknown command
+ * or type, otherwise the result of the selected operation.
+ */
+int
+ufs_quotactl(mp, cmds, uid, arg, p)
+	struct mount *mp;
+	int cmds;
+	uid_t uid;
+	caddr_t arg;
+	struct proc *p;
+{
+	int cmd, type, error;
+
+#ifndef QUOTA
+	return (EOPNOTSUPP);
+#else
+	if (uid == -1)
+		uid = p->p_cred->p_ruid;
+	cmd = cmds >> SUBCMDSHIFT;
+
+	switch (cmd) {
+	case Q_GETQUOTA:
+	case Q_SYNC:
+		if (uid == p->p_cred->p_ruid)
+			break;
+		/* fall through */
+	default:
+		if (error = suser(p->p_ucred, &p->p_acflag))
+			return (error);
+	}
+
+	/*
+	 * The quota type lives in the low bits of the packed argument
+	 * cmds.  It must not be taken from cmd, which has already been
+	 * shifted down and no longer contains the type bits.
+	 */
+	type = cmds & SUBCMDMASK;
+	if ((u_int)type >= MAXQUOTAS)
+		return (EINVAL);
+
+	switch (cmd) {
+
+	case Q_QUOTAON:
+		return (quotaon(p, mp, type, arg));
+
+	case Q_QUOTAOFF:
+		/* A mount point that cannot be busied is silently skipped. */
+		if (vfs_busy(mp))
+			return (0);
+		error = quotaoff(p, mp, type);
+		vfs_unbusy(mp);
+		return (error);
+
+	case Q_SETQUOTA:
+		return (setquota(mp, uid, type, arg));
+
+	case Q_SETUSE:
+		return (setuse(mp, uid, type, arg));
+
+	case Q_GETQUOTA:
+		return (getquota(mp, uid, type, arg));
+
+	case Q_SYNC:
+		/* A mount point that cannot be busied is silently skipped. */
+		if (vfs_busy(mp))
+			return (0);
+		error = qsync(mp);
+		vfs_unbusy(mp);
+		return (error);
+
+	default:
+		return (EINVAL);
+	}
+	/* NOTREACHED */
+#endif
+}
+
+/*
+ * Generic tail of fhtovp, run once the underlying filesystem has
+ * validated the file handle.
+ *
+ * Looks up the export entry for the client address nam on mp and, if
+ * there is one, fetches the vnode named by the file handle.  Returns
+ * EACCES when the client has no export entry, the VFS_VGET() error when
+ * the inode cannot be fetched, and ESTALE when the inode is unallocated
+ * (mode 0) or its generation number differs from the handle's; *vpp is
+ * NULLVP in the latter two cases.  On success *vpp, *exflagsp and
+ * *credanonp are filled in and 0 is returned.
+ */
+int
+ufs_check_export(mp, ufhp, nam, vpp, exflagsp, credanonp)
+	register struct mount *mp;
+	struct ufid *ufhp;
+	struct mbuf *nam;
+	struct vnode **vpp;
+	int *exflagsp;
+	struct ucred **credanonp;
+{
+	register struct inode *ip;
+	register struct netcred *export;
+	register struct ufsmount *ump;
+	struct vnode *nvp;
+	int error;
+
+	/*
+	 * Get the export permission structure for this <mp, client> tuple.
+	 */
+	ump = VFSTOUFS(mp);
+	export = vfs_export_lookup(mp, &ump->um_export, nam);
+	if (export == NULL)
+		return (EACCES);
+
+	error = VFS_VGET(mp, ufhp->ufid_ino, &nvp);
+	if (error != 0) {
+		*vpp = NULLVP;
+		return (error);
+	}
+
+	/* The handle is good only for a live inode of the same generation. */
+	ip = VTOI(nvp);
+	if (ip->i_mode != 0 && ip->i_gen == ufhp->ufid_gen) {
+		*vpp = nvp;
+		*exflagsp = export->netc_exflags;
+		*credanonp = &export->netc_anon;
+		return (0);
+	}
+
+	vput(nvp);
+	*vpp = NULLVP;
+	return (ESTALE);
+}
diff --git a/sys/ufs/ufs/ufs_vnops.c b/sys/ufs/ufs/ufs_vnops.c
new file mode 100644
index 00000000000..7b7c88376b9
--- /dev/null
+++ b/sys/ufs/ufs/ufs_vnops.c
@@ -0,0 +1,2159 @@
+/*
+ * Copyright (c) 1982, 1986, 1989, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)ufs_vnops.c	8.10 (Berkeley) 4/1/94
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/namei.h>
+#include <sys/resourcevar.h>
+#include <sys/kernel.h>
+#include <sys/file.h>
+#include <sys/stat.h>
+#include <sys/buf.h>
+#include <sys/proc.h>
+#include <sys/conf.h>
+#include <sys/mount.h>
+#include <sys/vnode.h>
+#include <sys/malloc.h>
+#include <sys/dirent.h>
+
+#include <vm/vm.h>
+
+#include <miscfs/specfs/specdev.h>
+
+#include <ufs/ufs/lockf.h>
+#include <ufs/ufs/quota.h>
+#include <ufs/ufs/inode.h>
+#include <ufs/ufs/dir.h>
+#include <ufs/ufs/ufsmount.h>
+#include <ufs/ufs/ufs_extern.h>
+
+/*
+ * Prototypes for file-local helpers; ufs_chown() is used by
+ * ufs_setattr() to apply owner and group changes.
+ */
+static int ufs_chmod __P((struct vnode *, int, struct ucred *, struct proc *));
+static int ufs_chown
+	__P((struct vnode *, uid_t, gid_t, struct ucred *, struct proc *));
+
+/*
+ * Overlay of a quad_t with two longs, used by SETHIGH and SETLOW to
+ * replace one half of a quad while leaving the other half alone.
+ * NOTE(review): assumes a long is exactly half the size of a quad_t.
+ */
+union _qcvt {
+	quad_t qcvt;
+	long val[2];
+};
+/*
+ * SETHIGH(q, h): store h into the word of q selected by _QUAD_HIGHWORD.
+ * q is evaluated twice and must be an lvalue.  The expansion is a
+ * brace-enclosed block, not an expression.
+ */
+#define SETHIGH(q, h) { \
+	union _qcvt tmp; \
+	tmp.qcvt = (q); \
+	tmp.val[_QUAD_HIGHWORD] = (h); \
+	(q) = tmp.qcvt; \
+}
+/*
+ * SETLOW(q, l): store l into the word of q selected by _QUAD_LOWWORD.
+ * Same caveats as SETHIGH.
+ */
+#define SETLOW(q, l) { \
+	union _qcvt tmp; \
+	tmp.qcvt = (q); \
+	tmp.val[_QUAD_LOWWORD] = (l); \
+	(q) = tmp.qcvt; \
+}
+
+/*
+ * Create a regular file
+ *
+ * Thin wrapper around ufs_makeinode(): the inode mode is built from
+ * the type and mode in a_vap, and the result of ufs_makeinode() (0 or
+ * an error number) is handed straight back to the caller.
+ */
+int
+ufs_create(ap)
+	struct vop_create_args /* {
+		struct vnode *a_dvp;
+		struct vnode **a_vpp;
+		struct componentname *a_cnp;
+		struct vattr *a_vap;
+	} */ *ap;
+{
+
+	return (ufs_makeinode(MAKEIMODE(ap->a_vap->va_type,
+	    ap->a_vap->va_mode), ap->a_dvp, ap->a_vpp, ap->a_cnp));
+}
+
+/*
+ * Mknod vnode call
+ *
+ * Creates the inode with ufs_makeinode(), records the device number
+ * from a_vap if one was given, and then discards the new vnode again.
+ * On success 0 is returned and *a_vpp is set to 0, so the caller does
+ * not get a vnode back.  On failure the ufs_makeinode() error is
+ * returned.
+ */
+/* ARGSUSED */
+int
+ufs_mknod(ap)
+	struct vop_mknod_args /* {
+		struct vnode *a_dvp;
+		struct vnode **a_vpp;
+		struct componentname *a_cnp;
+		struct vattr *a_vap;
+	} */ *ap;
+{
+	register struct vattr *vap = ap->a_vap;
+	register struct vnode **vpp = ap->a_vpp;
+	register struct inode *ip;
+	int error;
+
+	if (error =
+	    ufs_makeinode(MAKEIMODE(vap->va_type, vap->va_mode),
+	    ap->a_dvp, vpp, ap->a_cnp))
+		return (error);
+	ip = VTOI(*vpp);
+	ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE;
+	if (vap->va_rdev != VNOVAL) {
+		/*
+		 * Want to be able to use this to make badblock
+		 * inodes, so don't truncate the dev number.
+		 */
+		ip->i_rdev = vap->va_rdev;
+	}
+	/*
+	 * Remove inode so that it will be reloaded by VFS_VGET and
+	 * checked to see if it is an alias of an existing entry in
+	 * the inode cache.
+	 *
+	 * NOTE(review): *vpp is still dereferenced after vput(); confirm
+	 * that the vnode cannot be recycled between vput() and vgone().
+	 */
+	vput(*vpp);
+	(*vpp)->v_type = VNON;
+	vgone(*vpp);
+	*vpp = 0;
+	return (0);
+}
+
+/*
+ * Open called.
+ *
+ * The only check made here concerns append-only files: opening one
+ * for writing without O_APPEND is refused with EPERM.  Every other
+ * open succeeds.
+ */
+/* ARGSUSED */
+int
+ufs_open(ap)
+	struct vop_open_args /* {
+		struct vnode *a_vp;
+		int  a_mode;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct inode *ip = VTOI(ap->a_vp);
+
+	/* Files without the append-only flag need no further checking. */
+	if ((ip->i_flags & APPEND) == 0)
+		return (0);
+	/*
+	 * Files marked append-only must be opened for appending.
+	 */
+	if ((ap->a_mode & (FWRITE | O_APPEND)) == FWRITE)
+		return (EPERM);
+	return (0);
+}
+
+/*
+ * Close called.
+ *
+ * Bring the inode times up to date, but only while other users of the
+ * vnode remain (use count above one) and the inode is not locked.
+ * Always returns 0.
+ */
+/* ARGSUSED */
+int
+ufs_close(ap)
+	struct vop_close_args /* {
+		struct vnode *a_vp;
+		int  a_fflag;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	register struct vnode *vp = ap->a_vp;
+	register struct inode *ip = VTOI(vp);
+
+	if (vp->v_usecount <= 1 || (ip->i_flag & IN_LOCKED))
+		return (0);
+	ITIMES(ip, &time, &time);
+	return (0);
+}
+
+/*
+ * Check whether the credentials a_cred permit the access requested in
+ * a_mode (any combination of VREAD, VWRITE and VEXEC) to the file.
+ *
+ * Returns 0 if access is granted, EPERM for a write to an immutable
+ * file, EACCES if the permission bits deny the request, or (with
+ * QUOTA) the error from getinoquota().  Exactly one permission class
+ * is consulted: owner, else group, else other.
+ */
+int
+ufs_access(ap)
+	struct vop_access_args /* {
+		struct vnode *a_vp;
+		int  a_mode;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	register struct vnode *vp = ap->a_vp;
+	register struct inode *ip = VTOI(vp);
+	register struct ucred *cred = ap->a_cred;
+	mode_t mask, mode = ap->a_mode;
+	register gid_t *gp;
+	int i, error;
+
+#ifdef DIAGNOSTIC
+	if (!VOP_ISLOCKED(vp)) {
+		vprint("ufs_access: not locked", vp);
+		panic("ufs_access: not locked");
+	}
+#endif
+#ifdef QUOTA
+	/*
+	 * A write check on a directory, symbolic link or regular file
+	 * also attaches the quota structures to the inode.
+	 */
+	if (mode & VWRITE)
+		switch (vp->v_type) {
+		case VDIR:
+		case VLNK:
+		case VREG:
+			if (error = getinoquota(ip))
+				return (error);
+			break;
+		}
+#endif
+
+	/* If immutable bit set, nobody gets to write it. */
+	if ((mode & VWRITE) && (ip->i_flags & IMMUTABLE))
+		return (EPERM);
+
+	/* Otherwise, user id 0 always gets access. */
+	if (cred->cr_uid == 0)
+		return (0);
+
+	mask = 0;
+
+	/* Otherwise, check the owner. */
+	if (cred->cr_uid == ip->i_uid) {
+		if (mode & VEXEC)
+			mask |= S_IXUSR;
+		if (mode & VREAD)
+			mask |= S_IRUSR;
+		if (mode & VWRITE)
+			mask |= S_IWUSR;
+		/* Every requested bit must be present in the file mode. */
+		return ((ip->i_mode & mask) == mask ? 0 : EACCES);
+	}
+
+	/* Otherwise, check the groups. */
+	for (i = 0, gp = cred->cr_groups; i < cred->cr_ngroups; i++, gp++)
+		if (ip->i_gid == *gp) {
+			if (mode & VEXEC)
+				mask |= S_IXGRP;
+			if (mode & VREAD)
+				mask |= S_IRGRP;
+			if (mode & VWRITE)
+				mask |= S_IWGRP;
+			return ((ip->i_mode & mask) == mask ? 0 : EACCES);
+		}
+
+	/* Otherwise, check everyone else. */
+	if (mode & VEXEC)
+		mask |= S_IXOTH;
+	if (mode & VREAD)
+		mask |= S_IROTH;
+	if (mode & VWRITE)
+		mask |= S_IWOTH;
+	return ((ip->i_mode & mask) == mask ? 0 : EACCES);
+}
+
+/*
+ * Get attribute vnode op.
+ *
+ * Fills *a_vap from the in-core inode of a_vp after bringing the
+ * inode times up to date with ITIMES().  a_cred and a_p are not used.
+ * Always returns 0.
+ */
+/* ARGSUSED */
+int
+ufs_getattr(ap)
+	struct vop_getattr_args /* {
+		struct vnode *a_vp;
+		struct vattr *a_vap;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	register struct vnode *vp = ap->a_vp;
+	register struct inode *ip = VTOI(vp);
+	register struct vattr *vap = ap->a_vap;
+
+	ITIMES(ip, &time, &time);
+	/*
+	 * Copy from inode table
+	 */
+	vap->va_fsid = ip->i_dev;
+	vap->va_fileid = ip->i_number;
+	/* The file type bits are reported separately in va_type below. */
+	vap->va_mode = ip->i_mode & ~IFMT;
+	vap->va_nlink = ip->i_nlink;
+	vap->va_uid = ip->i_uid;
+	vap->va_gid = ip->i_gid;
+	vap->va_rdev = (dev_t)ip->i_rdev;
+	vap->va_size = ip->i_din.di_size;
+	vap->va_atime = ip->i_atime;
+	vap->va_mtime = ip->i_mtime;
+	vap->va_ctime = ip->i_ctime;
+	vap->va_flags = ip->i_flags;
+	vap->va_gen = ip->i_gen;
+	/* this doesn't belong here */
+	/*
+	 * Block devices report BLKDEV_IOSIZE, character devices MAXBSIZE,
+	 * everything else the I/O size of the containing filesystem.
+	 */
+	if (vp->v_type == VBLK)
+		vap->va_blocksize = BLKDEV_IOSIZE;
+	else if (vp->v_type == VCHR)
+		vap->va_blocksize = MAXBSIZE;
+	else
+		vap->va_blocksize = vp->v_mount->mnt_stat.f_iosize;
+	/* i_blocks is converted to bytes with dbtob(). */
+	vap->va_bytes = dbtob(ip->i_blocks);
+	vap->va_type = vp->v_type;
+	vap->va_filerev = ip->i_modrev;
+	return (0);
+}
+
+/*
+ * Set attribute vnode op. called from several syscalls
+ *
+ * Applies every field of a_vap that is not VNOVAL to the inode behind
+ * a_vp, in this order: flags, owner/group, size, access/modify times,
+ * mode.  Processing stops at the first failure and that error is
+ * returned; changes already applied are not rolled back here.
+ *
+ * Returns 0 on success, EINVAL if an unsettable attribute was supplied,
+ * EPERM when flags forbid the change, EISDIR for a size change on a
+ * directory, or whatever error suser(), ufs_chown(), VOP_TRUNCATE(),
+ * VOP_ACCESS(), VOP_UPDATE() or ufs_chmod() reports.
+ */
+int
+ufs_setattr(ap)
+	struct vop_setattr_args /* {
+		struct vnode *a_vp;
+		struct vattr *a_vap;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	register struct vattr *vap = ap->a_vap;
+	register struct vnode *vp = ap->a_vp;
+	register struct inode *ip = VTOI(vp);
+	register struct ucred *cred = ap->a_cred;
+	register struct proc *p = ap->a_p;
+	struct timeval atimeval, mtimeval;
+	int error;
+
+	/*
+	 * Check for unsettable attributes.
+	 */
+	if ((vap->va_type != VNON) || (vap->va_nlink != VNOVAL) ||
+	    (vap->va_fsid != VNOVAL) || (vap->va_fileid != VNOVAL) ||
+	    (vap->va_blocksize != VNOVAL) || (vap->va_rdev != VNOVAL) ||
+	    ((int)vap->va_bytes != VNOVAL) || (vap->va_gen != VNOVAL)) {
+		return (EINVAL);
+	}
+	if (vap->va_flags != VNOVAL) {
+		/* Only the owner or the superuser may change flags. */
+		if (cred->cr_uid != ip->i_uid &&
+		    (error = suser(cred, &p->p_acflag)))
+			return (error);
+		if (cred->cr_uid == 0) {
+			/*
+			 * Root may replace the whole flag word, unless a
+			 * system immutable/append flag is set and the
+			 * securelevel has been raised above 0.
+			 */
+			if ((ip->i_flags & (SF_IMMUTABLE | SF_APPEND)) &&
+			    securelevel > 0)
+				return (EPERM);
+			ip->i_flags = vap->va_flags;
+		} else {
+			/*
+			 * Other users are refused outright while a system
+			 * immutable/append flag is set; otherwise the bits
+			 * in SF_SETTABLE are kept and only the UF_SETTABLE
+			 * bits of the request are taken.
+			 */
+			if (ip->i_flags & (SF_IMMUTABLE | SF_APPEND))
+				return (EPERM);
+			ip->i_flags &= SF_SETTABLE;
+			ip->i_flags |= (vap->va_flags & UF_SETTABLE);
+		}
+		ip->i_flag |= IN_CHANGE;
+		/*
+		 * If the request itself turns on immutable/append, stop
+		 * here: the remaining fields of a_vap are not applied.
+		 */
+		if (vap->va_flags & (IMMUTABLE | APPEND))
+			return (0);
+	}
+	/* Nothing else may be changed on an immutable or append-only file. */
+	if (ip->i_flags & (IMMUTABLE | APPEND))
+		return (EPERM);
+	/*
+	 * Go through the fields and update iff not VNOVAL.
+	 */
+	if (vap->va_uid != (uid_t)VNOVAL || vap->va_gid != (gid_t)VNOVAL)
+		if (error = ufs_chown(vp, vap->va_uid, vap->va_gid, cred, p))
+			return (error);
+	if (vap->va_size != VNOVAL) {
+		if (vp->v_type == VDIR)
+			return (EISDIR);
+		if (error = VOP_TRUNCATE(vp, vap->va_size, 0, cred, p))
+			return (error);
+	}
+	ip = VTOI(vp);
+	if (vap->va_atime.ts_sec != VNOVAL || vap->va_mtime.ts_sec != VNOVAL) {
+		/*
+		 * Setting times is refused only when all of these hold:
+		 * the caller is not the owner, is not the superuser, and
+		 * either VA_UTIMES_NULL is not set or the caller lacks
+		 * write access to the file.
+		 */
+		if (cred->cr_uid != ip->i_uid &&
+		    (error = suser(cred, &p->p_acflag)) &&
+		    ((vap->va_vaflags & VA_UTIMES_NULL) == 0 || 
+		    (error = VOP_ACCESS(vp, VWRITE, cred, p))))
+			return (error);
+		if (vap->va_atime.ts_sec != VNOVAL)
+			ip->i_flag |= IN_ACCESS;
+		if (vap->va_mtime.ts_sec != VNOVAL)
+			ip->i_flag |= IN_CHANGE | IN_UPDATE;
+		/* Convert the timespec values (nanoseconds) to timevals. */
+		atimeval.tv_sec = vap->va_atime.ts_sec;
+		atimeval.tv_usec = vap->va_atime.ts_nsec / 1000;
+		mtimeval.tv_sec = vap->va_mtime.ts_sec;
+		mtimeval.tv_usec = vap->va_mtime.ts_nsec / 1000;
+		if (error = VOP_UPDATE(vp, &atimeval, &mtimeval, 1))
+			return (error);
+	}
+	error = 0;
+	if (vap->va_mode != (mode_t)VNOVAL)
+		error = ufs_chmod(vp, (int)vap->va_mode, cred, p);
+	return (error);
+}
+
+/*
+ * Change the permission bits of the file behind vp to `mode'.
+ * Inode must be locked before calling.
+ *
+ * The caller must own the file or pass suser().  A caller whose uid
+ * is non-zero may not set the sticky bit on a non-directory (EFTYPE)
+ * nor the set-gid bit on a file whose group it is not a member of
+ * (EPERM).  Only the ALLPERMS bits of the inode mode are replaced.
+ * Returns 0 on success.
+ */
+static int
+ufs_chmod(vp, mode, cred, p)
+	register struct vnode *vp;
+	register int mode;
+	register struct ucred *cred;
+	struct proc *p;
+{
+	register struct inode *ip = VTOI(vp);
+	int err;
+
+	/* Ownership check: non-owners need superuser privilege. */
+	if (cred->cr_uid != ip->i_uid) {
+		err = suser(cred, &p->p_acflag);
+		if (err != 0)
+			return (err);
+	}
+
+	/* Extra restrictions for callers other than uid 0. */
+	if (cred->cr_uid != 0) {
+		if ((mode & S_ISTXT) && vp->v_type != VDIR)
+			return (EFTYPE);
+		if (!groupmember(ip->i_gid, cred) && (mode & ISGID))
+			return (EPERM);
+	}
+
+	/* Replace the permission bits, leaving the file type alone. */
+	ip->i_mode = (ip->i_mode & ~ALLPERMS) | (mode & ALLPERMS);
+	ip->i_flag |= IN_CHANGE;
+
+	/* A text vnode that ends up without the sticky bit is uncached. */
+	if ((vp->v_flag & VTEXT) && (ip->i_mode & S_ISTXT) == 0)
+		(void) vnode_pager_uncache(vp);
+	return (0);
+}
+
+/*
+ * Perform chown operation on inode ip;
+ * inode must be locked prior to call.
+ *
+ * A uid or gid of VNOVAL means "leave unchanged".  Returns 0 on
+ * success, the error from suser() if the caller lacks permission,
+ * or (with QUOTA) the error from getinoquota()/chkdq()/chkiq() when
+ * the new owner or group cannot be charged; in that case the old
+ * ownership is restored before returning.
+ *
+ * When the caller's uid is non-zero, the set-uid bit is cleared if
+ * the owner changed and the set-gid bit is cleared if the group
+ * changed.
+ */
+static int
+ufs_chown(vp, uid, gid, cred, p)
+	register struct vnode *vp;
+	uid_t uid;
+	gid_t gid;
+	struct ucred *cred;
+	struct proc *p;
+{
+	register struct inode *ip = VTOI(vp);
+	uid_t ouid;
+	gid_t ogid;
+	int error = 0;
+#ifdef QUOTA
+	register int i;
+	long change;
+#endif
+
+	/* Substitute the current ids for any that were not supplied. */
+	if (uid == (uid_t)VNOVAL)
+		uid = ip->i_uid;
+	if (gid == (gid_t)VNOVAL)
+		gid = ip->i_gid;
+	/*
+	 * If we don't own the file, are trying to change the owner
+	 * of the file, or are not a member of the target group,
+	 * the caller must be superuser or the call fails.
+	 */
+	if ((cred->cr_uid != ip->i_uid || uid != ip->i_uid ||
+	    !groupmember((gid_t)gid, cred)) &&
+	    (error = suser(cred, &p->p_acflag)))
+		return (error);
+	ogid = ip->i_gid;
+	ouid = ip->i_uid;
+#ifdef QUOTA
+	/*
+	 * Take the file's blocks and its one inode off the quota of the
+	 * ids that are changing.  The dquot of an id that is NOT
+	 * changing is released first (presumably so that chkdq()/chkiq()
+	 * skip it -- confirm against their handling of NODQUOT).
+	 */
+	if (error = getinoquota(ip))
+		return (error);
+	if (ouid == uid) {
+		dqrele(vp, ip->i_dquot[USRQUOTA]);
+		ip->i_dquot[USRQUOTA] = NODQUOT;
+	}
+	if (ogid == gid) {
+		dqrele(vp, ip->i_dquot[GRPQUOTA]);
+		ip->i_dquot[GRPQUOTA] = NODQUOT;
+	}
+	change = ip->i_blocks;
+	(void) chkdq(ip, -change, cred, CHOWN);
+	(void) chkiq(ip, -1, cred, CHOWN);
+	/* Drop every dquot reference before the ids are switched. */
+	for (i = 0; i < MAXQUOTAS; i++) {
+		dqrele(vp, ip->i_dquot[i]);
+		ip->i_dquot[i] = NODQUOT;
+	}
+#endif
+	ip->i_gid = gid;
+	ip->i_uid = uid;
+#ifdef QUOTA
+	/*
+	 * Charge the new ids.  On success jump to `good'; otherwise fall
+	 * through to the rollback below with `error' set.
+	 */
+	if ((error = getinoquota(ip)) == 0) {
+		if (ouid == uid) {
+			dqrele(vp, ip->i_dquot[USRQUOTA]);
+			ip->i_dquot[USRQUOTA] = NODQUOT;
+		}
+		if (ogid == gid) {
+			dqrele(vp, ip->i_dquot[GRPQUOTA]);
+			ip->i_dquot[GRPQUOTA] = NODQUOT;
+		}
+		if ((error = chkdq(ip, change, cred, CHOWN)) == 0) {
+			if ((error = chkiq(ip, 1, cred, CHOWN)) == 0)
+				goto good;
+			else
+				/* Inode charge failed: undo the block charge. */
+				(void) chkdq(ip, -change, cred, CHOWN|FORCE);
+		}
+		for (i = 0; i < MAXQUOTAS; i++) {
+			dqrele(vp, ip->i_dquot[i]);
+			ip->i_dquot[i] = NODQUOT;
+		}
+	}
+	/*
+	 * Rollback: restore the old ids and put the usage back on their
+	 * quotas with FORCE, then return the error recorded above.
+	 */
+	ip->i_gid = ogid;
+	ip->i_uid = ouid;
+	if (getinoquota(ip) == 0) {
+		if (ouid == uid) {
+			dqrele(vp, ip->i_dquot[USRQUOTA]);
+			ip->i_dquot[USRQUOTA] = NODQUOT;
+		}
+		if (ogid == gid) {
+			dqrele(vp, ip->i_dquot[GRPQUOTA]);
+			ip->i_dquot[GRPQUOTA] = NODQUOT;
+		}
+		(void) chkdq(ip, change, cred, FORCE|CHOWN);
+		(void) chkiq(ip, 1, cred, FORCE|CHOWN);
+		(void) getinoquota(ip);
+	}
+	return (error);
+good:
+	/* Re-attach any dquot released above; failure here is fatal. */
+	if (getinoquota(ip))
+		panic("chown: lost quota");
+#endif /* QUOTA */
+	if (ouid != uid || ogid != gid)
+		ip->i_flag |= IN_CHANGE;
+	/* Non-root changes of owner/group strip the set-id bits. */
+	if (ouid != uid && cred->cr_uid != 0)
+		ip->i_mode &= ~ISUID;
+	if (ogid != gid && cred->cr_uid != 0)
+		ip->i_mode &= ~ISGID;
+	return (0);
+}
+
+/*
+ * Ioctl vnode op.  No ioctl commands are implemented for ufs vnodes:
+ * every argument is ignored and ENOTTY is returned unconditionally.
+ */
+/* ARGSUSED */
+int
+ufs_ioctl(ap)
+	struct vop_ioctl_args /* {
+		struct vnode *a_vp;
+		int  a_command;
+		caddr_t  a_data;
+		int  a_fflag;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	return ENOTTY;
+}
+
+/*
+ * Select vnode op.  Always returns 1 without looking at any of its
+ * arguments; we should really check to see if I/O is possible.
+ */
+/* ARGSUSED */
+int
+ufs_select(ap)
+	struct vop_select_args /* {
+		struct vnode *a_vp;
+		int  a_which;
+		int  a_fflags;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	return 1;
+}
+
+/*
+ * Mmap a file
+ *
+ * NB Currently unsupported: the arguments are ignored and EINVAL is
+ * returned unconditionally.
+ */
+/* ARGSUSED */
+int
+ufs_mmap(ap)
+	struct vop_mmap_args /* {
+		struct vnode *a_vp;
+		int  a_fflags;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	return EINVAL;
+}
+
+/*
+ * Seek on a file
+ *
+ * There is nothing to do for ufs, so the arguments are ignored and
+ * success (0) is returned.
+ */
+/* ARGSUSED */
+int
+ufs_seek(ap)
+	struct vop_seek_args /* {
+		struct vnode *a_vp;
+		off_t  a_oldoff;
+		off_t  a_newoff;
+		struct ucred *a_cred;
+	} */ *ap;
+{
+	return 0;
+}
+
+/*
+ * Remove vnode op: delete the directory entry described by a_cnp from
+ * directory a_dvp and drop one link from the file a_vp.
+ *
+ * Fails with EPERM if the file is immutable or append-only, or if the
+ * directory is append-only; otherwise returns whatever ufs_dirremove()
+ * returns.  Both vnodes are released on every path (vrele for a_vp
+ * when it is the same vnode as a_dvp, vput otherwise).
+ */
+int
+ufs_remove(ap)
+	struct vop_remove_args /* {
+		struct vnode *a_dvp;
+		struct vnode *a_vp;
+		struct componentname *a_cnp;
+	} */ *ap;
+{
+	register struct vnode *dvp = ap->a_dvp;
+	register struct vnode *vp = ap->a_vp;
+	register struct inode *ip = VTOI(vp);
+	int error;
+
+	if ((ip->i_flags & (IMMUTABLE | APPEND)) != 0 ||
+	    (VTOI(dvp)->i_flags & APPEND) != 0) {
+		error = EPERM;
+	} else {
+		error = ufs_dirremove(dvp, ap->a_cnp);
+		if (error == 0) {
+			/* Entry is gone; account for the lost link. */
+			ip->i_nlink--;
+			ip->i_flag |= IN_CHANGE;
+		}
+	}
+
+	/* Release the file, then the directory. */
+	if (dvp == vp)
+		vrele(vp);
+	else
+		vput(vp);
+	vput(dvp);
+	return (error);
+}
+
+/*
+ * link vnode call
+ *
+ * NOTE(review): the argument roles look inverted relative to their
+ * names.  The inode whose link count is bumped is the one behind
+ * a_tdvp, and it is entered (via ufs_direnter) into a_vp; so a_tdvp
+ * appears to be the existing file and a_vp the directory receiving
+ * the new name -- confirm against the callers.
+ *
+ * Returns 0 on success, EXDEV if the two vnodes are on different
+ * mounts, EMLINK if the link count is already LINK_MAX, EPERM if the
+ * file is immutable or append-only, or the error from VOP_LOCK(),
+ * VOP_UPDATE() or ufs_direnter().  a_vp is always vput() on return.
+ */
+int
+ufs_link(ap)
+	struct vop_link_args /* {
+		struct vnode *a_vp;
+		struct vnode *a_tdvp;
+		struct componentname *a_cnp;
+	} */ *ap;
+{
+	register struct vnode *vp = ap->a_vp;
+	register struct vnode *tdvp = ap->a_tdvp;
+	register struct componentname *cnp = ap->a_cnp;
+	register struct inode *ip;
+	struct timeval tv;
+	int error;
+
+#ifdef DIAGNOSTIC
+	if ((cnp->cn_flags & HASBUF) == 0)
+		panic("ufs_link: no name");
+#endif
+	/* Hard links cannot cross mount points. */
+	if (vp->v_mount != tdvp->v_mount) {
+		VOP_ABORTOP(vp, cnp);
+		error = EXDEV;
+		goto out2;
+	}
+	/* Lock tdvp unless it is the same vnode as vp. */
+	if (vp != tdvp && (error = VOP_LOCK(tdvp))) {
+		VOP_ABORTOP(vp, cnp);
+		goto out2;
+	}
+	ip = VTOI(tdvp);
+	if ((nlink_t)ip->i_nlink >= LINK_MAX) {
+		VOP_ABORTOP(vp, cnp);
+		error = EMLINK;
+		goto out1;
+	}
+	if (ip->i_flags & (IMMUTABLE | APPEND)) {
+		VOP_ABORTOP(vp, cnp);
+		error = EPERM;
+		goto out1;
+	}
+	/*
+	 * Bump the link count and push the inode out (VOP_UPDATE) before
+	 * the new directory entry is made; undo the count if either the
+	 * update or the entry fails.
+	 */
+	ip->i_nlink++;
+	ip->i_flag |= IN_CHANGE;
+	tv = time;
+	error = VOP_UPDATE(tdvp, &tv, &tv, 1);
+	if (!error)
+		error = ufs_direnter(ip, vp, cnp);
+	if (error) {
+		ip->i_nlink--;
+		ip->i_flag |= IN_CHANGE;
+	}
+	/* The pathname buffer is released here on success and failure. */
+	FREE(cnp->cn_pnbuf, M_NAMEI);
+out1:
+	if (vp != tdvp)
+		VOP_UNLOCK(tdvp);
+out2:
+	vput(vp);
+	return (error);
+}
+
+
+
+/*
+ * relookup - lookup a path name component
+ *    Used by lookup to re-acquire things.
+ *
+ * Looks up the single, final component described by cnp in directory
+ * dvp, which is locked here first.  On success 0 is returned and *vpp
+ * holds the result of VOP_LOOKUP(); when VOP_LOOKUP() reports
+ * EJUSTRETURN (and the file system is writable) 0 is returned with
+ * *vpp as VOP_LOOKUP() left it (expected to be NULL, see the
+ * DIAGNOSTIC check) so the caller may create the entry.  On failure
+ * *vpp is set to NULL and an errno value is returned.
+ *
+ * Panics if the component is "..", or (DIAGNOSTIC) if a symbolic
+ * link is found while FOLLOW is set.
+ */
+int
+relookup(dvp, vpp, cnp)
+	struct vnode *dvp, **vpp;
+	struct componentname *cnp;
+{
+	register struct vnode *dp = 0;	/* the directory we are searching */
+	int docache;			/* == 0 do not cache last component */
+	int wantparent;			/* 1 => wantparent or lockparent flag */
+	int rdonly;			/* lookup read-only flag bit */
+	int error = 0;
+#ifdef NAMEI_DIAGNOSTIC
+	int newhash;			/* DEBUG: check name hash */
+	char *cp;			/* DEBUG: check name ptr/len */
+#endif
+
+	/*
+	 * Setup: break out flag bits into variables.
+	 * (docache is computed but not consulted again in this function.)
+	 */
+	wantparent = cnp->cn_flags & (LOCKPARENT|WANTPARENT);
+	docache = (cnp->cn_flags & NOCACHE) ^ NOCACHE;
+	if (cnp->cn_nameiop == DELETE ||
+	    (wantparent && cnp->cn_nameiop != CREATE))
+		docache = 0;
+	rdonly = cnp->cn_flags & RDONLY;
+	cnp->cn_flags &= ~ISSYMLINK;
+	dp = dvp;
+	VOP_LOCK(dp);
+
+/* dirloop: */
+	/*
+	 * Search a new directory.
+	 *
+	 * The cn_hash value is for use by vfs_cache.
+	 * The last component of the filename is left accessible via
+	 * cnp->cn_nameptr for callers that need the name. Callers needing
+	 * the name set the SAVENAME flag. When done, they assume
+	 * responsibility for freeing the pathname buffer.
+	 */
+#ifdef NAMEI_DIAGNOSTIC
+	/* Recompute the hash and length and check them against cnp. */
+	for (newhash = 0, cp = cnp->cn_nameptr; *cp != 0 && *cp != '/'; cp++)
+		newhash += (unsigned char)*cp;
+	if (newhash != cnp->cn_hash)
+		panic("relookup: bad hash");
+	if (cnp->cn_namelen != cp - cnp->cn_nameptr)
+		panic ("relookup: bad len");
+	if (*cp != 0)
+		panic("relookup: not last component");
+	printf("{%s}: ", cnp->cn_nameptr);
+#endif
+
+	/*
+	 * Check for degenerate name (e.g. / or "")
+	 * which is a way of talking about a directory,
+	 * e.g. like "/." or ".".
+	 */
+	if (cnp->cn_nameptr[0] == '\0') {
+		if (cnp->cn_nameiop != LOOKUP || wantparent) {
+			error = EISDIR;
+			goto bad;
+		}
+		if (dp->v_type != VDIR) {
+			error = ENOTDIR;
+			goto bad;
+		}
+		/* The directory itself is the answer. */
+		if (!(cnp->cn_flags & LOCKLEAF))
+			VOP_UNLOCK(dp);
+		*vpp = dp;
+		if (cnp->cn_flags & SAVESTART)
+			panic("lookup: SAVESTART");
+		return (0);
+	}
+
+	if (cnp->cn_flags & ISDOTDOT)
+		panic ("relookup: lookup on dot-dot");
+
+	/*
+	 * We now have a segment name to search for, and a directory to search.
+	 */
+	if (error = VOP_LOOKUP(dp, vpp, cnp)) {
+#ifdef DIAGNOSTIC
+		if (*vpp != NULL)
+			panic("leaf should be empty");
+#endif
+		if (error != EJUSTRETURN)
+			goto bad;
+		/*
+		 * If creating and at end of pathname, then can consider
+		 * allowing file to be created.
+		 */
+		if (rdonly || (dvp->v_mount->mnt_flag & MNT_RDONLY)) {
+			error = EROFS;
+			goto bad;
+		}
+		/* ASSERT(dvp == ndp->ni_startdir) */
+		if (cnp->cn_flags & SAVESTART)
+			VREF(dvp);
+		/*
+		 * We return with ni_vp NULL to indicate that the entry
+		 * doesn't currently exist, leaving a pointer to the
+		 * (possibly locked) directory inode in ndp->ni_dvp.
+		 * (Here that means *vpp and dvp; there is no nameidata
+		 * structure in this function.)
+		 */
+		return (0);
+	}
+	/* From here on dp is the vnode that was found, not the directory. */
+	dp = *vpp;
+
+#ifdef DIAGNOSTIC
+	/*
+	 * Check for symbolic link
+	 */
+	if (dp->v_type == VLNK && (cnp->cn_flags & FOLLOW))
+		panic ("relookup: symlink found.\n");
+#endif
+
+	/*
+	 * Check for read-only file systems.
+	 */
+	if (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME) {
+		/*
+		 * Disallow directory write attempts on read-only
+		 * file systems.
+		 */
+		if (rdonly || (dp->v_mount->mnt_flag & MNT_RDONLY) ||
+		    (wantparent &&
+		     (dvp->v_mount->mnt_flag & MNT_RDONLY))) {
+			error = EROFS;
+			goto bad2;
+		}
+	}
+	/* ASSERT(dvp == ndp->ni_startdir) */
+	if (cnp->cn_flags & SAVESTART)
+		VREF(dvp);
+	
+	/* Drop the directory reference unless the caller asked for it. */
+	if (!wantparent)
+		vrele(dvp);
+	if ((cnp->cn_flags & LOCKLEAF) == 0)
+		VOP_UNLOCK(dp);
+	return (0);
+
+	/*
+	 * Error exits.  bad2 additionally unlocks (when LOCKPARENT and
+	 * ISLASTCN are both set) and releases the directory; bad then
+	 * vput()s whatever dp currently refers to and clears *vpp.
+	 */
+bad2:
+	if ((cnp->cn_flags & LOCKPARENT) && (cnp->cn_flags & ISLASTCN))
+		VOP_UNLOCK(dvp);
+	vrele(dvp);
+bad:
+	vput(dp);
+	*vpp = NULL;
+	return (error);
+}
+
+
+/*
+ * Rename system call.
+ * 	rename("foo", "bar");
+ * is essentially
+ *	unlink("bar");
+ *	link("foo", "bar");
+ *	unlink("foo");
+ * but ``atomically''.  Can't do full commit without saving state in the
+ * inode on disk which isn't feasible at this time.  Best we can do is
+ * always guarantee the target exists.
+ *
+ * Basic algorithm is:
+ *
+ * 1) Bump link count on source while we're linking it to the
+ *    target.  This also ensures the inode won't be deleted out
+ *    from underneath us while we work (it may be truncated by
+ *    a concurrent `trunc' or `open' for creation).
+ * 2) Link source to destination.  If destination already exists,
+ *    delete it first.
+ * 3) Unlink source reference to inode if still around. If a
+ *    directory was moved and the parent of the destination
+ *    is different from the source, patch the ".." entry in the
+ *    directory.
+ *
+ * Arguments: a_fdvp/a_fvp/a_fcnp describe the source directory, source
+ * vnode and source name; a_tdvp/a_tvp/a_tcnp describe the target
+ * directory, the existing target vnode (NULL if none) and target name.
+ *
+ * Returns 0 on success or an errno value; among those produced
+ * directly here are EXDEV, EPERM, EINVAL, EMLINK, ENOTEMPTY, ENOTDIR
+ * and EISDIR.
+ */
+int
+ufs_rename(ap)
+	struct vop_rename_args  /* {
+		struct vnode *a_fdvp;
+		struct vnode *a_fvp;
+		struct componentname *a_fcnp;
+		struct vnode *a_tdvp;
+		struct vnode *a_tvp;
+		struct componentname *a_tcnp;
+	} */ *ap;
+{
+	struct vnode *tvp = ap->a_tvp;
+	register struct vnode *tdvp = ap->a_tdvp;
+	struct vnode *fvp = ap->a_fvp;
+	register struct vnode *fdvp = ap->a_fdvp;
+	register struct componentname *tcnp = ap->a_tcnp;
+	register struct componentname *fcnp = ap->a_fcnp;
+	/*
+	 * ip is the source inode throughout; dp and xp are re-pointed
+	 * several times (source dir, target dir, target, re-looked-up
+	 * source) as the function progresses.
+	 */
+	register struct inode *ip, *xp, *dp;
+	struct dirtemplate dirbuf;
+	struct timeval tv;
+	/* oldparent/newparent hold inode numbers; 0 means "not set". */
+	int doingdirectory = 0, oldparent = 0, newparent = 0;
+	int error = 0;
+	u_char namlen;
+
+#ifdef DIAGNOSTIC
+	if ((tcnp->cn_flags & HASBUF) == 0 ||
+	    (fcnp->cn_flags & HASBUF) == 0)
+		panic("ufs_rename: no name");
+#endif
+	/*
+	 * Check for cross-device rename.
+	 *
+	 * The abortit label below sits inside this block and is the
+	 * common early-failure exit: later checks set `error' and jump
+	 * here to abort both operations and release all four vnodes.
+	 */
+	if ((fvp->v_mount != tdvp->v_mount) ||
+	    (tvp && (fvp->v_mount != tvp->v_mount))) {
+		error = EXDEV;
+abortit:
+		VOP_ABORTOP(tdvp, tcnp); /* XXX, why not in NFS? */
+		if (tdvp == tvp)
+			vrele(tdvp);
+		else
+			vput(tdvp);
+		if (tvp)
+			vput(tvp);
+		VOP_ABORTOP(fdvp, fcnp); /* XXX, why not in NFS? */
+		vrele(fdvp);
+		vrele(fvp);
+		return (error);
+	}
+
+	/*
+	 * Refuse to replace an immutable or append-only target, or to
+	 * change an append-only target directory.
+	 */
+	if (tvp && ((VTOI(tvp)->i_flags & (IMMUTABLE | APPEND)) ||
+	    (VTOI(tdvp)->i_flags & APPEND))) {
+		error = EPERM;
+		goto abortit;
+	}
+	/*
+	 * Check if just deleting a link name: source and target are the
+	 * same vnode.  For a non-directory the target name is looked up
+	 * again as a DELETE and removed via VOP_REMOVE().
+	 */
+	if (fvp == tvp) {
+		if (fvp->v_type == VDIR) {
+			error = EINVAL;
+			goto abortit;
+		}
+		VOP_ABORTOP(fdvp, fcnp);
+		vrele(fdvp);
+		vrele(fvp);
+		vput(tdvp);
+		vput(tvp);
+		tcnp->cn_flags &= ~MODMASK;
+		tcnp->cn_flags |= LOCKPARENT | LOCKLEAF;
+		if ((tcnp->cn_flags & SAVESTART) == 0)
+			panic("ufs_rename: lost from startdir");
+		tcnp->cn_nameiop = DELETE;
+		(void) relookup(tdvp, &tvp, tcnp);
+		return (VOP_REMOVE(tdvp, tvp, tcnp));
+	}
+	if (error = VOP_LOCK(fvp))
+		goto abortit;
+	dp = VTOI(fdvp);
+	ip = VTOI(fvp);
+	/* Same immutable/append checks, this time on the source side. */
+	if ((ip->i_flags & (IMMUTABLE | APPEND)) || (dp->i_flags & APPEND)) {
+		VOP_UNLOCK(fvp);
+		error = EPERM;
+		goto abortit;
+	}
+	if ((ip->i_mode & IFMT) == IFDIR) {
+		/*
+		 * Avoid ".", "..", and aliases of "." for obvious reasons.
+		 * A directory already marked IN_RENAME is also refused.
+		 */
+		if ((fcnp->cn_namelen == 1 && fcnp->cn_nameptr[0] == '.') ||
+		    dp == ip || (fcnp->cn_flags&ISDOTDOT) ||
+		    (ip->i_flag & IN_RENAME)) {
+			VOP_UNLOCK(fvp);
+			error = EINVAL;
+			goto abortit;
+		}
+		ip->i_flag |= IN_RENAME;
+		oldparent = dp->i_number;
+		doingdirectory++;
+	}
+	vrele(fdvp);
+
+	/*
+	 * When the target exists, both the directory
+	 * and target vnodes are returned locked.
+	 */
+	dp = VTOI(tdvp);
+	xp = NULL;
+	if (tvp)
+		xp = VTOI(tvp);
+
+	/*
+	 * 1) Bump link count while we're moving stuff
+	 *    around.  If we crash somewhere before
+	 *    completing our work, the link count
+	 *    may be wrong, but correctable.
+	 */
+	ip->i_nlink++;
+	ip->i_flag |= IN_CHANGE;
+	tv = time;
+	if (error = VOP_UPDATE(fvp, &tv, &tv, 1)) {
+		VOP_UNLOCK(fvp);
+		goto bad;
+	}
+
+	/*
+	 * If ".." must be changed (ie the directory gets a new
+	 * parent) then the source directory must not be in the
+	 * directory hierarchy above the target, as this would
+	 * orphan everything below the source directory. Also
+	 * the user must have write permission in the source so
+	 * as to be able to change "..". We must repeat the call 
+	 * to namei, as the parent directory is unlocked by the
+	 * call to checkpath().
+	 *
+	 * The result of the access check is only acted upon when a
+	 * directory is actually moving to a new parent.
+	 */
+	error = VOP_ACCESS(fvp, VWRITE, tcnp->cn_cred, tcnp->cn_proc);
+	VOP_UNLOCK(fvp);
+	if (oldparent != dp->i_number)
+		newparent = dp->i_number;
+	if (doingdirectory && newparent) {
+		if (error)	/* write access check above */
+			goto bad;
+		if (xp != NULL)
+			vput(tvp);
+		if (error = ufs_checkpath(ip, dp, tcnp->cn_cred))
+			goto out;
+		if ((tcnp->cn_flags & SAVESTART) == 0)
+			panic("ufs_rename: lost to startdir");
+		if (error = relookup(tdvp, &tvp, tcnp))
+			goto out;
+		/* Refresh dp/xp from the repeated lookup. */
+		dp = VTOI(tdvp);
+		xp = NULL;
+		if (tvp)
+			xp = VTOI(tvp);
+	}
+	/*
+	 * 2) If target doesn't exist, link the target
+	 *    to the source and unlink the source. 
+	 *    Otherwise, rewrite the target directory
+	 *    entry to reference the source inode and
+	 *    expunge the original entry's existence.
+	 */
+	if (xp == NULL) {
+		if (dp->i_dev != ip->i_dev)
+			panic("rename: EXDEV");
+		/*
+		 * Account for ".." in new directory.
+		 * When source and destination have the same
+		 * parent we don't fool with the link count.
+		 */
+		if (doingdirectory && newparent) {
+			if ((nlink_t)dp->i_nlink >= LINK_MAX) {
+				error = EMLINK;
+				goto bad;
+			}
+			dp->i_nlink++;
+			dp->i_flag |= IN_CHANGE;
+			if (error = VOP_UPDATE(tdvp, &tv, &tv, 1))
+				goto bad;
+		}
+		if (error = ufs_direnter(ip, tdvp, tcnp)) {
+			/* Entry failed: take back the ".." link count. */
+			if (doingdirectory && newparent) {
+				dp->i_nlink--;
+				dp->i_flag |= IN_CHANGE;
+				(void)VOP_UPDATE(tdvp, &tv, &tv, 1);
+			}
+			goto bad;
+		}
+		vput(tdvp);
+	} else {
+		if (xp->i_dev != dp->i_dev || xp->i_dev != ip->i_dev)
+			panic("rename: EXDEV");
+		/*
+		 * Short circuit rename(foo, foo).
+		 * (That case was handled earlier, so reaching here with
+		 * identical inode numbers is treated as fatal.)
+		 */
+		if (xp->i_number == ip->i_number)
+			panic("rename: same file");
+		/*
+		 * If the parent directory is "sticky", then the user must
+		 * own the parent directory, or the destination of the rename,
+		 * otherwise the destination may not be changed (except by
+		 * root). This implements append-only directories.
+		 */
+		if ((dp->i_mode & S_ISTXT) && tcnp->cn_cred->cr_uid != 0 &&
+		    tcnp->cn_cred->cr_uid != dp->i_uid &&
+		    xp->i_uid != tcnp->cn_cred->cr_uid) {
+			error = EPERM;
+			goto bad;
+		}
+		/*
+		 * Target must be empty if a directory and have no links
+		 * to it. Also, ensure source and target are compatible
+		 * (both directories, or both not directories).
+		 */
+		if ((xp->i_mode&IFMT) == IFDIR) {
+			if (!ufs_dirempty(xp, dp->i_number, tcnp->cn_cred) || 
+			    xp->i_nlink > 2) {
+				error = ENOTEMPTY;
+				goto bad;
+			}
+			if (!doingdirectory) {
+				error = ENOTDIR;
+				goto bad;
+			}
+			cache_purge(tdvp);
+		} else if (doingdirectory) {
+			error = EISDIR;
+			goto bad;
+		}
+		if (error = ufs_dirrewrite(dp, ip, tcnp))
+			goto bad;
+		/*
+		 * If the target directory is in the same
+		 * directory as the source directory,
+		 * decrement the link count on the parent
+		 * of the target directory.
+		 */
+		 if (doingdirectory && !newparent) {
+			dp->i_nlink--;
+			dp->i_flag |= IN_CHANGE;
+		}
+		vput(tdvp);
+		/*
+		 * Adjust the link count of the target to
+		 * reflect the dirrewrite above.  If this is
+		 * a directory it is empty and there are
+		 * no links to it, so we can squash the inode and
+		 * any space associated with it.  We disallowed
+		 * renaming over top of a directory with links to
+		 * it above, as the remaining link would point to
+		 * a directory without "." or ".." entries.
+		 */
+		xp->i_nlink--;
+		if (doingdirectory) {
+			if (--xp->i_nlink != 0)
+				panic("rename: linked directory");
+			error = VOP_TRUNCATE(tvp, (off_t)0, IO_SYNC,
+			    tcnp->cn_cred, tcnp->cn_proc);
+		}
+		xp->i_flag |= IN_CHANGE;
+		vput(tvp);
+		xp = NULL;
+	}
+
+	/*
+	 * 3) Unlink the source.
+	 *
+	 * The source name is looked up again; fvp, xp and dp are
+	 * overwritten with the results.  ap->a_fvp still refers to the
+	 * original source vnode and is released separately below.
+	 */
+	fcnp->cn_flags &= ~MODMASK;
+	fcnp->cn_flags |= LOCKPARENT | LOCKLEAF;
+	if ((fcnp->cn_flags & SAVESTART) == 0)
+		panic("ufs_rename: lost from startdir");
+	(void) relookup(fdvp, &fvp, fcnp);
+	if (fvp != NULL) {
+		xp = VTOI(fvp);
+		dp = VTOI(fdvp);
+	} else {
+		/*
+		 * From name has disappeared.
+		 */
+		if (doingdirectory)
+			panic("rename: lost dir entry");
+		vrele(ap->a_fvp);
+		return (0);
+	}
+	/*
+	 * Ensure that the directory entry still exists and has not
+	 * changed while the new name has been entered. If the source is
+	 * a file then the entry may have been unlinked or renamed. In
+	 * either case there is no further work to be done. If the source
+	 * is a directory then it cannot have been rmdir'ed; its link
+	 * count of three would cause a rmdir to fail with ENOTEMPTY.
+	 * The IN_RENAME flag ensures that it cannot be moved by another
+	 * rename.
+	 */
+	if (xp != ip) {
+		if (doingdirectory)
+			panic("rename: lost dir entry");
+	} else {
+		/*
+		 * If the source is a directory with a
+		 * new parent, the link count of the old
+		 * parent directory must be decremented
+		 * and ".." set to point to the new parent.
+		 */
+		if (doingdirectory && newparent) {
+			dp->i_nlink--;
+			dp->i_flag |= IN_CHANGE;
+			/* Read the "." and ".." entries of the moved dir. */
+			error = vn_rdwr(UIO_READ, fvp, (caddr_t)&dirbuf,
+				sizeof (struct dirtemplate), (off_t)0,
+				UIO_SYSSPACE, IO_NODELOCKED, 
+				tcnp->cn_cred, (int *)0, (struct proc *)0);
+			if (error == 0) {
+				/*
+				 * On little-endian machines, when
+				 * mnt_maxsymlinklen <= 0 the name length
+				 * is taken from the dotdot_type field
+				 * (presumably the old directory format
+				 * -- confirm).
+				 */
+#				if (BYTE_ORDER == LITTLE_ENDIAN)
+					if (fvp->v_mount->mnt_maxsymlinklen <= 0)
+						namlen = dirbuf.dotdot_type;
+					else
+						namlen = dirbuf.dotdot_namlen;
+#				else
+					namlen = dirbuf.dotdot_namlen;
+#				endif
+				if (namlen != 2 ||
+				    dirbuf.dotdot_name[0] != '.' ||
+				    dirbuf.dotdot_name[1] != '.') {
+					ufs_dirbad(xp, (doff_t)12,
+					    "rename: mangled dir");
+				} else {
+					/* Re-point ".." and write it back. */
+					dirbuf.dotdot_ino = newparent;
+					(void) vn_rdwr(UIO_WRITE, fvp,
+					    (caddr_t)&dirbuf,
+					    sizeof (struct dirtemplate),
+					    (off_t)0, UIO_SYSSPACE,
+					    IO_NODELOCKED|IO_SYNC,
+					    tcnp->cn_cred, (int *)0,
+					    (struct proc *)0);
+					cache_purge(fdvp);
+				}
+			}
+		}
+		/* Note: any vn_rdwr() error above is overwritten here. */
+		error = ufs_dirremove(fdvp, fcnp);
+		if (!error) {
+			xp->i_nlink--;
+			xp->i_flag |= IN_CHANGE;
+		}
+		xp->i_flag &= ~IN_RENAME;
+	}
+	if (dp)
+		vput(fdvp);
+	if (xp)
+		vput(fvp);
+	vrele(ap->a_fvp);
+	return (error);
+
+	/*
+	 * Failure exits after step 1.  bad releases the target vnode (if
+	 * any) and the target directory; out then takes back the extra
+	 * source link added in step 1, provided the source can be locked.
+	 */
+bad:
+	if (xp)
+		vput(ITOV(xp));
+	vput(ITOV(dp));
+out:
+	if (VOP_LOCK(fvp) == 0) {
+		ip->i_nlink--;
+		ip->i_flag |= IN_CHANGE;
+		vput(fvp);
+	} else
+		vrele(fvp);
+	return (error);
+}
+
+/*
+ * A virgin directory (no blushing please).
+ *
+ * Static images of the "." and ".." entries that start every new
+ * directory.  ufs_mkdir() copies one of them, fills in the two inode
+ * numbers and writes it as the first block: mastertemplate when the
+ * mount's mnt_maxsymlinklen is > 0, omastertemplate otherwise.
+ *
+ * The initialisers are positional; presumably each row is inode
+ * number, record length, (type,) name length and name -- confirm
+ * against struct dirtemplate / struct odirtemplate.  The second
+ * record length, DIRBLKSIZ - 12, covers the rest of the block after
+ * the 12-byte "." record.
+ */
+static struct dirtemplate mastertemplate = {
+	0, 12, DT_DIR, 1, ".",
+	0, DIRBLKSIZ - 12, DT_DIR, 2, ".."
+};
+/* Same layout without the type field (see hedge above). */
+static struct odirtemplate omastertemplate = {
+	0, 12, 1, ".",
+	0, DIRBLKSIZ - 12, 2, ".."
+};
+
+/*
+ * Mkdir system call
+ *
+ * Creates a new directory named by a_cnp in the locked directory
+ * a_dvp with the permission bits of a_vap->va_mode (masked to 0777).
+ * The new inode is allocated and written, the parent's link count is
+ * bumped, the "." and ".." entries are written from a static
+ * template, and finally the name is entered in the parent.
+ *
+ * On success 0 is returned and *a_vpp holds the new vnode.  On failure
+ * an errno value is returned (EMLINK if the parent already has
+ * LINK_MAX links, otherwise the error of the failing step), the new
+ * inode's link count is set to 0 so it is reclaimed when released,
+ * and the parent's link count is left as it was on entry.  The
+ * pathname buffer is freed and a_dvp is vput() on every path.
+ */
+int
+ufs_mkdir(ap)
+	struct vop_mkdir_args /* {
+		struct vnode *a_dvp;
+		struct vnode **a_vpp;
+		struct componentname *a_cnp;
+		struct vattr *a_vap;
+	} */ *ap;
+{
+	register struct vnode *dvp = ap->a_dvp;
+	register struct vattr *vap = ap->a_vap;
+	register struct componentname *cnp = ap->a_cnp;
+	register struct inode *ip, *dp;
+	struct vnode *tvp;
+	struct dirtemplate dirtemplate, *dtp;
+	struct timeval tv;
+	int error, dmode;
+
+#ifdef DIAGNOSTIC
+	if ((cnp->cn_flags & HASBUF) == 0)
+		panic("ufs_mkdir: no name");
+#endif
+	dp = VTOI(dvp);
+	if ((nlink_t)dp->i_nlink >= LINK_MAX) {
+		error = EMLINK;
+		goto out;
+	}
+	dmode = vap->va_mode & 0777;
+	dmode |= IFDIR;
+	/*
+	 * Must simulate part of ufs_makeinode here to acquire the inode,
+	 * but not have it entered in the parent directory. The entry is
+	 * made later after writing "." and ".." entries.
+	 */
+	if (error = VOP_VALLOC(dvp, dmode, cnp->cn_cred, &tvp))
+		goto out;
+	ip = VTOI(tvp);
+	ip->i_uid = cnp->cn_cred->cr_uid;
+	ip->i_gid = dp->i_gid;
+#ifdef QUOTA
+	if ((error = getinoquota(ip)) ||
+	    (error = chkiq(ip, 1, cnp->cn_cred, 0))) {
+		free(cnp->cn_pnbuf, M_NAMEI);
+		VOP_VFREE(tvp, ip->i_number, dmode);
+		vput(tvp);
+		vput(dvp);
+		return (error);
+	}
+#endif
+	ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE;
+	ip->i_mode = dmode;
+	tvp->v_type = VDIR;	/* Rest init'd in getnewvnode(). */
+	ip->i_nlink = 2;
+	tv = time;
+	/*
+	 * Write out the new inode.  A failure here used to be silently
+	 * overwritten by the next VOP_UPDATE(); it now aborts the mkdir
+	 * before the parent has been touched.
+	 */
+	if (error = VOP_UPDATE(tvp, &tv, &tv, 1))
+		goto bad;
+
+	/*
+	 * Bump link count in parent directory
+	 * to reflect work done below.  Should
+	 * be done before reference is created
+	 * so reparation is possible if we crash.
+	 */
+	dp->i_nlink++;
+	dp->i_flag |= IN_CHANGE;
+	if (error = VOP_UPDATE(dvp, &tv, &tv, 1)) {
+		/*
+		 * Take the link back, as the later failure paths do;
+		 * otherwise the parent keeps a count for a ".." that
+		 * never comes into existence.
+		 */
+		dp->i_nlink--;
+		dp->i_flag |= IN_CHANGE;
+		goto bad;
+	}
+
+	/* Initialize directory with "." and ".." from static template. */
+	if (dvp->v_mount->mnt_maxsymlinklen > 0)
+		dtp = &mastertemplate;
+	else
+		dtp = (struct dirtemplate *)&omastertemplate;
+	dirtemplate = *dtp;
+	dirtemplate.dot_ino = ip->i_number;
+	dirtemplate.dotdot_ino = dp->i_number;
+	error = vn_rdwr(UIO_WRITE, tvp, (caddr_t)&dirtemplate,
+	    sizeof (dirtemplate), (off_t)0, UIO_SYSSPACE,
+	    IO_NODELOCKED|IO_SYNC, cnp->cn_cred, (int *)0, (struct proc *)0);
+	if (error) {
+		dp->i_nlink--;
+		dp->i_flag |= IN_CHANGE;
+		goto bad;
+	}
+	if (DIRBLKSIZ > VFSTOUFS(dvp->v_mount)->um_mountp->mnt_stat.f_bsize)
+		panic("ufs_mkdir: blksize"); /* XXX should grow with balloc() */
+	else {
+		ip->i_size = DIRBLKSIZ;
+		ip->i_flag |= IN_CHANGE;
+	}
+
+	/* Directory set up, now install its entry in the parent directory. */
+	if (error = ufs_direnter(ip, dvp, cnp)) {
+		dp->i_nlink--;
+		dp->i_flag |= IN_CHANGE;
+	}
+bad:
+	/*
+	 * No need to do an explicit VOP_TRUNCATE here, vrele will do this
+	 * for us because we set the link count to 0.
+	 */
+	if (error) {
+		ip->i_nlink = 0;
+		ip->i_flag |= IN_CHANGE;
+		vput(tvp);
+	} else
+		*ap->a_vpp = tvp;
+out:
+	FREE(cnp->cn_pnbuf, M_NAMEI);
+	vput(dvp);
+	return (error);
+}
+
+/*
+ * Rmdir system call.
+ *
+ * Removes the directory a_vp, named by a_cnp, from its parent a_dvp.
+ * Returns EINVAL for an attempt to remove ".", ENOTEMPTY if the
+ * directory's link count is not 2 or ufs_dirempty() says it has
+ * entries, EPERM if the parent is append-only or the directory is
+ * immutable/append-only, or the error from ufs_dirremove() or
+ * VOP_TRUNCATE().  Both vnodes are released on every path.
+ */
+int
+ufs_rmdir(ap)
+	struct vop_rmdir_args /* {
+		struct vnode *a_dvp;
+		struct vnode *a_vp;
+		struct componentname *a_cnp;
+	} */ *ap;
+{
+	register struct componentname *cnp = ap->a_cnp;
+	register struct vnode *dvp = ap->a_dvp;
+	register struct vnode *vp = ap->a_vp;
+	register struct inode *dp = VTOI(dvp);
+	register struct inode *ip = VTOI(vp);
+	int error;
+
+	/*
+	 * No rmdir "." please.
+	 */
+	if (dp == ip) {
+		vrele(dvp);
+		vput(vp);
+		return (EINVAL);
+	}
+
+	/*
+	 * Validate, then delete the reference in the parent.  The
+	 * directory must be empty (and valid); rmdir ".." won't be
+	 * valid since ".." will contain a reference to the current
+	 * directory and thus be non-empty.  The name is removed before
+	 * the inode is purged: if we crash in between, the directory
+	 * will be reattached to lost+found.
+	 */
+	if (ip->i_nlink != 2 ||
+	    !ufs_dirempty(ip, dp->i_number, cnp->cn_cred))
+		error = ENOTEMPTY;
+	else if ((dp->i_flags & APPEND) ||
+	    (ip->i_flags & (IMMUTABLE | APPEND)))
+		error = EPERM;
+	else
+		error = ufs_dirremove(dvp, cnp);
+	if (error != 0) {
+		vput(dvp);
+		vput(vp);
+		return (error);
+	}
+
+	/* The parent loses the link held by the child's "..". */
+	dp->i_nlink--;
+	dp->i_flag |= IN_CHANGE;
+	cache_purge(dvp);
+	vput(dvp);
+
+	/*
+	 * Truncate inode.  The only stuff left in the directory is "."
+	 * and "..".  The "." reference is inconsequential since we're
+	 * quashing it, and the ".." reference has already been adjusted
+	 * above.  We've removed the "." reference and the reference in
+	 * the parent directory, but there may be other hard links so
+	 * decrement by 2 and worry about them later.
+	 */
+	ip->i_nlink -= 2;
+	error = VOP_TRUNCATE(vp, (off_t)0, IO_SYNC, cnp->cn_cred,
+	    cnp->cn_proc);
+	cache_purge(ITOV(ip));
+	vput(vp);
+	return (error);
+}
+
+/*
+ * symlink -- make a symbolic link
+ *
+ * Creates an IFLNK inode via ufs_makeinode() and stores a_target in
+ * it.  A target shorter than the mount's mnt_maxsymlinklen is copied
+ * into the inode's i_shortlink area; anything longer is written as
+ * file data with vn_rdwr().  The new vnode is vput() before
+ * returning.  Returns 0 or the error from ufs_makeinode()/vn_rdwr().
+ */
+int
+ufs_symlink(ap)
+	struct vop_symlink_args /* {
+		struct vnode *a_dvp;
+		struct vnode **a_vpp;
+		struct componentname *a_cnp;
+		struct vattr *a_vap;
+		char *a_target;
+	} */ *ap;
+{
+	register struct vnode *vp, **vpp = ap->a_vpp;
+	register struct inode *ip;
+	int len, error;
+
+	error = ufs_makeinode(IFLNK | ap->a_vap->va_mode, ap->a_dvp,
+	    vpp, ap->a_cnp);
+	if (error != 0)
+		return (error);
+
+	vp = *vpp;
+	len = strlen(ap->a_target);
+	if (len >= vp->v_mount->mnt_maxsymlinklen) {
+		/* Too long for the inode: store the target as data. */
+		error = vn_rdwr(UIO_WRITE, vp, ap->a_target, len, (off_t)0,
+		    UIO_SYSSPACE, IO_NODELOCKED, ap->a_cnp->cn_cred, (int *)0,
+		    (struct proc *)0);
+	} else {
+		/* Short link: keep the target inside the inode itself. */
+		ip = VTOI(vp);
+		bcopy(ap->a_target, (char *)ip->i_shortlink, len);
+		ip->i_size = len;
+		ip->i_flag |= IN_CHANGE | IN_UPDATE;
+	}
+	vput(vp);
+	return (error);
+}
+
+/*
+ * Vnode op for reading directories.
+ * 
+ * The routine below assumes that the on-disk format of a directory
+ * is the same as that defined by <sys/dirent.h>. If the on-disk
+ * format changes, then it will be necessary to do a conversion
+ * from the on-disk format that read returns to the format defined
+ * by <sys/dirent.h>.
+ *
+ * The request is trimmed to a whole number of DIRBLKSIZ units; the
+ * trimmed-off remainder is added back to uio_resid before returning.
+ * Returns EINVAL if less than one DIRBLKSIZ unit was requested or the
+ * offset is not DIRBLKSIZ aligned, EIO if (on the conversion path) an
+ * entry with a zero record length is met, otherwise the result of
+ * VOP_READ()/uiomove().
+ */
+int
+ufs_readdir(ap)
+	struct vop_readdir_args /* {
+		struct vnode *a_vp;
+		struct uio *a_uio;
+		struct ucred *a_cred;
+	} */ *ap;
+{
+	register struct uio *uio = ap->a_uio;
+	int count, lost, error;
+
+	/* Round down (assumes DIRBLKSIZ is a power of two). */
+	count = uio->uio_resid;
+	count &= ~(DIRBLKSIZ - 1);
+	lost = uio->uio_resid - count;
+	if (count < DIRBLKSIZ || (uio->uio_offset & (DIRBLKSIZ -1)))
+		return (EINVAL);
+	uio->uio_resid = count;
+	uio->uio_iov->iov_len = count;
+#	if (BYTE_ORDER == LITTLE_ENDIAN)
+		if (ap->a_vp->v_mount->mnt_maxsymlinklen > 0) {
+			error = VOP_READ(ap->a_vp, uio, 0, ap->a_cred);
+		} else {
+			/*
+			 * mnt_maxsymlinklen <= 0 (presumably the old
+			 * directory format -- confirm): read into a
+			 * temporary kernel buffer, swap the d_namlen and
+			 * d_type bytes of every entry, then copy the
+			 * converted data out to the caller.
+			 */
+			struct dirent *dp, *edp;
+			struct uio auio;
+			struct iovec aiov;
+			caddr_t dirbuf;
+			int readcnt;
+			u_char tmp;
+
+			auio = *uio;
+			auio.uio_iov = &aiov;
+			auio.uio_iovcnt = 1;
+			auio.uio_segflg = UIO_SYSSPACE;
+			aiov.iov_len = count;
+			MALLOC(dirbuf, caddr_t, count, M_TEMP, M_WAITOK);
+			aiov.iov_base = dirbuf;
+			error = VOP_READ(ap->a_vp, &auio, 0, ap->a_cred);
+			if (error == 0) {
+				readcnt = count - auio.uio_resid;
+				edp = (struct dirent *)&dirbuf[readcnt];
+				for (dp = (struct dirent *)dirbuf; dp < edp; ) {
+					tmp = dp->d_namlen;
+					dp->d_namlen = dp->d_type;
+					dp->d_type = tmp;
+					/* A zero d_reclen would loop forever. */
+					if (dp->d_reclen > 0) {
+						dp = (struct dirent *)
+						    ((char *)dp + dp->d_reclen);
+					} else {
+						error = EIO;
+						break;
+					}
+				}
+				/* Only copy out if the walk completed. */
+				if (dp >= edp)
+					error = uiomove(dirbuf, readcnt, uio);
+			}
+			FREE(dirbuf, M_TEMP);
+		}
+#	else
+		error = VOP_READ(ap->a_vp, uio, 0, ap->a_cred);
+#	endif
+	uio->uio_resid += lost;
+	return (error);
+}
+
+/*
+ * Return target name of a symbolic link
+ *
+ * A link whose size is below the mount's mnt_maxsymlinklen is stored
+ * in the inode's i_shortlink area and is copied out with uiomove();
+ * longer links are read as ordinary file data with VOP_READ().
+ * Returns 0 on success or the error reported by uiomove()/VOP_READ(),
+ * so a failed copy of a short link is no longer reported as success.
+ */
+int
+ufs_readlink(ap)
+	struct vop_readlink_args /* {
+		struct vnode *a_vp;
+		struct uio *a_uio;
+		struct ucred *a_cred;
+	} */ *ap;
+{
+	register struct vnode *vp = ap->a_vp;
+	register struct inode *ip = VTOI(vp);
+	int isize;
+
+	isize = ip->i_size;
+	if (isize < vp->v_mount->mnt_maxsymlinklen)
+		return (uiomove((char *)ip->i_shortlink, isize, ap->a_uio));
+	return (VOP_READ(vp, ap->a_uio, 0, ap->a_cred));
+}
+
+/*
+ * Ufs abort op, called after namei() when a CREATE/DELETE isn't actually
+ * done. If a buffer has been saved in anticipation of a CREATE, delete it.
+ *
+ * The pathname buffer is freed only when HASBUF is set and SAVESTART
+ * is clear.  a_dvp is unused.  Always returns 0.
+ */
+/* ARGSUSED */
+int
+ufs_abortop(ap)
+	struct vop_abortop_args /* {
+		struct vnode *a_dvp;
+		struct componentname *a_cnp;
+	} */ *ap;
+{
+	register struct componentname *cnp = ap->a_cnp;
+
+	if ((cnp->cn_flags & HASBUF) != 0 &&
+	    (cnp->cn_flags & SAVESTART) == 0)
+		FREE(cnp->cn_pnbuf, M_NAMEI);
+	return (0);
+}
+
+/*
+ * Lock an inode. If its already locked, set the WANT bit and sleep.
+ *
+ * Waits first for any VXLOCK on the vnode to clear, then for the
+ * inode's IN_LOCKED flag, re-checking both after every sleep.
+ * Returns ENOENT if the vnode's tag is VT_NON once VXLOCK has
+ * cleared, 0 once IN_LOCKED has been set for the caller.  With
+ * DIAGNOSTIC the holder and waiter pids are recorded in the inode
+ * and locking against oneself panics.
+ */
+int
+ufs_lock(ap)
+	struct vop_lock_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+	register struct vnode *vp = ap->a_vp;
+	register struct inode *ip;
+	struct proc *p = curproc;	/* XXX */
+
+	for (;;) {
+		/* Wait out an exclusive vnode lock, if any. */
+		while (vp->v_flag & VXLOCK) {
+			vp->v_flag |= VXWANT;
+			sleep((caddr_t)vp, PINOD);
+		}
+		if (vp->v_tag == VT_NON)
+			return (ENOENT);
+		ip = VTOI(vp);
+		if ((ip->i_flag & IN_LOCKED) == 0)
+			break;
+
+		/* Somebody holds it: note our interest and wait. */
+		ip->i_flag |= IN_WANTED;
+#ifdef DIAGNOSTIC
+		if (p) {
+			if (p->p_pid == ip->i_lockholder)
+				panic("locking against myself");
+			ip->i_lockwaiter = p->p_pid;
+		} else
+			ip->i_lockwaiter = -1;
+#endif
+		(void) sleep((caddr_t)ip, PINOD);
+	}
+#ifdef DIAGNOSTIC
+	ip->i_lockwaiter = 0;
+	if (ip->i_lockholder != 0)
+		panic("lockholder (%d) != 0", ip->i_lockholder);
+	if (p && p->p_pid == 0)
+		printf("locking by process 0\n");
+	if (p)
+		ip->i_lockholder = p->p_pid;
+	else
+		ip->i_lockholder = -1;
+#endif
+	ip->i_flag |= IN_LOCKED;
+	return (0);
+}
+
+/*
+ * Unlock an inode.  If WANT bit is on, clear it and wakeup the sleepers.
+ */
+int lockcount = 90;	/* bumped by the DIAGNOSTIC owner check below */
+int
+ufs_unlock(ap)
+	struct vop_unlock_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+	register struct inode *ip = VTOI(ap->a_vp);
+	struct proc *p = curproc;	/* XXX */
+
+#ifdef DIAGNOSTIC
+	if ((ip->i_flag & IN_LOCKED) == 0) {
+		vprint("ufs_unlock: unlocked inode", ap->a_vp);
+		panic("ufs_unlock NOT LOCKED");
+	}
+	if (p && p->p_pid != ip->i_lockholder && p->p_pid > -1 &&
+	    ip->i_lockholder > -1 && lockcount++ < 100)
+		panic("unlocker (%d) != lock holder (%d)",
+		    p->p_pid, ip->i_lockholder);
+	ip->i_lockholder = 0;
+#endif
+	ip->i_flag &= ~IN_LOCKED;
+	if (ip->i_flag & IN_WANTED) {
+		ip->i_flag &= ~IN_WANTED;
+		wakeup((caddr_t)ip);	/* matches sleep() in ufs_lock */
+	}
+	return (0);
+}
+
+/*
+ * Report whether the inode behind a_vp has IN_LOCKED set: 1 if so, else 0.
+ */
+int
+ufs_islocked(ap)
+	struct vop_islocked_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+	register struct inode *ip = VTOI(ap->a_vp);
+
+	/* Normalize the flag test to exactly 0 or 1. */
+	return ((ip->i_flag & IN_LOCKED) != 0);
+}
+
+/*
+ * Calculate the logical to physical mapping if not done already,
+ * then call the strategy routine of the inode's device vnode.
+ */
+int
+ufs_strategy(ap)
+	struct vop_strategy_args /* {
+		struct buf *a_bp;
+	} */ *ap;
+{
+	register struct buf *bp = ap->a_bp;
+	register struct vnode *vp = bp->b_vp;
+	register struct inode *ip;
+	int error;
+
+	ip = VTOI(vp);
+	if (vp->v_type == VBLK || vp->v_type == VCHR)
+		panic("ufs_strategy: spec");
+	if (bp->b_blkno == bp->b_lblkno) {	/* taken as "not mapped yet" */
+		if (error =
+		    VOP_BMAP(vp, bp->b_lblkno, NULL, &bp->b_blkno, NULL)) {
+			bp->b_error = error;	/* mark the buffer failed */
+			bp->b_flags |= B_ERROR;
+			biodone(bp);
+			return (error);
+		}
+		if ((long)bp->b_blkno == -1)
+			clrbuf(bp);	/* mapping gave -1: clear the buffer */
+	}
+	if ((long)bp->b_blkno == -1) {	/* nothing to transfer; finish now */
+		biodone(bp);
+		return (0);
+	}
+	vp = ip->i_devvp;	/* from here on vp is the device vnode */
+	bp->b_dev = vp->v_rdev;
+	VOCALL (vp->v_op, VOFFSET(vop_strategy), ap);
+	return (0);
+}
+
+/*
+ * Print an inode: number, device major/minor, lock state and lock pids.
+ */
+int
+ufs_print(ap)
+	struct vop_print_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+	register struct vnode *vp = ap->a_vp;
+	register struct inode *ip = VTOI(vp);
+
+	printf("tag VT_UFS, ino %d, on dev %d, %d",
+	    ip->i_number, major(ip->i_dev), minor(ip->i_dev));
+#ifdef FIFO
+	if (vp->v_type == VFIFO)
+		fifo_printinfo(vp);
+#endif /* FIFO */
+	printf("%s\n", (ip->i_flag & IN_LOCKED) != 0 ? " (LOCKED)" : "");
+	if (ip->i_lockholder != 0) {	/* owner line only when held */
+		printf("\towner pid %d", ip->i_lockholder);
+		if (ip->i_lockwaiter != 0)
+			printf(" waiting pid %d", ip->i_lockwaiter);
+		printf("\n");
+	}
+	return (0);
+}
+
+/*
+ * Read wrapper for special devices: mark the inode IN_ACCESS, then
+ * dispatch to the vop_read entry of spec_vnodeop_p and return its result.
+ */
+int
+ufsspec_read(ap)
+	struct vop_read_args /* {
+		struct vnode *a_vp;
+		struct uio *a_uio;
+		int  a_ioflag;
+		struct ucred *a_cred;
+	} */ *ap;
+{
+	register struct inode *ip = VTOI(ap->a_vp);
+
+	/* Set access flag. */
+	ip->i_flag |= IN_ACCESS;
+	return (VOCALL (spec_vnodeop_p, VOFFSET(vop_read), ap));
+}
+
+/*
+ * Write wrapper for special devices: mark the inode IN_CHANGE and
+ * IN_UPDATE, then dispatch to the vop_write entry of spec_vnodeop_p.
+ */
+int
+ufsspec_write(ap)
+	struct vop_write_args /* {
+		struct vnode *a_vp;
+		struct uio *a_uio;
+		int  a_ioflag;
+		struct ucred *a_cred;
+	} */ *ap;
+{
+	register struct inode *ip = VTOI(ap->a_vp);
+
+	/* Set update and change flags. */
+	ip->i_flag |= IN_CHANGE | IN_UPDATE;
+	return (VOCALL (spec_vnodeop_p, VOFFSET(vop_write), ap));
+}
+
+/*
+ * Close wrapper for special devices: update the inode times when
+ * v_usecount > 1 and the inode is not IN_LOCKED, then do device close.
+ */
+int
+ufsspec_close(ap)
+	struct vop_close_args /* {
+		struct vnode *a_vp;
+		int  a_fflag;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	register struct vnode *vp = ap->a_vp;
+	register struct inode *ip = VTOI(vp);
+
+	if (vp->v_usecount > 1 && (ip->i_flag & IN_LOCKED) == 0)
+		ITIMES(ip, &time, &time);
+	return (VOCALL (spec_vnodeop_p, VOFFSET(vop_close), ap));
+}
+
+#ifdef FIFO
+/*
+ * Read wrapper for fifo's: mark the inode IN_ACCESS, then dispatch to
+ * the vop_read entry of fifo_vnodeop_p and return its result.
+ */
+int
+ufsfifo_read(ap)
+	struct vop_read_args /* {
+		struct vnode *a_vp;
+		struct uio *a_uio;
+		int  a_ioflag;
+		struct ucred *a_cred;
+	} */ *ap;
+{
+	extern int (**fifo_vnodeop_p)();
+	register struct inode *ip = VTOI(ap->a_vp);
+
+	/* Set access flag. */
+	ip->i_flag |= IN_ACCESS;
+	return (VOCALL (fifo_vnodeop_p, VOFFSET(vop_read), ap));
+}
+
+/*
+ * Write wrapper for fifo's: mark the inode IN_CHANGE and IN_UPDATE,
+ * then dispatch to the vop_write entry of fifo_vnodeop_p.
+ */
+int
+ufsfifo_write(ap)
+	struct vop_write_args /* {
+		struct vnode *a_vp;
+		struct uio *a_uio;
+		int  a_ioflag;
+		struct ucred *a_cred;
+	} */ *ap;
+{
+	extern int (**fifo_vnodeop_p)();
+	register struct inode *ip = VTOI(ap->a_vp);
+
+	/* Set update and change flags. */
+	ip->i_flag |= IN_CHANGE | IN_UPDATE;
+	return (VOCALL (fifo_vnodeop_p, VOFFSET(vop_write), ap));
+}
+
+/*
+ * Close wrapper for fifo's: update the inode times when v_usecount > 1
+ * and the inode is not IN_LOCKED, then return the fifo close's result.
+ */
+int
+ufsfifo_close(ap)
+	struct vop_close_args /* {
+		struct vnode *a_vp;
+		int  a_fflag;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	extern int (**fifo_vnodeop_p)();
+	register struct inode *ip = VTOI(ap->a_vp);
+
+	if (ap->a_vp->v_usecount > 1 && !(ip->i_flag & IN_LOCKED))
+		ITIMES(ip, &time, &time);
+	return (VOCALL (fifo_vnodeop_p, VOFFSET(vop_close), ap));
+}
+#endif /* FIFO */
+
+/*
+ * Return POSIX pathconf information applicable to ufs filesystems.
+ * The value for a_name is stored through a_retval and 0 is returned;
+ * an a_name not handled here yields EINVAL and leaves *a_retval alone.
+ */
+int
+ufs_pathconf(ap)
+	struct vop_pathconf_args /* {
+		struct vnode *a_vp;
+		int a_name;
+		int *a_retval;
+	} */ *ap;
+{
+	switch (ap->a_name) {
+	case _PC_LINK_MAX:
+		*ap->a_retval = LINK_MAX;
+		return (0);
+	case _PC_NAME_MAX:
+		*ap->a_retval = NAME_MAX;
+		return (0);
+	case _PC_PATH_MAX:
+		*ap->a_retval = PATH_MAX;
+		return (0);
+	case _PC_PIPE_BUF:
+		*ap->a_retval = PIPE_BUF;
+		return (0);
+	case _PC_CHOWN_RESTRICTED:
+	case _PC_NO_TRUNC:
+		*ap->a_retval = 1;
+		return (0);
+	default:
+		return (EINVAL);
+	}
+	/* NOTREACHED */
+}
+
+/*
+ * Advisory record locking support.
+ *
+ * Turns the flock in a_fl into a byte range, wraps it in a freshly
+ * allocated struct lockf and hands it to lf_setlock(), lf_clearlock() or
+ * lf_getlock() according to a_op.  Returns EINVAL for a bad l_whence, a
+ * negative start or an unknown a_op; otherwise the helper's result.
+ */
+int
+ufs_advlock(ap)
+	struct vop_advlock_args /* {
+		struct vnode *a_vp;
+		caddr_t  a_id;
+		int  a_op;
+		struct flock *a_fl;
+		int  a_flags;
+	} */ *ap;
+{
+	register struct inode *ip = VTOI(ap->a_vp);
+	register struct flock *fl = ap->a_fl;
+	register struct lockf *lock;
+	off_t start, end;
+	int error;
+
+	/*
+	 * Avoid the common case of unlocking when inode has no locks:
+	 * with an empty list every op except F_SETLK reports F_UNLCK.
+	 */
+	if (ip->i_lockf == (struct lockf *)0) {
+		if (ap->a_op != F_SETLK) {
+			fl->l_type = F_UNLCK;
+			return (0);
+		}
+	}
+	/* Convert the flock structure into a start and end. */
+	switch (fl->l_whence) {
+
+	case SEEK_SET:
+	case SEEK_CUR:
+		/*
+		 * Caller is responsible for adding any necessary offset
+		 * when SEEK_CUR is used.
+		 */
+		start = fl->l_start;
+		break;
+
+	case SEEK_END:
+		start = ip->i_size + fl->l_start;
+		break;
+
+	default:
+		return (EINVAL);
+	}
+	if (start < 0)
+		return (EINVAL);
+	if (fl->l_len == 0)
+		end = -1;	/* an l_len of 0 is encoded as end == -1 */
+	else
+		end = start + fl->l_len - 1;
+	/* Create the lockf structure. */
+	MALLOC(lock, struct lockf *, sizeof *lock, M_LOCKF, M_WAITOK);
+	lock->lf_start = start;
+	lock->lf_end = end;
+	lock->lf_id = ap->a_id;
+	lock->lf_inode = ip;
+	lock->lf_type = fl->l_type;
+	lock->lf_next = (struct lockf *)0;
+	lock->lf_block = (struct lockf *)0;
+	lock->lf_flags = ap->a_flags;
+	/* Do the requested operation. */
+	switch(ap->a_op) {
+	case F_SETLK:
+		return (lf_setlock(lock));	/* lock is not freed here */
+
+	case F_UNLCK:
+		error = lf_clearlock(lock);
+		FREE(lock, M_LOCKF);
+		return (error);
+
+	case F_GETLK:
+		error = lf_getlock(lock, fl);
+		FREE(lock, M_LOCKF);
+		return (error);
+	
+	default:
+		free(lock, M_LOCKF);
+		return (EINVAL);
+	}
+	/* NOTREACHED */
+}
+
+/*
+ * Initialize the vnode associated with a new inode, handle aliased
+ * vnodes.
+ *
+ * On return *vpp is the vnode to use, which is the alias found by
+ * checkalias() when there is one.  Returns 0, or EOPNOTSUPP for a fifo
+ * when the kernel is built without FIFO.
+ */
+int
+ufs_vinit(mntp, specops, fifoops, vpp)
+	struct mount *mntp;
+	int (**specops)();
+	int (**fifoops)();
+	struct vnode **vpp;
+{
+	struct inode *ip;
+	struct vnode *vp, *nvp;
+
+	vp = *vpp;
+	ip = VTOI(vp);
+	switch(vp->v_type = IFTOVT(ip->i_mode)) {
+	case VCHR:
+	case VBLK:
+		vp->v_op = specops;
+		if (nvp = checkalias(vp, ip->i_rdev, mntp)) {
+			/*
+			 * Discard unneeded vnode, but save its inode.
+			 */
+			ufs_ihashrem(ip);
+			VOP_UNLOCK(vp);
+			nvp->v_data = vp->v_data;
+			vp->v_data = NULL;
+			vp->v_op = spec_vnodeop_p;
+			vrele(vp);
+			vgone(vp);
+			/* Reinitialize aliased inode. */
+			vp = nvp;
+			ip->i_vnode = vp;
+			ufs_ihashins(ip);
+		}
+		break;
+	case VFIFO:
+#ifdef FIFO
+		vp->v_op = fifoops;
+		break;
+#else
+		return (EOPNOTSUPP);
+#endif
+	}
+	if (ip->i_number == ROOTINO)
+                vp->v_flag |= VROOT;
+	/* Initialize modrev times; 4294 ~ 2^32/10^6 (presumably usec scaling). */
+	SETHIGH(ip->i_modrev, mono_time.tv_sec);
+	SETLOW(ip->i_modrev, mono_time.tv_usec * 4294);
+	*vpp = vp;
+	return (0);
+}
+
+/*
+ * Allocate a new inode of type `mode' in directory dvp and enter it under
+ * the name in cnp.  On success *vpp is the new vnode; on failure *vpp is
+ * NULL and the error is returned.  dvp is vput() on every path, and
+ * cn_pnbuf is freed on every path except success with SAVESTART set.
+ */
+int
+ufs_makeinode(mode, dvp, vpp, cnp)
+	int mode;
+	struct vnode *dvp;
+	struct vnode **vpp;
+	struct componentname *cnp;
+{
+	register struct inode *ip, *pdir;
+	struct timeval tv;
+	struct vnode *tvp;
+	int error;
+
+	pdir = VTOI(dvp);
+#ifdef DIAGNOSTIC
+	if ((cnp->cn_flags & HASBUF) == 0)
+		panic("ufs_makeinode: no name");
+#endif
+	*vpp = NULL;
+	if ((mode & IFMT) == 0)
+		mode |= IFREG;
+
+	if (error = VOP_VALLOC(dvp, mode, cnp->cn_cred, &tvp)) {
+		free(cnp->cn_pnbuf, M_NAMEI);
+		vput(dvp);
+		return (error);
+	}
+	ip = VTOI(tvp);
+	ip->i_gid = pdir->i_gid;
+	if ((mode & IFMT) == IFLNK)
+		ip->i_uid = pdir->i_uid;
+	else
+		ip->i_uid = cnp->cn_cred->cr_uid;
+#ifdef QUOTA
+	if ((error = getinoquota(ip)) ||
+	    (error = chkiq(ip, 1, cnp->cn_cred, 0))) {
+		free(cnp->cn_pnbuf, M_NAMEI);
+		VOP_VFREE(tvp, ip->i_number, mode);
+		vput(tvp);
+		vput(dvp);
+		return (error);
+	}
+#endif
+	ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE;
+	ip->i_mode = mode;
+	tvp->v_type = IFTOVT(mode);	/* Rest init'd in getnewvnode(). */
+	ip->i_nlink = 1;
+	if ((ip->i_mode & ISGID) && !groupmember(ip->i_gid, cnp->cn_cred) &&
+	    suser(cnp->cn_cred, NULL))
+		ip->i_mode &= ~ISGID;
+
+	/* Make sure inode goes to disk before directory entry. */
+	tv = time;
+	if (error = VOP_UPDATE(tvp, &tv, &tv, 1))
+		goto bad;
+	if (error = ufs_direnter(ip, dvp, cnp))
+		goto bad;
+	if ((cnp->cn_flags & SAVESTART) == 0)
+		FREE(cnp->cn_pnbuf, M_NAMEI);
+	vput(dvp);
+	*vpp = tvp;
+	return (0);
+
+bad:
+	/*
+	 * Update or direnter failed, so must deallocate the inode.
+	 */
+	free(cnp->cn_pnbuf, M_NAMEI);
+	vput(dvp);
+	ip->i_nlink = 0;	/* no links left on the new inode */
+	ip->i_flag |= IN_CHANGE;
+	vput(tvp);
+	return (error);
+}
diff --git a/sys/ufs/ufs/ufsmount.h b/sys/ufs/ufs/ufsmount.h
new file mode 100644
index 00000000000..237871fdaac
--- /dev/null
+++ b/sys/ufs/ufs/ufsmount.h
@@ -0,0 +1,83 @@
+/*
+ * Copyright (c) 1982, 1986, 1989, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)ufsmount.h	8.2 (Berkeley) 1/12/94
+ */
+
+struct buf;
+struct inode;
+struct nameidata;
+struct timeval;
+struct ucred;
+struct uio;
+struct vnode;
+struct netexport;
+
+/* Per-mount UFS data; VFSTOUFS() casts a mount's mnt_data to this type. */
+struct ufsmount {
+	struct	mount *um_mountp;		/* filesystem vfs structure */
+	dev_t	um_dev;				/* device mounted */
+	struct	vnode *um_devvp;		/* block device mounted vnode */
+	union {					/* pointer to superblock */
+		struct	lfs *lfs;		/* LFS */
+		struct	fs *fs;			/* FFS */
+	} ufsmount_u;
+#define	um_fs	ufsmount_u.fs
+#define	um_lfs	ufsmount_u.lfs
+	struct	vnode *um_quotas[MAXQUOTAS];	/* pointer to quota files */
+	struct	ucred *um_cred[MAXQUOTAS];	/* quota file access cred */
+	u_long	um_nindir;			/* indirect ptrs per block */
+	u_long	um_bptrtodb;			/* indir ptr to disk block */
+	u_long	um_seqinc;			/* inc between seq blocks */
+	time_t	um_btime[MAXQUOTAS];		/* block quota time limit */
+	time_t	um_itime[MAXQUOTAS];		/* inode quota time limit */
+	char	um_qflags[MAXQUOTAS];		/* quota specific flags */
+	struct	netexport um_export;		/* export information */
+};
+/*
+ * Flags describing the state of quotas.
+ */
+#define	QTF_OPENING	0x01			/* Q_QUOTAON in progress */
+#define	QTF_CLOSING	0x02			/* Q_QUOTAOFF in progress */
+
+/* Convert mount ptr to ufsmount ptr. */
+#define VFSTOUFS(mp)	((struct ufsmount *)((mp)->mnt_data))
+
+/*
+ * Macros to access file system parameters in the ufsmount structure.
+ * Used by ufs_bmap.  Every argument is parenthesized in the expansion.
+ */
+#define	blkptrtodb(ump, b)	((b) << (ump)->um_bptrtodb)
+#define	is_sequential(ump, a, b) ((b) == (a) + (ump)->um_seqinc)
+#define MNINDIR(ump)	((ump)->um_nindir)
+
+
diff --git a/sys/vm/device_pager.c b/sys/vm/device_pager.c
new file mode 100644
index 00000000000..235c917a0c6
--- /dev/null
+++ b/sys/vm/device_pager.c
@@ -0,0 +1,368 @@
+/*
+ * Copyright (c) 1990 University of Utah.
+ * Copyright (c) 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * the Systems Programming Group of the University of Utah Computer
+ * Science Department.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)device_pager.c	8.5 (Berkeley) 1/12/94
+ */
+
+/*
+ * Page to/from special files.
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/conf.h>
+#include <sys/mman.h>
+#include <sys/malloc.h>
+
+#include <vm/vm.h>
+#include <vm/vm_kern.h>
+#include <vm/vm_page.h>
+#include <vm/device_pager.h>
+
+struct pagerlst	dev_pager_list;		/* list of managed devices */
+struct pglist	dev_pager_fakelist;	/* list of available vm_page_t's */
+
+#ifdef DEBUG
+int	dpagerdebug = 0;	/* mask of DDB_* bits enabling the printfs */
+#define	DDB_FOLLOW	0x01	/* print each routine's name and arguments */
+#define DDB_INIT	0x02	/* not tested anywhere in this file */
+#define DDB_ALLOC	0x04	/* print pager/devp/object in alloc, dealloc */
+#define DDB_FAIL	0x08	/* not tested anywhere in this file */
+#endif
+
+static vm_pager_t	 dev_pager_alloc
+			    __P((caddr_t, vm_size_t, vm_prot_t, vm_offset_t));
+static void		 dev_pager_dealloc __P((vm_pager_t));
+static int		 dev_pager_getpage
+			    __P((vm_pager_t, vm_page_t *, int, boolean_t));
+static boolean_t	 dev_pager_haspage __P((vm_pager_t, vm_offset_t));
+static void		 dev_pager_init __P((void));
+static int		 dev_pager_putpage
+			    __P((vm_pager_t, vm_page_t *, int, boolean_t));
+static vm_page_t	 dev_pager_getfake __P((vm_offset_t));
+static void		 dev_pager_putfake __P((vm_page_t));
+
+struct pagerops devicepagerops = {	/* set as pg_ops by dev_pager_alloc */
+	dev_pager_init,
+	dev_pager_alloc,
+	dev_pager_dealloc,
+	dev_pager_getpage,
+	dev_pager_putpage,
+	dev_pager_haspage,
+	vm_pager_clusternull
+};
+
+static void
+dev_pager_init()
+{
+#ifdef DEBUG
+	if ((dpagerdebug & DDB_FOLLOW) != 0)
+		printf("dev_pager_init()\n");
+#endif
+	TAILQ_INIT(&dev_pager_fakelist);	/* no spare fake pages yet */
+	TAILQ_INIT(&dev_pager_list);		/* no device pagers yet */
+}
+
+/*
+ * Find or create the pager for a character device.
+ *
+ * handle is the device's dev_t passed as a pointer.  The device must have
+ * a usable d_mmap entry, foff must be page aligned, and d_mmap must accept
+ * every page of [foff, foff + size) with protection prot; otherwise NULL
+ * is returned.  On success the new or existing pager is returned.
+ */
+static vm_pager_t
+dev_pager_alloc(handle, size, prot, foff)
+	caddr_t handle;
+	vm_size_t size;
+	vm_prot_t prot;
+	vm_offset_t foff;
+{
+	dev_t dev;
+	vm_pager_t pager;
+	int (*mapfunc)();
+	vm_object_t object;
+	dev_pager_t devp;
+	int npages, off;
+
+#ifdef DEBUG
+	if (dpagerdebug & DDB_FOLLOW)
+		printf("dev_pager_alloc(%x, %x, %x, %x)\n",
+		       handle, size, prot, foff);
+#endif
+#ifdef DIAGNOSTIC
+	/*
+	 * Pageout to device, should never happen.
+	 */
+	if (handle == NULL)
+		panic("dev_pager_alloc called");
+#endif
+
+	/* Make sure this device can be mapped. */
+	dev = (dev_t)handle;
+	mapfunc = cdevsw[major(dev)].d_mmap;
+	if (mapfunc == NULL || mapfunc == enodev || mapfunc == nullop)
+		return(NULL);
+
+	/* Offset should be page aligned. */
+	if (foff & PAGE_MASK)
+		return(NULL);
+
+	/*
+	 * Check that the specified range of the device allows the
+	 * desired protection.
+	 *
+	 * XXX assumes VM_PROT_* == PROT_*
+	 */
+	npages = atop(round_page(size));
+	for (off = foff; npages--; off += PAGE_SIZE)
+		if ((*mapfunc)(dev, off, (int)prot) == -1)
+			return(NULL);
+
+	/*
+	 * Look up pager, creating as necessary.
+	 */
+top:
+	pager = vm_pager_lookup(&dev_pager_list, handle);
+	if (pager == NULL) {
+		/* Allocate and initialize pager structs. */
+		pager = (vm_pager_t)malloc(sizeof *pager, M_VMPAGER, M_WAITOK);
+		if (pager == NULL)
+			return(NULL);
+		devp = (dev_pager_t)malloc(sizeof *devp, M_VMPGDATA, M_WAITOK);
+		if (devp == NULL) {
+			free((caddr_t)pager, M_VMPAGER);
+			return(NULL);
+		}
+		pager->pg_handle = handle;
+		pager->pg_ops = &devicepagerops;
+		pager->pg_type = PG_DEVICE;
+		pager->pg_flags = 0;
+		pager->pg_data = devp;
+		TAILQ_INIT(&devp->devp_pglist);
+		/* Allocate object and associate it with the pager. */
+		object = devp->devp_object = vm_object_allocate(0);
+		vm_object_enter(object, pager);
+		vm_object_setpager(object, pager, (vm_offset_t)0, FALSE);
+		/*
+		 * Finally, put it on the managed list so others can find it.
+		 * Re-lookup first: the mallocs may have blocked and let
+		 * someone else get here.  If so, free both and start over.
+		 * NOTE(review): object is not released here -- confirm.
+		 */
+		if (vm_pager_lookup(&dev_pager_list, handle)) {
+			free((caddr_t)devp, M_VMPGDATA);
+			free((caddr_t)pager, M_VMPAGER);
+			goto top;
+		}
+		TAILQ_INSERT_TAIL(&dev_pager_list, pager, pg_list);
+#ifdef DEBUG
+		if (dpagerdebug & DDB_ALLOC) {
+			printf("dev_pager_alloc: pager %x devp %x object %x\n",
+			       pager, devp, object);
+			vm_object_print(object, FALSE);
+		}
+#endif
+	} else {
+		/*
+		 * vm_object_lookup() gains a reference and also
+		 * removes the object from the cache.
+		 */
+		object = vm_object_lookup(pager);
+#ifdef DIAGNOSTIC
+		devp = (dev_pager_t)pager->pg_data;
+		if (object != devp->devp_object)
+			panic("dev_pager_setup: bad object");
+#endif
+	}
+	return(pager);
+}
+
+/*
+ * Tear down a device pager and give its fake pages back to the free list.
+ */
+static void
+dev_pager_dealloc(pager)
+	vm_pager_t pager;
+{
+	dev_pager_t devp;
+	vm_object_t object;
+	vm_page_t m;
+
+#ifdef DEBUG
+	if ((dpagerdebug & DDB_FOLLOW) != 0)
+		printf("dev_pager_dealloc(%x)\n", pager);
+#endif
+	TAILQ_REMOVE(&dev_pager_list, pager, pg_list);
+	/*
+	 * vm_object_lookup cannot find the object any more (it is already
+	 * off the hash chain), so take it from the pager's private data.
+	 */
+	devp = (dev_pager_t)pager->pg_data;
+	object = devp->devp_object;
+#ifdef DEBUG
+	if ((dpagerdebug & DDB_ALLOC) != 0)
+		printf("dev_pager_dealloc: devp %x object %x\n", devp, object);
+#endif
+	for (m = devp->devp_pglist.tqh_first; m != NULL;
+	    m = devp->devp_pglist.tqh_first) {
+		TAILQ_REMOVE(&devp->devp_pglist, m, pageq);
+		dev_pager_putfake(m);	/* free up our fake pages */
+	}
+	free((caddr_t)devp, M_VMPGDATA);
+	free((caddr_t)pager, M_VMPAGER);
+}
+
+/*
+ * Map one device page: replace *mlist with a fake page at the d_mmap address.
+ */
+static int
+dev_pager_getpage(pager, mlist, npages, sync)
+	vm_pager_t pager;
+	vm_page_t *mlist;
+	int npages;
+	boolean_t sync;
+{
+	register vm_object_t object;
+	vm_offset_t offset, paddr;
+	vm_page_t page;
+	dev_t dev;
+	int (*mapfunc)(), prot;
+	vm_page_t m;
+
+#ifdef DEBUG
+	if (dpagerdebug & DDB_FOLLOW)
+		printf("dev_pager_getpage(%x, %x, %x, %x)\n",
+		       pager, mlist, npages, sync);
+#endif
+
+	if (npages != 1)
+		panic("dev_pager_getpage: cannot handle multiple pages");
+	m = *mlist;
+
+	object = m->object;
+	dev = (dev_t)pager->pg_handle;
+	offset = m->offset + object->paging_offset;
+	prot = PROT_READ;	/* XXX should pass in? */
+	mapfunc = cdevsw[major(dev)].d_mmap;
+#ifdef DIAGNOSTIC
+	if (mapfunc == NULL || mapfunc == enodev || mapfunc == nullop)
+		panic("dev_pager_getpage: no map function");
+#endif
+	paddr = pmap_phys_address((*mapfunc)(dev, (int)offset, prot));
+#ifdef DIAGNOSTIC
+	if (paddr == -1)	/* NOTE(review): tests the converted value */
+		panic("dev_pager_getpage: map function returns error");
+#endif
+	/* Swap the passed in page for our own fake page; free the original. */
+	page = dev_pager_getfake(paddr);
+	TAILQ_INSERT_TAIL(&((dev_pager_t)pager->pg_data)->devp_pglist, page,
+	    pageq);
+	vm_object_lock(object);
+	vm_page_lock_queues();
+	vm_page_free(m);
+	vm_page_insert(page, object, offset);
+	vm_page_unlock_queues();
+	PAGE_WAKEUP(m);	/* NOTE(review): m went to vm_page_free() above */
+	if (offset + PAGE_SIZE > object->size)
+		object->size = offset + PAGE_SIZE;	/* XXX paranoia */
+	vm_object_unlock(object);
+
+	return(VM_PAGER_OK);
+}
+
+static int
+dev_pager_putpage(pager, mlist, npages, sync)
+	vm_pager_t pager;
+	vm_page_t *mlist;
+	int npages;
+	boolean_t sync;
+{
+#ifdef DEBUG
+	if (dpagerdebug & DDB_FOLLOW)
+		printf("dev_pager_putpage(%x, %x, %x, %x)\n",
+		       pager, mlist, npages, sync);
+#endif
+	if (pager != NULL)	/* pageout to a device should never happen */
+		panic("dev_pager_putpage called");
+	return(VM_PAGER_OK);	/* NULL pager: nothing to do, defined result */
+}
+
+static boolean_t
+dev_pager_haspage(pager, offset)
+	vm_pager_t pager;
+	vm_offset_t offset;
+{
+#ifdef DEBUG
+	if ((dpagerdebug & DDB_FOLLOW) != 0)
+		printf("dev_pager_haspage(%x, %x)\n", pager, offset);
+#endif
+	return (TRUE);	/* unconditional: neither argument is examined */
+}
+
+/* Return a fake page for paddr, refilling dev_pager_fakelist if empty. */
+static vm_page_t
+dev_pager_getfake(paddr)
+	vm_offset_t paddr;
+{
+	vm_page_t m;
+	int i;
+
+	if (dev_pager_fakelist.tqh_first == NULL) {
+		/* Empty: carve one PAGE_SIZE chunk into vm_page structs. */
+		m = (vm_page_t)malloc(PAGE_SIZE, M_VMPGDATA, M_WAITOK);
+		for (i = PAGE_SIZE / sizeof(*m); i > 0; i--, m++)
+			TAILQ_INSERT_TAIL(&dev_pager_fakelist, m, pageq);
+	}
+	m = dev_pager_fakelist.tqh_first;
+	TAILQ_REMOVE(&dev_pager_fakelist, m, pageq);
+	m->flags = PG_BUSY | PG_CLEAN | PG_FAKE | PG_FICTITIOUS;
+	m->phys_addr = paddr;
+	m->wire_count = 1;
+	return (m);
+}
+
+static void
+dev_pager_putfake(m)
+	vm_page_t m;
+{
+#ifdef DIAGNOSTIC
+	if ((m->flags & PG_FICTITIOUS) == 0)	/* not one of our fake pages */
+		panic("dev_pager_putfake: bad page");
+#endif
+	TAILQ_INSERT_TAIL(&dev_pager_fakelist, m, pageq);	/* reuse later */
+}
diff --git a/sys/vm/device_pager.h b/sys/vm/device_pager.h
new file mode 100644
index 00000000000..8840622919d
--- /dev/null
+++ b/sys/vm/device_pager.h
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 1990 University of Utah.
+ * Copyright (c) 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * the Systems Programming Group of the University of Utah Computer
+ * Science Department.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)device_pager.h	8.3 (Berkeley) 12/13/93
+ */
+
+#ifndef	_DEVICE_PAGER_
+#define	_DEVICE_PAGER_	1
+
+/*
+ * Device pager private data; device_pager.c stores it in pg_data.
+ */
+struct devpager {
+	struct pglist	devp_pglist;	/* list of pages allocated */
+	vm_object_t	devp_object;	/* object representing this device */
+};
+typedef struct devpager	*dev_pager_t;	/* pointer form used by the pager */
+
+#endif	/* _DEVICE_PAGER_ */
diff --git a/sys/vm/kern_lock.c b/sys/vm/kern_lock.c
new file mode 100644
index 00000000000..c4fa05230a7
--- /dev/null
+++ b/sys/vm/kern_lock.c
@@ -0,0 +1,534 @@
+/* 
+ * Copyright (c) 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * The Mach Operating System project at Carnegie-Mellon University.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)kern_lock.c	8.1 (Berkeley) 6/11/93
+ *
+ *
+ * Copyright (c) 1987, 1990 Carnegie-Mellon University.
+ * All rights reserved.
+ *
+ * Authors: Avadis Tevanian, Jr., Michael Wayne Young
+ * 
+ * Permission to use, copy, modify and distribute this software and
+ * its documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ * 
+ * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 
+ * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND 
+ * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ * 
+ * Carnegie Mellon requests users of this software to return to
+ *
+ *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
+ *  School of Computer Science
+ *  Carnegie Mellon University
+ *  Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie the
+ * rights to redistribute these changes.
+ */
+
+/*
+ *	Locking primitives implementation
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+
+#include <vm/vm.h>
+
+/* XXX stand-in for a real thread type: identity is an address inside curproc */
+#include <sys/proc.h>
+typedef	int *thread_t;		/* in this file only compared against l->thread */
+#define	current_thread()	((thread_t)&curproc->p_thread)
+/* XXX end of thread_t stand-in */
+
+#if	NCPUS > 1
+
+/*
+ *	Module:		lock
+ *	Function:
+ *		Provide reader/writer synchronization.
+ *	Implementation:
+ *		Simple interlock on a bit.  Readers first interlock,
+ *		increment the reader count, then let go.  Writers hold
+ *		the interlock (thus preventing further readers), and
+ *		wait for already-accepted readers to go away.
+ */
+
+/*
+ *	The simple-lock routines are the primitives out of which
+ *	the lock package is built.  The implementation is left
+ *	to the machine-dependent code.
+ */
+
+#ifdef	notdef
+/*
+ *	A sample implementation of simple locks (never compiled;
+ *	kept as a reference for machine-dependent code).  It assumes:
+ *		boolean_t test_and_set(boolean_t *)
+ *			indivisibly sets the boolean to TRUE
+ *			and returns its old value
+ *		and that setting a boolean to FALSE is indivisible.
+ */
+/*
+ *	simple_lock_init initializes a simple lock.  A simple lock
+ *	may only be used for exclusive locks.
+ */
+
+void simple_lock_init(l)
+	simple_lock_t	l;
+{
+	*(boolean_t *)l = FALSE;	/* FALSE means "not held" */
+}
+
+void simple_lock(l)
+	simple_lock_t	l;
+{
+	while (test_and_set((boolean_t *)l))	/* old value TRUE: still held */
+		continue;
+}
+
+void simple_unlock(l)
+	simple_lock_t	l;
+{
+	*(boolean_t *)l = FALSE;	/* release with a single store */
+}
+
+boolean_t simple_lock_try(l)
+	simple_lock_t	l;
+{
+    	return (!test_and_set((boolean_t *)l));	/* TRUE iff we took it */
+}
+#endif /* notdef */
+#endif /* NCPUS > 1 */
+
+#if	NCPUS > 1
+int lock_wait_time = 100;	/* upper bound on spin iterations per wait */
+#else /* NCPUS > 1 */
+
+	/*
+	 *	It is silly to spin on a uni-processor as if we
+	 *	thought something magical would happen to the
+	 *	want_write bit while we are executing.
+	 */
+int lock_wait_time = 0;		/* 0: the lock routines skip their spin phase */
+#endif /* NCPUS > 1 */
+
+
+/*
+ *	Routine:	lock_init
+ *	Function:
+ *		Put a caller-supplied "struct lock" into its initial
+ *		state.  Must be done before any other lock_* call;
+ *		the lock storage itself is declared by the client,
+ *		never handed out by this module.
+ */
+void lock_init(l, can_sleep)
+	lock_t		l;
+	boolean_t	can_sleep;
+{
+	bzero(l, sizeof(*l));			/* zeroes "waiting" too */
+	simple_lock_init(&l->interlock);
+	l->can_sleep = can_sleep;
+	l->read_count = 0;
+	l->want_upgrade = FALSE;
+	l->want_write = FALSE;
+	l->recursion_depth = 0;
+	l->thread = (char *)-1;			/* XXX: no recursive owner */
+}
+/* lock_sleepable: change whether waiters on l may sleep (l->can_sleep). */
+void lock_sleepable(l, can_sleep)
+	lock_t		l;
+	boolean_t	can_sleep;	/* new value for l->can_sleep */
+{
+	simple_lock(&l->interlock);	/* field is changed under the interlock */
+	l->can_sleep = can_sleep;
+	simple_unlock(&l->interlock);
+}
+
+
+/*
+ *	Sleep locks.  These use the same data structure and algorithm
+ *	as the spin locks, but the process sleeps while it is waiting
+ *	for the lock.  These work on uniprocessor systems.
+ */
+/* lock_write: get l for writing; may spin lock_wait_time and/or sleep. */
+void lock_write(l)
+	register lock_t	l;
+{
+	register int	i;		/* spin countdown */
+
+	simple_lock(&l->interlock);
+
+	if (((thread_t)l->thread) == current_thread()) {
+		/*
+		 *	Recursive lock: we are l->thread, just nest deeper.
+		 */
+		l->recursion_depth++;
+		simple_unlock(&l->interlock);
+		return;
+	}
+
+	/*
+	 *	Try to acquire the want_write bit (busy-waits if !can_sleep).
+	 */
+	while (l->want_write) {
+		if ((i = lock_wait_time) > 0) {	/* spin with interlock dropped */
+			simple_unlock(&l->interlock);
+			while (--i > 0 && l->want_write)
+				continue;
+			simple_lock(&l->interlock);
+		}
+
+		if (l->can_sleep && l->want_write) {
+			l->waiting = TRUE;	/* ask the releaser for a wakeup */
+			thread_sleep((int) l, &l->interlock, FALSE);
+			simple_lock(&l->interlock);	/* retaken after the sleep */
+		}
+	}
+	l->want_write = TRUE;		/* lock_read/lock_write now wait on us */
+
+	/* Wait for readers (and upgrades) to finish */
+
+	while ((l->read_count != 0) || l->want_upgrade) {
+		if ((i = lock_wait_time) > 0) {	/* same spin-then-sleep scheme */
+			simple_unlock(&l->interlock);
+			while (--i > 0 && (l->read_count != 0 ||
+					l->want_upgrade))
+				continue;
+			simple_lock(&l->interlock);
+		}
+
+		if (l->can_sleep && (l->read_count != 0 || l->want_upgrade)) {
+			l->waiting = TRUE;
+			thread_sleep((int) l, &l->interlock, FALSE);
+			simple_lock(&l->interlock);
+		}
+	}
+	simple_unlock(&l->interlock);
+}
+/* lock_done: drop one hold on l (read, recursive, upgrade or write). */
+void lock_done(l)
+	register lock_t	l;
+{
+	simple_lock(&l->interlock);
+	/* Exactly one kind of hold is released; reads are checked first. */
+	if (l->read_count == 0) {
+		if (l->recursion_depth == 0) {
+			if (l->want_upgrade)
+				l->want_upgrade = FALSE;
+			else
+				l->want_write = FALSE;
+		} else
+			l->recursion_depth--;
+	} else
+		l->read_count--;
+
+	if (l->waiting) {		/* somebody asked for a wakeup */
+		l->waiting = FALSE;
+		thread_wakeup((int) l);
+	}
+	simple_unlock(&l->interlock);
+}
+/* lock_read: get l for reading; waits out any writer or pending upgrade. */
+void lock_read(l)
+	register lock_t	l;
+{
+	register int	i;		/* spin countdown */
+
+	simple_lock(&l->interlock);
+
+	if (((thread_t)l->thread) == current_thread()) {
+		/*
+		 *	Recursive lock: l->thread reads without waiting.
+		 */
+		l->read_count++;
+		simple_unlock(&l->interlock);
+		return;
+	}
+
+	while (l->want_write || l->want_upgrade) {
+		if ((i = lock_wait_time) > 0) {	/* spin with interlock dropped */
+			simple_unlock(&l->interlock);
+			while (--i > 0 && (l->want_write || l->want_upgrade))
+				continue;
+			simple_lock(&l->interlock);
+		}
+
+		if (l->can_sleep && (l->want_write || l->want_upgrade)) {
+			l->waiting = TRUE;	/* ask the releaser for a wakeup */
+			thread_sleep((int) l, &l->interlock, FALSE);
+			simple_lock(&l->interlock);
+		}
+	}
+
+	l->read_count++;		/* now counted as an accepted reader */
+	simple_unlock(&l->interlock);
+}
+
+/*
+ *	Routine:	lock_read_to_write
+ *	Function:
+ *		Improves a read-only lock to one with write
+ *		permission.  The caller's read hold is always given
+ *		up first; if another reader has already requested an
+ *		upgrade, no lock is held upon return.
+ *
+ *		Returns TRUE if the upgrade *failed*, FALSE on success.
+ */
+boolean_t lock_read_to_write(l)
+	register lock_t	l;
+{
+	register int	i;		/* spin countdown */
+
+	simple_lock(&l->interlock);
+
+	l->read_count--;		/* our read hold goes away on every path */
+
+	if (((thread_t)l->thread) == current_thread()) {
+		/*
+		 *	Recursive lock: swap the read hold for a nesting level.
+		 */
+		l->recursion_depth++;
+		simple_unlock(&l->interlock);
+		return(FALSE);
+	}
+
+	if (l->want_upgrade) {
+		/*
+		 *	Someone else has requested upgrade.
+		 *	Since we've released a read lock, wake
+		 *	him up.
+		 */
+		if (l->waiting) {
+			l->waiting = FALSE;
+			thread_wakeup((int) l);
+		}
+
+		simple_unlock(&l->interlock);
+		return (TRUE);
+	}
+
+	l->want_upgrade = TRUE;		/* we are now the one pending upgrader */
+
+	while (l->read_count != 0) {	/* wait for the other readers to leave */
+		if ((i = lock_wait_time) > 0) {
+			simple_unlock(&l->interlock);
+			while (--i > 0 && l->read_count != 0)
+				continue;
+			simple_lock(&l->interlock);
+		}
+
+		if (l->can_sleep && l->read_count != 0) {
+			l->waiting = TRUE;
+			thread_sleep((int) l, &l->interlock, FALSE);
+			simple_lock(&l->interlock);
+		}
+	}
+
+	simple_unlock(&l->interlock);
+	return (FALSE);
+}
+/* lock_write_to_read: trade one write hold on l for a read hold. */
+void lock_write_to_read(l)
+	register lock_t	l;
+{
+	simple_lock(&l->interlock);
+
+	/* Give up the write-side hold (recursive, upgrade or plain)... */
+	if (l->recursion_depth == 0) {
+		if (l->want_upgrade)
+			l->want_upgrade = FALSE;
+		else
+			l->want_write = FALSE;
+	} else
+		l->recursion_depth--;
+	l->read_count++;		/* ...and count ourselves as a reader */
+
+	if (l->waiting) {		/* somebody asked for a wakeup */
+		l->waiting = FALSE;
+		thread_wakeup((int) l);
+	}
+	simple_unlock(&l->interlock);
+}
+
+
+/*
+ *	Routine:	lock_try_write
+ *	Function:
+ *		Non-blocking attempt at a write lock: never spins
+ *		or sleeps.  Returns TRUE with the lock held for
+ *		write, or FALSE if the lock is not held on return.
+ */
+
+boolean_t lock_try_write(l)
+	register lock_t	l;
+{
+	boolean_t	got_it;
+
+	simple_lock(&l->interlock);
+
+	if (((thread_t)l->thread) == current_thread()) {
+		/*
+		 *	Recursive owner: one more level of nesting.
+		 */
+		l->recursion_depth++;
+		got_it = TRUE;
+	} else if (l->want_write || l->want_upgrade || l->read_count) {
+		/*
+		 *	Held or claimed by someone else; don't wait.
+		 */
+		got_it = FALSE;
+	} else {
+		/*
+		 *	Lock is idle: claim it for write.
+		 */
+		l->want_write = TRUE;
+		got_it = TRUE;
+	}
+	/*
+	 *	Single exit: the interlock is released on every path.
+	 */
+	simple_unlock(&l->interlock);
+	return(got_it);
+}
+
+/*
+ *	Routine:	lock_try_read
+ *	Function:
+ *		Non-blocking attempt at a read lock: never spins
+ *		or sleeps.  Returns TRUE with a read hold added,
+ *		or FALSE if the lock is not held on return.
+ */
+
+boolean_t lock_try_read(l)
+	register lock_t	l;
+{
+	boolean_t	granted;
+
+	simple_lock(&l->interlock);
+
+	/*
+	 *	The recursive owner always gets in.  Anyone else
+	 *	is refused while a writer or an upgrader has
+	 *	claimed the lock.
+	 */
+	if (((thread_t)l->thread) != current_thread() &&
+	    (l->want_write || l->want_upgrade)) {
+		granted = FALSE;
+	} else {
+		l->read_count++;
+		granted = TRUE;
+	}
+
+	simple_unlock(&l->interlock);
+	return(granted);
+}
+
+/*
+ *	Routine:	lock_try_read_to_write
+ *	Function:
+ *		Improves a read-only lock to one with write
+ *		permission.  "Try" only covers a competing upgrade:
+ *		in that case the read lock is still held upon return.
+ *		Otherwise it waits for the other readers to drain.
+ *
+ *		Returns FALSE if the upgrade *failed*, else TRUE.
+ */
+boolean_t lock_try_read_to_write(l)
+	register lock_t	l;
+{
+
+	simple_lock(&l->interlock);
+
+	if (((thread_t)l->thread) == current_thread()) {
+		/*
+		 *	Recursive lock: swap the read hold for a nesting level.
+		 */
+		l->read_count--;
+		l->recursion_depth++;
+		simple_unlock(&l->interlock);
+		return(TRUE);
+	}
+
+	if (l->want_upgrade) {		/* another upgrade pending: keep read hold */
+		simple_unlock(&l->interlock);
+		return(FALSE);
+	}
+	l->want_upgrade = TRUE;
+	l->read_count--;		/* our own read hold no longer counts */
+
+	while (l->read_count != 0) {	/* NOTE(review): no can_sleep check here, */
+		l->waiting = TRUE;	/* unlike lock_read_to_write -- confirm */
+		thread_sleep((int) l, &l->interlock, FALSE);
+		simple_lock(&l->interlock);
+	}
+
+	simple_unlock(&l->interlock);
+	return(TRUE);
+}
+
+/*
+ *	Allow a process that has a lock for write to acquire it
+ *	recursively (for read, write, or update); panics otherwise.
+ */
+void lock_set_recursive(l)
+	lock_t		l;
+{
+	simple_lock(&l->interlock);
+	if (!l->want_write) {		/* only want_write is checked here */
+		panic("lock_set_recursive: don't have write lock");
+	}
+	l->thread = (char *) current_thread();	/* caller becomes recursive owner */
+	simple_unlock(&l->interlock);
+}
+
+/*
+ *	Prevent a lock from being re-acquired (recursive owner only).
+ */
+void lock_clear_recursive(l)
+	lock_t		l;
+{
+	simple_lock(&l->interlock);
+	if (((thread_t) l->thread) != current_thread()) {
+		panic("lock_clear_recursive: wrong thread");
+	}
+	if (l->recursion_depth == 0)	/* while still nested, owner stays set */
+		l->thread = (char *)-1;		/* XXX */
+	simple_unlock(&l->interlock);
+}
diff --git a/sys/vm/lock.h b/sys/vm/lock.h
new file mode 100644
index 00000000000..26bed1f048a
--- /dev/null
+++ b/sys/vm/lock.h
@@ -0,0 +1,172 @@
+/* 
+ * Copyright (c) 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * The Mach Operating System project at Carnegie-Mellon University.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)lock.h	8.1 (Berkeley) 6/11/93
+ *
+ *
+ * Copyright (c) 1987, 1990 Carnegie-Mellon University.
+ * All rights reserved.
+ *
+ * Authors: Avadis Tevanian, Jr., Michael Wayne Young
+ * 
+ * Permission to use, copy, modify and distribute this software and
+ * its documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ * 
+ * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 
+ * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND 
+ * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ * 
+ * Carnegie Mellon requests users of this software to return to
+ *
+ *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
+ *  School of Computer Science
+ *  Carnegie Mellon University
+ *  Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie the
+ * rights to redistribute these changes.
+ */
+
+/*
+ *	Locking primitives definitions
+ */
+
+#ifndef	_LOCK_H_
+#define	_LOCK_H_
+
+#define	NCPUS	1		/* XXX hard-wired uniprocessor configuration */
+
+/*
+ *	A simple spin lock.
+ */
+
+struct slock {
+	int		lock_data;	/* in general 1 bit is sufficient */
+};
+
+typedef struct slock	simple_lock_data_t;	/* the lock object itself */
+typedef struct slock	*simple_lock_t;		/* pointer form taken by simple_*() */
+
+/*
+ *	The general lock structure.  Provides for multiple readers,
+ *	upgrading from read to write, and sleeping until the lock
+ *	can be gained.
+ */
+
+struct lock {
+#ifdef	vax
+	/*
+	 *	Efficient VAX implementation -- see field description below.
+	 */
+	unsigned int	read_count:16,
+			want_upgrade:1,
+			want_write:1,
+			waiting:1,
+			can_sleep:1,
+			:0;
+
+	simple_lock_data_t	interlock;
+#else /* vax */
+#ifdef	ns32000
+	/*
+	 *	Efficient ns32000 implementation --
+	 *	see field description below.
+	 */
+	simple_lock_data_t	interlock;
+	unsigned int	read_count:16,
+			want_upgrade:1,
+			want_write:1,
+			waiting:1,
+			can_sleep:1,
+			:0;
+
+#else /* ns32000 */
+	/*	Only the "interlock" field is used for hardware exclusion;
+	 *	other fields are modified with normal instructions after
+	 *	acquiring the interlock bit.
+	 */
+	simple_lock_data_t
+			interlock;	/* Interlock for remaining fields */
+	boolean_t	want_write;	/* Writer is waiting, or locked for write */
+	boolean_t	want_upgrade;	/* Read-to-write upgrade waiting */
+	boolean_t	waiting;	/* Someone is sleeping on lock */
+	boolean_t	can_sleep;	/* Can attempts to lock go to sleep */
+	int		read_count;	/* Number of accepted readers */
+#endif	/* ns32000 */
+#endif	/* vax */
+	char		*thread;	/* Thread that has lock, if recursive locking allowed */
+					/* (should be thread_t, but we then have mutually
+					   recursive definitions); (char *)-1 when unset */
+	int		recursion_depth;/* Depth of recursion */
+};
+
+typedef struct lock	lock_data_t;	/* the lock object clients declare */
+typedef struct lock	*lock_t;	/* pointer form taken by lock_*() */
+
+#if NCPUS > 1	/* real simple locks; implemented by machine-dependent code */
+__BEGIN_DECLS
+void		simple_lock __P((simple_lock_t));
+void		simple_lock_init __P((simple_lock_t));
+boolean_t	simple_lock_try __P((simple_lock_t));
+void		simple_unlock __P((simple_lock_t));
+__END_DECLS
+#else		/* No multiprocessor locking is necessary. */
+#define	simple_lock(l)			/* Expands to nothing. */
+#define	simple_lock_init(l)		/* Expands to nothing. */
+#define	simple_lock_try(l)	(1)	/* Always succeeds. */
+#define	simple_unlock(l)		/* Expands to nothing. */
+#endif /* NCPUS > 1 */
+
+/* Sleep locks must work even if no multiprocessing. */
+
+#define	lock_read_done(l)	lock_done(l)	/* read and write holds are */
+#define	lock_write_done(l)	lock_done(l)	/* both released by lock_done */
+
+void		lock_clear_recursive __P((lock_t));	/* end recursive ownership */
+void		lock_done __P((lock_t));		/* release one hold */
+void		lock_init __P((lock_t, boolean_t));	/* (lock, can_sleep) */
+void		lock_read __P((lock_t));		/* acquire for read */
+boolean_t	lock_read_to_write __P((lock_t));	/* TRUE if upgrade failed */
+void		lock_set_recursive __P((lock_t));	/* begin recursive ownership */
+void		lock_sleepable __P((lock_t, boolean_t));	/* set can_sleep */
+boolean_t	lock_try_read __P((lock_t));		/* FALSE if not acquired */
+boolean_t	lock_try_read_to_write __P((lock_t));	/* FALSE if upgrade failed */
+boolean_t	lock_try_write __P((lock_t));		/* FALSE if not acquired */
+void		lock_write __P((lock_t));		/* acquire for write */
+void		lock_write_to_read __P((lock_t));	/* downgrade write to read */
+#endif /* !_LOCK_H_ */
diff --git a/sys/vm/pmap.h b/sys/vm/pmap.h
new file mode 100644
index 00000000000..63a83c90578
--- /dev/null
+++ b/sys/vm/pmap.h
@@ -0,0 +1,122 @@
+/* 
+ * Copyright (c) 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * The Mach Operating System project at Carnegie-Mellon University.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)pmap.h	8.1 (Berkeley) 6/11/93
+ *
+ *
+ * Copyright (c) 1987, 1990 Carnegie-Mellon University.
+ * All rights reserved.
+ *
+ * Author: Avadis Tevanian, Jr.
+ * 
+ * Permission to use, copy, modify and distribute this software and
+ * its documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ * 
+ * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 
+ * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND 
+ * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ * 
+ * Carnegie Mellon requests users of this software to return to
+ *
+ *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
+ *  School of Computer Science
+ *  Carnegie Mellon University
+ *  Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie the
+ * rights to redistribute these changes.
+ */
+
+/*
+ *	Machine address mapping definitions -- machine-independent
+ *	section.  [For machine-dependent section, see "machine/pmap.h".]
+ */
+
+#ifndef	_PMAP_VM_
+#define	_PMAP_VM_
+
+/*
+ * Each machine dependent implementation is expected to
+ * keep certain statistics.  They may do this any way they
+ * so choose, but are expected to return the statistics
+ * in the following structure.
+ */
+struct pmap_statistics {
+	long		resident_count;	/* # of pages mapped (total) */
+	long		wired_count;	/* # of pages wired */
+};
+typedef struct pmap_statistics	*pmap_statistics_t;	/* pointer to the above */
+
+#include <machine/pmap.h>
+
+#ifdef KERNEL	/* pmap entry points are declared for kernel builds only */
+__BEGIN_DECLS
+void		*pmap_bootstrap_alloc __P((int));
+void		 pmap_bootstrap( /* arguments are machine dependent */ );
+void		 pmap_change_wiring __P((pmap_t, vm_offset_t, boolean_t));
+void		 pmap_clear_modify __P((vm_offset_t pa));
+void		 pmap_clear_reference __P((vm_offset_t pa));
+void		 pmap_collect __P((pmap_t));
+void		 pmap_copy __P((pmap_t,
+		    pmap_t, vm_offset_t, vm_size_t, vm_offset_t));
+void		 pmap_copy_page __P((vm_offset_t, vm_offset_t));
+pmap_t		 pmap_create __P((vm_size_t));
+void		 pmap_destroy __P((pmap_t));
+void		 pmap_enter __P((pmap_t,
+		    vm_offset_t, vm_offset_t, vm_prot_t, boolean_t));
+vm_offset_t	 pmap_extract __P((pmap_t, vm_offset_t));
+void		 pmap_init __P((vm_offset_t, vm_offset_t));
+boolean_t	 pmap_is_modified __P((vm_offset_t pa));
+boolean_t	 pmap_is_referenced __P((vm_offset_t pa));
+vm_offset_t	 pmap_map __P((vm_offset_t, vm_offset_t, vm_offset_t, int));
+void		 pmap_page_protect __P((vm_offset_t, vm_prot_t));
+void		 pmap_pageable __P((pmap_t,
+		    vm_offset_t, vm_offset_t, boolean_t));
+vm_offset_t	 pmap_phys_address __P((int));
+void		 pmap_pinit __P((pmap_t));
+void		 pmap_protect __P((pmap_t,
+		    vm_offset_t, vm_offset_t, vm_prot_t));
+void		 pmap_reference __P((pmap_t));
+void		 pmap_release __P((pmap_t));
+void		 pmap_remove __P((pmap_t, vm_offset_t, vm_offset_t));
+void		 pmap_update __P((void));
+void		 pmap_zero_page __P((vm_offset_t));
+__END_DECLS
+#endif /* KERNEL */
+
+#endif /* _PMAP_VM_ */
diff --git a/sys/vm/swap_pager.c b/sys/vm/swap_pager.c
new file mode 100644
index 00000000000..899a6cf41a0
--- /dev/null
+++ b/sys/vm/swap_pager.c
@@ -0,0 +1,1009 @@
+/*
+ * Copyright (c) 1990 University of Utah.
+ * Copyright (c) 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * the Systems Programming Group of the University of Utah Computer
+ * Science Department.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * from: Utah $Hdr: swap_pager.c 1.4 91/04/30$
+ *
+ *	@(#)swap_pager.c	8.9 (Berkeley) 3/21/94
+ */
+
+/*
+ * Quick hack to page to dedicated partition(s).
+ * TODO:
+ *	Add multiprocessor locks
+ *	Deal with async writes in a better fashion
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/buf.h>
+#include <sys/map.h>
+#include <sys/vnode.h>
+#include <sys/malloc.h>
+
+#include <miscfs/specfs/specdev.h>
+
+#include <vm/vm.h>
+#include <vm/vm_page.h>
+#include <vm/vm_pageout.h>
+#include <vm/swap_pager.h>
+
+#define NSWSIZES	16	/* size of swtab (plus one terminating entry) */
+#define MAXDADDRS	64	/* max # of disk addrs for fixed allocations */
+#ifndef NPENDINGIO
+#define NPENDINGIO	64	/* max # of pending cleans */
+#endif /* !NPENDINGIO */
+
+#ifdef DEBUG
+int	swpagerdebug = 0x100;	/* mask of SDB_* bits; default is SDB_ANOM */
+#define	SDB_FOLLOW	0x001	/* print on function entry */
+#define SDB_INIT	0x002	/* print swap_pager_init details */
+#define SDB_ALLOC	0x004	/* print swap_pager_alloc calls */
+#define SDB_IO		0x008
+#define SDB_WRITE	0x010
+#define SDB_FAIL	0x020	/* print allocation failures */
+#define SDB_ALLOCBLK	0x040
+#define SDB_FULL	0x080
+#define SDB_ANOM	0x100
+#define SDB_ANOMPANIC	0x200
+#define SDB_CLUSTER	0x400
+#define SDB_PARANOIA	0x800
+#endif /* DEBUG */
+
+TAILQ_HEAD(swpclean, swpagerclean);	/* list-head type for the clean lists */
+/* One "pager clean" struct per pending async page clean; fixed-size pool. */
+struct swpagerclean {
+	TAILQ_ENTRY(swpagerclean)	spc_list;	/* clean-list linkage */
+	int				spc_flags;	/* SPC_* values below */
+	struct buf			*spc_bp;	/* presumably the I/O buf -- confirm */
+	sw_pager_t			spc_swp;	/* presumably the owning pager -- confirm */
+	vm_offset_t			spc_kva;
+	vm_page_t			spc_m;
+	int				spc_npages;
+} swcleanlist[NPENDINGIO];
+typedef struct swpagerclean *swp_clean_t;
+
+/* spc_flags values */
+#define SPC_FREE	0x00	/* set by swap_pager_init for structs on the free list */
+#define SPC_BUSY	0x01
+#define SPC_DONE	0x02
+#define SPC_ERROR	0x04
+/* Object size vs. swap block size; built by swap_pager_init, ends at st_osize 0. */
+struct swtab {
+	vm_size_t st_osize;	/* size of object (bytes) */
+	int	  st_bsize;	/* vs. size of swap block (DEV_BSIZE units) */
+#ifdef DEBUG
+	u_long	  st_inuse;	/* number in this range in use */
+	u_long	  st_usecnt;	/* total used of this size */
+#endif /* DEBUG */
+} swtab[NSWSIZES+1];
+
+#ifdef DEBUG
+int		swap_pager_poip;	/* pageouts in progress */
+int		swap_pager_piip;	/* pageins in progress */
+#endif /* DEBUG */
+
+int		swap_pager_maxcluster;	/* maximum cluster size (set in swap_pager_init) */
+int		swap_pager_npendingio;	/* number of pager clean structs (NPENDINGIO) */
+
+struct swpclean	swap_pager_inuse;	/* list of pending page cleans */
+struct swpclean	swap_pager_free;	/* list of free pager clean structs */
+struct pagerlst	swap_pager_list;	/* list of "named" anon regions */
+/* Forward declarations of the file-local swap pager routines. */
+static void 		swap_pager_init __P((void));
+static vm_pager_t	swap_pager_alloc
+			    __P((caddr_t, vm_size_t, vm_prot_t, vm_offset_t));
+static void		swap_pager_clean __P((int));
+#ifdef DEBUG
+static void		swap_pager_clean_check __P((vm_page_t *, int, int));
+#endif /* DEBUG */
+static void		swap_pager_cluster
+			    __P((vm_pager_t, vm_offset_t,
+				 vm_offset_t *, vm_offset_t *));
+static void		swap_pager_dealloc __P((vm_pager_t));
+static int		swap_pager_getpage
+			    __P((vm_pager_t, vm_page_t *, int, boolean_t));
+static boolean_t	swap_pager_haspage __P((vm_pager_t, vm_offset_t));
+static int		swap_pager_io __P((sw_pager_t, vm_page_t *, int, int));
+static void		swap_pager_iodone __P((struct buf *));
+static int		swap_pager_putpage
+			    __P((vm_pager_t, vm_page_t *, int, boolean_t));
+/* Swap pager operations vector; swap_pager_init() installs it as dfltpagerops. */
+struct pagerops swappagerops = {
+	swap_pager_init,
+	swap_pager_alloc,
+	swap_pager_dealloc,
+	swap_pager_getpage,
+	swap_pager_putpage,
+	swap_pager_haspage,
+	swap_pager_cluster
+};
+
+static void
+swap_pager_init()
+{
+	extern int dmmin, dmmax;
+	register swp_clean_t spc;
+	register int i, bsize;
+	int maxbsize;
+
+#ifdef DEBUG
+	if (swpagerdebug & (SDB_FOLLOW|SDB_INIT))
+		printf("swpg_init()\n");
+#endif
+	TAILQ_INIT(&swap_pager_list);
+	dfltpagerops = &swappagerops;
+
+	/*
+	 * Set the number of async IO (page clean) structures.
+	 *
+	 * XXX ideally this would be sized at run time from nswbuf, which
+	 * is the real limit, but neither nswbuf nor malloc has been
+	 * initialized yet.  Hence the fixed swcleanlist[] array declared
+	 * near the top of this file.
+	 */
+	swap_pager_npendingio = NPENDINGIO;
+
+	/*
+	 * Both clean lists start empty; every struct then goes on "free".
+	 */
+	TAILQ_INIT(&swap_pager_free);
+	TAILQ_INIT(&swap_pager_inuse);
+	for (spc = swcleanlist, i = swap_pager_npendingio; i > 0; i--, spc++) {
+		spc->spc_flags = SPC_FREE;
+		TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list);
+	}
+
+	/*
+	 * Default the swap allocation constants if they are still zero.
+	 */
+	if (dmmax == 0)
+		dmmax = DMMAX;
+	if (dmmin == 0) {
+		dmmin = DMMIN;
+		if (dmmin < CLBYTES/DEV_BSIZE)
+			dmmin = CLBYTES/DEV_BSIZE;
+	}
+
+	/*
+	 * Build swtab: the block size doubles with each entry.
+	 */
+	maxbsize = btodb(sizeof(sw_bm_t) * NBBY * PAGE_SIZE);
+	if (maxbsize > dmmax)
+		maxbsize = dmmax;
+	bsize = btodb(PAGE_SIZE);
+	if (bsize < dmmin)
+		bsize = dmmin;
+	for (i = 0; i < NSWSIZES; i++, bsize *= 2) {
+		swtab[i].st_bsize = bsize;
+		swtab[i].st_osize = (vm_size_t) (MAXDADDRS * dbtob(bsize));
+		if (bsize <= btodb(MAXPHYS))
+			swap_pager_maxcluster = dbtob(bsize);
+#ifdef DEBUG
+		if (swpagerdebug & SDB_INIT)
+			printf("swpg_init: ix %d, size %x, bsize %x\n",
+			       i, swtab[i].st_osize, swtab[i].st_bsize);
+#endif
+		if (bsize >= maxbsize)
+			break;		/* leaves before the doubling step */
+	}
+	swtab[i].st_osize = 0;		/* entry i terminates the table */
+	swtab[i].st_bsize = bsize;
+}
+
+/*
+ * Allocate a pager structure and associated resources.
+ * Note that if we are called from the pageout daemon (handle == NULL)
+ * we should not wait for memory as it could result in deadlock.
+ */
+static vm_pager_t
+swap_pager_alloc(handle, size, prot, foff)
+	caddr_t handle;
+	register vm_size_t size;
+	vm_prot_t prot;
+	vm_offset_t foff;
+{
+	register vm_pager_t pager;
+	register sw_pager_t swp;
+	struct swtab *swt;
+	int waitok;
+
+#ifdef DEBUG
+	if (swpagerdebug & (SDB_FOLLOW|SDB_ALLOC))
+		printf("swpg_alloc(%x, %x, %x)\n", handle, size, prot);
+#endif
+	/*
+	 * If this is a "named" anonymous region, look it up and
+	 * return the appropriate pager if it exists.
+	 */
+	if (handle) {
+		pager = vm_pager_lookup(&swap_pager_list, handle);
+		if (pager != NULL) {
+			/*
+			 * Use vm_object_lookup to gain a reference
+			 * to the object and also to remove from the
+			 * object cache.
+			 */
+			if (vm_object_lookup(pager) == NULL)
+				panic("swap_pager_alloc: bad object");
+			return(pager);
+		}
+	}
+	/*
+	 * Pager doesn't exist, allocate swap management resources
+	 * and initialize.
+	 */
+	waitok = handle ? M_WAITOK : M_NOWAIT;
+	pager = (vm_pager_t)malloc(sizeof *pager, M_VMPAGER, waitok);
+	if (pager == NULL)
+		return(NULL);
+	swp = (sw_pager_t)malloc(sizeof *swp, M_VMPGDATA, waitok);
+	if (swp == NULL) {
+#ifdef DEBUG
+		if (swpagerdebug & SDB_FAIL)
+			printf("swpg_alloc: swpager malloc failed\n");
+#endif
+		free((caddr_t)pager, M_VMPAGER);
+		return(NULL);
+	}
+	size = round_page(size);
+	for (swt = swtab; swt->st_osize; swt++)
+		if (size <= swt->st_osize)
+			break;
+#ifdef DEBUG
+	swt->st_inuse++;
+	swt->st_usecnt++;
+#endif
+	swp->sw_osize = size;
+	swp->sw_bsize = swt->st_bsize;
+	swp->sw_nblocks = (btodb(size) + swp->sw_bsize - 1) / swp->sw_bsize;
+	swp->sw_blocks = (sw_blk_t)
+		malloc(swp->sw_nblocks*sizeof(*swp->sw_blocks),
+		       M_VMPGDATA, M_NOWAIT);
+	if (swp->sw_blocks == NULL) {
+		free((caddr_t)swp, M_VMPGDATA);
+		free((caddr_t)pager, M_VMPAGER);
+#ifdef DEBUG
+		if (swpagerdebug & SDB_FAIL)
+			printf("swpg_alloc: sw_blocks malloc failed\n");
+		swt->st_inuse--;
+		swt->st_usecnt--;
+#endif
+		return(FALSE);
+	}
+	bzero((caddr_t)swp->sw_blocks,
+	      swp->sw_nblocks * sizeof(*swp->sw_blocks));
+	swp->sw_poip = 0;
+	if (handle) {
+		vm_object_t object;
+
+		swp->sw_flags = SW_NAMED;
+		TAILQ_INSERT_TAIL(&swap_pager_list, pager, pg_list);
+		/*
+		 * Consistant with other pagers: return with object
+		 * referenced.  Can't do this with handle == NULL
+		 * since it might be the pageout daemon calling.
+		 */
+		object = vm_object_allocate(size);
+		vm_object_enter(object, pager);
+		vm_object_setpager(object, pager, 0, FALSE);
+	} else {
+		swp->sw_flags = 0;
+		pager->pg_list.tqe_next = NULL;
+		pager->pg_list.tqe_prev = NULL;
+	}
+	pager->pg_handle = handle;
+	pager->pg_ops = &swappagerops;
+	pager->pg_type = PG_SWAP;
+	pager->pg_flags = PG_CLUSTERPUT;
+	pager->pg_data = swp;
+
+#ifdef DEBUG
+	if (swpagerdebug & SDB_ALLOC)
+		printf("swpg_alloc: pg_data %x, %x of %x at %x\n",
+		       swp, swp->sw_nblocks, swp->sw_bsize, swp->sw_blocks);
+#endif
+	return(pager);
+}
+
+static void	/* tear down a pager: drain pageouts, free swap blocks and memory */
+swap_pager_dealloc(pager)
+	vm_pager_t pager;
+{
+	register int i;
+	register sw_blk_t bp;
+	register sw_pager_t swp;
+	struct swtab *swt;	/* only used for the DEBUG usage counters */
+	int s;
+
+#ifdef DEBUG
+	/* save panic time state */
+	if ((swpagerdebug & SDB_ANOMPANIC) && panicstr)
+		return;
+	if (swpagerdebug & (SDB_FOLLOW|SDB_ALLOC))
+		printf("swpg_dealloc(%x)\n", pager);
+#endif
+	/*
+	 * Remove from list right away so lookups will fail if we
+	 * block for pageout completion.
+	 */
+	swp = (sw_pager_t) pager->pg_data;
+	if (swp->sw_flags & SW_NAMED) {
+		TAILQ_REMOVE(&swap_pager_list, pager, pg_list);
+		swp->sw_flags &= ~SW_NAMED;
+	}
+#ifdef DEBUG
+	for (swt = swtab; swt->st_osize; swt++)
+		if (swp->sw_osize <= swt->st_osize)
+			break;
+	swt->st_inuse--;
+#endif
+
+	/*
+	 * Wait for all pageouts to finish (swap_pager_iodone wakes us via
+	 * SW_WANTED), then reap completed entries from the cleaning list.
+	 */
+	s = splbio();
+	while (swp->sw_poip) {
+		swp->sw_flags |= SW_WANTED;
+		(void) tsleep(swp, PVM, "swpgdealloc", 0);
+	}
+	splx(s);
+	swap_pager_clean(B_WRITE);
+
+	/*
+	 * Free left over swap blocks (swb_block == 0 means none was assigned)
+	 */
+	for (i = 0, bp = swp->sw_blocks; i < swp->sw_nblocks; i++, bp++)
+		if (bp->swb_block) {
+#ifdef DEBUG
+			if (swpagerdebug & (SDB_ALLOCBLK|SDB_FULL))
+				printf("swpg_dealloc: blk %x\n",
+				       bp->swb_block);
+#endif
+			rmfree(swapmap, swp->sw_bsize, bp->swb_block);
+		}
+	/*
+	 * Free swap management resources: block table, private data, pager
+	 */
+	free((caddr_t)swp->sw_blocks, M_VMPGDATA);
+	free((caddr_t)swp, M_VMPGDATA);
+	free((caddr_t)pager, M_VMPAGER);
+}
+
+static int	/* pagein entry point; returns swap_pager_io's VM_PAGER_* status */
+swap_pager_getpage(pager, mlist, npages, sync)
+	vm_pager_t pager;
+	vm_page_t *mlist;
+	int npages;
+	boolean_t sync;
+{
+	sw_pager_t swp;		/* pager-private swap state */
+#ifdef DEBUG
+	if (swpagerdebug & SDB_FOLLOW)
+		printf("swpg_getpage(%x, %x, %x, %x)\n", pager, mlist, npages, sync);
+#endif
+	swp = (sw_pager_t)pager->pg_data;
+	return (swap_pager_io(swp, mlist, npages, B_READ));	/* never B_ASYNC */
+}
+
+static int	/* pageout entry point; returns a VM_PAGER_* status */
+swap_pager_putpage(pager, mlist, npages, sync)
+	vm_pager_t pager;
+	vm_page_t *mlist;
+	int npages;
+	boolean_t sync;
+{
+	int ioflags;
+
+#ifdef DEBUG
+	if (swpagerdebug & SDB_FOLLOW)
+		printf("swpg_putpage(%x, %x, %x, %x)\n",
+		       pager, mlist, npages, sync);
+#endif
+	/* No pager: only reap finished async pageouts, then report success. */
+	if (pager == NULL) {
+		swap_pager_clean(B_WRITE);
+		return (VM_PAGER_OK);		/* ??? */
+	}
+	/* A write that the caller does not wait for is issued B_ASYNC. */
+	ioflags = sync ? B_WRITE : (B_WRITE | B_ASYNC);
+	return (swap_pager_io((sw_pager_t)pager->pg_data, mlist, npages,
+	    ioflags));
+}
+
+static boolean_t	/* TRUE iff the page at offset has been written to swap */
+swap_pager_haspage(pager, offset)
+	vm_pager_t pager;
+	vm_offset_t offset;
+{
+	register sw_pager_t swp;	/* pager-private swap state */
+	register sw_blk_t blk;		/* swap block covering offset */
+	int ix, present;
+
+#ifdef DEBUG
+	if (swpagerdebug & (SDB_FOLLOW|SDB_ALLOCBLK))
+		printf("swpg_haspage(%x, %x) ", pager, offset);
+#endif
+	/* Locate the swap block; an offset beyond the table has no page. */
+	swp = (sw_pager_t) pager->pg_data;
+	ix = offset / dbtob(swp->sw_bsize);
+	if (swp->sw_blocks == NULL || ix >= swp->sw_nblocks) {
+#ifdef DEBUG
+		if (swpagerdebug & (SDB_FAIL|SDB_FOLLOW|SDB_ALLOCBLK))
+			printf("swpg_haspage: %x bad offset %x, ix %x\n",
+			       swp->sw_blocks, offset, ix);
+#endif
+		return(FALSE);
+	}
+	blk = swp->sw_blocks + ix;
+	/* With disk space assigned, ix becomes the page's bit in swb_mask. */
+	if (blk->swb_block != 0)
+		ix = atop(offset % dbtob(swp->sw_bsize));
+	present = blk->swb_block != 0 && (blk->swb_mask & (1 << ix)) != 0;
+#ifdef DEBUG
+	if (swpagerdebug & SDB_ALLOCBLK)
+		printf("%x blk %x+%x ", swp->sw_blocks, blk->swb_block, ix);
+	if (swpagerdebug & (SDB_FOLLOW|SDB_ALLOCBLK))
+		printf("-> %c\n", "FT"[present]);
+#endif
+	return(present ? TRUE : FALSE);
+}
+
+static void	/* report the [*loffset, *hoffset) cluster that contains offset */
+swap_pager_cluster(pager, offset, loffset, hoffset)
+	vm_pager_t	pager;
+	vm_offset_t	offset;
+	vm_offset_t	*loffset;
+	vm_offset_t	*hoffset;
+{
+	sw_pager_t swp;
+	register int clsize;
+	vm_offset_t lo, hi;
+
+#ifdef DEBUG
+	if (swpagerdebug & (SDB_FOLLOW|SDB_CLUSTER))
+		printf("swpg_cluster(%x, %x) ", pager, offset);
+#endif
+	/* Cluster size: one swap block, capped at swap_pager_maxcluster. */
+	swp = (sw_pager_t) pager->pg_data;
+	clsize = dbtob(swp->sw_bsize);
+	if (clsize > swap_pager_maxcluster)
+		clsize = swap_pager_maxcluster;
+	/* Align down to a cluster boundary; it must lie inside the object. */
+	lo = offset - (offset % clsize);
+	if (lo >= swp->sw_osize)
+		panic("swap_pager_cluster: bad offset");
+	/* The cluster ends one cluster size later, clipped to the object. */
+	hi = lo + clsize;
+	if (hi > swp->sw_osize)
+		hi = swp->sw_osize;
+	*loffset = lo;
+	*hoffset = hi;
+#ifdef DEBUG
+	if (swpagerdebug & (SDB_FOLLOW|SDB_CLUSTER))
+		printf("returns [%x-%x]\n", lo, hi);
+#endif
+}
+
+/*
+ * Scaled down version of swap(): transfers the npages pages of mlist to or
+ * from swap and returns a VM_PAGER_* status (PEND for a started async write).
+ * Assumes that PAGE_SIZE < MAXPHYS; i.e. only one operation needed.
+ * BOGUS: lower level IO routines expect a KVA so we map the pages into it.
+ */
+static int
+swap_pager_io(swp, mlist, npages, flags)
+	register sw_pager_t swp;
+	vm_page_t *mlist;
+	int npages;
+	int flags;
+{
+	register struct buf *bp;
+	register sw_blk_t swb;
+	register int s;
+	int ix, mask;
+	boolean_t rv;		/* NOTE(review): carries VM_PAGER_* codes */
+	vm_offset_t kva, off;
+	swp_clean_t spc;
+	vm_page_t m;
+
+#ifdef DEBUG
+	/* save panic time state */
+	if ((swpagerdebug & SDB_ANOMPANIC) && panicstr)
+		return (VM_PAGER_FAIL);		/* XXX: correct return? */
+	if (swpagerdebug & (SDB_FOLLOW|SDB_IO))
+		printf("swpg_io(%x, %x, %x, %x)\n", swp, mlist, npages, flags);
+	if (flags & B_READ) {
+		if (flags & B_ASYNC)
+			panic("swap_pager_io: cannot do ASYNC reads");
+		if (npages != 1)
+			panic("swap_pager_io: cannot do clustered reads");
+	}
+#endif
+
+	/*
+	 * First determine if the page exists in the pager if this is
+	 * a sync read.  This quickly handles cases where we are
+	 * following shadow chains looking for the top level object
+	 * with the page.
+	 */
+	m = *mlist;
+	off = m->offset + m->object->paging_offset;
+	ix = off / dbtob(swp->sw_bsize);
+	if (swp->sw_blocks == NULL || ix >= swp->sw_nblocks) {
+#ifdef DEBUG
+		if ((flags & B_READ) == 0 && (swpagerdebug & SDB_ANOM)) {
+			printf("swap_pager_io: no swap block on write\n");
+			return(VM_PAGER_BAD);
+		}
+#endif
+		return(VM_PAGER_FAIL);
+	}
+	swb = &swp->sw_blocks[ix];
+	off = off % dbtob(swp->sw_bsize);	/* now the offset within the block */
+	if ((flags & B_READ) &&
+	    (swb->swb_block == 0 || (swb->swb_mask & (1 << atop(off))) == 0))
+		return(VM_PAGER_FAIL);
+
+	/*
+	 * For reads (pageins) and synchronous writes, we clean up
+	 * all completed async pageouts.
+	 */
+	if ((flags & B_ASYNC) == 0) {
+		s = splbio();
+		swap_pager_clean(flags&B_READ);
+#ifdef DEBUG
+		if (swpagerdebug & SDB_PARANOIA)
+			swap_pager_clean_check(mlist, npages, flags&B_READ);
+#endif
+		splx(s);
+	}
+	/*
+	 * For async writes (pageouts), we cleanup completed pageouts so
+	 * that all available resources are freed.  Also tells us if this
+	 * page is already being cleaned.  If it is, or no resources
+	 * are available, we try again later.
+	 */
+	else {
+		swap_pager_clean(B_WRITE);
+#ifdef DEBUG
+		if (swpagerdebug & SDB_PARANOIA)
+			swap_pager_clean_check(mlist, npages, B_WRITE);
+#endif
+		if (swap_pager_free.tqh_first == NULL) {
+#ifdef DEBUG
+			if (swpagerdebug & SDB_FAIL)
+				printf("%s: no available io headers\n",
+				       "swap_pager_io");
+#endif
+			return(VM_PAGER_AGAIN);
+		}
+	}
+
+	/*
+	 * Allocate a swap block if necessary (reads never get here without one).
+	 */
+	if (swb->swb_block == 0) {
+		swb->swb_block = rmalloc(swapmap, swp->sw_bsize);
+		if (swb->swb_block == 0) {
+#ifdef DEBUG
+			if (swpagerdebug & SDB_FAIL)
+				printf("swpg_io: rmalloc of %x failed\n",
+				       swp->sw_bsize);
+#endif
+			/*
+			 * XXX this is technically a resource shortage that
+			 * should return AGAIN, but the situation isn't likely
+			 * to be remedied just by delaying a little while and
+			 * trying again (the pageout daemon's current response
+			 * to AGAIN) so we just return FAIL.
+			 */
+			return(VM_PAGER_FAIL);
+		}
+#ifdef DEBUG
+		if (swpagerdebug & (SDB_FULL|SDB_ALLOCBLK))
+			printf("swpg_io: %x alloc blk %x at ix %x\n",
+			       swp->sw_blocks, swb->swb_block, ix);
+#endif
+	}
+
+	/*
+	 * Allocate a kernel virtual address and initialize so that PTE
+	 * is available for lower level IO drivers.
+	 */
+	kva = vm_pager_map_pages(mlist, npages, !(flags & B_ASYNC));
+	if (kva == NULL) {
+#ifdef DEBUG
+		if (swpagerdebug & SDB_FAIL)
+			printf("%s: no KVA space to map pages\n",
+			       "swap_pager_io");
+#endif
+		return(VM_PAGER_AGAIN);
+	}
+
+	/*
+	 * Get a swap buffer header and initialize it, sleeping if none is free.
+	 */
+	s = splbio();
+	while (bswlist.b_actf == NULL) {
+#ifdef DEBUG
+		if (swpagerdebug & SDB_ANOM)
+			printf("swap_pager_io: wait on swbuf for %x (%d)\n",
+			       m, flags);
+#endif
+		bswlist.b_flags |= B_WANTED;
+		tsleep((caddr_t)&bswlist, PSWP+1, "swpgiobuf", 0);
+	}
+	bp = bswlist.b_actf;
+	bswlist.b_actf = bp->b_actf;
+	splx(s);
+	bp->b_flags = B_BUSY | (flags & B_READ);
+	bp->b_proc = &proc0;	/* XXX (but without B_PHYS set this is ok) */
+	bp->b_data = (caddr_t)kva;
+	bp->b_blkno = swb->swb_block + btodb(off);
+	VHOLD(swapdev_vp);
+	bp->b_vp = swapdev_vp;
+	if (swapdev_vp->v_type == VBLK)
+		bp->b_dev = swapdev_vp->v_rdev;
+	bp->b_bcount = npages * PAGE_SIZE;
+
+	/*
+	 * For writes we set up additional buffer fields, record a pageout
+	 * in progress and mark that these swap blocks are now allocated.
+	 */
+	if ((bp->b_flags & B_READ) == 0) {
+		bp->b_dirtyoff = 0;
+		bp->b_dirtyend = npages * PAGE_SIZE;
+		swapdev_vp->v_numoutput++;
+		s = splbio();
+		swp->sw_poip++;
+		splx(s);
+		mask = (~(~0 << npages)) << atop(off);	/* npages bits from atop(off) */
+#ifdef DEBUG
+		swap_pager_poip++;
+		if (swpagerdebug & SDB_WRITE)
+			printf("swpg_io: write: bp=%x swp=%x poip=%d\n",
+			       bp, swp, swp->sw_poip);
+		if ((swpagerdebug & SDB_ALLOCBLK) &&
+		    (swb->swb_mask & mask) != mask)
+			printf("swpg_io: %x write %d pages at %x+%x\n",
+			       swp->sw_blocks, npages, swb->swb_block,
+			       atop(off));
+		if (swpagerdebug & SDB_CLUSTER)
+			printf("swpg_io: off=%x, npg=%x, mask=%x, bmask=%x\n",
+			       off, npages, mask, swb->swb_mask);
+#endif
+		swb->swb_mask |= mask;
+	}
+	/*
+	 * If this is an async write we set up still more buffer fields
+	 * and place a "cleaning" entry on the inuse queue.
+	 */
+	if ((flags & (B_READ|B_ASYNC)) == B_ASYNC) {
+#ifdef DEBUG
+		if (swap_pager_free.tqh_first == NULL)
+			panic("swpg_io: lost spc");
+#endif
+		spc = swap_pager_free.tqh_first;
+		TAILQ_REMOVE(&swap_pager_free, spc, spc_list);
+#ifdef DEBUG
+		if (spc->spc_flags != SPC_FREE)
+			panic("swpg_io: bad free spc");
+#endif
+		spc->spc_flags = SPC_BUSY;
+		spc->spc_bp = bp;
+		spc->spc_swp = swp;
+		spc->spc_kva = kva;
+		/*
+		 * Record the first page.  This allows swap_pager_clean
+		 * to efficiently handle the common case of a single page.
+		 * For clusters, it allows us to locate the object easily
+		 * and we then reconstruct the rest of the mlist from spc_kva.
+		 */
+		spc->spc_m = m;
+		spc->spc_npages = npages;
+		bp->b_flags |= B_CALL;
+		bp->b_iodone = swap_pager_iodone;
+		s = splbio();
+		TAILQ_INSERT_TAIL(&swap_pager_inuse, spc, spc_list);
+		splx(s);
+	}
+
+	/*
+	 * Finally, start the IO operation.
+	 * If it is async we are all done, otherwise we must wait for
+	 * completion and cleanup afterwards.
+	 */
+#ifdef DEBUG
+	if (swpagerdebug & SDB_IO)
+		printf("swpg_io: IO start: bp %x, db %x, va %x, pa %x\n",
+		       bp, swb->swb_block+btodb(off), kva, VM_PAGE_TO_PHYS(m));
+#endif
+	VOP_STRATEGY(bp);
+	if ((flags & (B_READ|B_ASYNC)) == B_ASYNC) {
+#ifdef DEBUG
+		if (swpagerdebug & SDB_IO)
+			printf("swpg_io:  IO started: bp %x\n", bp);
+#endif
+		return(VM_PAGER_PEND);
+	}
+	s = splbio();
+#ifdef DEBUG
+	if (flags & B_READ)
+		swap_pager_piip++;
+	else
+		swap_pager_poip++;
+#endif
+	while ((bp->b_flags & B_DONE) == 0)
+		(void) tsleep(bp, PVM, "swpgio", 0);
+	if ((flags & B_READ) == 0)
+		--swp->sw_poip;	/* NOTE(review): no SW_WANTED wakeup on this path */
+#ifdef DEBUG
+	if (flags & B_READ)
+		--swap_pager_piip;
+	else
+		--swap_pager_poip;
+#endif
+	rv = (bp->b_flags & B_ERROR) ? VM_PAGER_ERROR : VM_PAGER_OK;
+	bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_PAGET|B_UAREA|B_DIRTY);
+	bp->b_actf = bswlist.b_actf;	/* push the header back on the free list */
+	bswlist.b_actf = bp;
+	if (bp->b_vp)
+		brelvp(bp);
+	if (bswlist.b_flags & B_WANTED) {
+		bswlist.b_flags &= ~B_WANTED;
+		wakeup(&bswlist);
+	}
+	if ((flags & B_READ) == 0 && rv == VM_PAGER_OK) {
+		m->flags |= PG_CLEAN;	/* NOTE(review): first page of mlist only */
+		pmap_clear_modify(VM_PAGE_TO_PHYS(m));
+	}
+	splx(s);
+#ifdef DEBUG
+	if (swpagerdebug & SDB_IO)
+		printf("swpg_io:  IO done: bp %x, rv %d\n", bp, rv);
+	if ((swpagerdebug & SDB_FAIL) && rv == VM_PAGER_ERROR)
+		printf("swpg_io: IO error\n");
+#endif
+	vm_pager_unmap_pages(kva, npages);
+	return(rv);
+}
+
+static void	/* reap completed async pageouts from the swap_pager_inuse list */
+swap_pager_clean(rw)
+	int rw;		/* only used for the DEBUG trace below */
+{
+	register swp_clean_t spc;
+	register int s, i;
+	vm_object_t object;
+	vm_page_t m;
+
+#ifdef DEBUG
+	/* save panic time state */
+	if ((swpagerdebug & SDB_ANOMPANIC) && panicstr)
+		return;
+	if (swpagerdebug & SDB_FOLLOW)
+		printf("swpg_clean(%x)\n", rw);
+#endif
+
+	for (;;) {
+		/*
+		 * Look up and removal from inuse list must be done
+		 * at splbio() to avoid conflicts with swap_pager_iodone.
+		 */
+		s = splbio();
+		for (spc = swap_pager_inuse.tqh_first;
+		     spc != NULL;
+		     spc = spc->spc_list.tqe_next) {
+			/*
+			 * If the operation is done, remove it from the
+			 * list and process it.
+			 *
+			 * XXX if we can't get the object lock we also
+			 * leave it on the list and try again later.
+			 * Is there something better we could do?
+			 */
+			if ((spc->spc_flags & SPC_DONE) &&
+			    vm_object_lock_try(spc->spc_m->object)) {
+				TAILQ_REMOVE(&swap_pager_inuse, spc, spc_list);
+				break;
+			}
+		}
+		splx(s);
+
+		/*
+		 * No operations done, that's all we can do for now.
+		 */
+		if (spc == NULL)
+			break;
+
+		/*
+		 * Found a completed operation so finish it off.
+		 * Note: no longer at splbio since entry is off the list.
+		 */
+		m = spc->spc_m;
+		object = m->object;
+
+		/*
+		 * Process each page in the cluster.
+		 * The first page is explicitly kept in the cleaning
+		 * entry, others must be reconstructed from the KVA.
+		 */
+		for (i = 0; i < spc->spc_npages; i++) {
+			if (i)
+				m = vm_pager_atop(spc->spc_kva + ptoa(i));
+			/*
+			 * If no error mark as clean and inform the pmap
+			 * system.  If there was an error, mark as dirty
+			 * so we will try again.
+			 *
+			 * XXX could get stuck doing this, should give up
+			 * after awhile.
+			 */
+			if (spc->spc_flags & SPC_ERROR) {
+				printf("%s: clean of page %x failed\n",
+				       "swap_pager_clean",
+				       VM_PAGE_TO_PHYS(m));
+				m->flags |= PG_LAUNDRY;
+			} else {
+				m->flags |= PG_CLEAN;
+				pmap_clear_modify(VM_PAGE_TO_PHYS(m));
+			}
+			m->flags &= ~PG_BUSY;
+			PAGE_WAKEUP(m);
+		}
+
+		/*
+		 * Done with the object, decrement the paging count
+		 * (waking anyone sleeping on the object) and unlock it.
+		 */
+		if (--object->paging_in_progress == 0)
+			wakeup(object);
+		vm_object_unlock(object);
+
+		/*
+		 * Free up KVM used and put the entry back on the free list.
+		 */
+		vm_pager_unmap_pages(spc->spc_kva, spc->spc_npages);
+		spc->spc_flags = SPC_FREE;
+		TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list);
+#ifdef DEBUG
+		if (swpagerdebug & SDB_WRITE)
+			printf("swpg_clean: free spc %x\n", spc);
+#endif
+	}
+}
+
+#ifdef DEBUG
+static void	/* paranoia: panic if any page of mlist is still being cleaned */
+swap_pager_clean_check(mlist, npages, rw)
+	vm_page_t *mlist;
+	int npages;
+	int rw;
+{
+	register swp_clean_t ent;
+	boolean_t found = FALSE;
+	int pg, k, ipl;
+	vm_page_t busy;
+
+	if (panicstr)
+		return;
+
+	/* Walk the inuse list at splbio, like the other list scans here. */
+	ipl = splbio();
+	for (ent = swap_pager_inuse.tqh_first; ent != NULL;
+	     ent = ent->spc_list.tqe_next) {
+		for (k = 0; k < ent->spc_npages; k++) {
+			busy = vm_pager_atop(ent->spc_kva + ptoa(k));
+			for (pg = 0; pg < npages; pg++) {
+				if (busy != mlist[pg])
+					continue;
+				found = TRUE;
+				if (swpagerdebug & SDB_ANOM)
+					printf(
+		"swpg_clean_check: %s: page %x on list, flags %x\n",
+		rw == B_WRITE ? "write" : "read", mlist[pg], ent->spc_flags);
+			}
+		}
+	}
+	splx(ipl);
+	if (found)
+		panic("swpg_clean_check");
+}
+#endif
+
+static void	/* b_iodone callback for async pageouts started by swap_pager_io */
+swap_pager_iodone(bp)
+	register struct buf *bp;
+{
+	register swp_clean_t spc;
+	int s;
+
+#ifdef DEBUG
+	/* save panic time state */
+	if ((swpagerdebug & SDB_ANOMPANIC) && panicstr)
+		return;
+	if (swpagerdebug & SDB_FOLLOW)
+		printf("swpg_iodone(%x)\n", bp);
+#endif
+	/* Find the cleaning entry that swap_pager_io queued for this buffer. */
+	s = splbio();
+	for (spc = swap_pager_inuse.tqh_first;
+	     spc != NULL;
+	     spc = spc->spc_list.tqe_next)
+		if (spc->spc_bp == bp)
+			break;
+	/*
+	 * Always check: without an entry we would dereference NULL below.
+	 */
+	if (spc == NULL)
+		panic("swap_pager_iodone: bp not found");
+
+	spc->spc_flags &= ~SPC_BUSY;
+	spc->spc_flags |= SPC_DONE;
+	if (bp->b_flags & B_ERROR)
+		spc->spc_flags |= SPC_ERROR;
+	spc->spc_bp = NULL;
+
+#ifdef DEBUG
+	--swap_pager_poip;
+	if (swpagerdebug & SDB_WRITE)
+		printf("swpg_iodone: bp=%x swp=%x flags=%x spc=%x poip=%x\n",
+		       bp, spc->spc_swp, spc->spc_swp->sw_flags,
+		       spc, spc->spc_swp->sw_poip);
+#endif
+	/* One less pageout in flight; wake swap_pager_dealloc if it waits. */
+	spc->spc_swp->sw_poip--;
+	if (spc->spc_swp->sw_flags & SW_WANTED) {
+		spc->spc_swp->sw_flags &= ~SW_WANTED;
+		wakeup(spc->spc_swp);
+	}
+	/* Return the buffer header to bswlist and wake anyone waiting for one. */
+	bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_PAGET|B_UAREA|B_DIRTY);
+	bp->b_actf = bswlist.b_actf;
+	bswlist.b_actf = bp;
+	if (bp->b_vp)
+		brelvp(bp);
+	if (bswlist.b_flags & B_WANTED) {
+		bswlist.b_flags &= ~B_WANTED;
+		wakeup(&bswlist);
+	}
+	wakeup(&vm_pages_needed);
+	splx(s);
+}
diff --git a/sys/vm/swap_pager.h b/sys/vm/swap_pager.h
new file mode 100644
index 00000000000..497d92a3938
--- /dev/null
+++ b/sys/vm/swap_pager.h
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 1990 University of Utah.
+ * Copyright (c) 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * the Systems Programming Group of the University of Utah Computer
+ * Science Department.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)swap_pager.h	8.1 (Berkeley) 6/11/93
+ */
+
+#ifndef	_SWAP_PAGER_
+#define	_SWAP_PAGER_	1
+
+/*
+ * In the swap pager, the backing store for an object is organized as an
+ * array of some number of "swap blocks".  A swap block consists of a bitmask
+ * and some number of contiguous DEV_BSIZE disk blocks.  The minimum size
+ * of a swap block is:
+ *
+ *	max(PAGE_SIZE, dmmin*DEV_BSIZE)			[ 32k currently ]
+ *
+ * bytes (since the pager interface is page oriented), the maximum size is:
+ *
+ *	min(#bits(swb_mask)*PAGE_SIZE, dmmax*DEV_BSIZE)	[ 128k currently ]
+ *
+ * where dmmin and dmmax are left over from the old VM interface.  The bitmask
+ * (swb_mask) is used by swap_pager_haspage() to determine if a particular
+ * page has actually been written; i.e. the pager copy of the page is valid.
+ * All swap blocks in the backing store of an object will be the same size.
+ *
+ * The reason for variable sized swap blocks is to reduce fragmentation of
+ * swap resources.  Whenever possible we allocate smaller swap blocks to
+ * smaller objects.  The swap block size is determined from a table of
+ * object-size vs. swap-block-size computed at boot time.
+ */
+typedef	int	sw_bm_t;	/* pager bitmask */
+
+struct	swblock {
+	sw_bm_t	 swb_mask;	/* bit n set => page n of this block was written */
+	daddr_t	 swb_block;	/* starting disk block, 0 if none allocated yet */
+};
+typedef struct swblock	*sw_blk_t;
+
+/*
+ * Swap pager private data.
+ */
+struct swpager {
+	vm_size_t    sw_osize;	/* size of object we are backing (bytes) */
+	int	     sw_bsize;	/* size of each swap block (DEV_BSIZE units) */
+	int	     sw_nblocks;/* number of entries in the sw_blocks array */
+	sw_blk_t     sw_blocks;	/* array of swap blocks, indexed by block # */
+	short	     sw_flags;	/* SW_WANTED and/or SW_NAMED */
+	short	     sw_poip;	/* pageouts (writes) in progress */
+};
+typedef struct swpager	*sw_pager_t;
+
+#define	SW_WANTED	0x01
+#define SW_NAMED	0x02
+
+#endif	/* _SWAP_PAGER_ */
diff --git a/sys/vm/vm.h b/sys/vm/vm.h
new file mode 100644
index 00000000000..85f892f29be
--- /dev/null
+++ b/sys/vm/vm.h
@@ -0,0 +1,91 @@
+/*
+ * Copyright (c) 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)vm.h	8.2 (Berkeley) 12/13/93
+ */
+
+#ifndef VM_H
+#define VM_H
+
+typedef int vm_inherit_t;		/* XXX: inheritance codes */
+
+union vm_map_object;
+typedef union vm_map_object vm_map_object_t;
+
+struct vm_map_entry;
+typedef struct vm_map_entry *vm_map_entry_t;
+
+struct vm_map;
+typedef struct vm_map *vm_map_t;
+
+struct vm_object;
+typedef struct vm_object *vm_object_t;
+
+struct vm_page;
+typedef struct vm_page  *vm_page_t;
+
+struct pager_struct;
+typedef struct pager_struct *vm_pager_t;
+
+#include <sys/vmmeter.h>
+#include <sys/queue.h>
+#include <vm/vm_param.h>
+#include <vm/lock.h>
+#include <vm/vm_prot.h>
+#include <vm/vm_inherit.h>
+#include <vm/vm_map.h>
+#include <vm/vm_object.h>
+#include <vm/pmap.h>
+#include <vm/vm_extern.h>
+
+/*
+ * Shareable process virtual address space.
+ * May eventually be merged with vm_map.
+ * Several fields are temporary (text, data stuff).
+ */
+struct vmspace {
+	struct	vm_map vm_map;	/* VM address map */
+	struct	pmap vm_pmap;	/* private physical map */
+	int	vm_refcnt;	/* number of references */
+	caddr_t	vm_shm;		/* SYS5 shared memory private data XXX */
+/* on fork we copy from vm_startcopy (vm_rssize) to the end of the structure */
+#define vm_startcopy vm_rssize
+	segsz_t vm_rssize; 	/* current resident set size in pages */
+	segsz_t vm_swrss;	/* resident set size before last swap */
+	segsz_t vm_tsize;	/* text size (pages) XXX */
+	segsz_t vm_dsize;	/* data size (pages) XXX */
+	segsz_t vm_ssize;	/* stack size (pages) */
+	caddr_t	vm_taddr;	/* user virtual address of text XXX */
+	caddr_t	vm_daddr;	/* user virtual address of data XXX */
+	caddr_t vm_maxsaddr;	/* user VA at max stack growth */
+};
+#endif /* VM_H */
diff --git a/sys/vm/vm_extern.h b/sys/vm/vm_extern.h
new file mode 100644
index 00000000000..bae5f005273
--- /dev/null
+++ b/sys/vm/vm_extern.h
@@ -0,0 +1,125 @@
+/*-
+ * Copyright (c) 1992, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)vm_extern.h	8.2 (Berkeley) 1/12/94
+ */
+
+struct buf;
+struct loadavg;
+struct proc;
+struct vmspace;
+struct vmtotal;
+struct mount;
+struct vnode;
+
+#ifdef KGDB
+void		 chgkprot __P((caddr_t, int, int));
+#endif
+
+#ifdef KERNEL
+#ifdef TYPEDEF_FOR_UAP
+int		 getpagesize __P((struct proc *p, void *, int *));
+int		 madvise __P((struct proc *, void *, int *));
+int		 mincore __P((struct proc *, void *, int *));
+int		 mprotect __P((struct proc *, void *, int *));
+int		 msync __P((struct proc *, void *, int *));
+int		 munmap __P((struct proc *, void *, int *));
+int		 obreak __P((struct proc *, void *, int *));
+int		 sbrk __P((struct proc *, void *, int *));
+int		 smmap __P((struct proc *, void *, int *));
+int		 sstk __P((struct proc *, void *, int *));
+#endif
+
+void		 assert_wait __P((int, boolean_t));
+int		 grow __P((struct proc *, u_int));
+void		 iprintf __P((const char *, ...));
+int		 kernacc __P((caddr_t, int, int));
+int		 kinfo_loadavg __P((int, char *, int *, int, int *));
+int		 kinfo_meter __P((int, caddr_t, int *, int, int *));
+vm_offset_t	 kmem_alloc __P((vm_map_t, vm_size_t));
+vm_offset_t	 kmem_alloc_pageable __P((vm_map_t, vm_size_t));
+vm_offset_t	 kmem_alloc_wait __P((vm_map_t, vm_size_t));
+void		 kmem_free __P((vm_map_t, vm_offset_t, vm_size_t));
+void		 kmem_free_wakeup __P((vm_map_t, vm_offset_t, vm_size_t));
+void		 kmem_init __P((vm_offset_t, vm_offset_t));
+vm_offset_t	 kmem_malloc __P((vm_map_t, vm_size_t, boolean_t));
+vm_map_t	 kmem_suballoc __P((vm_map_t, vm_offset_t *, vm_offset_t *,
+		    vm_size_t, boolean_t));
+void		 loadav __P((struct loadavg *));
+void		 munmapfd __P((int));
+int		 pager_cache __P((vm_object_t, boolean_t));
+void		 sched __P((void));
+int		 svm_allocate __P((struct proc *, void *, int *));
+int		 svm_deallocate __P((struct proc *, void *, int *));
+int		 svm_inherit __P((struct proc *, void *, int *));
+int		 svm_protect __P((struct proc *, void *, int *));
+void		 swapinit __P((void));
+int		 swapon __P((struct proc *, void *, int *));
+void		 swapout __P((struct proc *));
+void		 swapout_threads __P((void));
+int		 swfree __P((struct proc *, int));
+void		 swstrategy __P((struct buf *));
+void		 thread_block __P((void));
+void		 thread_sleep __P((int, simple_lock_t, boolean_t));
+void		 thread_wakeup __P((int));
+int		 useracc __P((caddr_t, int, int));
+int		 vm_allocate __P((vm_map_t,
+		    vm_offset_t *, vm_size_t, boolean_t));
+int		 vm_allocate_with_pager __P((vm_map_t, vm_offset_t *,
+		    vm_size_t, boolean_t, vm_pager_t, vm_offset_t, boolean_t));
+int		 vm_deallocate __P((vm_map_t, vm_offset_t, vm_size_t));
+int		 vm_fault __P((vm_map_t, vm_offset_t, vm_prot_t, boolean_t));
+void		 vm_fault_copy_entry __P((vm_map_t,
+		    vm_map_t, vm_map_entry_t, vm_map_entry_t));
+void		 vm_fault_unwire __P((vm_map_t, vm_offset_t, vm_offset_t));
+int		 vm_fault_wire __P((vm_map_t, vm_offset_t, vm_offset_t));
+int		 vm_fork __P((struct proc *, struct proc *, int));
+int		 vm_inherit __P((vm_map_t,
+		    vm_offset_t, vm_size_t, vm_inherit_t));
+void		 vm_init_limits __P((struct proc *));
+void		 vm_mem_init __P((void));
+int		 vm_mmap __P((vm_map_t, vm_offset_t *, vm_size_t,
+		    vm_prot_t, vm_prot_t, int, caddr_t, vm_offset_t));
+int		 vm_protect __P((vm_map_t,
+		    vm_offset_t, vm_size_t, boolean_t, vm_prot_t));
+void		 vm_set_page_size __P((void));
+void		 vmmeter __P((void));
+struct vmspace	*vmspace_alloc __P((vm_offset_t, vm_offset_t, int));
+struct vmspace	*vmspace_fork __P((struct vmspace *));
+void		 vmspace_free __P((struct vmspace *));
+void		 vmtotal __P((struct vmtotal *));
+void		 vnode_pager_setsize __P((struct vnode *, u_long));
+void		 vnode_pager_umount __P((struct mount *));
+boolean_t	 vnode_pager_uncache __P((struct vnode *));
+void		 vslock __P((caddr_t, u_int));
+void		 vsunlock __P((caddr_t, u_int, int));
+#endif
diff --git a/sys/vm/vm_fault.c b/sys/vm/vm_fault.c
new file mode 100644
index 00000000000..f60abf2b5f3
--- /dev/null
+++ b/sys/vm/vm_fault.c
@@ -0,0 +1,1035 @@
+/* 
+ * Copyright (c) 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * The Mach Operating System project at Carnegie-Mellon University.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)vm_fault.c	8.4 (Berkeley) 1/12/94
+ *
+ *
+ * Copyright (c) 1987, 1990 Carnegie-Mellon University.
+ * All rights reserved.
+ *
+ * Authors: Avadis Tevanian, Jr., Michael Wayne Young
+ * 
+ * Permission to use, copy, modify and distribute this software and
+ * its documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ * 
+ * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 
+ * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND 
+ * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ * 
+ * Carnegie Mellon requests users of this software to return to
+ *
+ *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
+ *  School of Computer Science
+ *  Carnegie Mellon University
+ *  Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie the
+ * rights to redistribute these changes.
+ */
+
+/*
+ *	Page fault handling module.
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+
+#include <vm/vm.h>
+#include <vm/vm_page.h>
+#include <vm/vm_pageout.h>
+
+/*
+ *	vm_fault:
+ *
+ *	Handle a page fault occurring at the given address,
+ *	requiring the given permissions, in the map specified.
+ *	If successful, the page is inserted into the
+ *	associated physical map.
+ *
+ *	NOTE: the given address should be truncated to the
+ *	proper page address.
+ *
+ *	KERN_SUCCESS is returned if the page fault is handled; otherwise,
+ *	a standard error specifying why the fault is fatal is returned.
+ *
+ *
+ *	The map in question must be referenced, and remains so.
+ *	Caller may hold no locks.
+ */
+int
+vm_fault(map, vaddr, fault_type, change_wiring)
+	vm_map_t	map;		/* map the faulting address belongs to */
+	vm_offset_t	vaddr;		/* faulting virtual address */
+	vm_prot_t	fault_type;	/* access wanted; replaced by prot if wired */
+	boolean_t	change_wiring;	/* TRUE: wire/unwire page, don't activate */
+{
+	vm_object_t		first_object;	/* top object from the map lookup */
+	vm_offset_t		first_offset;	/* fault offset within first_object */
+	vm_map_entry_t		entry;	/* map entry from the lookup */
+	register vm_object_t	object;	/* object currently being searched */
+	register vm_offset_t	offset;	/* fault offset within object */
+	register vm_page_t	m;	/* page being worked on */
+	vm_page_t		first_m;	/* page held in first_object, if any */
+	vm_prot_t		prot;	/* protection the mapping is entered with */
+	int			result;	/* vm_map_lookup status */
+	boolean_t		wired;	/* from vm_map_lookup; passed to pmap_enter */
+	boolean_t		su;	/* from vm_map_lookup; not used otherwise */
+	boolean_t		lookup_still_valid;	/* lookup not yet released */
+	boolean_t		page_exists;	/* copy-object already has the page */
+	vm_page_t		old_m;	/* page found by the main loop */
+	vm_object_t		next_object;	/* shadow of the current object */
+
+	cnt.v_faults++;		/* needs lock XXX */
+/*
+ *	Recovery actions.  FREE_PAGE: wake any waiters on m, then free it.
+ */
+#define	FREE_PAGE(m)	{				\
+	PAGE_WAKEUP(m);					\
+	vm_page_lock_queues();				\
+	vm_page_free(m);				\
+	vm_page_unlock_queues();			\
+}
+/* RELEASE_PAGE: wake any waiters on m, then put it on the active queue. */
+#define	RELEASE_PAGE(m)	{				\
+	PAGE_WAKEUP(m);					\
+	vm_page_lock_queues();				\
+	vm_page_activate(m);				\
+	vm_page_unlock_queues();			\
+}
+/* UNLOCK_MAP: finish the map lookup if it is still outstanding. */
+#define	UNLOCK_MAP	{				\
+	if (lookup_still_valid) {			\
+		vm_map_lookup_done(map, entry);		\
+		lookup_still_valid = FALSE;		\
+	}						\
+}
+/* UNLOCK_THINGS: let go of object, of first_m/first_object if distinct, and of the map. */
+#define	UNLOCK_THINGS	{				\
+	object->paging_in_progress--;			\
+	vm_object_unlock(object);			\
+	if (object != first_object) {			\
+		vm_object_lock(first_object);		\
+		FREE_PAGE(first_m);			\
+		first_object->paging_in_progress--;	\
+		vm_object_unlock(first_object);		\
+	}						\
+	UNLOCK_MAP;					\
+}
+/* UNLOCK_AND_DEALLOCATE: UNLOCK_THINGS, then drop our first_object reference. */
+#define	UNLOCK_AND_DEALLOCATE	{			\
+	UNLOCK_THINGS;					\
+	vm_object_deallocate(first_object);		\
+}
+
+    RetryFault: ;
+
+	/*
+	 *	Find the backing store object and offset into
+	 *	it to begin the search.
+	 */
+
+	if ((result = vm_map_lookup(&map, vaddr, fault_type, &entry,
+			&first_object, &first_offset,
+			&prot, &wired, &su)) != KERN_SUCCESS) {
+		return(result);
+	}
+	lookup_still_valid = TRUE;
+
+	if (wired)
+		fault_type = prot;
+
+	first_m = NULL;
+
+   	/*
+	 *	Make a reference to this object to
+	 *	prevent its disposal while we are messing with
+	 *	it.  Once we have the reference, the map is free
+	 *	to be diddled.  Since objects reference their
+	 *	shadows (and copies), they will stay around as well.
+	 */
+
+	vm_object_lock(first_object);
+
+	first_object->ref_count++;
+	first_object->paging_in_progress++;
+
+	/*
+	 *	INVARIANTS (through entire routine):
+	 *
+	 *	1)	At all times, we must either have the object
+	 *		lock or a busy page in some object to prevent
+	 *		some other thread from trying to bring in
+	 *		the same page.
+	 *
+	 *		Note that we cannot hold any locks during the
+	 *		pager access or when waiting for memory, so
+	 *		we use a busy page then.
+	 *
+	 *		Note also that we aren't as concerned about
+	 *		more than one thread attempting to pager_data_unlock
+	 *		the same page at once, so we don't hold the page
+	 *		as busy then, but do record the highest unlock
+	 *		value so far.  [Unlock requests may also be delivered
+	 *		out of order.]
+	 *
+	 *	2)	Once we have a busy page, we must remove it from
+	 *		the pageout queues, so that the pageout daemon
+	 *		will not grab it away.
+	 *
+	 *	3)	To prevent another thread from racing us down the
+	 *		shadow chain and entering a new page in the top
+	 *		object before we do, we must keep a busy page in
+	 *		the top object while following the shadow chain.
+	 *
+	 *	4)	We must increment paging_in_progress on any object
+	 *		for which we have a busy page, to prevent
+	 *		vm_object_collapse from removing the busy page
+	 *		without our noticing.
+	 */
+
+	/*
+	 *	Search for the page at object/offset.
+	 */
+
+	object = first_object;
+	offset = first_offset;
+
+	/*
+	 *	See whether this page is resident
+	 */
+
+	while (TRUE) {
+		m = vm_page_lookup(object, offset);
+		if (m != NULL) {
+			/*
+			 *	If the page is being brought in,
+			 *	wait for it and then retry.
+			 */
+			if (m->flags & PG_BUSY) {
+#ifdef DOTHREADS
+				int	wait_result;
+
+				PAGE_ASSERT_WAIT(m, !change_wiring);
+				UNLOCK_THINGS;
+				thread_block();
+				wait_result = current_thread()->wait_result;
+				vm_object_deallocate(first_object);
+				if (wait_result != THREAD_AWAKENED)
+					return(KERN_SUCCESS);
+				goto RetryFault;
+#else	/* !DOTHREADS */
+				PAGE_ASSERT_WAIT(m, !change_wiring);
+				UNLOCK_THINGS;
+				cnt.v_intrans++;
+				thread_block();
+				vm_object_deallocate(first_object);
+				goto RetryFault;
+#endif	/* DOTHREADS */
+			}
+
+			/*
+			 *	Remove the page from the pageout daemon's
+			 *	reach while we play with it.
+			 */
+
+			vm_page_lock_queues();
+			if (m->flags & PG_INACTIVE) {
+				TAILQ_REMOVE(&vm_page_queue_inactive, m, pageq);
+				m->flags &= ~PG_INACTIVE;
+				cnt.v_inactive_count--;
+				cnt.v_reactivated++;
+			} 
+
+			if (m->flags & PG_ACTIVE) {
+				TAILQ_REMOVE(&vm_page_queue_active, m, pageq);
+				m->flags &= ~PG_ACTIVE;
+				cnt.v_active_count--;
+			}
+			vm_page_unlock_queues();
+
+			/*
+			 *	Mark page busy for other threads.
+			 */
+			m->flags |= PG_BUSY;
+			break;
+		}
+
+		if (((object->pager != NULL) &&
+				(!change_wiring || wired))
+		    || (object == first_object)) {
+
+			/*
+			 *	Allocate a new page for this object/offset
+			 *	pair.
+			 */
+
+			m = vm_page_alloc(object, offset);
+
+			if (m == NULL) {
+				UNLOCK_AND_DEALLOCATE;
+				VM_WAIT;
+				goto RetryFault;
+			}
+		}
+
+		if (object->pager != NULL && (!change_wiring || wired)) {
+			int rv;
+
+			/*
+			 *	Now that we have a busy page, we can
+			 *	release the object lock.
+			 */
+			vm_object_unlock(object);
+
+			/*
+			 *	Call the pager to retrieve the data, if any,
+			 *	after releasing the lock on the map.
+			 */
+			UNLOCK_MAP;
+			cnt.v_pageins++;
+			rv = vm_pager_get(object->pager, m, TRUE);
+
+			/*
+			 *	Reacquire the object lock to preserve our
+			 *	invariant.
+			 */
+			vm_object_lock(object);
+
+			/*
+			 *	Found the page.
+			 *	Leave it busy while we play with it.
+			 */
+			if (rv == VM_PAGER_OK) {
+				/*
+				 *	Relookup in case pager changed page.
+				 *	Pager is responsible for disposition
+				 *	of old page if moved.
+				 */
+				m = vm_page_lookup(object, offset);
+
+				cnt.v_pgpgin++;
+				m->flags &= ~PG_FAKE;
+				m->flags |= PG_CLEAN;
+				pmap_clear_modify(VM_PAGE_TO_PHYS(m));
+				break;
+			}
+
+			/*
+			 * IO error or page outside the range of the pager:
+			 * cleanup and return an error.
+			 */
+			if (rv == VM_PAGER_ERROR || rv == VM_PAGER_BAD) {
+				FREE_PAGE(m);
+				UNLOCK_AND_DEALLOCATE;
+				return(KERN_PROTECTION_FAILURE); /* XXX */
+			}
+			/*
+			 * rv == VM_PAGER_FAIL:
+			 *
+			 * Page does not exist at this object/offset.
+			 * Free the bogus page (waking up anyone waiting
+			 * for it) and continue on to the next object.
+			 *
+			 * If this is the top-level object, we must
+			 * leave the busy page to prevent another
+			 * thread from rushing past us, and inserting
+			 * the page in that object at the same time
+			 * that we are.
+			 */
+			if (object != first_object) {
+				FREE_PAGE(m);
+				/* note that `m' is not used after this */
+			}
+		}
+
+		/*
+		 * We get here if the object has no pager (or unwiring)
+		 * or the pager doesn't have the page.
+		 */
+		if (object == first_object)
+			first_m = m;
+
+		/*
+		 *	Move on to the next object.  Lock the next
+		 *	object before unlocking the current one.
+		 */
+
+		offset += object->shadow_offset;
+		next_object = object->shadow;
+		if (next_object == NULL) {
+			/*
+			 *	If there's no object left, fill the page
+			 *	in the top object with zeros.
+			 */
+			if (object != first_object) {
+				object->paging_in_progress--;
+				vm_object_unlock(object);
+
+				object = first_object;
+				offset = first_offset;
+				m = first_m;
+				vm_object_lock(object);
+			}
+			first_m = NULL;
+
+			vm_page_zero_fill(m);
+			cnt.v_zfod++;
+			m->flags &= ~PG_FAKE;
+			break;
+		}
+		else {
+			vm_object_lock(next_object);
+			if (object != first_object)
+				object->paging_in_progress--;
+			vm_object_unlock(object);
+			object = next_object;
+			object->paging_in_progress++;
+		}
+	}
+
+	if ((m->flags & (PG_ACTIVE | PG_INACTIVE | PG_BUSY)) != PG_BUSY)
+		panic("vm_fault: active, inactive or !busy after main loop");
+
+	/*
+	 *	PAGE HAS BEEN FOUND.
+	 *	[Loop invariant still holds -- the object lock
+	 *	is held.]
+	 */
+
+	old_m = m;	/* save page that would be copied */
+
+	/*
+	 *	If the page is being written, but isn't
+	 *	already owned by the top-level object,
+	 *	we have to copy it into a new page owned
+	 *	by the top-level object.
+	 */
+
+	if (object != first_object) {
+	    	/*
+		 *	We only really need to copy if we
+		 *	want to write it.
+		 */
+
+	    	if (fault_type & VM_PROT_WRITE) {
+
+			/*
+			 *	If we try to collapse first_object at this
+			 *	point, we may deadlock when we try to get
+			 *	the lock on an intermediate object (since we
+			 *	have the bottom object locked).  We can't
+			 *	unlock the bottom object, because the page
+			 *	we found may move (by collapse) if we do.
+			 *
+			 *	Instead, we first copy the page.  Then, when
+			 *	we have no more use for the bottom object,
+			 *	we unlock it and try to collapse.
+			 *
+			 *	Note that we copy the page even if we didn't
+			 *	need to... that's the breaks.
+			 */
+
+		    	/*
+			 *	We already have an empty page in
+			 *	first_object - use it.
+			 */
+
+			vm_page_copy(m, first_m);
+			first_m->flags &= ~PG_FAKE;
+
+			/*
+			 *	If another map is truly sharing this
+			 *	page with us, we have to flush all
+			 *	uses of the original page, since we
+			 *	can't distinguish those which want the
+			 *	original from those which need the
+			 *	new copy.
+			 *
+			 *	XXX If we know that only one map has
+			 *	access to this page, then we could
+			 *	avoid the pmap_page_protect() call.
+			 */
+
+			vm_page_lock_queues();
+			vm_page_activate(m);
+			vm_page_deactivate(m);	/* NOTE(review): presumably leaves m inactive - confirm */
+			pmap_page_protect(VM_PAGE_TO_PHYS(m), VM_PROT_NONE);
+			vm_page_unlock_queues();
+
+			/*
+			 *	We no longer need the old page or object.
+			 */
+			PAGE_WAKEUP(m);
+			object->paging_in_progress--;
+			vm_object_unlock(object);
+
+			/*
+			 *	Only use the new page below...
+			 */
+
+			cnt.v_cow_faults++;
+			m = first_m;
+			object = first_object;
+			offset = first_offset;
+
+			/*
+			 *	Now that we've gotten the copy out of the
+			 *	way, let's try to collapse the top object.
+			 */
+			vm_object_lock(object);
+			/*
+			 *	But we have to play ugly games with
+			 *	paging_in_progress to do that...
+			 */
+			object->paging_in_progress--;
+			vm_object_collapse(object);
+			object->paging_in_progress++;
+		}
+		else {
+		    	prot &= ~VM_PROT_WRITE;
+			m->flags |= PG_COPYONWRITE;
+		}
+	}
+
+	if (m->flags & (PG_ACTIVE|PG_INACTIVE))
+		panic("vm_fault: active or inactive before copy object handling");
+
+	/*
+	 *	If the page is being written, but hasn't been
+	 *	copied to the copy-object, we have to copy it there.
+	 */
+    RetryCopy:
+	if (first_object->copy != NULL) {
+		vm_object_t copy_object = first_object->copy;
+		vm_offset_t copy_offset;
+		vm_page_t copy_m;
+
+		/*
+		 *	We only need to copy if we want to write it.
+		 */
+		if ((fault_type & VM_PROT_WRITE) == 0) {
+			prot &= ~VM_PROT_WRITE;
+			m->flags |= PG_COPYONWRITE;
+		}
+		else {
+			/*
+			 *	Try to get the lock on the copy_object.
+			 */
+			if (!vm_object_lock_try(copy_object)) {
+				vm_object_unlock(object);
+				/* should spin a bit here... */
+				vm_object_lock(object);
+				goto RetryCopy;
+			}
+
+			/*
+			 *	Make another reference to the copy-object,
+			 *	to keep it from disappearing during the
+			 *	copy.
+			 */
+			copy_object->ref_count++;
+
+			/*
+			 *	Does the page exist in the copy?
+			 */
+			copy_offset = first_offset
+				- copy_object->shadow_offset;
+			copy_m = vm_page_lookup(copy_object, copy_offset);
+			if (page_exists = (copy_m != NULL)) {	/* assignment intended */
+				if (copy_m->flags & PG_BUSY) {
+#ifdef DOTHREADS
+					int	wait_result;
+
+					/*
+					 *	If the page is being brought
+					 *	in, wait for it and then retry.
+					 */
+					PAGE_ASSERT_WAIT(copy_m, !change_wiring);
+					RELEASE_PAGE(m);
+					copy_object->ref_count--;
+					vm_object_unlock(copy_object);
+					UNLOCK_THINGS;
+					thread_block();
+					wait_result = current_thread()->wait_result;
+					vm_object_deallocate(first_object);
+					if (wait_result != THREAD_AWAKENED)
+						return(KERN_SUCCESS);
+					goto RetryFault;
+#else	/* !DOTHREADS */
+					/*
+					 *	If the page is being brought
+					 *	in, wait for it and then retry.
+					 */
+					PAGE_ASSERT_WAIT(copy_m, !change_wiring);
+					RELEASE_PAGE(m);
+					copy_object->ref_count--;
+					vm_object_unlock(copy_object);
+					UNLOCK_THINGS;
+					thread_block();
+					vm_object_deallocate(first_object);
+					goto RetryFault;
+#endif	/* DOTHREADS */
+				}
+			}
+
+			/*
+			 *	If the page is not in memory (in the object)
+			 *	and the object has a pager, we have to check
+			 *	if the pager has the data in secondary
+			 *	storage.
+			 */
+			if (!page_exists) {
+
+				/*
+				 *	If we don't allocate a (blank) page
+				 *	here... another thread could try
+				 *	to page it in, allocate a page, and
+				 *	then block on the busy page in its
+				 *	shadow (first_object).  Then we'd
+				 *	trip over the busy page after we
+				 *	found that the copy_object's pager
+				 *	doesn't have the page...
+				 */
+				copy_m = vm_page_alloc(copy_object,
+								copy_offset);
+				if (copy_m == NULL) {
+					/*
+					 *	Wait for a page, then retry.
+					 */
+					RELEASE_PAGE(m);
+					copy_object->ref_count--;
+					vm_object_unlock(copy_object);
+					UNLOCK_AND_DEALLOCATE;
+					VM_WAIT;
+					goto RetryFault;
+				}
+
+			 	if (copy_object->pager != NULL) {
+					vm_object_unlock(object);
+					vm_object_unlock(copy_object);
+					UNLOCK_MAP;
+
+					page_exists = vm_pager_has_page(
+							copy_object->pager,
+							(copy_offset + copy_object->paging_offset));
+
+					vm_object_lock(copy_object);
+
+					/*
+					 * Since the map is unlocked, someone
+					 * else could have copied this object
+					 * and put a different copy_object
+					 * between the two.  Or, the last
+					 * reference to the copy-object (other
+					 * than the one we have) may have
+					 * disappeared - if that has happened,
+					 * we don't need to make the copy.
+					 */
+					if (copy_object->shadow != object ||
+					    copy_object->ref_count == 1) {
+						/*
+						 *	Gaah... start over!
+						 */
+						FREE_PAGE(copy_m);
+						vm_object_unlock(copy_object);
+						vm_object_deallocate(copy_object);
+							/* may block */
+						vm_object_lock(object);
+						goto RetryCopy;
+					}
+					vm_object_lock(object);
+
+					if (page_exists) {
+						/*
+						 *	We didn't need the page
+						 */
+						FREE_PAGE(copy_m);
+					}
+				}
+			}
+			if (!page_exists) {
+				/*
+				 *	Must copy page into copy-object.
+				 */
+				vm_page_copy(m, copy_m);
+				copy_m->flags &= ~PG_FAKE;
+
+				/*
+				 * Things to remember:
+				 * 1. The copied page must be marked 'dirty'
+				 *    so it will be paged out to the copy
+				 *    object.
+				 * 2. If the old page was in use by any users
+				 *    of the copy-object, it must be removed
+				 *    from all pmaps.  (We can't know which
+				 *    pmaps use it.)
+				 */
+				vm_page_lock_queues();
+				pmap_page_protect(VM_PAGE_TO_PHYS(old_m),
+						  VM_PROT_NONE);
+				copy_m->flags &= ~PG_CLEAN;
+				vm_page_activate(copy_m);	/* XXX */
+				vm_page_unlock_queues();
+
+				PAGE_WAKEUP(copy_m);
+			}
+			/*
+			 *	The reference count on copy_object must be
+			 *	at least 2: one for our extra reference,
+			 *	and at least one from the outside world
+			 *	(we checked that when we last locked
+			 *	copy_object).
+			 */
+			copy_object->ref_count--;
+			vm_object_unlock(copy_object);
+			m->flags &= ~PG_COPYONWRITE;
+		}
+	}
+
+	if (m->flags & (PG_ACTIVE | PG_INACTIVE))
+		panic("vm_fault: active or inactive before retrying lookup");
+
+	/*
+	 *	We must verify that the maps have not changed
+	 *	since our last lookup.
+	 */
+
+	if (!lookup_still_valid) {
+		vm_object_t	retry_object;
+		vm_offset_t	retry_offset;
+		vm_prot_t	retry_prot;
+
+		/*
+		 *	Since map entries may be pageable, make sure we can
+		 *	take a page fault on them.
+		 */
+		vm_object_unlock(object);
+
+		/*
+		 *	To avoid trying to write_lock the map while another
+		 *	thread has it read_locked (in vm_map_pageable), we
+		 *	do not try for write permission.  If the page is
+		 *	still writable, we will get write permission.  If it
+		 *	is not, or has been marked needs_copy, we enter the
+		 *	mapping without write permission, and will merely
+		 *	take another fault.
+		 */
+		result = vm_map_lookup(&map, vaddr,
+				fault_type & ~VM_PROT_WRITE, &entry,
+				&retry_object, &retry_offset, &retry_prot,
+				&wired, &su);
+
+		vm_object_lock(object);
+
+		/*
+		 *	If we don't need the page any longer, put it on the
+		 *	active list (the easiest thing to do here).  If no
+		 *	one needs it, pageout will grab it eventually.
+		 */
+
+		if (result != KERN_SUCCESS) {
+			RELEASE_PAGE(m);
+			UNLOCK_AND_DEALLOCATE;
+			return(result);
+		}
+
+		lookup_still_valid = TRUE;
+
+		if ((retry_object != first_object) ||
+				(retry_offset != first_offset)) {
+			RELEASE_PAGE(m);
+			UNLOCK_AND_DEALLOCATE;
+			goto RetryFault;
+		}
+
+		/*
+		 *	Check whether the protection has changed or the object
+		 *	has been copied while we left the map unlocked.
+		 *	Changing from read to write permission is OK - we leave
+		 *	the page write-protected, and catch the write fault.
+		 *	Changing from write to read permission means that we
+		 *	can't mark the page write-enabled after all.
+		 */
+		prot &= retry_prot;
+		if (m->flags & PG_COPYONWRITE)
+			prot &= ~VM_PROT_WRITE;
+	}
+
+	/*
+	 * (the various bits we're fiddling with here are locked by
+	 * the object's lock)
+	 */
+
+	/* XXX This distorts the meaning of the copy_on_write bit */
+
+	if (prot & VM_PROT_WRITE)
+		m->flags &= ~PG_COPYONWRITE;
+
+	/*
+	 *	It's critically important that a wired-down page be faulted
+	 *	only once in each map for which it is wired.
+	 */
+
+	if (m->flags & (PG_ACTIVE | PG_INACTIVE))
+		panic("vm_fault: active or inactive before pmap_enter");
+
+	vm_object_unlock(object);
+
+	/*
+	 *	Put this page into the physical map.
+	 *	We had to do the unlock above because pmap_enter
+	 *	may cause other faults.   We don't put the
+	 *	page back on the active queue until later so
+	 *	that the page-out daemon won't find us (yet).
+	 */
+
+	pmap_enter(map->pmap, vaddr, VM_PAGE_TO_PHYS(m), prot, wired);
+
+	/*
+	 *	If the page is not wired down, then put it where the
+	 *	pageout daemon can find it.
+	 */
+	vm_object_lock(object);
+	vm_page_lock_queues();
+	if (change_wiring) {
+		if (wired)
+			vm_page_wire(m);
+		else
+			vm_page_unwire(m);
+	}
+	else
+		vm_page_activate(m);
+	vm_page_unlock_queues();
+
+	/*
+	 *	Unlock everything, and return
+	 */
+
+	PAGE_WAKEUP(m);
+	UNLOCK_AND_DEALLOCATE;
+
+	return(KERN_SUCCESS);
+
+}
+
+/*
+ *	vm_fault_wire:
+ *
+ *	Wire down a range of virtual addresses in a map.
+ */
+int
+vm_fault_wire(map, start, end)
+	vm_map_t	map;
+	vm_offset_t	start, end;
+{
+	register pmap_t		physmap = vm_map_pmap(map);
+	register vm_offset_t	addr;
+	int			status;
+
+	/*
+	 *	Mark [start, end) as non-pageable in the physical map
+	 *	before touching any page, so that the pmap layer can
+	 *	lock down its page tables and such for the range.
+	 */
+	pmap_pageable(physmap, start, end, FALSE);
+
+	/*
+	 *	Wire each page by simulating a wiring fault on it
+	 *	(change_wiring == TRUE); the fault also enters the page
+	 *	in the physical map.  The first failure stops the walk.
+	 */
+	addr = start;
+	while (addr < end) {
+		status = vm_fault(map, addr, VM_PROT_NONE, TRUE);
+		if (status) {
+			/* Undo the pages handled so far, if any. */
+			if (addr != start)
+				vm_fault_unwire(map, start, addr);
+			return(status);
+		}
+		addr += PAGE_SIZE;
+	}
+	return(KERN_SUCCESS);
+}
+
+
+/*
+ *	vm_fault_unwire:
+ *
+ *	Unwire a range of virtual addresses in a map.
+ */
+void vm_fault_unwire(map, start, end)
+	vm_map_t	map;
+	vm_offset_t	start, end;
+{
+	register pmap_t		physmap = vm_map_pmap(map);
+	register vm_offset_t	addr, phys;
+
+	/*
+	 *	Every page in the range is expected to be wired down,
+	 *	so the physical map must be able to translate each
+	 *	address; a missing translation is treated as a fatal
+	 *	inconsistency.  The page queues stay locked for the
+	 *	whole walk.
+	 */
+	vm_page_lock_queues();
+	addr = start;
+	while (addr < end) {
+		phys = pmap_extract(physmap, addr);
+		if (phys == (vm_offset_t) 0)
+			panic("unwire: page not in pmap");
+
+		/* Drop the wiring in the pmap, then on the page itself. */
+		pmap_change_wiring(physmap, addr, FALSE);
+		vm_page_unwire(PHYS_TO_VM_PAGE(phys));
+		addr += PAGE_SIZE;
+	}
+	vm_page_unlock_queues();
+
+	/*
+	 *	Tell the physical map that [start, end) may fault
+	 *	again, so that its page tables and such may be
+	 *	unwired themselves.
+	 */
+	pmap_pageable(physmap, start, end, TRUE);
+
+}
+
+/*
+ *	Routine:
+ *		vm_fault_copy_entry
+ *	Function:
+ *		Copy all of the pages from a wired-down map entry to another.
+ *
+ *	In/out conditions:
+ *		The source and destination maps must be locked for write.
+ *		The source map entry must be wired down (or be a sharing map
+ *		entry corresponding to a main map entry that is wired down).
+ */
+
+void vm_fault_copy_entry(dst_map, src_map, dst_entry, src_entry)
+	vm_map_t	dst_map;
+	vm_map_t	src_map;
+	vm_map_entry_t	dst_entry;
+	vm_map_entry_t	src_entry;
+{
+	vm_object_t	from_obj, to_obj;
+	vm_offset_t	from_base, to_off;
+	vm_offset_t	va;
+	vm_page_t	from_pg, to_pg;
+	vm_prot_t	access;
+
+#ifdef	lint
+	src_map++;
+#endif
+
+	/*
+	 *	Remember where the source pages live: the source entry's
+	 *	object and the offset of the entry within that object.
+	 */
+	from_obj = src_entry->object.vm_object;
+	from_base = src_entry->offset;
+
+	/*
+	 *	Give the destination entry a brand-new top-level object
+	 *	sized to the entry and starting at offset 0.  It shadows
+	 *	nothing; the pages are copied into it directly.
+	 */
+	to_obj = vm_object_allocate(
+			(vm_size_t) (dst_entry->end - dst_entry->start));
+	dst_entry->object.vm_object = to_obj;
+	dst_entry->offset = 0;
+
+	/* Mappings are entered with the entry's maximum protection. */
+	access = dst_entry->max_protection;
+
+	/*
+	 *	Walk the destination entry a page at a time, copying the
+	 *	matching source page (it should be there) into a freshly
+	 *	allocated destination page.
+	 */
+	va = dst_entry->start;
+	to_off = 0;
+	while (va < dst_entry->end) {
+		/*
+		 *	Get a page for the destination object.  When
+		 *	none is available, drop the object lock, wait
+		 *	for memory and try again.
+		 */
+		vm_object_lock(to_obj);
+		while ((to_pg = vm_page_alloc(to_obj, to_off)) == NULL) {
+			vm_object_unlock(to_obj);
+			VM_WAIT;
+			vm_object_lock(to_obj);
+		}
+
+		/*
+		 *	Look the source page up and copy its contents.
+		 *	The source is wired down, so a missing page is
+		 *	treated as a fatal inconsistency.
+		 */
+		vm_object_lock(from_obj);
+		from_pg = vm_page_lookup(from_obj, to_off + from_base);
+		if (from_pg == NULL)
+			panic("vm_fault_copy_wired: page missing");
+		vm_page_copy(from_pg, to_pg);
+
+		/*
+		 *	Release both object locks before asking the
+		 *	pmap layer to enter the new page, unwired.
+		 */
+		vm_object_unlock(from_obj);
+		vm_object_unlock(to_obj);
+		pmap_enter(dst_map->pmap, va, VM_PAGE_TO_PHYS(to_pg),
+				access, FALSE);
+
+		/*
+		 *	Put the page on the active list and mark it
+		 *	no longer busy (PAGE_WAKEUP).
+		 */
+		vm_object_lock(to_obj);
+		vm_page_lock_queues();
+		vm_page_activate(to_pg);
+		vm_page_unlock_queues();
+		PAGE_WAKEUP(to_pg);
+		vm_object_unlock(to_obj);
+		va += PAGE_SIZE;
+		to_off += PAGE_SIZE;
+	}
+
+}
diff --git a/sys/vm/vm_glue.c b/sys/vm/vm_glue.c
new file mode 100644
index 00000000000..5676ff3f7cc
--- /dev/null
+++ b/sys/vm/vm_glue.c
@@ -0,0 +1,605 @@
+/* 
+ * Copyright (c) 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * The Mach Operating System project at Carnegie-Mellon University.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)vm_glue.c	8.6 (Berkeley) 1/5/94
+ *
+ *
+ * Copyright (c) 1987, 1990 Carnegie-Mellon University.
+ * All rights reserved.
+ * 
+ * Permission to use, copy, modify and distribute this software and
+ * its documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ * 
+ * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 
+ * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND 
+ * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ * 
+ * Carnegie Mellon requests users of this software to return to
+ *
+ *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
+ *  School of Computer Science
+ *  Carnegie Mellon University
+ *  Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie the
+ * rights to redistribute these changes.
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/resourcevar.h>
+#include <sys/buf.h>
+#include <sys/user.h>
+
+#include <vm/vm.h>
+#include <vm/vm_page.h>
+#include <vm/vm_kern.h>
+
+#include <machine/cpu.h>
+
+int	avefree = 0;		/* XXX not referenced elsewhere in this file */
+unsigned maxdmap = MAXDSIZ;	/* XXX not referenced elsewhere in this file */
+int	readbuffers = 0;	/* XXX allow kgdb to read kernel buffer pool (tested by kernacc) */
+
+int
+kernacc(addr, len, rw)
+	caddr_t addr;
+	int len, rw;
+{
+	vm_offset_t first, last;
+	vm_prot_t want = (rw == B_READ) ? VM_PROT_READ : VM_PROT_WRITE;
+
+	/* Check the page-aligned span covering [addr, addr+len). */
+	first = trunc_page(addr);
+	last = round_page(addr+len);
+	if (vm_map_check_protection(kernel_map, first, last, want) != TRUE)
+		return(0);
+	/*
+	 * XXX some things (e.g. the buffer cache) are still managed behind
+	 * the VM system's back, so an address the VM system considers
+	 * accessible may have no physical page where the VM thinks there
+	 * is one.  That can lead to bogus allocation of pages in the kernel
+	 * address space or, worse, inconsistencies at the pmap level.  Only
+	 * the buffer cache is handled: reject it unless readbuffers is set.
+	 */
+	if (!readbuffers && last > (vm_offset_t)buffers &&
+	    first < (vm_offset_t)buffers + MAXBSIZE * nbuf)
+		return(0);
+	return(1);
+}
+
+int
+useracc(addr, len, rw)
+	caddr_t addr;
+	int len, rw;
+{
+	vm_prot_t want;
+
+	/* B_READ requests read access; any other value requests write. */
+	want = (rw == B_READ) ? VM_PROT_READ : VM_PROT_WRITE;
+	return(vm_map_check_protection(&curproc->p_vmspace->vm_map,
+	    trunc_page(addr), round_page(addr+len), want) == TRUE);
+}
+
+#ifdef KGDB
+/*
+ * Change protections on kernel pages from addr to addr+len
+ * (presumably so debugger can plant a breakpoint).
+ *
+ * We force the protection change at the pmap level.  If we were
+ * to use vm_map_protect a change to allow writing would be lazily-
+ * applied meaning we would still take a protection fault, something
+ * we really don't want to do.  It would also fragment the kernel
+ * map unnecessarily.  We cannot use pmap_protect since it also won't
+ * enforce a write-enable request.  Using pmap_enter is the only way
+ * we can ensure the change takes place properly.
+ */
+void
+chgkprot(addr, len, rw)
+	register caddr_t addr;	/* first kernel address of the range */
+	int len, rw;		/* byte count; B_READ => read-only, else read/write */
+{
+	vm_prot_t prot;
+	vm_offset_t pa, sva, eva;
+
+	prot = rw == B_READ ? VM_PROT_READ : VM_PROT_READ|VM_PROT_WRITE;
+	eva = round_page(addr + len);	/* end of the last page touched */
+	for (sva = trunc_page(addr); sva < eva; sva += PAGE_SIZE) {
+		/*
+		 * Extract physical address for the page.
+		 * We use a cheezy hack to differentiate physical
+		 * page 0 from an invalid mapping, not that it
+		 * really matters...
+		 */
+		pa = pmap_extract(kernel_pmap, sva|1);	/* low bit set: a valid page 0 comes back non-zero */
+		if (pa == 0)
+			panic("chgkprot: invalid page");
+		pmap_enter(kernel_pmap, sva, pa&~1, prot, TRUE);	/* low bit cleared again before re-entering */
+	}
+}
+#endif
+
+void
+vslock(addr, len)
+	caddr_t	addr;		/* start of the range in the current process */
+	u_int	len;		/* length of the range in bytes */
+{
+	vm_offset_t lo = trunc_page(addr), hi = round_page(addr+len);
+	vm_map_pageable(&curproc->p_vmspace->vm_map, lo, hi, FALSE);
+}
+
+void
+vsunlock(addr, len, dirtied)
+	caddr_t	addr;		/* start of the range in the current process */
+	u_int	len;		/* length of the range in bytes */
+	int dirtied;		/* not used; referenced only under lint */
+{
+	vm_offset_t lo = trunc_page(addr), hi = round_page(addr+len);
+#ifdef	lint
+	dirtied++;
+#endif
+	vm_map_pageable(&curproc->p_vmspace->vm_map, lo, hi, TRUE);
+}
+
+/*
+ * Implement fork's actions on an address space.
+ * Here we arrange for the address space to be copied or referenced,
+ * allocate a user struct (pcb and kernel stack), then call the
+ * machine-dependent layer to fill those in and make the new process
+ * ready to run.
+ * NOTE: the kernel stack may be at a different location in the child
+ * process, and thus addresses of automatic variables may be invalid
+ * after cpu_fork returns in the child process.  We do nothing here
+ * after cpu_fork returns.
+ */
+int
+vm_fork(p1, p2, isvfork)
+	register struct proc *p1, *p2;	/* p1: parent, p2: child being set up */
+	int isvfork;			/* only passed on to shmfork() (SYSVSHM) */
+{
+	register struct user *up;
+	vm_offset_t addr;
+
+#ifdef i386
+	/*
+	 * avoid copying any of the parent's pagetables or other per-process
+	 * objects that reside in the map by marking all of them non-inheritable
+	 */
+	(void)vm_map_inherit(&p1->p_vmspace->vm_map,
+		UPT_MIN_ADDRESS-UPAGES*NBPG, VM_MAX_ADDRESS, VM_INHERIT_NONE);
+#endif
+	p2->p_vmspace = vmspace_fork(p1->p_vmspace);
+
+#ifdef SYSVSHM
+	if (p1->p_vmspace->vm_shm)
+		shmfork(p1, p2, isvfork);
+#endif
+
+#ifndef	i386
+	/*
+	 * Allocate a wired-down (for now) pcb and kernel stack for the process
+	 */
+	addr = kmem_alloc_pageable(kernel_map, ctob(UPAGES));
+	if (addr == 0)
+		panic("vm_fork: no more kernel virtual memory");
+	vm_map_pageable(kernel_map, addr, addr + ctob(UPAGES), FALSE);
+#else
+/* XXX somehow, on 386, occasionally pageout removes active, wired down kstack,
+and pagetables, WITHOUT going thru vm_page_unwire! Why this appears to work is
+not yet clear, yet it does... */
+	addr = kmem_alloc(kernel_map, ctob(UPAGES));
+	if (addr == 0)
+		panic("vm_fork: no more kernel virtual memory");
+#endif
+	up = (struct user *)addr;	/* the new region becomes the child's user struct */
+	p2->p_addr = up;
+
+	/*
+	 * p_stats and p_sigacts currently point at fields
+	 * in the user struct but not at &u, instead at p_addr.
+	 * Copy p_sigacts and parts of p_stats; zero the rest
+	 * of p_stats (statistics).
+	 */
+	p2->p_stats = &up->u_stats;
+	p2->p_sigacts = &up->u_sigacts;
+	up->u_sigacts = *p1->p_sigacts;	/* structure copy from the parent */
+	bzero(&up->u_stats.pstat_startzero,
+	    (unsigned) ((caddr_t)&up->u_stats.pstat_endzero -
+	    (caddr_t)&up->u_stats.pstat_startzero));	/* zero [startzero, endzero) */
+	bcopy(&p1->p_stats->pstat_startcopy, &up->u_stats.pstat_startcopy,
+	    ((caddr_t)&up->u_stats.pstat_endcopy -
+	     (caddr_t)&up->u_stats.pstat_startcopy));	/* copy [startcopy, endcopy) from parent */
+
+#ifdef i386
+	{ u_int addr = UPT_MIN_ADDRESS - UPAGES*NBPG; struct vm_map *vp;	/* this addr shadows the outer one */
+
+	vp = &p2->p_vmspace->vm_map;	/* operate on the child's map */
+	(void)vm_deallocate(vp, addr, UPT_MAX_ADDRESS - addr);
+	(void)vm_allocate(vp, &addr, UPT_MAX_ADDRESS - addr, FALSE);
+	(void)vm_map_inherit(vp, addr, UPT_MAX_ADDRESS, VM_INHERIT_NONE);
+	}
+#endif
+	/*
+	 * cpu_fork will copy and update the kernel stack and pcb,
+	 * and make the child ready to run.  It marks the child
+	 * so that it can return differently than the parent.
+	 * It returns twice, once in the parent process and
+	 * once in the child.
+	 */
+	return (cpu_fork(p1, p2));	/* vm_fork's result is whatever cpu_fork returns */
+}
+
+/*
+ * Set default limits for VM system.
+ * Called for proc 0, and then inherited by all others.
+ */
+void
+vm_init_limits(p)
+	register struct proc *p;
+{
+	register struct rlimit *rl = p->p_rlimit;
+
+	/* Stack and data: rlim_cur gets DFL*SIZ, rlim_max gets MAX*SIZ. */
+	rl[RLIMIT_STACK].rlim_cur = DFLSSIZ;
+	rl[RLIMIT_STACK].rlim_max = MAXSSIZ;
+	rl[RLIMIT_DATA].rlim_cur = DFLDSIZ;
+	rl[RLIMIT_DATA].rlim_max = MAXDSIZ;
+	/*
+	 * Set the maximum resident set size to be all of (reasonably)
+	 * available memory.  This causes any single, large process to
+	 * start random page replacement once it fills memory.
+	 */
+	rl[RLIMIT_RSS].rlim_cur = ptoa(cnt.v_free_count);
+}
+
+#include <vm/vm_pageout.h>
+
+#ifdef DEBUG
+int	enableswap = 1;		/* 0: scheduler() sleeps and swapout_threads() returns at once */
+int	swapdebug = 0;		/* bitmask of the SDB_* values below, gating debug printfs */
+#define	SDB_FOLLOW	1	/* trace scheduler() decisions */
+#define SDB_SWAPIN	2	/* report each swapin in scheduler() */
+#define SDB_SWAPOUT	4	/* report swapout() and swapout_threads() choices */
+#endif
+
+/*
+ * Brutally simple:
+ *	1. Attempt to swapin every swapped-out, runnable process in
+ *	   order of priority.
+ *	2. If not enough memory, wake the pageout daemon and let it
+ *	   clear some space.
+ */
+void
+scheduler()
+{
+	register struct proc *p;
+	register int pri;
+	struct proc *pp;	/* best swapin candidate found so far */
+	int ppri;		/* its priority */
+	vm_offset_t addr;
+	vm_size_t size;
+
+loop:
+#ifdef DEBUG
+	while (!enableswap)
+		sleep((caddr_t)&proc0, PVM);
+#endif
+	pp = NULL;
+	ppri = INT_MIN;		/* any computed priority beats this */
+	for (p = (struct proc *)allproc; p != NULL; p = p->p_next) {
+		if (p->p_stat == SRUN && (p->p_flag & P_INMEM) == 0) {	/* runnable but not in memory */
+			pri = p->p_swtime + p->p_slptime - p->p_nice * 8;
+			if (pri > ppri) {
+				pp = p;
+				ppri = pri;
+			}
+		}
+	}
+#ifdef DEBUG
+	if (swapdebug & SDB_FOLLOW)
+		printf("sched: running, procp %x pri %d\n", pp, ppri);
+#endif
+	/*
+	 * Nothing to do, back to sleep
+	 */
+	if ((p = pp) == NULL) {
+		sleep((caddr_t)&proc0, PVM);
+		goto loop;
+	}
+
+	/*
+	 * We would like to bring someone in.
+	 * This part is really bogus cuz we could deadlock on memory
+	 * despite our feeble check.
+	 */
+	size = round_page(ctob(UPAGES));	/* size of the u-area, page rounded */
+	addr = (vm_offset_t) p->p_addr;
+	if (cnt.v_free_count > atop(size)) {	/* more free pages than the u-area needs */
+#ifdef DEBUG
+		if (swapdebug & SDB_SWAPIN)
+			printf("swapin: pid %d(%s)@%x, pri %d free %d\n",
+			       p->p_pid, p->p_comm, p->p_addr,
+			       ppri, cnt.v_free_count);
+#endif
+		vm_map_pageable(kernel_map, addr, addr+size, FALSE);	/* make the u-area non-pageable again */
+		/*
+		 * Some architectures need to be notified when the
+		 * user area has moved to new physical page(s) (e.g.
+		 * see pmax/pmax/vm_machdep.c).
+		 */
+		cpu_swapin(p);
+		(void) splstatclock();
+		if (p->p_stat == SRUN)	/* re-checked at raised spl before queueing */
+			setrunqueue(p);
+		p->p_flag |= P_INMEM;
+		(void) spl0();
+		p->p_swtime = 0;	/* restart the swap-time counter used in the priority */
+		goto loop;
+	}
+	/*
+	 * Not enough memory, jab the pageout daemon and wait til the
+	 * coast is clear.
+	 */
+#ifdef DEBUG
+	if (swapdebug & SDB_FOLLOW)
+		printf("sched: no room for pid %d(%s), free %d\n",
+		       p->p_pid, p->p_comm, cnt.v_free_count);
+#endif
+	(void) splhigh();
+	VM_WAIT;
+	(void) spl0();
+#ifdef DEBUG
+	if (swapdebug & SDB_FOLLOW)
+		printf("sched: room again, free %d\n", cnt.v_free_count);
+#endif
+	goto loop;	/* never returns; rescan from the top */
+}
+
+#define	swappable(p)							\
+	(((p)->p_flag &							\
+	    (P_SYSTEM | P_INMEM | P_NOSWAP | P_WEXIT | P_PHYSIO)) == P_INMEM)
+
+/*
+ * Swapout is driven by the pageout daemon.  A process is eligible when
+ * swappable() holds: P_INMEM is set and none of P_SYSTEM, P_NOSWAP,
+ * P_WEXIT or P_PHYSIO is.  We find eligible procs and unwire their
+ * u-areas.  Every eligible process asleep or stopped for at least maxslp
+ * seconds is swapped.  If none was and memory is very low, we swap the
+ * longest-sleeping or stopped process, else the longest-resident one.
+ */
+void
+swapout_threads()
+{
+	register struct proc *p;
+	struct proc *sleeper, *runner;
+	int sleeper_pri, runner_pri;
+	int nswapped = 0;
+	extern int maxslp;
+
+#ifdef DEBUG
+	if (!enableswap)
+		return;
+#endif
+	sleeper = runner = NULL;
+	sleeper_pri = runner_pri = 0;
+	for (p = (struct proc *)allproc; p != NULL; p = p->p_next) {
+		if (!swappable(p))
+			continue;
+		if (p->p_stat == SRUN) {
+			/* Track the runnable process with the largest p_swtime. */
+			if (p->p_swtime > runner_pri) {
+				runner = p;
+				runner_pri = p->p_swtime;
+			}
+		} else if (p->p_stat == SSLEEP || p->p_stat == SSTOP) {
+			if (p->p_slptime >= maxslp) {
+				/* Idle for maxslp or longer: swap it right away. */
+				swapout(p);
+				nswapped++;
+			} else if (p->p_slptime > sleeper_pri) {
+				/* Otherwise track the longest sleeper. */
+				sleeper = p;
+				sleeper_pri = p->p_slptime;
+			}
+		}
+	}
+
+	/*
+	 * If we didn't get rid of any real duds, toss out the next most
+	 * likely sleeping/stopped or running candidate.  We only do this
+	 * if we are real low on memory since we don't gain much by doing
+	 * it (UPAGES pages).
+	 */
+	if (nswapped == 0 &&
+	    cnt.v_free_count <= atop(round_page(ctob(UPAGES)))) {
+		p = sleeper;
+		if (p == NULL)
+			p = runner;
+#ifdef DEBUG
+		if (swapdebug & SDB_SWAPOUT)
+			printf("swapout_threads: no duds, try procp %x\n", p);
+#endif
+		if (p != NULL)
+			swapout(p);
+	}
+}
+
+void
+swapout(p)
+	register struct proc *p;	/* process whose u-area is released */
+{
+	vm_offset_t addr;
+	vm_size_t size;
+
+#ifdef DEBUG
+	if (swapdebug & SDB_SWAPOUT)
+		printf("swapout: pid %d(%s)@%x, stat %x pri %d free %d\n",
+		       p->p_pid, p->p_comm, p->p_addr, p->p_stat,
+		       p->p_slptime, cnt.v_free_count);
+#endif
+	size = round_page(ctob(UPAGES));	/* size of the u-area, page rounded */
+	addr = (vm_offset_t) p->p_addr;
+#if defined(hp300) || defined(luna68k)
+	/*
+	 * Ugh!  u-area is double mapped to a fixed address behind the
+	 * back of the VM system and accesses are usually through that
+	 * address rather than the per-process address.  Hence reference
+	 * and modify information are recorded at the fixed address and
+	 * lost at context switch time.  We assume the u-struct and
+	 * kernel stack are always accessed/modified and force it to be so.
+	 */
+	{
+		register int i;
+		volatile long tmp;
+
+		for (i = 0; i < UPAGES; i++) {
+			tmp = *(long *)addr; *(long *)addr = tmp;	/* read and write back one word per page */
+			addr += NBPG;
+		}
+		addr = (vm_offset_t) p->p_addr;	/* rewind after the walk */
+	}
+#endif
+#ifdef mips
+	/*
+	 * Be sure to save the floating point coprocessor state before
+	 * paging out the u-struct.
+	 */
+	{
+		extern struct proc *machFPCurProcPtr;
+
+		if (p == machFPCurProcPtr) {
+			MachSaveCurFPState(p);
+			machFPCurProcPtr = (struct proc *)0;
+		}
+	}
+#endif
+#ifndef	i386 /* temporary measure till we find spontaneous unwire of kstack */
+	vm_map_pageable(kernel_map, addr, addr+size, TRUE);	/* let the u-area be paged */
+	pmap_collect(vm_map_pmap(&p->p_vmspace->vm_map));
+#endif
+	(void) splhigh();
+	p->p_flag &= ~P_INMEM;	/* scheduler() looks for SRUN procs without P_INMEM */
+	if (p->p_stat == SRUN)
+		remrq(p);
+	(void) spl0();
+	p->p_swtime = 0;
+}
+
+/*
+ * The rest of these routines fake thread handling
+ */
+
+void
+assert_wait(event, ruptible)
+	int event;		/* event to record for a later thread_block() */
+	boolean_t ruptible;	/* not used; referenced only under lint */
+{
+#ifdef lint
+	ruptible++;
+#endif
+	curproc->p_thread = event;	/* thread_block() sleeps on this value if non-zero */
+}
+
+void
+thread_block()
+{
+	int saved_spl;
+	saved_spl = splhigh();		/* undone by splx() below */
+	if (curproc->p_thread != 0)	/* an event was recorded in p_thread */
+		sleep((caddr_t)curproc->p_thread, PVM);
+	splx(saved_spl);
+}
+
+void
+thread_sleep(event, lock, ruptible)
+	int event;		/* event to sleep on; 0 means do not sleep */
+	simple_lock_t lock;	/* released before sleeping, not re-taken here */
+	boolean_t ruptible;	/* not used; referenced only under lint */
+{
+	int s = splhigh();	/* declared first: declarations must precede statements */
+
+#ifdef lint
+	ruptible++;
+#endif
+	curproc->p_thread = event;	/* record the event, as assert_wait() does */
+	simple_unlock(lock);
+	if (curproc->p_thread)		/* i.e. the event just stored is non-zero */
+		sleep((caddr_t)event, PVM);
+	splx(s);		/* restore the spl saved on entry */
+}
+
+void
+thread_wakeup(event)
+	int event;		/* event whose sleepers are woken */
+{
+	int saved_spl;
+	saved_spl = splhigh();	/* undone by splx() below */
+	wakeup((caddr_t)event);
+	splx(saved_spl);
+}
+
+/*
+ * DEBUG stuff
+ */
+
+int indent = 0;		/* current indentation in columns, read by iprintf() */
+
+#include <machine/stdarg.h>		/* see subr_prf.c */
+
+/*ARGSUSED2*/
+void
+#if __STDC__
+iprintf(const char *fmt, ...)
+#else
+iprintf(fmt /* , va_alist */)
+	char *fmt;
+	/* va_dcl */
+#endif
+{
+	register int i;
+	va_list ap;
+
+	for (i = indent; i >= 8; i -= 8)	/* one tab per full 8 columns of indent */
+		printf("\t");
+	while (--i >= 0)			/* the remainder as single spaces */
+		printf(" ");
+	va_start(ap, fmt);
+	printf("%r", fmt, ap);	/* NOTE(review): %r presumably makes the kernel printf format fmt with ap -- confirm in subr_prf.c */
+	va_end(ap);
+}
diff --git a/sys/vm/vm_inherit.h b/sys/vm/vm_inherit.h
new file mode 100644
index 00000000000..455f91c9390
--- /dev/null
+++ b/sys/vm/vm_inherit.h
@@ -0,0 +1,83 @@
+/* 
+ * Copyright (c) 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * The Mach Operating System project at Carnegie-Mellon University.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)vm_inherit.h	8.1 (Berkeley) 6/11/93
+ *
+ *
+ * Copyright (c) 1987, 1990 Carnegie-Mellon University.
+ * All rights reserved.
+ *
+ * Authors: Avadis Tevanian, Jr., Michael Wayne Young
+ * 
+ * Permission to use, copy, modify and distribute this software and
+ * its documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ * 
+ * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 
+ * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND 
+ * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ * 
+ * Carnegie Mellon requests users of this software to return to
+ *
+ *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
+ *  School of Computer Science
+ *  Carnegie Mellon University
+ *  Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie the
+ * rights to redistribute these changes.
+ */
+
+/*
+ *	Virtual memory map inheritance definitions.
+ */
+
+#ifndef	_VM_INHERIT_	/* include guard */
+#define	_VM_INHERIT_
+
+/*
+ *	Valid values for vm_inherit_t (the type is not defined in this header).
+ */
+
+#define	VM_INHERIT_SHARE	((vm_inherit_t) 0)	/* share with child */
+#define	VM_INHERIT_COPY		((vm_inherit_t) 1)	/* copy into child */
+#define VM_INHERIT_NONE		((vm_inherit_t) 2)	/* absent from child */
+#define	VM_INHERIT_DONATE_COPY	((vm_inherit_t) 3)	/* copy and delete */
+
+#define VM_INHERIT_DEFAULT	VM_INHERIT_COPY	/* the default is to copy into the child */
+
+#endif /* _VM_INHERIT_ */
diff --git a/sys/vm/vm_init.c b/sys/vm/vm_init.c
new file mode 100644
index 00000000000..4874f9e707a
--- /dev/null
+++ b/sys/vm/vm_init.c
@@ -0,0 +1,103 @@
+/* 
+ * Copyright (c) 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * The Mach Operating System project at Carnegie-Mellon University.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)vm_init.c	8.1 (Berkeley) 6/11/93
+ *
+ *
+ * Copyright (c) 1987, 1990 Carnegie-Mellon University.
+ * All rights reserved.
+ *
+ * Authors: Avadis Tevanian, Jr., Michael Wayne Young
+ * 
+ * Permission to use, copy, modify and distribute this software and
+ * its documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ * 
+ * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 
+ * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND 
+ * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ * 
+ * Carnegie Mellon requests users of this software to return to
+ *
+ *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
+ *  School of Computer Science
+ *  Carnegie Mellon University
+ *  Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie the
+ * rights to redistribute these changes.
+ */
+
+/*
+ *	Initialize the Virtual Memory subsystem.
+ */
+
+#include <sys/param.h>
+
+#include <vm/vm.h>
+#include <vm/vm_page.h>
+#include <vm/vm_kern.h>
+
+/*
+ *	vm_mem_init initializes the virtual memory system.
+ *	This is done only by the first cpu up.
+ *	It takes no arguments: the memory bounds are read from the externs
+ *	avail_start/avail_end and virtual_avail/virtual_end declared below.
+ */
+
+void vm_mem_init()
+{
+	extern vm_offset_t	avail_start, avail_end;
+	extern vm_offset_t	virtual_avail, virtual_end;
+
+	/*
+	 *	Initializes resident memory structures.
+	 *	From here on, all physical memory is accounted for,
+	 *	and we use only virtual addresses.
+	 */
+	vm_set_page_size();
+	vm_page_startup(&avail_start, &avail_end);	/* passed by address, so the callee can update them */
+
+	/*
+	 * Initialize other VM packages
+	 */
+	vm_object_init(virtual_end - VM_MIN_KERNEL_ADDRESS);
+	vm_map_startup();
+	kmem_init(virtual_avail, virtual_end);
+	pmap_init(avail_start, avail_end);	/* uses the values left by vm_page_startup() */
+	vm_pager_init();
+}
diff --git a/sys/vm/vm_kern.c b/sys/vm/vm_kern.c
new file mode 100644
index 00000000000..7e4db63abf2
--- /dev/null
+++ b/sys/vm/vm_kern.c
@@ -0,0 +1,450 @@
+/* 
+ * Copyright (c) 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * The Mach Operating System project at Carnegie-Mellon University.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)vm_kern.c	8.3 (Berkeley) 1/12/94
+ *
+ *
+ * Copyright (c) 1987, 1990 Carnegie-Mellon University.
+ * All rights reserved.
+ *
+ * Authors: Avadis Tevanian, Jr., Michael Wayne Young
+ * 
+ * Permission to use, copy, modify and distribute this software and
+ * its documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ * 
+ * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 
+ * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND 
+ * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ * 
+ * Carnegie Mellon requests users of this software to return to
+ *
+ *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
+ *  School of Computer Science
+ *  Carnegie Mellon University
+ *  Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie the
+ * rights to redistribute these changes.
+ */
+
+/*
+ *	Kernel memory management.
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+
+#include <vm/vm.h>
+#include <vm/vm_page.h>
+#include <vm/vm_pageout.h>
+#include <vm/vm_kern.h>
+
+/*
+ *	kmem_alloc_pageable:
+ *
+ *	Allocate pageable memory to the kernel's address map.
+ *	map must be "kernel_map" below.
+ */
+
+vm_offset_t kmem_alloc_pageable(map, size)
+	vm_map_t		map;
+	register vm_size_t	size;
+{
+	vm_offset_t		va;
+
+#if	0
+	if (map != kernel_map)
+		panic("kmem_alloc_pageable: not called with kernel_map");
+#endif
+
+	/* Work in whole pages. */
+	size = round_page(size);
+
+	/*
+	 * Seed va with the map's minimum; the address found comes back in va.
+	 */
+	va = vm_map_min(map);
+	if (vm_map_find(map, NULL, (vm_offset_t) 0, &va, size, TRUE) !=
+	    KERN_SUCCESS)
+		return(0);
+	return(va);
+}
+
+/*
+ *	Allocate wired-down, zero-filled memory in the kernel's address map
+ *	or a submap; returns its address, or 0 if no space is found.
+ */
+vm_offset_t kmem_alloc(map, size)
+	register vm_map_t	map;
+	register vm_size_t	size;
+{
+	vm_offset_t		addr;
+	register vm_offset_t	offset;
+	extern vm_object_t	kernel_object;
+	vm_offset_t		i;
+
+	size = round_page(size);	/* work in whole pages */
+
+	/*
+	 *	Use the kernel object for wired-down kernel pages.
+	 *	Assume that no region of the kernel object is
+	 *	referenced more than once.
+	 */
+
+	/*
+	 * Locate sufficient space in the map.  This will give us the
+	 * final virtual address for the new memory, and thus will tell
+	 * us the offset within the kernel map.
+	 */
+	vm_map_lock(map);
+	if (vm_map_findspace(map, 0, size, &addr)) {	/* non-zero: no space, give up */
+		vm_map_unlock(map);
+		return (0);
+	}
+	offset = addr - VM_MIN_KERNEL_ADDRESS;	/* offset of this range within kernel_object */
+	vm_object_reference(kernel_object);
+	vm_map_insert(map, kernel_object, offset, addr, addr + size);	/* return value is not checked */
+	vm_map_unlock(map);
+
+	/*
+	 *	Guarantee that there are pages already in this object
+	 *	before calling vm_map_pageable.  This is to prevent the
+	 *	following scenario:
+	 *
+	 *		1) Threads have swapped out, so that there is a
+	 *		   pager for the kernel_object.
+	 *		2) The kmsg zone is empty, and so we are kmem_allocing
+	 *		   a new page for it.
+	 *		3) vm_map_pageable calls vm_fault; there is no page,
+	 *		   but there is a pager, so we call
+	 *		   pager_data_request.  But the kmsg zone is empty,
+	 *		   so we must kmem_alloc.
+	 *		4) goto 1
+	 *		5) Even if the kmsg zone is not empty: when we get
+	 *		   the data back from the pager, it will be (very
+	 *		   stale) non-zero data.  kmem_alloc is defined to
+	 *		   return zero-filled memory.
+	 *
+	 *	We're intentionally not activating the pages we allocate
+	 *	to prevent a race with page-out.  vm_map_pageable will wire
+	 *	the pages.
+	 */
+
+	vm_object_lock(kernel_object);
+	for (i = 0 ; i < size; i+= PAGE_SIZE) {
+		vm_page_t	mem;
+
+		while ((mem = vm_page_alloc(kernel_object, offset+i)) == NULL) {
+			vm_object_unlock(kernel_object);	/* drop the lock across the wait */
+			VM_WAIT;
+			vm_object_lock(kernel_object);
+		}
+		vm_page_zero_fill(mem);
+		mem->flags &= ~PG_BUSY;	/* clear busy; deliberately not activated (see above) */
+	}
+	vm_object_unlock(kernel_object);
+		
+	/*
+	 *	And finally, mark the data as non-pageable.
+	 */
+
+	(void) vm_map_pageable(map, (vm_offset_t) addr, addr + size, FALSE);
+
+	/*
+	 *	Try to coalesce the map
+	 */
+
+	vm_map_simplify(map, addr);
+
+	return(addr);
+}
+
+/*
+ *	kmem_free: release a region of kernel virtual memory allocated with
+ *	kmem_alloc, and return the physical pages associated with that region.
+ */
+void kmem_free(map, addr, size)
+	vm_map_t		map;
+	register vm_offset_t	addr;
+	vm_size_t		size;
+{
+	vm_offset_t	first = trunc_page(addr);
+	vm_offset_t	last = round_page(addr + size);
+
+	(void) vm_map_remove(map, first, last);
+}
+
+/*
+ *	kmem_suballoc:
+ *
+ *	Allocates a map to manage a subrange
+ *	of the kernel virtual address space.
+ *
+ *	Arguments are as follows:
+ *
+ *	parent		Map to take range from
+ *	size		Size of range to find
+ *	min, max	Returned endpoints of map
+ *	pageable	Can the region be paged
+ */
+vm_map_t kmem_suballoc(parent, min, max, size, pageable)
+	register vm_map_t	parent;
+	vm_offset_t		*min, *max;
+	register vm_size_t	size;
+	boolean_t		pageable;
+{
+	register int	status;
+	vm_map_t	submap;
+
+	size = round_page(size);
+	*min = (vm_offset_t) vm_map_min(parent);
+	status = vm_map_find(parent, NULL, (vm_offset_t) 0, min, size, TRUE);
+	if (status != KERN_SUCCESS) {
+		printf("kmem_suballoc: bad status return of %d.\n", status);
+		panic("kmem_suballoc");
+	}
+	*max = *min + size;
+	/* The submap is created on the parent's pmap, so reference it first. */
+	pmap_reference(vm_map_pmap(parent));
+	submap = vm_map_create(vm_map_pmap(parent), *min, *max, pageable);
+	if (submap == NULL)
+		panic("kmem_suballoc: cannot create submap");
+	status = vm_map_submap(parent, *min, *max, submap);
+	if (status != KERN_SUCCESS)
+		panic("kmem_suballoc: unable to change range to submap");
+	return(submap);
+}
+
+/*
+ * Allocate wired-down memory in the kernel's address map for the higher
+ * level kernel memory allocator (kern/kern_malloc.c).  We cannot use
+ * kmem_alloc() because we may need to allocate memory at interrupt
+ * level where we cannot block (canwait == FALSE).
+ *
+ * This routine has its own private kernel submap (kmem_map) and object
+ * (kmem_object).  This, combined with the fact that only malloc uses
+ * this routine, ensures that we will never block in map or object waits.
+ * Note that this still only works in a uni-processor environment and
+ * when called at splhigh().  We don't worry about expanding the map
+ * (adding entries) since entries for wired maps are statically allocated.
+ *
+ * Returns the address of the new region.  Returns 0 if !canwait and no
+ * space or pages are available; panics if canwait and no space is found.
+ */
+vm_offset_t
+kmem_malloc(map, size, canwait)
+	register vm_map_t	map;
+	register vm_size_t	size;
+	boolean_t		canwait;
+{
+	register vm_offset_t	offset, i;
+	vm_map_entry_t		entry;
+	vm_offset_t		addr;
+	vm_page_t		m;
+	extern vm_object_t	kmem_object;
+
+	if (map != kmem_map && map != mb_map)
+		panic("kmem_malloc: map != {kmem,mb}_map");
+
+	size = round_page(size);
+	addr = vm_map_min(map);
+
+	/*
+	 * Locate sufficient space in the map.  This will give us the
+	 * final virtual address for the new memory, and thus will tell
+	 * us the offset within the kernel map.
+	 */
+	vm_map_lock(map);
+	if (vm_map_findspace(map, 0, size, &addr)) {
+		vm_map_unlock(map);
+		if (canwait)		/* XXX  should wait */
+			panic("kmem_malloc: %s too small",
+			    map == kmem_map ? "kmem_map" : "mb_map");
+		return (0);
+	}
+	offset = addr - vm_map_min(kmem_map);
+	vm_object_reference(kmem_object);
+	vm_map_insert(map, kmem_object, offset, addr, addr + size);
+
+	/*
+	 * If we can wait, just mark the range as wired
+	 * (will fault pages as necessary).
+	 */
+	if (canwait) {
+		vm_map_unlock(map);
+		(void) vm_map_pageable(map, (vm_offset_t) addr, addr + size,
+				       FALSE);
+		vm_map_simplify(map, addr);
+		return(addr);
+	}
+
+	/*
+	 * If we cannot wait then we must allocate all memory up front,
+	 * pulling it off the active queue to prevent pageout.
+	 */
+	vm_object_lock(kmem_object);
+	for (i = 0; i < size; i += PAGE_SIZE) {
+		m = vm_page_alloc(kmem_object, offset + i);
+
+		/*
+		 * Ran out of space, free everything up and return.
+		 * Don't need to lock page queues here as we know
+		 * that the pages we got aren't on any queues.
+		 */
+		if (m == NULL) {
+			while (i != 0) {
+				i -= PAGE_SIZE;
+				m = vm_page_lookup(kmem_object, offset + i);
+				vm_page_free(m);
+			}
+			vm_object_unlock(kmem_object);
+			vm_map_delete(map, addr, addr + size);
+			vm_map_unlock(map);
+			return(0);
+		}
+#if 0
+		vm_page_zero_fill(m);
+#endif
+		m->flags &= ~PG_BUSY;
+	}
+	vm_object_unlock(kmem_object);
+
+	/*
+	 * Mark map entry as non-pageable.
+	 * Assert: vm_map_insert() will never be able to extend the previous
+	 * entry so there will be a new entry exactly corresponding to this
+	 * address range and it will have wired_count == 0.
+	 */
+	if (!vm_map_lookup_entry(map, addr, &entry) ||
+	    entry->start != addr || entry->end != addr + size ||
+	    entry->wired_count)
+		panic("kmem_malloc: entry not found or misaligned");
+	entry->wired_count++;
+
+	/*
+	 * Loop thru pages, entering them in the pmap.
+	 * (We cannot add them to the wired count without
+	 * wrapping the vm_page_queue_lock in splimp...)
+	 */
+	for (i = 0; i < size; i += PAGE_SIZE) {
+		vm_object_lock(kmem_object);
+		m = vm_page_lookup(kmem_object, offset + i);
+		vm_object_unlock(kmem_object);
+		pmap_enter(map->pmap, addr + i, VM_PAGE_TO_PHYS(m),
+			   VM_PROT_DEFAULT, TRUE);
+	}
+	vm_map_unlock(map);
+
+	vm_map_simplify(map, addr);
+	return(addr);
+}
+
+/*
+ *	kmem_alloc_wait
+ *
+ *	Allocates pageable memory from a sub-map of the kernel.  If the submap
+ *	has no room, the caller sleeps waiting for more memory in the submap.
+ *	Returns 0 if the request is larger than the whole submap.
+ */
+vm_offset_t kmem_alloc_wait(map, size)
+	vm_map_t	map;
+	vm_size_t	size;
+{
+	vm_offset_t	addr;
+
+	size = round_page(size);
+
+	/*
+	 * The map's own lock is used to lock out sleepers/wakers, so
+	 * this works for more than one map.  It is held for each search.
+	 */
+	vm_map_lock(map);
+	while (vm_map_findspace(map, 0, size, &addr) != 0) {
+		/* Nothing free right now; give up if it could never fit. */
+		if (vm_map_max(map) - vm_map_min(map) < size) {
+			vm_map_unlock(map);
+			return (0);
+		}
+		/* Declare the wait first, then drop the lock and block. */
+		assert_wait((int)map, TRUE);
+		vm_map_unlock(map);
+		thread_block();
+		vm_map_lock(map);
+	}
+	vm_map_insert(map, NULL, (vm_offset_t)0, addr, addr + size);
+	vm_map_unlock(map);
+	return (addr);
+}
+
+/*
+ *	kmem_free_wakeup
+ *
+ *	Returns memory to a submap of the kernel, and wakes up any threads
+ *	waiting for memory in that map (see kmem_alloc_wait).
+ */
+void	kmem_free_wakeup(map, addr, size)
+	vm_map_t	map;
+	vm_offset_t	addr;
+	vm_size_t	size;
+{
+	vm_map_lock(map);
+	(void) vm_map_delete(map, trunc_page(addr), round_page(addr + size));
+	thread_wakeup((int)map);	/* same event kmem_alloc_wait waits on */
+	vm_map_unlock(map);
+}
+
+/*
+ * Create the kernel map and insert a mapping covering kernel text, data,
+ * bss, and all space allocated thus far (`bootstrap' data).  The new map
+ * therefore shows VM_MIN_KERNEL_ADDRESS up to `start' as allocated and
+ * the range between `start' and `end' as free.
+ */
+void kmem_init(start, end)
+	vm_offset_t start, end;
+{
+	register vm_map_t kmap;
+
+	kmap = vm_map_create(kernel_pmap, VM_MIN_KERNEL_ADDRESS, end, FALSE);
+	vm_map_lock(kmap);
+	/* N.B.: kgdb is unusable from the moment kernel_map is set ... */
+	kernel_map = kmap;
+	(void) vm_map_insert(kmap, NULL, (vm_offset_t)0,
+	    VM_MIN_KERNEL_ADDRESS, start);
+	/* ... until the `insert' above has completed */
+	vm_map_unlock(kmap);
+}
diff --git a/sys/vm/vm_kern.h b/sys/vm/vm_kern.h
new file mode 100644
index 00000000000..d0d2c358af0
--- /dev/null
+++ b/sys/vm/vm_kern.h
@@ -0,0 +1,72 @@
+/* 
+ * Copyright (c) 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * The Mach Operating System project at Carnegie-Mellon University.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)vm_kern.h	8.1 (Berkeley) 6/11/93
+ *
+ *
+ * Copyright (c) 1987, 1990 Carnegie-Mellon University.
+ * All rights reserved.
+ *
+ * Authors: Avadis Tevanian, Jr., Michael Wayne Young
+ * 
+ * Permission to use, copy, modify and distribute this software and
+ * its documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ * 
+ * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 
+ * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND 
+ * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ * 
+ * Carnegie Mellon requests users of this software to return to
+ *
+ *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
+ *  School of Computer Science
+ *  Carnegie Mellon University
+ *  Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie the
+ * rights to redistribute these changes.
+ */
+
+/* Kernel memory management definitions. */
+/* NOTE(review): defined (no `extern') in every file that includes this. */
+vm_map_t	buffer_map;
+vm_map_t	exec_map;
+vm_map_t	kernel_map;	/* assigned in kmem_init() */
+vm_map_t	kmem_map;	/* accepted by kmem_malloc() */
+vm_map_t	mb_map;		/* accepted by kmem_malloc() */
+vm_map_t	phys_map;
diff --git a/sys/vm/vm_map.c b/sys/vm/vm_map.c
new file mode 100644
index 00000000000..425fe0de432
--- /dev/null
+++ b/sys/vm/vm_map.c
@@ -0,0 +1,2626 @@
+/* 
+ * Copyright (c) 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * The Mach Operating System project at Carnegie-Mellon University.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)vm_map.c	8.3 (Berkeley) 1/12/94
+ *
+ *
+ * Copyright (c) 1987, 1990 Carnegie-Mellon University.
+ * All rights reserved.
+ *
+ * Authors: Avadis Tevanian, Jr., Michael Wayne Young
+ * 
+ * Permission to use, copy, modify and distribute this software and
+ * its documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ * 
+ * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 
+ * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND 
+ * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ * 
+ * Carnegie Mellon requests users of this software to return to
+ *
+ *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
+ *  School of Computer Science
+ *  Carnegie Mellon University
+ *  Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie the
+ * rights to redistribute these changes.
+ */
+
+/*
+ *	Virtual memory mapping module.
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/malloc.h>
+
+#include <vm/vm.h>
+#include <vm/vm_page.h>
+#include <vm/vm_object.h>
+
+/*
+ *	Virtual memory maps provide for the mapping, protection,
+ *	and sharing of virtual memory objects.  In addition,
+ *	this module provides for an efficient virtual copy of
+ *	memory from one map to another.
+ *
+ *	Synchronization is required prior to most operations.
+ *
+ *	Maps consist of an ordered doubly-linked list of simple
+ *	entries; a single hint is used to speed up lookups.
+ *
+ *	In order to properly represent the sharing of virtual
+ *	memory regions among maps, the map structure is bi-level.
+ *	Top-level ("address") maps refer to regions of sharable
+ *	virtual memory.  These regions are implemented as
+ *	("sharing") maps, which then refer to the actual virtual
+ *	memory objects.  When two address maps "share" memory,
+ *	their top-level maps both have references to the same
+ *	sharing map.  When memory is virtual-copied from one
+ *	address map to another, the references in the sharing
+ *	maps are actually copied -- no copying occurs at the
+ *	virtual memory object level.
+ *
+ *	Since portions of maps are specified by start/end addresses,
+ *	which may not align with existing map entries, all
+ *	routines merely "clip" entries to these start/end values.
+ *	[That is, an entry is split into two, bordering at a
+ *	start or end value.]  Note that these clippings may not
+ *	always be necessary (as the two resulting entries are then
+ *	not changed); however, the clipping is done for convenience.
+ *	No attempt is currently made to "glue back together" two
+ *	abutting entries.
+ *
+ *	As mentioned above, virtual copy operations are performed
+ *	by copying VM object references from one sharing map to
+ *	another, and then marking both regions as copy-on-write.
+ *	It is important to note that only one writeable reference
+ *	to a VM object region exists in any map -- this means that
+ *	shadow object creation can be delayed until a write operation
+ *	occurs.
+ */
+
+/*
+ *	vm_map_startup:
+ *
+ *	Initialize the vm_map module.  Must be called before
+ *	any other vm_map routines.
+ *
+ *	Map and entry structures are allocated from the general
+ *	purpose memory pool with some exceptions:
+ *
+ *	- The kernel map and kmem submap are allocated statically.
+ *	- Kernel map entries are allocated out of a static pool.
+ *
+ *	These restrictions are necessary since malloc() uses the
+ *	maps and requires map entries.
+ */
+
+vm_offset_t	kentry_data;		/* base of the static pool */
+vm_size_t	kentry_data_size;	/* size of that pool, in bytes */
+vm_map_entry_t	kentry_free;		/* free list of static map entries */
+vm_map_t	kmap_free;		/* free list of static maps */
+
+static void	_vm_map_clip_end __P((vm_map_t, vm_map_entry_t, vm_offset_t));
+static void	_vm_map_clip_start __P((vm_map_t, vm_map_entry_t, vm_offset_t));
+
+void vm_map_startup()
+{
+	register vm_map_entry_t ent;
+	register int left;
+	vm_map_t kmap;
+
+	/*
+	 * Carve MAX_KMAP static map structures from the front of
+	 * kentry_data and chain them through header.next; they serve
+	 * allocations made before the kernel map or kmem map exists
+	 * (vm_map_create knows how to deal with them).
+	 */
+	kmap = (vm_map_t) kentry_data;
+	kmap_free = kmap;
+	for (left = MAX_KMAP - 1; left > 0; left--, kmap++)
+		kmap->header.next = (vm_map_entry_t) (kmap + 1);
+	kmap->header.next = NULL;
+	kmap++;
+
+	/*
+	 * Whatever is left of kentry_data becomes the free list of
+	 * statically allocated kernel map entries.
+	 */
+	ent = (vm_map_entry_t) kmap;
+	kentry_free = ent;
+	left = (kentry_data_size - MAX_KMAP * sizeof *kmap) / sizeof *ent;
+	for (left--; left > 0; left--, ent++)
+		ent->next = ent + 1;
+	ent->next = NULL;
+}
+
+/*
+ * Allocate a vmspace structure, zero the part before vm_startcopy, and
+ * initialize its embedded vm_map and pmap.  The refcnt is set to 1.
+ * The remaining fields must be initialized by the caller.
+ */
+struct vmspace *
+vmspace_alloc(min, max, pageable)
+	vm_offset_t min, max;
+	int pageable;
+{
+	register struct vmspace *nvm;
+
+	MALLOC(nvm, struct vmspace *, sizeof *nvm, M_VMMAP, M_WAITOK);
+	bzero(nvm, (caddr_t) &nvm->vm_startcopy - (caddr_t) nvm);
+	vm_map_init(&nvm->vm_map, min, max, pageable);
+	pmap_pinit(&nvm->vm_pmap);
+	nvm->vm_map.pmap = &nvm->vm_pmap;	/* XXX */
+	nvm->vm_refcnt = 1;
+	return (nvm);
+}
+
+void
+vmspace_free(vm)
+	register struct vmspace *vm;
+{
+	if (--vm->vm_refcnt != 0)
+		return;
+
+	/*
+	 * Last reference: lock the map to wait out other users, delete
+	 * all mappings and the pages they hold, then let the pmap module
+	 * reclaim the rest.  The lock dies with the structure freed here.
+	 */
+	vm_map_lock(&vm->vm_map);
+	(void) vm_map_delete(&vm->vm_map, vm->vm_map.min_offset,
+	    vm->vm_map.max_offset);
+	pmap_release(&vm->vm_pmap);
+	FREE(vm, M_VMMAP);
+}
+
+/*
+ *	vm_map_create:
+ *
+ *	Creates and returns a new empty VM map with the given pmap and
+ *	address bounds.  Until kmem_map is set the map is taken from the
+ *	static kmap_free list (panic if empty); afterwards it is MALLOCed.
+ */
+vm_map_t vm_map_create(pmap, min, max, pageable)
+	pmap_t		pmap;
+	vm_offset_t	min, max;
+	boolean_t	pageable;
+{
+	register vm_map_t	result;
+	extern vm_map_t		kmem_map;
+
+	if (kmem_map == NULL) {
+		result = kmap_free;
+		if (result == NULL)
+			panic("vm_map_create: out of maps");
+		kmap_free = (vm_map_t) result->header.next;
+	} else
+		MALLOC(result, vm_map_t, sizeof(struct vm_map),
+		       M_VMMAP, M_WAITOK);
+
+	vm_map_init(result, min, max, pageable);
+	result->pmap = pmap;
+	return(result);
+}
+
+/*
+ * Initialize an existing vm_map structure, such as that in the
+ * vmspace structure.  The pmap is set elsewhere.
+ */
+void
+vm_map_init(map, min, max, pageable)
+	register struct vm_map *map;
+	vm_offset_t	min, max;
+	boolean_t	pageable;
+{
+	/* Caller-supplied bounds and attributes. */
+	map->min_offset = min;
+	map->max_offset = max;
+	map->entries_pageable = pageable;
+	/* Empty circular entry list; both hints start at the header. */
+	map->header.prev = map->header.next = &map->header;
+	map->first_free = map->hint = &map->header;
+	map->nentries = 0;
+	map->size = 0;
+	map->timestamp = 0;
+	map->is_main_map = TRUE;
+	map->ref_count = 1;
+	lock_init(&map->lock, TRUE);
+	simple_lock_init(&map->ref_lock);
+	simple_lock_init(&map->hint_lock);
+}
+
+/*
+ *	vm_map_entry_create:	[ internal use only ]
+ *
+ *	Allocates a VM map entry for insertion.  No entry fields are
+ *	filled in.  Panics instead of returning NULL.
+ */
+vm_map_entry_t vm_map_entry_create(map)
+	vm_map_t	map;
+{
+	vm_map_entry_t	entry;
+#ifdef DEBUG
+	extern vm_map_t		kernel_map, kmem_map, mb_map, pager_map;
+	boolean_t		isspecial;
+
+	isspecial = (map == kernel_map || map == kmem_map ||
+		     map == mb_map || map == pager_map);
+	if ((isspecial && map->entries_pageable) ||
+	    (!isspecial && !map->entries_pageable))
+		panic("vm_map_entry_create: bogus map");
+#endif
+	if (!map->entries_pageable) {
+		entry = kentry_free;
+		if (entry != NULL)
+			kentry_free = entry->next;
+	} else {
+		MALLOC(entry, vm_map_entry_t, sizeof(struct vm_map_entry),
+		       M_VMMAPENT, M_WAITOK);
+	}
+	if (entry == NULL)
+		panic("vm_map_entry_create: out of map entries");
+	return(entry);
+}
+
+/*
+ *	vm_map_entry_dispose:	[ internal use only ]
+ *
+ *	Inverse of vm_map_entry_create; static entries rejoin kentry_free.
+ */
+void vm_map_entry_dispose(map, entry)
+	vm_map_t	map;
+	vm_map_entry_t	entry;
+{
+#ifdef DEBUG
+	extern vm_map_t		kernel_map, kmem_map, mb_map, pager_map;
+	boolean_t		isspecial;
+
+	isspecial = (map == kernel_map || map == kmem_map ||
+		     map == mb_map || map == pager_map);
+	if ((isspecial && map->entries_pageable) ||
+	    (!isspecial && !map->entries_pageable))
+		panic("vm_map_entry_dispose: bogus map");
+#endif
+	if (!map->entries_pageable) {
+		entry->next = kentry_free;
+		kentry_free = entry;
+		return;
+	}
+	FREE(entry, M_VMMAPENT);
+}
+
+/*
+ *	vm_map_entry_{un,}link:
+ *	Insert "entry" after "after_where" / remove "entry" from the map's
+ *	list, adjusting nentries.  Arguments are evaluated more than once.
+ */
+#define	vm_map_entry_link(map, after_where, entry) \
+		{ \
+		(map)->nentries++; \
+		(entry)->prev = (after_where); \
+		(entry)->next = (after_where)->next; \
+		(entry)->prev->next = (entry); \
+		(entry)->next->prev = (entry); \
+		}
+#define	vm_map_entry_unlink(map, entry) \
+		{ \
+		(map)->nentries--; \
+		(entry)->next->prev = (entry)->prev; \
+		(entry)->prev->next = (entry)->next; \
+		}
+
+/*
+ *	vm_map_reference:
+ *
+ *	Creates another valid reference to the given map.
+ *	A NULL map is ignored.
+ */
+void vm_map_reference(map)
+	register vm_map_t	map;
+{
+	if (map != NULL) {
+		/* The count is only touched with ref_lock held. */
+		simple_lock(&map->ref_lock);
+		map->ref_count++;
+		simple_unlock(&map->ref_lock);
+	}
+}
+
+/*
+ *	vm_map_deallocate:
+ *
+ *	Releases one reference on the specified map and destroys
+ *	the map if no references remain.  A NULL map is ignored.
+ *	The map should not be locked.
+ */
+void vm_map_deallocate(map)
+	register vm_map_t	map;
+{
+	register int		remaining;
+
+	if (map == NULL)
+		return;
+
+	/* Drop our reference under the reference lock. */
+	simple_lock(&map->ref_lock);
+	remaining = --map->ref_count;
+	simple_unlock(&map->ref_lock);
+
+	if (remaining > 0)
+		return;
+
+	/*
+	 *	No references remain.  Lock the map, to wait out
+	 *	all other references to it, then delete every
+	 *	mapping, destroy the pmap and free the map itself.
+	 *	The lock is not released since the map is gone.
+	 */
+	vm_map_lock(map);
+
+	(void) vm_map_delete(map, map->min_offset, map->max_offset);
+
+	pmap_destroy(map->pmap);
+	FREE(map, M_VMMAP);
+}
+
+/*
+ *	vm_map_insert:
+ *
+ *	Inserts the given whole VM object into the target map at
+ *	[start, end).  The object's size should match that of the range.
+ *	Returns KERN_INVALID_ADDRESS for a range outside the map or with
+ *	start >= end, KERN_NO_SPACE if it overlaps an existing entry,
+ *	else KERN_SUCCESS.  Requires the map locked, and leaves it so.
+ */
+int
+vm_map_insert(map, object, offset, start, end)
+	vm_map_t	map;
+	vm_object_t	object;
+	vm_offset_t	offset;
+	vm_offset_t	start;
+	vm_offset_t	end;
+{
+	register vm_map_entry_t		new_entry;
+	register vm_map_entry_t		prev_entry;
+	vm_map_entry_t			temp_entry;
+
+	/*
+	 *	Check that the start and end points are not bogus.
+	 */
+
+	if ((start < map->min_offset) || (end > map->max_offset) ||
+			(start >= end))
+		return(KERN_INVALID_ADDRESS);
+
+	/*
+	 *	Find the entry prior to the proposed
+	 *	starting address; if it's part of an
+	 *	existing entry, this range is bogus.
+	 */
+
+	if (vm_map_lookup_entry(map, start, &temp_entry))
+		return(KERN_NO_SPACE);
+
+	prev_entry = temp_entry;
+
+	/*
+	 *	Assert that the next entry doesn't overlap the
+	 *	end point.
+	 */
+
+	if ((prev_entry->next != &map->header) &&
+			(prev_entry->next->start < end))
+		return(KERN_NO_SPACE);
+
+	/*
+	 *	See if we can avoid creating a new entry by
+	 *	extending the previous entry (NULL object only).
+	 */
+
+	if (object == NULL) {
+		if ((prev_entry != &map->header) &&
+		    (prev_entry->end == start) &&
+		    (map->is_main_map) &&
+		    (prev_entry->is_a_map == FALSE) &&
+		    (prev_entry->is_sub_map == FALSE) &&
+		    (prev_entry->inheritance == VM_INHERIT_DEFAULT) &&
+		    (prev_entry->protection == VM_PROT_DEFAULT) &&
+		    (prev_entry->max_protection == VM_PROT_DEFAULT) &&
+		    (prev_entry->wired_count == 0)) {
+
+			if (vm_object_coalesce(prev_entry->object.vm_object,
+					NULL,
+					prev_entry->offset,
+					(vm_offset_t) 0,
+					(vm_size_t)(prev_entry->end
+						     - prev_entry->start),
+					(vm_size_t)(end - prev_entry->end))) {
+				/*
+				 *	Coalesced the two objects - can extend
+				 *	the previous map entry to include the
+				 *	new range.
+				 */
+				map->size += (end - prev_entry->end);
+				prev_entry->end = end;
+				return(KERN_SUCCESS);
+			}
+		}
+	}
+
+	/*
+	 *	Create a new entry
+	 */
+
+	new_entry = vm_map_entry_create(map);
+	new_entry->start = start;
+	new_entry->end = end;
+
+	new_entry->is_a_map = FALSE;
+	new_entry->is_sub_map = FALSE;
+	new_entry->object.vm_object = object;
+	new_entry->offset = offset;
+
+	new_entry->copy_on_write = FALSE;
+	new_entry->needs_copy = FALSE;
+	/* The remaining fields are only initialized for main maps. */
+	if (map->is_main_map) {
+		new_entry->inheritance = VM_INHERIT_DEFAULT;
+		new_entry->protection = VM_PROT_DEFAULT;
+		new_entry->max_protection = VM_PROT_DEFAULT;
+		new_entry->wired_count = 0;
+	}
+
+	/*
+	 *	Insert the new entry into the list
+	 */
+
+	vm_map_entry_link(map, prev_entry, new_entry);
+	map->size += new_entry->end - new_entry->start;
+
+	/*
+	 *	Update the free space hint
+	 */
+
+	if ((map->first_free == prev_entry) && (prev_entry->end >= new_entry->start))
+		map->first_free = new_entry;
+
+	return(KERN_SUCCESS);
+}
+
+/*
+ *	SAVE_HINT:
+ *	Saves the specified entry as the hint for future lookups,
+ *	under hint_lock.  Expands to three statements (not a single
+ *	one), so it must not be the body of an unbraced if/else/loop.
+ */
+#define	SAVE_HINT(map,value) \
+		simple_lock(&(map)->hint_lock); \
+		(map)->hint = (value); \
+		simple_unlock(&(map)->hint_lock);
+
+/*
+ *	vm_map_lookup_entry:	[ internal use only ]
+ *
+ *	Finds the map entry containing (or immediately preceding)
+ *	the specified address in the given map; the entry is
+ *	returned in the "entry" parameter (the map header when no
+ *	entry precedes the address).  The boolean result indicates
+ *	whether the address is actually contained in the map.
+ *	The lookup hint may be updated as a side effect.
+ */
+boolean_t vm_map_lookup_entry(map, address, entry)
+	register vm_map_t	map;
+	register vm_offset_t	address;
+	vm_map_entry_t		*entry;		/* OUT */
+{
+	register vm_map_entry_t		cur;
+	register vm_map_entry_t		last;
+
+	/*
+	 *	Start looking either from the head of the
+	 *	list, or from the hint.
+	 */
+
+	simple_lock(&map->hint_lock);
+	cur = map->hint;
+	simple_unlock(&map->hint_lock);
+
+	if (cur == &map->header)
+		cur = cur->next;
+
+	if (address >= cur->start) {
+	    	/*
+		 *	Go from hint to end of list.
+		 *
+		 *	But first, make a quick check to see if
+		 *	we are already looking at the entry we
+		 *	want (which is usually the case).
+		 *	Note also that we don't need to save the hint
+		 *	here... it is the same hint (unless we are
+		 *	at the header, in which case the hint didn't
+		 *	buy us anything anyway).
+		 */
+		last = &map->header;
+		if ((cur != last) && (cur->end > address)) {
+			*entry = cur;
+			return(TRUE);
+		}
+	}
+	else {
+	    	/*
+		 *	Go from start to hint, *inclusively*
+		 */
+		last = cur->next;
+		cur = map->header.next;
+	}
+
+	/*
+	 *	Search linearly
+	 */
+
+	while (cur != last) {
+		if (cur->end > address) {
+			if (address >= cur->start) {
+			    	/*
+				 *	Save this lookup for future
+				 *	hints, and return
+				 */
+
+				*entry = cur;
+				SAVE_HINT(map, cur);
+				return(TRUE);
+			}
+			break;
+		}
+		cur = cur->next;
+	}
+	*entry = cur->prev;
+	SAVE_HINT(map, *entry);
+	return(FALSE);
+}
+
+/*
+ * Find room for `length' bytes in the (locked) map at or above `start'.
+ * Returns 0 and stores the address in *addr, or returns 1 on no space.
+ */
+int
+vm_map_findspace(map, start, length, addr)
+	register vm_map_t map;
+	register vm_offset_t start;
+	vm_size_t length;
+	vm_offset_t *addr;
+{
+	register vm_map_entry_t entry, next;
+	register vm_offset_t end;
+
+	if (start < map->min_offset)
+		start = map->min_offset;
+	if (start > map->max_offset)
+		return (1);
+
+	/*
+	 * Look for the first possible address; if there's already
+	 * something at this address, we have to start after it.
+	 */
+	if (start == map->min_offset) {
+		if ((entry = map->first_free) != &map->header)
+			start = entry->end;
+	} else {
+		vm_map_entry_t tmp;
+		if (vm_map_lookup_entry(map, start, &tmp))
+			start = tmp->end;
+		entry = tmp;
+	}
+
+	/*
+	 * Look through the rest of the map, trying to fit a new region in
+	 * the gap between existing regions, or after the very last region.
+	 */
+	for (;; start = (entry = next)->end) {
+		/*
+		 * Find the end of the proposed new region.  Be sure we didn't
+		 * go beyond the end of the map, or wrap around the address;
+		 * if so, we lose.  Otherwise, if this is the last entry, or
+		 * if the proposed new region fits before the next entry, we
+		 * win.
+		 */
+		end = start + length;
+		if (end > map->max_offset || end < start)
+			return (1);
+		next = entry->next;
+		if (next == &map->header || next->start >= end)
+			break;
+	}
+	SAVE_HINT(map, entry);	/* the entry just before the hole */
+	*addr = start;
+	return (0);
+}
+
+/*
+ *	vm_map_find inserts "object" into the target address map.  With
+ *	find_space set it first searches, first-fit from *addr, for an
+ *	unallocated region of the given length and returns the address
+ *	found through the same parameter; otherwise *addr is used as is.
+ *	Returns KERN_NO_SPACE or the result of vm_map_insert.
+ */
+int
+vm_map_find(map, object, offset, addr, length, find_space)
+	vm_map_t	map;
+	vm_object_t	object;
+	vm_offset_t	offset;
+	vm_offset_t	*addr;		/* IN/OUT */
+	vm_size_t	length;
+	boolean_t	find_space;
+{
+	register vm_offset_t	where;
+	int			status;
+
+	where = *addr;
+	vm_map_lock(map);
+	if (find_space && vm_map_findspace(map, where, length, addr)) {
+		status = KERN_NO_SPACE;
+	} else {
+		if (find_space)
+			where = *addr;
+		status = vm_map_insert(map, object, offset, where,
+		    where + length);
+	}
+	vm_map_unlock(map);
+	return (status);
+}
+
+/*
+ *	vm_map_simplify_entry:	[ internal use only ]
+ *
+ *	Intended to simplify the given map entry by removing extra
+ *	sharing maps and [XXX maybe later] merging with a neighbor.
+ *	As written it changes nothing: every branch is empty or #if 0.
+ */
+void vm_map_simplify_entry(map, entry)
+	vm_map_t	map;
+	vm_map_entry_t	entry;
+{
+#ifdef	lint
+	map++;
+#endif
+
+	/*
+	 *	If this entry corresponds to a sharing map, then
+	 *	see if we can remove the level of indirection.
+	 *	If it's not a sharing map, then it points to
+	 *	a VM object, so see if we can merge with either
+	 *	of our neighbors.
+	 */
+
+	if (entry->is_sub_map)
+		return;
+	if (entry->is_a_map) {
+#if	0
+		vm_map_t	my_share_map;
+		int		count;
+
+		my_share_map = entry->object.share_map;	
+		simple_lock(&my_share_map->ref_lock);
+		count = my_share_map->ref_count;
+		simple_unlock(&my_share_map->ref_lock);
+		
+		if (count == 1) {
+			/* Can move the region from
+			 * entry->start to entry->end (+ entry->offset)
+			 * in my_share_map into place of entry.
+			 * Later.
+			 */
+		}
+#endif
+	}
+	else {
+		/*
+		 *	Try to merge with our neighbors.
+		 *
+		 *	Conditions for merge are:
+		 *
+		 *	1.  entries are adjacent.
+		 *	2.  both entries point to objects
+		 *	    with null pagers.
+		 *
+		 * 	If a merge is possible, we replace the two
+		 *	entries with a single entry, then merge
+		 *	the two objects into a single object.
+		 *
+		 *	Now, all that is left to do is write the
+		 *	code!
+		 */
+	}
+}
+
+/*
+ *	vm_map_clip_start:	[ internal use only ]
+ *
+ *	Asserts that the given entry begins at or after the specified
+ *	address; if necessary, it splits the entry into two.  "entry"
+ *	and "startaddr" may be expressions but are evaluated twice.
+ */
+#define vm_map_clip_start(map, entry, startaddr) \
+{ \
+	if ((startaddr) > (entry)->start) \
+		_vm_map_clip_start((map), (entry), (startaddr)); \
+}
+
+/*
+ *	Called only when it is known that the entry must be split.
+ */
+static void _vm_map_clip_start(map, entry, start)
+	register vm_map_t	map;
+	register vm_map_entry_t	entry;
+	register vm_offset_t	start;
+{
+	register vm_map_entry_t	front;
+
+	/*
+	 *	See if we can simplify this entry first
+	 */
+	vm_map_simplify_entry(map, entry);
+
+	/*
+	 *	"front" starts as a copy of the entry and is cut
+	 *	down to [entry->start, start); the entry itself keeps
+	 *	[start, entry->end) with its offset advanced to match.
+	 *	front is linked BEFORE the entry.
+	 */
+	front = vm_map_entry_create(map);
+	*front = *entry;
+	front->end = start;
+
+	entry->offset += (start - entry->start);
+	entry->start = start;
+
+	vm_map_entry_link(map, entry->prev, front);
+
+	/*
+	 *	The copy holds the same map/object pointer as the
+	 *	entry, so take an additional reference for it.
+	 */
+	if (!entry->is_a_map && !entry->is_sub_map)
+		vm_object_reference(front->object.vm_object);
+	else
+		vm_map_reference(front->object.share_map);
+}
+
+/*
+ *	vm_map_clip_end:	[ internal use only ]
+ *
+ *	Asserts that the given entry ends at or before
+ *	the specified address; if necessary,
+ *	it splits the entry into two.
+ *
+ *	Only the comparison is done inline; _vm_map_clip_end is
+ *	called just when endaddr lies strictly below entry->end.
+ *
+ *	NOTE: the arguments are substituted unparenthesized, and
+ *	"entry" and "endaddr" are each expanded twice, so pass
+ *	simple, side-effect-free expressions.  The expansion is a
+ *	brace block, so an invocation followed by ";" that sits
+ *	directly before an "else" must itself be wrapped in braces.
+ */
+
+#define vm_map_clip_end(map, entry, endaddr) \
+{ \
+	if (endaddr < entry->end) \
+		_vm_map_clip_end(map, entry, endaddr); \
+}
+
+/*
+ *	This routine is called only when it is known that
+ *	the entry must be split.
+ *
+ *	On return "entry" covers [old start, end) and a newly
+ *	created entry, linked immediately after it, covers
+ *	[end, old end).  The new entry begins as a structure copy
+ *	of the original, so it names the same object or map; one
+ *	extra reference is taken on that object or map for it.
+ */
+static void _vm_map_clip_end(map, entry, end)
+	register vm_map_t	map;
+	register vm_map_entry_t	entry;
+	register vm_offset_t	end;
+{
+	register vm_map_entry_t	new_entry;
+
+	/*
+	 *	Create a new entry and insert it
+	 *	AFTER the specified entry
+	 */
+
+	new_entry = vm_map_entry_create(map);
+	*new_entry = *entry;		/* inherit every field, then trim below */
+
+	new_entry->start = entry->end = end;	/* the two pieces meet at "end" */
+	new_entry->offset += (end - entry->start);	/* tail's offset skips the part kept by entry */
+
+	vm_map_entry_link(map, entry, new_entry);
+	/* Two entries now name the same map/object: add a reference for the new one. */
+	if (entry->is_a_map || entry->is_sub_map)
+	 	vm_map_reference(new_entry->object.share_map);
+	else
+		vm_object_reference(new_entry->object.vm_object);
+}
+
+/*
+ *	VM_MAP_RANGE_CHECK:	[ internal use only ]
+ *
+ *	Asserts that the starting and ending region
+ *	addresses fall within the valid range of the map.
+ *
+ *	Nothing is rejected: "start" and "end" are modified in
+ *	place.  start is raised to vm_map_min(map) if below it,
+ *	end is lowered to vm_map_max(map) if above it, and if
+ *	start still exceeds end it is set equal to end (yielding
+ *	an empty range).  Both arguments must therefore be
+ *	assignable variables; they are expanded several times
+ *	and without parentheses.
+ */
+#define	VM_MAP_RANGE_CHECK(map, start, end)		\
+		{					\
+		if (start < vm_map_min(map))		\
+			start = vm_map_min(map);	\
+		if (end > vm_map_max(map))		\
+			end = vm_map_max(map);		\
+		if (start > end)			\
+			start = end;			\
+		}
+
+/*
+ *	vm_map_submap:		[ kernel use only ]
+ *
+ *	Mark the given range as handled by a subordinate map.
+ *
+ *	This range must have been created with vm_map_find,
+ *	and no other operations may have been performed on this
+ *	range prior to calling vm_map_submap.
+ *
+ *	Only a limited number of operations can be performed
+ *	within this range after calling vm_map_submap:
+ *		vm_fault
+ *	[Don't try vm_map_copy!]
+ *
+ *	To remove a submapping, one must first remove the
+ *	range from the superior map, and then destroy the
+ *	submap (if desired).  [Better yet, don't try it.]
+ *
+ *	The map is locked for the duration of the call.  Returns
+ *	KERN_SUCCESS when, after clipping, a single entry spans
+ *	exactly [start, end), is not a sharing map, has no object
+ *	and is not copy-on-write; that entry is then pointed at
+ *	"submap" and a reference is taken on the submap.  In every
+ *	other case KERN_INVALID_ARGUMENT is returned (any clipping
+ *	already done is left in place).
+ */
+int
+vm_map_submap(map, start, end, submap)
+	register vm_map_t	map;
+	register vm_offset_t	start;
+	register vm_offset_t	end;
+	vm_map_t		submap;
+{
+	vm_map_entry_t		entry;
+	register int		result = KERN_INVALID_ARGUMENT;	/* pessimistic default */
+
+	vm_map_lock(map);
+
+	VM_MAP_RANGE_CHECK(map, start, end);	/* may adjust start/end */
+
+	if (vm_map_lookup_entry(map, start, &entry)) {
+		vm_map_clip_start(map, entry, start);
+	}
+	 else
+		entry = entry->next;	/* start is in a hole: use the following entry */
+
+	vm_map_clip_end(map, entry, end);
+
+	if ((entry->start == start) && (entry->end == end) &&
+	    (!entry->is_a_map) &&
+	    (entry->object.vm_object == NULL) &&
+	    (!entry->copy_on_write)) {
+		entry->is_a_map = FALSE;
+		entry->is_sub_map = TRUE;
+		vm_map_reference(entry->object.sub_map = submap);	/* store, then reference */
+		result = KERN_SUCCESS;
+	}
+	vm_map_unlock(map);
+
+	return(result);
+}
+
+/*
+ *	vm_map_protect:
+ *
+ *	Sets the protection of the specified address
+ *	region in the target map.  If "set_max" is
+ *	specified, the maximum protection is to be set;
+ *	otherwise, only the current protection is affected.
+ *
+ *	With set_max, each entry's max_protection becomes new_prot
+ *	and its current protection is reduced to the bits it shares
+ *	with new_prot.  Without set_max, the current protection
+ *	simply becomes new_prot.
+ *
+ *	The map is write-locked for the duration of the call and is
+ *	unlocked again on every return path.
+ *
+ *	Returns:
+ *		KERN_SUCCESS		protections were updated
+ *		KERN_INVALID_ARGUMENT	the range contains a submap entry
+ *		KERN_PROTECTION_FAILURE	new_prot exceeds the maximum
+ *					protection of some entry
+ *	On failure no protection has been changed, although the
+ *	first entry may already have been clipped at "start".
+ */
+int
+vm_map_protect(map, start, end, new_prot, set_max)
+	register vm_map_t	map;
+	register vm_offset_t	start;
+	register vm_offset_t	end;
+	register vm_prot_t	new_prot;
+	register boolean_t	set_max;
+{
+	register vm_map_entry_t		current;
+	vm_map_entry_t			entry;
+
+	vm_map_lock(map);
+
+	VM_MAP_RANGE_CHECK(map, start, end);
+
+	if (vm_map_lookup_entry(map, start, &entry)) {
+		vm_map_clip_start(map, entry, start);
+	}
+	 else
+		entry = entry->next;
+
+	/*
+	 *	Make a first pass to check for protection
+	 *	violations.
+	 */
+
+	current = entry;
+	while ((current != &map->header) && (current->start < end)) {
+		if (current->is_sub_map) {
+			/*
+			 *	Drop the map lock before bailing out,
+			 *	exactly as the protection-failure path
+			 *	below does; otherwise the map would stay
+			 *	write-locked forever.
+			 */
+			vm_map_unlock(map);
+			return(KERN_INVALID_ARGUMENT);
+		}
+		if ((new_prot & current->max_protection) != new_prot) {
+			vm_map_unlock(map);
+			return(KERN_PROTECTION_FAILURE);
+		}
+
+		current = current->next;
+	}
+
+	/*
+	 *	Go back and fix up protections.
+	 *	[Note that clipping is not necessary the second time.]
+	 */
+
+	current = entry;
+
+	while ((current != &map->header) && (current->start < end)) {
+		vm_prot_t	old_prot;
+
+		vm_map_clip_end(map, current, end);
+
+		old_prot = current->protection;
+		if (set_max)
+			current->protection =
+				(current->max_protection = new_prot) &
+					old_prot;
+		else
+			current->protection = new_prot;
+
+		/*
+		 *	Update physical map if necessary.
+		 *	Worry about copy-on-write here -- CHECK THIS XXX
+		 */
+
+		if (current->protection != old_prot) {
+
+			/*
+			 *	MASK strips write permission from what is
+			 *	handed to the pmap for a copy-on-write
+			 *	entry, so that writes keep faulting.
+			 */
+#define MASK(entry)	((entry)->copy_on_write ? ~VM_PROT_WRITE : \
+							VM_PROT_ALL)
+#define	max(a,b)	((a) > (b) ? (a) : (b))
+
+			if (current->is_a_map) {
+				vm_map_entry_t	share_entry;
+				vm_offset_t	share_end;
+
+				/*
+				 *	Walk the sharing-map entries that
+				 *	back this entry and protect each
+				 *	piece, translating sharing-map
+				 *	addresses back into this map's
+				 *	address space.
+				 */
+				vm_map_lock(current->object.share_map);
+				(void) vm_map_lookup_entry(
+						current->object.share_map,
+						current->offset,
+						&share_entry);
+				share_end = current->offset +
+					(current->end - current->start);
+				while ((share_entry !=
+					&current->object.share_map->header) &&
+					(share_entry->start < share_end)) {
+
+					pmap_protect(map->pmap,
+						(max(share_entry->start,
+							current->offset) -
+							current->offset +
+							current->start),
+						min(share_entry->end,
+							share_end) -
+						current->offset +
+						current->start,
+						current->protection &
+							MASK(share_entry));
+
+					share_entry = share_entry->next;
+				}
+				vm_map_unlock(current->object.share_map);
+			}
+			else
+				/*
+				 *	The copy-on-write mask must come from
+				 *	the entry being updated ("current"),
+				 *	not from the first entry of the range.
+				 */
+			 	pmap_protect(map->pmap, current->start,
+					current->end,
+					current->protection & MASK(current));
+#undef	max
+#undef	MASK
+		}
+		current = current->next;
+	}
+
+	vm_map_unlock(map);
+	return(KERN_SUCCESS);
+}
+
+/*
+ *	vm_map_inherit:
+ *
+ *	Sets the inheritance of the specified address
+ *	range in the target map.  Inheritance
+ *	affects how the map will be shared with
+ *	child maps at the time of vm_map_fork.
+ *
+ *	new_inheritance must be VM_INHERIT_NONE, VM_INHERIT_COPY
+ *	or VM_INHERIT_SHARE; any other value is rejected with
+ *	KERN_INVALID_ARGUMENT before the map is locked.  Otherwise
+ *	the map is locked, the entries are clipped to the range,
+ *	each is given the new inheritance, and KERN_SUCCESS is
+ *	returned.  Holes in the range are silently skipped.
+ */
+int
+vm_map_inherit(map, start, end, new_inheritance)
+	register vm_map_t	map;
+	register vm_offset_t	start;
+	register vm_offset_t	end;
+	register vm_inherit_t	new_inheritance;
+{
+	register vm_map_entry_t	entry;
+	vm_map_entry_t	temp_entry;
+
+	switch (new_inheritance) {
+	case VM_INHERIT_NONE:
+	case VM_INHERIT_COPY:
+	case VM_INHERIT_SHARE:
+		break;
+	default:
+		return(KERN_INVALID_ARGUMENT);
+	}
+
+	vm_map_lock(map);
+
+	VM_MAP_RANGE_CHECK(map, start, end);	/* may adjust start/end */
+
+	if (vm_map_lookup_entry(map, start, &temp_entry)) {
+		entry = temp_entry;
+		vm_map_clip_start(map, entry, start);
+	}
+	else
+		entry = temp_entry->next;	/* start is in a hole: begin with the next entry */
+
+	while ((entry != &map->header) && (entry->start < end)) {
+		vm_map_clip_end(map, entry, end);	/* only the last entry can actually be split */
+
+		entry->inheritance = new_inheritance;
+
+		entry = entry->next;
+	}
+
+	vm_map_unlock(map);
+	return(KERN_SUCCESS);
+}
+
+/*
+ *	vm_map_pageable:
+ *
+ *	Sets the pageability of the specified address
+ *	range in the target map.  Regions specified
+ *	as not pageable require locked-down physical
+ *	memory and physical page maps.
+ *
+ *	The map must not be locked, but a reference
+ *	must remain to the map throughout the call.
+ *
+ *	new_pageable TRUE unwires the range (each entry's
+ *	wired_count is decremented); FALSE wires it (each entry's
+ *	wired_count is incremented and newly wired entries are
+ *	faulted in with vm_fault_wire).
+ *
+ *	Returns:
+ *		KERN_SUCCESS		the whole range was changed
+ *		KERN_INVALID_ADDRESS	"start" is not inside any entry
+ *		KERN_INVALID_ARGUMENT	the range contains a hole, or
+ *					(unwiring) an entry that is not
+ *					wired
+ *		otherwise		the error from vm_fault_wire; the
+ *					wiring done so far is undone first
+ */
+int
+vm_map_pageable(map, start, end, new_pageable)
+	register vm_map_t	map;
+	register vm_offset_t	start;
+	register vm_offset_t	end;
+	register boolean_t	new_pageable;
+{
+	register vm_map_entry_t	entry;
+	vm_map_entry_t		start_entry;
+	register vm_offset_t	failed;	/* set only when vm_fault_wire fails */
+	int			rv;
+
+	vm_map_lock(map);
+
+	VM_MAP_RANGE_CHECK(map, start, end);
+
+	/*
+	 *	Only one pageability change may take place at one
+	 *	time, since vm_fault assumes it will be called
+	 *	only once for each wiring/unwiring.  Therefore, we
+	 *	have to make sure we're actually changing the pageability
+	 *	for the entire region.  We do so before making any changes.
+	 */
+
+	if (vm_map_lookup_entry(map, start, &start_entry) == FALSE) {
+		vm_map_unlock(map);
+		return(KERN_INVALID_ADDRESS);
+	}
+	entry = start_entry;
+
+	/*
+	 *	Actions are rather different for wiring and unwiring,
+	 *	so we have two separate cases.
+	 */
+
+	if (new_pageable) {
+
+		vm_map_clip_start(map, entry, start);
+
+		/*
+		 *	Unwiring.  First ensure that the range to be
+		 *	unwired is really wired down and that there
+		 *	are no holes.
+		 */
+		while ((entry != &map->header) && (entry->start < end)) {
+
+		    if (entry->wired_count == 0 ||
+			(entry->end < end &&
+			 (entry->next == &map->header ||
+			  entry->next->start > entry->end))) {
+			vm_map_unlock(map);
+			return(KERN_INVALID_ARGUMENT);
+		    }
+		    entry = entry->next;
+		}
+
+		/*
+		 *	Now decrement the wiring count for each region.
+		 *	If a region becomes completely unwired,
+		 *	unwire its physical pages and mappings.
+		 *
+		 *	The map lock is made recursive around this
+		 *	loop, which calls vm_fault_unwire while the
+		 *	lock is still held.
+		 */
+		lock_set_recursive(&map->lock);
+
+		entry = start_entry;
+		while ((entry != &map->header) && (entry->start < end)) {
+		    vm_map_clip_end(map, entry, end);
+
+		    entry->wired_count--;
+		    if (entry->wired_count == 0)
+			vm_fault_unwire(map, entry->start, entry->end);
+
+		    entry = entry->next;
+		}
+		lock_clear_recursive(&map->lock);
+	}
+
+	else {
+		/*
+		 *	Wiring.  We must do this in two passes:
+		 *
+		 *	1.  Holding the write lock, we create any shadow
+		 *	    or zero-fill objects that need to be created.
+		 *	    Then we clip each map entry to the region to be
+		 *	    wired and increment its wiring count.  We
+		 *	    create objects before clipping the map entries
+		 *	    to avoid object proliferation.
+		 *
+		 *	2.  We downgrade to a read lock, and call
+		 *	    vm_fault_wire to fault in the pages for any
+		 *	    newly wired area (wired_count is 1).
+		 *
+		 *	Downgrading to a read lock for vm_fault_wire avoids
+		 *	a possible deadlock with another thread that may have
+		 *	faulted on one of the pages to be wired (it would mark
+		 *	the page busy, blocking us, then in turn block on the
+		 *	map lock that we hold).  Because of problems in the
+		 *	recursive lock package, we cannot upgrade to a write
+		 *	lock in vm_map_lookup.  Thus, any actions that require
+		 *	the write lock must be done beforehand.  Because we
+		 *	keep the read lock on the map, the copy-on-write status
+		 *	of the entries we modify here cannot change.
+		 */
+
+		/*
+		 *	Pass 1.
+		 */
+		while ((entry != &map->header) && (entry->start < end)) {
+		    if (entry->wired_count == 0) {
+
+			/*
+			 *	Perform actions of vm_map_lookup that need
+			 *	the write lock on the map: create a shadow
+			 *	object for a copy-on-write region, or an
+			 *	object for a zero-fill region.
+			 *
+			 *	We don't have to do this for entries that
+			 *	point to sharing maps, because we won't hold
+			 *	the lock on the sharing map.
+			 */
+			if (!entry->is_a_map) {
+			    if (entry->needs_copy &&
+				((entry->protection & VM_PROT_WRITE) != 0)) {
+
+				vm_object_shadow(&entry->object.vm_object,
+						&entry->offset,
+						(vm_size_t)(entry->end
+							- entry->start));
+				entry->needs_copy = FALSE;
+			    }
+			    else if (entry->object.vm_object == NULL) {
+				entry->object.vm_object =
+				    vm_object_allocate((vm_size_t)(entry->end
+				    			- entry->start));
+				entry->offset = (vm_offset_t)0;
+			    }
+			}
+		    }
+		    vm_map_clip_start(map, entry, start);
+		    vm_map_clip_end(map, entry, end);
+		    entry->wired_count++;
+
+		    /*
+		     * Check for holes
+		     */
+		    if (entry->end < end &&
+			(entry->next == &map->header ||
+			 entry->next->start > entry->end)) {
+			/*
+			 *	Found one.  Object creation actions
+			 *	do not need to be undone, but the
+			 *	wired counts need to be restored.
+			 *
+			 *	Walk backwards from the current entry
+			 *	over everything that was incremented
+			 *	above.
+			 */
+			while (entry != &map->header && entry->end > start) {
+			    entry->wired_count--;
+			    entry = entry->prev;
+			}
+			vm_map_unlock(map);
+			return(KERN_INVALID_ARGUMENT);
+		    }
+		    entry = entry->next;
+		}
+
+		/*
+		 *	Pass 2.
+		 */
+
+		/*
+		 * HACK HACK HACK HACK
+		 *
+		 * If we are wiring in the kernel map or a submap of it,
+		 * unlock the map to avoid deadlocks.  We trust that the
+		 * kernel threads are well-behaved, and therefore will
+		 * not do anything destructive to this region of the map
+		 * while we have it unlocked.  We cannot trust user threads
+		 * to do the same.
+		 *
+		 * HACK HACK HACK HACK
+		 */
+		if (vm_map_pmap(map) == kernel_pmap) {
+		    vm_map_unlock(map);		/* trust me ... */
+		}
+		else {
+		    lock_set_recursive(&map->lock);
+		    lock_write_to_read(&map->lock);
+		}
+
+		rv = 0;
+		entry = start_entry;
+		while (entry != &map->header && entry->start < end) {
+		    /*
+		     * If vm_fault_wire fails for any page we need to
+		     * undo what has been done.  We decrement the wiring
+		     * count for those pages which have not yet been
+		     * wired (now) and unwire those that have (later).
+		     *
+		     * XXX this violates the locking protocol on the map,
+		     * needs to be fixed.
+		     */
+		    if (rv)
+			entry->wired_count--;
+		    else if (entry->wired_count == 1) {
+			rv = vm_fault_wire(map, entry->start, entry->end);
+			if (rv) {
+			    failed = entry->start;	/* entries before this address are wired */
+			    entry->wired_count--;
+			}
+		    }
+		    entry = entry->next;
+		}
+
+		if (vm_map_pmap(map) == kernel_pmap) {
+		    vm_map_lock(map);	/* re-take the lock dropped before pass 2 */
+		}
+		else {
+		    lock_clear_recursive(&map->lock);
+		}
+		if (rv) {
+		    vm_map_unlock(map);
+		    (void) vm_map_pageable(map, start, failed, TRUE);	/* unwire [start, failed) */
+		    return(rv);
+		}
+	}
+
+	vm_map_unlock(map);
+
+	return(KERN_SUCCESS);
+}
+
+/*
+ * vm_map_clean
+ *
+ * Push any dirty cached pages in the address range to their pager.
+ * If syncio is TRUE, dirty pages are written synchronously.
+ * If invalidate is TRUE, any cached pages are freed as well.
+ *
+ * Returns an error if any part of the specified range is not mapped.
+ *
+ * The map is read-locked for the duration of the call.
+ *
+ * Returns:
+ *	KERN_SUCCESS		the whole range was processed
+ *	KERN_INVALID_ADDRESS	"start" is unmapped or the range has a hole
+ *	KERN_INVALID_ARGUMENT	the range contains a submap entry
+ *	KERN_FAILURE		vm_object_page_clean reported failure for
+ *				a writable entry (later entries are left
+ *				untouched)
+ *
+ * NOTE(review): the object taken from an entry is locked without a
+ * NULL check; confirm that every entry reaching the second pass has
+ * an object.
+ */
+int
+vm_map_clean(map, start, end, syncio, invalidate)
+	vm_map_t	map;
+	vm_offset_t	start;
+	vm_offset_t	end;
+	boolean_t	syncio;
+	boolean_t	invalidate;
+{
+	register vm_map_entry_t current;
+	vm_map_entry_t entry;
+	vm_size_t size;
+	vm_object_t object;
+	vm_offset_t offset;
+
+	vm_map_lock_read(map);
+	VM_MAP_RANGE_CHECK(map, start, end);
+	if (!vm_map_lookup_entry(map, start, &entry)) {
+		vm_map_unlock_read(map);
+		return(KERN_INVALID_ADDRESS);
+	}
+
+	/*
+	 * Make a first pass to check for holes.
+	 *
+	 * The loop never tests for the map header: the hole check
+	 * returns before the walk could step onto it, because an entry
+	 * that ends short of "end" must be followed by an adjacent one.
+	 */
+	for (current = entry; current->start < end; current = current->next) {
+		if (current->is_sub_map) {
+			vm_map_unlock_read(map);
+			return(KERN_INVALID_ARGUMENT);
+		}
+		if (end > current->end &&
+		    (current->next == &map->header ||
+		     current->end != current->next->start)) {
+			vm_map_unlock_read(map);
+			return(KERN_INVALID_ADDRESS);
+		}
+	}
+
+	/*
+	 * Make a second pass, cleaning/uncaching pages from the indicated
+	 * objects as we go.
+	 *
+	 * "start" is advanced by the amount handled on each iteration.
+	 * For a sharing-map entry only the piece covered by one
+	 * sharing-map entry is handled per iteration, so the same
+	 * top-level entry may be visited through "start" more than once.
+	 */
+	for (current = entry; current->start < end; current = current->next) {
+		offset = current->offset + (start - current->start);
+		size = (end <= current->end ? end : current->end) - start;
+		if (current->is_a_map) {
+			register vm_map_t smap;
+			vm_map_entry_t tentry;
+			vm_size_t tsize;
+
+			smap = current->object.share_map;
+			vm_map_lock_read(smap);
+			(void) vm_map_lookup_entry(smap, offset, &tentry);	/* result unchecked */
+			tsize = tentry->end - offset;
+			if (tsize < size)
+				size = tsize;	/* limit to what this sharing entry covers */
+			object = tentry->object.vm_object;
+			offset = tentry->offset + (offset - tentry->start);
+			vm_object_lock(object);	/* lock object before releasing the sharing map */
+			vm_map_unlock_read(smap);
+		} else {
+			object = current->object.vm_object;
+			vm_object_lock(object);
+		}
+		/*
+		 * Flush pages if writing is allowed.
+		 * XXX should we continue on an error?
+		 */
+		if ((current->protection & VM_PROT_WRITE) &&
+		    !vm_object_page_clean(object, offset, offset+size,
+					  syncio, FALSE)) {
+			vm_object_unlock(object);
+			vm_map_unlock_read(map);
+			return(KERN_FAILURE);
+		}
+		if (invalidate)
+			vm_object_page_remove(object, offset, offset+size);
+		vm_object_unlock(object);
+		start += size;
+	}
+
+	vm_map_unlock_read(map);
+	return(KERN_SUCCESS);
+}
+
+/*
+ *	vm_map_entry_unwire:	[ internal use only ]
+ *
+ *	Make the region specified by this entry pageable.
+ *
+ *	The map in question should be locked.
+ *	[This is the reason for this routine's existence.]
+ *
+ *	Calls vm_fault_unwire on the entry's whole address range
+ *	and then forces wired_count to zero regardless of its
+ *	previous value.
+ */
+void vm_map_entry_unwire(map, entry)
+	vm_map_t		map;
+	register vm_map_entry_t	entry;
+{
+	vm_fault_unwire(map, entry->start, entry->end);
+	entry->wired_count = 0;
+}
+
+/*
+ *	vm_map_entry_delete:	[ internal use only ]
+ *
+ *	Deallocate the given entry from the target map.
+ *
+ *	The entry is unwired if wired, unlinked from the map,
+ *	its size is subtracted from map->size, the reference it
+ *	held on its map or object is released, and finally the
+ *	entry structure itself is disposed of.
+ */		
+void vm_map_entry_delete(map, entry)
+	register vm_map_t	map;
+	register vm_map_entry_t	entry;
+{
+	if (entry->wired_count != 0)
+		vm_map_entry_unwire(map, entry);
+		
+	vm_map_entry_unlink(map, entry);
+	map->size -= entry->end - entry->start;
+	/* Release the reference this entry held on its backing map or object. */
+	if (entry->is_a_map || entry->is_sub_map)
+		vm_map_deallocate(entry->object.share_map);
+	else
+	 	vm_object_deallocate(entry->object.vm_object);
+
+	vm_map_entry_dispose(map, entry);	/* entry must not be touched after this */
+}
+
+/*
+ *	vm_map_delete:	[ internal use only ]
+ *
+ *	Deallocates the given address range from the target
+ *	map.
+ *
+ *	When called with a sharing map, removes pages from
+ *	that region from all physical maps.
+ *
+ *	No locking is done here; vm_map_remove, the exported
+ *	form, takes the map lock around its call.  Entries that
+ *	straddle either end of the range are clipped first, so
+ *	only [start, end) is removed.  Always returns
+ *	KERN_SUCCESS.
+ */
+int
+vm_map_delete(map, start, end)
+	register vm_map_t	map;
+	vm_offset_t		start;
+	register vm_offset_t	end;
+{
+	register vm_map_entry_t	entry;
+	vm_map_entry_t		first_entry;
+
+	/*
+	 *	Find the start of the region, and clip it
+	 */
+
+	if (!vm_map_lookup_entry(map, start, &first_entry))
+		entry = first_entry->next;	/* start is in a hole: begin with the next entry */
+	else {
+		entry = first_entry;
+		vm_map_clip_start(map, entry, start);
+
+		/*
+		 *	Fix the lookup hint now, rather than each
+		 *	time through the loop.
+		 */
+
+		SAVE_HINT(map, entry->prev);
+	}
+
+	/*
+	 *	Save the free space hint
+	 */
+
+	if (map->first_free->start >= start)
+		map->first_free = entry->prev;
+
+	/*
+	 *	Step through all entries in this region
+	 */
+
+	while ((entry != &map->header) && (entry->start < end)) {
+		vm_map_entry_t		next;
+		register vm_offset_t	s, e;
+		register vm_object_t	object;
+
+		vm_map_clip_end(map, entry, end);
+
+		next = entry->next;	/* saved now: entry is freed below */
+		s = entry->start;
+		e = entry->end;
+
+		/*
+		 *	Unwire before removing addresses from the pmap;
+		 *	otherwise, unwiring will put the entries back in
+		 *	the pmap.
+		 *
+		 *	NOTE(review): "object" is read from the entry
+		 *	without testing is_a_map/is_sub_map first;
+		 *	confirm the value is meaningful for such entries.
+		 */
+
+		object = entry->object.vm_object;
+		if (entry->wired_count != 0)
+			vm_map_entry_unwire(map, entry);
+
+		/*
+		 *	If this is a sharing map, we must remove
+		 *	*all* references to this data, since we can't
+		 *	find all of the physical maps which are sharing
+		 *	it.
+		 */
+
+		if (object == kernel_object || object == kmem_object)
+			vm_object_page_remove(object, entry->offset,
+					entry->offset + (e - s));
+		else if (!map->is_main_map)
+			vm_object_pmap_remove(object,
+					 entry->offset,
+					 entry->offset + (e - s));
+		else
+			pmap_remove(map->pmap, s, e);
+
+		/*
+		 *	Delete the entry (which may delete the object)
+		 *	only after removing all pmap entries pointing
+		 *	to its pages.  (Otherwise, its page frames may
+		 *	be reallocated, and any modify bits will be
+		 *	set in the wrong object!)
+		 */
+
+		vm_map_entry_delete(map, entry);
+		entry = next;
+	}
+	return(KERN_SUCCESS);
+}
+
+/*
+ *	vm_map_remove:
+ *
+ *	Remove the given address range from the target map.
+ *	This is the exported form of vm_map_delete.
+ *
+ *	Locks the map, restricts [start, end) to the map's valid
+ *	address range, calls vm_map_delete, unlocks the map and
+ *	returns vm_map_delete's result.
+ */
+int
+vm_map_remove(map, start, end)
+	register vm_map_t	map;
+	register vm_offset_t	start;
+	register vm_offset_t	end;
+{
+	register int		result;
+
+	vm_map_lock(map);
+	VM_MAP_RANGE_CHECK(map, start, end);	/* may adjust start/end */
+	result = vm_map_delete(map, start, end);
+	vm_map_unlock(map);
+
+	return(result);
+}
+
+/*
+ *	vm_map_check_protection:
+ *
+ *	Report whether every address in [start, end) is covered
+ *	by an entry of the target map whose current protection
+ *	includes all of the bits in "protection".
+ *
+ *	Returns FALSE if "start" is not mapped, if the range
+ *	contains a hole or runs off the end of the map, or if any
+ *	covering entry lacks a requested permission; TRUE otherwise.
+ *	No locking is done here.
+ */
+boolean_t vm_map_check_protection(map, start, end, protection)
+	register vm_map_t	map;
+	register vm_offset_t	start;
+	register vm_offset_t	end;
+	register vm_prot_t	protection;
+{
+	register vm_map_entry_t	cur;
+	vm_map_entry_t		first;
+
+	/*
+	 *	The starting address must lie inside some entry.
+	 */
+	if (!vm_map_lookup_entry(map, start, &first))
+		return(FALSE);
+
+	/*
+	 *	Walk forward one entry at a time; "start" always holds
+	 *	the first address not yet vouched for.  An entry is
+	 *	rejected when it is the list header (ran off the map),
+	 *	when it begins beyond "start" (a hole), or when it does
+	 *	not grant every requested permission.
+	 */
+	for (cur = first; start < end; start = cur->end, cur = cur->next) {
+		if (cur == &map->header ||
+		    start < cur->start ||
+		    (cur->protection & protection) != protection)
+			return(FALSE);
+	}
+
+	return(TRUE);
+}
+
+/*
+ *	vm_map_copy_entry:
+ *
+ *	Copies the contents of the source entry to the destination
+ *	entry.  The entries *must* be aligned properly.
+ *
+ *	Does nothing if either entry is a submap.  A warning is
+ *	printed (and the copy proceeds) when the destination object
+ *	does not have OBJ_INTERNAL set.  The destination is unwired
+ *	if wired and its pages are removed from the physical map(s).
+ *
+ *	If the source is not wired, the copy is made through
+ *	vm_object_copy with the source write-protected, and both
+ *	entries are marked copy_on_write; the destination's previous
+ *	object is released.  If the source is wired, the pages are
+ *	copied via vm_fault_copy_entry instead.
+ */
+void vm_map_copy_entry(src_map, dst_map, src_entry, dst_entry)
+	vm_map_t		src_map, dst_map;
+	register vm_map_entry_t	src_entry, dst_entry;
+{
+	vm_object_t	temp_object;
+
+	if (src_entry->is_sub_map || dst_entry->is_sub_map)
+		return;
+
+	if (dst_entry->object.vm_object != NULL &&
+	    (dst_entry->object.vm_object->flags & OBJ_INTERNAL) == 0)
+		printf("vm_map_copy_entry: copying over permanent data!\n");
+
+	/*
+	 *	If our destination map was wired down,
+	 *	unwire it now.
+	 */
+
+	if (dst_entry->wired_count != 0)
+		vm_map_entry_unwire(dst_map, dst_entry);
+
+	/*
+	 *	If we're dealing with a sharing map, we
+	 *	must remove the destination pages from
+	 *	all maps (since we cannot know which maps
+	 *	this sharing map belongs in).
+	 */
+
+	if (dst_map->is_main_map)
+		pmap_remove(dst_map->pmap, dst_entry->start, dst_entry->end);
+	else
+		vm_object_pmap_remove(dst_entry->object.vm_object,
+			dst_entry->offset,
+			dst_entry->offset +
+				(dst_entry->end - dst_entry->start));
+
+	if (src_entry->wired_count == 0) {
+
+		boolean_t	src_needs_copy;
+
+		/*
+		 *	If the source entry is marked needs_copy,
+		 *	it is already write-protected.
+		 */
+		if (!src_entry->needs_copy) {
+
+			boolean_t	su;	/* TRUE when the source has a single mapping */
+
+			/*
+			 *	If the source entry has only one mapping,
+			 *	we can just protect the virtual address
+			 *	range.
+			 *
+			 *	A main map counts as a single mapping; a
+			 *	sharing map does only while its reference
+			 *	count is 1.
+			 */
+			if (!(su = src_map->is_main_map)) {
+				simple_lock(&src_map->ref_lock);
+				su = (src_map->ref_count == 1);
+				simple_unlock(&src_map->ref_lock);
+			}
+
+			if (su) {
+				pmap_protect(src_map->pmap,
+					src_entry->start,
+					src_entry->end,
+					src_entry->protection & ~VM_PROT_WRITE);
+			}
+			else {
+				vm_object_pmap_copy(src_entry->object.vm_object,
+					src_entry->offset,
+					src_entry->offset + (src_entry->end
+							    -src_entry->start));
+			}
+		}
+
+		/*
+		 *	Make a copy of the object.
+		 *
+		 *	The destination's current object is remembered
+		 *	first, because vm_object_copy stores the new
+		 *	object and offset directly into dst_entry.
+		 */
+		temp_object = dst_entry->object.vm_object;
+		vm_object_copy(src_entry->object.vm_object,
+				src_entry->offset,
+				(vm_size_t)(src_entry->end -
+					    src_entry->start),
+				&dst_entry->object.vm_object,
+				&dst_entry->offset,
+				&src_needs_copy);
+		/*
+		 *	If we didn't get a copy-object now, mark the
+		 *	source map entry so that a shadow will be created
+		 *	to hold its changed pages.
+		 */
+		if (src_needs_copy)
+			src_entry->needs_copy = TRUE;
+
+		/*
+		 *	The destination always needs to have a shadow
+		 *	created.
+		 */
+		dst_entry->needs_copy = TRUE;
+
+		/*
+		 *	Mark the entries copy-on-write, so that write-enabling
+		 *	the entry won't make copy-on-write pages writable.
+		 */
+		src_entry->copy_on_write = TRUE;
+		dst_entry->copy_on_write = TRUE;
+		/*
+		 *	Get rid of the old object.
+		 */
+		vm_object_deallocate(temp_object);
+
+		pmap_copy(dst_map->pmap, src_map->pmap, dst_entry->start,
+			dst_entry->end - dst_entry->start, src_entry->start);
+	}
+	else {
+		/*
+		 *	Of course, wired down pages can't be set copy-on-write.
+		 *	Cause wired pages to be copied into the new
+		 *	map by simulating faults (the new pages are
+		 *	pageable)
+		 */
+		vm_fault_copy_entry(dst_map, src_map, dst_entry, src_entry);
+	}
+}
+
+/*
+ *	vm_map_copy:
+ *
+ *	Perform a virtual memory copy from the source
+ *	address map/range to the destination map/range.
+ *
+ *	If src_destroy or dst_alloc is requested,
+ *	the source and destination regions should be
+ *	disjoint, not only in the top-level map, but
+ *	in the sharing maps as well.  [The best way
+ *	to guarantee this is to use a new intermediate
+ *	map to make copies.  This also reduces map
+ *	fragmentation.]
+ *
+ *	Both maps are locked for the duration of the call (once,
+ *	if they are the same map).
+ *
+ *	Returns:
+ *		KERN_SUCCESS		the copy was carried out (or
+ *					was a null copy)
+ *		KERN_NO_SPACE		src_addr+len or dst_addr+len
+ *					wraps around; returned before
+ *					any lock is taken
+ *		KERN_PROTECTION_FAILURE	(main maps only) the source is
+ *					not entirely readable, or the
+ *					existing destination is not
+ *					entirely writable
+ *		otherwise		the error from vm_map_insert
+ *					when dst_alloc is requested
+ *
+ *	src_destroy is forced to FALSE on entry, so the in-loop
+ *	entry deletion and the trailing pmap_remove never run; a
+ *	requested destroy is performed by vm_map_delete just
+ *	before the maps are unlocked.
+ *
+ *	NOTE(review): that vm_map_delete also runs when the
+ *	function leaves through "Return" with a failure result --
+ *	confirm that destroying the source after a failed copy is
+ *	intended.
+ */
+int
+vm_map_copy(dst_map, src_map,
+			  dst_addr, len, src_addr,
+			  dst_alloc, src_destroy)
+	vm_map_t	dst_map;
+	vm_map_t	src_map;
+	vm_offset_t	dst_addr;
+	vm_size_t	len;
+	vm_offset_t	src_addr;
+	boolean_t	dst_alloc;
+	boolean_t	src_destroy;
+{
+	register
+	vm_map_entry_t	src_entry;
+	register
+	vm_map_entry_t	dst_entry;
+	vm_map_entry_t	tmp_entry;
+	vm_offset_t	src_start;
+	vm_offset_t	src_end;
+	vm_offset_t	dst_start;
+	vm_offset_t	dst_end;
+	vm_offset_t	src_clip;
+	vm_offset_t	dst_clip;
+	int		result;
+	boolean_t	old_src_destroy;	/* caller's original src_destroy request */
+
+	/*
+	 *	XXX While we figure out why src_destroy screws up,
+	 *	we'll do it by explicitly vm_map_delete'ing at the end.
+	 */
+
+	old_src_destroy = src_destroy;
+	src_destroy = FALSE;
+
+	/*
+	 *	Compute start and end of region in both maps
+	 */
+
+	src_start = src_addr;
+	src_end = src_start + len;
+	dst_start = dst_addr;
+	dst_end = dst_start + len;
+
+	/*
+	 *	Check that the region can exist in both source
+	 *	and destination.
+	 */
+
+	if ((dst_end < dst_start) || (src_end < src_start))
+		return(KERN_NO_SPACE);
+
+	/*
+	 *	Lock the maps in question -- we avoid deadlock
+	 *	by ordering lock acquisition by map value
+	 *
+	 *	NOTE(review): the pointers are compared after a cast
+	 *	to int, which can truncate where pointers are wider
+	 *	than int -- confirm for the target platforms.
+	 */
+
+	if (src_map == dst_map) {
+		vm_map_lock(src_map);
+	}
+	else if ((int) src_map < (int) dst_map) {
+	 	vm_map_lock(src_map);
+		vm_map_lock(dst_map);
+	} else {
+		vm_map_lock(dst_map);
+	 	vm_map_lock(src_map);
+	}
+
+	result = KERN_SUCCESS;
+
+	/*
+	 *	Check protections... source must be completely readable and
+	 *	destination must be completely writable.  [Note that if we're
+	 *	allocating the destination region, we don't have to worry
+	 *	about protection, but instead about whether the region
+	 *	exists.]
+	 */
+
+	if (src_map->is_main_map && dst_map->is_main_map) {
+		if (!vm_map_check_protection(src_map, src_start, src_end,
+					VM_PROT_READ)) {
+			result = KERN_PROTECTION_FAILURE;
+			goto Return;
+		}
+
+		if (dst_alloc) {
+			/* XXX Consider making this a vm_map_find instead */
+			if ((result = vm_map_insert(dst_map, NULL,
+					(vm_offset_t) 0, dst_start, dst_end)) != KERN_SUCCESS)
+				goto Return;
+		}
+		else if (!vm_map_check_protection(dst_map, dst_start, dst_end,
+					VM_PROT_WRITE)) {
+			result = KERN_PROTECTION_FAILURE;
+			goto Return;
+		}
+	}
+
+	/*
+	 *	Find the start entries and clip.
+	 *
+	 *	Note that checking protection asserts that the
+	 *	lookup cannot fail.
+	 *
+	 *	Also note that we wait to do the second lookup
+	 *	until we have done the first clip, as the clip
+	 *	may affect which entry we get!
+	 */
+
+	(void) vm_map_lookup_entry(src_map, src_addr, &tmp_entry);
+	src_entry = tmp_entry;
+	vm_map_clip_start(src_map, src_entry, src_start);
+
+	(void) vm_map_lookup_entry(dst_map, dst_addr, &tmp_entry);
+	dst_entry = tmp_entry;
+	vm_map_clip_start(dst_map, dst_entry, dst_start);
+
+	/*
+	 *	If both source and destination entries are the same,
+	 *	retry the first lookup, as it may have changed.
+	 */
+
+	if (src_entry == dst_entry) {
+		(void) vm_map_lookup_entry(src_map, src_addr, &tmp_entry);
+		src_entry = tmp_entry;
+	}
+
+	/*
+	 *	If source and destination entries are still the same,
+	 *	a null copy is being performed.
+	 */
+
+	if (src_entry == dst_entry)
+		goto Return;
+
+	/*
+	 *	Go through entries until we get to the end of the
+	 *	region.
+	 */
+
+	while (src_start < src_end) {
+		/*
+		 *	Clip the entries to the endpoint of the entire region.
+		 */
+
+		vm_map_clip_end(src_map, src_entry, src_end);
+		vm_map_clip_end(dst_map, dst_entry, dst_end);
+
+		/*
+		 *	Clip each entry to the endpoint of the other entry.
+		 */
+
+		src_clip = src_entry->start + (dst_entry->end - dst_entry->start);
+		vm_map_clip_end(src_map, src_entry, src_clip);
+
+		dst_clip = dst_entry->start + (src_entry->end - src_entry->start);
+		vm_map_clip_end(dst_map, dst_entry, dst_clip);
+
+		/*
+		 *	Both entries now match in size and relative endpoints.
+		 *
+		 *	If both entries refer to a VM object, we can
+		 *	deal with them now.
+		 */
+
+		if (!src_entry->is_a_map && !dst_entry->is_a_map) {
+			vm_map_copy_entry(src_map, dst_map, src_entry,
+						dst_entry);
+		}
+		else {
+			register vm_map_t	new_dst_map;
+			vm_offset_t		new_dst_start;
+			vm_size_t		new_size;
+			vm_map_t		new_src_map;
+			vm_offset_t		new_src_start;
+
+			/*
+			 *	We have to follow at least one sharing map.
+			 *
+			 *	For each side, either descend into its
+			 *	sharing map (using the entry's offset as
+			 *	the address there) or stay in the current
+			 *	map; in the latter case the map's lock is
+			 *	made recursive, since the recursive
+			 *	vm_map_copy call below locks it again.
+			 */
+
+			new_size = (dst_entry->end - dst_entry->start);
+
+			if (src_entry->is_a_map) {
+				new_src_map = src_entry->object.share_map;
+				new_src_start = src_entry->offset;
+			}
+			else {
+			 	new_src_map = src_map;
+				new_src_start = src_entry->start;
+				lock_set_recursive(&src_map->lock);
+			}
+
+			if (dst_entry->is_a_map) {
+			    	vm_offset_t	new_dst_end;
+
+				new_dst_map = dst_entry->object.share_map;
+				new_dst_start = dst_entry->offset;
+
+				/*
+				 *	Since the destination sharing entries
+				 *	will be merely deallocated, we can
+				 *	do that now, and replace the region
+				 *	with a null object.  [This prevents
+				 *	splitting the source map to match
+				 *	the form of the destination map.]
+				 *	Note that we can only do so if the
+				 *	source and destination do not overlap.
+				 */
+
+				new_dst_end = new_dst_start + new_size;
+
+				if (new_dst_map != new_src_map) {
+					vm_map_lock(new_dst_map);
+					(void) vm_map_delete(new_dst_map,
+							new_dst_start,
+							new_dst_end);
+					(void) vm_map_insert(new_dst_map,
+							NULL,
+							(vm_offset_t) 0,
+							new_dst_start,
+							new_dst_end);
+					vm_map_unlock(new_dst_map);
+				}
+			}
+			else {
+			 	new_dst_map = dst_map;
+				new_dst_start = dst_entry->start;
+				lock_set_recursive(&dst_map->lock);
+			}
+
+			/*
+			 *	Recursively copy the sharing map.
+			 *	(The result of the recursive call is
+			 *	discarded.)
+			 */
+
+			(void) vm_map_copy(new_dst_map, new_src_map,
+				new_dst_start, new_size, new_src_start,
+				FALSE, FALSE);
+
+			if (dst_map == new_dst_map)
+				lock_clear_recursive(&dst_map->lock);
+			if (src_map == new_src_map)
+				lock_clear_recursive(&src_map->lock);
+		}
+
+		/*
+		 *	Update variables for next pass through the loop.
+		 */
+
+		src_start = src_entry->end;
+		src_entry = src_entry->next;
+		dst_start = dst_entry->end;
+		dst_entry = dst_entry->next;
+
+		/*
+		 *	If the source is to be destroyed, here is the
+		 *	place to do it.
+		 *
+		 *	(Currently never taken: src_destroy was
+		 *	cleared at the top of the function.)
+		 */
+
+		if (src_destroy && src_map->is_main_map &&
+						dst_map->is_main_map)
+			vm_map_entry_delete(src_map, src_entry->prev);
+	}
+
+	/*
+	 *	Update the physical maps as appropriate
+	 */
+
+	if (src_map->is_main_map && dst_map->is_main_map) {
+		if (src_destroy)
+			pmap_remove(src_map->pmap, src_addr, src_addr + len);
+	}
+
+	/*
+	 *	Unlock the maps
+	 */
+
+	Return: ;
+
+	if (old_src_destroy)
+		vm_map_delete(src_map, src_addr, src_addr + len);	/* deferred source destroy */
+
+	vm_map_unlock(src_map);
+	if (src_map != dst_map)
+		vm_map_unlock(dst_map);
+
+	return(result);
+}
+
+/*
+ * vmspace_fork:
+ * Build and return a new vmspace whose map is derived from vm1's map
+ * according to each entry's inheritance: NONE entries are skipped,
+ * SHARE entries are converted to (or reuse) a sharing map referenced by
+ * both maps, and COPY entries are duplicated via vm_map_copy() or
+ * vm_map_copy_entry().  Panics on a submap entry.  The source map must
+ * not be locked by the caller; it is write-locked here for the duration.
+ */
+struct vmspace *
+vmspace_fork(vm1)
+	register struct vmspace *vm1;
+{
+	register struct vmspace *vm2;
+	vm_map_t	old_map = &vm1->vm_map;
+	vm_map_t	new_map;
+	vm_map_entry_t	old_entry;
+	vm_map_entry_t	new_entry;
+	pmap_t		new_pmap;
+
+	vm_map_lock(old_map);
+	/* New vmspace: same bounds; fields from vm_startcopy on are copied. */
+	vm2 = vmspace_alloc(old_map->min_offset, old_map->max_offset,
+	    old_map->entries_pageable);
+	bcopy(&vm1->vm_startcopy, &vm2->vm_startcopy,
+	    (caddr_t) (vm1 + 1) - (caddr_t) &vm1->vm_startcopy);
+	new_pmap = &vm2->vm_pmap;		/* XXX: not used below */
+	new_map = &vm2->vm_map;			/* XXX */
+
+	old_entry = old_map->header.next;
+	/* Visit every entry of the old map; header is the list sentinel. */
+	while (old_entry != &old_map->header) {
+		if (old_entry->is_sub_map)
+			panic("vm_map_fork: encountered a submap");
+		/* NOTE(review): message says vm_map_fork, not vmspace_fork. */
+		switch (old_entry->inheritance) {
+		case VM_INHERIT_NONE:
+			break;
+
+		case VM_INHERIT_SHARE:
+			/*
+			 *	If we don't already have a sharing map:
+			 */
+
+			if (!old_entry->is_a_map) {
+			 	vm_map_t	new_share_map;
+				vm_map_entry_t	new_share_entry;
+				
+				/*
+				 *	Create a new sharing map
+				 */
+				 
+				new_share_map = vm_map_create(NULL,
+							old_entry->start,
+							old_entry->end,
+							TRUE);
+				new_share_map->is_main_map = FALSE;
+
+				/*
+				 *	Create the only sharing entry from the
+				 *	old task map entry.
+				 */
+
+				new_share_entry =
+					vm_map_entry_create(new_share_map);
+				*new_share_entry = *old_entry;
+				new_share_entry->wired_count = 0;
+
+				/*
+				 *	Insert the entry into the new sharing
+				 *	map
+				 */
+
+				vm_map_entry_link(new_share_map,
+						new_share_map->header.prev,
+						new_share_entry);
+
+				/*
+				 *	Point the task map entry at the sharing
+				 *	map; its offset there is its own start.
+				 */
+
+				old_entry->is_a_map = TRUE;
+				old_entry->object.share_map = new_share_map;
+				old_entry->offset = old_entry->start;
+			}
+
+			/*
+			 *	Clone the entry, referencing the sharing map.
+			 */
+
+			new_entry = vm_map_entry_create(new_map);
+			*new_entry = *old_entry;
+			new_entry->wired_count = 0;
+			vm_map_reference(new_entry->object.share_map);
+
+			/*
+			 *	Insert the entry into the new map -- we
+			 *	know we're inserting at the end of the new
+			 *	map.
+			 */
+
+			vm_map_entry_link(new_map, new_map->header.prev,
+						new_entry);
+
+			/*
+			 *	Update the physical map
+			 */
+
+			pmap_copy(new_map->pmap, old_map->pmap,
+				new_entry->start,
+				(old_entry->end - old_entry->start),
+				old_entry->start);
+			break;
+
+		case VM_INHERIT_COPY:
+			/*
+			 *	Clone the entry and link into the map.
+			 */
+
+			new_entry = vm_map_entry_create(new_map);
+			*new_entry = *old_entry;
+			new_entry->wired_count = 0;
+			new_entry->object.vm_object = NULL;
+			new_entry->is_a_map = FALSE;
+			vm_map_entry_link(new_map, new_map->header.prev,
+							new_entry);
+			if (old_entry->is_a_map) {
+				int	check;
+
+				check = vm_map_copy(new_map,
+						old_entry->object.share_map,
+						new_entry->start,
+						(vm_size_t)(new_entry->end -
+							new_entry->start),
+						old_entry->offset,
+						FALSE, FALSE);
+				if (check != KERN_SUCCESS)
+					printf("vm_map_fork: copy in share_map region failed\n");
+			}
+			else {
+				vm_map_copy_entry(old_map, new_map, old_entry,
+						new_entry);
+			}
+			break;
+		}
+		old_entry = old_entry->next;
+	}
+	/* NB: size is copied as-is even if some entries were not inherited. */
+	new_map->size = old_map->size;
+	vm_map_unlock(old_map);
+
+	return(vm2);
+}
+
+/*
+ *	vm_map_lookup:
+ *
+ *	Finds the VM object, offset, and protection for a given
+ *	virtual address in the specified map, assuming a page
+ *	fault of the type specified.
+ *
+ *	On KERN_SUCCESS the map (and any sharing map consulted) is
+ *	left locked for read; return values are guaranteed until a
+ *	vm_map_lookup_done call is performed.  The map argument is
+ *	in/out (submaps are followed); the returned map and the
+ *	out_entry handle must be used in that call.
+ *
+ *	Returns KERN_INVALID_ADDRESS if no entry contains vaddr and
+ *	KERN_PROTECTION_FAILURE if the entry's protection does not
+ *	allow fault_type; in both cases no lock is held on return.
+ *
+ *	If a lookup is requested with "write protection"
+ *	specified, the map may be changed to perform virtual
+ *	copying operations, although the data referenced will
+ *	remain the same.
+ */
+int
+vm_map_lookup(var_map, vaddr, fault_type, out_entry,
+				object, offset, out_prot, wired, single_use)
+	vm_map_t		*var_map;	/* IN/OUT */
+	register vm_offset_t	vaddr;
+	register vm_prot_t	fault_type;
+
+	vm_map_entry_t		*out_entry;	/* OUT */
+	vm_object_t		*object;	/* OUT */
+	vm_offset_t		*offset;	/* OUT */
+	vm_prot_t		*out_prot;	/* OUT */
+	boolean_t		*wired;		/* OUT */
+	boolean_t		*single_use;	/* OUT */
+{
+	vm_map_t			share_map;
+	vm_offset_t			share_offset;
+	register vm_map_entry_t		entry;
+	register vm_map_t		map = *var_map;
+	register vm_prot_t		prot;
+	register boolean_t		su;
+
+	RetryLookup: ;
+
+	/*
+	 *	Lookup the faulting address.
+	 */
+
+	vm_map_lock_read(map);
+	/* RETURN(why): release the read lock on map and return why. */
+#define	RETURN(why) \
+		{ \
+		vm_map_unlock_read(map); \
+		return(why); \
+		}
+
+	/*
+	 *	If the map has an interesting hint, try it before calling
+	 *	full blown lookup routine.
+	 */
+
+	simple_lock(&map->hint_lock);
+	entry = map->hint;
+	simple_unlock(&map->hint_lock);
+
+	*out_entry = entry;
+
+	if ((entry == &map->header) ||
+	    (vaddr < entry->start) || (vaddr >= entry->end)) {
+		vm_map_entry_t	tmp_entry;
+
+		/*
+		 *	Entry was either not a valid hint, or the vaddr
+		 *	was not contained in the entry, so do a full lookup.
+		 */
+		if (!vm_map_lookup_entry(map, vaddr, &tmp_entry))
+			RETURN(KERN_INVALID_ADDRESS);
+
+		entry = tmp_entry;
+		*out_entry = entry;
+	}
+
+	/*
+	 *	Handle submaps.
+	 */
+
+	if (entry->is_sub_map) {
+		vm_map_t	old_map = map;
+
+		*var_map = map = entry->object.sub_map;
+		vm_map_unlock_read(old_map);
+		goto RetryLookup;
+	}
+		
+	/*
+	 *	Check whether this task is allowed to have
+	 *	this page.
+	 */
+
+	prot = entry->protection;
+	if ((fault_type & (prot)) != fault_type)
+		RETURN(KERN_PROTECTION_FAILURE);
+
+	/*
+	 *	If this page is not pageable, we have to get
+	 *	it for all possible accesses.
+	 */
+
+	if (*wired = (entry->wired_count != 0))
+		prot = fault_type = entry->protection;
+
+	/*
+	 *	If we don't already have a VM object, track
+	 *	it down.
+	 */
+	/* su ("single use") starts TRUE when the entry is not a sharing map. */
+	if (su = !entry->is_a_map) {
+	 	share_map = map;
+		share_offset = vaddr;
+	}
+	else {
+		vm_map_entry_t	share_entry;
+
+		/*
+		 *	Compute the sharing map, and offset into it.
+		 */
+
+		share_map = entry->object.share_map;
+		share_offset = (vaddr - entry->start) + entry->offset;
+		/* NB: *out_entry keeps the task-map entry; only entry moves. */
+		/*
+		 *	Look for the backing store object and offset
+		 */
+
+		vm_map_lock_read(share_map);
+
+		if (!vm_map_lookup_entry(share_map, share_offset,
+					&share_entry)) {
+			vm_map_unlock_read(share_map);
+			RETURN(KERN_INVALID_ADDRESS);
+		}
+		entry = share_entry;
+	}
+
+	/*
+	 *	If the entry was copy-on-write, we either ...
+	 */
+
+	if (entry->needs_copy) {
+	    	/*
+		 *	If we want to write the page, we may as well
+		 *	handle that now since we've got the sharing
+		 *	map locked.
+		 *
+		 *	If we don't need to write the page, we just
+		 *	demote the permissions allowed.
+		 */
+
+		if (fault_type & VM_PROT_WRITE) {
+			/*
+			 *	Make a new object, and place it in the
+			 *	object chain.  Note that no new references
+			 *	have appeared -- one just moved from the
+			 *	share map to the new object.
+			 */
+			/* Try to upgrade the lock; nonzero means start over. */
+			if (lock_read_to_write(&share_map->lock)) {
+				if (share_map != map)
+					vm_map_unlock_read(map);
+				goto RetryLookup;
+			}
+
+			vm_object_shadow(
+				&entry->object.vm_object,
+				&entry->offset,
+				(vm_size_t) (entry->end - entry->start));
+				
+			entry->needs_copy = FALSE;
+			
+			lock_write_to_read(&share_map->lock);
+		}
+		else {
+			/*
+			 *	We're attempting to read a copy-on-write
+			 *	page -- don't allow writes.
+			 */
+
+			prot &= (~VM_PROT_WRITE);
+		}
+	}
+
+	/*
+	 *	Create an object if necessary.
+	 */
+	if (entry->object.vm_object == NULL) {
+		/* Same upgrade-or-restart step as for needs_copy above. */
+		if (lock_read_to_write(&share_map->lock)) {
+			if (share_map != map)
+				vm_map_unlock_read(map);
+			goto RetryLookup;
+		}
+
+		entry->object.vm_object = vm_object_allocate(
+					(vm_size_t)(entry->end - entry->start));
+		entry->offset = 0;
+		lock_write_to_read(&share_map->lock);
+	}
+
+	/*
+	 *	Return the object/offset from this entry.  If the entry
+	 *	was copy-on-write or empty, it has been fixed up.
+	 */
+	/* share_offset is vaddr itself when no sharing map was involved. */
+	*offset = (share_offset - entry->start) + entry->offset;
+	*object = entry->object.vm_object;
+
+	/*
+	 *	Return whether this is the only map sharing this data.
+	 */
+	/* With a sharing map, single use means its ref_count is exactly 1. */
+	if (!su) {
+		simple_lock(&share_map->ref_lock);
+		su = (share_map->ref_count == 1);
+		simple_unlock(&share_map->ref_lock);
+	}
+
+	*out_prot = prot;
+	*single_use = su;
+
+	return(KERN_SUCCESS);
+	
+#undef	RETURN
+}
+
+/*
+ *	vm_map_lookup_done:
+ *
+ *	Undo the read locks left held by a successful vm_map_lookup.
+ *	"map" is the map that lookup returned through its in/out
+ *	argument and "entry" is the out_entry handle it produced.
+ */
+void vm_map_lookup_done(map, entry)
+	register vm_map_t	map;
+	vm_map_entry_t		entry;
+{
+	vm_map_t	shared;
+
+	/*
+	 *	An entry that references a sharing map has that map
+	 *	unlocked first; the main-level map is always unlocked
+	 *	last.
+	 */
+	if (entry->is_a_map) {
+		shared = entry->object.share_map;
+		vm_map_unlock_read(shared);
+	}
+	vm_map_unlock_read(map);
+}
+
+/*
+ *	Routine:	vm_map_simplify
+ *	Purpose:
+ *		Try to coalesce the entry containing "start" with the
+ *		entry immediately before it, when the two are adjacent
+ *		and otherwise indistinguishable.
+ *	Note:
+ *		Intended mainly to keep the kernel maps compact; they
+ *		rarely benefit from the "expand a map entry" technology
+ *		at allocation time because the adjacent entry is often
+ *		wired down.
+ */
+void vm_map_simplify(map, start)
+	vm_map_t	map;
+	vm_offset_t	start;
+{
+	vm_map_entry_t	cur;
+	vm_map_entry_t	pred;
+
+	vm_map_lock(map);
+	/* There must be an entry at start with a real predecessor. */
+	if (!vm_map_lookup_entry(map, start, &cur))
+		goto out;
+	pred = cur->prev;
+	if (pred == &map->header)
+		goto out;
+	/* Only abutting entries of a main map are candidates. */
+	if (pred->end != start || !map->is_main_map)
+		goto out;
+	/* Neither entry may refer to a sharing map or a submap. */
+	if (pred->is_a_map || pred->is_sub_map ||
+	    cur->is_a_map || cur->is_sub_map)
+		goto out;
+
+	/* Attributes must match and the object ranges must be contiguous. */
+	if (pred->inheritance != cur->inheritance ||
+	    pred->protection != cur->protection ||
+	    pred->max_protection != cur->max_protection ||
+	    pred->wired_count != cur->wired_count ||
+	    pred->copy_on_write != cur->copy_on_write ||
+	    pred->needs_copy != cur->needs_copy ||
+	    pred->object.vm_object != cur->object.vm_object ||
+	    pred->offset + (pred->end - pred->start) != cur->offset)
+		goto out;
+
+	/* Absorb cur into pred, then release cur and its object reference. */
+	if (map->first_free == cur)
+		map->first_free = pred;
+	SAVE_HINT(map, pred);
+	vm_map_entry_unlink(map, cur);
+	pred->end = cur->end;
+	vm_object_deallocate(cur->object.vm_object);
+	vm_map_entry_dispose(map, cur);
+out:
+	vm_map_unlock(map);
+}
+
+/*
+ *	vm_map_print:	[ debug ]
+ *	Print a summary of "map" and, unless cut short, each of its entries.
+ */
+void vm_map_print(map, full)
+	register vm_map_t	map;
+	boolean_t		full;
+{
+	static char *inheritance_name[4] =
+		{ "share", "copy", "none", "donate_copy"};
+	register vm_map_entry_t	ep, before;
+	extern int indent;
+
+	iprintf("%s map 0x%x: pmap=0x%x,ref=%d,nentries=%d,version=%d\n",
+		(map->is_main_map ? "Task" : "Share"),
+		(int) map, (int) (map->pmap), map->ref_count, map->nentries,
+		map->timestamp);
+
+	/* Without "full", stop after the summary when already indented. */
+	if (!full && indent)
+		return;
+
+	indent += 2;
+	ep = map->header.next;
+	while (ep != &map->header) {
+		before = ep->prev;
+		iprintf("map entry 0x%x: start=0x%x, end=0x%x, ",
+			(int) ep, (int) ep->start, (int) ep->end);
+		if (map->is_main_map) {
+			printf("prot=%x/%x/%s, ",
+				ep->protection,
+				ep->max_protection,
+				inheritance_name[ep->inheritance]);
+			if (ep->wired_count != 0)
+				printf("wired, ");
+		}
+
+		if (ep->is_a_map || ep->is_sub_map) {
+			printf("share=0x%x, offset=0x%x\n",
+				(int) ep->object.share_map,
+				(int) ep->offset);
+			/* Skip the dump if the previous entry has this map. */
+			if (before == &map->header || !before->is_a_map ||
+			    before->object.share_map != ep->object.share_map) {
+				indent += 2;
+				vm_map_print(ep->object.share_map, full);
+				indent -= 2;
+			}
+		}
+		else {
+			printf("object=0x%x, offset=0x%x",
+				(int) ep->object.vm_object,
+				(int) ep->offset);
+			if (ep->copy_on_write)
+				printf(", copy (%s)",
+				       ep->needs_copy ? "needed" : "done");
+			printf("\n");
+			/* Skip the dump if the previous entry has this object. */
+			if (before == &map->header || before->is_a_map ||
+			    before->object.vm_object != ep->object.vm_object) {
+				indent += 2;
+				vm_object_print(ep->object.vm_object, full);
+				indent -= 2;
+			}
+		}
+		ep = ep->next;
+	}
+	indent -= 2;
+}
diff --git a/sys/vm/vm_map.h b/sys/vm/vm_map.h
new file mode 100644
index 00000000000..d25b7a2d1bd
--- /dev/null
+++ b/sys/vm/vm_map.h
@@ -0,0 +1,228 @@
+/* 
+ * Copyright (c) 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * The Mach Operating System project at Carnegie-Mellon University.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)vm_map.h	8.3 (Berkeley) 3/15/94
+ *
+ *
+ * Copyright (c) 1987, 1990 Carnegie-Mellon University.
+ * All rights reserved.
+ *
+ * Authors: Avadis Tevanian, Jr., Michael Wayne Young
+ * 
+ * Permission to use, copy, modify and distribute this software and
+ * its documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ * 
+ * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 
+ * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND 
+ * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ * 
+ * Carnegie Mellon requests users of this software to return to
+ *
+ *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
+ *  School of Computer Science
+ *  Carnegie Mellon University
+ *  Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie the
+ * rights to redistribute these changes.
+ */
+
+/*
+ *	Virtual memory map module definitions.
+ */
+
+#ifndef	_VM_MAP_
+#define	_VM_MAP_
+
+/*
+ *	Types defined:
+ *
+ *	vm_map_t		the high-level address map data structure.
+ *	vm_map_entry_t		an entry in an address map.
+ *	vm_map_version_t	a timestamp of a map, for use with vm_map_lookup
+ */
+
+/*
+ *	A map entry refers to a VM object, to another map (a "sharing
+ *	map", denoting read-write sharing with other maps), or to a
+ *	submap; the entry's is_a_map/is_sub_map flags tell which.
+ */
+
+union vm_map_object {
+	struct vm_object	*vm_object;	/* VM object */
+	struct vm_map		*share_map;	/* share map */
+	struct vm_map		*sub_map;	/* belongs to another map */
+};
+
+/*
+ *	Address map entries consist of start and end addresses,
+ *	a VM object (or sharing map) and offset into that object,
+ *	and user-exported inheritance and protection information.
+ *	Also included is control information for virtual copy operations.
+ */
+struct vm_map_entry {
+	struct vm_map_entry	*prev;		/* previous entry */
+	struct vm_map_entry	*next;		/* next entry */
+	vm_offset_t		start;		/* start address */
+	vm_offset_t		end;		/* end address (exclusive) */
+	union vm_map_object	object;		/* object I point to */
+	vm_offset_t		offset;		/* offset into object */
+	boolean_t		is_a_map;	/* Is "object" a map? */
+	boolean_t		is_sub_map;	/* Is "object" a submap? */
+		/* Only in sharing maps: */
+	boolean_t		copy_on_write;	/* is data copy-on-write */
+	boolean_t		needs_copy;	/* does object need to be copied */
+		/* Only in task maps: */
+	vm_prot_t		protection;	/* protection code */
+	vm_prot_t		max_protection;	/* maximum protection */
+	vm_inherit_t		inheritance;	/* inheritance */
+	int			wired_count;	/* can be paged if = 0 */
+};
+
+/*
+ *	Maps are doubly-linked lists of map entries, kept sorted
+ *	by address.  A single hint is provided to start
+ *	searches again from the last successful search,
+ *	insertion, or removal.
+ */
+struct vm_map {
+	struct pmap *		pmap;		/* Physical map */
+	lock_data_t		lock;		/* Lock for map data */
+	struct vm_map_entry	header;		/* Entry list head + bounds */
+	int			nentries;	/* Number of entries */
+	vm_size_t		size;		/* virtual size */
+	boolean_t		is_main_map;	/* Am I a main map? */
+	int			ref_count;	/* Reference count */
+	simple_lock_data_t	ref_lock;	/* Lock for ref_count field */
+	vm_map_entry_t		hint;		/* hint for quick lookups */
+	simple_lock_data_t	hint_lock;	/* lock for hint storage */
+	vm_map_entry_t		first_free;	/* First free space hint */
+	boolean_t		entries_pageable; /* map entries pageable?? */
+	unsigned int		timestamp;	/* Version; ++ by vm_map_lock */
+#define	min_offset		header.start
+#define max_offset		header.end
+};
+
+/*
+ *	Map versions are used to validate a previous lookup attempt.
+ *
+ *	Since lookup operations may involve both a main map and
+ *	a sharing map, it is necessary to have a timestamp from each.
+ *	[If the main map timestamp has changed, the share_map and
+ *	associated timestamp are no longer valid; the map version
+ *	does not include a reference for the embedded share_map.]
+ */
+typedef struct {
+	int		main_timestamp;	/* NOTE(review): map field is unsigned */
+	vm_map_t	share_map;
+	int		share_timestamp;
+} vm_map_version_t;
+
+/*
+ *	Macros:		vm_map_lock, etc.
+ *	Function:	Perform locking on the data portion of a map.
+ *		vm_map_lock also bumps the map timestamp; it is one statement
+ *		(safe in if/else) but evaluates its argument twice.
+ */
+#define	vm_map_lock(map) do { \
+	lock_write(&(map)->lock); \
+	(map)->timestamp++; \
+} while (0)
+#define	vm_map_unlock(map)	lock_write_done(&(map)->lock)
+#define	vm_map_lock_read(map)	lock_read(&(map)->lock)
+#define	vm_map_unlock_read(map)	lock_read_done(&(map)->lock)
+
+/*
+ *	Accessors implemented as macros (min/max alias header.start/end)
+ */
+#define		vm_map_min(map)		((map)->min_offset)
+#define		vm_map_max(map)		((map)->max_offset)
+#define		vm_map_pmap(map)	((map)->pmap)
+
+/* XXX: number of kernel maps and map entries to statically allocate */
+#define MAX_KMAP	10
+#define	MAX_KMAPENT	500
+
+#ifdef KERNEL
+boolean_t	 vm_map_check_protection __P((vm_map_t,
+		    vm_offset_t, vm_offset_t, vm_prot_t));
+int		 vm_map_copy __P((vm_map_t, vm_map_t, vm_offset_t,
+		    vm_size_t, vm_offset_t, boolean_t, boolean_t));
+void		 vm_map_copy_entry __P((vm_map_t,
+		    vm_map_t, vm_map_entry_t, vm_map_entry_t));
+struct pmap;	/* forward declaration for the prototype below */
+vm_map_t	 vm_map_create __P((struct pmap *,
+		    vm_offset_t, vm_offset_t, boolean_t));
+void		 vm_map_deallocate __P((vm_map_t));
+int		 vm_map_delete __P((vm_map_t, vm_offset_t, vm_offset_t));
+vm_map_entry_t	 vm_map_entry_create __P((vm_map_t));
+void		 vm_map_entry_delete __P((vm_map_t, vm_map_entry_t));
+void		 vm_map_entry_dispose __P((vm_map_t, vm_map_entry_t));
+void		 vm_map_entry_unwire __P((vm_map_t, vm_map_entry_t));
+int		 vm_map_find __P((vm_map_t, vm_object_t,
+		    vm_offset_t, vm_offset_t *, vm_size_t, boolean_t));
+int		 vm_map_findspace __P((vm_map_t,
+		    vm_offset_t, vm_size_t, vm_offset_t *));
+int		 vm_map_inherit __P((vm_map_t,
+		    vm_offset_t, vm_offset_t, vm_inherit_t));
+void		 vm_map_init __P((struct vm_map *,
+		    vm_offset_t, vm_offset_t, boolean_t));
+int		 vm_map_insert __P((vm_map_t,
+		    vm_object_t, vm_offset_t, vm_offset_t, vm_offset_t));
+int		 vm_map_lookup __P((vm_map_t *, vm_offset_t, vm_prot_t,
+		    vm_map_entry_t *, vm_object_t *, vm_offset_t *, vm_prot_t *,
+		    boolean_t *, boolean_t *));
+void		 vm_map_lookup_done __P((vm_map_t, vm_map_entry_t));
+boolean_t	 vm_map_lookup_entry __P((vm_map_t,
+		    vm_offset_t, vm_map_entry_t *));
+int		 vm_map_pageable __P((vm_map_t,
+		    vm_offset_t, vm_offset_t, boolean_t));
+int		 vm_map_clean __P((vm_map_t,
+		    vm_offset_t, vm_offset_t, boolean_t, boolean_t));
+void		 vm_map_print __P((vm_map_t, boolean_t));
+int		 vm_map_protect __P((vm_map_t,
+		    vm_offset_t, vm_offset_t, vm_prot_t, boolean_t));
+void		 vm_map_reference __P((vm_map_t));
+int		 vm_map_remove __P((vm_map_t, vm_offset_t, vm_offset_t));
+void		 vm_map_simplify __P((vm_map_t, vm_offset_t));
+void		 vm_map_simplify_entry __P((vm_map_t, vm_map_entry_t));
+void		 vm_map_startup __P((void));
+int		 vm_map_submap __P((vm_map_t,
+		    vm_offset_t, vm_offset_t, vm_map_t));
+#endif /* KERNEL */
+#endif /* _VM_MAP_ */
diff --git a/sys/vm/vm_meter.c b/sys/vm/vm_meter.c
new file mode 100644
index 00000000000..9db6f506c2a
--- /dev/null
+++ b/sys/vm/vm_meter.c
@@ -0,0 +1,224 @@
+/*
+ * Copyright (c) 1982, 1986, 1989, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)vm_meter.c	8.4 (Berkeley) 1/4/94
+ */
+
+#include <sys/param.h>
+#include <sys/proc.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <vm/vm.h>
+#include <sys/sysctl.h>
+
+struct	loadavg averunnable;		/* load average, of runnable procs */
+
+int	maxslp = MAXSLP;		/* p_slptime cutoff (vmmeter, vmtotal) */
+int	saferss = SAFERSS;		/* not referenced elsewhere in this file */
+
+void
+vmmeter()
+{
+	/* Load averages on multiples of 5s; wakeup on &proc0 past maxslp/2. */
+	if ((time.tv_sec % 5) == 0)
+		loadav(&averunnable);
+	if (maxslp / 2 < proc0.p_slptime)
+		wakeup((caddr_t)&proc0);
+}
+
+/*
+ * Decay factors for the 1, 5, and 15 minute load averages when sampling
+ * at 5 second intervals: exp(-5/60), exp(-5/300), exp(-5/900) times FSCALE.
+ */
+fixpt_t	cexp[3] = {
+	0.9200444146293232 * FSCALE,	/* exp(-1/12) */
+	0.9834714538216174 * FSCALE,	/* exp(-1/60) */
+	0.9944598480048967 * FSCALE,	/* exp(-1/180) */
+};
+
+/*
+ * Fold the current count of runnable processes into the three
+ * decaying averages (1, 5 and 15 minute intervals) held in avg.
+ */
+void
+loadav(avg)
+	register struct loadavg *avg;
+{
+	register struct proc *pp;
+	register int slot, runnable;
+
+	/*
+	 * Count processes in state SRUN or SIDL, plus SSLEEP ones at
+	 * priority <= PZERO whose p_slptime is still zero.
+	 */
+	runnable = 0;
+	for (pp = (struct proc *)allproc; pp != NULL; pp = pp->p_next) {
+		if (pp->p_stat == SRUN || pp->p_stat == SIDL ||
+		    (pp->p_stat == SSLEEP && pp->p_priority <= PZERO &&
+		     pp->p_slptime == 0))
+			runnable++;
+	}
+	for (slot = 0; slot < 3; slot++)
+		avg->ldavg[slot] = (cexp[slot] * avg->ldavg[slot] + runnable *
+		    FSCALE * (FSCALE - cexp[slot])) >> FSHIFT;
+}
+
+/*
+ * VM sysctl handler; returns sysctl_rdstruct()'s result or ENOTDIR/EOPNOTSUPP.
+ */
+int
+vm_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p)
+	int *name;		/* name[0] selects VM_LOADAVG or VM_METER */
+	u_int namelen;		/* must be 1; otherwise ENOTDIR */
+	void *oldp;		/* oldp/oldlenp/newp go to sysctl_rdstruct */
+	size_t *oldlenp;
+	void *newp;
+	size_t newlen;		/* unused */
+	struct proc *p;		/* unused */
+{
+	struct vmtotal vmtotals;
+
+	/* all sysctl names at this level are terminal */
+	if (namelen != 1)
+		return (ENOTDIR);		/* overloaded */
+	switch (name[0]) {
+	case VM_LOADAVG:
+		averunnable.fscale = FSCALE;
+		return (sysctl_rdstruct(oldp, oldlenp, newp, &averunnable,
+		    sizeof(averunnable)));
+	case VM_METER:
+		vmtotal(&vmtotals);
+		return (sysctl_rdstruct(oldp, oldlenp, newp, &vmtotals,
+		    sizeof(vmtotals)));
+	default:
+		return (EOPNOTSUPP);
+	}
+	/* NOTREACHED */
+}
+
+/*
+ * Calculate the current state of the system into *totalp.
+ * Done on demand, e.g. from vm_sysctl() for VM_METER.
+ */
+void
+vmtotal(totalp)
+	register struct vmtotal *totalp;
+{
+	register struct proc *p;
+	register vm_map_entry_t	entry;
+	register vm_object_t object;
+	register vm_map_t map;
+	int paging;
+
+	bzero(totalp, sizeof *totalp);
+	/* Clear OBJ_ACTIVE everywhere; the process scan sets it again below. */
+	simple_lock(&vm_object_list_lock);
+	for (object = vm_object_list.tqh_first;
+	     object != NULL;
+	     object = object->object_list.tqe_next)
+		object->flags &= ~OBJ_ACTIVE;
+	simple_unlock(&vm_object_list_lock);
+	/*
+	 * Calculate process statistics.
+	 */
+	for (p = (struct proc *)allproc; p != NULL; p = p->p_next) {
+		if (p->p_flag & P_SYSTEM)
+			continue;
+		switch (p->p_stat) {
+		case 0:
+			continue;
+
+		case SSLEEP:
+		case SSTOP:
+			if (p->p_flag & P_INMEM) {
+				if (p->p_priority <= PZERO)
+					totalp->t_dw++;
+				else if (p->p_slptime < maxslp)
+					totalp->t_sl++;
+			} else if (p->p_slptime < maxslp)
+				totalp->t_sw++;
+			/* long sleepers do not mark objects active */
+			if (p->p_slptime >= maxslp)
+				continue;
+			break;
+
+		case SRUN:
+		case SIDL:
+			if (p->p_flag & P_INMEM)
+				totalp->t_rq++;
+			else
+				totalp->t_sw++;
+			if (p->p_stat == SIDL)
+				continue;
+			break;
+		}
+		/*
+		 * Note active objects.
+		 */
+		paging = 0;
+		for (map = &p->p_vmspace->vm_map, entry = map->header.next;
+		     entry != &map->header; entry = entry->next) {
+			if (entry->is_a_map || entry->is_sub_map ||
+			    entry->object.vm_object == NULL)
+				continue;
+			entry->object.vm_object->flags |= OBJ_ACTIVE;
+			paging |= entry->object.vm_object->paging_in_progress;
+		}
+		if (paging)
+			totalp->t_pw++;
+	}
+	/*
+	 * Calculate object memory usage statistics.
+	 */
+	simple_lock(&vm_object_list_lock);
+	for (object = vm_object_list.tqh_first;
+	     object != NULL;
+	     object = object->object_list.tqe_next) {
+		totalp->t_vm += num_pages(object->size);
+		totalp->t_rm += object->resident_page_count;
+		if (object->flags & OBJ_ACTIVE) {
+			totalp->t_avm += num_pages(object->size);
+			totalp->t_arm += object->resident_page_count;
+		}
+		if (object->ref_count > 1) {
+			/* shared object */
+			totalp->t_vmshr += num_pages(object->size);
+			totalp->t_rmshr += object->resident_page_count;
+			if (object->flags & OBJ_ACTIVE) {
+				totalp->t_avmshr += num_pages(object->size);
+				totalp->t_armshr += object->resident_page_count;
+			}
+		}
+	}
+	simple_unlock(&vm_object_list_lock);
+	totalp->t_free = cnt.v_free_count;
+}
diff --git a/sys/vm/vm_mmap.c b/sys/vm/vm_mmap.c
new file mode 100644
index 00000000000..340cded1ba4
--- /dev/null
+++ b/sys/vm/vm_mmap.c
@@ -0,0 +1,832 @@
+/*
+ * Copyright (c) 1988 University of Utah.
+ * Copyright (c) 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * the Systems Programming Group of the University of Utah Computer
+ * Science Department.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * from: Utah $Hdr: vm_mmap.c 1.6 91/10/21$
+ *
+ *	@(#)vm_mmap.c	8.4 (Berkeley) 1/12/94
+ */
+
+/*
+ * Mapped file (mmap) interface to VM
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/filedesc.h>
+#include <sys/resourcevar.h>
+#include <sys/proc.h>
+#include <sys/vnode.h>
+#include <sys/file.h>
+#include <sys/mman.h>
+#include <sys/conf.h>
+
+#include <miscfs/specfs/specdev.h>
+
+#include <vm/vm.h>
+#include <vm/vm_pager.h>
+#include <vm/vm_prot.h>
+
+#ifdef DEBUG
+int mmapdebug = 0;		/* bitmask of MDB_* flags below; 0 prints nothing */
+#define MDB_FOLLOW	0x01	/* print the arguments of each call on entry */
+#define MDB_SYNC	0x02	/* print msync() arguments and the range cleaned */
+#define MDB_MAPIT	0x04	/* print vm_mmap() mapping details and result */
+#endif
+
+struct sbrk_args {
+	int	incr;		/* increment argument; not read by this stub */
+};
+/* ARGSUSED */
+int
+sbrk(p, uap, retval)
+	struct proc *p;
+	struct sbrk_args *uap;
+	int *retval;
+{
+	/* sbrk entry point: a stub that ignores all of its arguments. */
+	/* Not yet implemented, so every call fails with EOPNOTSUPP. */
+	return (EOPNOTSUPP);
+}
+
+struct sstk_args {
+	int	incr;		/* increment argument; not read by this stub */
+};
+/* ARGSUSED */
+int
+sstk(p, uap, retval)
+	struct proc *p;
+	struct sstk_args *uap;
+	int *retval;
+{
+	/* sstk entry point: a stub that ignores all of its arguments. */
+	/* Not yet implemented, so every call fails with EOPNOTSUPP. */
+	return (EOPNOTSUPP);
+}
+
+#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
+struct getpagesize_args {
+	int	dummy;		/* placeholder member; uap is never read */
+};
+/* ARGSUSED */
+int
+ogetpagesize(p, uap, retval)
+	struct proc *p;
+	struct getpagesize_args *uap;
+	int *retval;
+{
+	/* Compatibility call: store PAGE_SIZE in *retval; always returns 0. */
+	*retval = PAGE_SIZE;
+	return (0);
+}
+#endif /* COMPAT_43 || COMPAT_SUNOS */
+
+struct mmap_args {
+	caddr_t	addr;		/* fixed address, or placement hint (0 = none) */
+	size_t	len;		/* length in bytes; mmap() rounds it up to a page */
+	int	prot;		/* requested protection; masked with VM_PROT_ALL */
+	int	flags;		/* MAP_* flags */
+	int	fd;		/* descriptor to map; must be -1 with MAP_ANON */
+	long	pad;		/* never read in this file; presumably aligns pos */
+	off_t	pos;		/* offset handed to vm_mmap() as foff */
+};
+
+#ifdef COMPAT_43
+struct ommap_args {
+	caddr_t	addr;
+	int	len;
+	int	prot;		/* old encoding; translated via cvtbsdprot */
+	int	flags;		/* old OMAP_* flag bits */
+	int	fd;
+	long	pos;
+};
+int
+ommap(p, uap, retval)
+	struct proc *p;
+	register struct ommap_args *uap;
+	int *retval;
+{
+	struct mmap_args nargs;
+	static const char cvtbsdprot[8] = {
+		0,
+		PROT_EXEC,
+		PROT_WRITE,
+		PROT_EXEC|PROT_WRITE,
+		PROT_READ,
+		PROT_EXEC|PROT_READ,
+		PROT_WRITE|PROT_READ,
+		PROT_EXEC|PROT_WRITE|PROT_READ,
+	};
+#define	OMAP_ANON	0x0002
+#define	OMAP_COPY	0x0020
+#define	OMAP_SHARED	0x0010
+#define	OMAP_FIXED	0x0100
+#define	OMAP_INHERIT	0x0800
+
+	/*
+	 * Repack the old-style arguments into a struct mmap_args.  The
+	 * old protection value indexes cvtbsdprot (low three bits only).
+	 */
+	nargs.addr = uap->addr;
+	nargs.len = uap->len;
+	nargs.fd = uap->fd;
+	nargs.pos = uap->pos;
+	nargs.prot = cvtbsdprot[uap->prot & 0x7];
+	/*
+	 * Translate the flag word one bit at a time; a mapping that was
+	 * not marked shared becomes MAP_PRIVATE.
+	 */
+	nargs.flags = (uap->flags & OMAP_SHARED) ? MAP_SHARED : MAP_PRIVATE;
+	nargs.flags |= (uap->flags & OMAP_ANON) ? MAP_ANON : 0;
+	nargs.flags |= (uap->flags & OMAP_COPY) ? MAP_COPY : 0;
+	nargs.flags |= (uap->flags & OMAP_FIXED) ? MAP_FIXED : 0;
+	nargs.flags |= (uap->flags & OMAP_INHERIT) ? MAP_INHERIT : 0;
+	return (mmap(p, &nargs, retval));
+}
+#endif
+
+int
+mmap(p, uap, retval)
+	struct proc *p;
+	register struct mmap_args *uap;
+	int *retval;
+{
+	register struct filedesc *fdp = p->p_fd;
+	register struct file *fp;
+	struct vnode *vp;
+	vm_offset_t addr;
+	vm_size_t size;
+	vm_prot_t prot, maxprot;
+	caddr_t handle;		/* vnode to map, or NULL for anonymous memory */
+	int flags, error;
+	/* Check the request, choose handle and maxprot, then call vm_mmap(). */
+	prot = uap->prot & VM_PROT_ALL;
+	flags = uap->flags;
+#ifdef DEBUG
+	if (mmapdebug & MDB_FOLLOW)
+		printf("mmap(%d): addr %x len %x pro %x flg %x fd %d pos %x\n",
+		       p->p_pid, uap->addr, uap->len, prot,
+		       flags, uap->fd, (vm_offset_t)uap->pos);
+#endif
+	/*
+	 * Address (if FIXED) must be page aligned, len must not be negative,
+	 * and MAP_ANON requires fd == -1.  Size is rounded up to a page.
+	 */
+	addr = (vm_offset_t) uap->addr;
+	if (((flags & MAP_FIXED) && (addr & PAGE_MASK)) ||
+	    (ssize_t)uap->len < 0 || ((flags & MAP_ANON) && uap->fd != -1))
+		return (EINVAL);
+	size = (vm_size_t) round_page(uap->len);
+	/*
+	 * Check for illegal addresses.  Watch out for address wrap...
+	 * Note that VM_*_ADDRESS are not constants due to casts (argh).
+	 */
+	if (flags & MAP_FIXED) {
+		if (VM_MAXUSER_ADDRESS > 0 && addr + size >= VM_MAXUSER_ADDRESS)
+			return (EINVAL);
+		if (VM_MIN_ADDRESS > 0 && addr < VM_MIN_ADDRESS)
+			return (EINVAL);
+		if (addr > addr + size)
+			return (EINVAL);
+	}
+	/*
+	 * XXX if no hint provided for a non-fixed mapping place it after
+	 * the end of the largest possible heap.
+	 *
+	 * There should really be a pmap call to determine a reasonable
+	 * location.
+	 */
+	if (addr == 0 && (flags & MAP_FIXED) == 0)
+		addr = round_page(p->p_vmspace->vm_daddr + MAXDSIZ);
+	if (flags & MAP_ANON) {
+		/*
+		 * Mapping blank space is trivial.
+		 */
+		handle = NULL;
+		maxprot = VM_PROT_ALL;
+	} else {
+		/*
+		 * Mapping file, get fp for validation.
+		 * Obtain vnode and make sure it is of appropriate type.
+		 */
+		if (((unsigned)uap->fd) >= fdp->fd_nfiles ||
+		    (fp = fdp->fd_ofiles[uap->fd]) == NULL)
+			return (EBADF);
+		if (fp->f_type != DTYPE_VNODE)
+			return (EINVAL);
+		vp = (struct vnode *)fp->f_data;
+		if (vp->v_type != VREG && vp->v_type != VCHR)
+			return (EINVAL);
+		/*
+		 * XXX hack to handle use of /dev/zero to map anon
+		 * memory (ala SunOS).
+		 */
+		if (vp->v_type == VCHR && iszerodev(vp->v_rdev)) {
+			handle = NULL;
+			maxprot = VM_PROT_ALL;
+			flags |= MAP_ANON;	/* treated as anonymous from here on */
+		} else {
+			/*
+			 * Ensure that file and memory protections are
+			 * compatible.  Note that we only worry about
+			 * writability if mapping is shared; in this case,
+			 * current and max prot are dictated by the open file.
+			 * XXX use the vnode instead?  Problem is: what
+			 * credentials do we use for determination?
+			 * What if proc does a setuid?
+			 */
+			maxprot = VM_PROT_EXECUTE;	/* ??? */
+			if (fp->f_flag & FREAD)
+				maxprot |= VM_PROT_READ;
+			else if (prot & PROT_READ)
+				return (EACCES);
+			if (flags & MAP_SHARED) {
+				if (fp->f_flag & FWRITE)
+					maxprot |= VM_PROT_WRITE;
+				else if (prot & PROT_WRITE)
+					return (EACCES);
+			} else
+				maxprot |= VM_PROT_WRITE;	/* non-shared: always allowed */
+			handle = (caddr_t)vp;
+		}
+	}
+	error = vm_mmap(&p->p_vmspace->vm_map, &addr, size, prot, maxprot,
+	    flags, handle, (vm_offset_t)uap->pos);
+	if (error == 0)
+		*retval = (int)addr;	/* hand the mapped address back */
+	return (error);
+}
+
+struct msync_args {
+	caddr_t	addr;		/* start of range; must be page aligned */
+	int	len;		/* bytes; 0 means the map entry containing addr */
+};
+int
+msync(p, uap, retval)
+	struct proc *p;
+	struct msync_args *uap;
+	int *retval;
+{
+	vm_offset_t addr;
+	vm_size_t size;
+	vm_map_t map;
+	int rv;
+	boolean_t syncio, invalidate;
+	/* Returns 0, EINVAL (bad or unmapped range) or EIO (cleaning failed). */
+#ifdef DEBUG
+	if (mmapdebug & (MDB_FOLLOW|MDB_SYNC))
+		printf("msync(%d): addr %x len %x\n",
+		       p->p_pid, uap->addr, uap->len);
+#endif
+	if (((int)uap->addr & PAGE_MASK) || uap->addr + uap->len < uap->addr)
+		return (EINVAL);
+	map = &p->p_vmspace->vm_map;
+	addr = (vm_offset_t)uap->addr;
+	size = (vm_size_t)uap->len;
+	/*
+	 * XXX Gak!  If size is zero we are supposed to sync "all modified
+	 * pages with the region containing addr".  Unfortunately, we
+	 * don't really keep track of individual mmaps so we approximate
+	 * by flushing the range of the map entry containing addr.
+	 * This can be incorrect if the region splits or is coalesced
+	 * with a neighbor.
+	 */
+	if (size == 0) {
+		vm_map_entry_t entry;
+
+		vm_map_lock_read(map);
+		rv = vm_map_lookup_entry(map, addr, &entry);
+		vm_map_unlock_read(map);
+		if (!rv)	/* lookup is TRUE only when an entry holds addr */
+			return (EINVAL);
+		addr = entry->start;
+		size = entry->end - entry->start;
+	}
+#ifdef DEBUG
+	if (mmapdebug & MDB_SYNC)
+		printf("msync: cleaning/flushing address range [%x-%x)\n",
+		       addr, addr+size);
+#endif
+	/*
+	 * Could pass this in as a third flag argument to implement
+	 * Sun's MS_ASYNC.
+	 */
+	syncio = TRUE;
+	/*
+	 * XXX bummer, gotta flush all cached pages to ensure
+	 * consistency with the file system cache.  Otherwise, we could
+	 * pass this in to implement Sun's MS_INVALIDATE.
+	 */
+	invalidate = TRUE;
+	/*
+	 * Clean the pages and interpret the return value.
+	 */
+	rv = vm_map_clean(map, addr, addr+size, syncio, invalidate);
+	switch (rv) {
+	case KERN_SUCCESS:
+		break;
+	case KERN_INVALID_ADDRESS:
+		return (EINVAL);	/* Sun returns ENOMEM? */
+	case KERN_FAILURE:
+		return (EIO);
+	default:
+		return (EINVAL);
+	}
+	return (0);
+}
+
+struct munmap_args {
+	caddr_t	addr;		/* start of range; must be page aligned */
+	int	len;		/* length in bytes; must not be negative */
+};
+int
+munmap(p, uap, retval)
+	register struct proc *p;
+	register struct munmap_args *uap;
+	int *retval;
+{
+	vm_offset_t addr;
+	vm_size_t size;
+	vm_map_t map;
+
+#ifdef DEBUG
+	if (mmapdebug & MDB_FOLLOW)
+		printf("munmap(%d): addr %x len %x\n",
+		       p->p_pid, uap->addr, uap->len);
+#endif
+
+	/* An unaligned start or a negative length is an error. */
+	addr = (vm_offset_t) uap->addr;
+	if (uap->len < 0 || (addr & PAGE_MASK) != 0)
+		return (EINVAL);
+	size = (vm_size_t) round_page(uap->len);
+	if (size == 0)
+		return (0);		/* nothing to unmap */
+	/*
+	 * Refuse ranges that reach VM_MAXUSER_ADDRESS, start below
+	 * VM_MIN_ADDRESS, or wrap around.  Note that the VM_*_ADDRESS
+	 * macros are not constants because of casts.
+	 */
+	if ((VM_MAXUSER_ADDRESS > 0 &&
+	     addr + size >= VM_MAXUSER_ADDRESS) ||
+	    (VM_MIN_ADDRESS > 0 && addr < VM_MIN_ADDRESS) ||
+	    addr > addr + size)
+		return (EINVAL);
+	/* Every page of the range must currently be allocated. */
+	map = &p->p_vmspace->vm_map;
+	if (vm_map_check_protection(map, addr, addr + size, VM_PROT_NONE)) {
+		/* vm_map_remove returns nothing but KERN_SUCCESS anyway */
+		(void) vm_map_remove(map, addr, addr + size);
+		return (0);
+	}
+	return (EINVAL);
+}
+
+void
+munmapfd(fd)
+	int fd;			/* descriptor index; not range checked here */
+{
+#ifdef DEBUG
+	if (mmapdebug & MDB_FOLLOW)
+		printf("munmapfd(%d): fd %d\n", curproc->p_pid, fd);
+#endif
+	/* Clear the UF_MAPPED flag of descriptor fd in the current process. */
+	/*
+	 * XXX should vm_deallocate any regions mapped to this file
+	 */
+	curproc->p_fd->fd_ofileflags[fd] &= ~UF_MAPPED;
+}
+
+struct mprotect_args {
+	caddr_t	addr;		/* start of range; must be page aligned */
+	int	len;		/* length in bytes; must not be negative */
+	int	prot;		/* new protection; masked with VM_PROT_ALL */
+};
+int
+mprotect(p, uap, retval)
+	struct proc *p;
+	struct mprotect_args *uap;
+	int *retval;
+{
+	vm_offset_t start;
+	vm_size_t length;
+	register vm_prot_t newprot;
+	int rv;
+
+#ifdef DEBUG
+	if (mmapdebug & MDB_FOLLOW)
+		printf("mprotect(%d): addr %x len %x prot %d\n",
+		       p->p_pid, uap->addr, uap->len, uap->prot);
+#endif
+	/* An unaligned start or a negative length is an error. */
+	start = (vm_offset_t)uap->addr;
+	if (uap->len < 0 || (start & PAGE_MASK) != 0)
+		return (EINVAL);
+	length = (vm_size_t)uap->len;
+	newprot = uap->prot & VM_PROT_ALL;
+	/* Apply the new protection and translate the KERN_* result. */
+	rv = vm_map_protect(&p->p_vmspace->vm_map, start, start + length,
+	    newprot, FALSE);
+	if (rv == KERN_SUCCESS)
+		return (0);
+	if (rv == KERN_PROTECTION_FAILURE)
+		return (EACCES);
+	return (EINVAL);
+}
+
+struct madvise_args {
+	caddr_t	addr;		/* not read by this stub */
+	int	len;		/* not read by this stub */
+	int	behav;		/* not read by this stub */
+};
+/* ARGSUSED */
+int
+madvise(p, uap, retval)
+	struct proc *p;
+	struct madvise_args *uap;
+	int *retval;
+{
+	/* madvise entry point: a stub that ignores all of its arguments. */
+	/* Not yet implemented, so every call fails with EOPNOTSUPP. */
+	return (EOPNOTSUPP);
+}
+
+struct mincore_args {
+	caddr_t	addr;		/* not read by this stub */
+	int	len;		/* not read by this stub */
+	char	*vec;		/* not read by this stub */
+};
+/* ARGSUSED */
+int
+mincore(p, uap, retval)
+	struct proc *p;
+	struct mincore_args *uap;
+	int *retval;
+{
+	/* mincore entry point: a stub that ignores all of its arguments. */
+	/* Not yet implemented, so every call fails with EOPNOTSUPP. */
+	return (EOPNOTSUPP);
+}
+
+struct mlock_args {
+	caddr_t	addr;		/* start of range; must be page aligned */
+	size_t	len;		/* length in bytes; rounded up to a page */
+};
+int
+mlock(p, uap, retval)
+	struct proc *p;
+	struct mlock_args *uap;
+	int *retval;
+{
+	extern int vm_page_max_wired;
+	vm_offset_t va;
+	vm_size_t sz;
+	int error;
+	/* Make the range non-pageable: 0, EINVAL, EAGAIN, ENOMEM or suser error. */
+#ifdef DEBUG
+	if (mmapdebug & MDB_FOLLOW)
+		printf("mlock(%d): addr %x len %x\n",
+		       p->p_pid, uap->addr, uap->len);
+#endif
+	va = (vm_offset_t)uap->addr;
+	if ((va & PAGE_MASK) != 0 || uap->addr + uap->len < uap->addr)
+		return (EINVAL);
+	sz = round_page((vm_size_t)uap->len);
+	if (atop(sz) + cnt.v_wire_count > vm_page_max_wired)
+		return (EAGAIN);
+#ifdef pmap_wired_count
+	if (sz + ptoa(pmap_wired_count(vm_map_pmap(&p->p_vmspace->vm_map))) >
+	    p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur)
+		return (EAGAIN);
+#else
+	error = suser(p->p_ucred, &p->p_acflag);
+	if (error != 0)
+		return (error);
+#endif
+	error = vm_map_pageable(&p->p_vmspace->vm_map, va, va + sz, FALSE);
+	return (error != KERN_SUCCESS ? ENOMEM : 0);
+}
+
+struct munlock_args {
+	caddr_t	addr;		/* start of range; must be page aligned */
+	size_t	len;		/* length in bytes; rounded up to a page */
+};
+int
+munlock(p, uap, retval)
+	struct proc *p;
+	struct munlock_args *uap;
+	int *retval;
+{
+	vm_offset_t va;
+	vm_size_t sz;
+	int error;
+	/* Make the range pageable again: 0, EINVAL, ENOMEM or suser error. */
+#ifdef DEBUG
+	if (mmapdebug & MDB_FOLLOW)
+		printf("munlock(%d): addr %x len %x\n",
+		       p->p_pid, uap->addr, uap->len);
+#endif
+	va = (vm_offset_t)uap->addr;
+	if ((va & PAGE_MASK) != 0 || uap->addr + uap->len < uap->addr)
+		return (EINVAL);
+	sz = round_page((vm_size_t)uap->len);
+#ifndef pmap_wired_count
+	error = suser(p->p_ucred, &p->p_acflag);
+	if (error != 0)
+		return (error);
+#endif
+	error = vm_map_pageable(&p->p_vmspace->vm_map, va, va + sz, TRUE);
+	return (error != KERN_SUCCESS ? ENOMEM : 0);
+}
+
+/*
+ * Internal version of mmap; used by mmap, exec, and sys5 shared memory.
+ * Handle is either a vnode pointer or NULL for MAP_ANON.  A size of 0
+ * succeeds without mapping anything.  Returns 0 or an errno value.
+ */
+int
+vm_mmap(map, addr, size, prot, maxprot, flags, handle, foff)
+	register vm_map_t map;
+	register vm_offset_t *addr;
+	register vm_size_t size;
+	vm_prot_t prot, maxprot;
+	register int flags;
+	caddr_t handle;		/* XXX should be vp */
+	vm_offset_t foff;
+{
+	register vm_pager_t pager;
+	boolean_t fitit;
+	vm_object_t object;
+	struct vnode *vp = NULL;
+	int type;
+	int rv = KERN_SUCCESS;
+
+	if (size == 0)
+		return (0);
+
+	if ((flags & MAP_FIXED) == 0) {
+		fitit = TRUE;
+		*addr = round_page(*addr);
+	} else {
+		fitit = FALSE;
+		(void)vm_deallocate(map, *addr, size);
+	}
+
+	/*
+	 * Lookup/allocate pager.  All except an unnamed anonymous lookup
+	 * gain a reference to ensure continued existence of the object.
+	 * (XXX the exception is to appease the pageout daemon)
+	 */
+	if (flags & MAP_ANON)
+		type = PG_DFLT;
+	else {
+		vp = (struct vnode *)handle;
+		if (vp->v_type == VCHR) {
+			type = PG_DEVICE;
+			handle = (caddr_t)vp->v_rdev;
+		} else
+			type = PG_VNODE;
+	}
+	pager = vm_pager_allocate(type, handle, size, prot, foff);
+	if (pager == NULL)
+		return (type == PG_DEVICE ? EINVAL : ENOMEM);
+	/*
+	 * Find object and release extra reference gained by lookup
+	 */
+	object = vm_object_lookup(pager);
+	vm_object_deallocate(object);
+
+	/*
+	 * Anonymous memory.
+	 */
+	if (flags & MAP_ANON) {
+		rv = vm_allocate_with_pager(map, addr, size, fitit,
+					    pager, foff, TRUE);
+		if (rv != KERN_SUCCESS) {
+			if (handle == NULL)
+				vm_pager_deallocate(pager);
+			else
+				vm_object_deallocate(object);
+			goto out;
+		}
+		/*
+		 * Don't cache anonymous objects.
+		 * Loses the reference gained by vm_pager_allocate.
+		 * Note that object will be NULL when handle == NULL,
+		 * this is ok since vm_allocate_with_pager has made
+		 * sure that these objects are uncached.
+		 */
+		(void) pager_cache(object, FALSE);
+#ifdef DEBUG
+		if (mmapdebug & MDB_MAPIT)
+			printf("vm_mmap(%d): ANON *addr %x size %x pager %x\n",
+			       curproc->p_pid, *addr, size, pager);
+#endif
+	}
+	/*
+	 * Must be a mapped file.
+	 * Distinguish between character special and regular files.
+	 */
+	else if (vp->v_type == VCHR) {
+		rv = vm_allocate_with_pager(map, addr, size, fitit,
+					    pager, foff, FALSE);
+		/*
+		 * Uncache the object and lose the reference gained
+		 * by vm_pager_allocate().  If the call to
+		 * vm_allocate_with_pager() was successful, then we
+		 * gained an additional reference ensuring the object
+		 * will continue to exist.  If the call failed then
+		 * the deallocate call below will terminate the
+		 * object which is fine.
+		 */
+		(void) pager_cache(object, FALSE);
+		if (rv != KERN_SUCCESS)
+			goto out;
+	}
+	/*
+	 * A regular file
+	 */
+	else {
+#ifdef DEBUG
+		if (object == NULL)
+			printf("vm_mmap: no object: vp %x, pager %x\n",
+			       vp, pager);
+#endif
+		/*
+		 * Map it directly.
+		 * Allows modifications to go out to the vnode.
+		 */
+		if (flags & MAP_SHARED) {
+			rv = vm_allocate_with_pager(map, addr, size,
+						    fitit, pager,
+						    foff, FALSE);
+			if (rv != KERN_SUCCESS) {
+				vm_object_deallocate(object);
+				goto out;
+			}
+			/*
+			 * Don't cache the object.  This is the easiest way
+			 * of ensuring that data gets back to the filesystem
+			 * because vnode_pager_deallocate() will fsync the
+			 * vnode.  pager_cache() will lose the extra ref.
+			 */
+			if (prot & VM_PROT_WRITE)
+				pager_cache(object, FALSE);
+			else
+				vm_object_deallocate(object);
+		}
+		/*
+		 * Copy-on-write of file.  Two flavors.
+		 * MAP_COPY is true COW, you essentially get a snapshot of
+		 * the region at the time of mapping.  MAP_PRIVATE means only
+		 * that your changes are not reflected back to the object.
+		 * Changes made by others will be seen.
+		 */
+		else {
+			vm_map_t tmap;		/* temporary map holding the file */
+			vm_offset_t off;
+
+			/* locate and allocate the target address space */
+			rv = vm_map_find(map, NULL, (vm_offset_t)0,
+					 addr, size, fitit);
+			if (rv != KERN_SUCCESS) {
+				vm_object_deallocate(object);
+				goto out;
+			}
+			tmap = vm_map_create(pmap_create(size), VM_MIN_ADDRESS,
+					     VM_MIN_ADDRESS+size, TRUE);
+			off = VM_MIN_ADDRESS;
+			rv = vm_allocate_with_pager(tmap, &off, size,
+						    TRUE, pager,
+						    foff, FALSE);
+			if (rv != KERN_SUCCESS) {
+				vm_object_deallocate(object);
+				vm_map_deallocate(tmap);
+				goto out;
+			}
+			/*
+			 * (XXX)
+			 * MAP_PRIVATE implies that we see changes made by
+			 * others.  To ensure that we need to guarantee that
+			 * no copy object is created (otherwise original
+			 * pages would be pushed to the copy object and we
+			 * would never see changes made by others).  We
+			 * totally sleaze it right now by marking the object
+			 * internal temporarily.
+			 */
+			if ((flags & MAP_COPY) == 0)
+				object->flags |= OBJ_INTERNAL;
+			rv = vm_map_copy(map, tmap, *addr, size, off,
+					 FALSE, FALSE);
+			object->flags &= ~OBJ_INTERNAL;
+			/*
+			 * (XXX)
+			 * My oh my, this only gets worse...
+			 * Force creation of a shadow object so that
+			 * vm_map_fork will do the right thing.
+			 */
+			if ((flags & MAP_COPY) == 0) {
+				vm_map_t tmap;	/* shadows the outer tmap in this block */
+				vm_map_entry_t tentry;
+				vm_object_t tobject;
+				vm_offset_t toffset;
+				vm_prot_t tprot;
+				boolean_t twired, tsu;
+
+				tmap = map;
+				vm_map_lookup(&tmap, *addr, VM_PROT_WRITE,
+					      &tentry, &tobject, &toffset,
+					      &tprot, &twired, &tsu);
+				vm_map_lookup_done(tmap, tentry);
+			}
+			/*
+			 * (XXX)
+			 * Map copy code cannot detect sharing unless a
+			 * sharing map is involved.  So we cheat and write
+			 * protect everything ourselves.
+			 */
+			vm_object_pmap_copy(object, foff, foff + size);
+			vm_object_deallocate(object);
+			vm_map_deallocate(tmap);
+			if (rv != KERN_SUCCESS)
+				goto out;
+		}
+#ifdef DEBUG
+		if (mmapdebug & MDB_MAPIT)
+			printf("vm_mmap(%d): FILE *addr %x size %x pager %x\n",
+			       curproc->p_pid, *addr, size, pager);
+#endif
+	}
+	/*
+	 * Correct protection (default is VM_PROT_ALL).
+	 * If maxprot is different than prot, we must set both explicitly.
+	 */
+	rv = KERN_SUCCESS;
+	if (maxprot != VM_PROT_ALL)
+		rv = vm_map_protect(map, *addr, *addr+size, maxprot, TRUE);
+	if (rv == KERN_SUCCESS && prot != maxprot)
+		rv = vm_map_protect(map, *addr, *addr+size, prot, FALSE);
+	if (rv != KERN_SUCCESS) {
+		(void) vm_deallocate(map, *addr, size);
+		goto out;
+	}
+	/*
+	 * Shared memory is also shared with children.
+	 */
+	if (flags & MAP_SHARED) {
+		rv = vm_map_inherit(map, *addr, *addr+size, VM_INHERIT_SHARE);
+		if (rv != KERN_SUCCESS) {
+			(void) vm_deallocate(map, *addr, size);
+			goto out;
+		}
+	}
+out:
+#ifdef DEBUG
+	if (mmapdebug & MDB_MAPIT)
+		printf("vm_mmap: rv %d\n", rv);
+#endif
+	switch (rv) {		/* translate the KERN_* code into an errno */
+	case KERN_SUCCESS:
+		return (0);
+	case KERN_INVALID_ADDRESS:
+	case KERN_NO_SPACE:
+		return (ENOMEM);
+	case KERN_PROTECTION_FAILURE:
+		return (EACCES);
+	default:
+		return (EINVAL);
+	}
+}
diff --git a/sys/vm/vm_object.c b/sys/vm/vm_object.c
new file mode 100644
index 00000000000..d11fa8be014
--- /dev/null
+++ b/sys/vm/vm_object.c
@@ -0,0 +1,1436 @@
+/* 
+ * Copyright (c) 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * The Mach Operating System project at Carnegie-Mellon University.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)vm_object.c	8.5 (Berkeley) 3/22/94
+ *
+ *
+ * Copyright (c) 1987, 1990 Carnegie-Mellon University.
+ * All rights reserved.
+ *
+ * Authors: Avadis Tevanian, Jr., Michael Wayne Young
+ * 
+ * Permission to use, copy, modify and distribute this software and
+ * its documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ * 
+ * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 
+ * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND 
+ * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ * 
+ * Carnegie Mellon requests users of this software to return to
+ *
+ *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
+ *  School of Computer Science
+ *  Carnegie Mellon University
+ *  Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie the
+ * rights to redistribute these changes.
+ */
+
+/*
+ *	Virtual memory object module.
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/malloc.h>
+
+#include <vm/vm.h>
+#include <vm/vm_page.h>
+
+/*
+ *	Virtual memory objects maintain the actual data
+ *	associated with allocated virtual memory.  A given
+ *	page of memory exists within exactly one object.
+ *
+ *	An object is only deallocated when all "references"
+ *	are given up.  Only one "reference" to a given
+ *	region of an object should be writeable.
+ *
+ *	Associated with each object is a list of all resident
+ *	memory pages belonging to that object; this list is
+ *	maintained by the "vm_page" module, and locked by the object's
+ *	lock.
+ *
+ *	Each object also records a "pager" routine which is
+ *	used to retrieve (and store) pages to the proper backing
+ *	storage.  In addition, objects may be backed by other
+ *	objects from which they were virtual-copied.
+ *
+ *	The only items within the object structure which are
+ *	modified after time of creation are:
+ *		reference count		locked by object's lock
+ *		pager routine		locked by object's lock
+ *
+ */
+
+struct vm_object	kernel_object_store;	/* storage behind kernel_object */
+struct vm_object	kmem_object_store;	/* storage behind kmem_object */
+
+#define	VM_OBJECT_HASH_COUNT	157	/* buckets in vm_object_hashtable */
+
+int	vm_cache_max = 100;	/* can patch if necessary */
+struct	vm_object_hash_head vm_object_hashtable[VM_OBJECT_HASH_COUNT];
+
+long	object_collapses = 0;	/* presumably a statistic; verify at its users */
+long	object_bypasses  = 0;	/* presumably a statistic; verify at its users */
+
+static void _vm_object_allocate __P((vm_size_t, vm_object_t));
+
+/*
+ *	vm_object_init:
+ *	Set up the object lists, locks and hash buckets, then initialize
+ *	the two static objects ("size" is the size of kernel_object).
+ */
+void vm_object_init(size)
+	vm_size_t	size;
+{
+	register int	bucket;
+
+	simple_lock_init(&vm_cache_lock);
+	simple_lock_init(&vm_object_list_lock);
+	TAILQ_INIT(&vm_object_cached_list);
+	TAILQ_INIT(&vm_object_list);
+	vm_object_count = 0;
+
+	for (bucket = 0; bucket != VM_OBJECT_HASH_COUNT; bucket++)
+		TAILQ_INIT(&vm_object_hashtable[bucket]);
+	/* Both static objects go through the common initializer. */
+	kernel_object = &kernel_object_store;
+	_vm_object_allocate(size, kernel_object);
+
+	kmem_object = &kmem_object_store;
+	_vm_object_allocate(VM_KMEM_SIZE + VM_MBUF_SIZE, kmem_object);
+}
+
+/*
+ *	vm_object_allocate:
+ *
+ *	Allocate storage for an object (M_WAITOK), initialize it with
+ *	the given size and return it.
+ */
+vm_object_t vm_object_allocate(size)
+	vm_size_t	size;
+{
+	register vm_object_t	newobj;
+
+	newobj = (vm_object_t)
+	    malloc((u_long)sizeof *newobj, M_VMOBJ, M_WAITOK);
+	/* Same initializer that vm_object_init uses for the static objects. */
+	_vm_object_allocate(size, newobj);
+
+	return (newobj);
+}
+
+static void
+_vm_object_allocate(size, object)
+	vm_size_t		size;
+	register vm_object_t	object;
+{
+	/*
+	 *	Fill in a caller-supplied object: it starts out with one
+	 *	reference, no pages, no pager and no shadow or copy object.
+	 */
+	TAILQ_INIT(&object->memq);
+	vm_object_lock_init(object);
+	object->size = size;
+	object->ref_count = 1;
+	object->flags = OBJ_INTERNAL;	/* vm_allocate_with_pager will reset */
+	object->resident_page_count = 0;
+	object->paging_in_progress = 0;
+	object->paging_offset = 0;
+	object->shadow_offset = (vm_offset_t) 0;
+	object->pager = NULL;
+	object->shadow = NULL;
+	object->copy = NULL;
+
+	/* Append the object to the global list and update the counters. */
+	simple_lock(&vm_object_list_lock);
+	TAILQ_INSERT_TAIL(&vm_object_list, object, object_list);
+	vm_object_count += 1;
+	cnt.v_nzfod += atop(size);
+	simple_unlock(&vm_object_list_lock);
+}
+
+/*
+ *	vm_object_reference:
+ *
+ *	Add one reference to the given object; NULL is ignored.
+ */
+void vm_object_reference(object)
+	register vm_object_t	object;
+{
+	/* The count is only changed while the object's lock is held. */
+	if (object != NULL) {
+		vm_object_lock(object);
+		object->ref_count += 1;
+		vm_object_unlock(object);
+	}
+}
+
+/*
+ *	vm_object_deallocate:
+ *
+ *	Release a reference to the specified object, gained through
+ *	vm_object_allocate or vm_object_reference.  When the last
+ *	reference goes, the object is either put on the cached list
+ *	(OBJ_CANPERSIST) or terminated; after a termination the loop
+ *	goes on to drop the reference the object held on its shadow.
+ *
+ *	No object may be locked.
+ */
+void vm_object_deallocate(object)
+	register vm_object_t	object;
+{
+	vm_object_t	temp;
+
+	while (object != NULL) {
+
+		/*
+		 *	The cache holds a reference (uncounted) to
+		 *	the object; we must lock it before removing
+		 *	the object.
+		 */
+
+		vm_object_cache_lock();
+
+		/*
+		 *	Lose the reference
+		 */
+		vm_object_lock(object);
+		if (--(object->ref_count) != 0) {
+
+			/*
+			 *	If there are still references, then
+			 *	we are done.
+			 */
+			vm_object_unlock(object);
+			vm_object_cache_unlock();
+			return;
+		}
+
+		/*
+		 *	See if this object can persist.  If so, enter
+		 *	it in the cache, then deactivate all of its
+		 *	pages.
+		 */
+
+		if (object->flags & OBJ_CANPERSIST) {
+
+			TAILQ_INSERT_TAIL(&vm_object_cached_list, object,
+				cached_list);
+			vm_object_cached++;
+			vm_object_cache_unlock();
+
+			vm_object_deactivate_pages(object);
+			vm_object_unlock(object);
+
+			vm_object_cache_trim();
+			return;
+		}
+
+		/*
+		 *	Make sure no one can look us up now.
+		 */
+		vm_object_remove(object->pager);
+		vm_object_cache_unlock();
+
+		temp = object->shadow;	/* saved: object is freed below */
+		vm_object_terminate(object);
+			/* unlocks and deallocates object */
+		object = temp;
+	}
+}
+
+
+/*
+ *	vm_object_terminate actually destroys the specified object, freeing
+ *	up all previously used resources (pages, pager, the object itself).
+ *
+ *	The object must be locked; it is unlocked exactly once, then freed.
+ */
+void vm_object_terminate(object)
+	register vm_object_t	object;
+{
+	register vm_page_t	p;
+	vm_object_t		shadow_object;
+
+	/*
+	 *	Detach the object from its shadow if we are the shadow's
+	 *	copy.
+	 */
+	if ((shadow_object = object->shadow) != NULL) {
+		vm_object_lock(shadow_object);
+		if (shadow_object->copy == object)
+			shadow_object->copy = NULL;
+#if 0
+		else if (shadow_object->copy != NULL)
+			panic("vm_object_terminate: copy/shadow inconsistency");
+#endif
+		vm_object_unlock(shadow_object);
+	}
+
+	/*
+	 * Wait until the pageout daemon is through with the object.
+	 */
+	while (object->paging_in_progress) {
+		vm_object_sleep((int)object, object, FALSE);
+		vm_object_lock(object);
+	}
+
+	/*
+	 * If not an internal object clean all the pages, removing them
+	 * from paging queues as we go.
+	 *
+	 * XXX need to do something in the event of a cleaning error.
+	 */
+	if ((object->flags & OBJ_INTERNAL) == 0) {
+		/* Stay locked: the page list is still walked below. */
+		(void) vm_object_page_clean(object, 0, 0, TRUE, TRUE);
+	}
+
+	/*
+	 * Now free the pages.
+	 * For internal objects, this also removes them from paging queues.
+	 */
+	while ((p = object->memq.tqh_first) != NULL) {
+		VM_PAGE_CHECK(p);
+		vm_page_lock_queues();
+		vm_page_free(p);
+		cnt.v_pfree++;
+		vm_page_unlock_queues();
+	}
+	/* One unlock for internal and non-internal objects alike. */
+	vm_object_unlock(object);
+
+	/*
+	 * Let the pager know object is dead.
+	 */
+	if (object->pager != NULL)
+		vm_pager_deallocate(object->pager);
+
+	simple_lock(&vm_object_list_lock);
+	TAILQ_REMOVE(&vm_object_list, object, object_list);
+	vm_object_count--;
+	simple_unlock(&vm_object_list_lock);
+
+	/*
+	 * Free the space for the object.
+	 */
+	free((caddr_t)object, M_VMOBJ);
+}
+
+/*
+ *	vm_object_page_clean
+ *
+ *	Clean all dirty pages whose offset lies in [start, end) of object
+ *	by handing each one to vm_pager_put().  If syncio is TRUE, page
+ *	cleaning is done synchronously.  If de_queue is TRUE, pages are
+ *	removed from any paging queue they were on; otherwise a cleaned
+ *	page is re-activated or re-deactivated to match the queue it was on.
+ *	Pages flagged PG_FICTITIOUS are skipped.
+ *
+ *	Odd semantics: if start == end, we clean everything.
+ *
+ *	The object must be locked; the lock is dropped around each put.
+ *
+ *	Returns TRUE if all was well (or object is NULL), FALSE if no pager
+ *	is available or a put failed.  We keep going after a failed put.
+ */
+boolean_t
+vm_object_page_clean(object, start, end, syncio, de_queue)
+	register vm_object_t	object;
+	register vm_offset_t	start;
+	register vm_offset_t	end;
+	boolean_t		syncio;
+	boolean_t		de_queue;
+{
+	register vm_page_t	p;
+	int onqueue;	/* 1: was active, -1: was inactive, 0: neither */
+	boolean_t noerror = TRUE;
+
+	if (object == NULL)
+		return (TRUE);
+
+	/*
+	 * If it is an internal object and there is no pager, attempt to
+	 * allocate one.  Note that vm_object_collapse may relocate one
+	 * from a collapsed object so we must recheck afterward.
+	 */
+	if ((object->flags & OBJ_INTERNAL) && object->pager == NULL) {
+		vm_object_collapse(object);
+		if (object->pager == NULL) {
+			vm_pager_t pager;
+
+			vm_object_unlock(object);
+			pager = vm_pager_allocate(PG_DFLT, (caddr_t)0,
+						  object->size, VM_PROT_ALL,
+						  (vm_offset_t)0);
+			if (pager)
+				vm_object_setpager(object, pager, 0, FALSE);
+			vm_object_lock(object);
+		}
+	}
+	if (object->pager == NULL)
+		return (FALSE);
+
+again:
+	/*
+	 * Wait until the pageout daemon is through with the object.
+	 */
+	while (object->paging_in_progress) {
+		vm_object_sleep((int)object, object, FALSE);
+		vm_object_lock(object);
+	}
+	/*
+	 * Loop through the object page list cleaning as necessary.
+	 */
+	for (p = object->memq.tqh_first; p != NULL; p = p->listq.tqe_next) {
+		if ((start == end || p->offset >= start && p->offset < end) &&
+		    !(p->flags & PG_FICTITIOUS)) {
+			if ((p->flags & PG_CLEAN) &&
+			    pmap_is_modified(VM_PAGE_TO_PHYS(p)))
+				p->flags &= ~PG_CLEAN;
+			/*
+			 * Remove the page from any paging queue.
+			 * This needs to be done if either we have been
+			 * explicitly asked to do so or it is about to
+			 * be cleaned (see comment below).
+			 */
+			if (de_queue || !(p->flags & PG_CLEAN)) {
+				vm_page_lock_queues();
+				if (p->flags & PG_ACTIVE) {
+					TAILQ_REMOVE(&vm_page_queue_active,
+						     p, pageq);
+					p->flags &= ~PG_ACTIVE;
+					cnt.v_active_count--;
+					onqueue = 1;
+				} else if (p->flags & PG_INACTIVE) {
+					TAILQ_REMOVE(&vm_page_queue_inactive,
+						     p, pageq);
+					p->flags &= ~PG_INACTIVE;
+					cnt.v_inactive_count--;
+					onqueue = -1;
+				} else
+					onqueue = 0;
+				vm_page_unlock_queues();
+			}
+			/*
+			 * To ensure the state of the page doesn't change
+			 * during the clean operation we do two things.
+			 * First we set the busy bit and write-protect all
+			 * mappings to ensure that write accesses to the
+			 * page block (in vm_fault).  Second, we remove
+			 * the page from any paging queue to foil the
+			 * pageout daemon (vm_pageout_scan).
+			 */
+			pmap_page_protect(VM_PAGE_TO_PHYS(p), VM_PROT_READ);
+			if (!(p->flags & PG_CLEAN)) {
+				p->flags |= PG_BUSY;
+				object->paging_in_progress++;
+				vm_object_unlock(object);
+				/*
+				 * XXX if put fails we mark the page as
+				 * clean to avoid an infinite loop.
+				 * Will lose changes to the page.
+				 */
+				if (vm_pager_put(object->pager, p, syncio)) {
+					printf("%s: pager_put error\n",
+					       "vm_object_page_clean");
+					p->flags |= PG_CLEAN;
+					noerror = FALSE;
+				}
+				vm_object_lock(object);
+				object->paging_in_progress--;
+				if (!de_queue && onqueue) {
+					vm_page_lock_queues();
+					if (onqueue > 0)
+						vm_page_activate(p);
+					else
+						vm_page_deactivate(p);
+					vm_page_unlock_queues();
+				}
+				p->flags &= ~PG_BUSY;
+				PAGE_WAKEUP(p);
+				goto again;	/* rescan from the list head */
+			}
+		}
+	}
+	return (noerror);
+}
+
+/*
+ *	vm_object_deactivate_pages
+ *
+ *	Hand every resident page of the object to vm_page_deactivate(),
+ *	taking the page queue lock around each call.
+ *
+ *	The object must be locked.
+ */
+void
+vm_object_deactivate_pages(object)
+	register vm_object_t	object;
+{
+	register vm_page_t	pg, upcoming;
+
+	for (upcoming = object->memq.tqh_first; (pg = upcoming) != NULL; ) {
+		upcoming = pg->listq.tqe_next;	/* fetched before the call */
+		vm_page_lock_queues();
+		vm_page_deactivate(pg);
+		vm_page_unlock_queues();
+	}
+}
+
+/*
+ *	Trim the object cache until at most vm_cache_max objects are cached.
+ */
+void
+vm_object_cache_trim()
+{
+	register vm_object_t	object;
+
+	vm_object_cache_lock();
+	while (vm_object_cached > vm_cache_max) {
+		object = vm_object_cached_list.tqh_first;
+		vm_object_cache_unlock();
+		/* Lookup adds a reference and unlinks an unreferenced object. */
+		if (object != vm_object_lookup(object->pager))
+			panic("vm_object_cache_trim: I'm sooo confused.");
+		/* NOTE(review): pager_cache is defined elsewhere - confirm. */
+		pager_cache(object, FALSE);
+		/* The count is only re-tested with the cache lock held. */
+		vm_object_cache_lock();
+	}
+	vm_object_cache_unlock();
+}
+
+/*
+ *	vm_object_pmap_copy:
+ *
+ *	For each resident page of the object with an offset in
+ *	[start, end): reduce its mappings to VM_PROT_READ with
+ *	pmap_page_protect() and flag the page PG_COPYONWRITE.
+ *
+ *	The object must *not* be locked.  A NULL object is ignored.
+ */
+void vm_object_pmap_copy(object, start, end)
+	register vm_object_t	object;
+	register vm_offset_t	start;
+	register vm_offset_t	end;
+{
+	register vm_page_t	pg;
+
+	if (object == NULL)
+		return;
+
+	vm_object_lock(object);
+	for (pg = object->memq.tqh_first; pg != NULL; pg = pg->listq.tqe_next) {
+		if (pg->offset < start || pg->offset >= end)
+			continue;
+		pmap_page_protect(VM_PAGE_TO_PHYS(pg), VM_PROT_READ);
+		pg->flags |= PG_COPYONWRITE;
+	}
+	vm_object_unlock(object);
+}
+
+/*
+ *	vm_object_pmap_remove:
+ *
+ *	Calls pmap_page_protect(..., VM_PROT_NONE) on every resident
+ *	page of the object whose offset lies in [start, end).
+ *
+ *	The object must *not* be locked.  A NULL object is ignored.
+ */
+void vm_object_pmap_remove(object, start, end)
+	register vm_object_t	object;
+	register vm_offset_t	start;
+	register vm_offset_t	end;
+{
+	register vm_page_t	pg;
+
+	if (object == NULL)
+		return;
+
+	vm_object_lock(object);
+	for (pg = object->memq.tqh_first; pg != NULL; pg = pg->listq.tqe_next)
+		if (!(pg->offset < start || pg->offset >= end))
+			pmap_page_protect(VM_PAGE_TO_PHYS(pg), VM_PROT_NONE);
+	vm_object_unlock(object);
+}
+
+/*
+ *	vm_object_copy:
+ *
+ *	Make a copy-on-write copy of [src_offset, src_offset + size) of
+ *	src_object and flag the affected source pages PG_COPYONWRITE.
+ *	The function is void: the copy, carrying one new reference, comes
+ *	back in *dst_object with its starting offset in *dst_offset.
+ *
+ *	If the source has no pager or is OBJ_INTERNAL the copy is deferred:
+ *	the source itself is returned and *src_needs_copy is set to TRUE.
+ */
+void vm_object_copy(src_object, src_offset, size,
+		    dst_object, dst_offset, src_needs_copy)
+	register vm_object_t	src_object;
+	vm_offset_t		src_offset;
+	vm_size_t		size;
+	vm_object_t		*dst_object;	/* OUT */
+	vm_offset_t		*dst_offset;	/* OUT */
+	boolean_t		*src_needs_copy;	/* OUT */
+{
+	register vm_object_t	new_copy;
+	register vm_object_t	old_copy;
+	vm_offset_t		new_start, new_end;
+
+	register vm_page_t	p;
+
+	if (src_object == NULL) {
+		/*
+		 *	Nothing to copy
+		 */
+		*dst_object = NULL;
+		*dst_offset = 0;
+		*src_needs_copy = FALSE;
+		return;
+	}
+
+	/*
+	 *	If the object's pager is null_pager or the
+	 *	default pager, we don't have to make a copy
+	 *	of it.  Instead, we set the needs copy flag and
+	 *	make a shadow later.
+	 */
+
+	vm_object_lock(src_object);
+	if (src_object->pager == NULL ||
+	    (src_object->flags & OBJ_INTERNAL)) {
+
+		/*
+		 *	Make another reference to the object
+		 */
+		src_object->ref_count++;
+
+		/*
+		 *	Mark all of the pages copy-on-write.
+		 */
+		for (p = src_object->memq.tqh_first; p; p = p->listq.tqe_next)
+			if (src_offset <= p->offset &&
+			    p->offset < src_offset + size)
+				p->flags |= PG_COPYONWRITE;
+		vm_object_unlock(src_object);
+
+		*dst_object = src_object;
+		*dst_offset = src_offset;
+		
+		/*
+		 *	Must make a shadow when write is desired
+		 */
+		*src_needs_copy = TRUE;
+		return;
+	}
+
+	/*
+	 *	Try to collapse the object before copying it.
+	 */
+	vm_object_collapse(src_object);
+
+	/*
+	 *	If the object has a pager, the pager wants to
+	 *	see all of the changes.  We need a copy-object
+	 *	for the changed pages.
+	 *
+	 *	If there is a copy-object, and it is empty,
+	 *	no changes have been made to the object since the
+	 *	copy-object was made.  We can use the same copy-
+	 *	object.
+	 */
+
+    Retry1:
+	old_copy = src_object->copy;
+	if (old_copy != NULL) {
+		/*
+		 *	Try to get the locks (out of order)
+		 */
+		if (!vm_object_lock_try(old_copy)) {
+			vm_object_unlock(src_object);
+
+			/* should spin a bit here... */
+			vm_object_lock(src_object);
+			goto Retry1;
+		}
+
+		if (old_copy->resident_page_count == 0 &&
+		    old_copy->pager == NULL) {
+			/*
+			 *	Return another reference to
+			 *	the existing copy-object.
+			 */
+			old_copy->ref_count++;
+			vm_object_unlock(old_copy);
+			vm_object_unlock(src_object);
+			*dst_object = old_copy;
+			*dst_offset = src_offset;
+			*src_needs_copy = FALSE;
+			return;
+		}
+		vm_object_unlock(old_copy);
+	}
+	vm_object_unlock(src_object);
+
+	/*
+	 *	If the object has a pager, the pager wants
+	 *	to see all of the changes.  We must make
+	 *	a copy-object and put the changed pages there.
+	 *
+	 *	The copy-object is always made large enough to
+	 *	completely shadow the original object, since
+	 *	it may have several users who want to shadow
+	 *	the original object at different points.
+	 */
+
+	new_copy = vm_object_allocate(src_object->size);
+
+    Retry2:
+	vm_object_lock(src_object);
+	/*
+	 *	Copy object may have changed while we were unlocked
+	 */
+	old_copy = src_object->copy;
+	if (old_copy != NULL) {
+		/*
+		 *	Try to get the locks (out of order)
+		 */
+		if (!vm_object_lock_try(old_copy)) {
+			vm_object_unlock(src_object);
+			goto Retry2;
+		}
+
+		/*
+		 *	Consistency check
+		 */
+		if (old_copy->shadow != src_object ||
+		    old_copy->shadow_offset != (vm_offset_t) 0)
+			panic("vm_object_copy: copy/shadow inconsistency");
+
+		/*
+		 *	Make the old copy-object shadow the new one.
+		 *	It will receive no more pages from the original
+		 *	object.
+		 */
+
+		src_object->ref_count--;	/* remove ref. from old_copy */
+		old_copy->shadow = new_copy;
+		new_copy->ref_count++;		/* locking not needed - we
+						   have the only pointer */
+		vm_object_unlock(old_copy);	/* done with old_copy */
+	}
+
+	new_start = (vm_offset_t) 0;	/* always shadow original at 0 */
+	new_end   = (vm_offset_t) new_copy->size; /* for the whole object */
+
+	/*
+	 *	Point the new copy at the existing object.
+	 */
+
+	new_copy->shadow = src_object;
+	new_copy->shadow_offset = new_start;
+	src_object->ref_count++;
+	src_object->copy = new_copy;
+
+	/*
+	 *	Mark all the affected pages of the existing object
+	 *	copy-on-write.
+	 */
+	for (p = src_object->memq.tqh_first; p != NULL; p = p->listq.tqe_next)
+		if ((new_start <= p->offset) && (p->offset < new_end))
+			p->flags |= PG_COPYONWRITE;
+
+	vm_object_unlock(src_object);
+
+	*dst_object = new_copy;
+	*dst_offset = src_offset - new_start;
+	*src_needs_copy = FALSE;
+}
+
+/*
+ *	vm_object_shadow:
+ *
+ *	Allocate a new object of the given length that shadows
+ *	*object starting at *offset, then hand the new object
+ *	back through the same two pointers.
+ *
+ *	On return *object is the new object and *offset is 0.
+ *	Allocation failure panics.
+ */
+
+void vm_object_shadow(object, offset, length)
+	vm_object_t	*object;	/* IN/OUT */
+	vm_offset_t	*offset;	/* IN/OUT */
+	vm_size_t	length;
+{
+	register vm_object_t	backing;
+	register vm_object_t	fresh;
+
+	/*
+	 *	Remember what we were asked to shadow.
+	 */
+
+	backing = *object;
+
+	/*
+	 *	Get the shadowing object, sized to the given length.
+	 */
+
+	fresh = vm_object_allocate(length);
+	if (fresh == NULL)
+		panic("vm_object_shadow: no object for shadowing");
+
+	/*
+	 *	Link it to the source at the caller's offset.  The
+	 *	source's reference count is deliberately left alone:
+	 *	the caller's reference moves to the new object, which
+	 *	in turn now refers to the source.
+	 *	Net effect: no reference count changes.
+	 */
+
+	fresh->shadow_offset = *offset;
+	fresh->shadow = backing;
+
+	/*
+	 *	Report the new object, which is entered at offset 0.
+	 */
+
+	*object = fresh;
+	*offset = 0;
+}
+
+/*
+ *	Record pager and paging_offset in the object while holding
+ *	its lock.  read_only is accepted but not used.
+ */
+
+void vm_object_setpager(object, pager, paging_offset, read_only)
+	vm_object_t	object;
+	vm_pager_t	pager;
+	vm_offset_t	paging_offset;
+	boolean_t	read_only;
+{
+#ifdef	lint
+	read_only++;	/* No longer used */
+#endif
+
+	vm_object_lock(object);			/* XXX ? */
+	object->paging_offset = paging_offset;
+	object->pager = pager;
+	vm_object_unlock(object);			/* XXX ? */
+}
+
+/*
+ *	vm_object_hash maps a pager to its bucket index in the hash table.
+ */
+
+#define vm_object_hash(pager) \
+	(((unsigned)(pager))%VM_OBJECT_HASH_COUNT)
+
+/*
+ *	vm_object_lookup finds the hashed object for a pager and adds a
+ *	reference, unlinking it from the cached list if it had none.
+ */
+
+vm_object_t vm_object_lookup(pager)
+	vm_pager_t	pager;
+{
+	register vm_object_hash_entry_t	he;
+	vm_object_t			found;
+
+	vm_object_cache_lock();
+
+	for (he = vm_object_hashtable[vm_object_hash(pager)].tqh_first;
+	     he != NULL;
+	     he = he->hash_links.tqe_next) {
+		found = he->object;
+		if (found->pager != pager)
+			continue;
+		vm_object_lock(found);
+		if (found->ref_count == 0) {
+			TAILQ_REMOVE(&vm_object_cached_list, found,
+				cached_list);
+			vm_object_cached--;
+		}
+		found->ref_count++;
+		vm_object_unlock(found);
+		vm_object_cache_unlock();
+		return(found);
+	}
+
+	vm_object_cache_unlock();
+	return(NULL);		/* no object uses this pager */
+}
+
+/*
+ *	vm_object_enter records the object in the hash table under
+ *	its pager and marks the object OBJ_CANPERSIST.
+ */
+
+void vm_object_enter(object, pager)
+	vm_object_t	object;
+	vm_pager_t	pager;
+{
+	register vm_object_hash_entry_t	he;
+	struct vm_object_hash_head	*chain;
+
+	/*
+	 *	Null objects are never cached, and an object
+	 *	with the null pager cannot be.
+	 */
+	if (object == NULL || pager == NULL)
+		return;
+
+	/*
+	 *	Build the entry before taking the cache lock.
+	 */
+	he = (vm_object_hash_entry_t)
+		malloc((u_long)sizeof *he, M_VMOBJHASH, M_WAITOK);
+	he->object = object;
+	object->flags |= OBJ_CANPERSIST;
+	chain = &vm_object_hashtable[vm_object_hash(pager)];
+
+	vm_object_cache_lock();
+	TAILQ_INSERT_TAIL(chain, he, hash_links);
+	vm_object_cache_unlock();
+}
+
+/*
+ *	vm_object_remove:
+ *
+ *	Unlink and free the first hash table entry whose object
+ *	uses the given pager; a pager with no entry is ignored.
+ *	Note:  The object cache is assumed to be locked by the
+ *	caller.  XXX this should be fixed by reorganizing
+ *	vm_object_deallocate.
+ */
+void
+vm_object_remove(pager)
+	register vm_pager_t	pager;
+{
+	struct vm_object_hash_head	*chain;
+	register vm_object_hash_entry_t	he;
+
+	chain = &vm_object_hashtable[vm_object_hash(pager)];
+
+	/* Advance to the matching entry, or run off the end. */
+	he = chain->tqh_first;
+	while (he != NULL && he->object->pager != pager)
+		he = he->hash_links.tqe_next;
+
+	/* Only the first match is removed. */
+	if (he != NULL) {
+		TAILQ_REMOVE(chain, he, hash_links);
+		free((caddr_t)he, M_VMOBJHASH);
+	}
+}
+
+/*
+ *	vm_object_cache_clear empties the object cache, taking the
+ *	cached objects one at a time from the head of the list.
+ */
+
+void vm_object_cache_clear()
+{
+	register vm_object_t	victim;
+
+	vm_object_cache_lock();
+	for (;;) {
+		/*
+		 *	Done once the list of cached objects is empty.
+		 */
+		victim = vm_object_cached_list.tqh_first;
+		if (victim == NULL)
+			break;
+		vm_object_cache_unlock();
+
+		/*
+		 * Note: the reference must come from vm_object_lookup
+		 * and not vm_object_reference, because the logic for
+		 * removing an object from the cache lies in lookup.
+		 */
+		if (victim != vm_object_lookup(victim->pager))
+			panic("vm_object_cache_clear: I'm sooo confused.");
+		pager_cache(victim, FALSE);
+		vm_object_cache_lock();
+	}
+	vm_object_cache_unlock();
+}
+
+boolean_t	vm_object_collapse_allowed = TRUE;
+/*
+ *	vm_object_collapse:
+ *
+ *	Repeatedly shorten the shadow chain below object.  A backing
+ *	object with a single reference is merged into the parent and
+ *	freed; otherwise it is bypassed if the parent already holds a
+ *	page for every backing page it could see.  Does nothing when
+ *	vm_object_collapse_allowed is FALSE.
+ *
+ *	Requires the object locked and the page queues unlocked.
+ */
+void vm_object_collapse(object)
+	register vm_object_t	object;
+
+{
+	register vm_object_t	backing_object;
+	register vm_offset_t	backing_offset;
+	register vm_size_t	size;
+	register vm_offset_t	new_offset;
+	register vm_page_t	p, pp;
+
+	if (!vm_object_collapse_allowed)
+		return;
+
+	while (TRUE) {
+		/*
+		 *	Verify that the conditions are right for collapse:
+		 *
+		 *	The object exists and no pages in it are currently
+		 *	being paged out (or have ever been paged out).
+		 */
+		if (object == NULL ||
+		    object->paging_in_progress != 0 ||
+		    object->pager != NULL)
+			return;
+
+		/*
+		 *		There is a backing object, and
+		 */
+	
+		if ((backing_object = object->shadow) == NULL)
+			return;
+	
+		vm_object_lock(backing_object);
+		/*
+		 *	...
+		 *		The backing object is not read_only,
+		 *		and no pages in the backing object are
+		 *		currently being paged out.
+		 *		The backing object is internal.
+		 */
+	
+		if ((backing_object->flags & OBJ_INTERNAL) == 0 ||
+		    backing_object->paging_in_progress != 0) {
+			vm_object_unlock(backing_object);
+			return;
+		}
+	
+		/*
+		 *	The backing object can't be a copy-object:
+		 *	the shadow_offset for the copy-object must stay
+		 *	as 0.  Furthermore (for the 'we have all the
+		 *	pages' case), if we bypass backing_object and
+		 *	just shadow the next object in the chain, old
+		 *	pages from that object would then have to be copied
+		 *	BOTH into the (former) backing_object and into the
+		 *	parent object.
+		 */
+		if (backing_object->shadow != NULL &&
+		    backing_object->shadow->copy != NULL) {
+			vm_object_unlock(backing_object);
+			return;
+		}
+
+		/*
+		 *	We know that we can either collapse the backing
+		 *	object (if the parent is the only reference to
+		 *	it) or (perhaps) remove the parent's reference
+		 *	to it.
+		 */
+
+		backing_offset = object->shadow_offset;
+		size = object->size;
+
+		/*
+		 *	If there is exactly one reference to the backing
+		 *	object, we can collapse it into the parent.
+		 */
+	
+		if (backing_object->ref_count == 1) {
+
+			/*
+			 *	We can collapse the backing object.
+			 *
+			 *	Move all in-memory pages from backing_object
+			 *	to the parent.  Pages that have been paged out
+			 *	will be overwritten by any of the parent's
+			 *	pages that shadow them.
+			 */
+
+			while ((p = backing_object->memq.tqh_first) != NULL) {
+				new_offset = (p->offset - backing_offset);
+
+				/*
+				 *	If the parent has a page here, or if
+				 *	this page falls outside the parent,
+				 *	dispose of it.
+				 *
+				 *	Otherwise, move it as planned.
+				 */
+
+				if (p->offset < backing_offset ||
+				    new_offset >= size) {
+					vm_page_lock_queues();
+					vm_page_free(p);
+					vm_page_unlock_queues();
+				} else {
+				    pp = vm_page_lookup(object, new_offset);
+				    if (pp != NULL && !(pp->flags & PG_FAKE)) {
+					vm_page_lock_queues();
+					vm_page_free(p);
+					vm_page_unlock_queues();
+				    }
+				    else {
+					if (pp) {
+					    /* may be someone waiting for it */
+					    PAGE_WAKEUP(pp);
+					    vm_page_lock_queues();
+					    vm_page_free(pp);
+					    vm_page_unlock_queues();
+					}
+					vm_page_rename(p, object, new_offset);
+				    }
+				}
+			}
+
+			/*
+			 *	Move the pager from backing_object to object.
+			 *
+			 *	XXX We're only using part of the paging space
+			 *	for keeps now... we ought to discard the
+			 *	unused portion.
+			 */
+
+			if (backing_object->pager) {
+				object->pager = backing_object->pager;
+				object->paging_offset = backing_offset +
+					backing_object->paging_offset;
+				backing_object->pager = NULL;
+			}
+
+			/*
+			 *	Object now shadows whatever backing_object did.
+			 *	Note that the reference to backing_object->shadow
+			 *	moves from within backing_object to within object.
+			 */
+
+			object->shadow = backing_object->shadow;
+			object->shadow_offset += backing_object->shadow_offset;
+			if (object->shadow != NULL &&
+			    object->shadow->copy != NULL) {
+				panic("vm_object_collapse: we collapsed a copy-object!");
+			}
+			/*
+			 *	Discard backing_object.
+			 *
+			 *	Since the backing object has no pages, no
+			 *	pager left, and no object references within it,
+			 *	all that is necessary is to dispose of it.
+			 */
+
+			vm_object_unlock(backing_object);
+
+			simple_lock(&vm_object_list_lock);
+			TAILQ_REMOVE(&vm_object_list, backing_object,
+			    object_list);
+			vm_object_count--;
+			simple_unlock(&vm_object_list_lock);
+
+			free((caddr_t)backing_object, M_VMOBJ);
+
+			object_collapses++;
+		}
+		else {
+			/*
+			 *	If all of the pages in the backing object are
+			 *	shadowed by the parent object, the parent
+			 *	object no longer has to shadow the backing
+			 *	object; it can shadow the next one in the
+			 *	chain.
+			 *
+			 *	The backing object must not be paged out - we'd
+			 *	have to check all of the paged-out pages, as
+			 *	well.
+			 */
+
+			if (backing_object->pager != NULL) {
+				vm_object_unlock(backing_object);
+				return;
+			}
+
+			/*
+			 *	Should have a check for a 'small' number
+			 *	of pages here.
+			 */
+
+			for (p = backing_object->memq.tqh_first;
+			     p != NULL;
+			     p = p->listq.tqe_next) {
+				new_offset = (p->offset - backing_offset);
+
+				/*
+				 *	If the parent has a page here, or if
+				 *	this page falls outside the parent,
+				 *	keep going.
+				 *
+				 *	Otherwise, the backing_object must be
+				 *	left in the chain.
+				 */
+
+				if (p->offset >= backing_offset &&
+				    new_offset < size &&
+				    ((pp = vm_page_lookup(object, new_offset))
+				      == NULL ||
+				     (pp->flags & PG_FAKE))) {
+					/*
+					 *	Page still needed.
+					 *	Can't go any further.
+					 */
+					vm_object_unlock(backing_object);
+					return;
+				}
+			}
+
+			/*
+			 *	Make the parent shadow the next object
+			 *	in the chain.  Deallocating backing_object
+			 *	will not remove it, since its reference
+			 *	count is at least 2.
+			 */
+
+			object->shadow = backing_object->shadow;
+			vm_object_reference(object->shadow);
+			object->shadow_offset += backing_object->shadow_offset;
+
+			/*
+			 *	Backing object might have had a copy pointer
+			 *	to us.  If it did, clear it.
+			 */
+			if (backing_object->copy == object) {
+				backing_object->copy = NULL;
+			}
+	
+			/*	Drop the reference count on backing_object.
+			 *	Since its ref_count was at least 2, it
+			 *	will not vanish; so we don't need to call
+			 *	vm_object_deallocate.
+			 */
+			backing_object->ref_count--;
+			vm_object_unlock(backing_object);
+
+			object_bypasses ++;
+
+		}
+
+		/*
+		 *	Try again with this object's new backing object.
+		 */
+	}
+}
+
+/*
+ *	vm_object_page_remove: [internal]
+ *
+ *	Frees every resident page of the object whose offset lies
+ *	in [start, end), after revoking its mappings (VM_PROT_NONE).
+ *
+ *	The object must be locked.  A NULL object is ignored.
+ */
+void vm_object_page_remove(object, start, end)
+	register vm_object_t	object;
+	register vm_offset_t	start;
+	register vm_offset_t	end;
+{
+	register vm_page_t	pg, succ;
+
+	if (object == NULL)
+		return;
+
+	for (pg = object->memq.tqh_first; pg != NULL; pg = succ) {
+		succ = pg->listq.tqe_next;	/* saved before pg is freed */
+		if (pg->offset < start || pg->offset >= end)
+			continue;
+		pmap_page_protect(VM_PAGE_TO_PHYS(pg), VM_PROT_NONE);
+		vm_page_lock_queues();
+		vm_page_free(pg);
+		vm_page_unlock_queues();
+	}
+}
+
+/*
+ *	Routine:	vm_object_coalesce
+ *	Function:	Coalesces two objects backing up adjoining
+ *			regions of memory into a single object.
+ *
+ *	returns TRUE if objects were combined (also if prev_object is NULL).
+ *
+ *	NOTE:	Only works at the moment if the second object is NULL -
+ *		if it's not, which object do we lock first?
+ *
+ *	Parameters:
+ *		prev_object	First object to coalesce
+ *		prev_offset	Offset into prev_object
+ *		next_object	Second object to coalesce
+ *		next_offset	Offset into next_object (unused)
+ *
+ *		prev_size	Size of reference to prev_object
+ *		next_size	Size of reference to next_object
+ *
+ *	Conditions:
+ *	The object must *not* be locked.
+ */
+boolean_t vm_object_coalesce(prev_object, next_object,
+			prev_offset, next_offset,
+			prev_size, next_size)
+
+	register vm_object_t	prev_object;
+	vm_object_t	next_object;
+	vm_offset_t	prev_offset, next_offset;
+	vm_size_t	prev_size, next_size;
+{
+	vm_size_t	newsize;
+
+#ifdef	lint
+	next_offset++;
+#endif
+
+	if (next_object != NULL) {
+		return(FALSE);
+	}
+
+	if (prev_object == NULL) {
+		return(TRUE);
+	}
+
+	vm_object_lock(prev_object);
+
+	/*
+	 *	Try to collapse the object first
+	 */
+	vm_object_collapse(prev_object);
+
+	/*
+	 *	Can't coalesce if:
+	 *	. more than one reference
+	 *	. paged out
+	 *	. shadows another object
+	 *	. has a copy elsewhere
+	 *	(any of which mean that the pages not mapped to
+	 *	prev_entry may be in use anyway)
+	 */
+
+	if (prev_object->ref_count > 1 ||
+		prev_object->pager != NULL ||
+		prev_object->shadow != NULL ||
+		prev_object->copy != NULL) {
+		vm_object_unlock(prev_object);
+		return(FALSE);
+	}
+
+	/*
+	 *	Remove any pages that may still be in the object from
+	 *	a previous deallocation.
+	 */
+
+	vm_object_page_remove(prev_object,
+			prev_offset + prev_size,
+			prev_offset + prev_size + next_size);
+
+	/*
+	 *	Extend the object if necessary (it is never shrunk here).
+	 */
+	newsize = prev_offset + prev_size + next_size;
+	if (newsize > prev_object->size)
+		prev_object->size = newsize;
+
+	vm_object_unlock(prev_object);
+	return(TRUE);
+}
+
+/*
+ *	vm_object_print:	[ debug ] dump an object, and its pages if full
+ */
+void vm_object_print(object, full)
+	vm_object_t	object;
+	boolean_t	full;
+{
+	register vm_page_t	p;
+	extern int indent;	/* indentation consumed by iprintf */
+	register int count;	/* pages printed on the current line */
+
+	if (object == NULL)
+		return;
+
+	iprintf("Object 0x%x: size=0x%x, res=%d, ref=%d, ",
+		(int) object, (int) object->size,
+		object->resident_page_count, object->ref_count);
+	printf("pager=0x%x+0x%x, shadow=(0x%x)+0x%x\n",
+	       (int) object->pager, (int) object->paging_offset,
+	       (int) object->shadow, (int) object->shadow_offset);
+	printf("cache: next=0x%x, prev=0x%x\n",
+	       (int) object->cached_list.tqe_next,
+	       (int) object->cached_list.tqe_prev);
+
+	if (!full)
+		return;
+
+	indent += 2;
+	count = 0;
+	for (p = object->memq.tqh_first; p != NULL; p = p->listq.tqe_next) {
+		if (count == 0)
+			iprintf("memory:=");
+		else if (count == 6) {	/* six pages per output line */
+			printf("\n");
+			iprintf(" ...");
+			count = 0;
+		} else
+			printf(",");
+		count++;
+		printf("(off=0x%x,page=0x%x)",
+		       (int) p->offset, (int) VM_PAGE_TO_PHYS(p));
+	}
+	if (count != 0)
+		printf("\n");
+	indent -= 2;
+}
diff --git a/sys/vm/vm_object.h b/sys/vm/vm_object.h
new file mode 100644
index 00000000000..5e220acd47c
--- /dev/null
+++ b/sys/vm/vm_object.h
@@ -0,0 +1,173 @@
+/* 
+ * Copyright (c) 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * The Mach Operating System project at Carnegie-Mellon University.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)vm_object.h	8.3 (Berkeley) 1/12/94
+ *
+ *
+ * Copyright (c) 1987, 1990 Carnegie-Mellon University.
+ * All rights reserved.
+ *
+ * Authors: Avadis Tevanian, Jr., Michael Wayne Young
+ * 
+ * Permission to use, copy, modify and distribute this software and
+ * its documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ * 
+ * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 
+ * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND 
+ * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ * 
+ * Carnegie Mellon requests users of this software to return to
+ *
+ *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
+ *  School of Computer Science
+ *  Carnegie Mellon University
+ *  Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie the
+ * rights to redistribute these changes.
+ */
+
+/*
+ *	Virtual memory object module definitions.
+ */
+
+#ifndef	_VM_OBJECT_
+#define	_VM_OBJECT_
+
+#include <vm/vm_page.h>
+#include <vm/vm_pager.h>
+
+/*
+ *	Types defined:
+ *
+ *	vm_object_t		Virtual memory object.
+ */
+
+struct vm_object {
+	struct pglist		memq;		/* Resident pages */
+	TAILQ_ENTRY(vm_object)	object_list;	/* list of all objects */
+	u_short			flags;		/* OBJ_* flags, see below */
+	u_short			paging_in_progress; /* Paging (in or out) so
+						    don't collapse or destroy */
+	simple_lock_data_t	Lock;		/* Synchronization */
+	int			ref_count;	/* Reference count */
+	vm_size_t		size;		/* Object size */
+	int			resident_page_count;
+						/* number of resident pages */
+	struct vm_object	*copy;		/* Object that holds copies of
+						   my changed pages */
+	vm_pager_t		pager;		/* Where to get data */
+	vm_offset_t		paging_offset;	/* Offset into paging space */
+	struct vm_object	*shadow;	/* Object I shadow (backing) */
+	vm_offset_t		shadow_offset;	/* Offset in shadow */
+	TAILQ_ENTRY(vm_object)	cached_list;	/* for persistence */
+};
+/*
+ * Flags (values for the flags field of struct vm_object)
+ */
+#define OBJ_CANPERSIST	0x0001	/* allow to persist */
+#define OBJ_INTERNAL	0x0002	/* internally created object */
+#define OBJ_ACTIVE	0x0004	/* used to mark active objects */
+
+TAILQ_HEAD(vm_object_hash_head, vm_object_hash_entry);	/* one hash bucket */
+
+struct vm_object_hash_entry {
+	TAILQ_ENTRY(vm_object_hash_entry)  hash_links;	/* hash chain links */
+	vm_object_t			   object;	/* object represented */
+};
+
+typedef struct vm_object_hash_entry	*vm_object_hash_entry_t;
+
+#ifdef	KERNEL
+TAILQ_HEAD(object_q, vm_object);
+/* NOTE(review): the variables below are declared without `extern'. */
+struct object_q	vm_object_cached_list;	/* list of objects persisting */
+int		vm_object_cached;	/* size of cached list */
+simple_lock_data_t	vm_cache_lock;	/* lock for object cache */
+
+struct object_q	vm_object_list;		/* list of allocated objects */
+long		vm_object_count;	/* count of all objects */
+simple_lock_data_t	vm_object_list_lock;
+					/* lock for object list and count */
+
+vm_object_t	kernel_object;		/* the single kernel object */
+vm_object_t	kmem_object;
+
+#define	vm_object_cache_lock()		simple_lock(&vm_cache_lock)
+#define	vm_object_cache_unlock()	simple_unlock(&vm_cache_lock)
+#endif /* KERNEL */
+/* Operations on an object's own Lock; vm_object_sleep hands it to thread_sleep. */
+#define	vm_object_lock_init(object)	simple_lock_init(&(object)->Lock)
+#define	vm_object_lock(object)		simple_lock(&(object)->Lock)
+#define	vm_object_unlock(object)	simple_unlock(&(object)->Lock)
+#define	vm_object_lock_try(object)	simple_lock_try(&(object)->Lock)
+#define	vm_object_sleep(event, object, interruptible) \
+					thread_sleep((event), &(object)->Lock, (interruptible))
+
+#ifdef KERNEL	/* function prototypes for the object module */
+vm_object_t	 vm_object_allocate __P((vm_size_t));
+void		 vm_object_cache_clear __P((void));
+void		 vm_object_cache_trim __P((void));
+boolean_t	 vm_object_coalesce __P((vm_object_t, vm_object_t,
+		    vm_offset_t, vm_offset_t, vm_size_t, vm_size_t));
+void		 vm_object_collapse __P((vm_object_t));
+void		 vm_object_copy __P((vm_object_t, vm_offset_t, vm_size_t,
+		    vm_object_t *, vm_offset_t *, boolean_t *));
+void		 vm_object_deactivate_pages __P((vm_object_t));
+void		 vm_object_deallocate __P((vm_object_t));
+void		 vm_object_enter __P((vm_object_t, vm_pager_t));
+void		 vm_object_init __P((vm_size_t));
+vm_object_t	 vm_object_lookup __P((vm_pager_t));
+boolean_t	 vm_object_page_clean __P((vm_object_t,
+		    vm_offset_t, vm_offset_t, boolean_t, boolean_t));
+void		 vm_object_page_remove __P((vm_object_t,
+		    vm_offset_t, vm_offset_t));
+void		 vm_object_pmap_copy __P((vm_object_t,
+		    vm_offset_t, vm_offset_t));
+void		 vm_object_pmap_remove __P((vm_object_t,
+		    vm_offset_t, vm_offset_t));
+void		 vm_object_print __P((vm_object_t, boolean_t));
+void		 vm_object_reference __P((vm_object_t));
+void		 vm_object_remove __P((vm_pager_t));
+void		 vm_object_setpager __P((vm_object_t,
+		    vm_pager_t, vm_offset_t, boolean_t));
+void		 vm_object_shadow __P((vm_object_t *,
+		    vm_offset_t *, vm_size_t));
+void		 vm_object_terminate __P((vm_object_t));
+#endif /* KERNEL */
+#endif /* _VM_OBJECT_ */
diff --git a/sys/vm/vm_page.c b/sys/vm/vm_page.c
new file mode 100644
index 00000000000..0cd9d875b69
--- /dev/null
+++ b/sys/vm/vm_page.c
@@ -0,0 +1,696 @@
+/* 
+ * Copyright (c) 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * The Mach Operating System project at Carnegie-Mellon University.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)vm_page.c	8.3 (Berkeley) 3/21/94
+ *
+ *
+ * Copyright (c) 1987, 1990 Carnegie-Mellon University.
+ * All rights reserved.
+ *
+ * Authors: Avadis Tevanian, Jr., Michael Wayne Young
+ * 
+ * Permission to use, copy, modify and distribute this software and
+ * its documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ * 
+ * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 
+ * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND 
+ * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ * 
+ * Carnegie Mellon requests users of this software to return to
+ *
+ *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
+ *  School of Computer Science
+ *  Carnegie Mellon University
+ *  Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie the
+ * rights to redistribute these changes.
+ */
+
+/*
+ *	Resident memory management module.
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+
+#include <vm/vm.h>
+#include <vm/vm_page.h>
+#include <vm/vm_map.h>
+#include <vm/vm_pageout.h>
+
+/*
+ *	Associated with each page of user-allocatable memory is a
+ *	page structure.
+ */
+
+struct pglist	*vm_page_buckets;		/* Array of hash buckets */
+int		vm_page_bucket_count = 0;	/* Array size; 0 = sized at startup */
+int		vm_page_hash_mask;		/* Bucket count - 1, hash mask */
+simple_lock_data_t	bucket_lock;		/* lock for all buckets XXX */
+
+struct pglist	vm_page_queue_free;		/* memory free queue */
+struct pglist	vm_page_queue_active;		/* active memory queue */
+struct pglist	vm_page_queue_inactive;		/* inactive memory queue */
+simple_lock_data_t	vm_page_queue_lock;	/* active+inactive queue lock */
+simple_lock_data_t	vm_page_queue_free_lock;	/* free queue lock */
+
+/* has physical page allocation been initialized? (set by vm_page_startup) */
+boolean_t vm_page_startup_initialized;
+
+vm_page_t	vm_page_array;		/* page structures, set up at startup */
+long		first_page;		/* first physical page number managed */
+long		last_page;		/* last page number managed, inclusive */
+vm_offset_t	first_phys_addr;	/* physical address of first_page */
+vm_offset_t	last_phys_addr;		/* last byte of last_page */
+vm_size_t	page_mask;		/* page size - 1 */
+int		page_shift;		/* log2 of the page size */
+
+/*
+ *	vm_set_page_size:
+ *
+ *	Establishes the system page size.  A zero cnt.v_page_size
+ *	is replaced by DEFAULT_PAGE_SIZE; the size must be a power
+ *	of two or we panic.  Derives page_mask (size - 1) and
+ *	page_shift (log2 of size) from the result.  Must run before
+ *	anything that depends on the page size.
+ */
+void vm_set_page_size()
+{
+	if (cnt.v_page_size == 0)
+		cnt.v_page_size = DEFAULT_PAGE_SIZE;
+
+	page_mask = cnt.v_page_size - 1;
+	if (page_mask & cnt.v_page_size)
+		panic("vm_set_page_size: page size not a power of two");
+	page_shift = 0;
+	while ((1 << page_shift) != cnt.v_page_size)
+		page_shift++;
+}
+
+
+/*
+ *	vm_page_startup:
+ *
+ *	Initializes the resident memory module for the memory between
+ *	*start and *end (*end is truncated to a page boundary here).
+ *	Allocates the object/offset-to-page hash table headers, the
+ *	kernel map/map-entry pool and the page cells.  Each page cell
+ *	is initialized and placed on the free list (but see i386 below).
+ */
+void vm_page_startup(start, end)
+	vm_offset_t	*start;
+	vm_offset_t	*end;
+{
+	register vm_page_t	m;
+	register struct pglist	*bucket;
+	vm_size_t		npages;		/* pages given a vm_page */
+	int			i;
+	vm_offset_t		pa;		/* address of current cell's page */
+	extern	vm_offset_t	kentry_data;
+	extern	vm_size_t	kentry_data_size;
+
+
+	/*
+	 *	Initialize the page queue locks
+	 */
+
+	simple_lock_init(&vm_page_queue_free_lock);
+	simple_lock_init(&vm_page_queue_lock);
+
+	/*
+	 *	Initialize the queue headers for the free queue,
+	 *	the active queue and the inactive queue.
+	 */
+
+	TAILQ_INIT(&vm_page_queue_free);
+	TAILQ_INIT(&vm_page_queue_active);
+	TAILQ_INIT(&vm_page_queue_inactive);
+
+	/*
+	 *	Calculate the number of hash table buckets.
+	 *
+	 *	The number of buckets MUST BE a power of 2, and
+	 *	the actual value is the smallest power of 2 that is
+	 *	not less than the number of pages between *start and
+	 *	*end.  A nonzero preset vm_page_bucket_count is kept.
+	 *
+	 *	Note: This computation can be tweaked if desired.
+	 */
+
+	if (vm_page_bucket_count == 0) {
+		vm_page_bucket_count = 1;
+		while (vm_page_bucket_count < atop(*end - *start))
+			vm_page_bucket_count <<= 1;
+	}
+
+	vm_page_hash_mask = vm_page_bucket_count - 1;
+
+	/*
+	 *	Allocate (and initialize) the hash table buckets.
+	 */
+	vm_page_buckets = (struct pglist *)
+	    pmap_bootstrap_alloc(vm_page_bucket_count * sizeof(struct pglist));
+	bucket = vm_page_buckets;
+
+	for (i = vm_page_bucket_count; i--;) {
+		TAILQ_INIT(bucket);
+		bucket++;
+	}
+
+	simple_lock_init(&bucket_lock);
+
+	/*
+	 *	Truncate the remainder of physical memory to our page size.
+	 */
+
+	*end = trunc_page(*end);
+
+	/*
+	 *	Pre-allocate maps and map entries that cannot be dynamically
+	 *	allocated via malloc().  The maps include the kernel_map and
+	 *	kmem_map which must be initialized before malloc() will
+	 *	work (obviously).  Also could include pager maps which would
+	 *	be allocated before kmeminit.
+	 *
+	 *	Allow some kernel map entries... this should be plenty
+	 *	since people shouldn't be cluttering up the kernel
+	 *	map (they should use their own maps).
+	 */
+
+	kentry_data_size = round_page(MAX_KMAP*sizeof(struct vm_map) +
+				      MAX_KMAPENT*sizeof(struct vm_map_entry));
+	kentry_data = (vm_offset_t) pmap_bootstrap_alloc(kentry_data_size);
+
+	/*
+	 *	Compute the number of pages of memory that will be
+	 *	available for use (taking into account the overhead
+	 *	of a page structure per page).
+	 */
+
+	cnt.v_free_count = npages = (*end - *start + sizeof(struct vm_page))
+		/ (PAGE_SIZE + sizeof(struct vm_page));
+
+	/*
+	 *	Record the extent of physical memory that the VM system
+	 *	manages: it begins past the room for the page structures.
+	 */
+
+	first_page = *start;
+	first_page += npages*sizeof(struct vm_page);
+	first_page = atop(round_page(first_page));
+	last_page  = first_page + npages - 1;
+
+	first_phys_addr = ptoa(first_page);
+	last_phys_addr  = ptoa(last_page) + PAGE_MASK;
+
+
+	/*
+	 *	Allocate the mem entry structures (any clearing is up to pmap_bootstrap_alloc -- confirm).
+	 */
+
+	m = vm_page_array = (vm_page_t)
+		pmap_bootstrap_alloc(npages * sizeof(struct vm_page));
+
+	/*
+	 *	Initialize the mem entry structures now, and
+	 *	put them in the free queue.
+	 */
+
+	pa = first_phys_addr;
+	while (npages--) {
+		m->flags = 0;
+		m->object = NULL;
+		m->phys_addr = pa;
+#ifdef i386
+		if (pmap_isvalidphys(m->phys_addr)) {
+			TAILQ_INSERT_TAIL(&vm_page_queue_free, m, pageq);
+		} else {
+			/* perhaps iomem needs its own type, or dev pager? */
+			m->flags |= PG_FICTITIOUS | PG_BUSY;
+			cnt.v_free_count--;
+		}
+#else /* i386 */
+		TAILQ_INSERT_TAIL(&vm_page_queue_free, m, pageq);
+#endif /* i386 */
+		m++;
+		pa += PAGE_SIZE;
+	}
+
+	/*
+	 *	Initialize vm_pages_needed lock here - don't wait for pageout
+	 *	daemon	XXX
+	 */
+	simple_lock_init(&vm_pages_needed_lock);
+
+	/* from now on, pmap_bootstrap_alloc can't be used */
+	vm_page_startup_initialized = TRUE;
+}
+
+/*
+ *	vm_page_hash:
+ *
+ *	Distributes the object/offset key pair among hash buckets.
+ *	Arguments are parenthesized so any expression may be passed.
+ *	NOTE:  This macro depends on vm_page_bucket_count being a power of 2.
+ */
+#define vm_page_hash(object, offset) \
+	(((unsigned)(object) + (unsigned)atop(offset)) & vm_page_hash_mask)
+
+/*
+ *	vm_page_insert:		[ internal use only ]
+ *
+ *	Inserts the given mem entry into the object/offset-page hash
+ *	table and the object's page list, and marks it PG_TABLED.
+ *	Panics if the page is already tabled.
+ *	The object and page must be locked.
+ */
+
+void vm_page_insert(mem, object, offset)
+	register vm_page_t	mem;
+	register vm_object_t	object;
+	register vm_offset_t	offset;
+{
+	register struct pglist	*bucket;
+	int			spl;
+
+	VM_PAGE_CHECK(mem);
+
+	if (mem->flags & PG_TABLED)
+		panic("vm_page_insert: already inserted");
+
+	/*
+	 *	Record the object/offset pair in this page
+	 */
+
+	mem->object = object;
+	mem->offset = offset;
+
+	/*
+	 *	Insert it into the object/offset hash table (at splimp, under bucket_lock)
+	 */
+
+	bucket = &vm_page_buckets[vm_page_hash(object, offset)];
+	spl = splimp();
+	simple_lock(&bucket_lock);
+	TAILQ_INSERT_TAIL(bucket, mem, hashq);
+	simple_unlock(&bucket_lock);
+	(void) splx(spl);
+
+	/*
+	 *	Now link into the object's list of backed pages.
+	 */
+
+	TAILQ_INSERT_TAIL(&object->memq, mem, listq);
+	mem->flags |= PG_TABLED;
+
+	/*
+	 *	And show that the object has one more resident
+	 *	page.
+	 */
+
+	object->resident_page_count++;
+}
+
+/*
+ *	vm_page_remove:		[ internal use only ]
+ *				NOTE: used by device pager as well -wfj
+ *
+ *	Removes the given mem entry from the object/offset-page
+ *	table and the object page list, clearing PG_TABLED.  A page
+ *	that is not tabled is left untouched.
+ *	The object and page must be locked.
+ */
+
+void vm_page_remove(mem)
+	register vm_page_t	mem;
+{
+	register struct pglist	*bucket;
+	int			spl;
+
+	VM_PAGE_CHECK(mem);
+
+	if (!(mem->flags & PG_TABLED))
+		return;
+
+	/*
+	 *	Remove from the object/offset hash table (at splimp, under bucket_lock)
+	 */
+
+	bucket = &vm_page_buckets[vm_page_hash(mem->object, mem->offset)];
+	spl = splimp();
+	simple_lock(&bucket_lock);
+	TAILQ_REMOVE(bucket, mem, hashq);
+	simple_unlock(&bucket_lock);
+	(void) splx(spl);
+
+	/*
+	 *	Now remove from the object's list of backed pages.
+	 */
+
+	TAILQ_REMOVE(&mem->object->memq, mem, listq);
+
+	/*
+	 *	And show that the object has one fewer resident
+	 *	page.
+	 */
+
+	mem->object->resident_page_count--;
+
+	mem->flags &= ~PG_TABLED;	/* mem->object and mem->offset are not reset */
+}
+
+/*
+ *	vm_page_lookup:
+ *
+ *	Finds the page entered under the given object/offset pair
+ *	by scanning that pair's hash chain with bucket_lock held
+ *	at splimp.  Returns the page, or NULL when the pair has
+ *	no resident page.
+ *
+ *	The object must be locked.  No side effects.
+ */
+
+vm_page_t vm_page_lookup(object, offset)
+	register vm_object_t	object;
+	register vm_offset_t	offset;
+{
+	register vm_page_t	pg;
+	register struct pglist	*chain;
+	int			s;
+
+	/* Pick the hash chain this object/offset pair lives on. */
+	chain = &vm_page_buckets[vm_page_hash(object, offset)];
+
+	s = splimp();
+	simple_lock(&bucket_lock);
+
+	/* Walk the chain; pg ends up NULL when nothing matches. */
+	pg = chain->tqh_first;
+	while (pg != NULL) {
+		VM_PAGE_CHECK(pg);
+		if (pg->object == object && pg->offset == offset)
+			break;
+		pg = pg->hashq.tqe_next;
+	}
+
+	simple_unlock(&bucket_lock);
+	splx(s);
+	return (pg);
+}
+
+/*
+ *	vm_page_rename:
+ *
+ *	Re-enters the given page under new_object/new_offset,
+ *	holding the page queue lock across the remove/insert pair.
+ *	A page already in new_object is left alone, whatever
+ *	new_offset is.  The object must be locked.
+ */
+void vm_page_rename(mem, new_object, new_offset)
+	register vm_page_t	mem;
+	register vm_object_t	new_object;
+	vm_offset_t		new_offset;
+{
+	/* Nothing to do when the page already belongs to new_object. */
+	if (mem->object != new_object) {
+		/* keep page from moving out from under pageout daemon */
+		vm_page_lock_queues();
+		vm_page_remove(mem);
+		vm_page_insert(mem, new_object, new_offset);
+		vm_page_unlock_queues();
+	}
+}
+
+/*
+ *	vm_page_alloc:
+ *
+ *	Takes the first page off the free queue and enters it
+ *	under the given VM object/offset pair via VM_PAGE_INIT.
+ *	Returns the page, or NULL if the free queue is empty.
+ *	Object must be locked.
+ */
+vm_page_t vm_page_alloc(object, offset)
+	vm_object_t	object;
+	vm_offset_t	offset;
+{
+	register vm_page_t	pg;
+	int		s;
+
+	/*
+	 *	Detach the head of the free queue, if there is one.
+	 */
+	s = splimp();				/* XXX */
+	simple_lock(&vm_page_queue_free_lock);
+	pg = vm_page_queue_free.tqh_first;
+	if (pg != NULL) {
+		TAILQ_REMOVE(&vm_page_queue_free, pg, pageq);
+		cnt.v_free_count--;
+	}
+	simple_unlock(&vm_page_queue_free_lock);
+	splx(s);
+
+	if (pg == NULL)
+		return (NULL);
+
+	VM_PAGE_INIT(pg, object, offset);
+
+	/*
+	 *	Wake the pageout daemon when memory is getting short:
+	 *	either the free count has dropped below its minimum,
+	 *	or it is below the free target while the inactive
+	 *	count is also below the inactive target.
+	 *
+	 *	The counts are read without a lock; being slightly
+	 *	out of date is harmless here.
+	 */
+	if (cnt.v_free_count < cnt.v_free_min ||
+	    (cnt.v_free_count < cnt.v_free_target &&
+	     cnt.v_inactive_count < cnt.v_inactive_target))
+		thread_wakeup((int)&vm_pages_needed);
+
+	return (pg);
+}
+
+/*
+ *	vm_page_free:
+ *
+ *	Detaches the page from its VM object and from the
+ *	active/inactive queues, then appends it to the free
+ *	list unless it is PG_FICTITIOUS.
+ *	Object and page must be locked prior to entry.
+ */
+void vm_page_free(mem)
+	register vm_page_t	mem;
+{
+	int	s;
+
+	vm_page_remove(mem);
+	if (mem->flags & PG_ACTIVE) {
+		TAILQ_REMOVE(&vm_page_queue_active, mem, pageq);
+		cnt.v_active_count--;
+		mem->flags &= ~PG_ACTIVE;
+	}
+	if (mem->flags & PG_INACTIVE) {
+		TAILQ_REMOVE(&vm_page_queue_inactive, mem, pageq);
+		cnt.v_inactive_count--;
+		mem->flags &= ~PG_INACTIVE;
+	}
+
+	/* Fictitious pages never go on the free list. */
+	if (mem->flags & PG_FICTITIOUS)
+		return;
+
+	s = splimp();
+	simple_lock(&vm_page_queue_free_lock);
+	TAILQ_INSERT_TAIL(&vm_page_queue_free, mem, pageq);
+	cnt.v_free_count++;
+	simple_unlock(&vm_page_queue_free_lock);
+	splx(s);
+}
+
+/*
+ *	vm_page_wire:
+ *
+ *	Adds one wiring to the page.  The first wiring takes
+ *	the page off the active or inactive queue and bumps
+ *	the system-wide wired page count.
+ *
+ *	The page queues must be locked.
+ */
+void vm_page_wire(mem)
+	register vm_page_t	mem;
+{
+	VM_PAGE_CHECK(mem);
+
+	if (mem->wire_count++ != 0)
+		return;		/* was already wired; wiring counted */
+
+	if (mem->flags & PG_ACTIVE) {
+		TAILQ_REMOVE(&vm_page_queue_active, mem, pageq);
+		mem->flags &= ~PG_ACTIVE;
+		cnt.v_active_count--;
+	}
+	if (mem->flags & PG_INACTIVE) {
+		TAILQ_REMOVE(&vm_page_queue_inactive, mem, pageq);
+		mem->flags &= ~PG_INACTIVE;
+		cnt.v_inactive_count--;
+	}
+	cnt.v_wire_count++;
+}
+
+/*
+ *	vm_page_unwire:
+ *
+ *	Release one wiring of this page, potentially
+ *	enabling it to be paged again.  Panics if the page
+ *	is not wired, since the unsigned wire_count would wrap.
+ *	The page queues must be locked.
+ */
+void vm_page_unwire(mem)
+	register vm_page_t	mem;
+{
+	VM_PAGE_CHECK(mem);
+	if (mem->wire_count == 0)
+		panic("vm_page_unwire: page not wired");
+	if (--mem->wire_count == 0) {
+		TAILQ_INSERT_TAIL(&vm_page_queue_active, mem, pageq);
+		cnt.v_active_count++;
+		mem->flags |= PG_ACTIVE;
+		cnt.v_wire_count--;
+	}
+}
+
+/*
+ *	vm_page_deactivate:
+ *
+ *	Moves an active page to the tail of the inactive list,
+ *	clearing its pmap reference information and updating
+ *	PG_CLEAN/PG_LAUNDRY from the pmap modify state.
+ *	Pages that are not active are left untouched.
+ *	The page queues must be locked.
+ */
+void vm_page_deactivate(m)
+	register vm_page_t	m;
+{
+	VM_PAGE_CHECK(m);
+
+	/* Only active pages move; anything else is left alone. */
+	if ((m->flags & PG_ACTIVE) == 0)
+		return;
+
+	pmap_clear_reference(VM_PAGE_TO_PHYS(m));
+
+	TAILQ_REMOVE(&vm_page_queue_active, m, pageq);
+	cnt.v_active_count--;
+	TAILQ_INSERT_TAIL(&vm_page_queue_inactive, m, pageq);
+	cnt.v_inactive_count++;
+	m->flags = (m->flags & ~PG_ACTIVE) | PG_INACTIVE;
+
+	/* A page the pmap layer reports as modified is no longer clean... */
+	if (pmap_is_modified(VM_PAGE_TO_PHYS(m)))
+		m->flags &= ~PG_CLEAN;
+	/* ...and PG_LAUNDRY is kept as the inverse of PG_CLEAN. */
+	if (m->flags & PG_CLEAN)
+		m->flags &= ~PG_LAUNDRY;
+	else
+		m->flags |= PG_LAUNDRY;
+}
+
+/*
+ *	vm_page_activate:
+ *
+ *	Takes the page off the inactive list if it is there and,
+ *	unless wired, appends it to the active list (panics if it
+ *	is already active).  The page queues must be locked.
+ */
+void vm_page_activate(m)
+	register vm_page_t	m;
+{
+	VM_PAGE_CHECK(m);
+
+	if (m->flags & PG_INACTIVE) {
+		TAILQ_REMOVE(&vm_page_queue_inactive, m, pageq);
+		m->flags &= ~PG_INACTIVE;
+		cnt.v_inactive_count--;
+	}
+	/* Wired pages are not put on the active list. */
+	if (m->wire_count != 0)
+		return;
+
+	if (m->flags & PG_ACTIVE)
+		panic("vm_page_activate: already active");
+	TAILQ_INSERT_TAIL(&vm_page_queue_active, m, pageq);
+	cnt.v_active_count++;
+	m->flags |= PG_ACTIVE;
+}
+
+/*
+ *	vm_page_zero_fill:
+ *
+ *	Zero-fills the specified page through pmap_zero_page and
+ *	marks it not clean.  Always returns TRUE.  Written as a
+ *	standard pagein routine, to be used by the zero-fill object.
+ */
+boolean_t vm_page_zero_fill(m)
+	vm_page_t	m;
+{
+	VM_PAGE_CHECK(m);
+
+	m->flags &= ~PG_CLEAN;		/* contents are about to change */
+	pmap_zero_page(VM_PAGE_TO_PHYS(m));
+
+	return (TRUE);
+}
+
+/*
+ *	vm_page_copy:
+ *
+ *	Copy src_m's contents into dest_m; dest_m becomes not clean.
+ */
+void vm_page_copy(src_m, dest_m)
+	vm_page_t	src_m;
+	vm_page_t	dest_m;
+{
+	VM_PAGE_CHECK(src_m);
+	VM_PAGE_CHECK(dest_m);
+
+	/* the destination is being overwritten, so drop its clean bit */
+	dest_m->flags &= ~PG_CLEAN;
+	pmap_copy_page(VM_PAGE_TO_PHYS(src_m), VM_PAGE_TO_PHYS(dest_m));
+}
diff --git a/sys/vm/vm_page.h b/sys/vm/vm_page.h
new file mode 100644
index 00000000000..8bf51469a1f
--- /dev/null
+++ b/sys/vm/vm_page.h
@@ -0,0 +1,242 @@
+/* 
+ * Copyright (c) 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * The Mach Operating System project at Carnegie-Mellon University.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)vm_page.h	8.2 (Berkeley) 12/13/93
+ *
+ *
+ * Copyright (c) 1987, 1990 Carnegie-Mellon University.
+ * All rights reserved.
+ *
+ * Authors: Avadis Tevanian, Jr., Michael Wayne Young
+ * 
+ * Permission to use, copy, modify and distribute this software and
+ * its documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ * 
+ * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 
+ * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND 
+ * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ * 
+ * Carnegie Mellon requests users of this software to return to
+ *
+ *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
+ *  School of Computer Science
+ *  Carnegie Mellon University
+ *  Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie the
+ * rights to redistribute these changes.
+ */
+
+/*
+ *	Resident memory system definitions.
+ */
+
+#ifndef	_VM_PAGE_
+#define	_VM_PAGE_
+
+/*
+ *	Management of resident (logical) pages.
+ *
+ *	A small structure is kept for each resident
+ *	page, indexed by page number.  Each structure
+ *	is an element of several lists:
+ *
+ *		A hash table bucket used to quickly
+ *		perform object/offset lookups (hashq).
+ *
+ *		A list of all pages for a given object,
+ *		so they can be quickly deactivated at
+ *		time of deallocation (listq).
+ *
+ *		An ordered list of pages due for pageout (pageq).
+ *
+ *	In addition, the structure contains the object
+ *	and offset to which this page belongs (for pageout),
+ *	and sundry status bits.
+ *
+ *	Fields in this structure are locked either by the lock on the
+ *	object that the page belongs to (O) or by the lock on the page
+ *	queues (P).
+ */
+
+TAILQ_HEAD(pglist, vm_page);
+
+struct vm_page {
+	TAILQ_ENTRY(vm_page)	pageq;		/* queue info for FIFO
+						 * queue or free list (P) */
+	TAILQ_ENTRY(vm_page)	hashq;		/* hash table links (O)*/
+	TAILQ_ENTRY(vm_page)	listq;		/* pages in same object (O)*/
+
+	vm_object_t		object;		/* which object am I in (O,P)*/
+	vm_offset_t		offset;		/* offset into object (O,P) */
+
+	u_short			wire_count;	/* wired down maps refs (P) */
+	u_short			flags;		/* PG_* status bits, see below */
+
+	vm_offset_t		phys_addr;	/* physical address of page */
+};
+
+/*
+ * These are the flags defined for vm_page; they share the u_short
+ * `flags' field, and 0x1000 and 0x2000 are not assigned here.
+ * Note: PG_FILLED and PG_DIRTY are added for the filesystems.
+ */
+#define	PG_INACTIVE	0x0001		/* page is in inactive list (P) */
+#define	PG_ACTIVE	0x0002		/* page is in active list (P) */
+#define	PG_LAUNDRY	0x0004		/* page is being cleaned now (P)*/
+#define	PG_CLEAN	0x0008		/* page has not been modified */
+#define	PG_BUSY		0x0010		/* page is in transit (O) */
+#define	PG_WANTED	0x0020		/* someone is waiting for page (O) */
+#define	PG_TABLED	0x0040		/* page is in VP table (O) */
+#define	PG_COPYONWRITE	0x0080		/* must copy page before changing (O) */
+#define	PG_FICTITIOUS	0x0100		/* physical page doesn't exist (O) */
+#define	PG_FAKE		0x0200		/* page is placeholder for pagein (O) */
+#define	PG_FILLED	0x0400		/* client flag to set when filled */
+#define	PG_DIRTY	0x0800		/* client flag to set when dirty */
+#define	PG_PAGEROWNED	0x4000		/* DEBUG: async paging op in progress */
+#define	PG_PTPAGE	0x8000		/* DEBUG: is a user page table page */
+/* Panic if mem is outside vm_page_array or on both paging queues (debug only). */
+#if	VM_PAGE_DEBUG
+#define	VM_PAGE_CHECK(mem) { \
+	if ((((unsigned int) (mem)) < ((unsigned int) &vm_page_array[0])) || \
+	    (((unsigned int) (mem)) > \
+		((unsigned int) &vm_page_array[last_page-first_page])) || \
+	    (((mem)->flags & (PG_ACTIVE | PG_INACTIVE)) == \
+		(PG_ACTIVE | PG_INACTIVE))) \
+		panic("vm_page_check: not valid!"); \
+}
+#else /* VM_PAGE_DEBUG */
+#define	VM_PAGE_CHECK(mem)
+#endif /* VM_PAGE_DEBUG */
+
+#ifdef KERNEL
+/*
+ *	Each pageable resident page falls into one of three lists
+ *	(vm_page_wire takes a page off the active/inactive lists):
+ *	free
+ *		Available for allocation now.
+ *	inactive
+ *		Not referenced in any map, but still has an
+ *		object/offset-page mapping, and may be dirty.
+ *		This is the list of pages that should be
+ *		paged out next.
+ *	active
+ *		A list of pages which have been placed in
+ *		at least one physical map.  This list is
+ *		ordered, in LRU-like fashion.
+ */
+
+extern
+struct pglist	vm_page_queue_free;	/* memory free queue */
+extern
+struct pglist	vm_page_queue_active;	/* active memory queue */
+extern
+struct pglist	vm_page_queue_inactive;	/* inactive memory queue */
+
+extern
+vm_page_t	vm_page_array;		/* First resident page in table */
+extern
+long		first_page;		/* first physical page number */
+					/* ... represented in vm_page_array */
+extern
+long		last_page;		/* last physical page number */
+					/* ... represented in vm_page_array */
+					/* [INCLUSIVE] */
+extern
+vm_offset_t	first_phys_addr;	/* physical address for first_page */
+extern
+vm_offset_t	last_phys_addr;		/* last byte address within last_page */
+/* vm_page <-> physical address helpers; PHYS_TO_VM_PAGE does no range check. */
+#define VM_PAGE_TO_PHYS(entry)	((entry)->phys_addr)
+
+#define IS_VM_PHYSADDR(pa) \
+		((pa) >= first_phys_addr && (pa) <= last_phys_addr)
+
+#define PHYS_TO_VM_PAGE(pa) \
+		(&vm_page_array[atop(pa) - first_page ])
+
+extern
+simple_lock_data_t	vm_page_queue_lock;	/* lock on active and inactive
+						   page queues */
+extern						/* lock on free page queue */
+simple_lock_data_t	vm_page_queue_free_lock;
+
+/*
+ *	Functions implemented as macros
+ */
+/* Mark the page PG_WANTED and assert_wait on it; expands to a bare { } block. */
+#define PAGE_ASSERT_WAIT(m, interruptible)	{ \
+				(m)->flags |= PG_WANTED; \
+				assert_wait((int) (m), (interruptible)); \
+			}
+/* Clear PG_BUSY and, if PG_WANTED was set, clear it and thread_wakeup the page. */
+#define PAGE_WAKEUP(m)	{ \
+				(m)->flags &= ~PG_BUSY; \
+				if ((m)->flags & PG_WANTED) { \
+					(m)->flags &= ~PG_WANTED; \
+					thread_wakeup((int) (m)); \
+				} \
+			}
+
+#define	vm_page_lock_queues()	simple_lock(&vm_page_queue_lock)
+#define	vm_page_unlock_queues()	simple_unlock(&vm_page_queue_lock)
+
+#define vm_page_set_modified(m)	{ (m)->flags &= ~PG_CLEAN; }
+/* Reset flags to BUSY|CLEAN|FAKE, enter the page at object/offset, zero wire_count. */
+#define	VM_PAGE_INIT(mem, object, offset) { \
+	(mem)->flags = PG_BUSY | PG_CLEAN | PG_FAKE; \
+	vm_page_insert((mem), (object), (offset)); \
+	(mem)->wire_count = 0; \
+}
+/* Resident page routines, defined in vm_page.c. */
+void		 vm_page_activate __P((vm_page_t));
+vm_page_t	 vm_page_alloc __P((vm_object_t, vm_offset_t));
+void		 vm_page_copy __P((vm_page_t, vm_page_t));
+void		 vm_page_deactivate __P((vm_page_t));
+void		 vm_page_free __P((vm_page_t));
+void		 vm_page_insert __P((vm_page_t, vm_object_t, vm_offset_t));
+vm_page_t	 vm_page_lookup __P((vm_object_t, vm_offset_t));
+void		 vm_page_remove __P((vm_page_t));
+void		 vm_page_rename __P((vm_page_t, vm_object_t, vm_offset_t));
+void		 vm_page_startup __P((vm_offset_t *, vm_offset_t *));
+void		 vm_page_unwire __P((vm_page_t));
+void		 vm_page_wire __P((vm_page_t));
+boolean_t	 vm_page_zero_fill __P((vm_page_t));
+
+#endif /* KERNEL */
+#endif /* !_VM_PAGE_ */
diff --git a/sys/vm/vm_pageout.c b/sys/vm/vm_pageout.c
new file mode 100644
index 00000000000..679540591e7
--- /dev/null
+++ b/sys/vm/vm_pageout.c
@@ -0,0 +1,567 @@
+/* 
+ * Copyright (c) 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * The Mach Operating System project at Carnegie-Mellon University.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)vm_pageout.c	8.5 (Berkeley) 2/14/94
+ *
+ *
+ * Copyright (c) 1987, 1990 Carnegie-Mellon University.
+ * All rights reserved.
+ *
+ * Authors: Avadis Tevanian, Jr., Michael Wayne Young
+ * 
+ * Permission to use, copy, modify and distribute this software and
+ * its documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ * 
+ * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 
+ * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND 
+ * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ * 
+ * Carnegie Mellon requests users of this software to return to
+ *
+ *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
+ *  School of Computer Science
+ *  Carnegie Mellon University
+ *  Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie the
+ * rights to redistribute these changes.
+ */
+
+/*
+ *	The proverbial page-out daemon.
+ */
+
+#include <sys/param.h>
+
+#include <vm/vm.h>
+#include <vm/vm_page.h>
+#include <vm/vm_pageout.h>
+
+#ifndef VM_PAGE_FREE_MIN
+#define VM_PAGE_FREE_MIN	(cnt.v_free_count / 20)
+#endif
+
+#ifndef VM_PAGE_FREE_TARGET
+#define VM_PAGE_FREE_TARGET	((cnt.v_free_min * 4) / 3)
+#endif
+
+int	vm_page_free_min_min = 16 * 1024;
+int	vm_page_free_min_max = 256 * 1024;
+
+int	vm_pages_needed;	/* Event on which pageout daemon sleeps */
+
+int	vm_page_max_wired = 0;	/* XXX max # of wired pages system-wide */
+
+#ifdef CLUSTERED_PAGEOUT
+#define MAXPOCLUSTER		(MAXPHYS/NBPG)	/* XXX */
+int doclustered_pageout = 1;
+#endif
+
+/*
+ *	vm_pageout_scan does the dirty work for the pageout daemon.
+ */
+void
+vm_pageout_scan()
+{
+	register vm_page_t	m, next;
+	register int		page_shortage;
+	register int		s;
+	register int		pages_freed;
+	int			free;
+	vm_object_t		object;
+
+	/*
+	 *	Only continue when we want more pages to be "free"
+	 */
+
+	cnt.v_rev++;
+
+	s = splimp();
+	simple_lock(&vm_page_queue_free_lock);
+	free = cnt.v_free_count;
+	simple_unlock(&vm_page_queue_free_lock);
+	splx(s);
+
+	if (free < cnt.v_free_target) {
+		swapout_threads();
+
+		/*
+		 *	Be sure the pmap system is updated so
+		 *	we can scan the inactive queue.
+		 */
+
+		pmap_update();
+	}
+
+	/*
+	 *	Acquire the resident page system lock,
+	 *	as we may be changing what's resident quite a bit.
+	 */
+	vm_page_lock_queues();
+
+	/*
+	 *	Start scanning the inactive queue for pages we can free.
+	 *	We keep scanning until we have enough free pages or
+	 *	we have scanned through the entire queue.  If we
+	 *	encounter dirty pages, we start cleaning them.
+	 */
+
+	pages_freed = 0;
+	for (m = vm_page_queue_inactive.tqh_first; m != NULL; m = next) {
+		s = splimp();
+		simple_lock(&vm_page_queue_free_lock);
+		free = cnt.v_free_count;
+		simple_unlock(&vm_page_queue_free_lock);
+		splx(s);
+		if (free >= cnt.v_free_target)
+			break;
+
+		cnt.v_scan++;
+		next = m->pageq.tqe_next;
+
+		/*
+		 * If the page has been referenced, move it back to the
+		 * active queue.
+		 */
+		if (pmap_is_referenced(VM_PAGE_TO_PHYS(m))) {
+			vm_page_activate(m);
+			cnt.v_reactivated++;
+			continue;
+		}
+
+		/*
+		 * If the page is clean, free it up.
+		 */
+		if (m->flags & PG_CLEAN) {
+			object = m->object;
+			if (vm_object_lock_try(object)) {
+				pmap_page_protect(VM_PAGE_TO_PHYS(m),
+						  VM_PROT_NONE);
+				vm_page_free(m);
+				pages_freed++;
+				cnt.v_dfree++;
+				vm_object_unlock(object);
+			}
+			continue;
+		}
+
+		/*
+		 * If the page is dirty but already being washed, skip it.
+		 */
+		if ((m->flags & PG_LAUNDRY) == 0)
+			continue;
+
+		/*
+		 * Otherwise the page is dirty and still in the laundry,
+		 * so we start the cleaning operation and remove it from
+		 * the laundry.
+		 */
+		object = m->object;
+		if (!vm_object_lock_try(object))
+			continue;
+		cnt.v_pageouts++;
+#ifdef CLUSTERED_PAGEOUT
+		if (object->pager &&
+		    vm_pager_cancluster(object->pager, PG_CLUSTERPUT))
+			vm_pageout_cluster(m, object);
+		else
+#endif
+		vm_pageout_page(m, object);
+		thread_wakeup((int) object);
+		vm_object_unlock(object);
+		/*
+		 * Former next page may no longer even be on the inactive
+		 * queue (due to potential blocking in the pager with the
+		 * queues unlocked).  If it isn't, we just start over.
+		 */
+		if (next && (next->flags & PG_INACTIVE) == 0)
+			next = vm_page_queue_inactive.tqh_first;
+	}
+	
+	/*
+	 *	Compute the page shortage.  If we are still very low on memory
+	 *	be sure that we will move a minimal amount of pages from active
+	 *	to inactive.
+	 */
+
+	page_shortage = cnt.v_inactive_target - cnt.v_inactive_count;
+	if (page_shortage <= 0 && pages_freed == 0)
+		page_shortage = 1;
+
+	while (page_shortage > 0) {
+		/*
+		 *	Move some more pages from active to inactive.
+		 */
+
+		if ((m = vm_page_queue_active.tqh_first) == NULL)
+			break;
+		vm_page_deactivate(m);
+		page_shortage--;
+	}
+
+	vm_page_unlock_queues();
+}
+
+/*
+ * Called with object and page queues locked.
+ * On a pager failure or error the page is put back on the active
+ * queue; otherwise it is left on the inactive queue.
+ */
+void
+vm_pageout_page(m, object)
+	vm_page_t m;
+	vm_object_t object;
+{
+	vm_pager_t pager;
+	int pageout_status;
+
+	/*
+	 * We set the busy bit to cause potential page faults on
+	 * this page to block.
+	 *
+	 * We also set pageout-in-progress to keep the object from
+	 * disappearing during pageout.  This guarantees that the
+	 * page won't move from the inactive queue.  (However, any
+	 * other page on the inactive queue may move!)
+	 */
+	pmap_page_protect(VM_PAGE_TO_PHYS(m), VM_PROT_NONE);
+	m->flags |= PG_BUSY;
+
+	/*
+	 * Try to collapse the object before making a pager for it.
+	 * We must unlock the page queues first.
+	 */
+	vm_page_unlock_queues();
+	if (object->pager == NULL)
+		vm_object_collapse(object);
+
+	object->paging_in_progress++;
+	vm_object_unlock(object);
+
+	/*
+	 * Do a wakeup here in case the following operations block.
+	 */
+	thread_wakeup((int) &cnt.v_free_count);
+
+	/*
+	 * If there is no pager for the page, use the default pager.
+	 * If there is no place to put the page at the moment,
+	 * leave it in the laundry and hope that there will be
+	 * paging space later.
+	 */
+	if ((pager = object->pager) == NULL) {
+		pager = vm_pager_allocate(PG_DFLT, (caddr_t)0, object->size,
+					  VM_PROT_ALL, (vm_offset_t)0);
+		if (pager != NULL)
+			vm_object_setpager(object, pager, 0, FALSE);
+	}
+	pageout_status = pager ? vm_pager_put(pager, m, FALSE) : VM_PAGER_FAIL;
+	vm_object_lock(object);
+	vm_page_lock_queues();
+
+	switch (pageout_status) {
+	case VM_PAGER_OK:
+	case VM_PAGER_PEND:
+		cnt.v_pgpgout++;
+		m->flags &= ~PG_LAUNDRY;
+		break;
+	case VM_PAGER_BAD:
+		/*
+		 * Page outside of range of object.  Right now we
+		 * essentially lose the changes by pretending it
+		 * worked.
+		 *
+		 * XXX dubious, what should we do?
+		 */
+		m->flags &= ~PG_LAUNDRY;
+		m->flags |= PG_CLEAN;
+		pmap_clear_modify(VM_PAGE_TO_PHYS(m));
+		break;
+	case VM_PAGER_AGAIN:
+	{
+		extern int lbolt;
+
+		/*
+		 * AGAIN on a write is interpreted to mean a resource
+		 * shortage, so we pause for a while; the page stays in
+		 * the laundry for a later try.  XXX could get stuck here.
+		 */
+		(void) tsleep((caddr_t)&lbolt, PZERO|PCATCH, "pageout", 0);
+		break;
+	}
+	case VM_PAGER_FAIL:
+	case VM_PAGER_ERROR:
+		/*
+		 * If page couldn't be paged out, then reactivate
+		 * the page so it doesn't clog the inactive list.
+		 * (We will try paging it out again later.)
+		 */
+		vm_page_activate(m);
+		cnt.v_reactivated++;
+		break;
+	}
+
+	pmap_clear_reference(VM_PAGE_TO_PHYS(m));
+
+	/*
+	 * If the operation is still going, leave the page busy
+	 * to block all other accesses.  Also, leave the paging
+	 * in progress indicator set so that we don't attempt an
+	 * object collapse.
+	 */
+	if (pageout_status != VM_PAGER_PEND) {
+		m->flags &= ~PG_BUSY;	/* PAGE_WAKEUP clears it too */
+		PAGE_WAKEUP(m);
+		object->paging_in_progress--;
+	}
+}
+
+#ifdef CLUSTERED_PAGEOUT
+#define PAGEOUTABLE(p) \
+	((((p)->flags & (PG_INACTIVE|PG_CLEAN|PG_LAUNDRY)) == \
+	  (PG_INACTIVE|PG_LAUNDRY)) && !pmap_is_referenced(VM_PAGE_TO_PHYS(p)))
+
+/*
+ * Attempt to pageout as many contiguous (to ``m'') dirty pages as possible
+ * from ``object''.  Using information returned from the pager, we assemble
+ * a sorted list of contiguous dirty pages and feed them to the pager in one
+ * chunk.  Called with paging queues and object locked.  Also, object must
+ * already have a pager.
+ */
+void
+vm_pageout_cluster(m, object)
+	vm_page_t m;
+	vm_object_t object;
+{
+	vm_offset_t offset, loff, hoff;
+	vm_page_t plist[MAXPOCLUSTER], *plistp, p;
+	int postatus, ix, count;
+
+	/*
+	 * Determine the range of pages that can be part of a cluster
+	 * for this object/offset.  If it is only our single page, just
+	 * do it normally.
+	 */
+	vm_pager_cluster(object->pager, m->offset, &loff, &hoff);
+	if (hoff - loff == PAGE_SIZE) {
+		vm_pageout_page(m, object);
+		return;
+	}
+
+	plistp = plist;
+
+	/*
+	 * Target page is always part of the cluster.
+	 */
+	pmap_page_protect(VM_PAGE_TO_PHYS(m), VM_PROT_NONE);
+	m->flags |= PG_BUSY;
+	plistp[atop(m->offset - loff)] = m;
+	count = 1;
+
+	/*
+	 * Backup from the given page til we find one not fulfilling
+	 * the pageout criteria or we hit the lower bound for the
+	 * cluster.  For each page determined to be part of the
+	 * cluster, unmap it and busy it out so it won't change.
+	 */
+	ix = atop(m->offset - loff);
+	offset = m->offset;
+	while (offset > loff && count < MAXPOCLUSTER-1) {
+		p = vm_page_lookup(object, offset - PAGE_SIZE);
+		if (p == NULL || !PAGEOUTABLE(p))
+			break;
+		pmap_page_protect(VM_PAGE_TO_PHYS(p), VM_PROT_NONE);
+		p->flags |= PG_BUSY;
+		plistp[--ix] = p;
+		offset -= PAGE_SIZE;
+		count++;
+	}
+	plistp += atop(offset - loff);
+	loff = offset;
+
+	/*
+	 * Now do the same moving forward from the target.
+	 */
+	ix = atop(m->offset - loff) + 1;
+	offset = m->offset + PAGE_SIZE;
+	while (offset < hoff && count < MAXPOCLUSTER) {
+		p = vm_page_lookup(object, offset);
+		if (p == NULL || !PAGEOUTABLE(p))
+			break;
+		pmap_page_protect(VM_PAGE_TO_PHYS(p), VM_PROT_NONE);
+		p->flags |= PG_BUSY;
+		plistp[ix++] = p;
+		offset += PAGE_SIZE;
+		count++;
+	}
+	hoff = offset;
+
+	/*
+	 * Pageout the page.
+	 * Unlock everything and do a wakeup prior to the pager call
+	 * in case it blocks.
+	 */
+	vm_page_unlock_queues();
+	object->paging_in_progress++;
+	vm_object_unlock(object);
+again:
+	thread_wakeup((int) &cnt.v_free_count);
+	postatus = vm_pager_put_pages(object->pager, plistp, count, FALSE);
+	/*
+	 * XXX rethink this
+	 */
+	if (postatus == VM_PAGER_AGAIN) {
+		extern int lbolt;
+
+		(void) tsleep((caddr_t)&lbolt, PZERO|PCATCH, "pageout", 0);
+		goto again;
+	} else if (postatus == VM_PAGER_BAD)
+		panic("vm_pageout_cluster: VM_PAGER_BAD");
+	vm_object_lock(object);
+	vm_page_lock_queues();
+
+	/*
+	 * Loop through the affected pages, reflecting the outcome of
+	 * the operation.
+	 */
+	for (ix = 0; ix < count; ix++) {
+		p = *plistp++;
+		switch (postatus) {
+		case VM_PAGER_OK:
+		case VM_PAGER_PEND:
+			cnt.v_pgpgout++;
+			p->flags &= ~PG_LAUNDRY;
+			break;
+		case VM_PAGER_FAIL:
+		case VM_PAGER_ERROR:
+			/*
+			 * Pageout failed, reactivate the target page so it
+			 * doesn't clog the inactive list.  Other pages are
+			 * left as they are.
+			 */
+			if (p == m) {
+				vm_page_activate(p);
+				cnt.v_reactivated++;
+			}
+			break;
+		}
+		pmap_clear_reference(VM_PAGE_TO_PHYS(p));
+		/*
+		 * If the operation is still going, leave the page busy
+		 * to block all other accesses.
+		 */
+		if (postatus != VM_PAGER_PEND) {
+			p->flags &= ~PG_BUSY;
+			PAGE_WAKEUP(p);
+
+		}
+	}
+	/*
+	 * If the operation is still going, leave the paging in progress
+	 * indicator set so that we don't attempt an object collapse.
+	 */
+	if (postatus != VM_PAGER_PEND)
+		object->paging_in_progress--;
+
+}
+#endif
+
+/*
+ *	vm_pageout is the high level pageout daemon.
+ */
+
+void vm_pageout()
+{
+	(void) spl0();
+
+	/*
+	 *	Initialize some paging parameters.
+	 */
+
+	if (cnt.v_free_min == 0) {
+		cnt.v_free_min = VM_PAGE_FREE_MIN;
+		vm_page_free_min_min /= cnt.v_page_size;
+		vm_page_free_min_max /= cnt.v_page_size;
+		if (cnt.v_free_min < vm_page_free_min_min)
+			cnt.v_free_min = vm_page_free_min_min;
+		if (cnt.v_free_min > vm_page_free_min_max)
+			cnt.v_free_min = vm_page_free_min_max;
+	}
+
+	if (cnt.v_free_target == 0)
+		cnt.v_free_target = VM_PAGE_FREE_TARGET;
+
+	if (cnt.v_free_target <= cnt.v_free_min)
+		cnt.v_free_target = cnt.v_free_min + 1;
+
+	/* XXX does not really belong here */
+	if (vm_page_max_wired == 0)
+		vm_page_max_wired = cnt.v_free_count / 3;
+
+	/*
+	 *	The pageout daemon is never done, so loop
+	 *	forever.
+	 */
+
+	simple_lock(&vm_pages_needed_lock);
+	while (TRUE) {
+		thread_sleep((int) &vm_pages_needed, &vm_pages_needed_lock,
+			     FALSE);
+		/*
+		 * Compute the inactive target for this scan.
+		 * We need to keep a reasonable amount of memory in the
+		 * inactive list to better simulate LRU behavior.
+		 */
+		cnt.v_inactive_target =
+			(cnt.v_active_count + cnt.v_inactive_count) / 3;
+		if (cnt.v_inactive_target <= cnt.v_free_target)
+			cnt.v_inactive_target = cnt.v_free_target + 1;
+
+		/*
+		 * Only make a scan if we are likely to do something.
+		 * Otherwise we might have been awakened by a pager
+		 * to clean up async pageouts.
+		 */
+		if (cnt.v_free_count < cnt.v_free_target ||
+		    cnt.v_inactive_count < cnt.v_inactive_target)
+			vm_pageout_scan();
+		vm_pager_sync();
+		simple_lock(&vm_pages_needed_lock);
+		thread_wakeup((int) &cnt.v_free_count);
+	}
+}
diff --git a/sys/vm/vm_pageout.h b/sys/vm/vm_pageout.h
new file mode 100644
index 00000000000..a82a0ea40ac
--- /dev/null
+++ b/sys/vm/vm_pageout.h
@@ -0,0 +1,96 @@
+/* 
+ * Copyright (c) 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * The Mach Operating System project at Carnegie-Mellon University.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)vm_pageout.h	8.2 (Berkeley) 1/12/94
+ *
+ *
+ * Copyright (c) 1987, 1990 Carnegie-Mellon University.
+ * All rights reserved.
+ *
+ * Author: Avadis Tevanian, Jr.
+ * 
+ * Permission to use, copy, modify and distribute this software and
+ * its documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ * 
+ * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 
+ * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND 
+ * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ * 
+ * Carnegie Mellon requests users of this software to return to
+ *
+ *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
+ *  School of Computer Science
+ *  Carnegie Mellon University
+ *  Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie the
+ * rights to redistribute these changes.
+ */
+
+/*
+ *	Header file for pageout daemon.
+ */
+
+/*
+ *	Exported data structures.
+ */
+
+extern int	vm_pages_needed;	/* should be some "event" structure */
+simple_lock_data_t	vm_pages_needed_lock;
+
+
+/*
+ *	Exported routines.
+ */
+
+/*
+ *	Signal pageout-daemon and wait for it.
+ */
+
+#define	VM_WAIT		{ \
+			simple_lock(&vm_pages_needed_lock); \
+			thread_wakeup((int)&vm_pages_needed); \
+			thread_sleep((int)&cnt.v_free_count, \
+				&vm_pages_needed_lock, FALSE); \
+			}
+#ifdef KERNEL
+void		 vm_pageout __P((void));
+void		 vm_pageout_scan __P((void));
+void		 vm_pageout_page __P((vm_page_t, vm_object_t));
+void		 vm_pageout_cluster __P((vm_page_t, vm_object_t));
+#endif
diff --git a/sys/vm/vm_pager.c b/sys/vm/vm_pager.c
new file mode 100644
index 00000000000..7123abb16ef
--- /dev/null
+++ b/sys/vm/vm_pager.c
@@ -0,0 +1,381 @@
+/* 
+ * Copyright (c) 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * The Mach Operating System project at Carnegie-Mellon University.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)vm_pager.c	8.6 (Berkeley) 1/12/94
+ *
+ *
+ * Copyright (c) 1987, 1990 Carnegie-Mellon University.
+ * All rights reserved.
+ *
+ * Authors: Avadis Tevanian, Jr., Michael Wayne Young
+ * 
+ * Permission to use, copy, modify and distribute this software and
+ * its documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ * 
+ * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 
+ * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND 
+ * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ * 
+ * Carnegie Mellon requests users of this software to return to
+ *
+ *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
+ *  School of Computer Science
+ *  Carnegie Mellon University
+ *  Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie the
+ * rights to redistribute these changes.
+ */
+
+/*
+ *	Paging space routine stubs.  Emulates a matchmaker-like interface
+ *	for builtin pagers.
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/malloc.h>
+
+#include <vm/vm.h>
+#include <vm/vm_page.h>
+#include <vm/vm_kern.h>
+
+#ifdef SWAPPAGER
+extern struct pagerops swappagerops;
+#endif
+
+#ifdef VNODEPAGER
+extern struct pagerops vnodepagerops;
+#endif
+
+#ifdef DEVPAGER
+extern struct pagerops devicepagerops;
+#endif
+
+struct pagerops *pagertab[] = {
+#ifdef SWAPPAGER
+	&swappagerops,		/* PG_SWAP */
+#else
+	NULL,
+#endif
+#ifdef VNODEPAGER
+	&vnodepagerops,		/* PG_VNODE */
+#else
+	NULL,
+#endif
+#ifdef DEVPAGER
+	&devicepagerops,	/* PG_DEV */
+#else
+	NULL,
+#endif
+};
+int npagers = sizeof (pagertab) / sizeof (pagertab[0]);
+
+struct pagerops *dfltpagerops = NULL;	/* default pager */
+
+/*
+ * Kernel address space for mapping pages.
+ * Used by pagers where KVAs are needed for IO.
+ *
+ * XXX needs to be large enough to support the number of pending async
+ * cleaning requests (NPENDINGIO == 64) * the maximum swap cluster size
+ * (MAXPHYS == 64k) if you want to get the most efficiency.
+ */
+#define PAGER_MAP_SIZE	(4 * 1024 * 1024)
+
+vm_map_t pager_map;
+boolean_t pager_map_wanted;
+vm_offset_t pager_sva, pager_eva;
+
+void
+vm_pager_init()
+{
+	struct pagerops **pgops;
+
+	/*
+	 * Allocate a kernel submap for tracking get/put page mappings
+	 */
+	pager_map = kmem_suballoc(kernel_map, &pager_sva, &pager_eva,
+				  PAGER_MAP_SIZE, FALSE);
+	/*
+	 * Initialize known pagers, skipping unconfigured (NULL) slots
+	 */
+	for (pgops = pagertab; pgops < &pagertab[npagers]; pgops++)
+		if (*pgops)
+			(*(*pgops)->pgo_init)();
+	if (dfltpagerops == NULL)
+		panic("no default pager");
+}
+
+/*
+ * Allocate an instance of a pager of the given type.
+ * Size, protection and offset parameters are passed in for pagers that
+ * need to perform page-level validation (e.g. the device pager).
+ */
+vm_pager_t
+vm_pager_allocate(type, handle, size, prot, off)
+	int type;
+	caddr_t handle;
+	vm_size_t size;
+	vm_prot_t prot;
+	vm_offset_t off;
+{
+	struct pagerops *ops;
+
+	ops = (type == PG_DFLT) ? dfltpagerops : pagertab[type];
+	if (ops)
+		return ((*ops->pgo_alloc)(handle, size, prot, off));
+	return (NULL);
+}
+
+void
+vm_pager_deallocate(pager)
+	vm_pager_t	pager;
+{
+	if (pager == NULL)
+		panic("vm_pager_deallocate: null pager");
+
+	(*pager->pg_ops->pgo_dealloc)(pager);
+}
+
+int
+vm_pager_get_pages(pager, mlist, npages, sync)
+	vm_pager_t	pager;
+	vm_page_t	*mlist;
+	int		npages;
+	boolean_t	sync;
+{
+	int rv;
+
+	if (pager == NULL) {
+		rv = VM_PAGER_OK;
+		while (npages--)
+			if (!vm_page_zero_fill(*mlist)) {
+				rv = VM_PAGER_FAIL;
+				break;
+			} else
+				mlist++;
+		return (rv);
+	}
+	return ((*pager->pg_ops->pgo_getpages)(pager, mlist, npages, sync));
+}
+
+int
+vm_pager_put_pages(pager, mlist, npages, sync)
+	vm_pager_t	pager;
+	vm_page_t	*mlist;
+	int		npages;
+	boolean_t	sync;
+{
+	if (pager == NULL)
+		panic("vm_pager_put_pages: null pager");
+	return ((*pager->pg_ops->pgo_putpages)(pager, mlist, npages, sync));
+}
+
+boolean_t
+vm_pager_has_page(pager, offset)
+	vm_pager_t	pager;
+	vm_offset_t	offset;
+{
+	if (pager == NULL)
+		panic("vm_pager_has_page: null pager");
+	return ((*pager->pg_ops->pgo_haspage)(pager, offset));
+}
+
+/*
+ * Called by pageout daemon before going back to sleep.
+ * Gives pagers a chance to clean up any completed async paging operations.
+ */
+void
+vm_pager_sync()
+{
+	struct pagerops **pgops;
+
+	for (pgops = pagertab; pgops < &pagertab[npagers]; pgops++)
+		if (*pgops)	/* slot is NULL when pager not configured */
+			(*(*pgops)->pgo_putpages)(NULL, NULL, 0, FALSE);
+}
+
+void
+vm_pager_cluster(pager, offset, loff, hoff)
+	vm_pager_t	pager;
+	vm_offset_t	offset;
+	vm_offset_t	*loff;	/* passed through to the pager's pgo_cluster */
+	vm_offset_t	*hoff;	/* passed through to the pager's pgo_cluster */
+{
+	if (pager == NULL)
+		panic("vm_pager_cluster: null pager");
+	(*pager->pg_ops->pgo_cluster)(pager, offset, loff, hoff);
+}
+
+void
+vm_pager_clusternull(pager, offset, loff, hoff)
+	vm_pager_t	pager;
+	vm_offset_t	offset;
+	vm_offset_t	*loff;
+	vm_offset_t	*hoff;
+{
+	panic("vm_pager_clusternull called");	/* always panics */
+}
+
+vm_offset_t
+vm_pager_map_pages(mlist, npages, canwait)
+	vm_page_t	*mlist;
+	int		npages;
+	boolean_t	canwait;
+{
+	vm_offset_t kva, va;
+	vm_size_t size;
+	vm_page_t m;
+
+	/*
+	 * Allocate space in the pager map, if none available return 0.
+	 * This is basically an expansion of kmem_alloc_wait with optional
+	 * blocking on no space.
+	 */
+	size = npages * PAGE_SIZE;
+	vm_map_lock(pager_map);
+	while (vm_map_findspace(pager_map, 0, size, &kva)) {
+		if (!canwait) {
+			vm_map_unlock(pager_map);
+			return (0);
+		}
+		pager_map_wanted = TRUE;
+		vm_map_unlock(pager_map);
+		(void) tsleep(pager_map, PVM, "pager_map", 0);
+		vm_map_lock(pager_map);
+	}
+	vm_map_insert(pager_map, NULL, 0, kva, kva + size);
+	vm_map_unlock(pager_map);
+
+	for (va = kva; npages--; va += PAGE_SIZE) {
+		m = *mlist++;
+#ifdef DEBUG
+		if ((m->flags & PG_BUSY) == 0)
+			panic("vm_pager_map_pages: page not busy");
+		if (m->flags & PG_PAGEROWNED)
+			panic("vm_pager_map_pages: page already in pager");
+#endif
+#ifdef DEBUG
+		m->flags |= PG_PAGEROWNED;
+#endif
+		pmap_enter(vm_map_pmap(pager_map), va, VM_PAGE_TO_PHYS(m),
+			   VM_PROT_DEFAULT, TRUE);
+	}
+	return (kva);
+}
+
+void
+vm_pager_unmap_pages(kva, npages)
+	vm_offset_t	kva;
+	int		npages;
+{
+	vm_size_t size = npages * PAGE_SIZE;
+
+#ifdef DEBUG
+	vm_offset_t va;
+	vm_page_t m;
+	int np = npages;
+
+	for (va = kva; np--; va += PAGE_SIZE) {
+		m = vm_pager_atop(va);
+		if (m->flags & PG_PAGEROWNED)
+			m->flags &= ~PG_PAGEROWNED;
+		else
+			printf("vm_pager_unmap_pages: %x(%x/%x) not owned\n",
+			       m, va, VM_PAGE_TO_PHYS(m));
+	}
+#endif
+	pmap_remove(vm_map_pmap(pager_map), kva, kva + size);
+	vm_map_lock(pager_map);
+	(void) vm_map_delete(pager_map, kva, kva + size);
+	if (pager_map_wanted)
+		wakeup(pager_map);
+	vm_map_unlock(pager_map);
+}
+
+vm_page_t
+vm_pager_atop(kva)
+	vm_offset_t	kva;
+{
+	vm_offset_t pa;
+
+	pa = pmap_extract(vm_map_pmap(pager_map), kva);
+	if (pa == 0)
+		panic("vm_pager_atop");
+	return (PHYS_TO_VM_PAGE(pa));
+}
+
+vm_pager_t
+vm_pager_lookup(pglist, handle)
+	register struct pagerlst *pglist;
+	caddr_t handle;
+{
+	register vm_pager_t pager;
+
+	for (pager = pglist->tqh_first; pager; pager = pager->pg_list.tqe_next)
+		if (pager->pg_handle == handle)
+			return (pager);
+	return (NULL);
+}
+
+/*
+ * Set or clear OBJ_CANPERSIST on object per should_cache, then call
+ * vm_object_deallocate() on it.  Returns KERN_INVALID_ARGUMENT if NULL.
+ */
+int
+pager_cache(object, should_cache)
+	vm_object_t	object;
+	boolean_t	should_cache;
+{
+	if (object == NULL)
+		return (KERN_INVALID_ARGUMENT);
+
+	vm_object_cache_lock();
+	vm_object_lock(object);
+	if (should_cache)
+		object->flags |= OBJ_CANPERSIST;
+	else
+		object->flags &= ~OBJ_CANPERSIST;
+	vm_object_unlock(object);
+	vm_object_cache_unlock();
+
+	vm_object_deallocate(object);
+
+	return (KERN_SUCCESS);
+}
diff --git a/sys/vm/vm_pager.h b/sys/vm/vm_pager.h
new file mode 100644
index 00000000000..e4659c268c1
--- /dev/null
+++ b/sys/vm/vm_pager.h
@@ -0,0 +1,147 @@
+/*
+ * Copyright (c) 1990 University of Utah.
+ * Copyright (c) 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * the Systems Programming Group of the University of Utah Computer
+ * Science Department.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)vm_pager.h	8.4 (Berkeley) 1/12/94
+ */
+
+/*
+ * Pager routine interface definition.
+ * For BSD we use a cleaner version of the internal pager interface.
+ */
+
+#ifndef	_VM_PAGER_
+#define	_VM_PAGER_
+
+/*
+ * Queue head type for lists of pagers; the elements are linked through
+ * their pg_list entry.
+ */
+TAILQ_HEAD(pagerlst, pager_struct);
+
+/*
+ * Generic pager descriptor.  pg_ops points at the operations vector of
+ * the pager implementation and pg_data at its private state.
+ */
+struct	pager_struct {
+	TAILQ_ENTRY(pager_struct) pg_list;	/* links for list management */
+	caddr_t			  pg_handle;	/* ext. handle (vp, dev, fp) */
+	int			  pg_type;	/* type of pager */
+	int			  pg_flags;	/* flags */
+	struct pagerops		  *pg_ops;	/* pager operations */
+	void			  *pg_data;	/* private pager data */
+};
+
+/* pager types (values for pg_type) */
+#define PG_DFLT		-1
+#define	PG_SWAP		0
+#define	PG_VNODE	1
+#define PG_DEVICE	2
+
+/* flags (bits in pg_flags; tested with vm_pager_cancluster()) */
+#define PG_CLUSTERGET	1
+#define PG_CLUSTERPUT	2
+
+/*
+ * Operations vector supplied by a pager implementation.  A pager refers
+ * to its vector through pg_ops in struct pager_struct.
+ */
+struct	pagerops {
+	void		(*pgo_init)		/* Initialize pager. */
+			    __P((void));
+	vm_pager_t	(*pgo_alloc)		/* Allocate pager. */
+			    __P((caddr_t, vm_size_t, vm_prot_t, vm_offset_t));
+	void		(*pgo_dealloc)		/* Disassociate. */
+			    __P((vm_pager_t));
+	int		(*pgo_getpages)		/* Get (read) page. */
+			    __P((vm_pager_t, vm_page_t *, int, boolean_t));
+	int		(*pgo_putpages)		/* Put (write) page. */
+			    __P((vm_pager_t, vm_page_t *, int, boolean_t));
+	boolean_t  	(*pgo_haspage)		/* Does pager have page? */
+			    __P((vm_pager_t, vm_offset_t));
+	void		(*pgo_cluster)		/* Return range of cluster. */
+			    __P((vm_pager_t, vm_offset_t,
+				 vm_offset_t *, vm_offset_t *));
+};
+
+/*
+ * get/put return values
+ * OK	 operation was successful
+ * BAD	 specified data was out of the accepted range
+ * FAIL	 specified data was in range, but doesn't exist
+ * PEND	 operation was initiated but not completed
+ * ERROR error while accessing data that is in range and exists
+ * AGAIN temporary resource shortage prevented operation from happening
+ */
+#define	VM_PAGER_OK	0
+#define	VM_PAGER_BAD	1
+#define	VM_PAGER_FAIL	2
+#define	VM_PAGER_PEND	3
+#define	VM_PAGER_ERROR	4
+#define VM_PAGER_AGAIN	5
+
+#ifdef KERNEL
+extern struct pagerops *dfltpagerops;
+
+vm_pager_t	 vm_pager_allocate
+		    __P((int, caddr_t, vm_size_t, vm_prot_t, vm_offset_t));
+vm_page_t	 vm_pager_atop __P((vm_offset_t));
+void		 vm_pager_cluster
+		    __P((vm_pager_t, vm_offset_t,
+			 vm_offset_t *, vm_offset_t *));
+void		 vm_pager_clusternull
+		    __P((vm_pager_t, vm_offset_t,
+			 vm_offset_t *, vm_offset_t *));
+void		 vm_pager_deallocate __P((vm_pager_t));
+int		 vm_pager_get_pages
+		    __P((vm_pager_t, vm_page_t *, int, boolean_t));
+boolean_t	 vm_pager_has_page __P((vm_pager_t, vm_offset_t));
+void		 vm_pager_init __P((void));
+vm_pager_t	 vm_pager_lookup __P((struct pagerlst *, caddr_t));
+vm_offset_t	 vm_pager_map_pages __P((vm_page_t *, int, boolean_t));
+int		 vm_pager_put_pages
+		    __P((vm_pager_t, vm_page_t *, int, boolean_t));
+void		 vm_pager_sync __P((void));
+void		 vm_pager_unmap_pages __P((vm_offset_t, int));
+
+#define vm_pager_cancluster(p, b)	((p)->pg_flags & (b))
+
+/*
+ * XXX compat with old interface
+ *
+ * Single-page wrappers: each builds a one-element page list holding (m)
+ * and passes it to vm_pager_get_pages()/vm_pager_put_pages() with a
+ * count of 1.  They are written as ({ ... }) statement expressions (a
+ * GNU C extension), so the value of the macro is the value returned by
+ * the wrapped call.
+ */
+#define vm_pager_get(p, m, s) \
+({ \
+	vm_page_t ml[1]; \
+	ml[0] = (m); \
+	vm_pager_get_pages(p, ml, 1, s); \
+})
+#define vm_pager_put(p, m, s) \
+({ \
+	vm_page_t ml[1]; \
+	ml[0] = (m); \
+	vm_pager_put_pages(p, ml, 1, s); \
+})
+#endif
+
+#endif	/* _VM_PAGER_ */
diff --git a/sys/vm/vm_param.h b/sys/vm/vm_param.h
new file mode 100644
index 00000000000..2d2c71594ed
--- /dev/null
+++ b/sys/vm/vm_param.h
@@ -0,0 +1,159 @@
+/* 
+ * Copyright (c) 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * The Mach Operating System project at Carnegie-Mellon University.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)vm_param.h	8.1 (Berkeley) 6/11/93
+ *
+ *
+ * Copyright (c) 1987, 1990 Carnegie-Mellon University.
+ * All rights reserved.
+ *
+ * Authors: Avadis Tevanian, Jr., Michael Wayne Young
+ * 
+ * Permission to use, copy, modify and distribute this software and
+ * its documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ * 
+ * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 
+ * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND 
+ * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ * 
+ * Carnegie Mellon requests users of this software to return to
+ *
+ *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
+ *  School of Computer Science
+ *  Carnegie Mellon University
+ *  Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie the
+ * rights to redistribute these changes.
+ */
+
+/*
+ *	Machine independent virtual memory parameters.
+ */
+
+#ifndef	_VM_PARAM_
+#define	_VM_PARAM_
+
+#include <machine/vmparam.h>
+
+/*
+ * This belongs in types.h, but breaks too many existing programs.
+ */
+typedef int	boolean_t;
+#define	TRUE	1
+#define	FALSE	0
+
+/*
+ *	The machine independent pages are referred to as PAGES.  A page
+ *	is some number of hardware pages, depending on the target machine.
+ */
+#define DEFAULT_PAGE_SIZE	4096
+
+/*
+ *	All references to the size of a page should be done with PAGE_SIZE
+ *	or PAGE_SHIFT.  The fact they are variables is hidden here so that
+ *	we can easily make them constant if we so desire.
+ */
+#define	PAGE_SIZE	cnt.v_page_size		/* size of page */
+#define PAGE_MASK	page_mask		/* size of page - 1 */
+#define PAGE_SHIFT	page_shift		/* bits to shift for pages */
+#ifdef KERNEL
+extern vm_size_t	page_mask;
+extern int		page_shift;
+#endif
+
+/*
+ * CTL_VM identifiers
+ */
+#define	VM_METER	 1		/* struct vmmeter */
+#define	VM_LOADAVG	 2		/* struct loadavg */
+#define	VM_MAXID	 3		/* number of valid vm ids */
+
+/*
+ * Name/type pairs for the identifiers above, in identifier order.
+ * Slot 0 is an empty placeholder because the identifiers start at 1.
+ */
+#define CTL_VM_NAMES { \
+	{ 0, 0 }, \
+	{ "vmmeter", CTLTYPE_STRUCT }, \
+	{ "loadavg", CTLTYPE_STRUCT }, \
+}
+
+/* 
+ *	Return values from the VM routines.
+ */
+#define	KERN_SUCCESS		0
+#define	KERN_INVALID_ADDRESS	1
+#define	KERN_PROTECTION_FAILURE	2
+#define	KERN_NO_SPACE		3
+#define	KERN_INVALID_ARGUMENT	4
+#define	KERN_FAILURE		5
+#define	KERN_RESOURCE_SHORTAGE	6
+#define	KERN_NOT_RECEIVER	7
+#define	KERN_NO_ACCESS		8
+
+#ifndef ASSEMBLER
+/*
+ *	Convert addresses to pages and vice versa.
+ *	No rounding is used.
+ */
+#ifdef KERNEL
+/*
+ * atop: byte address (or count) to page number.
+ * ptoa: page number to byte address.  The argument is converted to
+ * vm_offset_t BEFORE shifting, so the shift is carried out in the
+ * result type rather than in the argument's own type, where a signed
+ * or narrower argument could overflow.
+ * NOTE(review): atop still narrows its argument to unsigned before
+ * shifting; confirm that is wide enough wherever vm_offset_t is wider
+ * than unsigned.
+ */
+#define	atop(x)		(((unsigned)(x)) >> PAGE_SHIFT)
+#define	ptoa(x)		(((vm_offset_t)(x)) << PAGE_SHIFT)
+
+/*
+ * Round off or truncate to the nearest page.  These will work
+ * for either addresses or counts (i.e., 1 byte rounds to 1 page).
+ */
+#define round_page(x) \
+	((vm_offset_t)((((vm_offset_t)(x)) + PAGE_MASK) & ~PAGE_MASK))
+#define trunc_page(x) \
+	((vm_offset_t)(((vm_offset_t)(x)) & ~PAGE_MASK))
+#define num_pages(x) \
+	((vm_offset_t)((((vm_offset_t)(x)) + PAGE_MASK) >> PAGE_SHIFT))
+
+extern vm_size_t	mem_size;	/* size of physical memory (bytes) */
+extern vm_offset_t	first_addr;	/* first physical page */
+extern vm_offset_t	last_addr;	/* last physical page */
+
+#else
+/* out-of-kernel versions of round_page and trunc_page */
+#define	round_page(x) \
+       ((((vm_offset_t)(x) + (vm_page_size - 1)) / vm_page_size) * vm_page_size)
+#define	trunc_page(x) \
+	((((vm_offset_t)(x)) / vm_page_size) * vm_page_size)
+
+#endif /* KERNEL */
+#endif /* ASSEMBLER */
+#endif /* _VM_PARAM_ */
diff --git a/sys/vm/vm_prot.h b/sys/vm/vm_prot.h
new file mode 100644
index 00000000000..b3bae438631
--- /dev/null
+++ b/sys/vm/vm_prot.h
@@ -0,0 +1,102 @@
+/* 
+ * Copyright (c) 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * The Mach Operating System project at Carnegie-Mellon University.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)vm_prot.h	8.1 (Berkeley) 6/11/93
+ *
+ *
+ * Copyright (c) 1987, 1990 Carnegie-Mellon University.
+ * All rights reserved.
+ *
+ * Authors: Avadis Tevanian, Jr., Michael Wayne Young
+ * 
+ * Permission to use, copy, modify and distribute this software and
+ * its documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ * 
+ * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 
+ * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND 
+ * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ * 
+ * Carnegie Mellon requests users of this software to return to
+ *
+ *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
+ *  School of Computer Science
+ *  Carnegie Mellon University
+ *  Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie the
+ * rights to redistribute these changes.
+ */
+
+/*
+ *	Virtual memory protection definitions.
+ */
+
+#ifndef	_VM_PROT_
+#define	_VM_PROT_
+
+/*
+ *	Types defined:
+ *
+ *	vm_prot_t		VM protection values.
+ */
+
+typedef int		vm_prot_t;
+
+/*
+ *	Protection values, defined as bits within the vm_prot_t type
+ */
+
+#define	VM_PROT_NONE	((vm_prot_t) 0x00)
+
+#define VM_PROT_READ	((vm_prot_t) 0x01)	/* read permission */
+#define VM_PROT_WRITE	((vm_prot_t) 0x02)	/* write permission */
+#define VM_PROT_EXECUTE	((vm_prot_t) 0x04)	/* execute permission */
+
+/*
+ *	The default protection for newly-created virtual memory
+ *	(as defined here, the same bits as VM_PROT_ALL).
+ */
+
+#define VM_PROT_DEFAULT	(VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE)
+
+/*
+ *	The maximum privileges possible, for parameter checking.
+ */
+
+#define VM_PROT_ALL	(VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE)
+
+#endif /* _VM_PROT_ */
diff --git a/sys/vm/vm_swap.c b/sys/vm/vm_swap.c
new file mode 100644
index 00000000000..10b7523ae23
--- /dev/null
+++ b/sys/vm/vm_swap.c
@@ -0,0 +1,427 @@
+/*
+ * Copyright (c) 1982, 1986, 1989, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)vm_swap.c	8.5 (Berkeley) 2/17/94
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/buf.h>
+#include <sys/conf.h>
+#include <sys/proc.h>
+#include <sys/namei.h>
+#include <sys/dmap.h>		/* XXX */
+#include <sys/vnode.h>
+#include <sys/map.h>
+#include <sys/file.h>
+
+#include <miscfs/specfs/specdev.h>
+
+/*
+ * Indirect driver for multi-controller paging.
+ */
+
+int	nswap, nswdev;		/* total swap blocks, # of swap devices */
+#ifdef SEQSWAP
+int	niswdev;		/* number of interleaved swap devices */
+int	niswap;			/* size of interleaved swap area */
+#endif
+
+/*
+ * Set up swap devices.
+ * Initialize linked list of free swap
+ * headers. These do not actually point
+ * to buffers, but rather to pages that
+ * are being swapped in and out.
+ *
+ * Computes nswdev and nswap (plus niswdev/niswap under SEQSWAP) from
+ * the swdevt[] table, makes sure swdevt[0] has a vnode (bdevvp),
+ * enables swapping on device 0 through swfree() when any swap space
+ * was found, and chains the nswbuf swap buffer headers of swbuf[]
+ * onto bswlist.
+ * Panics if no swap device is configured, if the vnode for device 0
+ * cannot be obtained, or if swfree() fails.
+ */
+void
+swapinit()
+{
+	register int i;
+	register struct buf *sp = swbuf;
+	register struct proc *p = &proc0;	/* XXX */
+	struct swdevt *swp;
+	int error;
+
+	/*
+	 * Count swap devices, and adjust total swap space available.
+	 * Some of the space will not be countable until later (dynamically
+	 * configurable devices) and some of the counted space will not be
+	 * available until a swapon() system call is issued, both usually
+	 * happen when the system goes multi-user.
+	 *
+	 * If using NFS for swap, swdevt[0] will already be bdevvp'd.	XXX
+	 */
+#ifdef SEQSWAP
+	nswdev = niswdev = 0;
+	nswap = niswap = 0;
+	/*
+	 * All interleaved devices must come first
+	 * (the interleaved area is sized as the largest such device,
+	 * rounded up to a multiple of dmmax, times their number).
+	 */
+	for (swp = swdevt; swp->sw_dev != NODEV || swp->sw_vp != NULL; swp++) {
+		if (swp->sw_flags & SW_SEQUENTIAL)
+			break;
+		niswdev++;
+		if (swp->sw_nblks > niswap)
+			niswap = swp->sw_nblks;
+	}
+	niswap = roundup(niswap, dmmax);
+	niswap *= niswdev;
+	if (swdevt[0].sw_vp == NULL &&
+	    bdevvp(swdevt[0].sw_dev, &swdevt[0].sw_vp))
+		panic("swapvp");
+	/*
+	 * The remainder must be sequential
+	 * (each contributes its own size, truncated to a multiple of dmmax).
+	 */
+	for ( ; swp->sw_dev != NODEV; swp++) {
+		if ((swp->sw_flags & SW_SEQUENTIAL) == 0)
+			panic("binit: mis-ordered swap devices");
+		nswdev++;
+		if (swp->sw_nblks > 0) {
+			if (swp->sw_nblks % dmmax)
+				swp->sw_nblks -= (swp->sw_nblks % dmmax);
+			nswap += swp->sw_nblks;
+		}
+	}
+	nswdev += niswdev;
+	if (nswdev == 0)
+		panic("swapinit");
+	nswap += niswap;
+#else
+	/*
+	 * Interleaved devices only: every device is accounted at the size
+	 * of the largest one, rounded up to a multiple of dmmax when there
+	 * is more than one device.
+	 */
+	nswdev = 0;
+	nswap = 0;
+	for (swp = swdevt; swp->sw_dev != NODEV || swp->sw_vp != NULL; swp++) {
+		nswdev++;
+		if (swp->sw_nblks > nswap)
+			nswap = swp->sw_nblks;
+	}
+	if (nswdev == 0)
+		panic("swapinit");
+	if (nswdev > 1)
+		nswap = ((nswap + dmmax - 1) / dmmax) * dmmax;
+	nswap *= nswdev;
+	if (swdevt[0].sw_vp == NULL &&
+	    bdevvp(swdevt[0].sw_dev, &swdevt[0].sw_vp))
+		panic("swapvp");
+#endif
+	if (nswap == 0)
+		printf("WARNING: no swap space found\n");
+	else if (error = swfree(p, 0)) {
+		printf("swfree errno %d\n", error);	/* XXX */
+		panic("swapinit swfree 0");
+	}
+
+	/*
+	 * Now set up swap buffer headers.
+	 * The loop links all but the last header to its successor; the
+	 * last one is initialized afterwards and terminates the list.
+	 */
+	bswlist.b_actf = sp;
+	for (i = 0; i < nswbuf - 1; i++, sp++) {
+		sp->b_actf = sp + 1;
+		sp->b_rcred = sp->b_wcred = p->p_ucred;
+		sp->b_vnbufs.le_next = NOLIST;
+	}
+	sp->b_rcred = sp->b_wcred = p->p_ucred;
+	sp->b_vnbufs.le_next = NOLIST;
+	sp->b_actf = NULL;
+}
+
+/*
+ * Strategy routine for swap I/O: translate the swap-space block number
+ * in bp into a (device index, device block) pair, retarget the buffer
+ * at that device's vnode and pass it on with VOP_STRATEGY().
+ * Requests that run past nswap, straddle a dmmax interleave chunk, or
+ * land on a device without a vnode are completed through biodone()
+ * with B_ERROR set (b_error EINVAL or ENODEV).  Panics if the selected
+ * table entry has no device (NODEV).
+ */
+void
+swstrategy(bp)
+	register struct buf *bp;
+{
+	int sz, off, seg, index;
+	register struct swdevt *sp;
+	struct vnode *vp;
+
+#ifdef GENERIC
+	/*
+	 * A mini-root gets copied into the front of the swap
+	 * and we run over top of the swap area just long
+	 * enough for us to do a mkfs and restor of the real
+	 * root (sure beats rewriting standalone restor).
+	 */
+#define	MINIROOTSIZE	4096
+	if (rootdev == dumpdev)
+		bp->b_blkno += MINIROOTSIZE;
+#endif
+	sz = howmany(bp->b_bcount, DEV_BSIZE);	/* transfer length in blocks */
+	if (bp->b_blkno + sz > nswap) {
+		bp->b_error = EINVAL;
+		bp->b_flags |= B_ERROR;
+		biodone(bp);
+		return;
+	}
+	if (nswdev > 1) {
+#ifdef SEQSWAP
+		if (bp->b_blkno < niswap) {
+			/* Block lies in the interleaved area. */
+			if (niswdev > 1) {
+				off = bp->b_blkno % dmmax;
+				if (off+sz > dmmax) {
+					bp->b_error = EINVAL;
+					bp->b_flags |= B_ERROR;
+					biodone(bp);
+					return;
+				}
+				seg = bp->b_blkno / dmmax;
+				index = seg % niswdev;
+				seg /= niswdev;
+				bp->b_blkno = seg*dmmax + off;
+			} else
+				index = 0;
+		} else {
+			register struct swdevt *swp;
+
+			/*
+			 * Sequential area: subtract the size of each
+			 * sequential device in turn until the block falls
+			 * inside one of them.
+			 */
+			bp->b_blkno -= niswap;
+			for (index = niswdev, swp = &swdevt[niswdev];
+			     swp->sw_dev != NODEV;
+			     swp++, index++) {
+				if (bp->b_blkno < swp->sw_nblks)
+					break;
+				bp->b_blkno -= swp->sw_nblks;
+			}
+			if (swp->sw_dev == NODEV ||
+			    bp->b_blkno+sz > swp->sw_nblks) {
+				bp->b_error = swp->sw_dev == NODEV ?
+					ENODEV : EINVAL;
+				bp->b_flags |= B_ERROR;
+				biodone(bp);
+				return;
+			}
+		}
+#else
+		/*
+		 * Interleave: chunk `seg' of dmmax blocks lives on device
+		 * seg % nswdev, at chunk seg / nswdev of that device.  A
+		 * transfer may not cross a chunk boundary.
+		 */
+		off = bp->b_blkno % dmmax;
+		if (off+sz > dmmax) {
+			bp->b_error = EINVAL;
+			bp->b_flags |= B_ERROR;
+			biodone(bp);
+			return;
+		}
+		seg = bp->b_blkno / dmmax;
+		index = seg % nswdev;
+		seg /= nswdev;
+		bp->b_blkno = seg*dmmax + off;
+#endif
+	} else
+		index = 0;
+	sp = &swdevt[index];
+	if ((bp->b_dev = sp->sw_dev) == NODEV)
+		panic("swstrategy");
+	if (sp->sw_vp == NULL) {
+		bp->b_error = ENODEV;
+		bp->b_flags |= B_ERROR;
+		biodone(bp);
+		return;
+	}
+	VHOLD(sp->sw_vp);
+	/*
+	 * For a write, move the pending-output count from the vnode the
+	 * buffer was attached to (waking a VBWAIT sleeper once that count
+	 * drains) over to the swap device vnode.
+	 */
+	if ((bp->b_flags & B_READ) == 0) {
+		if (vp = bp->b_vp) {
+			vp->v_numoutput--;
+			if ((vp->v_flag & VBWAIT) && vp->v_numoutput <= 0) {
+				vp->v_flag &= ~VBWAIT;
+				wakeup((caddr_t)&vp->v_numoutput);
+			}
+		}
+		sp->sw_vp->v_numoutput++;
+	}
+	/* Detach from any previous vnode and point at the swap device. */
+	if (bp->b_vp != NULL)
+		brelvp(bp);
+	bp->b_vp = sp->sw_vp;
+	VOP_STRATEGY(bp);
+}
+
+/*
+ * System call swapon(name) enables swapping on device name,
+ * which must be listed in the swdevt table.  Return EBUSY
+ * if already swapping on this device.
+ *
+ * Other failures: the error from suser() or namei(), ENOTBLK if name
+ * is not a block device, ENXIO if its major number is out of range,
+ * EINVAL if the device is not found in swdevt, or the error returned
+ * by swfree().
+ * On success the vnode obtained by namei() stays recorded in the
+ * table entry's sw_vp.  On every failure path it is released, and the
+ * entry's sw_vp is put back to the value it had on entry.
+ */
+struct swapon_args {
+	char	*name;
+};
+/* ARGSUSED */
+int
+swapon(p, uap, retval)
+	struct proc *p;
+	struct swapon_args *uap;
+	int *retval;
+{
+	register struct vnode *vp;
+	register struct swdevt *sp;
+	struct vnode *ovp;
+	dev_t dev;
+	int error;
+	struct nameidata nd;
+
+	if (error = suser(p->p_ucred, &p->p_acflag))
+		return (error);
+	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->name, p);
+	if (error = namei(&nd))
+		return (error);
+	vp = nd.ni_vp;
+	if (vp->v_type != VBLK) {
+		vrele(vp);
+		return (ENOTBLK);
+	}
+	dev = (dev_t)vp->v_rdev;
+	if (major(dev) >= nblkdev) {
+		vrele(vp);
+		return (ENXIO);
+	}
+	for (sp = &swdevt[0]; sp->sw_dev != NODEV; sp++) {
+		if (sp->sw_dev == dev) {
+			if (sp->sw_flags & SW_FREED) {
+				vrele(vp);
+				return (EBUSY);
+			}
+			/* swfree() picks the vnode up from the table entry. */
+			ovp = sp->sw_vp;
+			sp->sw_vp = vp;
+			if (error = swfree(p, sp - swdevt)) {
+				/*
+				 * Do not leave the entry pointing at a
+				 * vnode whose reference is being dropped.
+				 */
+				sp->sw_vp = ovp;
+				vrele(vp);
+				return (error);
+			}
+			return (0);
+		}
+#ifdef SEQSWAP
+		/*
+		 * If we have reached a non-freed sequential device without
+		 * finding what we are looking for, it is an error.
+		 * That is because all interleaved devices must come first
+		 * and sequential devices must be freed in order.
+		 */
+		if ((sp->sw_flags & (SW_SEQUENTIAL|SW_FREED)) == SW_SEQUENTIAL)
+			break;
+#endif
+	}
+	vrele(vp);
+	return (EINVAL);
+}
+
+/*
+ * Swfree(index) frees the index'th portion of the swap map.
+ * Each of the nswdev devices provides 1/nswdev'th of the swap
+ * space, which is laid out with blocks of dmmax pages circularly
+ * among the devices.
+ *
+ * Opens the device's vnode (taken from swdevt[index].sw_vp, so the
+ * caller must have set it), marks the entry SW_FREED and releases the
+ * device's share of swap space into swapmap.  If the size is not yet
+ * known (sw_nblks <= 0) it is obtained from the driver's d_psize
+ * routine and clipped or truncated as the configuration requires.
+ * Returns 0 on success, the VOP_OPEN() error, or ENXIO when the size
+ * cannot be determined.  A device whose size comes out as 0 is closed
+ * again, loses SW_FREED, and 0 is still returned.
+ */
+int
+swfree(p, index)
+	struct proc *p;
+	int index;
+{
+	register struct swdevt *sp;
+	register swblk_t vsbase;
+	register long blk;
+	struct vnode *vp;
+	register swblk_t dvbase;
+	register int nblks;
+	int error;
+
+	sp = &swdevt[index];
+	vp = sp->sw_vp;
+	if (error = VOP_OPEN(vp, FREAD|FWRITE, p->p_ucred, p))
+		return (error);
+	sp->sw_flags |= SW_FREED;
+	nblks = sp->sw_nblks;
+	/*
+	 * Some devices may not exist til after boot time.
+	 * If so, their nblk count will be 0.
+	 */
+	if (nblks <= 0) {
+		int perdev;
+		dev_t dev = sp->sw_dev;
+
+		if (bdevsw[major(dev)].d_psize == 0 ||
+		    (nblks = (*bdevsw[major(dev)].d_psize)(dev)) == -1) {
+			(void) VOP_CLOSE(vp, FREAD|FWRITE, p->p_ucred, p);
+			sp->sw_flags &= ~SW_FREED;
+			return (ENXIO);
+		}
+#ifdef SEQSWAP
+		if (index < niswdev) {
+			/* Interleaved: at most an equal share of niswap. */
+			perdev = niswap / niswdev;
+			if (nblks > perdev)
+				nblks = perdev;
+		} else {
+			/* Sequential: whole dmmax chunks, added to nswap. */
+			if (nblks % dmmax)
+				nblks -= (nblks % dmmax);
+			nswap += nblks;
+		}
+#else
+		/* At most an equal share of the space sized at swapinit(). */
+		perdev = nswap / nswdev;
+		if (nblks > perdev)
+			nblks = perdev;
+#endif
+		sp->sw_nblks = nblks;
+	}
+	if (nblks == 0) {
+		(void) VOP_CLOSE(vp, FREAD|FWRITE, p->p_ucred, p);
+		sp->sw_flags &= ~SW_FREED;
+		return (0);	/* XXX error? */
+	}
+#ifdef SEQSWAP
+	if (sp->sw_flags & SW_SEQUENTIAL) {
+		register struct swdevt *swp;
+
+		/*
+		 * A sequential device occupies one contiguous range that
+		 * starts after the interleaved area and after all earlier
+		 * sequential devices.
+		 */
+		blk = niswap;
+		for (swp = &swdevt[niswdev]; swp != sp; swp++)
+			blk += swp->sw_nblks;
+		rmfree(swapmap, nblks, blk);
+		return (0);
+	}
+#endif
+	/*
+	 * Walk the device in dmmax-sized chunks.  The chunk at device
+	 * offset dvbase maps to swap-space offset
+	 * index*dmmax + dvbase*nswdev (niswdev under SEQSWAP); blk is the
+	 * length of the chunk, shorter than dmmax only for the last one.
+	 */
+	for (dvbase = 0; dvbase < nblks; dvbase += dmmax) {
+		blk = nblks - dvbase;
+#ifdef SEQSWAP
+		if ((vsbase = index*dmmax + dvbase*niswdev) >= niswap)
+			panic("swfree");
+#else
+		if ((vsbase = index*dmmax + dvbase*nswdev) >= nswap)
+			panic("swfree");
+#endif
+		if (blk > dmmax)
+			blk = dmmax;
+		if (vsbase == 0) {
+			/*
+			 * First of all chunks... initialize the swapmap.
+			 * Don't use the first cluster of the device
+			 * in case it starts with a label or boot block.
+			 */
+			rminit(swapmap, blk - ctod(CLSIZE),
+			    vsbase + ctod(CLSIZE), "swap", nswapmap);
+		} else if (dvbase == 0) {
+			/*
+			 * Don't use the first cluster of the device
+			 * in case it starts with a label or boot block.
+			 */
+			rmfree(swapmap, blk - ctod(CLSIZE),
+			    vsbase + ctod(CLSIZE));
+		} else
+			rmfree(swapmap, blk, vsbase);
+	}
+	return (0);
+}
diff --git a/sys/vm/vm_unix.c b/sys/vm/vm_unix.c
new file mode 100644
index 00000000000..3d49ea71718
--- /dev/null
+++ b/sys/vm/vm_unix.c
@@ -0,0 +1,137 @@
+/*
+ * Copyright (c) 1988 University of Utah.
+ * Copyright (c) 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * the Systems Programming Group of the University of Utah Computer
+ * Science Department.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * from: Utah $Hdr: vm_unix.c 1.1 89/11/07$
+ *
+ *	@(#)vm_unix.c	8.1 (Berkeley) 6/11/93
+ */
+
+/*
+ * Traditional sbrk/grow interface to VM
+ */
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/resourcevar.h>
+
+#include <vm/vm.h>
+
+/*
+ * obreak: set the end of the data segment (the "break") to uap->nsiz,
+ * rounded up to a page boundary.  The segment is grown with
+ * vm_allocate() or shrunk with vm_deallocate(), and vm_dsize is
+ * adjusted to match.
+ *
+ * Returns 0 on success, EINVAL if the requested break lies below the
+ * start of the data segment, and ENOMEM if the new size would exceed
+ * RLIMIT_DATA or the VM operation fails.
+ */
+struct obreak_args {
+	char	*nsiz;
+};
+/* ARGSUSED */
+int
+obreak(p, uap, retval)
+	struct proc *p;
+	struct obreak_args *uap;
+	int *retval;
+{
+	register struct vmspace *vm = p->p_vmspace;
+	vm_offset_t new, old;
+	int rv;
+	register int diff;
+
+	old = (vm_offset_t)vm->vm_daddr;
+	new = round_page(uap->nsiz);
+	/*
+	 * A break below the start of the data segment would pass the
+	 * limit check below (the difference is negative) and then make
+	 * the shrink path deallocate memory outside the data segment
+	 * and drive vm_dsize negative.
+	 */
+	if (new < old)
+		return(EINVAL);
+	if ((int)(new - old) > p->p_rlimit[RLIMIT_DATA].rlim_cur)
+		return(ENOMEM);
+	/* From here on `old' is the current (page rounded) break. */
+	old = round_page(old + ctob(vm->vm_dsize));
+	diff = new - old;
+	if (diff > 0) {
+		rv = vm_allocate(&vm->vm_map, &old, diff, FALSE);
+		if (rv != KERN_SUCCESS) {
+			uprintf("sbrk: grow failed, return = %d\n", rv);
+			return(ENOMEM);
+		}
+		vm->vm_dsize += btoc(diff);
+	} else if (diff < 0) {
+		diff = -diff;
+		rv = vm_deallocate(&vm->vm_map, new, diff);
+		if (rv != KERN_SUCCESS) {
+			uprintf("sbrk: shrink failed, return = %d\n", rv);
+			return(ENOMEM);
+		}
+		vm->vm_dsize -= btoc(diff);
+	}
+	return(0);
+}
+
+/*
+ * Grow the "stack segment" accounting of process p, if necessary and
+ * permitted, so that it covers stack pointer sp.
+ * Returns 1 when sp is (now) covered, 0 when sp lies below
+ * vm_maxsaddr or when growing would exceed RLIMIT_STACK.
+ */
+int
+grow(p, sp)
+	struct proc *p;
+	unsigned sp;
+{
+	register struct vmspace *vm = p->p_vmspace;
+	register int extra;
+
+	/* User defined stacks (from sendsig): nothing to grow. */
+	if (sp < (unsigned)vm->vm_maxsaddr)
+		return (0);
+
+	/* Common case (from trap): already allocated. */
+	if (sp >= USRSTACK - ctob(vm->vm_ssize))
+		return (1);
+
+	/*
+	 * Work out how much is missing and extend the stack size only
+	 * when the result stays within the resource limit.
+	 */
+	extra = clrnd(btoc(USRSTACK-sp) - vm->vm_ssize);
+	if (vm->vm_ssize + extra <= btoc(p->p_rlimit[RLIMIT_STACK].rlim_cur)) {
+		vm->vm_ssize += extra;
+		return (1);
+	}
+	return (0);
+}
+
+/*
+ * ovadvise system call stub: the arguments are ignored and EINVAL is
+ * always returned.
+ */
+struct ovadvise_args {
+	int	anom;
+};
+/* ARGSUSED */
+int
+ovadvise(p, uap, retval)
+	struct proc *p;
+	struct ovadvise_args *uap;
+	int *retval;
+{
+
+	return (EINVAL);
+}
diff --git a/sys/vm/vm_user.c b/sys/vm/vm_user.c
new file mode 100644
index 00000000000..20172c6c651
--- /dev/null
+++ b/sys/vm/vm_user.c
@@ -0,0 +1,312 @@
+/* 
+ * Copyright (c) 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * The Mach Operating System project at Carnegie-Mellon University.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)vm_user.c	8.2 (Berkeley) 1/12/94
+ *
+ *
+ * Copyright (c) 1987, 1990 Carnegie-Mellon University.
+ * All rights reserved.
+ *
+ * Authors: Avadis Tevanian, Jr., Michael Wayne Young
+ * 
+ * Permission to use, copy, modify and distribute this software and
+ * its documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ * 
+ * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 
+ * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND 
+ * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ * 
+ * Carnegie Mellon requests users of this software to return to
+ *
+ *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
+ *  School of Computer Science
+ *  Carnegie Mellon University
+ *  Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie the
+ * rights to redistribute these changes.
+ */
+
+/*
+ *	User-exported virtual memory functions.
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+
+#include <vm/vm.h>
+
+simple_lock_data_t	vm_alloc_lock;	/* XXX not referenced anywhere else in this file */
+
+#ifdef MACHVMCOMPAT
+/*
+ * BSD style syscall interfaces to MACH calls
+ * All return MACH return values.
+ */
+struct svm_allocate_args {
+	vm_map_t map;
+	vm_offset_t *addr;
+	vm_size_t size;
+	boolean_t anywhere;
+};
+/*
+ * System call front end for vm_allocate().  The address is passed
+ * by reference: it is copied in from uap->addr, handed to
+ * vm_allocate() and, if that succeeds, the resulting address is
+ * copied back out.  The map argument is overwritten with p->p_map
+ * before use.
+ *
+ * Returns the vm_allocate() result, or KERN_INVALID_ARGUMENT when
+ * the copyin or copyout of the address fails.
+ */
+/* ARGSUSED */
+int
+svm_allocate(p, uap, retval)
+	struct proc *p;
+	struct svm_allocate_args *uap;
+	int *retval;
+{
+	vm_offset_t where;
+	int status;
+
+	uap->map = p->p_map;		/* XXX */
+
+	if (copyin((caddr_t)uap->addr, (caddr_t)&where, sizeof (where)))
+		return((int)KERN_INVALID_ARGUMENT);
+
+	status = vm_allocate(uap->map, &where, uap->size, uap->anywhere);
+	if (status != KERN_SUCCESS)
+		return(status);
+
+	/* success: hand the chosen address back to the caller */
+	if (copyout((caddr_t)&where, (caddr_t)uap->addr, sizeof(where)))
+		status = KERN_INVALID_ARGUMENT;
+	return(status);
+}
+
+struct svm_deallocate_args {
+	vm_map_t map;
+	vm_offset_t addr;
+	vm_size_t size;
+};
+/*
+ * System call front end for vm_deallocate().  The map argument is
+ * overwritten with p->p_map before use; the vm_deallocate() result
+ * is returned unchanged.
+ */
+/* ARGSUSED */
+int
+svm_deallocate(p, uap, retval)
+	struct proc *p;
+	struct svm_deallocate_args *uap;
+	int *retval;
+{
+	uap->map = p->p_map;		/* XXX */
+	return((int)vm_deallocate(uap->map, uap->addr, uap->size));
+}
+
+struct svm_inherit_args {
+	vm_map_t map;
+	vm_offset_t addr;
+	vm_size_t size;
+	vm_inherit_t inherit;
+};
+/*
+ * System call front end for vm_inherit().  The map argument is
+ * overwritten with p->p_map before use; the vm_inherit() result is
+ * returned unchanged.
+ */
+/* ARGSUSED */
+int
+svm_inherit(p, uap, retval)
+	struct proc *p;
+	struct svm_inherit_args *uap;
+	int *retval;
+{
+	uap->map = p->p_map;		/* XXX */
+	return((int)vm_inherit(uap->map, uap->addr, uap->size,
+	    uap->inherit));
+}
+
+struct svm_protect_args {
+	vm_map_t map;
+	vm_offset_t addr;
+	vm_size_t size;
+	boolean_t setmax;
+	vm_prot_t prot;
+};
+/*
+ * System call front end for vm_protect().  The map argument is
+ * overwritten with p->p_map before use; the vm_protect() result is
+ * returned unchanged.
+ */
+/* ARGSUSED */
+int
+svm_protect(p, uap, retval)
+	struct proc *p;
+	struct svm_protect_args *uap;
+	int *retval;
+{
+	uap->map = p->p_map;		/* XXX */
+	return((int)vm_protect(uap->map, uap->addr, uap->size,
+	    uap->setmax, uap->prot));
+}
+
+/*
+ *	vm_inherit sets the inheritance of the specified range in the
+ *	specified map.  The range [start, start+size) is widened to
+ *	page boundaries before being passed to vm_map_inherit().
+ *	A null map yields KERN_INVALID_ARGUMENT.
+ */
+int
+vm_inherit(map, start, size, new_inheritance)
+	register vm_map_t	map;
+	vm_offset_t		start;
+	vm_size_t		size;
+	vm_inherit_t		new_inheritance;
+{
+	vm_offset_t		first, last;
+
+	if (map == NULL)
+		return(KERN_INVALID_ARGUMENT);
+
+	first = trunc_page(start);
+	last = round_page(start+size);
+	return(vm_map_inherit(map, first, last, new_inheritance));
+}
+
+/*
+ *	vm_protect sets the protection of the specified range in the
+ *	specified map.  The range [start, start+size) is widened to
+ *	page boundaries before being passed to vm_map_protect().
+ *	A null map yields KERN_INVALID_ARGUMENT.
+ */
+
+int
+vm_protect(map, start, size, set_maximum, new_protection)
+	register vm_map_t	map;
+	vm_offset_t		start;
+	vm_size_t		size;
+	boolean_t		set_maximum;
+	vm_prot_t		new_protection;
+{
+	vm_offset_t		first, last;
+
+	if (map == NULL)
+		return(KERN_INVALID_ARGUMENT);
+
+	first = trunc_page(start);
+	last = round_page(start+size);
+	/* note: vm_map_protect() takes the protection before the flag */
+	return(vm_map_protect(map, first, last, new_protection,
+	    set_maximum));
+}
+#endif
+
+/*
+ *	vm_allocate allocates "zero fill" memory in the specified
+ *	map.
+ *
+ *	On entry *addr is the requested address (truncated to a page
+ *	boundary) unless "anywhere" is set, in which case the search
+ *	starts at vm_map_min(map).  The size is rounded up to whole
+ *	pages.  A zero size succeeds immediately and sets *addr to 0;
+ *	a null map yields KERN_INVALID_ARGUMENT.  Otherwise the result
+ *	of vm_map_find() is returned.
+ */
+int
+vm_allocate(map, addr, size, anywhere)
+	register vm_map_t	map;
+	register vm_offset_t	*addr;
+	register vm_size_t	size;
+	boolean_t		anywhere;
+{
+	if (map == NULL)
+		return(KERN_INVALID_ARGUMENT);
+	if (size == 0) {
+		*addr = 0;
+		return(KERN_SUCCESS);
+	}
+
+	*addr = anywhere ? vm_map_min(map) : trunc_page(*addr);
+	size = round_page(size);
+
+	return(vm_map_find(map, NULL, (vm_offset_t) 0, addr, size, anywhere));
+}
+
+/*
+ *	vm_deallocate deallocates the specified range of addresses in the
+ *	specified address map.  The range [start, start+size) is widened
+ *	to page boundaries before being passed to vm_map_remove().
+ *	A zero size succeeds without touching the map; a null map
+ *	yields KERN_INVALID_ARGUMENT.
+ */
+int
+vm_deallocate(map, start, size)
+	register vm_map_t	map;
+	vm_offset_t		start;
+	vm_size_t		size;
+{
+	vm_offset_t		first, last;
+
+	if (map == NULL)
+		return(KERN_INVALID_ARGUMENT);
+
+	if (size == 0)
+		return(KERN_SUCCESS);
+
+	first = trunc_page(start);
+	last = round_page(start+size);
+	return(vm_map_remove(map, first, last));
+}
+
+/*
+ * Similar to vm_allocate but assigns an explicit pager.
+ *
+ * The object backing the mapping is looked up by pager; if none is
+ * found a new one is allocated and, unless "internal" is set,
+ * entered under the pager.  The mapping is then established
+ * with vm_map_find() at object offset poffset.  On failure the
+ * object reference is dropped; on success the pager is attached to
+ * the object when it is non-null.
+ *
+ * Returns KERN_INVALID_ARGUMENT for a null map, otherwise the
+ * vm_map_find() result.
+ */
+int
+vm_allocate_with_pager(map, addr, size, anywhere, pager, poffset, internal)
+	register vm_map_t	map;
+	register vm_offset_t	*addr;
+	register vm_size_t	size;
+	boolean_t		anywhere;
+	vm_pager_t		pager;
+	vm_offset_t		poffset;
+	boolean_t		internal;
+{
+	register vm_object_t	obj;
+	register int		status;
+
+	if (map == NULL)
+		return(KERN_INVALID_ARGUMENT);
+
+	*addr = trunc_page(*addr);
+	size = round_page(size);
+
+	/*
+	 *	Try the object cache first, keeping the lookup/hit
+	 *	statistics up to date.
+	 */
+	obj = vm_object_lookup(pager);
+	cnt.v_lookups++;
+	if (obj != NULL) {
+		cnt.v_hits++;
+	} else {
+		obj = vm_object_allocate(size);
+		/*
+		 * From Mike Hibler: "unnamed anonymous objects should never
+		 * be on the hash list ... For now you can just change
+		 * vm_allocate_with_pager to not do vm_object_enter if this
+		 * is an internal object ..."
+		 */
+		if (!internal)
+			vm_object_enter(obj, pager);
+	}
+	if (!internal) {
+		obj->flags &= ~OBJ_INTERNAL;
+		cnt.v_nzfod -= atop(size);
+	} else
+		obj->flags |= OBJ_INTERNAL;
+
+	status = vm_map_find(map, obj, poffset, addr, size, anywhere);
+	if (status == KERN_SUCCESS) {
+		if (pager != NULL)
+			vm_object_setpager(obj, pager, (vm_offset_t) 0, TRUE);
+	} else
+		vm_object_deallocate(obj);
+	return(status);
+}
diff --git a/sys/vm/vnode_pager.c b/sys/vm/vnode_pager.c
new file mode 100644
index 00000000000..9c2f8260cfb
--- /dev/null
+++ b/sys/vm/vnode_pager.c
@@ -0,0 +1,580 @@
+/*
+ * Copyright (c) 1990 University of Utah.
+ * Copyright (c) 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * the Systems Programming Group of the University of Utah Computer
+ * Science Department.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)vnode_pager.c	8.8 (Berkeley) 2/13/94
+ */
+
+/*
+ * Page to/from files (vnodes).
+ *
+ * TODO:
+ *	pageouts
+ *	fix credential use (uses current process credentials now)
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/malloc.h>
+#include <sys/vnode.h>
+#include <sys/uio.h>
+#include <sys/mount.h>
+
+#include <vm/vm.h>
+#include <vm/vm_page.h>
+#include <vm/vnode_pager.h>
+
+struct pagerlst	vnode_pager_list;	/* all vnode pagers, linked through pg_list */
+
+#ifdef DEBUG
+int	vpagerdebug = 0x00;	/* mask of VDB_* bits below; 0 prints nothing */
+#define	VDB_FOLLOW	0x01	/* print a line on entry to the pager routines */
+#define VDB_INIT	0x02	/* not tested anywhere in this file */
+#define VDB_IO		0x04	/* print transfer details in vnode_pager_io */
+#define VDB_FAIL	0x08	/* report failures in vnode_pager_haspage */
+#define VDB_ALLOC	0x10	/* report calls and results of vnode_pager_alloc */
+#define VDB_SIZE	0x20	/* report end-of-file checks and size changes */
+#endif
+
+static vm_pager_t	 vnode_pager_alloc
+			    __P((caddr_t, vm_size_t, vm_prot_t, vm_offset_t));
+static void		 vnode_pager_cluster
+			    __P((vm_pager_t, vm_offset_t,
+				 vm_offset_t *, vm_offset_t *));
+static void		 vnode_pager_dealloc __P((vm_pager_t));
+static int		 vnode_pager_getpage
+			    __P((vm_pager_t, vm_page_t *, int, boolean_t));
+static boolean_t	 vnode_pager_haspage __P((vm_pager_t, vm_offset_t));
+static void		 vnode_pager_init __P((void));
+static int		 vnode_pager_io
+			    __P((vn_pager_t, vm_page_t *, int,
+				 boolean_t, enum uio_rw));
+static boolean_t	 vnode_pager_putpage
+			    __P((vm_pager_t, vm_page_t *, int, boolean_t));
+
+struct pagerops vnodepagerops = {	/* positional: order must follow struct pagerops */
+	vnode_pager_init,
+	vnode_pager_alloc,
+	vnode_pager_dealloc,
+	vnode_pager_getpage,
+	vnode_pager_putpage,
+	vnode_pager_haspage,
+	vnode_pager_cluster
+};
+
+/*
+ * One-time setup: start with an empty list of vnode pagers.
+ */
+static void
+vnode_pager_init()
+{
+#ifdef DEBUG
+	if (vpagerdebug & VDB_FOLLOW) {
+		printf("vnode_pager_init()\n");
+	}
+#endif
+	TAILQ_INIT(&vnode_pager_list);
+}
+
+/*
+ * Return the pager for a vnode, creating it on first use.
+ * Handle is a vnode pointer.
+ *
+ * A new pager gets a fresh object sized from the vnode's va_size,
+ * takes a reference on the vnode and is recorded both on
+ * vnode_pager_list and in the vnode's v_vmdata.  NULL is returned
+ * for a null handle and when allocation or VOP_GETATTR fails.
+ */
+static vm_pager_t
+vnode_pager_alloc(handle, size, prot, foff)
+	caddr_t handle;
+	vm_size_t size;
+	vm_prot_t prot;
+	vm_offset_t foff;
+{
+	register vm_pager_t pager;
+	register vn_pager_t vnp;
+	vm_object_t object;
+	struct vattr vattr;
+	struct vnode *vp;
+	struct proc *p = curproc;	/* XXX */
+
+#ifdef DEBUG
+	if (vpagerdebug & (VDB_FOLLOW|VDB_ALLOC))
+		printf("vnode_pager_alloc(%x, %x, %x)\n", handle, size, prot);
+#endif
+	/*
+	 * Pageout to vnode, no can do yet.
+	 */
+	if (handle == NULL)
+		return(NULL);
+
+	/*
+	 * The vnode itself remembers its pager, so there is no need
+	 * to go through vm_pager_lookup.
+	 */
+	vp = (struct vnode *)handle;
+	pager = (vm_pager_t)vp->v_vmdata;
+	if (pager != NULL) {
+		/*
+		 * Already set up.  vm_object_lookup() will remove the
+		 * object from the cache if found and also gain a
+		 * reference to the object.
+		 */
+		object = vm_object_lookup(pager);
+#ifdef DEBUG
+		vnp = (vn_pager_t)pager->pg_data;
+#endif
+	} else {
+		/*
+		 * First use: get memory for the pager and its
+		 * private data.
+		 */
+		pager = (vm_pager_t)malloc(sizeof *pager, M_VMPAGER, M_WAITOK);
+		if (pager == NULL)
+			return(NULL);
+		vnp = (vn_pager_t)malloc(sizeof *vnp, M_VMPGDATA, M_WAITOK);
+		if (vnp == NULL) {
+			free((caddr_t)pager, M_VMPAGER);
+			return(NULL);
+		}
+		/*
+		 * The object is sized from the file; without the
+		 * attributes we cannot go on.
+		 */
+		if (VOP_GETATTR(vp, &vattr, p->p_ucred, p) != 0) {
+			free((caddr_t)vnp, M_VMPGDATA);
+			free((caddr_t)pager, M_VMPAGER);
+			return(NULL);
+		}
+		object = vm_object_allocate(round_page(vattr.va_size));
+		vm_object_enter(object, pager);
+		vm_object_setpager(object, pager, 0, TRUE);
+		/*
+		 * Hold a reference to the vnode and initialize pager data.
+		 */
+		VREF(vp);
+		vnp->vnp_flags = 0;
+		vnp->vnp_vp = vp;
+		vnp->vnp_size = vattr.va_size;
+		TAILQ_INSERT_TAIL(&vnode_pager_list, pager, pg_list);
+		pager->pg_handle = handle;
+		pager->pg_type = PG_VNODE;
+		pager->pg_flags = 0;
+		pager->pg_ops = &vnodepagerops;
+		pager->pg_data = vnp;
+		vp->v_vmdata = (caddr_t)pager;
+	}
+#ifdef DEBUG
+	if (vpagerdebug & VDB_ALLOC)
+		printf("vnode_pager_setup: vp %x sz %x pager %x object %x\n",
+		       vp, vnp->vnp_size, pager, object);
+#endif
+	return(pager);
+}
+
+/*
+ * Tear down a vnode pager.  If the pager still points at a vnode,
+ * the vnode's v_vmdata and VTEXT flag are cleared and the vnode
+ * reference is released.  The pager is then unlinked from
+ * vnode_pager_list and both its private data and the pager itself
+ * are freed.
+ */
+static void
+vnode_pager_dealloc(pager)
+	vm_pager_t pager;
+{
+	register vn_pager_t vnp = (vn_pager_t)pager->pg_data;
+	register struct vnode *vp;
+#ifdef NOTDEF
+	struct proc *p = curproc;		/* XXX */
+#endif
+
+#ifdef DEBUG
+	if (vpagerdebug & VDB_FOLLOW)
+		printf("vnode_pager_dealloc(%x)\n", pager);
+#endif
+	if ((vp = vnp->vnp_vp) != NULL) {
+		vp->v_vmdata = NULL;
+		vp->v_flag &= ~VTEXT;
+		/*
+		 * Disabled code; guarded with #ifdef, exactly like the
+		 * declaration of p above, so the two always agree.
+		 */
+#ifdef NOTDEF
+		/* can hang if done at reboot on NFS FS */
+		(void) VOP_FSYNC(vp, p->p_ucred, p);
+#endif
+		vrele(vp);
+	}
+	TAILQ_REMOVE(&vnode_pager_list, pager, pg_list);
+	free((caddr_t)vnp, M_VMPGDATA);
+	free((caddr_t)pager, M_VMPAGER);
+}
+
+/*
+ * Page-in entry point: read the given page(s) from the vnode by
+ * way of vnode_pager_io() and return its VM_PAGER_* status.
+ */
+static int
+vnode_pager_getpage(pager, mlist, npages, sync)
+	vm_pager_t pager;
+	vm_page_t *mlist;
+	int npages;
+	boolean_t sync;
+{
+	vn_pager_t vnp;
+
+#ifdef DEBUG
+	if (vpagerdebug & VDB_FOLLOW)
+		printf("vnode_pager_getpage(%x, %x, %x, %x)\n",
+		       pager, mlist, npages, sync);
+#endif
+	vnp = (vn_pager_t)pager->pg_data;
+	return(vnode_pager_io(vnp, mlist, npages, sync, UIO_READ));
+}
+
+/*
+ * Page-out entry point: write the given page(s) to the vnode by
+ * way of vnode_pager_io().  When the write succeeds every page is
+ * flagged PG_CLEAN and its modify bit is cleared in the pmap.
+ *
+ * Returns FALSE for a null pager, otherwise the VM_PAGER_* status
+ * produced by vnode_pager_io().
+ */
+static boolean_t
+vnode_pager_putpage(pager, mlist, npages, sync)
+	vm_pager_t pager;
+	vm_page_t *mlist;
+	int npages;
+	boolean_t sync;
+{
+	vm_page_t pg;
+	int status;
+
+#ifdef DEBUG
+	if (vpagerdebug & VDB_FOLLOW)
+		printf("vnode_pager_putpage(%x, %x, %x, %x)\n",
+		       pager, mlist, npages, sync);
+#endif
+	if (pager == NULL)
+		return (FALSE);			/* ??? */
+	status = vnode_pager_io((vn_pager_t)pager->pg_data,
+				mlist, npages, sync, UIO_WRITE);
+	if (status != VM_PAGER_OK)
+		return(status);
+	/*
+	 * The data reached the vnode, so the pages are clean now.
+	 */
+	for (; npages != 0; npages--, mlist++) {
+		pg = *mlist;
+		pg->flags |= PG_CLEAN;
+		pmap_clear_modify(VM_PAGE_TO_PHYS(pg));
+	}
+	return(status);
+}
+
+/*
+ * Tell whether the vnode holds data for the page at "offset".
+ *
+ * Returns FALSE when the offset is at or beyond the recorded file
+ * size or when VOP_BMAP reports a negative block number for it,
+ * and TRUE otherwise.  A VOP_BMAP error is also answered with TRUE.
+ */
+static boolean_t
+vnode_pager_haspage(pager, offset)
+	vm_pager_t pager;
+	vm_offset_t offset;
+{
+	register vn_pager_t vnp = (vn_pager_t)pager->pg_data;
+	daddr_t blkno;
+	int error;
+
+#ifdef DEBUG
+	if (vpagerdebug & VDB_FOLLOW)
+		printf("vnode_pager_haspage(%x, %x)\n", pager, offset);
+#endif
+
+	/*
+	 * The size is examined with the vnode locked so that we see
+	 * its most recent value.  Nothing lives past end of file.
+	 */
+	VOP_LOCK(vnp->vnp_vp);
+	if (offset >= vnp->vnp_size) {
+		VOP_UNLOCK(vnp->vnp_vp);
+#ifdef DEBUG
+		if (vpagerdebug & (VDB_FAIL|VDB_SIZE))
+			printf("vnode_pager_haspage: pg %x, off %x, size %x\n",
+			       pager, offset, vnp->vnp_size);
+#endif
+		return(FALSE);
+	}
+
+	/*
+	 * Ask the filesystem which disk block backs this offset.
+	 * No block means no data.
+	 *
+	 * Assumes that the vnode has whole page or nothing.
+	 */
+	error = VOP_BMAP(vnp->vnp_vp,
+			 offset / vnp->vnp_vp->v_mount->mnt_stat.f_iosize,
+			 (struct vnode **)0, &blkno, NULL);
+	VOP_UNLOCK(vnp->vnp_vp);
+	if (error) {
+#ifdef DEBUG
+		if (vpagerdebug & VDB_FAIL)
+			printf("vnode_pager_haspage: BMAP err %d, pg %x, off %x\n",
+			       error, pager, offset);
+#endif
+		return(TRUE);
+	}
+	if ((long)blkno < 0)
+		return(FALSE);
+	return(TRUE);
+}
+
+/*
+ * Report the range of offsets to be handled together with
+ * "offset".  The range starts at offset and runs for MAXBSIZE
+ * bytes, cut short at the page-rounded end of file.  It is returned
+ * through *loffset and *hoffset.  An offset at or beyond the
+ * recorded file size is a panic.
+ */
+static void
+vnode_pager_cluster(pager, offset, loffset, hoffset)
+	vm_pager_t	pager;
+	vm_offset_t	offset;
+	vm_offset_t	*loffset;
+	vm_offset_t	*hoffset;
+{
+	vn_pager_t vnp = (vn_pager_t)pager->pg_data;
+	vm_offset_t first, last, eof;
+
+#ifdef DEBUG
+	if (vpagerdebug & VDB_FOLLOW)
+		printf("vnode_pager_cluster(%x, %x) ", pager, offset);
+#endif
+	first = offset;
+	if (first >= vnp->vnp_size)
+		panic("vnode_pager_cluster: bad offset");
+	/*
+	 * XXX could use VOP_BMAP to get maxcontig value
+	 */
+	eof = round_page(vnp->vnp_size);
+	last = first + MAXBSIZE;
+	if (last > eof)
+		last = eof;
+
+	*loffset = first;
+	*hoffset = last;
+#ifdef DEBUG
+	if (vpagerdebug & VDB_FOLLOW)
+		printf("returns [%x-%x]\n", first, last);
+#endif
+}
+
+/*
+ * (XXX)
+ * Tell the VM system that a file has changed size.
+ * Nothing happens unless vp is a regular file with a pager
+ * attached, the size really differs from the one recorded, and the
+ * pager's object can still be looked up.  When the file has shrunk,
+ * pages cached in the object beyond the new end of file are
+ * removed; in either direction the recorded size is updated.
+ *
+ * Note: this routine may be invoked as a result of a pager put
+ * operation (possibly at object termination time), so we must be careful.
+ */
+void
+vnode_pager_setsize(vp, nsize)
+	struct vnode *vp;
+	u_long nsize;
+{
+	register vn_pager_t vnp;
+	register vm_object_t obj;
+	vm_pager_t pg;
+
+	/*
+	 * Only regular files that have a pager concern us.
+	 */
+	if (vp == NULL)
+		return;
+	if (vp->v_type != VREG)
+		return;
+	if (vp->v_vmdata == NULL)
+		return;
+	/*
+	 * Same size as before: nothing to do.
+	 */
+	pg = (vm_pager_t)vp->v_vmdata;
+	vnp = (vn_pager_t)pg->pg_data;
+	if (vnp->vnp_size == nsize)
+		return;
+	/*
+	 * The object may be gone.
+	 * This can happen during object termination since
+	 * vm_object_page_clean is called after the object
+	 * has been removed from the hash table, and clean
+	 * may cause vnode write operations which can wind
+	 * up back here.
+	 */
+	obj = vm_object_lookup(pg);
+	if (obj == NULL)
+		return;
+
+#ifdef DEBUG
+	if (vpagerdebug & (VDB_FOLLOW|VDB_SIZE))
+		printf("vnode_pager_setsize: vp %x obj %x osz %d nsz %d\n",
+		       vp, obj, vnp->vnp_size, nsize);
+#endif
+	/*
+	 * Shrinking: cached pages past the new EOF are stale.
+	 */
+	if (vnp->vnp_size > nsize) {
+		vm_object_lock(obj);
+		vm_object_page_remove(obj,
+				      (vm_offset_t)nsize, vnp->vnp_size);
+		vm_object_unlock(obj);
+	}
+	vnp->vnp_size = (vm_offset_t)nsize;
+	/* release the object obtained from vm_object_lookup() above */
+	vm_object_deallocate(obj);
+}
+
+/*
+ * Uncache the object of every vnode pager whose vnode belongs to
+ * mount point mp, or of every vnode pager if mp is null.  Each
+ * vnode is locked around the call to vnode_pager_uncache().
+ */
+void
+vnode_pager_umount(mp)
+	register struct mount *mp;
+{
+	register vm_pager_t cur, next;
+	struct vnode *vp;
+
+	cur = vnode_pager_list.tqh_first;
+	while (cur != NULL) {
+		/*
+		 * Pick up the successor first: uncaching may
+		 * terminate the object and with it free this pager.
+		 */
+		next = cur->pg_list.tqe_next;
+		vp = ((vn_pager_t)cur->pg_data)->vnp_vp;
+		if (mp == NULL || vp->v_mount == mp) {
+			VOP_LOCK(vp);
+			(void) vnode_pager_uncache(vp);
+			VOP_UNLOCK(vp);
+		}
+		cur = next;
+	}
+}
+
+/*
+ * Remove the object associated with a vnode from the object cache.
+ *
+ * Returns TRUE if the vnode has no pager or no object could be
+ * looked up for it; otherwise returns whether the object's
+ * reference count was at most one right after the lookup.
+ *
+ * XXX the vnode is unlocked around the pager_cache() call.
+ * We must do this since uncaching the object may result in its
+ * destruction which may initiate paging activity which may necessitate
+ * re-locking the vnode.
+ */
+boolean_t
+vnode_pager_uncache(vp)
+	register struct vnode *vp;
+{
+	register vm_object_t obj;
+	boolean_t last_ref;
+	vm_pager_t pg;
+
+	/*
+	 * No pager, so nothing is mapped or cached.
+	 */
+	pg = (vm_pager_t)vp->v_vmdata;
+	if (pg == NULL)
+		return (TRUE);
+#ifdef DEBUG
+	if (!VOP_ISLOCKED(vp)) {
+		extern int (**nfsv2_vnodeop_p)();
+
+		if (vp->v_op != nfsv2_vnodeop_p)
+			panic("vnode_pager_uncache: vnode not locked!");
+	}
+#endif
+	/*
+	 * vm_object_lookup() is required here because it is the
+	 * call that actually takes the object off the cache list.
+	 */
+	obj = vm_object_lookup(pg);
+	if (obj == NULL)
+		return(TRUE);
+
+	last_ref = (obj->ref_count <= 1);
+	VOP_UNLOCK(vp);
+	pager_cache(obj, FALSE);
+	VOP_LOCK(vp);
+	return(last_ref);
+}
+
+/*
+ * Move one page between memory and the vnode behind vnp: a read
+ * for UIO_READ, a write otherwise.  The page is mapped into kernel
+ * virtual space and transferred with VOP_READ/VOP_WRITE while the
+ * vnode is locked.  A read that delivers less than a full page has
+ * the remainder of the page zeroed.
+ *
+ * Returns VM_PAGER_AGAIN if the page could not be mapped,
+ * VM_PAGER_BAD if it starts at or beyond the recorded file size,
+ * VM_PAGER_ERROR if the vnode operation failed or moved no data,
+ * and VM_PAGER_OK otherwise.  More than one page is a panic.
+ */
+static int
+vnode_pager_io(vnp, mlist, npages, sync, rw)
+	register vn_pager_t vnp;
+	vm_page_t *mlist;
+	int npages;
+	boolean_t sync;
+	enum uio_rw rw;
+{
+	struct proc *p = curproc;		/* XXX */
+	struct iovec iov;
+	struct uio io;
+	vm_offset_t kva, foff;
+	vm_page_t m;
+	int error, size;
+
+	/* XXX only a single page can be handled for now */
+	if (npages != 1)
+		panic("vnode_pager_io: cannot handle multiple pages");
+	m = *mlist;
+
+#ifdef DEBUG
+	if (vpagerdebug & VDB_FOLLOW)
+		printf("vnode_pager_io(%x, %x, %c): vnode %x\n",
+		       vnp, m, rw == UIO_READ ? 'R' : 'W', vnp->vnp_vp);
+#endif
+	foff = m->offset + m->object->paging_offset;
+	/*
+	 * Map the page into the kernel so that the ordinary
+	 * VOP_READ/WRITE routines can be used on it.
+	 */
+	kva = vm_pager_map_pages(mlist, npages, sync);
+	if (kva == NULL)
+		return(VM_PAGER_AGAIN);
+	/*
+	 * Everything that might block is behind us, so the size
+	 * can be checked now:
+	 *	read beyond EOF (returns error)
+	 *	short read
+	 */
+	VOP_LOCK(vnp->vnp_vp);
+	if (foff >= vnp->vnp_size) {
+		VOP_UNLOCK(vnp->vnp_vp);
+		vm_pager_unmap_pages(kva, npages);
+#ifdef DEBUG
+		if (vpagerdebug & VDB_SIZE)
+			printf("vnode_pager_io: vp %x, off %d size %d\n",
+			       vnp->vnp_vp, foff, vnp->vnp_size);
+#endif
+		return(VM_PAGER_BAD);
+	}
+	/* transfer a whole page unless the file ends inside it */
+	if (foff + PAGE_SIZE <= vnp->vnp_size)
+		size = PAGE_SIZE;
+	else
+		size = vnp->vnp_size - foff;
+	iov.iov_base = (caddr_t)kva;
+	iov.iov_len = size;
+	io.uio_iov = &iov;
+	io.uio_iovcnt = 1;
+	io.uio_offset = foff;
+	io.uio_segflg = UIO_SYSSPACE;
+	io.uio_rw = rw;
+	io.uio_resid = size;
+	io.uio_procp = (struct proc *)0;
+#ifdef DEBUG
+	if (vpagerdebug & VDB_IO)
+		printf("vnode_pager_io: vp %x kva %x foff %x size %x",
+		       vnp->vnp_vp, kva, foff, size);
+#endif
+	if (rw == UIO_READ)
+		error = VOP_READ(vnp->vnp_vp, &io, 0, p->p_ucred);
+	else
+		error = VOP_WRITE(vnp->vnp_vp, &io, 0, p->p_ucred);
+	VOP_UNLOCK(vnp->vnp_vp);
+#ifdef DEBUG
+	if (vpagerdebug & VDB_IO) {
+		if (error || io.uio_resid)
+			printf(" returns error %x, resid %x",
+			       error, io.uio_resid);
+		printf("\n");
+	}
+#endif
+	if (error == 0) {
+		register int moved = size - io.uio_resid;
+
+		if (moved == 0)
+			error = EINVAL;		/* nothing transferred */
+		else if (rw == UIO_READ && moved != PAGE_SIZE)
+			bzero((void *)(kva + moved), PAGE_SIZE - moved);
+	}
+	vm_pager_unmap_pages(kva, npages);
+	if (error)
+		return (VM_PAGER_ERROR);
+	return (VM_PAGER_OK);
+}
diff --git a/sys/vm/vnode_pager.h b/sys/vm/vnode_pager.h
new file mode 100644
index 00000000000..95c9545452a
--- /dev/null
+++ b/sys/vm/vnode_pager.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 1990 University of Utah.
+ * Copyright (c) 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * the Systems Programming Group of the University of Utah Computer
+ * Science Department.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)vnode_pager.h	8.1 (Berkeley) 6/11/93
+ */
+
+#ifndef	_VNODE_PAGER_
+#define	_VNODE_PAGER_	1
+
+/*
+ * VNODE pager private data.
+ * One of these hangs off each vnode pager's pg_data pointer.
+ */
+struct vnpager {
+	int		vnp_flags;	/* flags; presumably the VNP_* bits below -- confirm */
+	struct vnode	*vnp_vp;	/* vnode; referenced for the life of the pager */
+	vm_size_t	vnp_size;	/* vnode current size, kept up to date by vnode_pager_setsize() */
+};
+typedef struct vnpager	*vn_pager_t;
+
+#define VN_PAGER_NULL	((vn_pager_t)0)
+
+#define	VNP_PAGING	0x01		/* vnode used for pageout; not referenced in vnode_pager.c */
+#define VNP_CACHED	0x02		/* vnode is cached; not referenced in vnode_pager.c */
+
+#endif	/* _VNODE_PAGER_ */