From 6c426f3bd473343f9e556a900758e03b8269d0f9 Mon Sep 17 00:00:00 2001 From: Xin LI Date: Sat, 8 Nov 2014 06:34:37 +0000 Subject: [PATCH 001/280] 5244 zio pipeline callers should explicitly invoke next stage Reviewed by: Adam Leventhal Reviewed by: Alex Reece Reviewed by: Christopher Siden Reviewed by: Matthew Ahrens Reviewed by: Richard Elling Reviewed by: Dan McDonald Reviewed by: Steven Hartland Approved by: Gordon Ross Author: George Wilson illumos/illumos-gate@738f37bc3dcd61e8a893af0f2d466d76690b70ec --- lib/libzpool/common/taskq.c | 4 ++++ uts/common/fs/zfs/sys/vdev_impl.h | 2 +- uts/common/fs/zfs/sys/zio.h | 3 --- uts/common/fs/zfs/vdev_disk.c | 15 ++++++++------- uts/common/fs/zfs/vdev_file.c | 12 ++++++------ uts/common/fs/zfs/vdev_mirror.c | 9 +++++---- uts/common/fs/zfs/vdev_missing.c | 6 +++--- uts/common/fs/zfs/vdev_raidz.c | 9 +++++---- uts/common/fs/zfs/zio.c | 23 ++++++++++++++++++++--- 9 files changed, 52 insertions(+), 31 deletions(-) diff --git a/lib/libzpool/common/taskq.c b/lib/libzpool/common/taskq.c index 2c5dfd86dcc..a4ab58963d7 100644 --- a/lib/libzpool/common/taskq.c +++ b/lib/libzpool/common/taskq.c @@ -25,6 +25,7 @@ /* * Copyright 2011 Nexenta Systems, Inc. All rights reserved. * Copyright 2012 Garrett D'Amore . All rights reserved. + * Copyright (c) 2014 by Delphix. All rights reserved. 
*/ #include @@ -33,8 +34,10 @@ int taskq_now; taskq_t *system_taskq; #define TASKQ_ACTIVE 0x00010000 +#define TASKQ_NAMELEN 31 struct taskq { + char tq_name[TASKQ_NAMELEN + 1]; kmutex_t tq_lock; krwlock_t tq_threadlock; kcondvar_t tq_dispatch_cv; @@ -247,6 +250,7 @@ taskq_create(const char *name, int nthreads, pri_t pri, cv_init(&tq->tq_dispatch_cv, NULL, CV_DEFAULT, NULL); cv_init(&tq->tq_wait_cv, NULL, CV_DEFAULT, NULL); cv_init(&tq->tq_maxalloc_cv, NULL, CV_DEFAULT, NULL); + (void) strncpy(tq->tq_name, name, TASKQ_NAMELEN + 1); tq->tq_flags = flags | TASKQ_ACTIVE; tq->tq_active = nthreads; tq->tq_nthreads = nthreads; diff --git a/uts/common/fs/zfs/sys/vdev_impl.h b/uts/common/fs/zfs/sys/vdev_impl.h index 9c22384f733..6d9bcb17d00 100644 --- a/uts/common/fs/zfs/sys/vdev_impl.h +++ b/uts/common/fs/zfs/sys/vdev_impl.h @@ -60,7 +60,7 @@ typedef int vdev_open_func_t(vdev_t *vd, uint64_t *size, uint64_t *max_size, uint64_t *ashift); typedef void vdev_close_func_t(vdev_t *vd); typedef uint64_t vdev_asize_func_t(vdev_t *vd, uint64_t psize); -typedef int vdev_io_start_func_t(zio_t *zio); +typedef void vdev_io_start_func_t(zio_t *zio); typedef void vdev_io_done_func_t(zio_t *zio); typedef void vdev_state_change_func_t(vdev_t *vd, int, int); typedef void vdev_hold_func_t(vdev_t *vd); diff --git a/uts/common/fs/zfs/sys/zio.h b/uts/common/fs/zfs/sys/zio.h index f158042eb40..f6cf259bf71 100644 --- a/uts/common/fs/zfs/sys/zio.h +++ b/uts/common/fs/zfs/sys/zio.h @@ -151,9 +151,6 @@ typedef enum zio_priority { ZIO_PRIORITY_NOW /* non-queued i/os (e.g. 
free) */ } zio_priority_t; -#define ZIO_PIPELINE_CONTINUE 0x100 -#define ZIO_PIPELINE_STOP 0x101 - enum zio_flag { /* * Flags inherited by gang, ddt, and vdev children, diff --git a/uts/common/fs/zfs/vdev_disk.c b/uts/common/fs/zfs/vdev_disk.c index 3a4bc20f5be..ed4a8b773bf 100644 --- a/uts/common/fs/zfs/vdev_disk.c +++ b/uts/common/fs/zfs/vdev_disk.c @@ -715,7 +715,7 @@ vdev_disk_ioctl_done(void *zio_arg, int error) zio_interrupt(zio); } -static int +static void vdev_disk_io_start(zio_t *zio) { vdev_t *vd = zio->io_vd; @@ -731,14 +731,16 @@ vdev_disk_io_start(zio_t *zio) */ if (dvd == NULL || (dvd->vd_ldi_offline && dvd->vd_lh == NULL)) { zio->io_error = ENXIO; - return (ZIO_PIPELINE_CONTINUE); + zio_interrupt(zio); + return; } if (zio->io_type == ZIO_TYPE_IOCTL) { /* XXPOLICY */ if (!vdev_readable(vd)) { zio->io_error = SET_ERROR(ENXIO); - return (ZIO_PIPELINE_CONTINUE); + zio_interrupt(zio); + return; } switch (zio->io_cmd) { @@ -769,7 +771,7 @@ vdev_disk_io_start(zio_t *zio) * and will call vdev_disk_ioctl_done() * upon completion. */ - return (ZIO_PIPELINE_STOP); + return; } if (error == ENOTSUP || error == ENOTTY) { @@ -790,7 +792,8 @@ vdev_disk_io_start(zio_t *zio) zio->io_error = SET_ERROR(ENOTSUP); } - return (ZIO_PIPELINE_CONTINUE); + zio_execute(zio); + return; } vb = kmem_alloc(sizeof (vdev_buf_t), KM_SLEEP); @@ -811,8 +814,6 @@ vdev_disk_io_start(zio_t *zio) /* ldi_strategy() will return non-zero only on programming errors */ VERIFY(ldi_strategy(dvd->vd_lh, bp) == 0); - - return (ZIO_PIPELINE_STOP); } static void diff --git a/uts/common/fs/zfs/vdev_file.c b/uts/common/fs/zfs/vdev_file.c index a05abeb9d91..5dfc331d20a 100644 --- a/uts/common/fs/zfs/vdev_file.c +++ b/uts/common/fs/zfs/vdev_file.c @@ -20,7 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2013 by Delphix. All rights reserved. + * Copyright (c) 2011, 2014 by Delphix. All rights reserved. 
*/ #include @@ -182,7 +182,7 @@ vdev_file_io_strategy(void *arg) } } -static int +static void vdev_file_io_start(zio_t *zio) { vdev_t *vd = zio->io_vd; @@ -194,7 +194,8 @@ vdev_file_io_start(zio_t *zio) /* XXPOLICY */ if (!vdev_readable(vd)) { zio->io_error = SET_ERROR(ENXIO); - return (ZIO_PIPELINE_CONTINUE); + zio_interrupt(zio); + return; } switch (zio->io_cmd) { @@ -206,7 +207,8 @@ vdev_file_io_start(zio_t *zio) zio->io_error = SET_ERROR(ENOTSUP); } - return (ZIO_PIPELINE_CONTINUE); + zio_execute(zio); + return; } vb = kmem_alloc(sizeof (vdev_buf_t), KM_SLEEP); @@ -225,8 +227,6 @@ vdev_file_io_start(zio_t *zio) VERIFY3U(taskq_dispatch(system_taskq, vdev_file_io_strategy, bp, TQ_SLEEP), !=, 0); - - return (ZIO_PIPELINE_STOP); } /* ARGSUSED */ diff --git a/uts/common/fs/zfs/vdev_mirror.c b/uts/common/fs/zfs/vdev_mirror.c index f62c1e3617d..8749e539f46 100644 --- a/uts/common/fs/zfs/vdev_mirror.c +++ b/uts/common/fs/zfs/vdev_mirror.c @@ -24,7 +24,7 @@ */ /* - * Copyright (c) 2013 by Delphix. All rights reserved. + * Copyright (c) 2012, 2014 by Delphix. All rights reserved. */ #include @@ -260,7 +260,7 @@ vdev_mirror_child_select(zio_t *zio) return (-1); } -static int +static void vdev_mirror_io_start(zio_t *zio) { mirror_map_t *mm; @@ -285,7 +285,8 @@ vdev_mirror_io_start(zio_t *zio) zio->io_type, zio->io_priority, 0, vdev_mirror_scrub_done, mc)); } - return (ZIO_PIPELINE_CONTINUE); + zio_execute(zio); + return; } /* * For normal reads just pick one child. @@ -311,7 +312,7 @@ vdev_mirror_io_start(zio_t *zio) c++; } - return (ZIO_PIPELINE_CONTINUE); + zio_execute(zio); } static int diff --git a/uts/common/fs/zfs/vdev_missing.c b/uts/common/fs/zfs/vdev_missing.c index b9eb99d1800..22875733423 100644 --- a/uts/common/fs/zfs/vdev_missing.c +++ b/uts/common/fs/zfs/vdev_missing.c @@ -24,7 +24,7 @@ */ /* - * Copyright (c) 2013 by Delphix. All rights reserved. + * Copyright (c) 2012, 2014 by Delphix. All rights reserved. 
*/ /* @@ -66,11 +66,11 @@ vdev_missing_close(vdev_t *vd) } /* ARGSUSED */ -static int +static void vdev_missing_io_start(zio_t *zio) { zio->io_error = SET_ERROR(ENOTSUP); - return (ZIO_PIPELINE_CONTINUE); + zio_execute(zio); } /* ARGSUSED */ diff --git a/uts/common/fs/zfs/vdev_raidz.c b/uts/common/fs/zfs/vdev_raidz.c index 480141dc637..f686d56e9e5 100644 --- a/uts/common/fs/zfs/vdev_raidz.c +++ b/uts/common/fs/zfs/vdev_raidz.c @@ -21,7 +21,7 @@ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2013 by Delphix. All rights reserved. + * Copyright (c) 2012, 2014 by Delphix. All rights reserved. * Copyright (c) 2013, Joyent, Inc. All rights reserved. */ @@ -1711,7 +1711,7 @@ vdev_raidz_child_done(zio_t *zio) * vdevs have had errors, then create zio read operations to the parity * columns' VDevs as well. */ -static int +static void vdev_raidz_io_start(zio_t *zio) { vdev_t *vd = zio->io_vd; @@ -1759,7 +1759,8 @@ vdev_raidz_io_start(zio_t *zio) ZIO_FLAG_NODATA | ZIO_FLAG_OPTIONAL, NULL, NULL)); } - return (ZIO_PIPELINE_CONTINUE); + zio_execute(zio); + return; } ASSERT(zio->io_type == ZIO_TYPE_READ); @@ -1799,7 +1800,7 @@ vdev_raidz_io_start(zio_t *zio) } } - return (ZIO_PIPELINE_CONTINUE); + zio_execute(zio); } diff --git a/uts/common/fs/zfs/zio.c b/uts/common/fs/zfs/zio.c index b154cb639d5..302d692c64a 100644 --- a/uts/common/fs/zfs/zio.c +++ b/uts/common/fs/zfs/zio.c @@ -63,6 +63,9 @@ kmem_cache_t *zio_data_buf_cache[SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT]; extern vmem_t *zio_alloc_arena; #endif +#define ZIO_PIPELINE_CONTINUE 0x100 +#define ZIO_PIPELINE_STOP 0x101 + /* * The following actions directly effect the spa's sync-to-convergence logic. * The values below define the sync pass when we start performing the action. 
@@ -2466,6 +2469,18 @@ zio_free_zil(spa_t *spa, uint64_t txg, blkptr_t *bp) * Read and write to physical devices * ========================================================================== */ + + +/* + * Issue an I/O to the underlying vdev. Typically the issue pipeline + * stops after this stage and will resume upon I/O completion. + * However, there are instances where the vdev layer may need to + * continue the pipeline when an I/O was not issued. Since the I/O + * that was sent to the vdev layer might be different than the one + * currently active in the pipeline (see vdev_queue_io()), we explicitly + * force the underlying vdev layers to call either zio_execute() or + * zio_interrupt() to ensure that the pipeline continues with the correct I/O. + */ static int zio_vdev_io_start(zio_t *zio) { @@ -2483,7 +2498,8 @@ zio_vdev_io_start(zio_t *zio) /* * The mirror_ops handle multiple DVAs in a single BP. */ - return (vdev_mirror_ops.vdev_op_io_start(zio)); + vdev_mirror_ops.vdev_op_io_start(zio); + return (ZIO_PIPELINE_STOP); } /* @@ -2491,7 +2507,7 @@ zio_vdev_io_start(zio_t *zio) * can quickly react to certain workloads. In particular, we care * about non-scrubbing, top-level reads and writes with the following * characteristics: - * - synchronous writes of user data to non-slog devices + * - synchronous writes of user data to non-slog devices * - any reads of user data * When these conditions are met, adjust the timestamp of spa_last_io * which allows the scan thread to adjust its workload accordingly. 
@@ -2577,7 +2593,8 @@ zio_vdev_io_start(zio_t *zio) } } - return (vd->vdev_ops->vdev_op_io_start(zio)); + vd->vdev_ops->vdev_op_io_start(zio); + return (ZIO_PIPELINE_STOP); } static int From 0945ce02d2314be0ecc3766528cb9ef289bcd418 Mon Sep 17 00:00:00 2001 From: Xin LI Date: Sat, 8 Nov 2014 06:43:33 +0000 Subject: [PATCH 002/280] 5027 zfs large block support Reviewed by: Alek Pinchuk Reviewed by: George Wilson Reviewed by: Josef 'Jeff' Sipek Reviewed by: Richard Elling Reviewed by: Saso Kiselkov Reviewed by: Brian Behlendorf Approved by: Dan McDonald Author: Matthew Ahrens illumos/illumos-gate@b515258426fed6c7311fd3f1dea697cfbd4085c6 --- cmd/zdb/zdb.c | 32 +++++++- cmd/zfs/zfs_main.c | 11 ++- cmd/zstreamdump/zstreamdump.c | 19 ++++- cmd/ztest/ztest.c | 14 +++- common/zfs/zfeature_common.c | 12 ++- common/zfs/zfeature_common.h | 1 + common/zfs/zfs_prop.c | 4 +- common/zfs/zpool_prop.c | 2 + lib/libzfs/common/libzfs.h | 3 + lib/libzfs/common/libzfs_dataset.c | 38 +++++++--- lib/libzfs/common/libzfs_sendrecv.c | 15 ++-- lib/libzfs_core/common/libzfs_core.c | 6 ++ lib/libzfs_core/common/libzfs_core.h | 3 +- man/man1m/zfs.1m | 42 +++++++++-- man/man5/zpool-features.5 | 28 +++++-- uts/common/fs/zfs/bpobj.c | 5 +- uts/common/fs/zfs/bptree.c | 2 +- uts/common/fs/zfs/dbuf.c | 6 +- uts/common/fs/zfs/dmu_objset.c | 16 ++++ uts/common/fs/zfs/dmu_send.c | 83 ++++++++++++++++----- uts/common/fs/zfs/dmu_tx.c | 24 ++++-- uts/common/fs/zfs/dnode.c | 10 +-- uts/common/fs/zfs/dsl_dataset.c | 105 ++++++++++++++++++++++++++- uts/common/fs/zfs/dsl_deadlist.c | 8 +- uts/common/fs/zfs/dsl_destroy.c | 7 ++ uts/common/fs/zfs/dsl_pool.c | 4 +- uts/common/fs/zfs/metaslab.c | 2 +- uts/common/fs/zfs/sa.c | 6 +- uts/common/fs/zfs/spa.c | 26 ++++++- uts/common/fs/zfs/spa_history.c | 2 +- uts/common/fs/zfs/spa_misc.c | 9 +++ uts/common/fs/zfs/sys/dmu.h | 3 +- uts/common/fs/zfs/sys/dmu_objset.h | 1 + uts/common/fs/zfs/sys/dmu_send.h | 6 +- uts/common/fs/zfs/sys/dsl_dataset.h | 11 +++ 
uts/common/fs/zfs/sys/spa.h | 22 ++++-- uts/common/fs/zfs/sys/zap_impl.h | 3 +- uts/common/fs/zfs/sys/zfs_ioctl.h | 5 +- uts/common/fs/zfs/sys/zfs_znode.h | 2 - uts/common/fs/zfs/sys/zil.h | 1 - uts/common/fs/zfs/sys/zil_impl.h | 2 +- uts/common/fs/zfs/vdev.c | 6 +- uts/common/fs/zfs/vdev_queue.c | 2 +- uts/common/fs/zfs/vdev_raidz.c | 6 +- uts/common/fs/zfs/zap_micro.c | 16 ++-- uts/common/fs/zfs/zfs_ioctl.c | 54 ++++++++++++-- uts/common/fs/zfs/zfs_log.c | 2 +- uts/common/fs/zfs/zfs_vfsops.c | 9 +-- uts/common/fs/zfs/zfs_vnops.c | 8 +- uts/common/fs/zfs/zfs_znode.c | 8 +- uts/common/fs/zfs/zil.c | 11 ++- uts/common/fs/zfs/zio.c | 9 +-- uts/common/fs/zfs/zvol.c | 11 +-- uts/common/sys/fs/zfs.h | 1 + 54 files changed, 579 insertions(+), 165 deletions(-) diff --git a/cmd/zdb/zdb.c b/cmd/zdb/zdb.c index 3747e610679..36c4a8515b3 100644 --- a/cmd/zdb/zdb.c +++ b/cmd/zdb/zdb.c @@ -2120,6 +2120,8 @@ dump_label(const char *dev) (void) close(fd); } +static uint64_t num_large_blocks; + /*ARGSUSED*/ static int dump_one_dir(const char *dsname, void *arg) @@ -2132,6 +2134,8 @@ dump_one_dir(const char *dsname, void *arg) (void) printf("Could not open %s, error %d\n", dsname, error); return (0); } + if (dmu_objset_ds(os)->ds_large_blocks) + num_large_blocks++; dump_dir(os); dmu_objset_disown(os, FTAG); fuid_table_destroy(); @@ -2142,7 +2146,7 @@ dump_one_dir(const char *dsname, void *arg) /* * Block statistics. 
*/ -#define PSIZE_HISTO_SIZE (SPA_MAXBLOCKSIZE / SPA_MINBLOCKSIZE + 1) +#define PSIZE_HISTO_SIZE (SPA_OLD_MAXBLOCKSIZE / SPA_MINBLOCKSIZE + 2) typedef struct zdb_blkstats { uint64_t zb_asize; uint64_t zb_lsize; @@ -2207,7 +2211,15 @@ zdb_count_block(zdb_cb_t *zcb, zilog_t *zilog, const blkptr_t *bp, zb->zb_lsize += BP_GET_LSIZE(bp); zb->zb_psize += BP_GET_PSIZE(bp); zb->zb_count++; - zb->zb_psize_histogram[BP_GET_PSIZE(bp) >> SPA_MINBLOCKSHIFT]++; + + /* + * The histogram is only big enough to record blocks up to + * SPA_OLD_MAXBLOCKSIZE; larger blocks go into the last, + * "other", bucket. + */ + int idx = BP_GET_PSIZE(bp) >> SPA_MINBLOCKSHIFT; + idx = MIN(idx, SPA_OLD_MAXBLOCKSIZE / SPA_MINBLOCKSIZE + 1); + zb->zb_psize_histogram[idx]++; zb->zb_gangs += BP_COUNT_GANG(bp); @@ -2917,6 +2929,7 @@ dump_zpool(spa_t *spa) dump_metaslab_groups(spa); if (dump_opt['d'] || dump_opt['i']) { + uint64_t refcount; dump_dir(dp->dp_meta_objset); if (dump_opt['d'] >= 3) { dump_bpobj(&spa->spa_deferred_bpobj, @@ -2936,8 +2949,21 @@ dump_zpool(spa_t *spa) } (void) dmu_objset_find(spa_name(spa), dump_one_dir, NULL, DS_FIND_SNAPSHOTS | DS_FIND_CHILDREN); + + (void) feature_get_refcount(spa, + &spa_feature_table[SPA_FEATURE_LARGE_BLOCKS], &refcount); + if (num_large_blocks != refcount) { + (void) printf("large_blocks feature refcount mismatch: " + "expected %lld != actual %lld\n", + (longlong_t)num_large_blocks, + (longlong_t)refcount); + rc = 2; + } else { + (void) printf("Verified large_blocks feature refcount " + "is correct (%llu)\n", (longlong_t)refcount); + } } - if (dump_opt['b'] || dump_opt['c']) + if (rc == 0 && (dump_opt['b'] || dump_opt['c'])) rc = dump_block_stats(spa); if (rc == 0) diff --git a/cmd/zfs/zfs_main.c b/cmd/zfs/zfs_main.c index 1c642b5e3da..34ddad2c38b 100644 --- a/cmd/zfs/zfs_main.c +++ b/cmd/zfs/zfs_main.c @@ -256,9 +256,9 @@ get_usage(zfs_help_t idx) case HELP_ROLLBACK: return (gettext("\trollback [-rRf] \n")); case HELP_SEND: - return (gettext("\tsend 
[-DnPpRve] [-[iI] snapshot] " + return (gettext("\tsend [-DnPpRvLe] [-[iI] snapshot] " "\n" - "\tsend [-e] [-i snapshot|bookmark] " + "\tsend [-Le] [-i snapshot|bookmark] " "\n")); case HELP_SET: return (gettext("\tset " @@ -3640,7 +3640,7 @@ zfs_do_send(int argc, char **argv) boolean_t extraverbose = B_FALSE; /* check options */ - while ((c = getopt(argc, argv, ":i:I:RDpvnPe")) != -1) { + while ((c = getopt(argc, argv, ":i:I:RDpvnPLe")) != -1) { switch (c) { case 'i': if (fromname) @@ -3675,6 +3675,9 @@ zfs_do_send(int argc, char **argv) case 'n': flags.dryrun = B_TRUE; break; + case 'L': + flags.largeblock = B_TRUE; + break; case 'e': flags.embed_data = B_TRUE; break; @@ -3731,6 +3734,8 @@ zfs_do_send(int argc, char **argv) if (zhp == NULL) return (1); + if (flags.largeblock) + lzc_flags |= LZC_SEND_FLAG_LARGE_BLOCK; if (flags.embed_data) lzc_flags |= LZC_SEND_FLAG_EMBED_DATA; diff --git a/cmd/zstreamdump/zstreamdump.c b/cmd/zstreamdump/zstreamdump.c index dce1cb3d765..d99d8014f04 100644 --- a/cmd/zstreamdump/zstreamdump.c +++ b/cmd/zstreamdump/zstreamdump.c @@ -54,7 +54,6 @@ uint64_t total_stream_len = 0; FILE *send_stream = 0; boolean_t do_byteswap = B_FALSE; boolean_t do_cksum = B_TRUE; -#define INITIAL_BUFLEN (1<<20) static void usage(void) @@ -67,6 +66,18 @@ usage(void) exit(1); } +static void * +safe_malloc(size_t size) +{ + void *rv = malloc(size); + if (rv == NULL) { + (void) fprintf(stderr, "ERROR; failed to allocate %zu bytes\n", + size); + abort(); + } + return (rv); +} + /* * ssread - send stream read. 
* @@ -158,7 +169,7 @@ print_block(char *buf, int length) int main(int argc, char *argv[]) { - char *buf = malloc(INITIAL_BUFLEN); + char *buf = safe_malloc(SPA_MAXBLOCKSIZE); uint64_t drr_record_count[DRR_NUMTYPES] = { 0 }; uint64_t total_records = 0; dmu_replay_record_t thedrr; @@ -307,9 +318,9 @@ main(int argc, char *argv[]) nvlist_t *nv; int sz = drr->drr_payloadlen; - if (sz > INITIAL_BUFLEN) { + if (sz > SPA_MAXBLOCKSIZE) { free(buf); - buf = malloc(sz); + buf = safe_malloc(sz); } (void) ssread(buf, sz, &zc); if (ferror(send_stream)) diff --git a/cmd/ztest/ztest.c b/cmd/ztest/ztest.c index 6a29b2f32ca..a5f96008046 100644 --- a/cmd/ztest/ztest.c +++ b/cmd/ztest/ztest.c @@ -985,9 +985,15 @@ ztest_spa_get_ashift() { static int ztest_random_blocksize(void) { - // Choose a block size >= the ashift. - uint64_t block_shift = - ztest_random(SPA_MAXBLOCKSHIFT - ztest_spa_get_ashift() + 1); + uint64_t block_shift; + /* + * Choose a block size >= the ashift. + * If the SPA supports new MAXBLOCKSIZE, test up to 1MB blocks. + */ + int maxbs = SPA_OLD_MAXBLOCKSHIFT; + if (spa_maxblocksize(ztest_spa) == SPA_MAXBLOCKSIZE) + maxbs = 20; + block_shift = ztest_random(maxbs - ztest_spa_get_ashift() + 1); return (1 << (SPA_MINBLOCKSHIFT + block_shift)); } @@ -4787,7 +4793,7 @@ ztest_fault_inject(ztest_ds_t *zd, uint64_t id) char path0[MAXPATHLEN]; char pathrand[MAXPATHLEN]; size_t fsize; - int bshift = SPA_MAXBLOCKSHIFT + 2; /* don't scrog all labels */ + int bshift = SPA_OLD_MAXBLOCKSHIFT + 2; /* don't scrog all labels */ int iters = 1000; int maxfaults; int mirror_save; diff --git a/common/zfs/zfeature_common.c b/common/zfs/zfeature_common.c index 9b046ab07db..3358e5eb87b 100644 --- a/common/zfs/zfeature_common.c +++ b/common/zfs/zfeature_common.c @@ -57,7 +57,8 @@ valid_char(char c, boolean_t after_colon) { return ((c >= 'a' && c <= 'z') || (c >= '0' && c <= '9') || - c == (after_colon ? '_' : '.')); + (after_colon && c == '_') || + (!after_colon && (c == '.' 
|| c == '-'))); } /* @@ -221,4 +222,13 @@ zpool_feature_init(void) "com.delphix:embedded_data", "embedded_data", "Blocks which compress very well use even less space.", B_FALSE, B_TRUE, B_TRUE, NULL); + + static const spa_feature_t large_blocks_deps[] = { + SPA_FEATURE_EXTENSIBLE_DATASET, + SPA_FEATURE_NONE + }; + zfeature_register(SPA_FEATURE_LARGE_BLOCKS, + "org.open-zfs:large_blocks", "large_blocks", + "Support for blocks larger than 128KB.", B_FALSE, B_FALSE, B_FALSE, + large_blocks_deps); } diff --git a/common/zfs/zfeature_common.h b/common/zfs/zfeature_common.h index be2111be918..3f54b392be4 100644 --- a/common/zfs/zfeature_common.h +++ b/common/zfs/zfeature_common.h @@ -51,6 +51,7 @@ typedef enum spa_feature { SPA_FEATURE_EMBEDDED_DATA, SPA_FEATURE_BOOKMARKS, SPA_FEATURE_FS_SS_LIMIT, + SPA_FEATURE_LARGE_BLOCKS, SPA_FEATURES } spa_feature_t; diff --git a/common/zfs/zfs_prop.c b/common/zfs/zfs_prop.c index 0b4927243fb..e145b1c866e 100644 --- a/common/zfs/zfs_prop.c +++ b/common/zfs/zfs_prop.c @@ -397,8 +397,8 @@ zfs_prop_init(void) /* inherit number properties */ zprop_register_number(ZFS_PROP_RECORDSIZE, "recordsize", - SPA_MAXBLOCKSIZE, PROP_INHERIT, - ZFS_TYPE_FILESYSTEM, "512 to 128k, power of 2", "RECSIZE"); + SPA_OLD_MAXBLOCKSIZE, PROP_INHERIT, + ZFS_TYPE_FILESYSTEM, "512 to 1M, power of 2", "RECSIZE"); /* hidden properties */ zprop_register_hidden(ZFS_PROP_CREATETXG, "createtxg", PROP_TYPE_NUMBER, diff --git a/common/zfs/zpool_prop.c b/common/zfs/zpool_prop.c index a400f821e2e..4d906b02bc0 100644 --- a/common/zfs/zpool_prop.c +++ b/common/zfs/zpool_prop.c @@ -127,6 +127,8 @@ zpool_prop_init(void) /* hidden properties */ zprop_register_hidden(ZPOOL_PROP_NAME, "name", PROP_TYPE_STRING, PROP_READONLY, ZFS_TYPE_POOL, "NAME"); + zprop_register_hidden(ZPOOL_PROP_MAXBLOCKSIZE, "maxblocksize", + PROP_TYPE_NUMBER, PROP_READONLY, ZFS_TYPE_POOL, "MAXBLOCKSIZE"); } /* diff --git a/lib/libzfs/common/libzfs.h b/lib/libzfs/common/libzfs.h index 
44b97062b75..9aa5ba59056 100644 --- a/lib/libzfs/common/libzfs.h +++ b/lib/libzfs/common/libzfs.h @@ -591,6 +591,9 @@ typedef struct sendflags { /* show progress (ie. -v) */ boolean_t progress; + /* large blocks (>128K) are permitted */ + boolean_t largeblock; + /* WRITE_EMBEDDED records of type DATA are permitted */ boolean_t embed_data; } sendflags_t; diff --git a/lib/libzfs/common/libzfs_dataset.c b/lib/libzfs/common/libzfs_dataset.c index 1a61d2694ab..27c3cca8e82 100644 --- a/lib/libzfs/common/libzfs_dataset.c +++ b/lib/libzfs/common/libzfs_dataset.c @@ -1048,21 +1048,36 @@ zfs_valid_proplist(libzfs_handle_t *hdl, zfs_type_t type, nvlist_t *nvl, break; } - case ZFS_PROP_RECORDSIZE: case ZFS_PROP_VOLBLOCKSIZE: - /* must be power of two within SPA_{MIN,MAX}BLOCKSIZE */ + case ZFS_PROP_RECORDSIZE: + { + int maxbs = SPA_MAXBLOCKSIZE; + if (zhp != NULL) { + maxbs = zpool_get_prop_int(zhp->zpool_hdl, + ZPOOL_PROP_MAXBLOCKSIZE, NULL); + } + /* + * Volumes are limited to a volblocksize of 128KB, + * because they typically service workloads with + * small random writes, which incur a large performance + * penalty with large blocks. + */ + if (prop == ZFS_PROP_VOLBLOCKSIZE) + maxbs = SPA_OLD_MAXBLOCKSIZE; + /* + * The value must be a power of two between + * SPA_MINBLOCKSIZE and maxbs. 
+ */ if (intval < SPA_MINBLOCKSIZE || - intval > SPA_MAXBLOCKSIZE || !ISP2(intval)) { + intval > maxbs || !ISP2(intval)) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "'%s' must be power of 2 from %u " - "to %uk"), propname, - (uint_t)SPA_MINBLOCKSIZE, - (uint_t)SPA_MAXBLOCKSIZE >> 10); + "'%s' must be power of 2 from 512B " + "to %uKB"), propname, maxbs >> 10); (void) zfs_error(hdl, EZFS_BADPROP, errbuf); goto error; } break; - + } case ZFS_PROP_MLSLABEL: { /* @@ -1437,7 +1452,8 @@ zfs_setprop_error(libzfs_handle_t *hdl, zfs_prop_t prop, int err, break; case ERANGE: - if (prop == ZFS_PROP_COMPRESSION) { + if (prop == ZFS_PROP_COMPRESSION || + prop == ZFS_PROP_RECORDSIZE) { (void) zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "property setting is not allowed on " "bootable datasets")); @@ -3136,9 +3152,7 @@ zfs_create(libzfs_handle_t *hdl, const char *path, zfs_type_t type, case EDOM: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "volume block size must be power of 2 from " - "%u to %uk"), - (uint_t)SPA_MINBLOCKSIZE, - (uint_t)SPA_MAXBLOCKSIZE >> 10); + "512B to 128KB")); return (zfs_error(hdl, EZFS_BADPROP, errbuf)); diff --git a/lib/libzfs/common/libzfs_sendrecv.c b/lib/libzfs/common/libzfs_sendrecv.c index 6697b528315..c4944438aa2 100644 --- a/lib/libzfs/common/libzfs_sendrecv.c +++ b/lib/libzfs/common/libzfs_sendrecv.c @@ -206,7 +206,7 @@ static void * cksummer(void *arg) { dedup_arg_t *dda = arg; - char *buf = malloc(1<<20); + char *buf = zfs_alloc(dda->dedup_hdl, SPA_MAXBLOCKSIZE); dmu_replay_record_t thedrr; dmu_replay_record_t *drr = &thedrr; struct drr_begin *drrb = &thedrr.drr_u.drr_begin; @@ -271,9 +271,9 @@ cksummer(void *arg) DMU_COMPOUNDSTREAM && drr->drr_payloadlen != 0) { int sz = drr->drr_payloadlen; - if (sz > 1<<20) { - free(buf); - buf = malloc(sz); + if (sz > SPA_MAXBLOCKSIZE) { + buf = zfs_realloc(dda->dedup_hdl, buf, + SPA_MAXBLOCKSIZE, sz); } (void) ssread(buf, sz, ofp); if (ferror(stdin)) @@ -806,7 +806,7 @@ typedef struct send_dump_data { char 
prevsnap[ZFS_MAXNAMELEN]; uint64_t prevsnap_obj; boolean_t seenfrom, seento, replicate, doall, fromorigin; - boolean_t verbose, dryrun, parsable, progress, embed_data; + boolean_t verbose, dryrun, parsable, progress, embed_data, large_block; int outfd; boolean_t err; nvlist_t *fss; @@ -1153,6 +1153,8 @@ dump_snapshot(zfs_handle_t *zhp, void *arg) } enum lzc_send_flags flags = 0; + if (sdd->large_block) + flags |= LZC_SEND_FLAG_LARGE_BLOCK; if (sdd->embed_data) flags |= LZC_SEND_FLAG_EMBED_DATA; @@ -1501,6 +1503,7 @@ zfs_send(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap, sdd.parsable = flags->parsable; sdd.progress = flags->progress; sdd.dryrun = flags->dryrun; + sdd.large_block = flags->largeblock; sdd.embed_data = flags->embed_data; sdd.filter_cb = filter_func; sdd.filter_cb_arg = cb_arg; @@ -2506,7 +2509,7 @@ static int recv_skip(libzfs_handle_t *hdl, int fd, boolean_t byteswap) { dmu_replay_record_t *drr; - void *buf = malloc(1<<20); + void *buf = zfs_alloc(hdl, SPA_MAXBLOCKSIZE); char errbuf[1024]; (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, diff --git a/lib/libzfs_core/common/libzfs_core.c b/lib/libzfs_core/common/libzfs_core.c index 6f36568667b..06221fab4ae 100644 --- a/lib/libzfs_core/common/libzfs_core.c +++ b/lib/libzfs_core/common/libzfs_core.c @@ -455,6 +455,10 @@ lzc_get_holds(const char *snapname, nvlist_t **holdsp) * * "fd" is the file descriptor to write the send stream to. * + * If "flags" contains LZC_SEND_FLAG_LARGE_BLOCK, the stream is permitted + * to contain DRR_WRITE records with drr_length > 128K, and DRR_OBJECT + * records with drr_blksz > 128K. 
+ * * If "flags" contains LZC_SEND_FLAG_EMBED_DATA, the stream is permitted * to contain DRR_WRITE_EMBEDDED records with drr_etype==BP_EMBEDDED_TYPE_DATA, * which the receiving system must support (as indicated by support @@ -471,6 +475,8 @@ lzc_send(const char *snapname, const char *from, int fd, fnvlist_add_int32(args, "fd", fd); if (from != NULL) fnvlist_add_string(args, "fromsnap", from); + if (flags & LZC_SEND_FLAG_LARGE_BLOCK) + fnvlist_add_boolean(args, "largeblockok"); if (flags & LZC_SEND_FLAG_EMBED_DATA) fnvlist_add_boolean(args, "embedok"); err = lzc_ioctl(ZFS_IOC_SEND_NEW, snapname, args, NULL); diff --git a/lib/libzfs_core/common/libzfs_core.h b/lib/libzfs_core/common/libzfs_core.h index d7d767055d3..bdd6c951ee4 100644 --- a/lib/libzfs_core/common/libzfs_core.h +++ b/lib/libzfs_core/common/libzfs_core.h @@ -53,7 +53,8 @@ int lzc_release(nvlist_t *, nvlist_t **); int lzc_get_holds(const char *, nvlist_t **); enum lzc_send_flags { - LZC_SEND_FLAG_EMBED_DATA = 1 << 0 + LZC_SEND_FLAG_EMBED_DATA = 1 << 0, + LZC_SEND_FLAG_LARGE_BLOCK = 1 << 1 }; int lzc_send(const char *, const char *, int, enum lzc_send_flags); diff --git a/man/man1m/zfs.1m b/man/man1m/zfs.1m index 60cef0ae950..4ab55f1a3cc 100644 --- a/man/man1m/zfs.1m +++ b/man/man1m/zfs.1m @@ -176,12 +176,12 @@ zfs \- configures ZFS file systems .LP .nf -\fBzfs\fR \fBsend\fR [\fB-DnPpRve\fR] [\fB-\fR[\fBiI\fR] \fIsnapshot\fR] \fIsnapshot\fR +\fBzfs\fR \fBsend\fR [\fB-DnPpRveL\fR] [\fB-\fR[\fBiI\fR] \fIsnapshot\fR] \fIsnapshot\fR .fi .LP .nf -\fBzfs\fR \fBsend\fR [\fB-e\fR] [\fB-i \fIsnapshot\fR|\fIbookmark\fR]\fR \fIfilesystem\fR|\fIvolume\fR|\fIsnapshot\fR +\fBzfs\fR \fBsend\fR [\fB-eL\fR] [\fB-i \fIsnapshot\fR|\fIbookmark\fR]\fR \fIfilesystem\fR|\fIvolume\fR|\fIsnapshot\fR .fi .LP @@ -1244,7 +1244,9 @@ significant performance gains. Use of this property for general purpose file systems is strongly discouraged, and may adversely affect performance. 
.sp The size specified must be a power of two greater than or equal to 512 and less -than or equal to 128 Kbytes. +than or equal to 128 Kbytes. If the \fBlarge_blocks\fR feature is enabled +on the pool, the size may be up to 1 Mbyte. See \fBzpool-features\fR(5) +for details on ZFS feature flags. .sp Changing the file system's \fBrecordsize\fR affects only files created afterward; existing files are unaffected. @@ -2923,7 +2925,7 @@ See \fBzpool-features\fR(5) for details on ZFS feature flags and the .sp .ne 2 .na -\fBzfs send\fR [\fB-DnPpRve\fR] [\fB-\fR[\fBiI\fR] \fIsnapshot\fR] \fIsnapshot\fR +\fBzfs send\fR [\fB-DnPpRveL\fR] [\fB-\fR[\fBiI\fR] \fIsnapshot\fR] \fIsnapshot\fR .ad .sp .6 .RS 4n @@ -2996,6 +2998,21 @@ will be much better if the filesystem uses a dedup-capable checksum (eg. \fBsha256\fR). .RE +.sp +.ne 2 +.na +\fB\fB-L\fR\fR +.ad +.sp .6 +.RS 4n +Generate a stream which may contain blocks larger than 128KB. This flag +has no effect if the \fBlarge_blocks\fR pool feature is disabled, or if +the \fBrecordsize\fR property of this filesystem has never been set above +128KB. The receiving system must have the \fBlarge_blocks\fR pool feature +enabled as well. See \fBzpool-features\fR(5) for details on ZFS feature +flags and the \fBlarge_blocks\fR feature. +.RE + .sp .ne 2 .na @@ -3064,7 +3081,7 @@ on future versions of \fBZFS\fR. .sp .ne 2 .na -\fBzfs send\fR [\fB-e\fR] [\fB-i\fR \fIsnapshot\fR|\fIbookmark\fR] \fIfilesystem\fR|\fIvolume\fR|\fIsnapshot\fR +\fBzfs send\fR [\fB-eL\fR] [\fB-i\fR \fIsnapshot\fR|\fIbookmark\fR] \fIfilesystem\fR|\fIvolume\fR|\fIsnapshot\fR .ad .sp .6 .RS 4n @@ -3092,6 +3109,21 @@ be the origin snapshot, or an earlier snapshot in the origin's filesystem, or the origin's origin, etc. .RE +.sp +.ne 2 +.na +\fB\fB-L\fR\fR +.ad +.sp .6 +.RS 4n +Generate a stream which may contain blocks larger than 128KB.
This flag +has no effect if the \fBlarge_blocks\fR pool feature is disabled, or if +the \fBrecordsize\fR property of this filesystem has never been set above +128KB. The receiving system must have the \fBlarge_blocks\fR pool feature +enabled as well. See \fBzpool-features\fR(5) for details on ZFS feature +flags and the \fBlarge_blocks\fR feature. +.RE + .sp .ne 2 .na diff --git a/man/man5/zpool-features.5 b/man/man5/zpool-features.5 index 24398124f0f..fbefb37b809 100644 --- a/man/man5/zpool-features.5 +++ b/man/man5/zpool-features.5 @@ -18,7 +18,6 @@ .SH NAME zpool\-features \- ZFS pool feature descriptions .SH DESCRIPTION -.sp .LP ZFS pool on\-disk format versions are specified via "features" which replace the old on\-disk format numbers (the last supported on\-disk format number is @@ -36,7 +35,6 @@ format of the pool is specified by the set of all features marked as \fBactive\fR on the pool. If the pool was created by another software version this set may include unsupported features. .SS "Identifying features" -.sp .LP Every feature has a guid of the form \fIcom.example:feature_name\fR. The reverse DNS name ensures that the feature's guid is unique across all ZFS @@ -51,7 +49,6 @@ name is the portion of its guid which follows the ':' (e.g. however a feature's short name may differ across ZFS implementations if following the convention would result in name conflicts. .SS "Feature states" -.sp .LP Features can be in one of three states: .sp @@ -97,7 +94,6 @@ cannot be disabled once they have been enabled. The state of supported features is exposed through pool properties of the form \fIfeature@short_name\fR. .SS "Read\-only compatibility" -.sp .LP Some features may make on\-disk format changes that do not interfere with other software's ability to read from the pool.
These features are referred to as @@ -106,7 +102,6 @@ compatible, the pool can be imported in read\-only mode by setting the \fBreadonly\fR property during import (see \fBzpool\fR(1M) for details on importing pools). .SS "Unsupported features" -.sp .LP For each unsupported feature enabled on an imported pool a pool property named \fIunsupported@feature_guid\fR will indicate why the import was allowed @@ -133,13 +128,11 @@ read\-only mode. .RE .SS "Feature dependencies" -.sp .LP Some features depend on other features being enabled in order to function properly. Enabling a feature will automatically enable any features it depends on. .SH FEATURES -.sp .LP The following features are supported on this system: .sp @@ -430,5 +423,26 @@ never return to being \fBenabled\fR. .RE +.sp +.ne 2 +.na +\fB\fBlarge_blocks\fR\fR +.ad +.RS 4n +.TS +l l . +GUID org.open-zfs:large_blocks +READ\-ONLY COMPATIBLE no +DEPENDENCIES extensible_dataset +.TE + +The \fBlarge_blocks\fR feature allows the record size on a dataset to be +set larger than 128KB. + +This feature becomes \fBactive\fR once a \fBrecordsize\fR property has been +set larger than 128KB, and will return to being \fBenabled\fR once all +filesystems that have ever had their recordsize larger than 128KB are destroyed.
+.RE + .SH "SEE ALSO" \fBzpool\fR(1M) diff --git a/uts/common/fs/zfs/bpobj.c b/uts/common/fs/zfs/bpobj.c index e75ae72f9e9..da4d38a3a9c 100644 --- a/uts/common/fs/zfs/bpobj.c +++ b/uts/common/fs/zfs/bpobj.c @@ -43,7 +43,7 @@ bpobj_alloc_empty(objset_t *os, int blocksize, dmu_tx_t *tx) if (!spa_feature_is_active(spa, SPA_FEATURE_EMPTY_BPOBJ)) { ASSERT0(dp->dp_empty_bpobj); dp->dp_empty_bpobj = - bpobj_alloc(os, SPA_MAXBLOCKSIZE, tx); + bpobj_alloc(os, SPA_OLD_MAXBLOCKSIZE, tx); VERIFY(zap_add(os, DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_EMPTY_BPOBJ, sizeof (uint64_t), 1, @@ -396,7 +396,8 @@ bpobj_enqueue_subobj(bpobj_t *bpo, uint64_t subobj, dmu_tx_t *tx) dmu_buf_will_dirty(bpo->bpo_dbuf, tx); if (bpo->bpo_phys->bpo_subobjs == 0) { bpo->bpo_phys->bpo_subobjs = dmu_object_alloc(bpo->bpo_os, - DMU_OT_BPOBJ_SUBOBJ, SPA_MAXBLOCKSIZE, DMU_OT_NONE, 0, tx); + DMU_OT_BPOBJ_SUBOBJ, SPA_OLD_MAXBLOCKSIZE, + DMU_OT_NONE, 0, tx); } dmu_object_info_t doi; diff --git a/uts/common/fs/zfs/bptree.c b/uts/common/fs/zfs/bptree.c index c724ed07410..5f7d76f0e2a 100644 --- a/uts/common/fs/zfs/bptree.c +++ b/uts/common/fs/zfs/bptree.c @@ -65,7 +65,7 @@ bptree_alloc(objset_t *os, dmu_tx_t *tx) bptree_phys_t *bt; obj = dmu_object_alloc(os, DMU_OTN_UINT64_METADATA, - SPA_MAXBLOCKSIZE, DMU_OTN_UINT64_METADATA, + SPA_OLD_MAXBLOCKSIZE, DMU_OTN_UINT64_METADATA, sizeof (bptree_phys_t), tx); /* diff --git a/uts/common/fs/zfs/dbuf.c b/uts/common/fs/zfs/dbuf.c index 8d064daf25c..a5816e12ccf 100644 --- a/uts/common/fs/zfs/dbuf.c +++ b/uts/common/fs/zfs/dbuf.c @@ -2022,10 +2022,8 @@ dbuf_spill_set_blksz(dmu_buf_t *db_fake, uint64_t blksz, dmu_tx_t *tx) return (SET_ERROR(ENOTSUP)); if (blksz == 0) blksz = SPA_MINBLOCKSIZE; - if (blksz > SPA_MAXBLOCKSIZE) - blksz = SPA_MAXBLOCKSIZE; - else - blksz = P2ROUNDUP(blksz, SPA_MINBLOCKSIZE); + ASSERT3U(blksz, <=, spa_maxblocksize(dmu_objset_spa(db->db_objset))); + blksz = P2ROUNDUP(blksz, SPA_MINBLOCKSIZE); DB_DNODE_ENTER(db); dn = DB_DNODE(db); diff --git 
a/uts/common/fs/zfs/dmu_objset.c b/uts/common/fs/zfs/dmu_objset.c index 73b8e056cc8..e7aeed17fb9 100644 --- a/uts/common/fs/zfs/dmu_objset.c +++ b/uts/common/fs/zfs/dmu_objset.c @@ -255,6 +255,14 @@ logbias_changed_cb(void *arg, uint64_t newval) zil_set_logbias(os->os_zil, newval); } +static void +recordsize_changed_cb(void *arg, uint64_t newval) +{ + objset_t *os = arg; + + os->os_recordsize = newval; +} + void dmu_objset_byteswap(void *buf, size_t size) { @@ -384,6 +392,11 @@ dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp, ZFS_PROP_REDUNDANT_METADATA), redundant_metadata_changed_cb, os); } + if (err == 0) { + err = dsl_prop_register(ds, + zfs_prop_to_name(ZFS_PROP_RECORDSIZE), + recordsize_changed_cb, os); + } } if (err != 0) { VERIFY(arc_buf_remove_ref(os->os_phys_buf, @@ -642,6 +655,9 @@ dmu_objset_evict(objset_t *os) VERIFY0(dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_REDUNDANT_METADATA), redundant_metadata_changed_cb, os)); + VERIFY0(dsl_prop_unregister(ds, + zfs_prop_to_name(ZFS_PROP_RECORDSIZE), + recordsize_changed_cb, os)); } VERIFY0(dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_PRIMARYCACHE), diff --git a/uts/common/fs/zfs/dmu_send.c b/uts/common/fs/zfs/dmu_send.c index fed1b86a1b1..2c08e7075f6 100644 --- a/uts/common/fs/zfs/dmu_send.c +++ b/uts/common/fs/zfs/dmu_send.c @@ -206,11 +206,12 @@ dump_write(dmu_sendarg_t *dsp, dmu_object_type_t type, drrw->drr_offset = offset; drrw->drr_length = blksz; drrw->drr_toguid = dsp->dsa_toguid; - if (BP_IS_EMBEDDED(bp)) { + if (bp == NULL || BP_IS_EMBEDDED(bp)) { /* - * There's no pre-computed checksum of embedded BP's, so - * (like fletcher4-checkummed blocks) userland will have - * to compute a dedup-capable checksum itself. + * There's no pre-computed checksum for partial-block + * writes or embedded BP's, so (like + * fletcher4-checkummed blocks) userland will have to + * compute a dedup-capable checksum itself. 
*/ drrw->drr_checksumtype = ZIO_CHECKSUM_OFF; } else { @@ -372,6 +373,10 @@ dump_dnode(dmu_sendarg_t *dsp, uint64_t object, dnode_phys_t *dnp) drro->drr_compress = dnp->dn_compress; drro->drr_toguid = dsp->dsa_toguid; + if (!(dsp->dsa_featureflags & DMU_BACKUP_FEATURE_LARGE_BLOCKS) && + drro->drr_blksz > SPA_OLD_MAXBLOCKSIZE) + drro->drr_blksz = SPA_OLD_MAXBLOCKSIZE; + if (dump_bytes(dsp, dsp->dsa_drr, sizeof (dmu_replay_record_t)) != 0) return (SET_ERROR(EINTR)); @@ -491,6 +496,7 @@ backup_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, uint32_t aflags = ARC_WAIT; arc_buf_t *abuf; int blksz = BP_GET_LSIZE(bp); + uint64_t offset; ASSERT3U(blksz, ==, dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT); ASSERT0(zb->zb_level); @@ -511,8 +517,24 @@ backup_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, } } - err = dump_write(dsp, type, zb->zb_object, zb->zb_blkid * blksz, - blksz, bp, abuf->b_data); + offset = zb->zb_blkid * blksz; + + if (!(dsp->dsa_featureflags & + DMU_BACKUP_FEATURE_LARGE_BLOCKS) && + blksz > SPA_OLD_MAXBLOCKSIZE) { + char *buf = abuf->b_data; + while (blksz > 0 && err == 0) { + int n = MIN(blksz, SPA_OLD_MAXBLOCKSIZE); + err = dump_write(dsp, type, zb->zb_object, + offset, n, NULL, buf); + offset += n; + buf += n; + blksz -= n; + } + } else { + err = dump_write(dsp, type, zb->zb_object, + offset, blksz, bp, abuf->b_data); + } (void) arc_buf_remove_ref(abuf, &abuf); } @@ -526,7 +548,7 @@ backup_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, static int dmu_send_impl(void *tag, dsl_pool_t *dp, dsl_dataset_t *ds, zfs_bookmark_phys_t *fromzb, boolean_t is_clone, boolean_t embedok, - int outfd, vnode_t *vp, offset_t *off) + boolean_t large_block_ok, int outfd, vnode_t *vp, offset_t *off) { objset_t *os; dmu_replay_record_t *drr; @@ -561,6 +583,8 @@ dmu_send_impl(void *tag, dsl_pool_t *dp, dsl_dataset_t *ds, } #endif + if (large_block_ok && ds->ds_large_blocks) + featureflags |= DMU_BACKUP_FEATURE_LARGE_BLOCKS; if (embedok && 
spa_feature_is_active(dp->dp_spa, SPA_FEATURE_EMBEDDED_DATA)) { featureflags |= DMU_BACKUP_FEATURE_EMBED_DATA; @@ -656,7 +680,8 @@ out: int dmu_send_obj(const char *pool, uint64_t tosnap, uint64_t fromsnap, - boolean_t embedok, int outfd, vnode_t *vp, offset_t *off) + boolean_t embedok, boolean_t large_block_ok, + int outfd, vnode_t *vp, offset_t *off) { dsl_pool_t *dp; dsl_dataset_t *ds; @@ -690,18 +715,19 @@ dmu_send_obj(const char *pool, uint64_t tosnap, uint64_t fromsnap, zb.zbm_guid = fromds->ds_phys->ds_guid; is_clone = (fromds->ds_dir != ds->ds_dir); dsl_dataset_rele(fromds, FTAG); - err = dmu_send_impl(FTAG, dp, ds, &zb, is_clone, embedok, - outfd, vp, off); + err = dmu_send_impl(FTAG, dp, ds, &zb, is_clone, + embedok, large_block_ok, outfd, vp, off); } else { - err = dmu_send_impl(FTAG, dp, ds, NULL, B_FALSE, embedok, - outfd, vp, off); + err = dmu_send_impl(FTAG, dp, ds, NULL, B_FALSE, + embedok, large_block_ok, outfd, vp, off); } dsl_dataset_rele(ds, FTAG); return (err); } int -dmu_send(const char *tosnap, const char *fromsnap, boolean_t embedok, +dmu_send(const char *tosnap, const char *fromsnap, + boolean_t embedok, boolean_t large_block_ok, int outfd, vnode_t *vp, offset_t *off) { dsl_pool_t *dp; @@ -768,11 +794,11 @@ dmu_send(const char *tosnap, const char *fromsnap, boolean_t embedok, dsl_pool_rele(dp, FTAG); return (err); } - err = dmu_send_impl(FTAG, dp, ds, &zb, is_clone, embedok, - outfd, vp, off); + err = dmu_send_impl(FTAG, dp, ds, &zb, is_clone, + embedok, large_block_ok, outfd, vp, off); } else { - err = dmu_send_impl(FTAG, dp, ds, NULL, B_FALSE, embedok, - outfd, vp, off); + err = dmu_send_impl(FTAG, dp, ds, NULL, B_FALSE, + embedok, large_block_ok, outfd, vp, off); } if (owned) dsl_dataset_disown(ds, FTAG); @@ -972,6 +998,15 @@ dmu_recv_begin_check(void *arg, dmu_tx_t *tx) !spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_LZ4_COMPRESS)) return (SET_ERROR(ENOTSUP)); + /* + * The receiving code doesn't know how to translate large blocks + * to 
smaller ones, so the pool must have the LARGE_BLOCKS + * feature enabled if the stream has LARGE_BLOCKS. + */ + if ((featureflags & DMU_BACKUP_FEATURE_LARGE_BLOCKS) && + !spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_LARGE_BLOCKS)) + return (SET_ERROR(ENOTSUP)); + error = dsl_dataset_hold(dp, tofs, FTAG, &ds); if (error == 0) { /* target fs already exists; recv into temp clone */ @@ -1097,6 +1132,13 @@ dmu_recv_begin_sync(void *arg, dmu_tx_t *tx) } VERIFY0(dsl_dataset_own_obj(dp, dsobj, dmu_recv_tag, &newds)); + if ((DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo) & + DMU_BACKUP_FEATURE_LARGE_BLOCKS) && + !newds->ds_large_blocks) { + dsl_dataset_activate_large_blocks_sync_impl(dsobj, tx); + newds->ds_large_blocks = B_TRUE; + } + dmu_buf_will_dirty(newds->ds_dbuf, tx); newds->ds_phys->ds_flags |= DS_FLAG_INCONSISTENT; @@ -1222,6 +1264,7 @@ restore_read(struct restorearg *ra, int len, char *buf) /* some things will require 8-byte alignment, so everything must */ ASSERT0(len % 8); + ASSERT3U(len, <=, ra->bufsize); while (done < len) { ssize_t resid; @@ -1361,7 +1404,7 @@ restore_object(struct restorearg *ra, objset_t *os, struct drr_object *drro) drro->drr_compress >= ZIO_COMPRESS_FUNCTIONS || P2PHASE(drro->drr_blksz, SPA_MINBLOCKSIZE) || drro->drr_blksz < SPA_MINBLOCKSIZE || - drro->drr_blksz > SPA_MAXBLOCKSIZE || + drro->drr_blksz > spa_maxblocksize(dmu_objset_spa(os)) || drro->drr_bonuslen > DN_MAX_BONUSLEN) { return (SET_ERROR(EINVAL)); } @@ -1634,7 +1677,7 @@ restore_spill(struct restorearg *ra, objset_t *os, struct drr_spill *drrs) int err; if (drrs->drr_length < SPA_MINBLOCKSIZE || - drrs->drr_length > SPA_MAXBLOCKSIZE) + drrs->drr_length > spa_maxblocksize(dmu_objset_spa(os))) return (SET_ERROR(EINVAL)); data = restore_read(ra, drrs->drr_length, NULL); @@ -1721,7 +1764,7 @@ dmu_recv_stream(dmu_recv_cookie_t *drc, vnode_t *vp, offset_t *voffp, ra.cksum = drc->drc_cksum; ra.vp = vp; ra.voff = *voffp; - ra.bufsize = 1<<20; + ra.bufsize = SPA_MAXBLOCKSIZE; ra.buf = 
kmem_alloc(ra.bufsize, KM_SLEEP); /* these were verified in dmu_recv_begin */ diff --git a/uts/common/fs/zfs/dmu_tx.c b/uts/common/fs/zfs/dmu_tx.c index 55ce31eda85..d249762e5a2 100644 --- a/uts/common/fs/zfs/dmu_tx.c +++ b/uts/common/fs/zfs/dmu_tx.c @@ -224,7 +224,7 @@ dmu_tx_count_write(dmu_tx_hold_t *txh, uint64_t off, uint64_t len) return; min_bs = SPA_MINBLOCKSHIFT; - max_bs = SPA_MAXBLOCKSHIFT; + max_bs = highbit64(txh->txh_tx->tx_objset->os_recordsize) - 1; min_ibs = DN_MIN_INDBLKSHIFT; max_ibs = DN_MAX_INDBLKSHIFT; @@ -293,6 +293,14 @@ dmu_tx_count_write(dmu_tx_hold_t *txh, uint64_t off, uint64_t len) */ ASSERT(dn->dn_datablkshift != 0); min_bs = max_bs = dn->dn_datablkshift; + } else { + /* + * The blocksize can increase up to the recordsize, + * or if it is already more than the recordsize, + * up to the next power of 2. + */ + min_bs = highbit64(dn->dn_datablksz - 1); + max_bs = MAX(max_bs, highbit64(dn->dn_datablksz - 1)); } /* @@ -750,11 +758,11 @@ dmu_tx_hold_zap(dmu_tx_t *tx, uint64_t object, int add, const char *name) bp = &dn->dn_phys->dn_blkptr[0]; if (dsl_dataset_block_freeable(dn->dn_objset->os_dsl_dataset, bp, bp->blk_birth)) - txh->txh_space_tooverwrite += SPA_MAXBLOCKSIZE; + txh->txh_space_tooverwrite += MZAP_MAX_BLKSZ; else - txh->txh_space_towrite += SPA_MAXBLOCKSIZE; + txh->txh_space_towrite += MZAP_MAX_BLKSZ; if (!BP_IS_HOLE(bp)) - txh->txh_space_tounref += SPA_MAXBLOCKSIZE; + txh->txh_space_tounref += MZAP_MAX_BLKSZ; return; } @@ -1543,18 +1551,18 @@ dmu_tx_hold_spill(dmu_tx_t *tx, uint64_t object) /* If blkptr doesn't exist then add space to towrite */ if (!(dn->dn_phys->dn_flags & DNODE_FLAG_SPILL_BLKPTR)) { - txh->txh_space_towrite += SPA_MAXBLOCKSIZE; + txh->txh_space_towrite += SPA_OLD_MAXBLOCKSIZE; } else { blkptr_t *bp; bp = &dn->dn_phys->dn_spill; if (dsl_dataset_block_freeable(dn->dn_objset->os_dsl_dataset, bp, bp->blk_birth)) - txh->txh_space_tooverwrite += SPA_MAXBLOCKSIZE; + txh->txh_space_tooverwrite += SPA_OLD_MAXBLOCKSIZE; 
else - txh->txh_space_towrite += SPA_MAXBLOCKSIZE; + txh->txh_space_towrite += SPA_OLD_MAXBLOCKSIZE; if (!BP_IS_HOLE(bp)) - txh->txh_space_tounref += SPA_MAXBLOCKSIZE; + txh->txh_space_tounref += SPA_OLD_MAXBLOCKSIZE; } } diff --git a/uts/common/fs/zfs/dnode.c b/uts/common/fs/zfs/dnode.c index 175157714d0..9c70c2bfb1b 100644 --- a/uts/common/fs/zfs/dnode.c +++ b/uts/common/fs/zfs/dnode.c @@ -510,10 +510,10 @@ dnode_allocate(dnode_t *dn, dmu_object_type_t ot, int blocksize, int ibs, { int i; + ASSERT3U(blocksize, <=, + spa_maxblocksize(dmu_objset_spa(dn->dn_objset))); if (blocksize == 0) blocksize = 1 << zfs_default_bs; - else if (blocksize > SPA_MAXBLOCKSIZE) - blocksize = SPA_MAXBLOCKSIZE; else blocksize = P2ROUNDUP(blocksize, SPA_MINBLOCKSIZE); @@ -594,7 +594,8 @@ dnode_reallocate(dnode_t *dn, dmu_object_type_t ot, int blocksize, int nblkptr; ASSERT3U(blocksize, >=, SPA_MINBLOCKSIZE); - ASSERT3U(blocksize, <=, SPA_MAXBLOCKSIZE); + ASSERT3U(blocksize, <=, + spa_maxblocksize(dmu_objset_spa(dn->dn_objset))); ASSERT0(blocksize % SPA_MINBLOCKSIZE); ASSERT(dn->dn_object != DMU_META_DNODE_OBJECT || dmu_tx_private_ok(tx)); ASSERT(tx->tx_txg != 0); @@ -1347,10 +1348,9 @@ dnode_set_blksz(dnode_t *dn, uint64_t size, int ibs, dmu_tx_t *tx) dmu_buf_impl_t *db; int err; + ASSERT3U(size, <=, spa_maxblocksize(dmu_objset_spa(dn->dn_objset))); if (size == 0) size = SPA_MINBLOCKSIZE; - if (size > SPA_MAXBLOCKSIZE) - size = SPA_MAXBLOCKSIZE; else size = P2ROUNDUP(size, SPA_MINBLOCKSIZE); diff --git a/uts/common/fs/zfs/dsl_dataset.c b/uts/common/fs/zfs/dsl_dataset.c index f1b92f3eaa3..e7ed7509020 100644 --- a/uts/common/fs/zfs/dsl_dataset.c +++ b/uts/common/fs/zfs/dsl_dataset.c @@ -50,6 +50,17 @@ #include #include +/* + * The SPA supports block sizes up to 16MB. However, very large blocks + * can have an impact on i/o latency (e.g. tying up a spinning disk for + * ~300ms), and also potentially on the memory allocator. 
Therefore, + * we do not allow the recordsize to be set larger than zfs_max_recordsize + * (default 1MB). Larger blocks can be created by changing this tunable, + * and pools with larger blocks can always be imported and used, regardless + * of this setting. + */ +int zfs_max_recordsize = 1 * 1024 * 1024; + #define SWITCH64(x, y) \ { \ uint64_t __tmp = (x); \ @@ -59,8 +70,6 @@ #define DS_REF_MAX (1ULL << 62) -#define DSL_DEADLIST_BLOCKSIZE SPA_MAXBLOCKSIZE - /* * Figure out how much of this delta should be propogated to the dsl_dir * layer. If there's a refreservation, that space has already been @@ -110,6 +119,8 @@ dsl_dataset_block_born(dsl_dataset_t *ds, const blkptr_t *bp, dmu_tx_t *tx) ds->ds_phys->ds_compressed_bytes += compressed; ds->ds_phys->ds_uncompressed_bytes += uncompressed; ds->ds_phys->ds_unique_bytes += used; + if (BP_GET_LSIZE(bp) > SPA_OLD_MAXBLOCKSIZE) + ds->ds_need_large_blocks = B_TRUE; mutex_exit(&ds->ds_lock); dsl_dir_diduse_space(ds->ds_dir, DD_USED_HEAD, delta, compressed, uncompressed, tx); @@ -387,6 +398,14 @@ dsl_dataset_hold_obj(dsl_pool_t *dp, uint64_t dsobj, void *tag, list_create(&ds->ds_sendstreams, sizeof (dmu_sendarg_t), offsetof(dmu_sendarg_t, dsa_link)); + if (doi.doi_type == DMU_OTN_ZAP_METADATA) { + err = zap_contains(mos, dsobj, DS_FIELD_LARGE_BLOCKS); + if (err == 0) + ds->ds_large_blocks = B_TRUE; + else + ASSERT3U(err, ==, ENOENT); + } + if (err == 0) { err = dsl_dir_hold_obj(dp, ds->ds_phys->ds_dir_obj, NULL, ds, &ds->ds_dir); @@ -700,6 +719,9 @@ dsl_dataset_create_sync_dd(dsl_dir_t *dd, dsl_dataset_t *origin, dsphys->ds_flags |= origin->ds_phys->ds_flags & (DS_FLAG_INCONSISTENT | DS_FLAG_CI_DATASET); + if (origin->ds_large_blocks) + dsl_dataset_activate_large_blocks_sync_impl(dsobj, tx); + dmu_buf_will_dirty(origin->ds_dbuf, tx); origin->ds_phys->ds_num_children++; @@ -1213,6 +1235,9 @@ dsl_dataset_snapshot_sync_impl(dsl_dataset_t *ds, const char *snapname, dsphys->ds_bp = ds->ds_phys->ds_bp; dmu_buf_rele(dbuf, FTAG); + 
if (ds->ds_large_blocks) + dsl_dataset_activate_large_blocks_sync_impl(dsobj, tx); + ASSERT3U(ds->ds_prev != 0, ==, ds->ds_phys->ds_prev_snap_obj != 0); if (ds->ds_prev) { uint64_t next_clones_obj = @@ -1486,6 +1511,11 @@ dsl_dataset_sync(dsl_dataset_t *ds, zio_t *zio, dmu_tx_t *tx) ds->ds_phys->ds_fsid_guid = ds->ds_fsid_guid; dmu_objset_sync(ds->ds_objset, zio, tx); + + if (ds->ds_need_large_blocks && !ds->ds_large_blocks) { + dsl_dataset_activate_large_blocks_sync_impl(ds->ds_object, tx); + ds->ds_large_blocks = B_TRUE; + } } static void @@ -3128,6 +3158,77 @@ dsl_dataset_space_wouldfree(dsl_dataset_t *firstsnap, return (err); } +static int +dsl_dataset_activate_large_blocks_check(void *arg, dmu_tx_t *tx) +{ + const char *dsname = arg; + dsl_dataset_t *ds; + dsl_pool_t *dp = dmu_tx_pool(tx); + int error = 0; + + if (!spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_LARGE_BLOCKS)) + return (SET_ERROR(ENOTSUP)); + + ASSERT(spa_feature_is_enabled(dp->dp_spa, + SPA_FEATURE_EXTENSIBLE_DATASET)); + + error = dsl_dataset_hold(dp, dsname, FTAG, &ds); + if (error != 0) + return (error); + + if (ds->ds_large_blocks) + error = EALREADY; + dsl_dataset_rele(ds, FTAG); + + return (error); +} + +void +dsl_dataset_activate_large_blocks_sync_impl(uint64_t dsobj, dmu_tx_t *tx) +{ + spa_t *spa = dmu_tx_pool(tx)->dp_spa; + objset_t *mos = dmu_tx_pool(tx)->dp_meta_objset; + uint64_t zero = 0; + + spa_feature_incr(spa, SPA_FEATURE_LARGE_BLOCKS, tx); + dmu_object_zapify(mos, dsobj, DMU_OT_DSL_DATASET, tx); + + VERIFY0(zap_add(mos, dsobj, DS_FIELD_LARGE_BLOCKS, + sizeof (zero), 1, &zero, tx)); +} + +static void +dsl_dataset_activate_large_blocks_sync(void *arg, dmu_tx_t *tx) +{ + const char *dsname = arg; + dsl_dataset_t *ds; + + VERIFY0(dsl_dataset_hold(dmu_tx_pool(tx), dsname, FTAG, &ds)); + + dsl_dataset_activate_large_blocks_sync_impl(ds->ds_object, tx); + ASSERT(!ds->ds_large_blocks); + ds->ds_large_blocks = B_TRUE; + dsl_dataset_rele(ds, FTAG); +} + +int 
+dsl_dataset_activate_large_blocks(const char *dsname) +{ + int error; + + error = dsl_sync_task(dsname, + dsl_dataset_activate_large_blocks_check, + dsl_dataset_activate_large_blocks_sync, (void *)dsname, + 1, ZFS_SPACE_CHECK_RESERVED); + + /* + * EALREADY indicates that this dataset already supports large blocks. + */ + if (error == EALREADY) + error = 0; + return (error); +} + /* * Return TRUE if 'earlier' is an earlier snapshot in 'later's timeline. * For example, they could both be snapshots of the same filesystem, and diff --git a/uts/common/fs/zfs/dsl_deadlist.c b/uts/common/fs/zfs/dsl_deadlist.c index 4f39c397a06..8c8e3746eec 100644 --- a/uts/common/fs/zfs/dsl_deadlist.c +++ b/uts/common/fs/zfs/dsl_deadlist.c @@ -143,7 +143,7 @@ uint64_t dsl_deadlist_alloc(objset_t *os, dmu_tx_t *tx) { if (spa_version(dmu_objset_spa(os)) < SPA_VERSION_DEADLISTS) - return (bpobj_alloc(os, SPA_MAXBLOCKSIZE, tx)); + return (bpobj_alloc(os, SPA_OLD_MAXBLOCKSIZE, tx)); return (zap_create(os, DMU_OT_DEADLIST, DMU_OT_DEADLIST_HDR, sizeof (dsl_deadlist_phys_t), tx)); } @@ -180,7 +180,7 @@ dle_enqueue(dsl_deadlist_t *dl, dsl_deadlist_entry_t *dle, { if (dle->dle_bpobj.bpo_object == dmu_objset_pool(dl->dl_os)->dp_empty_bpobj) { - uint64_t obj = bpobj_alloc(dl->dl_os, SPA_MAXBLOCKSIZE, tx); + uint64_t obj = bpobj_alloc(dl->dl_os, SPA_OLD_MAXBLOCKSIZE, tx); bpobj_close(&dle->dle_bpobj); bpobj_decr_empty(dl->dl_os, tx); VERIFY3U(0, ==, bpobj_open(&dle->dle_bpobj, dl->dl_os, obj)); @@ -254,7 +254,7 @@ dsl_deadlist_add_key(dsl_deadlist_t *dl, uint64_t mintxg, dmu_tx_t *tx) dle = kmem_alloc(sizeof (*dle), KM_SLEEP); dle->dle_mintxg = mintxg; - obj = bpobj_alloc_empty(dl->dl_os, SPA_MAXBLOCKSIZE, tx); + obj = bpobj_alloc_empty(dl->dl_os, SPA_OLD_MAXBLOCKSIZE, tx); VERIFY3U(0, ==, bpobj_open(&dle->dle_bpobj, dl->dl_os, obj)); avl_add(&dl->dl_tree, dle); @@ -338,7 +338,7 @@ dsl_deadlist_clone(dsl_deadlist_t *dl, uint64_t maxtxg, if (dle->dle_mintxg >= maxtxg) break; - obj = 
bpobj_alloc_empty(dl->dl_os, SPA_MAXBLOCKSIZE, tx); + obj = bpobj_alloc_empty(dl->dl_os, SPA_OLD_MAXBLOCKSIZE, tx); VERIFY3U(0, ==, zap_add_int_key(dl->dl_os, newobj, dle->dle_mintxg, obj, tx)); } diff --git a/uts/common/fs/zfs/dsl_destroy.c b/uts/common/fs/zfs/dsl_destroy.c index f8a4546535e..1237641583a 100644 --- a/uts/common/fs/zfs/dsl_destroy.c +++ b/uts/common/fs/zfs/dsl_destroy.c @@ -264,6 +264,10 @@ dsl_destroy_snapshot_sync_impl(dsl_dataset_t *ds, boolean_t defer, dmu_tx_t *tx) obj = ds->ds_object; + if (ds->ds_large_blocks) { + ASSERT0(zap_contains(mos, obj, DS_FIELD_LARGE_BLOCKS)); + spa_feature_decr(dp->dp_spa, SPA_FEATURE_LARGE_BLOCKS, tx); + } if (ds->ds_phys->ds_prev_snap_obj != 0) { ASSERT3P(ds->ds_prev, ==, NULL); VERIFY0(dsl_dataset_hold_obj(dp, @@ -720,6 +724,9 @@ dsl_destroy_head_sync_impl(dsl_dataset_t *ds, dmu_tx_t *tx) ASSERT0(ds->ds_reserved); } + if (ds->ds_large_blocks) + spa_feature_decr(dp->dp_spa, SPA_FEATURE_LARGE_BLOCKS, tx); + dsl_scan_ds_destroyed(ds, tx); obj = ds->ds_object; diff --git a/uts/common/fs/zfs/dsl_pool.c b/uts/common/fs/zfs/dsl_pool.c index ad26a9070b6..1d246b8e688 100644 --- a/uts/common/fs/zfs/dsl_pool.c +++ b/uts/common/fs/zfs/dsl_pool.c @@ -367,7 +367,7 @@ dsl_pool_create(spa_t *spa, nvlist_t *zplprops, uint64_t txg) FREE_DIR_NAME, &dp->dp_free_dir)); /* create and open the free_bplist */ - obj = bpobj_alloc(dp->dp_meta_objset, SPA_MAXBLOCKSIZE, tx); + obj = bpobj_alloc(dp->dp_meta_objset, SPA_OLD_MAXBLOCKSIZE, tx); VERIFY(zap_add(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_FREE_BPOBJ, sizeof (uint64_t), 1, &obj, tx) == 0); VERIFY0(bpobj_open(&dp->dp_free_bpobj, @@ -792,7 +792,7 @@ dsl_pool_upgrade_dir_clones(dsl_pool_t *dp, dmu_tx_t *tx) * subobj support. So call dmu_object_alloc() directly. 
*/ obj = dmu_object_alloc(dp->dp_meta_objset, DMU_OT_BPOBJ, - SPA_MAXBLOCKSIZE, DMU_OT_BPOBJ_HDR, sizeof (bpobj_phys_t), tx); + SPA_OLD_MAXBLOCKSIZE, DMU_OT_BPOBJ_HDR, sizeof (bpobj_phys_t), tx); VERIFY0(zap_add(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_FREE_BPOBJ, sizeof (uint64_t), 1, &obj, tx)); VERIFY0(bpobj_open(&dp->dp_free_bpobj, dp->dp_meta_objset, obj)); diff --git a/uts/common/fs/zfs/metaslab.c b/uts/common/fs/zfs/metaslab.c index a33ec7f6281..0f05e8b00a8 100644 --- a/uts/common/fs/zfs/metaslab.c +++ b/uts/common/fs/zfs/metaslab.c @@ -125,7 +125,7 @@ int metaslab_debug_unload = 0; * an allocation of this size then it switches to using more * aggressive strategy (i.e search by size rather than offset). */ -uint64_t metaslab_df_alloc_threshold = SPA_MAXBLOCKSIZE; +uint64_t metaslab_df_alloc_threshold = SPA_OLD_MAXBLOCKSIZE; /* * The minimum free space, in percent, which must be available diff --git a/uts/common/fs/zfs/sa.c b/uts/common/fs/zfs/sa.c index bd2ebce5df3..8b3963aed9f 100644 --- a/uts/common/fs/zfs/sa.c +++ b/uts/common/fs/zfs/sa.c @@ -500,7 +500,7 @@ sa_resize_spill(sa_handle_t *hdl, uint32_t size, dmu_tx_t *tx) if (size == 0) { blocksize = SPA_MINBLOCKSIZE; - } else if (size > SPA_MAXBLOCKSIZE) { + } else if (size > SPA_OLD_MAXBLOCKSIZE) { ASSERT(0); return (SET_ERROR(EFBIG)); } else { @@ -675,7 +675,7 @@ sa_build_layouts(sa_handle_t *hdl, sa_bulk_attr_t *attr_desc, int attr_count, hdrsize = sa_find_sizes(sa, attr_desc, attr_count, hdl->sa_bonus, SA_BONUS, &i, &used, &spilling); - if (used > SPA_MAXBLOCKSIZE) + if (used > SPA_OLD_MAXBLOCKSIZE) return (SET_ERROR(EFBIG)); VERIFY(0 == dmu_set_bonus(hdl->sa_bonus, spilling ? 
@@ -699,7 +699,7 @@ sa_build_layouts(sa_handle_t *hdl, sa_bulk_attr_t *attr_desc, int attr_count, attr_count - i, hdl->sa_spill, SA_SPILL, &i, &spill_used, &dummy); - if (spill_used > SPA_MAXBLOCKSIZE) + if (spill_used > SPA_OLD_MAXBLOCKSIZE) return (SET_ERROR(EFBIG)); buf_space = hdl->sa_spill->db_size - spillhdrsize; diff --git a/uts/common/fs/zfs/spa.c b/uts/common/fs/zfs/spa.c index be308e2f873..634967c46f1 100644 --- a/uts/common/fs/zfs/spa.c +++ b/uts/common/fs/zfs/spa.c @@ -267,6 +267,14 @@ spa_prop_get_config(spa_t *spa, nvlist_t **nvp) spa_prop_add_list(*nvp, ZPOOL_PROP_ALTROOT, spa->spa_root, 0, ZPROP_SRC_LOCAL); + if (spa_feature_is_enabled(spa, SPA_FEATURE_LARGE_BLOCKS)) { + spa_prop_add_list(*nvp, ZPOOL_PROP_MAXBLOCKSIZE, NULL, + MIN(zfs_max_recordsize, SPA_MAXBLOCKSIZE), ZPROP_SRC_NONE); + } else { + spa_prop_add_list(*nvp, ZPOOL_PROP_MAXBLOCKSIZE, NULL, + SPA_OLD_MAXBLOCKSIZE, ZPROP_SRC_NONE); + } + if ((dp = list_head(&spa->spa_config_list)) != NULL) { if (dp->scd_path == NULL) { spa_prop_add_list(*nvp, ZPOOL_PROP_CACHEFILE, @@ -481,7 +489,7 @@ spa_prop_validate(spa_t *spa, nvlist_t *props) if (!error) { objset_t *os; - uint64_t compress; + uint64_t propval; if (strval == NULL || strval[0] == '\0') { objnum = zpool_prop_default_numeric( @@ -492,15 +500,25 @@ spa_prop_validate(spa_t *spa, nvlist_t *props) if (error = dmu_objset_hold(strval, FTAG, &os)) break; - /* Must be ZPL and not gzip compressed. */ + /* + * Must be ZPL, and its property settings + * must be supported by GRUB (compression + * is not gzip, and large blocks are not used). 
+ */ if (dmu_objset_type(os) != DMU_OST_ZFS) { error = SET_ERROR(ENOTSUP); } else if ((error = dsl_prop_get_int_ds(dmu_objset_ds(os), zfs_prop_to_name(ZFS_PROP_COMPRESSION), - &compress)) == 0 && - !BOOTFS_COMPRESS_VALID(compress)) { + &propval)) == 0 && + !BOOTFS_COMPRESS_VALID(propval)) { + error = SET_ERROR(ENOTSUP); + } else if ((error = + dsl_prop_get_int_ds(dmu_objset_ds(os), + zfs_prop_to_name(ZFS_PROP_RECORDSIZE), + &propval)) == 0 && + propval > SPA_OLD_MAXBLOCKSIZE) { error = SET_ERROR(ENOTSUP); } else { objnum = dmu_objset_id(os); diff --git a/uts/common/fs/zfs/spa_history.c b/uts/common/fs/zfs/spa_history.c index cf72c8ad882..ce64f70b28c 100644 --- a/uts/common/fs/zfs/spa_history.c +++ b/uts/common/fs/zfs/spa_history.c @@ -90,7 +90,7 @@ spa_history_create_obj(spa_t *spa, dmu_tx_t *tx) ASSERT(spa->spa_history == 0); spa->spa_history = dmu_object_alloc(mos, DMU_OT_SPA_HISTORY, - SPA_MAXBLOCKSIZE, DMU_OT_SPA_HISTORY_OFFSETS, + SPA_OLD_MAXBLOCKSIZE, DMU_OT_SPA_HISTORY_OFFSETS, sizeof (spa_history_phys_t), tx); VERIFY(zap_add(mos, DMU_POOL_DIRECTORY_OBJECT, diff --git a/uts/common/fs/zfs/spa_misc.c b/uts/common/fs/zfs/spa_misc.c index 05043627269..1729ba0503b 100644 --- a/uts/common/fs/zfs/spa_misc.c +++ b/uts/common/fs/zfs/spa_misc.c @@ -1963,3 +1963,12 @@ spa_debug_enabled(spa_t *spa) { return (spa->spa_debug); } + +int +spa_maxblocksize(spa_t *spa) +{ + if (spa_feature_is_enabled(spa, SPA_FEATURE_LARGE_BLOCKS)) + return (SPA_MAXBLOCKSIZE); + else + return (SPA_OLD_MAXBLOCKSIZE); +} diff --git a/uts/common/fs/zfs/sys/dmu.h b/uts/common/fs/zfs/sys/dmu.h index e9fa39d5d6f..93165a95951 100644 --- a/uts/common/fs/zfs/sys/dmu.h +++ b/uts/common/fs/zfs/sys/dmu.h @@ -249,7 +249,7 @@ void zfs_znode_byteswap(void *buf, size_t size); * The maximum number of bytes that can be accessed as part of one * operation, including metadata. 
*/ -#define DMU_MAX_ACCESS (10<<20) /* 10MB */ +#define DMU_MAX_ACCESS (32 * 1024 * 1024) /* 32MB */ #define DMU_MAX_DELETEBLKCNT (20480) /* ~5MB of indirect blocks */ #define DMU_USERUSED_OBJECT (-1ULL) @@ -637,6 +637,7 @@ void xuio_stat_wbuf_copied(); void xuio_stat_wbuf_nocopy(); extern int zfs_prefetch_disable; +extern int zfs_max_recordsize; /* * Asynchronously try to read in the data. diff --git a/uts/common/fs/zfs/sys/dmu_objset.h b/uts/common/fs/zfs/sys/dmu_objset.h index 23d88fd048b..804f0c182b6 100644 --- a/uts/common/fs/zfs/sys/dmu_objset.h +++ b/uts/common/fs/zfs/sys/dmu_objset.h @@ -95,6 +95,7 @@ struct objset { zfs_cache_type_t os_secondary_cache; zfs_sync_type_t os_sync; zfs_redundant_metadata_type_t os_redundant_metadata; + int os_recordsize; /* no lock needed: */ struct dmu_tx *os_synctx; /* XXX sketchy */ diff --git a/uts/common/fs/zfs/sys/dmu_send.h b/uts/common/fs/zfs/sys/dmu_send.h index dc183c02c35..3a8dc89abd4 100644 --- a/uts/common/fs/zfs/sys/dmu_send.h +++ b/uts/common/fs/zfs/sys/dmu_send.h @@ -37,12 +37,14 @@ struct dsl_dataset; struct drr_begin; struct avl_tree; -int dmu_send(const char *tosnap, const char *fromsnap, boolean_t embedok, +int dmu_send(const char *tosnap, const char *fromsnap, + boolean_t embedok, boolean_t large_block_ok, int outfd, struct vnode *vp, offset_t *off); int dmu_send_estimate(struct dsl_dataset *ds, struct dsl_dataset *fromds, uint64_t *sizep); int dmu_send_obj(const char *pool, uint64_t tosnap, uint64_t fromsnap, - boolean_t embedok, int outfd, vnode_t *vp, offset_t *off); + boolean_t embedok, boolean_t large_block_ok, + int outfd, struct vnode *vp, offset_t *off); typedef struct dmu_recv_cookie { struct dsl_dataset *drc_ds; diff --git a/uts/common/fs/zfs/sys/dsl_dataset.h b/uts/common/fs/zfs/sys/dsl_dataset.h index d9552b2260a..ff90f8b439c 100644 --- a/uts/common/fs/zfs/sys/dsl_dataset.h +++ b/uts/common/fs/zfs/sys/dsl_dataset.h @@ -82,6 +82,13 @@ struct dsl_pool; */ #define DS_FIELD_BOOKMARK_NAMES 
"com.delphix:bookmarks" +/* + * This field is present (with value=0) if this dataset may contain large + * blocks (>128KB). If it is present, then this dataset + * is counted in the refcount of the SPA_FEATURE_LARGE_BLOCKS feature. + */ +#define DS_FIELD_LARGE_BLOCKS "org.open-zfs:large_blocks" + /* * DS_FLAG_CI_DATASET is set if the dataset contains a file system whose * name lookups should be performed case-insensitively. @@ -135,6 +142,8 @@ typedef struct dsl_dataset { /* only used in syncing context, only valid for non-snapshots: */ struct dsl_dataset *ds_prev; uint64_t ds_bookmarks; /* DMU_OTN_ZAP_METADATA */ + boolean_t ds_large_blocks; + boolean_t ds_need_large_blocks; /* has internal locking: */ dsl_deadlist_t ds_deadlist; @@ -244,6 +253,8 @@ int dsl_dataset_space_written(dsl_dataset_t *oldsnap, dsl_dataset_t *new, int dsl_dataset_space_wouldfree(dsl_dataset_t *firstsnap, dsl_dataset_t *last, uint64_t *usedp, uint64_t *compp, uint64_t *uncompp); boolean_t dsl_dataset_is_dirty(dsl_dataset_t *ds); +int dsl_dataset_activate_large_blocks(const char *dsname); +void dsl_dataset_activate_large_blocks_sync_impl(uint64_t dsobj, dmu_tx_t *tx); int dsl_dsobj_to_dsname(char *pname, uint64_t obj, char *buf); diff --git a/uts/common/fs/zfs/sys/spa.h b/uts/common/fs/zfs/sys/spa.h index 6c7d34cb823..e4731ae5a73 100644 --- a/uts/common/fs/zfs/sys/spa.h +++ b/uts/common/fs/zfs/sys/spa.h @@ -94,17 +94,26 @@ _NOTE(CONSTCOND) } while (0) _NOTE(CONSTCOND) } while (0) /* - * We currently support nine block sizes, from 512 bytes to 128K. - * We could go higher, but the benefits are near-zero and the cost - * of COWing a giant block to modify one byte would become excessive. + * We currently support block sizes from 512 bytes to 16MB. + * The benefits of larger blocks, and thus larger IO, need to be weighed + * against the cost of COWing a giant block to modify one byte, and the + * large latency of reading or writing a large block. 
+ * + * Note that although blocks up to 16MB are supported, the recordsize + * property can not be set larger than zfs_max_recordsize (default 1MB). + * See the comment near zfs_max_recordsize in dsl_dataset.c for details. + * + * Note that although the LSIZE field of the blkptr_t can store sizes up + * to 32MB, the dnode's dn_datablkszsec can only store sizes up to + * 32MB - 512 bytes. Therefore, we limit SPA_MAXBLOCKSIZE to 16MB. */ #define SPA_MINBLOCKSHIFT 9 -#define SPA_MAXBLOCKSHIFT 17 +#define SPA_OLD_MAXBLOCKSHIFT 17 +#define SPA_MAXBLOCKSHIFT 24 #define SPA_MINBLOCKSIZE (1ULL << SPA_MINBLOCKSHIFT) +#define SPA_OLD_MAXBLOCKSIZE (1ULL << SPA_OLD_MAXBLOCKSHIFT) #define SPA_MAXBLOCKSIZE (1ULL << SPA_MAXBLOCKSHIFT) -#define SPA_BLOCKSIZES (SPA_MAXBLOCKSHIFT - SPA_MINBLOCKSHIFT + 1) - /* * Size of block to hold the configuration data (a packed nvlist) */ @@ -781,6 +790,7 @@ extern boolean_t spa_has_slogs(spa_t *spa); extern boolean_t spa_is_root(spa_t *spa); extern boolean_t spa_writeable(spa_t *spa); extern boolean_t spa_has_pending_synctask(spa_t *spa); +extern int spa_maxblocksize(spa_t *spa); extern int spa_mode(spa_t *spa); extern uint64_t strtonum(const char *str, char **nptr); diff --git a/uts/common/fs/zfs/sys/zap_impl.h b/uts/common/fs/zfs/sys/zap_impl.h index 466aab02bac..8b4a8b2b56e 100644 --- a/uts/common/fs/zfs/sys/zap_impl.h +++ b/uts/common/fs/zfs/sys/zap_impl.h @@ -42,8 +42,7 @@ extern int fzap_default_block_shift; #define MZAP_ENT_LEN 64 #define MZAP_NAME_LEN (MZAP_ENT_LEN - 8 - 4 - 2) -#define MZAP_MAX_BLKSHIFT SPA_MAXBLOCKSHIFT -#define MZAP_MAX_BLKSZ (1 << MZAP_MAX_BLKSHIFT) +#define MZAP_MAX_BLKSZ SPA_OLD_MAXBLOCKSIZE #define ZAP_NEED_CD (-1U) diff --git a/uts/common/fs/zfs/sys/zfs_ioctl.h b/uts/common/fs/zfs/sys/zfs_ioctl.h index bf9f83c3767..62f6ff997d1 100644 --- a/uts/common/fs/zfs/sys/zfs_ioctl.h +++ b/uts/common/fs/zfs/sys/zfs_ioctl.h @@ -85,13 +85,16 @@ typedef enum drr_headertype { /* flags #3 - #15 are reserved for incompatible 
closed-source implementations */ #define DMU_BACKUP_FEATURE_EMBED_DATA (1<<16) #define DMU_BACKUP_FEATURE_EMBED_DATA_LZ4 (1<<17) +/* flag #18 is reserved for a Delphix feature */ +#define DMU_BACKUP_FEATURE_LARGE_BLOCKS (1<<19) /* * Mask of all supported backup features */ #define DMU_BACKUP_FEATURE_MASK (DMU_BACKUP_FEATURE_DEDUP | \ DMU_BACKUP_FEATURE_DEDUPPROPS | DMU_BACKUP_FEATURE_SA_SPILL | \ - DMU_BACKUP_FEATURE_EMBED_DATA | DMU_BACKUP_FEATURE_EMBED_DATA_LZ4) + DMU_BACKUP_FEATURE_EMBED_DATA | DMU_BACKUP_FEATURE_EMBED_DATA_LZ4 | \ + DMU_BACKUP_FEATURE_LARGE_BLOCKS) /* Are all features in the given flag word currently supported? */ #define DMU_STREAM_SUPPORTED(x) (!((x) & ~DMU_BACKUP_FEATURE_MASK)) diff --git a/uts/common/fs/zfs/sys/zfs_znode.h b/uts/common/fs/zfs/sys/zfs_znode.h index bc389b4a436..df08aad7b41 100644 --- a/uts/common/fs/zfs/sys/zfs_znode.h +++ b/uts/common/fs/zfs/sys/zfs_znode.h @@ -136,8 +136,6 @@ extern "C" { #define ZFS_SHARES_DIR "SHARES" #define ZFS_SA_ATTRS "SA_ATTRS" -#define ZFS_MAX_BLOCKSIZE (SPA_MAXBLOCKSIZE) - /* * Path component length * diff --git a/uts/common/fs/zfs/sys/zil.h b/uts/common/fs/zfs/sys/zil.h index 15ef2aa8bf9..895d632a262 100644 --- a/uts/common/fs/zfs/sys/zil.h +++ b/uts/common/fs/zfs/sys/zil.h @@ -90,7 +90,6 @@ typedef struct zil_chain { } zil_chain_t; #define ZIL_MIN_BLKSZ 4096ULL -#define ZIL_MAX_BLKSZ SPA_MAXBLOCKSIZE /* * The words of a log block checksum. 
diff --git a/uts/common/fs/zfs/sys/zil_impl.h b/uts/common/fs/zfs/sys/zil_impl.h index 58566203b69..b5c666c02b7 100644 --- a/uts/common/fs/zfs/sys/zil_impl.h +++ b/uts/common/fs/zfs/sys/zil_impl.h @@ -139,7 +139,7 @@ typedef struct zil_bp_node { avl_node_t zn_node; } zil_bp_node_t; -#define ZIL_MAX_LOG_DATA (SPA_MAXBLOCKSIZE - sizeof (zil_chain_t) - \ +#define ZIL_MAX_LOG_DATA (SPA_OLD_MAXBLOCKSIZE - sizeof (zil_chain_t) - \ sizeof (lr_write_t)) #ifdef __cplusplus diff --git a/uts/common/fs/zfs/vdev.c b/uts/common/fs/zfs/vdev.c index 7571b21a5f8..67b58edeed0 100644 --- a/uts/common/fs/zfs/vdev.c +++ b/uts/common/fs/zfs/vdev.c @@ -828,9 +828,9 @@ vdev_metaslab_init(vdev_t *vd, uint64_t txg) /* * Compute the raidz-deflation ratio. Note, we hard-code - * in 128k (1 << 17) because it is the current "typical" blocksize. - * Even if SPA_MAXBLOCKSIZE changes, this algorithm must never change, - * or we will inconsistently account for existing bp's. + * in 128k (1 << 17) because it is the "typical" blocksize. + * Even though SPA_MAXBLOCKSIZE changed, this algorithm can not change, + * otherwise it would inconsistently account for existing bp's. */ vd->vdev_deflate_ratio = (1 << 17) / (vdev_psize_to_asize(vd, 1 << 17) >> SPA_MINBLOCKSHIFT); diff --git a/uts/common/fs/zfs/vdev_queue.c b/uts/common/fs/zfs/vdev_queue.c index 561e6a4482e..f47c4cd1e26 100644 --- a/uts/common/fs/zfs/vdev_queue.c +++ b/uts/common/fs/zfs/vdev_queue.c @@ -162,7 +162,7 @@ int zfs_vdev_async_write_active_max_dirty_percent = 60; * we include spans of optional I/Os to aid aggregation at the disk even when * they aren't able to help us aggregate at this level. 
*/ -int zfs_vdev_aggregation_limit = SPA_MAXBLOCKSIZE; +int zfs_vdev_aggregation_limit = SPA_OLD_MAXBLOCKSIZE; int zfs_vdev_read_gap_limit = 32 << 10; int zfs_vdev_write_gap_limit = 4 << 10; diff --git a/uts/common/fs/zfs/vdev_raidz.c b/uts/common/fs/zfs/vdev_raidz.c index f686d56e9e5..085d1250a1a 100644 --- a/uts/common/fs/zfs/vdev_raidz.c +++ b/uts/common/fs/zfs/vdev_raidz.c @@ -1604,7 +1604,7 @@ vdev_raidz_physio(vdev_t *vd, caddr_t data, size_t size, /* * Don't write past the end of the block */ - VERIFY3U(offset + size, <=, origoffset + SPA_MAXBLOCKSIZE); + VERIFY3U(offset + size, <=, origoffset + SPA_OLD_MAXBLOCKSIZE); start = offset; end = start + size; @@ -1619,8 +1619,8 @@ vdev_raidz_physio(vdev_t *vd, caddr_t data, size_t size, * KB size. */ rm = vdev_raidz_map_alloc(data - (offset - origoffset), - SPA_MAXBLOCKSIZE, origoffset, tvd->vdev_ashift, vd->vdev_children, - vd->vdev_nparity); + SPA_OLD_MAXBLOCKSIZE, origoffset, tvd->vdev_ashift, + vd->vdev_children, vd->vdev_nparity); coloffset = origoffset; diff --git a/uts/common/fs/zfs/zap_micro.c b/uts/common/fs/zfs/zap_micro.c index 1152f9072f9..59a9f970448 100644 --- a/uts/common/fs/zfs/zap_micro.c +++ b/uts/common/fs/zfs/zap_micro.c @@ -33,6 +33,7 @@ #include #include #include +#include #ifdef _KERNEL #include @@ -653,9 +654,9 @@ zap_create_flags(objset_t *os, int normflags, zap_flags_t flags, uint64_t obj = dmu_object_alloc(os, ot, 0, bonustype, bonuslen, tx); ASSERT(leaf_blockshift >= SPA_MINBLOCKSHIFT && - leaf_blockshift <= SPA_MAXBLOCKSHIFT && + leaf_blockshift <= SPA_OLD_MAXBLOCKSHIFT && indirect_blockshift >= SPA_MINBLOCKSHIFT && - indirect_blockshift <= SPA_MAXBLOCKSHIFT); + indirect_blockshift <= SPA_OLD_MAXBLOCKSHIFT); VERIFY(dmu_object_set_blocksize(os, obj, 1ULL << leaf_blockshift, indirect_blockshift, tx) == 0); @@ -1345,7 +1346,6 @@ zap_count_write(objset_t *os, uint64_t zapobj, const char *name, int add, zap_t *zap; int err = 0; - /* * Since, we don't have a name, we cannot figure out which 
blocks will * be affected in this operation. So, account for the worst case : @@ -1358,7 +1358,7 @@ zap_count_write(objset_t *os, uint64_t zapobj, const char *name, int add, * large microzap results in a promotion to fatzap. */ if (name == NULL) { - *towrite += (3 + (add ? 4 : 0)) * SPA_MAXBLOCKSIZE; + *towrite += (3 + (add ? 4 : 0)) * SPA_OLD_MAXBLOCKSIZE; return (err); } @@ -1382,7 +1382,7 @@ zap_count_write(objset_t *os, uint64_t zapobj, const char *name, int add, /* * We treat this case as similar to (name == NULL) */ - *towrite += (3 + (add ? 4 : 0)) * SPA_MAXBLOCKSIZE; + *towrite += (3 + (add ? 4 : 0)) * SPA_OLD_MAXBLOCKSIZE; } } else { /* @@ -1401,12 +1401,12 @@ zap_count_write(objset_t *os, uint64_t zapobj, const char *name, int add, * ptrtbl blocks */ if (dmu_buf_freeable(zap->zap_dbuf)) - *tooverwrite += SPA_MAXBLOCKSIZE; + *tooverwrite += MZAP_MAX_BLKSZ; else - *towrite += SPA_MAXBLOCKSIZE; + *towrite += MZAP_MAX_BLKSZ; if (add) { - *towrite += 4 * SPA_MAXBLOCKSIZE; + *towrite += 4 * MZAP_MAX_BLKSZ; } } diff --git a/uts/common/fs/zfs/zfs_ioctl.c b/uts/common/fs/zfs/zfs_ioctl.c index 0cec1c2adad..a05e7b2a29b 100644 --- a/uts/common/fs/zfs/zfs_ioctl.c +++ b/uts/common/fs/zfs/zfs_ioctl.c @@ -2383,7 +2383,7 @@ zfs_prop_set_special(const char *dsname, zprop_source_t source, const char *propname = nvpair_name(pair); zfs_prop_t prop = zfs_name_to_prop(propname); uint64_t intval; - int err; + int err = -1; if (prop == ZPROP_INVAL) { if (zfs_prop_userquota(propname)) @@ -3772,8 +3772,7 @@ zfs_check_settable(const char *dsname, nvpair_t *pair, cred_t *cr) * the SPA supports it. We ignore any errors here since * we'll catch them later. 
*/ - if (nvpair_type(pair) == DATA_TYPE_UINT64 && - nvpair_value_uint64(pair, &intval) == 0) { + if (nvpair_value_uint64(pair, &intval) == 0) { if (intval >= ZIO_COMPRESS_GZIP_1 && intval <= ZIO_COMPRESS_GZIP_9 && zfs_earlier_version(dsname, @@ -3824,6 +3823,42 @@ zfs_check_settable(const char *dsname, nvpair_t *pair, cred_t *cr) return (SET_ERROR(ENOTSUP)); break; + case ZFS_PROP_RECORDSIZE: + /* Record sizes above 128k need the feature to be enabled */ + if (nvpair_value_uint64(pair, &intval) == 0 && + intval > SPA_OLD_MAXBLOCKSIZE) { + spa_t *spa; + + /* + * If this is a bootable dataset then + * we don't allow large (>128K) blocks, + * because GRUB doesn't support them. + */ + if (zfs_is_bootfs(dsname) && + intval > SPA_OLD_MAXBLOCKSIZE) { + return (SET_ERROR(EDOM)); + } + + /* + * We don't allow setting the property above 1MB, + * unless the tunable has been changed. + */ + if (intval > zfs_max_recordsize || + intval > SPA_MAXBLOCKSIZE) + return (SET_ERROR(EDOM)); + + if ((err = spa_open(dsname, &spa, FTAG)) != 0) + return (err); + + if (!spa_feature_is_enabled(spa, + SPA_FEATURE_LARGE_BLOCKS)) { + spa_close(spa, FTAG); + return (SET_ERROR(ENOTSUP)); + } + spa_close(spa, FTAG); + } + break; + case ZFS_PROP_SHARESMB: if (zpl_earlier_version(dsname, ZPL_VERSION_FUID)) return (SET_ERROR(ENOTSUP)); @@ -4245,7 +4280,7 @@ out: * zc_fromobj objsetid of incremental fromsnap (may be zero) * zc_guid if set, estimate size of stream only. zc_cookie is ignored. * output size in zc_objset_type.
- * zc_flags if =1, WRITE_EMBEDDED records are permitted + * zc_flags lzc_send_flags * * outputs: * zc_objset_type estimated size, if zc_guid is set @@ -4257,6 +4292,7 @@ zfs_ioc_send(zfs_cmd_t *zc) offset_t off; boolean_t estimate = (zc->zc_guid != 0); boolean_t embedok = (zc->zc_flags & 0x1); + boolean_t large_block_ok = (zc->zc_flags & 0x2); if (zc->zc_obj != 0) { dsl_pool_t *dp; @@ -4317,7 +4353,8 @@ zfs_ioc_send(zfs_cmd_t *zc) off = fp->f_offset; error = dmu_send_obj(zc->zc_name, zc->zc_sendobj, - zc->zc_fromobj, embedok, zc->zc_cookie, fp->f_vnode, &off); + zc->zc_fromobj, embedok, large_block_ok, + zc->zc_cookie, fp->f_vnode, &off); if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0) fp->f_offset = off; @@ -5219,6 +5256,8 @@ zfs_ioc_space_snaps(const char *lastsnap, nvlist_t *innvl, nvlist_t *outnvl) * innvl: { * "fd" -> file descriptor to write stream to (int32) * (optional) "fromsnap" -> full snap name to send an incremental from + * (optional) "largeblockok" -> (value ignored) + * indicates that blocks > 128KB are permitted * (optional) "embedok" -> (value ignored) * presence indicates DRR_WRITE_EMBEDDED records are permitted * } @@ -5233,6 +5272,7 @@ zfs_ioc_send_new(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl) offset_t off; char *fromname = NULL; int fd; + boolean_t largeblockok; boolean_t embedok; error = nvlist_lookup_int32(innvl, "fd", &fd); @@ -5241,6 +5281,7 @@ zfs_ioc_send_new(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl) (void) nvlist_lookup_string(innvl, "fromsnap", &fromname); + largeblockok = nvlist_exists(innvl, "largeblockok"); embedok = nvlist_exists(innvl, "embedok"); file_t *fp = getf(fd); @@ -5248,7 +5289,8 @@ zfs_ioc_send_new(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl) return (SET_ERROR(EBADF)); off = fp->f_offset; - error = dmu_send(snapname, fromname, embedok, fd, fp->f_vnode, &off); + error = dmu_send(snapname, fromname, embedok, largeblockok, + fd, fp->f_vnode, &off); if 
(VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0) fp->f_offset = off; diff --git a/uts/common/fs/zfs/zfs_log.c b/uts/common/fs/zfs/zfs_log.c index aeaba2233a1..47d32a45c39 100644 --- a/uts/common/fs/zfs/zfs_log.c +++ b/uts/common/fs/zfs/zfs_log.c @@ -485,7 +485,7 @@ zfs_log_write(zilog_t *zilog, dmu_tx_t *tx, int txtype, * If the write would overflow the largest block then split it. */ if (write_state != WR_INDIRECT && resid > ZIL_MAX_LOG_DATA) - len = SPA_MAXBLOCKSIZE >> 1; + len = SPA_OLD_MAXBLOCKSIZE >> 1; else len = resid; diff --git a/uts/common/fs/zfs/zfs_vfsops.c b/uts/common/fs/zfs/zfs_vfsops.c index 0b0b0a99cc6..8cf83b399a1 100644 --- a/uts/common/fs/zfs/zfs_vfsops.c +++ b/uts/common/fs/zfs/zfs_vfsops.c @@ -272,10 +272,9 @@ static void blksz_changed_cb(void *arg, uint64_t newval) { zfsvfs_t *zfsvfs = arg; - - if (newval < SPA_MINBLOCKSIZE || - newval > SPA_MAXBLOCKSIZE || !ISP2(newval)) - newval = SPA_MAXBLOCKSIZE; + ASSERT3U(newval, <=, spa_maxblocksize(dmu_objset_spa(zfsvfs->z_os))); + ASSERT3U(newval, >=, SPA_MINBLOCKSIZE); + ASSERT(ISP2(newval)); zfsvfs->z_max_blksz = newval; zfsvfs->z_vfs->vfs_bsize = newval; @@ -906,7 +905,7 @@ zfsvfs_create(const char *osname, zfsvfs_t **zfvp) */ zfsvfs->z_vfs = NULL; zfsvfs->z_parent = zfsvfs; - zfsvfs->z_max_blksz = SPA_MAXBLOCKSIZE; + zfsvfs->z_max_blksz = SPA_OLD_MAXBLOCKSIZE; zfsvfs->z_show_ctldir = ZFS_SNAPDIR_VISIBLE; zfsvfs->z_os = os; diff --git a/uts/common/fs/zfs/zfs_vnops.c b/uts/common/fs/zfs/zfs_vnops.c index c608a79fd2a..5060412afd9 100644 --- a/uts/common/fs/zfs/zfs_vnops.c +++ b/uts/common/fs/zfs/zfs_vnops.c @@ -817,8 +817,14 @@ zfs_write(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct) uint64_t new_blksz; if (zp->z_blksz > max_blksz) { + /* + * File's blocksize is already larger than the + * "recordsize" property. Only let it grow to + * the next power of 2. 
+ */ ASSERT(!ISP2(zp->z_blksz)); - new_blksz = MIN(end_size, SPA_MAXBLOCKSIZE); + new_blksz = MIN(end_size, + 1 << highbit64(zp->z_blksz)); } else { new_blksz = MIN(end_size, max_blksz); } diff --git a/uts/common/fs/zfs/zfs_znode.c b/uts/common/fs/zfs/zfs_znode.c index 7577250408d..4664899d13f 100644 --- a/uts/common/fs/zfs/zfs_znode.c +++ b/uts/common/fs/zfs/zfs_znode.c @@ -58,6 +58,7 @@ #endif /* _KERNEL */ #include +#include #include #include #include @@ -1474,8 +1475,13 @@ zfs_extend(znode_t *zp, uint64_t end) * We are growing the file past the current block size. */ if (zp->z_blksz > zp->z_zfsvfs->z_max_blksz) { + /* + * File's blocksize is already larger than the + * "recordsize" property. Only let it grow to + * the next power of 2. + */ ASSERT(!ISP2(zp->z_blksz)); - newblksz = MIN(end, SPA_MAXBLOCKSIZE); + newblksz = MIN(end, 1 << highbit64(zp->z_blksz)); } else { newblksz = MIN(end, zp->z_zfsvfs->z_max_blksz); } diff --git a/uts/common/fs/zfs/zil.c b/uts/common/fs/zfs/zil.c index 6377285fe2a..6beacadf710 100644 --- a/uts/common/fs/zfs/zil.c +++ b/uts/common/fs/zfs/zil.c @@ -220,6 +220,7 @@ zil_read_log_block(zilog_t *zilog, const blkptr_t *bp, blkptr_t *nbp, void *dst, sizeof (cksum)) || BP_IS_HOLE(&zilc->zc_next_blk)) { error = SET_ERROR(ECKSUM); } else { + ASSERT3U(len, <=, SPA_OLD_MAXBLOCKSIZE); bcopy(lr, dst, len); *end = (char *)dst + len; *nbp = zilc->zc_next_blk; @@ -234,6 +235,8 @@ zil_read_log_block(zilog_t *zilog, const blkptr_t *bp, blkptr_t *nbp, void *dst, (zilc->zc_nused > (size - sizeof (*zilc)))) { error = SET_ERROR(ECKSUM); } else { + ASSERT3U(zilc->zc_nused, <=, + SPA_OLD_MAXBLOCKSIZE); bcopy(lr, dst, zilc->zc_nused); *end = (char *)dst + zilc->zc_nused; *nbp = zilc->zc_next_blk; @@ -317,7 +320,7 @@ zil_parse(zilog_t *zilog, zil_parse_blk_func_t *parse_blk_func, * If the log has been claimed, stop if we encounter a sequence * number greater than the highest claimed sequence number. 
*/ - lrbuf = zio_buf_alloc(SPA_MAXBLOCKSIZE); + lrbuf = zio_buf_alloc(SPA_OLD_MAXBLOCKSIZE); zil_bp_tree_init(zilog); for (blk = zh->zh_log; !BP_IS_HOLE(&blk); blk = next_blk) { @@ -364,7 +367,7 @@ done: (max_blk_seq == claim_blk_seq && max_lr_seq == claim_lr_seq)); zil_bp_tree_fini(zilog); - zio_buf_free(lrbuf, SPA_MAXBLOCKSIZE); + zio_buf_free(lrbuf, SPA_OLD_MAXBLOCKSIZE); return (error); } @@ -896,7 +899,7 @@ zil_lwb_write_init(zilog_t *zilog, lwb_t *lwb) * * These must be a multiple of 4KB. Note only the amount used (again * aligned to 4KB) actually gets written. However, we can't always just - * allocate SPA_MAXBLOCKSIZE as the slog space could be exhausted. + * allocate SPA_OLD_MAXBLOCKSIZE as the slog space could be exhausted. */ uint64_t zil_block_buckets[] = { 4096, /* non TX_WRITE */ @@ -978,7 +981,7 @@ zil_lwb_write_start(zilog_t *zilog, lwb_t *lwb) continue; zil_blksz = zil_block_buckets[i]; if (zil_blksz == UINT64_MAX) - zil_blksz = SPA_MAXBLOCKSIZE; + zil_blksz = SPA_OLD_MAXBLOCKSIZE; zilog->zl_prev_blks[zilog->zl_prev_rotor] = zil_blksz; for (i = 0; i < ZIL_PREV_BLKS; i++) zil_blksz = MAX(zil_blksz, zilog->zl_prev_blks[i]); diff --git a/uts/common/fs/zfs/zio.c b/uts/common/fs/zfs/zio.c index 302d692c64a..d67e1361b4f 100644 --- a/uts/common/fs/zfs/zio.c +++ b/uts/common/fs/zfs/zio.c @@ -112,9 +112,8 @@ zio_init(void) /* * For small buffers, we want a cache for each multiple of - * SPA_MINBLOCKSIZE. For medium-size buffers, we want a cache - * for each quarter-power of 2. For large buffers, we want - * a cache for each multiple of PAGESIZE. + * SPA_MINBLOCKSIZE. For larger buffers, we want a cache + * for each quarter-power of 2. 
*/ for (c = 0; c < SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT; c++) { size_t size = (c + 1) << SPA_MINBLOCKSHIFT; @@ -137,10 +136,8 @@ zio_init(void) #endif if (size <= 4 * SPA_MINBLOCKSIZE) { align = SPA_MINBLOCKSIZE; - } else if (IS_P2ALIGNED(size, PAGESIZE)) { - align = PAGESIZE; } else if (IS_P2ALIGNED(size, p2 >> 2)) { - align = p2 >> 2; + align = MIN(p2 >> 2, PAGESIZE); } if (align != 0) { diff --git a/uts/common/fs/zfs/zvol.c b/uts/common/fs/zfs/zvol.c index ca05d5cf214..80888103fe4 100644 --- a/uts/common/fs/zfs/zvol.c +++ b/uts/common/fs/zfs/zvol.c @@ -191,7 +191,7 @@ int zvol_check_volblocksize(uint64_t volblocksize) { if (volblocksize < SPA_MINBLOCKSIZE || - volblocksize > SPA_MAXBLOCKSIZE || + volblocksize > SPA_OLD_MAXBLOCKSIZE || !ISP2(volblocksize)) return (SET_ERROR(EDOM)); @@ -692,7 +692,7 @@ zvol_prealloc(zvol_state_t *zv) while (resid != 0) { int error; - uint64_t bytes = MIN(resid, SPA_MAXBLOCKSIZE); + uint64_t bytes = MIN(resid, SPA_OLD_MAXBLOCKSIZE); tx = dmu_tx_create(os); dmu_tx_hold_write(tx, ZVOL_OBJ, off, bytes); @@ -1661,7 +1661,8 @@ zvol_ioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *cr, int *rvalp) (void) strcpy(dki.dki_dname, "zvol"); dki.dki_ctype = DKC_UNKNOWN; dki.dki_unit = getminor(dev); - dki.dki_maxtransfer = 1 << (SPA_MAXBLOCKSHIFT - zv->zv_min_bs); + dki.dki_maxtransfer = + 1 << (SPA_OLD_MAXBLOCKSHIFT - zv->zv_min_bs); mutex_exit(&zfsdev_state_lock); if (ddi_copyout(&dki, (void *)arg, sizeof (dki), flag)) error = SET_ERROR(EFAULT); @@ -1978,14 +1979,14 @@ zvol_dump_init(zvol_state_t *zv, boolean_t resize) zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE), 8, 1, &vbs, tx); error = error ? error : dmu_object_set_blocksize( - os, ZVOL_OBJ, SPA_MAXBLOCKSIZE, 0, tx); + os, ZVOL_OBJ, SPA_OLD_MAXBLOCKSIZE, 0, tx); if (version >= SPA_VERSION_DEDUP) { error = error ? 
error : zap_update(os, ZVOL_ZAP_OBJ, zfs_prop_to_name(ZFS_PROP_DEDUP), 8, 1, &dedup, tx); } if (error == 0) - zv->zv_volblocksize = SPA_MAXBLOCKSIZE; + zv->zv_volblocksize = SPA_OLD_MAXBLOCKSIZE; } dmu_tx_commit(tx); diff --git a/uts/common/sys/fs/zfs.h b/uts/common/sys/fs/zfs.h index 7155d9702bb..569fae20915 100644 --- a/uts/common/sys/fs/zfs.h +++ b/uts/common/sys/fs/zfs.h @@ -192,6 +192,7 @@ typedef enum { ZPOOL_PROP_FREEING, ZPOOL_PROP_FRAGMENTATION, ZPOOL_PROP_LEAKED, + ZPOOL_PROP_MAXBLOCKSIZE, ZPOOL_NUM_PROPS } zpool_prop_t; From e977d70f4573d2643415b32028fd791c8381600b Mon Sep 17 00:00:00 2001 From: Mark Johnston Date: Sat, 8 Nov 2014 18:16:39 +0000 Subject: [PATCH 003/280] Revert r274200. Implicitly setting DEBUG_FLAGS when WITH_CTF is true is not the right way to do this. Requested by: kan --- share/mk/bsd.lib.mk | 2 +- share/mk/bsd.own.mk | 1 - share/mk/bsd.prog.mk | 2 +- 3 files changed, 2 insertions(+), 3 deletions(-) diff --git a/share/mk/bsd.lib.mk b/share/mk/bsd.lib.mk index c6b689d84c6..f0acf16772d 100644 --- a/share/mk/bsd.lib.mk +++ b/share/mk/bsd.lib.mk @@ -36,7 +36,7 @@ NO_WERROR= .if defined(DEBUG_FLAGS) CFLAGS+= ${DEBUG_FLAGS} -.if ${MK_CTF} != "no" +.if ${MK_CTF} != "no" && ${DEBUG_FLAGS:M-g} != "" CTFFLAGS+= -g .endif .else diff --git a/share/mk/bsd.own.mk b/share/mk/bsd.own.mk index 1e2344f2dd2..486914bc19b 100644 --- a/share/mk/bsd.own.mk +++ b/share/mk/bsd.own.mk @@ -128,7 +128,6 @@ ____: .if ${MK_CTF} != "no" CTFCONVERT_CMD= ${CTFCONVERT} ${CTFFLAGS} ${.TARGET} -DEBUG_FLAGS+= -g .elif defined(.PARSEDIR) || (defined(MAKE_VERSION) && ${MAKE_VERSION} >= 5201111300) CTFCONVERT_CMD= .else diff --git a/share/mk/bsd.prog.mk b/share/mk/bsd.prog.mk index e4f71045ab2..340950a3cdd 100644 --- a/share/mk/bsd.prog.mk +++ b/share/mk/bsd.prog.mk @@ -20,7 +20,7 @@ NO_WERROR= CFLAGS+=${DEBUG_FLAGS} CXXFLAGS+=${DEBUG_FLAGS} -.if ${MK_CTF} != "no" +.if ${MK_CTF} != "no" && ${DEBUG_FLAGS:M-g} != "" CTFFLAGS+= -g .endif .endif From 
3e88eb903befabe44a72c071cd792f22c5e587bd Mon Sep 17 00:00:00 2001 From: "Andrey V. Elsukov" Date: Sat, 8 Nov 2014 19:38:34 +0000 Subject: [PATCH 004/280] Remove ip6_getdstifaddr() and all functions to work with auxiliary data. It isn't safe to keep unreferenced ifaddrs. Use in6ifa_ifwithaddr() to determine ifaddr corresponding to destination address. Since currently we keep addresses with embedded scope zone, in6ifa_ifwithaddr is called with zero zoneid and marked with XXX. Also remove route and lle lookups from ip6_input. Use in6ifa_ifwithaddr() instead. Sponsored by: Yandex LLC --- sys/netinet/tcp_input.c | 6 +- sys/netinet6/frag6.c | 19 +-- sys/netinet6/icmp6.c | 48 ++---- sys/netinet6/ip6_input.c | 328 +++------------------------------------ sys/netinet6/ip6_var.h | 36 ----- 5 files changed, 44 insertions(+), 393 deletions(-) diff --git a/sys/netinet/tcp_input.c b/sys/netinet/tcp_input.c index f6183b9124e..625992afaef 100644 --- a/sys/netinet/tcp_input.c +++ b/sys/netinet/tcp_input.c @@ -513,6 +513,7 @@ tcp6_input(struct mbuf **mp, int *offp, int proto) { struct mbuf *m = *mp; struct in6_ifaddr *ia6; + struct ip6_hdr *ip6; IP6_EXTHDR_CHECK(m, *offp, sizeof(struct tcphdr), IPPROTO_DONE); @@ -520,7 +521,8 @@ tcp6_input(struct mbuf **mp, int *offp, int proto) * draft-itojun-ipv6-tcp-to-anycast * better place to put this in? 
*/ - ia6 = ip6_getdstifaddr(m); + ip6 = mtod(m, struct ip6_hdr *); + ia6 = in6ifa_ifwithaddr(&ip6->ip6_dst, 0 /* XXX */); if (ia6 && (ia6->ia6_flags & IN6_IFF_ANYCAST)) { struct ip6_hdr *ip6; @@ -1251,7 +1253,7 @@ relocked: if (isipv6 && !V_ip6_use_deprecated) { struct in6_ifaddr *ia6; - ia6 = ip6_getdstifaddr(m); + ia6 = in6ifa_ifwithaddr(&ip6->ip6_dst, 0 /* XXX */); if (ia6 != NULL && (ia6->ia6_flags & IN6_IFF_DEPRECATED)) { ifa_free(&ia6->ia_ifa); diff --git a/sys/netinet6/frag6.c b/sys/netinet6/frag6.c index dd088961358..92d07155bcb 100644 --- a/sys/netinet6/frag6.c +++ b/sys/netinet6/frag6.c @@ -59,13 +59,6 @@ __FBSDID("$FreeBSD$"); #include -/* - * Define it to get a correct behavior on per-interface statistics. - * You will need to perform an extra routing table lookup, per fragment, - * to do it. This may, or may not be, a performance hit. - */ -#define IN6_IFSTAT_STRICT - static void frag6_enq(struct ip6asfrag *, struct ip6asfrag *); static void frag6_deq(struct ip6asfrag *); static void frag6_insque(struct ip6q *, struct ip6q *); @@ -160,9 +153,7 @@ frag6_input(struct mbuf **mp, int *offp, int proto) struct ip6_frag *ip6f; struct ip6q *q6; struct ip6asfrag *af6, *ip6af, *af6dwn; -#ifdef IN6_IFSTAT_STRICT struct in6_ifaddr *ia; -#endif int offset = *offp, nxt, i, next; int first_frag = 0; int fragoff, frgpartlen; /* must be larger than u_int16_t */ @@ -183,18 +174,12 @@ frag6_input(struct mbuf **mp, int *offp, int proto) #endif dstifp = NULL; -#ifdef IN6_IFSTAT_STRICT /* find the destination interface of the packet. 
*/ - if ((ia = ip6_getdstifaddr(m)) != NULL) { + ia = in6ifa_ifwithaddr(&ip6->ip6_dst, 0 /* XXX */); + if (ia != NULL) { dstifp = ia->ia_ifp; ifa_free(&ia->ia_ifa); } -#else - /* we are violating the spec, this is not the destination interface */ - if ((m->m_flags & M_PKTHDR) != 0) - dstifp = m->m_pkthdr.rcvif; -#endif - /* jumbo payload can't contain a fragment header */ if (ip6->ip6_plen == 0) { icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER, offset); diff --git a/sys/netinet6/icmp6.c b/sys/netinet6/icmp6.c index acebe162a25..8169e56bc38 100644 --- a/sys/netinet6/icmp6.c +++ b/sys/netinet6/icmp6.c @@ -1313,7 +1313,8 @@ ni6_input(struct mbuf *m, int off) goto bad; /* else it's a link-local multicast, fine */ } else { /* unicast or anycast */ - if ((ia6 = ip6_getdstifaddr(m)) == NULL) + ia6 = in6ifa_ifwithaddr(&ip6->ip6_dst, 0 /* XXX */); + if (ia6 == NULL) goto bad; /* XXX impossible */ if ((ia6->ia6_flags & IN6_IFF_TEMPORARY) && @@ -2136,13 +2137,13 @@ icmp6_rip6_input(struct mbuf **mp, int off) void icmp6_reflect(struct mbuf *m, size_t off) { + struct in6_addr src, *srcp = NULL; struct ip6_hdr *ip6; struct icmp6_hdr *icmp6; struct in6_ifaddr *ia = NULL; + struct ifnet *outif = NULL; int plen; int type, code; - struct ifnet *outif = NULL; - struct in6_addr origdst, src, *srcp = NULL; /* too short to reflect */ if (off < sizeof(struct ip6_hdr)) { @@ -2189,43 +2190,18 @@ icmp6_reflect(struct mbuf *m, size_t off) type = icmp6->icmp6_type; /* keep type for statistics */ code = icmp6->icmp6_code; /* ditto. */ - origdst = ip6->ip6_dst; - /* - * ip6_input() drops a packet if its src is multicast. - * So, the src is never multicast. - */ - ip6->ip6_dst = ip6->ip6_src; - /* * If the incoming packet was addressed directly to us (i.e. unicast), * use dst as the src for the reply. * The IN6_IFF_NOTREADY case should be VERY rare, but is possible * (for example) when we encounter an error while forwarding procedure * destined to a duplicated address of ours. 
- * Note that ip6_getdstifaddr() may fail if we are in an error handling - * procedure of an outgoing packet of our own, in which case we need - * to search in the ifaddr list. */ - if (!IN6_IS_ADDR_MULTICAST(&origdst)) { - if ((ia = ip6_getdstifaddr(m))) { - if (!(ia->ia6_flags & - (IN6_IFF_ANYCAST|IN6_IFF_NOTREADY))) - srcp = &ia->ia_addr.sin6_addr; - } else { - struct sockaddr_in6 d; - - bzero(&d, sizeof(d)); - d.sin6_family = AF_INET6; - d.sin6_len = sizeof(d); - d.sin6_addr = origdst; - ia = (struct in6_ifaddr *) - ifa_ifwithaddr((struct sockaddr *)&d); - if (ia && - !(ia->ia6_flags & - (IN6_IFF_ANYCAST|IN6_IFF_NOTREADY))) { - srcp = &ia->ia_addr.sin6_addr; - } - } + if (!IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) { + ia = in6ifa_ifwithaddr(&ip6->ip6_dst, 0 /* XXX */); + if (ia != NULL && !(ia->ia6_flags & + (IN6_IFF_ANYCAST|IN6_IFF_NOTREADY))) + srcp = &ia->ia_addr.sin6_addr; } if (srcp == NULL) { @@ -2257,7 +2233,11 @@ icmp6_reflect(struct mbuf *m, size_t off) } srcp = &src; } - + /* + * ip6_input() drops a packet if its src is multicast. + * So, the src is never multicast. 
+ */ + ip6->ip6_dst = ip6->ip6_src; ip6->ip6_src = *srcp; ip6->ip6_flow = 0; ip6->ip6_vfc &= ~IPV6_VERSION_MASK; diff --git a/sys/netinet6/ip6_input.c b/sys/netinet6/ip6_input.c index 1f89bd05ffa..64d6cdbedd7 100644 --- a/sys/netinet6/ip6_input.c +++ b/sys/netinet6/ip6_input.c @@ -148,10 +148,6 @@ struct rwlock in6_ifaddr_lock; RW_SYSINIT(in6_ifaddr_lock, &in6_ifaddr_lock, "in6_ifaddr_lock"); static void ip6_init2(void *); -static struct ip6aux *ip6_setdstifaddr(struct mbuf *, struct in6_ifaddr *); -static struct ip6aux *ip6_addaux(struct mbuf *); -static struct ip6aux *ip6_findaux(struct mbuf *m); -static void ip6_delaux (struct mbuf *); static int ip6_hopopts_input(u_int32_t *, u_int32_t *, struct mbuf **, int *); #ifdef PULLDOWN_TEST static struct mbuf *ip6_pullexthdr(struct mbuf *, size_t, int); @@ -399,19 +395,15 @@ out: void ip6_input(struct mbuf *m) { + struct in6_addr odst; struct ip6_hdr *ip6; - int off = sizeof(struct ip6_hdr), nest; + struct in6_ifaddr *ia; u_int32_t plen; u_int32_t rtalert = ~0; + int off = sizeof(struct ip6_hdr), nest; int nxt, ours = 0; - struct ifnet *deliverifp = NULL, *ifp = NULL; - struct in6_addr odst; - struct route_in6 rin6; int srcrt = 0; - struct llentry *lle = NULL; - struct sockaddr_in6 dst6, *dst; - bzero(&rin6, sizeof(struct route_in6)); #ifdef IPSEC /* * should the inner packet be considered authentic? @@ -424,18 +416,12 @@ ip6_input(struct mbuf *m) #endif /* IPSEC */ - /* - * make sure we don't have onion peering information into m_tag. - */ - ip6_delaux(m); - if (m->m_flags & M_FASTFWD_OURS) { /* * Firewall changed destination to local. 
*/ m->m_flags &= ~M_FASTFWD_OURS; ours = 1; - deliverifp = m->m_pkthdr.rcvif; ip6 = mtod(m, struct ip6_hdr *); goto hbhcheck; } @@ -462,10 +448,8 @@ ip6_input(struct mbuf *m) } /* drop the packet if IPv6 operation is disabled on the IF */ - if ((ND_IFINFO(m->m_pkthdr.rcvif)->flags & ND6_IFF_IFDISABLED)) { - m_freem(m); - return; - } + if ((ND_IFINFO(m->m_pkthdr.rcvif)->flags & ND6_IFF_IFDISABLED)) + goto bad; in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_receive); IP6STAT_INC(ip6s_total); @@ -626,7 +610,6 @@ ip6_input(struct mbuf *m) if (m->m_flags & M_FASTFWD_OURS) { m->m_flags &= ~M_FASTFWD_OURS; ours = 1; - deliverifp = m->m_pkthdr.rcvif; goto hbhcheck; } if ((m->m_flags & M_IP6_NEXTHOP) && @@ -637,7 +620,7 @@ ip6_input(struct mbuf *m) * connected host. */ ip6_forward(m, 1); - goto out; + return; } passin: @@ -660,7 +643,6 @@ passin: IP6STAT_INC(ip6s_badscope); goto bad; } - /* * Multicast check. Assume packet is for us to avoid * prematurely taking locks. @@ -668,53 +650,16 @@ passin: if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) { ours = 1; in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_mcast); - deliverifp = m->m_pkthdr.rcvif; goto hbhcheck; } - /* - * Unicast check + * Unicast check + * XXX: For now we keep link-local IPv6 addresses with embedded + * scope zone id, therefore we use zero zoneid here. 
*/ - - bzero(&dst6, sizeof(dst6)); - dst6.sin6_family = AF_INET6; - dst6.sin6_len = sizeof(struct sockaddr_in6); - dst6.sin6_addr = ip6->ip6_dst; - ifp = m->m_pkthdr.rcvif; - IF_AFDATA_RLOCK(ifp); - lle = lla_lookup(LLTABLE6(ifp), 0, - (struct sockaddr *)&dst6); - IF_AFDATA_RUNLOCK(ifp); - if ((lle != NULL) && (lle->la_flags & LLE_IFADDR)) { - struct ifaddr *ifa; - struct in6_ifaddr *ia6; - int bad; - - bad = 1; - IF_ADDR_RLOCK(ifp); - TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { - if (ifa->ifa_addr->sa_family != dst6.sin6_family) - continue; - if (sa_equal(&dst6, ifa->ifa_addr)) - break; - } - KASSERT(ifa != NULL, ("%s: ifa not found for lle %p", - __func__, lle)); - - ia6 = (struct in6_ifaddr *)ifa; - if (!(ia6->ia6_flags & IN6_IFF_NOTREADY)) { - /* Count the packet in the ip address stats */ - counter_u64_add(ia6->ia_ifa.ifa_ipackets, 1); - counter_u64_add(ia6->ia_ifa.ifa_ibytes, - m->m_pkthdr.len); - - /* - * record address information into m_tag. - */ - (void)ip6_setdstifaddr(m, ia6); - - bad = 0; - } else { + ia = in6ifa_ifwithaddr(&ip6->ip6_dst, 0 /* XXX */); + if (ia != NULL) { + if (ia->ia6_flags & IN6_IFF_NOTREADY) { char ip6bufs[INET6_ADDRSTRLEN]; char ip6bufd[INET6_ADDRSTRLEN]; /* address is not ready, so discard the packet. 
*/ @@ -722,137 +667,15 @@ passin: "ip6_input: packet to an unready address %s->%s\n", ip6_sprintf(ip6bufs, &ip6->ip6_src), ip6_sprintf(ip6bufd, &ip6->ip6_dst))); - } - IF_ADDR_RUNLOCK(ifp); - LLE_RUNLOCK(lle); - if (bad) - goto bad; - else { - ours = 1; - deliverifp = ifp; - goto hbhcheck; - } - } - if (lle != NULL) - LLE_RUNLOCK(lle); - - dst = &rin6.ro_dst; - dst->sin6_len = sizeof(struct sockaddr_in6); - dst->sin6_family = AF_INET6; - dst->sin6_addr = ip6->ip6_dst; - rin6.ro_rt = in6_rtalloc1((struct sockaddr *)dst, 0, 0, M_GETFIB(m)); - if (rin6.ro_rt) - RT_UNLOCK(rin6.ro_rt); - -#define rt6_key(r) ((struct sockaddr_in6 *)((r)->rt_nodes->rn_key)) - - /* - * Accept the packet if the forwarding interface to the destination - * according to the routing table is the loopback interface, - * unless the associated route has a gateway. - * Note that this approach causes to accept a packet if there is a - * route to the loopback interface for the destination of the packet. - * But we think it's even useful in some situations, e.g. when using - * a special daemon which wants to intercept the packet. - * - * XXX: some OSes automatically make a cloned route for the destination - * of an outgoing packet. If the outgoing interface of the packet - * is a loopback one, the kernel would consider the packet to be - * accepted, even if we have no such address assinged on the interface. - * We check the cloned flag of the route entry to reject such cases, - * assuming that route entries for our own addresses are not made by - * cloning (it should be true because in6_addloop explicitly installs - * the host route). However, we might have to do an explicit check - * while it would be less efficient. Or, should we rather install a - * reject route for such a case? 
- */ - if (rin6.ro_rt && - (rin6.ro_rt->rt_flags & - (RTF_HOST|RTF_GATEWAY)) == RTF_HOST && -#ifdef RTF_WASCLONED - !(rin6.ro_rt->rt_flags & RTF_WASCLONED) && -#endif -#ifdef RTF_CLONED - !(rin6.ro_rt->rt_flags & RTF_CLONED) && -#endif -#if 0 - /* - * The check below is redundant since the comparison of - * the destination and the key of the rtentry has - * already done through looking up the routing table. - */ - IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst, - &rt6_key(rin6.ro_rt)->sin6_addr) -#endif - rin6.ro_rt->rt_ifp->if_type == IFT_LOOP) { - int free_ia6 = 0; - struct in6_ifaddr *ia6; - - /* - * found the loopback route to the interface address - */ - if (rin6.ro_rt->rt_gateway->sa_family == AF_LINK) { - struct sockaddr_in6 dest6; - - bzero(&dest6, sizeof(dest6)); - dest6.sin6_family = AF_INET6; - dest6.sin6_len = sizeof(dest6); - dest6.sin6_addr = ip6->ip6_dst; - ia6 = (struct in6_ifaddr *) - ifa_ifwithaddr((struct sockaddr *)&dest6); - if (ia6 == NULL) - goto bad; - free_ia6 = 1; - } - else - ia6 = (struct in6_ifaddr *)rin6.ro_rt->rt_ifa; - - /* - * record address information into m_tag. - */ - (void)ip6_setdstifaddr(m, ia6); - - /* - * packets to a tentative, duplicated, or somehow invalid - * address must not be accepted. - */ - if (!(ia6->ia6_flags & IN6_IFF_NOTREADY)) { - /* this address is ready */ - ours = 1; - deliverifp = ia6->ia_ifp; /* correct? */ - /* Count the packet in the ip address stats */ - counter_u64_add(ia6->ia_ifa.ifa_ipackets, 1); - counter_u64_add(ia6->ia_ifa.ifa_ibytes, - m->m_pkthdr.len); - if (free_ia6) - ifa_free(&ia6->ia_ifa); - goto hbhcheck; - } else { - char ip6bufs[INET6_ADDRSTRLEN]; - char ip6bufd[INET6_ADDRSTRLEN]; - /* address is not ready, so discard the packet. 
*/ - nd6log((LOG_INFO, - "ip6_input: packet to an unready address %s->%s\n", - ip6_sprintf(ip6bufs, &ip6->ip6_src), - ip6_sprintf(ip6bufd, &ip6->ip6_dst))); - - if (free_ia6) - ifa_free(&ia6->ia_ifa); + ifa_free(&ia->ia_ifa); goto bad; } - } - - /* - * FAITH (Firewall Aided Internet Translator) - */ - if (V_ip6_keepfaith) { - if (rin6.ro_rt && rin6.ro_rt->rt_ifp && - rin6.ro_rt->rt_ifp->if_type == IFT_FAITH) { - /* XXX do we need more sanity checks? */ - ours = 1; - deliverifp = rin6.ro_rt->rt_ifp; /* faith */ - goto hbhcheck; - } + /* Count the packet in the ip address stats */ + counter_u64_add(ia->ia_ifa.ifa_ipackets, 1); + counter_u64_add(ia->ia_ifa.ifa_ibytes, m->m_pkthdr.len); + ifa_free(&ia->ia_ifa); + ours = 1; + goto hbhcheck; } /* @@ -866,32 +689,6 @@ passin: } hbhcheck: - /* - * record address information into m_tag, if we don't have one yet. - * note that we are unable to record it, if the address is not listed - * as our interface address (e.g. multicast addresses, addresses - * within FAITH prefixes and such). - */ - if (deliverifp) { - struct in6_ifaddr *ia6; - - if ((ia6 = ip6_getdstifaddr(m)) != NULL) { - ifa_free(&ia6->ia_ifa); - } else { - ia6 = in6_ifawithifp(deliverifp, &ip6->ip6_dst); - if (ia6) { - if (!ip6_setdstifaddr(m, ia6)) { - /* - * XXX maybe we should drop the packet here, - * as we could not provide enough information - * to the upper layers. - */ - } - ifa_free(&ia6->ia_ifa); - } - } - } - /* * Process Hop-by-Hop options header if it's contained. * m may be modified in ip6_hopopts_input(). 
@@ -899,11 +696,8 @@ passin: */ plen = (u_int32_t)ntohs(ip6->ip6_plen); if (ip6->ip6_nxt == IPPROTO_HOPOPTS) { - int error; - - error = ip6_input_hbh(m, &plen, &rtalert, &off, &nxt, &ours); - if (error != 0) - goto out; + if (ip6_input_hbh(m, &plen, &rtalert, &off, &nxt, &ours) != 0) + return; } else nxt = ip6->ip6_nxt; @@ -950,7 +744,7 @@ passin: } } else if (!ours) { ip6_forward(m, srcrt); - goto out; + return; } ip6 = mtod(m, struct ip6_hdr *); @@ -975,7 +769,7 @@ passin: * Tell launch routine the next header */ IP6STAT_INC(ip6s_delivered); - in6_ifstat_inc(deliverifp, ifs6_in_deliver); + in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_deliver); nest = 0; while (nxt != IPPROTO_DONE) { @@ -1013,47 +807,9 @@ passin: nxt = (*inet6sw[ip6_protox[nxt]].pr_input)(&m, &off, nxt); } - goto out; + return; bad: m_freem(m); -out: - if (rin6.ro_rt) - RTFREE(rin6.ro_rt); -} - -/* - * set/grab in6_ifaddr correspond to IPv6 destination address. - * XXX backward compatibility wrapper - * - * XXXRW: We should bump the refcount on ia6 before sticking it in the m_tag, - * and then bump it when the tag is copied, and release it when the tag is - * freed. Unfortunately, m_tags don't support deep copies (yet), so instead - * we just bump the ia refcount when we receive it. This should be fixed. 
- */ -static struct ip6aux * -ip6_setdstifaddr(struct mbuf *m, struct in6_ifaddr *ia6) -{ - struct ip6aux *ip6a; - - ip6a = ip6_addaux(m); - if (ip6a) - ip6a->ip6a_dstia6 = ia6; - return ip6a; /* NULL if failed to set */ -} - -struct in6_ifaddr * -ip6_getdstifaddr(struct mbuf *m) -{ - struct ip6aux *ip6a; - struct in6_ifaddr *ia; - - ip6a = ip6_findaux(m); - if (ip6a) { - ia = ip6a->ip6a_dstia6; - ifa_ref(&ia->ia_ifa); - return ia; - } else - return NULL; } /* @@ -1816,42 +1572,6 @@ ip6_lasthdr(struct mbuf *m, int off, int proto, int *nxtp) } } -static struct ip6aux * -ip6_addaux(struct mbuf *m) -{ - struct m_tag *mtag; - - mtag = m_tag_find(m, PACKET_TAG_IPV6_INPUT, NULL); - if (!mtag) { - mtag = m_tag_get(PACKET_TAG_IPV6_INPUT, sizeof(struct ip6aux), - M_NOWAIT); - if (mtag) { - m_tag_prepend(m, mtag); - bzero(mtag + 1, sizeof(struct ip6aux)); - } - } - return mtag ? (struct ip6aux *)(mtag + 1) : NULL; -} - -static struct ip6aux * -ip6_findaux(struct mbuf *m) -{ - struct m_tag *mtag; - - mtag = m_tag_find(m, PACKET_TAG_IPV6_INPUT, NULL); - return mtag ? (struct ip6aux *)(mtag + 1) : NULL; -} - -static void -ip6_delaux(struct mbuf *m) -{ - struct m_tag *mtag; - - mtag = m_tag_find(m, PACKET_TAG_IPV6_INPUT, NULL); - if (mtag) - m_tag_delete(m, mtag); -} - /* * System control for IP6 */ diff --git a/sys/netinet6/ip6_var.h b/sys/netinet6/ip6_var.h index b13d7ea07d5..0889d987c6e 100644 --- a/sys/netinet6/ip6_var.h +++ b/sys/netinet6/ip6_var.h @@ -256,37 +256,6 @@ VNET_PCPUSTAT_DECLARE(struct ip6stat, ip6stat); #define IP6STAT_DEC(name) IP6STAT_SUB(name, 1) #endif -#ifdef _KERNEL -/* - * IPv6 onion peeling state. - * it will be initialized when we come into ip6_input(). - * XXX do not make it a kitchen sink! 
- */ -struct ip6aux { - u_int32_t ip6a_flags; -#define IP6A_SWAP 0x01 /* swapped home/care-of on packet */ -#define IP6A_HASEEN 0x02 /* HA was present */ -#define IP6A_BRUID 0x04 /* BR Unique Identifier was present */ -#define IP6A_RTALERTSEEN 0x08 /* rtalert present */ - - /* ip6.ip6_src */ - struct in6_addr ip6a_careof; /* care-of address of the peer */ - struct in6_addr ip6a_home; /* home address of the peer */ - u_int16_t ip6a_bruid; /* BR unique identifier */ - - /* ip6.ip6_dst */ - struct in6_ifaddr *ip6a_dstia6; /* my ifaddr that matches ip6_dst */ - - /* rtalert */ - u_int16_t ip6a_rtalert; /* rtalert option value */ - - /* - * decapsulation history will be here. - * with IPsec it may not be accurate. - */ -}; -#endif - #ifdef _KERNEL /* flags passed to ip6_output as last parameter */ #define IPV6_UNSPECSRC 0x01 /* allow :: as the source address */ @@ -386,7 +355,6 @@ int ip6proto_register(short); int ip6proto_unregister(short); void ip6_input(struct mbuf *); -struct in6_ifaddr *ip6_getdstifaddr(struct mbuf *); void ip6_freepcbopts(struct ip6_pktopts *); int ip6_unknown_opt(u_int8_t *, struct mbuf *, int); @@ -394,10 +362,6 @@ char * ip6_get_prevhdr(struct mbuf *, int); int ip6_nexthdr(struct mbuf *, int, int, int *); int ip6_lasthdr(struct mbuf *, int, int, int *); -#ifdef __notyet__ -struct ip6aux *ip6_findaux(struct mbuf *); -#endif - extern int (*ip6_mforward)(struct ip6_hdr *, struct ifnet *, struct mbuf *); From 68ce9bc32d9d354d235be081d4a9ee3055d6f554 Mon Sep 17 00:00:00 2001 From: Xin LI Date: Sun, 9 Nov 2014 01:42:28 +0000 Subject: [PATCH 005/280] Apply upstream 13597:3eac1e8e0f4c (git: illumos-gate@aa846ad9): Initialize tqent_flags in the userland taskq implementation. Without this the assertion of tq->tq_freelist != NULL may fail in taskq_destroy. The problem is that tqent_flags is never initialized in the userland implementation while the kernel one does initialize it. 
Without proper initialization, the flag may have its lowest bit set, making it treated as TQENT_FLAG_PREALLOC and never removing taskq_ent_t from tq_freelist. MFC after: 2 weeks --- cddl/contrib/opensolaris/lib/libzpool/common/taskq.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cddl/contrib/opensolaris/lib/libzpool/common/taskq.c b/cddl/contrib/opensolaris/lib/libzpool/common/taskq.c index d4036d03b0c..785f10678a9 100644 --- a/cddl/contrib/opensolaris/lib/libzpool/common/taskq.c +++ b/cddl/contrib/opensolaris/lib/libzpool/common/taskq.c @@ -24,6 +24,7 @@ */ /* * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + * Copyright 2012 Garrett D'Amore . All rights reserved. */ #include @@ -136,6 +137,7 @@ taskq_dispatch(taskq_t *tq, task_func_t func, void *arg, uint_t tqflags) t->tqent_prev->tqent_next = t; t->tqent_func = func; t->tqent_arg = arg; + t->tqent_flags = 0; cv_signal(&tq->tq_dispatch_cv); mutex_exit(&tq->tq_lock); return (1); From 4ea05db88ec5cba1b380a2d5875be034f04fb69b Mon Sep 17 00:00:00 2001 From: Gleb Smirnoff Date: Sun, 9 Nov 2014 11:11:08 +0000 Subject: [PATCH 006/280] Use standard mtx(9), rwlock(9), sx(9) system initialization macros instead of doing initialization manually. Sponsored by: Nginx, Inc. 
Sponsored by: Netflix --- sys/net/if.c | 14 ++------------ sys/net/if_clone.c | 12 ++---------- sys/net/if_clone.h | 1 - sys/net/if_var.h | 5 ----- 4 files changed, 4 insertions(+), 28 deletions(-) diff --git a/sys/net/if.c b/sys/net/if.c index 1d3e3233e88..0103c3fdd5c 100644 --- a/sys/net/if.c +++ b/sys/net/if.c @@ -159,7 +159,6 @@ static void if_attachdomain(void *); static void if_attachdomain1(struct ifnet *); static int ifconf(u_long, caddr_t); static void if_freemulti(struct ifmultiaddr *); -static void if_init(void *); static void if_grow(void); static void if_route(struct ifnet *, int flag, int fam); static int if_setflag(struct ifnet *, int, int, int *, int); @@ -207,7 +206,9 @@ VNET_DEFINE(struct ifnet **, ifindex_table); * inversions and deadlocks. */ struct rwlock ifnet_rwlock; +RW_SYSINIT_FLAGS(ifnet_rw, &ifnet_rwlock, "ifnet_rw", RW_RECURSE); struct sx ifnet_sxlock; +SX_SYSINIT_FLAGS(ifnet_sx, &ifnet_sxlock, "ifnet_sx", SX_RECURSE); /* * The allocation of network interfaces is a rather non-atomic affair; we @@ -364,17 +365,6 @@ vnet_if_init(const void *unused __unused) VNET_SYSINIT(vnet_if_init, SI_SUB_INIT_IF, SI_ORDER_SECOND, vnet_if_init, NULL); -/* ARGSUSED*/ -static void -if_init(void *dummy __unused) -{ - - IFNET_LOCK_INIT(); - if_clone_init(); -} -SYSINIT(interfaces, SI_SUB_INIT_IF, SI_ORDER_FIRST, if_init, NULL); - - #ifdef VIMAGE static void vnet_if_uninit(const void *unused __unused) diff --git a/sys/net/if_clone.c b/sys/net/if_clone.c index abbda41199d..09f8d2a613b 100644 --- a/sys/net/if_clone.c +++ b/sys/net/if_clone.c @@ -103,15 +103,14 @@ static int ifc_simple_match(struct if_clone *, const char *); static int ifc_simple_create(struct if_clone *, char *, size_t, caddr_t); static int ifc_simple_destroy(struct if_clone *, struct ifnet *); -static struct mtx if_cloners_mtx; +static struct mtx if_cloners_mtx; +MTX_SYSINIT(if_cloners_lock, &if_cloners_mtx, "if_cloners lock", MTX_DEF); static VNET_DEFINE(int, if_cloners_count); 
VNET_DEFINE(LIST_HEAD(, if_clone), if_cloners); #define V_if_cloners_count VNET(if_cloners_count) #define V_if_cloners VNET(if_cloners) -#define IF_CLONERS_LOCK_INIT() \ - mtx_init(&if_cloners_mtx, "if_cloners lock", NULL, MTX_DEF) #define IF_CLONERS_LOCK_ASSERT() mtx_assert(&if_cloners_mtx, MA_OWNED) #define IF_CLONERS_LOCK() mtx_lock(&if_cloners_mtx) #define IF_CLONERS_UNLOCK() mtx_unlock(&if_cloners_mtx) @@ -169,13 +168,6 @@ vnet_if_clone_init(void) LIST_INIT(&V_if_cloners); } -void -if_clone_init(void) -{ - - IF_CLONERS_LOCK_INIT(); -} - /* * Lookup and create a clone network interface. */ diff --git a/sys/net/if_clone.h b/sys/net/if_clone.h index 90d9b7b18a1..67ec8046dff 100644 --- a/sys/net/if_clone.h +++ b/sys/net/if_clone.h @@ -65,7 +65,6 @@ EVENTHANDLER_DECLARE(if_clone_event, if_clone_event_handler_t); #endif /* The below interfaces used only by net/if.c. */ -void if_clone_init(void); void vnet_if_clone_init(void); int if_clone_create(char *, size_t, caddr_t); int if_clone_destroy(const char *); diff --git a/sys/net/if_var.h b/sys/net/if_var.h index 049d3b043e0..643a1a489a5 100644 --- a/sys/net/if_var.h +++ b/sys/net/if_var.h @@ -421,11 +421,6 @@ struct ifmultiaddr { extern struct rwlock ifnet_rwlock; extern struct sx ifnet_sxlock; -#define IFNET_LOCK_INIT() do { \ - rw_init_flags(&ifnet_rwlock, "ifnet_rw", RW_RECURSE); \ - sx_init_flags(&ifnet_sxlock, "ifnet_sx", SX_RECURSE); \ -} while(0) - #define IFNET_WLOCK() do { \ sx_xlock(&ifnet_sxlock); \ rw_wlock(&ifnet_rwlock); \ From 1241937290a12fbeb2b9e64b9ea102c42a437810 Mon Sep 17 00:00:00 2001 From: Gleb Smirnoff Date: Sun, 9 Nov 2014 11:13:15 +0000 Subject: [PATCH 007/280] Remove remnants of if_ef(4). 
--- sys/net/if_ethersubr.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/sys/net/if_ethersubr.c b/sys/net/if_ethersubr.c index 36f95290746..b96f43854fa 100644 --- a/sys/net/if_ethersubr.c +++ b/sys/net/if_ethersubr.c @@ -78,11 +78,6 @@ #ifdef INET6 #include #endif - -int (*ef_inputp)(struct ifnet*, struct ether_header *eh, struct mbuf *m); -int (*ef_outputp)(struct ifnet *ifp, struct mbuf **mp, - const struct sockaddr *dst, short *tp, int *hlen); - #include #ifdef CTASSERT From 6dbdbf825d3cd71e475bdfc7ff91b8ca166a97de Mon Sep 17 00:00:00 2001 From: Edward Tomasz Napierala Date: Sun, 9 Nov 2014 13:01:09 +0000 Subject: [PATCH 008/280] Add support for sending redirections to iSCSI target. MFC after: 1 month Sponsored by: The FreeBSD Foundation --- usr.sbin/ctld/ctl.conf.5 | 18 +++++++- usr.sbin/ctld/ctld.c | 56 +++++++++++++++++++++++-- usr.sbin/ctld/ctld.h | 6 +++ usr.sbin/ctld/login.c | 91 ++++++++++++++++++++++++++++++++++++++++ usr.sbin/ctld/parse.y | 29 ++++++++++++- usr.sbin/ctld/token.l | 1 + 6 files changed, 195 insertions(+), 6 deletions(-) diff --git a/usr.sbin/ctld/ctl.conf.5 b/usr.sbin/ctld/ctl.conf.5 index 6a6184bcc41..8e427b1c98e 100644 --- a/usr.sbin/ctld/ctl.conf.5 +++ b/usr.sbin/ctld/ctl.conf.5 @@ -27,7 +27,7 @@ .\" .\" $FreeBSD$ .\" -.Dd November 8, 2014 +.Dd November 9, 2014 .Dt CTL.CONF 5 .Os .Sh NAME @@ -218,6 +218,17 @@ An IPv4 or IPv6 address and port to listen on for incoming connections. .\".It Ic listen-iser Ar address .\"An IPv4 or IPv6 address and port to listen on for incoming connections .\"using iSER (iSCSI over RDMA) protocol. +.It Ic redirect Aq Ar address +IPv4 or IPv6 address to redirect initiators to. +When configured, all initiators attempting to connect to portal +belonging to this +.Sy portal-group +will get redirected using "Target moved temporarily" login response. +Redirection happens before authentication and any +.Sy initiator-name +or +.Sy initiator-portal +checks are skipped. 
.El .Ss target Context .Bl -tag -width indent @@ -296,6 +307,11 @@ The default portal group is .Qq Ar default , which makes the target available on TCP port 3260 on all configured IPv4 and IPv6 addresses. +.It Ic redirect Aq Ar address +IPv4 or IPv6 address to redirect initiators to. +When configured, all initiators attempting to connect to this target +will get redirected using "Target moved temporarily" login response. +Redirection happens after successful authentication. .It Ic lun Ar number Create a .Sy lun diff --git a/usr.sbin/ctld/ctld.c b/usr.sbin/ctld/ctld.c index a480b2ec043..bbf8e7d5064 100644 --- a/usr.sbin/ctld/ctld.c +++ b/usr.sbin/ctld/ctld.c @@ -622,6 +622,7 @@ portal_group_delete(struct portal_group *pg) TAILQ_FOREACH_SAFE(portal, &pg->pg_portals, p_next, tmp) portal_delete(portal); free(pg->pg_name); + free(pg->pg_redirection); free(pg); } @@ -1000,6 +1001,22 @@ portal_group_set_filter(struct portal_group *pg, const char *str) return (0); } +int +portal_group_set_redirection(struct portal_group *pg, const char *addr) +{ + + if (pg->pg_redirection != NULL) { + log_warnx("cannot set redirection to \"%s\" for " + "portal-group \"%s\"; already defined", + addr, pg->pg_name); + return (1); + } + + pg->pg_redirection = checked_strdup(addr); + + return (0); +} + static bool valid_hex(const char ch) { @@ -1144,6 +1161,7 @@ target_delete(struct target *targ) TAILQ_FOREACH_SAFE(lun, &targ->t_luns, l_next, tmp) lun_delete(lun); free(targ->t_name); + free(targ->t_redirection); free(targ); } @@ -1160,6 +1178,22 @@ target_find(struct conf *conf, const char *name) return (NULL); } +int +target_set_redirection(struct target *target, const char *addr) +{ + + if (target->t_redirection != NULL) { + log_warnx("cannot set redirection to \"%s\" for " + "target \"%s\"; already defined", + addr, target->t_name); + return (1); + } + + target->t_redirection = checked_strdup(addr); + + return (0); +} + struct lun * lun_new(struct target *targ, int lun_id) { @@ -1486,10 
+1520,15 @@ conf_verify(struct conf *conf) return (error); found = true; } - if (!found) { + if (!found && targ->t_redirection == NULL) { log_warnx("no LUNs defined for target \"%s\"", targ->t_name); } + if (found && targ->t_redirection != NULL) { + log_debugx("target \"%s\" contains luns, " + " but configured for redirection", + targ->t_name); + } } TAILQ_FOREACH(pg, &conf->conf_portal_groups, pg_next) { assert(pg->pg_name != NULL); @@ -1506,13 +1545,22 @@ conf_verify(struct conf *conf) if (targ->t_portal_group == pg) break; } - if (targ == NULL) { + if (pg->pg_redirection != NULL) { + if (targ != NULL) { + log_debugx("portal-group \"%s\" assigned " + "to target \"%s\", but configured " + "for redirection", + pg->pg_name, targ->t_name); + } + pg->pg_unassigned = false; + } else if (targ != NULL) { + pg->pg_unassigned = false; + } else { if (strcmp(pg->pg_name, "default") != 0) log_warnx("portal-group \"%s\" not assigned " "to any target", pg->pg_name); pg->pg_unassigned = true; - } else - pg->pg_unassigned = false; + } } TAILQ_FOREACH(ag, &conf->conf_auth_groups, ag_next) { if (ag->ag_name == NULL) diff --git a/usr.sbin/ctld/ctld.h b/usr.sbin/ctld/ctld.h index e7b364240d1..600bd30c4f2 100644 --- a/usr.sbin/ctld/ctld.h +++ b/usr.sbin/ctld/ctld.h @@ -117,6 +117,7 @@ struct portal_group { int pg_discovery_filter; bool pg_unassigned; TAILQ_HEAD(, portal) pg_portals; + char *pg_redirection; uint16_t pg_tag; }; @@ -151,6 +152,7 @@ struct target { struct portal_group *t_portal_group; char *t_name; char *t_alias; + char *t_redirection; }; struct isns { @@ -301,6 +303,8 @@ int portal_group_add_listen(struct portal_group *pg, const char *listen, bool iser); int portal_group_set_filter(struct portal_group *pg, const char *filter); +int portal_group_set_redirection(struct portal_group *pg, + const char *addr); int isns_new(struct conf *conf, const char *addr); void isns_delete(struct isns *is); @@ -312,6 +316,8 @@ struct target *target_new(struct conf *conf, const char 
*name); void target_delete(struct target *target); struct target *target_find(struct conf *conf, const char *name); +int target_set_redirection(struct target *target, + const char *addr); struct lun *lun_new(struct target *target, int lun_id); void lun_delete(struct lun *lun); diff --git a/usr.sbin/ctld/login.c b/usr.sbin/ctld/login.c index c59cea01b5f..edba16bdf48 100644 --- a/usr.sbin/ctld/login.c +++ b/usr.sbin/ctld/login.c @@ -612,6 +612,66 @@ login_negotiate_key(struct pdu *request, const char *name, } } +static void +login_redirect(struct pdu *request, const char *target_address) +{ + struct pdu *response; + struct iscsi_bhs_login_response *bhslr2; + struct keys *response_keys; + + response = login_new_response(request); + bhslr2 = (struct iscsi_bhs_login_response *)response->pdu_bhs; + bhslr2->bhslr_status_class = 0x01; + bhslr2->bhslr_status_detail = 0x01; + login_set_csg(response, BHSLR_STAGE_OPERATIONAL_NEGOTIATION); + login_set_nsg(response, BHSLR_STAGE_OPERATIONAL_NEGOTIATION); + + response_keys = keys_new(); + keys_add(response_keys, "TargetAddress", target_address); + + keys_save(response_keys, response); + pdu_send(response); + pdu_delete(response); + keys_delete(response_keys); +} + +static bool +login_portal_redirect(struct connection *conn, struct pdu *request) +{ + const struct portal_group *pg; + + pg = conn->conn_portal->p_portal_group; + if (pg->pg_redirection == NULL) + return (false); + + log_debugx("portal-group \"%s\" configured to redirect to %s", + pg->pg_name, pg->pg_redirection); + login_redirect(request, pg->pg_redirection); + + return (true); +} + +static bool +login_target_redirect(struct connection *conn, struct pdu *request) +{ + const char *target_address; + + assert(conn->conn_portal->p_portal_group->pg_redirection == NULL); + + if (conn->conn_target == NULL) + return (false); + + target_address = conn->conn_target->t_redirection; + if (target_address == NULL) + return (false); + + log_debugx("target \"%s\" configured to 
redirect to %s", + conn->conn_target->t_name, target_address); + login_redirect(request, target_address); + + return (true); +} + static void login_negotiate(struct connection *conn, struct pdu *request) { @@ -680,6 +740,7 @@ login(struct connection *conn) struct portal_group *pg; const char *initiator_name, *initiator_alias, *session_type, *target_name, *auth_method; + bool redirected; /* * Handle the initial Login Request - figure out required authentication @@ -722,6 +783,12 @@ login(struct connection *conn) */ setproctitle("%s (%s)", conn->conn_initiator_addr, conn->conn_initiator_name); + redirected = login_portal_redirect(conn, request); + if (redirected) { + log_debugx("initiator redirected; exiting"); + exit(0); + } + initiator_alias = keys_find(request_keys, "InitiatorAlias"); if (initiator_alias != NULL) conn->conn_initiator_alias = checked_strdup(initiator_alias); @@ -809,6 +876,12 @@ login(struct connection *conn) keys_delete(request_keys); + redirected = login_target_redirect(conn, request); + if (redirected) { + log_debugx("initiator redirected; exiting"); + exit(0); + } + log_debugx("initiator skipped the authentication, " "and we don't need it; proceeding with negotiation"); login_negotiate(conn, request); @@ -820,6 +893,12 @@ login(struct connection *conn) * Initiator might want to to authenticate, * but we don't need it. */ + redirected = login_target_redirect(conn, request); + if (redirected) { + log_debugx("initiator redirected; exiting"); + exit(0); + } + log_debugx("authentication not required; " "transitioning to operational parameter negotiation"); @@ -908,5 +987,17 @@ login(struct connection *conn) login_chap(conn, ag); + /* + * RFC 3720, 10.13.5. Status-Class and Status-Detail, says + * the redirection SHOULD be accepted by the initiator before + * authentication, but MUST be be accepted afterwards; that's + * why we're doing it here and not earlier. 
+ */ + redirected = login_target_redirect(conn, request); + if (redirected) { + log_debugx("initiator redirected; exiting"); + exit(0); + } + login_negotiate(conn, NULL); } diff --git a/usr.sbin/ctld/parse.y b/usr.sbin/ctld/parse.y index c801ce69076..a6519dd7033 100644 --- a/usr.sbin/ctld/parse.y +++ b/usr.sbin/ctld/parse.y @@ -61,7 +61,8 @@ extern void yyrestart(FILE *); %token CLOSING_BRACKET DEBUG DEVICE_ID DISCOVERY_AUTH_GROUP DISCOVERY_FILTER %token INITIATOR_NAME INITIATOR_PORTAL ISNS_SERVER ISNS_PERIOD ISNS_TIMEOUT %token LISTEN LISTEN_ISER LUN MAXPROC OPENING_BRACKET OPTION -%token PATH PIDFILE PORTAL_GROUP SEMICOLON SERIAL SIZE STR TARGET TIMEOUT +%token PATH PIDFILE PORTAL_GROUP REDIRECT SEMICOLON SERIAL SIZE STR +%token TARGET TIMEOUT %union { @@ -338,6 +339,8 @@ portal_group_entry: portal_group_listen | portal_group_listen_iser + | + portal_group_redirect ; portal_group_discovery_auth_group: DISCOVERY_AUTH_GROUP STR @@ -393,6 +396,17 @@ portal_group_listen_iser: LISTEN_ISER STR } ; +portal_group_redirect: REDIRECT STR + { + int error; + + error = portal_group_set_redirection(portal_group, $2); + free($2); + if (error != 0) + return (1); + } + ; + target: TARGET target_name OPENING_BRACKET target_entries CLOSING_BRACKET { @@ -433,6 +447,8 @@ target_entry: | target_portal_group | + target_redirect + | target_lun ; @@ -635,6 +651,17 @@ target_portal_group: PORTAL_GROUP STR } ; +target_redirect: REDIRECT STR + { + int error; + + error = target_set_redirection(target, $2); + free($2); + if (error != 0) + return (1); + } + ; + target_lun: LUN lun_number OPENING_BRACKET lun_entries CLOSING_BRACKET { diff --git a/usr.sbin/ctld/token.l b/usr.sbin/ctld/token.l index 822d1ac7e33..d4bf823b8b0 100644 --- a/usr.sbin/ctld/token.l +++ b/usr.sbin/ctld/token.l @@ -72,6 +72,7 @@ isns-server { return ISNS_SERVER; } isns-period { return ISNS_PERIOD; } isns-timeout { return ISNS_TIMEOUT; } portal-group { return PORTAL_GROUP; } +redirect { return REDIRECT; } serial { return 
SERIAL; } size { return SIZE; } target { return TARGET; } From 7889a0832fe15102a4194479a3aed1a73d75b63b Mon Sep 17 00:00:00 2001 From: Edward Tomasz Napierala Date: Sun, 9 Nov 2014 13:30:02 +0000 Subject: [PATCH 009/280] Fix several nits in redirection handling - don't use wrong CSG, and avoid use-after-free. MFC after: 1 month Sponsored by: The FreeBSD Foundation --- usr.sbin/ctld/login.c | 41 ++++++++++++++--------------------------- 1 file changed, 14 insertions(+), 27 deletions(-) diff --git a/usr.sbin/ctld/login.c b/usr.sbin/ctld/login.c index edba16bdf48..fc41f5178d3 100644 --- a/usr.sbin/ctld/login.c +++ b/usr.sbin/ctld/login.c @@ -620,11 +620,10 @@ login_redirect(struct pdu *request, const char *target_address) struct keys *response_keys; response = login_new_response(request); + login_set_csg(response, login_csg(request)); bhslr2 = (struct iscsi_bhs_login_response *)response->pdu_bhs; bhslr2->bhslr_status_class = 0x01; bhslr2->bhslr_status_detail = 0x01; - login_set_csg(response, BHSLR_STAGE_OPERATIONAL_NEGOTIATION); - login_set_nsg(response, BHSLR_STAGE_OPERATIONAL_NEGOTIATION); response_keys = keys_new(); keys_add(response_keys, "TargetAddress", target_address); @@ -679,7 +678,7 @@ login_negotiate(struct connection *conn, struct pdu *request) struct iscsi_bhs_login_response *bhslr2; struct keys *request_keys, *response_keys; int i; - bool skipped_security; + bool redirected, skipped_security; if (request == NULL) { log_debugx("beginning operational parameter negotiation; " @@ -689,6 +688,18 @@ login_negotiate(struct connection *conn, struct pdu *request) } else skipped_security = true; + /* + * RFC 3720, 10.13.5. Status-Class and Status-Detail, says + * the redirection SHOULD be accepted by the initiator before + * authentication, but MUST be be accepted afterwards; that's + * why we're doing it here and not earlier. 
+ */ + redirected = login_target_redirect(conn, request); + if (redirected) { + log_debugx("initiator redirected; exiting"); + exit(0); + } + request_keys = keys_new(); keys_load(request_keys, request); @@ -876,12 +887,6 @@ login(struct connection *conn) keys_delete(request_keys); - redirected = login_target_redirect(conn, request); - if (redirected) { - log_debugx("initiator redirected; exiting"); - exit(0); - } - log_debugx("initiator skipped the authentication, " "and we don't need it; proceeding with negotiation"); login_negotiate(conn, request); @@ -893,12 +898,6 @@ login(struct connection *conn) * Initiator might want to to authenticate, * but we don't need it. */ - redirected = login_target_redirect(conn, request); - if (redirected) { - log_debugx("initiator redirected; exiting"); - exit(0); - } - log_debugx("authentication not required; " "transitioning to operational parameter negotiation"); @@ -987,17 +986,5 @@ login(struct connection *conn) login_chap(conn, ag); - /* - * RFC 3720, 10.13.5. Status-Class and Status-Detail, says - * the redirection SHOULD be accepted by the initiator before - * authentication, but MUST be be accepted afterwards; that's - * why we're doing it here and not earlier. - */ - redirected = login_target_redirect(conn, request); - if (redirected) { - log_debugx("initiator redirected; exiting"); - exit(0); - } - login_negotiate(conn, NULL); } From d02d7de76c442846cc420489f8c3e614aeb7b2b6 Mon Sep 17 00:00:00 2001 From: Edward Tomasz Napierala Date: Sun, 9 Nov 2014 13:45:35 +0000 Subject: [PATCH 010/280] Add HISTORY section to ctld(8). 
MFC after: 1 month Sponsored by: The FreeBSD Foundation --- usr.sbin/ctld/ctld.8 | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/usr.sbin/ctld/ctld.8 b/usr.sbin/ctld/ctld.8 index 9cff3a82b42..aa9a414e138 100644 --- a/usr.sbin/ctld/ctld.8 +++ b/usr.sbin/ctld/ctld.8 @@ -27,7 +27,7 @@ .\" .\" $FreeBSD$ .\" -.Dd July 20, 2014 +.Dd November 9, 2014 .Dt CTLD 8 .Os .Sh NAME @@ -105,6 +105,11 @@ utility exits 0 on success, and >0 if an error occurs. .Xr ctl 4 , .Xr ctl.conf 5 , .Xr ctladm 8 +.Sh HISTORY +The +.Nm +command appeared in +.Fx 10.0 . .Sh AUTHORS The .Nm From a52ce850e9cc816537f25462a27ed75eb983460c Mon Sep 17 00:00:00 2001 From: Poul-Henning Kamp Date: Sun, 9 Nov 2014 15:33:31 +0000 Subject: [PATCH 011/280] Handle full-path-resolutions to detect the magic-ness of the pkg port. --- tools/tools/sysbuild/sysbuild.sh | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/tools/tools/sysbuild/sysbuild.sh b/tools/tools/sysbuild/sysbuild.sh index ebe2c67552c..1a32c60e027 100644 --- a/tools/tools/sysbuild/sysbuild.sh +++ b/tools/tools/sysbuild/sysbuild.sh @@ -160,7 +160,6 @@ fi set -e log_it() ( - set +x a="$*" set `cat /tmp/_sb_log` TX=`date +%s` @@ -175,7 +174,6 @@ log_it() ( ports_recurse() ( - set +x t=$1 shift if [ "x$t" = "x." ] ; then @@ -218,7 +216,6 @@ ports_recurse() ( ) ports_build() ( - set +x ports_recurse . 
$PORTS_WE_WANT @@ -229,17 +226,21 @@ ports_build() ( t=`echo $p | sed 's,/usr/ports/,,'` pn=`cd $p && make package-name` - if pkg info $pn > /dev/null 2>&1 ; then - log_it "Already installed: $t ($pn)" + if [ "x$p" == "x/usr/ports/ports-mgmt/pkg" -o \ + "x$p" == "x/freebsd/ports/ports-mgmt/pkg" ] ; then + log_it "Very Special: $t ($pn)" + + ( + cd $p + make clean ${PORTS_OPTS} + make all ${PORTS_OPTS} + make install ${PORTS_OPTS} + ) > _.$b 2>&1 < /dev/null continue fi - if [ "x$p" == "x/usr/ports/ports-mgmt/pkg" ] ; then - log_it "Very Special: $t ($pn)" - ( - cd $p - make clean all install ${PORTS_OPTS} - ) > _.$b 2>&1 < /dev/null + if pkg info $pn > /dev/null 2>&1 ; then + log_it "Already installed: $t ($pn)" continue fi @@ -380,7 +381,6 @@ done ####################################################################### if [ "x$1" = "xchroot_script" ] ; then - set +x set -e shift From cd15a010918a4003cf31bc06741901deb7dca302 Mon Sep 17 00:00:00 2001 From: Poul-Henning Kamp Date: Sun, 9 Nov 2014 15:52:11 +0000 Subject: [PATCH 012/280] Translate the errno to gctl_error() texts. 
Spotted by: mwlucas --- sys/geom/bde/g_bde.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/sys/geom/bde/g_bde.c b/sys/geom/bde/g_bde.c index e882bb83c6e..93d77336ed7 100644 --- a/sys/geom/bde/g_bde.c +++ b/sys/geom/bde/g_bde.c @@ -204,6 +204,23 @@ g_bde_create_geom(struct gctl_req *req, struct g_class *mp, struct g_provider *p if (gp->softc != NULL) g_free(gp->softc); g_destroy_geom(gp); + switch (error) { + case ENOENT: + gctl_error(req, "Lock was destroyed"); + break; + case ESRCH: + gctl_error(req, "Lock was nuked"); + break; + case EINVAL: + gctl_error(req, "Could not open lock"); + break; + case ENOTDIR: + gctl_error(req, "Lock not found"); + break; + default: + gctl_error(req, "Could not open lock (%d)", error); + break; + } return; } From 89cbe4e288af0b4155df15934f15edcb0232a5ee Mon Sep 17 00:00:00 2001 From: Poul-Henning Kamp Date: Sun, 9 Nov 2014 15:53:29 +0000 Subject: [PATCH 013/280] Report the 1-based key numbers rather than the 0-based ones to be consistent. Fix documentation for destroy command. Not sure how the wrong explanation happened. Spotted by: mwlucas --- sbin/gbde/gbde.8 | 16 +++++++++++++++- sbin/gbde/gbde.c | 7 +++---- 2 files changed, 18 insertions(+), 5 deletions(-) diff --git a/sbin/gbde/gbde.8 b/sbin/gbde/gbde.8 index 71937679edf..0578287f025 100644 --- a/sbin/gbde/gbde.8 +++ b/sbin/gbde/gbde.8 @@ -233,9 +233,23 @@ pass-phrase: .Pp .Dl "gbde setkey ada0s1f -n 2 -P foo -L key2.lockfile" .Pp -To destroy all copies of the masterkey: +To invalidate your own masterkey: +.Pp +.Dl "gbde nuke ada0s1f" +.Pp +This will overwrite your masterkey sector with zeros, and results in +a diagnostic if you try to use the key again. +You can also destroy the other three copies of the masterkey with the +-n argument. 
+.Pp +You can also invalidate your masterkey without leaving a tell-tale sector +full of zeros: .Pp .Dl "gbde destroy ada0s1f" +.Pp +This will overwrite the information fields in your masterkey sector, +encrypt it and write it back. +You get a (different) diagnostic if you try to use it. .Sh SEE ALSO .Xr gbde 4 , .Xr geom 4 diff --git a/sbin/gbde/gbde.c b/sbin/gbde/gbde.c index b6baa95ab8c..3dca2126a7a 100644 --- a/sbin/gbde/gbde.c +++ b/sbin/gbde/gbde.c @@ -300,7 +300,6 @@ cmd_attach(const struct g_bde_softc *sc, const char *dest, const char *lfile) gctl_ro_param(r, "key", 16, buf); close(ffd); } - /* gctl_dump(r, stdout); */ errstr = gctl_issue(r); if (errstr != NULL) errx(1, "Attach to %s failed: %s", dest, errstr); @@ -371,7 +370,7 @@ cmd_open(struct g_bde_softc *sc, int dfd , const char *l_opt, u_int *nkey) if (error != 0) errx(1, "Error %d decrypting lock", error); if (nkey) - printf("Opened with key %u\n", *nkey); + printf("Opened with key %u\n", 1 + *nkey); return; } @@ -392,7 +391,7 @@ cmd_nuke(struct g_bde_key *gl, int dfd , int key) free(sbuf); if (i != (int)gl->sectorsize) err(1, "write"); - printf("Nuked key %d\n", key); + printf("Nuked key %d\n", 1 + key); } static void @@ -493,7 +492,7 @@ cmd_destroy(struct g_bde_key *gl, int nkey) bzero(&gl->sector0, sizeof gl->sector0); bzero(&gl->sectorN, sizeof gl->sectorN); bzero(&gl->keyoffset, sizeof gl->keyoffset); - bzero(&gl->flags, sizeof gl->flags); + gl->flags &= GBDE_F_SECT0; bzero(gl->mkey, sizeof gl->mkey); for (i = 0; i < G_BDE_MAXKEYS; i++) if (i != nkey) From a458ad86ee9d509ea0b14dd1e7a24496156b973b Mon Sep 17 00:00:00 2001 From: "Alexander V. Chernikov" Date: Sun, 9 Nov 2014 16:15:28 +0000 Subject: [PATCH 014/280] Remove unused 'struct route' fields. 
--- sys/netpfil/ipfw/ip_fw_private.h | 2 -- sys/netpfil/pf/pf.c | 2 -- 2 files changed, 4 deletions(-) diff --git a/sys/netpfil/ipfw/ip_fw_private.h b/sys/netpfil/ipfw/ip_fw_private.h index b88c5dbfd00..ddb73e7f155 100644 --- a/sys/netpfil/ipfw/ip_fw_private.h +++ b/sys/netpfil/ipfw/ip_fw_private.h @@ -66,14 +66,12 @@ enum { */ struct _ip6dn_args { struct ip6_pktopts *opt_or; - struct route_in6 ro_or; int flags_or; struct ip6_moptions *im6o_or; struct ifnet *origifp_or; struct ifnet *ifp_or; struct sockaddr_in6 dst_or; u_long mtu_or; - struct route_in6 ro_pmtu_or; }; diff --git a/sys/netpfil/pf/pf.c b/sys/netpfil/pf/pf.c index 329879151cc..8528af9b7a2 100644 --- a/sys/netpfil/pf/pf.c +++ b/sys/netpfil/pf/pf.c @@ -140,14 +140,12 @@ struct pf_send_entry { PFSE_ICMP6, } pfse_type; union { - struct route ro; struct { int type; int code; int mtu; } icmpopts; } u; -#define pfse_ro u.ro #define pfse_icmp_type u.icmpopts.type #define pfse_icmp_code u.icmpopts.code #define pfse_icmp_mtu u.icmpopts.mtu From 9c9bde01d1097e9b427fe5ca09df3d5dec336833 Mon Sep 17 00:00:00 2001 From: "Alexander V. Chernikov" Date: Sun, 9 Nov 2014 16:20:27 +0000 Subject: [PATCH 015/280] Remove unused 'struct route *' argument from nd6_output_flush(). 
--- sys/netinet6/nd6.c | 6 +++--- sys/netinet6/nd6.h | 2 +- sys/netinet6/nd6_nbr.c | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/sys/netinet6/nd6.c b/sys/netinet6/nd6.c index e82cc937381..059c84336d2 100644 --- a/sys/netinet6/nd6.c +++ b/sys/netinet6/nd6.c @@ -1763,7 +1763,7 @@ nd6_cache_lladdr(struct ifnet *ifp, struct in6_addr *from, char *lladdr, ln = NULL; } if (chain) - nd6_output_flush(ifp, ifp, chain, &sin6, NULL); + nd6_output_flush(ifp, ifp, chain, &sin6); /* * When the link-layer address of a router changes, select the @@ -2156,7 +2156,7 @@ nd6_output_lle(struct ifnet *ifp, struct ifnet *origifp, struct mbuf *m, int nd6_output_flush(struct ifnet *ifp, struct ifnet *origifp, struct mbuf *chain, - struct sockaddr_in6 *dst, struct route *ro) + struct sockaddr_in6 *dst) { struct mbuf *m, *m_head; struct ifnet *outifp; @@ -2171,7 +2171,7 @@ nd6_output_flush(struct ifnet *ifp, struct ifnet *origifp, struct mbuf *chain, while (m_head) { m = m_head; m_head = m_head->m_nextpkt; - error = (*ifp->if_output)(ifp, m, (struct sockaddr *)dst, ro); + error = (*ifp->if_output)(ifp, m, (struct sockaddr *)dst, NULL); } /* diff --git a/sys/netinet6/nd6.h b/sys/netinet6/nd6.h index a23a4f608c4..ff475c82874 100644 --- a/sys/netinet6/nd6.h +++ b/sys/netinet6/nd6.h @@ -413,7 +413,7 @@ int nd6_output_lle(struct ifnet *, struct ifnet *, struct mbuf *, struct sockaddr_in6 *, struct rtentry *, struct llentry *, struct mbuf **); int nd6_output_flush(struct ifnet *, struct ifnet *, struct mbuf *, - struct sockaddr_in6 *, struct route *); + struct sockaddr_in6 *); int nd6_need_cache(struct ifnet *); int nd6_add_ifa_lle(struct in6_ifaddr *); void nd6_rem_ifa_lle(struct in6_ifaddr *); diff --git a/sys/netinet6/nd6_nbr.c b/sys/netinet6/nd6_nbr.c index 682e7156d95..16406a25d36 100644 --- a/sys/netinet6/nd6_nbr.c +++ b/sys/netinet6/nd6_nbr.c @@ -921,7 +921,7 @@ nd6_na_input(struct mbuf *m, int off, int icmp6len) LLE_WUNLOCK(ln); if (chain) - nd6_output_flush(ifp, ifp, 
chain, &sin6, NULL); + nd6_output_flush(ifp, ifp, chain, &sin6); } if (checklink) pfxlist_onlink_check(); From 5b07fc31cc496671dfb960b03aad9f8ee1510519 Mon Sep 17 00:00:00 2001 From: "Alexander V. Chernikov" Date: Sun, 9 Nov 2014 17:01:54 +0000 Subject: [PATCH 016/280] Finish r274315: remove union 'u' from struct pf_send_entry. Suggested by: kib --- sys/netpfil/pf/pf.c | 27 +++++++++++---------------- 1 file changed, 11 insertions(+), 16 deletions(-) diff --git a/sys/netpfil/pf/pf.c b/sys/netpfil/pf/pf.c index 8528af9b7a2..15667a6031e 100644 --- a/sys/netpfil/pf/pf.c +++ b/sys/netpfil/pf/pf.c @@ -139,16 +139,11 @@ struct pf_send_entry { PFSE_ICMP, PFSE_ICMP6, } pfse_type; - union { - struct { - int type; - int code; - int mtu; - } icmpopts; - } u; -#define pfse_icmp_type u.icmpopts.type -#define pfse_icmp_code u.icmpopts.code -#define pfse_icmp_mtu u.icmpopts.mtu + struct { + int type; + int code; + int mtu; + } icmpopts; }; STAILQ_HEAD(pf_send_head, pf_send_entry); @@ -1368,8 +1363,8 @@ pf_intr(void *v) ip_output(pfse->pfse_m, NULL, NULL, 0, NULL, NULL); break; case PFSE_ICMP: - icmp_error(pfse->pfse_m, pfse->pfse_icmp_type, - pfse->pfse_icmp_code, 0, pfse->pfse_icmp_mtu); + icmp_error(pfse->pfse_m, pfse->icmpopts.type, + pfse->icmpopts.code, 0, pfse->icmpopts.mtu); break; #endif /* INET */ #ifdef INET6 @@ -1378,8 +1373,8 @@ pf_intr(void *v) NULL); break; case PFSE_ICMP6: - icmp6_error(pfse->pfse_m, pfse->pfse_icmp_type, - pfse->pfse_icmp_code, pfse->pfse_icmp_mtu); + icmp6_error(pfse->pfse_m, pfse->icmpopts.type, + pfse->icmpopts.code, pfse->icmpopts.mtu); break; #endif /* INET6 */ default: @@ -2411,8 +2406,8 @@ pf_send_icmp(struct mbuf *m, u_int8_t type, u_int8_t code, sa_family_t af, #endif /* INET6 */ } pfse->pfse_m = m0; - pfse->pfse_icmp_type = type; - pfse->pfse_icmp_code = code; + pfse->icmpopts.type = type; + pfse->icmpopts.code = code; pf_send(pfse); } From eccc99cb3e49c927e248748583cd63383abc3764 Mon Sep 17 00:00:00 2001 From: Bryan Venteicher Date: 
Sun, 9 Nov 2014 18:13:08 +0000 Subject: [PATCH 017/280] Attempt to report a better error if sanitize is not supported MFC after: 1 month --- sbin/camcontrol/camcontrol.c | 32 ++++++++++++++++++++++++-------- 1 file changed, 24 insertions(+), 8 deletions(-) diff --git a/sbin/camcontrol/camcontrol.c b/sbin/camcontrol/camcontrol.c index 4129cf4c272..cdb379dc6d2 100644 --- a/sbin/camcontrol/camcontrol.c +++ b/sbin/camcontrol/camcontrol.c @@ -5827,15 +5827,31 @@ scsisanitize(struct cam_device *device, int argc, char **argv, if (arglist & CAM_ARG_ERR_RECOVER) ccb->ccb_h.flags |= CAM_PASS_ERR_RECOVER; - if (((retval = cam_send_ccb(device, ccb)) < 0) - || ((immediate == 0) - && ((ccb->ccb_h.status & CAM_STATUS_MASK) != CAM_REQ_CMP))) { - const char errstr[] = "error sending sanitize command"; + if (cam_send_ccb(device, ccb) < 0) { + warn("error sending sanitize command"); + error = 1; + goto scsisanitize_bailout; + } - if (retval < 0) - warn(errstr); - else - warnx(errstr); + if ((ccb->ccb_h.status & CAM_STATUS_MASK) != CAM_REQ_CMP) { + struct scsi_sense_data *sense; + int error_code, sense_key, asc, ascq; + + if ((ccb->ccb_h.status & CAM_STATUS_MASK) == + CAM_SCSI_STATUS_ERROR) { + sense = &ccb->csio.sense_data; + scsi_extract_sense_len(sense, ccb->csio.sense_len - + ccb->csio.sense_resid, &error_code, &sense_key, + &asc, &ascq, /*show_errors*/ 1); + + if (sense_key == SSD_KEY_ILLEGAL_REQUEST && + asc == 0x20 && ascq == 0x00) + warnx("sanitize is not supported by " + "this device"); + else + warnx("error sanitizing this device"); + } else + warnx("error sanitizing this device"); if (arglist & CAM_ARG_VERBOSE) { cam_error_print(device, ccb, CAM_ESF_ALL, From 2c59cd89c8fe3c2d885cc21c7fc8c041dbcf6493 Mon Sep 17 00:00:00 2001 From: Gleb Smirnoff Date: Sun, 9 Nov 2014 19:58:30 +0000 Subject: [PATCH 018/280] Remove unused includes. 
Reviewed by: kib --- sys/amd64/amd64/genassym.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/sys/amd64/amd64/genassym.c b/sys/amd64/amd64/genassym.c index fceccd21863..aff685b2797 100644 --- a/sys/amd64/amd64/genassym.c +++ b/sys/amd64/amd64/genassym.c @@ -61,11 +61,6 @@ __FBSDID("$FreeBSD$"); #include #include #include -#include -#include -#include -#include -#include #include #include #include From 9a4dabdc5a960b7473308d4780fb080ea81fc800 Mon Sep 17 00:00:00 2001 From: Bryan Venteicher Date: Sun, 9 Nov 2014 20:04:12 +0000 Subject: [PATCH 019/280] Enable LRO by default when available on vtnet interfaces The prior change to not enable LRO by default has confused several people. The configurations where LRO is problematic is not the typical use case for VirtIO, and due to other issues, this often requires checksum offloading to be disabled anyways. PR: 185864 MFC after: 2 weeks --- sys/dev/virtio/network/if_vtnet.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/sys/dev/virtio/network/if_vtnet.c b/sys/dev/virtio/network/if_vtnet.c index 1310f30bc23..83cb2d79fe2 100644 --- a/sys/dev/virtio/network/if_vtnet.c +++ b/sys/dev/virtio/network/if_vtnet.c @@ -967,9 +967,14 @@ vtnet_setup_interface(struct vtnet_softc *sc) ifp->if_capabilities |= IFCAP_VLAN_HWTSO; } - if (virtio_with_feature(dev, VIRTIO_NET_F_GUEST_CSUM)) + if (virtio_with_feature(dev, VIRTIO_NET_F_GUEST_CSUM)) { ifp->if_capabilities |= IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6; + if (virtio_with_feature(dev, VIRTIO_NET_F_GUEST_TSO4) || + virtio_with_feature(dev, VIRTIO_NET_F_GUEST_TSO6)) + ifp->if_capabilities |= IFCAP_LRO; + } + if (ifp->if_capabilities & IFCAP_HWCSUM) { /* * VirtIO does not support VLAN tagging, but we can fake @@ -987,12 +992,6 @@ vtnet_setup_interface(struct vtnet_softc *sc) * Capabilities after here are not enabled by default. 
*/ - if (ifp->if_capabilities & IFCAP_RXCSUM) { - if (virtio_with_feature(dev, VIRTIO_NET_F_GUEST_TSO4) || - virtio_with_feature(dev, VIRTIO_NET_F_GUEST_TSO6)) - ifp->if_capabilities |= IFCAP_LRO; - } - if (sc->vtnet_flags & VTNET_FLAG_VLAN_FILTER) { ifp->if_capabilities |= IFCAP_VLAN_HWFILTER; From 40259c79cd80e135fbb75f048927343e0a14f2eb Mon Sep 17 00:00:00 2001 From: Alexander Motin Date: Sun, 9 Nov 2014 20:39:08 +0000 Subject: [PATCH 020/280] Make both iSCSI initiator and target support base64 encoded CHAP data. While all tested initiators and targets use hex-encoded CHAP data, RFC also allows base64 encoding there, and Microsoft certificaition tool uses it. Reviewed by: trasz (earlier version) MFC after: 2 weeks Sponsored by: iXsystems, Inc. --- usr.sbin/ctld/chap.c | 51 +++++++++++++++++++++++++++++++++++++++++- usr.sbin/iscsid/chap.c | 51 +++++++++++++++++++++++++++++++++++++++++- 2 files changed, 100 insertions(+), 2 deletions(-) diff --git a/usr.sbin/ctld/chap.c b/usr.sbin/ctld/chap.c index 635ab8cb024..0678a7770bc 100644 --- a/usr.sbin/ctld/chap.c +++ b/usr.sbin/ctld/chap.c @@ -33,6 +33,8 @@ __FBSDID("$FreeBSD$"); #include #include +#include +#include #include #include #include @@ -105,6 +107,29 @@ chap_hex2int(const char hex) } } +static int +chap_b642bin(const char *b64, void **binp, size_t *bin_lenp) +{ + char *bin; + int b64_len, bin_len; + + b64_len = strlen(b64); + bin_len = (b64_len + 3) / 4 * 3; + bin = calloc(bin_len, 1); + if (bin == NULL) + log_err(1, "calloc"); + + bin_len = b64_pton(b64, bin, bin_len); + if (bin_len < 0) { + log_warnx("malformed base64 variable"); + free(bin); + return (-1); + } + *binp = bin; + *bin_lenp = bin_len; + return (0); +} + /* * XXX: Review this _carefully_. 
*/ @@ -116,8 +141,12 @@ chap_hex2bin(const char *hex, void **binp, size_t *bin_lenp) char *bin; size_t bin_off, bin_len; + if (strncasecmp(hex, "0b", strlen("0b")) == 0) + return (chap_b642bin(hex + 2, binp, bin_lenp)); + if (strncasecmp(hex, "0x", strlen("0x")) != 0) { - log_warnx("malformed variable, should start with \"0x\""); + log_warnx("malformed variable, should start with \"0x\"" + " or \"0b\""); return (-1); } @@ -160,6 +189,25 @@ chap_hex2bin(const char *hex, void **binp, size_t *bin_lenp) return (0); } +#ifdef USE_BASE64 +static char * +chap_bin2hex(const char *bin, size_t bin_len) +{ + unsigned char *b64, *tmp; + size_t b64_len; + + b64_len = (bin_len + 2) / 3 * 4 + 3; /* +2 for "0b", +1 for '\0'. */ + b64 = malloc(b64_len); + if (b64 == NULL) + log_err(1, "malloc"); + + tmp = b64; + tmp += sprintf(tmp, "0b"); + b64_ntop(bin, bin_len, tmp, b64_len - 2); + + return (b64); +} +#else static char * chap_bin2hex(const char *bin, size_t bin_len) { @@ -181,6 +229,7 @@ chap_bin2hex(const char *bin, size_t bin_len) return (hex); } +#endif /* !USE_BASE64 */ struct chap * chap_new(void) diff --git a/usr.sbin/iscsid/chap.c b/usr.sbin/iscsid/chap.c index abc9a18d7e1..62e39f5a636 100644 --- a/usr.sbin/iscsid/chap.c +++ b/usr.sbin/iscsid/chap.c @@ -33,6 +33,8 @@ __FBSDID("$FreeBSD$"); #include #include +#include +#include #include #include #include @@ -105,6 +107,29 @@ chap_hex2int(const char hex) } } +static int +chap_b642bin(const char *b64, void **binp, size_t *bin_lenp) +{ + char *bin; + int b64_len, bin_len; + + b64_len = strlen(b64); + bin_len = (b64_len + 3) / 4 * 3; + bin = calloc(bin_len, 1); + if (bin == NULL) + log_err(1, "calloc"); + + bin_len = b64_pton(b64, bin, bin_len); + if (bin_len < 0) { + log_warnx("malformed base64 variable"); + free(bin); + return (-1); + } + *binp = bin; + *bin_lenp = bin_len; + return (0); +} + /* * XXX: Review this _carefully_. 
*/ @@ -116,8 +141,12 @@ chap_hex2bin(const char *hex, void **binp, size_t *bin_lenp) char *bin; size_t bin_off, bin_len; + if (strncasecmp(hex, "0b", strlen("0b")) == 0) + return (chap_b642bin(hex + 2, binp, bin_lenp)); + if (strncasecmp(hex, "0x", strlen("0x")) != 0) { - log_warnx("malformed variable, should start with \"0x\""); + log_warnx("malformed variable, should start with \"0x\"" + " or \"0b\""); return (-1); } @@ -160,6 +189,25 @@ chap_hex2bin(const char *hex, void **binp, size_t *bin_lenp) return (0); } +#ifdef USE_BASE64 +static char * +chap_bin2hex(const char *bin, size_t bin_len) +{ + unsigned char *b64, *tmp; + size_t b64_len; + + b64_len = (bin_len + 2) / 3 * 4 + 3; /* +2 for "0b", +1 for '\0'. */ + b64 = malloc(b64_len); + if (b64 == NULL) + log_err(1, "malloc"); + + tmp = b64; + tmp += sprintf(tmp, "0b"); + b64_ntop(bin, bin_len, tmp, b64_len - 2); + + return (b64); +} +#else static char * chap_bin2hex(const char *bin, size_t bin_len) { @@ -181,6 +229,7 @@ chap_bin2hex(const char *bin, size_t bin_len) return (hex); } +#endif /* !USE_BASE64 */ struct chap * chap_new(void) From 72ce7f29de9b369243f1112c57fc7a20c9dc9aea Mon Sep 17 00:00:00 2001 From: Alexander Leidinger Date: Sun, 9 Nov 2014 20:43:50 +0000 Subject: [PATCH 021/280] We moved to ada a while ago, reflect that in the example. --- UPDATING | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/UPDATING b/UPDATING index 41e036fa8e8..507055d6931 100644 --- a/UPDATING +++ b/UPDATING @@ -833,8 +833,8 @@ COMMON ITEMS: 2.) update the ZFS boot block on your boot drive The following example updates the ZFS boot block on the first - partition (freebsd-boot) of a GPT partitioned drive ad0: - "gpart bootcode -p /boot/gptzfsboot -i 1 ad0" + partition (freebsd-boot) of a GPT partitioned drive ada0: + "gpart bootcode -p /boot/gptzfsboot -i 1 ada0" Non-boot pools do not need these updates. 
From 48a9d8f214b6a6ab1d2f92c20be44e9e396ef6a5 Mon Sep 17 00:00:00 2001 From: Tycho Nightingale Date: Sun, 9 Nov 2014 21:08:52 +0000 Subject: [PATCH 022/280] To allow a request to be submitted from within the callback routine of a completing one increase the total by 1 but don't advertise it. Reviewed by: grehan --- usr.sbin/bhyve/block_if.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/usr.sbin/bhyve/block_if.c b/usr.sbin/bhyve/block_if.c index 4986386de0b..8687e9a39d3 100644 --- a/usr.sbin/bhyve/block_if.c +++ b/usr.sbin/bhyve/block_if.c @@ -54,7 +54,7 @@ __FBSDID("$FreeBSD$"); #define BLOCKIF_SIG 0xb109b109 -#define BLOCKIF_MAXREQ 32 +#define BLOCKIF_MAXREQ 33 enum blockop { BOP_READ, @@ -600,7 +600,7 @@ blockif_queuesz(struct blockif_ctxt *bc) { assert(bc->bc_magic == BLOCKIF_SIG); - return (BLOCKIF_MAXREQ); + return (BLOCKIF_MAXREQ - 1); } int From 603eaf792b659f91d7d1a065d82503966d1386fc Mon Sep 17 00:00:00 2001 From: "Alexander V. Chernikov" Date: Sun, 9 Nov 2014 21:33:01 +0000 Subject: [PATCH 023/280] Remove faith(4) and faithd(8) from base. It looks like the industry has chosen a different (and more traditional) stateless/stateful NAT64 as the translation mechanism. Last non-trivial commits to both faith(4) and faithd(8) happened more than 12 years ago, so I assume it is time to drop RFC3142 in FreeBSD.
No objections from: net@ --- ObsoleteFiles.inc | 6 + UPDATING | 4 + etc/defaults/rc.conf | 3 - etc/network.subr | 2 - etc/rc.d/Makefile | 1 - etc/rc.d/NETWORKING | 2 +- etc/rc.d/bridge | 2 +- etc/rc.d/defaultroute | 2 +- etc/rc.d/faith | 75 -- etc/rc.d/routing | 4 +- lib/libc/net/getaddrinfo.c | 52 - lib/libc/net/getnameinfo.c | 1 - release/picobsd/bridge/PICOBSD | 1 - release/picobsd/qemu/PICOBSD | 1 - share/man/man4/Makefile | 2 - share/man/man4/faith.4 | 133 -- share/man/man4/inet6.4 | 11 - share/man/man4/ip6.4 | 4 - share/man/man5/rc.conf.5 | 9 - sys/amd64/conf/GENERIC | 1 - sys/arm/conf/ATMEL | 1 - sys/arm/conf/DOCKSTAR | 1 - sys/arm/conf/DREAMPLUG-1001 | 1 - sys/arm/conf/EFIKA_MX | 1 - sys/arm/conf/ETHERNUT5 | 1 - sys/arm/conf/IMX53 | 1 - sys/arm/conf/IMX6 | 1 - sys/arm/conf/SAM9260EK | 1 - sys/boot/forth/loader.conf | 1 - sys/conf/NOTES | 3 - sys/conf/files | 1 - sys/i386/conf/GENERIC | 1 - sys/i386/conf/XBOX | 1 - sys/i386/conf/XEN | 1 - sys/mips/conf/GXEMUL | 1 - sys/mips/conf/GXEMUL32 | 1 - sys/mips/conf/OCTEON1 | 1 - sys/mips/rmi/rootfs_list.txt | 1 - sys/modules/Makefile | 1 - sys/modules/if_faith/Makefile | 8 - sys/net/if_faith.c | 331 ----- sys/net/if_types.h | 1 - sys/netinet/in.h | 5 +- sys/netinet/in_pcb.c | 19 - sys/netinet/in_pcb.h | 2 +- sys/netinet/ip_icmp.c | 13 - sys/netinet/ip_input.c | 18 - sys/netinet/ip_output.c | 10 - sys/netinet6/icmp6.c | 16 - sys/netinet6/in6.c | 40 +- sys/netinet6/in6.h | 6 +- sys/netinet6/in6_pcb.c | 24 - sys/netinet6/in6_proto.c | 3 - sys/netinet6/ip6_output.c | 10 - sys/netinet6/ip6_var.h | 2 - sys/netinet6/raw_ip6.c | 6 - sys/netinet6/sctp6_usrreq.c | 4 - sys/netinet6/udp6_usrreq.c | 6 - sys/pc98/conf/GENERIC | 1 - sys/powerpc/conf/GENERIC | 1 - sys/powerpc/conf/GENERIC64 | 1 - sys/powerpc/conf/WII | 1 - sys/sparc64/conf/GENERIC | 1 - tools/build/mk/OptionalObsoleteFiles.inc | 1 - tools/tools/nanobsd/pcengines/ALIX_DSK | 1 - tools/tools/nanobsd/pcengines/ALIX_NFS | 1 - tools/tools/sysdoc/tunables.mdoc | 8 - 
tools/tools/tinybsd/conf/default/TINYBSD | 1 - tools/tools/tinybsd/conf/vpn/TINYBSD | 1 - tools/tools/tinybsd/conf/wrap/TINYBSD | 1 - usr.sbin/Makefile | 1 - .../bsdconfig/networking/share/device.subr | 2 +- usr.sbin/faithd/Makefile | 25 - usr.sbin/faithd/README | 148 --- usr.sbin/faithd/faithd.8 | 404 ------ usr.sbin/faithd/faithd.c | 908 -------------- usr.sbin/faithd/faithd.h | 70 -- usr.sbin/faithd/ftp.c | 1085 ----------------- usr.sbin/faithd/prefix.c | 345 ------ usr.sbin/faithd/prefix.h | 52 - usr.sbin/faithd/tcp.c | 324 ----- usr.sbin/faithd/test/faithd.rb | 312 ----- usr.sbin/inetd/inetd.c | 20 +- 83 files changed, 39 insertions(+), 4537 deletions(-) delete mode 100755 etc/rc.d/faith delete mode 100644 share/man/man4/faith.4 delete mode 100644 sys/modules/if_faith/Makefile delete mode 100644 sys/net/if_faith.c delete mode 100644 usr.sbin/faithd/Makefile delete mode 100644 usr.sbin/faithd/README delete mode 100644 usr.sbin/faithd/faithd.8 delete mode 100644 usr.sbin/faithd/faithd.c delete mode 100644 usr.sbin/faithd/faithd.h delete mode 100644 usr.sbin/faithd/ftp.c delete mode 100644 usr.sbin/faithd/prefix.c delete mode 100644 usr.sbin/faithd/prefix.h delete mode 100644 usr.sbin/faithd/tcp.c delete mode 100644 usr.sbin/faithd/test/faithd.rb diff --git a/ObsoleteFiles.inc b/ObsoleteFiles.inc index c8072d44605..be0b7c90924 100644 --- a/ObsoleteFiles.inc +++ b/ObsoleteFiles.inc @@ -38,6 +38,12 @@ # xargs -n1 | sort | uniq -d; # done +# 20141109: faith/faithd removal +OLD_FILES+=etc/rc.d/faith +OLD_FILES+=usr/share/man/man4/faith.4.gz +OLD_FILES+=usr/share/man/man4/if_faith.4.gz +OLD_FILES+=usr/sbin/faithd +OLD_FILES+=usr/share/man/man8/faithd.8.gz # 20141102: postrandom obsoleted by new /dev/random code OLD_FILES+=etc/rc.d/postrandom # 20141031: initrandom obsoleted by new /dev/random code diff --git a/UPDATING b/UPDATING index 507055d6931..b9a7b3552fb 100644 --- a/UPDATING +++ b/UPDATING @@ -31,6 +31,10 @@ NOTE TO PEOPLE WHO THINK THAT FreeBSD 11.x IS SLOW: 
disable the most expensive debugging functionality run "ln -s 'abort:false,junk:false' /etc/malloc.conf".) +20141109: + faith(4) and faithd(8) has been removed from base system. It + has been obsolete for a very long time. + 20141104: vt(4), the new console driver, is enabled by default. It brings support for Unicode and double-width characters, as well as diff --git a/etc/defaults/rc.conf b/etc/defaults/rc.conf index 79799bf9165..1422390e978 100644 --- a/etc/defaults/rc.conf +++ b/etc/defaults/rc.conf @@ -514,9 +514,6 @@ stf_interface_ipv4plen="0" # Prefix length for 6to4 IPv4 addr, stf_interface_ipv6_ifid="0:0:0:1" # IPv6 interface id for stf0. # If you like, you can set "AUTO" for this. stf_interface_ipv6_slaid="0000" # IPv6 Site Level Aggregator for stf0 -ipv6_faith_prefix="NO" # Set faith prefix to enable a FAITH - # IPv6-to-IPv4 TCP translator. You also need - # faithd(8) setup. ipv6_ipv4mapping="NO" # Set to "YES" to enable IPv4 mapped IPv6 addr # communication. (like ::ffff:a.b.c.d) ipv6_ipfilter_rules="/etc/ipf6.rules" # rules definition file for ipfilter, diff --git a/etc/network.subr b/etc/network.subr index 520c9e86a7e..b8e06544ff9 100644 --- a/etc/network.subr +++ b/etc/network.subr @@ -372,7 +372,6 @@ dhcpif() case $1 in lo[0-9]*|\ stf[0-9]*|\ - faith[0-9]*|\ lp[0-9]*|\ sl[0-9]*) return 1 @@ -591,7 +590,6 @@ ipv6_autoconfif() case $_if in lo[0-9]*|\ stf[0-9]*|\ - faith[0-9]*|\ lp[0-9]*|\ sl[0-9]*) return 1 diff --git a/etc/rc.d/Makefile b/etc/rc.d/Makefile index bfa22bfcfd0..f26c190fe9d 100644 --- a/etc/rc.d/Makefile +++ b/etc/rc.d/Makefile @@ -42,7 +42,6 @@ FILES= DAEMON \ dhclient \ dmesg \ dumpon \ - faith \ fsck \ ftpd \ gbde \ diff --git a/etc/rc.d/NETWORKING b/etc/rc.d/NETWORKING index c86150f85ff..12dd3b0d71b 100755 --- a/etc/rc.d/NETWORKING +++ b/etc/rc.d/NETWORKING @@ -4,7 +4,7 @@ # # PROVIDE: NETWORKING NETWORK -# REQUIRE: netif netoptions routing ppp ipfw stf faith +# REQUIRE: netif netoptions routing ppp ipfw stf # REQUIRE: defaultroute 
routed mrouted route6d mroute6d resolv bridge # REQUIRE: static_arp static_ndp local_unbound diff --git a/etc/rc.d/bridge b/etc/rc.d/bridge index 4c3b34021d8..93e68a20ab0 100755 --- a/etc/rc.d/bridge +++ b/etc/rc.d/bridge @@ -26,7 +26,7 @@ # # PROVIDE: bridge -# REQUIRE: netif faith ppp stf +# REQUIRE: netif ppp stf # KEYWORD: nojail . /etc/rc.subr diff --git a/etc/rc.d/defaultroute b/etc/rc.d/defaultroute index ea54c83ac0c..8e87775ffb9 100755 --- a/etc/rc.d/defaultroute +++ b/etc/rc.d/defaultroute @@ -6,7 +6,7 @@ # # PROVIDE: defaultroute -# REQUIRE: devd faith netif stf +# REQUIRE: devd netif stf # KEYWORD: nojail . /etc/rc.subr diff --git a/etc/rc.d/faith b/etc/rc.d/faith deleted file mode 100755 index 4790ebd0a57..00000000000 --- a/etc/rc.d/faith +++ /dev/null @@ -1,75 +0,0 @@ -#!/bin/sh -# $FreeBSD$ -# - -# PROVIDE: faith -# REQUIRE: netif -# KEYWORD: nojail - -. /etc/rc.subr -. /etc/network.subr - -name="faith" -start_cmd="faith_up" -stop_cmd="faith_down" - -faith_up() -{ - case ${ipv6_faith_prefix} in - [Nn][Oo] | '') - ;; - *) - echo "Configuring IPv6-to-IPv4 TCP relay capturing interface:" \ - " faith0." - ${SYSCTL} net.inet6.ip6.keepfaith=1 - ifconfig faith0 create >/dev/null 2>&1 - ifconfig faith0 up - for prefix in ${ipv6_faith_prefix}; do - prefixlen=`expr "${prefix}" : ".*/\(.*\)"` - case ${prefixlen} in - '') - prefixlen=96 - ;; - *) - prefix=`expr "${prefix}" : \ - "\(.*\)/${prefixlen}"` - ;; - esac - route add -inet6 ${prefix} -prefixlen ${prefixlen} ::1 - route change -inet6 ${prefix} -prefixlen ${prefixlen} \ - -ifp faith0 - done - check_startmsgs && ifconfig faith0 - ;; - esac -} - -faith_down() -{ - echo "Removing IPv6-to-IPv4 TCP relay capturing interface: faith0." 
- ifconfig faith0 destroy - ${SYSCTL} net.inet6.ip6.keepfaith=0 - - case ${ipv6_faith_prefix} in - [Nn][Oo] | '') - ;; - *) - for prefix in ${ipv6_faith_prefix}; do - prefixlen=`expr "${prefix}" : ".*/\(.*\)"` - case ${prefixlen} in - '') - prefixlen=96 - ;; - *) - prefix=`expr "${prefix}" : \ - "\(.*\)/${prefixlen}"` - ;; - esac - route delete -inet6 ${prefix} -prefixlen ${prefixlen} - done - ;; - esac -} - -load_rc_config $name -run_rc_command "$1" diff --git a/etc/rc.d/routing b/etc/rc.d/routing index 9cb07e57698..b38147153b0 100755 --- a/etc/rc.d/routing +++ b/etc/rc.d/routing @@ -6,7 +6,7 @@ # # PROVIDE: routing -# REQUIRE: faith netif ppp stf +# REQUIRE: netif ppp stf # KEYWORD: nojailvnet . /etc/rc.subr @@ -245,7 +245,7 @@ static_inet6() [Nn][Oo][Nn][Ee]) return ;; - lo0|faith[0-9]*) + lo0) continue ;; esac diff --git a/lib/libc/net/getaddrinfo.c b/lib/libc/net/getaddrinfo.c index b4c1a33ddff..c50374e7287 100644 --- a/lib/libc/net/getaddrinfo.c +++ b/lib/libc/net/getaddrinfo.c @@ -30,8 +30,6 @@ */ /* - * "#ifdef FAITH" part is local hack for supporting IPv4-v6 translator. - * * Issues to be discussed: * - Return values. There are nonstandard return values defined and used * in the source code. This is because RFC2553 is silent about which error @@ -101,10 +99,6 @@ __FBSDID("$FreeBSD$"); #include "nscache.h" #endif -#if defined(__KAME__) && defined(INET6) -# define FAITH -#endif - #define ANY 0 #define YES 1 #define NO 0 @@ -1316,47 +1310,6 @@ get_ai(const struct addrinfo *pai, const struct afd *afd, const char *addr) { char *p; struct addrinfo *ai; -#ifdef FAITH - struct in6_addr faith_prefix; - char *fp_str; - int translate = 0; -#endif - -#ifdef FAITH - /* - * Transfrom an IPv4 addr into a special IPv6 addr format for - * IPv6->IPv4 translation gateway. 
(only TCP is supported now) - * - * +-----------------------------------+------------+ - * | faith prefix part (12 bytes) | embedded | - * | | IPv4 addr part (4 bytes) - * +-----------------------------------+------------+ - * - * faith prefix part is specified as ascii IPv6 addr format - * in environmental variable GAI. - * For FAITH to work correctly, routing to faith prefix must be - * setup toward a machine where a FAITH daemon operates. - * Also, the machine must enable some mechanizm - * (e.g. faith interface hack) to divert those packet with - * faith prefixed destination addr to user-land FAITH daemon. - */ - fp_str = getenv("GAI"); - if (fp_str && inet_pton(AF_INET6, fp_str, &faith_prefix) == 1 && - afd->a_af == AF_INET && pai->ai_socktype == SOCK_STREAM) { - u_int32_t v4a; - u_int8_t v4a_top; - - memcpy(&v4a, addr, sizeof v4a); - v4a_top = v4a >> IN_CLASSA_NSHIFT; - if (!IN_MULTICAST(v4a) && !IN_EXPERIMENTAL(v4a) && - v4a_top != 0 && v4a != IN_LOOPBACKNET) { - afd = &afdl[N_INET6]; - memcpy(&faith_prefix.s6_addr[12], addr, - sizeof(struct in_addr)); - translate = 1; - } - } -#endif ai = (struct addrinfo *)malloc(sizeof(struct addrinfo) + (afd->a_socklen)); @@ -1370,11 +1323,6 @@ get_ai(const struct addrinfo *pai, const struct afd *afd, const char *addr) ai->ai_addrlen = afd->a_socklen; ai->ai_addr->sa_family = ai->ai_family = afd->a_af; p = (char *)(void *)(ai->ai_addr); -#ifdef FAITH - if (translate == 1) - memcpy(p + afd->a_off, &faith_prefix, (size_t)afd->a_addrlen); - else -#endif memcpy(p + afd->a_off, addr, (size_t)afd->a_addrlen); return ai; } diff --git a/lib/libc/net/getnameinfo.c b/lib/libc/net/getnameinfo.c index ffd34a137ec..005b8773bfc 100644 --- a/lib/libc/net/getnameinfo.c +++ b/lib/libc/net/getnameinfo.c @@ -414,7 +414,6 @@ getnameinfo_link(const struct sockaddr *sa, socklen_t salen, /* * The following have zero-length addresses. 
* IFT_ATM (net/if_atmsubr.c) - * IFT_FAITH (net/if_faith.c) * IFT_GIF (net/if_gif.c) * IFT_LOOP (net/if_loop.c) * IFT_PPP (net/if_ppp.c, net/if_spppsubr.c) diff --git a/release/picobsd/bridge/PICOBSD b/release/picobsd/bridge/PICOBSD index 47f88963265..bc1185d61c6 100644 --- a/release/picobsd/bridge/PICOBSD +++ b/release/picobsd/bridge/PICOBSD @@ -109,7 +109,6 @@ device tun # Packet tunnel. device pty # Pseudo-ttys (telnet etc) device md # Memory "disks" #device gif 4 # IPv6 and IPv4 tunneling -#device faith 1 # IPv6-to-IPv4 relaying (translation) device tap #options DEVICE_POLLING diff --git a/release/picobsd/qemu/PICOBSD b/release/picobsd/qemu/PICOBSD index 2b4cdabec3c..16b175385b0 100644 --- a/release/picobsd/qemu/PICOBSD +++ b/release/picobsd/qemu/PICOBSD @@ -114,7 +114,6 @@ device tun # Packet tunnel. device pty # Pseudo-ttys (telnet etc) device md # Memory "disks" #device gif 4 # IPv6 and IPv4 tunneling -#device faith 1 # IPv6-to-IPv4 relaying (translation) device tap #options VIMAGE # soner or later we may want to test this diff --git a/share/man/man4/Makefile b/share/man/man4/Makefile index 346f2972e73..76b3d55c2c6 100644 --- a/share/man/man4/Makefile +++ b/share/man/man4/Makefile @@ -132,7 +132,6 @@ MAN= aac.4 \ et.4 \ eventtimers.4 \ exca.4 \ - faith.4 \ fatm.4 \ fd.4 \ fdc.4 \ @@ -637,7 +636,6 @@ MLINKS+=en.4 if_en.4 MLINKS+=enc.4 if_enc.4 MLINKS+=epair.4 if_epair.4 MLINKS+=et.4 if_et.4 -MLINKS+=faith.4 if_faith.4 MLINKS+=fatm.4 if_fatm.4 MLINKS+=fd.4 stderr.4 \ fd.4 stdin.4 \ diff --git a/share/man/man4/faith.4 b/share/man/man4/faith.4 deleted file mode 100644 index f0a2df6f6c3..00000000000 --- a/share/man/man4/faith.4 +++ /dev/null @@ -1,133 +0,0 @@ -.\" $KAME: faith.4,v 1.9 2001/04/27 17:26:35 itojun Exp $ -.\" -.\" Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. -.\" All rights reserved. 
-.\" -.\" Redistribution and use in source and binary forms, with or without -.\" modification, are permitted provided that the following conditions -.\" are met: -.\" 1. Redistributions of source code must retain the above copyright -.\" notice, this list of conditions and the following disclaimer. -.\" 2. Redistributions in binary form must reproduce the above copyright -.\" notice, this list of conditions and the following disclaimer in the -.\" documentation and/or other materials provided with the distribution. -.\" 3. Neither the name of the project nor the names of its contributors -.\" may be used to endorse or promote products derived from this software -.\" without specific prior written permission. -.\" -.\" THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND -.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -.\" ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE -.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY -.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF -.\" SUCH DAMAGE. -.\" -.\" $FreeBSD$ -.\" -.Dd January 23, 2012 -.Dt FAITH 4 -.Os -.Sh NAME -.Nm faith -.Nd IPv6-to-IPv4 TCP relay capturing interface -.Sh SYNOPSIS -.Cd "device faith" -.Sh DESCRIPTION -The -.Nm -interface captures IPv6 TCP traffic, -for implementing userland IPv6-to-IPv4 TCP relay -like -.Xr faithd 8 . -.Pp -Each -.Nm -interface is created at runtime using interface cloning. 
-This is -most easily done with the -.Xr ifconfig 8 -.Cm create -command or using the -.Va cloned_interfaces -variable in -.Xr rc.conf 5 . -.Pp -Special action will be taken when IPv6 TCP traffic is seen on a router, -and the default routing table suggests to route it to the -.Nm -interface. -In this case, the packet will be accepted by the router, -regardless of the list of IPv6 interface addresses assigned to the router. -The packet will be captured by an IPv6 TCP socket, if it has the -.Dv IN6P_FAITH -flag turned on and matching address/port pairs. -As a result, -.Nm -will let you capture IPv6 TCP traffic to some specific destination addresses. -Userland programs, such as -.Xr faithd 8 -can use this behavior to relay IPv6 TCP traffic to IPv4 TCP traffic. -The program can accept some specific IPv6 TCP traffic, perform -.Xr getsockname 2 -to get the IPv6 destination address specified by the client, -and perform application-specific address mapping to relay IPv6 TCP to IPv4 TCP. -.Pp -The -.Dv IN6P_FAITH -flag on a IPv6 TCP socket can be set by using -.Xr setsockopt 2 , -with level -.Dv IPPROTO_IPV6 -and optname -.Dv IPv6_FAITH . -.Pp -To handle error reports by ICMPv6, some ICMPv6 packets routed to an -.Nm -interface will be delivered to IPv6 TCP, as well. -.Pp -To understand how -.Nm -can be used, take a look at the source code of -.Xr faithd 8 . -.Pp -As the -.Nm -interface implements potentially dangerous operations, -great care must be taken when configuring it. -To avoid possible misuse, the -.Xr sysctl 8 -variable -.Li net.inet6.ip6.keepfaith -must be set to -.Li 1 -prior to using the interface. -When -.Li net.inet6.ip6.keepfaith -is -.Li 0 , -no packets will be captured by the -.Nm -interface. -.Pp -The -.Nm -interface is intended to be used on routers, not on hosts. 
-.\" -.Sh SEE ALSO -.Xr inet 4 , -.Xr inet6 4 , -.Xr faithd 8 -.Rs -.%A Jun-ichiro itojun Hagino -.%A Kazu Yamamoto -.%T "An IPv6-to-IPv4 transport relay translator" -.%O RFC3142 -.Re -.Sh HISTORY -The FAITH IPv6-to-IPv4 TCP relay translator first appeared in the -WIDE hydrangea IPv6 stack. diff --git a/share/man/man4/inet6.4 b/share/man/man4/inet6.4 index 93015e0551b..0e505db691c 100644 --- a/share/man/man4/inet6.4 +++ b/share/man/man4/inet6.4 @@ -241,17 +241,6 @@ Defaults to off. Boolean: the default value of a per-interface flag to enable/disable performing automatic link-local address configuration. Defaults to on. -.It Dv IPV6CTL_KEEPFAITH -.Pq ip6.keepfaith -Boolean: enable/disable -.Dq FAITH -TCP relay IPv6-to-IPv4 translator code in the kernel. -Refer -.Xr faith 4 -and -.Xr faithd 8 -for detail. -Defaults to off. .It Dv IPV6CTL_LOG_INTERVAL .Pq ip6.log_interval Integer: default interval between diff --git a/share/man/man4/ip6.4 b/share/man/man4/ip6.4 index fcd396c1e7d..dba5e8de539 100644 --- a/share/man/man4/ip6.4 +++ b/share/man/man4/ip6.4 @@ -393,10 +393,6 @@ For wildcard sockets, this can restrict connections to IPv6 only. .\".Ox .\"IPv6 sockets are always IPv6-only, so the socket option is read-only .\"(not modifiable). -.It Dv IPV6_FAITH Fa "int *" -Get or set the status of whether -.Xr faith 4 -connections can be made to this socket. .It Dv IPV6_USE_MIN_MTU Fa "int *" Get or set whether the minimal IPv6 maximum transmission unit (MTU) size will be used to avoid fragmentation from occurring for subsequent diff --git a/share/man/man5/rc.conf.5 b/share/man/man5/rc.conf.5 index 721fef01c1c..09583ec1042 100644 --- a/share/man/man5/rc.conf.5 +++ b/share/man/man5/rc.conf.5 @@ -2948,15 +2948,6 @@ This can be set to .Pq Vt str IPv6 Site Level Aggregator for .Xr stf 4 . -.It Va ipv6_faith_prefix -.Pq Vt str -If not set to -.Dq Li NO , -this is the faith prefix to enable a FAITH IPv6-to-IPv4 TCP -translator. -You also need -.Xr faithd 8 -setup. 
.It Va ipv6_ipv4mapping .Pq Vt bool If set to diff --git a/sys/amd64/conf/GENERIC b/sys/amd64/conf/GENERIC index f8ccd4cba5b..c8d509849b5 100644 --- a/sys/amd64/conf/GENERIC +++ b/sys/amd64/conf/GENERIC @@ -310,7 +310,6 @@ device vlan # 802.1Q VLAN support device tun # Packet tunnel. device md # Memory "disks" device gif # IPv6 and IPv4 tunneling -device faith # IPv6-to-IPv4 relaying (translation) device firmware # firmware assist module # The `bpf' device enables the Berkeley Packet Filter. diff --git a/sys/arm/conf/ATMEL b/sys/arm/conf/ATMEL index 6cc6fb4c8ef..615e06e4af5 100644 --- a/sys/arm/conf/ATMEL +++ b/sys/arm/conf/ATMEL @@ -143,7 +143,6 @@ device vlan # 802.1Q VLAN support device tun # Packet tunnel. device md # Memory "disks" device gif # IPv6 and IPv4 tunneling -device faith # IPv6-to-IPv4 relaying (translation) #device firmware # firmware assist module # SCSI peripherals diff --git a/sys/arm/conf/DOCKSTAR b/sys/arm/conf/DOCKSTAR index c95a5974961..a3c57a12177 100644 --- a/sys/arm/conf/DOCKSTAR +++ b/sys/arm/conf/DOCKSTAR @@ -54,7 +54,6 @@ options FDT_DTB_STATIC # Misc pseudo devices device bpf # Required for DHCP -device faith # IPv6-to-IPv4 relaying (translation) device firmware # firmware(9) required for USB wlan device gif # IPv6 and IPv4 tunneling device loop # Network loopback diff --git a/sys/arm/conf/DREAMPLUG-1001 b/sys/arm/conf/DREAMPLUG-1001 index 3bfdd913ef6..73695929947 100644 --- a/sys/arm/conf/DREAMPLUG-1001 +++ b/sys/arm/conf/DREAMPLUG-1001 @@ -57,7 +57,6 @@ options FDT_DTB_STATIC # Misc pseudo devices device bpf # Required for DHCP -device faith # IPv6-to-IPv4 relaying (translation) device firmware # firmware(9) required for USB wlan device gif # IPv6 and IPv4 tunneling device loop # Network loopback diff --git a/sys/arm/conf/EFIKA_MX b/sys/arm/conf/EFIKA_MX index df7a9efc7ba..677d73d9edc 100644 --- a/sys/arm/conf/EFIKA_MX +++ b/sys/arm/conf/EFIKA_MX @@ -104,7 +104,6 @@ device ether # Ethernet support #device tun # Packet tunnel. 
#device md # Memory "disks" #device gif # IPv6 and IPv4 tunneling -#device faith # IPv6-to-IPv4 relaying (translation) #device firmware # firmware assist module # Serial (COM) ports diff --git a/sys/arm/conf/ETHERNUT5 b/sys/arm/conf/ETHERNUT5 index 9df7f9c55aa..88f9949059e 100644 --- a/sys/arm/conf/ETHERNUT5 +++ b/sys/arm/conf/ETHERNUT5 @@ -136,7 +136,6 @@ device ether # Ethernet support #device tun # Packet tunnel. #device md # Memory "disks" #device gif # IPv6 and IPv4 tunneling -#device faith # IPv6-to-IPv4 relaying (translation) #device firmware # firmware assist module # SCSI peripherals diff --git a/sys/arm/conf/IMX53 b/sys/arm/conf/IMX53 index 1396578e3e5..5fd56979fbf 100644 --- a/sys/arm/conf/IMX53 +++ b/sys/arm/conf/IMX53 @@ -92,7 +92,6 @@ device ether # Ethernet support #device tun # Packet tunnel. device md # Memory "disks" #device gif # IPv6 and IPv4 tunneling -#device faith # IPv6-to-IPv4 relaying (translation) #device firmware # firmware assist module # Ethernet diff --git a/sys/arm/conf/IMX6 b/sys/arm/conf/IMX6 index 88a6462b617..007862bc6d1 100644 --- a/sys/arm/conf/IMX6 +++ b/sys/arm/conf/IMX6 @@ -75,7 +75,6 @@ device vlan # 802.1Q VLAN support device tun # Packet tunnel. device md # Memory "disks" #device gif # IPv6 and IPv4 tunneling -#device faith # IPv6-to-IPv4 relaying (translation) #device firmware # firmware assist module device ether # Ethernet support device miibus # Required for ethernet diff --git a/sys/arm/conf/SAM9260EK b/sys/arm/conf/SAM9260EK index 34b64afd234..c3cba33f8b7 100644 --- a/sys/arm/conf/SAM9260EK +++ b/sys/arm/conf/SAM9260EK @@ -146,7 +146,6 @@ device ether # Ethernet support #device tun # Packet tunnel. 
#device md # Memory "disks" #device gif # IPv6 and IPv4 tunneling -#device faith # IPv6-to-IPv4 relaying (translation) #device firmware # firmware assist module # SCSI peripherals diff --git a/sys/boot/forth/loader.conf b/sys/boot/forth/loader.conf index 241b14aaf97..bd7d296e984 100644 --- a/sys/boot/forth/loader.conf +++ b/sys/boot/forth/loader.conf @@ -254,7 +254,6 @@ if_disc_load="NO" # Discard device if_ef_load="NO" # pseudo-device providing support for multiple # ethernet frame types if_epair_load="NO" # Virtual b-t-b Ethernet-like interface pair -if_faith_load="NO" # IPv6-to-IPv4 TCP relay capturing interface if_gif_load="NO" # generic tunnel interface if_gre_load="NO" # encapsulating network device if_stf_load="NO" # 6to4 tunnel interface diff --git a/sys/conf/NOTES b/sys/conf/NOTES index 45b38d9a69f..1436a3bc166 100644 --- a/sys/conf/NOTES +++ b/sys/conf/NOTES @@ -890,10 +890,7 @@ device gre device me options XBONEHACK -# The `faith' device captures packets sent to it and diverts them -# to the IPv4/IPv6 translation daemon. # The `stf' device implements 6to4 encapsulation. -device faith device stf # The pf packet filter consists of three devices: diff --git a/sys/conf/files b/sys/conf/files index 351155d59b2..f7a4310b868 100644 --- a/sys/conf/files +++ b/sys/conf/files @@ -3229,7 +3229,6 @@ net/if_edsc.c optional edsc net/if_enc.c optional enc ipsec inet | enc ipsec inet6 net/if_epair.c optional epair net/if_ethersubr.c optional ether -net/if_faith.c optional faith net/if_fddisubr.c optional fddi net/if_fwsubr.c optional fwip net/if_gif.c optional gif inet | gif inet6 | \ diff --git a/sys/i386/conf/GENERIC b/sys/i386/conf/GENERIC index b01af819398..889a20a7fe3 100644 --- a/sys/i386/conf/GENERIC +++ b/sys/i386/conf/GENERIC @@ -318,7 +318,6 @@ device vlan # 802.1Q VLAN support device tun # Packet tunnel. 
device md # Memory "disks" device gif # IPv6 and IPv4 tunneling -device faith # IPv6-to-IPv4 relaying (translation) device firmware # firmware assist module # The `bpf' device enables the Berkeley Packet Filter. diff --git a/sys/i386/conf/XBOX b/sys/i386/conf/XBOX index fb6a5c1570f..73f81e44cc6 100644 --- a/sys/i386/conf/XBOX +++ b/sys/i386/conf/XBOX @@ -66,7 +66,6 @@ device ether # Ethernet support #device tun # Packet tunnel. #device md # Memory "disks" #device gif # IPv6 and IPv4 tunneling -#device faith # IPv6-to-IPv4 relaying (translation) # The `bpf' device enables the Berkeley Packet Filter. # Be aware of the administrative consequences of enabling this! diff --git a/sys/i386/conf/XEN b/sys/i386/conf/XEN index 62bbbb6ba3f..108224cd557 100644 --- a/sys/i386/conf/XEN +++ b/sys/i386/conf/XEN @@ -82,7 +82,6 @@ device ether # Ethernet support device tun # Packet tunnel. device md # Memory "disks" device gif # IPv6 and IPv4 tunneling -device faith # IPv6-to-IPv4 relaying (translation) # Wireless cards options IEEE80211_SUPPORT_MESH diff --git a/sys/mips/conf/GXEMUL b/sys/mips/conf/GXEMUL index ea5862287cc..fa0b9b51899 100644 --- a/sys/mips/conf/GXEMUL +++ b/sys/mips/conf/GXEMUL @@ -55,7 +55,6 @@ device ether # Ethernet support device tun # Packet tunnel. device md # Memory "disks" device gif # IPv6 and IPv4 tunneling -device faith # IPv6-to-IPv4 relaying (translation) # The `bpf' device enables the Berkeley Packet Filter. # Be aware of the administrative consequences of enabling this! diff --git a/sys/mips/conf/GXEMUL32 b/sys/mips/conf/GXEMUL32 index 6bd756f20f7..27854c5fa2a 100644 --- a/sys/mips/conf/GXEMUL32 +++ b/sys/mips/conf/GXEMUL32 @@ -53,7 +53,6 @@ device ether # Ethernet support device tun # Packet tunnel. device md # Memory "disks" device gif # IPv6 and IPv4 tunneling -device faith # IPv6-to-IPv4 relaying (translation) # The `bpf' device enables the Berkeley Packet Filter. # Be aware of the administrative consequences of enabling this! 
diff --git a/sys/mips/conf/OCTEON1 b/sys/mips/conf/OCTEON1 index fb417cfccc0..9f22030c170 100644 --- a/sys/mips/conf/OCTEON1 +++ b/sys/mips/conf/OCTEON1 @@ -261,7 +261,6 @@ device vlan # 802.1Q VLAN support device tun # Packet tunnel. device md # Memory "disks" device gif # IPv6 and IPv4 tunneling -device faith # IPv6-to-IPv4 relaying (translation) device firmware # firmware assist module # The `bpf' device enables the Berkeley Packet Filter. diff --git a/sys/mips/rmi/rootfs_list.txt b/sys/mips/rmi/rootfs_list.txt index 048321a30ad..86895b6a2f5 100644 --- a/sys/mips/rmi/rootfs_list.txt +++ b/sys/mips/rmi/rootfs_list.txt @@ -197,7 +197,6 @@ ./etc/rc.d/dmesg ./etc/rc.d/dumpon ./etc/rc.d/encswap -./etc/rc.d/faith ./etc/rc.d/fsck ./etc/rc.d/ftp-proxy ./etc/rc.d/ftpd diff --git a/sys/modules/Makefile b/sys/modules/Makefile index 0ca6f63df72..fedc92e362c 100644 --- a/sys/modules/Makefile +++ b/sys/modules/Makefile @@ -145,7 +145,6 @@ SUBDIR= \ if_disc \ if_edsc \ if_epair \ - if_faith \ ${_if_gif} \ ${_if_gre} \ ${_if_me} \ diff --git a/sys/modules/if_faith/Makefile b/sys/modules/if_faith/Makefile deleted file mode 100644 index fe78ec9be48..00000000000 --- a/sys/modules/if_faith/Makefile +++ /dev/null @@ -1,8 +0,0 @@ -# $FreeBSD$ - -.PATH: ${.CURDIR}/../../net - -KMOD= if_faith -SRCS= if_faith.c opt_inet.h opt_inet6.h - -.include diff --git a/sys/net/if_faith.c b/sys/net/if_faith.c deleted file mode 100644 index a8a103de489..00000000000 --- a/sys/net/if_faith.c +++ /dev/null @@ -1,331 +0,0 @@ -/* $KAME: if_faith.c,v 1.23 2001/12/17 13:55:29 sumikawa Exp $ */ - -/*- - * Copyright (c) 1982, 1986, 1993 - * The Regents of the University of California. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. 
Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * $FreeBSD$ - */ -/* - * derived from - * @(#)if_loop.c 8.1 (Berkeley) 6/10/93 - * Id: if_loop.c,v 1.22 1996/06/19 16:24:10 wollman Exp - */ - -/* - * Loopback interface driver for protocol testing and timing. 
- */ -#include "opt_inet.h" -#include "opt_inet6.h" - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include - -#ifdef INET -#include -#include -#include -#include -#endif - -#ifdef INET6 -#ifndef INET -#include -#endif -#include -#include -#include -#endif - -struct faith_softc { - struct ifnet *sc_ifp; -}; - -static int faithioctl(struct ifnet *, u_long, caddr_t); -static int faithoutput(struct ifnet *, struct mbuf *, const struct sockaddr *, - struct route *); -#ifdef INET6 -static int faithprefix(struct in6_addr *); -#endif - -static int faithmodevent(module_t, int, void *); - -static const char faithname[] = "faith"; -static MALLOC_DEFINE(M_FAITH, faithname, "Firewall Assisted Tunnel Interface"); - -static int faith_clone_create(struct if_clone *, int, caddr_t); -static void faith_clone_destroy(struct ifnet *); -static struct if_clone *faith_cloner; - -#define FAITHMTU 1500 - -static int -faithmodevent(mod, type, data) - module_t mod; - int type; - void *data; -{ - - switch (type) { - case MOD_LOAD: - faith_cloner = if_clone_simple(faithname, faith_clone_create, - faith_clone_destroy, 0); -#ifdef INET6 - faithprefix_p = faithprefix; -#endif - - break; - case MOD_UNLOAD: -#ifdef INET6 - faithprefix_p = NULL; -#endif - - if_clone_detach(faith_cloner); - break; - default: - return EOPNOTSUPP; - } - return 0; -} - -static moduledata_t faith_mod = { - "if_faith", - faithmodevent, - 0 -}; - -DECLARE_MODULE(if_faith, faith_mod, SI_SUB_PSEUDO, SI_ORDER_ANY); -MODULE_VERSION(if_faith, 1); - -static int -faith_clone_create(ifc, unit, params) - struct if_clone *ifc; - int unit; - caddr_t params; -{ - struct ifnet *ifp; - struct faith_softc *sc; - - sc = malloc(sizeof(struct faith_softc), M_FAITH, M_WAITOK | M_ZERO); - ifp = sc->sc_ifp = if_alloc(IFT_FAITH); - if (ifp == NULL) { - free(sc, M_FAITH); - return (ENOSPC); - } - - 
ifp->if_softc = sc; - if_initname(sc->sc_ifp, faithname, unit); - - ifp->if_mtu = FAITHMTU; - /* Change to BROADCAST experimentaly to announce its prefix. */ - ifp->if_flags = /* IFF_LOOPBACK */ IFF_BROADCAST | IFF_MULTICAST; - ifp->if_ioctl = faithioctl; - ifp->if_output = faithoutput; - ifp->if_hdrlen = 0; - ifp->if_addrlen = 0; - ifp->if_snd.ifq_maxlen = ifqmaxlen; - if_attach(ifp); - bpfattach(ifp, DLT_NULL, sizeof(u_int32_t)); - return (0); -} - -static void -faith_clone_destroy(ifp) - struct ifnet *ifp; -{ - struct faith_softc *sc = ifp->if_softc; - - bpfdetach(ifp); - if_detach(ifp); - if_free(ifp); - free(sc, M_FAITH); -} - -static int -faithoutput(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst, - struct route *ro) -{ - int isr; - u_int32_t af; - struct rtentry *rt = NULL; - - M_ASSERTPKTHDR(m); - - if (ro != NULL) - rt = ro->ro_rt; - /* BPF writes need to be handled specially. */ - if (dst->sa_family == AF_UNSPEC) - bcopy(dst->sa_data, &af, sizeof(af)); - else - af = dst->sa_family; - - if (bpf_peers_present(ifp->if_bpf)) - bpf_mtap2(ifp->if_bpf, &af, sizeof(af), m); - - if (rt && rt->rt_flags & (RTF_REJECT|RTF_BLACKHOLE)) { - m_freem(m); - return (rt->rt_flags & RTF_BLACKHOLE ? 0 : - rt->rt_flags & RTF_HOST ? EHOSTUNREACH : ENETUNREACH); - } - if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); - if_inc_counter(ifp, IFCOUNTER_OBYTES, m->m_pkthdr.len); - switch (af) { -#ifdef INET - case AF_INET: - isr = NETISR_IP; - break; -#endif -#ifdef INET6 - case AF_INET6: - isr = NETISR_IPV6; - break; -#endif - default: - m_freem(m); - return EAFNOSUPPORT; - } - - /* XXX do we need more sanity checks? */ - - m->m_pkthdr.rcvif = ifp; - if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); - if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len); - netisr_dispatch(isr, m); - return (0); -} - -/* - * Process an ioctl request. 
- */ -/* ARGSUSED */ -static int -faithioctl(ifp, cmd, data) - struct ifnet *ifp; - u_long cmd; - caddr_t data; -{ - struct ifreq *ifr = (struct ifreq *)data; - int error = 0; - - switch (cmd) { - - case SIOCSIFADDR: - ifp->if_flags |= IFF_UP; - ifp->if_drv_flags |= IFF_DRV_RUNNING; - - /* - * Everything else is done at a higher level. - */ - break; - - case SIOCADDMULTI: - case SIOCDELMULTI: - if (ifr == 0) { - error = EAFNOSUPPORT; /* XXX */ - break; - } - switch (ifr->ifr_addr.sa_family) { -#ifdef INET - case AF_INET: - break; -#endif -#ifdef INET6 - case AF_INET6: - break; -#endif - - default: - error = EAFNOSUPPORT; - break; - } - break; - -#ifdef SIOCSIFMTU - case SIOCSIFMTU: - ifp->if_mtu = ifr->ifr_mtu; - break; -#endif - - case SIOCSIFFLAGS: - break; - - default: - error = EINVAL; - } - return (error); -} - -#ifdef INET6 -/* - * XXX could be slow - * XXX could be layer violation to call sys/net from sys/netinet6 - */ -static int -faithprefix(in6) - struct in6_addr *in6; -{ - struct rtentry *rt; - struct sockaddr_in6 sin6; - int ret; - - if (V_ip6_keepfaith == 0) - return 0; - - bzero(&sin6, sizeof(sin6)); - sin6.sin6_family = AF_INET6; - sin6.sin6_len = sizeof(struct sockaddr_in6); - sin6.sin6_addr = *in6; - rt = in6_rtalloc1((struct sockaddr *)&sin6, 0, 0UL, RT_DEFAULT_FIB); - if (rt && rt->rt_ifp && rt->rt_ifp->if_type == IFT_FAITH && - (rt->rt_ifp->if_flags & IFF_UP) != 0) - ret = 1; - else - ret = 0; - if (rt) - RTFREE_LOCKED(rt); - return ret; -} -#endif diff --git a/sys/net/if_types.h b/sys/net/if_types.h index 80a5606ba8a..c9b20db719e 100644 --- a/sys/net/if_types.h +++ b/sys/net/if_types.h @@ -246,7 +246,6 @@ /* not based on IANA assignments */ #define IFT_GIF 0xf0 #define IFT_PVC 0xf1 -#define IFT_FAITH 0xf2 #define IFT_ENC 0xf4 #define IFT_PFLOG 0xf6 #define IFT_PFSYNC 0xf7 diff --git a/sys/netinet/in.h b/sys/netinet/in.h index 4776278e1b1..f2dbce29c69 100644 --- a/sys/netinet/in.h +++ b/sys/netinet/in.h @@ -428,8 +428,7 @@ __END_DECLS #define 
IP_RECVIF 20 /* bool; receive reception if w/dgram */ /* for IPSEC */ #define IP_IPSEC_POLICY 21 /* int; set/get security policy */ -#define IP_FAITH 22 /* bool; accept FAITH'ed connections */ - + /* unused; was IP_FAITH */ #define IP_ONESBCAST 23 /* bool: send all-ones broadcast */ #define IP_BINDANY 24 /* bool: allow bind to any address */ #define IP_BINDMULTI 25 /* bool: allow multiple listeners on a tuple */ @@ -630,7 +629,7 @@ int getsourcefilter(int, uint32_t, struct sockaddr *, socklen_t, #define IPCTL_STATS 12 /* ipstat structure */ #define IPCTL_ACCEPTSOURCEROUTE 13 /* may accept source routed packets */ #define IPCTL_FASTFORWARDING 14 /* use fast IP forwarding code */ -#define IPCTL_KEEPFAITH 15 /* FAITH IPv4->IPv6 translater ctl */ + /* 15, unused, was: IPCTL_KEEPFAITH */ #define IPCTL_GIF_TTL 16 /* default TTL for gif encap packet */ #endif /* __BSD_VISIBLE */ diff --git a/sys/netinet/in_pcb.c b/sys/netinet/in_pcb.c index c7f0511133d..6bec7efc256 100644 --- a/sys/netinet/in_pcb.c +++ b/sys/netinet/in_pcb.c @@ -1645,11 +1645,6 @@ in_pcblookup_group(struct inpcbinfo *pcbinfo, struct inpcbgroup *pcbgroup, inp->inp_lport != lport) continue; - /* XXX inp locking */ - if (ifp && ifp->if_type == IFT_FAITH && - (inp->inp_flags & INP_FAITH) == 0) - continue; - injail = prison_flag(inp->inp_cred, PR_IP4); if (injail) { if (prison_check_ip4(inp->inp_cred, @@ -1724,11 +1719,6 @@ in_pcblookup_group(struct inpcbinfo *pcbinfo, struct inpcbgroup *pcbgroup, inp->inp_lport != lport) continue; - /* XXX inp locking */ - if (ifp && ifp->if_type == IFT_FAITH && - (inp->inp_flags & INP_FAITH) == 0) - continue; - injail = prison_flag(inp->inp_cred, PR_IP4); if (injail) { if (prison_check_ip4(inp->inp_cred, @@ -1869,11 +1859,6 @@ in_pcblookup_hash_locked(struct inpcbinfo *pcbinfo, struct in_addr faddr, inp->inp_lport != lport) continue; - /* XXX inp locking */ - if (ifp && ifp->if_type == IFT_FAITH && - (inp->inp_flags & INP_FAITH) == 0) - continue; - injail = 
prison_flag(inp->inp_cred, PR_IP4); if (injail) { if (prison_check_ip4(inp->inp_cred, @@ -2468,10 +2453,6 @@ db_print_inpflags(int inp_flags) db_printf("%sINP_MTUDISC", comma ? ", " : ""); comma = 1; } - if (inp_flags & INP_FAITH) { - db_printf("%sINP_FAITH", comma ? ", " : ""); - comma = 1; - } if (inp_flags & INP_RECVTTL) { db_printf("%sINP_RECVTTL", comma ? ", " : ""); comma = 1; diff --git a/sys/netinet/in_pcb.h b/sys/netinet/in_pcb.h index 185bcfb2d06..04ed0b0bdf8 100644 --- a/sys/netinet/in_pcb.h +++ b/sys/netinet/in_pcb.h @@ -511,7 +511,7 @@ short inp_so_options(const struct inpcb *inp); #define INP_ANONPORT 0x00000040 /* port chosen for user */ #define INP_RECVIF 0x00000080 /* receive incoming interface */ #define INP_MTUDISC 0x00000100 /* user can do MTU discovery */ -#define INP_FAITH 0x00000200 /* accept FAITH'ed connections */ + /* 0x000200 unused: was INP_FAITH */ #define INP_RECVTTL 0x00000400 /* receive incoming IP TTL */ #define INP_DONTFRAG 0x00000800 /* don't fragment packet */ #define INP_BINDANY 0x00001000 /* allow bind to any address */ diff --git a/sys/netinet/ip_icmp.c b/sys/netinet/ip_icmp.c index 916124a7efb..23f7a567c74 100644 --- a/sys/netinet/ip_icmp.c +++ b/sys/netinet/ip_icmp.c @@ -410,19 +410,6 @@ icmp_input(struct mbuf **mp, int *offp, int proto) m->m_len += hlen; m->m_data -= hlen; - if (m->m_pkthdr.rcvif && m->m_pkthdr.rcvif->if_type == IFT_FAITH) { - /* - * Deliver very specific ICMP type only. 
- */ - switch (icp->icmp_type) { - case ICMP_UNREACH: - case ICMP_TIMXCEED: - break; - default: - goto freeit; - } - } - #ifdef ICMPPRINTFS if (icmpprintfs) printf("icmp_input, type %d code %d\n", icp->icmp_type, diff --git a/sys/netinet/ip_input.c b/sys/netinet/ip_input.c index ac7568248f9..1ad020a2114 100644 --- a/sys/netinet/ip_input.c +++ b/sys/netinet/ip_input.c @@ -104,12 +104,6 @@ SYSCTL_INT(_net_inet_ip, IPCTL_SENDREDIRECTS, redirect, CTLFLAG_VNET | CTLFLAG_R &VNET_NAME(ipsendredirects), 0, "Enable sending IP redirects"); -static VNET_DEFINE(int, ip_keepfaith); -#define V_ip_keepfaith VNET(ip_keepfaith) -SYSCTL_INT(_net_inet_ip, IPCTL_KEEPFAITH, keepfaith, CTLFLAG_VNET | CTLFLAG_RW, - &VNET_NAME(ip_keepfaith), 0, - "Enable packet capture for FAITH IPv4->IPv6 translater daemon"); - static VNET_DEFINE(int, ip_sendsourcequench); #define V_ip_sendsourcequench VNET(ip_sendsourcequench) SYSCTL_INT(_net_inet_ip, OID_AUTO, sendsourcequench, CTLFLAG_VNET | CTLFLAG_RW, @@ -752,18 +746,6 @@ passin: if (ip->ip_dst.s_addr == INADDR_ANY) goto ours; - /* - * FAITH(Firewall Aided Internet Translator) - */ - if (ifp && ifp->if_type == IFT_FAITH) { - if (V_ip_keepfaith) { - if (ip->ip_p == IPPROTO_TCP || ip->ip_p == IPPROTO_ICMP) - goto ours; - } - m_freem(m); - return; - } - /* * Not for us; forward if possible and desirable. 
*/ diff --git a/sys/netinet/ip_output.c b/sys/netinet/ip_output.c index 25e7cceaf98..5e4a5074e3f 100644 --- a/sys/netinet/ip_output.c +++ b/sys/netinet/ip_output.c @@ -991,7 +991,6 @@ ip_ctloutput(struct socket *so, struct sockopt *sopt) case IP_RECVDSTADDR: case IP_RECVTTL: case IP_RECVIF: - case IP_FAITH: case IP_ONESBCAST: case IP_DONTFRAG: case IP_RECVTOS: @@ -1058,10 +1057,6 @@ ip_ctloutput(struct socket *so, struct sockopt *sopt) OPTSET(INP_RECVIF); break; - case IP_FAITH: - OPTSET(INP_FAITH); - break; - case IP_ONESBCAST: OPTSET(INP_ONESBCAST); break; @@ -1200,7 +1195,6 @@ ip_ctloutput(struct socket *so, struct sockopt *sopt) case IP_RECVTTL: case IP_RECVIF: case IP_PORTRANGE: - case IP_FAITH: case IP_ONESBCAST: case IP_DONTFRAG: case IP_BINDANY: @@ -1259,10 +1253,6 @@ ip_ctloutput(struct socket *so, struct sockopt *sopt) optval = 0; break; - case IP_FAITH: - optval = OPTBIT(INP_FAITH); - break; - case IP_ONESBCAST: optval = OPTBIT(INP_ONESBCAST); break; diff --git a/sys/netinet6/icmp6.c b/sys/netinet6/icmp6.c index 8169e56bc38..891e8eb97ad 100644 --- a/sys/netinet6/icmp6.c +++ b/sys/netinet6/icmp6.c @@ -482,22 +482,6 @@ icmp6_input(struct mbuf **mp, int *offp, int proto) goto freeit; } - if (faithprefix_p != NULL && (*faithprefix_p)(&ip6->ip6_dst)) { - /* - * Deliver very specific ICMP6 type only. - * This is important to deliver TOOBIG. Otherwise PMTUD - * will not work. 
- */ - switch (icmp6->icmp6_type) { - case ICMP6_DST_UNREACH: - case ICMP6_PACKET_TOO_BIG: - case ICMP6_TIME_EXCEEDED: - break; - default: - goto freeit; - } - } - ICMP6STAT_INC(icp6s_inhist[icmp6->icmp6_type]); icmp6_ifstat_inc(ifp, ifs6_in_msg); if (icmp6->icmp6_type < ICMP6_INFOMSG_MASK) diff --git a/sys/netinet6/in6.c b/sys/netinet6/in6.c index 89ec855243a..4ea398a8866 100644 --- a/sys/netinet6/in6.c +++ b/sys/netinet6/in6.c @@ -137,8 +137,6 @@ static int in6_notify_ifa(struct ifnet *, struct in6_ifaddr *, struct in6_aliasreq *, int); static void in6_unlink_ifa(struct in6_ifaddr *, struct ifnet *); -int (*faithprefix_p)(struct in6_addr *); - static int in6_validate_ifra(struct ifnet *, struct in6_aliasreq *, struct in6_ifaddr *, int); static struct in6_ifaddr *in6_alloc_ifa(struct ifnet *, @@ -1948,34 +1946,20 @@ in6if_do_dad(struct ifnet *ifp) if (ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED) return (0); - switch (ifp->if_type) { -#ifdef IFT_DUMMY - case IFT_DUMMY: -#endif - case IFT_FAITH: - /* - * These interfaces do not have the IFF_LOOPBACK flag, - * but loop packets back. We do not have to do DAD on such - * interfaces. We should even omit it, because loop-backed - * NS would confuse the DAD procedure. - */ + /* + * Our DAD routine requires the interface up and running. + * However, some interfaces can be up before the RUNNING + * status. Additionaly, users may try to assign addresses + * before the interface becomes up (or running). + * We simply skip DAD in such a case as a work around. + * XXX: we should rather mark "tentative" on such addresses, + * and do DAD after the interface becomes ready. + */ + if (!((ifp->if_flags & IFF_UP) && + (ifp->if_drv_flags & IFF_DRV_RUNNING))) return (0); - default: - /* - * Our DAD routine requires the interface up and running. - * However, some interfaces can be up before the RUNNING - * status. Additionaly, users may try to assign addresses - * before the interface becomes up (or running). 
- * We simply skip DAD in such a case as a work around. - * XXX: we should rather mark "tentative" on such addresses, - * and do DAD after the interface becomes ready. - */ - if (!((ifp->if_flags & IFF_UP) && - (ifp->if_drv_flags & IFF_DRV_RUNNING))) - return (0); - return (1); - } + return (1); } /* diff --git a/sys/netinet6/in6.h b/sys/netinet6/in6.h index 5fbcf5f23d9..7187aec7b7d 100644 --- a/sys/netinet6/in6.h +++ b/sys/netinet6/in6.h @@ -424,8 +424,7 @@ struct route_in6 { #define IPV6_IPSEC_POLICY 28 /* struct; get/set security policy */ #endif /* IPSEC */ -#define IPV6_FAITH 29 /* bool; accept FAITH'ed connections */ - + /* 29; unused; was IPV6_FAITH */ #if 1 /* IPV6FIREWALL */ #define IPV6_FW_ADD 30 /* add a firewall rule to chain */ #define IPV6_FW_DEL 31 /* delete a firewall rule from chain */ @@ -580,7 +579,7 @@ struct ip6_mtuinfo { #define IPV6CTL_SOURCECHECK 10 /* verify source route and intf */ #define IPV6CTL_SOURCECHECK_LOGINT 11 /* minimume logging interval */ #define IPV6CTL_ACCEPT_RTADV 12 -#define IPV6CTL_KEEPFAITH 13 + /* 13; unused; was: IPV6CTL_KEEPFAITH */ #define IPV6CTL_LOG_INTERVAL 14 #define IPV6CTL_HDRNESTLIMIT 15 #define IPV6CTL_DAD_COUNT 16 @@ -669,7 +668,6 @@ extern void addrsel_policy_init(void); #define sin6tosa(sin6) ((struct sockaddr *)(sin6)) #define ifatoia6(ifa) ((struct in6_ifaddr *)(ifa)) -extern int (*faithprefix_p)(struct in6_addr *); #endif /* _KERNEL */ #ifndef _SIZE_T_DECLARED diff --git a/sys/netinet6/in6_pcb.c b/sys/netinet6/in6_pcb.c index 2be2e8366af..2a7b9dcf40f 100644 --- a/sys/netinet6/in6_pcb.c +++ b/sys/netinet6/in6_pcb.c @@ -870,12 +870,6 @@ in6_pcblookup_group(struct inpcbinfo *pcbinfo, struct inpcbgroup *pcbgroup, struct inpcbhead *head; struct inpcb *inp, *tmpinp; u_short fport = fport_arg, lport = lport_arg; - int faith; - - if (faithprefix_p != NULL) - faith = (*faithprefix_p)(laddr); - else - faith = 0; /* * First look for an exact match. 
@@ -935,10 +929,6 @@ in6_pcblookup_group(struct inpcbinfo *pcbinfo, struct inpcbgroup *pcbgroup, continue; } - /* XXX inp locking */ - if (faith && (inp->inp_flags & INP_FAITH) == 0) - continue; - injail = prison_flag(inp->inp_cred, PR_IP6); if (injail) { if (prison_check_ip6(inp->inp_cred, @@ -1001,10 +991,6 @@ in6_pcblookup_group(struct inpcbinfo *pcbinfo, struct inpcbgroup *pcbgroup, continue; } - /* XXX inp locking */ - if (faith && (inp->inp_flags & INP_FAITH) == 0) - continue; - injail = prison_flag(inp->inp_cred, PR_IP6); if (injail) { if (prison_check_ip6(inp->inp_cred, @@ -1069,18 +1055,12 @@ in6_pcblookup_hash_locked(struct inpcbinfo *pcbinfo, struct in6_addr *faddr, struct inpcbhead *head; struct inpcb *inp, *tmpinp; u_short fport = fport_arg, lport = lport_arg; - int faith; KASSERT((lookupflags & ~(INPLOOKUP_WILDCARD)) == 0, ("%s: invalid lookup flags %d", __func__, lookupflags)); INP_HASH_LOCK_ASSERT(pcbinfo); - if (faithprefix_p != NULL) - faith = (*faithprefix_p)(laddr); - else - faith = 0; - /* * First look for an exact match. */ @@ -1137,10 +1117,6 @@ in6_pcblookup_hash_locked(struct inpcbinfo *pcbinfo, struct in6_addr *faddr, continue; } - /* XXX inp locking */ - if (faith && (inp->inp_flags & INP_FAITH) == 0) - continue; - injail = prison_flag(inp->inp_cred, PR_IP6); if (injail) { if (prison_check_ip6(inp->inp_cred, diff --git a/sys/netinet6/in6_proto.c b/sys/netinet6/in6_proto.c index 3798636696e..4d328d282ec 100644 --- a/sys/netinet6/in6_proto.c +++ b/sys/netinet6/in6_proto.c @@ -434,7 +434,6 @@ VNET_DEFINE(int, ip6_rr_prune) = 5; /* router renumbering prefix VNET_DEFINE(int, ip6_mcast_pmtu) = 0; /* enable pMTU discovery for multicast? 
*/ VNET_DEFINE(int, ip6_v6only) = 1; -VNET_DEFINE(int, ip6_keepfaith) = 0; VNET_DEFINE(time_t, ip6_log_time) = (time_t)0L; #ifdef IPSTEALTH VNET_DEFINE(int, ip6stealth) = 0; @@ -543,8 +542,6 @@ SYSCTL_INT(_net_inet6_ip6, IPV6CTL_RFC6204W3, rfc6204w3, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_rfc6204w3), 0, "Accept the default router list from ICMPv6 RA messages even " "when packet forwarding enabled."); -SYSCTL_INT(_net_inet6_ip6, IPV6CTL_KEEPFAITH, keepfaith, - CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_keepfaith), 0, ""); SYSCTL_INT(_net_inet6_ip6, IPV6CTL_LOG_INTERVAL, log_interval, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_log_interval), 0, ""); SYSCTL_INT(_net_inet6_ip6, IPV6CTL_HDRNESTLIMIT, hdrnestlimit, diff --git a/sys/netinet6/ip6_output.c b/sys/netinet6/ip6_output.c index f574fb22c86..1763041595f 100644 --- a/sys/netinet6/ip6_output.c +++ b/sys/netinet6/ip6_output.c @@ -1408,7 +1408,6 @@ ip6_ctloutput(struct socket *so, struct sockopt *sopt) /* FALLTHROUGH */ case IPV6_UNICAST_HOPS: case IPV6_HOPLIMIT: - case IPV6_FAITH: case IPV6_RECVPKTINFO: case IPV6_RECVHOPLIMIT: @@ -1552,10 +1551,6 @@ do { \ OPTSET(IN6P_RTHDR); break; - case IPV6_FAITH: - OPTSET(INP_FAITH); - break; - case IPV6_RECVPATHMTU: /* * We ignore this option for TCP @@ -1823,7 +1818,6 @@ do { \ case IPV6_RECVRTHDR: case IPV6_RECVPATHMTU: - case IPV6_FAITH: case IPV6_V6ONLY: case IPV6_PORTRANGE: case IPV6_RECVTCLASS: @@ -1868,10 +1862,6 @@ do { \ optval = OPTBIT(IN6P_MTU); break; - case IPV6_FAITH: - optval = OPTBIT(INP_FAITH); - break; - case IPV6_V6ONLY: optval = OPTBIT(IN6P_IPV6_V6ONLY); break; diff --git a/sys/netinet6/ip6_var.h b/sys/netinet6/ip6_var.h index 0889d987c6e..155a607a740 100644 --- a/sys/netinet6/ip6_var.h +++ b/sys/netinet6/ip6_var.h @@ -296,7 +296,6 @@ VNET_DECLARE(int, ip6_norbit_raif); /* Disable R-bit in NA on RA * receiving IF. 
*/ VNET_DECLARE(int, ip6_rfc6204w3); /* Accept defroute from RA even when forwarding enabled */ -VNET_DECLARE(int, ip6_keepfaith); /* Firewall Aided Internet Translator */ VNET_DECLARE(int, ip6_log_interval); VNET_DECLARE(time_t, ip6_log_time); VNET_DECLARE(int, ip6_hdrnestlimit); /* upper limit of # of extension @@ -310,7 +309,6 @@ VNET_DECLARE(int, ip6_dad_count); /* DupAddrDetectionTransmits */ #define V_ip6_no_radr VNET(ip6_no_radr) #define V_ip6_norbit_raif VNET(ip6_norbit_raif) #define V_ip6_rfc6204w3 VNET(ip6_rfc6204w3) -#define V_ip6_keepfaith VNET(ip6_keepfaith) #define V_ip6_log_interval VNET(ip6_log_interval) #define V_ip6_log_time VNET(ip6_log_time) #define V_ip6_hdrnestlimit VNET(ip6_hdrnestlimit) diff --git a/sys/netinet6/raw_ip6.c b/sys/netinet6/raw_ip6.c index 34e38e496b7..685d7f527ce 100644 --- a/sys/netinet6/raw_ip6.c +++ b/sys/netinet6/raw_ip6.c @@ -169,12 +169,6 @@ rip6_input(struct mbuf **mp, int *offp, int proto) RIP6STAT_INC(rip6s_ipackets); - if (faithprefix_p != NULL && (*faithprefix_p)(&ip6->ip6_dst)) { - /* XXX Send icmp6 host/port unreach? */ - m_freem(m); - return (IPPROTO_DONE); - } - init_sin6(&fromsa, m); /* general init */ ifp = m->m_pkthdr.rcvif; diff --git a/sys/netinet6/sctp6_usrreq.c b/sys/netinet6/sctp6_usrreq.c index a2393ec5120..037127eef68 100644 --- a/sys/netinet6/sctp6_usrreq.c +++ b/sys/netinet6/sctp6_usrreq.c @@ -149,10 +149,6 @@ sctp6_input_with_port(struct mbuf **i_pak, int *offp, uint16_t port) if (in6_setscope(&dst.sin6_addr, m->m_pkthdr.rcvif, NULL) != 0) { goto out; } - if (faithprefix_p != NULL && (*faithprefix_p) (&dst.sin6_addr)) { - /* XXX send icmp6 host/port unreach? */ - goto out; - } length = ntohs(ip6->ip6_plen) + iphlen; /* Validate mbuf chain length with IP payload length. 
*/ if (SCTP_HEADER_LEN(m) != length) { diff --git a/sys/netinet6/udp6_usrreq.c b/sys/netinet6/udp6_usrreq.c index de79816f0eb..6c6f27a6e5d 100644 --- a/sys/netinet6/udp6_usrreq.c +++ b/sys/netinet6/udp6_usrreq.c @@ -208,12 +208,6 @@ udp6_input(struct mbuf **mp, int *offp, int proto) ifp = m->m_pkthdr.rcvif; ip6 = mtod(m, struct ip6_hdr *); - if (faithprefix_p != NULL && (*faithprefix_p)(&ip6->ip6_dst)) { - /* XXX send icmp6 host/port unreach? */ - m_freem(m); - return (IPPROTO_DONE); - } - #ifndef PULLDOWN_TEST IP6_EXTHDR_CHECK(m, off, sizeof(struct udphdr), IPPROTO_DONE); ip6 = mtod(m, struct ip6_hdr *); diff --git a/sys/pc98/conf/GENERIC b/sys/pc98/conf/GENERIC index ea89d325874..299606aa827 100644 --- a/sys/pc98/conf/GENERIC +++ b/sys/pc98/conf/GENERIC @@ -231,7 +231,6 @@ device vlan # 802.1Q VLAN support device tun # Packet tunnel. device md # Memory "disks" device gif # IPv6 and IPv4 tunneling -device faith # IPv6-to-IPv4 relaying (translation) device firmware # firmware assist module # The `bpf' device enables the Berkeley Packet Filter. diff --git a/sys/powerpc/conf/GENERIC b/sys/powerpc/conf/GENERIC index dfd5a61945d..d1ee43c9cf7 100644 --- a/sys/powerpc/conf/GENERIC +++ b/sys/powerpc/conf/GENERIC @@ -154,7 +154,6 @@ device tun # Packet tunnel. device md # Memory "disks" device ofwd # Open Firmware disks device gif # IPv6 and IPv4 tunneling -device faith # IPv6-to-IPv4 relaying/(translation) device firmware # firmware assist module # The `bpf' device enables the Berkeley Packet Filter. diff --git a/sys/powerpc/conf/GENERIC64 b/sys/powerpc/conf/GENERIC64 index 3ce6e3cdbd5..3e4d72f6581 100644 --- a/sys/powerpc/conf/GENERIC64 +++ b/sys/powerpc/conf/GENERIC64 @@ -157,7 +157,6 @@ device tun # Packet tunnel. device md # Memory "disks" device ofwd # Open Firmware disks device gif # IPv6 and IPv4 tunneling -device faith # IPv6-to-IPv4 relaying/(translation) device firmware # firmware assist module # The `bpf' device enables the Berkeley Packet Filter. 
diff --git a/sys/powerpc/conf/WII b/sys/powerpc/conf/WII index e4d61b25352..8e21f2a0d50 100644 --- a/sys/powerpc/conf/WII +++ b/sys/powerpc/conf/WII @@ -77,7 +77,6 @@ device vlan # 802.1Q VLAN support device tun # Packet tunnel. device md # Memory "disks" device gif # IPv6 and IPv4 tunneling -device faith # IPv6-to-IPv4 relaying/(translation) device firmware # firmware assist module diff --git a/sys/sparc64/conf/GENERIC b/sys/sparc64/conf/GENERIC index 306f1dad24b..0f9aa1bc1ee 100644 --- a/sys/sparc64/conf/GENERIC +++ b/sys/sparc64/conf/GENERIC @@ -225,7 +225,6 @@ device vlan # 802.1Q VLAN support device tun # Packet tunnel. device md # Memory "disks" device gif # IPv6 and IPv4 tunneling -device faith # IPv6-to-IPv4 relaying (translation) device firmware # firmware assist module # The `bpf' device enables the Berkeley Packet Filter. diff --git a/tools/build/mk/OptionalObsoleteFiles.inc b/tools/build/mk/OptionalObsoleteFiles.inc index d0b76ad77cb..d77cef26790 100644 --- a/tools/build/mk/OptionalObsoleteFiles.inc +++ b/tools/build/mk/OptionalObsoleteFiles.inc @@ -1954,7 +1954,6 @@ OLD_FILES+=usr/share/man/man3/iconvlist.3.gz .if ${MK_INET6} == no OLD_FILES+=sbin/ping6 OLD_FILES+=sbin/rtsol -OLD_FILES+=usr/sbin/faithd OLD_FILES+=usr/sbin/ip6addrctl OLD_FILES+=usr/sbin/mld6query OLD_FILES+=usr/sbin/ndp diff --git a/tools/tools/nanobsd/pcengines/ALIX_DSK b/tools/tools/nanobsd/pcengines/ALIX_DSK index 32b0b8ca136..1d750e5e2bd 100644 --- a/tools/tools/nanobsd/pcengines/ALIX_DSK +++ b/tools/tools/nanobsd/pcengines/ALIX_DSK @@ -71,7 +71,6 @@ device tun device pty device md device gif -device faith device firmware device bpf device uhci diff --git a/tools/tools/nanobsd/pcengines/ALIX_NFS b/tools/tools/nanobsd/pcengines/ALIX_NFS index fe911da613f..3c0f0da6237 100644 --- a/tools/tools/nanobsd/pcengines/ALIX_NFS +++ b/tools/tools/nanobsd/pcengines/ALIX_NFS @@ -69,7 +69,6 @@ device tun device pty device md device gif -device faith device firmware device bpf device uhci diff 
--git a/tools/tools/sysdoc/tunables.mdoc b/tools/tools/sysdoc/tunables.mdoc index 1c5569bee9a..6d88856cfd0 100644 --- a/tools/tools/sysdoc/tunables.mdoc +++ b/tools/tools/sysdoc/tunables.mdoc @@ -1311,14 +1311,6 @@ net.inet.ip.intr_queue_drops --- net.inet.ip.intr_queue_maxlen ---- -net.inet.ip.keepfaith -bool - -This is used in conjunction with -.Xr faithd 8 -to control the FAITH IPv6/v4 translator daemon. - --- net.inet.ip.maxfragpackets diff --git a/tools/tools/tinybsd/conf/default/TINYBSD b/tools/tools/tinybsd/conf/default/TINYBSD index ae7c9f6390b..aba05c5bcfc 100644 --- a/tools/tools/tinybsd/conf/default/TINYBSD +++ b/tools/tools/tinybsd/conf/default/TINYBSD @@ -148,7 +148,6 @@ device tun # Packet tunnel. device pty # Pseudo-ttys (telnet etc) device md # Memory "disks" device gif # IPv6 and IPv4 tunneling -device faith # IPv6-to-IPv4 relaying (translation) # The `bpf' device enables the Berkeley Packet Filter. # Be aware of the administrative consequences of enabling this! diff --git a/tools/tools/tinybsd/conf/vpn/TINYBSD b/tools/tools/tinybsd/conf/vpn/TINYBSD index f7159ad8a9b..7e6da0edf1e 100644 --- a/tools/tools/tinybsd/conf/vpn/TINYBSD +++ b/tools/tools/tinybsd/conf/vpn/TINYBSD @@ -138,7 +138,6 @@ device tun # Packet tunnel. device pty # Pseudo-ttys (telnet etc) device md # Memory "disks" device gif # IPv6 and IPv4 tunneling -device faith # IPv6-to-IPv4 relaying (translation) # The `bpf' device enables the Berkeley Packet Filter. # Be aware of the administrative consequences of enabling this! diff --git a/tools/tools/tinybsd/conf/wrap/TINYBSD b/tools/tools/tinybsd/conf/wrap/TINYBSD index aff67ea95ef..d04b0659c4e 100644 --- a/tools/tools/tinybsd/conf/wrap/TINYBSD +++ b/tools/tools/tinybsd/conf/wrap/TINYBSD @@ -110,7 +110,6 @@ device tun # Packet tunnel. 
device pty # Pseudo-ttys (telnet etc) device md # Memory "disks" device gif # IPv6 and IPv4 tunneling -device faith # IPv6-to-IPv4 relaying (translation) # The `bpf' device enables the Berkeley Packet Filter. # Be aware of the administrative consequences of enabling this! diff --git a/usr.sbin/Makefile b/usr.sbin/Makefile index b0cff950bf2..df9f8674793 100644 --- a/usr.sbin/Makefile +++ b/usr.sbin/Makefile @@ -160,7 +160,6 @@ SUBDIR+= gpioctl .endif .if ${MK_INET6} != "no" -SUBDIR+= faithd SUBDIR+= ip6addrctl SUBDIR+= mld6query SUBDIR+= ndp diff --git a/usr.sbin/bsdconfig/networking/share/device.subr b/usr.sbin/bsdconfig/networking/share/device.subr index 42010ab10bf..bb41be300a0 100644 --- a/usr.sbin/bsdconfig/networking/share/device.subr +++ b/usr.sbin/bsdconfig/networking/share/device.subr @@ -82,7 +82,7 @@ f_dialog_menu_netdev() f_struct "$dev" get name if || continue # Skip unsavory interfaces case "$if" in - lo[0-9]*|ppp[0-9]*|sl[0-9]*|faith[0-9]*) continue ;; + lo[0-9]*|ppp[0-9]*|sl[0-9]*) continue ;; esac iflist="$iflist $if" done diff --git a/usr.sbin/faithd/Makefile b/usr.sbin/faithd/Makefile deleted file mode 100644 index dec45b9645b..00000000000 --- a/usr.sbin/faithd/Makefile +++ /dev/null @@ -1,25 +0,0 @@ -# Copyright (c) 1996 WIDE Project. All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modifications, are permitted provided that the above copyright notice -# and this paragraph are duplicated in all such forms and that any -# documentation, advertising materials, and other materials related to -# such distribution and use acknowledge that the software was developed -# by the WIDE Project, Japan. The name of the Project may not be used to -# endorse or promote products derived from this software without -# specific prior written permission. 
THIS SOFTWARE IS PROVIDED ``AS IS'' -# AND WITHOUT ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, WITHOUT -# LIMITATION, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -# A PARTICULAR PURPOSE. -# -# $FreeBSD$ - -PROG= faithd -MAN= faithd.8 -SRCS= faithd.c tcp.c ftp.c prefix.c - -CFLAGS+= -DHAVE_POLL_H - -WARNS?= 2 - -.include diff --git a/usr.sbin/faithd/README b/usr.sbin/faithd/README deleted file mode 100644 index 6628bf63dcb..00000000000 --- a/usr.sbin/faithd/README +++ /dev/null @@ -1,148 +0,0 @@ -Configuring FAITH IPv6-to-IPv4 TCP relay - -Kazu Yamamoto and Jun-ichiro itojun Hagino -$KAME: README,v 1.10 2003/01/06 21:40:33 sumikawa Exp $ -$FreeBSD$ - - -Introduction -============ - -FAITH is an IPv6-to-IPv4 TCP relay. It performs tcp relay just as some of -firewall-oriented gateway does, but between IPv6 and IPv4 with address -translation. -TCP connections has to be made from IPv6 node to IPv4 node. FAITH will -not relay connections for the opposite direction. -To perform relays, FAITH daemon needs to be executed on a router between -your local IPv6 site and outside IPv4 network. The daemon needs to be -invoked per each TCP services (TCP port number). - - IPv4 node "dest" = 123.4.5.6 - | - [[[[ outside IPv4 ocean ]]]] - | - node that runs FAITH-daemon (usually a router) - | - ==+=====+===+==== IPv6, or IPv4/v6 network in your site ^ - | | | connection - clients IPv6 node "src" | - -You will have to allocate an IPv6 address prefix to map IPv4 addresses into. -The following description uses 3ffe:0501:ffff:0000:: as example. -Please use a prefix which belongs to your site. -FAITH will make it possible to make an IPv6 TCP connection From IPv6 node -"src", toward IPv4 node "dest", by specifying FAITH-mapped address -3ffe:0501:ffff:0000::123.4.5.6 -(which is, 3ffe:0501:ffff:0000:0000:0000:7b04:0506). -The address mapping can be performed by hand:-), by special nameserver on -the network, or by special resolver on the source node. 
- - -Setup -===== - -The following example assumes: -- You have assigned 3ffe:0501:ffff:0000:: as FAITH adderss prefix. -- You are willing to provide IPv6-to IPv4 TCP relay for telnet. - -<> - -(1) If you have IPv6 TCP server for the "telnet" service, i.e. telnetd via - inet6d, disable that daemon. Comment out the line from "inet6d.conf" - and send the HUP signal to "inet6d". - -(2) Execute sysctl as root to enable FAITH support in the kernel. - - # sysctl net.inet6.ip6.keepfaith=1 - -(3) Route packets toward FAITH prefix into "faith0" interface. - - # ifconfig faith0 up - # route add -inet6 3ffe:0501:ffff:0000:: -prefixlen 64 ::1 - # route change -inet6 3ffe:0501:ffff:0000:: -prefixlen 64 -ifp faith0 - -(4) Execute "faithd" by root as follows: - - # faithd telnet /usr/libexec/telnetd telnetd - - 1st argument is a service name you are willing to provide TCP relay. - (it can be specified either by number "23" or by string "telnet") - 2nd argument is a path name for local IPv6 TCP server. If there is a - connection toward the router itself, this program will be invoked. - 3rd and the following arguments are arguments for the local IPv6 TCP - server. (3rd argument is typically the program name without its path.) - - More examples: - - # faithd ftpd /usr/libexec/ftpd ftpd -l - # faithd sshd - -If inetd(8) on your platform have special support for faithd, it is possible -to setup faithd services via inetd(8). Consult manpage for details. - - -<> - -(4) Make sure that packets whose destinations match the prefix can -reach from the IPv6 host to the translating router. - -<> - -There are two ways to translate IPv4 address to IPv6 address: - (a) Faked by DNS - (b) Faked by /etc/hosts. - -(5.a) Install "newbie" and set up FAITH mode. See kit/ports/newbie. - -(5.b) Add an entry into /etc/hosts so that you can resolve hostname into -faked IPv6 addrss. 
For example, add the following line for www.netbsd.org: - - 3ffe:0501:ffff:0000::140.160.140.252 www.netbsd.org - -<> - -(6) To see if "faithd" works, watch "/var/log/daemon". Note: please -setup "/etc/syslog.conf" so that LOG_DAEMON messages are to be stored -in "/var/log/daemon". - - - daemon.* /var/log/daemon - - -Access control -============== - -Since faithd implements TCP relaying service, it is critical to implement -proper access control to cope with malicious use. Bad guy may try to -use your relay router to circumvent access controls, or may try to -abuse your network (like sending SPAMs from IPv4 address that belong to you). -Install IPv6 packet filter directives that would reject traffic from -unwanted source. If you are using inetd-based setup, you may be able to -use access control mechanisms in inetd. - - -Advanced configuration -====================== - -If you would like to restrict IPv4 destination for translation, you may -want to do the following: - - # route add -inet6 3ffe:0501:ffff:0000::123.0.0.0 -prefixlen 104 ::1 - # route change -inet6 3ffe:0501:ffff:0000::123.0.0.0 -prefixlen 104 \ - -ifp faith0 - -By this way, you can restrict IPv4 destination to 123.0.0.0/8. -You may also want to reject packets toward 3ffe:0501:ffff:0000::/64 which -is not in 3ffe:0501:ffff:0000::123.0.0.0/104. This will be left as excerside -for the reader. - -By doing this, you will be able to provide your IPv4 web server to outside -IPv6 customers, without risks of unwanted open relays. 
- - [[[[ IPv6 network outside ]]]] | - | | connection - node that runs FAITH-daemon (usually a router) v - | - ========+======== IPv4/v6 network in your site - | (123.0.0.0/8) - IPv4 web server diff --git a/usr.sbin/faithd/faithd.8 b/usr.sbin/faithd/faithd.8 deleted file mode 100644 index 93a835b9a97..00000000000 --- a/usr.sbin/faithd/faithd.8 +++ /dev/null @@ -1,404 +0,0 @@ -.\" $KAME: faithd.8,v 1.37 2002/05/09 14:21:23 itojun Exp $ -.\" -.\" Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. -.\" All rights reserved. -.\" -.\" Redistribution and use in source and binary forms, with or without -.\" modification, are permitted provided that the following conditions -.\" are met: -.\" 1. Redistributions of source code must retain the above copyright -.\" notice, this list of conditions and the following disclaimer. -.\" 2. Redistributions in binary form must reproduce the above copyright -.\" notice, this list of conditions and the following disclaimer in the -.\" documentation and/or other materials provided with the distribution. -.\" 3. Neither the name of the project nor the names of its contributors -.\" may be used to endorse or promote products derived from this software -.\" without specific prior written permission. -.\" -.\" THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND -.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -.\" ARE DISCLAIMED. 
IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE -.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY -.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF -.\" SUCH DAMAGE. -.\" -.\" $FreeBSD$ -.\" -.Dd August 2, 2011 -.Dt FAITHD 8 -.Os -.Sh NAME -.Nm faithd -.Nd FAITH IPv6/v4 translator daemon -.Sh SYNOPSIS -.Nm -.Op Fl dp -.Op Fl f Ar configfile -.Ar service -.Op Ar serverpath Op Ar serverargs -.Sh DESCRIPTION -The -.Nm -utility provides IPv6-to-IPv4 TCP relaying. -It can only be used on an IPv4/v6 dual stack router. -.Pp -When -.Nm -receives -.Tn TCPv6 -traffic, it will relay the -.Tn TCPv6 -traffic to -.Tn TCPv4 . -The destination for the relayed -.Tn TCPv4 -connection will be determined by the last 4 octets of the original -.Tn IPv6 -destination. -For example, if -.Li 3ffe:0501:4819:ffff:: -is reserved for -.Nm , -and the -.Tn TCPv6 -destination address is -.Li 3ffe:0501:4819:ffff::0a01:0101 , -the traffic will be relayed to IPv4 destination -.Li 10.1.1.1 . -.Pp -To use the -.Nm -translation service, -an IPv6 address prefix must be reserved for mapping IPv4 addresses into. -The kernel must be properly configured to route all the TCP connections -toward the reserved IPv6 address prefix into the -.Xr faith 4 -pseudo interface, using the -.Xr route 8 -command. -Also, -.Xr sysctl 8 -should be used to configure -.Dv net.inet6.ip6.keepfaith -to -.Dv 1 . -.Pp -The router must be configured to capture all the TCP traffic -for the reserved -.Tn IPv6 -address prefix, by using -.Xr route 8 -and -.Xr sysctl 8 -commands. 
-.Pp -The -.Nm -utility needs special name-to-address translation logic, so that -hostnames get resolved into the special -.Tn IPv6 -address prefix. -For small-scale installations, use -.Xr hosts 5 ; -For large-scale installations, it is useful to have -a DNS server with special address translation support. -An implementation called -.Nm totd -is available at -.Pa http://www.vermicelli.pasta.cs.uit.no/software/totd.html . -Make sure you do not propagate translated DNS records over to normal -DNS, as it can cause severe problems. -.Ss Daemon mode -When -.Nm -is invoked as a standalone program, -.Nm -will daemonize itself. -The -.Nm -utility will listen to -.Tn TCPv6 -port -.Ar service . -If -.Tn TCPv6 -traffic to port -.Ar service -is found, it relays the connection. -.Pp -Since -.Nm -listens to TCP port -.Ar service , -it is not possible to run local TCP daemons for port -.Ar service -on the router, using -.Xr inetd 8 -or other standard mechanisms. -By specifying -.Ar serverpath -to -.Nm , -you can run local daemons on the router. -The -.Nm -utility will invoke a local daemon at -.Ar serverpath -if the destination address is a local interface address, -and will perform translation to IPv4 TCP in other cases. -You can also specify -.Ar serverargs -for the arguments for the local daemon. -.Pp -The following options are available: -.Bl -tag -width indent -.It Fl d -Debugging information will be generated using -.Xr syslog 3 . -.It Fl f Ar configfile -Specify a configuration file for access control. -See below. -.It Fl p -Use privileged TCP port number as source port, -for IPv4 TCP connection toward final destination. -For relaying -.Xr ftp 1 , -this flag is not necessary as special program code is supplied. -.El -.Pp -The -.Nm -utility will relay both normal and out-of-band TCP data. -It is capable of emulating TCP half close as well. -The -.Nm -utility includes special support for protocols used by -.Xr ftp 1 . 
-When translating the FTP protocol, -.Nm -translates network level addresses in -.Li PORT/LPRT/EPRT -and -.Li PASV/LPSV/EPSV -commands. -.Pp -Inactive sessions will be disconnected in 30 minutes, -to prevent stale sessions from chewing up resources. -This may be inappropriate for some services -(should this be configurable?). -.Ss inetd mode -When -.Nm -is invoked via -.Xr inetd 8 , -.Nm -will handle connections passed from standard input. -If the connection endpoint is in the reserved IPv6 address prefix, -.Nm -will relay the connection. -Otherwise, -.Nm -will invoke a service-specific daemon like -.Xr telnetd 8 , -by using the command argument passed from -.Xr inetd 8 . -.Pp -The -.Nm -utility determines operation mode by the local TCP port number, -and enables special protocol handling whenever necessary/possible. -For example, if -.Nm -is invoked via -.Xr inetd 8 -on the FTP port, it will operate as an FTP relay. -.Pp -The operation mode requires special support for -.Nm -in -.Xr inetd 8 . -.Ss Access control -To prevent malicious access, -.Nm -implements simple address-based access control. -With -.Pa /etc/faithd.conf -(or -.Ar configfile -specified by -.Fl f ) , -.Nm -will avoid relaying unwanted traffic. -The -.Pa faithd.conf -configuration file contains directives of the following format: -.Bl -bullet -.It -.Ar src Ns / Ns Ar slen Cm deny Ar dst Ns / Ns Ar dlen -.Pp -If the source address of a query matches -.Ar src Ns / Ns Ar slen , -and the translated destination address matches -.Ar dst Ns / Ns Ar dlen , -deny the connection. -.It -.Ar src Ns / Ns Ar slen Cm permit Ar dst Ns / Ns Ar dlen -.Pp -If the source address of a query matches -.Ar src Ns / Ns Ar slen , -and the translated destination address matches -.Ar dst Ns / Ns Ar dlen , -permit the connection. -.El -.Pp -The directives are evaluated in sequence, -and the first matching entry will be effective. -If there is no match -(if we reach the end of the ruleset) -the traffic will be denied. 
-.Pp -With inetd mode, -traffic may be filtered by using access control functionality in -.Xr inetd 8 . -.Sh EXIT STATUS -The -.Nm -utility exits with -.Dv EXIT_SUCCESS -.Pq 0 -on success, and -.Dv EXIT_FAILURE -.Pq 1 -on error. -.Sh EXAMPLES -Before invoking -.Nm , -the -.Xr faith 4 -interface has to be configured properly. -.Bd -literal -offset indent -# sysctl net.inet6.ip6.accept_rtadv=0 -# sysctl net.inet6.ip6.forwarding=1 -# sysctl net.inet6.ip6.keepfaith=1 -# ifconfig faith0 up -# route add -inet6 3ffe:501:4819:ffff:: -prefixlen 96 ::1 -# route change -inet6 3ffe:501:4819:ffff:: -prefixlen 96 -ifp faith0 -.Ed -.Ss Daemon mode samples -To translate -.Li telnet -service, and provide no local telnet service, invoke -.Nm -as follows: -.Bd -literal -offset indent -# faithd telnet -.Ed -.Pp -If you would like to provide local telnet service via -.Xr telnetd 8 -on -.Pa /usr/libexec/telnetd , -use the following command line: -.Bd -literal -offset indent -# faithd telnet /usr/libexec/telnetd telnetd -.Ed -.Pp -If you would like to pass extra arguments to the local daemon: -.Bd -literal -offset indent -# faithd ftp /usr/libexec/ftpd ftpd -l -.Ed -.Pp -Here are some other examples. -You may need -.Fl p -if the service checks the source port range. -.Bd -literal -offset indent -# faithd ssh -# faithd telnet /usr/libexec/telnetd telnetd -.Ed -.Ss inetd mode samples -Add the following lines into -.Xr inetd.conf 5 . -Syntax may vary depending upon your operating system. -.Bd -literal -offset indent -telnet stream tcp6/faith nowait root faithd telnetd -ftp stream tcp6/faith nowait root faithd ftpd -l -ssh stream tcp6/faith nowait root faithd /usr/sbin/sshd -i -.Ed -.Pp -.Xr inetd 8 -will open listening sockets with kernel TCP relay support enabled. -Whenever a connection comes in, -.Nm -will be invoked by -.Xr inetd 8 . -If the connection endpoint is in the reserved IPv6 address prefix. -The -.Nm -utility will relay the connection. 
-Otherwise, -.Nm -will invoke service-specific daemon like -.Xr telnetd 8 . -.Ss Access control samples -The following illustrates a simple -.Pa faithd.conf -setting. -.Bd -literal -offset indent -# permit anyone from 3ffe:501:ffff::/48 to use the translator, -# to connect to the following IPv4 destinations: -# - any location except 10.0.0.0/8 and 127.0.0.0/8. -# Permit no other connections. -# -3ffe:501:ffff::/48 deny 10.0.0.0/8 -3ffe:501:ffff::/48 deny 127.0.0.0/8 -3ffe:501:ffff::/48 permit 0.0.0.0/0 -.Ed -.Sh SEE ALSO -.Xr faith 4 , -.Xr route 8 , -.Xr sysctl 8 -.Rs -.%A Jun-ichiro itojun Hagino -.%A Kazu Yamamoto -.%T "An IPv6-to-IPv4 transport relay translator" -.%B RFC3142 -.%U http://tools.ietf.org/html/rfc3142 -.%D June 2001 -.Re -.\" -.Sh HISTORY -The -.Nm -utility first appeared in the WIDE Hydrangea IPv6 protocol stack kit. -.\" -.Pp -IPv6 and IPsec support based on the KAME Project (http://www.kame.net/) stack -was initially integrated into -.Fx 4.0 . -.Sh SECURITY CONSIDERATIONS -It is very insecure to use IP-address based authentication, for connections relayed by -.Nm , -and any other TCP relaying services. -.Pp -Administrators are advised to limit accesses to -.Nm -using -.Pa faithd.conf , -or by using IPv6 packet filters, to protect the -.Nm -service from malicious parties, and to avoid theft of service/bandwidth. -IPv6 destination addresses can be limited by -carefully configuring routing entries that point to -.Xr faith 4 , -using -.Xr route 8 . -The IPv6 source address needs to be filtered using packet filters. -The documents listed in -.Sx SEE ALSO -have more information on this topic. diff --git a/usr.sbin/faithd/faithd.c b/usr.sbin/faithd/faithd.c deleted file mode 100644 index 1745de1f6b1..00000000000 --- a/usr.sbin/faithd/faithd.c +++ /dev/null @@ -1,908 +0,0 @@ -/* $KAME: faithd.c,v 1.67 2003/10/16 05:26:21 itojun Exp $ */ - -/* - * Copyright (C) 1997 and 1998 WIDE Project. - * All rights reserved. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the name of the project nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -/* - * User level translator from IPv6 to IPv4. - * - * Usage: faithd [ ...] - * e.g. 
faithd telnet /usr/libexec/telnetd telnetd - */ - -#include -__FBSDID("$FreeBSD$"); - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#ifdef HAVE_POLL_H -#include -#endif -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#ifdef IFT_FAITH -# define USE_ROUTE -# include -# include -# include -#endif - -#include -#include -#include -#include - -#include "faithd.h" -#include "prefix.h" - -char *serverpath = NULL; -char *serverarg[MAXARGV + 1]; -static char *faithdname = NULL; -char logname[BUFSIZ]; -char procname[BUFSIZ]; - -struct myaddrs { - struct myaddrs *next; - struct sockaddr *addr; -}; -struct myaddrs *myaddrs = NULL; - -static const char *service; -#ifdef USE_ROUTE -static int sockfd = 0; -#endif -int dflag = 0; -static int pflag = 0; -static int inetd = 0; -static char *configfile = NULL; - -int main(int, char **); -static int inetd_main(int, char **); -static int daemon_main(int, char **); -static void play_service(int); -static void play_child(int, struct sockaddr *); -static int faith_prefix(struct sockaddr *); -static int map6to4(struct sockaddr_in6 *, struct sockaddr_in *); -static void sig_child(int); -static void sig_terminate(int); -static void start_daemon(void); -static void exit_stderr(const char *, ...) 
- __attribute__((__format__(__printf__, 1, 2))); -static void grab_myaddrs(void); -static void free_myaddrs(void); -static void update_myaddrs(void); -static void usage(void); - -int -main(int argc, char **argv) -{ - - /* - * Initializing stuff - */ - - faithdname = strrchr(argv[0], '/'); - if (faithdname) - faithdname++; - else - faithdname = argv[0]; - - if (strcmp(faithdname, "faithd") != 0) { - inetd = 1; - return inetd_main(argc, argv); - } else - return daemon_main(argc, argv); -} - -static int -inetd_main(int argc, char **argv) -{ - char path[MAXPATHLEN]; - struct sockaddr_storage me; - struct sockaddr_storage from; - socklen_t melen, fromlen; - int i; - int error; - const int on = 1; - char sbuf[NI_MAXSERV], snum[NI_MAXSERV]; - - if (config_load(configfile) < 0 && configfile) { - exit_failure("could not load config file"); - /*NOTREACHED*/ - } - - if (strrchr(argv[0], '/') == NULL) - snprintf(path, sizeof(path), "%s/%s", DEFAULT_DIR, argv[0]); - else - snprintf(path, sizeof(path), "%s", argv[0]); - -#ifdef USE_ROUTE - grab_myaddrs(); - - sockfd = socket(PF_ROUTE, SOCK_RAW, PF_UNSPEC); - if (sockfd < 0) { - exit_failure("socket(PF_ROUTE): %s", strerror(errno)); - /*NOTREACHED*/ - } -#endif - - melen = sizeof(me); - if (getsockname(STDIN_FILENO, (struct sockaddr *)&me, &melen) < 0) { - exit_failure("getsockname: %s", strerror(errno)); - /*NOTREACHED*/ - } - fromlen = sizeof(from); - if (getpeername(STDIN_FILENO, (struct sockaddr *)&from, &fromlen) < 0) { - exit_failure("getpeername: %s", strerror(errno)); - /*NOTREACHED*/ - } - if (getnameinfo((struct sockaddr *)&me, melen, NULL, 0, - sbuf, sizeof(sbuf), NI_NUMERICHOST) == 0) - service = sbuf; - else - service = DEFAULT_PORT_NAME; - if (getnameinfo((struct sockaddr *)&me, melen, NULL, 0, - snum, sizeof(snum), NI_NUMERICHOST) != 0) - snprintf(snum, sizeof(snum), "?"); - - snprintf(logname, sizeof(logname), "faithd %s", snum); - snprintf(procname, sizeof(procname), "accepting port %s", snum); - openlog(logname, 
LOG_PID | LOG_NOWAIT, LOG_DAEMON); - - if (argc >= MAXARGV) { - exit_failure("too many arguments"); - /*NOTREACHED*/ - } - serverarg[0] = serverpath = path; - for (i = 1; i < argc; i++) - serverarg[i] = argv[i]; - serverarg[i] = NULL; - - error = setsockopt(STDIN_FILENO, SOL_SOCKET, SO_OOBINLINE, &on, - sizeof(on)); - if (error < 0) { - exit_failure("setsockopt(SO_OOBINLINE): %s", strerror(errno)); - /*NOTREACHED*/ - } - - play_child(STDIN_FILENO, (struct sockaddr *)&from); - exit_failure("should not reach here"); - return 0; /*dummy!*/ -} - -static int -daemon_main(int argc, char **argv) -{ - struct addrinfo hints, *res; - int s_wld, error, i, serverargc, on = 1; - int family = AF_INET6; - int c; - - while ((c = getopt(argc, argv, "df:p")) != -1) { - switch (c) { - case 'd': - dflag++; - break; - case 'f': - configfile = optarg; - break; - case 'p': - pflag++; - break; - default: - usage(); - /*NOTREACHED*/ - } - } - argc -= optind; - argv += optind; - - if (config_load(configfile) < 0 && configfile) { - exit_failure("could not load config file"); - /*NOTREACHED*/ - } - - -#ifdef USE_ROUTE - grab_myaddrs(); -#endif - - switch (argc) { - case 0: - usage(); - /*NOTREACHED*/ - default: - serverargc = argc - NUMARG; - if (serverargc >= MAXARGV) - exit_stderr("too many arguments"); - - serverpath = strdup(argv[NUMPRG]); - if (!serverpath) - exit_stderr("not enough core"); - for (i = 0; i < serverargc; i++) { - serverarg[i] = strdup(argv[i + NUMARG]); - if (!serverarg[i]) - exit_stderr("not enough core"); - } - serverarg[i] = NULL; - /* fall throuth */ - case 1: /* no local service */ - service = argv[NUMPRT]; - break; - } - - start_daemon(); - - /* - * Opening wild card socket for this service. - */ - - memset(&hints, 0, sizeof(hints)); - hints.ai_flags = AI_PASSIVE; - hints.ai_family = family; - hints.ai_socktype = SOCK_STREAM; - hints.ai_protocol = IPPROTO_TCP; /* SCTP? 
*/ - error = getaddrinfo(NULL, service, &hints, &res); - if (error) - exit_failure("getaddrinfo: %s", gai_strerror(error)); - - s_wld = socket(res->ai_family, res->ai_socktype, res->ai_protocol); - if (s_wld == -1) - exit_failure("socket: %s", strerror(errno)); - -#ifdef IPV6_FAITH - if (res->ai_family == AF_INET6) { - error = setsockopt(s_wld, IPPROTO_IPV6, IPV6_FAITH, &on, sizeof(on)); - if (error == -1) - exit_failure("setsockopt(IPV6_FAITH): %s", - strerror(errno)); - } -#endif - - error = setsockopt(s_wld, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on)); - if (error == -1) - exit_failure("setsockopt(SO_REUSEADDR): %s", strerror(errno)); - - error = setsockopt(s_wld, SOL_SOCKET, SO_OOBINLINE, &on, sizeof(on)); - if (error == -1) - exit_failure("setsockopt(SO_OOBINLINE): %s", strerror(errno)); - -#ifdef IPV6_V6ONLY - error = setsockopt(s_wld, IPPROTO_IPV6, IPV6_V6ONLY, &on, sizeof(on)); - if (error == -1) - exit_failure("setsockopt(IPV6_V6ONLY): %s", strerror(errno)); -#endif - - error = bind(s_wld, (struct sockaddr *)res->ai_addr, res->ai_addrlen); - if (error == -1) - exit_failure("bind: %s", strerror(errno)); - - error = listen(s_wld, 5); - if (error == -1) - exit_failure("listen: %s", strerror(errno)); - -#ifdef USE_ROUTE - sockfd = socket(PF_ROUTE, SOCK_RAW, PF_UNSPEC); - if (sockfd < 0) { - exit_failure("socket(PF_ROUTE): %s", strerror(errno)); - /*NOTREACHED*/ - } -#endif - - /* - * Everything is OK. 
- */ - - snprintf(logname, sizeof(logname), "faithd %s", service); - snprintf(procname, sizeof(procname), "accepting port %s", service); - openlog(logname, LOG_PID | LOG_NOWAIT, LOG_DAEMON); - syslog(LOG_INFO, "Starting faith daemon for %s port", service); - - play_service(s_wld); - /* NOTREACHED */ - exit(1); /*pacify gcc*/ -} - -static void -play_service(int s_wld) -{ - struct sockaddr_storage srcaddr; - socklen_t len; - int s_src; - pid_t child_pid; -#ifdef HAVE_POLL_H - struct pollfd pfd[2]; -#else - fd_set rfds; - int maxfd; -#endif - int error; - - /* - * Wait, accept, fork, faith.... - */ -again: - setproctitle("%s", procname); - -#ifdef HAVE_POLL_H - pfd[0].fd = s_wld; - pfd[0].events = POLLIN; - pfd[1].fd = -1; - pfd[1].revents = 0; -#else - FD_ZERO(&rfds); - if (s_wld >= FD_SETSIZE) - exit_failure("descriptor too big"); - FD_SET(s_wld, &rfds); - maxfd = s_wld; -#endif -#ifdef USE_ROUTE - if (sockfd) { -#ifdef HAVE_POLL_H - pfd[1].fd = sockfd; - pfd[1].events = POLLIN; -#else - if (sockfd >= FD_SETSIZE) - exit_failure("descriptor too big"); - FD_SET(sockfd, &rfds); - maxfd = (maxfd < sockfd) ? 
sockfd : maxfd; -#endif - } -#endif - -#ifdef HAVE_POLL_H - error = poll(pfd, sizeof(pfd)/sizeof(pfd[0]), INFTIM); -#else - error = select(maxfd + 1, &rfds, NULL, NULL, NULL); -#endif - if (error < 0) { - if (errno == EINTR) - goto again; - exit_failure("select: %s", strerror(errno)); - /*NOTREACHED*/ - } - -#ifdef USE_ROUTE -#ifdef HAVE_POLL_H - if (pfd[1].revents & POLLIN) -#else - if (FD_ISSET(sockfd, &rfds)) -#endif - { - update_myaddrs(); - } -#endif -#ifdef HAVE_POLL_H - if (pfd[0].revents & POLLIN) -#else - if (FD_ISSET(s_wld, &rfds)) -#endif - { - len = sizeof(srcaddr); - s_src = accept(s_wld, (struct sockaddr *)&srcaddr, &len); - if (s_src < 0) { - if (errno == ECONNABORTED) - goto again; - exit_failure("socket: %s", strerror(errno)); - /*NOTREACHED*/ - } - if (srcaddr.ss_family == AF_INET6 && - IN6_IS_ADDR_V4MAPPED(&((struct sockaddr_in6 *)&srcaddr)->sin6_addr)) { - close(s_src); - syslog(LOG_ERR, "connection from IPv4 mapped address?"); - goto again; - } - - child_pid = fork(); - - if (child_pid == 0) { - /* child process */ - close(s_wld); - closelog(); - openlog(logname, LOG_PID | LOG_NOWAIT, LOG_DAEMON); - play_child(s_src, (struct sockaddr *)&srcaddr); - exit_failure("should never reach here"); - /*NOTREACHED*/ - } else { - /* parent process */ - close(s_src); - if (child_pid == -1) - syslog(LOG_ERR, "can't fork"); - } - } - goto again; -} - -static void -play_child(int s_src, struct sockaddr *srcaddr) -{ - struct sockaddr_storage dstaddr6; - struct sockaddr_storage dstaddr4; - char src[NI_MAXHOST]; - char dst6[NI_MAXHOST]; - char dst4[NI_MAXHOST]; - socklen_t len = sizeof(dstaddr6); - int s_dst, error, hport, nresvport, on = 1; - struct timeval tv; - struct sockaddr *sa4; - const struct config *conf; - - tv.tv_sec = 1; - tv.tv_usec = 0; - - getnameinfo(srcaddr, srcaddr->sa_len, - src, sizeof(src), NULL, 0, NI_NUMERICHOST); - syslog(LOG_INFO, "accepted a client from %s", src); - - error = getsockname(s_src, (struct sockaddr *)&dstaddr6, &len); - if 
(error == -1) { - exit_failure("getsockname: %s", strerror(errno)); - /*NOTREACHED*/ - } - - getnameinfo((struct sockaddr *)&dstaddr6, len, - dst6, sizeof(dst6), NULL, 0, NI_NUMERICHOST); - syslog(LOG_INFO, "the client is connecting to %s", dst6); - - if (!faith_prefix((struct sockaddr *)&dstaddr6)) { - if (serverpath) { - /* - * Local service - */ - syslog(LOG_INFO, "executing local %s", serverpath); - if (!inetd) { - dup2(s_src, 0); - close(s_src); - dup2(0, 1); - dup2(0, 2); - } - execv(serverpath, serverarg); - syslog(LOG_ERR, "execv %s: %s", serverpath, - strerror(errno)); - _exit(EXIT_FAILURE); - } else { - close(s_src); - exit_success("no local service for %s", service); - } - } - - /* - * Act as a translator - */ - - switch (((struct sockaddr *)&dstaddr6)->sa_family) { - case AF_INET6: - if (!map6to4((struct sockaddr_in6 *)&dstaddr6, - (struct sockaddr_in *)&dstaddr4)) { - close(s_src); - exit_failure("map6to4 failed"); - /*NOTREACHED*/ - } - syslog(LOG_INFO, "translating from v6 to v4"); - break; - default: - close(s_src); - exit_failure("family not supported"); - /*NOTREACHED*/ - } - - sa4 = (struct sockaddr *)&dstaddr4; - getnameinfo(sa4, sa4->sa_len, - dst4, sizeof(dst4), NULL, 0, NI_NUMERICHOST); - - conf = config_match(srcaddr, sa4); - if (!conf || !conf->permit) { - close(s_src); - if (conf) { - exit_failure("translation to %s not permitted for %s", - dst4, prefix_string(&conf->match)); - /*NOTREACHED*/ - } else { - exit_failure("translation to %s not permitted", dst4); - /*NOTREACHED*/ - } - } - - syslog(LOG_INFO, "the translator is connecting to %s", dst4); - - setproctitle("port %s, %s -> %s", service, src, dst4); - - if (sa4->sa_family == AF_INET6) - hport = ntohs(((struct sockaddr_in6 *)&dstaddr4)->sin6_port); - else /* AF_INET */ - hport = ntohs(((struct sockaddr_in *)&dstaddr4)->sin_port); - - if (pflag) - s_dst = rresvport_af(&nresvport, sa4->sa_family); - else - s_dst = socket(sa4->sa_family, SOCK_STREAM, 0); - if (s_dst < 0) { - 
exit_failure("socket: %s", strerror(errno)); - /*NOTREACHED*/ - } - - if (conf->src.a.ss_family) { - if (bind(s_dst, (const struct sockaddr *)&conf->src.a, - conf->src.a.ss_len) < 0) { - exit_failure("bind: %s", strerror(errno)); - /*NOTREACHED*/ - } - } - - error = setsockopt(s_dst, SOL_SOCKET, SO_OOBINLINE, &on, sizeof(on)); - if (error < 0) { - exit_failure("setsockopt(SO_OOBINLINE): %s", strerror(errno)); - /*NOTREACHED*/ - } - - error = setsockopt(s_src, SOL_SOCKET, SO_SNDTIMEO, &tv, sizeof(tv)); - if (error < 0) { - exit_failure("setsockopt(SO_SNDTIMEO): %s", strerror(errno)); - /*NOTREACHED*/ - } - error = setsockopt(s_dst, SOL_SOCKET, SO_SNDTIMEO, &tv, sizeof(tv)); - if (error < 0) { - exit_failure("setsockopt(SO_SNDTIMEO): %s", strerror(errno)); - /*NOTREACHED*/ - } - - error = connect(s_dst, sa4, sa4->sa_len); - if (error < 0) { - exit_failure("connect: %s", strerror(errno)); - /*NOTREACHED*/ - } - - switch (hport) { - case FTP_PORT: - ftp_relay(s_src, s_dst); - break; - default: - tcp_relay(s_src, s_dst, service); - break; - } - - /* NOTREACHED */ -} - -/* 0: non faith, 1: faith */ -static int -faith_prefix(struct sockaddr *dst) -{ -#ifndef USE_ROUTE - int mib[4], size; - struct in6_addr faith_prefix; - struct sockaddr_in6 *dst6 = (struct sockaddr_in *)dst; - - if (dst->sa_family != AF_INET6) - return 0; - - mib[0] = CTL_NET; - mib[1] = PF_INET6; - mib[2] = IPPROTO_IPV6; - mib[3] = IPV6CTL_FAITH_PREFIX; - size = sizeof(struct in6_addr); - if (sysctl(mib, 4, &faith_prefix, &size, NULL, 0) < 0) { - exit_failure("sysctl: %s", strerror(errno)); - /*NOTREACHED*/ - } - - if (memcmp(dst, &faith_prefix, - sizeof(struct in6_addr) - sizeof(struct in_addr) == 0) { - return 1; - } - return 0; -#else - struct myaddrs *p; - struct sockaddr_in6 *sin6; - struct sockaddr_in *sin4; - struct sockaddr_in6 *dst6; - struct sockaddr_in *dst4; - struct sockaddr_in dstmap; - - dst6 = (struct sockaddr_in6 *)dst; - if (dst->sa_family == AF_INET6 - && 
IN6_IS_ADDR_V4MAPPED(&dst6->sin6_addr)) { - /* ugly... */ - memset(&dstmap, 0, sizeof(dstmap)); - dstmap.sin_family = AF_INET; - dstmap.sin_len = sizeof(dstmap); - memcpy(&dstmap.sin_addr, &dst6->sin6_addr.s6_addr[12], - sizeof(dstmap.sin_addr)); - dst = (struct sockaddr *)&dstmap; - } - - dst6 = (struct sockaddr_in6 *)dst; - dst4 = (struct sockaddr_in *)dst; - - for (p = myaddrs; p; p = p->next) { - sin6 = (struct sockaddr_in6 *)p->addr; - sin4 = (struct sockaddr_in *)p->addr; - - if (p->addr->sa_len != dst->sa_len - || p->addr->sa_family != dst->sa_family) - continue; - - switch (dst->sa_family) { - case AF_INET6: - if (sin6->sin6_scope_id == dst6->sin6_scope_id - && IN6_ARE_ADDR_EQUAL(&sin6->sin6_addr, &dst6->sin6_addr)) - return 0; - break; - case AF_INET: - if (sin4->sin_addr.s_addr == dst4->sin_addr.s_addr) - return 0; - break; - } - } - return 1; -#endif -} - -/* 0: non faith, 1: faith */ -static int -map6to4(struct sockaddr_in6 *dst6, struct sockaddr_in *dst4) -{ - memset(dst4, 0, sizeof(*dst4)); - dst4->sin_len = sizeof(*dst4); - dst4->sin_family = AF_INET; - dst4->sin_port = dst6->sin6_port; - memcpy(&dst4->sin_addr, &dst6->sin6_addr.s6_addr[12], - sizeof(dst4->sin_addr)); - - if (dst4->sin_addr.s_addr == INADDR_ANY - || dst4->sin_addr.s_addr == INADDR_BROADCAST - || IN_MULTICAST(ntohl(dst4->sin_addr.s_addr))) - return 0; - - return 1; -} - - -static void -sig_child(int sig __unused) -{ - int status; - pid_t pid; - - while ((pid = wait3(&status, WNOHANG, (struct rusage *)0)) > 0) - if (WEXITSTATUS(status)) - syslog(LOG_WARNING, "child %ld exit status 0x%x", - (long)pid, status); -} - -void -sig_terminate(int sig __unused) -{ - syslog(LOG_INFO, "Terminating faith daemon"); - exit(EXIT_SUCCESS); -} - -static void -start_daemon(void) -{ -#ifdef SA_NOCLDWAIT - struct sigaction sa; -#endif - - if (daemon(0, 0) == -1) - exit_stderr("daemon: %s", strerror(errno)); - -#ifdef SA_NOCLDWAIT - memset(&sa, 0, sizeof(sa)); - sa.sa_handler = sig_child; - sa.sa_flags = 
SA_NOCLDWAIT; - sigemptyset(&sa.sa_mask); - sigaction(SIGCHLD, &sa, (struct sigaction *)0); -#else - if (signal(SIGCHLD, sig_child) == SIG_ERR) { - exit_failure("signal CHLD: %s", strerror(errno)); - /*NOTREACHED*/ - } -#endif - - if (signal(SIGTERM, sig_terminate) == SIG_ERR) { - exit_failure("signal TERM: %s", strerror(errno)); - /*NOTREACHED*/ - } -} - -static void -exit_stderr(const char *fmt, ...) -{ - va_list ap; - char buf[BUFSIZ]; - - va_start(ap, fmt); - vsnprintf(buf, sizeof(buf), fmt, ap); - va_end(ap); - fprintf(stderr, "%s\n", buf); - exit(EXIT_FAILURE); -} - -void -exit_failure(const char *fmt, ...) -{ - va_list ap; - char buf[BUFSIZ]; - - va_start(ap, fmt); - vsnprintf(buf, sizeof(buf), fmt, ap); - va_end(ap); - syslog(LOG_ERR, "%s", buf); - exit(EXIT_FAILURE); -} - -void -exit_success(const char *fmt, ...) -{ - va_list ap; - char buf[BUFSIZ]; - - va_start(ap, fmt); - vsnprintf(buf, sizeof(buf), fmt, ap); - va_end(ap); - syslog(LOG_INFO, "%s", buf); - exit(EXIT_SUCCESS); -} - -#ifdef USE_ROUTE -static void -grab_myaddrs(void) -{ - struct ifaddrs *ifap, *ifa; - struct myaddrs *p; - struct sockaddr_in6 *sin6; - - if (getifaddrs(&ifap) != 0) { - exit_failure("getifaddrs"); - /*NOTREACHED*/ - } - - for (ifa = ifap; ifa; ifa = ifa->ifa_next) { - switch (ifa->ifa_addr->sa_family) { - case AF_INET: - case AF_INET6: - break; - default: - continue; - } - - p = (struct myaddrs *)malloc(sizeof(struct myaddrs) + - ifa->ifa_addr->sa_len); - if (!p) { - exit_failure("not enough core"); - /*NOTREACHED*/ - } - memcpy(p + 1, ifa->ifa_addr, ifa->ifa_addr->sa_len); - p->next = myaddrs; - p->addr = (struct sockaddr *)(p + 1); -#ifdef __KAME__ - if (ifa->ifa_addr->sa_family == AF_INET6) { - sin6 = (struct sockaddr_in6 *)p->addr; - if (IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr) - || IN6_IS_ADDR_SITELOCAL(&sin6->sin6_addr)) { - sin6->sin6_scope_id = - ntohs(*(u_int16_t *)&sin6->sin6_addr.s6_addr[2]); - sin6->sin6_addr.s6_addr[2] = 0; - sin6->sin6_addr.s6_addr[3] = 0; - } - } 
-#endif - myaddrs = p; - if (dflag) { - char hbuf[NI_MAXHOST]; - getnameinfo(p->addr, p->addr->sa_len, - hbuf, sizeof(hbuf), NULL, 0, - NI_NUMERICHOST); - syslog(LOG_INFO, "my interface: %s %s", hbuf, - ifa->ifa_name); - } - } - - freeifaddrs(ifap); -} - -static void -free_myaddrs(void) -{ - struct myaddrs *p, *q; - - p = myaddrs; - while (p) { - q = p->next; - free(p); - p = q; - } - myaddrs = NULL; -} - -static void -update_myaddrs(void) -{ - char msg[BUFSIZ]; - int len; - struct rt_msghdr *rtm; - - len = read(sockfd, msg, sizeof(msg)); - if (len < 0) { - syslog(LOG_ERR, "read(PF_ROUTE) failed"); - return; - } - rtm = (struct rt_msghdr *)msg; - if (len < 4 || len < rtm->rtm_msglen) { - syslog(LOG_ERR, "read(PF_ROUTE) short read"); - return; - } - if (rtm->rtm_version != RTM_VERSION) { - syslog(LOG_ERR, "routing socket version mismatch"); - close(sockfd); - sockfd = 0; - return; - } - switch (rtm->rtm_type) { - case RTM_NEWADDR: - case RTM_DELADDR: - case RTM_IFINFO: - break; - default: - return; - } - /* XXX more filters here? */ - - syslog(LOG_INFO, "update interface address list"); - free_myaddrs(); - grab_myaddrs(); -} -#endif /*USE_ROUTE*/ - -static void -usage(void) -{ - fprintf(stderr, "usage: %s [-dp] [-f conf] service [serverpath [serverargs]]\n", - faithdname); - exit(0); -} diff --git a/usr.sbin/faithd/faithd.h b/usr.sbin/faithd/faithd.h deleted file mode 100644 index c578d46c4d2..00000000000 --- a/usr.sbin/faithd/faithd.h +++ /dev/null @@ -1,70 +0,0 @@ -/* $KAME: faithd.h,v 1.9 2002/05/09 09:41:24 itojun Exp $ */ - -/* - * Copyright (C) 1997 and 1998 WIDE Project. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. 
Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the name of the project nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * $FreeBSD$ - */ - -extern char logname[]; -extern int dflag; - -extern void tcp_relay(int, int, const char *); -extern void ftp_relay(int, int); -extern int ftp_active(int, int, int *, int *); -extern int ftp_passive(int, int, int *, int *); -extern void exit_success(const char *, ...) - __attribute__((__format__(__printf__, 1, 2))); -extern void exit_failure(const char *, ...) 
- __attribute__((__format__(__printf__, 1, 2))); - -#define DEFAULT_PORT_NAME "telnet" -#define DEFAULT_DIR "/usr/libexec" -#define DEFAULT_NAME "telnetd" -#define DEFAULT_PATH (DEFAULT_DIR "/" DEFAULT_NAME) - -#define FTP_PORT 21 -#define RLOGIN_PORT 513 -#define RSH_PORT 514 - -#define RETURN_SUCCESS 0 -#define RETURN_FAILURE 1 - -#define YES 1 -#define NO 0 - -#define MSS 2048 -#define MAXARGV 20 - -#define NUMPRT 0 -#define NUMPRG 1 -#define NUMARG 2 - -#define UC(b) (((int)b)&0xff) - -#define FAITH_TIMEOUT (30 * 60) /*second*/ diff --git a/usr.sbin/faithd/ftp.c b/usr.sbin/faithd/ftp.c deleted file mode 100644 index c54371a2c20..00000000000 --- a/usr.sbin/faithd/ftp.c +++ /dev/null @@ -1,1085 +0,0 @@ -/* $KAME: ftp.c,v 1.24 2005/03/16 05:05:48 itojun Exp $ */ - -/* - * Copyright (C) 1997 and 1998 WIDE Project. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the name of the project nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * $FreeBSD$ - */ - -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#ifdef HAVE_POLL_H -#include -#endif -#include -#include - -#include -#include -#include - -#include "faithd.h" - -static char rbuf[MSS]; -static char sbuf[MSS]; -static int passivemode = 0; -static int wport4 = -1; /* listen() to active */ -static int wport6 = -1; /* listen() to passive */ -static int port4 = -1; /* active: inbound passive: outbound */ -static int port6 = -1; /* active: outbound passive: inbound */ -static struct sockaddr_storage data4; /* server data address */ -static struct sockaddr_storage data6; /* client data address */ -static int epsvall = 0; - -enum state { NONE, LPRT, EPRT, LPSV, EPSV }; - -static int ftp_activeconn(void); -static int ftp_passiveconn(void); -static int ftp_copy(int, int); -static int ftp_copyresult(int, int, enum state); -static int ftp_copycommand(int, int, enum state *); - -void -ftp_relay(int ctl6, int ctl4) -{ -#ifdef HAVE_POLL_H - struct pollfd pfd[6]; -#else - fd_set readfds; -#endif - int error; - enum state state = NONE; - struct timeval tv; - - syslog(LOG_INFO, "starting ftp control connection"); - - for (;;) { -#ifdef HAVE_POLL_H - pfd[0].fd = ctl4; - pfd[0].events = POLLIN; - pfd[1].fd = ctl6; - pfd[1].events = POLLIN; - if (0 <= port4) { - pfd[2].fd = port4; - pfd[2].events = POLLIN; - } else - pfd[2].fd = -1; - if (0 <= port6) { - pfd[3].fd = port6; - 
pfd[3].events = POLLIN; - } else - pfd[3].fd = -1; -#if 0 - if (0 <= wport4) { - pfd[4].fd = wport4; - pfd[4].events = POLLIN; - } else - pfd[4].fd = -1; - if (0 <= wport6) { - pfd[5].fd = wport4; - pfd[5].events = POLLIN; - } else - pfd[5].fd = -1; -#else - pfd[4].fd = pfd[5].fd = -1; - pfd[4].events = pfd[5].events = 0; -#endif -#else - int maxfd = 0; - - FD_ZERO(&readfds); - if (ctl4 >= FD_SETSIZE) - exit_failure("descriptor too big"); - FD_SET(ctl4, &readfds); - maxfd = ctl4; - if (ctl6 >= FD_SETSIZE) - exit_failure("descriptor too big"); - FD_SET(ctl6, &readfds); - maxfd = (ctl6 > maxfd) ? ctl6 : maxfd; - if (0 <= port4) { - if (port4 >= FD_SETSIZE) - exit_failure("descriptor too big"); - FD_SET(port4, &readfds); - maxfd = (port4 > maxfd) ? port4 : maxfd; - } - if (0 <= port6) { - if (port6 >= FD_SETSIZE) - exit_failure("descriptor too big"); - FD_SET(port6, &readfds); - maxfd = (port6 > maxfd) ? port6 : maxfd; - } -#if 0 - if (0 <= wport4) { - if (wport4 >= FD_SETSIZE) - exit_failure("descriptor too big"); - FD_SET(wport4, &readfds); - maxfd = (wport4 > maxfd) ? wport4 : maxfd; - } - if (0 <= wport6) { - if (wport6 >= FD_SETSIZE) - exit_failure("descriptor too big"); - FD_SET(wport6, &readfds); - maxfd = (wport6 > maxfd) ? wport6 : maxfd; - } -#endif -#endif - tv.tv_sec = FAITH_TIMEOUT; - tv.tv_usec = 0; - -#ifdef HAVE_POLL_H - error = poll(pfd, sizeof(pfd)/sizeof(pfd[0]), tv.tv_sec * 1000); -#else - error = select(maxfd + 1, &readfds, NULL, NULL, &tv); -#endif - if (error == -1) { -#ifdef HAVE_POLL_H - exit_failure("poll: %s", strerror(errno)); -#else - exit_failure("select: %s", strerror(errno)); -#endif - } - else if (error == 0) - exit_failure("connection timeout"); - - /* - * The order of the following checks does (slightly) matter. - * It is important to visit all checks (do not use "continue"), - * otherwise some of the pipe may become full and we cannot - * relay correctly. 
- */ -#ifdef HAVE_POLL_H - if (pfd[1].revents & POLLIN) -#else - if (FD_ISSET(ctl6, &readfds)) -#endif - { - /* - * copy control connection from the client. - * command translation is necessary. - */ - error = ftp_copycommand(ctl6, ctl4, &state); - - if (error < 0) - goto bad; - else if (error == 0) { - close(ctl4); - close(ctl6); - exit_success("terminating ftp control connection"); - /*NOTREACHED*/ - } - } -#ifdef HAVE_POLL_H - if (pfd[0].revents & POLLIN) -#else - if (FD_ISSET(ctl4, &readfds)) -#endif - { - /* - * copy control connection from the server - * translation of result code is necessary. - */ - error = ftp_copyresult(ctl4, ctl6, state); - - if (error < 0) - goto bad; - else if (error == 0) { - close(ctl4); - close(ctl6); - exit_success("terminating ftp control connection"); - /*NOTREACHED*/ - } - } -#ifdef HAVE_POLL_H - if (0 <= port4 && 0 <= port6 && (pfd[2].revents & POLLIN)) -#else - if (0 <= port4 && 0 <= port6 && FD_ISSET(port4, &readfds)) -#endif - { - /* - * copy data connection. - * no special treatment necessary. - */ -#ifdef HAVE_POLL_H - if (pfd[2].revents & POLLIN) -#else - if (FD_ISSET(port4, &readfds)) -#endif - error = ftp_copy(port4, port6); - switch (error) { - case -1: - goto bad; - case 0: - close(port4); - close(port6); - port4 = port6 = -1; - syslog(LOG_INFO, "terminating data connection"); - break; - default: - break; - } - } -#ifdef HAVE_POLL_H - if (0 <= port4 && 0 <= port6 && (pfd[3].revents & POLLIN)) -#else - if (0 <= port4 && 0 <= port6 && FD_ISSET(port6, &readfds)) -#endif - { - /* - * copy data connection. - * no special treatment necessary. 
- */ -#ifdef HAVE_POLL_H - if (pfd[3].revents & POLLIN) -#else - if (FD_ISSET(port6, &readfds)) -#endif - error = ftp_copy(port6, port4); - switch (error) { - case -1: - goto bad; - case 0: - close(port4); - close(port6); - port4 = port6 = -1; - syslog(LOG_INFO, "terminating data connection"); - break; - default: - break; - } - } -#if 0 -#ifdef HAVE_POLL_H - if (wport4 && (pfd[4].revents & POLLIN)) -#else - if (wport4 && FD_ISSET(wport4, &readfds)) -#endif - { - /* - * establish active data connection from the server. - */ - ftp_activeconn(); - } -#ifdef HAVE_POLL_H - if (wport4 && (pfd[5].revents & POLLIN)) -#else - if (wport6 && FD_ISSET(wport6, &readfds)) -#endif - { - /* - * establish passive data connection from the client. - */ - ftp_passiveconn(); - } -#endif - } - - bad: - exit_failure("%s", strerror(errno)); -} - -static int -ftp_activeconn() -{ - socklen_t n; - int error; -#ifdef HAVE_POLL_H - struct pollfd pfd[1]; -#else - fd_set set; -#endif - struct timeval timeout; - struct sockaddr *sa; - - /* get active connection from server */ -#ifdef HAVE_POLL_H - pfd[0].fd = wport4; - pfd[0].events = POLLIN; -#else - FD_ZERO(&set); - if (wport4 >= FD_SETSIZE) - exit_failure("descriptor too big"); - FD_SET(wport4, &set); -#endif - timeout.tv_sec = 120; - timeout.tv_usec = 0; - n = sizeof(data4); -#ifdef HAVE_POLL_H - if (poll(pfd, sizeof(pfd)/sizeof(pfd[0]), timeout.tv_sec * 1000) == 0 || - (port4 = accept(wport4, (struct sockaddr *)&data4, &n)) < 0) -#else - if (select(wport4 + 1, &set, NULL, NULL, &timeout) == 0 || - (port4 = accept(wport4, (struct sockaddr *)&data4, &n)) < 0) -#endif - { - close(wport4); - wport4 = -1; - syslog(LOG_INFO, "active mode data connection failed"); - return -1; - } - - /* ask active connection to client */ - sa = (struct sockaddr *)&data6; - port6 = socket(sa->sa_family, SOCK_STREAM, 0); - if (port6 == -1) { - close(port4); - close(wport4); - port4 = wport4 = -1; - syslog(LOG_INFO, "active mode data connection failed"); - return -1; 
- } - error = connect(port6, sa, sa->sa_len); - if (error < 0) { - close(port6); - close(port4); - close(wport4); - port6 = port4 = wport4 = -1; - syslog(LOG_INFO, "active mode data connection failed"); - return -1; - } - - syslog(LOG_INFO, "active mode data connection established"); - return 0; -} - -static int -ftp_passiveconn() -{ - socklen_t len; - int error; -#ifdef HAVE_POLL_H - struct pollfd pfd[1]; -#else - fd_set set; -#endif - struct timeval timeout; - struct sockaddr *sa; - - /* get passive connection from client */ -#ifdef HAVE_POLL_H - pfd[0].fd = wport6; - pfd[0].events = POLLIN; -#else - FD_ZERO(&set); - if (wport6 >= FD_SETSIZE) - exit_failure("descriptor too big"); - FD_SET(wport6, &set); -#endif - timeout.tv_sec = 120; - timeout.tv_usec = 0; - len = sizeof(data6); -#ifdef HAVE_POLL_H - if (poll(pfd, sizeof(pfd)/sizeof(pfd[0]), timeout.tv_sec * 1000) == 0 || - (port6 = accept(wport6, (struct sockaddr *)&data6, &len)) < 0) -#else - if (select(wport6 + 1, &set, NULL, NULL, &timeout) == 0 || - (port6 = accept(wport6, (struct sockaddr *)&data6, &len)) < 0) -#endif - { - close(wport6); - wport6 = -1; - syslog(LOG_INFO, "passive mode data connection failed"); - return -1; - } - - /* ask passive connection to server */ - sa = (struct sockaddr *)&data4; - port4 = socket(sa->sa_family, SOCK_STREAM, 0); - if (port4 == -1) { - close(wport6); - close(port6); - wport6 = port6 = -1; - syslog(LOG_INFO, "passive mode data connection failed"); - return -1; - } - error = connect(port4, sa, sa->sa_len); - if (error < 0) { - close(wport6); - close(port4); - close(port6); - wport6 = port4 = port6 = -1; - syslog(LOG_INFO, "passive mode data connection failed"); - return -1; - } - - syslog(LOG_INFO, "passive mode data connection established"); - return 0; -} - -static int -ftp_copy(int src, int dst) -{ - int error, atmark, n; - - /* OOB data handling */ - error = ioctl(src, SIOCATMARK, &atmark); - if (error != -1 && atmark == 1) { - n = read(src, rbuf, 1); - if (n == -1) 
- goto bad; - send(dst, rbuf, n, MSG_OOB); -#if 0 - n = read(src, rbuf, sizeof(rbuf)); - if (n == -1) - goto bad; - write(dst, rbuf, n); - return n; -#endif - } - - n = read(src, rbuf, sizeof(rbuf)); - switch (n) { - case -1: - case 0: - return n; - default: - write(dst, rbuf, n); - return n; - } - - bad: - exit_failure("%s", strerror(errno)); - /*NOTREACHED*/ - return 0; /* to make gcc happy */ -} - -static int -ftp_copyresult(int src, int dst, enum state state) -{ - int error, atmark, n; - socklen_t len; - char *param; - int code; - char *a, *p; - int i; - - /* OOB data handling */ - error = ioctl(src, SIOCATMARK, &atmark); - if (error != -1 && atmark == 1) { - n = read(src, rbuf, 1); - if (n == -1) - goto bad; - send(dst, rbuf, n, MSG_OOB); -#if 0 - n = read(src, rbuf, sizeof(rbuf)); - if (n == -1) - goto bad; - write(dst, rbuf, n); - return n; -#endif - } - - n = read(src, rbuf, sizeof(rbuf)); - if (n <= 0) - return n; - rbuf[n] = '\0'; - - /* - * parse argument - */ - p = rbuf; - for (i = 0; i < 3; i++) { - if (!isdigit(*p)) { - /* invalid reply */ - write(dst, rbuf, n); - return n; - } - p++; - } - if (!isspace(*p)) { - /* invalid reply */ - write(dst, rbuf, n); - return n; - } - code = atoi(rbuf); - param = p; - /* param points to first non-command token, if any */ - while (*param && isspace(*param)) - param++; - if (!*param) - param = NULL; - - switch (state) { - case NONE: - if (!passivemode && rbuf[0] == '1') { - if (ftp_activeconn() < 0) { - n = snprintf(rbuf, sizeof(rbuf), - "425 Cannot open data connetion\r\n"); - if (n < 0 || n >= sizeof(rbuf)) - n = 0; - } - } - if (n) - write(dst, rbuf, n); - return n; - case LPRT: - case EPRT: - /* expecting "200 PORT command successful." */ - if (code == 200) { - p = strstr(rbuf, "PORT"); - if (p) { - p[0] = (state == LPRT) ? 
'L' : 'E'; - p[1] = 'P'; - } - } else { - close(wport4); - wport4 = -1; - } - write(dst, rbuf, n); - return n; - case LPSV: - case EPSV: - /* - * expecting "227 Entering Passive Mode (x,x,x,x,x,x,x)" - * (in some cases result comes without paren) - */ - if (code != 227) { -passivefail0: - close(wport6); - wport6 = -1; - write(dst, rbuf, n); - return n; - } - - { - unsigned int ho[4], po[2]; - struct sockaddr_in *sin; - struct sockaddr_in6 *sin6; - u_short port; - - /* - * PASV result -> LPSV/EPSV result - */ - p = param; - while (*p && *p != '(' && !isdigit(*p)) /*)*/ - p++; - if (!*p) - goto passivefail0; /*XXX*/ - if (*p == '(') /*)*/ - p++; - n = sscanf(p, "%u,%u,%u,%u,%u,%u", - &ho[0], &ho[1], &ho[2], &ho[3], &po[0], &po[1]); - if (n != 6) - goto passivefail0; /*XXX*/ - - /* keep PORT parameter */ - memset(&data4, 0, sizeof(data4)); - sin = (struct sockaddr_in *)&data4; - sin->sin_len = sizeof(*sin); - sin->sin_family = AF_INET; - sin->sin_addr.s_addr = 0; - for (n = 0; n < 4; n++) { - sin->sin_addr.s_addr |= - htonl((ho[n] & 0xff) << ((3 - n) * 8)); - } - sin->sin_port = htons(((po[0] & 0xff) << 8) | (po[1] & 0xff)); - - /* get ready for passive data connection */ - memset(&data6, 0, sizeof(data6)); - sin6 = (struct sockaddr_in6 *)&data6; - sin6->sin6_len = sizeof(*sin6); - sin6->sin6_family = AF_INET6; - wport6 = socket(sin6->sin6_family, SOCK_STREAM, 0); - if (wport6 == -1) { -passivefail: - n = snprintf(sbuf, sizeof(sbuf), - "500 could not translate from PASV\r\n"); - if (n < 0 || n >= sizeof(sbuf)) - n = 0; - if (n) - write(src, sbuf, n); - return n; - } -#ifdef IPV6_FAITH - { - int on = 1; - error = setsockopt(wport6, IPPROTO_IPV6, IPV6_FAITH, - &on, sizeof(on)); - if (error == -1) - exit_failure("setsockopt(IPV6_FAITH): %s", strerror(errno)); - } -#endif - error = bind(wport6, (struct sockaddr *)sin6, sin6->sin6_len); - if (error == -1) { - close(wport6); - wport6 = -1; - goto passivefail; - } - error = listen(wport6, 1); - if (error == -1) { - 
close(wport6); - wport6 = -1; - goto passivefail; - } - - /* transmit LPSV or EPSV */ - /* - * addr from dst, port from wport6 - */ - len = sizeof(data6); - error = getsockname(wport6, (struct sockaddr *)&data6, &len); - if (error == -1) { - close(wport6); - wport6 = -1; - goto passivefail; - } - sin6 = (struct sockaddr_in6 *)&data6; - port = sin6->sin6_port; - - len = sizeof(data6); - error = getsockname(dst, (struct sockaddr *)&data6, &len); - if (error == -1) { - close(wport6); - wport6 = -1; - goto passivefail; - } - sin6 = (struct sockaddr_in6 *)&data6; - sin6->sin6_port = port; - - if (state == LPSV) { - a = (char *)&sin6->sin6_addr; - p = (char *)&sin6->sin6_port; - n = snprintf(sbuf, sizeof(sbuf), -"228 Entering Long Passive Mode (%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d)\r\n", - 6, 16, UC(a[0]), UC(a[1]), UC(a[2]), UC(a[3]), - UC(a[4]), UC(a[5]), UC(a[6]), UC(a[7]), - UC(a[8]), UC(a[9]), UC(a[10]), UC(a[11]), - UC(a[12]), UC(a[13]), UC(a[14]), UC(a[15]), - 2, UC(p[0]), UC(p[1])); - if (n < 0 || n >= sizeof(sbuf)) - n = 0; - if (n) - write(dst, sbuf, n); - passivemode = 1; - return n; - } else { - n = snprintf(sbuf, sizeof(sbuf), -"229 Entering Extended Passive Mode (|||%d|)\r\n", - ntohs(sin6->sin6_port)); - if (n < 0 || n >= sizeof(sbuf)) - n = 0; - if (n) - write(dst, sbuf, n); - passivemode = 1; - return n; - } - } - } - - bad: - exit_failure("%s", strerror(errno)); - /*NOTREACHED*/ - return 0; /* to make gcc happy */ -} - -static int -ftp_copycommand(int src, int dst, enum state *state) -{ - int error, atmark, n; - socklen_t len; - unsigned int af, hal, ho[16], pal, po[2]; - char *a, *p, *q; - char cmd[5], *param; - struct sockaddr_in *sin; - struct sockaddr_in6 *sin6; - enum state nstate; - char ch; - int i; - - /* OOB data handling */ - error = ioctl(src, SIOCATMARK, &atmark); - if (error != -1 && atmark == 1) { - n = read(src, rbuf, 1); - if (n == -1) - goto bad; - send(dst, rbuf, n, MSG_OOB); -#if 0 - n = read(src, rbuf, 
sizeof(rbuf)); - if (n == -1) - goto bad; - write(dst, rbuf, n); - return n; -#endif - } - - n = read(src, rbuf, sizeof(rbuf)); - if (n <= 0) - return n; - rbuf[n] = '\0'; - - if (n < 4) { - write(dst, rbuf, n); - return n; - } - - /* - * parse argument - */ - p = rbuf; - q = cmd; - for (i = 0; i < 4; i++) { - if (!isalpha(*p)) { - /* invalid command */ - write(dst, rbuf, n); - return n; - } - *q++ = islower(*p) ? toupper(*p) : *p; - p++; - } - if (!isspace(*p)) { - /* invalid command */ - write(dst, rbuf, n); - return n; - } - *q = '\0'; - param = p; - /* param points to first non-command token, if any */ - while (*param && isspace(*param)) - param++; - if (!*param) - param = NULL; - - *state = NONE; - - if (strcmp(cmd, "LPRT") == 0 && param) { - /* - * LPRT -> PORT - */ - nstate = LPRT; - - close(wport4); - close(wport6); - close(port4); - close(port6); - wport4 = wport6 = port4 = port6 = -1; - - if (epsvall) { - n = snprintf(sbuf, sizeof(sbuf), "501 %s disallowed in EPSV ALL\r\n", - cmd); - if (n < 0 || n >= sizeof(sbuf)) - n = 0; - if (n) - write(src, sbuf, n); - return n; - } - - n = sscanf(param, -"%u,%u,%u,%u,%u,%u,%u,%u,%u,%u,%u,%u,%u,%u,%u,%u,%u,%u,%u,%u,%u", - &af, &hal, &ho[0], &ho[1], &ho[2], &ho[3], - &ho[4], &ho[5], &ho[6], &ho[7], - &ho[8], &ho[9], &ho[10], &ho[11], - &ho[12], &ho[13], &ho[14], &ho[15], - &pal, &po[0], &po[1]); - if (n != 21 || af != 6 || hal != 16|| pal != 2) { - n = snprintf(sbuf, sizeof(sbuf), - "501 illegal parameter to LPRT\r\n"); - if (n < 0 || n >= sizeof(sbuf)) - n = 0; - if (n) - write(src, sbuf, n); - return n; - } - - /* keep LPRT parameter */ - memset(&data6, 0, sizeof(data6)); - sin6 = (struct sockaddr_in6 *)&data6; - sin6->sin6_len = sizeof(*sin6); - sin6->sin6_family = AF_INET6; - for (n = 0; n < 16; n++) - sin6->sin6_addr.s6_addr[n] = ho[n]; - sin6->sin6_port = htons(((po[0] & 0xff) << 8) | (po[1] & 0xff)); - -sendport: - /* get ready for active data connection */ - len = sizeof(data4); - error = getsockname(dst, 
(struct sockaddr *)&data4, &len); - if (error == -1) { -lprtfail: - n = snprintf(sbuf, sizeof(sbuf), - "500 could not translate to PORT\r\n"); - if (n < 0 || n >= sizeof(sbuf)) - n = 0; - if (n) - write(src, sbuf, n); - return n; - } - if (((struct sockaddr *)&data4)->sa_family != AF_INET) - goto lprtfail; - sin = (struct sockaddr_in *)&data4; - sin->sin_port = 0; - wport4 = socket(sin->sin_family, SOCK_STREAM, 0); - if (wport4 == -1) - goto lprtfail; - error = bind(wport4, (struct sockaddr *)sin, sin->sin_len); - if (error == -1) { - close(wport4); - wport4 = -1; - goto lprtfail; - } - error = listen(wport4, 1); - if (error == -1) { - close(wport4); - wport4 = -1; - goto lprtfail; - } - - /* transmit PORT */ - len = sizeof(data4); - error = getsockname(wport4, (struct sockaddr *)&data4, &len); - if (error == -1) { - close(wport4); - wport4 = -1; - goto lprtfail; - } - if (((struct sockaddr *)&data4)->sa_family != AF_INET) { - close(wport4); - wport4 = -1; - goto lprtfail; - } - sin = (struct sockaddr_in *)&data4; - a = (char *)&sin->sin_addr; - p = (char *)&sin->sin_port; - n = snprintf(sbuf, sizeof(sbuf), "PORT %d,%d,%d,%d,%d,%d\r\n", - UC(a[0]), UC(a[1]), UC(a[2]), UC(a[3]), - UC(p[0]), UC(p[1])); - if (n < 0 || n >= sizeof(sbuf)) - n = 0; - if (n) - write(dst, sbuf, n); - *state = nstate; - passivemode = 0; - return n; - } else if (strcmp(cmd, "EPRT") == 0 && param) { - /* - * EPRT -> PORT - */ - char *afp, *hostp, *portp; - struct addrinfo hints, *res; - - nstate = EPRT; - - close(wport4); - close(wport6); - close(port4); - close(port6); - wport4 = wport6 = port4 = port6 = -1; - - if (epsvall) { - n = snprintf(sbuf, sizeof(sbuf), "501 %s disallowed in EPSV ALL\r\n", - cmd); - if (n < 0 || n >= sizeof(sbuf)) - n = 0; - if (n) - write(src, sbuf, n); - return n; - } - - p = param; - ch = *p++; /* boundary character */ - afp = p; - while (*p && *p != ch) - p++; - if (!*p) { -eprtparamfail: - n = snprintf(sbuf, sizeof(sbuf), - "501 illegal parameter to EPRT\r\n"); 
- if (n < 0 || n >= sizeof(sbuf)) - n = 0; - if (n) - write(src, sbuf, n); - return n; - } - *p++ = '\0'; - hostp = p; - while (*p && *p != ch) - p++; - if (!*p) - goto eprtparamfail; - *p++ = '\0'; - portp = p; - while (*p && *p != ch) - p++; - if (!*p) - goto eprtparamfail; - *p++ = '\0'; - - n = sscanf(afp, "%d", &af); - if (n != 1 || af != 2) { - n = snprintf(sbuf, sizeof(sbuf), - "501 unsupported address family to EPRT\r\n"); - if (n < 0 || n >= sizeof(sbuf)) - n = 0; - if (n) - write(src, sbuf, n); - return n; - } - memset(&hints, 0, sizeof(hints)); - hints.ai_family = AF_UNSPEC; - hints.ai_socktype = SOCK_STREAM; - hints.ai_protocol = IPPROTO_TCP; - error = getaddrinfo(hostp, portp, &hints, &res); - if (error) { - n = snprintf(sbuf, sizeof(sbuf), - "501 EPRT: %s\r\n", gai_strerror(error)); - if (n < 0 || n >= sizeof(sbuf)) - n = 0; - if (n) - write(src, sbuf, n); - return n; - } - if (res->ai_next) { - n = snprintf(sbuf, sizeof(sbuf), - "501 EPRT: %s resolved to multiple addresses\r\n", hostp); - if (n < 0 || n >= sizeof(sbuf)) - n = 0; - if (n) - write(src, sbuf, n); - freeaddrinfo(res); - return n; - } - - memcpy(&data6, res->ai_addr, res->ai_addrlen); - - freeaddrinfo(res); - goto sendport; - } else if (strcmp(cmd, "LPSV") == 0 && !param) { - /* - * LPSV -> PASV - */ - nstate = LPSV; - - close(wport4); - close(wport6); - close(port4); - close(port6); - wport4 = wport6 = port4 = port6 = -1; - - if (epsvall) { - n = snprintf(sbuf, sizeof(sbuf), "501 %s disallowed in EPSV ALL\r\n", - cmd); - if (n < 0 || n >= sizeof(sbuf)) - n = 0; - if (n) - write(src, sbuf, n); - return n; - } - - /* transmit PASV */ - n = snprintf(sbuf, sizeof(sbuf), "PASV\r\n"); - if (n < 0 || n >= sizeof(sbuf)) - n = 0; - if (n) - write(dst, sbuf, n); - *state = LPSV; - passivemode = 0; /* to be set to 1 later */ - return n; - } else if (strcmp(cmd, "EPSV") == 0 && !param) { - /* - * EPSV -> PASV - */ - close(wport4); - close(wport6); - close(port4); - close(port6); - wport4 = wport6 = 
port4 = port6 = -1; - - n = snprintf(sbuf, sizeof(sbuf), "PASV\r\n"); - if (n < 0 || n >= sizeof(sbuf)) - n = 0; - if (n) - write(dst, sbuf, n); - *state = EPSV; - passivemode = 0; /* to be set to 1 later */ - return n; - } else if (strcmp(cmd, "EPSV") == 0 && param - && strncasecmp(param, "ALL", 3) == 0 && isspace(param[3])) { - /* - * EPSV ALL - */ - epsvall = 1; - n = snprintf(sbuf, sizeof(sbuf), "200 EPSV ALL command successful.\r\n"); - if (n < 0 || n >= sizeof(sbuf)) - n = 0; - if (n) - write(src, sbuf, n); - return n; - } else if (strcmp(cmd, "PORT") == 0 || strcmp(cmd, "PASV") == 0) { - /* - * reject PORT/PASV - */ - n = snprintf(sbuf, sizeof(sbuf), "502 %s not implemented.\r\n", cmd); - if (n < 0 || n >= sizeof(sbuf)) - n = 0; - if (n) - write(src, sbuf, n); - return n; - } else if (passivemode - && (strcmp(cmd, "STOR") == 0 - || strcmp(cmd, "STOU") == 0 - || strcmp(cmd, "RETR") == 0 - || strcmp(cmd, "LIST") == 0 - || strcmp(cmd, "NLST") == 0 - || strcmp(cmd, "APPE") == 0)) { - /* - * commands with data transfer. need to care about passive - * mode data connection. - */ - - if (ftp_passiveconn() < 0) { - n = snprintf(sbuf, sizeof(sbuf), "425 Cannot open data connetion\r\n"); - if (n < 0 || n >= sizeof(sbuf)) - n = 0; - if (n) - write(src, sbuf, n); - } else { - /* simply relay the command */ - write(dst, rbuf, n); - } - - *state = NONE; - return n; - } else { - /* simply relay it */ - *state = NONE; - write(dst, rbuf, n); - return n; - } - - bad: - exit_failure("%s", strerror(errno)); - /*NOTREACHED*/ - return 0; /* to make gcc happy */ -} diff --git a/usr.sbin/faithd/prefix.c b/usr.sbin/faithd/prefix.c deleted file mode 100644 index bdb763ad6f5..00000000000 --- a/usr.sbin/faithd/prefix.c +++ /dev/null @@ -1,345 +0,0 @@ -/* $KAME: prefix.c,v 1.13 2003/09/02 22:50:17 itojun Exp $ */ -/* $FreeBSD$ */ - -/* - * Copyright (C) 2000 WIDE Project. - * All rights reserved. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the name of the project nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#ifndef offsetof -#define offsetof(type, member) ((size_t)(u_long)(&((type *)0)->member)) -#endif - -#include "faithd.h" -#include "prefix.h" - -static int prefix_set(const char *, struct prefix *, int); -static struct config *config_load1(const char *); -#if 0 -static void config_show1(const struct config *); -static void config_show(void); -#endif - -struct config *config_list = NULL; -const int niflags = NI_NUMERICHOST; - -static int -prefix_set(const char *s, struct prefix *prefix, int slash) -{ - char *p = NULL, *q, *r; - struct addrinfo hints, *res = NULL; - int max; - - p = strdup(s); - if (!p) - goto fail; - q = strchr(p, '/'); - if (q) { - if (!slash) - goto fail; - *q++ = '\0'; - } - - memset(&hints, 0, sizeof(hints)); - hints.ai_family = PF_UNSPEC; - hints.ai_socktype = SOCK_DGRAM; /*dummy*/ - hints.ai_flags = AI_NUMERICHOST; - if (getaddrinfo(p, "0", &hints, &res)) - goto fail; - if (res->ai_next || res->ai_addrlen > sizeof(prefix->a)) - goto fail; - memcpy(&prefix->a, res->ai_addr, res->ai_addrlen); - - switch (prefix->a.ss_family) { - case AF_INET: - max = 32; - break; - case AF_INET6: - max = 128; - break; - default: - max = -1; - break; - } - - if (q) { - r = NULL; - prefix->l = (int)strtoul(q, &r, 10); - if (!*q || *r) - goto fail; - if (prefix->l < 0 || prefix->l > max) - goto fail; - } else - prefix->l = max; - - if (p) - free(p); - if (res) - freeaddrinfo(res); - return 0; - -fail: - if (p) - free(p); - if (res) - freeaddrinfo(res); - return -1; -} - -const char * -prefix_string(const struct prefix *prefix) -{ - static char buf[NI_MAXHOST + 20]; - char hbuf[NI_MAXHOST]; - - if (getnameinfo((const struct sockaddr *)&prefix->a, prefix->a.ss_len, - hbuf, sizeof(hbuf), NULL, 0, niflags)) - return NULL; - snprintf(buf, sizeof(buf), "%s/%d", hbuf, prefix->l); - return buf; -} - -int -prefix_match(const struct prefix *prefix, const struct sockaddr *sa) 
-{ - struct sockaddr_storage a, b; - char *pa, *pb; - int off, l; - - if (prefix->a.ss_family != sa->sa_family || - prefix->a.ss_len != sa->sa_len) - return 0; - - if (prefix->a.ss_len > sizeof(a) || sa->sa_len > sizeof(b)) - return 0; - - switch (prefix->a.ss_family) { - case AF_INET: - off = offsetof(struct sockaddr_in, sin_addr); - break; - case AF_INET6: - off = offsetof(struct sockaddr_in6, sin6_addr); - break; - default: - if (memcmp(&prefix->a, sa, prefix->a.ss_len) != 0) - return 0; - else - return 1; - } - - memcpy(&a, &prefix->a, prefix->a.ss_len); - memcpy(&b, sa, sa->sa_len); - l = prefix->l / 8 + (prefix->l % 8 ? 1 : 0); - - /* overrun check */ - if (off + l > a.ss_len) - return 0; - - pa = ((char *)&a) + off; - pb = ((char *)&b) + off; - if (prefix->l % 8) { - pa[prefix->l / 8] &= 0xff00 >> (prefix->l % 8); - pb[prefix->l / 8] &= 0xff00 >> (prefix->l % 8); - } - if (memcmp(pa, pb, l) != 0) - return 0; - else - return 1; -} - -/* - * prefix/prefixlen permit/deny prefix/prefixlen [srcaddr] - * 3ffe::/16 permit 10.0.0.0/8 10.1.1.1 - */ -static struct config * -config_load1(const char *line) -{ - struct config *conf; - char buf[BUFSIZ]; - char *p; - char *token[4]; - int i; - - if (strlen(line) + 1 > sizeof(buf)) - return NULL; - strlcpy(buf, line, sizeof(buf)); - - p = strchr(buf, '\n'); - if (!p) - return NULL; - *p = '\0'; - p = strchr(buf, '#'); - if (p) - *p = '\0'; - if (strlen(buf) == 0) - return NULL; - - p = buf; - memset(token, 0, sizeof(token)); - for (i = 0; i < sizeof(token) / sizeof(token[0]); i++) { - token[i] = strtok(p, "\t "); - p = NULL; - if (token[i] == NULL) - break; - } - /* extra tokens? 
*/ - if (strtok(p, "\t ") != NULL) - return NULL; - /* insufficient tokens */ - switch (i) { - case 3: - case 4: - break; - default: - return NULL; - } - - conf = (struct config *)malloc(sizeof(*conf)); - if (conf == NULL) - return NULL; - memset(conf, 0, sizeof(*conf)); - - if (strcasecmp(token[1], "permit") == 0) - conf->permit = 1; - else if (strcasecmp(token[1], "deny") == 0) - conf->permit = 0; - else { - /* invalid keyword is considered as "deny" */ - conf->permit = 0; - } - - if (prefix_set(token[0], &conf->match, 1) < 0) - goto fail; - if (prefix_set(token[2], &conf->dest, 1) < 0) - goto fail; - if (token[3]) { - if (prefix_set(token[3], &conf->src, 0) < 0) - goto fail; - } - - return conf; - -fail: - free(conf); - return NULL; -} - -int -config_load(const char *configfile) -{ - FILE *fp; - char buf[BUFSIZ]; - struct config *conf, *p; - struct config sentinel; - - config_list = NULL; - - if (!configfile) - configfile = _PATH_PREFIX_CONF; - fp = fopen(configfile, "r"); - if (fp == NULL) - return -1; - - p = &sentinel; - sentinel.next = NULL; - while (fgets(buf, sizeof(buf), fp) != NULL) { - conf = config_load1(buf); - if (conf) { - p->next = conf; - p = p->next; - } - } - config_list = sentinel.next; - - fclose(fp); - return 0; -} - -#if 0 -static void -config_show1(const struct config *conf) -{ - const char *p; - - p = prefix_string(&conf->match); - printf("%s", p ? p : "?"); - - if (conf->permit) - printf(" permit"); - else - printf(" deny"); - - p = prefix_string(&conf->dest); - printf(" %s", p ? 
p : "?"); - - printf("\n"); -} - -static void -config_show() -{ - struct config *conf; - - for (conf = config_list; conf; conf = conf->next) - config_show1(conf); -} -#endif - -const struct config * -config_match(struct sockaddr *sa1, struct sockaddr *sa2) -{ - static struct config conf; - const struct config *p; - - if (sa1->sa_len > sizeof(conf.match.a) || - sa2->sa_len > sizeof(conf.dest.a)) - return NULL; - - memset(&conf, 0, sizeof(conf)); - if (!config_list) { - conf.permit = 1; - memcpy(&conf.match.a, sa1, sa1->sa_len); - memcpy(&conf.dest.a, sa2, sa2->sa_len); - return &conf; - } - - for (p = config_list; p; p = p->next) - if (prefix_match(&p->match, sa1) && prefix_match(&p->dest, sa2)) - return p; - - return NULL; -} diff --git a/usr.sbin/faithd/prefix.h b/usr.sbin/faithd/prefix.h deleted file mode 100644 index 4d6b3d59c46..00000000000 --- a/usr.sbin/faithd/prefix.h +++ /dev/null @@ -1,52 +0,0 @@ -/* $KAME: prefix.h,v 1.4 2001/09/05 03:04:21 itojun Exp $ */ -/* $FreeBSD$ */ - -/* - * Copyright (C) 2000 WIDE Project. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the name of the project nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. 
- * - * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -struct prefix { - struct sockaddr_storage a; - int l; -}; - -struct config { - struct config *next; - - int permit; - struct prefix match; - struct prefix dest; - struct prefix src; /* src to use for outgoing connection */ -}; - -#define _PATH_PREFIX_CONF "/etc/faithd.conf" - -extern const char *prefix_string(const struct prefix *); -extern int prefix_match(const struct prefix *, const struct sockaddr *); -extern int config_load(const char *); -extern const struct config *config_match(struct sockaddr *, struct sockaddr *); diff --git a/usr.sbin/faithd/tcp.c b/usr.sbin/faithd/tcp.c deleted file mode 100644 index 21976944147..00000000000 --- a/usr.sbin/faithd/tcp.c +++ /dev/null @@ -1,324 +0,0 @@ -/* $KAME: tcp.c,v 1.13 2003/09/02 22:49:21 itojun Exp $ */ - -/* - * Copyright (C) 1997 and 1998 WIDE Project. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. 
Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the name of the project nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- */ - -#include -__FBSDID("$FreeBSD$"); - -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#include "faithd.h" - -static char tcpbuf[16*1024]; - /* bigger than MSS and may be lesser than window size */ -static int tblen, tboff, oob_exists; -static fd_set readfds, writefds, exceptfds; -static char atmark_buf[2]; -static pid_t cpid = (pid_t)0; -static pid_t ppid = (pid_t)0; -volatile time_t child_lastactive = (time_t)0; -static time_t parent_lastactive = (time_t)0; - -static void sig_ctimeout(int); -static void sig_child(int); -static void notify_inactive(void); -static void notify_active(void); -static void send_data(int, int, const char *, int); -static void relay(int, int, const char *, int); - -/* - * Inactivity timer: - * - child side (ppid != 0) will send SIGUSR1 to parent every (FAITH_TIMEOUT/4) - * second if traffic is active. if traffic is inactive, don't send SIGUSR1. - * - parent side (ppid == 0) will check the last SIGUSR1 it have seen. - */ -static void -sig_ctimeout(int sig __unused) -{ - /* parent side: record notification from the child */ - if (dflag) - syslog(LOG_DEBUG, "activity timer from child"); - child_lastactive = time(NULL); -} - -/* parent will terminate if child dies. */ -static void -sig_child(int sig __unused) -{ - int status; - pid_t pid; - - pid = wait3(&status, WNOHANG, (struct rusage *)0); - if (pid > 0 && WEXITSTATUS(status)) - syslog(LOG_WARNING, "child %ld exit status 0x%x", - (long)pid, status); - exit_success("terminate connection due to child termination"); -} - -static void -notify_inactive() -{ - time_t t; - - /* only on parent side... */ - if (ppid) - return; - - /* parent side should check for timeout. */ - t = time(NULL); - if (dflag) { - syslog(LOG_DEBUG, "parent side %sactive, child side %sactive", - (FAITH_TIMEOUT < t - parent_lastactive) ? "in" : "", - (FAITH_TIMEOUT < t - child_lastactive) ? 
"in" : ""); - } - - if (FAITH_TIMEOUT < t - child_lastactive - && FAITH_TIMEOUT < t - parent_lastactive) { - /* both side timeouted */ - signal(SIGCHLD, SIG_DFL); - kill(cpid, SIGTERM); - wait(NULL); - exit_failure("connection timeout"); - /* NOTREACHED */ - } -} - -static void -notify_active() -{ - if (ppid) { - /* child side: notify parent of active traffic */ - time_t t; - t = time(NULL); - if (FAITH_TIMEOUT / 4 < t - child_lastactive) { - if (kill(ppid, SIGUSR1) < 0) { - exit_failure("terminate connection due to parent termination"); - /* NOTREACHED */ - } - child_lastactive = t; - } - } else { - /* parent side */ - parent_lastactive = time(NULL); - } -} - -static void -send_data(int s_rcv, int s_snd, const char *service __unused, int direction) -{ - int cc; - - if (oob_exists) { - cc = send(s_snd, atmark_buf, 1, MSG_OOB); - if (cc == -1) - goto retry_or_err; - oob_exists = 0; - if (s_rcv >= FD_SETSIZE) - exit_failure("descriptor too big"); - FD_SET(s_rcv, &exceptfds); - } - - for (; tboff < tblen; tboff += cc) { - cc = write(s_snd, tcpbuf + tboff, tblen - tboff); - if (cc < 0) - goto retry_or_err; - } -#ifdef DEBUG - if (tblen) { - if (tblen >= sizeof(tcpbuf)) - tblen = sizeof(tcpbuf) - 1; - tcpbuf[tblen] = '\0'; - syslog(LOG_DEBUG, "from %s (%dbytes): %s", - direction == 1 ? 
"client" : "server", tblen, tcpbuf); - } -#endif /* DEBUG */ - tblen = 0; tboff = 0; - if (s_snd >= FD_SETSIZE) - exit_failure("descriptor too big"); - FD_CLR(s_snd, &writefds); - if (s_rcv >= FD_SETSIZE) - exit_failure("descriptor too big"); - FD_SET(s_rcv, &readfds); - return; - retry_or_err: - if (errno != EAGAIN) - exit_failure("writing relay data failed: %s", strerror(errno)); - if (s_snd >= FD_SETSIZE) - exit_failure("descriptor too big"); - FD_SET(s_snd, &writefds); -} - -static void -relay(int s_rcv, int s_snd, const char *service, int direction) -{ - int atmark, error, maxfd; - struct timeval tv; - fd_set oreadfds, owritefds, oexceptfds; - - FD_ZERO(&readfds); - FD_ZERO(&writefds); - FD_ZERO(&exceptfds); - fcntl(s_snd, F_SETFD, O_NONBLOCK); - oreadfds = readfds; owritefds = writefds; oexceptfds = exceptfds; - if (s_rcv >= FD_SETSIZE) - exit_failure("descriptor too big"); - FD_SET(s_rcv, &readfds); - FD_SET(s_rcv, &exceptfds); - oob_exists = 0; - maxfd = (s_rcv > s_snd) ? s_rcv : s_snd; - - for (;;) { - tv.tv_sec = FAITH_TIMEOUT / 4; - tv.tv_usec = 0; - oreadfds = readfds; - owritefds = writefds; - oexceptfds = exceptfds; - error = select(maxfd + 1, &readfds, &writefds, &exceptfds, &tv); - if (error == -1) { - if (errno == EINTR) - continue; - exit_failure("select: %s", strerror(errno)); - } else if (error == 0) { - readfds = oreadfds; - writefds = owritefds; - exceptfds = oexceptfds; - notify_inactive(); - continue; - } - - /* activity notification */ - notify_active(); - - if (FD_ISSET(s_rcv, &exceptfds)) { - error = ioctl(s_rcv, SIOCATMARK, &atmark); - if (error != -1 && atmark == 1) { - int cc; - oob_read_retry: - cc = read(s_rcv, atmark_buf, 1); - if (cc == 1) { - if (s_rcv >= FD_SETSIZE) - exit_failure("descriptor too big"); - FD_CLR(s_rcv, &exceptfds); - if (s_snd >= FD_SETSIZE) - exit_failure("descriptor too big"); - FD_SET(s_snd, &writefds); - oob_exists = 1; - } else if (cc == -1) { - if (errno == EINTR) - goto oob_read_retry; - 
exit_failure("reading oob data failed" - ": %s", - strerror(errno)); - } - } - } - if (FD_ISSET(s_rcv, &readfds)) { - relaydata_read_retry: - tblen = read(s_rcv, tcpbuf, sizeof(tcpbuf)); - tboff = 0; - - switch (tblen) { - case -1: - if (errno == EINTR) - goto relaydata_read_retry; - exit_failure("reading relay data failed: %s", - strerror(errno)); - /* NOTREACHED */ - case 0: - /* to close opposite-direction relay process */ - shutdown(s_snd, 0); - - close(s_rcv); - close(s_snd); - exit_success("terminating %s relay", service); - /* NOTREACHED */ - default: - if (s_rcv >= FD_SETSIZE) - exit_failure("descriptor too big"); - FD_CLR(s_rcv, &readfds); - if (s_snd >= FD_SETSIZE) - exit_failure("descriptor too big"); - FD_SET(s_snd, &writefds); - break; - } - } - if (FD_ISSET(s_snd, &writefds)) - send_data(s_rcv, s_snd, service, direction); - } -} - -void -tcp_relay(int s_src, int s_dst, const char *service) -{ - syslog(LOG_INFO, "starting %s relay", service); - - child_lastactive = parent_lastactive = time(NULL); - - cpid = fork(); - switch (cpid) { - case -1: - exit_failure("tcp_relay: can't fork grand child: %s", - strerror(errno)); - /* NOTREACHED */ - case 0: - /* child process: relay going traffic */ - ppid = getppid(); - /* this is child so reopen log */ - closelog(); - openlog(logname, LOG_PID | LOG_NOWAIT, LOG_DAEMON); - relay(s_src, s_dst, service, 1); - /* NOTREACHED */ - default: - /* parent process: relay coming traffic */ - ppid = (pid_t)0; - signal(SIGUSR1, sig_ctimeout); - signal(SIGCHLD, sig_child); - relay(s_dst, s_src, service, 0); - /* NOTREACHED */ - } -} diff --git a/usr.sbin/faithd/test/faithd.rb b/usr.sbin/faithd/test/faithd.rb deleted file mode 100644 index 682f540db98..00000000000 --- a/usr.sbin/faithd/test/faithd.rb +++ /dev/null @@ -1,312 +0,0 @@ -# faithd, ruby version. requires v6-enabled ruby. -# -# highly experimental (not working right at all) and very limited -# functionality. 
-# -# $Id: faithd.rb,v 1.1.2.4 1999/05/10 17:06:30 itojun Exp $ -# $FreeBSD$ - -require "socket" -require "thread" - -# XXX should be derived from system headers -IPPROTO_IPV6 = 41 -IPV6_FAITH = 29 -DEBUG = true -DEBUG_LOOPBACK = true - -# TODO: OOB data handling -def tcpcopy(s1, s2, m) - STDERR.print "tcpcopy #{s1} #{s2}\n" if DEBUG - buf = "" - while TRUE - begin - buf = s1.sysread(100) - s2.syswrite(buf) - rescue EOFError - break - rescue IOError - break - end - end - STDERR.print "tcpcopy #{s1} #{s2} finished\n" if DEBUG - s1.shutdown(0) - s2.shutdown(1) -end - -def relay_ftp_passiveconn(s6, s4, dport6, dport4) - Thread.start do - d6 = TCPserver.open("::", dport6).accept - d4 = TCPsocket.open(s4.getpeer[3], dport4) - t = [] - t[0] = Thread.start do - tcpcopy(d6, d4) - end - t[1] = Thread.start do - tcpcopy(d4, d6) - end - for i in t - i.join - end - d4.close - d6.close - end -end - -def ftp_parse_2428(line) - if (line[0] != line[line.length - 1]) - return nil - end - t = line.split(line[0 .. 0]) # as string - if (t.size != 4 || t[1] !~ /^[12]$/ || t[3] !~ /^\d+$/) - return nil - end - return t[1 .. 
3] -end - -def relay_ftp_command(s6, s4, state) - STDERR.print "relay_ftp_command start\n" if DEBUG - while TRUE - begin - STDERR.print "s6.gets\n" if DEBUG - line = s6.gets - STDERR.print "line is #{line}\n" if DEBUG - if line == nil - return nil - end - - # translate then copy - STDERR.print "line is #{line}\n" if DEBUG - if (line =~ /^EPSV\r\n/i) - STDERR.print "EPSV -> PASV\n" if DEBUG - line = "PASV\n" - state = "EPSV" - elsif (line =~ /^EPRT\s+(.+)\r\n/i) - t = ftp_parse_2428($1) - if t == nil - s6.puts "501 illegal parameter to EPRT\r\n" - next - end - - # some tricks should be here - s6.puts "501 illegal parameter to EPRT\r\n" - next - end - STDERR.print "fail: send #{line} as is\n" if DEBUG - s4.puts(line) - break - rescue EOFError - return nil - rescue IOError - return nil - end - end - STDERR.print "relay_ftp_command finish\n" if DEBUG - return state -end - -def relay_ftp_status(s4, s6, state) - STDERR.print "relay_ftp_status start\n" if DEBUG - while TRUE - begin - line = s4.gets - if line == nil - return nil - end - - # translate then copy - s6.puts(line) - - next if line =~ /^\d\d\d-/ - next if line !~ /^\d/ - - # special post-processing - case line - when /^221 / # result to QUIT - s4.shutdown(0) - s6.shutdown(1) - end - - break if (line =~ /^\d\d\d /) - rescue EOFError - return nil - rescue IOError - return nil - end - end - STDERR.print "relay_ftp_status finish\n" if DEBUG - return state -end - -def relay_ftp(sock, name) - STDERR.print "relay_ftp(#{sock}, #{name})\n" if DEBUG - while TRUE - STDERR.print "relay_ftp(#{sock}, #{name}) accepting\n" if DEBUG - s = sock.accept - STDERR.print "relay_ftp(#{sock}, #{name}) accepted #{s}\n" if DEBUG - Thread.start do - threads = [] - STDERR.print "accepted #{s} -> #{Thread.current}\n" if DEBUG - s6 = s - dest6 = s.addr[3] - if !DEBUG_LOOPBACK - t = s.getsockname.unpack("x8 x12 C4") - dest4 = "#{t[0]}.#{t[1]}.#{t[2]}.#{t[3]}" - port4 = s.addr[1] - else - dest4 = "127.0.0.1" - port4 = "ftp" - end - if DEBUG - 
STDERR.print "IPv6 dest: #{dest6} IPv4 dest: #{dest4}\n" if DEBUG - end - STDERR.print "connect to #{dest4} #{port4}\n" if DEBUG - s4 = TCPsocket.open(dest4, port4) - STDERR.print "connected to #{dest4} #{port4}, #{s4.addr[1]}\n" if DEBUG - state = 0 - while TRUE - # translate status line - state = relay_ftp_status(s4, s6, state) - break if state == nil - # translate command line - state = relay_ftp_command(s6, s4, state) - break if state == nil - end - STDERR.print "relay_ftp(#{sock}, #{name}) closing s4\n" if DEBUG - s4.close - STDERR.print "relay_ftp(#{sock}, #{name}) closing s6\n" if DEBUG - s6.close - STDERR.print "relay_ftp(#{sock}, #{name}) done\n" if DEBUG - end - end - STDERR.print "relay_ftp(#{sock}, #{name}) finished\n" if DEBUG -end - -def relay_tcp(sock, name) - STDERR.print "relay_tcp(#{sock}, #{name})\n" if DEBUG - while TRUE - STDERR.print "relay_tcp(#{sock}, #{name}) accepting\n" if DEBUG - s = sock.accept - STDERR.print "relay_tcp(#{sock}, #{name}) accepted #{s}\n" if DEBUG - Thread.start do - threads = [] - STDERR.print "accepted #{s} -> #{Thread.current}\n" if DEBUG - s6 = s - dest6 = s.addr[3] - if !DEBUG_LOOPBACK - t = s.getsockname.unpack("x8 x12 C4") - dest4 = "#{t[0]}.#{t[1]}.#{t[2]}.#{t[3]}" - port4 = s.addr[1] - else - dest4 = "127.0.0.1" - port4 = "telnet" - end - if DEBUG - STDERR.print "IPv6 dest: #{dest6} IPv4 dest: #{dest4}\n" if DEBUG - end - STDERR.print "connect to #{dest4} #{port4}\n" if DEBUG - s4 = TCPsocket.open(dest4, port4) - STDERR.print "connected to #{dest4} #{port4}, #{s4.addr[1]}\n" if DEBUG - [0, 1].each do |i| - threads[i] = Thread.start do - if (i == 0) - tcpcopy(s6, s4) - else - tcpcopy(s4, s6) - end - end - end - STDERR.print "relay_tcp(#{sock}, #{name}) wait\n" if DEBUG - for i in threads - STDERR.print "relay_tcp(#{sock}, #{name}) wait #{i}\n" if DEBUG - i.join - STDERR.print "relay_tcp(#{sock}, #{name}) wait #{i} done\n" if DEBUG - end - STDERR.print "relay_tcp(#{sock}, #{name}) closing s4\n" if DEBUG - s4.close 
- STDERR.print "relay_tcp(#{sock}, #{name}) closing s6\n" if DEBUG - s6.close - STDERR.print "relay_tcp(#{sock}, #{name}) done\n" if DEBUG - end - end - STDERR.print "relay_tcp(#{sock}, #{name}) finished\n" if DEBUG -end - -def usage() - STDERR.print "usage: #{$0} [-f] port...\n" -end - -#------------------------------------------------------------ - -$mode = "tcp" - -while ARGV[0] =~ /^-/ do - case ARGV[0] - when /^-f/ - $mode = "ftp" - else - usage() - exit 0 - end - ARGV.shift -end - -if ARGV.length == 0 - usage() - exit 1 -end - -ftpport = Socket.getservbyname("ftp") - -res = [] -for port in ARGV - t = Socket.getaddrinfo(nil, port, Socket::PF_INET6, Socket::SOCK_STREAM, - nil, Socket::AI_PASSIVE) - if (t.size <= 0) - STDERR.print "FATAL: getaddrinfo failed (port=#{port})\n" - exit 1 - end - res += t -end - -sockpool = [] -names = [] -listenthreads = [] - -res.each do |i| - s = TCPserver.new(i[3], i[1]) - n = Socket.getnameinfo(s.getsockname, Socket::NI_NUMERICHOST|Socket::NI_NUMERICSERV).join(" port ") - if i[6] == IPPROTO_IPV6 - s.setsockopt(i[6], IPV6_FAITH, 1) - end - s.setsockopt(Socket::SOL_SOCKET, Socket::SO_REUSEADDR, 1) - sockpool.push s - names.push n -end - -if DEBUG - (0 .. sockpool.size - 1).each do |i| - STDERR.print "listen[#{i}]: #{sockpool[i]} #{names[i]}\n" if DEBUG - end -end - -(0 .. 
sockpool.size - 1).each do |i| - listenthreads[i] = Thread.start do - if DEBUG - STDERR.print "listen[#{i}]: thread #{Thread.current}\n" if DEBUG - end - STDERR.print "listen[#{i}]: thread #{Thread.current}\n" if DEBUG - case $mode - when "tcp" - relay_tcp(sockpool[i], names[i]) - when "ftp" - relay_ftp(sockpool[i], names[i]) - end - end -end - -for i in listenthreads - i.join -end - -exit 0 diff --git a/usr.sbin/inetd/inetd.c b/usr.sbin/inetd/inetd.c index eebcfea6f60..c48f33ca93e 100644 --- a/usr.sbin/inetd/inetd.c +++ b/usr.sbin/inetd/inetd.c @@ -69,7 +69,7 @@ __FBSDID("$FreeBSD$"); * or name a tcpmux service * or specify a unix domain socket * socket type stream/dgram/raw/rdm/seqpacket - * protocol tcp[4][6][/faith], udp[4][6], unix + * protocol tcp[4][6], udp[4][6], unix * wait/nowait single-threaded/multi-threaded * user[:group][/login-class] user/group/login-class to run daemon as * server program full path name @@ -1305,14 +1305,6 @@ setsockopt(fd, SOL_SOCKET, opt, (char *)&on, sizeof (on)) syslog(LOG_ERR, "setsockopt (IPV6_V6ONLY): %m"); } #undef turnon -#ifdef IPV6_FAITH - if (sep->se_type == FAITH_TYPE) { - if (setsockopt(sep->se_fd, IPPROTO_IPV6, IPV6_FAITH, &on, - sizeof(on)) < 0) { - syslog(LOG_ERR, "setsockopt (IPV6_FAITH): %m"); - } - } -#endif #ifdef IPSEC ipsecsetup(sep); #endif @@ -1744,15 +1736,15 @@ more: arg = sskip(&cp); if (strncmp(arg, "tcp", 3) == 0) { sep->se_proto = newstr(strsep(&arg, "/")); - if (arg != NULL) { - if (strcmp(arg, "faith") == 0) - sep->se_type = FAITH_TYPE; + if (arg != NULL && (strcmp(arg, "faith") == 0)) { + syslog(LOG_ERR, "faith has been deprecated"); + goto more; } } else { if (sep->se_type == NORM_TYPE && strncmp(arg, "faith/", 6) == 0) { - arg += 6; - sep->se_type = FAITH_TYPE; + syslog(LOG_ERR, "faith has been deprecated"); + goto more; } sep->se_proto = newstr(arg); } From fb5780def820246dc5588406bd35643df88a6394 Mon Sep 17 00:00:00 2001 From: Alexander Motin Date: Sun, 9 Nov 2014 22:43:29 +0000 Subject: [PATCH 
024/280] Handle PREEMPT AND ABORT service action equal to PREEMPT. With command serialization used in CTL, there are no other commands to abort when PREEMPT AND ABORT gets to run, so it is practically equal to PREEMPT. MFC after: 1 week --- sys/cam/ctl/ctl.c | 3 ++- sys/cam/ctl/ctl_cmd_table.c | 11 ++++++++++- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/sys/cam/ctl/ctl.c b/sys/cam/ctl/ctl.c index b50caad470c..cd0111416b9 100644 --- a/sys/cam/ctl/ctl.c +++ b/sys/cam/ctl/ctl.c @@ -8914,7 +8914,8 @@ ctl_persistent_reserve_out(struct ctl_scsiio *ctsio) } break; - case SPRO_PREEMPT: { + case SPRO_PREEMPT: + case SPRO_PRE_ABO: { int nretval; nretval = ctl_pro_preempt(softc, lun, res_key, sa_res_key, type, diff --git a/sys/cam/ctl/ctl_cmd_table.c b/sys/cam/ctl/ctl_cmd_table.c index 0180cecff26..fedc11001e9 100644 --- a/sys/cam/ctl/ctl_cmd_table.c +++ b/sys/cam/ctl/ctl_cmd_table.c @@ -180,7 +180,16 @@ const struct ctl_cmd_entry ctl_cmd_table_5f[32] = 10, { 0x04, 0xff, 0, 0, 0xff, 0xff, 0xff, 0xff, 0x07}}, /* 05 PREEMPT AND ABORT */ -{NULL, CTL_SERIDX_INVLD, CTL_CMD_FLAG_NONE, CTL_LUN_PAT_NONE}, +{ctl_persistent_reserve_out, CTL_SERIDX_RES, CTL_CMD_FLAG_ALLOW_ON_RESV | + CTL_CMD_FLAG_OK_ON_BOTH | + CTL_CMD_FLAG_OK_ON_STOPPED | + CTL_CMD_FLAG_OK_ON_INOPERABLE | + CTL_CMD_FLAG_OK_ON_OFFLINE | + CTL_CMD_FLAG_OK_ON_SECONDARY | + CTL_FLAG_DATA_OUT | + CTL_CMD_FLAG_ALLOW_ON_PR_RESV, + CTL_LUN_PAT_NONE, + 10, { 0x05, 0xff, 0, 0, 0xff, 0xff, 0xff, 0xff, 0x07}}, /* 06 REGISTER AND IGNORE EXISTING KEY */ {ctl_persistent_reserve_out, CTL_SERIDX_RES, CTL_CMD_FLAG_ALLOW_ON_RESV | From ea455de91dd46492cce8a23e7a3b4c804312afe6 Mon Sep 17 00:00:00 2001 From: "Andrey V. Elsukov" Date: Sun, 9 Nov 2014 22:54:40 +0000 Subject: [PATCH 025/280] Use embedded scope zone id to determine outgoing interface for link-local and node-local addresses. 
--- sys/netinet6/in6_src.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/sys/netinet6/in6_src.c b/sys/netinet6/in6_src.c index 9e163da8a41..d721fb9cdd9 100644 --- a/sys/netinet6/in6_src.c +++ b/sys/netinet6/in6_src.c @@ -555,6 +555,7 @@ selectroute(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, struct sockaddr_in6 *sin6_next; struct in6_pktinfo *pi = NULL; struct in6_addr *dst = &dstsock->sin6_addr; + uint32_t zoneid; #if 0 char ip6buf[INET6_ADDRSTRLEN]; @@ -585,7 +586,18 @@ selectroute(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, } else goto getroute; } - + /* + * If destination address is LLA or link- or node-local multicast, + * use it's embedded scope zone id to determine outgoing interface. + */ + if (IN6_IS_SCOPE_LINKLOCAL(dst) || + IN6_IS_ADDR_MC_NODELOCAL(dst)) { + zoneid = ntohs(in6_getscope(dst)); + if (zoneid > 0) { + ifp = in6_getlinkifnet(zoneid); + goto done; + } + } /* * If the destination address is a multicast address and the outgoing * interface for the address is specified by the caller, use it. From 0506889c15d39b8edb35bf871ef1f35112a13e1e Mon Sep 17 00:00:00 2001 From: Luigi Rizzo Date: Mon, 10 Nov 2014 08:31:56 +0000 Subject: [PATCH 026/280] return kernel-supplied error if available. Also fix field names in a comment. --- sys/net/netmap_user.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sys/net/netmap_user.h b/sys/net/netmap_user.h index 5faf671b897..aab6c358de7 100644 --- a/sys/net/netmap_user.h +++ b/sys/net/netmap_user.h @@ -40,7 +40,7 @@ * From there: * struct netmap_ring *NETMAP_TXRING(nifp, index) * struct netmap_ring *NETMAP_RXRING(nifp, index) - * we can access ring->nr_cur, ring->nr_avail, ring->nr_flags + * we can access ring->cur, ring->head, ring->tail, etc. 
* * ring->slot[i] gives us the i-th slot (we can access * directly len, flags, buf_idx) @@ -543,7 +543,8 @@ fail: nm_close(d); if (errmsg) D("%s %s", errmsg, ifname); - errno = EINVAL; + if (errno == 0) + errno = EINVAL; return NULL; } From 133cdd9e13bf502041339cf2561c224d3e9c609a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dag-Erling=20Sm=C3=B8rgrav?= Date: Mon, 10 Nov 2014 09:44:38 +0000 Subject: [PATCH 027/280] Constify the AES code and propagate to consumers. This allows us to update the Fortuna code to use SHAd-256 as defined in FS&K. Approved by: so (self) --- sys/crypto/rijndael/rijndael-api-fst.c | 11 ++++++----- sys/crypto/rijndael/rijndael-api-fst.h | 18 +++++++++--------- sys/dev/random/fortuna.c | 19 +++++++------------ sys/dev/random/hash.c | 6 +++--- sys/dev/random/hash.h | 6 +++--- sys/geom/bde/g_bde.h | 6 +++--- 6 files changed, 31 insertions(+), 35 deletions(-) diff --git a/sys/crypto/rijndael/rijndael-api-fst.c b/sys/crypto/rijndael/rijndael-api-fst.c index 187177b39ff..24e5646694e 100644 --- a/sys/crypto/rijndael/rijndael-api-fst.c +++ b/sys/crypto/rijndael/rijndael-api-fst.c @@ -34,7 +34,8 @@ __FBSDID("$FreeBSD$"); typedef u_int8_t BYTE; -int rijndael_makeKey(keyInstance *key, BYTE direction, int keyLen, char *keyMaterial) { +int rijndael_makeKey(keyInstance *key, BYTE direction, int keyLen, + const char *keyMaterial) { u_int8_t cipherKey[RIJNDAEL_MAXKB]; if (key == NULL) { @@ -83,7 +84,7 @@ int rijndael_cipherInit(cipherInstance *cipher, BYTE mode, char *IV) { } int rijndael_blockEncrypt(cipherInstance *cipher, keyInstance *key, - BYTE *input, int inputLen, BYTE *outBuffer) { + const BYTE *input, int inputLen, BYTE *outBuffer) { int i, k, numBlocks; u_int8_t block[16], iv[4][4]; @@ -198,7 +199,7 @@ int rijndael_blockEncrypt(cipherInstance *cipher, keyInstance *key, * @return length in octets (not bits) of the encrypted output buffer. 
*/ int rijndael_padEncrypt(cipherInstance *cipher, keyInstance *key, - BYTE *input, int inputOctets, BYTE *outBuffer) { + const BYTE *input, int inputOctets, BYTE *outBuffer) { int i, numBlocks, padLen; u_int8_t block[16], *iv, *cp; @@ -261,7 +262,7 @@ int rijndael_padEncrypt(cipherInstance *cipher, keyInstance *key, } int rijndael_blockDecrypt(cipherInstance *cipher, keyInstance *key, - BYTE *input, int inputLen, BYTE *outBuffer) { + const BYTE *input, int inputLen, BYTE *outBuffer) { int i, k, numBlocks; u_int8_t block[16], iv[4][4]; @@ -360,7 +361,7 @@ int rijndael_blockDecrypt(cipherInstance *cipher, keyInstance *key, } int rijndael_padDecrypt(cipherInstance *cipher, keyInstance *key, - BYTE *input, int inputOctets, BYTE *outBuffer) { + const BYTE *input, int inputOctets, BYTE *outBuffer) { int i, numBlocks, padLen; u_int8_t block[16]; u_int32_t iv[4]; diff --git a/sys/crypto/rijndael/rijndael-api-fst.h b/sys/crypto/rijndael/rijndael-api-fst.h index 122bf52d6ce..e5f596ac75f 100644 --- a/sys/crypto/rijndael/rijndael-api-fst.h +++ b/sys/crypto/rijndael/rijndael-api-fst.h @@ -56,18 +56,18 @@ typedef struct { /* changed order of the components */ /* Function prototypes */ -int rijndael_makeKey(keyInstance *, u_int8_t, int, char *); +int rijndael_makeKey(keyInstance *, u_int8_t, int, const char *); int rijndael_cipherInit(cipherInstance *, u_int8_t, char *); -int rijndael_blockEncrypt(cipherInstance *, keyInstance *, u_int8_t *, int, - u_int8_t *); -int rijndael_padEncrypt(cipherInstance *, keyInstance *, u_int8_t *, int, - u_int8_t *); +int rijndael_blockEncrypt(cipherInstance *, keyInstance *, const u_int8_t *, + int, u_int8_t *); +int rijndael_padEncrypt(cipherInstance *, keyInstance *, const u_int8_t *, + int, u_int8_t *); -int rijndael_blockDecrypt(cipherInstance *, keyInstance *, u_int8_t *, int, - u_int8_t *); -int rijndael_padDecrypt(cipherInstance *, keyInstance *, u_int8_t *, int, - u_int8_t *); +int rijndael_blockDecrypt(cipherInstance *, keyInstance *, 
const u_int8_t *, + int, u_int8_t *); +int rijndael_padDecrypt(cipherInstance *, keyInstance *, const u_int8_t *, + int, u_int8_t *); #endif /* __RIJNDAEL_API_FST_H */ diff --git a/sys/dev/random/fortuna.c b/sys/dev/random/fortuna.c index f8b3e0c32aa..6f6febbe2b2 100644 --- a/sys/dev/random/fortuna.c +++ b/sys/dev/random/fortuna.c @@ -27,13 +27,11 @@ /* This implementation of Fortuna is based on the descriptions found in * ISBN 0-471-22357-3 "Practical Cryptography" by Ferguson and Schneier - * ("K&S"). + * ("F&S"). * - * The above book is superceded by ISBN 978-0-470-47424-2 "Cryptography - * Engineering" by Ferguson, Schneier and Kohno ("FS&K"). - * - * This code has not yet caught up with FS&K, but differences are not - * expected to be complex. + * The above book is superseded by ISBN 978-0-470-47424-2 "Cryptography + * Engineering" by Ferguson, Schneier and Kohno ("FS&K"). The code has + * not yet fully caught up with FS&K. */ #include @@ -252,12 +250,9 @@ reseed(uint8_t *junk, u_int length) mtx_assert(&random_reseed_mtx, MA_OWNED); #endif - /* F&S - K = Hd(K|s) where Hd(m) is H(H(m)) */ + /* FS&K - K = Hd(K|s) where Hd(m) is H(H(0^512|m)) */ randomdev_hash_init(&context); -#if 0 - /* FS&K defines Hd(m) as H(H(0^512|m)) */ - randomdev_hash_iterate(&context, zero_region, KEYSIZE); -#endif + randomdev_hash_iterate(&context, zero_region, 512/8); randomdev_hash_iterate(&context, &fortuna_state.key, sizeof(fortuna_state.key)); randomdev_hash_iterate(&context, junk, length); randomdev_hash_finish(&context, hash); @@ -270,7 +265,7 @@ reseed(uint8_t *junk, u_int length) /* Unblock the device if it was blocked due to being unseeded */ if (uint128_is_zero(fortuna_state.counter.whole)) random_adaptor_unblock(); - /* F&S - C = C + 1 */ + /* FS&K - C = C + 1 */ uint128_increment(&fortuna_state.counter.whole); } diff --git a/sys/dev/random/hash.c b/sys/dev/random/hash.c index 7deee878191..844e423422a 100644 --- a/sys/dev/random/hash.c +++ b/sys/dev/random/hash.c @@ -60,7 
+60,7 @@ randomdev_hash_init(struct randomdev_hash *context) /* Iterate the hash */ void -randomdev_hash_iterate(struct randomdev_hash *context, void *data, size_t size) +randomdev_hash_iterate(struct randomdev_hash *context, const void *data, size_t size) { SHA256_Update(&context->sha, data, size); @@ -81,7 +81,7 @@ randomdev_hash_finish(struct randomdev_hash *context, void *buf) * data. Use CBC mode for better avalanche. */ void -randomdev_encrypt_init(struct randomdev_key *context, void *data) +randomdev_encrypt_init(struct randomdev_key *context, const void *data) { rijndael_cipherInit(&context->cipher, MODE_CBC, NULL); @@ -93,7 +93,7 @@ randomdev_encrypt_init(struct randomdev_key *context, void *data) * a multiple of BLOCKSIZE. */ void -randomdev_encrypt(struct randomdev_key *context, void *d_in, void *d_out, u_int length) +randomdev_encrypt(struct randomdev_key *context, const void *d_in, void *d_out, u_int length) { rijndael_blockEncrypt(&context->cipher, &context->key, d_in, length*8, d_out); diff --git a/sys/dev/random/hash.h b/sys/dev/random/hash.h index 57c0c6dfd81..d49de3aff01 100644 --- a/sys/dev/random/hash.h +++ b/sys/dev/random/hash.h @@ -42,9 +42,9 @@ struct randomdev_key { /* Big! Make static! 
*/ }; void randomdev_hash_init(struct randomdev_hash *); -void randomdev_hash_iterate(struct randomdev_hash *, void *, size_t); +void randomdev_hash_iterate(struct randomdev_hash *, const void *, size_t); void randomdev_hash_finish(struct randomdev_hash *, void *); -void randomdev_encrypt_init(struct randomdev_key *, void *); -void randomdev_encrypt(struct randomdev_key *context, void *, void *, u_int); +void randomdev_encrypt_init(struct randomdev_key *, const void *); +void randomdev_encrypt(struct randomdev_key *context, const void *, void *, u_int); #endif diff --git a/sys/geom/bde/g_bde.h b/sys/geom/bde/g_bde.h index 9332c6b2706..2f29fe32c87 100644 --- a/sys/geom/bde/g_bde.h +++ b/sys/geom/bde/g_bde.h @@ -182,7 +182,7 @@ AES_init(cipherInstance *ci) } static __inline void -AES_makekey(keyInstance *ki, int dir, u_int len, void *key) +AES_makekey(keyInstance *ki, int dir, u_int len, const void *key) { int error; @@ -191,7 +191,7 @@ AES_makekey(keyInstance *ki, int dir, u_int len, void *key) } static __inline void -AES_encrypt(cipherInstance *ci, keyInstance *ki, void *in, void *out, u_int len) +AES_encrypt(cipherInstance *ci, keyInstance *ki, const void *in, void *out, u_int len) { int error; @@ -200,7 +200,7 @@ AES_encrypt(cipherInstance *ci, keyInstance *ki, void *in, void *out, u_int len) } static __inline void -AES_decrypt(cipherInstance *ci, keyInstance *ki, void *in, void *out, u_int len) +AES_decrypt(cipherInstance *ci, keyInstance *ki, const void *in, void *out, u_int len) { int error; From 45d1880a368b7481232c007349c4e6c159ecccab Mon Sep 17 00:00:00 2001 From: "Andrey V. Elsukov" Date: Mon, 10 Nov 2014 10:59:08 +0000 Subject: [PATCH 029/280] For now handle only multicast addresses, we still use routes to LLA unicasts yet. 
Sponsored by: Yandex LLC --- sys/netinet6/in6_src.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/sys/netinet6/in6_src.c b/sys/netinet6/in6_src.c index d721fb9cdd9..227f234206a 100644 --- a/sys/netinet6/in6_src.c +++ b/sys/netinet6/in6_src.c @@ -586,18 +586,6 @@ selectroute(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, } else goto getroute; } - /* - * If destination address is LLA or link- or node-local multicast, - * use it's embedded scope zone id to determine outgoing interface. - */ - if (IN6_IS_SCOPE_LINKLOCAL(dst) || - IN6_IS_ADDR_MC_NODELOCAL(dst)) { - zoneid = ntohs(in6_getscope(dst)); - if (zoneid > 0) { - ifp = in6_getlinkifnet(zoneid); - goto done; - } - } /* * If the destination address is a multicast address and the outgoing * interface for the address is specified by the caller, use it. @@ -606,6 +594,18 @@ selectroute(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, mopts != NULL && (ifp = mopts->im6o_multicast_ifp) != NULL) { goto done; /* we do not need a route for multicast. */ } + /* + * If destination address is LLA or link- or node-local multicast, + * use it's embedded scope zone id to determine outgoing interface. + */ + if (IN6_IS_ADDR_MC_LINKLOCAL(dst) || + IN6_IS_ADDR_MC_NODELOCAL(dst)) { + zoneid = ntohs(in6_getscope(dst)); + if (zoneid > 0) { + ifp = in6_getlinkifnet(zoneid); + goto done; + } + } getroute: /* From 0436fcb8096c8f8a2d32c8ad621710cd4efe719d Mon Sep 17 00:00:00 2001 From: Konstantin Belousov Date: Mon, 10 Nov 2014 14:11:17 +0000 Subject: [PATCH 030/280] When sleeping waiting for the profiling stop, always set P_STOPPROF before dropping process lock. Clear P_STOPPROF when doing wakeup. Both issues caused thread to hang in stopprofclock() "stopprof" sleep. 
Reported and tested by: pho Sponsored by: The FreeBSD Foundation MFC after: 1 week --- sys/kern/kern_clock.c | 6 +++--- sys/kern/subr_prof.c | 1 + 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/sys/kern/kern_clock.c b/sys/kern/kern_clock.c index 3d232cf744c..79a294d37e3 100644 --- a/sys/kern/kern_clock.c +++ b/sys/kern/kern_clock.c @@ -668,11 +668,11 @@ stopprofclock(p) PROC_LOCK_ASSERT(p, MA_OWNED); if (p->p_flag & P_PROFIL) { if (p->p_profthreads != 0) { - p->p_flag |= P_STOPPROF; - while (p->p_profthreads != 0) + while (p->p_profthreads != 0) { + p->p_flag |= P_STOPPROF; msleep(&p->p_profthreads, &p->p_mtx, PPAUSE, "stopprof", 0); - p->p_flag &= ~P_STOPPROF; + } } if ((p->p_flag & P_PROFIL) == 0) return; diff --git a/sys/kern/subr_prof.c b/sys/kern/subr_prof.c index efd66b274ea..cedfc1b3d1d 100644 --- a/sys/kern/subr_prof.c +++ b/sys/kern/subr_prof.c @@ -533,6 +533,7 @@ out: if (--p->p_profthreads == 0) { if (p->p_flag & P_STOPPROF) { wakeup(&p->p_profthreads); + p->p_flag &= ~P_STOPPROF; stop = 0; } } From e6abaf91f4f5b1f57291ae81d59ab73d55481609 Mon Sep 17 00:00:00 2001 From: Gleb Smirnoff Date: Mon, 10 Nov 2014 15:56:30 +0000 Subject: [PATCH 031/280] Consistently use if_link. Reviewed by: ae, melifaro --- sys/netinet6/icmp6.c | 4 ++-- sys/netinet6/in6.c | 2 +- sys/netinet6/in6_ifattach.c | 4 ++-- sys/netinet6/nd6.c | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/sys/netinet6/icmp6.c b/sys/netinet6/icmp6.c index 891e8eb97ad..84a1bc039e2 100644 --- a/sys/netinet6/icmp6.c +++ b/sys/netinet6/icmp6.c @@ -1748,7 +1748,7 @@ ni6_addrs(struct icmp6_nodeinfo *ni6, struct mbuf *m, struct ifnet **ifpp, } IFNET_RLOCK_NOSLEEP(); - TAILQ_FOREACH(ifp, &V_ifnet, if_list) { + TAILQ_FOREACH(ifp, &V_ifnet, if_link) { addrsofif = 0; IF_ADDR_RLOCK(ifp); TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { @@ -1835,7 +1835,7 @@ ni6_store_addrs(struct icmp6_nodeinfo *ni6, struct icmp6_nodeinfo *nni6, ifp = ifp0 ? 
ifp0 : TAILQ_FIRST(&V_ifnet); again: - for (; ifp; ifp = TAILQ_NEXT(ifp, if_list)) { + for (; ifp; ifp = TAILQ_NEXT(ifp, if_link)) { IF_ADDR_RLOCK(ifp); TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family != AF_INET6) diff --git a/sys/netinet6/in6.c b/sys/netinet6/in6.c index 4ea398a8866..d64de425acc 100644 --- a/sys/netinet6/in6.c +++ b/sys/netinet6/in6.c @@ -1973,7 +1973,7 @@ in6_setmaxmtu(void) struct ifnet *ifp; IFNET_RLOCK_NOSLEEP(); - TAILQ_FOREACH(ifp, &V_ifnet, if_list) { + TAILQ_FOREACH(ifp, &V_ifnet, if_link) { /* this function can be called during ifnet initialization */ if (!ifp->if_afdata[AF_INET6]) continue; diff --git a/sys/netinet6/in6_ifattach.c b/sys/netinet6/in6_ifattach.c index e0ddf22337e..06d931b297c 100644 --- a/sys/netinet6/in6_ifattach.c +++ b/sys/netinet6/in6_ifattach.c @@ -407,7 +407,7 @@ get_ifid(struct ifnet *ifp0, struct ifnet *altifp, /* next, try to get it from some other hardware interface */ IFNET_RLOCK_NOSLEEP(); - TAILQ_FOREACH(ifp, &V_ifnet, if_list) { + TAILQ_FOREACH(ifp, &V_ifnet, if_link) { if (ifp == ifp0) continue; if (in6_get_hw_ifid(ifp, in6) != 0) @@ -847,7 +847,7 @@ in6_tmpaddrtimer(void *arg) V_ip6_temp_regen_advance) * hz, in6_tmpaddrtimer, curvnet); bzero(nullbuf, sizeof(nullbuf)); - TAILQ_FOREACH(ifp, &V_ifnet, if_list) { + TAILQ_FOREACH(ifp, &V_ifnet, if_link) { if (ifp->if_afdata[AF_INET6] == NULL) continue; ndi = ND_IFINFO(ifp); diff --git a/sys/netinet6/nd6.c b/sys/netinet6/nd6.c index 059c84336d2..cedf6e2b873 100644 --- a/sys/netinet6/nd6.c +++ b/sys/netinet6/nd6.c @@ -1811,7 +1811,7 @@ nd6_slowtimo(void *arg) callout_reset(&V_nd6_slowtimo_ch, ND6_SLOWTIMER_INTERVAL * hz, nd6_slowtimo, curvnet); IFNET_RLOCK_NOSLEEP(); - TAILQ_FOREACH(ifp, &V_ifnet, if_list) { + TAILQ_FOREACH(ifp, &V_ifnet, if_link) { if (ifp->if_afdata[AF_INET6] == NULL) continue; nd6if = ND_IFINFO(ifp); From 06fec207911726e78f0af9f43f1567414e072e59 Mon Sep 17 00:00:00 2001 From: "Andrey V. 
Elsukov" Date: Mon, 10 Nov 2014 16:01:31 +0000 Subject: [PATCH 032/280] Remove link-local multicast routes remnants from in6_purgeaddr. Also merge in6_purgeaddr_mc with in6_purgeaddr. Sponsored by: Yandex LLC --- sys/netinet6/in6.c | 44 +++++--------------------------------------- 1 file changed, 5 insertions(+), 39 deletions(-) diff --git a/sys/netinet6/in6.c b/sys/netinet6/in6.c index d64de425acc..e22c0bd69ad 100644 --- a/sys/netinet6/in6.c +++ b/sys/netinet6/in6.c @@ -1283,50 +1283,17 @@ in6_broadcast_ifa(struct ifnet *ifp, struct in6_aliasreq *ifra, return (error); } -/* - * Leave from multicast groups we have joined for the interface. - */ -static int -in6_purgeaddr_mc(struct ifnet *ifp, struct in6_ifaddr *ia, struct ifaddr *ifa0) -{ - struct in6_multi_mship *imm; - - while ((imm = LIST_FIRST(&ia->ia6_memberships)) != NULL) { - LIST_REMOVE(imm, i6mm_chain); - in6_leavegroup(imm); - } - return (0); -} - void in6_purgeaddr(struct ifaddr *ifa) { struct ifnet *ifp = ifa->ifa_ifp; struct in6_ifaddr *ia = (struct in6_ifaddr *) ifa; + struct in6_multi_mship *imm; int plen, error; - struct ifaddr *ifa0; if (ifa->ifa_carp) (*carp_detach_p)(ifa); - /* - * find another IPv6 address as the gateway for the - * link-local and node-local all-nodes multicast - * address routes - */ - IF_ADDR_RLOCK(ifp); - TAILQ_FOREACH(ifa0, &ifp->if_addrhead, ifa_link) { - if ((ifa0->ifa_addr->sa_family != AF_INET6) || - memcmp(&satosin6(ifa0->ifa_addr)->sin6_addr, - &ia->ia_addr.sin6_addr, sizeof(struct in6_addr)) == 0) - continue; - else - break; - } - if (ifa0 != NULL) - ifa_ref(ifa0); - IF_ADDR_RUNLOCK(ifp); - /* * Remove the loopback route to the interface address. * The check for the current setting of "nd6_useloopback" @@ -1346,11 +1313,10 @@ in6_purgeaddr(struct ifaddr *ifa) nd6_rem_ifa_lle(ia); /* Leave multicast groups. 
*/ - error = in6_purgeaddr_mc(ifp, ia, ifa0); - - if (ifa0 != NULL) - ifa_free(ifa0); - + while ((imm = LIST_FIRST(&ia->ia6_memberships)) != NULL) { + LIST_REMOVE(imm, i6mm_chain); + in6_leavegroup(imm); + } plen = in6_mask2len(&ia->ia_prefixmask.sin6_addr, NULL); /* XXX */ if ((ia->ia_flags & IFA_ROUTE) && plen == 128) { error = rtinit(&(ia->ia_ifa), RTM_DELETE, ia->ia_flags | From e0c0711e011d738a332e1c167b7f443183541bff Mon Sep 17 00:00:00 2001 From: "Alexander V. Chernikov" Date: Mon, 10 Nov 2014 16:01:39 +0000 Subject: [PATCH 033/280] * Make nd6_dad_duplicated() constant. * Simplify refcounting by using nd6_dad_add() / nd6_dad_del(). Reviewed by: ae MFC after: 2 weeks Sponsored by: Yandex LLC --- sys/netinet6/nd6.h | 1 - sys/netinet6/nd6_nbr.c | 66 ++++++++++++++++++++---------------------- 2 files changed, 32 insertions(+), 35 deletions(-) diff --git a/sys/netinet6/nd6.h b/sys/netinet6/nd6.h index ff475c82874..358f4ecb167 100644 --- a/sys/netinet6/nd6.h +++ b/sys/netinet6/nd6.h @@ -430,7 +430,6 @@ void nd6_ns_output(struct ifnet *, const struct in6_addr *, caddr_t nd6_ifptomac(struct ifnet *); void nd6_dad_start(struct ifaddr *, int); void nd6_dad_stop(struct ifaddr *); -void nd6_dad_duplicated(struct ifaddr *); /* nd6_rtr.c */ void nd6_rs_input(struct mbuf *, int, int); diff --git a/sys/netinet6/nd6_nbr.c b/sys/netinet6/nd6_nbr.c index 16406a25d36..a917b76ce36 100644 --- a/sys/netinet6/nd6_nbr.c +++ b/sys/netinet6/nd6_nbr.c @@ -79,9 +79,12 @@ __FBSDID("$FreeBSD$"); struct dadq; static struct dadq *nd6_dad_find(struct ifaddr *); +static void nd6_dad_add(struct dadq *dp); +static void nd6_dad_del(struct dadq *dp); static void nd6_dad_starttimer(struct dadq *, int); static void nd6_dad_stoptimer(struct dadq *); static void nd6_dad_timer(struct dadq *); +static void nd6_dad_duplicated(struct ifaddr *, struct dadq *); static void nd6_dad_ns_output(struct dadq *, struct ifaddr *); static void nd6_dad_ns_input(struct ifaddr *); static void nd6_dad_na_input(struct 
ifaddr *); @@ -1179,6 +1182,26 @@ static VNET_DEFINE(struct rwlock, dad_rwlock); #define DADQ_WLOCK() rw_wlock(&V_dad_rwlock) #define DADQ_WUNLOCK() rw_wunlock(&V_dad_rwlock) +static void +nd6_dad_add(struct dadq *dp) +{ + + ifa_ref(dp->dad_ifa); /* just for safety */ + DADQ_WLOCK(); + TAILQ_INSERT_TAIL(&V_dadq, (struct dadq *)dp, dad_list); + DADQ_WUNLOCK(); +} + +static void +nd6_dad_del(struct dadq *dp) +{ + + ifa_free(dp->dad_ifa); + DADQ_WLOCK(); + TAILQ_REMOVE(&V_dadq, (struct dadq *)dp, dad_list); + DADQ_WUNLOCK(); +} + static struct dadq * nd6_dad_find(struct ifaddr *ifa) { @@ -1270,10 +1293,6 @@ nd6_dad_start(struct ifaddr *ifa, int delay) #ifdef VIMAGE dp->dad_vnet = curvnet; #endif - DADQ_WLOCK(); - TAILQ_INSERT_TAIL(&V_dadq, (struct dadq *)dp, dad_list); - DADQ_WUNLOCK(); - nd6log((LOG_DEBUG, "%s: starting DAD for %s\n", if_name(ifa->ifa_ifp), ip6_sprintf(ip6buf, &ia->ia_addr.sin6_addr))); @@ -1284,10 +1303,10 @@ nd6_dad_start(struct ifaddr *ifa, int delay) * (re)initialization. 
*/ dp->dad_ifa = ifa; - ifa_ref(ifa); /* just for safety */ dp->dad_count = V_ip6_dad_count; dp->dad_ns_icount = dp->dad_na_icount = 0; dp->dad_ns_ocount = dp->dad_ns_tcount = 0; + nd6_dad_add(dp); if (delay == 0) { nd6_dad_ns_output(dp, ifa); nd6_dad_starttimer(dp, @@ -1315,12 +1334,8 @@ nd6_dad_stop(struct ifaddr *ifa) nd6_dad_stoptimer(dp); - DADQ_WLOCK(); - TAILQ_REMOVE(&V_dadq, (struct dadq *)dp, dad_list); - DADQ_WUNLOCK(); + nd6_dad_del(dp); free(dp, M_IP6NDP); - dp = NULL; - ifa_free(ifa); } static void @@ -1367,12 +1382,9 @@ nd6_dad_timer(struct dadq *dp) nd6log((LOG_INFO, "%s: could not run DAD, driver problem?\n", if_name(ifa->ifa_ifp))); - DADQ_WLOCK(); - TAILQ_REMOVE(&V_dadq, (struct dadq *)dp, dad_list); - DADQ_WUNLOCK(); + nd6_dad_del(dp); free(dp, M_IP6NDP); dp = NULL; - ifa_free(ifa); goto done; } @@ -1408,8 +1420,8 @@ nd6_dad_timer(struct dadq *dp) if (duplicate) { /* (*dp) will be freed in nd6_dad_duplicated() */ + nd6_dad_duplicated(ifa, dp); dp = NULL; - nd6_dad_duplicated(ifa); } else { /* * We are done with DAD. No NA came, no NS came. 
@@ -1425,12 +1437,9 @@ nd6_dad_timer(struct dadq *dp) if_name(ifa->ifa_ifp), ip6_sprintf(ip6buf, &ia->ia_addr.sin6_addr))); - DADQ_WLOCK(); - TAILQ_REMOVE(&V_dadq, (struct dadq *)dp, dad_list); - DADQ_WUNLOCK(); + nd6_dad_del(dp); free(dp, M_IP6NDP); dp = NULL; - ifa_free(ifa); } } @@ -1439,19 +1448,12 @@ done: } void -nd6_dad_duplicated(struct ifaddr *ifa) +nd6_dad_duplicated(struct ifaddr *ifa, struct dadq *dp) { struct in6_ifaddr *ia = (struct in6_ifaddr *)ifa; struct ifnet *ifp; - struct dadq *dp; char ip6buf[INET6_ADDRSTRLEN]; - dp = nd6_dad_find(ifa); - if (dp == NULL) { - log(LOG_ERR, "nd6_dad_duplicated: DAD structure not found\n"); - return; - } - log(LOG_ERR, "%s: DAD detected duplicate IPv6 address %s: " "NS in/out=%d/%d, NA in=%d\n", if_name(ifa->ifa_ifp), ip6_sprintf(ip6buf, &ia->ia_addr.sin6_addr), @@ -1504,12 +1506,8 @@ nd6_dad_duplicated(struct ifaddr *ifa) } } - DADQ_WLOCK(); - TAILQ_REMOVE(&V_dadq, (struct dadq *)dp, dad_list); - DADQ_WUNLOCK(); + nd6_dad_del(dp); free(dp, M_IP6NDP); - dp = NULL; - ifa_free(ifa); } static void @@ -1568,8 +1566,8 @@ nd6_dad_ns_input(struct ifaddr *ifa) /* XXX more checks for loopback situation - see nd6_dad_timer too */ if (duplicate) { + nd6_dad_duplicated(ifa, dp); dp = NULL; /* will be freed in nd6_dad_duplicated() */ - nd6_dad_duplicated(ifa); } else { /* * not sure if I got a duplicate. @@ -1593,5 +1591,5 @@ nd6_dad_na_input(struct ifaddr *ifa) dp->dad_na_icount++; /* remove the address. */ - nd6_dad_duplicated(ifa); + nd6_dad_duplicated(ifa, dp); } From 002c24396d159ac2eeb89e78c2a53fd88f5eb905 Mon Sep 17 00:00:00 2001 From: "Andrey V. Elsukov" Date: Mon, 10 Nov 2014 16:12:51 +0000 Subject: [PATCH 034/280] Add sa6_checkzone_ifp() function. It checks correctness of struct sockaddr_in6, usually obtained from the user level through ioctl. It initializes sin6_scope_id using given interface. 
Sponsored by: Yandex LLC --- sys/netinet6/scope6.c | 21 +++++++++++++++++++++ sys/netinet6/scope6_var.h | 1 + 2 files changed, 22 insertions(+) diff --git a/sys/netinet6/scope6.c b/sys/netinet6/scope6.c index f19909189b1..0e4d303cfaa 100644 --- a/sys/netinet6/scope6.c +++ b/sys/netinet6/scope6.c @@ -532,4 +532,25 @@ sa6_checkzone(struct sockaddr_in6 *sa6) return (sa6->sin6_scope_id ? 0: EADDRNOTAVAIL); } +/* + * This function is similar to sa6_checkzone, but it uses given ifp + * to initialize sin6_scope_id. + */ +int +sa6_checkzone_ifp(struct ifnet *ifp, struct sockaddr_in6 *sa6) +{ + int scope; + + scope = in6_addrscope(&sa6->sin6_addr); + if (scope == IPV6_ADDR_SCOPE_LINKLOCAL || + scope == IPV6_ADDR_SCOPE_INTFACELOCAL) { + if (sa6->sin6_scope_id == 0) { + sa6->sin6_scope_id = in6_getscopezone(ifp, scope); + return (0); + } else if (sa6->sin6_scope_id != in6_getscopezone(ifp, scope)) + return (EADDRNOTAVAIL); + } + return (sa6_checkzone(sa6)); +} + diff --git a/sys/netinet6/scope6_var.h b/sys/netinet6/scope6_var.h index 8a4b6fcf775..d4d0478aaf5 100644 --- a/sys/netinet6/scope6_var.h +++ b/sys/netinet6/scope6_var.h @@ -58,6 +58,7 @@ u_int32_t scope6_addr2default(struct in6_addr *); int sa6_embedscope(struct sockaddr_in6 *, int); int sa6_recoverscope(struct sockaddr_in6 *); int sa6_checkzone(struct sockaddr_in6 *); +int sa6_checkzone_ifp(struct ifnet *, struct sockaddr_in6 *); int in6_setscope(struct in6_addr *, struct ifnet *, u_int32_t *); int in6_clearscope(struct in6_addr *); uint16_t in6_getscope(struct in6_addr *); From 4de737d34905c511b6355e6c7e51e3622c3d4849 Mon Sep 17 00:00:00 2001 From: Ed Maste Date: Mon, 10 Nov 2014 18:20:46 +0000 Subject: [PATCH 035/280] Add /usr/lib/debug directory to hier(7) The canonical standalone debug directory established by the GNU toolchain is /usr/lib/debug, and we use it when WITH_DEBUG_FILES is set. Mention it in the file system hierarchy page. 
Reviewed by: bcr Sponsored by: The FreeBSD Foundation Differential Revision: https://reviews.freebsd.org/D1134 --- share/man/man7/hier.7 | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/share/man/man7/hier.7 b/share/man/man7/hier.7 index 9cba8961d16..76d4f62f630 100644 --- a/share/man/man7/hier.7 +++ b/share/man/man7/hier.7 @@ -28,7 +28,7 @@ .\" @(#)hier.7 8.1 (Berkeley) 6/5/93 .\" $FreeBSD$ .\" -.Dd July 25, 2014 +.Dd November 10, 2014 .Dt HIER 7 .Os .Sh NAME @@ -380,6 +380,8 @@ shared libraries for compatibility .It Pa aout/ a.out backward compatibility libraries .El +.It Pa debug/ +standalone debug data for the base system libraries and binaries .It Pa dtrace/ DTrace library scripts .It Pa engines/ From 62fc63abfbd197f4527bdaef2a55dce15ab4c3fc Mon Sep 17 00:00:00 2001 From: Navdeep Parhar Date: Mon, 10 Nov 2014 19:45:28 +0000 Subject: [PATCH 036/280] cxgbe(4): adjust PMRX and PMTX parameters. MFC after: 1 week --- sys/dev/cxgbe/firmware/t4fw_cfg.txt | 6 +++--- sys/dev/cxgbe/firmware/t4fw_cfg_uwire.txt | 6 +++--- sys/dev/cxgbe/firmware/t5fw_cfg.txt | 6 +++--- sys/dev/cxgbe/firmware/t5fw_cfg_fpga.txt | 6 +++--- sys/dev/cxgbe/firmware/t5fw_cfg_uwire.txt | 6 +++--- 5 files changed, 15 insertions(+), 15 deletions(-) diff --git a/sys/dev/cxgbe/firmware/t4fw_cfg.txt b/sys/dev/cxgbe/firmware/t4fw_cfg.txt index 81d7df2466d..f417abb6a7e 100644 --- a/sys/dev/cxgbe/firmware/t4fw_cfg.txt +++ b/sys/dev/cxgbe/firmware/t4fw_cfg.txt @@ -28,8 +28,8 @@ tp_ntxch = 0 # TP rx and tx payload memory (% of the total EDRAM + DDR3). 
- tp_pmrx = 38 - tp_pmtx = 60 + tp_pmrx = 38, 512 + tp_pmtx = 60, 512 tp_pmrx_pagesize = 64K tp_pmtx_pagesize = 64K @@ -160,7 +160,7 @@ [fini] version = 0x1 - checksum = 0x6a1f8858 + checksum = 0xb4168add # # $FreeBSD$ # diff --git a/sys/dev/cxgbe/firmware/t4fw_cfg_uwire.txt b/sys/dev/cxgbe/firmware/t4fw_cfg_uwire.txt index 2690edd1e6f..95fc7b1eae6 100644 --- a/sys/dev/cxgbe/firmware/t4fw_cfg_uwire.txt +++ b/sys/dev/cxgbe/firmware/t4fw_cfg_uwire.txt @@ -125,7 +125,7 @@ # Percentage of dynamic memory (in either the EDRAM or external MEM) # to use for TP RX payload - tp_pmrx = 34 + tp_pmrx = 34, 512 # TP RX payload page size tp_pmrx_pagesize = 64K @@ -135,7 +135,7 @@ # Percentage of dynamic memory (in either the EDRAM or external MEM) # to use for TP TX payload - tp_pmtx = 32 + tp_pmtx = 32, 512 # TP TX payload page size tp_pmtx_pagesize = 64K @@ -544,7 +544,7 @@ [fini] version = 0x14250012 - checksum = 0xd9ae0325 + checksum = 0x22f592a9 # Total resources used by above allocations: # Virtual Interfaces: 104 diff --git a/sys/dev/cxgbe/firmware/t5fw_cfg.txt b/sys/dev/cxgbe/firmware/t5fw_cfg.txt index 51b9129bbde..59ca453066f 100644 --- a/sys/dev/cxgbe/firmware/t5fw_cfg.txt +++ b/sys/dev/cxgbe/firmware/t5fw_cfg.txt @@ -37,8 +37,8 @@ tp_ntxch = 0 # TP rx and tx payload memory (% of the total EDRAM + DDR3). 
- tp_pmrx = 38 - tp_pmtx = 60 + tp_pmrx = 38, 512 + tp_pmtx = 60, 512 tp_pmrx_pagesize = 64K tp_pmtx_pagesize = 64K @@ -173,7 +173,7 @@ [fini] version = 0x1 - checksum = 0xa0ee1715 + checksum = 0x4f45e608 # # $FreeBSD$ # diff --git a/sys/dev/cxgbe/firmware/t5fw_cfg_fpga.txt b/sys/dev/cxgbe/firmware/t5fw_cfg_fpga.txt index eb4ed4b205c..e1c8b000815 100644 --- a/sys/dev/cxgbe/firmware/t5fw_cfg_fpga.txt +++ b/sys/dev/cxgbe/firmware/t5fw_cfg_fpga.txt @@ -149,7 +149,7 @@ # Percentage of dynamic memory (in either the EDRAM or external MEM) # to use for TP RX payload - tp_pmrx = 30 + tp_pmrx = 30, 512 # TP RX payload page size tp_pmrx_pagesize = 64K @@ -159,7 +159,7 @@ # Percentage of dynamic memory (in either the EDRAM or external MEM) # to use for TP TX payload - tp_pmtx = 50 + tp_pmtx = 50, 512 # TP TX payload page size tp_pmtx_pagesize = 64K @@ -463,7 +463,7 @@ [fini] version = 0x1425000d - checksum = 0xe56cb999 + checksum = 0x22f1530b # Total resources used by above allocations: # Virtual Interfaces: 104 diff --git a/sys/dev/cxgbe/firmware/t5fw_cfg_uwire.txt b/sys/dev/cxgbe/firmware/t5fw_cfg_uwire.txt index 3a3d2a938c0..a3c4482e668 100644 --- a/sys/dev/cxgbe/firmware/t5fw_cfg_uwire.txt +++ b/sys/dev/cxgbe/firmware/t5fw_cfg_uwire.txt @@ -153,7 +153,7 @@ # Percentage of dynamic memory (in either the EDRAM or external MEM) # to use for TP RX payload - tp_pmrx = 30 + tp_pmrx = 30, 512 # TP RX payload page size tp_pmrx_pagesize = 64K @@ -163,7 +163,7 @@ # Percentage of dynamic memory (in either the EDRAM or external MEM) # to use for TP TX payload - tp_pmtx = 50 + tp_pmtx = 50, 512 # TP TX payload page size tp_pmtx_pagesize = 64K @@ -587,7 +587,7 @@ [fini] version = 0x14250016 - checksum = 0x5d740273 + checksum = 0xafaf8723 # Total resources used by above allocations: # Virtual Interfaces: 104 From db5cb21105b8fe4aaa4f792c6c81df20995b1755 Mon Sep 17 00:00:00 2001 From: Luigi Rizzo Date: Mon, 10 Nov 2014 20:19:58 +0000 Subject: [PATCH 037/280] sync a comment with our 
internal repo --- sys/dev/netmap/netmap_generic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sys/dev/netmap/netmap_generic.c b/sys/dev/netmap/netmap_generic.c index 774038206b1..ecdb3682407 100644 --- a/sys/dev/netmap/netmap_generic.c +++ b/sys/dev/netmap/netmap_generic.c @@ -821,7 +821,7 @@ generic_netmap_attach(struct ifnet *ifp) num_tx_desc = num_rx_desc = netmap_generic_ringsize; /* starting point */ - generic_find_num_desc(ifp, &num_tx_desc, &num_rx_desc); + generic_find_num_desc(ifp, &num_tx_desc, &num_rx_desc); /* ignore errors */ ND("Netmap ring size: TX = %d, RX = %d", num_tx_desc, num_rx_desc); if (num_tx_desc == 0 || num_rx_desc == 0) { D("Device has no hw slots (tx %u, rx %u)", num_tx_desc, num_rx_desc); From 4e93beff927b4bf5874c604d629c91ddbb9a7835 Mon Sep 17 00:00:00 2001 From: Luigi Rizzo Date: Mon, 10 Nov 2014 20:25:33 +0000 Subject: [PATCH 038/280] initialize *color if passed as an argument --- sys/dev/netmap/netmap_freebsd.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sys/dev/netmap/netmap_freebsd.c b/sys/dev/netmap/netmap_freebsd.c index 160b7c0ef95..037abc30ebe 100644 --- a/sys/dev/netmap/netmap_freebsd.c +++ b/sys/dev/netmap/netmap_freebsd.c @@ -466,6 +466,8 @@ netmap_dev_pager_ctor(void *handle, vm_ooffset_t size, vm_prot_t prot, if (netmap_verbose) D("handle %p size %jd prot %d foff %jd", handle, (intmax_t)size, prot, (intmax_t)foff); + if (color) + *color = 0; dev_ref(vmh->dev); return 0; } From 6435a0dc1b89e5ba04e75e032b7e96b4337b3961 Mon Sep 17 00:00:00 2001 From: Luigi Rizzo Date: Mon, 10 Nov 2014 21:00:23 +0000 Subject: [PATCH 039/280] fix a typo --- sys/dev/netmap/netmap_kern.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sys/dev/netmap/netmap_kern.h b/sys/dev/netmap/netmap_kern.h index 76d893588e3..45548a641d2 100644 --- a/sys/dev/netmap/netmap_kern.h +++ b/sys/dev/netmap/netmap_kern.h @@ -266,7 +266,7 @@ struct netmap_kring { struct netmap_adapter *na; - /* The folloiwing fields are 
for VALE switch support */ + /* The following fields are for VALE switch support */ struct nm_bdg_fwd *nkr_ft; uint32_t *nkr_leases; #define NR_NOSLOT ((uint32_t)~0) /* used in nkr_*lease* */ From d1f79a3bfc91aefacc8bd6e40d081f850eba17b5 Mon Sep 17 00:00:00 2001 From: "Alexander V. Chernikov" Date: Mon, 10 Nov 2014 23:10:01 +0000 Subject: [PATCH 040/280] Remove kernel handling of ICMP_SOURCEQUENCH. It hasn't been used for a very long time. Additionally, it was deprecated by RFC 6633. --- sys/netinet/ip_fastfwd.c | 1 - sys/netinet/ip_icmp.c | 7 +------ sys/netinet/ip_input.c | 25 ------------------------- sys/netinet/tcp_subr.c | 8 -------- sys/sys/protosw.h | 4 ++-- tools/tools/sysdoc/tunables.mdoc | 9 --------- 6 files changed, 3 insertions(+), 51 deletions(-) diff --git a/sys/netinet/ip_fastfwd.c b/sys/netinet/ip_fastfwd.c index 36d9985ea59..e9196cc0592 100644 --- a/sys/netinet/ip_fastfwd.c +++ b/sys/netinet/ip_fastfwd.c @@ -502,7 +502,6 @@ passout: if ((ifp->if_snd.ifq_len + ip_len / ifp->if_mtu + 1) >= ifp->if_snd.ifq_maxlen) { IPSTAT_INC(ips_odropped); - /* would send source quench here but that is depreciated */ goto drop; } #endif diff --git a/sys/netinet/ip_icmp.c b/sys/netinet/ip_icmp.c index 23f7a567c74..25ccb4f71b9 100644 --- a/sys/netinet/ip_icmp.c +++ b/sys/netinet/ip_icmp.c @@ -486,12 +486,6 @@ icmp_input(struct mbuf **mp, int *offp, int proto) if (code > 1) goto badcode; code = PRC_PARAMPROB; - goto deliver; - - case ICMP_SOURCEQUENCH: - if (code) - goto badcode; - code = PRC_QUENCH; deliver: /* * Problem with datagram; advise higher level routines. 
@@ -670,6 +664,7 @@ reflect: case ICMP_TSTAMPREPLY: case ICMP_IREQREPLY: case ICMP_MASKREPLY: + case ICMP_SOURCEQUENCH: default: break; } diff --git a/sys/netinet/ip_input.c b/sys/netinet/ip_input.c index 1ad020a2114..6f48d935cbc 100644 --- a/sys/netinet/ip_input.c +++ b/sys/netinet/ip_input.c @@ -104,12 +104,6 @@ SYSCTL_INT(_net_inet_ip, IPCTL_SENDREDIRECTS, redirect, CTLFLAG_VNET | CTLFLAG_R &VNET_NAME(ipsendredirects), 0, "Enable sending IP redirects"); -static VNET_DEFINE(int, ip_sendsourcequench); -#define V_ip_sendsourcequench VNET(ip_sendsourcequench) -SYSCTL_INT(_net_inet_ip, OID_AUTO, sendsourcequench, CTLFLAG_VNET | CTLFLAG_RW, - &VNET_NAME(ip_sendsourcequench), 0, - "Enable the transmission of source quench packets"); - VNET_DEFINE(int, ip_do_randomid); SYSCTL_INT(_net_inet_ip, OID_AUTO, random_id, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip_do_randomid), 0, @@ -1647,25 +1641,6 @@ ip_forward(struct mbuf *m, int srcrt) break; case ENOBUFS: - /* - * A router should not generate ICMP_SOURCEQUENCH as - * required in RFC1812 Requirements for IP Version 4 Routers. - * Source quench could be a big problem under DoS attacks, - * or if the underlying interface is rate-limited. - * Those who need source quench packets may re-enable them - * via the net.inet.ip.sendsourcequench sysctl. - */ - if (V_ip_sendsourcequench == 0) { - m_freem(mcopy); - if (ia != NULL) - ifa_free(&ia->ia_ifa); - return; - } else { - type = ICMP_SOURCEQUENCH; - code = 0; - } - break; - case EACCES: /* ipfw denied packet */ m_freem(mcopy); if (ia != NULL) diff --git a/sys/netinet/tcp_subr.c b/sys/netinet/tcp_subr.c index b5152d246e7..5b845f09705 100644 --- a/sys/netinet/tcp_subr.c +++ b/sys/netinet/tcp_subr.c @@ -1429,11 +1429,6 @@ tcp_ctlinput(int cmd, struct sockaddr *sa, void *vip) */ else if (PRC_IS_REDIRECT(cmd)) return; - /* - * Source quench is depreciated. - */ - else if (cmd == PRC_QUENCH) - return; /* * Hostdead is ugly because it goes linearly through all PCBs. 
* XXX: We never get this from ICMP, otherwise it makes an @@ -1539,9 +1534,6 @@ tcp6_ctlinput(int cmd, struct sockaddr *sa, void *d) else if (!PRC_IS_REDIRECT(cmd) && ((unsigned)cmd >= PRC_NCMDS || inet6ctlerrmap[cmd] == 0)) return; - /* Source quench is depreciated. */ - else if (cmd == PRC_QUENCH) - return; /* if the parameter is from icmp6, decode it. */ if (d != NULL) { diff --git a/sys/sys/protosw.h b/sys/sys/protosw.h index ba45f946196..2d98a4c25ed 100644 --- a/sys/sys/protosw.h +++ b/sys/sys/protosw.h @@ -274,8 +274,8 @@ int pru_sopoll_notsupp(struct socket *so, int events, struct ucred *cred, #define PRC_IFDOWN 0 /* interface transition */ #define PRC_ROUTEDEAD 1 /* select new route if possible ??? */ #define PRC_IFUP 2 /* interface has come back up */ -#define PRC_QUENCH2 3 /* DEC congestion bit says slow down */ -#define PRC_QUENCH 4 /* some one said to slow down */ +/* was PRC_QUENCH2 3 DEC congestion bit says slow down */ +/* was PRC_QUENCH 4 Deprecated by RFC 6633 */ #define PRC_MSGSIZE 5 /* message size forced drop */ #define PRC_HOSTDEAD 6 /* host appears to be down */ #define PRC_HOSTUNREACH 7 /* deprecated (use PRC_UNREACH_HOST) */ diff --git a/tools/tools/sysdoc/tunables.mdoc b/tools/tools/sysdoc/tunables.mdoc index 6d88856cfd0..12b6f4651c8 100644 --- a/tools/tools/sysdoc/tunables.mdoc +++ b/tools/tools/sysdoc/tunables.mdoc @@ -1346,15 +1346,6 @@ See .Xr inet 4 for more information. ---- -net.inet.ip.sendsourcequench -bool - -This -.Nm -enables or disables the transmission of -source quench packets. 
- --- net.inet.ip.sourceroute bool From 204f91dd3a716784960430840d07e554a5734cde Mon Sep 17 00:00:00 2001 From: Luigi Rizzo Date: Tue, 11 Nov 2014 00:10:44 +0000 Subject: [PATCH 041/280] - fix typo: use ring size from the rx ring, not the tx one (they should be the same, but just in case); - reuse the previously computed len-1 value --- sys/dev/netmap/netmap_pipe.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sys/dev/netmap/netmap_pipe.c b/sys/dev/netmap/netmap_pipe.c index bc998c04a17..ab77f4b6dcd 100644 --- a/sys/dev/netmap/netmap_pipe.c +++ b/sys/dev/netmap/netmap_pipe.c @@ -197,10 +197,10 @@ netmap_pipe_txsync(struct netmap_kring *txkring, int flags) if (m < 0) m += txkring->nkr_num_slots; limit = m; - m = rxkring->nkr_num_slots - 1; /* max avail space on destination */ + m = lim_rx; /* max avail space on destination */ busy = j - rxkring->nr_hwcur; /* busy slots */ if (busy < 0) - busy += txkring->nkr_num_slots; + busy += rxkring->nkr_num_slots; m -= busy; /* subtract busy slots */ ND(2, "m %d limit %d", m, limit); if (m < limit) From 039dd540f58fca5779965233bfa8e788382a2f54 Mon Sep 17 00:00:00 2001 From: Luigi Rizzo Date: Tue, 11 Nov 2014 00:13:28 +0000 Subject: [PATCH 042/280] in the Linux section, properly define the NMG_LOCK type. Also import WITH_GENERIC in preparation to adding fine-grained options to disable specific netmap components. 
--- sys/dev/netmap/netmap_kern.h | 28 +++++++++++++++++++++------- 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/sys/dev/netmap/netmap_kern.h b/sys/dev/netmap/netmap_kern.h index 45548a641d2..3e838a2df71 100644 --- a/sys/dev/netmap/netmap_kern.h +++ b/sys/dev/netmap/netmap_kern.h @@ -37,6 +37,7 @@ #define WITH_VALE // comment out to disable VALE support #define WITH_PIPES #define WITH_MONITOR +#define WITH_GENERIC #if defined(__FreeBSD__) @@ -44,6 +45,8 @@ #define unlikely(x) __builtin_expect((long)!!(x), 0L) #define NM_LOCK_T struct mtx + +/* netmap global lock */ #define NMG_LOCK_T struct sx #define NMG_LOCK_INIT() sx_init(&netmap_global_lock, \ "netmap global lock") @@ -107,13 +110,20 @@ struct hrtimer { #define NM_ATOMIC_T volatile long unsigned int -// XXX a mtx would suffice here too 20130404 gl -#define NMG_LOCK_T struct semaphore -#define NMG_LOCK_INIT() sema_init(&netmap_global_lock, 1) -#define NMG_LOCK_DESTROY() -#define NMG_LOCK() down(&netmap_global_lock) -#define NMG_UNLOCK() up(&netmap_global_lock) -#define NMG_LOCK_ASSERT() // XXX to be completed +#define NM_MTX_T struct mutex +#define NM_MTX_INIT(m, s) do { (void)s; mutex_init(&(m)); } while (0) +#define NM_MTX_DESTROY(m) do { (void)m; } while (0) +#define NM_MTX_LOCK(m) mutex_lock(&(m)) +#define NM_MTX_UNLOCK(m) mutex_unlock(&(m)) +#define NM_MTX_LOCK_ASSERT(m) mutex_is_locked(&(m)) + +#define NMG_LOCK_T NM_MTX_T +#define NMG_LOCK_INIT() NM_MTX_INIT(netmap_global_lock, \ + "netmap_global_lock") +#define NMG_LOCK_DESTROY() NM_MTX_DESTROY(netmap_global_lock) +#define NMG_LOCK() NM_MTX_LOCK(netmap_global_lock) +#define NMG_UNLOCK() NM_MTX_UNLOCK(netmap_global_lock) +#define NMG_LOCK_ASSERT() NM_MTX_LOCK_ASSERT(netmap_global_lock) #ifndef DEV_NETMAP #define DEV_NETMAP @@ -641,6 +651,7 @@ struct netmap_hw_adapter { /* physical device */ int (*nm_hw_register)(struct netmap_adapter *, int onoff); }; +#ifdef WITH_GENERIC /* Mitigation support. 
 */ struct nm_generic_mit { struct hrtimer mit_timer; @@ -668,6 +679,7 @@ struct netmap_generic_adapter { /* emulated device */ netdev_tx_t (*save_start_xmit)(struct mbuf *, struct ifnet *); #endif }; +#endif /* WITH_GENERIC */ static __inline int netmap_real_tx_rings(struct netmap_adapter *na) @@ -1481,6 +1493,7 @@ struct netmap_monitor_adapter { #endif /* WITH_MONITOR */ +#ifdef WITH_GENERIC /* * generic netmap emulation for devices that do not have * native netmap support. @@ -1512,6 +1525,7 @@ void netmap_mitigation_start(struct nm_generic_mit *mit); void netmap_mitigation_restart(struct nm_generic_mit *mit); int netmap_mitigation_active(struct nm_generic_mit *mit); void netmap_mitigation_cleanup(struct nm_generic_mit *mit); +#endif /* WITH_GENERIC */ From 670e8b3b8c6b14a31d015af2b6d595ad2bce17bb Mon Sep 17 00:00:00 2001 From: "Alexander V. Chernikov" Date: Tue, 11 Nov 2014 02:52:40 +0000 Subject: [PATCH 043/280] Kill custom in_matroute() radix matching function removing one rte mutex lock. Initially in_matroute() and in_clsroute() in their current state were introduced by r4105 20 years ago. Instead of deleting inactive routes immediately, we kept them in route table, setting RTPRF_OURS flag and some expire time. After that, either GC came or RTPRF_OURS got removed on first-packet. It was a good solution in those days (and probably another decade after that) to keep TCP metrics. However, after moving metrics to TCP hostcache in r122922, most of in_rmx functionality became unused. It might have been used for flushing icmp-originated routes before rte mutexes/refcounting, but I'm not sure about that. So it looks like this is nearly impossible to make GC do its work nowadays: in_rtqkill() ignores non-RTPRF_OURS routes. A route can only become RTPRF_OURS after dropping last reference via rtfree() which calls in_clsroute(), which, in turn, ignores UP and non-RTF_DYNAMIC routes.
Dynamic routes can still be installed via received redirect, but they have default lifetime (no specific rt_expire) and no one has another trie walker to call RTFREE() on them. So, the changelist: * remove custom rnh_match / rnh_close matching function. * remove all GC functions * partially revert r256695 (proto3 is no more used inside kernel, it is not possible to use rt_expire from user point of view, proto3 support is not complete) * Finish r241884 (similar to this commit) and remove remaining IPv6 parts MFC after: 1 month --- sbin/route/keywords | 1 - sbin/route/route.8 | 3 +- sbin/route/route.c | 3 - share/man/man4/inet.4 | 15 -- share/man/man4/inet6.4 | 15 -- share/man/man7/security.7 | 35 ----- sys/netinet/in.h | 6 +- sys/netinet/in_rmx.c | 241 ------------------------------- sys/netinet/in_var.h | 2 - sys/netinet/ip_icmp.c | 36 +---- sys/netinet/ip_input.c | 1 - sys/netinet6/in6.h | 6 +- sys/netinet6/in6_rmx.c | 19 --- tools/tools/sysdoc/tunables.mdoc | 22 --- 14 files changed, 10 insertions(+), 395 deletions(-) diff --git a/sbin/route/keywords b/sbin/route/keywords index 8b64be28d38..82edc46690d 100644 --- a/sbin/route/keywords +++ b/sbin/route/keywords @@ -40,7 +40,6 @@ osi prefixlen proto1 proto2 -proto3 proxy recvpipe reject diff --git a/sbin/route/route.8 b/sbin/route/route.8 index 000fbe9c187..5e6f78b4e01 100644 --- a/sbin/route/route.8 +++ b/sbin/route/route.8 @@ -28,7 +28,7 @@ .\" @(#)route.8 8.3 (Berkeley) 3/19/94 .\" $FreeBSD$ .\" -.Dd January 11, 2014 +.Dd November 11, 2014 .Dt ROUTE 8 .Os .Sh NAME @@ -315,7 +315,6 @@ by indicating the following corresponding modifiers: -blackhole RTF_BLACKHOLE - silently discard pkts (during updates) -proto1 RTF_PROTO1 - set protocol specific routing flag #1 -proto2 RTF_PROTO2 - set protocol specific routing flag #2 --proto3 RTF_PROTO3 - set protocol specific routing flag #3 .Ed .Pp The optional modifiers diff --git a/sbin/route/route.c b/sbin/route/route.c index 604057d147d..2c328a39c41 100644 --- 
a/sbin/route/route.c +++ b/sbin/route/route.c @@ -847,9 +847,6 @@ newroute(int argc, char **argv) case K_PROTO2: flags |= RTF_PROTO2; break; - case K_PROTO3: - flags |= RTF_PROTO3; - break; case K_PROXY: nrflags |= F_PROXY; break; diff --git a/share/man/man4/inet.4 b/share/man/man4/inet.4 index 0b7a108db52..b0b5a9ee30e 100644 --- a/share/man/man4/inet.4 +++ b/share/man/man4/inet.4 @@ -211,21 +211,6 @@ Boolean: enable/disable accepting of source-routed IP packets (default false). .It Dv IPCTL_SOURCEROUTE .Pq ip.sourceroute Boolean: enable/disable forwarding of source-routed IP packets (default false). -.It Dv IPCTL_RTEXPIRE -.Pq ip.rtexpire -Integer: lifetime in seconds of protocol-cloned -.Tn IP -routes after the last reference drops (default one hour). -This value varies dynamically as described above. -.It Dv IPCTL_RTMINEXPIRE -.Pq ip.rtminexpire -Integer: minimum value of ip.rtexpire (default ten seconds). -This value has no effect on user modifications, but restricts the dynamic -adaptation described above. -.It Dv IPCTL_RTMAXCACHE -.Pq ip.rtmaxcache -Integer: trigger level of cached, unreferenced, protocol-cloned routes -which initiates dynamic adaptation (default 128). .It Va ip.process_options Integer: control IP options processing. By setting this variable to 0, all IP options in the incoming packets diff --git a/share/man/man4/inet6.4 b/share/man/man4/inet6.4 index 0e505db691c..815dee7c038 100644 --- a/share/man/man4/inet6.4 +++ b/share/man/man4/inet6.4 @@ -312,21 +312,6 @@ mapped address on .Dv AF_INET6 sockets. Defaults to on. -.It Dv IPV6CTL_RTEXPIRE -.Pq ip6.rtexpire -Integer: lifetime in seconds of protocol-cloned -.Tn IP -routes after the last reference drops (default one hour). -.\"This value varies dynamically as described above. -.It Dv IPV6CTL_RTMINEXPIRE -.Pq ip6.rtminexpire -Integer: minimum value of ip.rtexpire (default ten seconds). -.\"This value has no effect on user modifications, but restricts the dynamic -.\"adaptation described above. 
-.It Dv IPV6CTL_RTMAXCACHE -.Pq ip6.rtmaxcache -Integer: trigger level of cached, unreferenced, protocol-cloned routes -which initiates dynamic adaptation (default 128). .El .Ss Interaction between IPv4/v6 sockets By default, diff --git a/share/man/man7/security.7 b/share/man/man7/security.7 index d51eea2dc68..d84e4a23c62 100644 --- a/share/man/man7/security.7 +++ b/share/man/man7/security.7 @@ -894,41 +894,6 @@ A competent sysadmin will turn off all of these .Xr inetd 8 Ns -internal test services. -.Pp -Spoofed packet attacks may also be used to overload the kernel route cache. -Refer to the -.Va net.inet.ip.rtexpire , net.inet.ip.rtminexpire , -and -.Va net.inet.ip.rtmaxcache -.Xr sysctl 8 -variables. -A spoofed packet attack that uses a random source IP will cause -the kernel to generate a temporary cached route in the route table, viewable -with -.Dq Li "netstat -rna | fgrep W3" . -These routes typically timeout in 1600 -seconds or so. -If the kernel detects that the cached route table has gotten -too big it will dynamically reduce the -.Va rtexpire -but will never decrease it to -less than -.Va rtminexpire . -There are two problems: (1) The kernel does not react -quickly enough when a lightly loaded server is suddenly attacked, and (2) The -.Va rtminexpire -is not low enough for the kernel to survive a sustained attack. -If your servers are connected to the internet via a T3 or better it may be -prudent to manually override both -.Va rtexpire -and -.Va rtminexpire -via -.Xr sysctl 8 . -Never set either parameter to zero -(unless you want to crash the machine :-)). -Setting both parameters to 2 seconds should be sufficient to protect the route -table from attack. .Sh ACCESS ISSUES WITH KERBEROS AND SSH There are a few issues with both Kerberos and SSH that need to be addressed if you intend to use them. 
diff --git a/sys/netinet/in.h b/sys/netinet/in.h index f2dbce29c69..1f79761e7eb 100644 --- a/sys/netinet/in.h +++ b/sys/netinet/in.h @@ -619,9 +619,9 @@ int getsourcefilter(int, uint32_t, struct sockaddr *, socklen_t, #ifdef notyet #define IPCTL_DEFMTU 4 /* default MTU */ #endif -#define IPCTL_RTEXPIRE 5 /* cloned route expiration time */ -#define IPCTL_RTMINEXPIRE 6 /* min value for expiration time */ -#define IPCTL_RTMAXCACHE 7 /* trigger level for dynamic expire */ +/* IPCTL_RTEXPIRE 5 deprecated */ +/* IPCTL_RTMINEXPIRE 6 deprecated */ +/* IPCTL_RTMAXCACHE 7 deprecated */ #define IPCTL_SOURCEROUTE 8 /* may perform source routes */ #define IPCTL_DIRECTEDBROADCAST 9 /* may re-broadcast received packets */ #define IPCTL_INTRQMAXLEN 10 /* max length of netisr queue */ diff --git a/sys/netinet/in_rmx.c b/sys/netinet/in_rmx.c index a48c9d8e85d..a53a7ac221b 100644 --- a/sys/netinet/in_rmx.c +++ b/sys/netinet/in_rmx.c @@ -36,8 +36,6 @@ __FBSDID("$FreeBSD$"); #include #include #include -#include -#include #include #include @@ -55,8 +53,6 @@ extern int in_inithead(void **head, int off); extern int in_detachhead(void **head, int off); #endif -#define RTPRF_OURS RTF_PROTO3 /* set on routes we manage */ - /* * Do what we need to do when inserting a route. */ @@ -110,238 +106,6 @@ in_addroute(void *v_arg, void *n_arg, struct radix_node_head *head, return (rn_addroute(v_arg, n_arg, head, treenodes)); } -/* - * This code is the inverse of in_clsroute: on first reference, if we - * were managing the route, stop doing so and set the expiration timer - * back off again. 
- */ -static struct radix_node * -in_matroute(void *v_arg, struct radix_node_head *head) -{ - struct radix_node *rn = rn_match(v_arg, head); - struct rtentry *rt = (struct rtentry *)rn; - - if (rt) { - RT_LOCK(rt); - if (rt->rt_flags & RTPRF_OURS) { - rt->rt_flags &= ~RTPRF_OURS; - rt->rt_expire = 0; - } - RT_UNLOCK(rt); - } - return rn; -} - -static VNET_DEFINE(int, rtq_reallyold) = 60*60; /* one hour is "really old" */ -#define V_rtq_reallyold VNET(rtq_reallyold) -SYSCTL_INT(_net_inet_ip, IPCTL_RTEXPIRE, rtexpire, CTLFLAG_VNET | CTLFLAG_RW, - &VNET_NAME(rtq_reallyold), 0, - "Default expiration time on dynamically learned routes"); - -/* never automatically crank down to less */ -static VNET_DEFINE(int, rtq_minreallyold) = 10; -#define V_rtq_minreallyold VNET(rtq_minreallyold) -SYSCTL_INT(_net_inet_ip, IPCTL_RTMINEXPIRE, rtminexpire, CTLFLAG_VNET | CTLFLAG_RW, - &VNET_NAME(rtq_minreallyold), 0, - "Minimum time to attempt to hold onto dynamically learned routes"); - -/* 128 cached routes is "too many" */ -static VNET_DEFINE(int, rtq_toomany) = 128; -#define V_rtq_toomany VNET(rtq_toomany) -SYSCTL_INT(_net_inet_ip, IPCTL_RTMAXCACHE, rtmaxcache, CTLFLAG_VNET | CTLFLAG_RW, - &VNET_NAME(rtq_toomany), 0, - "Upper limit on dynamically learned routes"); - -/* - * On last reference drop, mark the route as belong to us so that it can be - * timed out. - */ -static void -in_clsroute(struct radix_node *rn, struct radix_node_head *head) -{ - struct rtentry *rt = (struct rtentry *)rn; - - RT_LOCK_ASSERT(rt); - - if (!(rt->rt_flags & RTF_UP)) - return; /* prophylactic measures */ - - if (rt->rt_flags & RTPRF_OURS) - return; - - if (!(rt->rt_flags & RTF_DYNAMIC)) - return; - - /* - * If rtq_reallyold is 0, just delete the route without - * waiting for a timeout cycle to kill it. 
- */ - if (V_rtq_reallyold != 0) { - rt->rt_flags |= RTPRF_OURS; - rt->rt_expire = time_uptime + V_rtq_reallyold; - } else - rt_expunge(head, rt); -} - -struct rtqk_arg { - struct radix_node_head *rnh; - int draining; - int killed; - int found; - int updating; - time_t nextstop; -}; - -/* - * Get rid of old routes. When draining, this deletes everything, even when - * the timeout is not expired yet. When updating, this makes sure that - * nothing has a timeout longer than the current value of rtq_reallyold. - */ -static int -in_rtqkill(struct radix_node *rn, void *rock) -{ - struct rtqk_arg *ap = rock; - struct rtentry *rt = (struct rtentry *)rn; - int err; - - RADIX_NODE_HEAD_WLOCK_ASSERT(ap->rnh); - - if (rt->rt_flags & RTPRF_OURS) { - ap->found++; - - if (ap->draining || rt->rt_expire <= time_uptime) { - if (rt->rt_refcnt > 0) - panic("rtqkill route really not free"); - - err = in_rtrequest(RTM_DELETE, - (struct sockaddr *)rt_key(rt), - rt->rt_gateway, rt_mask(rt), - rt->rt_flags | RTF_RNH_LOCKED, 0, - rt->rt_fibnum); - if (err) { - log(LOG_WARNING, "in_rtqkill: error %d\n", err); - } else { - ap->killed++; - } - } else { - if (ap->updating && - (rt->rt_expire - time_uptime > V_rtq_reallyold)) - rt->rt_expire = time_uptime + V_rtq_reallyold; - ap->nextstop = lmin(ap->nextstop, rt->rt_expire); - } - } - - return 0; -} - -#define RTQ_TIMEOUT 60*10 /* run no less than once every ten minutes */ -static VNET_DEFINE(int, rtq_timeout) = RTQ_TIMEOUT; -static VNET_DEFINE(struct callout, rtq_timer); - -#define V_rtq_timeout VNET(rtq_timeout) -#define V_rtq_timer VNET(rtq_timer) - -static void in_rtqtimo_one(void *rock); - -static void -in_rtqtimo(void *rock) -{ - CURVNET_SET((struct vnet *) rock); - int fibnum; - void *newrock; - struct timeval atv; - - for (fibnum = 0; fibnum < rt_numfibs; fibnum++) { - newrock = rt_tables_get_rnh(fibnum, AF_INET); - if (newrock != NULL) - in_rtqtimo_one(newrock); - } - atv.tv_usec = 0; - atv.tv_sec = V_rtq_timeout; - 
callout_reset(&V_rtq_timer, tvtohz(&atv), in_rtqtimo, rock); - CURVNET_RESTORE(); -} - -static void -in_rtqtimo_one(void *rock) -{ - struct radix_node_head *rnh = rock; - struct rtqk_arg arg; - static time_t last_adjusted_timeout = 0; - - arg.found = arg.killed = 0; - arg.rnh = rnh; - arg.nextstop = time_uptime + V_rtq_timeout; - arg.draining = arg.updating = 0; - RADIX_NODE_HEAD_LOCK(rnh); - rnh->rnh_walktree(rnh, in_rtqkill, &arg); - RADIX_NODE_HEAD_UNLOCK(rnh); - - /* - * Attempt to be somewhat dynamic about this: - * If there are ``too many'' routes sitting around taking up space, - * then crank down the timeout, and see if we can't make some more - * go away. However, we make sure that we will never adjust more - * than once in rtq_timeout seconds, to keep from cranking down too - * hard. - */ - if ((arg.found - arg.killed > V_rtq_toomany) && - (time_uptime - last_adjusted_timeout >= V_rtq_timeout) && - V_rtq_reallyold > V_rtq_minreallyold) { - V_rtq_reallyold = 2 * V_rtq_reallyold / 3; - if (V_rtq_reallyold < V_rtq_minreallyold) { - V_rtq_reallyold = V_rtq_minreallyold; - } - - last_adjusted_timeout = time_uptime; -#ifdef DIAGNOSTIC - log(LOG_DEBUG, "in_rtqtimo: adjusted rtq_reallyold to %d\n", - V_rtq_reallyold); -#endif - arg.found = arg.killed = 0; - arg.updating = 1; - RADIX_NODE_HEAD_LOCK(rnh); - rnh->rnh_walktree(rnh, in_rtqkill, &arg); - RADIX_NODE_HEAD_UNLOCK(rnh); - } - -} - -void -in_rtqdrain(void) -{ - VNET_ITERATOR_DECL(vnet_iter); - struct radix_node_head *rnh; - struct rtqk_arg arg; - int fibnum; - - VNET_LIST_RLOCK_NOSLEEP(); - VNET_FOREACH(vnet_iter) { - CURVNET_SET(vnet_iter); - - for ( fibnum = 0; fibnum < rt_numfibs; fibnum++) { - rnh = rt_tables_get_rnh(fibnum, AF_INET); - arg.found = arg.killed = 0; - arg.rnh = rnh; - arg.nextstop = 0; - arg.draining = 1; - arg.updating = 0; - RADIX_NODE_HEAD_LOCK(rnh); - rnh->rnh_walktree(rnh, in_rtqkill, &arg); - RADIX_NODE_HEAD_UNLOCK(rnh); - } - CURVNET_RESTORE(); - } - VNET_LIST_RUNLOCK_NOSLEEP(); -} 
- -void -in_setmatchfunc(struct radix_node_head *rnh, int val) -{ - - rnh->rnh_matchaddr = (val != 0) ? rn_match : in_matroute; -} - static int _in_rt_was_here; /* * Initialize our routing tree. @@ -358,11 +122,7 @@ in_inithead(void **head, int off) RADIX_NODE_HEAD_LOCK_INIT(rnh); rnh->rnh_addaddr = in_addroute; - in_setmatchfunc(rnh, V_drop_redirect); - rnh->rnh_close = in_clsroute; if (_in_rt_was_here == 0 ) { - callout_init(&V_rtq_timer, CALLOUT_MPSAFE); - callout_reset(&V_rtq_timer, 1, in_rtqtimo, curvnet); _in_rt_was_here = 1; } return 1; @@ -373,7 +133,6 @@ int in_detachhead(void **head, int off) { - callout_drain(&V_rtq_timer); return (1); } #endif diff --git a/sys/netinet/in_var.h b/sys/netinet/in_var.h index 6f0dcbf7872..16edeb142a7 100644 --- a/sys/netinet/in_var.h +++ b/sys/netinet/in_var.h @@ -407,7 +407,6 @@ int in_leavegroup_locked(struct in_multi *, /*const*/ struct in_mfilter *); int in_control(struct socket *, u_long, caddr_t, struct ifnet *, struct thread *); -void in_rtqdrain(void); int in_addprefix(struct in_ifaddr *, int); int in_scrubprefix(struct in_ifaddr *, u_int); void ip_input(struct mbuf *); @@ -426,7 +425,6 @@ void in_rtredirect(struct sockaddr *, struct sockaddr *, struct sockaddr *, int, struct sockaddr *, u_int); int in_rtrequest(int, struct sockaddr *, struct sockaddr *, struct sockaddr *, int, struct rtentry **, u_int); -void in_setmatchfunc(struct radix_node_head *, int); #if 0 int in_rt_getifa(struct rt_addrinfo *, u_int fibnum); diff --git a/sys/netinet/ip_icmp.c b/sys/netinet/ip_icmp.c index 25ccb4f71b9..845c6ec9bb2 100644 --- a/sys/netinet/ip_icmp.c +++ b/sys/netinet/ip_icmp.c @@ -115,6 +115,9 @@ SYSCTL_UINT(_net_inet_icmp, OID_AUTO, maskfake, CTLFLAG_VNET | CTLFLAG_RW, "Fake reply to ICMP Address Mask Request packets."); VNET_DEFINE(int, drop_redirect) = 0; +#define V_drop_redirect VNET(drop_redirect) +SYSCTL_INT(_net_inet_icmp, OID_AUTO, drop_redirect, CTLFLAG_VNET | CTLFLAG_RW, + &VNET_NAME(drop_redirect), 0, "Ignore ICMP 
redirects"); static VNET_DEFINE(int, log_redirect) = 0; #define V_log_redirect VNET(log_redirect) @@ -163,39 +166,6 @@ static void icmp_send(struct mbuf *, struct mbuf *); extern struct protosw inetsw[]; -static int -sysctl_net_icmp_drop_redir(SYSCTL_HANDLER_ARGS) -{ - int error, new; - int i; - struct radix_node_head *rnh; - - new = V_drop_redirect; - error = sysctl_handle_int(oidp, &new, 0, req); - if (error == 0 && req->newptr) { - new = (new != 0) ? 1 : 0; - - if (new == V_drop_redirect) - return (0); - - for (i = 0; i < rt_numfibs; i++) { - if ((rnh = rt_tables_get_rnh(i, AF_INET)) == NULL) - continue; - RADIX_NODE_HEAD_LOCK(rnh); - in_setmatchfunc(rnh, new); - RADIX_NODE_HEAD_UNLOCK(rnh); - } - - V_drop_redirect = new; - } - - return (error); -} - -SYSCTL_PROC(_net_inet_icmp, OID_AUTO, drop_redirect, - CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW, 0, 0, - sysctl_net_icmp_drop_redir, "I", "Ignore ICMP redirects"); - /* * Kernel module interface for updating icmpstat. The argument is an index * into icmpstat treated as an array of u_long. 
While this encodes the diff --git a/sys/netinet/ip_input.c b/sys/netinet/ip_input.c index 6f48d935cbc..d0229b2dccd 100644 --- a/sys/netinet/ip_input.c +++ b/sys/netinet/ip_input.c @@ -1330,7 +1330,6 @@ ip_drain(void) } IPQ_UNLOCK(); VNET_LIST_RUNLOCK_NOSLEEP(); - in_rtqdrain(); } /* diff --git a/sys/netinet6/in6.h b/sys/netinet6/in6.h index 7187aec7b7d..95b4a0c6a9a 100644 --- a/sys/netinet6/in6.h +++ b/sys/netinet6/in6.h @@ -593,9 +593,9 @@ struct ip6_mtuinfo { #define IPV6CTL_MAPPED_ADDR 23 #endif #define IPV6CTL_V6ONLY 24 -#define IPV6CTL_RTEXPIRE 25 /* cloned route expiration time */ -#define IPV6CTL_RTMINEXPIRE 26 /* min value for expiration time */ -#define IPV6CTL_RTMAXCACHE 27 /* trigger level for dynamic expire */ +/* IPV6CTL_RTEXPIRE 25 deprecated */ +/* IPV6CTL_RTMINEXPIRE 26 deprecated */ +/* IPV6CTL_RTMAXCACHE 27 deprecated */ #define IPV6CTL_USETEMPADDR 32 /* use temporary addresses (RFC3041) */ #define IPV6CTL_TEMPPLTIME 33 /* preferred lifetime for tmpaddrs */ diff --git a/sys/netinet6/in6_rmx.c b/sys/netinet6/in6_rmx.c index c96db6f5168..1403647380b 100644 --- a/sys/netinet6/in6_rmx.c +++ b/sys/netinet6/in6_rmx.c @@ -66,7 +66,6 @@ __FBSDID("$FreeBSD$"); #include #include #include -#include #include #include #include @@ -179,24 +178,6 @@ in6_addroute(void *v_arg, void *n_arg, struct radix_node_head *head, return (ret); } -SYSCTL_DECL(_net_inet6_ip6); - -static VNET_DEFINE(int, rtq_toomany6) = 128; - /* 128 cached routes is ``too many'' */ -#define V_rtq_toomany6 VNET(rtq_toomany6) -SYSCTL_INT(_net_inet6_ip6, IPV6CTL_RTMAXCACHE, rtmaxcache, CTLFLAG_VNET | CTLFLAG_RW, - &VNET_NAME(rtq_toomany6) , 0, ""); - -struct rtqk_arg { - struct radix_node_head *rnh; - int mode; - int updating; - int draining; - int killed; - int found; - time_t nextstop; -}; - /* * Age old PMTUs. 
*/ diff --git a/tools/tools/sysdoc/tunables.mdoc b/tools/tools/sysdoc/tunables.mdoc index 12b6f4651c8..7c8c9a345e2 100644 --- a/tools/tools/sysdoc/tunables.mdoc +++ b/tools/tools/sysdoc/tunables.mdoc @@ -1324,28 +1324,6 @@ bool Controls the sending of ICMP redirects in response to unforwardable IP packets. ---- -net.inet.ip.rtexpire -int - -Lifetime in seconds of protocol-cloned IP routes after the last -reference drops (default one hour). - ---- -net.inet.ip.rtmaxcache -int - -Trigger level of cached, unreferenced, protocol-cloned -routes which initiates dynamic adaptation. - ---- -net.inet.ip.rtminexpire -int - -See -.Xr inet 4 -for more information. - --- net.inet.ip.sourceroute bool From ad11def5216e921e38db43caaea835b84d76815a Mon Sep 17 00:00:00 2001 From: Enji Cooper Date: Tue, 11 Nov 2014 04:06:05 +0000 Subject: [PATCH 044/280] Add baud rate support to telnet(1) This implements part of RFC-2217 It's based off a patch originally written by Sujal Patel at Isilon, and contributions from other Isilon employees. 
PR: 173728 Phabric: D995 Reviewed by: markj, markm MFC after: 2 weeks Sponsored by: EMC / Isilon Storage Division --- contrib/telnet/arpa/telnet.h | 1 + contrib/telnet/telnet/baud.h | 121 +++++++++++++++++++++++++++++ contrib/telnet/telnet/commands.c | 1 + contrib/telnet/telnet/externs.h | 14 ++++ contrib/telnet/telnet/main.c | 11 ++- contrib/telnet/telnet/sys_bsd.c | 66 +--------------- contrib/telnet/telnet/telnet.1 | 4 + contrib/telnet/telnet/telnet.c | 49 ++++++++++-- contrib/telnet/telnet/types.h | 14 ++-- contrib/telnet/telnetd/sys_term.c | 52 +------------ contrib/tzcode/stdtime/localtime.c | 12 ++- libexec/telnetd/Makefile | 1 + 12 files changed, 210 insertions(+), 136 deletions(-) create mode 100644 contrib/telnet/telnet/baud.h diff --git a/contrib/telnet/arpa/telnet.h b/contrib/telnet/arpa/telnet.h index 26f75fb3130..b028a1df794 100644 --- a/contrib/telnet/arpa/telnet.h +++ b/contrib/telnet/arpa/telnet.h @@ -127,6 +127,7 @@ extern char *telcmds[]; #define TELOPT_KERMIT 47 /* RFC2840 - Kermit */ #define TELOPT_EXOPL 255 /* extended-options-list */ +#define COMPORT_SET_BAUDRATE 1 /* RFC2217 - Com Port Set Baud Rate */ #define NTELOPTS (1+TELOPT_KERMIT) #ifdef TELOPTS diff --git a/contrib/telnet/telnet/baud.h b/contrib/telnet/telnet/baud.h new file mode 100644 index 00000000000..c422535cf8a --- /dev/null +++ b/contrib/telnet/telnet/baud.h @@ -0,0 +1,121 @@ +/* + * Copyright (c) 2014 EMC Corporation + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY JOHN BIRRELL AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +/* + * Try to guess whether speeds are "encoded" (4.2BSD) or just numeric (4.4BSD). + */ +#if B4800 != 4800 +#define DECODE_BAUD +#endif + +#ifdef DECODE_BAUD +#ifndef B7200 +#define B7200 B4800 +#endif + +#ifndef B14400 +#define B14400 B9600 +#endif + +#ifndef B19200 +#define B19200 B14400 +#endif + +#ifndef B28800 +#define B28800 B19200 +#endif + +#ifndef B38400 +#define B38400 B28800 +#endif + +#ifndef B57600 +#define B57600 B38400 +#endif + +#ifndef B76800 +#define B76800 B57600 +#endif + +#ifndef B115200 +#define B115200 B76800 +#endif + +#ifndef B115200 +#define B115200 B76800 +#endif +#endif + +#ifndef B230400 +#define B230400 B115200 +#endif + +/* + * A table of available terminal speeds + */ +struct termspeeds termspeeds[] = { + { 0, B0 }, + { 50, B50 }, + { 75, B75 }, + { 110, B110 }, + { 134, B134 }, + { 150, B150 }, + { 200, B200 }, + { 300, B300 }, + { 600, B600 }, + { 1200, B1200 }, + { 1800, B1800 }, + { 2400, B2400 }, + { 4800, B4800 }, +#ifdef B7200 + { 7200, B7200 }, +#endif + { 9600, B9600 }, +#ifdef B14400 + { 14400, B14400 }, +#endif +#ifdef B19200 + { 19200, B19200 }, +#endif +#ifdef B28800 + { 28800, B28800 }, +#endif +#ifdef B38400 + { 38400, 
B38400 }, +#endif +#ifdef B57600 + { 57600, B57600 }, +#endif +#ifdef B115200 + { 115200, B115200 }, +#endif +#ifdef B230400 + { 230400, B230400 }, +#endif + { -1, 0 } +}; diff --git a/contrib/telnet/telnet/commands.c b/contrib/telnet/telnet/commands.c index c39b18744ca..74cce6d5ded 100644 --- a/contrib/telnet/telnet/commands.c +++ b/contrib/telnet/telnet/commands.c @@ -896,6 +896,7 @@ static struct setlist Setlist[] = { { "forw1", "alternate end of line character", NULL, termForw1Charp }, { "forw2", "alternate end of line character", NULL, termForw2Charp }, { "ayt", "alternate AYT character", NULL, termAytCharp }, + { "baudrate", "set remote baud rate", DoBaudRate, ComPortBaudRate }, { NULL, NULL, NULL, NULL } }; diff --git a/contrib/telnet/telnet/externs.h b/contrib/telnet/telnet/externs.h index e07aebbdb82..d42bddd4cd0 100644 --- a/contrib/telnet/telnet/externs.h +++ b/contrib/telnet/telnet/externs.h @@ -231,6 +231,10 @@ extern unsigned char NetTraceFile[]; /* Name of file where debugging output goes */ extern void SetNetTrace(char *); /* Function to change where debugging goes */ +extern unsigned char + ComPortBaudRate[]; /* Baud rate of the remote end */ +extern void + DoBaudRate(char *); /* Function to set the baud rate of the remote end */ extern jmp_buf toplevel; /* For error conditions. 
*/ @@ -475,6 +479,16 @@ extern cc_t termAytChar; # endif #endif +typedef struct { + int + system, /* what the current time is */ + echotoggle, /* last time user entered echo character */ + modenegotiated, /* last time operating mode negotiated */ + didnetreceive, /* last time we read data from network */ + gotDM; /* when did we last see a data mark */ +} Clocks; + +extern Clocks clocks; /* Ring buffer structures which are shared */ diff --git a/contrib/telnet/telnet/main.c b/contrib/telnet/telnet/main.c index f6eb1ffb08e..1ddec8256a0 100644 --- a/contrib/telnet/telnet/main.c +++ b/contrib/telnet/telnet/main.c @@ -91,10 +91,10 @@ usage(void) fprintf(stderr, "usage: %s %s%s%s%s\n", prompt, #ifdef AUTHENTICATION - "[-4] [-6] [-8] [-E] [-K] [-L] [-N] [-S tos] [-X atype] [-c] [-d]", - "\n\t[-e char] [-k realm] [-l user] [-f/-F] [-n tracefile] ", + "[-4] [-6] [-8] [-B baudrate] [-E] [-K] [-L] [-N] [-S tos] [-X atype]", + "\n\t[-c] [-d] [-e char] [-k realm] [-l user] [-f/-F] [-n tracefile] ", #else - "[-4] [-6] [-8] [-E] [-L] [-N] [-S tos] [-c] [-d]", + "[-4] [-6] [-8] [-B baudrate] [-E] [-L] [-N] [-S tos] [-c] [-d]", "\n\t[-e char] [-l user] [-n tracefile] ", #endif "[-r] [-s src_addr] [-u] ", @@ -154,7 +154,7 @@ main(int argc, char *argv[]) #define IPSECOPT #endif while ((ch = getopt(argc, argv, - "468EKLNS:X:acde:fFk:l:n:rs:uxy" IPSECOPT)) != -1) + "468B:EKLNS:X:acde:fFk:l:n:rs:uxy" IPSECOPT)) != -1) #undef IPSECOPT { switch(ch) { @@ -169,6 +169,9 @@ main(int argc, char *argv[]) case '8': eight = 3; /* binary output and input */ break; + case 'B': + DoBaudRate(optarg); + break; case 'E': rlogin = escape = _POSIX_VDISABLE; break; diff --git a/contrib/telnet/telnet/sys_bsd.c b/contrib/telnet/telnet/sys_bsd.c index 9fba74feada..32f84bf85e4 100644 --- a/contrib/telnet/telnet/sys_bsd.c +++ b/contrib/telnet/telnet/sys_bsd.c @@ -60,6 +60,7 @@ __FBSDID("$FreeBSD$"); #include "defines.h" #include "externs.h" #include "types.h" +#include "baud.h" int tout, /* Output file 
descriptor */ @@ -682,71 +683,6 @@ TerminalNewMode(int f) } -/* - * Try to guess whether speeds are "encoded" (4.2BSD) or just numeric (4.4BSD). - */ -#if B4800 != 4800 -#define DECODE_BAUD -#endif - -#ifdef DECODE_BAUD -#ifndef B7200 -#define B7200 B4800 -#endif - -#ifndef B14400 -#define B14400 B9600 -#endif - -#ifndef B19200 -# define B19200 B14400 -#endif - -#ifndef B28800 -#define B28800 B19200 -#endif - -#ifndef B38400 -# define B38400 B28800 -#endif - -#ifndef B57600 -#define B57600 B38400 -#endif - -#ifndef B76800 -#define B76800 B57600 -#endif - -#ifndef B115200 -#define B115200 B76800 -#endif - -#ifndef B230400 -#define B230400 B115200 -#endif - - -/* - * This code assumes that the values B0, B50, B75... - * are in ascending order. They do not have to be - * contiguous. - */ -struct termspeeds { - long speed; - long value; -} termspeeds[] = { - { 0, B0 }, { 50, B50 }, { 75, B75 }, - { 110, B110 }, { 134, B134 }, { 150, B150 }, - { 200, B200 }, { 300, B300 }, { 600, B600 }, - { 1200, B1200 }, { 1800, B1800 }, { 2400, B2400 }, - { 4800, B4800 }, { 7200, B7200 }, { 9600, B9600 }, - { 14400, B14400 }, { 19200, B19200 }, { 28800, B28800 }, - { 38400, B38400 }, { 57600, B57600 }, { 115200, B115200 }, - { 230400, B230400 }, { -1, B230400 } -}; -#endif /* DECODE_BAUD */ - void TerminalSpeeds(long *ispeed, long *ospeed) { diff --git a/contrib/telnet/telnet/telnet.1 b/contrib/telnet/telnet/telnet.1 index 2dce4c51dd5..f55c62e8729 100644 --- a/contrib/telnet/telnet/telnet.1 +++ b/contrib/telnet/telnet/telnet.1 @@ -43,6 +43,7 @@ protocol .Sh SYNOPSIS .Nm .Op Fl 468EFKLNacdfruxy +.Op Fl B Ar baudrate .Op Fl S Ar tos .Op Fl X Ar authtype .Op Fl e Ar escapechar @@ -89,6 +90,9 @@ This causes an attempt to negotiate the .Dv TELNET BINARY option on both input and output. +.It Fl B Ar baudrate +Sets the baud rate to +.Ar baudrate . .It Fl E Stops any character from being recognized as an escape character. 
.It Fl F diff --git a/contrib/telnet/telnet/telnet.c b/contrib/telnet/telnet/telnet.c index 8c457cf613d..80f43b2fc0a 100644 --- a/contrib/telnet/telnet/telnet.c +++ b/contrib/telnet/telnet/telnet.c @@ -52,6 +52,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include "ring.h" @@ -68,7 +69,7 @@ __FBSDID("$FreeBSD$"); #include #endif #include - + #define strip(x) ((my_want_state_is_wont(TELOPT_BINARY)) ? ((x)&0x7f) : (x)) static unsigned char subbuffer[SUBBUFSIZE], @@ -162,7 +163,7 @@ static int is_unique(char *, char **, char **); */ Clocks clocks; - + /* * Initialize telnet environment. */ @@ -196,7 +197,7 @@ init_telnet(void) flushline = 1; telrcv_state = TS_DATA; } - + /* * These routines are in charge of sending option negotiations @@ -206,6 +207,42 @@ init_telnet(void) * is in disagreement as to what the current state should be. */ +unsigned char ComPortBaudRate[256]; + +void +DoBaudRate(char *arg) +{ + char *temp, temp2[10]; + int i; + uint32_t baudrate; + + errno = 0; + baudrate = (uint32_t)strtol(arg, &temp, 10); + if (temp[0] != '\0' || (baudrate == 0 && errno != 0)) + ExitString("Invalid baud rate provided.\n", 1); + + for (i = 1; termspeeds[i].speed != -1; i++) + if (baudrate == termspeeds[i].speed) + break; + if (termspeeds[i].speed == -1) + ExitString("Invalid baud rate provided.\n", 1); + + strlcpy(ComPortBaudRate, arg, sizeof(ComPortBaudRate)); + + if (NETROOM() < sizeof(temp2)) { + ExitString("No room in buffer for baud rate.\n", 1); + /* NOTREACHED */ + } + + snprintf(temp2, sizeof(temp2), "%c%c%c%c....%c%c", IAC, SB, TELOPT_COMPORT, + COMPORT_SET_BAUDRATE, IAC, SE); + + baudrate = htonl(baudrate); + memcpy(&temp2[4], &baudrate, sizeof(baudrate)); + ring_supply_data(&netoring, temp2, sizeof(temp2)); + printsub('>', &temp[2], sizeof(temp2) - 2); +} + void send_do(int c, int init) { @@ -1084,7 +1121,7 @@ lm_mode(unsigned char *cmd, int len, int init) setconnmode(0); /* set changed mode */ } - + /* * slc() @@ -1628,7 +1665,7 
@@ env_opt_end(int emptyok) } } - + int telrcv(void) @@ -2013,7 +2050,7 @@ telsnd(void) ring_consumed(&ttyiring, count); return returnValue||count; /* Non-zero if we did anything */ } - + /* * Scheduler() * diff --git a/contrib/telnet/telnet/types.h b/contrib/telnet/telnet/types.h index 191d311fd15..4db5292e49d 100644 --- a/contrib/telnet/telnet/types.h +++ b/contrib/telnet/telnet/types.h @@ -40,13 +40,9 @@ typedef struct { extern Modelist modelist[]; -typedef struct { - int - system, /* what the current time is */ - echotoggle, /* last time user entered echo character */ - modenegotiated, /* last time operating mode negotiated */ - didnetreceive, /* last time we read data from network */ - gotDM; /* when did we last see a data mark */ -} Clocks; +struct termspeeds { + int speed; + int value; +}; -extern Clocks clocks; +extern struct termspeeds termspeeds[]; diff --git a/contrib/telnet/telnetd/sys_term.c b/contrib/telnet/telnetd/sys_term.c index 5a9421b1b15..bdc43f6e1a2 100644 --- a/contrib/telnet/telnetd/sys_term.c +++ b/contrib/telnet/telnetd/sys_term.c @@ -46,6 +46,8 @@ __FBSDID("$FreeBSD$"); #include "telnetd.h" #include "pathnames.h" +#include "types.h" +#include "baud.h" #ifdef AUTHENTICATION #include @@ -743,56 +745,6 @@ tty_iscrnl(void) #endif } -/* - * Try to guess whether speeds are "encoded" (4.2BSD) or just numeric (4.4BSD). 
- */ -#if B4800 != 4800 -#define DECODE_BAUD -#endif - -#ifdef DECODE_BAUD - -/* - * A table of available terminal speeds - */ -struct termspeeds { - int speed; - int value; -} termspeeds[] = { - { 0, B0 }, { 50, B50 }, { 75, B75 }, - { 110, B110 }, { 134, B134 }, { 150, B150 }, - { 200, B200 }, { 300, B300 }, { 600, B600 }, - { 1200, B1200 }, { 1800, B1800 }, { 2400, B2400 }, - { 4800, B4800 }, -#ifdef B7200 - { 7200, B7200 }, -#endif - { 9600, B9600 }, -#ifdef B14400 - { 14400, B14400 }, -#endif -#ifdef B19200 - { 19200, B19200 }, -#endif -#ifdef B28800 - { 28800, B28800 }, -#endif -#ifdef B38400 - { 38400, B38400 }, -#endif -#ifdef B57600 - { 57600, B57600 }, -#endif -#ifdef B115200 - { 115200, B115200 }, -#endif -#ifdef B230400 - { 230400, B230400 }, -#endif - { -1, 0 } -}; -#endif /* DECODE_BAUD */ - void tty_tspeed(int val) { diff --git a/contrib/tzcode/stdtime/localtime.c b/contrib/tzcode/stdtime/localtime.c index 9605eeb30e5..51119f552a5 100644 --- a/contrib/tzcode/stdtime/localtime.c +++ b/contrib/tzcode/stdtime/localtime.c @@ -1792,7 +1792,11 @@ int delta; number0 = *number; *number += delta; - return (*number < number0) != (delta < 0); + if ((*number < number0) != (delta < 0)) { + errno = EOVERFLOW; + return (1); + } + return (0); } static int @@ -1804,7 +1808,11 @@ int delta; number0 = *number; *number += delta; - return (*number < number0) != (delta < 0); + if ((*number < number0) != (delta < 0)) { + errno = EOVERFLOW; + return (1); + } + return (0); } static int diff --git a/libexec/telnetd/Makefile b/libexec/telnetd/Makefile index 690b03c7ff9..07de1970471 100644 --- a/libexec/telnetd/Makefile +++ b/libexec/telnetd/Makefile @@ -25,6 +25,7 @@ CFLAGS+= -DINET6 .endif CFLAGS+= -I${TELNETDIR} +CFLAGS+= -I${TELNETDIR}/telnet LIBTELNET= ${.OBJDIR}/../../lib/libtelnet/libtelnet.a From 0b837c87ceea68219a59bb7b1fe5357d214b22a7 Mon Sep 17 00:00:00 2001 From: Enji Cooper Date: Tue, 11 Nov 2014 04:07:41 +0000 Subject: [PATCH 045/280] Revert WiP to contrib/tzcode 
accidentally committed with r274364 --- contrib/tzcode/stdtime/localtime.c | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/contrib/tzcode/stdtime/localtime.c b/contrib/tzcode/stdtime/localtime.c index 51119f552a5..9605eeb30e5 100644 --- a/contrib/tzcode/stdtime/localtime.c +++ b/contrib/tzcode/stdtime/localtime.c @@ -1792,11 +1792,7 @@ int delta; number0 = *number; *number += delta; - if ((*number < number0) != (delta < 0)) { - errno = EOVERFLOW; - return (1); - } - return (0); + return (*number < number0) != (delta < 0); } static int @@ -1808,11 +1804,7 @@ int delta; number0 = *number; *number += delta; - if ((*number < number0) != (delta < 0)) { - errno = EOVERFLOW; - return (1); - } - return (0); + return (*number < number0) != (delta < 0); } static int From 5ebb15b942bfe98b9b972117ecb04e97de8cb947 Mon Sep 17 00:00:00 2001 From: Pawel Jakub Dawidek Date: Tue, 11 Nov 2014 04:48:09 +0000 Subject: [PATCH 046/280] Add missing privilege check when setting the dump device. Before that change it was possible for a regular user to set up the dump device if he had write access to the given device. In theory it is a security issue as a user might get access to kernel's memory after provoking a kernel crash, but in practice it is not recommended to give regular users direct access to storage devices. Rework the code so that we do the privilege check within the set_dumper() function to avoid similar problems in the future.
Discussed with: secteam --- sys/dev/null/null.c | 5 +---- sys/geom/geom_dev.c | 14 +++++++------- sys/kern/kern_shutdown.c | 7 ++++++- sys/sys/conf.h | 2 +- 4 files changed, 15 insertions(+), 13 deletions(-) diff --git a/sys/dev/null/null.c b/sys/dev/null/null.c index f836147a773..c8966df8ac4 100644 --- a/sys/dev/null/null.c +++ b/sys/dev/null/null.c @@ -37,7 +37,6 @@ __FBSDID("$FreeBSD$"); #include #include #include -#include #include #include #include @@ -110,9 +109,7 @@ null_ioctl(struct cdev *dev __unused, u_long cmd, caddr_t data __unused, switch (cmd) { case DIOCSKERNELDUMP: - error = priv_check(td, PRIV_SETDUMPER); - if (error == 0) - error = set_dumper(NULL, NULL); + error = set_dumper(NULL, NULL, td); break; case FIONBIO: break; diff --git a/sys/geom/geom_dev.c b/sys/geom/geom_dev.c index 7cb756befb8..6380e407f0e 100644 --- a/sys/geom/geom_dev.c +++ b/sys/geom/geom_dev.c @@ -127,14 +127,14 @@ g_dev_fini(struct g_class *mp) } static int -g_dev_setdumpdev(struct cdev *dev) +g_dev_setdumpdev(struct cdev *dev, struct thread *td) { struct g_kerneldump kd; struct g_consumer *cp; int error, len; if (dev == NULL) - return (set_dumper(NULL, NULL)); + return (set_dumper(NULL, NULL, td)); cp = dev->si_drv2; len = sizeof(kd); @@ -142,7 +142,7 @@ g_dev_setdumpdev(struct cdev *dev) kd.length = OFF_MAX; error = g_io_getattr("GEOM::kerneldump", cp, &len, &kd); if (error == 0) { - error = set_dumper(&kd.di, devtoname(dev)); + error = set_dumper(&kd.di, devtoname(dev), td); if (error == 0) dev->si_flags |= SI_DUMPDEV; } @@ -157,7 +157,7 @@ init_dumpdev(struct cdev *dev) return; if (strcmp(devtoname(dev), dumpdev) != 0) return; - if (g_dev_setdumpdev(dev) == 0) { + if (g_dev_setdumpdev(dev, curthread) == 0) { freeenv(dumpdev); dumpdev = NULL; } @@ -453,9 +453,9 @@ g_dev_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int fflag, struct thread break; case DIOCSKERNELDUMP: if (*(u_int *)data == 0) - error = g_dev_setdumpdev(NULL); + error = g_dev_setdumpdev(NULL, td); else - 
error = g_dev_setdumpdev(dev); + error = g_dev_setdumpdev(dev, td); break; case DIOCGFLUSH: error = g_io_flush(cp); @@ -673,7 +673,7 @@ g_dev_orphan(struct g_consumer *cp) /* Reset any dump-area set on this device */ if (dev->si_flags & SI_DUMPDEV) - (void)set_dumper(NULL, NULL); + (void)set_dumper(NULL, NULL, curthread); /* Destroy the struct cdev *so we get no more requests */ destroy_dev_sched_cb(dev, g_dev_callback, cp); diff --git a/sys/kern/kern_shutdown.c b/sys/kern/kern_shutdown.c index dfdca15194a..357099b59b0 100644 --- a/sys/kern/kern_shutdown.c +++ b/sys/kern/kern_shutdown.c @@ -827,9 +827,14 @@ SYSCTL_STRING(_kern_shutdown, OID_AUTO, dumpdevname, CTLFLAG_RD, /* Registration of dumpers */ int -set_dumper(struct dumperinfo *di, const char *devname) +set_dumper(struct dumperinfo *di, const char *devname, struct thread *td) { size_t wantcopy; + int error; + + error = priv_check(td, PRIV_SETDUMPER); + if (error != 0) + return (error); if (di == NULL) { bzero(&dumper, sizeof dumper); diff --git a/sys/sys/conf.h b/sys/sys/conf.h index 8c50581cb1c..9d73d59a078 100644 --- a/sys/sys/conf.h +++ b/sys/sys/conf.h @@ -336,7 +336,7 @@ struct dumperinfo { off_t mediasize; /* Space available in bytes. */ }; -int set_dumper(struct dumperinfo *, const char *_devname); +int set_dumper(struct dumperinfo *, const char *_devname, struct thread *td); int dump_write(struct dumperinfo *, void *, vm_offset_t, off_t, size_t); int dumpsys(struct dumperinfo *); int doadump(boolean_t); From a31070e90de2602a7364c81c45f25f69aa9e9d3e Mon Sep 17 00:00:00 2001 From: Xin LI Date: Tue, 11 Nov 2014 05:49:57 +0000 Subject: [PATCH 047/280] Rename variable name from 'index' to 'idx' to avoid shadowing index(3). 
Noticed by: dim MFC after: 2 weeks --- contrib/hyperv/tools/hv_kvp_daemon.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/contrib/hyperv/tools/hv_kvp_daemon.c b/contrib/hyperv/tools/hv_kvp_daemon.c index b0a84d41109..3da0e70ac5d 100644 --- a/contrib/hyperv/tools/hv_kvp_daemon.c +++ b/contrib/hyperv/tools/hv_kvp_daemon.c @@ -511,25 +511,25 @@ kvp_get_value(int pool, __u8 *key, int key_size, __u8 *value, static int -kvp_pool_enumerate(int pool, int index, __u8 *key, int key_size, +kvp_pool_enumerate(int pool, int idx, __u8 *key, int key_size, __u8 *value, int value_size) { struct kvp_record *record; KVP_LOG(LOG_DEBUG, "kvp_pool_enumerate: pool = %d, index = %d\n,", - pool, index); + pool, idx); /* First update our in-memory state first. */ kvp_update_mem_state(pool); record = kvp_pools[pool].records; /* Index starts with 0 */ - if (index >= kvp_pools[pool].num_records) { + if (idx >= kvp_pools[pool].num_records) { return (1); } - memcpy(key, record[index].key, key_size); - memcpy(value, record[index].value, value_size); + memcpy(key, record[idx].key, key_size); + memcpy(value, record[idx].value, value_size); return (0); } From 00f22c06e8a1e36cbac03d96f1e5bb736d4bc705 Mon Sep 17 00:00:00 2001 From: Gleb Smirnoff Date: Tue, 11 Nov 2014 10:22:33 +0000 Subject: [PATCH 048/280] Move struct ether_vlan_header to ethernet.h, out of if_vlan_var.h, since this structure is protocol definition, not part of implementation. --- sys/net/ethernet.h | 19 +++++++++++++++++++ sys/net/if_vlan_var.h | 16 ---------------- 2 files changed, 19 insertions(+), 16 deletions(-) diff --git a/sys/net/ethernet.h b/sys/net/ethernet.h index 10abf641d9f..438028d1567 100644 --- a/sys/net/ethernet.h +++ b/sys/net/ethernet.h @@ -72,6 +72,25 @@ struct ether_addr { #define ETHER_IS_MULTICAST(addr) (*(addr) & 0x01) /* is address mcast/bcast? */ +/* + * 802.1q Virtual LAN header. 
+ */ +struct ether_vlan_header { + uint8_t evl_dhost[ETHER_ADDR_LEN]; + uint8_t evl_shost[ETHER_ADDR_LEN]; + uint16_t evl_encap_proto; + uint16_t evl_tag; + uint16_t evl_proto; +} __packed; + +#define EVL_VLID_MASK 0x0FFF +#define EVL_PRI_MASK 0xE000 +#define EVL_VLANOFTAG(tag) ((tag) & EVL_VLID_MASK) +#define EVL_PRIOFTAG(tag) (((tag) >> 13) & 7) +#define EVL_CFIOFTAG(tag) (((tag) >> 12) & 1) +#define EVL_MAKETAG(vlid, pri, cfi) \ + ((((((pri) & 7) << 1) | ((cfi) & 1)) << 12) | ((vlid) & EVL_VLID_MASK)) + /* * NOTE: 0x0000-0x05DC (0..1500) are generally IEEE 802.3 length fields. * However, there are some conflicts. diff --git a/sys/net/if_vlan_var.h b/sys/net/if_vlan_var.h index b24087435da..ed4753ecfe4 100644 --- a/sys/net/if_vlan_var.h +++ b/sys/net/if_vlan_var.h @@ -32,22 +32,6 @@ #ifndef _NET_IF_VLAN_VAR_H_ #define _NET_IF_VLAN_VAR_H_ 1 -struct ether_vlan_header { - u_char evl_dhost[ETHER_ADDR_LEN]; - u_char evl_shost[ETHER_ADDR_LEN]; - u_int16_t evl_encap_proto; - u_int16_t evl_tag; - u_int16_t evl_proto; -}; - -#define EVL_VLID_MASK 0x0FFF -#define EVL_PRI_MASK 0xE000 -#define EVL_VLANOFTAG(tag) ((tag) & EVL_VLID_MASK) -#define EVL_PRIOFTAG(tag) (((tag) >> 13) & 7) -#define EVL_CFIOFTAG(tag) (((tag) >> 12) & 1) -#define EVL_MAKETAG(vlid, pri, cfi) \ - ((((((pri) & 7) << 1) | ((cfi) & 1)) << 12) | ((vlid) & EVL_VLID_MASK)) - /* Set the VLAN ID in an mbuf packet header non-destructively. */ #define EVL_APPLY_VLID(m, vlid) \ do { \ From 3c7c188c16d8d5cc62b804b16e9a2bc1c8efbc5c Mon Sep 17 00:00:00 2001 From: Hans Petter Selasky Date: Tue, 11 Nov 2014 12:05:59 +0000 Subject: [PATCH 049/280] Fix some minor TSO issues: - Improve description of TSO limits. - Remove a not needed KASSERT() - Remove some not needed variable casts. 
Sponsored by: Mellanox Technologies Discussed with: lstewart @ MFC after: 1 week --- sys/net/if.c | 7 ------- sys/net/if_var.h | 22 +++++++++++++++------- sys/netinet/tcp_output.c | 16 ++++++++-------- 3 files changed, 23 insertions(+), 22 deletions(-) diff --git a/sys/net/if.c b/sys/net/if.c index 0103c3fdd5c..d97e0e11cdf 100644 --- a/sys/net/if.c +++ b/sys/net/if.c @@ -717,13 +717,6 @@ if_attach_internal(struct ifnet *ifp, int vmove) ifp->if_hw_tsomaxsegsize); } } - /* - * If the "if_hw_tsomax" limit is set, check if it is - * too small: - */ - KASSERT(ifp->if_hw_tsomax == 0 || - ifp->if_hw_tsomax >= (IP_MAXPACKET / 8), - ("%s: if_hw_tsomax is outside of range", __func__)); #endif } #ifdef VIMAGE diff --git a/sys/net/if_var.h b/sys/net/if_var.h index 643a1a489a5..98e9828dab5 100644 --- a/sys/net/if_var.h +++ b/sys/net/if_var.h @@ -232,16 +232,24 @@ struct ifnet { counter_u64_t if_counters[IFCOUNTERS]; /* Stuff that's only temporary and doesn't belong here. */ - u_int if_hw_tsomax; /* TSO total burst length - * limit in bytes. A value of - * zero means no limit. Have - * to find a better place for - * it eventually. */ /* - * TSO fields for segment limits. If a field below is zero, - * there is no TSO segment limit. + * Network adapter TSO limits: + * =========================== + * + * If the "if_hw_tsomax" field is zero the maximum segment + * length limit does not apply. If the "if_hw_tsomaxsegcount" + * or the "if_hw_tsomaxsegsize" field is zero the TSO segment + * count limit does not apply. If all three fields are zero, + * there is no TSO limit. + * + * NOTE: The TSO limits only apply to the data payload part of + * a TCP/IP packet. That means there is no need to subtract + * space for ethernet-, vlan-, IP- or TCP- headers from the + * TSO limits unless the hardware driver in question requires + * so. 
*/ + u_int if_hw_tsomax; /* TSO maximum size in bytes */ u_int if_hw_tsomaxsegcount; /* TSO maximum segment count */ u_int if_hw_tsomaxsegsize; /* TSO maximum segment size in bytes */ diff --git a/sys/netinet/tcp_output.c b/sys/netinet/tcp_output.c index d9d13a32f47..7919e2b72e0 100644 --- a/sys/netinet/tcp_output.c +++ b/sys/netinet/tcp_output.c @@ -802,9 +802,9 @@ send: max_len = (if_hw_tsomax - hdrlen); if (max_len <= 0) { len = 0; - } else if (len > (u_int)max_len) { + } else if (len > max_len) { sendalot = 1; - len = (u_int)max_len; + len = max_len; } } @@ -817,7 +817,7 @@ send: max_len = 0; mb = sbsndmbuf(&so->so_snd, off, &moff); - while (mb != NULL && (u_int)max_len < len) { + while (mb != NULL && max_len < len) { u_int mlen; u_int frags; @@ -851,9 +851,9 @@ send: } if (max_len <= 0) { len = 0; - } else if (len > (u_int)max_len) { + } else if (len > max_len) { sendalot = 1; - len = (u_int)max_len; + len = max_len; } } @@ -864,7 +864,7 @@ send: */ max_len = (tp->t_maxopd - optlen); if ((off + len) < so->so_snd.sb_cc) { - moff = len % (u_int)max_len; + moff = len % max_len; if (moff != 0) { len -= moff; sendalot = 1; @@ -875,8 +875,8 @@ send: * In case there are too many small fragments * don't use TSO: */ - if (len <= (u_int)max_len) { - len = (u_int)max_len; + if (len <= max_len) { + len = max_len; sendalot = 1; tso = 0; } From a41f4046f5dc2f25e048391769ad69ac4bea8929 Mon Sep 17 00:00:00 2001 From: Konstantin Belousov Date: Tue, 11 Nov 2014 12:37:13 +0000 Subject: [PATCH 050/280] On 965 and higher, map GTT as write-combining. 
Sponsored by: The FreeBSD Foundation MFC after: 1 week --- sys/dev/agp/agp_i810.c | 51 +++++++++++++++++++++++++++++++++++------- 1 file changed, 43 insertions(+), 8 deletions(-) diff --git a/sys/dev/agp/agp_i810.c b/sys/dev/agp/agp_i810.c index 9afd201d205..6e0629250d8 100644 --- a/sys/dev/agp/agp_i810.c +++ b/sys/dev/agp/agp_i810.c @@ -115,6 +115,8 @@ static int agp_sb_get_gtt_total_entries(device_t dev); static int agp_i810_install_gatt(device_t dev); static int agp_i830_install_gatt(device_t dev); +static int agp_i965_install_gatt(device_t dev); +static int agp_g4x_install_gatt(device_t dev); static void agp_i810_deinstall_gatt(device_t dev); static void agp_i830_deinstall_gatt(device_t dev); @@ -397,7 +399,7 @@ static const struct agp_i810_driver agp_i810_g965_driver = { .get_stolen_size = agp_i915_get_stolen_size, .get_gtt_mappable_entries = agp_i915_get_gtt_mappable_entries, .get_gtt_total_entries = agp_i965_get_gtt_total_entries, - .install_gatt = agp_i830_install_gatt, + .install_gatt = agp_i965_install_gatt, .deinstall_gatt = agp_i830_deinstall_gatt, .write_gtt = agp_i965_write_gtt, .install_gtt_pte = agp_i965_install_gtt_pte, @@ -466,7 +468,7 @@ static const struct agp_i810_driver agp_i810_g4x_driver = { .get_stolen_size = agp_i915_get_stolen_size, .get_gtt_mappable_entries = agp_i915_get_gtt_mappable_entries, .get_gtt_total_entries = agp_gen5_get_gtt_total_entries, - .install_gatt = agp_i830_install_gatt, + .install_gatt = agp_g4x_install_gatt, .deinstall_gatt = agp_i830_deinstall_gatt, .write_gtt = agp_g4x_write_gtt, .install_gtt_pte = agp_g4x_install_gtt_pte, @@ -489,7 +491,7 @@ static const struct agp_i810_driver agp_i810_sb_driver = { .get_stolen_size = agp_sb_get_stolen_size, .get_gtt_mappable_entries = agp_i915_get_gtt_mappable_entries, .get_gtt_total_entries = agp_sb_get_gtt_total_entries, - .install_gatt = agp_i830_install_gatt, + .install_gatt = agp_g4x_install_gatt, .deinstall_gatt = agp_i830_deinstall_gatt, .write_gtt = agp_sb_write_gtt, 
.install_gtt_pte = agp_sb_install_gtt_pte, @@ -1406,14 +1408,11 @@ agp_i810_install_gatt(device_t dev) return (0); } -static int -agp_i830_install_gatt(device_t dev) +static void +agp_i830_install_gatt_init(struct agp_i810_softc *sc) { - struct agp_i810_softc *sc; uint32_t pgtblctl; - sc = device_get_softc(dev); - /* * The i830 automatically initializes the 128k gatt on boot. * GATT address is already in there, make sure it's enabled. @@ -1423,9 +1422,45 @@ agp_i830_install_gatt(device_t dev) bus_write_4(sc->sc_res[0], AGP_I810_PGTBL_CTL, pgtblctl); sc->gatt->ag_physical = pgtblctl & ~1; +} + +static int +agp_i830_install_gatt(device_t dev) +{ + struct agp_i810_softc *sc; + + sc = device_get_softc(dev); + agp_i830_install_gatt_init(sc); return (0); } +static int +agp_gen4_install_gatt(device_t dev, const vm_size_t gtt_offset) +{ + struct agp_i810_softc *sc; + + sc = device_get_softc(dev); + pmap_change_attr((vm_offset_t)rman_get_virtual(sc->sc_res[0]) + + gtt_offset, rman_get_size(sc->sc_res[0]) - gtt_offset, + VM_MEMATTR_WRITE_COMBINING); + agp_i830_install_gatt_init(sc); + return (0); +} + +static int +agp_i965_install_gatt(device_t dev) +{ + + return (agp_gen4_install_gatt(dev, 512 * 1024)); +} + +static int +agp_g4x_install_gatt(device_t dev) +{ + + return (agp_gen4_install_gatt(dev, 2 * 1024 * 1024)); +} + static int agp_i810_attach(device_t dev) { From d6fe56e5d70ef5a1f968d528734f71620a253a66 Mon Sep 17 00:00:00 2001 From: Konstantin Belousov Date: Tue, 11 Nov 2014 12:52:45 +0000 Subject: [PATCH 051/280] Based on some BIOS configuration (GGC register in host bridge, bit 1), IGP may declare subclass as either VGA-compatible, or non-VGA. The difference is that in the latter case, IGP does not claim VGA cycles. Other than that, the device functions normally, and agp_i810 should attach to it.
Sponsored by: The FreeBSD Foundation MFC after: 1 week --- sys/dev/agp/agp_i810.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sys/dev/agp/agp_i810.c b/sys/dev/agp/agp_i810.c index 6e0629250d8..f6b8ab893a7 100644 --- a/sys/dev/agp/agp_i810.c +++ b/sys/dev/agp/agp_i810.c @@ -749,7 +749,8 @@ agp_i810_match(device_t dev) int i, devid; if (pci_get_class(dev) != PCIC_DISPLAY - || pci_get_subclass(dev) != PCIS_DISPLAY_VGA) + || (pci_get_subclass(dev) != PCIS_DISPLAY_VGA && + pci_get_subclass(dev) != PCIS_DISPLAY_OTHER)) return (NULL); devid = pci_get_devid(dev); From a537a017e837cecb598d1565bec53884327d6478 Mon Sep 17 00:00:00 2001 From: Konstantin Belousov Date: Tue, 11 Nov 2014 12:56:30 +0000 Subject: [PATCH 052/280] Initial attachment of the agp(4) to Haswell IGP. There is no handling of cacheability control bits in GTT PTEs yet. Sponsored by: The FreeBSD Foundation MFC after: 1 week --- sys/dev/agp/agp_i810.c | 58 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) diff --git a/sys/dev/agp/agp_i810.c b/sys/dev/agp/agp_i810.c index f6b8ab893a7..0db332bbec9 100644 --- a/sys/dev/agp/agp_i810.c +++ b/sys/dev/agp/agp_i810.c @@ -503,6 +503,29 @@ static const struct agp_i810_driver agp_i810_sb_driver = { .chipset_flush = agp_i810_chipset_flush, }; +static const struct agp_i810_driver agp_i810_hsw_driver = { + .chiptype = CHIP_SB, + .gen = 7, + .busdma_addr_mask_sz = 40, + .res_spec = agp_g4x_res_spec, + .check_active = agp_sb_check_active, + .set_desc = agp_i810_set_desc, + .dump_regs = agp_sb_dump_regs, + .get_stolen_size = agp_sb_get_stolen_size, + .get_gtt_mappable_entries = agp_i915_get_gtt_mappable_entries, + .get_gtt_total_entries = agp_sb_get_gtt_total_entries, + .install_gatt = agp_g4x_install_gatt, + .deinstall_gatt = agp_i830_deinstall_gatt, + .write_gtt = agp_sb_write_gtt, + .install_gtt_pte = agp_sb_install_gtt_pte, + .read_gtt_pte = agp_g4x_read_gtt_pte, + .read_gtt_pte_paddr = agp_sb_read_gtt_pte_paddr, +
.set_aperture = agp_i915_set_aperture, + .chipset_flush_setup = agp_i810_chipset_flush_setup, + .chipset_flush_teardown = agp_i810_chipset_flush_teardown, + .chipset_flush = agp_i810_chipset_flush, +}; + /* For adding new devices, devid is the id of the graphics controller * (pci:0:2:0, for example). The placeholder (usually at pci:0:2:1) for the * second head should never be added. The bridge_offset is the offset to @@ -738,6 +761,41 @@ static const struct agp_i810_match { .name = "IvyBridge server GT2 IG", .driver = &agp_i810_sb_driver }, + { + .devid = 0x04028086, + .name = "Haswell desktop GT1", + .driver = &agp_i810_hsw_driver + }, + { + .devid = 0x04128086, + .name = "Haswell desktop GT2", + .driver = &agp_i810_hsw_driver + }, + { + .devid = 0x040a8086, + .name = "Haswell server GT1", + .driver = &agp_i810_hsw_driver + }, + { + .devid = 0x041a8086, + .name = "Haswell server GT2", + .driver = &agp_i810_hsw_driver + }, + { + .devid = 0x04068086, + .name = "Haswell mobile GT1", + .driver = &agp_i810_hsw_driver + }, + { + .devid = 0x04168086, + .name = "Haswell mobile GT2", + .driver = &agp_i810_hsw_driver + }, + { + .devid = 0x0c168086, + .name = "Haswell SDV", + .driver = &agp_i810_hsw_driver + }, { .devid = 0, } From 136fae42a93f1ac91dbce3ddfb736ae954fa6799 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dag-Erling=20Sm=C3=B8rgrav?= Date: Tue, 11 Nov 2014 13:37:28 +0000 Subject: [PATCH 053/280] Fix gcc build: preserve const qualifier when casting input values. 
Noticed by: bz@ Submitted by: dim@ --- sys/crypto/rijndael/rijndael-api-fst.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sys/crypto/rijndael/rijndael-api-fst.c b/sys/crypto/rijndael/rijndael-api-fst.c index 24e5646694e..bf7b4d14e6a 100644 --- a/sys/crypto/rijndael/rijndael-api-fst.c +++ b/sys/crypto/rijndael/rijndael-api-fst.c @@ -233,10 +233,10 @@ int rijndael_padEncrypt(cipherInstance *cipher, keyInstance *key, case MODE_CBC: iv = cipher->IV; for (i = numBlocks; i > 0; i--) { - ((u_int32_t*)block)[0] = ((u_int32_t*)input)[0] ^ ((u_int32_t*)iv)[0]; - ((u_int32_t*)block)[1] = ((u_int32_t*)input)[1] ^ ((u_int32_t*)iv)[1]; - ((u_int32_t*)block)[2] = ((u_int32_t*)input)[2] ^ ((u_int32_t*)iv)[2]; - ((u_int32_t*)block)[3] = ((u_int32_t*)input)[3] ^ ((u_int32_t*)iv)[3]; + ((u_int32_t*)block)[0] = ((const u_int32_t*)input)[0] ^ ((u_int32_t*)iv)[0]; + ((u_int32_t*)block)[1] = ((const u_int32_t*)input)[1] ^ ((u_int32_t*)iv)[1]; + ((u_int32_t*)block)[2] = ((const u_int32_t*)input)[2] ^ ((u_int32_t*)iv)[2]; + ((u_int32_t*)block)[3] = ((const u_int32_t*)input)[3] ^ ((u_int32_t*)iv)[3]; rijndaelEncrypt(key->rk, key->Nr, block, outBuffer); iv = outBuffer; input += 16; From 843c718fa7be4c86f09ccb85724c70fb926191a2 Mon Sep 17 00:00:00 2001 From: Konstantin Belousov Date: Tue, 11 Nov 2014 14:30:35 +0000 Subject: [PATCH 054/280] Update comment. 
Noted by: dim Approved by: secteam (des) MFC after: 4 days --- sys/dev/random/ivy.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sys/dev/random/ivy.c b/sys/dev/random/ivy.c index bbc4e78e740..71a61f4d8d4 100644 --- a/sys/dev/random/ivy.c +++ b/sys/dev/random/ivy.c @@ -70,7 +70,7 @@ ivy_rng_store(u_long *buf) retry = RETRY_COUNT; __asm __volatile( "1:\n\t" - "rdrand %1\n\t" /* read randomness into tmp */ + "rdrand %1\n\t" /* read randomness into rndval */ "jc 2f\n\t" /* CF is set on success, exit retry loop */ "dec %0\n\t" /* otherwise, retry-- */ "jne 1b\n\t" /* and loop if retries are not exhausted */ From 96699e86a37f459f2ebb0992769220895c0872be Mon Sep 17 00:00:00 2001 From: Ed Maste Date: Tue, 11 Nov 2014 14:59:46 +0000 Subject: [PATCH 055/280] Add workaround for vt efifb's early use of PHYS_TO_DMAP In vt_efifb_init the framebuffer's physaddr is passed to PHYS_TO_DMAP before the DMAP is set up. The result is not actually accessed until after the mapping is set up, though. Loosen the assertion in PHYS_TO_DMAP for now, to allow use when dmaplimit == 0. Reviewed by: kib Sponsored by: The FreeBSD Foundation Differential Revision: https://reviews.freebsd.org/D1142 --- sys/amd64/include/vmparam.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/sys/amd64/include/vmparam.h b/sys/amd64/include/vmparam.h index b1b89b98d8f..58cd694d6fd 100644 --- a/sys/amd64/include/vmparam.h +++ b/sys/amd64/include/vmparam.h @@ -175,8 +175,14 @@ #define VM_MAX_ADDRESS UPT_MAX_ADDRESS #define VM_MIN_ADDRESS (0) +/* + * XXX Allowing dmaplimit == 0 is a temporary workaround for vt(4) efifb's + * early use of PHYS_TO_DMAP before the mapping is actually setup. This works + * because the result is not actually accessed until later, but the early + * vt fb startup needs to be reworked.
+ */ #define PHYS_TO_DMAP(x) ({ \ - KASSERT((x) < dmaplimit, \ + KASSERT(dmaplimit == 0 || (x) < dmaplimit, \ ("physical address %#jx not covered by the DMAP", \ (uintmax_t)x)); \ (x) | DMAP_MIN_ADDRESS; }) From 2582ba5ec35af8a8437fde376ddb775aed4379fd Mon Sep 17 00:00:00 2001 From: Xin LI Date: Tue, 11 Nov 2014 16:49:33 +0000 Subject: [PATCH 056/280] Test errno against EEXIST as well. MFC after: 3 days --- contrib/hyperv/tools/hv_kvp_daemon.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/hyperv/tools/hv_kvp_daemon.c b/contrib/hyperv/tools/hv_kvp_daemon.c index 3da0e70ac5d..1c31d3f9b62 100644 --- a/contrib/hyperv/tools/hv_kvp_daemon.c +++ b/contrib/hyperv/tools/hv_kvp_daemon.c @@ -285,7 +285,7 @@ kvp_file_init(void) int alloc_unit = sizeof(struct kvp_record) * ENTRIES_PER_BLOCK; if (mkdir("/var/db/hyperv/pool", S_IRUSR | S_IWUSR | S_IROTH) < 0 && - errno != EISDIR) { + (errno != EEXIST && errno != EISDIR)) { KVP_LOG(LOG_ERR, " Failed to create /var/db/hyperv/pool\n"); exit(EXIT_FAILURE); } From 1cb8f1cd72c9967f1b276d5ba0f706876cd243ee Mon Sep 17 00:00:00 2001 From: David Christensen Date: Tue, 11 Nov 2014 16:51:00 +0000 Subject: [PATCH 057/280] Remove myself from bxe(4) maintenance. --- MAINTAINERS | 1 - 1 file changed, 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 98fa63d98f9..227e01dd210 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -48,7 +48,6 @@ cd(4) ken Pre-commit review requested. pass(4) ken Pre-commit review requested. ch(4) ken Pre-commit review requested. em(4) jfv Pre-commit review requested. -bxe(4) davidch Pre-commit review requested. tdfx(4) cokane Just keep me informed of changes, try not to break it. sendmail gshapiro Pre-commit review requested. etc/mail gshapiro Pre-commit review requested. 
From 2da2ade0210e75b9a36256cc505d0b9bfc81a975 Mon Sep 17 00:00:00 2001 From: Adrian Chadd Date: Tue, 11 Nov 2014 17:14:11 +0000 Subject: [PATCH 058/280] Use the correct device (child) when asking the bus layer about which power state said device should go into. This was a snafu introduced in the ACPI/PCI awareness separation. When putting a device into a power state, the bus (and thus firmware, e.g. ACPI) should be asked beforehand to check whether the device can indeed go into that power state. There's a set of nodes in ACPI under each device - the _SxD nodes - which state which ACPI power state to put the device into when the system is going into power save state 'x'. So when going into S3, the existence of an _S3D node would override whatever the system was trying to do. By default the PCI code wants to put devices into D3 before suspending. I have a laptop here (Asus Zenbook - check the PR) whose EHCI controller really wants to be in D2 during suspend, not D3. So if we put it into D3 and then try to enter S3, everything hangs. The device itself can go into D3 - it just can't be there when the call to ACPI to enter S3 occurs. The PCI patch fixes this. jkim@ noticed that the same is needed for the ACPI child device enumeration. Thank you to Matt Dillon (the programmer, not the actor) for buying me this particular laptop so I could debug the issues with the Atheros AR9485 that is in it. It's his fault that I ended up with this laptop and was sufficiently annoyed by the lack of USB suspend to go down this rabbit hole. Tested: * Thinkpad T400 * Thinkpad X230 * Thinkpad T42 * Thinkpad T60 * Asus Zenbook (see PR) * Asus EEEPC 701 * Asus EEEPC 1001PX TODO: * Figure out what we should do about devices we unload drivers for that want to be in a specific state when entering S3 / S4 - the "put devices into D3 if they're not bound to a driver" option may also mess with things.
PR: kern/194884 Reviewed by: jhb, jkim MFC after: 1 week Relnotes: yes Sponsored by: Matt Dillon (hardware) --- sys/dev/acpica/acpi.c | 2 +- sys/dev/pci/pci.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/sys/dev/acpica/acpi.c b/sys/dev/acpica/acpi.c index 71fbe3226b6..e7389af50db 100644 --- a/sys/dev/acpica/acpi.c +++ b/sys/dev/acpica/acpi.c @@ -710,7 +710,7 @@ acpi_set_power_children(device_t dev, int state) child = devlist[i]; dstate = state; if (device_is_attached(child) && - acpi_device_pwr_for_sleep(parent, dev, &dstate) == 0) + acpi_device_pwr_for_sleep(parent, child, &dstate) == 0) acpi_set_powerstate(child, dstate); } free(devlist, M_TEMP); diff --git a/sys/dev/pci/pci.c b/sys/dev/pci/pci.c index 485f7289c96..d2ac111372e 100644 --- a/sys/dev/pci/pci.c +++ b/sys/dev/pci/pci.c @@ -3651,7 +3651,7 @@ pci_set_power_child(device_t dev, device_t child, int state) dinfo = device_get_ivars(child); dstate = state; if (device_is_attached(child) && - PCIB_POWER_FOR_SLEEP(pcib, dev, &dstate) == 0) + PCIB_POWER_FOR_SLEEP(pcib, child, &dstate) == 0) pci_set_powerstate(child, dstate); } From 858b72dbb8b2e130a2cc605ea68c1128d10c96bd Mon Sep 17 00:00:00 2001 From: John Baldwin Date: Tue, 11 Nov 2014 18:15:05 +0000 Subject: [PATCH 059/280] Use the callout(9) API instead of timeout(9). To do this more cleanly, convert a global timer to a per-controller timer. This works much better with locking and removes the need for several global lookup tables. 
Tested by: ambrisko --- sys/dev/rp/rp.c | 106 +++++++++++--------------------------------- sys/dev/rp/rp_pci.c | 3 +- sys/dev/rp/rpreg.h | 6 +-- sys/dev/rp/rpvar.h | 1 + 4 files changed, 32 insertions(+), 84 deletions(-) diff --git a/sys/dev/rp/rp.c b/sys/dev/rp/rp.c index 520ca80f930..5027694d54e 100644 --- a/sys/dev/rp/rp.c +++ b/sys/dev/rp/rp.c @@ -552,24 +552,12 @@ void sDisInterrupts(CHANNEL_T *ChP,Word_t Flags) Begin FreeBsd-specific driver code **********************************************************************/ -struct callout_handle rp_callout_handle; - -static int rp_num_ports_open = 0; -static int rp_ndevs = 0; - -static int rp_num_ports[4]; /* Number of ports on each controller */ - -#define POLL_INTERVAL 1 +#define POLL_INTERVAL (hz / 100) #define RP_ISMULTIPORT(dev) ((dev)->id_flags & 0x1) #define RP_MPMASTER(dev) (((dev)->id_flags >> 8) & 0xff) #define RP_NOTAST4(dev) ((dev)->id_flags & 0x04) -static struct rp_port *p_rp_addr[4]; -static struct rp_port *p_rp_table[MAX_RP_PORTS]; -#define rp_addr(unit) (p_rp_addr[unit]) -#define rp_table(port) (p_rp_table[port]) - /* * The top-level routines begin here */ @@ -676,46 +664,31 @@ static void rp_handle_port(struct rp_port *rp) */ } -static void rp_do_poll(void *not_used) +static void rp_do_poll(void *arg) { CONTROLLER_t *ctl; struct rp_port *rp; struct tty *tp; - int unit, aiop, ch, line, count; + int count; unsigned char CtlMask, AiopMask; - for(unit = 0; unit < rp_ndevs; unit++) { - rp = rp_addr(unit); + rp = arg; + tp = rp->rp_tty; + tty_lock_assert(tp, MA_OWNED); ctl = rp->rp_ctlp; CtlMask = ctl->ctlmask(ctl); - for(aiop=0; CtlMask; CtlMask >>=1, aiop++) { - if(CtlMask & 1) { - AiopMask = sGetAiopIntStatus(ctl, aiop); - for(ch = 0; AiopMask; AiopMask >>=1, ch++) { - if(AiopMask & 1) { - line = (unit << 5) | (aiop << 3) | ch; - rp = rp_table(line); - rp_handle_port(rp); - } - } + if (CtlMask & (1 << rp->rp_aiop)) { + AiopMask = sGetAiopIntStatus(ctl, rp->rp_aiop); + if (AiopMask & (1 << 
rp->rp_chan)) { + rp_handle_port(rp); } } - for(line = 0, rp = rp_addr(unit); line < rp_num_ports[unit]; - line++, rp++) { - tp = rp->rp_tty; - tty_lock(tp); - count = sGetTxCnt(&rp->rp_channel); - if (count >= 0 && - (count <= rp->rp_restart)) { - rpstart(tp); - } - tty_unlock(tp); + count = sGetTxCnt(&rp->rp_channel); + if (count >= 0 && (count <= rp->rp_restart)) { + rpstart(tp); } - } - if(rp_num_ports_open) - rp_callout_handle = timeout(rp_do_poll, - (void *)NULL, POLL_INTERVAL); + callout_schedule(&rp->rp_timer, POLL_INTERVAL); } static struct ttydevsw rp_tty_class = { @@ -745,7 +718,7 @@ rp_attachcommon(CONTROLLER_T *ctlp, int num_aiops, int num_ports) int unit; int num_chan; int aiop, chan, port; - int ChanStatus, line, count; + int ChanStatus; int retval; struct rp_port *rp; struct tty *tp; @@ -754,9 +727,8 @@ rp_attachcommon(CONTROLLER_T *ctlp, int num_aiops, int num_ports) printf("RocketPort%d (Version %s) %d ports.\n", unit, RocketPortVersion, num_ports); - rp_num_ports[unit] = num_ports; - callout_handle_init(&rp_callout_handle); + ctlp->num_ports = num_ports; ctlp->rp = rp = (struct rp_port *) malloc(sizeof(struct rp_port) * num_ports, M_DEVBUF, M_NOWAIT | M_ZERO); if (rp == NULL) { @@ -765,16 +737,12 @@ rp_attachcommon(CONTROLLER_T *ctlp, int num_aiops, int num_ports) goto nogo; } - count = unit * 32; /* board times max ports per card SG */ - - bzero(rp, sizeof(struct rp_port) * num_ports); - rp_addr(unit) = rp; - port = 0; for(aiop=0; aiop < num_aiops; aiop++) { num_chan = sGetAiopNumChan(ctlp, aiop); for(chan=0; chan < num_chan; chan++, port++, rp++) { rp->rp_tty = tp = tty_alloc(&rp_tty_class, rp); + callout_init_mtx(&rp->rp_timer, tty_getlock(tp), 0); rp->rp_port = port; rp->rp_ctlp = ctlp; rp->rp_unit = unit; @@ -794,13 +762,10 @@ rp_attachcommon(CONTROLLER_T *ctlp, int num_aiops, int num_ports) } ChanStatus = sGetChanStatus(&rp->rp_channel); rp->rp_cts = (ChanStatus & CTS_ACT) != 0; - line = (unit << 5) | (aiop << 3) | chan; - rp_table(line) = 
rp; tty_makedev(tp, NULL, "R%r%r", unit, port); } } - rp_ndevs++; mtx_init(&ctlp->hwmtx, "rp_hwmtx", NULL, MTX_DEF); ctlp->hwmtx_init = 1; return (0); @@ -814,40 +779,26 @@ nogo: void rp_releaseresource(CONTROLLER_t *ctlp) { - int i, unit; struct rp_port *rp; + int i; - - unit = device_get_unit(ctlp->dev); - if (rp_addr(unit) != NULL) { - for (i = 0; i < rp_num_ports[unit]; i++) { - rp = rp_addr(unit) + i; + if (ctlp->rp != NULL) { + for (i = 0; i < ctlp->num_ports; i++) { + rp = ctlp->rp + i; atomic_add_32(&ctlp->free, 1); tty_lock(rp->rp_tty); tty_rel_gone(rp->rp_tty); } + free(ctlp->rp, M_DEVBUF); + ctlp->rp = NULL; } while (ctlp->free != 0) { pause("rpwt", hz / 10); } - if (ctlp->rp != NULL) { - for (i = 0 ; i < sizeof(p_rp_addr) / sizeof(*p_rp_addr) ; i++) - if (p_rp_addr[i] == ctlp->rp) - p_rp_addr[i] = NULL; - for (i = 0 ; i < sizeof(p_rp_table) / sizeof(*p_rp_table) ; i++) - if (p_rp_table[i] == ctlp->rp) - p_rp_table[i] = NULL; - free(ctlp->rp, M_DEVBUF); - ctlp->rp = NULL; - } -} - -void -rp_untimeout(void) -{ - untimeout(rp_do_poll, (void *)NULL, rp_callout_handle); + if (ctlp->hwmtx_init) + mtx_destroy(&ctlp->hwmtx); } static int @@ -893,15 +844,11 @@ rpopen(struct tty *tp) sSetRTS(&rp->rp_channel); */ - rp_num_ports_open++; - IntMask = sGetChanIntID(&rp->rp_channel); IntMask = IntMask & rp->rp_intmask; ChanStatus = sGetChanStatus(&rp->rp_channel); - if(rp_num_ports_open == 1) - rp_callout_handle = timeout(rp_do_poll, - (void *)NULL, POLL_INTERVAL); + callout_reset(&rp->rp_timer, POLL_INTERVAL, rp_do_poll, rp); device_busy(rp->rp_ctlp->dev); return(0); @@ -913,6 +860,7 @@ rpclose(struct tty *tp) struct rp_port *rp; rp = tty_softc(tp); + callout_stop(&rp->rp_timer); rphardclose(tp); device_unbusy(rp->rp_ctlp->dev); } diff --git a/sys/dev/rp/rp_pci.c b/sys/dev/rp/rp_pci.c index e7b98325bd0..3479ab7c020 100644 --- a/sys/dev/rp/rp_pci.c +++ b/sys/dev/rp/rp_pci.c @@ -237,7 +237,7 @@ rp_pcishutdown(device_t dev) static void rp_pcireleaseresource(CONTROLLER_t 
*ctlp) { - rp_untimeout(); + rp_releaseresource(ctlp); if (ctlp->io != NULL) { if (ctlp->io[0] != NULL) bus_release_resource(ctlp->dev, SYS_RES_IOPORT, ctlp->io_rid[0], ctlp->io[0]); @@ -248,7 +248,6 @@ rp_pcireleaseresource(CONTROLLER_t *ctlp) free(ctlp->io_rid, M_DEVBUF); ctlp->io = NULL; } - rp_releaseresource(ctlp); } static int diff --git a/sys/dev/rp/rpreg.h b/sys/dev/rp/rpreg.h index c8960eb5dc2..c5a8cac7b07 100644 --- a/sys/dev/rp/rpreg.h +++ b/sys/dev/rp/rpreg.h @@ -364,6 +364,7 @@ struct CONTROLLER_str struct mtx hwmtx; /* Spinlock protecting hardware. */ int hwmtx_init; int free; + int num_ports; /* Device and resource management */ device_t dev; /* device */ @@ -1008,18 +1009,17 @@ void sEnInterrupts(CHANNEL_T *ChP,Word_t Flags); void sDisInterrupts(CHANNEL_T *ChP,Word_t Flags); int rp_attachcommon(CONTROLLER_T *ctlp, int num_aiops, int num_ports); void rp_releaseresource(CONTROLLER_t *ctlp); -void rp_untimeout(void); static __inline void rp_lock(CONTROLLER_T *CtlP) { if (CtlP->hwmtx_init != 0) - mtx_lock_spin(&CtlP->hwmtx); + mtx_lock(&CtlP->hwmtx); } static __inline void rp_unlock(CONTROLLER_T *CtlP) { if (CtlP->hwmtx_init != 0) - mtx_unlock_spin(&CtlP->hwmtx); + mtx_unlock(&CtlP->hwmtx); } #ifndef ROCKET_C diff --git a/sys/dev/rp/rpvar.h b/sys/dev/rp/rpvar.h index f7f28aa6409..fd5faf3d85c 100644 --- a/sys/dev/rp/rpvar.h +++ b/sys/dev/rp/rpvar.h @@ -43,6 +43,7 @@ struct rp_port { struct tty * rp_tty; /* cross reference */ + struct callout rp_timer; unsigned char state; /* state of dtr */ From 7a3659bcace0c9568e3b414fffc639fb9d8d15d8 Mon Sep 17 00:00:00 2001 From: Dimitry Andric Date: Tue, 11 Nov 2014 18:54:57 +0000 Subject: [PATCH 060/280] Change kbdb's kthr::cpu field into an int, to avoid gcc warnings about comparing it with NOCPU, which became -1 recently. While here, avoid using it for address calculations if it is negative. 
Reviewed by: jhb, adrian MFC after: 1 week --- gnu/usr.bin/gdb/kgdb/kgdb.h | 2 +- gnu/usr.bin/gdb/kgdb/trgt_i386.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/gnu/usr.bin/gdb/kgdb/kgdb.h b/gnu/usr.bin/gdb/kgdb/kgdb.h index 1a32d8a125a..379861b0cad 100644 --- a/gnu/usr.bin/gdb/kgdb/kgdb.h +++ b/gnu/usr.bin/gdb/kgdb/kgdb.h @@ -41,7 +41,7 @@ struct kthr { uintptr_t pcb; int tid; int pid; - u_char cpu; + int cpu; }; extern struct kthr *curkthr; diff --git a/gnu/usr.bin/gdb/kgdb/trgt_i386.c b/gnu/usr.bin/gdb/kgdb/trgt_i386.c index 02c99182fcf..6d206d5ab1a 100644 --- a/gnu/usr.bin/gdb/kgdb/trgt_i386.c +++ b/gnu/usr.bin/gdb/kgdb/trgt_i386.c @@ -139,7 +139,7 @@ kgdb_trgt_fetch_tss(void) uintptr_t addr, cpu0prvpage, tss; kt = kgdb_thr_lookup_tid(ptid_get_pid(inferior_ptid)); - if (kt == NULL || kt->cpu == NOCPU) + if (kt == NULL || kt->cpu == NOCPU || kt->cpu < 0) return (0); addr = kgdb_lookup("gdt"); From e91afc1cda50cbcb8fffa3f52cc0f8c595a392a3 Mon Sep 17 00:00:00 2001 From: Devin Teske Date: Tue, 11 Nov 2014 19:37:17 +0000 Subject: [PATCH 061/280] Default `bsdconfig timezone' and `tzsetup' to `-s' in a VM. Recommended by: cperciva Reviewed by: cperciva Relnotes: tzsetup and bsdconfig now assume that the "hardware" clock inside a VM is set to UTC --- usr.sbin/bsdconfig/timezone/timezone | 5 ++++- usr.sbin/tzsetup/tzsetup.c | 9 +++++++++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/usr.sbin/bsdconfig/timezone/timezone b/usr.sbin/bsdconfig/timezone/timezone index 0452230441b..66f2d789228 100755 --- a/usr.sbin/bsdconfig/timezone/timezone +++ b/usr.sbin/bsdconfig/timezone/timezone @@ -62,7 +62,7 @@ _PATH_WALL_CMOS_CLOCK="/etc/wall_cmos_clock" REALLYDOIT=1 REINSTALL= USEDIALOG=1 -SKIPUTC= +SKIPUTC= # See MAIN VERBOSE= TZ_OR_FAIL= CHROOTENV= @@ -119,6 +119,9 @@ dialog_menu_main() ############################################################ MAIN +# Skip initial question regarding UTC v. 
Wall-Clock time if run in VM +[ "$( sysctl -n kern.vm_guest 2> /dev/null )" = "none" ] || SKIPUTC=1 + # Incorporate rc-file if it exists [ -f "$HOME/.bsdconfigrc" ] && f_include "$HOME/.bsdconfigrc" diff --git a/usr.sbin/tzsetup/tzsetup.c b/usr.sbin/tzsetup/tzsetup.c index cea8533d463..17502274385 100644 --- a/usr.sbin/tzsetup/tzsetup.c +++ b/usr.sbin/tzsetup/tzsetup.c @@ -47,6 +47,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include @@ -910,8 +911,16 @@ main(int argc, char **argv) { char title[64], prompt[128]; int c, fd, rv, skiputc; + char vm_guest[16] = ""; + size_t len = sizeof(vm_guest); skiputc = 0; + + /* Default skiputc to 1 for VM guests */ + if (sysctlbyname("kern.vm_guest", vm_guest, &len, NULL, 0) == 0 && + strcmp(vm_guest, "none") != 0) + skiputc = 1; + while ((c = getopt(argc, argv, "C:nrs")) != -1) { switch(c) { case 'C': From 44aba0f6c2df47098233cbc6590b8dafa212a672 Mon Sep 17 00:00:00 2001 From: Jung-uk Kim Date: Tue, 11 Nov 2014 19:42:10 +0000 Subject: [PATCH 062/280] Use the correct device. Note this commit complements r274386. PR: 194884 --- sys/dev/acpica/acpi.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/sys/dev/acpica/acpi.c b/sys/dev/acpica/acpi.c index e7389af50db..0e8998b0949 100644 --- a/sys/dev/acpica/acpi.c +++ b/sys/dev/acpica/acpi.c @@ -694,7 +694,7 @@ acpi_attach(device_t dev) static void acpi_set_power_children(device_t dev, int state) { - device_t child, parent; + device_t child; device_t *devlist; int dstate, i, numdevs; @@ -705,12 +705,11 @@ acpi_set_power_children(device_t dev, int state) * Retrieve and set D-state for the sleep state if _SxD is present. * Skip children who aren't attached since they are handled separately. 
*/ - parent = device_get_parent(dev); for (i = 0; i < numdevs; i++) { child = devlist[i]; dstate = state; if (device_is_attached(child) && - acpi_device_pwr_for_sleep(parent, child, &dstate) == 0) + acpi_device_pwr_for_sleep(dev, child, &dstate) == 0) acpi_set_powerstate(child, dstate); } free(devlist, M_TEMP); From 8b20d1c1c567360d8017784f0905c2a3dc1b4ff4 Mon Sep 17 00:00:00 2001 From: John Baldwin Date: Tue, 11 Nov 2014 19:44:59 +0000 Subject: [PATCH 063/280] Move NFS and TFTP filesystems before the synthetic filesystems (bzip, gzip, and split). "Real" filesystems should always be listed first so that the "bare" filename is tried before alternate filenames. For PXE booting in particular this can remove a lot of spurious pathname lookups. While here, move splitfs to the bottom after the bzip and gzip filesystems as it is the least often used. Tested by: Prokash Sinha MFC after: 1 week --- sys/boot/i386/loader/conf.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/sys/boot/i386/loader/conf.c b/sys/boot/i386/loader/conf.c index ac19751bd7e..fda6fd25a07 100644 --- a/sys/boot/i386/loader/conf.c +++ b/sys/boot/i386/loader/conf.c @@ -80,8 +80,11 @@ struct fs_ops *file_system[] = { #if defined(LOADER_NANDFS_SUPPORT) &nandfs_fsops, #endif -#ifdef LOADER_SPLIT_SUPPORT - &splitfs_fsops, +#ifdef LOADER_NFS_SUPPORT + &nfs_fsops, +#endif +#ifdef LOADER_TFTP_SUPPORT + &tftp_fsops, #endif #ifdef LOADER_GZIP_SUPPORT &gzipfs_fsops, @@ -89,11 +92,8 @@ struct fs_ops *file_system[] = { #ifdef LOADER_BZIP2_SUPPORT &bzipfs_fsops, #endif -#ifdef LOADER_NFS_SUPPORT - &nfs_fsops, -#endif -#ifdef LOADER_TFTP_SUPPORT - &tftp_fsops, +#ifdef LOADER_SPLIT_SUPPORT + &splitfs_fsops, #endif NULL }; From 7e31e02573adfcba5380a131e10fd67473a2244e Mon Sep 17 00:00:00 2001 From: Devin Teske Date: Tue, 11 Nov 2014 19:45:14 +0000 Subject: [PATCH 064/280] Fix whitespace. 
Thanks to: nwhitehorn --- usr.sbin/tzsetup/tzsetup.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/usr.sbin/tzsetup/tzsetup.c b/usr.sbin/tzsetup/tzsetup.c index 17502274385..71ba63b4698 100644 --- a/usr.sbin/tzsetup/tzsetup.c +++ b/usr.sbin/tzsetup/tzsetup.c @@ -911,15 +911,15 @@ main(int argc, char **argv) { char title[64], prompt[128]; int c, fd, rv, skiputc; - char vm_guest[16] = ""; - size_t len = sizeof(vm_guest); + char vm_guest[16] = ""; + size_t len = sizeof(vm_guest); skiputc = 0; /* Default skiputc to 1 for VM guests */ - if (sysctlbyname("kern.vm_guest", vm_guest, &len, NULL, 0) == 0 && + if (sysctlbyname("kern.vm_guest", vm_guest, &len, NULL, 0) == 0 && strcmp(vm_guest, "none") != 0) - skiputc = 1; + skiputc = 1; while ((c = getopt(argc, argv, "C:nrs")) != -1) { switch(c) { From 7bfc98355a6de3a6b634e38e6ddd5c7303324e1e Mon Sep 17 00:00:00 2001 From: John Baldwin Date: Tue, 11 Nov 2014 20:05:50 +0000 Subject: [PATCH 065/280] Add device ID for the T502-BT (dual-port 1G) adapter. Reviewed by: np MFC after: 1 week --- sys/dev/cxgbe/t4_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sys/dev/cxgbe/t4_main.c b/sys/dev/cxgbe/t4_main.c index 475845b27a9..4c141de4a5d 100644 --- a/sys/dev/cxgbe/t4_main.c +++ b/sys/dev/cxgbe/t4_main.c @@ -490,6 +490,7 @@ struct { {0x5411, "Chelsio T520-LL-CR"}, /* 2 x 10G */ {0x5412, "Chelsio T560-CR"}, /* 1 x 40G, 2 x 10G */ {0x5414, "Chelsio T580-LP-SO-CR"}, /* 2 x 40G, nomem */ + {0x5415, "Chelsio T502-BT"}, /* 2 x 1G */ #ifdef notyet {0x5404, "Chelsio T520-BCH"}, {0x5405, "Chelsio T540-BCH"}, From 0e87b36eaac0db20b8cbb28ed1950c7fa121c2a5 Mon Sep 17 00:00:00 2001 From: Gleb Smirnoff Date: Tue, 11 Nov 2014 20:32:46 +0000 Subject: [PATCH 066/280] Remove SF_KQUEUE code. This code was developed at Netflix, but was not ever used. It didn't go into stable/10, neither was documented. It might be useful, but we collectively decided to remove it, rather leave it abandoned and unmaintained. 
It is removed in one single commit, so restoring it should be easy, if anyone wants to reopen this idea. Sponsored by: Netflix --- sys/compat/freebsd32/freebsd32_misc.c | 46 +- sys/kern/kern_descrip.c | 4 +- sys/kern/uipc_syscalls.c | 734 ++------------------------ sys/sys/file.h | 10 +- sys/sys/sf_base.h | 37 -- sys/sys/sf_sync.h | 64 --- sys/sys/socket.h | 16 - 7 files changed, 71 insertions(+), 840 deletions(-) delete mode 100644 sys/sys/sf_base.h delete mode 100644 sys/sys/sf_sync.h diff --git a/sys/compat/freebsd32/freebsd32_misc.c b/sys/compat/freebsd32/freebsd32_misc.c index 5ea062ea368..b2ec8b357d8 100644 --- a/sys/compat/freebsd32/freebsd32_misc.c +++ b/sys/compat/freebsd32/freebsd32_misc.c @@ -83,10 +83,6 @@ __FBSDID("$FreeBSD$"); #include #include #include -#include -#include -#include -#include #ifdef INET #include @@ -1567,26 +1563,16 @@ struct sf_hdtr32 { int trl_cnt; }; -struct sf_hdtr_kq32 { - int kq_fd; - uint32_t kq_flags; - uint32_t kq_udata; /* 32-bit void ptr */ - uint32_t kq_ident; /* 32-bit uintptr_t */ -}; - static int freebsd32_do_sendfile(struct thread *td, struct freebsd32_sendfile_args *uap, int compat) { struct sf_hdtr32 hdtr32; struct sf_hdtr hdtr; - struct sf_hdtr_kq32 hdtr_kq32; - struct sf_hdtr_kq hdtr_kq; struct uio *hdr_uio, *trl_uio; struct iovec32 *iov32; - off_t offset; + off_t offset, sbytes; int error; - off_t sbytes; offset = PAIR32TO64(off_t, uap->offset); if (offset < 0) @@ -1617,31 +1603,17 @@ freebsd32_do_sendfile(struct thread *td, if (error) goto out; } - - /* - * If SF_KQUEUE is set, then we need to also copy in - * the kqueue data after the normal hdtr set and set do_kqueue=1. 
- */ - if (uap->flags & SF_KQUEUE) { - error = copyin(((char *) uap->hdtr) + sizeof(hdtr32), - &hdtr_kq32, - sizeof(hdtr_kq32)); - if (error != 0) - goto out; - - /* 32->64 bit fields */ - CP(hdtr_kq32, hdtr_kq, kq_fd); - CP(hdtr_kq32, hdtr_kq, kq_flags); - PTRIN_CP(hdtr_kq32, hdtr_kq, kq_udata); - CP(hdtr_kq32, hdtr_kq, kq_ident); - } } + AUDIT_ARG_FD(uap->fd); - /* Call sendfile */ - /* XXX stack depth! */ - error = _do_sendfile(td, uap->fd, uap->s, uap->flags, compat, - offset, uap->nbytes, &sbytes, hdr_uio, trl_uio, &hdtr_kq); + if ((error = fget_read(td, uap->fd, + cap_rights_init(&rights, CAP_PREAD), &fp)) != 0) + goto out; + + error = fo_sendfile(fp, uap->s, hdr_uio, trl_uio, offset, + uap->nbytes, &sbytes, uap->flags, compat ? SFK_COMPAT : 0, td); + fdrop(fp, td); if (uap->sbytes != NULL) copyout(&sbytes, uap->sbytes, sizeof(off_t)); diff --git a/sys/kern/kern_descrip.c b/sys/kern/kern_descrip.c index 32c837c57c1..e955b8753fd 100644 --- a/sys/kern/kern_descrip.c +++ b/sys/kern/kern_descrip.c @@ -3684,7 +3684,7 @@ badfo_chown(struct file *fp, uid_t uid, gid_t gid, struct ucred *active_cred, static int badfo_sendfile(struct file *fp, int sockfd, struct uio *hdr_uio, struct uio *trl_uio, off_t offset, size_t nbytes, off_t *sent, int flags, - int kflags, struct sendfile_sync *sfs, struct thread *td) + int kflags, struct thread *td) { return (EBADF); @@ -3770,7 +3770,7 @@ invfo_chown(struct file *fp, uid_t uid, gid_t gid, struct ucred *active_cred, int invfo_sendfile(struct file *fp, int sockfd, struct uio *hdr_uio, struct uio *trl_uio, off_t offset, size_t nbytes, off_t *sent, int flags, - int kflags, struct sendfile_sync *sfs, struct thread *td) + int kflags, struct thread *td) { return (EINVAL); diff --git a/sys/kern/uipc_syscalls.c b/sys/kern/uipc_syscalls.c index 6d423ba5106..b53883deff7 100644 --- a/sys/kern/uipc_syscalls.c +++ b/sys/kern/uipc_syscalls.c @@ -63,8 +63,6 @@ __FBSDID("$FreeBSD$"); #include #include #include -#include -#include #include 
#include #include @@ -115,10 +113,6 @@ static int getpeername1(struct thread *td, struct getpeername_args *uap, counter_u64_t sfstat[sizeof(struct sfstat) / sizeof(uint64_t)]; -static int filt_sfsync_attach(struct knote *kn); -static void filt_sfsync_detach(struct knote *kn); -static int filt_sfsync(struct knote *kn, long hint); - /* * sendfile(2)-related variables and associated sysctls */ @@ -128,28 +122,6 @@ static int sfreadahead = 1; SYSCTL_INT(_kern_ipc_sendfile, OID_AUTO, readahead, CTLFLAG_RW, &sfreadahead, 0, "Number of sendfile(2) read-ahead MAXBSIZE blocks"); -#ifdef SFSYNC_DEBUG -static int sf_sync_debug = 0; -SYSCTL_INT(_debug, OID_AUTO, sf_sync_debug, CTLFLAG_RW, - &sf_sync_debug, 0, "Output debugging during sf_sync lifecycle"); -#define SFSYNC_DPRINTF(s, ...) \ - do { \ - if (sf_sync_debug) \ - printf((s), ##__VA_ARGS__); \ - } while (0) -#else -#define SFSYNC_DPRINTF(c, ...) -#endif - -static uma_zone_t zone_sfsync; - -static struct filterops sendfile_filtops = { - .f_isfd = 0, - .f_attach = filt_sfsync_attach, - .f_detach = filt_sfsync_detach, - .f_event = filt_sfsync, -}; - static void sfstat_init(const void *unused) { @@ -159,19 +131,6 @@ sfstat_init(const void *unused) } SYSINIT(sfstat, SI_SUB_MBUF, SI_ORDER_FIRST, sfstat_init, NULL); -static void -sf_sync_init(const void *unused) -{ - - zone_sfsync = uma_zcreate("sendfile_sync", sizeof(struct sendfile_sync), - NULL, NULL, - NULL, NULL, - UMA_ALIGN_CACHE, - 0); - kqueue_add_filteropts(EVFILT_SENDFILE, &sendfile_filtops); -} -SYSINIT(sf_sync, SI_SUB_MBUF, SI_ORDER_FIRST, sf_sync_init, NULL); - static int sfstat_sysctl(SYSCTL_HANDLER_ARGS) { @@ -1864,116 +1823,11 @@ getsockaddr(namp, uaddr, len) return (error); } -static int -filt_sfsync_attach(struct knote *kn) -{ - struct sendfile_sync *sfs = (struct sendfile_sync *) kn->kn_sdata; - struct knlist *knl = &sfs->klist; - - SFSYNC_DPRINTF("%s: kn=%p, sfs=%p\n", __func__, kn, sfs); - - /* - * Validate that we actually received this via the kernel 
API. - */ - if ((kn->kn_flags & EV_FLAG1) == 0) - return (EPERM); - - kn->kn_ptr.p_v = sfs; - kn->kn_flags &= ~EV_FLAG1; - - knl->kl_lock(knl->kl_lockarg); - /* - * If we're in the "freeing" state, - * don't allow the add. That way we don't - * end up racing with some other thread that - * is trying to finish some setup. - */ - if (sfs->state == SF_STATE_FREEING) { - knl->kl_unlock(knl->kl_lockarg); - return (EINVAL); - } - knlist_add(&sfs->klist, kn, 1); - knl->kl_unlock(knl->kl_lockarg); - - return (0); -} - -/* - * Called when a knote is being detached. - */ -static void -filt_sfsync_detach(struct knote *kn) -{ - struct knlist *knl; - struct sendfile_sync *sfs; - int do_free = 0; - - sfs = kn->kn_ptr.p_v; - knl = &sfs->klist; - - SFSYNC_DPRINTF("%s: kn=%p, sfs=%p\n", __func__, kn, sfs); - - knl->kl_lock(knl->kl_lockarg); - if (!knlist_empty(knl)) - knlist_remove(knl, kn, 1); - - /* - * If the list is empty _AND_ the refcount is 0 - * _AND_ we've finished the setup phase and now - * we're in the running phase, we can free the - * underlying sendfile_sync. - * - * But we shouldn't do it before finishing the - * underlying divorce from the knote. - * - * So, we have the sfsync lock held; transition - * it to "freeing", then unlock, then free - * normally. - */ - if (knlist_empty(knl)) { - if (sfs->state == SF_STATE_COMPLETED && sfs->count == 0) { - SFSYNC_DPRINTF("%s: (%llu) sfs=%p; completed, " - "count==0, empty list: time to free!\n", - __func__, - (unsigned long long) curthread->td_tid, - sfs); - sf_sync_set_state(sfs, SF_STATE_FREEING, 1); - do_free = 1; - } - } - knl->kl_unlock(knl->kl_lockarg); - - /* - * Only call free if we're the one who has transitioned things - * to free. Otherwise we could race with another thread that - * is currently tearing things down. 
- */ - if (do_free == 1) { - SFSYNC_DPRINTF("%s: (%llu) sfs=%p, %s:%d\n", - __func__, - (unsigned long long) curthread->td_tid, - sfs, - __FILE__, - __LINE__); - sf_sync_free(sfs); - } -} - -static int -filt_sfsync(struct knote *kn, long hint) -{ - struct sendfile_sync *sfs = (struct sendfile_sync *) kn->kn_ptr.p_v; - int ret; - - SFSYNC_DPRINTF("%s: kn=%p, sfs=%p\n", __func__, kn, sfs); - - /* - * XXX add a lock assertion here! - */ - ret = (sfs->count == 0 && sfs->state == SF_STATE_COMPLETED); - - return (ret); -} +struct sendfile_sync { + struct mtx mtx; + struct cv cv; + unsigned count; +}; /* * Add more references to a vm_page + sf_buf + sendfile_sync. @@ -2022,344 +1876,13 @@ sf_ext_free(void *arg1, void *arg2) vm_page_free(pg); vm_page_unlock(pg); - if (sfs != NULL) - sf_sync_deref(sfs); -} - -/* - * Called to remove a reference to a sf_sync object. - * - * This is generally done during the mbuf free path to signify - * that one of the mbufs in the transaction has been completed. - * - * If we're doing SF_SYNC and the refcount is zero then we'll wake - * up any waiters. - * - * IF we're doing SF_KQUEUE and the refcount is zero then we'll - * fire off the knote. - */ -void -sf_sync_deref(struct sendfile_sync *sfs) -{ - int do_free = 0; - - if (sfs == NULL) - return; - - mtx_lock(&sfs->mtx); - KASSERT(sfs->count> 0, ("Sendfile sync botchup count == 0")); - sfs->count --; - - /* - * Only fire off the wakeup / kqueue notification if - * we are in the running state. - */ - if (sfs->count == 0 && sfs->state == SF_STATE_COMPLETED) { - if (sfs->flags & SF_SYNC) - cv_signal(&sfs->cv); - - if (sfs->flags & SF_KQUEUE) { - SFSYNC_DPRINTF("%s: (%llu) sfs=%p: knote!\n", - __func__, - (unsigned long long) curthread->td_tid, - sfs); - KNOTE_LOCKED(&sfs->klist, 1); - } - - /* - * If we're not waiting around for a sync, - * check if the knote list is empty. - * If it is, we transition to free. 
- * - * XXX I think it's about time I added some state - * or flag that says whether we're supposed to be - * waiting around until we've done a signal. - * - * XXX Ie, the reason that I don't free it here - * is because the caller will free the last reference, - * not us. That should be codified in some flag - * that indicates "self-free" rather than checking - * for SF_SYNC all the time. - */ - if ((sfs->flags & SF_SYNC) == 0 && knlist_empty(&sfs->klist)) { - SFSYNC_DPRINTF("%s: (%llu) sfs=%p; completed, " - "count==0, empty list: time to free!\n", - __func__, - (unsigned long long) curthread->td_tid, - sfs); - sf_sync_set_state(sfs, SF_STATE_FREEING, 1); - do_free = 1; - } - - } - mtx_unlock(&sfs->mtx); - - /* - * Attempt to do a free here. - * - * We do this outside of the lock because it may destroy the - * lock in question as it frees things. We can optimise this - * later. - * - * XXX yes, we should make it a requirement to hold the - * lock across sf_sync_free(). - */ - if (do_free == 1) { - SFSYNC_DPRINTF("%s: (%llu) sfs=%p\n", - __func__, - (unsigned long long) curthread->td_tid, - sfs); - sf_sync_free(sfs); - } -} - -/* - * Allocate a sendfile_sync state structure. - * - * For now this only knows about the "sleep" sync, but later it will - * grow various other personalities. - */ -struct sendfile_sync * -sf_sync_alloc(uint32_t flags) -{ - struct sendfile_sync *sfs; - - sfs = uma_zalloc(zone_sfsync, M_WAITOK | M_ZERO); - mtx_init(&sfs->mtx, "sendfile", NULL, MTX_DEF); - cv_init(&sfs->cv, "sendfile"); - sfs->flags = flags; - sfs->state = SF_STATE_SETUP; - knlist_init_mtx(&sfs->klist, &sfs->mtx); - - SFSYNC_DPRINTF("%s: sfs=%p, flags=0x%08x\n", __func__, sfs, sfs->flags); - - return (sfs); -} - -/* - * Take a reference to a sfsync instance. - * - * This has to map 1:1 to free calls coming in via sf_ext_free(), - * so typically this will be referenced once for each mbuf allocated. 
- */ -void -sf_sync_ref(struct sendfile_sync *sfs) -{ - - if (sfs == NULL) - return; - - mtx_lock(&sfs->mtx); - sfs->count++; - mtx_unlock(&sfs->mtx); -} - -void -sf_sync_syscall_wait(struct sendfile_sync *sfs) -{ - - if (sfs == NULL) - return; - - KASSERT(mtx_owned(&sfs->mtx), ("%s: sfs=%p: not locked but should be!", - __func__, - sfs)); - - /* - * If we're not requested to wait during the syscall, - * don't bother waiting. - */ - if ((sfs->flags & SF_SYNC) == 0) - goto out; - - /* - * This is a bit suboptimal and confusing, so bear with me. - * - * Ideally sf_sync_syscall_wait() will wait until - * all pending mbuf transmit operations are done. - * This means that when sendfile becomes async, it'll - * run in the background and will transition from - * RUNNING to COMPLETED when it's finished acquiring - * new things to send. Then, when the mbufs finish - * sending, COMPLETED + sfs->count == 0 is enough to - * know that no further work is being done. - * - * So, we will sleep on both RUNNING and COMPLETED. - * It's up to the (in progress) async sendfile loop - * to transition the sf_sync from RUNNING to - * COMPLETED so the wakeup above will actually - * do the cv_signal() call. - */ - if (sfs->state != SF_STATE_COMPLETED && sfs->state != SF_STATE_RUNNING) - goto out; - - if (sfs->count != 0) - cv_wait(&sfs->cv, &sfs->mtx); - KASSERT(sfs->count == 0, ("sendfile sync still busy")); - -out: - return; -} - -/* - * Free an sf_sync if it's appropriate to. - */ -void -sf_sync_free(struct sendfile_sync *sfs) -{ - - if (sfs == NULL) - return; - - SFSYNC_DPRINTF("%s: (%lld) sfs=%p; called; state=%d, flags=0x%08x " - "count=%d\n", - __func__, - (long long) curthread->td_tid, - sfs, - sfs->state, - sfs->flags, - sfs->count); - - mtx_lock(&sfs->mtx); - - /* - * We keep the sf_sync around if the state is active, - * we are doing kqueue notification and we have active - * knotes. 
- * - * If the caller wants to free us right this second it - * should transition this to the freeing state. - * - * So, complain loudly if they break this rule. - */ - if (sfs->state != SF_STATE_FREEING) { - printf("%s: (%llu) sfs=%p; not freeing; let's wait!\n", - __func__, - (unsigned long long) curthread->td_tid, - sfs); - mtx_unlock(&sfs->mtx); - return; - } - - KASSERT(sfs->count == 0, ("sendfile sync still busy")); - cv_destroy(&sfs->cv); - /* - * This doesn't call knlist_detach() on each knote; it just frees - * the entire list. - */ - knlist_delete(&sfs->klist, curthread, 1); - mtx_destroy(&sfs->mtx); - SFSYNC_DPRINTF("%s: (%llu) sfs=%p; freeing\n", - __func__, - (unsigned long long) curthread->td_tid, - sfs); - uma_zfree(zone_sfsync, sfs); -} - -/* - * Setup a sf_sync to post a kqueue notification when things are complete. - */ -int -sf_sync_kqueue_setup(struct sendfile_sync *sfs, struct sf_hdtr_kq *sfkq) -{ - struct kevent kev; - int error; - - sfs->flags |= SF_KQUEUE; - - /* Check the flags are valid */ - if ((sfkq->kq_flags & ~(EV_CLEAR | EV_DISPATCH | EV_ONESHOT)) != 0) - return (EINVAL); - - SFSYNC_DPRINTF("%s: sfs=%p: kqfd=%d, flags=0x%08x, ident=%p, udata=%p\n", - __func__, - sfs, - sfkq->kq_fd, - sfkq->kq_flags, - (void *) sfkq->kq_ident, - (void *) sfkq->kq_udata); - - /* Setup and register a knote on the given kqfd. */ - kev.ident = (uintptr_t) sfkq->kq_ident; - kev.filter = EVFILT_SENDFILE; - kev.flags = EV_ADD | EV_ENABLE | EV_FLAG1 | sfkq->kq_flags; - kev.data = (intptr_t) sfs; - kev.udata = sfkq->kq_udata; - - error = kqfd_register(sfkq->kq_fd, &kev, curthread, 1); - if (error != 0) { - SFSYNC_DPRINTF("%s: returned %d\n", __func__, error); - } - return (error); -} - -void -sf_sync_set_state(struct sendfile_sync *sfs, sendfile_sync_state_t state, - int islocked) -{ - sendfile_sync_state_t old_state; - - if (! islocked) + if (sfs != NULL) { mtx_lock(&sfs->mtx); - - /* - * Update our current state. 
- */ - old_state = sfs->state; - sfs->state = state; - SFSYNC_DPRINTF("%s: (%llu) sfs=%p; going from %d to %d\n", - __func__, - (unsigned long long) curthread->td_tid, - sfs, - old_state, - state); - - /* - * If we're transitioning from RUNNING to COMPLETED and the count is - * zero, then post the knote. The caller may have completed the - * send before we updated the state to COMPLETED and we need to make - * sure this is communicated. - */ - if (old_state == SF_STATE_RUNNING - && state == SF_STATE_COMPLETED - && sfs->count == 0 - && sfs->flags & SF_KQUEUE) { - SFSYNC_DPRINTF("%s: (%llu) sfs=%p: triggering knote!\n", - __func__, - (unsigned long long) curthread->td_tid, - sfs); - KNOTE_LOCKED(&sfs->klist, 1); - } - - if (! islocked) + KASSERT(sfs->count > 0, ("Sendfile sync botchup count == 0")); + if (--sfs->count == 0) + cv_signal(&sfs->cv); mtx_unlock(&sfs->mtx); -} - -/* - * Set the retval/errno for the given transaction. - * - * This will eventually/ideally be used when the KNOTE is fired off - * to signify the completion of this transaction. - * - * The sfsync lock should be held before entering this function. 
- */ -void -sf_sync_set_retval(struct sendfile_sync *sfs, off_t retval, int xerrno) -{ - - KASSERT(mtx_owned(&sfs->mtx), ("%s: sfs=%p: not locked but should be!", - __func__, - sfs)); - - SFSYNC_DPRINTF("%s: (%llu) sfs=%p: errno=%d, retval=%jd\n", - __func__, - (unsigned long long) curthread->td_tid, - sfs, - xerrno, - (intmax_t) retval); - - sfs->retval = retval; - sfs->xerrno = xerrno; + } } /* @@ -2380,174 +1903,15 @@ sys_sendfile(struct thread *td, struct sendfile_args *uap) return (do_sendfile(td, uap, 0)); } -int -_do_sendfile(struct thread *td, int src_fd, int sock_fd, int flags, - int compat, off_t offset, size_t nbytes, off_t *sbytes, - struct uio *hdr_uio, - struct uio *trl_uio, struct sf_hdtr_kq *hdtr_kq) -{ - cap_rights_t rights; - struct sendfile_sync *sfs = NULL; - struct file *fp; - int error; - int do_kqueue = 0; - int do_free = 0; - - AUDIT_ARG_FD(src_fd); - - if (hdtr_kq != NULL) - do_kqueue = 1; - - /* - * sendfile(2) can start at any offset within a file so we require - * CAP_READ+CAP_SEEK = CAP_PREAD. - */ - if ((error = fget_read(td, src_fd, - cap_rights_init(&rights, CAP_PREAD), &fp)) != 0) { - goto out; - } - - /* - * IF SF_KQUEUE is set but we haven't copied in anything for - * kqueue data, error out. - */ - if (flags & SF_KQUEUE && do_kqueue == 0) { - SFSYNC_DPRINTF("%s: SF_KQUEUE but no KQUEUE data!\n", __func__); - goto out; - } - - /* - * If we need to wait for completion, initialise the sfsync - * state here. - */ - if (flags & (SF_SYNC | SF_KQUEUE)) - sfs = sf_sync_alloc(flags & (SF_SYNC | SF_KQUEUE)); - - if (flags & SF_KQUEUE) { - error = sf_sync_kqueue_setup(sfs, hdtr_kq); - if (error) { - SFSYNC_DPRINTF("%s: (%llu) error; sfs=%p\n", - __func__, - (unsigned long long) curthread->td_tid, - sfs); - sf_sync_set_state(sfs, SF_STATE_FREEING, 0); - sf_sync_free(sfs); - goto out; - } - } - - /* - * Do the sendfile call. - * - * If this fails, it'll free the mbuf chain which will free up the - * sendfile_sync references. 
- */ - error = fo_sendfile(fp, sock_fd, hdr_uio, trl_uio, offset, - nbytes, sbytes, flags, compat ? SFK_COMPAT : 0, sfs, td); - - /* - * If the sendfile call succeeded, transition the sf_sync state - * to RUNNING, then COMPLETED. - * - * If the sendfile call failed, then the sendfile call may have - * actually sent some data first - so we check to see whether - * any data was sent. If some data was queued (ie, count > 0) - * then we can't call free; we have to wait until the partial - * transaction completes before we continue along. - * - * This has the side effect of firing off the knote - * if the refcount has hit zero by the time we get here. - */ - if (sfs != NULL) { - mtx_lock(&sfs->mtx); - if (error == 0 || sfs->count > 0) { - /* - * When it's time to do async sendfile, the transition - * to RUNNING signifies that we're actually actively - * adding and completing mbufs. When the last disk - * buffer is read (ie, when we're not doing any - * further read IO and all subsequent stuff is mbuf - * transmissions) we'll transition to COMPLETED - * and when the final mbuf is freed, the completion - * will be signaled. - */ - sf_sync_set_state(sfs, SF_STATE_RUNNING, 1); - - /* - * Set the retval before we signal completed. - * If we do it the other way around then transitioning to - * COMPLETED may post the knote before you set the return - * status! - * - * XXX for now, errno is always 0, as we don't post - * knotes if sendfile failed. Maybe that'll change later. - */ - sf_sync_set_retval(sfs, *sbytes, error); - - /* - * And now transition to completed, which will kick off - * the knote if required. - */ - sf_sync_set_state(sfs, SF_STATE_COMPLETED, 1); - } else { - /* - * Error isn't zero, sfs_count is zero, so we - * won't have some other thing to wake things up. - * Thus free. - */ - sf_sync_set_state(sfs, SF_STATE_FREEING, 1); - do_free = 1; - } - - /* - * Next - wait if appropriate. 
- */ - sf_sync_syscall_wait(sfs); - - /* - * If we're not doing kqueue notifications, we can - * transition this immediately to the freeing state. - */ - if ((sfs->flags & SF_KQUEUE) == 0) { - sf_sync_set_state(sfs, SF_STATE_FREEING, 1); - do_free = 1; - } - - mtx_unlock(&sfs->mtx); - } - - /* - * If do_free is set, free here. - * - * If we're doing no-kqueue notification and it's just sleep notification, - * we also do free; it's the only chance we have. - */ - if (sfs != NULL && do_free == 1) { - sf_sync_free(sfs); - } - - /* - * XXX Should we wait until the send has completed before freeing the source - * file handle? It's the previous behaviour, sure, but is it required? - * We've wired down the page references after all. - */ - fdrop(fp, td); - -out: - /* Return error */ - return (error); -} - - static int do_sendfile(struct thread *td, struct sendfile_args *uap, int compat) { struct sf_hdtr hdtr; - struct sf_hdtr_kq hdtr_kq; struct uio *hdr_uio, *trl_uio; - int error; + struct file *fp; + cap_rights_t rights; off_t sbytes; - int do_kqueue = 0; + int error; /* * File offset must be positive. If it goes beyond EOF @@ -2563,38 +1927,37 @@ do_sendfile(struct thread *td, struct sendfile_args *uap, int compat) if (error != 0) goto out; if (hdtr.headers != NULL) { - error = copyinuio(hdtr.headers, hdtr.hdr_cnt, &hdr_uio); + error = copyinuio(hdtr.headers, hdtr.hdr_cnt, + &hdr_uio); if (error != 0) goto out; } if (hdtr.trailers != NULL) { - error = copyinuio(hdtr.trailers, hdtr.trl_cnt, &trl_uio); + error = copyinuio(hdtr.trailers, hdtr.trl_cnt, + &trl_uio); if (error != 0) goto out; } - - /* - * If SF_KQUEUE is set, then we need to also copy in - * the kqueue data after the normal hdtr set and set - * do_kqueue=1. 
- */ - if (uap->flags & SF_KQUEUE) { - error = copyin(((char *) uap->hdtr) + sizeof(hdtr), - &hdtr_kq, - sizeof(hdtr_kq)); - if (error != 0) - goto out; - do_kqueue = 1; - } } - /* Call sendfile */ - error = _do_sendfile(td, uap->fd, uap->s, uap->flags, compat, - uap->offset, uap->nbytes, &sbytes, hdr_uio, trl_uio, &hdtr_kq); + AUDIT_ARG_FD(src_fd); - if (uap->sbytes != NULL) { + /* + * sendfile(2) can start at any offset within a file so we require + * CAP_READ+CAP_SEEK = CAP_PREAD. + */ + if ((error = fget_read(td, uap->fd, + cap_rights_init(&rights, CAP_PREAD), &fp)) != 0) { + goto out; + } + + error = fo_sendfile(fp, uap->s, hdr_uio, trl_uio, uap->offset, + uap->nbytes, &sbytes, uap->flags, compat ? SFK_COMPAT : 0, td); + fdrop(fp, td); + + if (uap->sbytes != NULL) copyout(&sbytes, uap->sbytes, sizeof(off_t)); - } + out: free(hdr_uio, M_IOV); free(trl_uio, M_IOV); @@ -2819,7 +2182,7 @@ kern_sendfile_getsock(struct thread *td, int s, struct file **sock_fp, int vn_sendfile(struct file *fp, int sockfd, struct uio *hdr_uio, struct uio *trl_uio, off_t offset, size_t nbytes, off_t *sent, int flags, - int kflags, struct sendfile_sync *sfs, struct thread *td) + int kflags, struct thread *td) { struct file *sock_fp; struct vnode *vp; @@ -2829,6 +2192,7 @@ vn_sendfile(struct file *fp, int sockfd, struct uio *hdr_uio, struct sf_buf *sf; struct vm_page *pg; struct shmfd *shmfd; + struct sendfile_sync *sfs; struct vattr va; off_t off, xfsize, fsbytes, sbytes, rem, obj_size; int error, bsize, nd, hdrlen, mnw; @@ -2837,6 +2201,7 @@ vn_sendfile(struct file *fp, int sockfd, struct uio *hdr_uio, obj = NULL; so = NULL; m = NULL; + sfs = NULL; fsbytes = sbytes = 0; hdrlen = mnw = 0; rem = nbytes; @@ -2860,6 +2225,12 @@ vn_sendfile(struct file *fp, int sockfd, struct uio *hdr_uio, if (flags & SF_MNOWAIT) mnw = 1; + if (flags & SF_SYNC) { + sfs = malloc(sizeof *sfs, M_TEMP, M_WAITOK | M_ZERO); + mtx_init(&sfs->mtx, "sendfile", NULL, MTX_DEF); + cv_init(&sfs->cv, "sendfile"); + } + 
#ifdef MAC error = mac_socket_check_send(td->td_ucred, so); if (error != 0) @@ -3106,12 +2477,11 @@ retry_space: loopbytes += xfsize; off += xfsize; - /* - * XXX eventually this should be a sfsync - * method call! - */ - if (sfs != NULL) - sf_sync_ref(sfs); + if (sfs != NULL) { + mtx_lock(&sfs->mtx); + sfs->count++; + mtx_unlock(&sfs->mtx); + } } if (vp != NULL) @@ -3193,6 +2563,16 @@ out: if (m) m_freem(m); + if (sfs != NULL) { + mtx_lock(&sfs->mtx); + if (sfs->count != 0) + cv_wait(&sfs->cv, &sfs->mtx); + KASSERT(sfs->count == 0, ("sendfile sync still busy")); + cv_destroy(&sfs->cv); + mtx_destroy(&sfs->mtx); + free(sfs, M_TEMP); + } + if (error == ERESTART) error = EINTR; diff --git a/sys/sys/file.h b/sys/sys/file.h index e593d432624..d102a871ee3 100644 --- a/sys/sys/file.h +++ b/sys/sys/file.h @@ -90,9 +90,6 @@ foffset_get(struct file *fp) return (foffset_lock(fp, FOF_NOLOCK)); } -/* XXX pollution? */ -struct sendfile_sync; - typedef int fo_rdwr_t(struct file *fp, struct uio *uio, struct ucred *active_cred, int flags, struct thread *td); @@ -112,8 +109,7 @@ typedef int fo_chown_t(struct file *fp, uid_t uid, gid_t gid, struct ucred *active_cred, struct thread *td); typedef int fo_sendfile_t(struct file *fp, int sockfd, struct uio *hdr_uio, struct uio *trl_uio, off_t offset, size_t nbytes, - off_t *sent, int flags, int kflags, - struct sendfile_sync *sfs, struct thread *td); + off_t *sent, int flags, int kflags, struct thread *td); typedef int fo_seek_t(struct file *fp, off_t offset, int whence, struct thread *td); typedef int fo_fill_kinfo_t(struct file *fp, struct kinfo_file *kif, @@ -371,11 +367,11 @@ fo_chown(struct file *fp, uid_t uid, gid_t gid, struct ucred *active_cred, static __inline int fo_sendfile(struct file *fp, int sockfd, struct uio *hdr_uio, struct uio *trl_uio, off_t offset, size_t nbytes, off_t *sent, int flags, - int kflags, struct sendfile_sync *sfs, struct thread *td) + int kflags, struct thread *td) { return ((*fp->f_ops->fo_sendfile)(fp, 
sockfd, hdr_uio, trl_uio, offset, - nbytes, sent, flags, kflags, sfs, td)); + nbytes, sent, flags, kflags, td)); } static __inline int diff --git a/sys/sys/sf_base.h b/sys/sys/sf_base.h deleted file mode 100644 index 7c8d49cde4f..00000000000 --- a/sys/sys/sf_base.h +++ /dev/null @@ -1,37 +0,0 @@ -/*- - * Copyright (c) 2013 Adrian Chadd - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- * - * $FreeBSD$ - */ - -#ifndef _SYS_SF_BASE_H_ -#define _SYS_SF_BASE_H_ - -extern int _do_sendfile(struct thread *, int src_fd, int sock_fd, int flags, - int compat, off_t offset, size_t nbytes, off_t *sbytes, - struct uio *hdr_uio, struct uio *trl_uio, - struct sf_hdtr_kq *hdtr_kq); - -#endif /* _SYS_SF_BASE_H_ */ diff --git a/sys/sys/sf_sync.h b/sys/sys/sf_sync.h deleted file mode 100644 index 04dee3801a5..00000000000 --- a/sys/sys/sf_sync.h +++ /dev/null @@ -1,64 +0,0 @@ -/*- - * Copyright (c) 2013 Adrian Chadd - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- * - * $FreeBSD$ - */ - -#ifndef _SYS_SF_SYNC_H_ -#define _SYS_SF_SYNC_H_ - -typedef enum { - SF_STATE_NONE, - SF_STATE_SETUP, - SF_STATE_RUNNING, - SF_STATE_COMPLETED, - SF_STATE_FREEING -} sendfile_sync_state_t; - -struct sendfile_sync { - struct mtx mtx; - struct cv cv; - struct knlist klist; - uint32_t flags; - uint32_t count; - int32_t xerrno; /* Completion errno, if retval < 0 */ - off_t retval; /* Completion retval (eg written bytes) */ - sendfile_sync_state_t state; -}; - -/* XXX pollution */ -struct sf_hdtr_kq; - -extern struct sendfile_sync * sf_sync_alloc(uint32_t flags); -extern void sf_sync_syscall_wait(struct sendfile_sync *); -extern void sf_sync_free(struct sendfile_sync *); -extern void sf_sync_try_free(struct sendfile_sync *); -extern void sf_sync_ref(struct sendfile_sync *); -extern void sf_sync_deref(struct sendfile_sync *); -extern int sf_sync_kqueue_setup(struct sendfile_sync *, struct sf_hdtr_kq *); -extern void sf_sync_set_state(struct sendfile_sync *, sendfile_sync_state_t, int); -extern void sf_sync_set_retval(struct sendfile_sync *, off_t, int); - -#endif /* !_SYS_SF_BUF_H_ */ diff --git a/sys/sys/socket.h b/sys/sys/socket.h index cc43fa21dc0..18e2de10e10 100644 --- a/sys/sys/socket.h +++ b/sys/sys/socket.h @@ -583,28 +583,12 @@ struct sf_hdtr { int trl_cnt; /* number of trailer iovec's */ }; -/* - * sendfile(2) kqueue information - */ -struct sf_hdtr_kq { - uintptr_t kq_ident; /* ident (from userland?) 
*/ - void *kq_udata; /* user data pointer */ - uint32_t kq_flags; /* extra flags to pass in */ - int kq_fd; /* kq fd to post completion events on */ -}; - -struct sf_hdtr_all { - struct sf_hdtr hdtr; - struct sf_hdtr_kq kq; -}; - /* * Sendfile-specific flag(s) */ #define SF_NODISKIO 0x00000001 #define SF_MNOWAIT 0x00000002 #define SF_SYNC 0x00000004 -#define SF_KQUEUE 0x00000008 #ifdef _KERNEL #define SFK_COMPAT 0x00000001 From a0f704ffc16be42abf4be1573eab25857c2d1fcc Mon Sep 17 00:00:00 2001 From: Marcel Moolenaar Date: Tue, 11 Nov 2014 21:37:17 +0000 Subject: [PATCH 067/280] Upgrade libxo to 0.1.5 Obtained from: https://github.com/Juniper/libxo --- contrib/libxo/README.md | 2 ++ contrib/libxo/configure.ac | 2 +- contrib/libxo/libxo/libxo.c | 33 +++++++++++-------- contrib/libxo/libxo/xoconfig.h | 6 ++-- contrib/libxo/libxo/xoversion.h | 2 +- contrib/libxo/tests/core/Makefile.am | 4 +-- contrib/libxo/tests/core/saved/test_07.J.out | 2 +- contrib/libxo/tests/core/saved/test_07.JP.out | 5 +++ contrib/libxo/tests/core/saved/test_07.X.out | 2 +- contrib/libxo/tests/core/saved/test_07.XP.out | 3 ++ contrib/libxo/tests/core/test_07.c | 6 ++++ 11 files changed, 45 insertions(+), 22 deletions(-) diff --git a/contrib/libxo/README.md b/contrib/libxo/README.md index 40c162b0875..e9b3b4bd093 100644 --- a/contrib/libxo/README.md +++ b/contrib/libxo/README.md @@ -60,3 +60,5 @@ option: View the beautiful documentation at: http://juniper.github.io/libxo/libxo-manual.html + +[![Analytics](https://ga-beacon.appspot.com/UA-56056421-1/Juniper/libxo/Readme)](https://github.com/Juniper/libxo) diff --git a/contrib/libxo/configure.ac b/contrib/libxo/configure.ac index 2412d12cf0a..958b21cdbec 100644 --- a/contrib/libxo/configure.ac +++ b/contrib/libxo/configure.ac @@ -12,7 +12,7 @@ # AC_PREREQ(2.2) -AC_INIT([libxo], [0.1.4], [phil@juniper.net]) +AC_INIT([libxo], [0.1.5], [phil@juniper.net]) AM_INIT_AUTOMAKE([-Wall -Werror foreign -Wno-portability]) # Support silent build rules. 
Requires at least automake-1.11. diff --git a/contrib/libxo/libxo/libxo.c b/contrib/libxo/libxo/libxo.c index ebe55b9e4fd..77c6a3e8edc 100644 --- a/contrib/libxo/libxo/libxo.c +++ b/contrib/libxo/libxo/libxo.c @@ -79,7 +79,7 @@ struct xo_handle_s { unsigned short xo_indent; /* Indent level (if pretty) */ unsigned short xo_indent_by; /* Indent amount (tab stop) */ xo_write_func_t xo_write; /* Write callback */ - xo_close_func_t xo_close; /* Clo;se callback */ + xo_close_func_t xo_close; /* Close callback */ xo_formatter_t xo_formatter; /* Custom formating function */ xo_checkpointer_t xo_checkpointer; /* Custom formating support function */ void *xo_opaque; /* Opaque data for write function */ @@ -1912,6 +1912,7 @@ xo_format_string (xo_handle_t *xop, xo_buffer_t *xbp, xo_xff_flags_t flags, xo_format_t *xfp) { static char null[] = "(null)"; + char *cp = NULL; wchar_t *wcp = NULL; int len, cols = 0, rc = 0; @@ -1922,16 +1923,33 @@ xo_format_string (xo_handle_t *xop, xo_buffer_t *xbp, xo_xff_flags_t flags, if (xo_check_conversion(xop, xfp->xf_enc, need_enc)) return 0; + len = xfp->xf_width[XF_WIDTH_SIZE]; + if (xfp->xf_enc == XF_ENC_WIDE) { wcp = va_arg(xop->xo_vap, wchar_t *); if (xfp->xf_skip) return 0; + /* + * Dont' deref NULL; use the traditional "(null)" instead + * of the more accurate "who's been a naughty boy, then?". + */ + if (wcp == NULL) { + cp = null; + len = sizeof(null) - 1; + } + } else { cp = va_arg(xop->xo_vap, char *); /* UTF-8 or native */ if (xfp->xf_skip) return 0; + /* Echo "Dont' deref NULL" logic */ + if (cp == NULL) { + cp = null; + len = sizeof(null) - 1; + } + /* * Optimize the most common case, which is "%s". We just * need to copy the complete string to the output buffer. @@ -1957,17 +1975,6 @@ xo_format_string (xo_handle_t *xop, xo_buffer_t *xbp, xo_xff_flags_t flags, } } - len = xfp->xf_width[XF_WIDTH_SIZE]; - - /* - * Dont' deref NULL; use the traditional "(null)" instead - * of the more accurate "who's been a naughty boy, then?". 
- */ - if (cp == NULL && wcp == NULL) { - cp = null; - len = sizeof(null) - 1; - } - cols = xo_format_string_direct(xop, xbp, flags, wcp, cp, len, xfp->xf_width[XF_WIDTH_MAX], need_enc, xfp->xf_enc); @@ -3859,7 +3866,7 @@ xo_close_list_h (xo_handle_t *xop, const char *name) rc = xo_printf(xop, "%s%*s]", pre_nl, xo_indent(xop), ""); xop->xo_stack[xop->xo_depth].xs_flags |= XSF_NOT_FIRST; - return 0; + return rc; } int diff --git a/contrib/libxo/libxo/xoconfig.h b/contrib/libxo/libxo/xoconfig.h index 7a6dbe8b78b..e42bde27c88 100644 --- a/contrib/libxo/libxo/xoconfig.h +++ b/contrib/libxo/libxo/xoconfig.h @@ -158,7 +158,7 @@ #define PACKAGE_NAME "libxo" /* Define to the full name and version of this package. */ -#define PACKAGE_STRING "libxo 0.1.4" +#define PACKAGE_STRING "libxo 0.1.5" /* Define to the one symbol short name of this package. */ #define PACKAGE_TARNAME "libxo" @@ -167,7 +167,7 @@ #define PACKAGE_URL "" /* Define to the version of this package. */ -#define PACKAGE_VERSION "0.1.4" +#define PACKAGE_VERSION "0.1.5" /* If using the C implementation of alloca, define if you know the direction of stack growth for your system; otherwise it will be @@ -181,7 +181,7 @@ #define STDC_HEADERS 1 /* Version number of package */ -#define VERSION "0.1.4" +#define VERSION "0.1.5" /* Define to `__inline__' or `__inline' if that's what the C compiler calls it, or to nothing if 'inline' is not supported under any name. 
*/ diff --git a/contrib/libxo/libxo/xoversion.h b/contrib/libxo/libxo/xoversion.h index 51da744036e..2d639b693ad 100644 --- a/contrib/libxo/libxo/xoversion.h +++ b/contrib/libxo/libxo/xoversion.h @@ -18,7 +18,7 @@ /** * The version string */ -#define LIBXO_VERSION "0.1.4" +#define LIBXO_VERSION "0.1.5" /** * The version number diff --git a/contrib/libxo/tests/core/Makefile.am b/contrib/libxo/tests/core/Makefile.am index a87fcc55adc..a5470f37547 100644 --- a/contrib/libxo/tests/core/Makefile.am +++ b/contrib/libxo/tests/core/Makefile.am @@ -30,7 +30,7 @@ test_07_test_SOURCES = test_07.c # TEST_CASES := $(shell cd ${srcdir} ; echo *.c ) -bin_PROGRAMS = ${TEST_CASES:.c=.test} +noinst_PROGRAMS = ${TEST_CASES:.c=.test} LDADD = \ ${top_builddir}/libxo/libxo.la @@ -66,7 +66,7 @@ valgrind: TEST_ONE = \ LIBXO_OPTIONS=:W$$fmt \ - ${CHECKER} $$base.test ${TEST_OPTS} \ + ${CHECKER} ./$$base.test ${TEST_OPTS} \ > out/$$base.$$fmt.out 2> out/$$base.$$fmt.err ; \ ${DIFF} -Nu ${srcdir}/saved/$$base.$$fmt.out out/$$base.$$fmt.out ${S2O} ; \ ${DIFF} -Nu ${srcdir}/saved/$$base.$$fmt.err out/$$base.$$fmt.err ${S2O} diff --git a/contrib/libxo/tests/core/saved/test_07.J.out b/contrib/libxo/tests/core/saved/test_07.J.out index 2c9a9286f20..9285ff5a6c5 100644 --- a/contrib/libxo/tests/core/saved/test_07.J.out +++ b/contrib/libxo/tests/core/saved/test_07.J.out @@ -1,2 +1,2 @@ -{"employees": {"v1":"γιγνώσκειν","v2":"ὦ ἄνδρες ᾿Αθηναῖοι","columns":28,"columns":2,"v1":"ახლავე გაიაროთ რეგისტრაცია","v2":"Unicode-ის მეათე საერთაშორისო","columns":55, "employee": ["columns":0, {"first-name":"Jim","nic-name":"\"რეგტ\"","last-name":"გთხოვთ ახ","department":431,"percent-time":90,"columns":23,"benefits":"full"}, {"first-name":"Terry","nic-name":"\"γιγνώσκεινὦ ἄνδρες ᾿Αθηναῖοι282ახლავე გაიაროთ რეგისტრაციაUnicode-ის მეათე საერთაშორისო550Jim"რეგტ"გთხოვთ ახ4319023fullTerry"<one"Οὐχὶ ταὐτὰ παρίσταταί μοι Jones6609047fullLeslie"Les"Patterson3416025fullAshley"Ash"Meter & 
Smith144040300123456789"0123456789"01234567890123456789014404049ახლა"გაიარო"საერთაშორისო1239029full \ No newline at end of file +(null)γιγνώσκεινὦ ἄνδρες ᾿Αθηναῖοι282ახლავე გაიაროთ რეგისტრაციაUnicode-ის მეათე საერთაშორისო550Jim"რეგტ"გთხოვთ ახ4319023fullTerry"<one"Οὐχὶ ταὐτὰ παρίσταταί μοι Jones6609047fullLeslie"Les"Patterson3416025fullAshley"Ash"Meter & Smith144040300123456789"0123456789"01234567890123456789014404049ახლა"გაიარო"საერთაშორისო1239029full \ No newline at end of file diff --git a/contrib/libxo/tests/core/saved/test_07.XP.out b/contrib/libxo/tests/core/saved/test_07.XP.out index b502650596d..c13f838c85f 100644 --- a/contrib/libxo/tests/core/saved/test_07.XP.out +++ b/contrib/libxo/tests/core/saved/test_07.XP.out @@ -1,4 +1,7 @@ + + (null) + γιγνώσκειν ὦ ἄνδρες ᾿Αθηναῖοι 28 diff --git a/contrib/libxo/tests/core/test_07.c b/contrib/libxo/tests/core/test_07.c index 3ceba8edf6a..18b7baa146b 100644 --- a/contrib/libxo/tests/core/test_07.c +++ b/contrib/libxo/tests/core/test_07.c @@ -52,6 +52,12 @@ main (int argc, char **argv) xo_open_container("employees"); + xo_open_list("test"); + xo_open_instance("test"); + xo_emit("{ek:filename/%s}", NULL); + xo_close_instance("test"); + xo_close_list("test"); + rc = xo_emit("Οὐχὶ ταὐτὰ παρίσταταί μοι {:v1/%s}, {:v2/%s}\n", "γιγνώσκειν", "ὦ ἄνδρες ᾿Αθηναῖοι"); rc = xo_emit("{:columns/%d}\n", rc); From bab0558297f0614aa6b5cd273477eaf1a3684ceb Mon Sep 17 00:00:00 2001 From: Marcel Moolenaar Date: Tue, 11 Nov 2014 21:52:10 +0000 Subject: [PATCH 068/280] Fix text output for the uptime command. Reported by: "Max N. Boyarov" , ae@ --- usr.bin/w/w.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/usr.bin/w/w.c b/usr.bin/w/w.c index 059f7ce5602..effd0e304ae 100644 --- a/usr.bin/w/w.c +++ b/usr.bin/w/w.c @@ -509,7 +509,7 @@ pr_header(time_t *nowp, int nusers) } /* Print number of users logged in to system */ - xo_emit(" {:users/%d} user%s", nusers, nusers == 1 ? 
"" : "s"); + xo_emit(" {:users/%d} {N:user%s}", nusers, nusers == 1 ? "" : "s"); /* * Print 1, 5, and 15 minute load averages. From d971cd47f6a3a73d2c214be650901e2c64447f8d Mon Sep 17 00:00:00 2001 From: Peter Grehan Date: Tue, 11 Nov 2014 22:03:11 +0000 Subject: [PATCH 069/280] Fix incorrect reading of 32-bit modinfo by 64-bit loaders. The various structures in the mod_metadata set of a FreeBSD kernel and modules contain pointers. The FreeBSD loader correctly deals with a mismatch in loader and kernel pointer size (e.g. 32-bit i386/ppc loader, loading 64-bit amd64/ppc64 kernels), but wasn't dealing with the inverse case where a 64-bit loader was loading a 32-bit kernel. Reported by: ktcallbox@gmail.com with a bhyve/i386 and ZFS root install Differential Revision: https://reviews.freebsd.org/D1129 Reviewed by: neel, jhb MFC after: 1 week --- sys/boot/common/load_elf.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/sys/boot/common/load_elf.c b/sys/boot/common/load_elf.c index 04a7dbe0c34..62ae7471420 100644 --- a/sys/boot/common/load_elf.c +++ b/sys/boot/common/load_elf.c @@ -640,6 +640,14 @@ struct mod_metadata64 { u_int64_t md_cval; /* common string label */ }; #endif +#if defined(__amd64__) && __ELF_WORD_SIZE == 32 +struct mod_metadata32 { + int md_version; /* structure version MDTV_* */ + int md_type; /* type of entry MDT_* */ + u_int32_t md_data; /* specific data */ + u_int32_t md_cval; /* common string label */ +}; +#endif int __elfN(parse_modmetadata)(struct preloaded_file *fp, elf_file_t ef) @@ -647,6 +655,8 @@ __elfN(parse_modmetadata)(struct preloaded_file *fp, elf_file_t ef) struct mod_metadata md; #if (defined(__i386__) || defined(__powerpc__)) && __ELF_WORD_SIZE == 64 struct mod_metadata64 md64; +#elif defined(__amd64__) && __ELF_WORD_SIZE == 32 + struct mod_metadata32 md32; #endif struct mod_depend *mdepend; struct mod_version mver; @@ -682,6 +692,18 @@ __elfN(parse_modmetadata)(struct preloaded_file *fp, elf_file_t ef) 
md.md_type = md64.md_type; md.md_cval = (const char *)(uintptr_t)md64.md_cval; md.md_data = (void *)(uintptr_t)md64.md_data; +#elif defined(__amd64__) && __ELF_WORD_SIZE == 32 + COPYOUT(v, &md32, sizeof(md32)); + error = __elfN(reloc_ptr)(fp, ef, v, &md32, sizeof(md32)); + if (error == EOPNOTSUPP) { + md32.md_cval += ef->off; + md32.md_data += ef->off; + } else if (error != 0) + return (error); + md.md_version = md32.md_version; + md.md_type = md32.md_type; + md.md_cval = (const char *)(uintptr_t)md32.md_cval; + md.md_data = (void *)(uintptr_t)md32.md_data; #else COPYOUT(v, &md, sizeof(md)); error = __elfN(reloc_ptr)(fp, ef, v, &md, sizeof(md)); From efe28398f5d5851eff8a4c8d7db0dbab284e46de Mon Sep 17 00:00:00 2001 From: Gleb Smirnoff Date: Tue, 11 Nov 2014 22:08:18 +0000 Subject: [PATCH 070/280] Fix build. --- sys/compat/freebsd32/freebsd32_misc.c | 2 ++ sys/kern/uipc_syscalls.c | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/sys/compat/freebsd32/freebsd32_misc.c b/sys/compat/freebsd32/freebsd32_misc.c index b2ec8b357d8..892d3c0a700 100644 --- a/sys/compat/freebsd32/freebsd32_misc.c +++ b/sys/compat/freebsd32/freebsd32_misc.c @@ -1570,6 +1570,8 @@ freebsd32_do_sendfile(struct thread *td, struct sf_hdtr32 hdtr32; struct sf_hdtr hdtr; struct uio *hdr_uio, *trl_uio; + struct file *fp; + cap_rights_t rights; struct iovec32 *iov32; off_t offset, sbytes; int error; diff --git a/sys/kern/uipc_syscalls.c b/sys/kern/uipc_syscalls.c index b53883deff7..85487cd93a2 100644 --- a/sys/kern/uipc_syscalls.c +++ b/sys/kern/uipc_syscalls.c @@ -1940,7 +1940,7 @@ do_sendfile(struct thread *td, struct sendfile_args *uap, int compat) } } - AUDIT_ARG_FD(src_fd); + AUDIT_ARG_FD(uap->fd); /* * sendfile(2) can start at any offset within a file so we require From bea71143ed0660038e93baafb84fbdc6167cb95e Mon Sep 17 00:00:00 2001 From: Luiz Otavio O Souza Date: Tue, 11 Nov 2014 23:55:37 +0000 Subject: [PATCH 071/280] Since r273264 the SD card detection on Raspberry Pi is 
reliably working and that exposes new bugs with HS mode. When the old code could not do the proper card detection it would boot with lower defaults (and no HS mode) and this made some HS cards boot. Now, with the card always identified as HS capable, the sdhci controller tries to run the card at HS speeds and makes the boot always fail. Disable the HS mode for now (which still can be enabled with the tunable) until it is properly fixed. MFC with: r273264 Requested by: many --- sys/arm/broadcom/bcm2835/bcm2835_sdhci.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/sys/arm/broadcom/bcm2835/bcm2835_sdhci.c b/sys/arm/broadcom/bcm2835/bcm2835_sdhci.c index a92ddba11c7..9f3c27b7b62 100644 --- a/sys/arm/broadcom/bcm2835/bcm2835_sdhci.c +++ b/sys/arm/broadcom/bcm2835/bcm2835_sdhci.c @@ -84,10 +84,13 @@ __FBSDID("$FreeBSD$"); /* * Arasan HC seems to have problem with Data CRC on lower frequencies. * Use this tunable to cap initialization sequence frequency at higher - * value. Default is standard 400kHz + * value. Default is standard 400kHz. + * HS mode brings too many problems for most of cards, so disable HS mode + * until a better fix comes up. + * HS mode still can be enabled with the tunable. */ static int bcm2835_sdhci_min_freq = 400000; -static int bcm2835_sdhci_hs = 1; +static int bcm2835_sdhci_hs = 0; static int bcm2835_sdhci_pio_mode = 0; TUNABLE_INT("hw.bcm2835.sdhci.min_freq", &bcm2835_sdhci_min_freq); From 9ba57342c93f0d4db407243afcd46a7e73e8cca4 Mon Sep 17 00:00:00 2001 From: Marcel Moolenaar Date: Wed, 12 Nov 2014 00:10:27 +0000 Subject: [PATCH 072/280] SEEK_DATA has interesting behaviour for sparse files on ZFS. A sparse file with 128K of random data and truncated to 800K can have SEEK_DATA return -1 when given an offset of 128K. On UFS, the SEEK_DATA returns 800K (the size of the file). SEEK_HOLE on ZFS seems to behave the same as UFS.
To handle this, map -1 to the size of the file (`end') when lseek returns this for either SEEK_HOLE or SEEK_DATA. When sparse files are not supported by the file system both `hole' and `data' will now be equal to `end' and we will treat the entire file as data. This way, the -1 return for SEEK_DATA on ZFS will end up doing the right thing. Reported by: gjb@ MFC after: 3 days --- usr.bin/mkimg/image.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/usr.bin/mkimg/image.c b/usr.bin/mkimg/image.c index 3e7c7d2945a..be1c2e9fcbb 100644 --- a/usr.bin/mkimg/image.c +++ b/usr.bin/mkimg/image.c @@ -405,16 +405,18 @@ image_copyin_mapped(lba_t blk, int fd, uint64_t *sizep) error = 0; while (!error && cur < end) { hole = lseek(fd, cur, SEEK_HOLE); + if (hole == -1) + hole = end; data = lseek(fd, cur, SEEK_DATA); + if (data == -1) + data = end; /* * Treat the entire file as data if sparse files * are not supported by the underlying file system. */ - if (hole == -1 && data == -1) { + if (hole == end && data == end) data = cur; - hole = end; - } if (cur == hole && data > hole) { hole = pos; From aec66495b4f73c87dfea10a9adaff6ba882381af Mon Sep 17 00:00:00 2001 From: Alexander Motin Date: Wed, 12 Nov 2014 01:28:28 +0000 Subject: [PATCH 073/280] Improve CAM's reaction on asymmetric access errors. 
MFC after: 1 month --- sys/cam/scsi/scsi_all.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sys/cam/scsi/scsi_all.c b/sys/cam/scsi/scsi_all.c index 959eda9125a..abe7e2ed167 100644 --- a/sys/cam/scsi/scsi_all.c +++ b/sys/cam/scsi/scsi_all.c @@ -1106,13 +1106,13 @@ static struct asc_table_entry asc_table[] = { { SST(0x04, 0x09, SS_RDEF, /* XXX TBD */ "Logical unit not ready, self-test in progress") }, /* DTLPWROMAEBKVF */ - { SST(0x04, 0x0A, SS_RDEF, /* XXX TBD */ + { SST(0x04, 0x0A, SS_TUR | SSQ_MANY | SSQ_DECREMENT_COUNT | ENXIO, "Logical unit not accessible, asymmetric access state transition")}, /* DTLPWROMAEBKVF */ - { SST(0x04, 0x0B, SS_RDEF, /* XXX TBD */ + { SST(0x04, 0x0B, SS_FATAL | ENXIO, "Logical unit not accessible, target port in standby state") }, /* DTLPWROMAEBKVF */ - { SST(0x04, 0x0C, SS_RDEF, /* XXX TBD */ + { SST(0x04, 0x0C, SS_FATAL | ENXIO, "Logical unit not accessible, target port in unavailable state") }, /* F */ { SST(0x04, 0x0D, SS_RDEF, /* XXX TBD */ From 3a8af166a2663a5c29a1e1b9ec03d96b314a6537 Mon Sep 17 00:00:00 2001 From: Ian Lepore Date: Wed, 12 Nov 2014 02:37:27 +0000 Subject: [PATCH 074/280] Bugfixes for the imx5/imx6 iomux fdt_pinctrl driver. I originally overlooked a couple of flag bits defined in the fdt binding docs. One flag suppresses the pad configuration (pullup/pulldown/etc). The other one requires that the SION (set input on) flag be set in the mux register. Also, it appears from the data involved that if the input register address in the config tuple is zero, there is no input configuration. The old code was writing to register zero, which contains a collection of misc control bits (having nothing to do with input configuration) that probably shouldn't get overwritten arbitrarily. The bindings doc doesn't explicitly mention this.
--- sys/arm/freescale/imx/imx_iomux.c | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/sys/arm/freescale/imx/imx_iomux.c b/sys/arm/freescale/imx/imx_iomux.c index e5d5085bf0e..403028f988c 100644 --- a/sys/arm/freescale/imx/imx_iomux.c +++ b/sys/arm/freescale/imx/imx_iomux.c @@ -99,6 +99,10 @@ struct pincfg { uint32_t padconf_val; }; +#define PADCONF_NONE (1U << 31) /* Do not configure pad. */ +#define PADCONF_SION (1U << 30) /* Force SION bit in mux register. */ +#define PADMUX_SION (1U << 4) /* The SION bit in the mux register. */ + static inline uint32_t RD4(struct iomux_softc *sc, bus_size_t off) { @@ -120,6 +124,7 @@ iomux_configure_pins(device_t dev, phandle_t cfgxref) struct pincfg *cfgtuples, *cfg; phandle_t cfgnode; int i, ntuples; + uint32_t sion; sc = device_get_softc(dev); cfgnode = OF_node_from_xref(cfgxref); @@ -130,9 +135,22 @@ iomux_configure_pins(device_t dev, phandle_t cfgxref) if (ntuples == 0) return (0); /* Empty property is not an error. */ for (i = 0, cfg = cfgtuples; i < ntuples; i++, cfg++) { - WR4(sc, cfg->mux_reg, cfg->mux_val); - WR4(sc, cfg->input_reg, cfg->input_val); - WR4(sc, cfg->padconf_reg, cfg->padconf_val); + sion = (cfg->padconf_val & PADCONF_SION) ? 
PADMUX_SION : 0; + WR4(sc, cfg->mux_reg, cfg->mux_val | sion); + if (cfg->input_reg != 0) + WR4(sc, cfg->input_reg, cfg->input_val); + if ((cfg->padconf_val & PADCONF_NONE) != 0) + WR4(sc, cfg->padconf_reg, cfg->padconf_val); + if (bootverbose) { + char name[32]; + OF_getprop(cfgnode, "name", &name, sizeof(name)); + printf("%16s: muxreg 0x%04x muxval 0x%02x " + "inpreg 0x%04x inpval 0x%02x " + "padreg 0x%04x padval 0x%08x\n", + name, cfg->mux_reg, cfg->mux_val | sion, + cfg->input_reg, cfg->input_val, + cfg->padconf_reg, cfg->padconf_val); + } } free(cfgtuples, M_OFWPROP); return (0); From f088768b98fa8f5a6f21b0d1e480d674c3a6c4d6 Mon Sep 17 00:00:00 2001 From: Ian Lepore Date: Wed, 12 Nov 2014 02:38:25 +0000 Subject: [PATCH 075/280] Remove an #ifdef DEBUG wrapper, and instead use if (bootverbose). --- sys/dev/fdt/fdt_pinctrl.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/sys/dev/fdt/fdt_pinctrl.c b/sys/dev/fdt/fdt_pinctrl.c index 474fb00e22a..0a0dd84989a 100644 --- a/sys/dev/fdt/fdt_pinctrl.c +++ b/sys/dev/fdt/fdt_pinctrl.c @@ -124,15 +124,14 @@ pinctrl_configure_children(device_t pinctrl, phandle_t parent) pinctrl_configure_children(pinctrl, node); nconfigs = OF_getencprop_alloc(node, "pinctrl-0", sizeof(*configs), (void **)&configs); -#ifdef DEBUG - { - char name[32]; - OF_getprop(node, "name", &name, sizeof(name)); - printf("%d items in pinctrl-0 for %s\n", nconfigs, name); - } -#endif if (nconfigs <= 0) continue; + if (bootverbose) { + char name[32]; + OF_getprop(node, "name", &name, sizeof(name)); + printf("Processing %d pin-config node(s) in pinctrl-0 for %s\n", + nconfigs, name); + } for (i = 0; i < nconfigs; i++) { if (OF_device_from_xref(configs[i]) == pinctrl) FDT_PINCTRL_CONFIGURE(pinctrl, configs[i]); From 6eef1a334dfb7a70867984657f84f4532a9ea7bf Mon Sep 17 00:00:00 2001 From: Ian Lepore Date: Wed, 12 Nov 2014 02:44:27 +0000 Subject: [PATCH 076/280] Fix the reversed sense of the PADCONF_NONE test. 
--- sys/arm/freescale/imx/imx_iomux.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sys/arm/freescale/imx/imx_iomux.c b/sys/arm/freescale/imx/imx_iomux.c index 403028f988c..7786b76e0a4 100644 --- a/sys/arm/freescale/imx/imx_iomux.c +++ b/sys/arm/freescale/imx/imx_iomux.c @@ -139,7 +139,7 @@ iomux_configure_pins(device_t dev, phandle_t cfgxref) WR4(sc, cfg->mux_reg, cfg->mux_val | sion); if (cfg->input_reg != 0) WR4(sc, cfg->input_reg, cfg->input_val); - if ((cfg->padconf_val & PADCONF_NONE) != 0) + if ((cfg->padconf_val & PADCONF_NONE) == 0) WR4(sc, cfg->padconf_reg, cfg->padconf_val); if (bootverbose) { char name[32]; From 0a39cc71f8203f6ebbc23fab622fb7fd0449d54f Mon Sep 17 00:00:00 2001 From: Luiz Otavio O Souza Date: Wed, 12 Nov 2014 03:07:46 +0000 Subject: [PATCH 077/280] Fix the error checking, broken on r273337, to _not_ ignore controller errors. Without this fix you can't even scan the bus (all operations will always succeed). MFC with: r273337 Pointy hat to: loos --- sys/arm/broadcom/bcm2835/bcm2835_bsc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sys/arm/broadcom/bcm2835/bcm2835_bsc.c b/sys/arm/broadcom/bcm2835/bcm2835_bsc.c index 4c6976525ee..58fa09e1acd 100644 --- a/sys/arm/broadcom/bcm2835/bcm2835_bsc.c +++ b/sys/arm/broadcom/bcm2835/bcm2835_bsc.c @@ -441,7 +441,7 @@ bcm_bsc_transfer(device_t dev, struct iic_msg *msgs, uint32_t nmsgs) err = mtx_sleep(dev, &sc->sc_mtx, 0, "bsciow", hz); /* Check for errors. */ - if (err != 0 && (sc->sc_flags & BCM_I2C_ERROR)) + if (err == 0 && (sc->sc_flags & BCM_I2C_ERROR)) err = EIO; if (err != 0) break; From 82a2ce40623540e1cc9624fc550d9aac8dd94560 Mon Sep 17 00:00:00 2001 From: Luiz Otavio O Souza Date: Wed, 12 Nov 2014 03:59:26 +0000 Subject: [PATCH 078/280] Fix a few cases of use of uninitialized variables. Found with -Wall. 
MFC after: 1 week --- usr.sbin/i2c/i2c.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/usr.sbin/i2c/i2c.c b/usr.sbin/i2c/i2c.c index 3a61aa53fe7..920dda21f33 100644 --- a/usr.sbin/i2c/i2c.c +++ b/usr.sbin/i2c/i2c.c @@ -142,6 +142,7 @@ scan_bus(struct iiccmd cmd, char *dev, int skip, char *skip_addr) if (tokens == NULL) { fprintf(stderr, "Error allocating tokens " "buffer\n"); + error = -1; goto out; } index = skip_get_tokens(skip_addr, tokens, @@ -150,6 +151,7 @@ scan_bus(struct iiccmd cmd, char *dev, int skip, char *skip_addr) if (!no_range && (addr_range.start > addr_range.end)) { fprintf(stderr, "Skip address out of range\n"); + error = -1; goto out; } } @@ -409,8 +411,10 @@ i2c_read(char *dev, struct options i2c_opt, char *i2c_buf) if (i2c_opt.mode == I2C_MODE_STOP_START) { cmd.slave = i2c_opt.addr; error = ioctl(fd, I2CSTOP, &cmd); - if (error == -1) + if (error == -1) { + err_msg = "error sending stop condtion\n"; goto err2; + } } } cmd.slave = i2c_opt.addr; @@ -432,8 +436,10 @@ i2c_read(char *dev, struct options i2c_opt, char *i2c_buf) } } error = ioctl(fd, I2CSTOP, &cmd); - if (error == -1) + if (error == -1) { + err_msg = "error sending stop condtion\n"; goto err2; + } for (i = 0; i < i2c_opt.count; i++) { error = read(fd, &i2c_buf[i], 1); From b10cc05cf5d5fc7a0827c50a1a58265752d3edc8 Mon Sep 17 00:00:00 2001 From: Glen Barber Date: Wed, 12 Nov 2014 08:36:42 +0000 Subject: [PATCH 079/280] Fix an mdoc(7) macro that is not an option in the provided description. Bump Dd. As CDDL License dictates, update the Copyright accordingly. 
Sponsored by: The FreeBSD Foundation --- cddl/contrib/opensolaris/cmd/zfs/zfs.8 | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/cddl/contrib/opensolaris/cmd/zfs/zfs.8 b/cddl/contrib/opensolaris/cmd/zfs/zfs.8 index 065497f2a04..e37b14868e9 100644 --- a/cddl/contrib/opensolaris/cmd/zfs/zfs.8 +++ b/cddl/contrib/opensolaris/cmd/zfs/zfs.8 @@ -27,10 +27,11 @@ .\" Copyright (c) 2014, Joyent, Inc. All rights reserved. .\" Copyright (c) 2013, Steven Hartland .\" Copyright (c) 2014, Xin LI +.\" Copyright (c) 2014, The FreeBSD Foundation, All Rights Reserved. .\" .\" $FreeBSD$ .\" -.Dd November 10, 2014 +.Dd November 12, 2014 .Dt ZFS 8 .Os .Sh NAME @@ -1791,7 +1792,7 @@ descendent file systems. Recursively destroy all clones of these snapshots, including the clones, snapshots, and children. If this flag is specified, the -.Op fl d +.Fl d flag will have no effect. .It Fl n Do a dry-run ("No-op") deletion. No data will be deleted. This is useful in From cfa6009e3646cc9efc24f3b3fb2d15b4f6a6c2b5 Mon Sep 17 00:00:00 2001 From: Gleb Smirnoff Date: Wed, 12 Nov 2014 09:57:15 +0000 Subject: [PATCH 080/280] In preparation of merging projects/sendfile, transform bare access to sb_cc member of struct sockbuf to a couple of inline functions: sbavail() and sbused() Right now they are equal, but once notion of "not ready socket buffer data", will be checked in, they are going to be different. Sponsored by: Netflix Sponsored by: Nginx, Inc. 
--- sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_cm.c | 4 +- sys/dev/cxgb/ulp/tom/cxgb_cpl_io.c | 14 +++--- sys/dev/cxgbe/iw_cxgbe/cm.c | 12 ++--- sys/dev/cxgbe/tom/t4_cpl_io.c | 24 +++++----- sys/dev/cxgbe/tom/t4_ddp.c | 44 +++++++++---------- sys/dev/iscsi/icl.c | 2 +- sys/kern/sys_socket.c | 25 ++++++----- sys/kern/uipc_socket.c | 42 +++++++++--------- .../bluetooth/socket/ng_btsocket_l2cap.c | 5 +-- .../bluetooth/socket/ng_btsocket_rfcomm.c | 2 +- .../bluetooth/socket/ng_btsocket_sco.c | 4 +- sys/netinet/accf_dns.c | 6 +-- sys/netinet/accf_http.c | 11 ++--- sys/netinet/siftr.c | 4 +- sys/netinet/tcp_input.c | 14 +++--- sys/netinet/tcp_output.c | 31 +++++++------ .../drivers/infiniband/ulp/sdp/sdp_main.c | 26 +++++------ sys/ofed/drivers/infiniband/ulp/sdp/sdp_rx.c | 2 +- sys/rpc/clnt_vc.c | 4 +- sys/rpc/svc_vc.c | 2 +- sys/sys/sockbuf.h | 28 ++++++++++++ sys/sys/socketvar.h | 2 +- 22 files changed, 173 insertions(+), 135 deletions(-) diff --git a/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_cm.c b/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_cm.c index fea86eba709..eba1ab72b0f 100644 --- a/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_cm.c +++ b/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_cm.c @@ -1507,11 +1507,11 @@ process_data(struct iwch_ep *ep) process_mpa_request(ep); break; default: - if (ep->com.so->so_rcv.sb_cc) + if (sbavail(&ep->com.so->so_rcv)) printf("%s Unexpected streaming data." " ep %p state %d so %p so_state %x so_rcv.sb_cc %u so_rcv.sb_mb %p\n", __FUNCTION__, ep, state_read(&ep->com), ep->com.so, ep->com.so->so_state, - ep->com.so->so_rcv.sb_cc, ep->com.so->so_rcv.sb_mb); + sbavail(&ep->com.so->so_rcv), ep->com.so->so_rcv.sb_mb); break; } return; diff --git a/sys/dev/cxgb/ulp/tom/cxgb_cpl_io.c b/sys/dev/cxgb/ulp/tom/cxgb_cpl_io.c index a86bf720779..81a446a64a4 100644 --- a/sys/dev/cxgb/ulp/tom/cxgb_cpl_io.c +++ b/sys/dev/cxgb/ulp/tom/cxgb_cpl_io.c @@ -445,8 +445,8 @@ t3_push_frames(struct socket *so, int req_completion) * Autosize the send buffer. 
*/ if (snd->sb_flags & SB_AUTOSIZE && VNET(tcp_do_autosndbuf)) { - if (snd->sb_cc >= (snd->sb_hiwat / 8 * 7) && - snd->sb_cc < VNET(tcp_autosndbuf_max)) { + if (sbused(snd) >= (snd->sb_hiwat / 8 * 7) && + sbused(snd) < VNET(tcp_autosndbuf_max)) { if (!sbreserve_locked(snd, min(snd->sb_hiwat + VNET(tcp_autosndbuf_inc), VNET(tcp_autosndbuf_max)), so, curthread)) @@ -597,10 +597,10 @@ t3_rcvd(struct toedev *tod, struct tcpcb *tp) INP_WLOCK_ASSERT(inp); SOCKBUF_LOCK(so_rcv); - KASSERT(toep->tp_enqueued >= so_rcv->sb_cc, - ("%s: so_rcv->sb_cc > enqueued", __func__)); - toep->tp_rx_credits += toep->tp_enqueued - so_rcv->sb_cc; - toep->tp_enqueued = so_rcv->sb_cc; + KASSERT(toep->tp_enqueued >= sbused(so_rcv), + ("%s: sbused(so_rcv) > enqueued", __func__)); + toep->tp_rx_credits += toep->tp_enqueued - sbused(so_rcv); + toep->tp_enqueued = sbused(so_rcv); SOCKBUF_UNLOCK(so_rcv); must_send = toep->tp_rx_credits + 16384 >= tp->rcv_wnd; @@ -1768,7 +1768,7 @@ wr_ack(struct toepcb *toep, struct mbuf *m) so_sowwakeup_locked(so); } - if (snd->sb_sndptroff < snd->sb_cc) + if (snd->sb_sndptroff < sbused(snd)) t3_push_frames(so, 0); out_free: diff --git a/sys/dev/cxgbe/iw_cxgbe/cm.c b/sys/dev/cxgbe/iw_cxgbe/cm.c index d9009bdf6b8..4702e19a586 100644 --- a/sys/dev/cxgbe/iw_cxgbe/cm.c +++ b/sys/dev/cxgbe/iw_cxgbe/cm.c @@ -584,8 +584,8 @@ process_data(struct c4iw_ep *ep) { struct sockaddr_in *local, *remote; - CTR5(KTR_IW_CXGBE, "%s: so %p, ep %p, state %s, sb_cc %d", __func__, - ep->com.so, ep, states[ep->com.state], ep->com.so->so_rcv.sb_cc); + CTR5(KTR_IW_CXGBE, "%s: so %p, ep %p, state %s, sbused %d", __func__, + ep->com.so, ep, states[ep->com.state], sbused(&ep->com.so->so_rcv)); switch (state_read(&ep->com)) { case MPA_REQ_SENT: @@ -601,11 +601,11 @@ process_data(struct c4iw_ep *ep) process_mpa_request(ep); break; default: - if (ep->com.so->so_rcv.sb_cc) - log(LOG_ERR, "%s: Unexpected streaming data. 
" - "ep %p, state %d, so %p, so_state 0x%x, sb_cc %u\n", + if (sbused(&ep->com.so->so_rcv)) + log(LOG_ERR, "%s: Unexpected streaming data. ep %p, " + "state %d, so %p, so_state 0x%x, sbused %u\n", __func__, ep, state_read(&ep->com), ep->com.so, - ep->com.so->so_state, ep->com.so->so_rcv.sb_cc); + ep->com.so->so_state, sbused(&ep->com.so->so_rcv)); break; } } diff --git a/sys/dev/cxgbe/tom/t4_cpl_io.c b/sys/dev/cxgbe/tom/t4_cpl_io.c index 9af2248dede..29e5fa243be 100644 --- a/sys/dev/cxgbe/tom/t4_cpl_io.c +++ b/sys/dev/cxgbe/tom/t4_cpl_io.c @@ -365,15 +365,15 @@ t4_rcvd(struct toedev *tod, struct tcpcb *tp) INP_WLOCK_ASSERT(inp); SOCKBUF_LOCK(sb); - KASSERT(toep->sb_cc >= sb->sb_cc, + KASSERT(toep->sb_cc >= sbused(sb), ("%s: sb %p has more data (%d) than last time (%d).", - __func__, sb, sb->sb_cc, toep->sb_cc)); + __func__, sb, sbused(sb), toep->sb_cc)); if (toep->ulp_mode == ULP_MODE_ISCSI) { toep->rx_credits += toep->sb_cc; toep->sb_cc = 0; } else { - toep->rx_credits += toep->sb_cc - sb->sb_cc; - toep->sb_cc = sb->sb_cc; + toep->rx_credits += toep->sb_cc - sbused(sb); + toep->sb_cc = sbused(sb); } credits = toep->rx_credits; SOCKBUF_UNLOCK(sb); @@ -1079,15 +1079,15 @@ do_peer_close(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) tp->rcv_nxt = be32toh(cpl->rcv_nxt); toep->ddp_flags &= ~(DDP_BUF0_ACTIVE | DDP_BUF1_ACTIVE); - KASSERT(toep->sb_cc >= sb->sb_cc, + KASSERT(toep->sb_cc >= sbused(sb), ("%s: sb %p has more data (%d) than last time (%d).", - __func__, sb, sb->sb_cc, toep->sb_cc)); - toep->rx_credits += toep->sb_cc - sb->sb_cc; + __func__, sb, sbused(sb), toep->sb_cc)); + toep->rx_credits += toep->sb_cc - sbused(sb); #ifdef USE_DDP_RX_FLOW_CONTROL toep->rx_credits -= m->m_len; /* adjust for F_RX_FC_DDP */ #endif sbappendstream_locked(sb, m); - toep->sb_cc = sb->sb_cc; + toep->sb_cc = sbused(sb); } socantrcvmore_locked(so); /* unlocks the sockbuf */ @@ -1582,12 +1582,12 @@ do_rx_data(struct sge_iq *iq, const struct rss_header *rss, struct 
mbuf *m) } } - KASSERT(toep->sb_cc >= sb->sb_cc, + KASSERT(toep->sb_cc >= sbused(sb), ("%s: sb %p has more data (%d) than last time (%d).", - __func__, sb, sb->sb_cc, toep->sb_cc)); - toep->rx_credits += toep->sb_cc - sb->sb_cc; + __func__, sb, sbused(sb), toep->sb_cc)); + toep->rx_credits += toep->sb_cc - sbused(sb); sbappendstream_locked(sb, m); - toep->sb_cc = sb->sb_cc; + toep->sb_cc = sbused(sb); sorwakeup_locked(so); SOCKBUF_UNLOCK_ASSERT(sb); diff --git a/sys/dev/cxgbe/tom/t4_ddp.c b/sys/dev/cxgbe/tom/t4_ddp.c index 3691a3b516d..89585cf8303 100644 --- a/sys/dev/cxgbe/tom/t4_ddp.c +++ b/sys/dev/cxgbe/tom/t4_ddp.c @@ -224,15 +224,15 @@ insert_ddp_data(struct toepcb *toep, uint32_t n) tp->rcv_wnd -= n; #endif - KASSERT(toep->sb_cc >= sb->sb_cc, + KASSERT(toep->sb_cc >= sbused(sb), ("%s: sb %p has more data (%d) than last time (%d).", - __func__, sb, sb->sb_cc, toep->sb_cc)); - toep->rx_credits += toep->sb_cc - sb->sb_cc; + __func__, sb, sbused(sb), toep->sb_cc)); + toep->rx_credits += toep->sb_cc - sbused(sb); #ifdef USE_DDP_RX_FLOW_CONTROL toep->rx_credits -= n; /* adjust for F_RX_FC_DDP */ #endif sbappendstream_locked(sb, m); - toep->sb_cc = sb->sb_cc; + toep->sb_cc = sbused(sb); } /* SET_TCB_FIELD sent as a ULP command looks like this */ @@ -459,15 +459,15 @@ handle_ddp_data(struct toepcb *toep, __be32 ddp_report, __be32 rcv_nxt, int len) else discourage_ddp(toep); - KASSERT(toep->sb_cc >= sb->sb_cc, + KASSERT(toep->sb_cc >= sbused(sb), ("%s: sb %p has more data (%d) than last time (%d).", - __func__, sb, sb->sb_cc, toep->sb_cc)); - toep->rx_credits += toep->sb_cc - sb->sb_cc; + __func__, sb, sbused(sb), toep->sb_cc)); + toep->rx_credits += toep->sb_cc - sbused(sb); #ifdef USE_DDP_RX_FLOW_CONTROL toep->rx_credits -= len; /* adjust for F_RX_FC_DDP */ #endif sbappendstream_locked(sb, m); - toep->sb_cc = sb->sb_cc; + toep->sb_cc = sbused(sb); wakeup: KASSERT(toep->ddp_flags & db_flag, ("%s: DDP buffer not active. 
toep %p, ddp_flags 0x%x, report 0x%x", @@ -908,7 +908,7 @@ handle_ddp(struct socket *so, struct uio *uio, int flags, int error) #endif /* XXX: too eager to disable DDP, could handle NBIO better than this. */ - if (sb->sb_cc >= uio->uio_resid || uio->uio_resid < sc->tt.ddp_thres || + if (sbused(sb) >= uio->uio_resid || uio->uio_resid < sc->tt.ddp_thres || uio->uio_resid > MAX_DDP_BUFFER_SIZE || uio->uio_iovcnt > 1 || so->so_state & SS_NBIO || flags & (MSG_DONTWAIT | MSG_NBIO) || error || so->so_error || sb->sb_state & SBS_CANTRCVMORE) @@ -946,7 +946,7 @@ handle_ddp(struct socket *so, struct uio *uio, int flags, int error) * payload. */ ddp_flags = select_ddp_flags(so, flags, db_idx); - wr = mk_update_tcb_for_ddp(sc, toep, db_idx, sb->sb_cc, ddp_flags); + wr = mk_update_tcb_for_ddp(sc, toep, db_idx, sbused(sb), ddp_flags); if (wr == NULL) { /* * Just unhold the pages. The DDP buffer's software state is @@ -1134,8 +1134,8 @@ restart: /* uio should be just as it was at entry */ KASSERT(oresid == uio->uio_resid, - ("%s: oresid = %d, uio_resid = %zd, sb_cc = %d", - __func__, oresid, uio->uio_resid, sb->sb_cc)); + ("%s: oresid = %d, uio_resid = %zd, sbused = %d", + __func__, oresid, uio->uio_resid, sbused(sb))); error = handle_ddp(so, uio, flags, 0); ddp_handled = 1; @@ -1145,7 +1145,7 @@ restart: /* Abort if socket has reported problems. */ if (so->so_error) { - if (sb->sb_cc > 0) + if (sbused(sb)) goto deliver; if (oresid > uio->uio_resid) goto out; @@ -1157,32 +1157,32 @@ restart: /* Door is closed. Deliver what is left, if any. */ if (sb->sb_state & SBS_CANTRCVMORE) { - if (sb->sb_cc > 0) + if (sbused(sb)) goto deliver; else goto out; } /* Socket buffer is empty and we shall not block. */ - if (sb->sb_cc == 0 && + if (sbused(sb) == 0 && ((so->so_state & SS_NBIO) || (flags & (MSG_DONTWAIT|MSG_NBIO)))) { error = EAGAIN; goto out; } /* Socket buffer got some data that we shall deliver now. 
*/ - if (sb->sb_cc > 0 && !(flags & MSG_WAITALL) && + if (sbused(sb) && !(flags & MSG_WAITALL) && ((sb->sb_flags & SS_NBIO) || (flags & (MSG_DONTWAIT|MSG_NBIO)) || - sb->sb_cc >= sb->sb_lowat || - sb->sb_cc >= uio->uio_resid || - sb->sb_cc >= sb->sb_hiwat) ) { + sbused(sb) >= sb->sb_lowat || + sbused(sb) >= uio->uio_resid || + sbused(sb) >= sb->sb_hiwat) ) { goto deliver; } /* On MSG_WAITALL we must wait until all data or error arrives. */ if ((flags & MSG_WAITALL) && - (sb->sb_cc >= uio->uio_resid || sb->sb_cc >= sb->sb_lowat)) + (sbused(sb) >= uio->uio_resid || sbused(sb) >= sb->sb_lowat)) goto deliver; /* @@ -1201,7 +1201,7 @@ restart: deliver: SOCKBUF_LOCK_ASSERT(&so->so_rcv); - KASSERT(sb->sb_cc > 0, ("%s: sockbuf empty", __func__)); + KASSERT(sbused(sb) > 0, ("%s: sockbuf empty", __func__)); KASSERT(sb->sb_mb != NULL, ("%s: sb_mb == NULL", __func__)); if (sb->sb_flags & SB_DDP_INDICATE && !ddp_handled) @@ -1212,7 +1212,7 @@ deliver: uio->uio_td->td_ru.ru_msgrcv++; /* Fill uio until full or current end of socket buffer is reached. */ - len = min(uio->uio_resid, sb->sb_cc); + len = min(uio->uio_resid, sbused(sb)); if (mp0 != NULL) { /* Dequeue as many mbufs as possible. */ if (!(flags & MSG_PEEK) && len >= sb->sb_mb->m_len) { diff --git a/sys/dev/iscsi/icl.c b/sys/dev/iscsi/icl.c index f56e494fea7..6bce1802042 100644 --- a/sys/dev/iscsi/icl.c +++ b/sys/dev/iscsi/icl.c @@ -758,7 +758,7 @@ icl_receive_thread(void *arg) * is enough data received to read the PDU. 
*/ SOCKBUF_LOCK(&so->so_rcv); - available = so->so_rcv.sb_cc; + available = sbavail(&so->so_rcv); if (available < ic->ic_receive_len) { so->so_rcv.sb_lowat = ic->ic_receive_len; cv_wait(&ic->ic_receive_cv, &so->so_rcv.sb_mtx); diff --git a/sys/kern/sys_socket.c b/sys/kern/sys_socket.c index 47cedfeab4b..dd831ae81a6 100644 --- a/sys/kern/sys_socket.c +++ b/sys/kern/sys_socket.c @@ -175,16 +175,17 @@ soo_ioctl(struct file *fp, u_long cmd, void *data, struct ucred *active_cred, case FIONREAD: /* Unlocked read. */ - *(int *)data = so->so_rcv.sb_cc; + *(int *)data = sbavail(&so->so_rcv); break; case FIONWRITE: /* Unlocked read. */ - *(int *)data = so->so_snd.sb_cc; + *(int *)data = sbavail(&so->so_snd); break; case FIONSPACE: - if ((so->so_snd.sb_hiwat < so->so_snd.sb_cc) || + /* Unlocked read. */ + if ((so->so_snd.sb_hiwat < sbused(&so->so_snd)) || (so->so_snd.sb_mbmax < so->so_snd.sb_mbcnt)) *(int *)data = 0; else @@ -254,6 +255,7 @@ soo_stat(struct file *fp, struct stat *ub, struct ucred *active_cred, struct thread *td) { struct socket *so = fp->f_data; + struct sockbuf *sb; #ifdef MAC int error; #endif @@ -269,15 +271,18 @@ soo_stat(struct file *fp, struct stat *ub, struct ucred *active_cred, * If SBS_CANTRCVMORE is set, but there's still data left in the * receive buffer, the socket is still readable. */ - SOCKBUF_LOCK(&so->so_rcv); - if ((so->so_rcv.sb_state & SBS_CANTRCVMORE) == 0 || - so->so_rcv.sb_cc != 0) + sb = &so->so_rcv; + SOCKBUF_LOCK(sb); + if ((sb->sb_state & SBS_CANTRCVMORE) == 0 || sbavail(sb)) ub->st_mode |= S_IRUSR | S_IRGRP | S_IROTH; - ub->st_size = so->so_rcv.sb_cc - so->so_rcv.sb_ctl; - SOCKBUF_UNLOCK(&so->so_rcv); - /* Unlocked read. 
*/ - if ((so->so_snd.sb_state & SBS_CANTSENDMORE) == 0) + ub->st_size = sbavail(sb) - sb->sb_ctl; + SOCKBUF_UNLOCK(sb); + + sb = &so->so_snd; + SOCKBUF_LOCK(sb); + if ((sb->sb_state & SBS_CANTSENDMORE) == 0) ub->st_mode |= S_IWUSR | S_IWGRP | S_IWOTH; + SOCKBUF_UNLOCK(sb); ub->st_uid = so->so_cred->cr_uid; ub->st_gid = so->so_cred->cr_gid; return (*so->so_proto->pr_usrreqs->pru_sense)(so, ub); diff --git a/sys/kern/uipc_socket.c b/sys/kern/uipc_socket.c index 706632716b1..e2fd1f3a6a5 100644 --- a/sys/kern/uipc_socket.c +++ b/sys/kern/uipc_socket.c @@ -1522,12 +1522,12 @@ restart: * 2. MSG_DONTWAIT is not set */ if (m == NULL || (((flags & MSG_DONTWAIT) == 0 && - so->so_rcv.sb_cc < uio->uio_resid) && - so->so_rcv.sb_cc < so->so_rcv.sb_lowat && + sbavail(&so->so_rcv) < uio->uio_resid) && + sbavail(&so->so_rcv) < so->so_rcv.sb_lowat && m->m_nextpkt == NULL && (pr->pr_flags & PR_ATOMIC) == 0)) { - KASSERT(m != NULL || !so->so_rcv.sb_cc, - ("receive: m == %p so->so_rcv.sb_cc == %u", - m, so->so_rcv.sb_cc)); + KASSERT(m != NULL || !sbavail(&so->so_rcv), + ("receive: m == %p sbavail == %u", + m, sbavail(&so->so_rcv))); if (so->so_error) { if (m != NULL) goto dontblock; @@ -1976,7 +1976,7 @@ restart: /* Abort if socket has reported problems. */ if (so->so_error) { - if (sb->sb_cc > 0) + if (sbavail(sb) > 0) goto deliver; if (oresid > uio->uio_resid) goto out; @@ -1988,32 +1988,32 @@ restart: /* Door is closed. Deliver what is left, if any. */ if (sb->sb_state & SBS_CANTRCVMORE) { - if (sb->sb_cc > 0) + if (sbavail(sb) > 0) goto deliver; else goto out; } /* Socket buffer is empty and we shall not block. */ - if (sb->sb_cc == 0 && + if (sbavail(sb) == 0 && ((so->so_state & SS_NBIO) || (flags & (MSG_DONTWAIT|MSG_NBIO)))) { error = EAGAIN; goto out; } /* Socket buffer got some data that we shall deliver now. 
*/ - if (sb->sb_cc > 0 && !(flags & MSG_WAITALL) && + if (sbavail(sb) > 0 && !(flags & MSG_WAITALL) && ((sb->sb_flags & SS_NBIO) || (flags & (MSG_DONTWAIT|MSG_NBIO)) || - sb->sb_cc >= sb->sb_lowat || - sb->sb_cc >= uio->uio_resid || - sb->sb_cc >= sb->sb_hiwat) ) { + sbavail(sb) >= sb->sb_lowat || + sbavail(sb) >= uio->uio_resid || + sbavail(sb) >= sb->sb_hiwat) ) { goto deliver; } /* On MSG_WAITALL we must wait until all data or error arrives. */ if ((flags & MSG_WAITALL) && - (sb->sb_cc >= uio->uio_resid || sb->sb_cc >= sb->sb_hiwat)) + (sbavail(sb) >= uio->uio_resid || sbavail(sb) >= sb->sb_hiwat)) goto deliver; /* @@ -2027,7 +2027,7 @@ restart: deliver: SOCKBUF_LOCK_ASSERT(&so->so_rcv); - KASSERT(sb->sb_cc > 0, ("%s: sockbuf empty", __func__)); + KASSERT(sbavail(sb) > 0, ("%s: sockbuf empty", __func__)); KASSERT(sb->sb_mb != NULL, ("%s: sb_mb == NULL", __func__)); /* Statistics. */ @@ -2035,7 +2035,7 @@ deliver: uio->uio_td->td_ru.ru_msgrcv++; /* Fill uio until full or current end of socket buffer is reached. */ - len = min(uio->uio_resid, sb->sb_cc); + len = min(uio->uio_resid, sbavail(sb)); if (mp0 != NULL) { /* Dequeue as many mbufs as possible. 
*/ if (!(flags & MSG_PEEK) && len >= sb->sb_mb->m_len) { @@ -2170,9 +2170,9 @@ soreceive_dgram(struct socket *so, struct sockaddr **psa, struct uio *uio, */ SOCKBUF_LOCK(&so->so_rcv); while ((m = so->so_rcv.sb_mb) == NULL) { - KASSERT(so->so_rcv.sb_cc == 0, - ("soreceive_dgram: sb_mb NULL but sb_cc %u", - so->so_rcv.sb_cc)); + KASSERT(sbavail(&so->so_rcv) == 0, + ("soreceive_dgram: sb_mb NULL but sbavail %u", + sbavail(&so->so_rcv))); if (so->so_error) { error = so->so_error; so->so_error = 0; @@ -3248,7 +3248,7 @@ filt_soread(struct knote *kn, long hint) so = kn->kn_fp->f_data; SOCKBUF_LOCK_ASSERT(&so->so_rcv); - kn->kn_data = so->so_rcv.sb_cc - so->so_rcv.sb_ctl; + kn->kn_data = sbavail(&so->so_rcv) - so->so_rcv.sb_ctl; if (so->so_rcv.sb_state & SBS_CANTRCVMORE) { kn->kn_flags |= EV_EOF; kn->kn_fflags = so->so_error; @@ -3260,7 +3260,7 @@ filt_soread(struct knote *kn, long hint) if (kn->kn_data >= kn->kn_sdata) return 1; } else { - if (so->so_rcv.sb_cc >= so->so_rcv.sb_lowat) + if (sbavail(&so->so_rcv) >= so->so_rcv.sb_lowat) return 1; } @@ -3451,7 +3451,7 @@ soisdisconnected(struct socket *so) sorwakeup_locked(so); SOCKBUF_LOCK(&so->so_snd); so->so_snd.sb_state |= SBS_CANTSENDMORE; - sbdrop_locked(&so->so_snd, so->so_snd.sb_cc); + sbdrop_locked(&so->so_snd, sbused(&so->so_snd)); sowwakeup_locked(so); wakeup(&so->so_timeo); } diff --git a/sys/netgraph/bluetooth/socket/ng_btsocket_l2cap.c b/sys/netgraph/bluetooth/socket/ng_btsocket_l2cap.c index bab8bbbbdd8..d2e04879a9f 100644 --- a/sys/netgraph/bluetooth/socket/ng_btsocket_l2cap.c +++ b/sys/netgraph/bluetooth/socket/ng_btsocket_l2cap.c @@ -1127,9 +1127,8 @@ ng_btsocket_l2cap_process_l2ca_write_rsp(struct ng_mesg *msg, /* * Check if we have more data to send */ - sbdroprecord(&pcb->so->so_snd); - if (pcb->so->so_snd.sb_cc > 0) { + if (sbavail(&pcb->so->so_snd) > 0) { if (ng_btsocket_l2cap_send2(pcb) == 0) ng_btsocket_l2cap_timeout(pcb); else @@ -2513,7 +2512,7 @@ ng_btsocket_l2cap_send2(ng_btsocket_l2cap_pcb_p 
pcb) mtx_assert(&pcb->pcb_mtx, MA_OWNED); - if (pcb->so->so_snd.sb_cc == 0) + if (sbavail(&pcb->so->so_snd) == 0) return (EINVAL); /* XXX */ m = m_dup(pcb->so->so_snd.sb_mb, M_NOWAIT); diff --git a/sys/netgraph/bluetooth/socket/ng_btsocket_rfcomm.c b/sys/netgraph/bluetooth/socket/ng_btsocket_rfcomm.c index cb3753d4bf0..a2190c78a61 100644 --- a/sys/netgraph/bluetooth/socket/ng_btsocket_rfcomm.c +++ b/sys/netgraph/bluetooth/socket/ng_btsocket_rfcomm.c @@ -3279,7 +3279,7 @@ ng_btsocket_rfcomm_pcb_send(ng_btsocket_rfcomm_pcb_p pcb, int limit) } for (error = 0, sent = 0; sent < limit; sent ++) { - length = min(pcb->mtu, pcb->so->so_snd.sb_cc); + length = min(pcb->mtu, sbavail(&pcb->so->so_snd)); if (length == 0) break; diff --git a/sys/netgraph/bluetooth/socket/ng_btsocket_sco.c b/sys/netgraph/bluetooth/socket/ng_btsocket_sco.c index f0d87b3940b..9ff0cebabb8 100644 --- a/sys/netgraph/bluetooth/socket/ng_btsocket_sco.c +++ b/sys/netgraph/bluetooth/socket/ng_btsocket_sco.c @@ -906,7 +906,7 @@ ng_btsocket_sco_default_msg_input(struct ng_mesg *msg, hook_p hook) sbdroprecord(&pcb->so->so_snd); /* Send more if we have any */ - if (pcb->so->so_snd.sb_cc > 0) + if (sbavail(&pcb->so->so_snd) > 0) if (ng_btsocket_sco_send2(pcb) == 0) ng_btsocket_sco_timeout(pcb); @@ -1748,7 +1748,7 @@ ng_btsocket_sco_send2(ng_btsocket_sco_pcb_p pcb) mtx_assert(&pcb->pcb_mtx, MA_OWNED); while (pcb->rt->pending < pcb->rt->num_pkts && - pcb->so->so_snd.sb_cc > 0) { + sbavail(&pcb->so->so_snd) > 0) { /* Get a copy of the first packet on send queue */ m = m_dup(pcb->so->so_snd.sb_mb, M_NOWAIT); if (m == NULL) { diff --git a/sys/netinet/accf_dns.c b/sys/netinet/accf_dns.c index ec2b4cfb804..85214d6d93c 100644 --- a/sys/netinet/accf_dns.c +++ b/sys/netinet/accf_dns.c @@ -75,7 +75,7 @@ sohasdns(struct socket *so, void *arg, int waitflag) struct sockbuf *sb = &so->so_rcv; /* If the socket is full, we're ready. 
*/ - if (sb->sb_cc >= sb->sb_hiwat || sb->sb_mbcnt >= sb->sb_mbmax) + if (sbused(sb) >= sb->sb_hiwat || sb->sb_mbcnt >= sb->sb_mbmax) goto ready; /* Check to see if we have a request. */ @@ -115,14 +115,14 @@ skippacket(struct sockbuf *sb) { unsigned long packlen; struct packet q, *p = &q; - if (sb->sb_cc < 2) + if (sbavail(sb) < 2) return DNS_WAIT; q.m = sb->sb_mb; q.n = q.m->m_nextpkt; q.moff = 0; q.offset = 0; - q.len = sb->sb_cc; + q.len = sbavail(sb); GET16(p, packlen); if (packlen + 2 > q.len) diff --git a/sys/netinet/accf_http.c b/sys/netinet/accf_http.c index 41e442c4fa7..33734c717a6 100644 --- a/sys/netinet/accf_http.c +++ b/sys/netinet/accf_http.c @@ -92,7 +92,7 @@ sbfull(struct sockbuf *sb) "mbcnt(%ld) >= mbmax(%ld): %d", sb->sb_cc, sb->sb_hiwat, sb->sb_cc >= sb->sb_hiwat, sb->sb_mbcnt, sb->sb_mbmax, sb->sb_mbcnt >= sb->sb_mbmax); - return (sb->sb_cc >= sb->sb_hiwat || sb->sb_mbcnt >= sb->sb_mbmax); + return (sbused(sb) >= sb->sb_hiwat || sb->sb_mbcnt >= sb->sb_mbmax); } /* @@ -162,13 +162,14 @@ static int sohashttpget(struct socket *so, void *arg, int waitflag) { - if ((so->so_rcv.sb_state & SBS_CANTRCVMORE) == 0 && !sbfull(&so->so_rcv)) { + if ((so->so_rcv.sb_state & SBS_CANTRCVMORE) == 0 && + !sbfull(&so->so_rcv)) { struct mbuf *m; char *cmp; int cmplen, cc; m = so->so_rcv.sb_mb; - cc = so->so_rcv.sb_cc - 1; + cc = sbavail(&so->so_rcv) - 1; if (cc < 1) return (SU_OK); switch (*mtod(m, char *)) { @@ -215,7 +216,7 @@ soparsehttpvers(struct socket *so, void *arg, int waitflag) goto fallout; m = so->so_rcv.sb_mb; - cc = so->so_rcv.sb_cc; + cc = sbavail(&so->so_rcv); inspaces = spaces = 0; for (m = so->so_rcv.sb_mb; m; m = n) { n = m->m_nextpkt; @@ -304,7 +305,7 @@ soishttpconnected(struct socket *so, void *arg, int waitflag) * have NCHRS left */ copied = 0; - ccleft = so->so_rcv.sb_cc; + ccleft = sbavail(&so->so_rcv); if (ccleft < NCHRS) goto readmore; a = b = c = '\0'; diff --git a/sys/netinet/siftr.c b/sys/netinet/siftr.c index 9d2ca50232f..d65564f990f 
100644 --- a/sys/netinet/siftr.c +++ b/sys/netinet/siftr.c @@ -782,9 +782,9 @@ siftr_siftdata(struct pkt_node *pn, struct inpcb *inp, struct tcpcb *tp, pn->flags = tp->t_flags; pn->rxt_length = tp->t_rxtcur; pn->snd_buf_hiwater = inp->inp_socket->so_snd.sb_hiwat; - pn->snd_buf_cc = inp->inp_socket->so_snd.sb_cc; + pn->snd_buf_cc = sbused(&inp->inp_socket->so_snd); pn->rcv_buf_hiwater = inp->inp_socket->so_rcv.sb_hiwat; - pn->rcv_buf_cc = inp->inp_socket->so_rcv.sb_cc; + pn->rcv_buf_cc = sbused(&inp->inp_socket->so_rcv); pn->sent_inflight_bytes = tp->snd_max - tp->snd_una; pn->t_segqlen = tp->t_segqlen; diff --git a/sys/netinet/tcp_input.c b/sys/netinet/tcp_input.c index 625992afaef..468f1421056 100644 --- a/sys/netinet/tcp_input.c +++ b/sys/netinet/tcp_input.c @@ -1745,7 +1745,7 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so, tcp_timer_activate(tp, TT_REXMT, tp->t_rxtcur); sowwakeup(so); - if (so->so_snd.sb_cc) + if (sbavail(&so->so_snd)) (void) tcp_output(tp); goto check_delack; } @@ -2526,7 +2526,7 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so, * Otherwise we would send pure ACKs. */ SOCKBUF_LOCK(&so->so_snd); - avail = so->so_snd.sb_cc - + avail = sbavail(&so->so_snd) - (tp->snd_nxt - tp->snd_una); SOCKBUF_UNLOCK(&so->so_snd); if (avail > 0) @@ -2661,10 +2661,10 @@ process_ACK: cc_ack_received(tp, th, CC_ACK); SOCKBUF_LOCK(&so->so_snd); - if (acked > so->so_snd.sb_cc) { - tp->snd_wnd -= so->so_snd.sb_cc; + if (acked > sbavail(&so->so_snd)) { + tp->snd_wnd -= sbavail(&so->so_snd); mfree = sbcut_locked(&so->so_snd, - (int)so->so_snd.sb_cc); + (int)sbavail(&so->so_snd)); ourfinisacked = 1; } else { mfree = sbcut_locked(&so->so_snd, acked); @@ -2790,7 +2790,7 @@ step6: * actually wanting to send this much urgent data. 
*/ SOCKBUF_LOCK(&so->so_rcv); - if (th->th_urp + so->so_rcv.sb_cc > sb_max) { + if (th->th_urp + sbavail(&so->so_rcv) > sb_max) { th->th_urp = 0; /* XXX */ thflags &= ~TH_URG; /* XXX */ SOCKBUF_UNLOCK(&so->so_rcv); /* XXX */ @@ -2812,7 +2812,7 @@ step6: */ if (SEQ_GT(th->th_seq+th->th_urp, tp->rcv_up)) { tp->rcv_up = th->th_seq + th->th_urp; - so->so_oobmark = so->so_rcv.sb_cc + + so->so_oobmark = sbavail(&so->so_rcv) + (tp->rcv_up - tp->rcv_nxt) - 1; if (so->so_oobmark == 0) so->so_rcv.sb_state |= SBS_RCVATMARK; diff --git a/sys/netinet/tcp_output.c b/sys/netinet/tcp_output.c index 7919e2b72e0..160fadaea5c 100644 --- a/sys/netinet/tcp_output.c +++ b/sys/netinet/tcp_output.c @@ -322,7 +322,7 @@ after_sack_rexmit: * to send then the probe will be the FIN * itself. */ - if (off < so->so_snd.sb_cc) + if (off < sbused(&so->so_snd)) flags &= ~TH_FIN; sendwin = 1; } else { @@ -348,7 +348,8 @@ after_sack_rexmit: */ if (sack_rxmit == 0) { if (sack_bytes_rxmt == 0) - len = ((long)ulmin(so->so_snd.sb_cc, sendwin) - off); + len = ((long)ulmin(sbavail(&so->so_snd), sendwin) - + off); else { long cwin; @@ -357,8 +358,8 @@ after_sack_rexmit: * sending new data, having retransmitted all the * data possible in the scoreboard. */ - len = ((long)ulmin(so->so_snd.sb_cc, tp->snd_wnd) - - off); + len = ((long)ulmin(sbavail(&so->so_snd), tp->snd_wnd) - + off); /* * Don't remove this (len > 0) check ! * We explicitly check for len > 0 here (although it @@ -457,12 +458,15 @@ after_sack_rexmit: * TODO: Shrink send buffer during idle periods together * with congestion window. Requires another timer. Has to * wait for upcoming tcp timer rewrite. + * + * XXXGL: should there be used sbused() or sbavail()? 
*/ if (V_tcp_do_autosndbuf && so->so_snd.sb_flags & SB_AUTOSIZE) { if ((tp->snd_wnd / 4 * 5) >= so->so_snd.sb_hiwat && - so->so_snd.sb_cc >= (so->so_snd.sb_hiwat / 8 * 7) && - so->so_snd.sb_cc < V_tcp_autosndbuf_max && - sendwin >= (so->so_snd.sb_cc - (tp->snd_nxt - tp->snd_una))) { + sbused(&so->so_snd) >= (so->so_snd.sb_hiwat / 8 * 7) && + sbused(&so->so_snd) < V_tcp_autosndbuf_max && + sendwin >= (sbused(&so->so_snd) - + (tp->snd_nxt - tp->snd_una))) { if (!sbreserve_locked(&so->so_snd, min(so->so_snd.sb_hiwat + V_tcp_autosndbuf_inc, V_tcp_autosndbuf_max), so, curthread)) @@ -499,10 +503,11 @@ after_sack_rexmit: tso = 1; if (sack_rxmit) { - if (SEQ_LT(p->rxmit + len, tp->snd_una + so->so_snd.sb_cc)) + if (SEQ_LT(p->rxmit + len, tp->snd_una + sbused(&so->so_snd))) flags &= ~TH_FIN; } else { - if (SEQ_LT(tp->snd_nxt + len, tp->snd_una + so->so_snd.sb_cc)) + if (SEQ_LT(tp->snd_nxt + len, tp->snd_una + + sbused(&so->so_snd))) flags &= ~TH_FIN; } @@ -532,7 +537,7 @@ after_sack_rexmit: */ if (!(tp->t_flags & TF_MORETOCOME) && /* normal case */ (idle || (tp->t_flags & TF_NODELAY)) && - len + off >= so->so_snd.sb_cc && + len + off >= sbavail(&so->so_snd) && (tp->t_flags & TF_NOPUSH) == 0) { goto send; } @@ -660,7 +665,7 @@ dontupdate: * if window is nonzero, transmit what we can, * otherwise force out a byte. */ - if (so->so_snd.sb_cc && !tcp_timer_active(tp, TT_REXMT) && + if (sbavail(&so->so_snd) && !tcp_timer_active(tp, TT_REXMT) && !tcp_timer_active(tp, TT_PERSIST)) { tp->t_rxtshift = 0; tcp_setpersist(tp); @@ -863,7 +868,7 @@ send: * emptied: */ max_len = (tp->t_maxopd - optlen); - if ((off + len) < so->so_snd.sb_cc) { + if ((off + len) < sbavail(&so->so_snd)) { moff = len % max_len; if (moff != 0) { len -= moff; @@ -979,7 +984,7 @@ send: * give data to the user when a buffer fills or * a PUSH comes in.) 
*/ - if (off + len == so->so_snd.sb_cc) + if (off + len == sbused(&so->so_snd)) flags |= TH_PUSH; SOCKBUF_UNLOCK(&so->so_snd); } else { diff --git a/sys/ofed/drivers/infiniband/ulp/sdp/sdp_main.c b/sys/ofed/drivers/infiniband/ulp/sdp/sdp_main.c index 910424dd768..a6eba64e162 100644 --- a/sys/ofed/drivers/infiniband/ulp/sdp/sdp_main.c +++ b/sys/ofed/drivers/infiniband/ulp/sdp/sdp_main.c @@ -747,7 +747,7 @@ sdp_start_disconnect(struct sdp_sock *ssk) ("sdp_start_disconnect: sdp_drop() returned NULL")); } else { soisdisconnecting(so); - unread = so->so_rcv.sb_cc; + unread = sbused(&so->so_rcv); sbflush(&so->so_rcv); sdp_usrclosed(ssk); if (!(ssk->flags & SDP_DROPPED)) { @@ -1259,7 +1259,7 @@ sdp_sorecv(struct socket *so, struct sockaddr **psa, struct uio *uio, /* We will never ever get anything unless we are connected. */ if (!(so->so_state & (SS_ISCONNECTED|SS_ISDISCONNECTED))) { /* When disconnecting there may be still some data left. */ - if (sb->sb_cc > 0) + if (sbavail(sb)) goto deliver; if (!(so->so_state & SS_ISDISCONNECTED)) error = ENOTCONN; @@ -1267,7 +1267,7 @@ sdp_sorecv(struct socket *so, struct sockaddr **psa, struct uio *uio, } /* Socket buffer is empty and we shall not block. */ - if (sb->sb_cc == 0 && + if (sbavail(sb) == 0 && ((so->so_state & SS_NBIO) || (flags & (MSG_DONTWAIT|MSG_NBIO)))) { error = EAGAIN; goto out; @@ -1278,7 +1278,7 @@ restart: /* Abort if socket has reported problems. */ if (so->so_error) { - if (sb->sb_cc > 0) + if (sbavail(sb)) goto deliver; if (oresid > uio->uio_resid) goto out; @@ -1290,25 +1290,25 @@ restart: /* Door is closed. Deliver what is left, if any. */ if (sb->sb_state & SBS_CANTRCVMORE) { - if (sb->sb_cc > 0) + if (sbavail(sb)) goto deliver; else goto out; } /* Socket buffer got some data that we shall deliver now. 
*/ - if (sb->sb_cc > 0 && !(flags & MSG_WAITALL) && + if (sbavail(sb) && !(flags & MSG_WAITALL) && ((so->so_state & SS_NBIO) || (flags & (MSG_DONTWAIT|MSG_NBIO)) || - sb->sb_cc >= sb->sb_lowat || - sb->sb_cc >= uio->uio_resid || - sb->sb_cc >= sb->sb_hiwat) ) { + sbavail(sb) >= sb->sb_lowat || + sbavail(sb) >= uio->uio_resid || + sbavail(sb) >= sb->sb_hiwat) ) { goto deliver; } /* On MSG_WAITALL we must wait until all data or error arrives. */ if ((flags & MSG_WAITALL) && - (sb->sb_cc >= uio->uio_resid || sb->sb_cc >= sb->sb_lowat)) + (sbavail(sb) >= uio->uio_resid || sbavail(sb) >= sb->sb_lowat)) goto deliver; /* @@ -1322,7 +1322,7 @@ restart: deliver: SOCKBUF_LOCK_ASSERT(&so->so_rcv); - KASSERT(sb->sb_cc > 0, ("%s: sockbuf empty", __func__)); + KASSERT(sbavail(sb), ("%s: sockbuf empty", __func__)); KASSERT(sb->sb_mb != NULL, ("%s: sb_mb == NULL", __func__)); /* Statistics. */ @@ -1330,7 +1330,7 @@ deliver: uio->uio_td->td_ru.ru_msgrcv++; /* Fill uio until full or current end of socket buffer is reached. */ - len = min(uio->uio_resid, sb->sb_cc); + len = min(uio->uio_resid, sbavail(sb)); if (mp0 != NULL) { /* Dequeue as many mbufs as possible. */ if (!(flags & MSG_PEEK) && len >= sb->sb_mb->m_len) { @@ -1510,7 +1510,7 @@ sdp_urg(struct sdp_sock *ssk, struct mbuf *mb) if (so == NULL) return; - so->so_oobmark = so->so_rcv.sb_cc + mb->m_pkthdr.len - 1; + so->so_oobmark = sbused(&so->so_rcv) + mb->m_pkthdr.len - 1; sohasoutofband(so); ssk->oobflags &= ~(SDP_HAVEOOB | SDP_HADOOB); if (!(so->so_options & SO_OOBINLINE)) { diff --git a/sys/ofed/drivers/infiniband/ulp/sdp/sdp_rx.c b/sys/ofed/drivers/infiniband/ulp/sdp/sdp_rx.c index f8d6181c0ed..1fe5cb060fa 100644 --- a/sys/ofed/drivers/infiniband/ulp/sdp/sdp_rx.c +++ b/sys/ofed/drivers/infiniband/ulp/sdp/sdp_rx.c @@ -183,7 +183,7 @@ sdp_post_recvs_needed(struct sdp_sock *ssk) * Compute bytes in the receive queue and socket buffer. 
*/ bytes_in_process = (posted - SDP_MIN_TX_CREDITS) * buffer_size; - bytes_in_process += ssk->socket->so_rcv.sb_cc; + bytes_in_process += sbused(&ssk->socket->so_rcv); return bytes_in_process < max_bytes; } diff --git a/sys/rpc/clnt_vc.c b/sys/rpc/clnt_vc.c index 67ad58f5cd1..3899511990a 100644 --- a/sys/rpc/clnt_vc.c +++ b/sys/rpc/clnt_vc.c @@ -860,7 +860,7 @@ clnt_vc_soupcall(struct socket *so, void *arg, int waitflag) * error condition */ do_read = FALSE; - if (so->so_rcv.sb_cc >= sizeof(uint32_t) + if (sbavail(&so->so_rcv) >= sizeof(uint32_t) || (so->so_rcv.sb_state & SBS_CANTRCVMORE) || so->so_error) do_read = TRUE; @@ -913,7 +913,7 @@ clnt_vc_soupcall(struct socket *so, void *arg, int waitflag) * buffered. */ do_read = FALSE; - if (so->so_rcv.sb_cc >= ct->ct_record_resid + if (sbavail(&so->so_rcv) >= ct->ct_record_resid || (so->so_rcv.sb_state & SBS_CANTRCVMORE) || so->so_error) do_read = TRUE; diff --git a/sys/rpc/svc_vc.c b/sys/rpc/svc_vc.c index df1d86e045c..0190a0ce383 100644 --- a/sys/rpc/svc_vc.c +++ b/sys/rpc/svc_vc.c @@ -546,7 +546,7 @@ svc_vc_ack(SVCXPRT *xprt, uint32_t *ack) { *ack = atomic_load_acq_32(&xprt->xp_snt_cnt); - *ack -= xprt->xp_socket->so_snd.sb_cc; + *ack -= sbused(&xprt->xp_socket->so_snd); return (TRUE); } diff --git a/sys/sys/sockbuf.h b/sys/sys/sockbuf.h index ef80e9c1457..f9e8da4cde0 100644 --- a/sys/sys/sockbuf.h +++ b/sys/sys/sockbuf.h @@ -165,6 +165,34 @@ int sbwait(struct sockbuf *sb); int sblock(struct sockbuf *sb, int flags); void sbunlock(struct sockbuf *sb); +/* + * Return how much data is available to be taken out of socket + * bufffer right now. + */ +static inline u_int +sbavail(struct sockbuf *sb) +{ + +#if 0 + SOCKBUF_LOCK_ASSERT(sb); +#endif + return (sb->sb_cc); +} + +/* + * Return how much data sits there in the socket buffer + * It might be that some data is not yet ready to be read. 
+ */ +static inline u_int +sbused(struct sockbuf *sb) +{ + +#if 0 + SOCKBUF_LOCK_ASSERT(sb); +#endif + return (sb->sb_cc); +} + /* * How much space is there in a socket buffer (so->so_snd or so->so_rcv)? * This is problematical if the fields are unsigned, as the space might diff --git a/sys/sys/socketvar.h b/sys/sys/socketvar.h index bfdae0d0716..dfeeede33bb 100644 --- a/sys/sys/socketvar.h +++ b/sys/sys/socketvar.h @@ -208,7 +208,7 @@ struct xsocket { /* can we read something from so? */ #define soreadabledata(so) \ - ((so)->so_rcv.sb_cc >= (so)->so_rcv.sb_lowat || \ + (sbavail(&(so)->so_rcv) >= (so)->so_rcv.sb_lowat || \ !TAILQ_EMPTY(&(so)->so_comp) || (so)->so_error) #define soreadable(so) \ (soreadabledata(so) || ((so)->so_rcv.sb_state & SBS_CANTRCVMORE)) From 2b21d0e88317c3d7bfe7b217873b4afcc116ebf8 Mon Sep 17 00:00:00 2001 From: Gleb Smirnoff Date: Wed, 12 Nov 2014 10:17:46 +0000 Subject: [PATCH 081/280] Merge from projects/sendfile: - Use KASSERT()s instead of panic(). - Use sbavail() instead of sb_cc. Sponsored by: Nginx, Inc. Sponsored by: Netflix --- sys/kern/uipc_usrreq.c | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/sys/kern/uipc_usrreq.c b/sys/kern/uipc_usrreq.c index 11b27d93d11..00fd8099c2e 100644 --- a/sys/kern/uipc_usrreq.c +++ b/sys/kern/uipc_usrreq.c @@ -793,10 +793,9 @@ uipc_rcvd(struct socket *so, int flags) u_int mbcnt, sbcc; unp = sotounpcb(so); - KASSERT(unp != NULL, ("uipc_rcvd: unp == NULL")); - - if (so->so_type != SOCK_STREAM && so->so_type != SOCK_SEQPACKET) - panic("uipc_rcvd socktype %d", so->so_type); + KASSERT(unp != NULL, ("%s: unp == NULL", __func__)); + KASSERT(so->so_type == SOCK_STREAM || so->so_type == SOCK_SEQPACKET, + ("%s: socktype %d", __func__, so->so_type)); /* * Adjust backpressure on sender and wakeup any waiting to write. 
@@ -810,7 +809,7 @@ uipc_rcvd(struct socket *so, int flags) */ SOCKBUF_LOCK(&so->so_rcv); mbcnt = so->so_rcv.sb_mbcnt; - sbcc = so->so_rcv.sb_cc; + sbcc = sbavail(&so->so_rcv); SOCKBUF_UNLOCK(&so->so_rcv); /* * There is a benign race condition at this point. If we're planning to @@ -846,7 +845,10 @@ uipc_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam, int error = 0; unp = sotounpcb(so); - KASSERT(unp != NULL, ("uipc_send: unp == NULL")); + KASSERT(unp != NULL, ("%s: unp == NULL", __func__)); + KASSERT(so->so_type == SOCK_STREAM || so->so_type == SOCK_DGRAM || + so->so_type == SOCK_SEQPACKET, + ("%s: socktype %d", __func__, so->so_type)); if (flags & PRUS_OOB) { error = EOPNOTSUPP; @@ -997,8 +999,11 @@ uipc_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam, } mbcnt = so2->so_rcv.sb_mbcnt; - sbcc = so2->so_rcv.sb_cc; - sorwakeup_locked(so2); + sbcc = sbavail(&so2->so_rcv); + if (sbcc) + sorwakeup_locked(so2); + else + SOCKBUF_UNLOCK(&so2->so_rcv); /* * The PCB lock on unp2 protects the SB_STOP flag. Without it, @@ -1014,9 +1019,6 @@ uipc_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam, UNP_PCB_UNLOCK(unp2); m = NULL; break; - - default: - panic("uipc_send unknown socktype"); } /* From f3c93842bf59a2b93db9cf53446a620aee19576a Mon Sep 17 00:00:00 2001 From: "Andrey V. Elsukov" Date: Wed, 12 Nov 2014 14:00:49 +0000 Subject: [PATCH 082/280] Fix ips_out_nosa errors accounting. MFC after: 1 week Sponsored by: Yandex LLC --- sys/netipsec/ipsec_output.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/sys/netipsec/ipsec_output.c b/sys/netipsec/ipsec_output.c index 1f03196453d..83735b02f2b 100644 --- a/sys/netipsec/ipsec_output.c +++ b/sys/netipsec/ipsec_output.c @@ -358,7 +358,16 @@ again: * this packet because it is responsibility for * upper layer to retransmit the packet. 
*/ - IPSECSTAT_INC(ips_out_nosa); + switch(af) { + case AF_INET: + IPSECSTAT_INC(ips_out_nosa); + break; +#ifdef INET6 + case AF_INET6: + IPSEC6STAT_INC(ips_out_nosa); + break; +#endif + } goto bad; } sav = isr->sav; From 5bd38bcab8afc7c2d3d043ccf545992428bac024 Mon Sep 17 00:00:00 2001 From: Hans Petter Selasky Date: Wed, 12 Nov 2014 14:52:44 +0000 Subject: [PATCH 083/280] Decode more fields when dumping USB descriptors. - Some minor style changes while at it. Submitted by: Dmitry Luhtionov MFC after: 1 week --- usr.sbin/usbconfig/dump.c | 83 +++++++++++++++++++++++++++++++++++++-- 1 file changed, 79 insertions(+), 4 deletions(-) diff --git a/usr.sbin/usbconfig/dump.c b/usr.sbin/usbconfig/dump.c index 52dd132095d..df5cde0c546 100644 --- a/usr.sbin/usbconfig/dump.c +++ b/usr.sbin/usbconfig/dump.c @@ -110,7 +110,6 @@ dump_field(struct libusb20_device *pdev, const char *plevel, printf(" \n"); return; } - if (strcmp(field, "bmAttributes") == 0) { switch (value & 0x03) { case 0: @@ -142,7 +141,6 @@ dump_field(struct libusb20_device *pdev, const char *plevel, return; } } - if ((field[0] == 'i') && (field[1] != 'd')) { /* Indirect String Descriptor */ if (value == 0) { @@ -157,7 +155,84 @@ dump_field(struct libusb20_device *pdev, const char *plevel, printf(" <%s>\n", temp_string); return; } + if (strlen(plevel) == 2 || strlen(plevel) == 6) { + /* Device and Interface Descriptor class codes */ + + if (strcmp(field, "bInterfaceClass") == 0 || + strcmp(field, "bDeviceClass") == 0) { + + switch (value) { + case 0x00: + printf(" \n"); + break; + case 0x01: + printf("